Tweaking the profiler's parameters¶

[1]:

import tslumen
import pandas as pd
from pprint import pprint

[3]:

# Load the DataHub GDP dataset and pivot it into a wide frame: one row per
# year-end date, one column per selected country/region code.
df = (
    pd.read_csv('https://datahub.io/core/gdp/r/gdp.csv')
    # `date_parser` is deprecated since pandas 2.0, so the year is converted
    # explicitly: each observation is stamped with the last day of its year.
    .assign(
        Year=lambda gdp: pd.to_datetime(
            gdp['Year'].astype(str) + '-12-31', format='%Y-%m-%d'
        )
    )
    # Keep only the aggregates/countries described in `meta['series']` below.
    .loc[lambda gdp: gdp['Country Code'].isin(['WLD', 'EUU', 'USA', 'HIC', 'MIC', 'LIC'])]
    .set_index(['Year', 'Country Code'])['Value']
    # Move the country code (index level 1) into the columns.
    .unstack(1)
)

# Metadata passed to the report: 'frame' describes the whole dataset and
# 'series' holds one description per column of `df`.
meta = {
    'frame': {
        'Description': 'Country, regional and world GDP in current US Dollars ($).',
        # Link to the dataset that is actually loaded above (GDP).
        'Source': '<a href="https://datahub.io/core/gdp">DataHub</a>',
    },
    'series': {
        'WLD': 'World GDP in current USD',
        'EUU': 'European Union GDP in current USD',
        'USA': 'United States GDP in current USD',
        'HIC': 'High income GDP in current USD',
        'MIC': 'Middle income GDP in current USD',
        'LIC': 'Low income GDP in current USD',
    }
}

Unless a profiler is explicitly provided, tslumen’s HtmlReport uses the DefaultProfiler to profile the time series data.

A dictionary with all the configurations can be obtained by calling its class method get_config_defaults.

[4]:

# Fetch the default configuration of every profiling function, then
# pretty-print it so the nested per-function options are easy to scan.
default_profiler_config = tslumen.DefaultProfiler.get_config_defaults()
pprint(default_profiler_config)

{'acf': {'adjusted': False,
         'alpha': 0.05,
         'fft': False,
         'lags': 40,
         'missing': 'none'},
 'acf_1d': {'adjusted': False,
            'alpha': 0.05,
            'fft': False,
            'lags': 40,
            'missing': 'none'},
 'acf_2d': {'adjusted': False,
            'alpha': 0.05,
            'fft': False,
            'lags': 40,
            'missing': 'none'},
 'adfuller_stationarity': {'confidence_level': 0.05},
 'binned': {'nbins': None},
 'corr_kendall': {},
 'corr_pearson': {},
 'corr_spearman': {},
 'cov': {},
 'df_scaled': {},
 'dt_end': {},
 'dt_start': {},
 'freq': {},
 'ft_acf': {'n_diff': (0, 1, 2), 'n_size': (1, 10)},
 'ft_adfuller': {},
 'ft_cross_pts': {},
 'ft_entropy': {'n_per_segment': None, 'sampling_frequency': 1.0},
 'ft_kpss': {},
 'ft_pacf': {'n_diff': (0, 1, 2), 'n_size': (5,)},
 'ft_stl': {'freq': None},
 'ft_tilewin': {},
 'granger_causality': {'addconst': True,
                       'adf_confidence': 0.1,
                       'max_diff': 3,
                       'maxlag': 5,
                       'test': 'ssr_chi2test'},
 'infinite': {},
 'iqr': {},
 'jarque_bera_normality': {'confidence_level': 0.05},
 'kpss_stationarity': {'confidence_level': 0.05},
 'kurtosis': {},
 'lag_corr': {'lags': ()},
 'length': {},
 'levene_constant_variance': {'confidence_level': 0.05},
 'ljungbox_autocorrelation': {'confidence_level': 0.05, 'n_lags': None},
 'lowess': {'delta': 0.0,
            'fracs': (0.05, 0.1, 0.15),
            'it': 3,
            'missing': 'drop'},
 'mad': {},
 'maximum': {},
 'mean': {},
 'median': {},
 'minimum': {},
 'missing': {},
 'n_series': {},
 'omnibus_normality': {'confidence_level': 0.05},
 'pacf': {'alpha': 0.05, 'lags': 40, 'method': 'ywadjusted'},
 'pacf_1d': {'alpha': 0.05, 'lags': 40, 'method': 'ywadjusted'},
 'pacf_2d': {'alpha': 0.05, 'lags': 40, 'method': 'ywadjusted'},
 'pd_percentiles': {},
 'pd_quantiles': {},
 'period': {},
 'q25': {},
 'q50': {},
 'q75': {},
 'rolling_avg': {'max_win_frac': 10, 'wins': ()},
 'sample': {'sample_size': 10},
 'seasonal_split': {},
 'skew': {},
 'std': {},
 'stl': {'low_pass': None,
         'low_pass_deg': 0,
         'low_pass_jump': 1,
         'period': None,
         'robust': False,
         'seasonal': 7,
         'seasonal_deg': 0,
         'seasonal_jump': 1,
         'trend': None,
         'trend_deg': 0,
         'trend_jump': 1},
 'supsmu': {'alpha': None,
            'final_span': 0.05,
            'middle_span': 0.2,
            'period': None,
            'primary_spans': (0.05, 0.2, 0.5)},
 'sz_total': {'memory_deep': True},
 'var': {},
 'zeros': {}}

In this example we’ll be changing the confidence level of the statistical tests.

[5]:

# Confidence level to apply to every statistical test listed below
# (each of them defaults to 0.05 in the profiler configuration).
confidence_level = 0.17

# Names of the statistical-test profiling functions to override.
statistical_tests = (
    'adfuller_stationarity',
    'jarque_bera_normality',
    'kpss_stationarity',
    'levene_constant_variance',
    'ljungbox_autocorrelation',
    'omnibus_normality',
)

# Only the overridden option is listed for each test.
profiler_config = {
    test_name: {'confidence_level': confidence_level}
    for test_name in statistical_tests
}

Once the report is rendered, the Confidence column on the Stats tab shows that every test now uses a confidence level of 0.17.

[6]:

# Build the HTML report, passing the overrides defined in `profiler_config`.
report = tslumen.HtmlReport(df, meta, profiler_config=profiler_config)
# Keep only the second entry of SECTIONS so the rendered report is reduced to a
# single section (presumably the one holding the Stats tab -- TODO confirm).
report.SECTIONS = report.SECTIONS[1:2]
# Bare last expression: the notebook displays the report as the cell output.
report

[6]: