Tweaking the profiler's parameters
import tslumen
import pandas as pd
from pprint import pprint
df = pd.read_csv(
date_parser=lambda dt: pd.to_datetime(f'{dt}-12-31'),
# Keep only the rows for the six codes used in this example, then reshape so
# that 'Value' is indexed by 'Year' with one column per 'Country Code'.
df = (
    df[df['Country Code'].isin(['WLD', 'EUU', 'USA', 'HIC', 'MIC', 'LIC'])]
    .set_index(['Year', 'Country Code'])['Value']
    .unstack(1)
)
# Metadata handed to the report alongside the data frame:
#   'frame'  - descriptive fields for the data set as a whole;
#   'series' - one human-readable description per column (keyed by country code).
meta = {
    'frame': {
        'Description': 'Country, regional and world GDP in current US Dollars ($).',
        'Source': '<a href="">DataHub</a>',
    },
    'series': {
        'WLD': 'World GDP in current USD',
        'EUU': 'European Union GDP in current USD',
        'USA': 'United States GDP in current USD',
        'HIC': 'High income GDP in current USD',
        'MIC': 'Middle income GDP in current USD',
        'LIC': 'Low income GDP in current USD',
    },
}
Unless a profiler is explicitly provided, tslumen’s HtmlReport
uses the DefaultProfiler
to profile the time series data. A dictionary with all the configurations can be obtained by calling its class method; the resulting default configuration is shown below:
{'acf': {'adjusted': False,
'alpha': 0.05,
'fft': False,
'lags': 40,
'missing': 'none'},
'acf_1d': {'adjusted': False,
'alpha': 0.05,
'fft': False,
'lags': 40,
'missing': 'none'},
'acf_2d': {'adjusted': False,
'alpha': 0.05,
'fft': False,
'lags': 40,
'missing': 'none'},
'adfuller_stationarity': {'confidence_level': 0.05},
'binned': {'nbins': None},
'corr_kendall': {},
'corr_pearson': {},
'corr_spearman': {},
'cov': {},
'df_scaled': {},
'dt_end': {},
'dt_start': {},
'freq': {},
'ft_acf': {'n_diff': (0, 1, 2), 'n_size': (1, 10)},
'ft_adfuller': {},
'ft_cross_pts': {},
'ft_entropy': {'n_per_segment': None, 'sampling_frequency': 1.0},
'ft_kpss': {},
'ft_pacf': {'n_diff': (0, 1, 2), 'n_size': (5,)},
'ft_stl': {'freq': None},
'ft_tilewin': {},
'granger_causality': {'addconst': True,
'adf_confidence': 0.1,
'max_diff': 3,
'maxlag': 5,
'test': 'ssr_chi2test'},
'infinite': {},
'iqr': {},
'jarque_bera_normality': {'confidence_level': 0.05},
'kpss_stationarity': {'confidence_level': 0.05},
'kurtosis': {},
'lag_corr': {'lags': ()},
'length': {},
'levene_constant_variance': {'confidence_level': 0.05},
'ljungbox_autocorrelation': {'confidence_level': 0.05, 'n_lags': None},
'lowess': {'delta': 0.0,
'fracs': (0.05, 0.1, 0.15),
'it': 3,
'missing': 'drop'},
'mad': {},
'maximum': {},
'mean': {},
'median': {},
'minimum': {},
'missing': {},
'n_series': {},
'omnibus_normality': {'confidence_level': 0.05},
'pacf': {'alpha': 0.05, 'lags': 40, 'method': 'ywadjusted'},
'pacf_1d': {'alpha': 0.05, 'lags': 40, 'method': 'ywadjusted'},
'pacf_2d': {'alpha': 0.05, 'lags': 40, 'method': 'ywadjusted'},
'pd_percentiles': {},
'pd_quantiles': {},
'period': {},
'q25': {},
'q50': {},
'q75': {},
'rolling_avg': {'max_win_frac': 10, 'wins': ()},
'sample': {'sample_size': 10},
'seasonal_split': {},
'skew': {},
'std': {},
'stl': {'low_pass': None,
'low_pass_deg': 0,
'low_pass_jump': 1,
'period': None,
'robust': False,
'seasonal': 7,
'seasonal_deg': 0,
'seasonal_jump': 1,
'trend': None,
'trend_deg': 0,
'trend_jump': 1},
'supsmu': {'alpha': None,
'final_span': 0.05,
'middle_span': 0.2,
'period': None,
'primary_spans': (0.05, 0.2, 0.5)},
'sz_total': {'memory_deep': True},
'var': {},
'zeros': {}}
In this example we’ll be changing the confidence level of the statistical tests.
# Overrides for the profiler's default configuration: every statistical test
# that exposes a 'confidence_level' setting is moved from its default of 0.05
# to 0.17. Settings not listed here are left out of the override.
profiler_config = {
    'adfuller_stationarity': {'confidence_level': 0.17},
    'jarque_bera_normality': {'confidence_level': 0.17},
    'kpss_stationarity': {'confidence_level': 0.17},
    'levene_constant_variance': {'confidence_level': 0.17},
    'ljungbox_autocorrelation': {'confidence_level': 0.17},
    'omnibus_normality': {'confidence_level': 0.17},
}
After rendering the report, the Confidence column on the Stats tab shows that all values are now set to 0.17.
# Build the HTML report from the data frame and its metadata, passing the
# customised profiler configuration instead of relying on the defaults.
report = tslumen.HtmlReport(df, meta, profiler_config=profiler_config)
# Keep only the section at index 1 of the report's SECTIONS.
# NOTE(review): presumably this is the Stats section referred to in the text — confirm.
report.SECTIONS = report.SECTIONS[1:2]