Tweaking the profiler’s parameters¶
[1]:
import tslumen
import pandas as pd
from pprint import pprint
[3]:
# Download the GDP table in long format. The code below relies on it having
# 'Year', 'Country Code' and 'Value' columns.
gdp_long = pd.read_csv('https://datahub.io/core/gdp/r/gdp.csv')

# Stamp every yearly observation on the last day of its year.
# This is done explicitly after loading because read_csv's `date_parser`
# argument is deprecated since pandas 2.0.
gdp_long['Year'] = pd.to_datetime(
    gdp_long['Year'].astype(str) + '-12-31',
    format='%Y-%m-%d',
)

# Keep only the aggregates described in `meta`, then pivot to a wide frame:
# one row per year-end date, one column per country code.
selected_codes = ['WLD', 'EUU', 'USA', 'HIC', 'MIC', 'LIC']
df = (
    gdp_long[gdp_long['Country Code'].isin(selected_codes)]
    .set_index(['Year', 'Country Code'])['Value']
    .unstack(1)
)
# Descriptive metadata handed to HtmlReport further down: 'frame' holds
# dataset-level fields, 'series' maps each column of `df` to a description.
meta = {
    'frame': {
        'Description': 'Country, regional and world GDP in current US Dollars ($).',
        # Point at the dataset that is actually loaded (core/gdp); the link
        # previously referenced the unrelated core/gold-prices dataset.
        'Source': '<a href="https://datahub.io/core/gdp">DataHub</a>',
    },
    'series': {
        'WLD': 'World GDP in current USD',
        'EUU': 'European Union GDP in current USD',
        'USA': 'United States GDP in current USD',
        'HIC': 'High income GDP in current USD',
        'MIC': 'Middle income GDP in current USD',
        'LIC': 'Low income GDP in current USD',
    },
}
Unless a profiler is explicitly provided, tslumen’s `HtmlReport` uses the `DefaultProfiler` to profile the time series data. A dictionary with all of its configuration options and their default values can be obtained by calling its class method `get_config_defaults`.
[4]:
# Fetch the default configuration of the DefaultProfiler and pretty-print it.
default_config = tslumen.DefaultProfiler.get_config_defaults()
pprint(default_config)
{'acf': {'adjusted': False,
'alpha': 0.05,
'fft': False,
'lags': 40,
'missing': 'none'},
'acf_1d': {'adjusted': False,
'alpha': 0.05,
'fft': False,
'lags': 40,
'missing': 'none'},
'acf_2d': {'adjusted': False,
'alpha': 0.05,
'fft': False,
'lags': 40,
'missing': 'none'},
'adfuller_stationarity': {'confidence_level': 0.05},
'binned': {'nbins': None},
'corr_kendall': {},
'corr_pearson': {},
'corr_spearman': {},
'cov': {},
'df_scaled': {},
'dt_end': {},
'dt_start': {},
'freq': {},
'ft_acf': {'n_diff': (0, 1, 2), 'n_size': (1, 10)},
'ft_adfuller': {},
'ft_cross_pts': {},
'ft_entropy': {'n_per_segment': None, 'sampling_frequency': 1.0},
'ft_kpss': {},
'ft_pacf': {'n_diff': (0, 1, 2), 'n_size': (5,)},
'ft_stl': {'freq': None},
'ft_tilewin': {},
'granger_causality': {'addconst': True,
'adf_confidence': 0.1,
'max_diff': 3,
'maxlag': 5,
'test': 'ssr_chi2test'},
'infinite': {},
'iqr': {},
'jarque_bera_normality': {'confidence_level': 0.05},
'kpss_stationarity': {'confidence_level': 0.05},
'kurtosis': {},
'lag_corr': {'lags': ()},
'length': {},
'levene_constant_variance': {'confidence_level': 0.05},
'ljungbox_autocorrelation': {'confidence_level': 0.05, 'n_lags': None},
'lowess': {'delta': 0.0,
'fracs': (0.05, 0.1, 0.15),
'it': 3,
'missing': 'drop'},
'mad': {},
'maximum': {},
'mean': {},
'median': {},
'minimum': {},
'missing': {},
'n_series': {},
'omnibus_normality': {'confidence_level': 0.05},
'pacf': {'alpha': 0.05, 'lags': 40, 'method': 'ywadjusted'},
'pacf_1d': {'alpha': 0.05, 'lags': 40, 'method': 'ywadjusted'},
'pacf_2d': {'alpha': 0.05, 'lags': 40, 'method': 'ywadjusted'},
'pd_percentiles': {},
'pd_quantiles': {},
'period': {},
'q25': {},
'q50': {},
'q75': {},
'rolling_avg': {'max_win_frac': 10, 'wins': ()},
'sample': {'sample_size': 10},
'seasonal_split': {},
'skew': {},
'std': {},
'stl': {'low_pass': None,
'low_pass_deg': 0,
'low_pass_jump': 1,
'period': None,
'robust': False,
'seasonal': 7,
'seasonal_deg': 0,
'seasonal_jump': 1,
'trend': None,
'trend_deg': 0,
'trend_jump': 1},
'supsmu': {'alpha': None,
'final_span': 0.05,
'middle_span': 0.2,
'period': None,
'primary_spans': (0.05, 0.2, 0.5)},
'sz_total': {'memory_deep': True},
'var': {},
'zeros': {}}
In this example we’ll be changing the confidence level of the statistical tests.
[5]:
# Every statistical test below gets the same overridden confidence level.
CONFIDENCE_LEVEL = 0.17
STAT_TESTS = (
    'adfuller_stationarity',
    'jarque_bera_normality',
    'kpss_stationarity',
    'levene_constant_variance',
    'ljungbox_autocorrelation',
    'omnibus_normality',
)

# One independent settings dict per test, keyed by the test's config name.
profiler_config = {
    test_name: {'confidence_level': CONFIDENCE_LEVEL}
    for test_name in STAT_TESTS
}
After rendering the report, you can see in the Confidence column of the Stats tab that all values are now set to `0.17`.
[6]:
# Build the report, passing the overridden confidence levels to the profiler.
report = tslumen.HtmlReport(df, meta, profiler_config=profiler_config)

# Restrict the rendered report to its second section only.
# NOTE(review): presumably this is the Stats tab mentioned in the text — confirm.
second_section_only = slice(1, 2)
report.SECTIONS = report.SECTIONS[second_section_only]

# A bare last expression makes the notebook render the report inline.
report
[6]: