Source code for tslumen.profile.components

"""Functions for decomposing the time series in trend/seasonality/residual."""
from typing import Optional

import pandas as pd
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.tsatools import freq_to_period

from tslumen.profile.base import ProfilingFunction


# Public API: the profiling functions exported by this module.
__all__ = ["stl", "seasonal_split"]


@ProfilingFunction
def stl(
    data: pd.Series,
    period: Optional[int] = None,
    seasonal: int = 7,
    trend: Optional[int] = None,
    low_pass: Optional[int] = None,
    seasonal_deg: Optional[int] = 0,
    trend_deg: Optional[int] = 0,
    low_pass_deg: Optional[int] = 0,
    robust: bool = False,
    seasonal_jump: int = 1,
    trend_jump: int = 1,
    low_pass_jump: int = 1,
) -> pd.DataFrame:
    """Season-Trend decomposition using LOESS.

    Missing values are back-filled and then forward-filled before the
    series is handed to statsmodels' ``STL``.

    Args:
        data (pd.Series): Time series to decompose.
        period (Optional[int]): Periodicity of the series. When omitted (or
            0) it is derived from the frequency inferred from ``data.index``.
        seasonal (int): Forwarded unchanged to ``STL``.
        trend (Optional[int]): Forwarded unchanged to ``STL``.
        low_pass (Optional[int]): Forwarded unchanged to ``STL``.
        seasonal_deg (Optional[int]): Forwarded unchanged to ``STL``.
        trend_deg (Optional[int]): Forwarded unchanged to ``STL``.
        low_pass_deg (Optional[int]): Forwarded unchanged to ``STL``.
        robust (bool): Forwarded unchanged to ``STL``.
        seasonal_jump (int): Forwarded unchanged to ``STL``.
        trend_jump (int): Forwarded unchanged to ``STL``.
        low_pass_jump (int): Forwarded unchanged to ``STL``.

    Returns:
        pd.DataFrame: DataFrame with 3 columns: trend, seasonality and
        residual. An empty DataFrame is returned when the resolved period
        is smaller than 2.

    See Also:
        statsmodels STL:
        https://www.statsmodels.org/stable/generated/statsmodels.tsa.seasonal.STL.html
    """
    period_ = period or freq_to_period(data.index.inferred_freq)
    if period_ < 2:
        return pd.DataFrame()

    # bfill first so leading gaps are covered, then ffill for trailing ones.
    filled = data.bfill().ffill()

    # Pass the *resolved* period: with ``period=None`` STL only looks at
    # ``index.freq`` and fails when the frequency is merely inferable.
    res = STL(
        filled,
        period=period_,
        seasonal=seasonal,
        trend=trend,
        low_pass=low_pass,
        seasonal_deg=seasonal_deg,
        trend_deg=trend_deg,
        low_pass_deg=low_pass_deg,
        robust=robust,
        seasonal_jump=seasonal_jump,
        trend_jump=trend_jump,
        low_pass_jump=low_pass_jump,
    ).fit()

    return pd.DataFrame(
        {"trend": res.trend, "seasonality": res.seasonal, "residual": res.resid}
    )
@ProfilingFunction
def seasonal_split(data: pd.Series) -> pd.DataFrame:
    """Split the data by season.

    The first character of the index's inferred frequency selects how
    timestamps are labelled (columns = season, rows = position in season):

    * Quarterly - Years by Months (the month of each quarterly timestamp)
    * Monthly - Years by Months
    * Weekly - ISO Years by ISO Weeks
    * Daily - Year+Months by Days
    * Business daily - ISO Year+Week by Day of the Week
    * Hourly - Year+Month+Day by Hours

    Args:
        data (pd.Series): Time series to split.

    Returns:
        pd.DataFrame: Seasonally split data, one column per season and one
        row per within-season label. An empty DataFrame is returned when
        the frequency cannot be inferred or is not one of the above.
    """
    # (season label format, within-season label format) per frequency code.
    freq_by = {
        "Q": ("%Y", "%m"),
        "M": ("%Y", "%m"),
        "W": ("%G", "W%V"),
        "D": ("%Y-%m", "%d"),
        "B": ("%G-%V", "%w-%a"),
        "H": ("%Y-%m-%d", "%H"),
    }

    inferred = data.index.inferred_freq or ""
    key = inferred[:1]
    # pandas >= 2.2 reports hourly data as lowercase "h" (formerly "H").
    # Only this alias is normalised: upper-casing blindly would turn
    # "min"/"ms" into the monthly code "M".
    if key == "h":
        key = "H"
    # NOTE(review): only the first character is inspected, so aliases such
    # as "BM"/"BQ" are treated as business daily - confirm this is intended.
    if key not in freq_by:
        return pd.DataFrame()

    fmt_season, fmt_freq = freq_by[key]
    labelled = pd.DataFrame(
        {
            "season": data.index.strftime(fmt_season),
            "freq": data.index.strftime(fmt_freq),
            "value": data.values,
        }
    )
    # Move "season" into the columns, then drop the leftover "value" level.
    return labelled.set_index(["season", "freq"]).unstack(0).droplevel(0, axis=1)