diff --git a/wsgi.py b/wsgi.py index a24c658..b2c4dab 100644 --- a/wsgi.py +++ b/wsgi.py @@ -42,10 +42,10 @@ def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]: def create_plot_df( - current_df: pd.DataFrame | None, - data_folder: str, - sheet_name: str, curr_datetime, + current_df: pd.DataFrame | None, + data_folder: str = "data", + sheet_name: str = "digital", ) -> pd.DataFrame: data_dict = {} @@ -90,21 +90,13 @@ def create_plot_df( def plot( - current_df: pd.DataFrame | None = None, - data_folder: str = "data", - sheet_name: str = "digital", + curr_datetime, + df: pd.DataFrame, + annotate_current: bool = False, total_targets: tuple[int, ...] = (1500,), alpha: float | None = None, landesbez_str: str | None = None, ) -> str: - curr_datetime = datetime.datetime.now() - df = create_plot_df( - current_df=current_df, - data_folder=data_folder, - sheet_name=sheet_name, - curr_datetime=curr_datetime, - ) - fig = plt.figure(dpi=300) # fill weekends @@ -114,51 +106,53 @@ def plot( if day.weekday() >= 5: plt.gca().axvspan(days[idx], days[idx + 1], alpha=0.2, color="gray") - series = df.sum(axis=1) if landesbez_str is None else df[landesbez_str] - plot_df = series.to_frame("Digitale Befragung").replace(0, np.nan) - plot_df = plot_df.astype({"Digitale Befragung": "float32"}) - if not pd.isna(plot_df).all().item(): - if alpha is not None: - plt.fill_between( + for bez in landesbez_str: + series = df.sum(axis=1) if bez is None else df[bez] + plot_df = series.to_frame("Digitale Befragung").replace(0, np.nan) + plot_df = plot_df.astype({"Digitale Befragung": "float32"}) + if not pd.isna(plot_df).all().item(): + if alpha is not None: + plt.fill_between( + plot_df.dropna().index, + plot_df.dropna()["Digitale Befragung"], + color="#e4004e", + alpha=alpha, + ) + + (line,) = plt.plot( plot_df.dropna().index, plot_df.dropna()["Digitale Befragung"], - color="#e4004e", - alpha=alpha, + ls="--", + marker="o", + lw=1, + color="#e4004e" if bez is None else None, + markersize=4, + label=bez if bez is not None else "Bundesweit", ) - plt.plot( - plot_df.dropna().index, - plot_df.dropna()["Digitale Befragung"], - ls="--", - marker="o", - lw=1, - color="#e4004e", - markersize=4, - label=landesbez_str, - ) + if annotate_current and bez is None: + plt.annotate( + "Jetzt", + ( + plot_df.dropna().index[-1], + plot_df.dropna()["Digitale Befragung"][-1] * 1.03, + ), + fontsize=8, + ha="center", + ) - if current_df is not None: - plt.annotate( - "Jetzt", - ( - plot_df.dropna().index[-1], - plot_df.dropna()["Digitale Befragung"][-1] * 1.03, - ), - fontsize=8, - ha="center", + plt.plot( + plot_df.index, + plot_df["Digitale Befragung"], + lw=1.5, + color=line.get_color(), + # label=bez, ) - plt.plot( - plot_df.index, - plot_df["Digitale Befragung"], - lw=1.5, - color="#e4004e", - label=landesbez_str, - ) - plt.title("Teilnahme an Digitaler Beschäftigtenbefragung") plt.ylabel("# Teilnahmen") plt.ylim(0, total_targets[0] + 100) + plt.legend() # use timezone offset to center tick labels plt.gca().xaxis.set_major_locator( @@ -203,6 +197,7 @@ def convert_fig_to_svg(fig: plt.Figure) -> str: @cache.cached() def tables( url: str = "https://beschaeftigtenbefragung.verdi.de/", + importance_factor: float = 1, ): def _print_as_html(df: pd.DataFrame): df = df.astype({"Digitale Befragung": "Int32"}) @@ -236,6 +231,8 @@ def tables( output_str = [] + curr_datetime = datetime.datetime.now() + try: df, df_state = get_tables(url) @@ -246,7 +243,8 @@ def tables( df_state = df_state.sort_values("Landesbezirk") - fig = plot(df_state) + plot_df = create_plot_df(curr_datetime, df_state) + annotate_current = True timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") except Exception as e: @@ -263,9 +261,23 @@ def tables( {"Digitale Befragung": "Int32"} ) - fig = plot() + plot_df = create_plot_df(curr_datetime) + annotate_current = False timestamp = Markup(f'{key} 10:00:00') + total = plot_df.loc[curr_datetime].sum() + landesbez_strs = [None] + [ + bez + for bez in plot_df.columns + if plot_df.loc[curr_datetime][bez] >= importance_factor * total + ] + fig = plot( + curr_datetime, + plot_df, + annotate_current=annotate_current, + landesbez_str=landesbez_strs, + ) + _print_as_html(df_state) _print_as_html(df)