Add support for plotting multiple series in one figure (multi-plots)

2023-08-29 01:26:40 +02:00 · 2023-08-29 01:26:40 +02:00 · dbce381a71
commit dbce381a71
parent bb214df990
1 changed file with 63 additions and 51 deletions
--- a/wsgi.py
+++ b/wsgi.py
@ -42,10 +42,10 @@ def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]:


 def create_plot_df(
-    current_df: pd.DataFrame | None,
-    data_folder: str,
-    sheet_name: str,
    curr_datetime,
+    current_df: pd.DataFrame | None,
+    data_folder: str = "data",
+    sheet_name: str = "digital",
 ) -> pd.DataFrame:
    data_dict = {}

@ -90,21 +90,13 @@ def create_plot_df(


 def plot(
-    current_df: pd.DataFrame | None = None,
-    data_folder: str = "data",
-    sheet_name: str = "digital",
+    curr_datetime,
+    df: pd.DataFrame,
+    annotate_current: bool = False,
    total_targets: tuple[int, ...] = (1500,),
    alpha: float | None = None,
    landesbez_str: str | None = None,
 ) -> str:
-    curr_datetime = datetime.datetime.now()
-    df = create_plot_df(
-        current_df=current_df,
-        data_folder=data_folder,
-        sheet_name=sheet_name,
-        curr_datetime=curr_datetime,
-    )
-
    fig = plt.figure(dpi=300)

    # fill weekends
@ -114,51 +106,53 @@ def plot(
        if day.weekday() >= 5:
            plt.gca().axvspan(days[idx], days[idx + 1], alpha=0.2, color="gray")

-    series = df.sum(axis=1) if landesbez_str is None else df[landesbez_str]
-    plot_df = series.to_frame("Digitale Befragung").replace(0, np.nan)
-    plot_df = plot_df.astype({"Digitale Befragung": "float32"})
-    if not pd.isna(plot_df).all().item():
-        if alpha is not None:
-            plt.fill_between(
+    for bez in landesbez_str:
+        series = df.sum(axis=1) if bez is None else df[bez]
+        plot_df = series.to_frame("Digitale Befragung").replace(0, np.nan)
+        plot_df = plot_df.astype({"Digitale Befragung": "float32"})
+        if not pd.isna(plot_df).all().item():
+            if alpha is not None:
+                plt.fill_between(
+                    plot_df.dropna().index,
+                    plot_df.dropna()["Digitale Befragung"],
+                    color="#e4004e",
+                    alpha=alpha,
+                )
+
+            (line,) = plt.plot(
                plot_df.dropna().index,
                plot_df.dropna()["Digitale Befragung"],
-                color="#e4004e",
-                alpha=alpha,
+                ls="--",
+                marker="o",
+                lw=1,
+                color="#e4004e" if bez is None else None,
+                markersize=4,
+                label=bez if bez is not None else "Bundesweit",
            )

-        plt.plot(
-            plot_df.dropna().index,
-            plot_df.dropna()["Digitale Befragung"],
-            ls="--",
-            marker="o",
-            lw=1,
-            color="#e4004e",
-            markersize=4,
-            label=landesbez_str,
-        )
+            if annotate_current and bez is None:
+                plt.annotate(
+                    "Jetzt",
+                    (
+                        plot_df.dropna().index[-1],
+                        plot_df.dropna()["Digitale Befragung"][-1] * 1.03,
+                    ),
+                    fontsize=8,
+                    ha="center",
+                )

-        if current_df is not None:
-            plt.annotate(
-                "Jetzt",
-                (
-                    plot_df.dropna().index[-1],
-                    plot_df.dropna()["Digitale Befragung"][-1] * 1.03,
-                ),
-                fontsize=8,
-                ha="center",
+            plt.plot(
+                plot_df.index,
+                plot_df["Digitale Befragung"],
+                lw=1.5,
+                color=line.get_color(),
+                # label=bez,
            )

-        plt.plot(
-            plot_df.index,
-            plot_df["Digitale Befragung"],
-            lw=1.5,
-            color="#e4004e",
-            label=landesbez_str,
-        )
-
    plt.title("Teilnahme an Digitaler Beschäftigtenbefragung")
    plt.ylabel("# Teilnahmen")
    plt.ylim(0, total_targets[0] + 100)
+    plt.legend()

    # use timezone offset to center tick labels
    plt.gca().xaxis.set_major_locator(
@ -203,6 +197,7 @@ def convert_fig_to_svg(fig: plt.Figure) -> str:
@cache.cached()
 def tables(
    url: str = "https://beschaeftigtenbefragung.verdi.de/",
+    importance_factor: float = 1,
 ):
    def _print_as_html(df: pd.DataFrame):
        df = df.astype({"Digitale Befragung": "Int32"})
@ -236,6 +231,8 @@ def tables(

    output_str = []

+    curr_datetime = datetime.datetime.now()
+
    try:
        df, df_state = get_tables(url)

@ -246,7 +243,8 @@ def tables(

        df_state = df_state.sort_values("Landesbezirk")

-        fig = plot(df_state)
+        plot_df = create_plot_df(curr_datetime, df_state)
+        annotate_current = True
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    except Exception as e:
@ -263,9 +261,23 @@ def tables(
                {"Digitale Befragung": "Int32"}
            )

-        fig = plot()
+        plot_df = create_plot_df(curr_datetime)
+        annotate_current = False
        timestamp = Markup(f'<font color="red">{key} 10:00:00</font>')

+    total = plot_df.loc[curr_datetime].sum()
+    landesbez_strs = [None] + [
+        bez
+        for bez in plot_df.columns
+        if plot_df.loc[curr_datetime][bez] >= importance_factor * total
+    ]
+    fig = plot(
+        curr_datetime,
+        plot_df,
+        annotate_current=annotate_current,
+        landesbez_str=landesbez_strs,
+    )
+
    _print_as_html(df_state)
    _print_as_html(df)