From dc806712953f49bde06aaef9586e3f21ccc5cca9 Mon Sep 17 00:00:00 2001 From: Felix Blanke Date: Mon, 28 Aug 2023 16:56:01 +0200 Subject: [PATCH] Group by Landesbezirk --- download_digital.py | 4 ++++ wsgi.py | 24 +++++++++++++++--------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/download_digital.py b/download_digital.py index ca3035c..c8119da 100644 --- a/download_digital.py +++ b/download_digital.py @@ -85,6 +85,10 @@ landesbezirk_dict = { } +def get_landesbezirk(id: str): + return landesbezirk_dict[str((int(id) // 100) * 100)] + + def get_bez_data( tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/" ) -> list[dict]: diff --git a/wsgi.py b/wsgi.py index 3786245..8e6ed3f 100644 --- a/wsgi.py +++ b/wsgi.py @@ -11,7 +11,7 @@ import pandas as pd from flask import Flask, Markup, render_template, request from flask_caching import Cache -from download_digital import construct_dataframe, get_bez_data +from download_digital import construct_dataframe, get_bez_data, get_landesbezirk config = { "CACHE_TYPE": "FileSystemCache", @@ -52,26 +52,32 @@ def create_plot_df( for f in sorted(Path(data_folder).iterdir()): with f.open("rb") as ff: df = pd.read_excel(ff, sheet_name=sheet_name, index_col=0) + + if "Landesbezirk" not in df.columns: + df["Landesbezirk"] = df.index.map(get_landesbezirk) + df = df.astype({"Digitale Befragung": "Int32"}) - sum_val = df[["Digitale Befragung"]].sum().iloc[0] + df = df.groupby("Landesbezirk")[["Digitale Befragung"]].sum() + key = f.name[:10] - data_dict[key] = sum_val + data_dict[key] = df["Digitale Befragung"] - data_dict["2023-08-15"] = 275 + df = pd.DataFrame(data=data_dict).T - series = pd.Series(data_dict.values(), index=data_dict) - series.index = series.index.astype("datetime64[ns]") + pd.DateOffset(hours=10) + df.index = df.index.astype("datetime64[ns]") + pd.DateOffset(hours=10) - df = series.to_frame("Digitale Befragung") df = df.reindex( pd.date_range(start="2023-08-15", end=curr_datetime) + pd.DateOffset(hours=10) ) if current_df is not None: + if "Landesbezirk" not in current_df.columns: + current_df["Landesbezirk"] = current_df.index.map(get_landesbezirk) current_df = current_df.astype({"Digitale Befragung": "Int32"}) - sum_val = current_df[["Digitale Befragung"]].sum().iloc[0] - df.loc[curr_datetime] = sum_val + current_df = current_df.groupby("Landesbezirk")[["Digitale Befragung"]].sum() + + df.loc[curr_datetime] = current_df["Digitale Befragung"] if pd.isna(df.loc[df.index.max()][0]): df = df.drop([df.index.max()])