Group by Landesbezirk

This commit is contained in:
Felix Blanke 2023-08-28 16:56:01 +02:00
parent 046fce6bb0
commit dc80671295
2 changed files with 19 additions and 9 deletions

View File

@ -85,6 +85,10 @@ landesbezirk_dict = {
} }
def get_landesbezirk(id: str):
    """Return the ``landesbezirk_dict`` entry for the hundred-block of *id*.

    *id* is converted with ``int()`` and rounded down to the nearest
    multiple of 100; the string form of that number is the lookup key.

    Raises:
        ValueError: if *id* is a string that ``int()`` cannot parse.
        KeyError: if the rounded key is not present in ``landesbezirk_dict``.
    """
    # NOTE(review): the parameter name shadows the builtin ``id``; it is kept
    # unchanged so that existing keyword callers continue to work.
    number = int(id)
    # Floor to a multiple of 100 (same result as ``(number // 100) * 100``).
    block_start = number - number % 100
    return landesbezirk_dict[str(block_start)]
def get_bez_data( def get_bez_data(
tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/" tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/"
) -> list[dict]: ) -> list[dict]:

24
wsgi.py
View File

@ -11,7 +11,7 @@ import pandas as pd
from flask import Flask, Markup, render_template, request from flask import Flask, Markup, render_template, request
from flask_caching import Cache from flask_caching import Cache
from download_digital import construct_dataframe, get_bez_data from download_digital import construct_dataframe, get_bez_data, get_landesbezirk
config = { config = {
"CACHE_TYPE": "FileSystemCache", "CACHE_TYPE": "FileSystemCache",
@ -52,26 +52,32 @@ def create_plot_df(
for f in sorted(Path(data_folder).iterdir()): for f in sorted(Path(data_folder).iterdir()):
with f.open("rb") as ff: with f.open("rb") as ff:
df = pd.read_excel(ff, sheet_name=sheet_name, index_col=0) df = pd.read_excel(ff, sheet_name=sheet_name, index_col=0)
if "Landesbezirk" not in df.columns:
df["Landesbezirk"] = df.index.map(get_landesbezirk)
df = df.astype({"Digitale Befragung": "Int32"}) df = df.astype({"Digitale Befragung": "Int32"})
sum_val = df[["Digitale Befragung"]].sum().iloc[0] df = df.groupby("Landesbezirk")[["Digitale Befragung"]].sum()
key = f.name[:10] key = f.name[:10]
data_dict[key] = sum_val data_dict[key] = df["Digitale Befragung"]
data_dict["2023-08-15"] = 275 df = pd.DataFrame(data=data_dict).T
series = pd.Series(data_dict.values(), index=data_dict) df.index = df.index.astype("datetime64[ns]") + pd.DateOffset(hours=10)
series.index = series.index.astype("datetime64[ns]") + pd.DateOffset(hours=10)
df = series.to_frame("Digitale Befragung")
df = df.reindex( df = df.reindex(
pd.date_range(start="2023-08-15", end=curr_datetime) pd.date_range(start="2023-08-15", end=curr_datetime)
+ pd.DateOffset(hours=10) + pd.DateOffset(hours=10)
) )
if current_df is not None: if current_df is not None:
if "Landesbezirk" not in current_df.columns:
current_df["Landesbezirk"] = current_df.index.map(get_landesbezirk)
current_df = current_df.astype({"Digitale Befragung": "Int32"}) current_df = current_df.astype({"Digitale Befragung": "Int32"})
sum_val = current_df[["Digitale Befragung"]].sum().iloc[0] current_df = current_df.groupby("Landesbezirk")[["Digitale Befragung"]].sum()
df.loc[curr_datetime] = sum_val
df.loc[curr_datetime] = current_df["Digitale Befragung"]
if pd.isna(df.loc[df.index.max()][0]): if pd.isna(df.loc[df.index.max()][0]):
df = df.drop([df.index.max()]) df = df.drop([df.index.max()])