Compare commits

...

4 Commits

Author SHA1 Message Date
Felix Blanke
4e43916fb1 If no dataframe has been created yet for the current day, remove the entry for plotting purposes 2023-08-28 00:12:01 +02:00
Felix Blanke
598a22ad87 Reuse current datetime 2023-08-28 00:11:14 +02:00
Felix Blanke
71a5ded906 Remove unneeded param 2023-08-28 00:10:46 +02:00
Felix Blanke
c4f3fb2120 Make script gracefully handle the case of the verdi page failing 2023-08-27 23:54:16 +02:00

62
wsgi.py
View File

@ -8,7 +8,7 @@ import matplotlib.pyplot as plt
import matplotlib.ticker as mtick import matplotlib.ticker as mtick
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from flask import Flask, render_template, request from flask import Flask, Markup, render_template, request
from flask_caching import Cache from flask_caching import Cache
from download_digital import construct_dataframe, get_bez_data from download_digital import construct_dataframe, get_bez_data
@ -48,9 +48,10 @@ def plot(
data_folder: str = "data", data_folder: str = "data",
sheet_name: str = "digital", sheet_name: str = "digital",
total_target: int = 1500, total_target: int = 1500,
plot_all: bool = False,
alpha: float | None = None, alpha: float | None = None,
) -> str: ) -> str:
curr_datetime = datetime.datetime.now()
data_dict = {} data_dict = {}
## Important: If multiple results are stored for the same date ## Important: If multiple results are stored for the same date
@ -73,22 +74,22 @@ def plot(
df = series.to_frame("Digitale Befragung") df = series.to_frame("Digitale Befragung")
df = df.reindex( df = df.reindex(
pd.date_range(start="2023-08-15", end=max(data_dict.keys())) pd.date_range(start="2023-08-15", end=curr_datetime)
+ pd.DateOffset(hours=10) + pd.DateOffset(hours=10)
) )
if current_df is not None: if current_df is not None:
current_df = current_df.astype({"Digitale Befragung": "Int32"}) current_df = current_df.astype({"Digitale Befragung": "Int32"})
sum_val = current_df[["Digitale Befragung"]].sum().iloc[0] sum_val = current_df[["Digitale Befragung"]].sum().iloc[0]
df.loc[datetime.datetime.now()] = sum_val df.loc[curr_datetime] = sum_val
if pd.isna(df.loc[df.index.max()][0]):
df = df.drop([df.index.max()])
fig = plt.figure(dpi=300) fig = plt.figure(dpi=300)
# fill weekends # fill weekends
max_date = max(data_dict.keys()) max_date = curr_datetime + datetime.timedelta(days=1)
max_date = datetime.datetime.strptime(max_date, "%Y-%m-%d") + datetime.timedelta(
days=1
)
days = pd.date_range(start="2023-08-14", end=max_date) days = pd.date_range(start="2023-08-14", end=max_date)
for idx, day in enumerate(days[:-1]): for idx, day in enumerate(days[:-1]):
if day.weekday() >= 5: if day.weekday() >= 5:
@ -166,16 +167,6 @@ def plot(
def tables( def tables(
url: str = "https://beschaeftigtenbefragung.verdi.de/", url: str = "https://beschaeftigtenbefragung.verdi.de/",
): ):
df, df_state = get_tables(url)
df = df.sort_values(
["Digitale Befragung", "Bundesland", "Bezirk"], ascending=[False, True, True]
)
df_state = df_state.sort_values("Landesbezirk")
output_str = []
def _print_as_html(df: pd.DataFrame): def _print_as_html(df: pd.DataFrame):
df = df.astype({"Digitale Befragung": "Int32"}) df = df.astype({"Digitale Befragung": "Int32"})
with pd.option_context("display.max_rows", None): with pd.option_context("display.max_rows", None):
@ -206,15 +197,44 @@ def tables(
output_str.append(tfoot) output_str.append(tfoot)
output_str.append(table[idx:]) output_str.append(table[idx:])
output_str = []
try:
df, df_state = get_tables(url)
df = df.sort_values(
["Digitale Befragung", "Bundesland", "Bezirk"],
ascending=[False, True, True],
)
df_state = df_state.sort_values("Landesbezirk")
image = plot(df_state)
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
except Exception:
last_file = sorted(Path("data").iterdir())[-1]
key = last_file.name[:10]
with (Path("data") / f"{key}_data.ods").open("rb") as ff:
df = pd.read_excel(ff, sheet_name="digital").astype(
{"Digitale Befragung": "Int32"}
)
with (Path("data") / f"{key}_state_data.ods").open("rb") as ff:
df_state = pd.read_excel(ff, sheet_name="digital").astype(
{"Digitale Befragung": "Int32"}
)
image = plot()
timestamp = Markup(f'<font color="red">{key} 10:00:00</font>')
_print_as_html(df_state) _print_as_html(df_state)
_print_as_html(df) _print_as_html(df)
image = plot(df_state, plot_all=True)
return render_template( return render_template(
"base.html", "base.html",
tables="\n".join(output_str), tables="\n".join(output_str),
timestamp=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), timestamp=timestamp,
image=image, image=image,
) )