diff --git a/wsgi.py b/wsgi.py index efbeb09..a70c4e5 100644 --- a/wsgi.py +++ b/wsgi.py @@ -1,16 +1,17 @@ -import datetime -from pathlib import Path -import io import base64 +import datetime +import io +from itertools import chain +from pathlib import Path - +import matplotlib.dates as mdates +import matplotlib.pyplot as plt +import matplotlib.ticker as mtick +import numpy as np import pandas as pd from flask import Flask, render_template, request from flask_caching import Cache - from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas -import matplotlib.pyplot as plt -import matplotlib.ticker as mtick from download_digital import construct_dataframe, get_bez_data @@ -42,7 +43,14 @@ def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]: return df, df_state -def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", sheet_name: str = "digital", total_target: int = 1500, plot_all: bool = False) -> str: +def plot( + current_df: pd.DataFrame | None = None, + data_folder: str = "data", + sheet_name: str = "digital", + total_target: int = 1500, + plot_all: bool = False, + alpha: float | None = None, +) -> str: data_dict = {} for f in sorted(Path(data_folder).iterdir()): with f.open("rb") as ff: @@ -50,32 +58,76 @@ def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", shee df = df.astype({"Digitale Befragung": "Int32"}) sum_val = df[["Digitale Befragung"]].sum().iloc[0] key = f.name[:10] - if plot_all or not (int(key[-2:]) - 15) % 7: - data_dict[key] = sum_val + data_dict[key] = sum_val data_dict["2023-08-15"] = 275 - if not plot_all and current_df is not None: + + series = pd.Series(data_dict.values(), index=data_dict) + series.index = series.index.astype("datetime64[ns]") + pd.DateOffset(hours=10) + + df = series.to_frame("Digitale Befragung") + df = df.reindex( + pd.date_range(start="2023-08-15", end=max(data_dict.keys())) + + pd.DateOffset(hours=10) + ) + + if current_df is not None: current_df = current_df.astype({"Digitale Befragung": "Int32"}) sum_val = current_df[["Digitale Befragung"]].sum().iloc[0] - data_dict["jetzt"] = sum_val + df.loc[datetime.datetime.now()] = sum_val - if plot_all: - for day in range(16, 19): - data_dict[f"2023-08-{day}"] = 0 - for day in range(25, 27): - data_dict[f"2023-08-{day}"] = 0 + plt.figure(dpi=300) - sorted_data_dict = { - k: v - for k, v in sorted(data_dict.items(), key=lambda x: x[0]) - } + # fill weekends + max_date = max(data_dict.keys()) + max_date = datetime.datetime.strptime(max_date, "%Y-%m-%d") + datetime.timedelta( + days=1 + ) + days = pd.date_range(start="2023-08-14", end=max_date) + for idx, day in enumerate(days[:-1]): + if day.weekday() >= 5: + plt.gca().axvspan(days[idx], days[idx + 1], alpha=0.2, color="gray") - plt.figure(figsize=(6, 5), dpi=300) + if alpha is not None: + plt.fill_between( + df.dropna().index, + df.dropna()["Digitale Befragung"], + color="#e4004e", + alpha=alpha, + ) - plt.bar(sorted_data_dict.keys(), sorted_data_dict.values(), color="#e4004e") - plt.title("Entwicklung Digitale Beschäftigtenbefragung") + plt.plot( + df.dropna().index, + df.dropna()["Digitale Befragung"], + ls="--", + marker="o", + lw=1, + color="#e4004e", + markersize=4, + ) + if current_df is not None: + plt.annotate( + "Jetzt", + (df.dropna().index[-1], df.dropna()["Digitale Befragung"][-1] * 1.03), + fontsize=8, + ha="center", + ) + + plt.plot(df.index, df["Digitale Befragung"], lw=1.5, color="#e4004e") + + plt.title("Teilnahme an Digitaler Beschäftigtenbefragung") plt.ylabel("# Teilnahmen") + plt.ylim(0, total_target + 100) + + # plt.gcf().autofmt_xdate() + + plt.gca().xaxis.set_major_locator(mdates.WeekdayLocator([mdates.TU])) + plt.gca().xaxis.set_minor_locator(mdates.DayLocator()) + plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%a %d.%m.")) + + plt.grid(True, which="major", axis="both") + plt.grid(True, which="minor", axis="x") def val_to_perc(val): return 100 * val / total_target @@ -88,10 +140,6 @@ def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", shee sec_ax.yaxis.set_major_formatter(mtick.PercentFormatter()) plt.axhline(y=total_target, color="#48a9be", linestyle="--") - # plt.axhline(y=federal_target_sheets[1], color="#48a9be", linestyle="--") - # plt.axhline(y=federal_target_sheets[2], color="#48a9be", linestyle="--") - - # tikzplotlib.save("plot.tikz", axis_width="\\linewidth") plt.tight_layout() # Convert plot to PNG image @@ -100,7 +148,7 @@ def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", shee # Encode PNG image to base64 string pngImageB64String = "data:image/png;base64," - pngImageB64String += base64.b64encode(pngImage.getvalue()).decode('utf8') + pngImageB64String += base64.b64encode(pngImage.getvalue()).decode("utf8") return pngImageB64String @@ -152,9 +200,7 @@ def tables( _print_as_html(df_state) _print_as_html(df) - - image = plot(df_state) - + image = plot(df_state, plot_all=True) return render_template( "base.html",