Add line plot

This commit is contained in:
Felix Blanke 2023-08-27 18:57:29 +02:00
parent 95ba5a57a7
commit 19dd8c5f92

110
wsgi.py
View File

@ -1,16 +1,17 @@
import datetime
from pathlib import Path
import io
import base64 import base64
import datetime
import io
from itertools import chain
from pathlib import Path
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd import pandas as pd
from flask import Flask, render_template, request from flask import Flask, render_template, request
from flask_caching import Cache from flask_caching import Cache
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from download_digital import construct_dataframe, get_bez_data from download_digital import construct_dataframe, get_bez_data
@ -42,7 +43,14 @@ def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]:
return df, df_state return df, df_state
def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", sheet_name: str = "digital", total_target: int = 1500, plot_all: bool = False) -> str: def plot(
current_df: pd.DataFrame | None = None,
data_folder: str = "data",
sheet_name: str = "digital",
total_target: int = 1500,
plot_all: bool = False,
alpha: float | None = None,
) -> str:
data_dict = {} data_dict = {}
for f in sorted(Path(data_folder).iterdir()): for f in sorted(Path(data_folder).iterdir()):
with f.open("rb") as ff: with f.open("rb") as ff:
@ -50,32 +58,76 @@ def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", shee
df = df.astype({"Digitale Befragung": "Int32"}) df = df.astype({"Digitale Befragung": "Int32"})
sum_val = df[["Digitale Befragung"]].sum().iloc[0] sum_val = df[["Digitale Befragung"]].sum().iloc[0]
key = f.name[:10] key = f.name[:10]
if plot_all or not (int(key[-2:]) - 15) % 7: data_dict[key] = sum_val
data_dict[key] = sum_val
data_dict["2023-08-15"] = 275 data_dict["2023-08-15"] = 275
if not plot_all and current_df is not None:
series = pd.Series(data_dict.values(), index=data_dict)
series.index = series.index.astype("datetime64[ns]") + pd.DateOffset(hours=10)
df = series.to_frame("Digitale Befragung")
df = df.reindex(
pd.date_range(start="2023-08-15", end=max(data_dict.keys()))
+ pd.DateOffset(hours=10)
)
if current_df is not None:
current_df = current_df.astype({"Digitale Befragung": "Int32"}) current_df = current_df.astype({"Digitale Befragung": "Int32"})
sum_val = current_df[["Digitale Befragung"]].sum().iloc[0] sum_val = current_df[["Digitale Befragung"]].sum().iloc[0]
data_dict["jetzt"] = sum_val df.loc[datetime.datetime.now()] = sum_val
if plot_all: plt.figure(dpi=300)
for day in range(16, 19):
data_dict[f"2023-08-{day}"] = 0
for day in range(25, 27):
data_dict[f"2023-08-{day}"] = 0
sorted_data_dict = { # fill weekends
k: v max_date = max(data_dict.keys())
for k, v in sorted(data_dict.items(), key=lambda x: x[0]) max_date = datetime.datetime.strptime(max_date, "%Y-%m-%d") + datetime.timedelta(
} days=1
)
days = pd.date_range(start="2023-08-14", end=max_date)
for idx, day in enumerate(days[:-1]):
if day.weekday() >= 5:
plt.gca().axvspan(days[idx], days[idx + 1], alpha=0.2, color="gray")
plt.figure(figsize=(6, 5), dpi=300) if alpha is not None:
plt.fill_between(
df.dropna().index,
df.dropna()["Digitale Befragung"],
color="#e4004e",
alpha=alpha,
)
plt.bar(sorted_data_dict.keys(), sorted_data_dict.values(), color="#e4004e") plt.plot(
plt.title("Entwicklung Digitale Beschäftigtenbefragung") df.dropna().index,
df.dropna()["Digitale Befragung"],
ls="--",
marker="o",
lw=1,
color="#e4004e",
markersize=4,
)
if current_df is not None:
plt.annotate(
"Jetzt",
(df.dropna().index[-1], df.dropna()["Digitale Befragung"][-1] * 1.03),
fontsize=8,
ha="center",
)
plt.plot(df.index, df["Digitale Befragung"], lw=1.5, color="#e4004e")
plt.title("Teilnahme an Digitaler Beschäftigtenbefragung")
plt.ylabel("# Teilnahmen") plt.ylabel("# Teilnahmen")
plt.ylim(0, total_target + 100)
# plt.gcf().autofmt_xdate()
plt.gca().xaxis.set_major_locator(mdates.WeekdayLocator([mdates.TU]))
plt.gca().xaxis.set_minor_locator(mdates.DayLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%a %d.%m."))
plt.grid(True, which="major", axis="both")
plt.grid(True, which="minor", axis="x")
def val_to_perc(val): def val_to_perc(val):
return 100 * val / total_target return 100 * val / total_target
@ -88,10 +140,6 @@ def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", shee
sec_ax.yaxis.set_major_formatter(mtick.PercentFormatter()) sec_ax.yaxis.set_major_formatter(mtick.PercentFormatter())
plt.axhline(y=total_target, color="#48a9be", linestyle="--") plt.axhline(y=total_target, color="#48a9be", linestyle="--")
# plt.axhline(y=federal_target_sheets[1], color="#48a9be", linestyle="--")
# plt.axhline(y=federal_target_sheets[2], color="#48a9be", linestyle="--")
# tikzplotlib.save("plot.tikz", axis_width="\\linewidth")
plt.tight_layout() plt.tight_layout()
# Convert plot to PNG image # Convert plot to PNG image
@ -100,7 +148,7 @@ def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", shee
# Encode PNG image to base64 string # Encode PNG image to base64 string
pngImageB64String = "data:image/png;base64," pngImageB64String = "data:image/png;base64,"
pngImageB64String += base64.b64encode(pngImage.getvalue()).decode('utf8') pngImageB64String += base64.b64encode(pngImage.getvalue()).decode("utf8")
return pngImageB64String return pngImageB64String
@ -152,9 +200,7 @@ def tables(
_print_as_html(df_state) _print_as_html(df_state)
_print_as_html(df) _print_as_html(df)
image = plot(df_state, plot_all=True)
image = plot(df_state)
return render_template( return render_template(
"base.html", "base.html",