Add line plot

This commit is contained in:
Felix Blanke 2023-08-27 18:57:29 +02:00
parent 95ba5a57a7
commit 19dd8c5f92

110
wsgi.py
View File

@ -1,16 +1,17 @@
import datetime
from pathlib import Path
import io
import base64
import datetime
import io
from itertools import chain
from pathlib import Path
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
from flask import Flask, render_template, request
from flask_caching import Cache
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from download_digital import construct_dataframe, get_bez_data
@ -42,7 +43,14 @@ def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]:
return df, df_state
def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", sheet_name: str = "digital", total_target: int = 1500, plot_all: bool = False) -> str:
def plot(
current_df: pd.DataFrame | None = None,
data_folder: str = "data",
sheet_name: str = "digital",
total_target: int = 1500,
plot_all: bool = False,
alpha: float | None = None,
) -> str:
data_dict = {}
for f in sorted(Path(data_folder).iterdir()):
with f.open("rb") as ff:
@ -50,32 +58,76 @@ def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", shee
df = df.astype({"Digitale Befragung": "Int32"})
sum_val = df[["Digitale Befragung"]].sum().iloc[0]
key = f.name[:10]
if plot_all or not (int(key[-2:]) - 15) % 7:
data_dict[key] = sum_val
data_dict[key] = sum_val
data_dict["2023-08-15"] = 275
if not plot_all and current_df is not None:
series = pd.Series(data_dict.values(), index=data_dict)
series.index = series.index.astype("datetime64[ns]") + pd.DateOffset(hours=10)
df = series.to_frame("Digitale Befragung")
df = df.reindex(
pd.date_range(start="2023-08-15", end=max(data_dict.keys()))
+ pd.DateOffset(hours=10)
)
if current_df is not None:
current_df = current_df.astype({"Digitale Befragung": "Int32"})
sum_val = current_df[["Digitale Befragung"]].sum().iloc[0]
data_dict["jetzt"] = sum_val
df.loc[datetime.datetime.now()] = sum_val
if plot_all:
for day in range(16, 19):
data_dict[f"2023-08-{day}"] = 0
for day in range(25, 27):
data_dict[f"2023-08-{day}"] = 0
plt.figure(dpi=300)
sorted_data_dict = {
k: v
for k, v in sorted(data_dict.items(), key=lambda x: x[0])
}
# fill weekends
max_date = max(data_dict.keys())
max_date = datetime.datetime.strptime(max_date, "%Y-%m-%d") + datetime.timedelta(
days=1
)
days = pd.date_range(start="2023-08-14", end=max_date)
for idx, day in enumerate(days[:-1]):
if day.weekday() >= 5:
plt.gca().axvspan(days[idx], days[idx + 1], alpha=0.2, color="gray")
plt.figure(figsize=(6, 5), dpi=300)
if alpha is not None:
plt.fill_between(
df.dropna().index,
df.dropna()["Digitale Befragung"],
color="#e4004e",
alpha=alpha,
)
plt.bar(sorted_data_dict.keys(), sorted_data_dict.values(), color="#e4004e")
plt.title("Entwicklung Digitale Beschäftigtenbefragung")
plt.plot(
df.dropna().index,
df.dropna()["Digitale Befragung"],
ls="--",
marker="o",
lw=1,
color="#e4004e",
markersize=4,
)
if current_df is not None:
    # Label the most recent non-NaN data point; when live data was passed in,
    # a row stamped with the current time has been added to ``df`` above.
    valid = df.dropna()
    latest_x = valid.index[-1]
    # Use .iloc for positional access: ``series[-1]`` on a datetime-indexed
    # Series depends on the deprecated integer-key positional fallback.
    latest_y = valid["Digitale Befragung"].iloc[-1]
    plt.annotate(
        "Jetzt",
        # Place the text 3 % above the point so it does not overlap the marker.
        (latest_x, latest_y * 1.03),
        fontsize=8,
        ha="center",
    )
plt.plot(df.index, df["Digitale Befragung"], lw=1.5, color="#e4004e")
plt.title("Teilnahme an Digitaler Beschäftigtenbefragung")
plt.ylabel("# Teilnahmen")
plt.ylim(0, total_target + 100)
# plt.gcf().autofmt_xdate()
plt.gca().xaxis.set_major_locator(mdates.WeekdayLocator([mdates.TU]))
plt.gca().xaxis.set_minor_locator(mdates.DayLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%a %d.%m."))
plt.grid(True, which="major", axis="both")
plt.grid(True, which="minor", axis="x")
def val_to_perc(val):
    """Express an absolute count as a percentage of ``total_target``."""
    scaled = 100 * val
    return scaled / total_target
@ -88,10 +140,6 @@ def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", shee
sec_ax.yaxis.set_major_formatter(mtick.PercentFormatter())
plt.axhline(y=total_target, color="#48a9be", linestyle="--")
# plt.axhline(y=federal_target_sheets[1], color="#48a9be", linestyle="--")
# plt.axhline(y=federal_target_sheets[2], color="#48a9be", linestyle="--")
# tikzplotlib.save("plot.tikz", axis_width="\\linewidth")
plt.tight_layout()
# Convert plot to PNG image
@ -100,7 +148,7 @@ def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", shee
# Encode PNG image to base64 string
pngImageB64String = "data:image/png;base64,"
pngImageB64String += base64.b64encode(pngImage.getvalue()).decode('utf8')
pngImageB64String += base64.b64encode(pngImage.getvalue()).decode("utf8")
return pngImageB64String
@ -152,9 +200,7 @@ def tables(
_print_as_html(df_state)
_print_as_html(df)
image = plot(df_state)
image = plot(df_state, plot_all=True)
return render_template(
"base.html",