2023-08-27 16:22:06 +02:00

169 lines
4.8 KiB
Python

import datetime
from pathlib import Path
import io
import base64
import pandas as pd
from flask import Flask, render_template, request
from flask_caching import Cache
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from download_digital import construct_dataframe, get_bez_data
# Flask-Caching settings: cached responses are kept on disk in the "cache"
# directory. See the Flask-Caching configuration reference for the exact
# meaning of the timeout (seconds) and threshold (item count) values.
config = dict(
    CACHE_TYPE="FileSystemCache",
    CACHE_DEFAULT_TIMEOUT=300,
    CACHE_THRESHOLD=1000,
    CACHE_DIR="cache",
)

# The application object, configured from the mapping above, and the cache
# extension bound to it.
app = Flask(__name__)
app.config.from_mapping(config)
cache = Cache(app)
def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Download the survey data behind *url* and turn it into two tables.

    ``get_bez_data`` is asked for the ``"bez_data_0"`` and ``"bez_data_2"``
    entries; the first element of its result feeds the detailed table and the
    second feeds the state-level table (presumably the result is ordered like
    the request -- confirm against ``download_digital``).

    Args:
        url: Address of the page the data is read from.

    Returns:
        A ``(df, df_state)`` pair. ``df`` is built ungrouped with
        ``special_tag="stud"``; ``df_state`` is built ungrouped with
        ``no_processing=True``.
    """
    payloads = get_bez_data(["bez_data_0", "bez_data_2"], url)

    detail_table = construct_dataframe(
        bez_data=payloads[0],
        grouped=False,
        special_tag="stud",
    )
    state_table = construct_dataframe(
        bez_data=payloads[1],
        grouped=False,
        no_processing=True,
    )
    return detail_table, state_table
def _sum_digital_responses(df: pd.DataFrame):
    """Return the sum of the ``"Digitale Befragung"`` column as an integer."""
    return df.astype({"Digitale Befragung": "Int32"})["Digitale Befragung"].sum()


def plot(
    current_df: pd.DataFrame | None = None,
    data_folder: str = "data",
    sheet_name: str = "digital",
    total_target: int = 1500,
    plot_all: bool = False,
) -> str:
    """Render the participation history as a bar chart and return it inline.

    Every file in *data_folder* is treated as an Excel workbook whose name
    starts with an ISO date (``YYYY-MM-DD``); the first ten characters of the
    file name become the bar label and the sum of the ``"Digitale Befragung"``
    column of sheet *sheet_name* becomes the bar height.

    Args:
        current_df: Live data. When given (and *plot_all* is false) its column
            sum is appended as a final bar labelled ``"jetzt"``.
        data_folder: Directory containing the dated Excel snapshots.
        sheet_name: Worksheet to read from each snapshot.
        total_target: Participation goal; drawn as a dashed horizontal line
            and used as 100 % on the secondary (right-hand) axis.
        plot_all: If true, plot every snapshot instead of only the weekly
            ones, and omit the ``"jetzt"`` bar.

    Returns:
        The PNG image as a ``data:image/png;base64,...`` URI string.

    Raises:
        ValueError: If *plot_all* is false and characters 9-10 of a file name
            are not a number (the weekly filter parses them as day of month).
    """
    data_dict = {}
    for snapshot_path in sorted(Path(data_folder).iterdir()):
        # Sub-directories cannot be opened as workbooks; ignore them.
        if not snapshot_path.is_file():
            continue
        key = snapshot_path.name[:10]
        # Weekly view: keep only days that are a multiple of 7 away from the
        # 15th. Decide this *before* reading so that discarded workbooks are
        # never parsed.
        if not plot_all and (int(key[-2:]) - 15) % 7:
            continue
        with snapshot_path.open("rb") as handle:
            snapshot = pd.read_excel(handle, sheet_name=sheet_name)
        data_dict[key] = _sum_digital_responses(snapshot)

    # Fixed value for the first day; it overrides whatever a snapshot file of
    # that date contained. NOTE(review): origin of the number is not visible
    # in this file.
    data_dict["2023-08-15"] = 275

    if not plot_all and current_df is not None:
        data_dict["jetzt"] = _sum_digital_responses(current_df)

    if plot_all:
        # These days are forced to zero in the daily view (presumably days
        # without a usable snapshot -- confirm).
        for day in (16, 17, 18, 25, 26):
            data_dict[f"2023-08-{day}"] = 0

    # Bars are ordered by label; "jetzt" sorts after all date strings.
    labels = sorted(data_dict)
    heights = [data_dict[label] for label in labels]

    def val_to_perc(val):
        return 100 * val / total_target

    def perc_to_val(perc):
        return perc * total_target / 100

    fig, ax = plt.subplots(figsize=(6, 5), dpi=300)
    try:
        ax.bar(labels, heights, color="#e4004e")
        ax.set_title("Entwicklung Digitale Beschäftigtenbefragung")
        ax.set_ylabel("# Teilnahmen")

        # Right-hand axis shows the same values as a percentage of the target.
        sec_ax = ax.secondary_yaxis("right", functions=(val_to_perc, perc_to_val))
        sec_ax.set_ylabel("# Teilnahmen [% Erfolg]")
        sec_ax.yaxis.set_major_formatter(mtick.PercentFormatter())

        ax.axhline(y=total_target, color="#48a9be", linestyle="--")
        fig.tight_layout()

        # Render the figure to an in-memory PNG.
        png_buffer = io.BytesIO()
        FigureCanvas(fig).print_png(png_buffer)
    finally:
        # pyplot keeps every figure alive in a global registry until it is
        # closed; without this each call would leak one figure.
        plt.close(fig)

    encoded = base64.b64encode(png_buffer.getvalue()).decode("utf8")
    return "data:image/png;base64," + encoded
def _render_table_with_total(df: pd.DataFrame) -> str:
    """Return *df* as an HTML table with a ``<tfoot>`` row holding the total.

    The footer row has one cell per column: ``Gesamt`` in the first, the sum
    of ``"Digitale Befragung"`` in the last, and empty cells in between
    (this assumes ``"Digitale Befragung"`` is the last column of *df*).
    """
    df = df.astype({"Digitale Befragung": "Int32"})
    with pd.option_context("display.max_rows", None):
        table = df.to_html(
            index_names=False,
            justify="left",
            index=False,
            classes="sortable dataframe",
        )

    filler_cells = ["      <td></td>"] * (len(df.columns) - 2)
    tfoot = "\n".join(
        [
            "  <tfoot>",
            # The row must be opened explicitly; a lone </tr> is malformed.
            "    <tr>",
            "      <td>Gesamt</td>",
            *filler_cells,
            f"      <td>{df['Digitale Befragung'].sum()}</td>",
            "    </tr>",
            "  </tfoot>",
        ]
    )

    # Splice the footer in just before the closing tag of the table.
    idx = table.index("</table>")
    return "\n".join([table[:idx].rstrip("\n"), tfoot, table[idx:]])


@app.route("/")
@cache.cached(timeout=50)
def tables(
    url: str = "https://beschaeftigtenbefragung.verdi.de/",
):
    """Serve the overview page with both result tables and the history plot.

    The state-level table is sorted by ``"Landesbezirk"``; the detailed table
    is sorted by ``"Digitale Befragung"`` (descending), then ``"Bundesland"``
    and ``"Bezirk"`` (ascending). The rendered response is cached for 50
    seconds, so the timestamp shown is the time the page was rendered.

    Args:
        url: Page the survey data is fetched from.

    Returns:
        The rendered ``base.html`` template.
    """
    df, df_state = get_tables(url)
    df = df.sort_values(
        ["Digitale Befragung", "Bundesland", "Bezirk"], ascending=[False, True, True]
    )
    df_state = df_state.sort_values("Landesbezirk")

    # State table first, detailed table second.
    rendered_tables = [
        _render_table_with_total(df_state),
        _render_table_with_total(df),
    ]
    image = plot(df_state)

    return render_template(
        "base.html",
        tables="\n".join(rendered_tables),
        timestamp=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        image=image,
    )
# Run the Flask application when this module is executed directly as a script
# (not when it is imported, e.g. by a WSGI server).
if __name__ == "__main__":
    app.run()