import datetime from pathlib import Path import io import base64 import pandas as pd from flask import Flask, render_template, request from flask_caching import Cache from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas import matplotlib.pyplot as plt import matplotlib.ticker as mtick from download_digital import construct_dataframe, get_bez_data config = { "CACHE_TYPE": "FileSystemCache", "CACHE_DEFAULT_TIMEOUT": 300, "CACHE_THRESHOLD": 1000, "CACHE_DIR": "cache", } app = Flask(__name__) app.config.from_mapping(config) cache = Cache(app) def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]: bez_data = get_bez_data(["bez_data_0", "bez_data_2"], url) df = construct_dataframe( bez_data=bez_data[0], grouped=False, special_tag="stud", ) df_state = construct_dataframe( bez_data=bez_data[1], grouped=False, no_processing=True ) return df, df_state def plot(current_df: pd.DataFrame | None = None, data_folder: str = "data", sheet_name: str = "digital", total_target: int = 1500, plot_all: bool = False) -> str: data_dict = {} for f in sorted(Path(data_folder).iterdir()): with f.open("rb") as ff: df = pd.read_excel(ff, sheet_name=sheet_name) df = df.astype({"Digitale Befragung": "Int32"}) sum_val = df[["Digitale Befragung"]].sum().iloc[0] key = f.name[:10] if plot_all or not (int(key[-2:]) - 15) % 7: data_dict[key] = sum_val data_dict["2023-08-15"] = 275 if not plot_all and current_df is not None: current_df = current_df.astype({"Digitale Befragung": "Int32"}) sum_val = current_df[["Digitale Befragung"]].sum().iloc[0] data_dict["jetzt"] = sum_val if plot_all: for day in range(16, 19): data_dict[f"2023-08-{day}"] = 0 for day in range(25, 27): data_dict[f"2023-08-{day}"] = 0 sorted_data_dict = { k: v for k, v in sorted(data_dict.items(), key=lambda x: x[0]) } plt.figure(figsize=(6, 5), dpi=300) plt.bar(sorted_data_dict.keys(), sorted_data_dict.values(), color="#e4004e") plt.title("Entwicklung Digitale Beschäftigtenbefragung") plt.ylabel("# Teilnahmen") def val_to_perc(val): return 100 * val / total_target def perc_to_val(perc): return perc * total_target / 100 sec_ax = plt.gca().secondary_yaxis("right", functions=(val_to_perc, perc_to_val)) sec_ax.set_ylabel("# Teilnahmen [% Erfolg]") sec_ax.yaxis.set_major_formatter(mtick.PercentFormatter()) plt.axhline(y=total_target, color="#48a9be", linestyle="--") # plt.axhline(y=federal_target_sheets[1], color="#48a9be", linestyle="--") # plt.axhline(y=federal_target_sheets[2], color="#48a9be", linestyle="--") # tikzplotlib.save("plot.tikz", axis_width="\\linewidth") plt.tight_layout() # Convert plot to PNG image pngImage = io.BytesIO() FigureCanvas(plt.gcf()).print_png(pngImage) # Encode PNG image to base64 string pngImageB64String = "data:image/png;base64," pngImageB64String += base64.b64encode(pngImage.getvalue()).decode('utf8') return pngImageB64String @app.route("/") @cache.cached(timeout=50) def tables( url: str = "https://beschaeftigtenbefragung.verdi.de/", ): df, df_state = get_tables(url) df = df.sort_values( ["Digitale Befragung", "Bundesland", "Bezirk"], ascending=[False, True, True] ) df_state = df_state.sort_values("Landesbezirk") output_str = [] def _print_as_html(df: pd.DataFrame): df = df.astype({"Digitale Befragung": "Int32"}) with pd.option_context("display.max_rows", None): table = df.to_html( index_names=False, justify="left", index=False, classes="sortable dataframe", ) tfoot = [ "
", "