Compare commits

..

9 Commits

Author SHA1 Message Date
Felix Blanke
e4f9c3ddd5 Add paragraph to explain incompleteness of bezirk data 2023-08-27 00:32:24 +02:00
Felix Blanke
6d2ea05fbf Sort flask tables 2023-08-27 00:30:57 +02:00
Felix Blanke
a3a1af1842 Format 2023-08-27 00:23:00 +02:00
Felix Blanke
195338aa89 Make downloaded site reusable 2023-08-27 00:22:44 +02:00
Felix Blanke
6bc8c909d5 Default to Landesbezirk as col heading 2023-08-27 00:17:09 +02:00
Felix Blanke
4e8a8b50d3 Load Landesbezirk table 2023-08-27 00:16:54 +02:00
Felix Blanke
ca64496aba Add no-processing mode 2023-08-27 00:09:20 +02:00
Felix Blanke
efea733447 Add command for special tag 2023-08-27 00:00:02 +02:00
Felix Blanke
8aff06b217 Fix col dtype 2023-08-26 23:59:30 +02:00
3 changed files with 69 additions and 32 deletions

View File

@ -71,32 +71,50 @@ bundesland_dict = {
} }
def construct_dataframe( def get_bez_data(
url: str = "https://beschaeftigtenbefragung.verdi.de/", tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/"
tag: str = "bez_data_2", ) -> list[dict]:
grouped: bool = False,
):
r = requests.get(url) r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser") soup = BeautifulSoup(r.text, "html.parser")
bez_data = []
for tag in tags:
for a in soup.find_all("script"): for a in soup.find_all("script"):
script_contents = a.decode_contents() script_contents = a.decode_contents()
if script_contents.find(tag) >= 0: if script_contents.find(tag) >= 0:
break break
substring = script_contents[script_contents.find(tag) + len(tag) + 3 :] substring = script_contents[script_contents.find(tag) + len(tag) + 3 :]
bez_data = json.loads(substring[: substring.find("\n") - 1]) bez_data.append(json.loads(substring[: substring.find("\n") - 1]))
return bez_data
def construct_dataframe(
bez_data: dict[str, dict],
grouped: bool = False,
special_tag: str | None = None,
no_processing: bool = False,
):
data = {} data = {}
if not no_processing:
data["Bundesland"] = pd.Series( data["Bundesland"] = pd.Series(
[bundesland_dict[k] for k in bez_data], index=list(bez_data.keys()) [bundesland_dict[k] for k in bez_data], index=list(bez_data.keys())
) )
data["Bezirk"] = pd.Series( data["Bezirk"] = pd.Series(
[v["name"] for v in bez_data.values()], index=list(bez_data.keys()) [v["name"] for v in bez_data.values()], index=list(bez_data.keys())
) )
else:
data["Landesbezirk"] = pd.Series(
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
)
tot_col_data = [] tot_col_data = []
tot_col_index = [] tot_col_index = []
if special_tag:
for k, v in bez_data.items():
if "sp" in v and special_tag in v["sp"]:
tot_col_data.append(v["sp"][special_tag])
tot_col_index.append(k)
else:
for k, v in bez_data.items(): for k, v in bez_data.items():
if "tot" in v: if "tot" in v:
tot_col_data.append(v["tot"]) tot_col_data.append(v["tot"])
@ -104,7 +122,11 @@ def construct_dataframe(
data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index) data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index)
df = pd.DataFrame(data=data) df = pd.DataFrame(data=data)
if grouped: df = df.astype({"Digitale Befragung": "Int32"})
if grouped and no_processing:
raise ValueError
elif grouped:
df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum() df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
return df return df
@ -115,8 +137,16 @@ def main(
tag: str = "bez_data_2", tag: str = "bez_data_2",
dry_run: bool = False, dry_run: bool = False,
grouped: bool = False, grouped: bool = False,
special_tag: str | None = None,
no_processing: bool = False,
) -> None: ) -> None:
df = construct_dataframe(url=url, tag=tag, grouped=grouped) bez_data = get_bez_data([tag], url)[0]
df = construct_dataframe(
bez_data=bez_data,
grouped=grouped,
special_tag=special_tag,
no_processing=no_processing,
)
if dry_run: if dry_run:
print(df) print(df)

View File

@ -5,6 +5,7 @@
<title>Digitale Beschäftigtenbefragung</title> <title>Digitale Beschäftigtenbefragung</title>
<p>Bei einzelnen ver.di-Bezirken liegen Daten nur zu den Bezirken vor, bei denen TVStud einen Schwerpunkt bildet.</p>
<p><a href="https://zusammen-geht-mehr.verdi.de/beschaeftigtenbefragung">Karte der digitalen Beschäftigtenbefragung</a></p> <p><a href="https://zusammen-geht-mehr.verdi.de/beschaeftigtenbefragung">Karte der digitalen Beschäftigtenbefragung</a></p>
{{ tables|safe }} {{ tables|safe }}

24
wsgi.py
View File

@ -1,7 +1,7 @@
import pandas as pd import pandas as pd
from flask import Flask, render_template, request from flask import Flask, render_template, request
from download_digital import construct_dataframe from download_digital import construct_dataframe, get_bez_data
app = Flask(__name__) app = Flask(__name__)
@ -9,12 +9,20 @@ app = Flask(__name__)
@app.route("/") @app.route("/")
def tables( def tables(
url: str = "https://beschaeftigtenbefragung.verdi.de/", url: str = "https://beschaeftigtenbefragung.verdi.de/",
default_tag: str = "bez_data_2",
): ):
tag = request.args.get("tag") bez_data = get_bez_data(["bez_data_0", "bez_data_2"], url)
if tag is None:
tag = default_tag df = construct_dataframe(
df = construct_dataframe(url=url, tag=tag, grouped=False) bez_data=bez_data[0],
grouped=False,
special_tag="stud",
).sort_values(
["Digitale Befragung", "Bundesland", "Bezirk"], ascending=[False, True, True]
)
df_state = construct_dataframe(
bez_data=bez_data[1], grouped=False, no_processing=True
).sort_values("Landesbezirk")
output_str = [] output_str = []
@ -47,9 +55,7 @@ def tables(
output_str.append(tfoot) output_str.append(tfoot)
output_str.append(table[idx:]) output_str.append(table[idx:])
_print_as_html( _print_as_html(df_state)
df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
)
_print_as_html(df) _print_as_html(df)
return render_template("base.html", tables="\n".join(output_str)) return render_template("base.html", tables="\n".join(output_str))