Compare commits
9 Commits
9d3d0f8766
...
e4f9c3ddd5
| Author | SHA1 | Date | |
|---|---|---|---|
| | e4f9c3ddd5 | | |
| | 6d2ea05fbf | | |
| | a3a1af1842 | | |
| | 195338aa89 | | |
| | 6bc8c909d5 | | |
| | 4e8a8b50d3 | | |
| | ca64496aba | | |
| | efea733447 | | |
| | 8aff06b217 | | |
@@ -71,40 +71,62 @@ bundesland_dict = {
|
||||
}
|
||||
|
||||
|
||||
def construct_dataframe(
|
||||
url: str = "https://beschaeftigtenbefragung.verdi.de/",
|
||||
tag: str = "bez_data_2",
|
||||
grouped: bool = False,
|
||||
):
|
||||
def get_bez_data(
|
||||
tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/"
|
||||
) -> list[dict]:
|
||||
r = requests.get(url)
|
||||
soup = BeautifulSoup(r.text, "html.parser")
|
||||
bez_data = []
|
||||
for tag in tags:
|
||||
for a in soup.find_all("script"):
|
||||
script_contents = a.decode_contents()
|
||||
if script_contents.find(tag) >= 0:
|
||||
break
|
||||
|
||||
for a in soup.find_all("script"):
|
||||
script_contents = a.decode_contents()
|
||||
if script_contents.find(tag) >= 0:
|
||||
break
|
||||
substring = script_contents[script_contents.find(tag) + len(tag) + 3 :]
|
||||
bez_data.append(json.loads(substring[: substring.find("\n") - 1]))
|
||||
return bez_data
|
||||
|
||||
substring = script_contents[script_contents.find(tag) + len(tag) + 3 :]
|
||||
bez_data = json.loads(substring[: substring.find("\n") - 1])
|
||||
|
||||
def construct_dataframe(
|
||||
bez_data: dict[str, dict],
|
||||
grouped: bool = False,
|
||||
special_tag: str | None = None,
|
||||
no_processing: bool = False,
|
||||
):
|
||||
data = {}
|
||||
data["Bundesland"] = pd.Series(
|
||||
[bundesland_dict[k] for k in bez_data], index=list(bez_data.keys())
|
||||
)
|
||||
data["Bezirk"] = pd.Series(
|
||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||
)
|
||||
if not no_processing:
|
||||
data["Bundesland"] = pd.Series(
|
||||
[bundesland_dict[k] for k in bez_data], index=list(bez_data.keys())
|
||||
)
|
||||
data["Bezirk"] = pd.Series(
|
||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||
)
|
||||
else:
|
||||
data["Landesbezirk"] = pd.Series(
|
||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||
)
|
||||
|
||||
tot_col_data = []
|
||||
tot_col_index = []
|
||||
for k, v in bez_data.items():
|
||||
if "tot" in v:
|
||||
tot_col_data.append(v["tot"])
|
||||
tot_col_index.append(k)
|
||||
if special_tag:
|
||||
for k, v in bez_data.items():
|
||||
if "sp" in v and special_tag in v["sp"]:
|
||||
tot_col_data.append(v["sp"][special_tag])
|
||||
tot_col_index.append(k)
|
||||
else:
|
||||
for k, v in bez_data.items():
|
||||
if "tot" in v:
|
||||
tot_col_data.append(v["tot"])
|
||||
tot_col_index.append(k)
|
||||
|
||||
data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index)
|
||||
df = pd.DataFrame(data=data)
|
||||
if grouped:
|
||||
df = df.astype({"Digitale Befragung": "Int32"})
|
||||
|
||||
if grouped and no_processing:
|
||||
raise ValueError
|
||||
elif grouped:
|
||||
df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
||||
|
||||
return df
|
||||
@@ -115,8 +137,16 @@ def main(
|
||||
tag: str = "bez_data_2",
|
||||
dry_run: bool = False,
|
||||
grouped: bool = False,
|
||||
special_tag: str | None = None,
|
||||
no_processing: bool = False,
|
||||
) -> None:
|
||||
df = construct_dataframe(url=url, tag=tag, grouped=grouped)
|
||||
bez_data = get_bez_data([tag], url)[0]
|
||||
df = construct_dataframe(
|
||||
bez_data=bez_data,
|
||||
grouped=grouped,
|
||||
special_tag=special_tag,
|
||||
no_processing=no_processing,
|
||||
)
|
||||
|
||||
if dry_run:
|
||||
print(df)
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
<title>Digitale Beschäftigtenbefragung</title>
|
||||
|
||||
<p>Bei einzelnen ver.di-Bezirken liegen Daten nur zu den Bezirken vor, bei denen TVStud einen Schwerpunkt bildet.</p>
|
||||
<p><a href="https://zusammen-geht-mehr.verdi.de/beschaeftigtenbefragung">Karte der digitalen Beschäftigtenbefragung</a></p>
|
||||
|
||||
{{ tables|safe }}
|
||||
|
||||
24
wsgi.py
24
wsgi.py
@@ -1,7 +1,7 @@
|
||||
import pandas as pd
|
||||
from flask import Flask, render_template, request
|
||||
|
||||
from download_digital import construct_dataframe
|
||||
from download_digital import construct_dataframe, get_bez_data
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@@ -9,12 +9,20 @@ app = Flask(__name__)
|
||||
@app.route("/")
|
||||
def tables(
|
||||
url: str = "https://beschaeftigtenbefragung.verdi.de/",
|
||||
default_tag: str = "bez_data_2",
|
||||
):
|
||||
tag = request.args.get("tag")
|
||||
if tag is None:
|
||||
tag = default_tag
|
||||
df = construct_dataframe(url=url, tag=tag, grouped=False)
|
||||
bez_data = get_bez_data(["bez_data_0", "bez_data_2"], url)
|
||||
|
||||
df = construct_dataframe(
|
||||
bez_data=bez_data[0],
|
||||
grouped=False,
|
||||
special_tag="stud",
|
||||
).sort_values(
|
||||
["Digitale Befragung", "Bundesland", "Bezirk"], ascending=[False, True, True]
|
||||
)
|
||||
|
||||
df_state = construct_dataframe(
|
||||
bez_data=bez_data[1], grouped=False, no_processing=True
|
||||
).sort_values("Landesbezirk")
|
||||
|
||||
output_str = []
|
||||
|
||||
@@ -47,9 +55,7 @@ def tables(
|
||||
output_str.append(tfoot)
|
||||
output_str.append(table[idx:])
|
||||
|
||||
_print_as_html(
|
||||
df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
||||
)
|
||||
_print_as_html(df_state)
|
||||
_print_as_html(df)
|
||||
|
||||
return render_template("base.html", tables="\n".join(output_str))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user