Compare commits
9 Commits
9d3d0f8766
...
e4f9c3ddd5
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e4f9c3ddd5 | ||
|
|
6d2ea05fbf | ||
|
|
a3a1af1842 | ||
|
|
195338aa89 | ||
|
|
6bc8c909d5 | ||
|
|
4e8a8b50d3 | ||
|
|
ca64496aba | ||
|
|
efea733447 | ||
|
|
8aff06b217 |
@ -71,32 +71,50 @@ bundesland_dict = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def construct_dataframe(
|
def get_bez_data(
|
||||||
url: str = "https://beschaeftigtenbefragung.verdi.de/",
|
tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/"
|
||||||
tag: str = "bez_data_2",
|
) -> list[dict]:
|
||||||
grouped: bool = False,
|
|
||||||
):
|
|
||||||
r = requests.get(url)
|
r = requests.get(url)
|
||||||
soup = BeautifulSoup(r.text, "html.parser")
|
soup = BeautifulSoup(r.text, "html.parser")
|
||||||
|
bez_data = []
|
||||||
|
for tag in tags:
|
||||||
for a in soup.find_all("script"):
|
for a in soup.find_all("script"):
|
||||||
script_contents = a.decode_contents()
|
script_contents = a.decode_contents()
|
||||||
if script_contents.find(tag) >= 0:
|
if script_contents.find(tag) >= 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
substring = script_contents[script_contents.find(tag) + len(tag) + 3 :]
|
substring = script_contents[script_contents.find(tag) + len(tag) + 3 :]
|
||||||
bez_data = json.loads(substring[: substring.find("\n") - 1])
|
bez_data.append(json.loads(substring[: substring.find("\n") - 1]))
|
||||||
|
return bez_data
|
||||||
|
|
||||||
|
|
||||||
|
def construct_dataframe(
|
||||||
|
bez_data: dict[str, dict],
|
||||||
|
grouped: bool = False,
|
||||||
|
special_tag: str | None = None,
|
||||||
|
no_processing: bool = False,
|
||||||
|
):
|
||||||
data = {}
|
data = {}
|
||||||
|
if not no_processing:
|
||||||
data["Bundesland"] = pd.Series(
|
data["Bundesland"] = pd.Series(
|
||||||
[bundesland_dict[k] for k in bez_data], index=list(bez_data.keys())
|
[bundesland_dict[k] for k in bez_data], index=list(bez_data.keys())
|
||||||
)
|
)
|
||||||
data["Bezirk"] = pd.Series(
|
data["Bezirk"] = pd.Series(
|
||||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
data["Landesbezirk"] = pd.Series(
|
||||||
|
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||||
|
)
|
||||||
|
|
||||||
tot_col_data = []
|
tot_col_data = []
|
||||||
tot_col_index = []
|
tot_col_index = []
|
||||||
|
if special_tag:
|
||||||
|
for k, v in bez_data.items():
|
||||||
|
if "sp" in v and special_tag in v["sp"]:
|
||||||
|
tot_col_data.append(v["sp"][special_tag])
|
||||||
|
tot_col_index.append(k)
|
||||||
|
else:
|
||||||
for k, v in bez_data.items():
|
for k, v in bez_data.items():
|
||||||
if "tot" in v:
|
if "tot" in v:
|
||||||
tot_col_data.append(v["tot"])
|
tot_col_data.append(v["tot"])
|
||||||
@ -104,7 +122,11 @@ def construct_dataframe(
|
|||||||
|
|
||||||
data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index)
|
data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index)
|
||||||
df = pd.DataFrame(data=data)
|
df = pd.DataFrame(data=data)
|
||||||
if grouped:
|
df = df.astype({"Digitale Befragung": "Int32"})
|
||||||
|
|
||||||
|
if grouped and no_processing:
|
||||||
|
raise ValueError
|
||||||
|
elif grouped:
|
||||||
df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
||||||
|
|
||||||
return df
|
return df
|
||||||
@ -115,8 +137,16 @@ def main(
|
|||||||
tag: str = "bez_data_2",
|
tag: str = "bez_data_2",
|
||||||
dry_run: bool = False,
|
dry_run: bool = False,
|
||||||
grouped: bool = False,
|
grouped: bool = False,
|
||||||
|
special_tag: str | None = None,
|
||||||
|
no_processing: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
df = construct_dataframe(url=url, tag=tag, grouped=grouped)
|
bez_data = get_bez_data([tag], url)[0]
|
||||||
|
df = construct_dataframe(
|
||||||
|
bez_data=bez_data,
|
||||||
|
grouped=grouped,
|
||||||
|
special_tag=special_tag,
|
||||||
|
no_processing=no_processing,
|
||||||
|
)
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print(df)
|
print(df)
|
||||||
|
|||||||
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
<title>Digitale Beschäftigtenbefragung</title>
|
<title>Digitale Beschäftigtenbefragung</title>
|
||||||
|
|
||||||
|
<p>Bei einzelnen ver.di-Bezirken liegen Daten nur zu den Bezirken vor, bei denen TVStud einen Schwerpunkt bildet.</p>
|
||||||
<p><a href="https://zusammen-geht-mehr.verdi.de/beschaeftigtenbefragung">Karte der digitalen Beschäftigtenbefragung</a></p>
|
<p><a href="https://zusammen-geht-mehr.verdi.de/beschaeftigtenbefragung">Karte der digitalen Beschäftigtenbefragung</a></p>
|
||||||
|
|
||||||
{{ tables|safe }}
|
{{ tables|safe }}
|
||||||
|
|||||||
24
wsgi.py
24
wsgi.py
@ -1,7 +1,7 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from flask import Flask, render_template, request
|
from flask import Flask, render_template, request
|
||||||
|
|
||||||
from download_digital import construct_dataframe
|
from download_digital import construct_dataframe, get_bez_data
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
@ -9,12 +9,20 @@ app = Flask(__name__)
|
|||||||
@app.route("/")
|
@app.route("/")
|
||||||
def tables(
|
def tables(
|
||||||
url: str = "https://beschaeftigtenbefragung.verdi.de/",
|
url: str = "https://beschaeftigtenbefragung.verdi.de/",
|
||||||
default_tag: str = "bez_data_2",
|
|
||||||
):
|
):
|
||||||
tag = request.args.get("tag")
|
bez_data = get_bez_data(["bez_data_0", "bez_data_2"], url)
|
||||||
if tag is None:
|
|
||||||
tag = default_tag
|
df = construct_dataframe(
|
||||||
df = construct_dataframe(url=url, tag=tag, grouped=False)
|
bez_data=bez_data[0],
|
||||||
|
grouped=False,
|
||||||
|
special_tag="stud",
|
||||||
|
).sort_values(
|
||||||
|
["Digitale Befragung", "Bundesland", "Bezirk"], ascending=[False, True, True]
|
||||||
|
)
|
||||||
|
|
||||||
|
df_state = construct_dataframe(
|
||||||
|
bez_data=bez_data[1], grouped=False, no_processing=True
|
||||||
|
).sort_values("Landesbezirk")
|
||||||
|
|
||||||
output_str = []
|
output_str = []
|
||||||
|
|
||||||
@ -47,9 +55,7 @@ def tables(
|
|||||||
output_str.append(tfoot)
|
output_str.append(tfoot)
|
||||||
output_str.append(table[idx:])
|
output_str.append(table[idx:])
|
||||||
|
|
||||||
_print_as_html(
|
_print_as_html(df_state)
|
||||||
df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
|
||||||
)
|
|
||||||
_print_as_html(df)
|
_print_as_html(df)
|
||||||
|
|
||||||
return render_template("base.html", tables="\n".join(output_str))
|
return render_template("base.html", tables="\n".join(output_str))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user