Simplify download script
This commit is contained in:
parent
3bb4f432e4
commit
ae6beafa3d
@ -71,6 +71,20 @@ bundesland_dict = {
|
||||
}
|
||||
|
||||
|
||||
landesbezirk_dict = {
|
||||
"100": "Nord",
|
||||
"200": "Niedersachsen-Bremen",
|
||||
"300": "Berlin-Brandenburg",
|
||||
"400": "Nordrhein-Westfalen",
|
||||
"500": "Rheinland-Pfalz-Saarland",
|
||||
"600": "Hessen",
|
||||
"700": "Sachsen, Sachsen-Anhalt, Thüringen",
|
||||
"800": "Bayern",
|
||||
"900": "Baden-Württemberg",
|
||||
"1000": "Hamburg",
|
||||
}
|
||||
|
||||
|
||||
def get_bez_data(
|
||||
tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/"
|
||||
) -> list[dict]:
|
||||
@ -90,22 +104,22 @@ def get_bez_data(
|
||||
|
||||
def construct_dataframe(
|
||||
bez_data: dict[str, dict],
|
||||
grouped: bool = False,
|
||||
special_tag: str | None = None,
|
||||
no_processing: bool = False,
|
||||
):
|
||||
data = {}
|
||||
if not no_processing:
|
||||
|
||||
first_key = next(iter(bez_data.keys()))
|
||||
if first_key in landesbezirk_dict:
|
||||
data["Landesbezirk"] = pd.Series(
|
||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||
)
|
||||
else:
|
||||
data["Bundesland"] = pd.Series(
|
||||
[bundesland_dict[k] for k in bez_data], index=list(bez_data.keys())
|
||||
)
|
||||
data["Bezirk"] = pd.Series(
|
||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||
)
|
||||
else:
|
||||
data["Landesbezirk"] = pd.Series(
|
||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||
)
|
||||
|
||||
tot_col_data = []
|
||||
tot_col_index = []
|
||||
@ -121,15 +135,7 @@ def construct_dataframe(
|
||||
tot_col_index.append(k)
|
||||
|
||||
data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index)
|
||||
df = pd.DataFrame(data=data)
|
||||
df = df.astype({"Digitale Befragung": "Int32"})
|
||||
|
||||
if grouped and no_processing:
|
||||
raise ValueError
|
||||
elif grouped:
|
||||
df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
||||
|
||||
return df
|
||||
return pd.DataFrame(data=data).astype({"Digitale Befragung": "Int32"})
|
||||
|
||||
|
||||
def main(
|
||||
@ -138,7 +144,6 @@ def main(
|
||||
dry_run: bool = False,
|
||||
grouped: bool = False,
|
||||
special_tag: str | None = None,
|
||||
no_processing: bool = False,
|
||||
folder: str = "data",
|
||||
name: str = "data",
|
||||
sheet_name: str = "digital",
|
||||
@ -146,11 +151,12 @@ def main(
|
||||
bez_data = get_bez_data([tag], url)[0]
|
||||
df = construct_dataframe(
|
||||
bez_data=bez_data,
|
||||
grouped=grouped,
|
||||
special_tag=special_tag,
|
||||
no_processing=no_processing,
|
||||
)
|
||||
|
||||
if grouped:
|
||||
df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
||||
|
||||
if dry_run:
|
||||
print(df)
|
||||
else:
|
||||
|
||||
11
wsgi.py
11
wsgi.py
@ -30,15 +30,8 @@ cache = Cache(app)
|
||||
def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]:
|
||||
bez_data = get_bez_data(["bez_data_0", "bez_data_2"], url)
|
||||
|
||||
df = construct_dataframe(
|
||||
bez_data=bez_data[0],
|
||||
grouped=False,
|
||||
special_tag="stud",
|
||||
)
|
||||
|
||||
df_state = construct_dataframe(
|
||||
bez_data=bez_data[1], grouped=False, no_processing=True
|
||||
)
|
||||
df = construct_dataframe(bez_data=bez_data[0], special_tag="stud")
|
||||
df_state = construct_dataframe(bez_data=bez_data[1])
|
||||
|
||||
return df, df_state
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user