Add download_digital
This commit is contained in:
commit
2e91f30d7f
116
download_digital.py
Normal file
116
download_digital.py
Normal file
@ -0,0 +1,116 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.firefox.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
import json
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
# Lookup table from the district keys found in the scraped survey data to the
# name of the German federal state ("Bundesland") the district belongs to.
# Keys are strings because they come straight out of the page's JSON payload.
# NOTE(review): the numeric codes are presumably ver.di district IDs; confirm
# against the data source before adding or changing entries.
bundesland_dict = {
    "110": "Schleswig-Holstein",
    "112": "Schleswig-Holstein",
    "103": "Schleswig-Holstein",
    "111": "Schleswig-Holstein",
    "109": "Mecklenburg-Vorpommern",
    "105": "Mecklenburg-Vorpommern",
    "108": "Mecklenburg-Vorpommern",
    "213": "Niedersachsen",
    "214": "Bremen",
    "217": "Niedersachsen",
    "215": "Niedersachsen",
    "305": "Berlin",
    "306": "Brandenburg",
    "307": "Brandenburg",
    "308": "Brandenburg",
    "432": "NRW",
    "433": "NRW",
    "435": "NRW",
    "437": "NRW",
    "442": "NRW",
    "443": "NRW",
    "444": "NRW",
    "445": "NRW",
    "446": "NRW",
    "447": "NRW",
    "448": "NRW",
    "506": "Rheinland-Pfalz",
    "507": "Rheinland-Pfalz",
    "508": "Rheinland-Pfalz",
    "601": "Hessen",
    "603": "Hessen",
    "604": "Hessen",
    "608": "Hessen",
    "609": "Hessen",
    "610": "Hessen",
    "701": "Sachsen",
    "706": "Sachsen-Anhalt",
    "707": "Sachsen-Anhalt",
    "712": "Thüringen",
    "713": "Sachsen",
    "801": "Bayern",
    "802": "Bayern",
    "803": "Bayern",
    "804": "Bayern",
    "806": "Bayern",
    "808": "Bayern",
    "809": "Bayern",
    "810": "Bayern",
    "811": "Bayern",
    "812": "Bayern",
    "813": "Bayern",
    "814": "Bayern",
    "904": "Baden-Württemberg",
    "905": "Baden-Württemberg",
    "912": "Baden-Württemberg",
    "914": "Baden-Württemberg",
    "915": "Baden-Württemberg",
    "916": "Baden-Württemberg",
    "917": "Baden-Württemberg",
    "1001": "Hamburg",
}
|
||||
|
||||
|
||||
def _extract_bez_data(script_contents: str, tag: str) -> dict:
    """Parse the JSON object that follows *tag* inside an inline script.

    Args:
        script_contents: Text of the ``<script>`` element that mentions *tag*.
        tag: Name of the JavaScript variable holding the district data.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If *tag* or a following ``{`` cannot be found, or the
            payload is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    tag_pos = script_contents.find(tag)
    if tag_pos < 0:
        raise ValueError(f"Tag {tag!r} not found in script contents")

    # Locate the opening brace instead of assuming exactly " = " after the
    # tag, so differences in whitespace around the assignment do not matter.
    json_start = script_contents.find("{", tag_pos + len(tag))
    if json_start < 0:
        raise ValueError(f"No JSON object found after tag {tag!r}")

    # raw_decode stops at the end of the first complete JSON document, so the
    # trailing ";" / newline / rest of the script needs no manual trimming.
    bez_data, _ = json.JSONDecoder().raw_decode(script_contents, json_start)
    return bez_data


def _build_frame(bez_data: dict) -> pd.DataFrame:
    """Turn the decoded district mapping into the export table.

    The frame is indexed by district key and has the columns "Bundesland"
    (looked up in ``bundesland_dict``), "Bezirk" (each entry's ``"name"``)
    and "Digitale Befragung" (each entry's ``"tot"``; entries without a
    ``"tot"`` key end up as missing values after alignment).

    Raises:
        KeyError: If a district key is missing from ``bundesland_dict`` or an
            entry has no ``"name"``.
    """
    keys = list(bez_data)
    columns = {
        "Bundesland": pd.Series([bundesland_dict[k] for k in keys], index=keys),
        "Bezirk": pd.Series(
            [entry["name"] for entry in bez_data.values()], index=keys
        ),
    }

    # Only some districts carry a total; build the series on that subset and
    # let DataFrame construction align it against the full index.
    totals = {k: entry["tot"] for k, entry in bez_data.items() if "tot" in entry}
    columns["Digitale Befragung"] = pd.Series(
        list(totals.values()), index=list(totals)
    )
    return pd.DataFrame(data=columns)


def main(
    url: str = "https://beschaeftigtenbefragung.verdi.de/", tag: str = "bez_data_2"
) -> pd.DataFrame:
    """Scrape the per-district survey data from *url* and export it.

    Loads the page in headless Firefox, finds the first ``<script>`` element
    whose text contains *tag*, decodes the JSON object assigned to it and
    writes the resulting table to ``data/<YYYY-MM-DD>_data.ods`` (sheet
    "digital") unless a file for today already exists.

    Args:
        url: Page to load.
        tag: Name of the JavaScript variable that holds the district data.

    Returns:
        The assembled DataFrame, whether or not a file was written.

    Raises:
        ValueError: If no script contains *tag* or its payload cannot be
            decoded.
        KeyError: If the data contains a district unknown to
            ``bundesland_dict``.
    """
    options = Options()
    options.add_argument("--headless")
    driver = webdriver.Firefox(options=options)
    try:
        driver.get(url)

        script_contents = None
        for script in driver.find_elements(By.XPATH, "//script"):
            candidate = script.get_attribute("innerHTML")
            # get_attribute may return None; skip such elements.
            if candidate and tag in candidate:
                script_contents = candidate
                break
    finally:
        # Always shut the browser down, even when loading or scraping fails,
        # so no headless Firefox process is left behind.
        driver.quit()

    if script_contents is None:
        raise ValueError(f"No <script> element containing {tag!r} found at {url}")

    df = _build_frame(_extract_bez_data(script_contents, tag))

    filename = f"data/{datetime.today().strftime('%Y-%m-%d')}_data.ods"
    target = Path(filename)
    if target.exists():
        print("File already exists!")
    else:
        # Create the output directory on first use instead of failing.
        target.parent.mkdir(parents=True, exist_ok=True)
        df.to_excel(filename, sheet_name="digital")

    return df
|
||||
|
||||
|
||||
# Run the download with the default URL and tag when executed as a script.
if __name__ == "__main__":
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user