diff --git a/download_digital.py b/download_digital.py index 0bef92f..24a0754 100644 --- a/download_digital.py +++ b/download_digital.py @@ -4,9 +4,8 @@ from pathlib import Path import fire import pandas as pd -from selenium import webdriver -from selenium.webdriver.common.by import By -from selenium.webdriver.firefox.options import Options +import requests +from bs4 import BeautifulSoup bundesland_dict = { "110": "Schleswig-Holstein", @@ -78,13 +77,11 @@ def main( dry_run: bool = False, grouped: bool = False, ) -> pd.DataFrame: - options = Options() - options.add_argument("--headless") - driver = webdriver.Firefox(options=options) - driver.get(url) + r = requests.get(url) + soup = BeautifulSoup(r.text, "html.parser") - for a in driver.find_elements(By.XPATH, "//script"): - script_contents = a.get_attribute("innerHTML") + for a in soup.find_all("script"): + script_contents = a.decode_contents() if script_contents.find(tag) >= 0: break