Scraper for searching for feeds in HTML sites

2021-03-16 09:02:21 +01:00 · 2021-03-16 09:02:21 +01:00 · a96e83fb10
commit a96e83fb10
parent 24df33dd89
1 changed file with 6 additions and 3 deletions
--- a/resources/scripts/scrapers/search-xml-feeds.py
+++ b/resources/scripts/scrapers/search-xml-feeds.py
@@ -16,8 +16,11 @@ regexp_href = re.compile("href=\"([^\"]+)\"")

 for url in urls_lines:
  # Download HTML data.
+  try:
    url_response = urllib.request.urlopen(url)
    html =  url_response.read().decode("utf-8")
+  except:
+    continue

  # Search for XML feeds with regexps.
  for link_tag in re.findall(regexp_link, html):