Scraper for searching for feeds in HTML sites

2021-03-16 09:02:21 +01:00 · 2021-03-16 09:02:21 +01:00 · a96e83fb10
commit a96e83fb10
parent 24df33dd89
1 changed file with 6 additions and 3 deletions
--- a/resources/scripts/scrapers/search-xml-feeds.py
+++ b/resources/scripts/scrapers/search-xml-feeds.py
@@ -16,9 +16,12 @@ regexp_href = re.compile("href=\"([^\"]+)\"")
 for url in urls_lines:
  # Download HTML data.
-  url_response = urllib.request.urlopen(url)
+  try:
-  html =  url_response.read().decode("utf-8")
+    url_response = urllib.request.urlopen(url)
-  
+    html =  url_response.read().decode("utf-8")
+  except:
+    continue
+
  # Search for XML feeds with regexps.
  for link_tag in re.findall(regexp_link, html):
    for link_xml_feed in re.findall(regexp_href, link_tag):