From a96e83fb10750d14b06cb7b8beb167a3b4ec3c3a Mon Sep 17 00:00:00 2001 From: Martin Rotter Date: Tue, 16 Mar 2021 09:02:21 +0100 Subject: [PATCH] scraper for searching of feeds in html sites --- resources/scripts/scrapers/search-xml-feeds.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/resources/scripts/scrapers/search-xml-feeds.py b/resources/scripts/scrapers/search-xml-feeds.py index b59c6274b..62b6ef8da 100755 --- a/resources/scripts/scrapers/search-xml-feeds.py +++ b/resources/scripts/scrapers/search-xml-feeds.py @@ -16,9 +16,12 @@ regexp_href = re.compile("href=\"([^\"]+)\"") for url in urls_lines: # Download HTML data. - url_response = urllib.request.urlopen(url) - html = url_response.read().decode("utf-8") - + try: + url_response = urllib.request.urlopen(url) + html = url_response.read().decode("utf-8") + except: + continue + # Search for XML feeds with regexps. for link_tag in re.findall(regexp_link, html): for link_xml_feed in re.findall(regexp_href, link_tag):