70 lines
		
	
	
		
			No EOL
		
	
	
		
			2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			70 lines
		
	
	
		
			No EOL
		
	
	
		
			2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| # Translates entries of RSS 2.0 feed into different locale.
 | |
| #
 | |
| # Make sure to have all dependencies installed:
 | |
| #   pip3 install googletrans==4.0.0-rc1
 | |
| #   (the parallel version needs no extra packages: asyncio and concurrent.futures ship with Python 3)
 | |
| #
 | |
| # You must provide raw RSS 2.0 UTF-8 feed XML data as input, for example with curl:
 | |
| #   curl 'https://phys.org/rss-feed/' | python ./translate-rss2.py "en" "pt_BR" "true"
 | |
| #
 | |
| # You must provide three command line arguments:
 | |
| #   translate-rss2.py [FROM-LANGUAGE] [TO-LANGUAGE] [RUN-PARALLEL]
 | |
| 
 | |
| import json
 | |
| import re
 | |
| import sys
 | |
| import time
 | |
| import html
 | |
| import requests
 | |
| import distutils.util
 | |
| import xml.etree.ElementTree as ET
 | |
| from googletrans import Translator
 | |
| 
 | |
# Command line contract (see the header comment):
#   translate-rss2.py [FROM-LANGUAGE] [TO-LANGUAGE] [RUN-PARALLEL]
_USAGE = "usage: translate-rss2.py [FROM-LANGUAGE] [TO-LANGUAGE] [RUN-PARALLEL]"

# Fail with a readable message instead of an IndexError traceback when
# fewer than three arguments are supplied. Extra arguments are still ignored.
if len(sys.argv) < 4:
  sys.exit(_USAGE)

lang_from = sys.argv[1]
lang_to = sys.argv[2]

# strtobool raises ValueError for anything that is not a recognised
# true/false spelling; report that as a usage error.
# NOTE(review): distutils was removed from the standard library in
# Python 3.12, so the file-level "import distutils.util" needs replacing
# before this script can run there.
try:
  parallel = bool(distutils.util.strtobool(sys.argv[3]))
except ValueError as error:
  sys.exit(f"{_USAGE}\n{error}")

# Thread pool support is only imported when parallel mode is requested.
if parallel:
  # NOTE(review): asyncio is not referenced by the visible code; kept
  # because other parts of the file may rely on it.
  import asyncio
  from concurrent.futures import ThreadPoolExecutor

# The feed is read from standard input and decoded as UTF-8.
sys.stdin.reconfigure(encoding='utf-8')
rss_data = sys.stdin.read()

# NOTE(review): xml.etree.ElementTree is not hardened against maliciously
# constructed XML; feeds fetched from the network are untrusted input.
rss_document = ET.fromstring(rss_data)
translator = Translator()
 | |
| 
 | |
def translate_string(to_translate):
  """Translate a string from ``lang_from`` to ``lang_to``; best effort.

  Args:
    to_translate: Text to translate. ``None``, empty and whitespace-only
      values are returned unchanged without calling the translator.

  Returns:
    The ``text`` attribute of the translator's result, or the original
    ``to_translate`` value when the translation attempt raises.
  """
  # Nothing to translate: skip the remote call entirely.
  if not to_translate or not to_translate.strip():
    return to_translate

  try:
    translated = translator.translate(to_translate, src = lang_from, dest = lang_to)
    translated_text = translated.text
  except Exception:
    # Deliberate best effort: keep the untranslated text on any failure.
    # Unlike a bare "except:", this lets KeyboardInterrupt and SystemExit
    # propagate so the script can still be interrupted.
    return to_translate

  # Sequential mode pauses after each successful request
  # (presumably to throttle calls to the translation service).
  if not parallel:
    time.sleep(0.2)

  return translated_text
 | |
| 
 | |
def process_article(article):
  """Translate the title and description of one feed ``<item>`` in place.

  Args:
    article: An ElementTree element for an RSS ``<item>``. A missing
      ``<title>`` or ``<description>`` child, or one with no text, is
      skipped rather than raising.

  Returns:
    None. The element is modified in place.
  """
  title = article.find("title")
  if title is not None and title.text:
    title.text = translate_string(title.text)

  contents = article.find("description")
  if contents is None:
    return

  # Flatten the description (including text of nested elements) into one
  # string so it is translated as a whole.
  flattened = " ".join(contents.itertext())
  if not flattened.strip():
    return

  translated = translate_string(flattened)

  # The nested elements' text is now part of the flattened string; remove
  # them so the serialized output does not contain it twice.
  for child in list(contents):
    contents.remove(child)
  contents.text = translated
 | |
| 
 | |
# Translate the channel title. Skipped when the feed has no <channel>,
# no <title> inside it, or an empty title, instead of raising AttributeError.
channel = rss_document.find(".//channel")
title = channel.find("title") if channel is not None else None
if title is not None and title.text:
  title.text = translate_string(title.text)

# Translate articles.
articles = rss_document.findall(".//item")
if parallel:
  with ThreadPoolExecutor(max_workers = 2) as executor:
    # Draining the map iterator waits for every item and re-raises any
    # exception raised by a worker.
    list(executor.map(process_article, articles))
else:
  for article in articles:
    process_article(article)

# Write UTF-8 regardless of the locale's default encoding, matching the
# UTF-8 decoding applied to stdin; otherwise non-ASCII translations can
# raise UnicodeEncodeError.
sys.stdout.reconfigure(encoding='utf-8')
print(ET.tostring(rss_document, encoding = "unicode"))