Very experimental proof of concept (PoC) for scraping JS-enabled websites
This commit is contained in:
		
							parent
							
								
									0cbd356c4a
								
							
						
					
					
						commit
						accb478bee
					
				
					 4 changed files with 34 additions and 3 deletions
				
			
		|  | @ -108,9 +108,9 @@ void CookieJar::saveCookies() { | |||
|   sett->endGroup(); | ||||
| 
 | ||||
|   for (const QNetworkCookie& cookie : cookies) { | ||||
|     if (cookie.isSessionCookie()) { | ||||
|     /*if (cookie.isSessionCookie()) {
 | ||||
|       continue; | ||||
|     } | ||||
|     }*/ | ||||
|     sett->setPassword(GROUP(Cookies), | ||||
|                       QSL("%1-%2").arg(QString::number(i++), QString::fromUtf8(cookie.name())), | ||||
|                       cookie.toRawForm(QNetworkCookie::RawForm::Full)); | ||||
|  |  | |||
|  | @ -12,6 +12,7 @@ | |||
| 
 | ||||
| #include <QString> | ||||
| #include <QStringList> | ||||
| #include <QTimer> | ||||
| #include <QUrl> | ||||
| #include <QUrlQuery> | ||||
| #include <QWebEngineScript> | ||||
|  | @ -33,8 +34,12 @@ WebEngineViewer* WebEnginePage::view() const { | |||
| QString WebEnginePage::pageHtml(const QString& url) { | ||||
|   QEventLoop loop; | ||||
|   QString html; | ||||
|   QTimer tmr; | ||||
| 
 | ||||
|   connect(this, &WebEnginePage::loadFinished, &loop, &QEventLoop::quit); | ||||
|   tmr.setInterval(15000); | ||||
| 
 | ||||
|   connect(&tmr, &QTimer::timeout, &loop, &QEventLoop::quit); | ||||
|   connect(this, &WebEnginePage::loadFinished, &tmr, QOverload<>::of(&QTimer::start)); | ||||
| 
 | ||||
|   load(url); | ||||
|   loop.exec(); | ||||
|  |  | |||
|  | @ -14,6 +14,7 @@ | |||
| #include "services/standard/standardserviceroot.h" | ||||
| 
 | ||||
| #if defined(NO_LITE) | ||||
| #include "gui/webviewers/webengine/webengineviewer.h" | ||||
| #include "network-web/webengine/webenginepage.h" | ||||
| #endif | ||||
| 
 | ||||
|  | @ -297,8 +298,16 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type, | |||
|   else if (source_type == StandardFeed::SourceType::EmbeddedBrowser) { | ||||
| #if defined(NO_LITE) | ||||
|     WebEnginePage page; | ||||
|     WebEngineViewer viewer; | ||||
| 
 | ||||
|     // NOTE: Viewer must be present or JavaScript just does not run.
 | ||||
|     viewer.setPage(&page); | ||||
|     viewer.setAttribute(Qt::WA_DontShowOnScreen); | ||||
|     viewer.show(); | ||||
| 
 | ||||
|     feed_contents = page.pageHtml(source).toUtf8(); | ||||
| 
 | ||||
|     // IOFactory::writeFile("a.html", feed_contents);
 | ||||
| #else | ||||
|     throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME))); | ||||
| #endif | ||||
|  |  | |||
|  | @ -31,6 +31,7 @@ | |||
| #include "services/standard/standardserviceentrypoint.h" | ||||
| 
 | ||||
| #if defined(NO_LITE) | ||||
| #include "gui/webviewers/webengine/webengineviewer.h" | ||||
| #include "network-web/webengine/webenginepage.h" | ||||
| #endif | ||||
| 
 | ||||
|  | @ -242,9 +243,24 @@ QList<Message> StandardServiceRoot::obtainNewMessages(Feed* feed, | |||
|   else if (f->sourceType() == StandardFeed::SourceType::EmbeddedBrowser) { | ||||
| #if defined(NO_LITE) | ||||
|     WebEnginePage* page = new WebEnginePage(); | ||||
|     WebEngineViewer* viewer = nullptr; | ||||
| 
 | ||||
|     QMetaObject::invokeMethod( | ||||
|       qApp, | ||||
|       [&] { | ||||
|         // NOTE: Must be created on main thread.
 | ||||
|         viewer = new WebEngineViewer(); | ||||
|       }, | ||||
|       Qt::ConnectionType::BlockingQueuedConnection); | ||||
| 
 | ||||
|     viewer->moveToThread(qApp->thread()); | ||||
|     page->moveToThread(qApp->thread()); | ||||
| 
 | ||||
|     viewer->setPage(page); | ||||
|     viewer->setAttribute(Qt::WA_DontShowOnScreen); | ||||
| 
 | ||||
|     QMetaObject::invokeMethod(viewer, "show", Qt::ConnectionType::BlockingQueuedConnection); | ||||
| 
 | ||||
|     QString html; | ||||
|     QMetaObject::invokeMethod(page, | ||||
|                               "pageHtml", | ||||
|  | @ -255,6 +271,7 @@ QList<Message> StandardServiceRoot::obtainNewMessages(Feed* feed, | |||
|     feed_contents = html.toUtf8(); | ||||
| 
 | ||||
|     page->deleteLater(); | ||||
|     viewer->deleteLater(); | ||||
| #else | ||||
|     throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME))); | ||||
| #endif | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue