Save work.

This commit is contained in:
Martin Rotter 2017-07-19 13:51:24 +02:00
parent dbfb1afbe0
commit 1de3862c79
2 changed files with 573 additions and 594 deletions

View file

@ -45,10 +45,11 @@
* SUCH DAMAGE.
*/
#include "adblockrule.h"
#include "adblocksubscription.h"
#include "qztools.h"
#include "qzregexp.h"
#include "network-web/adblock/adblockrule.h"
#include "network-web/adblock/adblocksubscription.h"
#include "definitions/definitions.h"
#include "miscellaneous/simpleregexp.h"
#include <QUrl>
#include <QString>
@ -56,6 +57,7 @@
#include <QWebEnginePage>
#include <QWebEngineUrlRequestInfo>
static QString toSecondLevelDomain(const QUrl &url)
{
const QString topLevelDomain = url.topLevelDomain();
@ -67,37 +69,30 @@ static QString toSecondLevelDomain(const QUrl &url)
QString domain = urlHost.left(urlHost.size() - topLevelDomain.size());
if (domain.count(QL1C('.')) == 0) {
if (domain.count(QSL('.')) == 0) {
return urlHost;
}
while (domain.count(QL1C('.')) != 0) {
domain = domain.mid(domain.indexOf(QL1C('.')) + 1);
while (domain.count(QSL('.')) != 0) {
domain = domain.mid(domain.indexOf(QSL('.')) + 1);
}
return domain + topLevelDomain;
}
AdBlockRule::AdBlockRule(const QString &filter, AdBlockSubscription* subscription)
: m_subscription(subscription)
, m_type(StringContainsMatchRule)
, m_caseSensitivity(Qt::CaseInsensitive)
, m_isEnabled(true)
, m_isException(false)
, m_isInternalDisabled(false)
, m_regExp(0)
{
: m_subscription(subscription), m_type(StringContainsMatchRule), m_caseSensitivity(Qt::CaseInsensitive),
m_isEnabled(true), m_isException(false), m_isInternalDisabled(false), m_regExp(0) {
setFilter(filter);
}
AdBlockRule::~AdBlockRule()
{
// Destroys the rule and frees the RegExp data held through m_regExp
// (deleting a null pointer is a no-op, so non-regexp rules are fine too).
AdBlockRule::~AdBlockRule()
{
  delete m_regExp;
}
AdBlockRule* AdBlockRule::copy() const
{
AdBlockRule* AdBlockRule::copy() const {
AdBlockRule *rule = new AdBlockRule();
rule->m_subscription = m_subscription;
rule->m_type = m_type;
rule->m_options = m_options;
@ -120,97 +115,80 @@ AdBlockRule* AdBlockRule::copy() const
return rule;
}
AdBlockSubscription* AdBlockRule::subscription() const
{
// Returns the subscription pointer stored in this rule (m_subscription).
AdBlockSubscription *AdBlockRule::subscription() const
{
  return m_subscription;
}
void AdBlockRule::setSubscription(AdBlockSubscription* subscription)
{
// Stores the given subscription pointer in this rule; nothing else is updated.
void AdBlockRule::setSubscription(AdBlockSubscription *subscription)
{
  m_subscription = subscription;
}
QString AdBlockRule::filter() const
{
// Returns the raw filter text this rule was built from (m_filter).
QString AdBlockRule::filter() const
{
  return m_filter;
}
void AdBlockRule::setFilter(const QString &filter)
{
// Replaces the raw filter text and immediately re-parses it, so the derived
// state set up by parseFilter() follows the new text.
void AdBlockRule::setFilter(const QString &filter)
{
  m_filter = filter;

  parseFilter();
}
bool AdBlockRule::isCssRule() const
{
// True when the rule type is CssRule.
bool AdBlockRule::isCssRule() const
{
  return m_type == CssRule;
}
QString AdBlockRule::cssSelector() const
{
// Returns m_matchString. For CSS rules parseFilter() stores the text that
// follows the "##" / "#@#" marker there; for other rule types the same member
// holds the match string instead.
QString AdBlockRule::cssSelector() const
{
  return m_matchString;
}
bool AdBlockRule::isDocument() const
{
// True when DocumentOption is set on this rule.
bool AdBlockRule::isDocument() const
{
  return hasOption(DocumentOption);
}
bool AdBlockRule::isElemhide() const
{
// True when ElementHideOption is set on this rule.
bool AdBlockRule::isElemhide() const
{
  return hasOption(ElementHideOption);
}
bool AdBlockRule::isDomainRestricted() const
{
// True when DomainRestrictedOption is set on this rule.
bool AdBlockRule::isDomainRestricted() const
{
  return hasOption(DomainRestrictedOption);
}
bool AdBlockRule::isException() const
{
// Returns the exception flag (m_isException); parseFilter() sets it for
// filters starting with "@@" and for "#@#" CSS rules.
bool AdBlockRule::isException() const
{
  return m_isException;
}
bool AdBlockRule::isComment() const
{
return m_filter.startsWith(QL1C('!'));
bool AdBlockRule::isComment() const {
return m_filter.startsWith(QSL('!'));
}
bool AdBlockRule::isEnabled() const
{
// Returns the enabled flag (m_isEnabled).
bool AdBlockRule::isEnabled() const
{
  return m_isEnabled;
}
void AdBlockRule::setEnabled(bool enabled)
{
// Sets the enabled flag (m_isEnabled); m_isInternalDisabled is left untouched.
void AdBlockRule::setEnabled(bool enabled)
{
  m_isEnabled = enabled;
}
bool AdBlockRule::isSlow() const
{
// A rule is reported as slow when it carries RegExp data, i.e. m_regExp is set.
bool AdBlockRule::isSlow() const
{
  return m_regExp != nullptr;
}
bool AdBlockRule::isInternalDisabled() const
{
// Returns m_isInternalDisabled, the flag parseFilter() raises for empty or
// comment lines and for rules whose options are not all handled.
bool AdBlockRule::isInternalDisabled() const
{
  return m_isInternalDisabled;
}
bool AdBlockRule::urlMatch(const QUrl &url) const
{
bool AdBlockRule::urlMatch(const QUrl &url) const {
if (!hasOption(DocumentOption) && !hasOption(ElementHideOption)) {
return false;
}
else {
const QString encodedUrl = url.toEncoded();
const QString domain = url.host();
return stringMatch(domain, encodedUrl);
}
}
bool AdBlockRule::networkMatch(const QWebEngineUrlRequestInfo &request, const QString &domain, const QString &encodedUrl) const
{
bool AdBlockRule::networkMatch(const QWebEngineUrlRequestInfo &request, const QString &domain, const QString &encodedUrl) const {
if (m_type == CssRule || !m_isEnabled || m_isInternalDisabled) {
return false;
}
@ -218,47 +196,47 @@ bool AdBlockRule::networkMatch(const QWebEngineUrlRequestInfo &request, const QS
bool matched = stringMatch(domain, encodedUrl);
if (matched) {
// Check domain restrictions
// Check domain restrictions.
if (hasOption(DomainRestrictedOption) && !matchDomain(request.firstPartyUrl().host())) {
return false;
}
// Check third-party restriction
// Check third-party restriction.
if (hasOption(ThirdPartyOption) && !matchThirdParty(request)) {
return false;
}
// Check object restrictions
// Check object restrictions.
if (hasOption(ObjectOption) && !matchObject(request)) {
return false;
}
// Check subdocument restriction
// Check subdocument restriction.
if (hasOption(SubdocumentOption) && !matchSubdocument(request)) {
return false;
}
// Check xmlhttprequest restriction
// Check xmlhttprequest restriction.
if (hasOption(XMLHttpRequestOption) && !matchXmlHttpRequest(request)) {
return false;
}
// Check image restriction
// Check image restriction.
if (hasOption(ImageOption) && !matchImage(request)) {
return false;
}
// Check script restriction
// Check script restriction.
if (hasOption(ScriptOption) && !matchScript(request)) {
return false;
}
// Check stylesheet restriction
// Check stylesheet restriction.
if (hasOption(StyleSheetOption) && !matchStyleSheet(request)) {
return false;
}
// Check object-subrequest restriction
// Check object-subrequest restriction.
if (hasOption(ObjectSubrequestOption) && !matchObjectSubrequest(request)) {
return false;
}
@ -267,8 +245,7 @@ bool AdBlockRule::networkMatch(const QWebEngineUrlRequestInfo &request, const QS
return matched;
}
bool AdBlockRule::matchDomain(const QString &domain) const
{
bool AdBlockRule::matchDomain(const QString &domain) const {
if (!m_isEnabled) {
return false;
}
@ -309,9 +286,8 @@ bool AdBlockRule::matchDomain(const QString &domain) const
return false;
}
bool AdBlockRule::matchThirdParty(const QWebEngineUrlRequestInfo &request) const
{
// Third-party matching should be performed on second-level domains
bool AdBlockRule::matchThirdParty(const QWebEngineUrlRequestInfo &request) const {
// Third-party matching should be performed on second-level domains.
const QString firstPartyHost = toSecondLevelDomain(request.firstPartyUrl());
const QString host = toSecondLevelDomain(request.requestUrl());
@ -320,75 +296,67 @@ bool AdBlockRule::matchThirdParty(const QWebEngineUrlRequestInfo &request) const
return hasException(ThirdPartyOption) ? !match : match;
}
bool AdBlockRule::matchObject(const QWebEngineUrlRequestInfo &request) const
{
// Checks the request's resource type against ResourceTypeObject; the result is
// inverted when ObjectOption is registered as an exception.
bool AdBlockRule::matchObject(const QWebEngineUrlRequestInfo &request) const
{
  const bool isObject = request.resourceType() == QWebEngineUrlRequestInfo::ResourceTypeObject;

  if (hasException(ObjectOption)) {
    return !isObject;
  }

  return isObject;
}
bool AdBlockRule::matchSubdocument(const QWebEngineUrlRequestInfo &request) const
{
// Checks the request's resource type against ResourceTypeSubFrame; the result
// is inverted when SubdocumentOption is registered as an exception.
bool AdBlockRule::matchSubdocument(const QWebEngineUrlRequestInfo &request) const
{
  const bool isSubFrame = request.resourceType() == QWebEngineUrlRequestInfo::ResourceTypeSubFrame;

  if (hasException(SubdocumentOption)) {
    return !isSubFrame;
  }

  return isSubFrame;
}
bool AdBlockRule::matchXmlHttpRequest(const QWebEngineUrlRequestInfo &request) const
{
// Checks the request's resource type against ResourceTypeXhr; the result is
// inverted when XMLHttpRequestOption is registered as an exception.
bool AdBlockRule::matchXmlHttpRequest(const QWebEngineUrlRequestInfo &request) const
{
  const bool isXhr = request.resourceType() == QWebEngineUrlRequestInfo::ResourceTypeXhr;

  if (hasException(XMLHttpRequestOption)) {
    return !isXhr;
  }

  return isXhr;
}
bool AdBlockRule::matchImage(const QWebEngineUrlRequestInfo &request) const
{
// Checks the request's resource type against ResourceTypeImage; the result is
// inverted when ImageOption is registered as an exception.
bool AdBlockRule::matchImage(const QWebEngineUrlRequestInfo &request) const
{
  const bool isImage = request.resourceType() == QWebEngineUrlRequestInfo::ResourceTypeImage;

  if (hasException(ImageOption)) {
    return !isImage;
  }

  return isImage;
}
bool AdBlockRule::matchScript(const QWebEngineUrlRequestInfo &request) const
{
// Checks the request's resource type against ResourceTypeScript; the result is
// inverted when ScriptOption is registered as an exception.
bool AdBlockRule::matchScript(const QWebEngineUrlRequestInfo &request) const
{
  const bool isScript = request.resourceType() == QWebEngineUrlRequestInfo::ResourceTypeScript;

  if (hasException(ScriptOption)) {
    return !isScript;
  }

  return isScript;
}
bool AdBlockRule::matchStyleSheet(const QWebEngineUrlRequestInfo &request) const
{
// Checks the request's resource type against ResourceTypeStylesheet; the result
// is inverted when StyleSheetOption is registered as an exception.
bool AdBlockRule::matchStyleSheet(const QWebEngineUrlRequestInfo &request) const
{
  const bool isStyleSheet = request.resourceType() == QWebEngineUrlRequestInfo::ResourceTypeStylesheet;

  if (hasException(StyleSheetOption)) {
    return !isStyleSheet;
  }

  return isStyleSheet;
}
bool AdBlockRule::matchObjectSubrequest(const QWebEngineUrlRequestInfo &request) const
{
// Checks the request's resource type against ResourceTypeSubResource; the
// result is inverted when ObjectSubrequestOption is registered as an exception.
bool AdBlockRule::matchObjectSubrequest(const QWebEngineUrlRequestInfo &request) const
{
  const bool isSubResource = request.resourceType() == QWebEngineUrlRequestInfo::ResourceTypeSubResource;

  if (hasException(ObjectSubrequestOption)) {
    return !isSubResource;
  }

  return isSubResource;
}
void AdBlockRule::parseFilter()
{
void AdBlockRule::parseFilter() {
QString parsedLine = m_filter;
// Empty rule or just comment
// Empty rule or just comment.
if (m_filter.trimmed().isEmpty() || m_filter.startsWith(QL1C('!'))) {
// We want to differentiate rule disabled by user and rule disabled in subscription file
// m_isInternalDisabled is also used when rule is disabled due to all options not being supported
// m_isInternalDisabled is also used when rule is disabled due to all options not being supported.
m_isEnabled = false;
m_isInternalDisabled = true;
m_type = Invalid;
return;
}
// CSS Element hiding rule
// CSS Element hiding rule.
if (parsedLine.contains(QL1S("##")) || parsedLine.contains(QL1S("#@#"))) {
m_type = CssRule;
int pos = parsedLine.indexOf(QL1C('#'));
// Domain restricted rule
// Domain restricted rule.
if (!parsedLine.startsWith(QL1S("##"))) {
QString domains = parsedLine.left(pos);
parseDomains(domains, QL1C(','));
@ -397,11 +365,11 @@ void AdBlockRule::parseFilter()
m_isException = parsedLine.at(pos + 1) == QL1C('@');
m_matchString = parsedLine.mid(m_isException ? pos + 3 : pos + 2);
// CSS rule cannot have more options -> stop parsing
// CSS rule cannot have more options -> stop parsing.
return;
}
// Exception always starts with @@
// Exception always starts with @@.
if (parsedLine.startsWith(QL1S("@@"))) {
m_isException = true;
parsedLine = parsedLine.mid(2);
@ -409,10 +377,11 @@ void AdBlockRule::parseFilter()
// Parse all options following $ char
int optionsIndex = parsedLine.indexOf(QL1C('$'));
if (optionsIndex >= 0) {
const QStringList options = parsedLine.mid(optionsIndex + 1).split(QL1C(','), QString::SkipEmptyParts);
int handledOptions = 0;
foreach (const QString &option, options) {
if (option.startsWith(QL1S("domain="))) {
parseDomains(option.mid(7), QL1C('|'));
@ -471,12 +440,12 @@ void AdBlockRule::parseFilter()
++handledOptions;
}
else if (option == QL1S("collapse")) {
// Hiding placeholders of blocked elements is enabled by default
// Hiding placeholders of blocked elements is enabled by default.
++handledOptions;
}
}
// If we don't handle all options, it's safer to just disable this rule
// If we don't handle all options, it's safer to just disable this rule.
if (handledOptions != options.count()) {
m_isInternalDisabled = true;
m_type = Invalid;
@ -485,20 +454,20 @@ void AdBlockRule::parseFilter()
parsedLine = parsedLine.left(optionsIndex);
}
// Rule is classic regexp
// Rule is classic regexp.
if (parsedLine.startsWith(QL1C('/')) && parsedLine.endsWith(QL1C('/'))) {
parsedLine = parsedLine.mid(1);
parsedLine = parsedLine.left(parsedLine.size() - 1);
m_type = RegExpMatchRule;
m_regExp = new RegExp;
m_regExp->regExp = QzRegExp(parsedLine, m_caseSensitivity);
m_regExp->regExp = SimpleRegExp(parsedLine, m_caseSensitivity);
m_regExp->matchers = createStringMatchers(parseRegExpFilter(parsedLine));
return;
}
// Remove starting and ending wildcards (*)
// Remove starting and ending wildcards (*).
if (parsedLine.startsWith(QL1C('*'))) {
parsedLine = parsedLine.mid(1);
}
@ -507,7 +476,7 @@ void AdBlockRule::parseFilter()
parsedLine = parsedLine.left(parsedLine.size() - 1);
}
// We can use fast string matching for domain here
// We can use fast string matching for domain here.
if (filterIsOnlyDomain(parsedLine)) {
parsedLine = parsedLine.mid(2);
parsedLine = parsedLine.left(parsedLine.size() - 1);
@ -517,7 +486,7 @@ void AdBlockRule::parseFilter()
return;
}
// If rule contains only | at end, we can also use string matching
// If rule contains only | at end, we can also use string matching.
if (filterIsOnlyEndsMatch(parsedLine)) {
parsedLine = parsedLine.left(parsedLine.size() - 1);
@ -527,14 +496,11 @@ void AdBlockRule::parseFilter()
}
// If we still find a wildcard (*) or separator (^) or (|)
// we must modify parsedLine to comply with QzRegExp
if (parsedLine.contains(QL1C('*')) ||
parsedLine.contains(QL1C('^')) ||
parsedLine.contains(QL1C('|'))
) {
// we must modify parsedLine to comply with SimpleRegExp.
if (parsedLine.contains(QL1C('*')) || parsedLine.contains(QL1C('^')) ||parsedLine.contains(QL1C('|'))) {
m_type = RegExpMatchRule;
m_regExp = new RegExp;
m_regExp->regExp = QzRegExp(createRegExpFromFilter(parsedLine), m_caseSensitivity);
m_regExp->regExp = SimpleRegExp(createRegExpFromFilter(parsedLine), m_caseSensitivity);
m_regExp->matchers = createStringMatchers(parseRegExpFilter(parsedLine));
return;
}
@ -544,8 +510,7 @@ void AdBlockRule::parseFilter()
m_matchString = parsedLine;
}
void AdBlockRule::parseDomains(const QString &domains, const QChar &separator)
{
void AdBlockRule::parseDomains(const QString &domains, const QChar &separator) {
QStringList domainsList = domains.split(separator, QString::SkipEmptyParts);
foreach (const QString domain, domainsList) {
@ -565,8 +530,7 @@ void AdBlockRule::parseDomains(const QString &domains, const QChar &separator)
}
}
bool AdBlockRule::filterIsOnlyDomain(const QString &filter) const
{
bool AdBlockRule::filterIsOnlyDomain(const QString &filter) const {
if (!filter.endsWith(QL1C('^')) || !filter.startsWith(QL1S("||")))
return false;
@ -587,8 +551,7 @@ bool AdBlockRule::filterIsOnlyDomain(const QString &filter) const
return true;
}
bool AdBlockRule::filterIsOnlyEndsMatch(const QString &filter) const
{
bool AdBlockRule::filterIsOnlyEndsMatch(const QString &filter) const {
for (int i = 0; i < filter.size(); ++i) {
switch (filter.at(i).toLatin1()) {
case '^':
@ -604,17 +567,15 @@ bool AdBlockRule::filterIsOnlyEndsMatch(const QString &filter) const
return false;
}
static bool wordCharacter(const QChar &c)
{
// Returns true for characters treated as part of a word: letters, digits,
// marks and the underscore.
static bool wordCharacter(const QChar &c)
{
  if (c == QL1C('_')) {
    return true;
  }

  return c.isLetterOrNumber() || c.isMark();
}
QString AdBlockRule::createRegExpFromFilter(const QString &filter) const
{
QString AdBlockRule::createRegExpFromFilter(const QString &filter) const {
QString parsed;
parsed.reserve(filter.size());
bool hadWildcard = false; // Filter multiple wildcards
bool hadWildcard = false; // Filter multiple wildcards.
for (int i = 0; i < filter.size(); ++i) {
const QChar c = filter.at(i);
@ -624,8 +585,9 @@ QString AdBlockRule::createRegExpFromFilter(const QString &filter) const
break;
case '*':
if (!hadWildcard)
if (!hadWildcard) {
parsed.append(QL1S(".*"));
}
break;
case '|':
@ -646,11 +608,13 @@ QString AdBlockRule::createRegExpFromFilter(const QString &filter) const
// fallthrough
default:
if (!wordCharacter(c))
if (!wordCharacter(c)) {
parsed.append(QL1C('\\') + c);
else
}
else {
parsed.append(c);
}
}
hadWildcard = c == QL1C('*');
}
@ -658,8 +622,7 @@ QString AdBlockRule::createRegExpFromFilter(const QString &filter) const
return parsed;
}
QList<QStringMatcher> AdBlockRule::createStringMatchers(const QStringList &filters) const
{
QList<QStringMatcher> AdBlockRule::createStringMatchers(const QStringList &filters) const {
QList<QStringMatcher> matchers;
matchers.reserve(filters.size());
@ -670,8 +633,7 @@ QList<QStringMatcher> AdBlockRule::createStringMatchers(const QStringList &filte
return matchers;
}
bool AdBlockRule::stringMatch(const QString &domain, const QString &encodedUrl) const
{
bool AdBlockRule::stringMatch(const QString &domain, const QString &encodedUrl) const {
if (m_type == StringContainsMatchRule) {
return encodedUrl.contains(m_matchString, m_caseSensitivity);
}
@ -685,73 +647,89 @@ bool AdBlockRule::stringMatch(const QString &domain, const QString &encodedUrl)
if (!isMatchingRegExpStrings(encodedUrl)) {
return false;
}
else {
return (m_regExp->regExp.indexIn(encodedUrl) != -1);
}
}
return false;
}
bool AdBlockRule::isMatchingDomain(const QString &domain, const QString &filter) const
{
return QzTools::matchDomain(filter, domain);
bool AdBlockRule::matchDomain(const QString &pattern, const QString &domain) {
if (pattern == domain) {
return true;
}
bool AdBlockRule::isMatchingRegExpStrings(const QString &url) const
{
if (!domain.endsWith(pattern)) {
return false;
}
int index = domain.indexOf(pattern);
return index > 0 && domain[index - 1] == QLatin1Char('.');
}
// Forwards to matchDomain(); note the swapped argument order — the filter is
// passed as the pattern and the domain as the tested host.
bool AdBlockRule::isMatchingDomain(const QString &domain, const QString &filter) const
{
  return matchDomain(filter, domain);
}
// Pre-check for regexp rules: returns true only when every string matcher
// stored in m_regExp->matchers is found somewhere in `url`.
// Must only be called on rules that have RegExp data (asserted).
bool AdBlockRule::isMatchingRegExpStrings(const QString &url) const
{
  Q_ASSERT(m_regExp);

  foreach (const QStringMatcher &matcher, m_regExp->matchers) {
    const bool found = matcher.indexIn(url) != -1;

    // One missing fragment is enough to rule the URL out.
    if (!found) {
      return false;
    }
  }

  return true;
}
// Split regexp filter into strings that can be used with QString::contains
// Don't use parts that contains only 1 char and duplicated parts
QStringList AdBlockRule::parseRegExpFilter(const QString &filter) const
{
// Don't use parts that contains only 1 char and duplicated parts.
// Cuts a filter into the plain-text pieces found between the AdBlock meta
// characters | * ^. Pieces of one character or less are skipped and duplicates
// are removed before the list is returned.
QStringList AdBlockRule::parseRegExpFilter(const QString &filter) const
{
  QStringList parts;
  // Starts at -1, so the first mid() call is made with a negative position.
  int partStart = -1;

  for (int pos = 0; pos < filter.size(); ++pos) {
    const QChar current = filter.at(pos);
    const bool isMeta = current == QL1C('|') || current == QL1C('*') || current == QL1C('^');

    if (!isMeta) {
      continue;
    }

    // Text collected since the previous meta character.
    const QString part = filter.mid(partStart, pos - partStart);

    if (part.size() > 1) {
      parts.append(part);
    }

    partStart = pos + 1;
  }

  // Whatever follows the last meta character.
  const QString tail = filter.mid(partStart);

  if (tail.size() > 1) {
    parts.append(tail);
  }

  parts.removeDuplicates();

  return parts;
}
bool AdBlockRule::hasOption(const AdBlockRule::RuleOption &opt) const
{
// True when the given option bit is present in m_options.
bool AdBlockRule::hasOption(const AdBlockRule::RuleOption &opt) const
{
  return (m_options & opt);
}
bool AdBlockRule::hasException(const AdBlockRule::RuleOption &opt) const
{
// True when the given option bit is present in m_exceptions.
bool AdBlockRule::hasException(const AdBlockRule::RuleOption &opt) const
{
  return (m_exceptions & opt);
}
void AdBlockRule::setOption(const AdBlockRule::RuleOption &opt)
{
// Adds the given option bit to m_options; bits already set stay set.
void AdBlockRule::setOption(const AdBlockRule::RuleOption &opt)
{
  m_options |= opt;
}
void AdBlockRule::setException(const AdBlockRule::RuleOption &opt, bool on)
{
void AdBlockRule::setException(const AdBlockRule::RuleOption &opt, bool on) {
if (on) {
m_exceptions |= opt;
}

View file

@ -103,6 +103,7 @@ class AdBlockRule {
bool matchObjectSubrequest(const QWebEngineUrlRequestInfo &request) const;
protected:
bool matchDomain(const QString &pattern, const QString &domain) const;
bool stringMatch(const QString &domain, const QString &encodedUrl) const;
bool isMatchingDomain(const QString &domain, const QString &filter) const;
bool isMatchingRegExpStrings(const QString &url) const;