From 65bb665f92936beb4ca83635a52e8353faa4653e Mon Sep 17 00:00:00 2001 From: Martin Rotter Date: Wed, 18 Dec 2024 14:32:08 +0100 Subject: [PATCH] initial state based parser for gemini --- .../network-web/gemini/geminiparser.cpp | 98 +++++++++++++++---- .../network-web/gemini/geminiparser.h | 24 ++++- 2 files changed, 102 insertions(+), 20 deletions(-) diff --git a/src/librssguard/network-web/gemini/geminiparser.cpp b/src/librssguard/network-web/gemini/geminiparser.cpp index af2c2940b..df8649e04 100644 --- a/src/librssguard/network-web/gemini/geminiparser.cpp +++ b/src/librssguard/network-web/gemini/geminiparser.cpp @@ -10,50 +10,119 @@ QString GeminiParser::geminiToHtml(const QByteArray& gemini_data) { QString gemini_hypertext = QString::fromUtf8(gemini_data).replace(QSL("\r\n"), QSL("\n")).replace(QSL("\r"), QSL("\n")); QStringList lines = gemini_hypertext.split(QL1C('\n')); - bool normal_mode = true; + mode = State::Normal; static QRegularExpression exp_link(R"(^=>\s+([^\s]+)(?:\s+(\w.+))?$)"); static QRegularExpression exp_heading(R"(^(#{1,6})\s+(.+)$)"); static QRegularExpression exp_list(R"(^\*\s(.+)$)"); static QRegularExpression exp_quote(R"((?:^>$|^>\s?(.+)$))"); static QRegularExpression exp_pre(R"(^```.*$)"); - static QRegularExpression exp_text(R"()"); QRegularExpressionMatch mtch; QString title; for (const QString& line : lines) { if ((mtch = exp_pre.match(line)).hasMatch()) { - normal_mode = !normal_mode; + // Begin or end PRE block. + switch (mode) { + case State::Pre: + // Ending of a PRE block. + html += endBlock(State::Normal); + break; + + default: + // Beginning of a PRE block. + html += endBlock(State::Normal); + html += beginBlock(State::Pre); + break; + } continue; } - if (normal_mode) { + if (mode != State::Pre) { if ((mtch = exp_link.match(line)).hasMatch()) { + html += endBlock(State::Normal); html += parseLink(mtch); } else if ((mtch = exp_heading.match(line)).hasMatch()) { + html += endBlock(State::Normal); html += parseHeading(mtch, title.isEmpty() ? &title : nullptr); } else if ((mtch = exp_list.match(line)).hasMatch()) { + html += beginBlock(State::List); html += parseList(mtch); } else if ((mtch = exp_quote.match(line)).hasMatch()) { + html += beginBlock(State::Quote); html += parseQuote(mtch); } else { + html += endBlock(State::Normal); html += parseTextInNormalMode(line); } } else { + // Add new line in PRE mode. html += parseInPreMode(line); } } + html += endBlock(State::Normal); + + // IOFactory::writeFile("aa", html.toUtf8()); + return QSL("" "%1" "%2" - "").arg(title, html); + "") + .arg(title, html); +} + +QString GeminiParser::beginBlock(State new_mode) { + if (new_mode != mode) { + mode = new_mode; + + switch (new_mode) { + case State::List: + return "\n"; + break; + + case State::Quote: + to_return = "\n"; + break; + + case State::Pre: + to_return = "\n"; + break; + } + + mode = new_mode; + } + + return to_return; } QString GeminiParser::parseLink(const QRegularExpressionMatch& mtch) const { @@ -75,29 +144,22 @@ QString GeminiParser::parseHeading(const QRegularExpressionMatch& mtch, QString* return QSL("%2\n").arg(QString::number(level), header); } -QString GeminiParser::parseQuote(const QRegularExpressionMatch &mtch) const { +QString GeminiParser::parseQuote(const QRegularExpressionMatch& mtch) const { QString text = mtch.captured(1); - return QSL("

%1

\n").arg(text.isEmpty() ? QString() : QSL("“%1”").arg(text)); + return QSL("
%1
\n").arg(text.isEmpty() ? QString() : QSL("“%1”").arg(text)); } -QString GeminiParser::parseList(const QRegularExpressionMatch &mtch) const { +QString GeminiParser::parseList(const QRegularExpressionMatch& mtch) const { QString text = mtch.captured(1); - return QSL("

• %1

\n").arg(text); + return QSL("
  • %1
  • \n").arg(text); } -QString GeminiParser::parseTextInNormalMode(const QString &line) const{ +QString GeminiParser::parseTextInNormalMode(const QString& line) const { return QSL("

    %1

    \n").arg(line); } QString GeminiParser::parseInPreMode(const QString& line) const { - return QSL("
    %1
    \n").arg(line); + return QSL("%1\n").arg(line.toHtmlEscaped()); } diff --git a/src/librssguard/network-web/gemini/geminiparser.h b/src/librssguard/network-web/gemini/geminiparser.h index 84732a368..b4f146699 100644 --- a/src/librssguard/network-web/gemini/geminiparser.h +++ b/src/librssguard/network-web/gemini/geminiparser.h @@ -3,20 +3,40 @@ #ifndef GEMINIPARSER_H #define GEMINIPARSER_H -#include #include +#include class GeminiParser { public: QString geminiToHtml(const QByteArray& gemini_data); private: + enum class State { + // Regular state. + Normal, + + // Inside list. + List, + + // Inside quote. + Quote, + + // Inside PRE. + Pre + }; + QString parseLink(const QRegularExpressionMatch& mtch) const; - QString parseHeading(const QRegularExpressionMatch& mtch, QString *clean_header = nullptr) const; + QString parseHeading(const QRegularExpressionMatch& mtch, QString* clean_header = nullptr) const; QString parseQuote(const QRegularExpressionMatch& mtch) const; QString parseList(const QRegularExpressionMatch& mtch) const; QString parseTextInNormalMode(const QString& line) const; QString parseInPreMode(const QString& line) const; + + QString beginBlock(State new_mode); + QString endBlock(State new_mode); + + private: + State mode; }; #endif // GEMINIPARSER_H