initial state based parser for gemini

This commit is contained in:
Martin Rotter 2024-12-18 14:32:08 +01:00
parent bdc71f3408
commit 65bb665f92
2 changed files with 102 additions and 20 deletions

View file

@ -10,50 +10,119 @@ QString GeminiParser::geminiToHtml(const QByteArray& gemini_data) {
QString gemini_hypertext = QString gemini_hypertext =
QString::fromUtf8(gemini_data).replace(QSL("\r\n"), QSL("\n")).replace(QSL("\r"), QSL("\n")); QString::fromUtf8(gemini_data).replace(QSL("\r\n"), QSL("\n")).replace(QSL("\r"), QSL("\n"));
QStringList lines = gemini_hypertext.split(QL1C('\n')); QStringList lines = gemini_hypertext.split(QL1C('\n'));
bool normal_mode = true; mode = State::Normal;
static QRegularExpression exp_link(R"(^=>\s+([^\s]+)(?:\s+(\w.+))?$)"); static QRegularExpression exp_link(R"(^=>\s+([^\s]+)(?:\s+(\w.+))?$)");
static QRegularExpression exp_heading(R"(^(#{1,6})\s+(.+)$)"); static QRegularExpression exp_heading(R"(^(#{1,6})\s+(.+)$)");
static QRegularExpression exp_list(R"(^\*\s(.+)$)"); static QRegularExpression exp_list(R"(^\*\s(.+)$)");
static QRegularExpression exp_quote(R"((?:^>$|^>\s?(.+)$))"); static QRegularExpression exp_quote(R"((?:^>$|^>\s?(.+)$))");
static QRegularExpression exp_pre(R"(^```.*$)"); static QRegularExpression exp_pre(R"(^```.*$)");
static QRegularExpression exp_text(R"()");
QRegularExpressionMatch mtch; QRegularExpressionMatch mtch;
QString title; QString title;
for (const QString& line : lines) { for (const QString& line : lines) {
if ((mtch = exp_pre.match(line)).hasMatch()) { if ((mtch = exp_pre.match(line)).hasMatch()) {
normal_mode = !normal_mode; // Begin or end PRE block.
switch (mode) {
case State::Pre:
// Ending of a PRE block.
html += endBlock(State::Normal);
break;
default:
// Beginning of a PRE block.
html += endBlock(State::Normal);
html += beginBlock(State::Pre);
break;
}
continue; continue;
} }
if (normal_mode) { if (mode != State::Pre) {
if ((mtch = exp_link.match(line)).hasMatch()) { if ((mtch = exp_link.match(line)).hasMatch()) {
html += endBlock(State::Normal);
html += parseLink(mtch); html += parseLink(mtch);
} }
else if ((mtch = exp_heading.match(line)).hasMatch()) { else if ((mtch = exp_heading.match(line)).hasMatch()) {
html += endBlock(State::Normal);
html += parseHeading(mtch, title.isEmpty() ? &title : nullptr); html += parseHeading(mtch, title.isEmpty() ? &title : nullptr);
} }
else if ((mtch = exp_list.match(line)).hasMatch()) { else if ((mtch = exp_list.match(line)).hasMatch()) {
html += beginBlock(State::List);
html += parseList(mtch); html += parseList(mtch);
} }
else if ((mtch = exp_quote.match(line)).hasMatch()) { else if ((mtch = exp_quote.match(line)).hasMatch()) {
html += beginBlock(State::Quote);
html += parseQuote(mtch); html += parseQuote(mtch);
} }
else { else {
html += endBlock(State::Normal);
html += parseTextInNormalMode(line); html += parseTextInNormalMode(line);
} }
} }
else { else {
// Add new line in PRE mode.
html += parseInPreMode(line); html += parseInPreMode(line);
} }
} }
html += endBlock(State::Normal);
// IOFactory::writeFile("aa", html.toUtf8());
return QSL("<html>" return QSL("<html>"
"<head><title>%1</title></head>" "<head><title>%1</title></head>"
"<body>%2</body>" "<body>%2</body>"
"</html>").arg(title, html); "</html>")
.arg(title, html);
}
// Switches the parser into block state `new_mode` and returns the HTML
// needed for that transition.
//
// Returns an empty string when the parser is already in `new_mode`, so that
// consecutive list (or quote) lines share a single container element.
// Otherwise the block that is currently open is closed first and the opening
// tag of the new block is appended. Closing first matters when one container
// directly follows another (e.g. a list line followed by a quote line):
// without it the <ul>/<div> of the previous block would never be closed.
QString GeminiParser::beginBlock(State new_mode) {
  if (new_mode == mode) {
    return QString();
  }

  // endBlock() returns the closing tag of the block being left (empty when
  // leaving State::Normal) and makes new_mode the current mode.
  QString markup = endBlock(new_mode);

  switch (new_mode) {
    case State::List:
      markup += QSL("<ul>\n");
      break;

    case State::Quote:
      markup += QSL("<div align=\"center\" style=\""
                    "background-color: #E1E5EE;"
                    "font-style: italic;"
                    "margin-left: 20px;"
                    "margin-right: 20px;\">\n");
      break;

    case State::Pre:
      markup += QSL("<pre style=\"background-color: #E1E5EE;\">\n");
      break;

    case State::Normal:
      // Normal text has no container element, so there is nothing to open.
      break;
  }

  return markup;
}
// Leaves the current block state in favour of `new_mode`.
//
// When `new_mode` differs from the current mode, returns the closing tag of
// the block being left ("</ul>\n", "</div>\n" or "</pre>\n"; an empty string
// when the current mode is State::Normal) and makes `new_mode` the current
// mode. When the parser is already in `new_mode`, returns an empty string and
// leaves the state untouched.
QString GeminiParser::endBlock(State new_mode) {
QString to_return;
if (new_mode != mode) {
// Dispatch on the mode being left, not on the mode being entered.
switch (mode) {
case State::List:
to_return = "</ul>\n";
break;
case State::Quote:
to_return = "</div>\n";
break;
case State::Pre:
to_return = "</pre>\n";
break;
}
// State::Normal is not listed above, so leaving it produces no closing tag.
mode = new_mode;
}
return to_return;
} }
QString GeminiParser::parseLink(const QRegularExpressionMatch& mtch) const { QString GeminiParser::parseLink(const QRegularExpressionMatch& mtch) const {
@ -75,29 +144,22 @@ QString GeminiParser::parseHeading(const QRegularExpressionMatch& mtch, QString*
return QSL("<h%1>%2</h%1>\n").arg(QString::number(level), header); return QSL("<h%1>%2</h%1>\n").arg(QString::number(level), header);
} }
// Renders one quote line from the first capture group of `mtch`.
// The previous revision (left column) emitted a self-contained, fully styled
// <p> per line; this revision (right column) emits a plain <div> and relies
// on beginBlock(State::Quote) for the styled container. An empty capture
// yields an empty element; otherwise the text is wrapped in typographic
// quotation marks.
// NOTE(review): the captured text is inserted without HTML escaping, unlike
// parseInPreMode() - confirm whether that is intended.
QString GeminiParser::parseQuote(const QRegularExpressionMatch &mtch) const { QString GeminiParser::parseQuote(const QRegularExpressionMatch& mtch) const {
QString text = mtch.captured(1); QString text = mtch.captured(1);
return QSL("<p align=\"center\" style=\"" return QSL("<div>%1</div>\n").arg(text.isEmpty() ? QString() : QSL("“%1”").arg(text));
"background-color: #E1E5EE;"
"font-style: italic;"
"margin-left: 20px;"
"margin-right: 20px;"
"\">%1</p>\n").arg(text.isEmpty() ? QString() : QSL("“%1”").arg(text));
} }
// Renders one list entry from the first capture group of `mtch`.
// The previous revision (left column) emitted an indented <p> prefixed with a
// bullet character; this revision (right column) emits an <li> element, with
// the enclosing <ul> opened by beginBlock(State::List) and closed by
// endBlock().
// NOTE(review): the captured text is inserted without HTML escaping - confirm
// whether that is intended.
QString GeminiParser::parseList(const QRegularExpressionMatch &mtch) const { QString GeminiParser::parseList(const QRegularExpressionMatch& mtch) const {
QString text = mtch.captured(1); QString text = mtch.captured(1);
return QSL("<p style=\"" return QSL("<li>%1</li>\n").arg(text);
"margin-left: 20px;"
"\">• %1</p>\n").arg(text);
} }
// Wraps a plain text line in a <p> element followed by a newline.
// Both revisions behave the same; only the signature formatting changed.
// NOTE(review): `line` is inserted without HTML escaping - confirm whether
// that is intended.
QString GeminiParser::parseTextInNormalMode(const QString &line) const{ QString GeminiParser::parseTextInNormalMode(const QString& line) const {
return QSL("<p>%1</p>\n").arg(line); return QSL("<p>%1</p>\n").arg(line);
} }
// Renders one line that lies inside a preformatted block.
// The previous revision (left column) wrapped every line in its own <pre>
// element; this revision (right column) returns the HTML-escaped line followed
// by a newline, with the surrounding <pre> opened by beginBlock(State::Pre)
// and closed by endBlock().
QString GeminiParser::parseInPreMode(const QString& line) const { QString GeminiParser::parseInPreMode(const QString& line) const {
return QSL("<pre>%1</pre>\n").arg(line); return QSL("%1\n").arg(line.toHtmlEscaped());
} }

View file

@ -3,20 +3,40 @@
#ifndef GEMINIPARSER_H #ifndef GEMINIPARSER_H
#define GEMINIPARSER_H #define GEMINIPARSER_H
#include <QString>
#include <QRegularExpressionMatch> #include <QRegularExpressionMatch>
#include <QString>
// Converts a document written in Gemini markup into an HTML page.
// The conversion works line by line and tracks which kind of block (list,
// quote, preformatted text) is currently open.
class GeminiParser {
public: public:
// Converts UTF-8 encoded Gemini data into a complete HTML document.
QString geminiToHtml(const QByteArray& gemini_data); QString geminiToHtml(const QByteArray& gemini_data);
private: private:
// Kind of block the parser is currently inside of.
enum class State {
// Regular state.
Normal,
// Inside list.
List,
// Inside quote.
Quote,
// Inside PRE.
Pre
};
// Per-line renderers: each returns the HTML for a single matched line.
QString parseLink(const QRegularExpressionMatch& mtch) const; QString parseLink(const QRegularExpressionMatch& mtch) const;
QString parseHeading(const QRegularExpressionMatch& mtch, QString *clean_header = nullptr) const; QString parseHeading(const QRegularExpressionMatch& mtch, QString* clean_header = nullptr) const;
QString parseQuote(const QRegularExpressionMatch& mtch) const; QString parseQuote(const QRegularExpressionMatch& mtch) const;
QString parseList(const QRegularExpressionMatch& mtch) const; QString parseList(const QRegularExpressionMatch& mtch) const;
QString parseTextInNormalMode(const QString& line) const; QString parseTextInNormalMode(const QString& line) const;
QString parseInPreMode(const QString& line) const; QString parseInPreMode(const QString& line) const;
// State transitions: both update `mode` and return the opening/closing tag
// required by the transition (possibly an empty string).
QString beginBlock(State new_mode);
QString endBlock(State new_mode);
private:
// Current block state; geminiToHtml() sets it to State::Normal before parsing.
// NOTE(review): there is no in-class initializer - confirm nothing reads
// `mode` before geminiToHtml() has run.
State mode;
}; };
#endif // GEMINIPARSER_H #endif // GEMINIPARSER_H