This commit is contained in:
Penelope Gwen 2026-03-13 15:13:55 -07:00
parent 08a5816f83
commit ea4cfb8d4a
5 changed files with 328 additions and 88 deletions

119
Cargo.lock generated
View file

@ -62,6 +62,15 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "ansi_colours"
version = "1.2.3"
@ -389,6 +398,19 @@ dependencies = [
"rand_core 0.10.0",
]
[[package]]
name = "chrono"
version = "0.4.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-link",
]
[[package]]
name = "clap"
version = "4.5.58"
@ -519,6 +541,12 @@ dependencies = [
"unicode-segmentation",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "core2"
version = "0.4.0"
@ -1150,6 +1178,30 @@ dependencies = [
"tower-service",
]
[[package]]
name = "iana-time-zone"
version = "0.1.65"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "id-arena"
version = "2.3.0"
@ -1256,6 +1308,16 @@ dependencies = [
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "json5"
version = "0.4.1"
@ -1379,9 +1441,10 @@ dependencies = [
[[package]]
name = "mdws"
version = "0.1.5"
version = "0.1.6"
dependencies = [
"anyhow",
"chrono",
"dirs",
"http",
"lipgloss",
@ -1391,6 +1454,7 @@ dependencies = [
"ptree",
"rand 0.10.0",
"serde",
"serde_json",
"strip-ansi-escapes",
"text-template",
"tokio",
@ -3211,12 +3275,65 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
version = "0.62.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-implement"
version = "0.60.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-interface"
version = "0.59.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-result"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-strings"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-sys"
version = "0.48.0"

View file

@ -1,6 +1,6 @@
[package]
name = "mdws"
version = "0.1.5"
version = "0.1.6"
edition = "2024"
authors = ["Penelope Gwen <support@pogmom.me>"]
license-file = "LICENSE.md"
@ -22,11 +22,13 @@ toml = "1.0.1"
http = "1.4.0"
anyhow = "1.0.101"
toml-frontmatter = "0.1.0"
serde = "1.0.228"
serde = { version = "1.0.228", features = ["derive"] }
rand = "0.10.0"
strip-ansi-escapes = "0.2.1"
ptree = "0.5.2"
dirs = "6.0.0"
chrono = "0.4.44"
serde_json = "1.0.149"
[package.metadata.deb]
changelog = "debian/changelog"

8
debian/changelog vendored
View file

@ -1,3 +1,11 @@
mdws 0.1.6-1 semistable; urgency=medium
* restructure visit counter code
* implement visit counter cooldown
* log llm addresses
-- Penelope Gwen <support@pogmom.me> Fri, 13 Mar 2026 14:24:43 -0700
mdws 0.1.5-1 semistable; urgency=medium
* don't serve .git lmao

103
src/lib/page_visits.rs Normal file
View file

@ -0,0 +1,103 @@
use http::uri::Authority;
use std::path::PathBuf;
use std::time::SystemTime;
/// Per-address visit record for a single page.
///
/// A list of these is serialized with `serde_json` into the page's
/// `.visitors.json` file and read back on the next request to decide whether
/// a repeat visit from the same address should be counted again.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
struct Visitor {
/// Remote address of the visitor; used as the lookup key when matching a
/// returning visitor.
address: String,
/// Wall-clock time of the most recent visit that was counted; the cooldown
/// is measured from this instant.
recent_visit: SystemTime,
/// Number of counted visits recorded for this address.
visit_count: i32,
}
/// Decides whether a request's user agent is eligible for the visit counter.
///
/// Returns `true` only when `user_agent` contains at least one of the
/// browser/OS markers below and none of the known automated-client markers.
/// Matching is a plain, case-sensitive substring search.
fn ua_filter(user_agent: String) -> bool {
    // Substrings that identify clients whose visits should be counted.
    const COUNTED_MARKERS: [&str; 8] = [
        "Safari", "curl", "Windows", "Mac OS", "Linux", "iPhone", "iPad", "Android",
    ];
    // Substrings that identify monitors and fediverse fetchers; these always
    // win over a counted marker.
    const IGNORED_MARKERS: [&str; 5] =
        ["Kuma", "Mastodon", "Go-http-client", "Misskey", "Iceshrimp"];

    /// True when `haystack` contains any of `needles`.
    fn mentions_any(haystack: &str, needles: &[&str]) -> bool {
        for needle in needles {
            if haystack.contains(needle) {
                return true;
            }
        }
        false
    }

    if mentions_any(&user_agent, &IGNORED_MARKERS) {
        return false;
    }
    mentions_any(&user_agent, &COUNTED_MARKERS)
}
/// Persists the visit bookkeeping for one page directory.
///
/// Writes `visitor_stats` as JSON to `<path>/.visitors.json` and the running
/// total `visits_total` as plain text to `<path>/.visits`.
///
/// All failures are logged and otherwise ignored so that a bookkeeping
/// problem never takes down the request being served. If the visitor list
/// cannot be serialized, neither file is touched, which keeps the two files
/// consistent with each other.
fn increment_visitor(path: PathBuf, visitor_stats: Vec<Visitor>, visits_total: i32) {
    // Serialize first: a failure here must not panic, and must not leave a
    // bumped total next to a stale visitor list.
    let serialized = match serde_json::to_string(&visitor_stats) {
        Ok(json) => json,
        Err(e) => {
            println!("could not serialize visitors: {:?}", e);
            return;
        }
    };

    if let Err(e) = std::fs::write(path.join(".visitors.json"), serialized) {
        println!("write failed: {:?}", e);
    }
    if let Err(e) = std::fs::write(path.join(".visits"), visits_total.to_string()) {
        println!("write failed: {:?}", e);
    }
}
/// Registers a visit from `remote_address` to the page stored at `path`,
/// honouring a per-address cooldown, and returns the page's visit total.
///
/// The total is read from `<path>/.visits` and the per-address history from
/// `<path>/.visitors.json`. A visit is counted when the address has never
/// been seen, or when more than the cooldown has elapsed since its last
/// counted visit; in that case both files are rewritten via
/// `increment_visitor` and the new total is returned. Otherwise nothing is
/// written and the stored total is returned unchanged.
///
/// Bad on-disk state never panics:
/// * a missing `.visits` file, or one that does not parse as an integer
///   (surrounding whitespace is tolerated), counts as a total of 0;
/// * an unreadable `.visitors.json` is treated as an empty history;
/// * a `.visitors.json` that exists but cannot be parsed is logged and left
///   untouched, and the visit is not counted;
/// * a stored timestamp that lies in the future (for example after the clock
///   was stepped backwards) is treated as still inside the cooldown.
fn check_count_cooldown(path: PathBuf, remote_address: String) -> i32 {
    // Minimum gap, in seconds, between two counted visits from one address.
    const COOLDOWN_SECS: u64 = 600;

    let recorded_total = match std::fs::read_to_string(path.join(".visits")) {
        Ok(raw) => raw.trim().parse::<i32>().unwrap_or_else(|e| {
            println!("could not parse visit total, restarting from 0: {:?}", e);
            0
        }),
        Err(_) => 0,
    };

    let mut visitor_stats: Vec<Visitor> =
        match std::fs::read_to_string(path.join(".visitors.json")) {
            Ok(raw) => match serde_json::from_str(&raw) {
                Ok(stats) => stats,
                Err(e) => {
                    // Do not overwrite a file we failed to understand.
                    println!("could not parse visitors file: {:?}", e);
                    return recorded_total;
                }
            },
            Err(_) => Vec::new(),
        };

    let now = SystemTime::now();
    let known_index = visitor_stats
        .iter()
        .position(|v| v.address == remote_address);

    match known_index {
        Some(index) => {
            // Returning visitor: only count once the cooldown has expired.
            let returning = &mut visitor_stats[index];
            let cooled_down = returning
                .recent_visit
                .elapsed()
                .is_ok_and(|gap| gap.as_secs() > COOLDOWN_SECS);
            if !cooled_down {
                // Too-fast return visit: report the total actually stored.
                return recorded_total;
            }
            returning.recent_visit = now;
            returning.visit_count = returning.visit_count.saturating_add(1);
        }
        None => {
            // First visit from this address.
            visitor_stats.push(Visitor {
                address: remote_address,
                recent_visit: now,
                visit_count: 1,
            });
        }
    }

    let new_total = recorded_total.saturating_add(1);
    increment_visitor(path, visitor_stats, new_total);
    new_total
}
/// Entry point for the page visit counter.
///
/// A request is handed to the cooldown-aware counter only when its `host`
/// authority is present and equals `server_domain` exactly, and its
/// `user_agent` passes `ua_filter`. In that case the value returned by
/// `check_count_cooldown` for `path` and `remote_address` is passed through.
/// Every other request is ignored and yields `0`.
pub fn count_visit(
    host: Option<Authority>,
    path: PathBuf,
    user_agent: String,
    server_domain: String,
    remote_address: String,
) -> i32 {
    // Requests without a Host, or addressed to some other name, never count.
    let addressed_to_us = match host {
        Some(authority) => authority.as_str() == server_domain.as_str(),
        None => false,
    };

    if !addressed_to_us || !ua_filter(user_agent) {
        return 0;
    }

    check_count_cooldown(path, remote_address)
}

View file

@ -14,11 +14,13 @@ mod curl;
mod html;
#[path = "lib/markdowner.rs"]
mod markdowner;
#[path = "lib/page_visits.rs"]
mod page_visits;
#[path = "lib/sidebar.rs"]
mod sidebar;
use crate::{
config::ServerConfig, curl::curl_response, html::html_response, markdowner::MarkdownModule,
sidebar::sidebar_content,
page_visits::count_visit, sidebar::sidebar_content,
};
#[derive(Serialize, Deserialize)]
@ -26,25 +28,17 @@ struct WebQuery {
width: Option<i32>,
}
fn renderer(
path: FullPath,
#[derive(Serialize, Deserialize)]
struct LlmEntry {
address: String,
user_agent: String,
query: WebQuery,
host: Option<Authority>,
config: ServerConfig, //server_root: PathBuf,
) -> Box<dyn warp::Reply> {
println!("{:?} requested by {}", path, user_agent);
let request_path: PathBuf = path.as_str().strip_prefix("/").unwrap_or_default().into();
let target_path = config.server_root.join(request_path.clone());
if !target_path.exists()
|| target_path.is_file()
|| (((request_path.starts_with("assets") || request_path.starts_with(".git"))
|| request_path.starts_with(".noserve"))
&& target_path.is_dir())
{
return Box::new(warp::redirect(Uri::from_static("/")));
}
}
fn llm_refuse(
user_agent: String,
remote_address: String,
path: PathBuf,
) -> Option<Box<dyn warp::Reply>> {
// this list will grow
let llm_user_agent_list = vec![
"GPTBot",
@ -59,65 +53,81 @@ fn renderer(
];
if llm_user_agent_list.iter().any(|ua| user_agent.contains(ua)) {
println!("│refused to serve llm scraper {}", user_agent);
return Box::new(warp::reply::with_status(
"llms breaks the internet and our world, go fuck yourself",
warp::http::StatusCode::OK,
));
};
let target_page_visits = target_path.join(".visits");
let mut counter = if target_page_visits.exists() {
std::fs::read_to_string(target_page_visits.clone())
.unwrap_or_default()
.parse::<i32>()
.unwrap()
} else {
0
};
let ua_counter_whitelist = [
"Safari", "curl", "Windows", "Mac OS", "Linux", "iPhone", "iPad", "Android",
];
let ua_counter_blacklist = ["Kuma", "Mastodon", "Go-http-client", "Misskey", "Iceshrimp"];
if host.is_some_and(|x| x.as_str().eq("pogmom.me"))
&& ua_counter_whitelist
.iter()
.any(|ua| user_agent.contains(ua))
&& !ua_counter_blacklist
.iter()
.any(|ua| user_agent.contains(ua))
{
if target_page_visits.exists() {
if target_page_visits
.metadata()
.unwrap()
.modified()
.unwrap()
.elapsed()
.unwrap()
.as_secs()
.gt(&1)
{
println!(
"│Incrementing page counter at {} for visit from {}",
path.as_str(),
user_agent
);
counter = counter + 1;
let _ = std::fs::write(target_page_visits, format!("{}", counter));
}
} else {
println!(
"│Incrementing page counter at {} for visit from {}",
path.as_str(),
user_agent
);
counter = counter + 1;
let _ = std::fs::write(target_page_visits, format!("{}", counter));
let mut ai_suspects: Vec<LlmEntry> =
if let Ok(llm_log) = std::fs::read_to_string(path.clone()) {
serde_json::from_str(&llm_log).expect("could not parse llm log")
} else {
vec![]
};
if !ai_suspects.iter().any(|a| a.address.eq(&remote_address)) {
ai_suspects.push(LlmEntry {
address: remote_address,
user_agent: user_agent.clone(),
});
}
match std::fs::write(
path,
serde_json::to_string(&ai_suspects).unwrap_or_default(),
) {
Err(e) => println!("error writing ai log: {}", e),
Ok(()) => (),
};
println!("│refused to serve llm scraper: {}", user_agent);
Some(Box::new(warp::reply::with_status(
"llms breaks the internet and ruin our world, fuck off",
warp::http::StatusCode::OK,
)))
} else {
None
}
}
fn renderer(
path: FullPath,
user_agent: String,
query: WebQuery,
host: Option<Authority>,
x_forwarded_for: Option<String>,
config: ServerConfig,
) -> Box<dyn warp::Reply> {
println!(
"┌{:?} requested by {} from {}",
path,
user_agent,
x_forwarded_for.clone().unwrap_or_default()
);
let request_path: PathBuf = path.as_str().strip_prefix("/").unwrap_or_default().into();
let target_path = config.server_root.join(request_path.clone());
if !target_path.exists()
|| target_path.is_file()
|| (((request_path.starts_with("assets") || request_path.starts_with(".git"))
|| request_path.starts_with(".noserve"))
&& target_path.is_dir())
{
return Box::new(warp::redirect(Uri::from_static("/")));
}
if let Some(llm_refusal) = llm_refuse(
user_agent.clone(),
x_forwarded_for.clone().unwrap_or_default(),
config.server_root.join(".llm_log"),
) {
return llm_refusal;
}
let counter = if let Some(remote_address) = x_forwarded_for {
count_visit(
host,
target_path.clone(),
user_agent.clone(),
config.server_domain.clone(),
remote_address,
)
} else {
-1
};
println!("└serving path: {} to {}", path.as_str(), user_agent);
let page_contents = markdowner::get_markdown_modules(&target_path);
@ -172,14 +182,6 @@ async fn main() {
));
let robots =
warp::path("robots.txt").and(warp::fs::file(config.server_root.join("assets/robots.txt")));
//let robots = include_str!("../robots.txt").to_string().clone();
// let robots_reply = warp::path("robots.txt").map(||warp::reply::with_status(robots, warp:http::StatusCode::OK));
/* .and(warp::reply::with_status(
format!("{}", robots.clone()),
warp::http::StatusCode::OK,
)); //.and(warp::reply::with_status(robots, warp::http::StatusCode::OK)); */
let config_clone = config.clone();
@ -188,9 +190,17 @@ async fn main() {
.and(warp::header("user-agent"))
.and(warp::query::<WebQuery>())
.and(warp::host::optional())
.and(warp::header::optional::<String>("X-Forwarded-For"))
.map(
move |path: FullPath, agent: String, query: WebQuery, host| {
renderer(path, agent, query, host, config_clone.clone())
move |path: FullPath, agent: String, query: WebQuery, host, x_forwarded_for| {
renderer(
path,
agent,
query,
host,
x_forwarded_for,
config_clone.clone(),
)
},
);