v0.1.6
This commit is contained in:
parent
08a5816f83
commit
ea4cfb8d4a
5 changed files with 328 additions and 88 deletions
119
Cargo.lock
generated
119
Cargo.lock
generated
|
|
@ -62,6 +62,15 @@ version = "0.2.21"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ansi_colours"
|
||||
version = "1.2.3"
|
||||
|
|
@ -389,6 +398,19 @@ dependencies = [
|
|||
"rand_core 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
|
||||
dependencies = [
|
||||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-traits",
|
||||
"wasm-bindgen",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.5.58"
|
||||
|
|
@ -519,6 +541,12 @@ dependencies = [
|
|||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation-sys"
|
||||
version = "0.8.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
|
||||
[[package]]
|
||||
name = "core2"
|
||||
version = "0.4.0"
|
||||
|
|
@ -1150,6 +1178,30 @@ dependencies = [
|
|||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.65"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
|
||||
dependencies = [
|
||||
"android_system_properties",
|
||||
"core-foundation-sys",
|
||||
"iana-time-zone-haiku",
|
||||
"js-sys",
|
||||
"log",
|
||||
"wasm-bindgen",
|
||||
"windows-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone-haiku"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "id-arena"
|
||||
version = "2.3.0"
|
||||
|
|
@ -1256,6 +1308,16 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.85"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json5"
|
||||
version = "0.4.1"
|
||||
|
|
@ -1379,9 +1441,10 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "mdws"
|
||||
version = "0.1.5"
|
||||
version = "0.1.6"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
"dirs",
|
||||
"http",
|
||||
"lipgloss",
|
||||
|
|
@ -1391,6 +1454,7 @@ dependencies = [
|
|||
"ptree",
|
||||
"rand 0.10.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"strip-ansi-escapes",
|
||||
"text-template",
|
||||
"tokio",
|
||||
|
|
@ -3211,12 +3275,65 @@ version = "0.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.62.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
|
||||
dependencies = [
|
||||
"windows-implement",
|
||||
"windows-interface",
|
||||
"windows-link",
|
||||
"windows-result",
|
||||
"windows-strings",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-implement"
|
||||
version = "0.60.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-interface"
|
||||
version = "0.59.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
|
||||
[[package]]
|
||||
name = "windows-result"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-strings"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "mdws"
|
||||
version = "0.1.5"
|
||||
version = "0.1.6"
|
||||
edition = "2024"
|
||||
authors = ["Penelope Gwen <support@pogmom.me>"]
|
||||
license-file = "LICENSE.md"
|
||||
|
|
@ -22,11 +22,13 @@ toml = "1.0.1"
|
|||
http = "1.4.0"
|
||||
anyhow = "1.0.101"
|
||||
toml-frontmatter = "0.1.0"
|
||||
serde = "1.0.228"
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
rand = "0.10.0"
|
||||
strip-ansi-escapes = "0.2.1"
|
||||
ptree = "0.5.2"
|
||||
dirs = "6.0.0"
|
||||
chrono = "0.4.44"
|
||||
serde_json = "1.0.149"
|
||||
|
||||
[package.metadata.deb]
|
||||
changelog = "debian/changelog"
|
||||
|
|
|
|||
8
debian/changelog
vendored
8
debian/changelog
vendored
|
|
@ -1,3 +1,11 @@
|
|||
mdws 0.1.6-1 semistable; urgency=medium
|
||||
|
||||
* restructure visit counter code
|
||||
* implement visit counter cooldown
|
||||
* log llm addresses
|
||||
|
||||
-- Penelope Gwen <support@pogmom.me> Tue, 13 Mar 2026 14:24:43 -0700
|
||||
|
||||
mdws 0.1.5-1 semistable; urgency=medium
|
||||
|
||||
* don't serve .git lmao
|
||||
|
|
|
|||
103
src/lib/page_visits.rs
Normal file
103
src/lib/page_visits.rs
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
use http::uri::Authority;
|
||||
use std::path::PathBuf;
|
||||
use std::time::SystemTime;
|
||||
|
||||
#[derive(serde::Serialize, serde::Deserialize, Debug)]
|
||||
struct Visitor {
|
||||
address: String,
|
||||
recent_visit: SystemTime,
|
||||
visit_count: i32,
|
||||
}
|
||||
|
||||
/// Decides whether a request's user agent is eligible for the visit counter.
///
/// Returns `true` only when `user_agent` contains at least one allow-listed
/// marker and none of the block-listed markers. Matching is a case-sensitive
/// substring search.
fn ua_filter(user_agent: String) -> bool {
    // Substrings that mark a user agent as countable.
    const COUNTED_MARKERS: [&str; 8] = [
        "Safari", "curl", "Windows", "Mac OS", "Linux", "iPhone", "iPad", "Android",
    ];
    // Substrings that exclude a user agent even when a counted marker matches.
    const IGNORED_MARKERS: [&str; 5] =
        ["Kuma", "Mastodon", "Go-http-client", "Misskey", "Iceshrimp"];

    let agent = user_agent.as_str();

    let is_counted = COUNTED_MARKERS.iter().any(|marker| agent.contains(*marker));
    if !is_counted {
        return false;
    }

    let is_ignored = IGNORED_MARKERS.iter().any(|marker| agent.contains(*marker));
    !is_ignored
}
|
||||
|
||||
fn increment_visitor(path: PathBuf, visitor_stats: Vec<Visitor>, visits_total: i32) {
|
||||
match std::fs::write(
|
||||
path.join(".visitors.json"),
|
||||
serde_json::to_string(&visitor_stats).unwrap(),
|
||||
) {
|
||||
Err(e) => println!("write failed: {:?}", e),
|
||||
Ok(()) => (),
|
||||
};
|
||||
match std::fs::write(path.join(".visits"), format!("{}", visits_total)) {
|
||||
Err(e) => println!("write failed: {:?}", e),
|
||||
Ok(()) => (),
|
||||
};
|
||||
}
|
||||
|
||||
fn check_count_cooldown(path: PathBuf, remote_address: String) -> i32 {
|
||||
let mut visitor = Visitor {
|
||||
address: remote_address,
|
||||
recent_visit: SystemTime::now(),
|
||||
visit_count: 0,
|
||||
};
|
||||
let visit_count = if let Ok(total_count_file) = std::fs::read_to_string(path.join(".visits")) {
|
||||
total_count_file.parse::<i32>().unwrap()
|
||||
} else {
|
||||
0
|
||||
} + 1;
|
||||
let mut visitor_stats: Vec<Visitor> =
|
||||
if let Ok(visitor_file) = std::fs::read_to_string(path.join(".visitors.json")) {
|
||||
serde_json::from_str(visitor_file.as_str()).expect("could not parse visitors file")
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
if let Some(returning_visitor) = visitor_stats
|
||||
.iter()
|
||||
.find(|&v| v.address.eq(visitor.address.as_str()))
|
||||
{
|
||||
// returning visitor by ip
|
||||
if returning_visitor
|
||||
.recent_visit
|
||||
.elapsed()
|
||||
.expect("could not determine time since last visit")
|
||||
.as_secs()
|
||||
.gt(&600)
|
||||
{
|
||||
// previous visit cooldown expired, new visit will count
|
||||
visitor.visit_count = returning_visitor.visit_count + 1;
|
||||
let index = visitor_stats
|
||||
.iter()
|
||||
.position(|v| v.address.eq(&returning_visitor.address))
|
||||
.expect("error parsing past visitor location in json");
|
||||
visitor_stats.remove(index);
|
||||
visitor_stats.push(visitor);
|
||||
increment_visitor(path, visitor_stats, visit_count);
|
||||
} else {
|
||||
// not counting too-fast return visit
|
||||
()
|
||||
}
|
||||
} else {
|
||||
// new visitor by ip
|
||||
visitor.visit_count = 1;
|
||||
visitor_stats.push(visitor);
|
||||
increment_visitor(path, visitor_stats, visit_count);
|
||||
};
|
||||
visit_count
|
||||
}
|
||||
|
||||
pub fn count_visit(
|
||||
host: Option<Authority>,
|
||||
path: PathBuf,
|
||||
user_agent: String,
|
||||
server_domain: String,
|
||||
remote_address: String,
|
||||
) -> i32 {
|
||||
if host.is_some_and(|a| a.as_str().eq(server_domain.as_str())) && ua_filter(user_agent) {
|
||||
check_count_cooldown(path, remote_address)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
180
src/main.rs
180
src/main.rs
|
|
@ -14,11 +14,13 @@ mod curl;
|
|||
mod html;
|
||||
#[path = "lib/markdowner.rs"]
|
||||
mod markdowner;
|
||||
#[path = "lib/page_visits.rs"]
|
||||
mod page_visits;
|
||||
#[path = "lib/sidebar.rs"]
|
||||
mod sidebar;
|
||||
use crate::{
|
||||
config::ServerConfig, curl::curl_response, html::html_response, markdowner::MarkdownModule,
|
||||
sidebar::sidebar_content,
|
||||
page_visits::count_visit, sidebar::sidebar_content,
|
||||
};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
|
|
@ -26,25 +28,17 @@ struct WebQuery {
|
|||
width: Option<i32>,
|
||||
}
|
||||
|
||||
fn renderer(
|
||||
path: FullPath,
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct LlmEntry {
|
||||
address: String,
|
||||
user_agent: String,
|
||||
query: WebQuery,
|
||||
host: Option<Authority>,
|
||||
config: ServerConfig, //server_root: PathBuf,
|
||||
) -> Box<dyn warp::Reply> {
|
||||
println!("┌{:?} requested by {}", path, user_agent);
|
||||
let request_path: PathBuf = path.as_str().strip_prefix("/").unwrap_or_default().into();
|
||||
let target_path = config.server_root.join(request_path.clone());
|
||||
if !target_path.exists()
|
||||
|| target_path.is_file()
|
||||
|| (((request_path.starts_with("assets") || request_path.starts_with(".git"))
|
||||
|| request_path.starts_with(".noserve"))
|
||||
&& target_path.is_dir())
|
||||
{
|
||||
return Box::new(warp::redirect(Uri::from_static("/")));
|
||||
}
|
||||
}
|
||||
|
||||
fn llm_refuse(
|
||||
user_agent: String,
|
||||
remote_address: String,
|
||||
path: PathBuf,
|
||||
) -> Option<Box<dyn warp::Reply>> {
|
||||
// this list will grow
|
||||
let llm_user_agent_list = vec![
|
||||
"GPTBot",
|
||||
|
|
@ -59,65 +53,81 @@ fn renderer(
|
|||
];
|
||||
|
||||
if llm_user_agent_list.iter().any(|ua| user_agent.contains(ua)) {
|
||||
println!("│refused to serve llm scraper {}", user_agent);
|
||||
return Box::new(warp::reply::with_status(
|
||||
"llms breaks the internet and our world, go fuck yourself",
|
||||
warp::http::StatusCode::OK,
|
||||
));
|
||||
};
|
||||
|
||||
let target_page_visits = target_path.join(".visits");
|
||||
let mut counter = if target_page_visits.exists() {
|
||||
std::fs::read_to_string(target_page_visits.clone())
|
||||
.unwrap_or_default()
|
||||
.parse::<i32>()
|
||||
.unwrap()
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let ua_counter_whitelist = [
|
||||
"Safari", "curl", "Windows", "Mac OS", "Linux", "iPhone", "iPad", "Android",
|
||||
];
|
||||
let ua_counter_blacklist = ["Kuma", "Mastodon", "Go-http-client", "Misskey", "Iceshrimp"];
|
||||
|
||||
if host.is_some_and(|x| x.as_str().eq("pogmom.me"))
|
||||
&& ua_counter_whitelist
|
||||
.iter()
|
||||
.any(|ua| user_agent.contains(ua))
|
||||
&& !ua_counter_blacklist
|
||||
.iter()
|
||||
.any(|ua| user_agent.contains(ua))
|
||||
{
|
||||
if target_page_visits.exists() {
|
||||
if target_page_visits
|
||||
.metadata()
|
||||
.unwrap()
|
||||
.modified()
|
||||
.unwrap()
|
||||
.elapsed()
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
.gt(&1)
|
||||
{
|
||||
println!(
|
||||
"│Incrementing page counter at {} for visit from {}",
|
||||
path.as_str(),
|
||||
user_agent
|
||||
);
|
||||
counter = counter + 1;
|
||||
let _ = std::fs::write(target_page_visits, format!("{}", counter));
|
||||
}
|
||||
} else {
|
||||
println!(
|
||||
"│Incrementing page counter at {} for visit from {}",
|
||||
path.as_str(),
|
||||
user_agent
|
||||
);
|
||||
counter = counter + 1;
|
||||
let _ = std::fs::write(target_page_visits, format!("{}", counter));
|
||||
let mut ai_suspects: Vec<LlmEntry> =
|
||||
if let Ok(llm_log) = std::fs::read_to_string(path.clone()) {
|
||||
serde_json::from_str(&llm_log).expect("could not parse llm log")
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
if !ai_suspects.iter().any(|a| a.address.eq(&remote_address)) {
|
||||
ai_suspects.push(LlmEntry {
|
||||
address: remote_address,
|
||||
user_agent: user_agent.clone(),
|
||||
});
|
||||
}
|
||||
match std::fs::write(
|
||||
path,
|
||||
serde_json::to_string(&ai_suspects).unwrap_or_default(),
|
||||
) {
|
||||
Err(e) => println!("error writing ai log: {}", e),
|
||||
Ok(()) => (),
|
||||
};
|
||||
|
||||
println!("│refused to serve llm scraper: {}", user_agent);
|
||||
Some(Box::new(warp::reply::with_status(
|
||||
"llms breaks the internet and ruin our world, fuck off",
|
||||
warp::http::StatusCode::OK,
|
||||
)))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn renderer(
|
||||
path: FullPath,
|
||||
user_agent: String,
|
||||
query: WebQuery,
|
||||
host: Option<Authority>,
|
||||
x_forwarded_for: Option<String>,
|
||||
config: ServerConfig,
|
||||
) -> Box<dyn warp::Reply> {
|
||||
println!(
|
||||
"┌{:?} requested by {} from {}",
|
||||
path,
|
||||
user_agent,
|
||||
x_forwarded_for.clone().unwrap_or_default()
|
||||
);
|
||||
let request_path: PathBuf = path.as_str().strip_prefix("/").unwrap_or_default().into();
|
||||
let target_path = config.server_root.join(request_path.clone());
|
||||
if !target_path.exists()
|
||||
|| target_path.is_file()
|
||||
|| (((request_path.starts_with("assets") || request_path.starts_with(".git"))
|
||||
|| request_path.starts_with(".noserve"))
|
||||
&& target_path.is_dir())
|
||||
{
|
||||
return Box::new(warp::redirect(Uri::from_static("/")));
|
||||
}
|
||||
|
||||
if let Some(llm_refusal) = llm_refuse(
|
||||
user_agent.clone(),
|
||||
x_forwarded_for.clone().unwrap_or_default(),
|
||||
config.server_root.join(".llm_log"),
|
||||
) {
|
||||
return llm_refusal;
|
||||
}
|
||||
|
||||
let counter = if let Some(remote_address) = x_forwarded_for {
|
||||
count_visit(
|
||||
host,
|
||||
target_path.clone(),
|
||||
user_agent.clone(),
|
||||
config.server_domain.clone(),
|
||||
remote_address,
|
||||
)
|
||||
} else {
|
||||
-1
|
||||
};
|
||||
|
||||
println!("└serving path: {} to {}", path.as_str(), user_agent);
|
||||
|
||||
let page_contents = markdowner::get_markdown_modules(&target_path);
|
||||
|
|
@ -172,14 +182,6 @@ async fn main() {
|
|||
));
|
||||
let robots =
|
||||
warp::path("robots.txt").and(warp::fs::file(config.server_root.join("assets/robots.txt")));
|
||||
//let robots = include_str!("../robots.txt").to_string().clone();
|
||||
|
||||
// let robots_reply = warp::path("robots.txt").map(||warp::reply::with_status(robots, warp:http::StatusCode::OK));
|
||||
|
||||
/* .and(warp::reply::with_status(
|
||||
format!("{}", robots.clone()),
|
||||
warp::http::StatusCode::OK,
|
||||
)); //.and(warp::reply::with_status(robots, warp::http::StatusCode::OK)); */
|
||||
|
||||
let config_clone = config.clone();
|
||||
|
||||
|
|
@ -188,9 +190,17 @@ async fn main() {
|
|||
.and(warp::header("user-agent"))
|
||||
.and(warp::query::<WebQuery>())
|
||||
.and(warp::host::optional())
|
||||
.and(warp::header::optional::<String>("X-Forwarded-For"))
|
||||
.map(
|
||||
move |path: FullPath, agent: String, query: WebQuery, host| {
|
||||
renderer(path, agent, query, host, config_clone.clone())
|
||||
move |path: FullPath, agent: String, query: WebQuery, host, x_forwarded_for| {
|
||||
renderer(
|
||||
path,
|
||||
agent,
|
||||
query,
|
||||
host,
|
||||
x_forwarded_for,
|
||||
config_clone.clone(),
|
||||
)
|
||||
},
|
||||
);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue