neptune-explorer/src/bin/scraper.rs
commit edf7800a88 by Alan Szepieniec, 2025-10-09 18:32:51 +02:00
security: Add attack binary

Add a binary that deploys a flooding attack.

Also, add a binary that periodically queries a random URL from the website. This
scraper will be used to measure whether the attack (or its countermeasure) is
effective.

use log::{error, info, warn, LevelFilter};
use rand::seq::IteratorRandom;
use regex::Regex;
use reqwest::Client;
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
use std::time::Duration;
use tokio::{signal, time};
use url::Url;

/// Scrape the explorer website when running locally.
///
/// This program maintains a set of known URLs, initially seeded with
/// 'http://localhost:3000'. On each iteration it fetches a random URL from the
/// set, logs a success message if the request went through, extracts new URLs
/// from the response body and adds them to the set, logs a warning or error on
/// a request failure or timeout, sleeps briefly, and repeats until Ctrl-C is
/// pressed.
///
/// Run with:
/// `> cargo run --bin scraper`
#[tokio::main]
async fn main() {
    // Initialize logger
    env_logger::builder().filter_level(LevelFilter::Info).init();
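
    // One reusable HTTP client. The short 300 ms timeout means a server slowed
    // down by the flooding attack surfaces quickly as timeout warnings in the
    // scraper's log.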
    let client = Client::builder()
        .timeout(Duration::from_millis(300))
        .build()
        .expect("Failed to build HTTP client");
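
    // The crawl frontier: every URL discovered so far, shared behind a mutex
    // and seeded with the site root.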
    let root_url = "http://localhost:3000".to_string();
    let urls = Arc::new(Mutex::new(HashSet::from([root_url.clone()])));
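
    // Crude link extraction: match the href attribute of anchor tags; capture
    // group 1 holds the link target.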
    let href_regex = Regex::new(r#"<a\s+(?:[^>]*?\s+)?href=['\"](.*?)['\"]"#).unwrap();

    info!("Starting fetch loop. Press Ctrl-C to stop.");
    let urls_clone = Arc::clone(&urls);
    let fetch_loop = async move {
        loop {
            // Pick a random URL. The guard is confined to this block so the
            // std Mutex is released before any .await point below.
            let url_opt = {
                let urls_guard = urls_clone.lock().unwrap();
                urls_guard.iter().choose(&mut rand::rng()).cloned()
            };

            if let Some(url) = url_opt {
                match client.get(&url).send().await {
                    Ok(resp) => {
                        if resp.status().is_success() {
                            match resp.text().await {
                                Ok(text) => {
                                    info!("Success fetching {}", url);
                                    let mut urls_guard = urls_clone.lock().unwrap();
                                    for cap in href_regex.captures_iter(&text) {
                                        let href = &cap[1];
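                                        // Prefixing the root URL means only
                                        // root-relative hrefs ("/...") yield a
                                        // parseable URL; links to other hosts
                                        // fail to parse and are skipped, which
                                        // keeps the crawl on-site.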
                                        if let Ok(parsed_url) =
                                            Url::parse(&[root_url.as_str(), href].concat())
                                        {
                                            let normalized = parsed_url.as_str();
                                            if urls_guard.insert(normalized.to_owned()) {
                                                info!("Added new URL to set: {}", normalized);
                                            }
                                        }
                                    }
                                }
                                Err(e) => {
                                    warn!("Failed to read response body from {}: {}", url, e);
                                }
                            }
                        } else {
                            warn!("Non-success status {} from {}", resp.status(), url);
                        }
                    }
                    Err(err) => {
                        if err.is_timeout() {
                            warn!("Timeout fetching {}", url);
                        } else {
                            error!("Error fetching {}: {}", url, err);
                        }
                    }
                }
            } else {
                warn!("URL set is empty, no URL to fetch");
            }
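
            // Throttle between iterations so the scraper itself does not
            // hammer the site.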
            time::sleep(Duration::from_millis(500)).await;
        }
    };

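    // select! polls both futures; whichever completes first causes the other
    // to be dropped, so Ctrl-C cancels the endless fetch loop.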
    tokio::select! {
        _ = fetch_loop => {}, // This runs indefinitely unless stopped
        _ = signal::ctrl_c() => {
            info!("Ctrl-C received, stopping...");
        }
    }
}