Removing scheme from block urls. Fixes #4656 (#4659)

* Removing scheme from block urls. Fixes #4656

* Fix comment.

* Fixing domain checking.

* Removing pointless URL building in url blocklist regex.

* Remove trailing /
This commit is contained in:
Dessalines 2024-04-23 23:15:20 -04:00 committed by GitHub
parent 6b9d9dfaa5
commit 66e06b3952
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 48 additions and 38 deletions

View file

@ -536,25 +536,8 @@ pub async fn get_url_blocklist(context: &LemmyContext) -> LemmyResult<RegexSet>
.try_get_with::<_, LemmyError>((), async { .try_get_with::<_, LemmyError>((), async {
let urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?; let urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?;
let regexes = urls.iter().map(|url| { // The urls are already validated on saving, so just escape them.
let url = &url.url; let regexes = urls.iter().map(|url| escape(&url.url));
let parsed = Url::parse(url).expect("Coundln't parse URL.");
if url.ends_with('/') {
format!(
"({}://)?{}{}?",
parsed.scheme(),
escape(parsed.domain().expect("No domain.")),
escape(parsed.path())
)
} else {
format!(
"({}://)?{}{}",
parsed.scheme(),
escape(parsed.domain().expect("No domain.")),
escape(parsed.path())
)
}
});
let set = RegexSet::new(regexes)?; let set = RegexSet::new(regexes)?;
Ok(set) Ok(set)

View file

@ -309,21 +309,44 @@ pub fn is_url_blocked(url: &Option<Url>, blocklist: &RegexSet) -> LemmyResult<()
Ok(()) Ok(())
} }
/// Check that urls are valid, strip the scheme from each one, and remove duplicates.
pub fn check_urls_are_valid(urls: &Vec<String>) -> LemmyResult<Vec<String>> { pub fn check_urls_are_valid(urls: &Vec<String>) -> LemmyResult<Vec<String>> {
let mut parsed_urls = vec![]; let mut parsed_urls = vec![];
for url in urls { for url in urls {
let url = Url::parse(url).or_else(|e| { parsed_urls.push(build_url_str_without_scheme(url)?);
if e == ParseError::RelativeUrlWithoutBase {
Url::parse(&format!("https://{url}"))
} else {
Err(e)
}
})?;
parsed_urls.push(url.to_string());
} }
Ok(parsed_urls) let unique_urls = parsed_urls.into_iter().unique().collect();
Ok(unique_urls)
}
/// Validates `url_str` and returns it with the scheme prefix and one trailing `/` removed.
///
/// The input is parsed with `Url::parse`. If that fails with
/// `ParseError::RelativeUrlWithoutBase`, parsing is retried once with `http://`
/// prepended; any other parse error is returned to the caller.
///
/// # Errors
///
/// * The parse error, when neither parse attempt succeeds.
/// * `LemmyErrorType::InvalidUrl`, when the scheme cannot be set to `http`, or when the
///   serialized url is too short to slice off the first 7 bytes.
pub fn build_url_str_without_scheme(url_str: &str) -> LemmyResult<String> {
// Parse and check for errors
// NOTE(review): the retry presumably exists so scheme-less input such as `example.com`
// is accepted — confirm against the `url` crate's parse behavior.
let mut url = Url::parse(url_str).or_else(|e| {
if e == ParseError::RelativeUrlWithoutBase {
Url::parse(&format!("http://{url_str}"))
} else {
Err(e)
}
})?;
// Set the scheme to http, then remove the http:// part
// Forcing every url onto one scheme gives the prefix a fixed length that can be cut below.
url
.set_scheme("http")
.map_err(|_| LemmyErrorType::InvalidUrl)?;
// 7 is the byte length of "http://"; `get` returns `None` instead of panicking when
// the range is out of bounds, which is mapped to `InvalidUrl`.
let mut out = url
.to_string()
.get(7..)
.ok_or(LemmyErrorType::InvalidUrl)?
.to_string();
// Remove trailing / if necessary
// Only a single trailing slash is removed.
if out.ends_with('/') {
out.pop();
}
Ok(out)
} }
#[cfg(test)] #[cfg(test)]
@ -600,17 +623,21 @@ mod tests {
#[test] #[test]
fn test_url_parsed() { fn test_url_parsed() {
// Make sure the scheme is removed, and uniques also
assert_eq!( assert_eq!(
vec![String::from("https://example.com/")], &check_urls_are_valid(&vec![
check_urls_are_valid(&vec![String::from("example.com")]).unwrap() "example.com".to_string(),
"http://example.com".to_string(),
"https://example.com".to_string(),
"https://example.com/test?q=test2&q2=test3#test4".to_string(),
])
.unwrap(),
&vec![
"example.com".to_string(),
"example.com/test?q=test2&q2=test3#test4".to_string()
],
); );
assert!(check_urls_are_valid(&vec![ assert!(check_urls_are_valid(&vec!["https://example .com".to_string()]).is_err());
String::from("example.com"),
String::from("https://example.blog")
])
.is_ok());
assert!(check_urls_are_valid(&vec![String::from("https://example .com"),]).is_err());
} }
} }