mirror of
https://github.com/LemmyNet/lemmy.git
synced 2024-11-23 01:41:01 +00:00
* Removing scheme from block urls. Fixes #4656 * Fix comment. * Fixing domain checking. * Removing pointless URL building in url blocklist regex. * Remove trailing /
This commit is contained in:
parent
6b9d9dfaa5
commit
66e06b3952
2 changed files with 48 additions and 38 deletions
|
@ -536,25 +536,8 @@ pub async fn get_url_blocklist(context: &LemmyContext) -> LemmyResult<RegexSet>
|
||||||
.try_get_with::<_, LemmyError>((), async {
|
.try_get_with::<_, LemmyError>((), async {
|
||||||
let urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?;
|
let urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?;
|
||||||
|
|
||||||
let regexes = urls.iter().map(|url| {
|
// The urls are already validated on saving, so just escape them.
|
||||||
let url = &url.url;
|
let regexes = urls.iter().map(|url| escape(&url.url));
|
||||||
let parsed = Url::parse(url).expect("Coundln't parse URL.");
|
|
||||||
if url.ends_with('/') {
|
|
||||||
format!(
|
|
||||||
"({}://)?{}{}?",
|
|
||||||
parsed.scheme(),
|
|
||||||
escape(parsed.domain().expect("No domain.")),
|
|
||||||
escape(parsed.path())
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
format!(
|
|
||||||
"({}://)?{}{}",
|
|
||||||
parsed.scheme(),
|
|
||||||
escape(parsed.domain().expect("No domain.")),
|
|
||||||
escape(parsed.path())
|
|
||||||
)
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
let set = RegexSet::new(regexes)?;
|
let set = RegexSet::new(regexes)?;
|
||||||
Ok(set)
|
Ok(set)
|
||||||
|
|
|
@ -309,21 +309,44 @@ pub fn is_url_blocked(url: &Option<Url>, blocklist: &RegexSet) -> LemmyResult<()
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check that the urls are valid, strip the scheme from each one, and remove duplicates.
|
||||||
pub fn check_urls_are_valid(urls: &Vec<String>) -> LemmyResult<Vec<String>> {
|
pub fn check_urls_are_valid(urls: &Vec<String>) -> LemmyResult<Vec<String>> {
|
||||||
let mut parsed_urls = vec![];
|
let mut parsed_urls = vec![];
|
||||||
for url in urls {
|
for url in urls {
|
||||||
let url = Url::parse(url).or_else(|e| {
|
parsed_urls.push(build_url_str_without_scheme(url)?);
|
||||||
if e == ParseError::RelativeUrlWithoutBase {
|
|
||||||
Url::parse(&format!("https://{url}"))
|
|
||||||
} else {
|
|
||||||
Err(e)
|
|
||||||
}
|
|
||||||
})?;
|
|
||||||
|
|
||||||
parsed_urls.push(url.to_string());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(parsed_urls)
|
let unique_urls = parsed_urls.into_iter().unique().collect();
|
||||||
|
Ok(unique_urls)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build_url_str_without_scheme(url_str: &str) -> LemmyResult<String> {
|
||||||
|
// Parse and check for errors
|
||||||
|
let mut url = Url::parse(url_str).or_else(|e| {
|
||||||
|
if e == ParseError::RelativeUrlWithoutBase {
|
||||||
|
Url::parse(&format!("http://{url_str}"))
|
||||||
|
} else {
|
||||||
|
Err(e)
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Set the scheme to http, then remove the http:// part
|
||||||
|
url
|
||||||
|
.set_scheme("http")
|
||||||
|
.map_err(|_| LemmyErrorType::InvalidUrl)?;
|
||||||
|
|
||||||
|
let mut out = url
|
||||||
|
.to_string()
|
||||||
|
.get(7..)
|
||||||
|
.ok_or(LemmyErrorType::InvalidUrl)?
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
// Remove trailing / if necessary
|
||||||
|
if out.ends_with('/') {
|
||||||
|
out.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(out)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -600,17 +623,21 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_url_parsed() {
|
fn test_url_parsed() {
|
||||||
|
// Make sure the scheme is removed and that duplicate entries are collapsed
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
vec![String::from("https://example.com/")],
|
&check_urls_are_valid(&vec![
|
||||||
check_urls_are_valid(&vec![String::from("example.com")]).unwrap()
|
"example.com".to_string(),
|
||||||
|
"http://example.com".to_string(),
|
||||||
|
"https://example.com".to_string(),
|
||||||
|
"https://example.com/test?q=test2&q2=test3#test4".to_string(),
|
||||||
|
])
|
||||||
|
.unwrap(),
|
||||||
|
&vec![
|
||||||
|
"example.com".to_string(),
|
||||||
|
"example.com/test?q=test2&q2=test3#test4".to_string()
|
||||||
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
assert!(check_urls_are_valid(&vec![
|
assert!(check_urls_are_valid(&vec!["https://example .com".to_string()]).is_err());
|
||||||
String::from("example.com"),
|
|
||||||
String::from("https://example.blog")
|
|
||||||
])
|
|
||||||
.is_ok());
|
|
||||||
|
|
||||||
assert!(check_urls_are_valid(&vec![String::from("https://example .com"),]).is_err());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue