Plume/plume-models/src/search/mod.rs

pub mod actor;
mod query;
mod searcher;
mod tokenizer;
pub use self::query::PlumeQuery as Query;
pub use self::searcher::*;
pub use self::tokenizer::TokenizerKind;
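// Unit tests for the search module: query tokenization and parsing,
// index creation/opening, and an end-to-end index/search round trip.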
#[cfg(test)]
pub(crate) mod tests {
use super::{Query, Searcher};
use crate::{
blogs::tests::fill_database,
config::SearchTokenizerConfig,
post_authors::*,
posts::{NewPost, Post},
safe_string::SafeString,
tests::db,
CONFIG,
};
use diesel::Connection;
use plume_common::utils::random_hex;
use std::env::temp_dir;
use std::str::FromStr;
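// Build a Searcher backed by a per-test directory under the system temp
// dir; the random_hex() suffix keeps test runs from sharing an index.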
pub(crate) fn get_searcher(tokenizers: &SearchTokenizerConfig) -> Searcher {
let dir = temp_dir().join(format!("plume-test-{}", random_hex()));
if dir.exists() {
Searcher::open(&dir, tokenizers)
} else {
Searcher::create(&dir, tokenizers)
}
.unwrap()
}
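// Query::get_first_token splits a raw query at the first token boundary:
// quoted phrases (optionally prefixed with + or -) are taken whole, bare
// words stop at the first space, and an unterminated quote consumes the
// rest of the input.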
#[test]
fn get_first_token() {
let vector = vec![
("+\"my token\" other", ("+\"my token\"", " other")),
("-\"my token\" other", ("-\"my token\"", " other")),
(" \"my token\" other", ("\"my token\"", " other")),
("\"my token\" other", ("\"my token\"", " other")),
("+my token other", ("+my", " token other")),
("-my token other", ("-my", " token other")),
(" my token other", ("my", " token other")),
("my token other", ("my", " token other")),
("+\"my token other", ("+\"my token other", "")),
("-\"my token other", ("-\"my token other", "")),
(" \"my token other", ("\"my token other", "")),
("\"my token other", ("\"my token other", "")),
];
for (source, res) in vector {
assert_eq!(Query::get_first_token(source), res);
}
}
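// Parsing and re-serializing a query normalizes it: quotes around single
// words are dropped, the leading @ of author handles is stripped, and
// duplicate date bounds collapse to the most restrictive one (earliest
// `before:`, latest `after:`).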
#[test]
fn from_str() {
let vector = vec![
("", ""),
("a query", "a query"),
("\"a query\"", "\"a query\""),
("+a -\"query\"", "+a -query"),
("title:\"something\" a query", "a query title:something"),
("-title:\"something\" a query", "a query -title:something"),
("author:user@domain", "author:user@domain"),
("-author:@user@domain", "-author:user@domain"),
("before:2017-11-05 before:2018-01-01", "before:2017-11-05"),
("after:2017-11-05 after:2018-01-01", "after:2018-01-01"),
];
for (source, res) in vector {
assert_eq!(&Query::from_str(source).unwrap().to_string(), res);
assert_eq!(Query::new().parse_query(source).to_string(), res);
}
}
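// The title() and author() setters tokenize their input like the parser,
// qualify each token with the field name, and re-quote multi-word values.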
#[test]
fn setters() {
let vector = vec![
("something", "title:something"),
("+something", "+title:something"),
("-something", "-title:something"),
("+\"something\"", "+title:something"),
("+some thing", "+title:\"some thing\""),
];
for (source, res) in vector {
assert_eq!(&Query::new().title(source, None).to_string(), res);
}
let vector = vec![
("something", "author:something"),
("+something", "+author:something"),
("-something", "-author:something"),
("+\"something\"", "+author:something"),
("+@someone@somewhere", "+author:someone@somewhere"),
];
for (source, res) in vector {
assert_eq!(&Query::new().author(source, None).to_string(), res);
}
}
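// An index created by one Searcher can be reopened by another.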
#[test]
fn open() {
let dir = temp_dir().join(format!("plume-test-{}", random_hex()));
{
Searcher::create(&dir, &CONFIG.search_tokenizers).unwrap();
}
Searcher::open(&dir, &CONFIG.search_tokenizers).unwrap();
}
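// Opening a nonexistent index fails; after create() the same path opens.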
#[test]
fn create() {
let dir = temp_dir().join(format!("plume-test-{}", random_hex()));
assert!(Searcher::open(&dir, &CONFIG.search_tokenizers).is_err());
{
Searcher::create(&dir, &CONFIG.search_tokenizers).unwrap();
}
Searcher::open(&dir, &CONFIG.search_tokenizers).unwrap(); // verify the index was created properly
}
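// End-to-end round trip: a post becomes searchable by title after
// add_document + commit, follows a title change after update_document,
// and is no longer found after delete_document.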
#[test]
fn search() {
let conn = &db();
conn.test_transaction::<_, (), _>(|| {
let searcher = get_searcher(&CONFIG.search_tokenizers);
let blog = &fill_database(conn).1[0];
let author = &blog.list_authors(conn).unwrap()[0];
let title = random_hex()[..8].to_owned();
let mut post = Post::insert(
conn,
NewPost {
blog_id: blog.id,
slug: title.clone(),
title: title.clone(),
content: SafeString::new(""),
published: true,
license: "CC-BY-SA".to_owned(),
ap_url: "".to_owned(),
creation_date: None,
subtitle: "".to_owned(),
source: "".to_owned(),
cover_id: None,
},
)
.unwrap();
PostAuthor::insert(
conn,
NewPostAuthor {
post_id: post.id,
author_id: author.id,
},
)
.unwrap();
searcher.add_document(conn, &post).unwrap();
searcher.commit();
assert_eq!(
searcher.search_document(conn, Query::from_str(&title).unwrap(), (0, 1))[0].id,
post.id
);
let newtitle = random_hex()[..8].to_owned();
post.title = newtitle.clone();
post.update(conn).unwrap();
searcher.update_document(conn, &post).unwrap();
searcher.commit();
assert_eq!(
searcher.search_document(conn, Query::from_str(&newtitle).unwrap(), (0, 1))[0].id,
post.id
);
assert!(searcher
.search_document(conn, Query::from_str(&title).unwrap(), (0, 1))
.is_empty());
searcher.delete_document(&post);
searcher.commit();
assert!(searcher
.search_document(conn, Query::from_str(&newtitle).unwrap(), (0, 1))
.is_empty());
Ok(())
});
}
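// With the Lindera tokenizer, Japanese content is segmented into words,
// so individual tokens of the post body match the indexed document.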
#[cfg(feature = "search-lindera")]
#[test]
fn search_japanese() {
let conn = &db();
conn.test_transaction::<_, (), _>(|| {
let tokenizers = SearchTokenizerConfig {
tag_tokenizer: TokenizerKind::Lindera,
content_tokenizer: TokenizerKind::Lindera,
property_tokenizer: TokenizerKind::Ngram,
};
let searcher = get_searcher(&tokenizers);
let blog = &fill_database(conn).1[0];
let title = random_hex()[..8].to_owned();
let post = Post::insert(
conn,
NewPost {
blog_id: blog.id,
slug: title.clone(),
title: title.clone(),
content: SafeString::new("ブログエンジンPlumeです。"),
published: true,
license: "CC-BY-SA".to_owned(),
ap_url: "".to_owned(),
creation_date: None,
subtitle: "".to_owned(),
source: "".to_owned(),
cover_id: None,
},
)
.unwrap();
searcher.add_document(conn, &post).unwrap();
searcher.commit();
assert_eq!(
searcher.search_document(conn, Query::from_str("ブログエンジン").unwrap(), (0, 1))
[0]
.id,
post.id
);
assert_eq!(
searcher.search_document(conn, Query::from_str("Plume").unwrap(), (0, 1))[0].id,
post.id
);
assert_eq!(
searcher.search_document(conn, Query::from_str("です").unwrap(), (0, 1))[0].id,
post.id
);
assert_eq!(
searcher.search_document(conn, Query::from_str("").unwrap(), (0, 1))[0].id,
post.id
);
Ok(())
});
}
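// Dropping the index writer must not prevent building a fresh Searcher
// afterwards.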
#[test]
fn drop_writer() {
let searcher = get_searcher(&CONFIG.search_tokenizers);
searcher.drop_writer();
get_searcher(&CONFIG.search_tokenizers);
}
}