mirror of
https://github.com/LemmyNet/lemmy.git
synced 2025-09-02 11:13:51 +00:00
use aho-corasick for chunk filtering
This commit is contained in:
parent
664453cf88
commit
6b1f3e4416
4 changed files with 102 additions and 66 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3366,6 +3366,7 @@ dependencies = [
|
|||
name = "lemmy_db_schema_file"
|
||||
version = "1.0.0-alpha.4"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"anyhow",
|
||||
"chrono",
|
||||
"diesel",
|
||||
|
|
|
@ -46,3 +46,4 @@ tracing = { workspace = true, optional = true }
|
|||
serial_test = { workspace = true }
|
||||
diff = "0.1.13"
|
||||
itertools = { workspace = true }
|
||||
aho-corasick = "1.1.3"
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#![cfg(test)]
|
||||
#![expect(clippy::expect_used)]
|
||||
use aho_corasick::{AhoCorasick, AhoCorasickKind};
|
||||
use itertools::Itertools;
|
||||
use lemmy_utils::settings::SETTINGS;
|
||||
use std::{
|
||||
|
@ -45,39 +46,83 @@ pub fn get_dump() -> String {
|
|||
String::from_utf8(output.stdout).expect("pg_dump output is not valid UTF-8 text")
|
||||
}
|
||||
|
||||
pub fn check_dump_diff(mut dumps: [&str; 2], label_of_change_from_dump_0_to_dump_1: &str) {
|
||||
// Performance optimizations
|
||||
if dumps[0] == dumps[1] {
|
||||
return;
|
||||
}
|
||||
dumps = trim_matching_chunks_at_beginning_and_end(dumps);
|
||||
pub struct DiffChecker {
|
||||
ignored_chunk_prefixes: AhoCorasick,
|
||||
}
|
||||
|
||||
let [before_chunks, after_chunks] = dumps.map(|dump| {
|
||||
dump
|
||||
.split("\n\n")
|
||||
.filter_map(remove_ignored_details_from_chunk)
|
||||
.sorted_unstable()
|
||||
.collect::<Vec<_>>()
|
||||
});
|
||||
let diff_results = diff::slice(&before_chunks, &after_chunks);
|
||||
let mut only_in_before = HashSet::new();
|
||||
let mut only_in_after = HashSet::new();
|
||||
for res in diff_results {
|
||||
match res {
|
||||
diff::Result::Left(chunk) => only_in_before.insert(&**chunk),
|
||||
diff::Result::Right(chunk) => only_in_after.insert(&**chunk),
|
||||
diff::Result::Both(_, _) => continue,
|
||||
};
|
||||
}
|
||||
|
||||
if !(only_in_before.is_empty() && only_in_after.is_empty()) {
|
||||
panic!(
|
||||
"{}",
|
||||
[&label_of_change_from_dump_0_to_dump_1, "\n\n"]
|
||||
.into_iter()
|
||||
.chain(display_diffs(only_in_before, only_in_after))
|
||||
.collect::<String>()
|
||||
impl DiffChecker {
|
||||
pub fn new() -> Self {
|
||||
let prefixes = [
|
||||
"CREATE MATERIALIZED VIEW ",
|
||||
"CREATE OR REPLACE VIEW ",
|
||||
"CREATE VIEW ",
|
||||
]
|
||||
.into_iter()
|
||||
.map(|s| s.to_owned())
|
||||
.chain(
|
||||
[
|
||||
"refresh_comment",
|
||||
"refresh_comment_like",
|
||||
"refresh_community",
|
||||
"refresh_community_follower",
|
||||
"refresh_community_user_ban",
|
||||
"refresh_post",
|
||||
"refresh_post_like",
|
||||
"refresh_private_message",
|
||||
"refresh_user",
|
||||
]
|
||||
.into_iter()
|
||||
.flat_map(|s| {
|
||||
[
|
||||
format!("CREATE FUNCTION public.{s}("),
|
||||
format!("CREATE TRIGGER {s} "),
|
||||
]
|
||||
}),
|
||||
);
|
||||
|
||||
DiffChecker {
|
||||
ignored_chunk_prefixes: AhoCorasick::builder()
|
||||
.start_kind(aho_corasick::StartKind::Anchored)
|
||||
.kind(Some(AhoCorasickKind::DFA))
|
||||
.build(prefixes)
|
||||
.expect("AhoCorasick build failed"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_dump_diff(&self, mut dumps: [&str; 2], label_of_change_from_dump_0_to_dump_1: &str) {
|
||||
// Performance optimizations
|
||||
if dumps[0] == dumps[1] {
|
||||
return;
|
||||
}
|
||||
dumps = trim_matching_chunks_at_beginning_and_end(dumps);
|
||||
|
||||
let [before_chunks, after_chunks] = dumps.map(|dump| {
|
||||
dump
|
||||
.split("\n\n")
|
||||
.filter_map(|chunk| remove_ignored_details_from_chunk(chunk, &self.ignored_chunk_prefixes))
|
||||
.sorted_unstable()
|
||||
.collect::<Vec<_>>()
|
||||
});
|
||||
let diff_results = diff::slice(&before_chunks, &after_chunks);
|
||||
let mut only_in_before = HashSet::new();
|
||||
let mut only_in_after = HashSet::new();
|
||||
for res in diff_results {
|
||||
match res {
|
||||
diff::Result::Left(chunk) => only_in_before.insert(&**chunk),
|
||||
diff::Result::Right(chunk) => only_in_after.insert(&**chunk),
|
||||
diff::Result::Both(_, _) => continue,
|
||||
};
|
||||
}
|
||||
|
||||
if !(only_in_before.is_empty() && only_in_after.is_empty()) {
|
||||
panic!(
|
||||
"{}",
|
||||
[&label_of_change_from_dump_0_to_dump_1, "\n\n"]
|
||||
.into_iter()
|
||||
.chain(display_diffs(only_in_before, only_in_after))
|
||||
.collect::<String>()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -213,25 +258,17 @@ fn count_inserted_or_deleted_chars_in_diff(a: &str, b: &str) -> usize {
|
|||
.count()
|
||||
}
|
||||
|
||||
fn remove_ignored_details_from_chunk(mut chunk: &str) -> Option<Cow<'_, str>> {
|
||||
fn remove_ignored_details_from_chunk<'a>(
|
||||
mut chunk: &'a str,
|
||||
ignored_chunk_prefixes: &AhoCorasick,
|
||||
) -> Option<Cow<'a, str>> {
|
||||
while let Some(s) = trim_start_of_chunk(chunk) {
|
||||
chunk = s;
|
||||
}
|
||||
if chunk.is_empty() ||
|
||||
// Skip old views and fast table triggers
|
||||
chunk.strip_prefix("CREATE ").is_some_and(|c| {
|
||||
c
|
||||
.starts_with("VIEW ")
|
||||
|| c.starts_with("OR REPLACE VIEW ")
|
||||
|| c.starts_with("MATERIALIZED VIEW ")
|
||||
|| c.strip_prefix("FUNCTION public.")
|
||||
.and_then(after_skipped_trigger_name)
|
||||
.is_some_and(|a| a.starts_with('('))
|
||||
||
|
||||
c.strip_prefix("TRIGGER ")
|
||||
.and_then(after_skipped_trigger_name)
|
||||
.is_some_and(|a| a.starts_with(' '))
|
||||
}) {
|
||||
if chunk.is_empty()
|
||||
|| ignored_chunk_prefixes
|
||||
.is_match(aho_corasick::Input::new(chunk).anchored(aho_corasick::Anchored::Yes))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let mut chunk = Cow::Borrowed(chunk);
|
||||
|
@ -281,18 +318,6 @@ fn after_first_occurence<'a>(s: &'a str, pat: &str) -> &'a str {
|
|||
s.split_once(pat).unwrap_or_default().1
|
||||
}
|
||||
|
||||
fn after_skipped_trigger_name(s: &str) -> Option<&str> {
|
||||
s.strip_prefix("refresh_comment_like")
|
||||
.or_else(|| s.strip_prefix("refresh_comment"))
|
||||
.or_else(|| s.strip_prefix("refresh_community_follower"))
|
||||
.or_else(|| s.strip_prefix("refresh_community_user_ban"))
|
||||
.or_else(|| s.strip_prefix("refresh_community"))
|
||||
.or_else(|| s.strip_prefix("refresh_post_like"))
|
||||
.or_else(|| s.strip_prefix("refresh_post"))
|
||||
.or_else(|| s.strip_prefix("refresh_private_message"))
|
||||
.or_else(|| s.strip_prefix("refresh_user"))
|
||||
}
|
||||
|
||||
// cfg(test) would be redundant here
|
||||
mod tests {
|
||||
#[test]
|
||||
|
|
|
@ -51,7 +51,7 @@ const REPLACEABLE_SCHEMA_PATH: &str = "crates/db_schema/replaceable_schema";
|
|||
struct MigrationHarnessWrapper<'a> {
|
||||
conn: &'a mut PgConnection,
|
||||
#[cfg(test)]
|
||||
enable_diff_check: bool,
|
||||
diff_checker: Option<&'a diff_check::DiffChecker>,
|
||||
options: &'a Options,
|
||||
}
|
||||
|
||||
|
@ -80,7 +80,7 @@ impl MigrationHarness<Pg> for MigrationHarnessWrapper<'_> {
|
|||
migration: &dyn Migration<Pg>,
|
||||
) -> diesel::migration::Result<MigrationVersion<'static>> {
|
||||
#[cfg(test)]
|
||||
if self.enable_diff_check {
|
||||
if let Some(diff_checker) = self.diff_checker {
|
||||
let before = diff_check::get_dump();
|
||||
|
||||
self.run_migration_inner(migration)?;
|
||||
|
@ -88,7 +88,7 @@ impl MigrationHarness<Pg> for MigrationHarnessWrapper<'_> {
|
|||
|
||||
let after = diff_check::get_dump();
|
||||
|
||||
diff_check::check_dump_diff(
|
||||
diff_checker.check_dump_diff(
|
||||
[&after, &before],
|
||||
&format!(
|
||||
"These changes need to be applied in migrations/{}/down.sql:",
|
||||
|
@ -180,6 +180,8 @@ pub enum Branch {
|
|||
|
||||
pub fn run(options: Options) -> LemmyResult<Branch> {
|
||||
let db_url = SETTINGS.get_database_url();
|
||||
#[cfg(test)]
|
||||
let diff_checker = options.enable_diff_check.then(diff_check::DiffChecker::new);
|
||||
|
||||
// Migrations don't support async connection, and this function doesn't need to be async
|
||||
let mut conn = PgConnection::establish(&db_url)?;
|
||||
|
@ -216,7 +218,13 @@ pub fn run(options: Options) -> LemmyResult<Branch> {
|
|||
// it existing
|
||||
revert_replaceable_schema(&mut conn)?;
|
||||
|
||||
run_selected_migrations(&mut conn, &options).map_err(convert_err)?;
|
||||
run_selected_migrations(
|
||||
&mut conn,
|
||||
&options,
|
||||
#[cfg(test)]
|
||||
diff_checker.as_ref(),
|
||||
)
|
||||
.map_err(convert_err)?;
|
||||
|
||||
// Only run replaceable_schema if newest migration was applied
|
||||
let output = if (options.run && options.limit.is_none())
|
||||
|
@ -225,7 +233,7 @@ pub fn run(options: Options) -> LemmyResult<Branch> {
|
|||
.map_err(convert_err)?
|
||||
{
|
||||
#[cfg(test)]
|
||||
if options.enable_diff_check {
|
||||
if let Some(diff_checker) = diff_checker {
|
||||
let before = diff_check::get_dump();
|
||||
|
||||
run_replaceable_schema(&mut conn)?;
|
||||
|
@ -233,7 +241,7 @@ pub fn run(options: Options) -> LemmyResult<Branch> {
|
|||
|
||||
let after = diff_check::get_dump();
|
||||
|
||||
diff_check::check_dump_diff([&before, &after], "The code in crates/db_schema/replaceable_schema incorrectly created or modified things outside of the `r` schema, causing these changes to be left behind after dropping the schema:");
|
||||
diff_checker.check_dump_diff([&before, &after], "The code in crates/db_schema/replaceable_schema incorrectly created or modified things outside of the `r` schema, causing these changes to be left behind after dropping the schema:");
|
||||
}
|
||||
|
||||
run_replaceable_schema(&mut conn)?;
|
||||
|
@ -278,12 +286,13 @@ fn revert_replaceable_schema(conn: &mut PgConnection) -> LemmyResult<()> {
|
|||
fn run_selected_migrations(
|
||||
conn: &mut PgConnection,
|
||||
options: &Options,
|
||||
#[cfg(test)] diff_checker: Option<&diff_check::DiffChecker>,
|
||||
) -> diesel::migration::Result<()> {
|
||||
let mut wrapper = MigrationHarnessWrapper {
|
||||
conn,
|
||||
options,
|
||||
#[cfg(test)]
|
||||
enable_diff_check: options.enable_diff_check,
|
||||
diff_checker,
|
||||
};
|
||||
|
||||
if options.revert {
|
||||
|
|
Loading…
Reference in a new issue