finish smart_split

This commit is contained in:
Ondřej Hruška 2021-09-19 14:52:20 +02:00
parent 748023c410
commit f492e9c44a

View file

@ -806,46 +806,69 @@ fn smart_split(msg : &str, prefix: Option<String>, limit: usize) -> Vec<String>
let mut parts_to_send = vec![];
let mut this_piece = prefix.clone();
for l in msg.split("\n") {
println!("* Line: {:?}", l);
if this_piece.len() + l.len() == limit {
println!("exactly fits within limit");
// this line exactly reaches the limit
this_piece.push_str(l);
parts_to_send.push(std::mem::take(&mut this_piece).trim().to_owned());
this_piece.push_str(&prefix);
} else if this_piece.len() + l.len() > limit {
println!("too long to append (already {} + new {})", this_piece.len(), l.len());
// line too long to append
if this_piece != prefix {
let trimmed = this_piece.trim();
if !trimmed.is_empty() {
println!("flush buffer: {:?}", trimmed);
parts_to_send.push(trimmed.to_owned());
}
}
// start new piece
// start new piece with the line. If the line is too long, break it up.
this_piece = format!("{}{}", prefix, l);
while this_piece.len() > limit {
let to_send = if let Some(last_space) = (&this_piece[..limit]).rfind(' ') {
let mut p = this_piece.split_off(last_space);
// line too long, try splitting at the last space, if any
let to_send = if let Some(last_space) = (&this_piece[..=limit]).rfind(' ') {
println!("line split at word boundary");
let mut p = this_piece.split_off(last_space + 1);
std::mem::swap(&mut p, &mut this_piece);
p
} else {
println!("line split at exact len (no word boundary found)");
let mut p = this_piece.split_off(limit);
std::mem::swap(&mut p, &mut this_piece);
p
};
parts_to_send.push(to_send);
this_piece = format!("{}{}", prefix, this_piece);
let part_trimmed = to_send.trim();
println!("flush buffer: {:?}", part_trimmed);
parts_to_send.push(part_trimmed.to_owned());
this_piece = format!("{}{}", prefix, this_piece.trim());
}
this_piece.push('\n');
} else {
println!("append line");
// this line still fits comfortably
this_piece.push_str(l);
this_piece.push('\n');
}
}
if this_piece != prefix {
let leftover_trimmed = this_piece.trim();
if !leftover_trimmed.is_empty() {
println!("flush buffer: {:?}", leftover_trimmed);
parts_to_send.push(leftover_trimmed.to_owned());
}
}
parts_to_send
}
#[cfg(test)]
mod test {
#[test]
fn test_smart_split1() {
fn test_smart_split_lines() {
let to_split = "a234567890\nb234567890\nc234567890\nd234\n67890\ne234567890\n";
let parts = super::smart_split(to_split, None, 10);
@ -859,7 +882,7 @@ mod test {
}
#[test]
fn test_smart_split2() {
fn test_smart_split_nosplit() {
let to_split = "foo\nbar\nbaz";
let parts = super::smart_split(to_split, None, 1000);
@ -869,7 +892,7 @@ mod test {
}
#[test]
fn test_smart_split3() {
fn test_smart_split_nosplit_prefix() {
let to_split = "foo\nbar\nbaz";
let parts = super::smart_split(to_split, Some("PREFIX".to_string()), 1000);
assert_eq!(vec![
@ -878,7 +901,7 @@ mod test {
}
#[test]
fn test_smart_split4() {
fn test_smart_split_prefix_each() {
let to_split = "1234\n56\n7";
let parts = super::smart_split(to_split, Some("PREFIX".to_string()), 10);
assert_eq!(vec![
@ -886,4 +909,79 @@ mod test {
"PREFIX56\n7".to_string(),
], parts);
}
#[test]
fn test_smart_split_words() {
    // One line of space-separated words, no prefix, limit of 10.
    // Every expected piece consists of whole words and is at most 10 bytes long.
    let input = "one two three four five six seven eight nine ten";
    let expected: Vec<String> = [
        "one two",
        "three four",
        "five six",
        "seven",
        "eight nine",
        "ten",
    ]
    .iter()
    .map(|piece| String::from(*piece))
    .collect();

    let actual = super::smart_split(input, None, 10);
    assert_eq!(expected, actual);
}
#[test]
fn test_smart_split_words_multispace() {
    // Same words as the plain word-splitting test, but the input ends with a space.
    // The expected pieces carry no leading or trailing whitespace.
    // NOTE(review): the test name suggests runs of several spaces between words, but
    // the literal visible here has single spaces plus one trailing space -- whitespace
    // may have been collapsed when this view was rendered; confirm against the repository.
    let input = "one two three four five six seven eight nine ten ";
    let expected: Vec<String> = [
        "one two",
        "three four",
        "five six",
        "seven",
        "eight nine",
        "ten",
    ]
    .iter()
    .map(|piece| String::from(*piece))
    .collect();

    let actual = super::smart_split(input, None, 10);
    assert_eq!(expected, actual);
}
#[test]
fn test_smart_split_words_longword() {
    // "threefourfive" is longer than the limit of 10, so no word boundary is available:
    // the expected output cuts it after 10 characters and the remainder ("ive")
    // continues in the next piece together with the following word.
    let input = "one two threefourfive six";
    let expected: Vec<String> = ["one two", "threefourf", "ive six"]
        .iter()
        .map(|piece| String::from(*piece))
        .collect();

    let actual = super::smart_split(input, None, 10);
    assert_eq!(expected, actual);
}
#[test]
fn test_smart_split_words_prefix() {
    // Word splitting with a prefix and a limit of 15: every expected piece starts
    // with "PREFIX" and is at most 15 bytes long including that prefix.
    let input = "one two three four five six seven eight nine ten";
    let prefix = Some(String::from("PREFIX"));
    let expected: Vec<String> = [
        "PREFIXone two",
        "PREFIXthree",
        "PREFIXfour five",
        "PREFIXsix seven",
        "PREFIXeight",
        "PREFIXnine ten",
    ]
    .iter()
    .map(|piece| String::from(*piece))
    .collect();

    let actual = super::smart_split(input, prefix, 15);
    assert_eq!(expected, actual);
}
#[test]
fn test_smart_split_realistic() {
    // Eight lines of text separated by '\n' (no trailing newline), split with a
    // mention-style prefix and a limit of 140.
    let input = concat!(
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n",
        "Aenean venenatis libero ac ex suscipit, nec efficitur arcu convallis.\n",
        "Nulla ante neque, efficitur nec fermentum a, fermentum nec nisl.\n",
        "Sed dolor ex, vestibulum at malesuada ut, faucibus ac ante.\n",
        "Nullam scelerisque magna dui, id tempor purus faucibus sit amet.\n",
        "Curabitur pretium condimentum pharetra.\n",
        "Aenean dictum, tortor et ultrices fermentum, mauris erat vehicula lectus.\n",
        "Nec varius mauris sem sollicitudin dolor. Nunc porta in urna nec vulputate.",
    );
    let prefix = Some(String::from("@pepa@pig.club "));

    // Every expected piece starts with the prefix. In two of them, two consecutive
    // input lines share one piece, joined by '\n'; the other lines stand alone.
    let expected: Vec<String> = [
        "@pepa@pig.club Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
        "@pepa@pig.club Aenean venenatis libero ac ex suscipit, nec efficitur arcu convallis.",
        "@pepa@pig.club Nulla ante neque, efficitur nec fermentum a, fermentum nec nisl.\nSed dolor ex, vestibulum at malesuada ut, faucibus ac ante.",
        "@pepa@pig.club Nullam scelerisque magna dui, id tempor purus faucibus sit amet.\nCurabitur pretium condimentum pharetra.",
        "@pepa@pig.club Aenean dictum, tortor et ultrices fermentum, mauris erat vehicula lectus.",
        "@pepa@pig.club Nec varius mauris sem sollicitudin dolor. Nunc porta in urna nec vulputate.",
    ]
    .iter()
    .map(|piece| String::from(*piece))
    .collect();

    let actual = super::smart_split(input, prefix, 140);
    assert_eq!(expected, actual);
}
}