mirror of
https://git.asonix.dog/asonix/pict-rs.git
synced 2025-04-22 16:14:09 +00:00
Fix UTF8 errors in repo migrations
As it turns out, sometimes UUIDs are made entirely of bytes that are valid utf-8. This causes problems if a repo migration takes place (and in some other scenarios, such as deleting aliases). Here is an example of the problem 1. A UUID is generated which is valid utf8 for an Alias and stored in sled. 2. A migration is triggered, and this Alias is read out of sled as a Name, rather than a UUID. 3. This alias is inserted into postgres with a representation that isn't formatted like a UUID. 4. That alias is requested by a user, and doesn't match because the representation is different. Another example of the problem is the following 1. A Delete Token is generated which is valid utf8 and contains a null byte. Rust says this is fine. This delete token is stored in sled. 2. A migration is triggered, and this Delete Token is read out of sled as a Name, rather than a UUID. 3. This Delete Token is not inserted into postgres, because postgres does not permit null bytes in utf8 strings. Of these scenarios, only the first can be fixed retroactively. This commit includes a script which runs on pict-rs' first launch to find and fix any aliases or tokens that have improper representations in the postgres database. Any aliases lost to null bytes in past migrations cannot be recovered without running the migration a second time, but it's likely that a given postgres install has diverged enough from it's previous database that a new migration would cause more problems than it would solve. In order to fix this going forward, pict-rs always represents aliases and delete tokens that are 16 bytes long as UUIDs in memory, which matches the expected format in the repos. This means any future migrations will not experience this problem.
This commit is contained in:
parent
aa6b4b47e6
commit
805e3b5ca5
4 changed files with 190 additions and 15 deletions
|
@ -906,6 +906,8 @@ impl Repo {
|
|||
let repo =
|
||||
self::postgres::PostgresRepo::connect(url, use_tls, certificate_file).await?;
|
||||
|
||||
repo.fix_tokens_and_aliases().await?;
|
||||
|
||||
Ok(Self::Postgres(repo))
|
||||
}
|
||||
}
|
||||
|
@ -920,9 +922,12 @@ impl Repo {
|
|||
}
|
||||
|
||||
impl MaybeUuid {
|
||||
// interpret 16 byte strings as UUIDs
|
||||
fn from_str(s: &str) -> Self {
|
||||
if let Ok(uuid) = Uuid::parse_str(s) {
|
||||
MaybeUuid::Uuid(uuid)
|
||||
} else if let Ok(bytes) = s.as_bytes().try_into() {
|
||||
MaybeUuid::Uuid(Uuid::from_bytes(bytes))
|
||||
} else {
|
||||
MaybeUuid::Name(s.into())
|
||||
}
|
||||
|
|
|
@ -97,7 +97,7 @@ impl Alias {
|
|||
}
|
||||
|
||||
fn split_at_dot(s: &str) -> Option<(&str, &str)> {
|
||||
let index = s.find('.')?;
|
||||
let index = s.rfind('.')?;
|
||||
|
||||
Some(s.split_at(index))
|
||||
}
|
||||
|
@ -271,4 +271,36 @@ mod tests {
|
|||
}
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_utf8_uuid() {
|
||||
let bytes = "abcdefghijklmnop";
|
||||
let uuid = Uuid::from_slice(bytes.as_bytes()).unwrap();
|
||||
|
||||
let alias_str = format!("{bytes}.mp4");
|
||||
let alias = Alias::from_existing(&alias_str);
|
||||
|
||||
assert_eq!(
|
||||
alias,
|
||||
Alias {
|
||||
id: MaybeUuid::Uuid(uuid),
|
||||
extension: Some(String::from(".mp4")),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_utf8_uuid_matches() {
|
||||
let bytes = "abcdefghijklmnop";
|
||||
let uuid = Uuid::from_slice(bytes.as_bytes()).unwrap();
|
||||
|
||||
let alias_str = format!("{bytes}.mp4");
|
||||
let alias = Alias::from_existing(&alias_str);
|
||||
|
||||
let alias_str_2 = format!("{uuid}.mp4");
|
||||
let alias_2 = Alias::from_existing(&alias_str_2);
|
||||
|
||||
assert_ne!(alias_str, alias_str_2);
|
||||
assert_eq!(alias, alias_2);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,14 +39,8 @@ where
|
|||
|
||||
impl DeleteToken {
|
||||
pub(crate) fn from_existing(existing: &str) -> Self {
|
||||
if let Ok(uuid) = Uuid::parse_str(existing) {
|
||||
DeleteToken {
|
||||
id: MaybeUuid::Uuid(uuid),
|
||||
}
|
||||
} else {
|
||||
DeleteToken {
|
||||
id: MaybeUuid::Name(existing.into()),
|
||||
}
|
||||
DeleteToken {
|
||||
id: MaybeUuid::from_str(existing),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -61,12 +55,14 @@ impl DeleteToken {
|
|||
}
|
||||
|
||||
pub(crate) fn from_slice(bytes: &[u8]) -> Option<Self> {
|
||||
if let Ok(s) = std::str::from_utf8(bytes) {
|
||||
Some(DeleteToken::from_existing(s))
|
||||
} else if bytes.len() == 16 {
|
||||
// delete tokens were 10 character strings before they were UUIDs
|
||||
// UUID strings are longer than 16 characters
|
||||
if let Ok(b) = bytes.try_into() {
|
||||
Some(DeleteToken {
|
||||
id: MaybeUuid::Uuid(Uuid::from_slice(bytes).ok()?),
|
||||
id: MaybeUuid::Uuid(Uuid::from_bytes(b)),
|
||||
})
|
||||
} else if let Ok(s) = std::str::from_utf8(bytes) {
|
||||
Some(DeleteToken::from_existing(s))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
@ -161,4 +157,33 @@ mod tests {
|
|||
}
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_utf8_uuid() {
|
||||
let bytes = "abcdefghijklmnop";
|
||||
let uuid = Uuid::from_slice(bytes.as_bytes()).unwrap();
|
||||
|
||||
let delete_token = DeleteToken::from_existing(bytes);
|
||||
|
||||
assert_eq!(
|
||||
delete_token,
|
||||
DeleteToken {
|
||||
id: MaybeUuid::Uuid(uuid),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_utf8_uuid_matches() {
|
||||
let bytes = "abcdefghijklmnop";
|
||||
let uuid = Uuid::from_slice(bytes.as_bytes()).unwrap();
|
||||
|
||||
let delete_token = DeleteToken::from_existing(bytes);
|
||||
|
||||
let delete_token_str_2 = uuid.to_string();
|
||||
let delete_token_2 = DeleteToken::from_existing(&delete_token_str_2);
|
||||
|
||||
assert_ne!(bytes, delete_token_str_2);
|
||||
assert_eq!(delete_token, delete_token_2);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -386,10 +386,12 @@ impl PostgresRepo {
|
|||
|
||||
let notifications = Arc::new(handle);
|
||||
|
||||
Ok(PostgresRepo {
|
||||
let repo = PostgresRepo {
|
||||
inner,
|
||||
notifications,
|
||||
})
|
||||
};
|
||||
|
||||
Ok(repo)
|
||||
}
|
||||
|
||||
async fn get_connection(
|
||||
|
@ -495,6 +497,117 @@ impl PostgresRepo {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn fix_proxy(
|
||||
&self,
|
||||
old_alias: &str,
|
||||
fixed_alias: &Alias,
|
||||
conn: &mut PooledConnection<'_, AsyncPgConnection>,
|
||||
) -> Result<(), RepoError> {
|
||||
use schema::proxies::dsl::*;
|
||||
|
||||
diesel::update(proxies)
|
||||
.filter(alias.eq(old_alias))
|
||||
.set(alias.eq(fixed_alias))
|
||||
.execute(conn)
|
||||
.await
|
||||
.map_err(PostgresError::Diesel)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip(self))]
|
||||
pub(super) async fn fix_tokens_and_aliases(&self) -> Result<(), RepoError> {
|
||||
use schema::aliases::dsl::*;
|
||||
|
||||
const FIX_TOKENS_AND_ALIASES_KEY: &str = "fix-tokens-and-aliases";
|
||||
|
||||
// check if we already fixed the DB
|
||||
if self.get(FIX_TOKENS_AND_ALIASES_KEY).await?.is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut conn = self.get_connection().await?;
|
||||
|
||||
let total = aliases
|
||||
.count()
|
||||
.get_result::<i64>(&mut conn)
|
||||
.await
|
||||
.map_err(PostgresError::Diesel)?;
|
||||
|
||||
tracing::info!("Running alias & delete token fix script");
|
||||
tracing::info!("Checking {total} aliases");
|
||||
|
||||
let mut max: Option<String> = None;
|
||||
let mut fixed = 0;
|
||||
let mut seen = 0;
|
||||
let pct = (total / 100).max(1);
|
||||
|
||||
loop {
|
||||
let page = if let Some(max) = max {
|
||||
aliases
|
||||
.select((alias, token))
|
||||
.filter(alias.lt(max))
|
||||
.order_by(alias.desc())
|
||||
.limit(1000)
|
||||
.get_results::<(String, String)>(&mut conn)
|
||||
.await
|
||||
} else {
|
||||
aliases
|
||||
.select((alias, token))
|
||||
.order_by(alias.desc())
|
||||
.limit(1000)
|
||||
.get_results::<(String, String)>(&mut conn)
|
||||
.await
|
||||
};
|
||||
|
||||
let page = page.map_err(PostgresError::Diesel)?;
|
||||
|
||||
if page.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
max = page.last().map(|(a, _)| a.to_string());
|
||||
|
||||
for (old_alias, old_token) in page {
|
||||
let fixed_alias = Alias::from_existing(&old_alias);
|
||||
let fixed_token = DeleteToken::from_existing(&old_token);
|
||||
|
||||
if fixed_alias.to_string() != old_alias || fixed_token.to_string() != old_token {
|
||||
tracing::info!("Fixing alias {old_alias} with token {old_token} - new alias {fixed_alias} and token {fixed_token}");
|
||||
|
||||
let records = diesel::update(aliases)
|
||||
.filter(alias.eq(&old_alias))
|
||||
.set((alias.eq(&fixed_alias), token.eq(fixed_token)))
|
||||
.execute(&mut conn)
|
||||
.await
|
||||
.map_err(PostgresError::Diesel)?;
|
||||
|
||||
if records > 0 {
|
||||
self.fix_proxy(&old_alias, &fixed_alias, &mut conn).await?;
|
||||
}
|
||||
|
||||
fixed += 1;
|
||||
}
|
||||
|
||||
seen += 1;
|
||||
|
||||
if (seen % pct) == 0 {
|
||||
let percent = seen / pct;
|
||||
tracing::info!("Fix {percent}% complete - {seen}/{total}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
drop(conn);
|
||||
|
||||
self.set(FIX_TOKENS_AND_ALIASES_KEY, Arc::from(Vec::from(b"fixed")))
|
||||
.await?;
|
||||
|
||||
tracing::info!("Fix complete. {fixed} records required modification.");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct AlreadyInserted;
|
||||
|
|
Loading…
Reference in a new issue