1
0
Fork 0
mirror of https://git.asonix.dog/asonix/pict-rs.git synced 2025-04-22 16:14:09 +00:00

Fix UTF8 errors in repo migrations

As it turns out, sometimes UUIDs are made entirely of bytes that are
valid utf-8. This causes problems if a repo migration takes place (and
in some other scenarios, such as deleting aliases).

Here is an example of the problem

1. A UUID is generated which is valid utf8 for an Alias and stored in
   sled.
2. A migration is triggered, and this Alias is read out of sled as a
   Name, rather than a UUID.
3. This alias is inserted into postgres with a representation that isn't
   formatted like a UUID.
4. That alias is requested by a user, and doesn't match because the
   representation is different.

Another example of the problem is the following

1. A Delete Token is generated which is valid utf8 and contains a null
   byte. Rust says this is fine. This delete token is stored in sled.
2. A migration is triggered, and this Delete Token is read out of sled
   as a Name, rather than a UUID.
3. This Delete Token is not inserted into postgres, because postgres
   does not permit null bytes in utf8 strings.

Of these scenarios, only the first can be fixed retroactively. This
commit includes a script which runs on pict-rs' first launch to find and
fix any aliases or tokens that have improper representations in the
postgres database.

Any aliases lost to null bytes in past migrations cannot be recovered
without running the migration a second time, but it's likely that a
given postgres install has diverged enough from it's previous database
that a new migration would cause more problems than it would solve.

In order to fix this going forward, pict-rs always represents aliases
and delete tokens that are 16 bytes long as UUIDs in memory, which
matches the expected format in the repos. This means any future
migrations will not experience this problem.
This commit is contained in:
asonix 2025-04-04 17:19:57 -05:00
parent aa6b4b47e6
commit 805e3b5ca5
4 changed files with 190 additions and 15 deletions

View file

@ -906,6 +906,8 @@ impl Repo {
let repo =
self::postgres::PostgresRepo::connect(url, use_tls, certificate_file).await?;
repo.fix_tokens_and_aliases().await?;
Ok(Self::Postgres(repo))
}
}
@ -920,9 +922,12 @@ impl Repo {
}
impl MaybeUuid {
// interpret 16 byte strings as UUIDs
fn from_str(s: &str) -> Self {
if let Ok(uuid) = Uuid::parse_str(s) {
MaybeUuid::Uuid(uuid)
} else if let Ok(bytes) = s.as_bytes().try_into() {
MaybeUuid::Uuid(Uuid::from_bytes(bytes))
} else {
MaybeUuid::Name(s.into())
}

View file

@ -97,7 +97,7 @@ impl Alias {
}
fn split_at_dot(s: &str) -> Option<(&str, &str)> {
let index = s.find('.')?;
let index = s.rfind('.')?;
Some(s.split_at(index))
}
@ -271,4 +271,36 @@ mod tests {
}
)
}
#[test]
fn valid_utf8_uuid() {
let bytes = "abcdefghijklmnop";
let uuid = Uuid::from_slice(bytes.as_bytes()).unwrap();
let alias_str = format!("{bytes}.mp4");
let alias = Alias::from_existing(&alias_str);
assert_eq!(
alias,
Alias {
id: MaybeUuid::Uuid(uuid),
extension: Some(String::from(".mp4")),
}
);
}
#[test]
fn valid_utf8_uuid_matches() {
let bytes = "abcdefghijklmnop";
let uuid = Uuid::from_slice(bytes.as_bytes()).unwrap();
let alias_str = format!("{bytes}.mp4");
let alias = Alias::from_existing(&alias_str);
let alias_str_2 = format!("{uuid}.mp4");
let alias_2 = Alias::from_existing(&alias_str_2);
assert_ne!(alias_str, alias_str_2);
assert_eq!(alias, alias_2);
}
}

View file

@ -39,14 +39,8 @@ where
impl DeleteToken {
pub(crate) fn from_existing(existing: &str) -> Self {
if let Ok(uuid) = Uuid::parse_str(existing) {
DeleteToken {
id: MaybeUuid::Uuid(uuid),
}
} else {
DeleteToken {
id: MaybeUuid::Name(existing.into()),
}
DeleteToken {
id: MaybeUuid::from_str(existing),
}
}
@ -61,12 +55,14 @@ impl DeleteToken {
}
pub(crate) fn from_slice(bytes: &[u8]) -> Option<Self> {
if let Ok(s) = std::str::from_utf8(bytes) {
Some(DeleteToken::from_existing(s))
} else if bytes.len() == 16 {
// delete tokens were 10 character strings before they were UUIDs
// UUID strings are longer than 16 characters
if let Ok(b) = bytes.try_into() {
Some(DeleteToken {
id: MaybeUuid::Uuid(Uuid::from_slice(bytes).ok()?),
id: MaybeUuid::Uuid(Uuid::from_bytes(b)),
})
} else if let Ok(s) = std::str::from_utf8(bytes) {
Some(DeleteToken::from_existing(s))
} else {
None
}
@ -161,4 +157,33 @@ mod tests {
}
)
}
#[test]
fn valid_utf8_uuid() {
let bytes = "abcdefghijklmnop";
let uuid = Uuid::from_slice(bytes.as_bytes()).unwrap();
let delete_token = DeleteToken::from_existing(bytes);
assert_eq!(
delete_token,
DeleteToken {
id: MaybeUuid::Uuid(uuid),
}
);
}
#[test]
fn valid_utf8_uuid_matches() {
let bytes = "abcdefghijklmnop";
let uuid = Uuid::from_slice(bytes.as_bytes()).unwrap();
let delete_token = DeleteToken::from_existing(bytes);
let delete_token_str_2 = uuid.to_string();
let delete_token_2 = DeleteToken::from_existing(&delete_token_str_2);
assert_ne!(bytes, delete_token_str_2);
assert_eq!(delete_token, delete_token_2);
}
}

View file

@ -386,10 +386,12 @@ impl PostgresRepo {
let notifications = Arc::new(handle);
Ok(PostgresRepo {
let repo = PostgresRepo {
inner,
notifications,
})
};
Ok(repo)
}
async fn get_connection(
@ -495,6 +497,117 @@ impl PostgresRepo {
Ok(())
}
async fn fix_proxy(
&self,
old_alias: &str,
fixed_alias: &Alias,
conn: &mut PooledConnection<'_, AsyncPgConnection>,
) -> Result<(), RepoError> {
use schema::proxies::dsl::*;
diesel::update(proxies)
.filter(alias.eq(old_alias))
.set(alias.eq(fixed_alias))
.execute(conn)
.await
.map_err(PostgresError::Diesel)?;
Ok(())
}
#[tracing::instrument(skip(self))]
pub(super) async fn fix_tokens_and_aliases(&self) -> Result<(), RepoError> {
use schema::aliases::dsl::*;
const FIX_TOKENS_AND_ALIASES_KEY: &str = "fix-tokens-and-aliases";
// check if we already fixed the DB
if self.get(FIX_TOKENS_AND_ALIASES_KEY).await?.is_some() {
return Ok(());
}
let mut conn = self.get_connection().await?;
let total = aliases
.count()
.get_result::<i64>(&mut conn)
.await
.map_err(PostgresError::Diesel)?;
tracing::info!("Running alias & delete token fix script");
tracing::info!("Checking {total} aliases");
let mut max: Option<String> = None;
let mut fixed = 0;
let mut seen = 0;
let pct = (total / 100).max(1);
loop {
let page = if let Some(max) = max {
aliases
.select((alias, token))
.filter(alias.lt(max))
.order_by(alias.desc())
.limit(1000)
.get_results::<(String, String)>(&mut conn)
.await
} else {
aliases
.select((alias, token))
.order_by(alias.desc())
.limit(1000)
.get_results::<(String, String)>(&mut conn)
.await
};
let page = page.map_err(PostgresError::Diesel)?;
if page.is_empty() {
break;
}
max = page.last().map(|(a, _)| a.to_string());
for (old_alias, old_token) in page {
let fixed_alias = Alias::from_existing(&old_alias);
let fixed_token = DeleteToken::from_existing(&old_token);
if fixed_alias.to_string() != old_alias || fixed_token.to_string() != old_token {
tracing::info!("Fixing alias {old_alias} with token {old_token} - new alias {fixed_alias} and token {fixed_token}");
let records = diesel::update(aliases)
.filter(alias.eq(&old_alias))
.set((alias.eq(&fixed_alias), token.eq(fixed_token)))
.execute(&mut conn)
.await
.map_err(PostgresError::Diesel)?;
if records > 0 {
self.fix_proxy(&old_alias, &fixed_alias, &mut conn).await?;
}
fixed += 1;
}
seen += 1;
if (seen % pct) == 0 {
let percent = seen / pct;
tracing::info!("Fix {percent}% complete - {seen}/{total}");
}
}
}
drop(conn);
self.set(FIX_TOKENS_AND_ALIASES_KEY, Arc::from(Vec::from(b"fixed")))
.await?;
tracing::info!("Fix complete. {fixed} records required modification.");
Ok(())
}
}
struct AlreadyInserted;