// Copyright 2023 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT package db import ( "errors" "fmt" "strings" "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "xorm.io/xorm" "xorm.io/xorm/schemas" ) type CheckCollationsResult struct { ExpectedCollation string AvailableCollation container.Set[string] DatabaseCollation string IsCollationCaseSensitive func(s string) bool CollationEquals func(a, b string) bool ExistingTableNumber int InconsistentCollationColumns []string } func findAvailableCollationsMySQL(x *xorm.Engine) (ret container.Set[string], err error) { var res []struct { Collation string } if err = x.SQL("SHOW COLLATION WHERE (Collation = 'utf8mb4_bin') OR (Collation LIKE '%\\_as\\_cs%')").Find(&res); err != nil { return nil, err } ret = make(container.Set[string], len(res)) for _, r := range res { ret.Add(r.Collation) } return ret, nil } func CheckCollations(x *xorm.Engine) (*CheckCollationsResult, error) { dbTables, err := x.DBMetas() if err != nil { return nil, err } res := &CheckCollationsResult{ ExistingTableNumber: len(dbTables), CollationEquals: func(a, b string) bool { return a == b }, } var candidateCollations []string if x.Dialect().URI().DBType == schemas.MYSQL { if _, err = x.SQL("SELECT @@collation_database").Get(&res.DatabaseCollation); err != nil { return nil, err } res.IsCollationCaseSensitive = func(s string) bool { return s == "utf8mb4_bin" || strings.HasSuffix(s, "_as_cs") } candidateCollations = []string{"utf8mb4_0900_as_cs", "uca1400_as_cs", "utf8mb4_bin"} res.AvailableCollation, err = findAvailableCollationsMySQL(x) if err != nil { return nil, err } res.CollationEquals = func(a, b string) bool { // MariaDB adds the "utf8mb4_" prefix, eg: "utf8mb4_uca1400_as_cs", but not the name "uca1400_as_cs" in "SHOW COLLATION" // At the moment, it's safe to ignore the database difference, just trim the prefix and compare. It could be fixed easily if there is any problem in the future. return a == b || strings.TrimPrefix(a, "utf8mb4_") == strings.TrimPrefix(b, "utf8mb4_") } } else { return nil, nil } if res.DatabaseCollation == "" { return nil, errors.New("unable to get collation for current database") } res.ExpectedCollation = setting.Database.CharsetCollation if res.ExpectedCollation == "" { for _, collation := range candidateCollations { if res.AvailableCollation.Contains(collation) { res.ExpectedCollation = collation break } } } if res.ExpectedCollation == "" { return nil, errors.New("unable to find a suitable collation for current database") } allColumnsMatchExpected := true allColumnsMatchDatabase := true for _, table := range dbTables { for _, col := range table.Columns() { if col.Collation != "" { allColumnsMatchExpected = allColumnsMatchExpected && res.CollationEquals(col.Collation, res.ExpectedCollation) allColumnsMatchDatabase = allColumnsMatchDatabase && res.CollationEquals(col.Collation, res.DatabaseCollation) if !res.IsCollationCaseSensitive(col.Collation) || !res.CollationEquals(col.Collation, res.DatabaseCollation) { res.InconsistentCollationColumns = append(res.InconsistentCollationColumns, fmt.Sprintf("%s.%s", table.Name, col.Name)) } } } } // if all columns match expected collation or all match database collation, then it could also be considered as "consistent" if allColumnsMatchExpected || allColumnsMatchDatabase { res.InconsistentCollationColumns = nil } return res, nil } func CheckCollationsDefaultEngine() (*CheckCollationsResult, error) { return CheckCollations(x) } func alterDatabaseCollation(x *xorm.Engine, collation string) error { if x.Dialect().URI().DBType == schemas.MYSQL { _, err := x.Exec("ALTER DATABASE CHARACTER SET utf8mb4 COLLATE " + collation) return err } return errors.New("unsupported database type") } // preprocessDatabaseCollation checks database & table column collation, and alter the database collation if needed func preprocessDatabaseCollation(x *xorm.Engine) { r, err := CheckCollations(x) if err != nil { log.Error("Failed to check database collation: %v", err) } if r == nil { return // no check result means the database doesn't need to do such check/process (at the moment ....) } // try to alter database collation to expected if the database is empty, it might fail in some cases (and it isn't necessary to succeed) // at the moment. if !r.CollationEquals(r.DatabaseCollation, r.ExpectedCollation) && r.ExistingTableNumber == 0 { if err = alterDatabaseCollation(x, r.ExpectedCollation); err != nil { log.Error("Failed to change database collation to %q: %v", r.ExpectedCollation, err) } else { if r, err = CheckCollations(x); err != nil { log.Error("Failed to check database collation again after altering: %v", err) // impossible case return } log.Warn("Current database has been altered to use collation %q", r.DatabaseCollation) } } // check column collation, and show warning/error to end users -- no need to fatal, do not block the startup if !r.IsCollationCaseSensitive(r.DatabaseCollation) { log.Warn("Current database is using a case-insensitive collation %q, although Forgejo could work with it, there might be some rare cases which don't work as expected.", r.DatabaseCollation) } if len(r.InconsistentCollationColumns) > 0 { log.Error("There are %d table columns using inconsistent collation, they should use %q. Please go to admin panel Self Check page", len(r.InconsistentCollationColumns), r.DatabaseCollation) } }