// GoToSocial // Copyright (C) GoToSocial Authors admin@gotosocial.org // SPDX-License-Identifier: AGPL-3.0-or-later // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package text import ( "strings" "github.com/superseriousbusiness/gotosocial/internal/util" "golang.org/x/text/unicode/norm" ) const ( maximumHashtagLength = 100 ) // NormalizeHashtag normalizes the given hashtag text by // removing the initial '#' symbol, and then decomposing // and canonically recomposing chars + combining diacritics // in the text to single unicode characters, following // Normalization Form C (https://unicode.org/reports/tr15/). // // Finally, it will do a check on the normalized string to // ensure that it's below maximumHashtagLength chars, and // contains only unicode letters and numbers. If this passes, // returned bool will be true. func NormalizeHashtag(text string) (string, bool) { // This normalization is specifically to avoid cases // where visually-identical hashtags are stored with // different unicode representations (e.g. with combining // diacritics). It allows a tasteful number of combining // diacritics to be used, as long as they can be combined // with parent characters to form regular letter symbols. normalized := norm.NFC.String(strings.TrimPrefix(text, "#")) // Validate normalized. ok := true for i, r := range normalized { if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) { ok = false break } } return normalized, ok }