forgejo/modules/git/repo_language_stats.go
Gergely Nagy 8eaa8aeaf9
[GITEA] Improved Linguist compatibility
Recognise the `linguist-documentation` and `linguist-detectable`
attributes in `.gitattributes` files, and use them in
`GetLanguageStats()` to make a decision whether to include a particular
file in the stats or not.

This allows one more control over which files in their repositories
contribute toward the language statistics, so that for a project that is
mostly documentation, the language stats can reflect that.

Fixes #1672.

Signed-off-by: Gergely Nagy <forgejo@gergo.csillger.hu>
(cherry picked from commit 6d4e02fe5f)
(cherry picked from commit ee1ead8189)
(cherry picked from commit 2dbec730e8)
2024-02-05 16:09:43 +01:00

61 lines
1.3 KiB
Go

// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"strings"
"unicode"
)
const (
fileSizeLimit int64 = 16 * 1024 // 16 KiB
bigFileSize int64 = 1024 * 1024 // 1 MiB
)
type LinguistBoolAttrib struct {
Value string
}
func (attrib *LinguistBoolAttrib) IsTrue() bool {
return attrib.Value == "set" || attrib.Value == "true"
}
func (attrib *LinguistBoolAttrib) IsFalse() bool {
return attrib.Value == "unset" || attrib.Value == "false"
}
// mergeLanguageStats mergers language names with different cases. The name with most upper case letters is used.
func mergeLanguageStats(stats map[string]int64) map[string]int64 {
names := map[string]struct {
uniqueName string
upperCount int
}{}
countUpper := func(s string) (count int) {
for _, r := range s {
if unicode.IsUpper(r) {
count++
}
}
return count
}
for name := range stats {
cnt := countUpper(name)
lower := strings.ToLower(name)
if cnt >= names[lower].upperCount {
names[lower] = struct {
uniqueName string
upperCount int
}{uniqueName: name, upperCount: cnt}
}
}
res := make(map[string]int64, len(names))
for name, num := range stats {
res[names[strings.ToLower(name)].uniqueName] += num
}
return res
}