Cache last commit to accelerate the repository directory page visit (#10069)

* Cache last commit to accelerate the repository directory page visit

* Default use default cache configuration

* add tests for last commit cache

* Simplify last commit cache

* Revert Enabled back

* Change the last commit cache default ttl to 8760h

* Fix test
This commit is contained in:
Lunny Xiao 2020-02-02 03:11:32 +08:00 committed by GitHub
parent 046bb05979
commit ce7062a422
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 273 additions and 23 deletions

View file

@ -632,6 +632,8 @@ SENDMAIL_PATH = sendmail
SENDMAIL_ARGS = SENDMAIL_ARGS =
[cache] [cache]
; Whether the cache is enabled
ENABLED = true
; Either "memory", "redis", or "memcache", default is "memory" ; Either "memory", "redis", or "memcache", default is "memory"
ADAPTER = memory ADAPTER = memory
; For "memory" only, GC interval in seconds, default is 60 ; For "memory" only, GC interval in seconds, default is 60
@ -644,6 +646,16 @@ HOST =
; Setting it to 0 disables caching ; Setting it to 0 disables caching
ITEM_TTL = 16h ITEM_TTL = 16h
; Last commit cache
[cache.last_commit]
; Whether the last commit cache is enabled
ENABLED = true
; Time to keep items in cache if not used, default is 8760 hours.
; Setting it to 0 disables caching
ITEM_TTL = 8760h
; Only enable the cache when the repository's commit count is at least this value
COMMITS_COUNT = 1000
[session] [session]
; Either "memory", "file", or "redis", default is "memory" ; Either "memory", "file", or "redis", default is "memory"
PROVIDER = memory PROVIDER = memory

View file

@ -383,6 +383,7 @@ relation to port exhaustion.
## Cache (`cache`) ## Cache (`cache`)
- `ENABLED`: **true**: Enable the cache.
- `ADAPTER`: **memory**: Cache engine adapter, either `memory`, `redis`, or `memcache`. - `ADAPTER`: **memory**: Cache engine adapter, either `memory`, `redis`, or `memcache`.
- `INTERVAL`: **60**: Garbage Collection interval (sec), for memory cache only. - `INTERVAL`: **60**: Garbage Collection interval (sec), for memory cache only.
- `HOST`: **\<empty\>**: Connection string for `redis` and `memcache`. - `HOST`: **\<empty\>**: Connection string for `redis` and `memcache`.
@ -390,6 +391,12 @@ relation to port exhaustion.
- Memcache: `127.0.0.1:9090;127.0.0.1:9091` - Memcache: `127.0.0.1:9090;127.0.0.1:9091`
- `ITEM_TTL`: **16h**: Time to keep items in cache if not used, Setting it to 0 disables caching. - `ITEM_TTL`: **16h**: Time to keep items in cache if not used, Setting it to 0 disables caching.
## Cache - LastCommitCache settings (`cache.last_commit`)
- `ENABLED`: **true**: Enable the cache.
- `ITEM_TTL`: **8760h**: Time to keep items in cache if not used. Setting it to 0 disables caching.
- `COMMITS_COUNT`: **1000**: Only enable the cache when the repository's commit count is at least this value.
## Session (`session`) ## Session (`session`)
- `PROVIDER`: **memory**: Session engine provider \[memory, file, redis, mysql, couchbase, memcache, nodb, postgres\]. - `PROVIDER`: **memory**: Session engine provider \[memory, file, redis, mysql, couchbase, memcache, nodb, postgres\].

View file

@ -148,6 +148,7 @@ menu:
## Cache (`cache`) ## Cache (`cache`)
- `ENABLED`: **true**: 是否启用。
- `ADAPTER`: **memory**: 缓存引擎,可以为 `memory`, `redis``memcache` - `ADAPTER`: **memory**: 缓存引擎,可以为 `memory`, `redis``memcache`
- `INTERVAL`: **60**: 只对内存缓存有效GC间隔单位秒。 - `INTERVAL`: **60**: 只对内存缓存有效GC间隔单位秒。
- `HOST`: **\<empty\>**: 针对redis和memcache有效主机地址和端口。 - `HOST`: **\<empty\>**: 针对redis和memcache有效主机地址和端口。
@ -155,6 +156,12 @@ menu:
- Memache: `127.0.0.1:9090;127.0.0.1:9091` - Memache: `127.0.0.1:9090;127.0.0.1:9091`
- `ITEM_TTL`: **16h**: 缓存项目失效时间,设置为 0 则禁用缓存。 - `ITEM_TTL`: **16h**: 缓存项目失效时间,设置为 0 则禁用缓存。
## Cache - LastCommitCache settings (`cache.last_commit`)
- `ENABLED`: **true**: 是否启用。
- `ITEM_TTL`: **8760h**: 缓存项目失效时间,设置为 0 则禁用缓存。
- `COMMITS_COUNT`: **1000**: 仅当仓库的提交数不少于该值时才启用缓存。
## Session (`session`) ## Session (`session`)
- `PROVIDER`: Session 内容存储方式,可选 `memory`, `file`, `redis``mysql` - `PROVIDER`: Session 内容存储方式,可选 `memory`, `file`, `redis``mysql`

View file

@ -7,8 +7,10 @@ package integrations
import ( import (
"fmt" "fmt"
"net/http" "net/http"
"path"
"strings" "strings"
"testing" "testing"
"time"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
@ -29,12 +31,71 @@ func TestViewRepo(t *testing.T) {
session.MakeRequest(t, req, http.StatusNotFound) session.MakeRequest(t, req, http.StatusNotFound)
} }
func TestViewRepo2(t *testing.T) { func testViewRepo(t *testing.T) {
defer prepareTestEnv(t)() defer prepareTestEnv(t)()
req := NewRequest(t, "GET", "/user3/repo3") req := NewRequest(t, "GET", "/user3/repo3")
session := loginUser(t, "user2") session := loginUser(t, "user2")
session.MakeRequest(t, req, http.StatusOK) resp := session.MakeRequest(t, req, http.StatusOK)
htmlDoc := NewHTMLParser(t, resp.Body)
files := htmlDoc.doc.Find("#repo-files-table > TBODY > TR")
type file struct {
fileName string
commitID string
commitMsg string
commitTime string
}
var items []file
files.Each(func(i int, s *goquery.Selection) {
tds := s.Find("td")
var f file
tds.Each(func(i int, s *goquery.Selection) {
if i == 0 {
f.fileName = strings.TrimSpace(s.Text())
} else if i == 1 {
a := s.Find("a")
f.commitMsg = strings.TrimSpace(a.Text())
l, _ := a.Attr("href")
f.commitID = path.Base(l)
}
})
f.commitTime, _ = s.Find("span.time-since").Attr("title")
items = append(items, f)
})
assert.EqualValues(t, []file{
{
fileName: "doc",
commitID: "2a47ca4b614a9f5a43abbd5ad851a54a616ffee6",
commitMsg: "init project",
commitTime: time.Date(2017, time.June, 14, 13, 54, 21, 0, time.UTC).Format(time.RFC1123),
},
{
fileName: "README.md",
commitID: "2a47ca4b614a9f5a43abbd5ad851a54a616ffee6",
commitMsg: "init project",
commitTime: time.Date(2017, time.June, 14, 13, 54, 21, 0, time.UTC).Format(time.RFC1123),
},
}, items)
}
func TestViewRepo2(t *testing.T) {
// no last commit cache
testViewRepo(t)
// enable last commit cache for all repositories
oldCommitsCount := setting.CacheService.LastCommit.CommitsCount
setting.CacheService.LastCommit.CommitsCount = 0
// first view will not hit the cache
testViewRepo(t)
// second view will hit the cache
testViewRepo(t)
setting.CacheService.LastCommit.CommitsCount = oldCommitsCount
} }
func TestViewRepo3(t *testing.T) { func TestViewRepo3(t *testing.T) {

View file

@ -16,20 +16,28 @@ import (
_ "gitea.com/macaron/cache/redis" _ "gitea.com/macaron/cache/redis"
) )
var conn mc.Cache var (
conn mc.Cache
)
// newCache builds a cacher from cacheConfig. The adapter name selects the
// backend, the connection string is handed to it as adapter configuration and
// the interval is forwarded unchanged.
func newCache(cacheConfig setting.Cache) (mc.Cache, error) {
	opts := mc.Options{
		Adapter:       cacheConfig.Adapter,
		AdapterConfig: cacheConfig.Conn,
		Interval:      cacheConfig.Interval,
	}
	return mc.NewCacher(cacheConfig.Adapter, opts)
}
// NewContext start cache service // NewContext start cache service
func NewContext() error { func NewContext() error {
if setting.CacheService == nil || conn != nil { var err error
return nil
if conn == nil && setting.CacheService.Enabled {
if conn, err = newCache(setting.CacheService.Cache); err != nil {
return err
}
} }
var err error
conn, err = mc.NewCacher(setting.CacheService.Adapter, mc.Options{
Adapter: setting.CacheService.Adapter,
AdapterConfig: setting.CacheService.Conn,
Interval: setting.CacheService.Interval,
})
return err return err
} }

64
modules/cache/last_commit.go vendored Normal file
View file

@ -0,0 +1,64 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package cache
import (
	"crypto/sha256"
	"fmt"

	"code.gitea.io/gitea/modules/git"
	"code.gitea.io/gitea/modules/log"

	mc "gitea.com/macaron/cache"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
)
// LastCommitCache represents a cache to store last commit
type LastCommitCache struct {
repoPath string
ttl int64
repo *git.Repository
commitCache map[string]*object.Commit
mc.Cache
}
// NewLastCommitCache returns a last commit cache for the repository identified
// by repoPath. Entries are written with the given ttl to the package-level
// cache connection, and gitRepo is used to load commit objects for cached IDs.
func NewLastCommitCache(repoPath string, gitRepo *git.Repository, ttl int64) *LastCommitCache {
	lcc := new(LastCommitCache)
	lcc.Cache = conn
	lcc.repoPath = repoPath
	lcc.repo = gitRepo
	lcc.ttl = ttl
	lcc.commitCache = make(map[string]*object.Commit)
	return lcc
}
// getCacheKey builds the key under which the last commit ID of entryPath at
// ref is stored for this repository.
//
// The repository path, ref and entry path are hashed with SHA-256 instead of
// being embedded verbatim: entry paths are arbitrary file names, so they may
// contain whitespace or be very long, and memcached rejects keys that contain
// whitespace or exceed 250 bytes. Hashing yields a fixed-length key made of
// hexadecimal characters only. Get and Put must both use this helper so that
// they always agree on the key.
func (c LastCommitCache) getCacheKey(ref, entryPath string) string {
	hashBytes := sha256.Sum256([]byte(fmt.Sprintf("%s:%s:%s", c.repoPath, ref, entryPath)))
	return fmt.Sprintf("last_commit:%x", hashBytes)
}

// Get returns the last commit recorded for entryPath at ref.
//
// It returns (nil, nil) when the backing cache holds no string value for the
// key, which callers treat as a cache miss. A non-nil error is returned when a
// cached commit ID cannot be converted or its commit object cannot be loaded
// from the repository.
func (c LastCommitCache) Get(ref, entryPath string) (*object.Commit, error) {
	v := c.Cache.Get(c.getCacheKey(ref, entryPath))
	vs, ok := v.(string)
	if !ok {
		// Nothing cached (or an unexpected value type): report a miss.
		return nil, nil
	}
	log.Trace("LastCommitCache hit level 1: [%s:%s:%s]", ref, entryPath, vs)

	// Level 2: commit objects already resolved through this instance.
	if commit, ok := c.commitCache[vs]; ok {
		log.Trace("LastCommitCache hit level 2: [%s:%s:%s]", ref, entryPath, vs)
		return commit, nil
	}

	id, err := c.repo.ConvertToSHA1(vs)
	if err != nil {
		return nil, err
	}
	commit, err := c.repo.GoGitRepo().CommitObject(id)
	if err != nil {
		return nil, err
	}
	// Remember the loaded object so further entries pointing at the same
	// commit do not hit the repository again.
	c.commitCache[vs] = commit
	return commit, nil
}

// Put records commitID as the last commit of entryPath at ref, using the
// cache's configured ttl as the entry timeout.
func (c LastCommitCache) Put(ref, entryPath, commitID string) error {
	log.Trace("LastCommitCache save: [%s:%s:%s]", ref, entryPath, commitID)
	return c.Cache.Put(c.getCacheKey(ref, entryPath), commitID, c.ttl)
}

View file

@ -4,8 +4,10 @@
package git package git
import "gopkg.in/src-d/go-git.v4/plumbing/object"
// LastCommitCache cache // LastCommitCache cache
type LastCommitCache interface { type LastCommitCache interface {
Get(repoPath, ref, entryPath string) (*Commit, error) Get(ref, entryPath string) (*object.Commit, error)
Put(repoPath, ref, entryPath string, commit *Commit) error Put(ref, entryPath, commitID string) error
} }

View file

@ -5,6 +5,8 @@
package git package git
import ( import (
"path"
"github.com/emirpasic/gods/trees/binaryheap" "github.com/emirpasic/gods/trees/binaryheap"
"gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/plumbing/object"
@ -30,7 +32,29 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom
return nil, nil, err return nil, nil, err
} }
revs, err := getLastCommitForPaths(c, treePath, entryPaths) var revs map[string]*object.Commit
if cache != nil {
var unHitPaths []string
revs, unHitPaths, err = getLastCommitForPathsByCache(commit.ID.String(), treePath, entryPaths, cache)
if err != nil {
return nil, nil, err
}
if len(unHitPaths) > 0 {
revs2, err := getLastCommitForPaths(c, treePath, unHitPaths)
if err != nil {
return nil, nil, err
}
for k, v := range revs2 {
if err := cache.Put(commit.ID.String(), path.Join(treePath, k), v.ID().String()); err != nil {
return nil, nil, err
}
revs[k] = v
}
}
} else {
revs, err = getLastCommitForPaths(c, treePath, entryPaths)
}
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -127,6 +151,25 @@ func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[
return hashes, nil return hashes, nil
} }
// getLastCommitForPathsByCache asks cache for the last commit of every entry in
// paths, each joined onto treePath, at commitID. It returns the commits found,
// keyed by the entry name as given in paths, together with the entries the
// cache had no commit for. The first error reported by the cache aborts the
// lookup.
func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cache LastCommitCache) (map[string]*object.Commit, []string, error) {
	hits := make(map[string]*object.Commit)
	var misses []string

	for _, entry := range paths {
		cached, err := cache.Get(commitID, path.Join(treePath, entry))
		switch {
		case err != nil:
			return nil, nil, err
		case cached == nil:
			misses = append(misses, entry)
		default:
			hits[entry] = cached
		}
	}

	return hits, misses, nil
}
func getLastCommitForPaths(c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) { func getLastCommitForPaths(c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) {
// We do a tree traversal with nodes sorted by commit time // We do a tree traversal with nodes sorted by commit time
heap := binaryheap.NewWith(func(a, b interface{}) int { heap := binaryheap.NewWith(func(a, b interface{}) int {

View file

@ -13,31 +13,71 @@ import (
// Cache represents cache settings // Cache represents cache settings
type Cache struct { type Cache struct {
Enabled bool
Adapter string Adapter string
Interval int Interval int
Conn string Conn string
TTL time.Duration TTL time.Duration `ini:"ITEM_TTL"`
} }
var ( var (
// CacheService the global cache // CacheService the global cache
CacheService *Cache CacheService = struct {
Cache
LastCommit struct {
Enabled bool
TTL time.Duration `ini:"ITEM_TTL"`
CommitsCount int64
} `ini:"cache.last_commit"`
}{
Cache: Cache{
Enabled: true,
Adapter: "memory",
Interval: 60,
TTL: 16 * time.Hour,
},
LastCommit: struct {
Enabled bool
TTL time.Duration `ini:"ITEM_TTL"`
CommitsCount int64
}{
Enabled: true,
TTL: 8760 * time.Hour,
CommitsCount: 1000,
},
}
) )
func newCacheService() { func newCacheService() {
sec := Cfg.Section("cache") sec := Cfg.Section("cache")
CacheService = &Cache{ if err := sec.MapTo(&CacheService); err != nil {
Adapter: sec.Key("ADAPTER").In("memory", []string{"memory", "redis", "memcache"}), log.Fatal("Failed to map Cache settings: %v", err)
} }
CacheService.Adapter = sec.Key("ADAPTER").In("memory", []string{"memory", "redis", "memcache"})
switch CacheService.Adapter { switch CacheService.Adapter {
case "memory": case "memory":
CacheService.Interval = sec.Key("INTERVAL").MustInt(60)
case "redis", "memcache": case "redis", "memcache":
CacheService.Conn = strings.Trim(sec.Key("HOST").String(), "\" ") CacheService.Conn = strings.Trim(sec.Key("HOST").String(), "\" ")
case "": // disable cache
CacheService.Enabled = false
default: default:
log.Fatal("Unknown cache adapter: %s", CacheService.Adapter) log.Fatal("Unknown cache adapter: %s", CacheService.Adapter)
} }
CacheService.TTL = sec.Key("ITEM_TTL").MustDuration(16 * time.Hour)
log.Info("Cache Service Enabled") if CacheService.Enabled {
log.Info("Cache Service Enabled")
}
sec = Cfg.Section("cache.last_commit")
if !CacheService.Enabled {
CacheService.LastCommit.Enabled = false
}
CacheService.LastCommit.CommitsCount = sec.Key("COMMITS_COUNT").MustInt64(1000)
if CacheService.LastCommit.Enabled {
log.Info("Last Commit Cache Service Enabled")
}
} }

View file

@ -17,6 +17,7 @@ import (
"code.gitea.io/gitea/models" "code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
@ -49,8 +50,13 @@ func renderDirectory(ctx *context.Context, treeLink string) {
} }
entries.CustomSort(base.NaturalSortLess) entries.CustomSort(base.NaturalSortLess)
var c git.LastCommitCache
if setting.CacheService.LastCommit.Enabled && ctx.Repo.CommitsCount >= setting.CacheService.LastCommit.CommitsCount {
c = cache.NewLastCommitCache(ctx.Repo.Repository.FullName(), ctx.Repo.GitRepo, int64(setting.CacheService.LastCommit.TTL.Seconds()))
}
var latestCommit *git.Commit var latestCommit *git.Commit
ctx.Data["Files"], latestCommit, err = entries.GetCommitsInfo(ctx.Repo.Commit, ctx.Repo.TreePath, nil) ctx.Data["Files"], latestCommit, err = entries.GetCommitsInfo(ctx.Repo.Commit, ctx.Repo.TreePath, c)
if err != nil { if err != nil {
ctx.ServerError("GetCommitsInfo", err) ctx.ServerError("GetCommitsInfo", err)
return return