add repeat boost filtering logic, update go-structr, general improvements

This commit is contained in:
kim 2025-04-07 17:46:17 +01:00
parent 55e03f0164
commit 6c82766e6b
10 changed files with 153 additions and 114 deletions

2
go.mod
View file

@ -26,7 +26,7 @@ require (
codeberg.org/gruf/go-runners v1.6.3
codeberg.org/gruf/go-sched v1.2.4
codeberg.org/gruf/go-storage v0.2.0
codeberg.org/gruf/go-structr v0.9.6
codeberg.org/gruf/go-structr v0.9.7
codeberg.org/superseriousbusiness/activity v1.13.0-gts
codeberg.org/superseriousbusiness/exif-terminator v0.10.0
codeberg.org/superseriousbusiness/httpsig v1.3.0-SSB

4
go.sum generated
View file

@ -36,8 +36,8 @@ codeberg.org/gruf/go-sched v1.2.4 h1:ddBB9o0D/2oU8NbQ0ldN5aWxogpXPRBATWi58+p++Hw
codeberg.org/gruf/go-sched v1.2.4/go.mod h1:wad6l+OcYGWMA2TzNLMmLObsrbBDxdJfEy5WvTgBjNk=
codeberg.org/gruf/go-storage v0.2.0 h1:mKj3Lx6AavEkuXXtxqPhdq+akW9YwrnP16yQBF7K5ZI=
codeberg.org/gruf/go-storage v0.2.0/go.mod h1:o3GzMDE5QNUaRnm/daUzFqvuAaC4utlgXDXYO79sWKU=
codeberg.org/gruf/go-structr v0.9.6 h1:FSbJ1A0ubTQB82rC0K4o6qyiqrDGH1t9ivttm8Zy64o=
codeberg.org/gruf/go-structr v0.9.6/go.mod h1:9k5hYztZ4PsBS+m1v5hUTeFiVUBTLF5VA7d9cd1OEMs=
codeberg.org/gruf/go-structr v0.9.7 h1:yQeIxTjYb6reNdgESk915twyjolydYBqat/mlZrP7bg=
codeberg.org/gruf/go-structr v0.9.7/go.mod h1:9k5hYztZ4PsBS+m1v5hUTeFiVUBTLF5VA7d9cd1OEMs=
codeberg.org/superseriousbusiness/activity v1.13.0-gts h1:4WZLc/SNt+Vt5x2UjL2n6V5dHlIL9ECudUPx8Ld5rxw=
codeberg.org/superseriousbusiness/activity v1.13.0-gts/go.mod h1:enxU1Lva4OcK6b/NBXscoHSEgEMsKJvdHrQFifQxp4o=
codeberg.org/superseriousbusiness/exif-terminator v0.10.0 h1:FiLX/AK07tzceS36I+kOP2aEH+aytjPSIlFoYePMEyg=

View file

@ -212,8 +212,8 @@ func (c *Caches) Sweep(threshold float64) {
c.DB.User.Trim(threshold)
c.DB.UserMute.Trim(threshold)
c.DB.UserMuteIDs.Trim(threshold)
c.Timelines.Home.Trim(threshold)
c.Timelines.List.Trim(threshold)
c.Timelines.Home.Trim()
c.Timelines.List.Trim()
c.Visibility.Trim(threshold)
}
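
The threshold argument disappears from the timeline Trim() calls above because, later in this commit, each StatusTimeline gains its own trim cutoff (60% of its configured capacity) set during Init(). A rough sketch of the resulting Sweep behaviour, with the distinction spelled out (names taken from this diff, comments explanatory only):

// structr caches still trim down to a fraction of their own capacity...
c.DB.User.Trim(threshold)
// ...while status timelines now trim themselves back to their internal
// 'cut' length (0.60 * cap) computed once in StatusTimeline.Init(), so
// they no longer take a threshold argument here.
c.Timelines.Home.Trim()
c.Timelines.List.Trim()
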

View file

@ -23,18 +23,18 @@ import (
)
type TimelineCaches struct {
// Home ...
// Home provides a concurrency-safe map of status timeline
// caches for home timelines, keyed by home's account ID.
Home timeline.StatusTimelines
// List ...
// List provides a concurrency-safe map of status
// timeline caches for lists, keyed by list ID.
List timeline.StatusTimelines
}
func (c *Caches) initHomeTimelines() {
// Per-user cache
// so use smaller.
cap := 400
// TODO: configurable
cap := 800
log.Infof(nil, "cache size = %d", cap)
@ -42,9 +42,8 @@ func (c *Caches) initHomeTimelines() {
}
func (c *Caches) initListTimelines() {
// Per-user cache
// so use smaller.
cap := 400
// TODO: configurable
cap := 800
log.Infof(nil, "cache size = %d", cap)

View file

@ -43,6 +43,12 @@ type StatusMeta struct {
BoostOfAccountID string
Local bool
// is an internal flag that may be set on
// a StatusMeta object that will prevent
// preparation of its apimodel.Status, due
// to it being a recently repeated boost.
repeatBoost bool
// prepared contains prepared frontend API
// model for the referenced status. This may
// or may-not be nil depending on whether the
@ -50,7 +56,7 @@ type StatusMeta struct {
// call to "prepare" the frontend model.
prepared *apimodel.Status
// Loaded is a temporary field that may be
// loaded is a temporary field that may be
// set for a newly loaded timeline status
// so that statuses don't need to be loaded
// from the database twice in succession.
@ -61,16 +67,18 @@ type StatusMeta struct {
loaded *gtsmodel.Status
}
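
Pieced together from the hunks above, the StatusMeta shape after this change looks roughly as follows (a consolidated sketch; field grouping and any fields elided from the diff are assumed):

type StatusMeta struct {
	ID               string
	AccountID        string
	BoostOfID        string
	BoostOfAccountID string
	Local            bool

	// repeatBoost marks a recently repeated boost so that
	// prepareStatuses() skips rendering / streaming it.
	repeatBoost bool

	// prepared caches the frontend API model, if already built.
	prepared *apimodel.Status

	// loaded temporarily holds a freshly loaded status so it
	// isn't fetched from the database twice in succession.
	loaded *gtsmodel.Status
}
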
// StatusTimelines ...
// StatusTimelines is a concurrency-safe map of StatusTimeline{}
// objects, optimizing *very heavily* for reads over writes.
type StatusTimelines struct {
ptr atomic.Pointer[map[string]*StatusTimeline] // ronly except by CAS
cap int
}
// Init ...
// Init stores the given argument(s) such that any StatusTimeline{}
// objects created by MustGet() will be initialized with them.
func (t *StatusTimelines) Init(cap int) { t.cap = cap }
// MustGet ...
// MustGet will attempt to fetch the StatusTimeline{} stored under key, creating one if it does not yet exist.
func (t *StatusTimelines) MustGet(key string) *StatusTimeline {
var tt *StatusTimeline
@ -121,7 +129,7 @@ func (t *StatusTimelines) MustGet(key string) *StatusTimeline {
}
}
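
For readers unfamiliar with the pattern behind the 'ronly except by CAS' note above, here is a minimal sketch of the copy-on-write map that MustGet appears to implement (simplified and renamed; not the exact implementation):

// mustGetSketch: readers only ever load the pointer; a writer builds a
// copy of the map containing the new entry and CASes it into place,
// retrying if another writer got there first.
func (t *StatusTimelines) mustGetSketch(key string) *StatusTimeline {
	for {
		p := t.ptr.Load()
		if p != nil {
			if tt := (*p)[key]; tt != nil {
				return tt
			}
		}
		tt := new(StatusTimeline)
		tt.Init(t.cap)
		next := make(map[string]*StatusTimeline, 1)
		if p != nil {
			for k, v := range *p {
				next[k] = v
			}
		}
		next[key] = tt
		if t.ptr.CompareAndSwap(p, &next) {
			return tt
		}
	}
}
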
// Delete ...
// Delete will delete the stored StatusTimeline{} under key, if any.
func (t *StatusTimelines) Delete(key string) {
for {
// Load current ptr.
@ -153,7 +161,7 @@ func (t *StatusTimelines) Delete(key string) {
}
}
// RemoveByStatusIDs ...
// RemoveByStatusIDs calls RemoveByStatusIDs() for each of the stored StatusTimeline{}s.
func (t *StatusTimelines) RemoveByStatusIDs(statusIDs ...string) {
if p := t.ptr.Load(); p != nil {
for _, tt := range *p {
@ -162,7 +170,7 @@ func (t *StatusTimelines) RemoveByStatusIDs(statusIDs ...string) {
}
}
// RemoveByAccountIDs ...
// RemoveByAccountIDs calls RemoveByAccountIDs() for each of the stored StatusTimeline{}s.
func (t *StatusTimelines) RemoveByAccountIDs(accountIDs ...string) {
if p := t.ptr.Load(); p != nil {
for _, tt := range *p {
@ -171,7 +179,7 @@ func (t *StatusTimelines) RemoveByAccountIDs(accountIDs ...string) {
}
}
// UnprepareByStatusIDs ...
// UnprepareByStatusIDs calls UnprepareByStatusIDs() for each of the stored StatusTimeline{}s.
func (t *StatusTimelines) UnprepareByStatusIDs(statusIDs ...string) {
if p := t.ptr.Load(); p != nil {
for _, tt := range *p {
@ -180,7 +188,7 @@ func (t *StatusTimelines) UnprepareByStatusIDs(statusIDs ...string) {
}
}
// UnprepareByAccountIDs ...
// UnprepareByAccountIDs calls UnprepareByAccountIDs() for each of the stored StatusTimeline{}s.
func (t *StatusTimelines) UnprepareByAccountIDs(accountIDs ...string) {
if p := t.ptr.Load(); p != nil {
for _, tt := range *p {
@ -189,7 +197,7 @@ func (t *StatusTimelines) UnprepareByAccountIDs(accountIDs ...string) {
}
}
// Unprepare ...
// Unprepare attempts to call UnprepareAll() for StatusTimeline{} under key.
func (t *StatusTimelines) Unprepare(key string) {
if p := t.ptr.Load(); p != nil {
if tt := (*p)[key]; tt != nil {
@ -198,7 +206,7 @@ func (t *StatusTimelines) Unprepare(key string) {
}
}
// UnprepareAll ...
// UnprepareAll calls UnprepareAll() for each of the stored StatusTimeline{}s.
func (t *StatusTimelines) UnprepareAll() {
if p := t.ptr.Load(); p != nil {
for _, tt := range *p {
@ -207,16 +215,16 @@ func (t *StatusTimelines) UnprepareAll() {
}
}
// Trim ...
func (t *StatusTimelines) Trim(threshold float64) {
// Trim calls Trim() for each of the stored StatusTimeline{}s.
func (t *StatusTimelines) Trim() {
if p := t.ptr.Load(); p != nil {
for _, tt := range *p {
tt.Trim(threshold)
tt.Trim()
}
}
}
// Clear ...
// Clear attempts to call Clear() for StatusTimeline{} under key.
func (t *StatusTimelines) Clear(key string) {
if p := t.ptr.Load(); p != nil {
if tt := (*p)[key]; tt != nil {
@ -225,7 +233,7 @@ func (t *StatusTimelines) Clear(key string) {
}
}
// ClearAll ...
// ClearAll calls Clear() for each of the stored StatusTimeline{}s.
func (t *StatusTimelines) ClearAll() {
if p := t.ptr.Load(); p != nil {
for _, tt := range *p {
@ -234,7 +242,12 @@ func (t *StatusTimelines) ClearAll() {
}
}
// StatusTimeline ...
// StatusTimeline provides a concurrency-safe timeline
// cache of status information. Internally only StatusMeta{}
// objects are stored, and the statuses themselves are loaded
// as-needed, caching prepared frontend representations where
// possible. This is largely wrapping code for our own codebase
// to be able to smoothly interact with structr.Timeline{}.
type StatusTimeline struct {
// underlying timeline cache of *StatusMeta{},
@ -247,23 +260,16 @@ type StatusTimeline struct {
idx_BoostOfID *structr.Index //nolint:revive
idx_BoostOfAccountID *structr.Index //nolint:revive
// lastOrder stores the last fetched direction
// of the timeline, which in turn determines
// where we will next trim from in keeping the
// timeline underneath configured 'max'.
// cutoff and maximum item lengths.
// the timeline is trimmed back to
// cutoff on each call to Trim(),
// and maximum len triggers a Trim().
//
// TODO: this could be more intelligent with
// a sliding average. a problem for future kim!
lastOrder atomic.Pointer[structr.Direction]
// defines the 'maximum' count of
// entries in the timeline that we
// apply our Trim() call threshold
// to. the timeline itself does not
// the timeline itself does not
// limit items due to complexities
// it would introduce, so we apply
// a 'cut-off' at regular intervals.
max int
cut, max int
}
// Init will initialize the timeline for usage,
@ -294,6 +300,7 @@ func (t *StatusTimeline) Init(cap int) {
AccountID: s.AccountID,
BoostOfID: s.BoostOfID,
BoostOfAccountID: s.BoostOfAccountID,
repeatBoost: s.repeatBoost,
loaded: nil, // NEVER stored
prepared: prepared,
}
@ -306,7 +313,9 @@ func (t *StatusTimeline) Init(cap int) {
t.idx_BoostOfID = t.cache.Index("BoostOfID")
t.idx_BoostOfAccountID = t.cache.Index("BoostOfAccountID")
// Set max.
// Set maximum capacity and
// cutoff threshold we trim to.
t.cut = int(0.60 * float64(cap))
t.max = cap
}
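
With the default capacity from the timeline caches above, the numbers work out as follows (a worked example of the cut/max scheme, not code from the commit):

capacity := 800                      // per-timeline cap from initHomeTimelines / initListTimelines
cut := int(0.60 * float64(capacity)) // 480: length the timeline is trimmed back down to
max := capacity                      // 800: length at which insert() triggers a Trim()
// trims therefore happen in batches of ~320 removals rather than on
// every insert, and the timeline length oscillates between roughly
// 480 and 800 entries during steady streaming.
_, _ = cut, max
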
@ -347,11 +356,6 @@ func (t *StatusTimeline) Load(
panic("nil load page func")
}
// TODO: there's quite a few opportunities for
// optimization here, with a lot of frequently
// used slices of the same types. depending on
// profiles it may be advantageous to pool some.
// Get paging details.
lo := page.Min.Value
hi := page.Max.Value
@ -376,9 +380,6 @@ func (t *StatusTimeline) Load(
dir,
)
// Mark last select order.
t.lastOrder.Store(&dir)
// We now reset the lo,hi values to
// represent the lowest and highest
// index values of loaded statuses.
@ -506,9 +507,9 @@ func (t *StatusTimeline) Load(
}
if len(justLoaded) > 0 {
// Even if we don't return them, insert
// the excess (post-filtered) into cache.
t.cache.Insert(justLoaded...)
// Even if not returning them, insert
// the excess (filtered) into cache.
t.insert(justLoaded...)
}
return apiStatuses, lo, hi, nil
@ -643,22 +644,48 @@ func LoadStatusTimeline(
return apiStatuses, lo, hi, nil
}
// InsertOne allows you to insert a single status into the timeline, with optional prepared API model.
func (t *StatusTimeline) InsertOne(status *gtsmodel.Status, prepared *apimodel.Status) {
t.cache.Insert(&StatusMeta{
// InsertOne allows you to insert a single status into the timeline, with optional prepared API model.
// The return value indicates whether the passed status is a repeat boost, i.e. whether the status it
// boosts was itself posted, or boosted by someone else, recently on this timeline.
func (t *StatusTimeline) InsertOne(status *gtsmodel.Status, prepared *apimodel.Status) (repeatBoost bool) {
if status.BoostOfID != "" {
const repeatBoostDepth = 40
// Check through the top $repeatBoostDepth timeline items.
for i, value := range t.cache.RangeUnsafe(structr.Desc) {
if i >= repeatBoostDepth {
break
}
// If the boosted status itself, or another boost of it, was posted
// within the last $repeatBoostDepth entries, mark this as a repeat boost.
if value.ID == status.BoostOfID || value.BoostOfID == status.BoostOfID {
repeatBoost = true
break
}
}
}
// Insert into timeline.
t.insert(&StatusMeta{
ID: status.ID,
AccountID: status.AccountID,
BoostOfID: status.BoostOfID,
BoostOfAccountID: status.BoostOfAccountID,
Local: *status.Local,
loaded: status,
repeatBoost: repeatBoost,
loaded: nil,
prepared: prepared,
})
return
}
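
A concrete reading of the check above, with illustrative placeholder IDs and variable names (not from the source):

// A fresh boost whose BoostOfID is "01ORIGINAL" arrives while the
// original, or another boost of it, still sits within the newest 40
// timeline entries: it is inserted into the cache as usual, but the
// repeatBoost flag means it is neither prepared for the API nor
// streamed to the client a second time.
repeat := timeline.InsertOne(newBoost, nil) // newBoost.BoostOfID == "01ORIGINAL"
if repeat {
	// don't stream; the entry stays cached purely for ordering.
}
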
// Insert allows you to bulk insert many statuses into the timeline.
func (t *StatusTimeline) Insert(statuses ...*gtsmodel.Status) {
t.cache.Insert(toStatusMeta(statuses)...)
func (t *StatusTimeline) insert(metas ...*StatusMeta) {
if t.cache.Insert(metas...) > t.max {
// If cache reached beyond
// maximum, perform a trim.
t.Trim()
}
}
// RemoveByStatusID removes all cached timeline entries pertaining to
@ -784,33 +811,16 @@ func (t *StatusTimeline) UnprepareByAccountIDs(accountIDs ...string) {
// UnprepareAll removes cached frontend API
// models for all cached timeline entries.
func (t *StatusTimeline) UnprepareAll() {
for value := range t.cache.RangeUnsafe(structr.Asc) {
for _, value := range t.cache.RangeUnsafe(structr.Asc) {
value.prepared = nil
}
}
// Trim will ensure that receiving timeline is less than or
// equal in length to the given threshold percentage of the
// timeline's preconfigured maximum capacity. This will trim
// from top / bottom depending on which was recently accessed.
func (t *StatusTimeline) Trim(threshold float64) {
// Default trim dir.
dir := structr.Asc
// Calculate maximum allowed no.
// items as a percentage of max.
max := threshold * float64(t.max)
// Load last fetched timeline ordering,
// using the inverse value for trimming.
if p := t.lastOrder.Load(); p != nil {
dir = !(*p)
}
// Trim timeline to 'max'.
t.cache.Trim(int(max), dir)
}
// timeline's preconfigured maximum capacity. This will always
// trim from the bottom-up to prioritize streamed inserts.
func (t *StatusTimeline) Trim() { t.cache.Trim(t.cut, structr.Asc) }
// Clear will remove all cached entries from underlying timeline.
func (t *StatusTimeline) Clear() { t.cache.Trim(0, structr.Desc) }
@ -845,6 +855,12 @@ func prepareStatuses(
continue
}
if meta.repeatBoost {
// This is a repeat boost in
// short timespan, skip it.
continue
}
if meta.prepared == nil {
var err error

View file

@ -383,19 +383,21 @@ func (s *Surface) timelineStatus(
}
// Insert status to timeline cache regardless of
// if API model was successfully prepared or not.
timeline.InsertOne(status, apiModel)
// if API model was successfully prepared or not.
repeatBoost := timeline.InsertOne(status, apiModel)
if apiModel != nil {
// Only send the status to user's stream if not
// filtered / muted, i.e. successfully prepared model.
s.Stream.Update(ctx, account, apiModel, streamType)
return true
if apiModel == nil {
// Status was
// filtered / muted.
return false
}
// Status was
// filtered / muted.
return false
if !repeatBoost {
// Only stream if not repeated boost of recent status.
s.Stream.Update(ctx, account, apiModel, streamType)
}
return true
}
// timelineAndNotifyStatusForTagFollowers inserts the status into the

View file

@ -1,6 +1,7 @@
package structr
import (
"fmt"
"os"
"reflect"
"strings"
@ -222,10 +223,10 @@ func (i *Index) get(key string, hook func(*indexed_item)) {
func (i *Index) key(buf *byteutil.Buffer, parts []unsafe.Pointer) string {
buf.B = buf.B[:0]
if len(parts) != len(i.fields) {
panicf("incorrect number key parts: want=%d received=%d",
panic(fmt.Sprintf("incorrect number key parts: want=%d received=%d",
len(i.fields),
len(parts),
)
))
}
if !allow_zero(i.flags) {
for x, field := range i.fields {

View file

@ -70,7 +70,7 @@ func find_field(t reflect.Type, names []string) (sfield struct_field) {
name := names[0]
names = names[1:]
if !is_exported(name) {
panicf("field is not exported: %s", name)
panic(fmt.Sprintf("field is not exported: %s", name))
}
return name
}
@ -94,7 +94,7 @@ func find_field(t reflect.Type, names []string) (sfield struct_field) {
// Check for valid struct type.
if t.Kind() != reflect.Struct {
panicf("field %s is not struct (or ptr-to): %s", t, name)
panic(fmt.Sprintf("field %s is not struct (or ptr-to): %s", t, name))
}
var ok bool
@ -102,7 +102,7 @@ func find_field(t reflect.Type, names []string) (sfield struct_field) {
// Look for next field by name.
field, ok = t.FieldByName(name)
if !ok {
panicf("unknown field: %s", name)
panic(fmt.Sprintf("unknown field: %s", name))
}
// Set next offset value.
@ -258,11 +258,6 @@ func eface_data(a any) unsafe.Pointer {
return (*eface)(unsafe.Pointer(&a)).data
}
// panicf provides a panic with string formatting.
func panicf(format string, args ...any) {
panic(fmt.Sprintf(format, args...))
}
// assert can be called to indicated a block
// of code should not be able to be reached,
// it returns a BUG report with callsite.

View file

@ -190,7 +190,8 @@ func (t *Timeline[T, PK]) Select(min, max *PK, length *int, dir Direction) (valu
// Insert will insert the given values into the timeline,
// calling any set invalidate hook on each inserted value.
func (t *Timeline[T, PK]) Insert(values ...T) {
// Returns current list length after performing inserts.
func (t *Timeline[T, PK]) Insert(values ...T) int {
// Acquire lock.
t.mutex.Lock()
@ -269,6 +270,10 @@ func (t *Timeline[T, PK]) Insert(values ...T) {
// Get func ptrs.
invalid := t.invalid
// Get length AFTER
// insert to return.
len := t.list.len
// Done with lock.
t.mutex.Unlock()
@ -279,6 +284,8 @@ func (t *Timeline[T, PK]) Insert(values ...T) {
invalid(value)
}
}
return len
}
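
The new return value lets callers decide whether to trim without a second call or lock acquisition; the intended pattern, mirroring StatusTimeline.insert() earlier in this commit (tl, items, maxItems and cutoff are illustrative names), looks like:

if tl.Insert(items...) > maxItems {
	// over capacity: trim the oldest entries back down to the cutoff.
	tl.Trim(cutoff, structr.Asc)
}
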
// Invalidate invalidates all entries stored in index under given keys.
@ -336,8 +343,8 @@ func (t *Timeline[T, PK]) Invalidate(index *Index, keys ...Key) {
//
// Please note that the entire Timeline{} will be locked for the duration of the range
// operation, i.e. from the beginning of the first yield call until the end of the last.
func (t *Timeline[T, PK]) Range(dir Direction) func(yield func(T) bool) {
return func(yield func(T) bool) {
func (t *Timeline[T, PK]) Range(dir Direction) func(yield func(index int, value T) bool) {
return func(yield func(int, T) bool) {
if t.copy == nil {
panic("not initialized")
} else if yield == nil {
@ -348,7 +355,9 @@ func (t *Timeline[T, PK]) Range(dir Direction) func(yield func(T) bool) {
t.mutex.Lock()
defer t.mutex.Unlock()
var i int
switch dir {
case Asc:
// Iterate through linked list from bottom (i.e. tail).
for prev := t.list.tail; prev != nil; prev = prev.prev {
@ -360,9 +369,12 @@ func (t *Timeline[T, PK]) Range(dir Direction) func(yield func(T) bool) {
value := t.copy(item.data.(T))
// Pass to given function.
if !yield(value) {
if !yield(i, value) {
break
}
// Iter
i++
}
case Desc:
@ -376,9 +388,12 @@ func (t *Timeline[T, PK]) Range(dir Direction) func(yield func(T) bool) {
value := t.copy(item.data.(T))
// Pass to given function.
if !yield(value) {
if !yield(i, value) {
break
}
// Iter
i++
}
}
}
@ -390,8 +405,8 @@ func (t *Timeline[T, PK]) Range(dir Direction) func(yield func(T) bool) {
//
// Please note that the entire Timeline{} will be locked for the duration of the range
// operation, i.e. from the beginning of the first yield call until the end of the last.
func (t *Timeline[T, PK]) RangeUnsafe(dir Direction) func(yield func(T) bool) {
return func(yield func(T) bool) {
func (t *Timeline[T, PK]) RangeUnsafe(dir Direction) func(yield func(index int, value T) bool) {
return func(yield func(int, T) bool) {
if t.copy == nil {
panic("not initialized")
} else if yield == nil {
@ -402,7 +417,9 @@ func (t *Timeline[T, PK]) RangeUnsafe(dir Direction) func(yield func(T) bool) {
t.mutex.Lock()
defer t.mutex.Unlock()
var i int
switch dir {
case Asc:
// Iterate through linked list from bottom (i.e. tail).
for prev := t.list.tail; prev != nil; prev = prev.prev {
@ -411,9 +428,12 @@ func (t *Timeline[T, PK]) RangeUnsafe(dir Direction) func(yield func(T) bool) {
item := (*timeline_item)(prev.data)
// Pass to given function.
if !yield(item.data.(T)) {
if !yield(i, item.data.(T)) {
break
}
// Iter
i++
}
case Desc:
@ -424,9 +444,12 @@ func (t *Timeline[T, PK]) RangeUnsafe(dir Direction) func(yield func(T) bool) {
item := (*timeline_item)(next.data)
// Pass to given function.
if !yield(item.data.(T)) {
if !yield(i, item.data.(T)) {
break
}
// Iter
i++
}
}
}
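
With the yield signature now carrying an index, Range and RangeUnsafe can be consumed as two-value Go range-over-func iterators, which is how the repeat-boost check earlier in this commit bounds its scan (sketch):

for i, meta := range t.cache.RangeUnsafe(structr.Desc) {
	if i >= 40 {
		break // only the newest 40 entries are inspected
	}
	_ = meta // compare meta.ID / meta.BoostOfID here
}
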
@ -1033,6 +1056,9 @@ indexing:
// checking for collisions.
if !idx.add(key, i_item) {
// This key already appears
// in this unique index. So
// drop new timeline item.
t.delete(t_item)
free_buffer(buf)
return last

2
vendor/modules.txt vendored
View file

@ -60,7 +60,7 @@ codeberg.org/gruf/go-storage/disk
codeberg.org/gruf/go-storage/internal
codeberg.org/gruf/go-storage/memory
codeberg.org/gruf/go-storage/s3
# codeberg.org/gruf/go-structr v0.9.6
# codeberg.org/gruf/go-structr v0.9.7
## explicit; go 1.22
codeberg.org/gruf/go-structr
# codeberg.org/superseriousbusiness/activity v1.13.0-gts