Halfway done with hot rank indexes.

This commit is contained in:
Dessalines 2021-01-05 23:42:48 -05:00
parent 61c6f33103
commit d227000de3
22 changed files with 227 additions and 87 deletions

3
.gitignore vendored
View file

@ -15,8 +15,7 @@ volumes
# local build files
target
env_setup.sh
query_testing/*.json
query_testing/*.json.old
query_testing/**/reports/*.json
# API tests
api_tests/node_modules

View file

@ -10,6 +10,7 @@ pub struct CommentAggregates {
pub score: i64,
pub upvotes: i64,
pub downvotes: i64,
pub published: chrono::NaiveDateTime,
}
impl CommentAggregates {

View file

@ -10,6 +10,7 @@ pub struct CommunityAggregates {
pub subscribers: i64,
pub posts: i64,
pub comments: i64,
pub published: chrono::NaiveDateTime,
}
impl CommunityAggregates {

View file

@ -11,6 +11,7 @@ pub struct PostAggregates {
pub score: i64,
pub upvotes: i64,
pub downvotes: i64,
pub published: chrono::NaiveDateTime,
pub newest_comment_time: chrono::NaiveDateTime,
}

View file

@ -41,6 +41,7 @@ table! {
score -> Int8,
upvotes -> Int8,
downvotes -> Int8,
published -> Timestamp,
}
}
@ -108,6 +109,7 @@ table! {
subscribers -> Int8,
posts -> Int8,
comments -> Int8,
published -> Timestamp,
}
}
@ -280,6 +282,7 @@ table! {
score -> Int8,
upvotes -> Int8,
downvotes -> Int8,
published -> Timestamp,
newest_comment_time -> Timestamp,
}
}

View file

@ -365,8 +365,8 @@ impl<'a> CommentQueryBuilder<'a> {
query = match self.sort {
SortType::Hot | SortType::Active => query
.order_by(hot_rank(comment_aggregates::score, comment::published).desc())
.then_order_by(comment::published.desc()),
.order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc())
.then_order_by(comment_aggregates::published.desc()),
SortType::New => query.order_by(comment::published.desc()),
SortType::TopAll => query.order_by(comment_aggregates::score.desc()),
SortType::TopYear => query
@ -614,6 +614,7 @@ mod tests {
score: 1,
upvotes: 1,
downvotes: 0,
published: agg.published,
},
};

View file

@ -349,10 +349,10 @@ impl<'a> PostQueryBuilder<'a> {
.then_order_by(
hot_rank(post_aggregates::score, post_aggregates::newest_comment_time).desc(),
)
.then_order_by(post::published.desc()),
.then_order_by(post_aggregates::newest_comment_time.desc()),
SortType::Hot => query
.then_order_by(hot_rank(post_aggregates::score, post::published).desc())
.then_order_by(post::published.desc()),
.then_order_by(hot_rank(post_aggregates::score, post_aggregates::published).desc())
.then_order_by(post_aggregates::published.desc()),
SortType::New => query.then_order_by(post::published.desc()),
SortType::TopAll => query.then_order_by(post_aggregates::score.desc()),
SortType::TopYear => query
@ -601,6 +601,7 @@ mod tests {
score: 1,
upvotes: 1,
downvotes: 0,
published: agg.published,
newest_comment_time: inserted_post.published,
},
subscribed: false,

View file

@ -186,9 +186,14 @@ impl<'a> CommunityQueryBuilder<'a> {
// Covers all other sorts, including hot
_ => {
query = query
// TODO do custom sql function for hot_rank, make sure this works
.order_by(hot_rank(community_aggregates::subscribers, community::published).desc())
.then_order_by(community_aggregates::subscribers.desc())
.order_by(
hot_rank(
community_aggregates::subscribers,
community_aggregates::published,
)
.desc(),
)
.then_order_by(community_aggregates::published.desc())
}
};

View file

@ -268,8 +268,8 @@ impl<'a> UserMentionQueryBuilder<'a> {
query = match self.sort {
SortType::Hot | SortType::Active => query
.order_by(hot_rank(comment_aggregates::score, comment::published).desc())
.then_order_by(comment::published.desc()),
.order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc())
.then_order_by(comment_aggregates::published.desc()),
SortType::New => query.order_by(comment::published.desc()),
SortType::TopAll => query.order_by(comment_aggregates::score.desc()),
SortType::TopYear => query

View file

@ -5,15 +5,17 @@ create table community_aggregates (
subscribers bigint not null default 0,
posts bigint not null default 0,
comments bigint not null default 0,
published timestamp not null default now(),
unique (community_id)
);
insert into community_aggregates (community_id, subscribers, posts, comments)
insert into community_aggregates (community_id, subscribers, posts, comments, published)
select
c.id,
coalesce(cf.subs, 0) as subscribers,
coalesce(cd.posts, 0) as posts,
coalesce(cd.comments, 0) as comments
coalesce(cd.comments, 0) as comments,
c.published
from community c
left join (
select

View file

@ -6,17 +6,19 @@ create table post_aggregates (
score bigint not null default 0,
upvotes bigint not null default 0,
downvotes bigint not null default 0,
published timestamp not null default now(),
newest_comment_time timestamp not null default now(),
unique (post_id)
);
insert into post_aggregates (post_id, comments, score, upvotes, downvotes, newest_comment_time)
insert into post_aggregates (post_id, comments, score, upvotes, downvotes, published, newest_comment_time)
select
p.id,
coalesce(ct.comments, 0::bigint) as comments,
coalesce(pl.score, 0::bigint) as score,
coalesce(pl.upvotes, 0::bigint) as upvotes,
coalesce(pl.downvotes, 0::bigint) as downvotes,
p.published,
greatest(ct.recent_comment_time, p.published) as newest_activity_time
from post p
left join (
@ -64,7 +66,9 @@ begin
update post_aggregates pa
set comments = comments + 1,
newest_comment_time = NEW.published
where pa.post_id = NEW.post_id;
where pa.post_id = NEW.post_id
-- A 2 day necro-bump limit
and published > ('now'::timestamp - '2 days'::interval);
ELSIF (TG_OP = 'DELETE') THEN
-- Join to post because that post may not exist anymore
update post_aggregates pa

View file

@ -5,15 +5,17 @@ create table comment_aggregates (
score bigint not null default 0,
upvotes bigint not null default 0,
downvotes bigint not null default 0,
published timestamp not null default now(),
unique (comment_id)
);
insert into comment_aggregates (comment_id, score, upvotes, downvotes)
insert into comment_aggregates (comment_id, score, upvotes, downvotes, published)
select
c.id,
COALESCE(cl.total, 0::bigint) AS score,
COALESCE(cl.up, 0::bigint) AS upvotes,
COALESCE(cl.down, 0::bigint) AS downvotes
COALESCE(cl.down, 0::bigint) AS downvotes,
c.published
from comment c
left join ( select l.comment_id as id,
sum(l.score) as total,

View file

@ -0,0 +1,25 @@
-- Rank = ScaleFactor * sign(Score) * log(1 + abs(Score)) / (Time + 2)^Gravity
create or replace function hot_rank(
score numeric,
published timestamp without time zone)
returns integer as $$
begin
-- hours_diff:=EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600
return floor(10000*log(greatest(1,score+3)) / power(((EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600) + 2), 1.8))::integer;
end; $$
LANGUAGE plpgsql;
drop index
idx_post_published,
idx_post_stickied,
idx_post_aggregates_hot,
idx_post_aggregates_active,
idx_post_aggregates_score,
idx_comment_published,
idx_comment_aggregates_hot,
idx_comment_aggregates_score,
idx_user_published,
idx_user_aggregates_comment_score,
idx_community_published,
idx_community_aggregates_hot,
idx_community_aggregates_subscribers;

View file

@ -0,0 +1,45 @@
-- Need to add immutable to the hot_rank function in order to index by it
-- Rank = ScaleFactor * sign(Score) * log(1 + abs(Score)) / (Time + 2)^Gravity
create or replace function hot_rank(
score numeric,
published timestamp without time zone)
returns integer as $$
begin
-- hours_diff:=EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600
return floor(10000*log(greatest(1,score+3)) / power(((EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600) + 2), 1.8))::integer;
end; $$
LANGUAGE plpgsql
IMMUTABLE;
-- Post
create index idx_post_published on post (published desc);
create index idx_post_stickied on post (stickied desc);
-- Post_aggregates
create index idx_post_aggregates_hot on post_aggregates (hot_rank(score, published) desc, published desc);
create index idx_post_aggregates_active on post_aggregates (hot_rank(score, newest_comment_time) desc, newest_comment_time desc);
create index idx_post_aggregates_score on post_aggregates (score desc);
-- Comment
create index idx_comment_published on comment (published desc);
-- Comment_aggregates
create index idx_comment_aggregates_hot on comment_aggregates (hot_rank(score, published) desc, published desc);
create index idx_comment_aggregates_score on comment_aggregates (score desc);
-- User
create index idx_user_published on user_ (published desc);
-- User_aggregates
create index idx_user_aggregates_comment_score on user_aggregates (comment_score desc);
-- Community
create index idx_community_published on community (published desc);
-- Community_aggregates
create index idx_community_aggregates_hot on community_aggregates (hot_rank(subscribers, published) desc, published desc);
create index idx_community_aggregates_subscribers on community_aggregates (subscribers desc);

View file

@ -1,13 +0,0 @@
comment_fast_view.json: "Execution Time": 400.841
comment_view.json: "Execution Time": 2312.899
community_fast_view.json: "Execution Time": 0.272
community_view.json: "Execution Time": 36.572
post_fast_view.json: "Execution Time": 128.839
post_view.json: "Execution Time": 970.671
private_message_view.json: "Execution Time": 1.426
reply_fast_view.json: "Execution Time": 426.179
site_view.json: "Execution Time": 2.453
user_fast.json: "Execution Time": 0.400
user_mention_fast_view.json: "Execution Time": 0.179
user_mention_view.json: "Execution Time": 95.815
user_view.json: "Execution Time": 44.692

View file

@ -1,13 +0,0 @@
comment_fast_view.json: "Execution Time": 3.501
comment_view.json: "Execution Time": 2312.899
community_fast_view.json: "Execution Time": 0.372
community_view.json: "Execution Time": 36.572
post_fast_view.json: "Execution Time": 78.920
post_view.json: "Execution Time": 970.671
private_message_view.json: "Execution Time": 1.426
reply_fast_view.json: "Execution Time": 32.875
site_view.json: "Execution Time": 2.593
user_fast.json: "Execution Time": 0.155
user_mention_fast_view.json: "Execution Time": 0.171
user_mention_view.json: "Execution Time": 1468.291
user_view.json: "Execution Time": 44.692

View file

@ -1,43 +0,0 @@
#!/bin/bash
set -e
# You can import these to http://tatiyants.com/pev/#/plans/new
# Do the views first
echo "explain (analyze, format json) select * from user_fast" > explain.sql
psql -qAt -U lemmy -f explain.sql > user_fast.json
echo "explain (analyze, format json) select * from post_view where user_id is null order by hot_rank desc, published desc" > explain.sql
psql -qAt -U lemmy -f explain.sql > post_view.json
echo "explain (analyze, format json) select * from post_fast_view where user_id is null order by hot_rank desc, published desc" > explain.sql
psql -qAt -U lemmy -f explain.sql > post_fast_view.json
echo "explain (analyze, format json) select * from comment_view where user_id is null" > explain.sql
psql -qAt -U lemmy -f explain.sql > comment_view.json
echo "explain (analyze, format json) select * from comment_fast_view where user_id is null" > explain.sql
psql -qAt -U lemmy -f explain.sql > comment_fast_view.json
echo "explain (analyze, format json) select * from community_view where user_id is null order by hot_rank desc" > explain.sql
psql -qAt -U lemmy -f explain.sql > community_view.json
echo "explain (analyze, format json) select * from community_fast_view where user_id is null order by hot_rank desc" > explain.sql
psql -qAt -U lemmy -f explain.sql > community_fast_view.json
echo "explain (analyze, format json) select * from site_view limit 1" > explain.sql
psql -qAt -U lemmy -f explain.sql > site_view.json
echo "explain (analyze, format json) select * from reply_fast_view where user_id = 34 and recipient_id = 34" > explain.sql
psql -qAt -U lemmy -f explain.sql > reply_fast_view.json
echo "explain (analyze, format json) select * from user_mention_view where user_id = 34 and recipient_id = 34" > explain.sql
psql -qAt -U lemmy -f explain.sql > user_mention_view.json
echo "explain (analyze, format json) select * from user_mention_fast_view where user_id = 34 and recipient_id = 34" > explain.sql
psql -qAt -U lemmy -f explain.sql > user_mention_fast_view.json
grep "Execution Time" *.json
rm explain.sql

View file

@ -0,0 +1,49 @@
#!/bin/bash
set -e
# You can import these to http://tatiyants.com/pev/#/plans/new
pushd reports
# Do the views first
PSQL_CMD="docker exec -i dev_postgres_1 psql -qAt -U lemmy"
echo "explain (analyze, format json) select * from user_fast limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > user_fast.json
echo "explain (analyze, format json) select * from post_view where user_id is null order by hot_rank desc, published desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > post_view.json
echo "explain (analyze, format json) select * from post_fast_view where user_id is null order by hot_rank desc, published desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > post_fast_view.json
echo "explain (analyze, format json) select * from comment_view where user_id is null limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > comment_view.json
echo "explain (analyze, format json) select * from comment_fast_view where user_id is null limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > comment_fast_view.json
echo "explain (analyze, format json) select * from community_view where user_id is null order by hot_rank desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > community_view.json
echo "explain (analyze, format json) select * from community_fast_view where user_id is null order by hot_rank desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > community_fast_view.json
echo "explain (analyze, format json) select * from site_view limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > site_view.json
echo "explain (analyze, format json) select * from reply_fast_view where user_id = 34 and recipient_id = 34 limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > reply_fast_view.json
echo "explain (analyze, format json) select * from user_mention_view where user_id = 34 and recipient_id = 34 limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > user_mention_view.json
echo "explain (analyze, format json) select * from user_mention_fast_view where user_id = 34 and recipient_id = 34 limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > user_mention_fast_view.json
grep "Execution Time" *.json > ../timings-`date +%Y-%m-%d_%H-%M-%S`.out
rm explain.sql
popd

View file

@ -0,0 +1,11 @@
comment_fast_view.json: "Execution Time": 93.165
comment_view.json: "Execution Time": 4513.485
community_fast_view.json: "Execution Time": 3.998
community_view.json: "Execution Time": 561.814
post_fast_view.json: "Execution Time": 1604.543
post_view.json: "Execution Time": 11630.471
reply_fast_view.json: "Execution Time": 85.708
site_view.json: "Execution Time": 27.264
user_fast.json: "Execution Time": 0.135
user_mention_fast_view.json: "Execution Time": 6.665
user_mention_view.json: "Execution Time": 4996.688

View file

@ -0,0 +1,41 @@
#!/bin/bash
set -e
# You can import these to http://tatiyants.com/pev/#/plans/new
pushd reports
PSQL_CMD="docker exec -i dev_postgres_1 psql -qAt -U lemmy"
echo "explain (analyze, format json) select * from user_ limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > user_.json
echo "explain (analyze, format json) select * from post p limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > post.json
echo "explain (analyze, format json) select * from post p, post_aggregates pa where p.id = pa.post_id order by hot_rank(pa.score, pa.published) desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > post_ordered_by_rank.json
echo "explain (analyze, format json) select * from comment limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > comment.json
echo "explain (analyze, format json) select * from community limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > community.json
echo "explain (analyze, format json) select * from community c, community_aggregates ca where c.id = ca.community_id order by hot_rank(ca.subscribers, ca.published) desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > community_ordered_by_subscribers.json
echo "explain (analyze, format json) select * from site s" > explain.sql
cat explain.sql | $PSQL_CMD > site.json
echo "explain (analyze, format json) select * from user_mention limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > user_mention.json
echo "explain (analyze, format json) select * from private_message limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > private_message.json
grep "Execution Time" *.json > ../timings-`date +%Y-%m-%d_%H-%M-%S`.out
rm explain.sql
popd

View file

@ -0,0 +1,9 @@
comment.json: "Execution Time": 12.263
community.json: "Execution Time": 1.225
community_ordered_by_subscribers.json: "Execution Time": 170.255
post.json: "Execution Time": 5.373
post_ordered_by_rank.json: "Execution Time": 1458.801
private_message.json: "Execution Time": 0.306
site.json: "Execution Time": 0.064
user_.json: "Execution Time": 2.606
user_mention.json: "Execution Time": 0.135

View file

@ -0,0 +1,9 @@
comment.json: "Execution Time": 0.136
community.json: "Execution Time": 0.157
community_ordered_by_subscribers.json: "Execution Time": 16.036
post.json: "Execution Time": 0.129
post_ordered_by_rank.json: "Execution Time": 15.969
private_message.json: "Execution Time": 0.133
site.json: "Execution Time": 0.056
user_.json: "Execution Time": 0.300
user_mention.json: "Execution Time": 0.122