From 8cb5939f5048c3eab293884923d4c3d5fcc08e2f Mon Sep 17 00:00:00 2001 From: Dessalines Date: Thu, 8 Jun 2023 16:15:15 -0400 Subject: [PATCH] Adding hot_rank columns in place of function sorting. (#2952) * Adding hot_rank columns in place of function sorting. - Creates hot_rank columns for post, comment, and community. - Fixes #2932 * Updating all hot ranks on startup. * Fixing post.url migration. * Removing update_instance_software from startup. * Adding post_rank query --- crates/db_schema/src/aggregates/structs.rs | 4 + crates/db_schema/src/schema.rs | 7 +- crates/db_views/src/comment_report_view.rs | 1 + crates/db_views/src/comment_view.rs | 7 +- crates/db_views/src/post_report_view.rs | 2 + crates/db_views/src/post_view.rs | 18 +-- .../db_views_actor/src/comment_reply_view.rs | 6 +- crates/db_views_actor/src/community_view.rs | 2 +- .../db_views_actor/src/person_mention_view.rs | 6 +- .../2023-06-06-104440_index_post_url/down.sql | 4 + .../2023-06-06-104440_index_post_url/up.sql | 8 ++ .../down.sql | 35 ++++++ .../up.sql | 51 ++++++++ scripts/query_testing/post_query_hot_rank.sh | 3 + src/scheduled_tasks.rs | 118 +++++++++++------- 15 files changed, 199 insertions(+), 73 deletions(-) create mode 100644 migrations/2023-06-07-105918_add_hot_rank_columns/down.sql create mode 100644 migrations/2023-06-07-105918_add_hot_rank_columns/up.sql create mode 100755 scripts/query_testing/post_query_hot_rank.sh diff --git a/crates/db_schema/src/aggregates/structs.rs b/crates/db_schema/src/aggregates/structs.rs index d75098c59..592c63abb 100644 --- a/crates/db_schema/src/aggregates/structs.rs +++ b/crates/db_schema/src/aggregates/structs.rs @@ -27,6 +27,7 @@ pub struct CommentAggregates { pub published: chrono::NaiveDateTime, /// The total number of children in this comment branch. pub child_count: i32, + pub hot_rank: i32, } #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)] @@ -53,6 +54,7 @@ pub struct CommunityAggregates { pub users_active_month: i64, /// The number of users with any activity in the last year. pub users_active_half_year: i64, + pub hot_rank: i32, } #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone, Default)] @@ -92,6 +94,8 @@ pub struct PostAggregates { pub featured_community: bool, /// If the post is featured on the site / to local. pub featured_local: bool, + pub hot_rank: i32, + pub hot_rank_active: i32, } #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)] diff --git a/crates/db_schema/src/schema.rs b/crates/db_schema/src/schema.rs index 6fe139126..ac4ddc47a 100644 --- a/crates/db_schema/src/schema.rs +++ b/crates/db_schema/src/schema.rs @@ -95,6 +95,7 @@ diesel::table! { downvotes -> Int8, published -> Timestamp, child_count -> Int4, + hot_rank -> Int4, } } @@ -191,6 +192,7 @@ diesel::table! { users_active_week -> Int8, users_active_month -> Int8, users_active_half_year -> Int8, + hot_rank -> Int4, } } @@ -634,7 +636,8 @@ diesel::table! { id -> Int4, #[max_length = 200] name -> Varchar, - url -> Nullable, + #[max_length = 512] + url -> Nullable, body -> Nullable, creator_id -> Int4, community_id -> Int4, @@ -670,6 +673,8 @@ diesel::table! { newest_comment_time -> Timestamp, featured_community -> Bool, featured_local -> Bool, + hot_rank -> Int4, + hot_rank_active -> Int4, } } diff --git a/crates/db_views/src/comment_report_view.rs b/crates/db_views/src/comment_report_view.rs index 8d4ae54ee..4a5397b32 100644 --- a/crates/db_views/src/comment_report_view.rs +++ b/crates/db_views/src/comment_report_view.rs @@ -477,6 +477,7 @@ mod tests { downvotes: 0, published: agg.published, child_count: 0, + hot_rank: 1728, }, my_vote: None, resolver: None, diff --git a/crates/db_views/src/comment_view.rs b/crates/db_views/src/comment_view.rs index 3657d9429..6ac629b5a 100644 --- a/crates/db_views/src/comment_view.rs +++ b/crates/db_views/src/comment_view.rs @@ -36,7 +36,7 @@ use lemmy_db_schema::{ post::Post, }, traits::JoinView, - utils::{functions::hot_rank, fuzzy_search, get_conn, limit_and_offset_unlimited, DbPool}, + utils::{fuzzy_search, get_conn, limit_and_offset_unlimited, DbPool}, CommentSortType, ListingType, }; @@ -346,9 +346,7 @@ impl<'a> CommentQuery<'a> { }; query = match self.sort.unwrap_or(CommentSortType::Hot) { - CommentSortType::Hot => query - .then_order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc()) - .then_order_by(comment_aggregates::published.desc()), + CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()), CommentSortType::New => query.then_order_by(comment::published.desc()), CommentSortType::Old => query.then_order_by(comment::published.asc()), CommentSortType::Top => query.order_by(comment_aggregates::score.desc()), @@ -909,6 +907,7 @@ mod tests { downvotes: 0, published: agg.published, child_count: 5, + hot_rank: 1728, }, } } diff --git a/crates/db_views/src/post_report_view.rs b/crates/db_views/src/post_report_view.rs index 14a91fc2d..19822c795 100644 --- a/crates/db_views/src/post_report_view.rs +++ b/crates/db_views/src/post_report_view.rs @@ -468,6 +468,8 @@ mod tests { newest_comment_time: inserted_post.published, featured_community: false, featured_local: false, + hot_rank: 1728, + hot_rank_active: 1728, }, resolver: None, }; diff --git a/crates/db_views/src/post_view.rs b/crates/db_views/src/post_view.rs index f822f76f5..2c1b9d1df 100644 --- a/crates/db_views/src/post_view.rs +++ b/crates/db_views/src/post_view.rs @@ -40,7 +40,7 @@ use lemmy_db_schema::{ post::{Post, PostRead, PostSaved}, }, traits::JoinView, - utils::{functions::hot_rank, fuzzy_search, get_conn, limit_and_offset, DbPool}, + utils::{fuzzy_search, get_conn, limit_and_offset, DbPool}, ListingType, SortType, }; @@ -387,18 +387,8 @@ impl<'a> PostQuery<'a> { } query = match self.sort.unwrap_or(SortType::Hot) { - SortType::Active => query - .then_order_by( - hot_rank( - post_aggregates::score, - post_aggregates::newest_comment_time_necro, - ) - .desc(), - ) - .then_order_by(post_aggregates::newest_comment_time_necro.desc()), - SortType::Hot => query - .then_order_by(hot_rank(post_aggregates::score, post_aggregates::published).desc()) - .then_order_by(post_aggregates::published.desc()), + SortType::Active => query.then_order_by(post_aggregates::hot_rank_active.desc()), + SortType::Hot => query.then_order_by(post_aggregates::hot_rank.desc()), SortType::New => query.then_order_by(post_aggregates::published.desc()), SortType::Old => query.then_order_by(post_aggregates::published.asc()), SortType::NewComments => query.then_order_by(post_aggregates::newest_comment_time.desc()), @@ -945,6 +935,8 @@ mod tests { newest_comment_time: inserted_post.published, featured_community: false, featured_local: false, + hot_rank: 1728, + hot_rank_active: 1728, }, subscribed: SubscribedType::NotSubscribed, read: false, diff --git a/crates/db_views_actor/src/comment_reply_view.rs b/crates/db_views_actor/src/comment_reply_view.rs index 2a3c58d6c..42c3a53e0 100644 --- a/crates/db_views_actor/src/comment_reply_view.rs +++ b/crates/db_views_actor/src/comment_reply_view.rs @@ -33,7 +33,7 @@ use lemmy_db_schema::{ post::Post, }, traits::JoinView, - utils::{functions::hot_rank, get_conn, limit_and_offset, DbPool}, + utils::{get_conn, limit_and_offset, DbPool}, CommentSortType, }; use typed_builder::TypedBuilder; @@ -266,9 +266,7 @@ impl<'a> CommentReplyQuery<'a> { }; query = match self.sort.unwrap_or(CommentSortType::New) { - CommentSortType::Hot => query - .then_order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc()) - .then_order_by(comment_aggregates::published.desc()), + CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()), CommentSortType::New => query.then_order_by(comment_reply::published.desc()), CommentSortType::Old => query.then_order_by(comment_reply::published.asc()), CommentSortType::Top => query.order_by(comment_aggregates::score.desc()), diff --git a/crates/db_views_actor/src/community_view.rs b/crates/db_views_actor/src/community_view.rs index 9d48f17d6..abab023a6 100644 --- a/crates/db_views_actor/src/community_view.rs +++ b/crates/db_views_actor/src/community_view.rs @@ -181,7 +181,7 @@ impl<'a> CommunityQuery<'a> { SortType::TopAll => query = query.order_by(community_aggregates::subscribers.desc()), SortType::TopMonth => query = query.order_by(community_aggregates::users_active_month.desc()), SortType::Hot => { - query = query.order_by(community_aggregates::users_active_month.desc()); + query = query.order_by(community_aggregates::hot_rank.desc()); // Don't show hidden communities in Hot (trending) query = query.filter( community::hidden diff --git a/crates/db_views_actor/src/person_mention_view.rs b/crates/db_views_actor/src/person_mention_view.rs index 1b0e6415e..803abdb39 100644 --- a/crates/db_views_actor/src/person_mention_view.rs +++ b/crates/db_views_actor/src/person_mention_view.rs @@ -34,7 +34,7 @@ use lemmy_db_schema::{ post::Post, }, traits::JoinView, - utils::{functions::hot_rank, get_conn, limit_and_offset, DbPool}, + utils::{get_conn, limit_and_offset, DbPool}, CommentSortType, }; use typed_builder::TypedBuilder; @@ -271,9 +271,7 @@ impl<'a> PersonMentionQuery<'a> { }; query = match self.sort.unwrap_or(CommentSortType::Hot) { - CommentSortType::Hot => query - .then_order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc()) - .then_order_by(comment_aggregates::published.desc()), + CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()), CommentSortType::New => query.then_order_by(comment::published.desc()), CommentSortType::Old => query.then_order_by(comment::published.asc()), CommentSortType::Top => query.order_by(comment_aggregates::score.desc()), diff --git a/migrations/2023-06-06-104440_index_post_url/down.sql b/migrations/2023-06-06-104440_index_post_url/down.sql index 5a60b131b..ccc9e938f 100644 --- a/migrations/2023-06-06-104440_index_post_url/down.sql +++ b/migrations/2023-06-06-104440_index_post_url/down.sql @@ -1 +1,5 @@ +-- Change back the column type +alter table post alter column url type text; + +-- Drop the index drop index idx_post_url; diff --git a/migrations/2023-06-06-104440_index_post_url/up.sql b/migrations/2023-06-06-104440_index_post_url/up.sql index 4f8ea66ff..ce3532887 100644 --- a/migrations/2023-06-06-104440_index_post_url/up.sql +++ b/migrations/2023-06-06-104440_index_post_url/up.sql @@ -1 +1,9 @@ +-- Make a hard limit of 512 for the post.url column +-- Truncate existing long rows. +update post set url = left(url, 512) where length(url) > 512; + +-- Enforce the limit +alter table post alter column url type varchar (512); + +-- Add the index create index idx_post_url on post(url); diff --git a/migrations/2023-06-07-105918_add_hot_rank_columns/down.sql b/migrations/2023-06-07-105918_add_hot_rank_columns/down.sql new file mode 100644 index 000000000..71529bdf0 --- /dev/null +++ b/migrations/2023-06-07-105918_add_hot_rank_columns/down.sql @@ -0,0 +1,35 @@ + +-- Remove the new columns + +alter table post_aggregates drop column hot_rank; +alter table post_aggregates drop column hot_rank_active; + +alter table comment_aggregates drop column hot_rank; + +alter table community_aggregates drop column hot_rank; + +-- Drop some new indexes +drop index idx_post_aggregates_score; +drop index idx_post_aggregates_published; +drop index idx_post_aggregates_newest_comment_time; +drop index idx_post_aggregates_newest_comment_time_necro; +drop index idx_post_aggregates_featured_community; +drop index idx_post_aggregates_featured_local; + +-- Recreate the old indexes +CREATE INDEX idx_post_aggregates_featured_local_newest_comment_time ON public.post_aggregates USING btree (featured_local DESC, newest_comment_time DESC); +CREATE INDEX idx_post_aggregates_featured_community_newest_comment_time ON public.post_aggregates USING btree (featured_community DESC, newest_comment_time DESC); +CREATE INDEX idx_post_aggregates_featured_local_comments ON public.post_aggregates USING btree (featured_local DESC, comments DESC); +CREATE INDEX idx_post_aggregates_featured_community_comments ON public.post_aggregates USING btree (featured_community DESC, comments DESC); +CREATE INDEX idx_post_aggregates_featured_local_hot ON public.post_aggregates USING btree (featured_local DESC, hot_rank((score)::numeric, published) DESC, published DESC); +CREATE INDEX idx_post_aggregates_featured_community_hot ON public.post_aggregates USING btree (featured_community DESC, hot_rank((score)::numeric, published) DESC, published DESC); +CREATE INDEX idx_post_aggregates_featured_local_score ON public.post_aggregates USING btree (featured_local DESC, score DESC); +CREATE INDEX idx_post_aggregates_featured_community_score ON public.post_aggregates USING btree (featured_community DESC, score DESC); +CREATE INDEX idx_post_aggregates_featured_local_published ON public.post_aggregates USING btree (featured_local DESC, published DESC); +CREATE INDEX idx_post_aggregates_featured_community_published ON public.post_aggregates USING btree (featured_community DESC, published DESC); +CREATE INDEX idx_post_aggregates_featured_local_active ON public.post_aggregates USING btree (featured_local DESC, hot_rank((score)::numeric, newest_comment_time_necro) DESC, newest_comment_time_necro DESC); +CREATE INDEX idx_post_aggregates_featured_community_active ON public.post_aggregates USING btree (featured_community DESC, hot_rank((score)::numeric, newest_comment_time_necro) DESC, newest_comment_time_necro DESC); + +CREATE INDEX idx_comment_aggregates_hot ON public.comment_aggregates USING btree (hot_rank((score)::numeric, published) DESC, published DESC); + +CREATE INDEX idx_community_aggregates_hot ON public.community_aggregates USING btree (hot_rank((subscribers)::numeric, published) DESC, published DESC); diff --git a/migrations/2023-06-07-105918_add_hot_rank_columns/up.sql b/migrations/2023-06-07-105918_add_hot_rank_columns/up.sql new file mode 100644 index 000000000..92f03e5f3 --- /dev/null +++ b/migrations/2023-06-07-105918_add_hot_rank_columns/up.sql @@ -0,0 +1,51 @@ +-- This converts the old hot_rank functions, to columns + +-- Remove the old compound indexes +DROP INDEX idx_post_aggregates_featured_local_newest_comment_time; +DROP INDEX idx_post_aggregates_featured_community_newest_comment_time; +DROP INDEX idx_post_aggregates_featured_local_comments; +DROP INDEX idx_post_aggregates_featured_community_comments; +DROP INDEX idx_post_aggregates_featured_local_hot; +DROP INDEX idx_post_aggregates_featured_community_hot; +DROP INDEX idx_post_aggregates_featured_local_score; +DROP INDEX idx_post_aggregates_featured_community_score; +DROP INDEX idx_post_aggregates_featured_local_published; +DROP INDEX idx_post_aggregates_featured_community_published; +DROP INDEX idx_post_aggregates_featured_local_active; +DROP INDEX idx_post_aggregates_featured_community_active; + +DROP INDEX idx_comment_aggregates_hot; + +DROP INDEX idx_community_aggregates_hot; + +-- Add the new hot rank columns for post and comment aggregates +-- Note: 1728 is the result of the hot_rank function, with a score of 1, posted now +-- hot_rank = 10000*log10(1 + 3)/Power(2, 1.8) +alter table post_aggregates add column hot_rank integer not null default 1728; +alter table post_aggregates add column hot_rank_active integer not null default 1728; + +alter table comment_aggregates add column hot_rank integer not null default 1728; + +alter table community_aggregates add column hot_rank integer not null default 1728; + +-- Populate them initially +-- Note: After initial population, these are updated in a periodic scheduled job, +-- with only the last week being updated. +update post_aggregates set hot_rank_active = hot_rank(score::numeric, newest_comment_time_necro); +update post_aggregates set hot_rank = hot_rank(score::numeric, published); +update comment_aggregates set hot_rank = hot_rank(score::numeric, published); +update community_aggregates set hot_rank = hot_rank(subscribers::numeric, published); + +-- Create single column indexes +create index idx_post_aggregates_score on post_aggregates (score desc); +create index idx_post_aggregates_published on post_aggregates (published desc); +create index idx_post_aggregates_newest_comment_time on post_aggregates (newest_comment_time desc); +create index idx_post_aggregates_newest_comment_time_necro on post_aggregates (newest_comment_time_necro desc); +create index idx_post_aggregates_featured_community on post_aggregates (featured_community desc); +create index idx_post_aggregates_featured_local on post_aggregates (featured_local desc); +create index idx_post_aggregates_hot on post_aggregates (hot_rank desc); +create index idx_post_aggregates_active on post_aggregates (hot_rank_active desc); + +create index idx_comment_aggregates_hot on comment_aggregates (hot_rank desc); + +create index idx_community_aggregates_hot on community_aggregates (hot_rank desc); diff --git a/scripts/query_testing/post_query_hot_rank.sh b/scripts/query_testing/post_query_hot_rank.sh new file mode 100755 index 000000000..dad746065 --- /dev/null +++ b/scripts/query_testing/post_query_hot_rank.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +sudo docker exec -i docker-postgres-1 psql -Ulemmy -c "EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) SELECT post.id, post.name, post.url, post.body, post.creator_id, post.community_id, post.removed, post.locked, post.published, post.updated, post.deleted, post.nsfw, post.embed_title, post.embed_description, post.embed_video_url, post.thumbnail_url, post.ap_id, post.local, post.language_id, post.featured_community, post.featured_local, person.id, person.name, person.display_name, person.avatar, person.banned, person.published, person.updated, person.actor_id, person.bio, person.local, person.banner, person.deleted, person.inbox_url, person.shared_inbox_url, person.matrix_user_id, person.admin, person.bot_account, person.ban_expires, person.instance_id, community.id, community.name, community.title, community.description, community.removed, community.published, community.updated, community.deleted, community.nsfw, community.actor_id, community.local, community.icon, community.banner, community.hidden, community.posting_restricted_to_mods, community.instance_id, community_person_ban.id, community_person_ban.community_id, community_person_ban.person_id, community_person_ban.published, community_person_ban.expires, post_aggregates.id, post_aggregates.post_id, post_aggregates.comments, post_aggregates.score, post_aggregates.upvotes, post_aggregates.downvotes, post_aggregates.published, post_aggregates.newest_comment_time_necro, post_aggregates.newest_comment_time, post_aggregates.featured_community, post_aggregates.featured_local, community_follower.id, community_follower.community_id, community_follower.person_id, community_follower.published, community_follower.pending, post_saved.id, post_saved.post_id, post_saved.person_id, post_saved.published, post_read.id, post_read.post_id, post_read.person_id, post_read.published, person_block.id, person_block.person_id, person_block.target_id, person_block.published, post_like.score, coalesce((post_aggregates.comments - person_post_aggregates.read_comments), post_aggregates.comments) FROM ((((((((((((post INNER JOIN person ON (post.creator_id = person.id)) INNER JOIN community ON (post.community_id = community.id)) LEFT OUTER JOIN community_person_ban ON (((post.community_id = community_person_ban.community_id) AND (community_person_ban.person_id = post.creator_id)) AND ((community_person_ban.expires IS NULL) OR (community_person_ban.expires > CURRENT_TIMESTAMP)))) INNER JOIN post_aggregates ON (post_aggregates.post_id = post.id)) LEFT OUTER JOIN community_follower ON ((post.community_id = community_follower.community_id) AND (community_follower.person_id = '33517'))) LEFT OUTER JOIN post_saved ON ((post.id = post_saved.post_id) AND (post_saved.person_id = '33517'))) LEFT OUTER JOIN post_read ON ((post.id = post_read.post_id) AND (post_read.person_id = '33517'))) LEFT OUTER JOIN person_block ON ((post.creator_id = person_block.target_id) AND (person_block.person_id = '33517'))) LEFT OUTER JOIN community_block ON ((community.id = community_block.community_id) AND (community_block.person_id = '33517'))) LEFT OUTER JOIN post_like ON ((post.id = post_like.post_id) AND (post_like.person_id = '33517'))) LEFT OUTER JOIN person_post_aggregates ON ((post.id = person_post_aggregates.post_id) AND (person_post_aggregates.person_id = '33517'))) LEFT OUTER JOIN local_user_language ON ((post.language_id = local_user_language.language_id) AND (local_user_language.local_user_id = '11402'))) WHERE ((((((((((community_follower.person_id IS NOT NULL) AND (post.nsfw = 'f')) AND (community.nsfw = 'f')) AND (local_user_language.language_id IS NOT NULL)) AND (community_block.person_id IS NULL)) AND (person_block.person_id IS NULL)) AND (post.removed = 'f')) AND (post.deleted = 'f')) AND (community.removed = 'f')) AND (community.deleted = 'f')) ORDER BY post_aggregates.featured_local DESC , post_aggregates.hot_rank DESC LIMIT '40' OFFSET '0';" > query_results.json diff --git a/src/scheduled_tasks.rs b/src/scheduled_tasks.rs index 5d98baf9b..e42100b89 100644 --- a/src/scheduled_tasks.rs +++ b/src/scheduled_tasks.rs @@ -8,9 +8,17 @@ use diesel::{ // Import week days and WeekDay use diesel::{sql_query, PgConnection, RunQueryDsl}; use lemmy_db_schema::{ - schema::{activity, community_person_ban, instance, person}, + schema::{ + activity, + comment_aggregates, + community_aggregates, + community_person_ban, + instance, + person, + post_aggregates, + }, source::instance::{Instance, InstanceForm}, - utils::naive_now, + utils::{functions::hot_rank, naive_now}, }; use lemmy_routes::nodeinfo::NodeInfo; use lemmy_utils::{error::LemmyError, REQWEST_TIMEOUT}; @@ -23,33 +31,35 @@ pub fn setup(db_url: String, user_agent: String) -> Result<(), LemmyError> { // Setup the connections let mut scheduler = Scheduler::new(); - let mut conn = PgConnection::establish(&db_url).expect("could not establish connection"); - + let mut conn_1 = PgConnection::establish(&db_url).expect("could not establish connection"); let mut conn_2 = PgConnection::establish(&db_url).expect("could not establish connection"); + let mut conn_3 = PgConnection::establish(&db_url).expect("could not establish connection"); + let mut conn_4 = PgConnection::establish(&db_url).expect("could not establish connection"); - active_counts(&mut conn); - update_banned_when_expired(&mut conn); + // Run on startup + active_counts(&mut conn_1); + update_hot_ranks(&mut conn_1, false); + update_banned_when_expired(&mut conn_1); + clear_old_activities(&mut conn_1); - // On startup, reindex the tables non-concurrently - // TODO remove this for now, since it slows down startup a lot on lemmy.ml - reindex_aggregates_tables(&mut conn, true); + // Update active counts every hour scheduler.every(CTimeUnits::hour(1)).run(move || { - let conn = &mut PgConnection::establish(&db_url) - .unwrap_or_else(|_| panic!("Error connecting to {db_url}")); - active_counts(conn); - update_banned_when_expired(conn); - reindex_aggregates_tables(conn, true); - drop_ccnew_indexes(conn); + active_counts(&mut conn_1); + update_banned_when_expired(&mut conn_1); }); - clear_old_activities(&mut conn); + // Update hot ranks every 5 minutes + scheduler.every(CTimeUnits::minutes(5)).run(move || { + update_hot_ranks(&mut conn_2, true); + }); + + // Clear old activities every week scheduler.every(CTimeUnits::weeks(1)).run(move || { - clear_old_activities(&mut conn); + clear_old_activities(&mut conn_3); }); - update_instance_software(&mut conn_2, &user_agent); scheduler.every(CTimeUnits::days(1)).run(move || { - update_instance_software(&mut conn_2, &user_agent); + update_instance_software(&mut conn_4, &user_agent); }); // Manually run the scheduler in an event loop @@ -59,24 +69,50 @@ pub fn setup(db_url: String, user_agent: String) -> Result<(), LemmyError> { } } -/// Reindex the aggregates tables every one hour -/// This is necessary because hot_rank is actually a mutable function: -/// https://dba.stackexchange.com/questions/284052/how-to-create-an-index-based-on-a-time-based-function-in-postgres?noredirect=1#comment555727_284052 -fn reindex_aggregates_tables(conn: &mut PgConnection, concurrently: bool) { - for table_name in &[ - "post_aggregates", - "comment_aggregates", - "community_aggregates", - ] { - reindex_table(conn, table_name, concurrently); - } -} +/// Update the hot_rank columns for the aggregates tables +fn update_hot_ranks(conn: &mut PgConnection, last_week_only: bool) { + let mut post_update = diesel::update(post_aggregates::table).into_boxed(); + let mut comment_update = diesel::update(comment_aggregates::table).into_boxed(); + let mut community_update = diesel::update(community_aggregates::table).into_boxed(); -fn reindex_table(conn: &mut PgConnection, table_name: &str, concurrently: bool) { - let concurrently_str = if concurrently { "concurrently" } else { "" }; - info!("Reindexing table {} {} ...", concurrently_str, table_name); - let query = format!("reindex table {concurrently_str} {table_name}"); - sql_query(query).execute(conn).expect("reindex table"); + // Only update for the last week of content + if last_week_only { + info!("Updating hot ranks for last week..."); + let last_week = now - diesel::dsl::IntervalDsl::weeks(1); + + post_update = post_update.filter(post_aggregates::published.gt(last_week)); + comment_update = comment_update.filter(comment_aggregates::published.gt(last_week)); + community_update = community_update.filter(community_aggregates::published.gt(last_week)); + } else { + info!("Updating hot ranks for all history..."); + } + + post_update + .set(( + post_aggregates::hot_rank.eq(hot_rank(post_aggregates::score, post_aggregates::published)), + post_aggregates::hot_rank_active.eq(hot_rank( + post_aggregates::score, + post_aggregates::newest_comment_time_necro, + )), + )) + .execute(conn) + .expect("update post_aggregate hot_ranks"); + + comment_update + .set(comment_aggregates::hot_rank.eq(hot_rank( + comment_aggregates::score, + comment_aggregates::published, + ))) + .execute(conn) + .expect("update comment_aggregate hot_ranks"); + + community_update + .set(community_aggregates::hot_rank.eq(hot_rank( + community_aggregates::subscribers, + community_aggregates::published, + ))) + .execute(conn) + .expect("update community_aggregate hot_ranks"); info!("Done."); } @@ -136,16 +172,6 @@ fn update_banned_when_expired(conn: &mut PgConnection) { .expect("remove community_ban expired rows"); } -/// Drops the phantom CCNEW indexes created by postgres -/// https://github.com/LemmyNet/lemmy/issues/2431 -fn drop_ccnew_indexes(conn: &mut PgConnection) { - info!("Dropping phantom ccnew indexes..."); - let drop_stmt = "select drop_ccnew_indexes()"; - sql_query(drop_stmt) - .execute(conn) - .expect("drop ccnew indexes"); -} - /// Updates the instance software and version fn update_instance_software(conn: &mut PgConnection, user_agent: &str) { info!("Updating instances software and versions...");