From 3f8045f4128d14558636b1c8a9445c29e8e1583d Mon Sep 17 00:00:00 2001 From: Michael Manfre Date: Sun, 4 Dec 2022 20:08:23 -0500 Subject: [PATCH] [WIP] Sentry improvements (#108) Stator clears scope during the main loop to behave more like transactions. Transaction names are set. Sentry tags: * 'takahe.version' * 'takahe.app' values 'web' or 'stator' Added settings: * TAKAHE_SENTRY_SAMPLE_RATE * TAKAHE_SENTRY_TRACES_SAMPLE_RATE --- core/middleware.py | 19 ++++++++++++ core/sentry.py | 49 +++++++++++++++++++++++++++++ stator/runner.py | 77 ++++++++++++++++++++++++++++++---------------- takahe/settings.py | 9 +++++- 4 files changed, 126 insertions(+), 28 deletions(-) create mode 100644 core/sentry.py diff --git a/core/middleware.py b/core/middleware.py index fca5dd8..08c28fa 100644 --- a/core/middleware.py +++ b/core/middleware.py @@ -1,3 +1,6 @@ +from django.core.exceptions import MiddlewareNotUsed + +from core import sentry from core.models import Config @@ -13,3 +16,19 @@ class ConfigLoadingMiddleware: Config.system = Config.load_system() response = self.get_response(request) return response + + +class SentryTaggingMiddleware: + """ + Sets Sentry tags at the start of the request if Sentry is configured. + """ + + def __init__(self, get_response): + if not sentry.SENTRY_ENABLED: + raise MiddlewareNotUsed() + self.get_response = get_response + + def __call__(self, request): + sentry.set_takahe_app("web") + response = self.get_response(request) + return response diff --git a/core/sentry.py b/core/sentry.py new file mode 100644 index 0000000..9a7b100 --- /dev/null +++ b/core/sentry.py @@ -0,0 +1,49 @@ +from contextlib import contextmanager + +from django.conf import settings + +SENTRY_ENABLED = False +try: + if settings.SETUP.SENTRY_DSN: + import sentry_sdk + + SENTRY_ENABLED = True +except ImportError: + pass + + +def noop(*args, **kwargs): + pass + + +@contextmanager +def noop_context(*args, **kwargs): + yield + + +if SENTRY_ENABLED: + configure_scope = sentry_sdk.configure_scope + push_scope = sentry_sdk.push_scope + set_context = sentry_sdk.set_context + set_tag = sentry_sdk.set_tag + start_transaction = sentry_sdk.start_transaction +else: + configure_scope = noop_context + push_scope = noop_context + set_context = noop + set_tag = noop + start_transaction = noop_context + + +def set_takahe_app(name: str): + set_tag("takahe.app", name) + + +def scope_clear(scope): + if scope: + scope.clear() + + +def set_transaction_name(scope, name: str): + if scope: + scope.set_transaction_name(name) diff --git a/stator/runner.py b/stator/runner.py index 5cc0091..ecbaae6 100644 --- a/stator/runner.py +++ b/stator/runner.py @@ -7,7 +7,7 @@ from typing import List, Optional, Type from django.utils import timezone -from core import exceptions +from core import exceptions, sentry from core.models import Config from stator.models import StatorModel @@ -38,6 +38,7 @@ class StatorRunner: self.run_for = run_for async def run(self): + sentry.set_takahe_app("stator") self.handled = 0 self.started = time.monotonic() self.last_clean = time.monotonic() - self.schedule_interval @@ -45,23 +46,32 @@ class StatorRunner: # For the first time period, launch tasks print("Running main task loop") try: - while True: - # Do we need to do cleaning? - if (time.monotonic() - self.last_clean) >= self.schedule_interval: - # Refresh the config - Config.system = await Config.aload_system() - print(f"{self.handled} tasks processed so far") - print("Running cleaning and scheduling") - await self.run_scheduling() + with sentry.configure_scope() as scope: + while True: + # Do we need to do cleaning? + if (time.monotonic() - self.last_clean) >= self.schedule_interval: + # Refresh the config + Config.system = await Config.aload_system() + print(f"{self.handled} tasks processed so far") + print("Running cleaning and scheduling") + await self.run_scheduling() - self.remove_completed_tasks() - await self.fetch_and_process_tasks() + # Clear the cleaning breadcrumbs/extra for the main part of the loop + sentry.scope_clear(scope) - # Are we in limited run mode? - if self.run_for and (time.monotonic() - self.started) > self.run_for: - break - # Prevent busylooping - await asyncio.sleep(0.5) + self.remove_completed_tasks() + await self.fetch_and_process_tasks() + + # Are we in limited run mode? + if ( + self.run_for + and (time.monotonic() - self.started) > self.run_for + ): + break + # Prevent busylooping + await asyncio.sleep(0.5) + # Clear the Sentry breadcrumbs and extra for next loop + sentry.scope_clear(scope) except KeyboardInterrupt: pass # Wait for tasks to finish @@ -79,10 +89,11 @@ class StatorRunner: """ Do any transition cleanup tasks """ - for model in self.models: - asyncio.create_task(model.atransition_clean_locks()) - asyncio.create_task(model.atransition_schedule_due()) - self.last_clean = time.monotonic() + with sentry.start_transaction(op="task", name="stator.run_scheduling"): + for model in self.models: + asyncio.create_task(model.atransition_clean_locks()) + asyncio.create_task(model.atransition_schedule_due()) + self.last_clean = time.monotonic() async def fetch_and_process_tasks(self): # Calculate space left for tasks @@ -106,14 +117,26 @@ class StatorRunner: """ Wrapper for atransition_attempt with fallback error handling """ - try: - print( - f"Attempting transition on {instance._meta.label_lower}#{instance.pk} from state {instance.state}" + task_name = f"stator.run_transition:{instance._meta.label_lower}#{{id}} from {instance.state}" + with sentry.start_transaction(op="task", name=task_name): + sentry.set_context( + "instance", + { + "model": instance._meta.label_lower, + "pk": instance.pk, + "state": instance.state, + "state_age": instance.state_age, + }, ) - await instance.atransition_attempt() - except BaseException as e: - await exceptions.acapture_exception(e) - traceback.print_exc() + + try: + print( + f"Attempting transition on {instance._meta.label_lower}#{instance.pk} from state {instance.state}" + ) + await instance.atransition_attempt() + except BaseException as e: + await exceptions.acapture_exception(e) + traceback.print_exc() def remove_completed_tasks(self): """ diff --git a/takahe/settings.py b/takahe/settings.py index 856c22c..15f30ee 100644 --- a/takahe/settings.py +++ b/takahe/settings.py @@ -10,6 +10,8 @@ import sentry_sdk from pydantic import AnyUrl, BaseSettings, EmailStr, Field, validator from sentry_sdk.integrations.django import DjangoIntegration +from takahe import __version__ + BASE_DIR = Path(__file__).resolve().parent.parent @@ -77,6 +79,8 @@ class Settings(BaseSettings): #: An optional Sentry DSN for error reporting. SENTRY_DSN: Optional[str] = None + SENTRY_SAMPLE_RATE: float = 1.0 + SENTRY_TRACES_SAMPLE_RATE: float = 1.0 #: Fallback domain for links. MAIN_DOMAIN: str = "example.com" @@ -150,6 +154,7 @@ INSTALLED_APPS = [ ] MIDDLEWARE = [ + "core.middleware.SentryTaggingMiddleware", "django.middleware.security.SecurityMiddleware", "whitenoise.middleware.WhiteNoiseMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", @@ -269,10 +274,12 @@ if SETUP.SENTRY_DSN: integrations=[ DjangoIntegration(), ], - traces_sample_rate=1.0, + traces_sample_rate=SETUP.SENTRY_TRACES_SAMPLE_RATE, + sample_rate=SETUP.SENTRY_SAMPLE_RATE, send_default_pii=True, environment=SETUP.ENVIRONMENT, ) + sentry_sdk.set_tag("takahe.version", __version__) SERVER_EMAIL = SETUP.EMAIL_FROM if SETUP.EMAIL_SERVER: