From ca40a0d150cccee50ee39862f99679e8e79b9f5f Mon Sep 17 00:00:00 2001 From: Michael Manfre Date: Tue, 27 Dec 2022 18:50:39 -0500 Subject: [PATCH] Set User-Agent and Accept for outbound requests (#293) --- core/files.py | 6 ++++- users/models/identity.py | 48 +++++++++++++++++++++------------------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/core/files.py b/core/files.py index 7501cbb..dc82aa4 100644 --- a/core/files.py +++ b/core/files.py @@ -52,7 +52,11 @@ async def get_remote_file( """ Download a URL and return the File and content-type. """ - async with httpx.AsyncClient() as client: + headers = { + "User-Agent": settings.TAKAHE_USER_AGENT, + } + + async with httpx.AsyncClient(headers=headers) as client: async with client.stream("GET", url, timeout=timeout) as stream: allow_download = max_size is None if max_size: diff --git a/users/models/identity.py b/users/models/identity.py index 4a536af..a77f9e4 100644 --- a/users/models/identity.py +++ b/users/models/identity.py @@ -561,15 +561,17 @@ class Identity(StatorModel): (actor uri, canonical handle) or None, None if it does not resolve. """ domain = handle.split("@")[1].lower() - webfinger_url = "https://{domain}/.well-known/webfinger?resource={uri}" + webfinger_url = f"https://{domain}/.well-known/webfinger?resource={{uri}}" - try: - async with httpx.AsyncClient( - timeout=settings.SETUP.REMOTE_TIMEOUT - ) as client: + async with httpx.AsyncClient( + timeout=settings.SETUP.REMOTE_TIMEOUT, + headers={"User-Agent": settings.TAKAHE_USER_AGENT}, + ) as client: + try: response = await client.get( f"https://{domain}/.well-known/host-meta", follow_redirects=True, + headers={"Accept": "application/xml"}, ) # In the case of anything other than a success, we'll still try @@ -582,29 +584,29 @@ class Identity(StatorModel): ) if template: webfinger_url = template - except (httpx.RequestError, etree.ParseError): - pass + except (httpx.RequestError, etree.ParseError): + pass - try: - async with httpx.AsyncClient( - timeout=settings.SETUP.REMOTE_TIMEOUT - ) as client: + try: response = await client.get( - webfinger_url.format(domain=domain, uri=f"acct:{handle}"), + webfinger_url.format(uri=f"acct:{handle}"), follow_redirects=True, + headers={"Accept": "application/json"}, ) - except httpx.RequestError: - return None, None + response.raise_for_status() + except httpx.RequestError as ex: + response = getattr(ex, "response", None) + if ( + response + and response.status_code < 500 + and response.status_code not in [404, 410] + ): + raise ValueError( + f"Client error fetching webfinger: {response.status_code}", + response.content, + ) + return None, None - if response.status_code in [404, 410]: - return None, None - if response.status_code >= 500: - return None, None - if response.status_code >= 400: - raise ValueError( - f"Client error fetching webfinger: {response.status_code}", - response.content, - ) try: data = response.json() except ValueError: