Do actual ISO date parsing

Fixes #187
This commit is contained in:
Andrew Godwin 2022-12-18 10:45:32 -07:00
parent b8dca2b71f
commit 770aa1531a
5 changed files with 32 additions and 15 deletions

View file

@@ -46,4 +46,11 @@ repos:
rev: v0.991 rev: v0.991
hooks: hooks:
- id: mypy - id: mypy
additional_dependencies: [types-pyopenssl, types-bleach, types-mock, types-cachetools] additional_dependencies:
[
types-pyopenssl,
types-bleach,
types-mock,
types-cachetools,
types-python-dateutil,
]

View file

@@ -726,7 +726,7 @@ class Post(StatorModel):
# Resolve the author # Resolve the author
author = Identity.by_actor_uri(data["attributedTo"], create=create) author = Identity.by_actor_uri(data["attributedTo"], create=create)
# If the post is from a blocked domain, stop and drop # If the post is from a blocked domain, stop and drop
if author.domain.blocked: if author.domain and author.domain.blocked:
raise cls.DoesNotExist("Post is from a blocked domain") raise cls.DoesNotExist("Post is from a blocked domain")
post = cls.objects.create( post = cls.objects.create(
object_uri=data["id"], object_uri=data["id"],

View file

@@ -2,6 +2,7 @@ import datetime
import os import os
import urllib.parse as urllib_parse import urllib.parse as urllib_parse
from dateutil import parser
from pyld import jsonld from pyld import jsonld
from pyld.jsonld import JsonLdError from pyld.jsonld import JsonLdError
@@ -366,6 +367,7 @@ schemas = {
} }
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
DATETIME_TZ_FORMAT = "%Y-%m-%dT%H:%M:%S+00:00"
DATETIME_MS_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" DATETIME_MS_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
@@ -448,15 +450,7 @@ def format_ld_date(value: datetime.datetime) -> str:
def parse_ld_date(value: str | None) -> datetime.datetime | None: def parse_ld_date(value: str | None) -> datetime.datetime | None:
if value is None: if value is None:
return None return None
try: return parser.isoparse(value).replace(microsecond=0)
return datetime.datetime.strptime(value, DATETIME_FORMAT).replace(
tzinfo=datetime.timezone.utc
)
except ValueError:
return datetime.datetime.strptime(value, DATETIME_MS_FORMAT).replace(
tzinfo=datetime.timezone.utc,
microsecond=0,
)
def get_first_image_url(data) -> str | None: def get_first_image_url(data) -> str | None:

View file

@@ -20,6 +20,7 @@ pydantic~=1.10.2
pyld~=2.0.3 pyld~=2.0.3
pylibmc~=1.6.3 pylibmc~=1.6.3
pymemcache~=4.0.0 pymemcache~=4.0.0
python-dateutil~=2.8.2
python-dotenv~=0.21.0 python-dotenv~=0.21.0
redis~=4.4.0 redis~=4.4.0
sentry-sdk~=1.11.0 sentry-sdk~=1.11.0

View file

@@ -1,5 +1,7 @@
import datetime import datetime
from dateutil.tz import tzutc
from core.ld import parse_ld_date from core.ld import parse_ld_date
@@ -7,22 +9,35 @@ def test_parse_ld_date():
""" """
Tests that the various kinds of LD dates that we see will work Tests that the various kinds of LD dates that we see will work
""" """
assert parse_ld_date("2022-11-16T15:57:58Z") == datetime.datetime( difference = parse_ld_date("2022-11-16T15:57:58Z") - datetime.datetime(
2022, 2022,
11, 11,
16, 16,
15, 15,
57, 57,
58, 58,
tzinfo=datetime.timezone.utc, tzinfo=tzutc(),
) )
assert difference.total_seconds() == 0
assert parse_ld_date("2022-11-16T15:57:58.123Z") == datetime.datetime( difference = parse_ld_date("2022-11-16T15:57:58.123Z") - datetime.datetime(
2022, 2022,
11, 11,
16, 16,
15, 15,
57, 57,
58, 58,
tzinfo=datetime.timezone.utc, tzinfo=tzutc(),
) )
assert difference.total_seconds() == 0
difference = parse_ld_date("2022-12-16T13:32:08+00:00") - datetime.datetime(
2022,
12,
16,
13,
32,
8,
tzinfo=tzutc(),
)
assert difference.total_seconds() == 0