diff --git a/scripts/make_checkpoint.py b/scripts/make_checkpoint.py index 85744709..d690826b 100644 --- a/scripts/make_checkpoint.py +++ b/scripts/make_checkpoint.py @@ -7,7 +7,7 @@ import os.path import random import sys -from datetime import datetime +from datetime import datetime, timezone from whoosh import fields, index from whoosh.compat import u @@ -46,7 +46,12 @@ frac += 0.15 path = u(f"{segnum}/{num}") title = " ".join(random.choice(words) for _ in range(100)) - dt = datetime(year=2000 + counter, month=(counter % 12) + 1, day=15) + dt = datetime( + year=2000 + counter, + month=(counter % 12) + 1, + day=15, + tzinfo=timezone.utc, + ) w.add_document( path=path, diff --git a/src/whoosh/qparser/dateparse.py b/src/whoosh/qparser/dateparse.py index 6a29b252..35552c17 100644 --- a/src/whoosh/qparser/dateparse.py +++ b/src/whoosh/qparser/dateparse.py @@ -27,7 +27,7 @@ import re import sys -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from whoosh.compat import iteritems, string_type from whoosh.qparser import plugins, syntax @@ -89,7 +89,7 @@ def parse(self, text, dt, pos=0, debug=-9999): def date_from(self, text, dt=None, pos=0, debug=-9999): if dt is None: - dt = datetime.now() + dt = datetime.now(tz=timezone.utc) d, pos = self.parse(text, dt, pos, debug + 1) return d @@ -661,7 +661,7 @@ def parse(self, text, dt, pos=0, debug=-9999): def date_from(self, text, basedate=None, pos=0, debug=-9999, toend=True): if basedate is None: - basedate = datetime.utcnow() + basedate = datetime.now(tz=timezone.utc) parser = self.get_parser() if toend: diff --git a/src/whoosh/util/times.py b/src/whoosh/util/times.py index 55df3e1d..fff02d54 100644 --- a/src/whoosh/util/times.py +++ b/src/whoosh/util/times.py @@ -27,7 +27,7 @@ import calendar import copy -from datetime import date, datetime, timedelta +from datetime import date, datetime, timedelta, timezone from whoosh.compat import iteritems @@ -171,7 +171,7 @@ def tuple(self): ) 
def date(self): - return date(self.year, self.month, self.day) + return date(self.year, self.month, self.day) # date() accepts no tzinfo def copy(self): return adatetime( @@ -237,7 +237,7 @@ def floor(self): s = 0 if ms is None: ms = 0 - return datetime(y, m, d, h, mn, s, ms) + return datetime(y, m, d, h, mn, s, ms, tzinfo=timezone.utc) def ceil(self): """Returns a ``datetime`` version of this object with all unspecified @@ -275,7 +275,7 @@ def ceil(self): s = 59 if ms is None: ms = 999999 - return datetime(y, m, d, h, mn, s, ms) + return datetime(y, m, d, h, mn, s, ms, tzinfo=timezone.utc) def disambiguated(self, basedate): """Returns either a ``datetime`` or unambiguous ``timespan`` version @@ -514,4 +514,5 @@ def fix(at): minute=at.minute, second=at.second, microsecond=at.microsecond, + tzinfo=timezone.utc, ) diff --git a/stress/test_bigsort.py b/stress/test_bigsort.py index e28efa31..b25b9007 100644 --- a/stress/test_bigsort.py +++ b/stress/test_bigsort.py @@ -1,7 +1,7 @@ import os.path import random import shutil -from datetime import datetime +from datetime import datetime, timezone from whoosh import fields, index, query from whoosh.compat import text_type @@ -24,7 +24,9 @@ def test_bigsort(): t = now() w = ix.writer(limitmb=512) for i in range(times): - dt = datetime.fromtimestamp(random.randint(15839593, 1294102139)) + dt = datetime.fromtimestamp( + random.randint(15839593, 1294102139), tz=timezone.utc + ) w.add_document(id=text_type(i), date=dt) w.commit() print("Writing took ", now() - t) diff --git a/tests/test_dateparse.py b/tests/test_dateparse.py index a87e5e20..9acba143 100644 --- a/tests/test_dateparse.py +++ b/tests/test_dateparse.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from whoosh.qparser.dateparse import ( English, @@ -8,7 +8,7 @@ timespan, ) -basedate = datetime(2010, 9, 20, 15, 16, 6, 454000) +basedate = datetime(2010, 9, 20, 15, 16, 6, 454000, tzinfo=timezone.utc) english = 
English() diff --git a/tests/test_fields.py b/tests/test_fields.py index dae12205..47acff8e 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone import pytest from whoosh import fields, qparser, query @@ -340,7 +340,7 @@ def test_nontext_document(): ) ix = RamStorage().create_index(schema) - dt = datetime.now() + dt = datetime.now(tz=timezone.utc) w = ix.writer() for i in range(50): w.add_document(id=i, num=i, date=dt + timedelta(days=i), even=not (i % 2)) @@ -365,7 +365,7 @@ def test_nontext_update(): ) ix = RamStorage().create_index(schema) - dt = datetime.now() + dt = datetime.now(tz=timezone.utc) w = ix.writer() for i in range(10): w.add_document(id=i, num=i, date=dt + timedelta(days=i)) @@ -391,7 +391,8 @@ def test_datetime(): for month in range(1, 12): for day in range(1, 28): w.add_document( - id=u("%s-%s") % (month, day), date=datetime(2010, month, day, 14, 0, 0) + id=u("%s-%s") % (month, day), + date=datetime(2010, month, day, 14, 0, 0, tzinfo=timezone.utc), ) w.commit() @@ -409,8 +410,8 @@ def test_datetime(): assert len(r) == 27 q = qp.parse(u("date:[2010-05 to 2010-08]")) - startdt = datetime(2010, 5, 1, 0, 0, 0, 0) - enddt = datetime(2010, 8, 31, 23, 59, 59, 999999) + startdt = datetime(2010, 5, 1, 0, 0, 0, 0, tzinfo=timezone.utc) + enddt = datetime(2010, 8, 31, 23, 59, 59, 999999, tzinfo=timezone.utc) assert q.__class__ is query.NumericRange assert q.start == times.datetime_to_long(startdt) assert q.end == times.datetime_to_long(enddt) @@ -694,9 +695,11 @@ def test_valid_date_string(): query = field.parse_query("date", date_string) # Define the expected start and end dates - expected_start = datetime_to_long(datetime.datetime(2022, 1, 1)) + expected_start = datetime_to_long( + datetime.datetime(2022, 1, 1, tzinfo=timezone.utc) + ) expected_end = datetime_to_long( - datetime.datetime(2022, 1, 1) + datetime.datetime(2022, 1, 1, 
tzinfo=timezone.utc) + datetime.timedelta(days=1) - datetime.timedelta(microseconds=1) ) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index e91cb7d2..01c1fb07 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -1,6 +1,6 @@ import random from collections import defaultdict -from datetime import datetime +from datetime import datetime, timezone import pytest from whoosh import __version__, analysis, fields, index, qparser, query @@ -629,15 +629,20 @@ def test_multivalue(): ) ix = RamStorage().create_index(schema) with ix.writer() as w: - w.add_document(id=1, date=datetime(2001, 1, 1), num=5) + w.add_document(id=1, date=datetime(2001, 1, 1, tzinfo=timezone.utc), num=5) w.add_document( - id=2, date=[datetime(2002, 2, 2), datetime(2003, 3, 3)], num=[1, 2, 3, 12] + id=2, + date=[ + datetime(2002, 2, 2, tzinfo=timezone.utc), + datetime(2003, 3, 3, tzinfo=timezone.utc), + ], + num=[1, 2, 3, 12], ) w.add_document(txt=u("a b c").split()) with ix.reader() as r: assert ("num", 3) in r - assert ("date", datetime(2003, 3, 3)) in r + assert ("date", datetime(2003, 3, 3, tzinfo=timezone.utc)) in r assert " ".join(r.field_terms("txt")) == "a b c" diff --git a/tests/test_matching.py b/tests/test_matching.py index f817e576..295d80df 100644 --- a/tests/test_matching.py +++ b/tests/test_matching.py @@ -500,12 +500,12 @@ def test_dismax(): def test_exclusion(): - from datetime import datetime + from datetime import datetime, timezone schema = fields.Schema(id=fields.ID(stored=True), date=fields.DATETIME) ix = RamStorage().create_index(schema) - dt1 = datetime(1950, 1, 1) - dt2 = datetime(1960, 1, 1) + dt1 = datetime(1950, 1, 1, tzinfo=timezone.utc) + dt2 = datetime(1960, 1, 1, tzinfo=timezone.utc) with ix.writer() as w: # Make 39 documents with dates != dt1 and then make a last document # with feed == dt1. 
diff --git a/tests/test_parse_plugins.py b/tests/test_parse_plugins.py index a3058d6a..eecfbe00 100644 --- a/tests/test_parse_plugins.py +++ b/tests/test_parse_plugins.py @@ -1,5 +1,5 @@ import inspect -from datetime import datetime +from datetime import datetime, timezone from whoosh import analysis, fields, formats, qparser, query from whoosh.compat import text_type, u @@ -69,7 +69,7 @@ def test_dateparser(): def cb(arg): errs.append(arg) - basedate = datetime(2010, 9, 20, 15, 16, 6, 454000) + basedate = datetime(2010, 9, 20, 15, 16, 6, 454000, tzinfo=timezone.utc) qp.add_plugin(dateparse.DateParserPlugin(basedate, callback=cb)) q = qp.parse(u("hello date:'last tuesday'")) @@ -118,7 +118,7 @@ def cb(arg): def test_date_range(): schema = fields.Schema(text=fields.TEXT, date=fields.DATETIME) qp = qparser.QueryParser("text", schema) - basedate = datetime(2010, 9, 20, 15, 16, 6, 454000) + basedate = datetime(2010, 9, 20, 15, 16, 6, 454000, tzinfo=timezone.utc) qp.add_plugin(dateparse.DateParserPlugin(basedate)) q = qp.parse(u("date:['30 march' to 'next wednesday']")) @@ -155,7 +155,7 @@ def test_date_range(): def test_daterange_multi(): schema = fields.Schema(text=fields.TEXT, start=fields.DATETIME, end=fields.DATETIME) qp = qparser.QueryParser("text", schema) - basedate = datetime(2010, 9, 20, 15, 16, 6, 454000) + basedate = datetime(2010, 9, 20, 15, 16, 6, 454000, tzinfo=timezone.utc) qp.add_plugin(dateparse.DateParserPlugin(basedate)) q = qp.parse("start:[2008 to] AND end:[2011 to 2011]") @@ -177,7 +177,11 @@ def test_daterange_empty_field(): writer.commit() with ix.searcher() as s: - q = query.DateRange("test", datetime.fromtimestamp(86400), datetime.today()) + q = query.DateRange( + "test", + datetime.fromtimestamp(86400, tz=timezone.utc), + datetime.now(tz=timezone.utc), + ) r = s.search(q) assert len(r) == 0 @@ -186,7 +190,7 @@ def test_free_dates(): a = analysis.StandardAnalyzer(stoplist=None) schema = fields.Schema(text=fields.TEXT(analyzer=a), 
date=fields.DATETIME) qp = qparser.QueryParser("text", schema) - basedate = datetime(2010, 9, 20, 15, 16, 6, 454000) + basedate = datetime(2010, 9, 20, 15, 16, 6, 454000, tzinfo=timezone.utc) qp.add_plugin(dateparse.DateParserPlugin(basedate, free=True)) q = qp.parse(u("hello date:last tuesday")) @@ -366,7 +370,9 @@ def test_gtlt(): assert len(q) == 3 assert q[0] == query.Term("a", "hello") # As of this writing, date ranges don't support startexcl/endexcl - assert q[1] == query.DateRange("e", datetime(2001, 3, 29, 0, 0), None) + assert q[1] == query.DateRange( + "e", datetime(2001, 3, 29, 0, 0, tzinfo=timezone.utc), None + ) assert q[2] == query.Term("a", "there") q = qp.parse(u("a:> alfa c:<= bravo")) diff --git a/tests/test_queries.py b/tests/test_queries.py index a8a7558f..b9a6090e 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -397,7 +397,7 @@ def test_requires(): def test_highlight_daterange(): - from datetime import datetime + from datetime import datetime, timezone schema = fields.Schema( id=fields.ID(unique=True, stored=True), @@ -412,7 +412,7 @@ def test_highlight_daterange(): id=u("1"), title=u("Life Aquatic"), content=u("A nautic film crew sets out to kill a gigantic shark."), - released=datetime(2004, 12, 25), + released=datetime(2004, 12, 25, tzinfo=timezone.utc), ) w.update_document( id=u("2"), @@ -420,7 +420,7 @@ def test_highlight_daterange(): content=u( "Three brothers meet in India for a life changing train " + "journey." 
), - released=datetime(2007, 10, 27), + released=datetime(2007, 10, 27, tzinfo=timezone.utc), ) w.commit() @@ -433,7 +433,7 @@ def test_highlight_daterange(): == 'for a life changing train journey' ) - r = s.search(DateRange("released", datetime(2007, 1, 1), None)) + r = s.search(DateRange("released", datetime(2007, 1, 1, tzinfo=timezone.utc), None)) assert len(r) == 1 assert r[0].highlights("content") == "" diff --git a/tests/test_searching.py b/tests/test_searching.py index d76c8094..3b4c7e56 100644 --- a/tests/test_searching.py +++ b/tests/test_searching.py @@ -1,5 +1,5 @@ import copy -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone import pytest from whoosh import analysis, fields, index, qparser, query, scoring @@ -366,7 +366,7 @@ def test_open_numeric_ranges(): def test_open_date_ranges(): - basedate = datetime(2011, 1, 24, 6, 25, 0, 0) + basedate = datetime(2011, 1, 24, 6, 25, 0, 0, tzinfo=timezone.utc) domain = [basedate + timedelta(days=n) for n in range(-20, 20)] schema = fields.Schema(date=fields.DATETIME(stored=True)) @@ -382,13 +382,17 @@ def test_open_date_ranges(): q = qp.parse("[2011-01-10 to]") r = [hit["date"] for hit in s.search(q, limit=None)] assert len(r) > 0 - target = [d for d in domain if d >= datetime(2011, 1, 10, 6, 25)] + target = [ + d for d in domain if d >= datetime(2011, 1, 10, 6, 25, tzinfo=timezone.utc) + ] assert r == target q = qp.parse("[to 2011-01-30]") r = [hit["date"] for hit in s.search(q, limit=None)] assert len(r) > 0 - target = [d for d in domain if d <= datetime(2011, 1, 30, 6, 25)] + target = [ + d for d in domain if d <= datetime(2011, 1, 30, 6, 25, tzinfo=timezone.utc) + ] assert r == target # With date parser @@ -399,13 +403,17 @@ def test_open_date_ranges(): q = qp.parse("[10 jan 2011 to]") r = [hit["date"] for hit in s.search(q, limit=None)] assert len(r) > 0 - target = [d for d in domain if d >= datetime(2011, 1, 10, 6, 25)] + target = [ + d for d in domain if d >= 
datetime(2011, 1, 10, 6, 25, tzinfo=timezone.utc) + ] assert r == target q = qp.parse("[to 30 jan 2011]") r = [hit["date"] for hit in s.search(q, limit=None)] assert len(r) > 0 - target = [d for d in domain if d <= datetime(2011, 1, 30, 6, 25)] + target = [ + d for d in domain if d <= datetime(2011, 1, 30, 6, 25, tzinfo=timezone.utc) + ] assert r == target @@ -420,7 +428,7 @@ def test_negated_unlimited_ranges(): domain = text_type(ascii_letters) - dt = datetime.now() + dt = datetime.now(tz=timezone.utc) for i, letter in enumerate(domain): w.add_document(id=letter, num=i, date=dt + timedelta(days=i)) w.commit() diff --git a/tests/test_sorting.py b/tests/test_sorting.py index ac0fd817..2883defd 100644 --- a/tests/test_sorting.py +++ b/tests/test_sorting.py @@ -1,5 +1,5 @@ import random -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from whoosh import columns, fields, query, sorting from whoosh.compat import permutations, u @@ -329,8 +329,8 @@ def test_date_facet(): ix = RamStorage().create_index(schema) w = ix.writer() - d1 = datetime(2011, 7, 13) - d2 = datetime(1984, 3, 29) + d1 = datetime(2011, 7, 13) # noqa: DTZ001 + d2 = datetime(1984, 3, 29) # noqa: DTZ001 w.add_document(id=0, date=d1) w.add_document(id=1, date=d1) w.add_document(id=2) @@ -392,24 +392,36 @@ def test_daterange_facet(): schema = fields.Schema(id=fields.STORED, date=fields.DATETIME) ix = RamStorage().create_index(schema) w = ix.writer() - w.add_document(id=0, date=datetime(2001, 1, 15)) - w.add_document(id=1, date=datetime(2001, 1, 10)) + w.add_document(id=0, date=datetime(2001, 1, 15, tzinfo=timezone.utc)) + w.add_document(id=1, date=datetime(2001, 1, 10, tzinfo=timezone.utc)) w.add_document(id=2) - w.add_document(id=3, date=datetime(2001, 1, 3)) - w.add_document(id=4, date=datetime(2001, 1, 8)) - w.add_document(id=5, date=datetime(2001, 1, 6)) + w.add_document(id=3, date=datetime(2001, 1, 3, tzinfo=timezone.utc)) + w.add_document(id=4, 
date=datetime(2001, 1, 8, tzinfo=timezone.utc)) + w.add_document(id=5, date=datetime(2001, 1, 6, tzinfo=timezone.utc)) w.commit() with ix.searcher() as s: rf = sorting.DateRangeFacet( - "date", datetime(2001, 1, 1), datetime(2001, 1, 20), timedelta(days=5) + "date", + datetime(2001, 1, 1, tzinfo=timezone.utc), + datetime(2001, 1, 20, tzinfo=timezone.utc), + timedelta(days=5), ) r = s.search(query.Every(), groupedby={"date": rf}) dt = datetime assert r.groups("date") == { - (dt(2001, 1, 1, 0, 0), dt(2001, 1, 6, 0, 0)): [3], - (dt(2001, 1, 6, 0, 0), dt(2001, 1, 11, 0, 0)): [1, 4, 5], - (dt(2001, 1, 11, 0, 0), dt(2001, 1, 16, 0, 0)): [0], + ( + dt(2001, 1, 1, 0, 0, tzinfo=timezone.utc), + dt(2001, 1, 6, 0, 0, tzinfo=timezone.utc), + ): [3], + ( + dt(2001, 1, 6, 0, 0, tzinfo=timezone.utc), + dt(2001, 1, 11, 0, 0, tzinfo=timezone.utc), + ): [1, 4, 5], + ( + dt(2001, 1, 11, 0, 0, tzinfo=timezone.utc), + dt(2001, 1, 16, 0, 0, tzinfo=timezone.utc), + ): [0], None: [2], } @@ -421,30 +433,68 @@ def test_relative_daterange(): schema = fields.Schema(id=fields.STORED, date=fields.DATETIME) ix = RamStorage().create_index(schema) - basedate = datetime(2001, 1, 1) + basedate = datetime(2001, 1, 1, tzinfo=timezone.utc) count = 0 with ix.writer() as w: - while basedate < datetime(2001, 12, 1): + while basedate < datetime(2001, 12, 1, tzinfo=timezone.utc): w.add_document(id=count, date=basedate) basedate += timedelta(days=14, hours=16) count += 1 with ix.searcher() as s: gap = relativedelta(months=1) - rf = sorting.DateRangeFacet("date", dt(2001, 1, 1), dt(2001, 12, 31), gap) + rf = sorting.DateRangeFacet( + "date", + dt(2001, 1, 1, tzinfo=timezone.utc), + dt(2001, 12, 31, tzinfo=timezone.utc), + gap, + ) r = s.search(query.Every(), groupedby={"date": rf}) assert r.groups("date") == { - (dt(2001, 1, 1), dt(2001, 2, 1)): [0, 1, 2], - (dt(2001, 2, 1), dt(2001, 3, 1)): [3, 4], - (dt(2001, 3, 1), dt(2001, 4, 1)): [5, 6], - (dt(2001, 4, 1), dt(2001, 5, 1)): [7, 8], - (dt(2001, 5, 1), 
dt(2001, 6, 1)): [9, 10], - (dt(2001, 6, 1), dt(2001, 7, 1)): [11, 12], - (dt(2001, 7, 1), dt(2001, 8, 1)): [13, 14], - (dt(2001, 8, 1), dt(2001, 9, 1)): [15, 16], - (dt(2001, 9, 1), dt(2001, 10, 1)): [17, 18], - (dt(2001, 10, 1), dt(2001, 11, 1)): [19, 20], - (dt(2001, 11, 1), dt(2001, 12, 1)): [21, 22], + ( + dt(2001, 1, 1, tzinfo=timezone.utc), + dt(2001, 2, 1, tzinfo=timezone.utc), + ): [0, 1, 2], + ( + dt(2001, 2, 1, tzinfo=timezone.utc), + dt(2001, 3, 1, tzinfo=timezone.utc), + ): [3, 4], + ( + dt(2001, 3, 1, tzinfo=timezone.utc), + dt(2001, 4, 1, tzinfo=timezone.utc), + ): [5, 6], + ( + dt(2001, 4, 1, tzinfo=timezone.utc), + dt(2001, 5, 1, tzinfo=timezone.utc), + ): [7, 8], + ( + dt(2001, 5, 1, tzinfo=timezone.utc), + dt(2001, 6, 1, tzinfo=timezone.utc), + ): [9, 10], + ( + dt(2001, 6, 1, tzinfo=timezone.utc), + dt(2001, 7, 1, tzinfo=timezone.utc), + ): [11, 12], + ( + dt(2001, 7, 1, tzinfo=timezone.utc), + dt(2001, 8, 1, tzinfo=timezone.utc), + ): [13, 14], + ( + dt(2001, 8, 1, tzinfo=timezone.utc), + dt(2001, 9, 1, tzinfo=timezone.utc), + ): [15, 16], + ( + dt(2001, 9, 1, tzinfo=timezone.utc), + dt(2001, 10, 1, tzinfo=timezone.utc), + ): [17, 18], + ( + dt(2001, 10, 1, tzinfo=timezone.utc), + dt(2001, 11, 1, tzinfo=timezone.utc), + ): [19, 20], + ( + dt(2001, 11, 1, tzinfo=timezone.utc), + dt(2001, 12, 1, tzinfo=timezone.utc), + ): [21, 22], }