Remove compatibility with legacy versions of Python #64

Merged
2 changes: 1 addition & 1 deletion benchmark/enron.py

@@ -2,6 +2,7 @@
 import tarfile
 from email import message_from_string
 from marshal import dump, load
+from urllib.request import urlretrieve
 from zlib import compress, decompress

 try:
@@ -10,7 +11,6 @@
     pass

 from whoosh import analysis, fields
-from whoosh.compat import next, urlretrieve
 from whoosh.support.bench import Bench, Spec
 from whoosh.util import now
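Both names dropped from the `whoosh.compat` import have direct Python 3 replacements: `next()` has been a builtin since Python 2.6, and `urlretrieve` lives in the standard library's `urllib.request`. A minimal sketch of the swap (the URL and filename are placeholders, not the benchmark's actual source):

from urllib.request import urlretrieve  # stdlib on Python 3; no compat shim

# Placeholder URL/filename -- the benchmark downloads the Enron mail corpus.
archive, headers = urlretrieve(
    "https://example.com/enron_mail.tar.gz", "enron_mail.tar.gz"
)
print("saved to", archive)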
5 changes: 2 additions & 3 deletions scripts/make_checkpoint.py

@@ -10,7 +10,6 @@
 from datetime import datetime, timezone

 from whoosh import fields, index
-from whoosh.compat import u

 if len(sys.argv) < 2:
     print("USAGE: make_checkpoint.py <dir>")
@@ -28,7 +27,7 @@
     ngrams=fields.NGRAMWORDS,
 )

-words = u(
+words = (
     "alfa bravo charlie delta echo foxtrot golf hotel india"
     "juliet kilo lima mike november oskar papa quebec romeo"
     "sierra tango"
@@ -44,7 +43,7 @@
 with ix.writer() as w:
     for num in range(100):
         frac += 0.15
-        path = u(f"{segnum}/{num}")
+        path = f"{segnum}/{num}"
         title = " ".join(random.choice(words) for _ in range(100))
         dt = datetime(
             year=2000 + counter,
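On Python 3 every string literal is already unicode, so the `u()` wrapper reduces to the identity function and the parentheses alone are enough to group the implicitly concatenated literals. A small sketch of the same pattern (sample data, not the script's):

# Adjacent string literals concatenate at compile time; the parentheses
# only group them -- no u() needed on Python 3.
words = (
    "alfa bravo charlie "
    "delta echo foxtrot"
).split()
print(words)  # ['alfa', 'bravo', 'charlie', 'delta', 'echo', 'foxtrot']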
3 changes: 1 addition & 2 deletions scripts/read_checkpoint.py

@@ -6,15 +6,14 @@
 import sys

 from whoosh import index, query
-from whoosh.compat import u

 if len(sys.argv) < 2:
     print("USAGE: read_checkpoint.py <dir>")
     sys.exit(1)
 indexdir = sys.argv[1]
 print("Reading checkpoint index in", indexdir)

-words = u(
+words = (
     "alfa bravo charlie delta echo foxtrot golf hotel india"
     "juliet kilo lima mike november oskar papa quebec romeo"
     "sierra tango"
8 changes: 2 additions & 6 deletions src/whoosh/analysis/acore.py

@@ -25,8 +25,6 @@
 # those of the authors and should not be interpreted as representing official
 # policies, either expressed or implied, of Matt Chaput.

-from whoosh.compat import iteritems
-
 # Exceptions

@@ -126,9 +124,7 @@ def __init__(
         self.__dict__.update(kwargs)

     def __repr__(self):
-        parms = ", ".join(
-            f"{name}={value!r}" for name, value in iteritems(self.__dict__)
-        )
+        parms = ", ".join(f"{name}={value!r}" for name, value in self.__dict__.items())
         return f"{self.__class__.__name__}({parms})"

     def copy(self):
@@ -153,7 +149,7 @@ def __repr__(self):
         attrs = ""
         if self.__dict__:
             attrs = ", ".join(
-                f"{key}={value!r}" for key, value in iteritems(self.__dict__)
+                f"{key}={value!r}" for key, value in self.__dict__.items()
             )
         return self.__class__.__name__ + f"({attrs})"
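`iteritems` bridged Python 2's `dict.iteritems()` and Python 3's `dict.items()`; with Python 2 gone, `dict.items()` already returns a lazy view and call sites can use it directly. A rough before-and-after sketch (the shim body is an assumption for illustration, not Whoosh's exact code):

# Assumed shape of the removed Python 2/3 bridge:
def iteritems(d):
    return iter(d.items())  # on Py2 this was d.iteritems()

# Python 3 call site -- no helper needed:
attrs = {"text": "hello", "boost": 1.0}
parms = ", ".join(f"{name}={value!r}" for name, value in attrs.items())
print(f"Token({parms})")  # Token(text='hello', boost=1.0)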
1 change: 0 additions & 1 deletion src/whoosh/analysis/filters.py

@@ -28,7 +28,6 @@
 from itertools import chain

 from whoosh.analysis.acore import Composable
-from whoosh.compat import next
 from whoosh.util.text import rcompile

 # Default list of stop words (words so common it's usually wasteful to index
21 changes: 10 additions & 11 deletions src/whoosh/analysis/intraword.py

@@ -29,7 +29,6 @@
 from collections import deque

 from whoosh.analysis.filters import Filter
-from whoosh.compat import text_type, u


 class CompoundWordFilter(Filter):
@@ -279,7 +278,7 @@ class IntraWordFilter(Filter):
     is_morph = True

     __inittypes__ = {
-        "delims": text_type,
+        "delims": str,
         "splitwords": bool,
         "splitnums": bool,
         "mergewords": bool,
@@ -288,7 +287,7 @@

     def __init__(
         self,
-        delims=u("-_'\"()!@#$%^&*[]{}<>\\|;:,./?`~=+"),
+        delims="-_'\"()!@#$%^&*[]{}<>\\|;:,./?`~=+",
         splitwords=True,
         splitnums=True,
         mergewords=False,
@@ -311,22 +310,22 @@ def __init__(
         self.delims = re.escape(delims)

         # Expression for text between delimiter characters
-        self.between = re.compile(u("[^%s]+") % (self.delims,), re.UNICODE)
+        self.between = re.compile(f"[^{self.delims}]+", re.UNICODE)
         # Expression for removing "'s" from the end of sub-words
-        dispat = u("(?<=[%s%s])'[Ss](?=$|[%s])") % (lowercase, uppercase, self.delims)
+        dispat = f"(?<=[{lowercase}{uppercase}])'[Ss](?=$|[{self.delims}])"
         self.possessive = re.compile(dispat, re.UNICODE)

         # Expression for finding case and letter-number transitions
-        lower2upper = u("[%s][%s]") % (lowercase, uppercase)
-        letter2digit = u("[%s%s][%s]") % (lowercase, uppercase, digits)
-        digit2letter = u("[%s][%s%s]") % (digits, lowercase, uppercase)
+        lower2upper = f"[{lowercase}][{uppercase}]"
+        letter2digit = f"[{lowercase}{uppercase}][{digits}]"
+        digit2letter = f"[{digits}][{lowercase}{uppercase}]"
         if splitwords and splitnums:
-            splitpat = u("(%s|%s|%s)") % (lower2upper, letter2digit, digit2letter)
+            splitpat = f"({lower2upper}|{letter2digit}|{digit2letter})"
             self.boundary = re.compile(splitpat, re.UNICODE)
         elif splitwords:
-            self.boundary = re.compile(text_type(lower2upper), re.UNICODE)
+            self.boundary = re.compile(str(lower2upper), re.UNICODE)
         elif splitnums:
-            numpat = u("(%s|%s)") % (letter2digit, digit2letter)
+            numpat = f"({letter2digit}|{digit2letter})"
             self.boundary = re.compile(numpat, re.UNICODE)

         self.splitting = splitwords or splitnums
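The old code built its regexes with the `u("...") % (...)` idiom; the f-string versions produce byte-for-byte identical patterns. A standalone sketch of the transition-finding expression, using ASCII character classes for brevity (the real filter substitutes full Unicode lowercase/uppercase/digit classes):

import re
from string import ascii_lowercase as lowercase
from string import ascii_uppercase as uppercase
from string import digits

# Same construction as IntraWordFilter.__init__, with ASCII classes.
lower2upper = f"[{lowercase}][{uppercase}]"
letter2digit = f"[{lowercase}{uppercase}][{digits}]"
digit2letter = f"[{digits}][{lowercase}{uppercase}]"
boundary = re.compile(
    f"({lower2upper}|{letter2digit}|{digit2letter})", re.UNICODE
)

# Each match marks a case or letter/digit transition inside a word.
print(boundary.findall("WikiWord2000"))  # ['iW', 'd2']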
3 changes: 1 addition & 2 deletions src/whoosh/analysis/morph.py

@@ -26,7 +26,6 @@
 # policies, either expressed or implied, of Matt Chaput.

 from whoosh.analysis.filters import Filter
-from whoosh.compat import integer_types
 from whoosh.lang.dmetaphone import double_metaphone
 from whoosh.lang.porter import stem
 from whoosh.util.cache import lfu_cache, unbound_cache
@@ -120,7 +119,7 @@ def clear(self):
         else:
             stemfn = self.stemfn

-        if isinstance(self.cachesize, integer_types) and self.cachesize != 0:
+        if isinstance(self.cachesize, int) and self.cachesize != 0:
             if self.cachesize < 0:
                 self._stem = unbound_cache(stemfn)
             elif self.cachesize > 1:
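`integer_types` was `(int, long)` on Python 2 and just `(int,)` on Python 3, so a plain `int` check is the drop-in replacement. A sketch of the cache-selection branch above, paraphrased as a standalone function (names are illustrative, not Whoosh's API, and stdlib `lru_cache` stands in for Whoosh's own cache decorators):

from functools import lru_cache

def make_cached_stemmer(stemfn, cachesize):
    # Python 3 has a single int type; isinstance(x, int) is all that remains
    # of the Py2 (int, long) check. Note bool also passes, as an int subclass.
    if isinstance(cachesize, int) and cachesize != 0:
        if cachesize < 0:
            return lru_cache(maxsize=None)(stemfn)  # unbounded, like unbound_cache
        elif cachesize > 1:
            return lru_cache(maxsize=cachesize)(stemfn)  # bounded, like lfu_cache
    return stemfn  # caching disabled

stem = make_cached_stemmer(str.lower, 50000)
print(stem("Whoosh"))  # whoosh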
3 changes: 1 addition & 2 deletions src/whoosh/analysis/ngrams.py

@@ -28,7 +28,6 @@
 from whoosh.analysis.acore import Token
 from whoosh.analysis.filters import Filter, LowercaseFilter
 from whoosh.analysis.tokenizers import RegexTokenizer, Tokenizer
-from whoosh.compat import text_type

 # Tokenizer

@@ -79,7 +78,7 @@ def __call__(
         mode="",
         **kwargs,
     ):
-        assert isinstance(value, text_type), f"{value!r} is not unicode"
+        assert isinstance(value, str), f"{value!r} is not unicode"

         inlen = len(value)
         t = Token(positions, chars, removestops=removestops, mode=mode)
15 changes: 7 additions & 8 deletions src/whoosh/analysis/tokenizers.py

@@ -26,7 +26,6 @@
 # policies, either expressed or implied, of Matt Chaput.

 from whoosh.analysis.acore import Composable, Token
-from whoosh.compat import text_type, u
 from whoosh.util.text import rcompile

 default_pattern = rcompile(r"[\w\*]+(\.?[\w\*]+)*")
@@ -63,7 +62,7 @@ def __call__(
         mode="",
         **kwargs,
     ):
-        assert isinstance(value, text_type), f"{value!r} is not unicode"
+        assert isinstance(value, str), f"{value!r} is not unicode"
         t = Token(positions, chars, removestops=removestops, mode=mode, **kwargs)
         t.text = value
         t.boost = 1.0
@@ -82,7 +81,7 @@ class RegexTokenizer(Tokenizer):
     Uses a regular expression to extract tokens from text.

     >>> rex = RegexTokenizer()
-    >>> [token.text for token in rex(u("hi there 3.141 big-time under_score"))]
+    >>> [token.text for token in rex("hi there 3.141 big-time under_score")]
     ["hi", "there", "3.141", "big", "time", "under_score"]
     """

@@ -131,7 +130,7 @@ def __call__(
         :param tokenize: if True, the text should be tokenized.
         """

-        assert isinstance(value, text_type), f"{repr(value)} is not unicode"
+        assert isinstance(value, str), f"{repr(value)} is not unicode"

         t = Token(positions, chars, removestops=removestops, mode=mode, **kwargs)
         if not tokenize:
@@ -264,7 +263,7 @@ def __call__(
         :param tokenize: if True, the text should be tokenized.
         """

-        assert isinstance(value, text_type), f"{value!r} is not unicode"
+        assert isinstance(value, str), f"{value!r} is not unicode"

         t = Token(positions, chars, removestops=removestops, mode=mode, **kwargs)
         if not tokenize:
@@ -277,7 +276,7 @@
                 t.endchar = start_char + len(value)
             yield t
         else:
-            text = u("")
+            text = ""
            charmap = self.charmap
            pos = start_pos
            startchar = currentchar = start_char
@@ -299,7 +298,7 @@
                            t.endchar = currentchar
                        yield t
                    startchar = currentchar + 1
-                    text = u("")
+                    text = ""

                currentchar += 1

@@ -352,7 +351,7 @@ def __init__(self, expression="[^/]+"):
         self.expr = rcompile(expression)

     def __call__(self, value, positions=False, start_pos=0, **kwargs):
-        assert isinstance(value, text_type), f"{value!r} is not unicode"
+        assert isinstance(value, str), f"{value!r} is not unicode"
         token = Token(positions, **kwargs)
         pos = start_pos
         for match in self.expr.finditer(value):
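With `text_type` now just `str`, the asserts and the doctest lose their wrappers while keeping the same behavior. The updated doctest runs as-is on Python 3:

from whoosh.analysis import RegexTokenizer

# Plain str literals are unicode on Python 3, so no u() wrapper is needed.
rex = RegexTokenizer()
print([token.text for token in rex("hi there 3.141 big-time under_score")])
# ['hi', 'there', '3.141', 'big', 'time', 'under_score']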