diff --git a/benchmark/enron.py b/benchmark/enron.py
index 175c0ef6..38504221 100644
--- a/benchmark/enron.py
+++ b/benchmark/enron.py
@@ -2,6 +2,7 @@
import tarfile
from email import message_from_string
from marshal import dump, load
+from urllib.request import urlretrieve
from zlib import compress, decompress
try:
@@ -10,7 +11,6 @@
pass
from whoosh import analysis, fields
-from whoosh.compat import next, urlretrieve
from whoosh.support.bench import Bench, Spec
from whoosh.util import now
diff --git a/scripts/make_checkpoint.py b/scripts/make_checkpoint.py
index d690826b..9c1b818b 100644
--- a/scripts/make_checkpoint.py
+++ b/scripts/make_checkpoint.py
@@ -10,7 +10,6 @@
from datetime import datetime, timezone
from whoosh import fields, index
-from whoosh.compat import u
if len(sys.argv) < 2:
print("USAGE: make_checkpoint.py
")
@@ -28,7 +27,7 @@
ngrams=fields.NGRAMWORDS,
)
-words = u(
+words = (
"alfa bravo charlie delta echo foxtrot golf hotel india"
"juliet kilo lima mike november oskar papa quebec romeo"
"sierra tango"
@@ -44,7 +43,7 @@
with ix.writer() as w:
for num in range(100):
frac += 0.15
- path = u(f"{segnum}/{num}")
+ path = f"{segnum}/{num}"
title = " ".join(random.choice(words) for _ in range(100))
dt = datetime(
year=2000 + counter,
diff --git a/scripts/read_checkpoint.py b/scripts/read_checkpoint.py
index c6947f0c..d8a9d77c 100644
--- a/scripts/read_checkpoint.py
+++ b/scripts/read_checkpoint.py
@@ -6,7 +6,6 @@
import sys
from whoosh import index, query
-from whoosh.compat import u
if len(sys.argv) < 2:
print("USAGE: read_checkpoint.py ")
@@ -14,7 +13,7 @@
indexdir = sys.argv[1]
print("Reading checkpoint index in", indexdir)
-words = u(
+words = (
"alfa bravo charlie delta echo foxtrot golf hotel india"
"juliet kilo lima mike november oskar papa quebec romeo"
"sierra tango"
diff --git a/src/whoosh/analysis/acore.py b/src/whoosh/analysis/acore.py
index 74bf926a..318f1129 100644
--- a/src/whoosh/analysis/acore.py
+++ b/src/whoosh/analysis/acore.py
@@ -25,8 +25,6 @@
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
-from whoosh.compat import iteritems
-
# Exceptions
@@ -126,9 +124,7 @@ def __init__(
self.__dict__.update(kwargs)
def __repr__(self):
- parms = ", ".join(
- f"{name}={value!r}" for name, value in iteritems(self.__dict__)
- )
+ parms = ", ".join(f"{name}={value!r}" for name, value in self.__dict__.items())
return f"{self.__class__.__name__}({parms})"
def copy(self):
@@ -153,7 +149,7 @@ def __repr__(self):
attrs = ""
if self.__dict__:
attrs = ", ".join(
- f"{key}={value!r}" for key, value in iteritems(self.__dict__)
+ f"{key}={value!r}" for key, value in self.__dict__.items()
)
return self.__class__.__name__ + f"({attrs})"
diff --git a/src/whoosh/analysis/filters.py b/src/whoosh/analysis/filters.py
index 8a295cbf..1fabefa8 100644
--- a/src/whoosh/analysis/filters.py
+++ b/src/whoosh/analysis/filters.py
@@ -28,7 +28,6 @@
from itertools import chain
from whoosh.analysis.acore import Composable
-from whoosh.compat import next
from whoosh.util.text import rcompile
# Default list of stop words (words so common it's usually wasteful to index
diff --git a/src/whoosh/analysis/intraword.py b/src/whoosh/analysis/intraword.py
index 61663edf..ae22e58b 100644
--- a/src/whoosh/analysis/intraword.py
+++ b/src/whoosh/analysis/intraword.py
@@ -29,7 +29,6 @@
from collections import deque
from whoosh.analysis.filters import Filter
-from whoosh.compat import text_type, u
class CompoundWordFilter(Filter):
@@ -279,7 +278,7 @@ class IntraWordFilter(Filter):
is_morph = True
__inittypes__ = {
- "delims": text_type,
+ "delims": str,
"splitwords": bool,
"splitnums": bool,
"mergewords": bool,
@@ -288,7 +287,7 @@ class IntraWordFilter(Filter):
def __init__(
self,
- delims=u("-_'\"()!@#$%^&*[]{}<>\\|;:,./?`~=+"),
+ delims="-_'\"()!@#$%^&*[]{}<>\\|;:,./?`~=+",
splitwords=True,
splitnums=True,
mergewords=False,
@@ -311,22 +310,22 @@ def __init__(
self.delims = re.escape(delims)
# Expression for text between delimiter characters
- self.between = re.compile(u("[^%s]+") % (self.delims,), re.UNICODE)
+ self.between = re.compile(f"[^{self.delims}]+", re.UNICODE)
# Expression for removing "'s" from the end of sub-words
- dispat = u("(?<=[%s%s])'[Ss](?=$|[%s])") % (lowercase, uppercase, self.delims)
+ dispat = f"(?<=[{lowercase}{uppercase}])'[Ss](?=$|[{self.delims}])"
self.possessive = re.compile(dispat, re.UNICODE)
# Expression for finding case and letter-number transitions
- lower2upper = u("[%s][%s]") % (lowercase, uppercase)
- letter2digit = u("[%s%s][%s]") % (lowercase, uppercase, digits)
- digit2letter = u("[%s][%s%s]") % (digits, lowercase, uppercase)
+ lower2upper = f"[{lowercase}][{uppercase}]"
+ letter2digit = f"[{lowercase}{uppercase}][{digits}]"
+ digit2letter = f"[{digits}][{lowercase}{uppercase}]"
if splitwords and splitnums:
- splitpat = u("(%s|%s|%s)") % (lower2upper, letter2digit, digit2letter)
+ splitpat = f"({lower2upper}|{letter2digit}|{digit2letter})"
self.boundary = re.compile(splitpat, re.UNICODE)
elif splitwords:
- self.boundary = re.compile(text_type(lower2upper), re.UNICODE)
+ self.boundary = re.compile(str(lower2upper), re.UNICODE)
elif splitnums:
- numpat = u("(%s|%s)") % (letter2digit, digit2letter)
+ numpat = f"({letter2digit}|{digit2letter})"
self.boundary = re.compile(numpat, re.UNICODE)
self.splitting = splitwords or splitnums
diff --git a/src/whoosh/analysis/morph.py b/src/whoosh/analysis/morph.py
index 982e5fa4..addbfad6 100644
--- a/src/whoosh/analysis/morph.py
+++ b/src/whoosh/analysis/morph.py
@@ -26,7 +26,6 @@
# policies, either expressed or implied, of Matt Chaput.
from whoosh.analysis.filters import Filter
-from whoosh.compat import integer_types
from whoosh.lang.dmetaphone import double_metaphone
from whoosh.lang.porter import stem
from whoosh.util.cache import lfu_cache, unbound_cache
@@ -120,7 +119,7 @@ def clear(self):
else:
stemfn = self.stemfn
- if isinstance(self.cachesize, integer_types) and self.cachesize != 0:
+ if isinstance(self.cachesize, int) and self.cachesize != 0:
if self.cachesize < 0:
self._stem = unbound_cache(stemfn)
elif self.cachesize > 1:
diff --git a/src/whoosh/analysis/ngrams.py b/src/whoosh/analysis/ngrams.py
index 423b4aa5..a42fc37e 100644
--- a/src/whoosh/analysis/ngrams.py
+++ b/src/whoosh/analysis/ngrams.py
@@ -28,7 +28,6 @@
from whoosh.analysis.acore import Token
from whoosh.analysis.filters import Filter, LowercaseFilter
from whoosh.analysis.tokenizers import RegexTokenizer, Tokenizer
-from whoosh.compat import text_type
# Tokenizer
@@ -79,7 +78,7 @@ def __call__(
mode="",
**kwargs,
):
- assert isinstance(value, text_type), f"{value!r} is not unicode"
+ assert isinstance(value, str), f"{value!r} is not unicode"
inlen = len(value)
t = Token(positions, chars, removestops=removestops, mode=mode)
diff --git a/src/whoosh/analysis/tokenizers.py b/src/whoosh/analysis/tokenizers.py
index 58461b0c..449576ab 100644
--- a/src/whoosh/analysis/tokenizers.py
+++ b/src/whoosh/analysis/tokenizers.py
@@ -26,7 +26,6 @@
# policies, either expressed or implied, of Matt Chaput.
from whoosh.analysis.acore import Composable, Token
-from whoosh.compat import text_type, u
from whoosh.util.text import rcompile
default_pattern = rcompile(r"[\w\*]+(\.?[\w\*]+)*")
@@ -63,7 +62,7 @@ def __call__(
mode="",
**kwargs,
):
- assert isinstance(value, text_type), f"{value!r} is not unicode"
+ assert isinstance(value, str), f"{value!r} is not unicode"
t = Token(positions, chars, removestops=removestops, mode=mode, **kwargs)
t.text = value
t.boost = 1.0
@@ -82,7 +81,7 @@ class RegexTokenizer(Tokenizer):
Uses a regular expression to extract tokens from text.
>>> rex = RegexTokenizer()
- >>> [token.text for token in rex(u("hi there 3.141 big-time under_score"))]
+ >>> [token.text for token in rex("hi there 3.141 big-time under_score")]
["hi", "there", "3.141", "big", "time", "under_score"]
"""
@@ -131,7 +130,7 @@ def __call__(
:param tokenize: if True, the text should be tokenized.
"""
- assert isinstance(value, text_type), f"{repr(value)} is not unicode"
+ assert isinstance(value, str), f"{repr(value)} is not unicode"
t = Token(positions, chars, removestops=removestops, mode=mode, **kwargs)
if not tokenize:
@@ -264,7 +263,7 @@ def __call__(
:param tokenize: if True, the text should be tokenized.
"""
- assert isinstance(value, text_type), f"{value!r} is not unicode"
+ assert isinstance(value, str), f"{value!r} is not unicode"
t = Token(positions, chars, removestops=removestops, mode=mode, **kwargs)
if not tokenize:
@@ -277,7 +276,7 @@ def __call__(
t.endchar = start_char + len(value)
yield t
else:
- text = u("")
+ text = ""
charmap = self.charmap
pos = start_pos
startchar = currentchar = start_char
@@ -299,7 +298,7 @@ def __call__(
t.endchar = currentchar
yield t
startchar = currentchar + 1
- text = u("")
+ text = ""
currentchar += 1
@@ -352,7 +351,7 @@ def __init__(self, expression="[^/]+"):
self.expr = rcompile(expression)
def __call__(self, value, positions=False, start_pos=0, **kwargs):
- assert isinstance(value, text_type), f"{value!r} is not unicode"
+ assert isinstance(value, str), f"{value!r} is not unicode"
token = Token(positions, **kwargs)
pos = start_pos
for match in self.expr.finditer(value):
diff --git a/src/whoosh/automata/fsa.py b/src/whoosh/automata/fsa.py
index 06706c5e..024dc8c6 100644
--- a/src/whoosh/automata/fsa.py
+++ b/src/whoosh/automata/fsa.py
@@ -3,10 +3,7 @@
import sys
from bisect import bisect_left
-from whoosh.compat import iteritems, next, text_type, unichr
-
-unull = unichr(0)
-
+unull = chr(0)
# Marker constants
@@ -46,13 +43,13 @@ def __eq__(self, other):
def all_states(self):
stateset = set(self.transitions)
- for src, trans in iteritems(self.transitions):
+ for trans in self.transitions.values():
stateset.update(trans.values())
return stateset
def all_labels(self):
labels = set()
- for src, trans in iteritems(self.transitions):
+ for trans in self.transitions.values():
labels.update(trans)
return labels
@@ -134,8 +131,8 @@ def add_final_state(self, state):
self.final_states.add(state)
def triples(self):
- for src, trans in iteritems(self.transitions):
- for label, dests in iteritems(trans):
+ for src, trans in self.transitions.items():
+ for label, dests in trans.items():
for dest in dests:
yield src, label, dest
@@ -175,9 +172,9 @@ def get_labels(self, states):
def embed(self, other):
# Copy all transitions from the other NFA into this one
- for s, othertrans in iteritems(other.transitions):
+ for s, othertrans in other.transitions.items():
trans = self.transitions.setdefault(s, {})
- for label, otherdests in iteritems(othertrans):
+ for label, otherdests in othertrans.items():
dests = trans.setdefault(label, set())
dests.update(otherdests)
@@ -286,7 +283,7 @@ def find_next_edge(self, s, label, asbytes):
if label is None:
label = b"\x00" if asbytes else "\0"
else:
- label = (label + 1) if asbytes else unichr(ord(label) + 1)
+ label = (label + 1) if asbytes else chr(ord(label) + 1)
trans = self.transitions.get(s, {})
if label in trans or s in self.defaults:
return label
@@ -313,7 +310,7 @@ def reachable_from(self, src, inclusive=True):
while stack:
src = stack.pop()
seen.add(src)
- for _, dest in iteritems(transitions[src]):
+ for dest in transitions[src].values():
reached.add(dest)
if dest not in seen:
stack.append(dest)
@@ -381,9 +378,9 @@ def minimize(self):
# Apply mapping to existing transitions
new_finals = {mapping[s] for s in final_states}
- for state, d in iteritems(new_trans):
+ for state, d in new_trans.items():
trans = transitions[state]
- for label, dest in iteritems(trans):
+ for label, dest in trans.items():
d[label] = mapping[dest]
# Remove dead states - non-final states with no outgoing arcs except
@@ -426,12 +423,12 @@ def remap(state):
return newnum
newdfa = DFA(remap(dfa.initial))
- for src, trans in iteritems(dfa.transitions):
- for label, dest in iteritems(trans):
+ for src, trans in dfa.transitions.items():
+ for label, dest in trans.items():
newdfa.add_transition(remap(src), label, remap(dest))
for finalstate in dfa.final_states:
newdfa.add_final_state(remap(finalstate))
- for src, dest in iteritems(dfa.defaults):
+ for src, dest in dfa.defaults.items():
newdfa.set_default_transition(remap(src), remap(dest))
return newdfa
@@ -440,15 +437,15 @@ def u_to_utf8(dfa, base=0):
c = itertools.count(base)
transitions = dfa.transitions
- for src, trans in iteritems(transitions):
+ for src, trans in transitions.items():
trans = transitions[src]
- for label, dest in list(iteritems(trans)):
+ for label, dest in list(trans.items()):
if label is EPSILON:
continue
elif label is ANY:
raise ValueError
else:
- assert isinstance(label, text_type)
+ assert isinstance(label, str)
label8 = label.encode("utf8")
for i, byte in enumerate(label8):
if i < len(label8) - 1:
@@ -490,8 +487,8 @@ def find_all_matches(dfa, lookup_func, first=unull):
def reverse_nfa(n):
s = object()
nfa = NFA(s)
- for src, trans in iteritems(n.transitions):
- for label, destset in iteritems(trans):
+ for src, trans in n.transitions.items():
+ for label, destset in trans.items():
for dest in destset:
nfa.add_transition(dest, label, src)
for finalstate in n.final_states:
@@ -638,7 +635,7 @@ def __hash__(self):
return hash(self.tuple())
def tuple(self):
- arcs = tuple(sorted(iteritems(self.arcs)))
+ arcs = tuple(sorted(self.arcs.items()))
return arcs, self.final
@@ -703,5 +700,5 @@ def add_suffix(dfa, nodes, last, downto, seen):
parent.arcs[inlabel] = this
# Add the node's transitions to the DFA
- for label, dest in iteritems(node.arcs):
+ for label, dest in node.arcs.items():
dfa.add_transition(this, label, dest)
diff --git a/src/whoosh/automata/fst.py b/src/whoosh/automata/fst.py
index 907f36c2..0762b9ce 100644
--- a/src/whoosh/automata/fst.py
+++ b/src/whoosh/automata/fst.py
@@ -43,19 +43,8 @@
import sys
from array import array
from hashlib import sha1 # type: ignore @UnresolvedImport
+from io import BytesIO
-from whoosh.compat import (
- BytesIO,
- array_tobytes,
- b,
- bytes_type,
- iteritems,
- iterkeys,
- izip,
- range,
- text_type,
- u,
-)
from whoosh.filedb.structfile import StructFile
from whoosh.system import (
_INT_SIZE,
@@ -69,6 +58,14 @@
from whoosh.util.varints import varint
+def b(s):
+ return s.encode("latin-1")
+
+
+def u(s):
+ return s.decode("ascii") if isinstance(s, bytes) else s
+
+
class FileVersionError(Exception):
pass
@@ -266,7 +263,7 @@ class BytesValues(SequenceValues):
@staticmethod
def is_valid(v):
- return isinstance(v, bytes_type)
+ return isinstance(v, bytes)
@staticmethod
def write(dbfile, v):
@@ -315,7 +312,7 @@ def skip(self, dbfile):
@staticmethod
def to_bytes(v):
- return array_tobytes(v)
+ return v.tobytes()
class IntListValues(SequenceValues):
@@ -379,7 +376,7 @@ def __init__(self, owner, address, accept=False):
def __iter__(self):
if not self._edges:
self._load()
- return iterkeys(self._edges)
+ return self._edges.keys()
def __contains__(self, key):
if self._edges is None:
@@ -847,11 +844,11 @@ def digest(self):
if arc.target:
d.update(pack_long(arc.target))
else:
- d.update(b("z"))
+ d.update(b"z")
if arc.value:
d.update(vtype.to_bytes(arc.value))
if arc.accept:
- d.update(b("T"))
+ d.update(b"T")
self._digest = d.digest()
return self._digest
@@ -1011,7 +1008,7 @@ def __init__(self, dbfile, vtype=None, merge=None):
self.node_count = 0
self.fixed_count = 0
- dbfile.write(b("GRPH"))
+ dbfile.write(b"GRPH")
dbfile.write_int(self.version)
dbfile.write_uint(0)
@@ -1308,7 +1305,7 @@ def __init__(self, dbfile, rootname=None, vtype=None, filebase=0):
dbfile.seek(filebase)
magic = dbfile.read(4)
- if magic != b("GRPH"):
+ if magic != b"GRPH":
raise FileVersionError
self.version = dbfile.read_int()
dbfile.seek(dbfile.read_uint())
@@ -1458,13 +1455,13 @@ def to_labels(key):
# I hate the Python 3 bytes object so friggin much
if keytype is tuple or keytype is list:
- if not all(isinstance(e, bytes_type) for e in key):
+ if not all(isinstance(e, bytes) for e in key):
raise TypeError(f"{key!r} contains a non-bytestring")
if keytype is list:
key = tuple(key)
- elif isinstance(key, bytes_type):
+ elif isinstance(key, bytes):
key = tuple(key[i : i + 1] for i in range(len(key)))
- elif isinstance(key, text_type):
+ elif isinstance(key, str):
key = tuple(utf8encode(key[i : i + 1])[0] for i in range(len(key)))
else:
raise TypeError(f"Don't know how to convert {key!r}")
@@ -1528,8 +1525,7 @@ def within(graph, text, k=1, prefix=0, address=None):
arcs = graph.arc_dict(address)
# Insertions
stack.extend(
- (arc.target, k, i, sofar + char, arc.accept)
- for char, arc in iteritems(arcs)
+ (arc.target, k, i, sofar + char, arc.accept) for char, arc in arcs.items()
)
# Deletion, replacement, and transpo only work before the end
@@ -1540,7 +1536,7 @@ def within(graph, text, k=1, prefix=0, address=None):
# Deletion
stack.append((address, k, i + 1, sofar, False))
# Replacement
- for char2, arc in iteritems(arcs):
+ for char2, arc in arcs.items():
if char2 != char:
stack.append((arc.target, k, i + 1, sofar + char2, arc.accept))
# Transposition
diff --git a/src/whoosh/classify.py b/src/whoosh/classify.py
index 4678ac0b..eff94eec 100644
--- a/src/whoosh/classify.py
+++ b/src/whoosh/classify.py
@@ -34,8 +34,6 @@
from collections import defaultdict
from math import log
-from whoosh.compat import iteritems
-
# Expansion models
@@ -186,7 +184,7 @@ def expanded_terms(self, number, normalize=True):
if not self.topN_weight:
return []
- for word, weight in iteritems(self.topN_weight):
+ for word, weight in self.topN_weight.items():
btext = field.to_bytes(word)
if (fieldname, btext) in ixreader:
cf = ixreader.frequency(fieldname, btext)
@@ -212,7 +210,7 @@ def shingles(input, size=2):
d = defaultdict(int)
for shingle in (input[i : i + size] for i in range(len(input) - (size - 1))):
d[shingle] += 1
- return iteritems(d)
+ return d.items()
def simhash(features, hashbits=32):
diff --git a/src/whoosh/codec/base.py b/src/whoosh/codec/base.py
index 23fbb594..e360ff52 100644
--- a/src/whoosh/codec/base.py
+++ b/src/whoosh/codec/base.py
@@ -29,11 +29,11 @@
This module contains base classes/interfaces for "codec" objects.
"""
+from abc import abstractmethod
from bisect import bisect_right
from whoosh import columns
from whoosh.automata import lev
-from whoosh.compat import abstractmethod, izip, unichr
from whoosh.filedb.compound import CompoundStorage
from whoosh.system import emptybytes
from whoosh.util import random_name
@@ -367,7 +367,7 @@ def levenshtein_dfa(uterm, maxdist, prefix=0):
@staticmethod
def find_matches(dfa, cur):
- unull = unichr(0)
+ unull = chr(0)
term = cur.text()
if term is None:
@@ -769,12 +769,12 @@ def is_deleted(self, docnum):
return self._readers[x].is_deleted(y)
def deleted_docs(self):
- for r, offset in izip(self._readers, self._doc_offsets):
+ for r, offset in zip(self._readers, self._doc_offsets):
for docnum in r.deleted_docs():
yield docnum + offset
def all_doc_ids(self):
- for r, offset in izip(self._readers, self._doc_offsets):
+ for r, offset in zip(self._readers, self._doc_offsets):
for docnum in r.all_doc_ids():
yield docnum + offset
diff --git a/src/whoosh/codec/plaintext.py b/src/whoosh/codec/plaintext.py
index 95b54b5d..1e58ecb1 100644
--- a/src/whoosh/codec/plaintext.py
+++ b/src/whoosh/codec/plaintext.py
@@ -26,29 +26,13 @@
# policies, either expressed or implied, of Matt Chaput.
from ast import literal_eval
+from pickle import dumps, loads
from whoosh.codec import base
-from whoosh.compat import (
- PY3,
- b,
- bytes_type,
- dumps,
- integer_types,
- iteritems,
- loads,
- range,
- text_type,
-)
from whoosh.matching import ListMatcher
from whoosh.reading import TermInfo, TermNotFound
-if not PY3:
-
- class memoryview:
- pass
-
-
-_reprable = (bytes_type, text_type, integer_types, float)
+_reprable = (bytes, str, int, float)
# Mixin classes for producing and consuming the simple text format
@@ -56,15 +40,15 @@ class memoryview:
class LineWriter:
def _print_line(self, indent, command, **kwargs):
- self._dbfile.write(b(" ") * indent)
+ self._dbfile.write(b" " * indent)
self._dbfile.write(command.encode("latin1"))
- for k, v in iteritems(kwargs):
+ for k, v in kwargs.items():
if isinstance(v, memoryview):
v = bytes(v)
if v is not None and not isinstance(v, _reprable):
raise TypeError(type(v))
self._dbfile.write(f"\t{k}={v!r}".encode("latin1"))
- self._dbfile.write(b("\n"))
+ self._dbfile.write(b"\n")
class LineReader:
diff --git a/src/whoosh/codec/whoosh2.py b/src/whoosh/codec/whoosh2.py
index c146aff2..66042554 100644
--- a/src/whoosh/codec/whoosh2.py
+++ b/src/whoosh/codec/whoosh2.py
@@ -32,6 +32,7 @@
from collections import defaultdict
from decimal import Decimal
from hashlib import md5 # type: ignore @UnresolvedImport
+from pickle import dumps, loads
from struct import Struct
try:
@@ -41,20 +42,6 @@
from whoosh.automata.fst import GraphReader, GraphWriter
from whoosh.codec import base
-from whoosh.compat import (
- PY3,
- array_frombytes,
- array_tobytes,
- b,
- bytes_type,
- dumps,
- integer_types,
- iteritems,
- loads,
- range,
- string_type,
- text_type,
-)
from whoosh.filedb.filestore import Storage
from whoosh.matching import LeafMatcher, ListMatcher, ReadTooFar
from whoosh.reading import NoGraphError, TermInfo, TermNotFound
@@ -129,9 +116,9 @@ def __init__(self, dbfile, hashtype=2):
self.extras = {}
self.startoffset = dbfile.tell()
- dbfile.write(b("HASH")) # Magic tag
+ dbfile.write(b"HASH") # Magic tag
dbfile.write_byte(self.hashtype) # Identify hashing function used
- dbfile.write(b("\x00\x00\x00")) # Unused bytes
+ dbfile.write(b"\x00\x00\x00") # Unused bytes
dbfile.write_long(0) # Pointer to end of hashes
self.header_size = 16 + 256 * header_entry_size
@@ -144,8 +131,8 @@ def __init__(self, dbfile, hashtype=2):
self.hashes = defaultdict(list)
def add(self, key, value):
- assert isinstance(key, bytes_type)
- assert isinstance(value, bytes_type)
+ assert isinstance(key, bytes)
+ assert isinstance(value, bytes)
dbfile = self.dbfile
pos = dbfile.tell()
@@ -226,7 +213,7 @@ def __init__(self, dbfile, startoffset=0):
dbfile.seek(startoffset)
# Check magic tag
magic = dbfile.read(4)
- if magic != b("HASH"):
+ if magic != b"HASH":
raise ValueError(f"Unknown file header {magic}")
self.hashtype = dbfile.read_byte() # Hash function type
@@ -335,7 +322,7 @@ def _key_at(self, pos):
def ranges_for_key(self, key):
read = self.read
- if not isinstance(key, bytes_type):
+ if not isinstance(key, bytes):
raise TypeError(f"Key {key} should be bytes")
keyhash = self.hash_func(key)
hpos, hslots = self._hashtable_info(keyhash)
@@ -422,7 +409,7 @@ def _closest_key(self, key):
lo = 0
hi = self.indexlen
- if not isinstance(key, bytes_type):
+ if not isinstance(key, bytes):
raise TypeError(f"Key {key} should be bytes")
while lo < hi:
mid = (lo + hi) // 2
@@ -986,7 +973,7 @@ def _read_extras(self):
dbfile.seek(self.indexbase + self.length * _LONG_SIZE)
self.fieldmap = dbfile.read_pickle()
self.names = [None] * len(self.fieldmap)
- for name, num in iteritems(self.fieldmap):
+ for name, num in self.fieldmap.items():
self.names[num] = name
def _closest_key(self, key):
@@ -995,7 +982,7 @@ def _closest_key(self, key):
indexbase = self.indexbase
lo = 0
hi = self.length
- if not isinstance(key, bytes_type):
+ if not isinstance(key, bytes):
raise TypeError(f"Key {key!r} should be bytes")
while lo < hi:
mid = (lo + hi) // 2
@@ -1101,7 +1088,7 @@ def matcher(self, fieldname, text, format_, scorer=None):
raise TermNotFound(f"No term {fieldname}:{text!r}")
p = terminfo.postings
- if isinstance(p, integer_types):
+ if isinstance(p, int):
# terminfo.postings is an offset into the posting file
pr = W2LeafMatcher(pf, p, format_, scorer=scorer, term=term)
else:
@@ -1116,20 +1103,20 @@ def keycoder(self, key):
return pack_ushort(fnum) + tbytes
def keydecoder(self, v):
- assert isinstance(v, bytes_type)
+ assert isinstance(v, bytes)
return (self.names[unpack_ushort(v[:2])[0]], v[2:])
def valuedecoder(self, v):
- assert isinstance(v, bytes_type)
+ assert isinstance(v, bytes)
return FileTermInfo.from_string(v)
def frequency(self, fieldname, btext):
- assert isinstance(btext, bytes_type)
+ assert isinstance(btext, bytes)
datapos = self.range_for_key((fieldname, btext))[0]
return FileTermInfo.read_weight(self.dbfile, datapos)
def doc_frequency(self, fieldname, btext):
- assert isinstance(btext, bytes_type)
+ assert isinstance(btext, bytes)
datapos = self.range_for_key((fieldname, btext))[0]
return FileTermInfo.read_doc_freq(self.dbfile, datapos)
@@ -1240,7 +1227,7 @@ def stored_fields(self, docnum):
class ByteLengthsBase:
- magic = b("~LN1")
+ magic = b"~LN1"
def __init__(self):
self.starts = {}
@@ -1320,7 +1307,7 @@ def to_file(self, dbfile, doccount):
def from_file(cls, dbfile, doccount=None):
obj = cls()
obj._read_header(dbfile, doccount)
- for fieldname, start in iteritems(obj.starts):
+ for fieldname, start in obj.starts.items():
obj.lengths[fieldname] = dbfile.get_array(start, "B", obj._count)
dbfile.close()
return obj
@@ -1450,7 +1437,7 @@ def add(self, vdict):
name_map = self.name_map
vlist = [None] * len(names)
- for k, v in iteritems(vdict):
+ for k, v in vdict.items():
if k in name_map:
vlist[name_map[k]] = v
else:
@@ -1497,7 +1484,7 @@ def __init__(self, dbfile):
# Previous versions stored the list of names as a map of names to
# positions... it seemed to make sense at the time...
self.names = [None] * len(nameobj)
- for name, pos in iteritems(nameobj):
+ for name, pos in nameobj.items():
self.names[pos] = name
else:
self.names = nameobj
@@ -1518,7 +1505,7 @@ def __iter__(self):
dbfile.seek(self.basepos)
for length in lengths:
- vlist = loads(dbfile.read(length) + b("."))
+ vlist = loads(dbfile.read(length) + b".")
vdict = {
names[i]: vlist[i] for i in range(len(vlist)) if vlist[i] is not None
}
@@ -1538,7 +1525,7 @@ def __getitem__(self, num):
)
position, length = unpack_stored_pointer(ptr)
dbfile.seek(position)
- vlist = loads(dbfile.read(length) + b("."))
+ vlist = loads(dbfile.read(length) + b".")
names = self.names
# Recreate a dictionary by putting the field names and values back
@@ -1562,9 +1549,9 @@ def __init__(self, indexname, doccount=0, segid=None, deleted=None):
deleted documents exist in this segment.
"""
- assert isinstance(indexname, string_type)
+ assert isinstance(indexname, str)
self.indexname = indexname
- assert isinstance(doccount, integer_types)
+ assert isinstance(doccount, int)
self.doccount = doccount
self.segid = self._random_id() if segid is None else segid
self.deleted = deleted
@@ -1614,7 +1601,7 @@ def deleted_docs(self):
class W2Block:
- magic = b("Blk3")
+ magic = b"Blk3"
infokeys = (
"count",
@@ -1828,9 +1815,9 @@ def to_string(self):
@classmethod
def from_string(cls, s):
- assert isinstance(s, bytes_type)
+ assert isinstance(s, bytes)
- if isinstance(s, string_type):
+ if isinstance(s, str):
hbyte = ord(s[0]) # Python 2.x - str
else:
hbyte = s[0] # Python 3 - bytes
@@ -1846,10 +1833,10 @@ def from_string(cls, s):
if hbyte == 0:
p = unpack_long(pstr)[0]
else:
- p = loads(pstr + b("."))
+ p = loads(pstr + b".")
else:
# Old format was encoded as a variable length pickled tuple
- v = loads(s + b("."))
+ v = loads(s + b".")
if len(v) == 1:
w = df = 1
p = v[0]
@@ -1912,7 +1899,7 @@ def minimize_ids(arry, stringids, compression=0):
arry = array(typecode, iter(arry))
if not IS_LITTLE:
arry.byteswap()
- string = array_tobytes(arry)
+ string = arry.tobytes()
if compression:
string = zlib.compress(string, compression)
return (typecode, string)
@@ -1925,7 +1912,7 @@ def deminimize_ids(typecode, count, string, compression=0):
return loads(string)
else:
arry = array(typecode)
- array_frombytes(arry, string)
+ arry.frombytes(string)
if not IS_LITTLE:
arry.byteswap()
return arry
@@ -1933,11 +1920,11 @@ def deminimize_ids(typecode, count, string, compression=0):
def minimize_weights(weights, compression=0):
if all(w == 1.0 for w in weights):
- string = b("")
+ string = b""
else:
if not IS_LITTLE:
weights.byteswap()
- string = array_tobytes(weights)
+ string = weights.tobytes()
if string and compression:
string = zlib.compress(string, compression)
return string
@@ -1949,7 +1936,7 @@ def deminimize_weights(count, string, compression=0):
if compression:
string = zlib.decompress(string)
arry = array("f")
- array_frombytes(arry, string)
+ arry.frombytes(string)
if not IS_LITTLE:
arry.byteswap()
return arry
@@ -1959,9 +1946,9 @@ def minimize_values(postingsize, values, compression=0):
if postingsize < 0:
string = dumps(values, -1)[2:]
elif postingsize == 0:
- string = b("")
+ string = b""
else:
- string = b("").join(values)
+ string = b"".join(values)
if string and compression:
string = zlib.compress(string, compression)
return string
@@ -1981,7 +1968,6 @@ def deminimize_values(postingsize, count, string, compression=0):
# Legacy field types
-from whoosh.compat import long_type
from whoosh.fields import NUMERIC
@@ -2012,7 +1998,7 @@ def __init__(
from whoosh import analysis, formats
self.type = type
- if self.type is long_type:
+ if self.type is int:
# This will catch the Python 3 int type
self._to_text = self._long_to_text
self._from_text = self._text_to_long
@@ -2045,7 +2031,7 @@ def __init__(
def __setstate__(self, d):
self.__dict__.update(d)
self.numtype = d["type"]
- self.bits = 32 if (d["type"] is int and not PY3) else 64
+ self.bits = 64
def prepare_number(self, x):
if x is None or x == emptybytes:
@@ -2064,7 +2050,7 @@ def unprepare_number(self, x):
return x
def to_bytes(self, x, shift=0):
- if isinstance(x, bytes_type):
+ if isinstance(x, bytes):
return x
return utf8encode(self.to_text(x, shift))[0]
@@ -2132,9 +2118,7 @@ def sortable_terms(self, ixreader, fieldname):
class OLD_DATETIME(OLD_NUMERIC):
def __init__(self, stored=False, unique=False):
- OLD_NUMERIC.__init__(
- self, type=long_type, stored=stored, unique=unique, shift_step=8
- )
+ OLD_NUMERIC.__init__(self, type=int, stored=stored, unique=unique, shift_step=8)
def to_text(self, x, shift=0):
from datetime import datetime
@@ -2142,13 +2126,13 @@ def to_text(self, x, shift=0):
from whoosh.util.times import floor
try:
- if isinstance(x, text_type):
+ if isinstance(x, str):
# For indexing, support same strings as for query parsing
x = self._parse_datestring(x)
x = floor(x) # this makes most sense (unspecified = lowest)
if isinstance(x, datetime):
x = datetime_to_long(x)
- elif not isinstance(x, integer_types):
+ elif not isinstance(x, int):
raise TypeError()
except ValueError:
raise ValueError(f"DATETIME.to_text can't convert from {x!r}")
@@ -2236,13 +2220,13 @@ def text_to_int(text, signed=True):
def long_to_text(x, shift=0, signed=True):
- x = to_sortable(long_type, 64, signed, x)
+ x = to_sortable(int, 64, signed, x)
return sortable_long_to_text(x, shift)
def text_to_long(text, signed=True):
x = text_to_sortable_long(text)
- x = from_sortable(long_type, 64, signed, x)
+ x = from_sortable(int, 64, signed, x)
return x
diff --git a/src/whoosh/codec/whoosh3.py b/src/whoosh/codec/whoosh3.py
index 16107445..96a06961 100644
--- a/src/whoosh/codec/whoosh3.py
+++ b/src/whoosh/codec/whoosh3.py
@@ -32,19 +32,10 @@
import struct
from array import array
from collections import defaultdict
+from pickle import dumps, loads
from whoosh import columns, formats
from whoosh.codec import base
-from whoosh.compat import (
- b,
- bytes_type,
- dumps,
- integer_types,
- iteritems,
- loads,
- range,
- string_type,
-)
from whoosh.filedb import compound, filetables
from whoosh.matching import LeafMatcher, ListMatcher, ReadTooFar
from whoosh.reading import TermInfo, TermNotFound
@@ -72,7 +63,7 @@
# This byte sequence is written at the start of a posting list to identify the
# codec/version
-WHOOSH3_HEADER_MAGIC = b("W3Bl")
+WHOOSH3_HEADER_MAGIC = b"W3Bl"
# Column type to store field length info
LENGTHS_COLUMN = columns.NumericColumn("B", default=0)
@@ -225,7 +216,7 @@ def _prep_vectors(self):
self._vpostfile = self._create_file(W3Codec.VPOSTS_EXT)
# We'll use offset==0 as a marker for "no vectors", so we can't start
# postings at position 0, so just write a few header bytes :)
- self._vpostfile.write(b("VPST"))
+ self._vpostfile.write(b"VPST")
def start_doc(self, docnum):
if self._indoc:
@@ -551,7 +542,7 @@ def first(self):
return self.next()
def find(self, term):
- if not isinstance(term, bytes_type):
+ if not isinstance(term, bytes):
term = self._fieldobj.to_bytes(term)
key = self._keycoder(self._fieldname, term)
self._pos = self._tindex.closest_key_pos(key)
@@ -596,11 +587,11 @@ def __init__(self, codec, dbfile, length, postfile):
self._postfile = postfile
self._fieldunmap = [None] * len(self._fieldmap)
- for fieldname, num in iteritems(self._fieldmap):
+ for fieldname, num in self._fieldmap.items():
self._fieldunmap[num] = fieldname
def _keycoder(self, fieldname, tbytes):
- assert isinstance(tbytes, bytes_type), f"tbytes={tbytes!r}"
+ assert isinstance(tbytes, bytes), f"tbytes={tbytes!r}"
fnum = self._fieldmap.get(fieldname, 65535)
return pack_ushort(fnum) + tbytes
@@ -730,12 +721,12 @@ def add_posting(self, id_, weight, vbytes, length=None):
# Check types
if self._byteids:
- assert isinstance(id_, string_type), f"id_={id_!r}"
+ assert isinstance(id_, str), f"id_={id_!r}"
else:
- assert isinstance(id_, integer_types), f"id_={id_!r}"
+ assert isinstance(id_, int), f"id_={id_!r}"
assert isinstance(weight, (int, float)), f"weight={weight!r}"
- assert isinstance(vbytes, bytes_type), f"vbytes={vbytes!r}"
- assert length is None or isinstance(length, integer_types)
+ assert isinstance(vbytes, bytes), f"vbytes={vbytes!r}"
+ assert length is None or isinstance(length, int)
self._ids.append(id_)
self._weights.append(weight)
@@ -1176,7 +1167,7 @@ def _read_values(self):
elif fixedsize == 0:
self._values = (None,) * self._blocklength
else:
- assert isinstance(vs, bytes_type)
+ assert isinstance(vs, bytes)
self._values = tuple(
vs[i : i + fixedsize] for i in range(0, len(vs), fixedsize)
)
diff --git a/src/whoosh/collectors.py b/src/whoosh/collectors.py
index 4d1d1ba1..cab75c10 100644
--- a/src/whoosh/collectors.py
+++ b/src/whoosh/collectors.py
@@ -77,13 +77,13 @@ def collect(self, sub_docnum):
import os
import threading
+from abc import abstractmethod
from array import array
from bisect import insort
from collections import defaultdict
from heapq import heapify, heappush, heapreplace
from whoosh import sorting
-from whoosh.compat import abstractmethod, iteritems, itervalues
from whoosh.searching import Results, TimeLimit
from whoosh.util import now
@@ -816,7 +816,7 @@ def set_subsearcher(self, subsearcher, offset):
WrappingCollector.set_subsearcher(self, subsearcher, offset)
# Tell each categorizer about the new subsearcher and offset
- for categorizer in itervalues(self.categorizers):
+ for categorizer in self.categorizers.values():
categorizer.set_searcher(self.child.subsearcher, self.child.offset)
def collect(self, sub_docnum):
@@ -828,7 +828,7 @@ def collect(self, sub_docnum):
sortkey = self.child.collect(sub_docnum)
# For each facet we're grouping by
- for name, categorizer in iteritems(self.categorizers):
+ for name, categorizer in self.categorizers.items():
add = self.facetmaps[name].add
# We have to do more work if the facet allows overlapping groups
diff --git a/src/whoosh/columns.py b/src/whoosh/columns.py
index e51392c8..475b8db8 100644
--- a/src/whoosh/columns.py
+++ b/src/whoosh/columns.py
@@ -51,13 +51,14 @@
import warnings
from array import array
from bisect import bisect_right
+from io import BytesIO
+from pickle import dumps, loads
try:
import zlib
except ImportError:
zlib = None
-from whoosh.compat import BytesIO, array_tobytes, b, bytes_type, dumps, loads
from whoosh.filedb.structfile import StructFile
from whoosh.idsets import BitSet, OnDiskBitSet
from whoosh.system import emptybytes
@@ -327,7 +328,7 @@ def __init__(self, fixedlen, default=None):
self._fixedlen = fixedlen
if default is None:
- default = b("\x00") * fixedlen
+ default = b"\x00" * fixedlen
elif len(default) != fixedlen:
raise ValueError
self._default = default
@@ -428,7 +429,7 @@ def __init__(self, fixedlen=0, default=None):
self._fixedlen = fixedlen
if default is None:
- default = b("\x00") * fixedlen if fixedlen else emptybytes
+ default = b"\x00" * fixedlen if fixedlen else emptybytes
elif fixedlen and len(default) != fixedlen:
raise ValueError
self._default = default
@@ -703,7 +704,7 @@ def finish(self, doccount):
bits = self._bitset.bits
if zlib and len(bits) <= self._compressat:
- compressed = zlib.compress(array_tobytes(bits), 3)
+ compressed = zlib.compress(bits.tobytes(), 3)
dbfile.write(compressed)
dbfile.write_byte(1)
else:
@@ -1253,7 +1254,7 @@ class Writer(WrappedColumnWriter):
def add(self, docnum, ls):
out = [varint(len(ls))]
for v in ls:
- assert isinstance(v, bytes_type)
+ assert isinstance(v, bytes)
out.append(varint(len(v)))
out.append(v)
self._child.add(docnum, emptybytes.join(out))
diff --git a/src/whoosh/compat.py b/src/whoosh/compat.py
deleted file mode 100644
index f098d036..00000000
--- a/src/whoosh/compat.py
+++ /dev/null
@@ -1,223 +0,0 @@
-import array
-import sys
-
-# Run time aliasing of Python2/3 differences
-
-
-def htmlescape(s, quote=True):
- # this is html.escape reimplemented with cgi.escape,
- # so it works for python 2.x, 3.0 and 3.1
- import cgi
-
- s = cgi.escape(s, quote)
- if quote:
- # python 3.2 also replaces the single quotes:
- s = s.replace("'", "&#x27;")
- return s
-
-
-if sys.version_info[0] < 3:
- PY3 = False
-
- def b(s):
- return s
-
- import cStringIO as StringIO
-
- StringIO = BytesIO = StringIO.StringIO
- callable = callable
- integer_types = (int, long)
- iteritems = lambda o: o.iteritems()
- itervalues = lambda o: o.itervalues()
- iterkeys = lambda o: o.iterkeys()
- from itertools import izip
-
- long_type = long
- next = lambda o: o.next()
- # import cPickle as pickle
- from cPickle import dump, dumps, load, loads
-
- string_type = basestring
- text_type = unicode
- bytes_type = str
- unichr = unichr
- from urllib import urlretrieve
-
- import Queue as queue
-
- def byte(num):
- return chr(num)
-
- def u(s):
- return unicode(s, "unicode_escape")
-
- def with_metaclass(meta, base=object):
- class _WhooshBase(base):
- __metaclass__ = meta
-
- return _WhooshBase
-
- # range = range
- zip_ = zip
-
- def memoryview_(source, offset=None, length=None):
- if offset or length:
- return buffer(source, offset, length)
- else:
- return buffer(source)
-
-else:
- PY3 = True
- import collections
-
- def b(s):
- return s.encode("latin-1")
-
- import io
-
- BytesIO = io.BytesIO
- callable = lambda o: isinstance(o, collections.abc.Callable)
- exec_ = eval("exec")
- integer_types = (int,)
- iteritems = lambda o: o.items()
- itervalues = lambda o: o.values()
- iterkeys = lambda o: iter(o.keys())
- izip = zip
- long_type = int
- next = next
- import pickle
- from pickle import dump, dumps, load, loads
-
- StringIO = io.StringIO
- string_type = str
- text_type = str
- bytes_type = bytes
- unichr = chr
- import queue
- from urllib.request import urlretrieve
-
- def byte(num):
- return bytes((num,))
-
- def u(s):
- if isinstance(s, bytes):
- return s.decode("ascii")
- return s
-
- def with_metaclass(meta, base=object):
- ns = {"base": base, "meta": meta}
- exec_(
- """class _WhooshBase(base, metaclass=meta):
- pass""",
- ns,
- )
- return ns["_WhooshBase"]
-
- range = range
- zip_ = lambda *args: list(zip(*args))
-
- def memoryview_(source, offset=None, length=None):
- mv = memoryview(source)
- if offset or length:
- return mv[offset : offset + length]
- else:
- return mv
-
- try:
- # for python >= 3.2, avoid DeprecationWarning for cgi.escape
- from html import escape as htmlescape
- except ImportError:
- pass
-
-
-if hasattr(array.array, "tobytes"):
-
- def array_tobytes(arry):
- return arry.tobytes()
-
- def array_frombytes(arry, bs):
- return arry.frombytes(bs)
-
-else:
-
- def array_tobytes(arry):
- return arry.tostring()
-
- def array_frombytes(arry, bs):
- return arry.fromstring(bs)
-
-
-# Implementations missing from older versions of Python
-
-try:
- from itertools import permutations # @UnusedImport
-except ImportError:
- # Python 2.5
- def permutations(iterable, r=None):
- pool = tuple(iterable)
- n = len(pool)
- r = n if r is None else r
- if r > n:
- return
- indices = range(n)
- cycles = range(n, n - r, -1)
- yield tuple(pool[i] for i in indices[:r])
- while n:
- for i in reversed(range(r)):
- cycles[i] -= 1
- if cycles[i] == 0:
- indices[i:] = indices[i + 1 :] + indices[i : i + 1]
- cycles[i] = n - i
- else:
- j = cycles[i]
- indices[i], indices[-j] = indices[-j], indices[i]
- yield tuple(pool[i] for i in indices[:r])
- break
- else:
- return
-
-
-try:
- # Python 2.6-2.7
- from itertools import izip_longest # @UnusedImport
-except ImportError:
- try:
- # Python 3.0
- from itertools import zip_longest as izip_longest # @UnusedImport
- except ImportError:
- # Python 2.5
- from itertools import chain, izip, repeat
-
- def izip_longest(*args, **kwds):
- fillvalue = kwds.get("fillvalue")
-
- def sentinel(counter=([fillvalue] * (len(args) - 1)).pop):
- yield counter()
-
- fillers = repeat(fillvalue)
- iters = [chain(it, sentinel(), fillers) for it in args]
- try:
- yield from izip(*iters)
- except IndexError:
- pass
-
-
-try:
- from operator import methodcaller # @UnusedImport
-except ImportError:
- # Python 2.5
- def methodcaller(name, *args, **kwargs):
- def caller(obj):
- return getattr(obj, name)(*args, **kwargs)
-
- return caller
-
-
-try:
- from abc import abstractmethod # @UnusedImport
-except ImportError:
- # Python 2.5
- def abstractmethod(funcobj):
- """A decorator indicating abstract methods."""
- funcobj.__isabstractmethod__ = True
- return funcobj
diff --git a/src/whoosh/externalsort.py b/src/whoosh/externalsort.py
index 46fd39b8..255c59fa 100644
--- a/src/whoosh/externalsort.py
+++ b/src/whoosh/externalsort.py
@@ -33,8 +33,7 @@
import os
import tempfile
from heapq import heapify, heappop, heapreplace
-
-from whoosh.compat import dump, load
+from pickle import dump, load
## Python 3.2 had a bug that make marshal.load unusable
# if (hasattr(platform, "python_implementation")
diff --git a/src/whoosh/fields.py b/src/whoosh/fields.py
index ef585192..1ba30e45 100644
--- a/src/whoosh/fields.py
+++ b/src/whoosh/fields.py
@@ -38,7 +38,6 @@
from decimal import Decimal
from whoosh import analysis, columns, formats
-from whoosh.compat import bytes_type, itervalues, string_type, text_type, with_metaclass
from whoosh.system import emptybytes, pack_byte
from whoosh.util.numeric import NaN, from_sortable, to_sortable, typecode_max
from whoosh.util.text import utf8decode, utf8encode
@@ -171,7 +170,7 @@ def index(self, value, **kwargs):
"%s field %r cannot index without a format"
% (self.__class__.__name__, self)
)
- if not isinstance(value, (text_type, list, tuple)):
+ if not isinstance(value, (str, list, tuple)):
raise ValueError(f"{value!r} is not unicode or sequence")
assert isinstance(self.format, formats.Format)
@@ -218,7 +217,7 @@ def to_bytes(self, value):
if isinstance(value, (list, tuple)):
value = value[0]
- if not isinstance(value, bytes_type):
+ if not isinstance(value, bytes):
value = utf8encode(value)[0]
return value
@@ -704,7 +703,7 @@ def prepare_number(self, x):
return x
dc = self.decimal_places
- if dc and isinstance(x, (string_type, Decimal)):
+ if dc and isinstance(x, (str, Decimal)):
x = Decimal(x) * (10**dc)
elif isinstance(x, Decimal):
raise TypeError(
@@ -745,7 +744,7 @@ def to_bytes(self, x, shift=0):
# Try to avoid re-encoding; this sucks because on Python 2 we can't
# tell the difference between a string and encoded bytes, so we have
# to require the user use unicode when they mean string
- if isinstance(x, bytes_type):
+ if isinstance(x, bytes):
return x
if x == emptybytes or x is None:
@@ -843,7 +842,7 @@ def __init__(self, stored=False, unique=False, sortable=False):
def prepare_datetime(self, x):
from whoosh.util.times import floor
- if isinstance(x, text_type):
+ if isinstance(x, str):
# For indexing, support same strings as for query parsing --
# convert unicode to datetime object
x = self._parse_datestring(x)
@@ -851,13 +850,13 @@ def prepare_datetime(self, x):
if isinstance(x, datetime.datetime):
return datetime_to_long(x)
- elif isinstance(x, bytes_type):
+ elif isinstance(x, bytes):
return x
else:
raise Exception(f"{x!r} is not a datetime")
def to_column_value(self, x):
- if isinstance(x, bytes_type):
+ if isinstance(x, bytes):
raise Exception(f"{x!r} is not a datetime")
if isinstance(x, (list, tuple)):
x = x[0]
@@ -966,18 +965,18 @@ def _obj_to_bool(self, x):
# otherwise call bool() on the query value. This lets you pass objects
# as query values and do the right thing.
- if isinstance(x, string_type) and x.lower() in self.trues:
+ if isinstance(x, str) and x.lower() in self.trues:
x = True
- elif isinstance(x, string_type) and x.lower() in self.falses:
+ elif isinstance(x, str) and x.lower() in self.falses:
x = False
else:
x = bool(x)
return x
def to_bytes(self, x):
- if isinstance(x, bytes_type):
+ if isinstance(x, bytes):
return x
- elif isinstance(x, string_type):
+ elif isinstance(x, str):
x = x.lower() in self.trues
else:
x = bool(x)
@@ -985,7 +984,7 @@ def to_bytes(self, x):
return bs
def index(self, bit, **kwargs):
- if isinstance(bit, string_type):
+ if isinstance(bit, str):
bit = bit.lower() in self.trues
else:
bit = bool(bit)
@@ -1446,7 +1445,7 @@ def __getitem__(self, name):
return self._fields[name]
# Check if the name matches a dynamic field
- for expr, fieldtype in itervalues(self._dyn_fields):
+ for expr, fieldtype in self._dyn_fields.values():
if expr.match(name):
return fieldtype
@@ -1604,7 +1603,7 @@ def scorable_names(self):
return [name for name, field in self.items() if field.scorable]
-class SchemaClass(with_metaclass(MetaSchema, Schema)):
+class SchemaClass(Schema, metaclass=MetaSchema):
"""
Allows you to define a schema using declarative syntax, similar to
Django models::
diff --git a/src/whoosh/filedb/compound.py b/src/whoosh/filedb/compound.py
index 7f1e2793..26190152 100644
--- a/src/whoosh/filedb/compound.py
+++ b/src/whoosh/filedb/compound.py
@@ -28,6 +28,7 @@
import errno
import os
import sys
+from io import BytesIO
from shutil import copyfileobj
from threading import Lock
@@ -36,13 +37,20 @@
except ImportError:
mmap = None
-from whoosh.compat import BytesIO, memoryview_
from whoosh.filedb.filestore import FileStorage, StorageError
from whoosh.filedb.structfile import BufferFile, StructFile
from whoosh.system import emptybytes
from whoosh.util import random_name
+def memoryview_(source, offset=None, length=None):
+ mv = memoryview(source)
+ if offset or length:
+ return mv[offset : offset + length]
+ else:
+ return mv
+
+
class CompoundStorage(FileStorage):
readonly = True
diff --git a/src/whoosh/filedb/filestore.py b/src/whoosh/filedb/filestore.py
index d142e898..ba715c9f 100644
--- a/src/whoosh/filedb/filestore.py
+++ b/src/whoosh/filedb/filestore.py
@@ -30,14 +30,23 @@
import os
import sys
import tempfile
+from io import BytesIO
from threading import Lock
-from whoosh.compat import BytesIO, memoryview_
from whoosh.filedb.structfile import BufferFile, StructFile
from whoosh.index import _DEF_INDEX_NAME, EmptyIndexError
from whoosh.util import random_name
from whoosh.util.filelock import FileLock
+
+def memoryview_(source, offset=None, length=None):
+ mv = memoryview(source)
+ if offset or length:
+ return mv[offset : offset + length]
+ else:
+ return mv
+
+
# Exceptions
diff --git a/src/whoosh/filedb/filetables.py b/src/whoosh/filedb/filetables.py
index 46045012..59db4d6d 100644
--- a/src/whoosh/filedb/filetables.py
+++ b/src/whoosh/filedb/filetables.py
@@ -36,7 +36,6 @@
from binascii import crc32
from hashlib import md5 # type: ignore @UnresolvedImport
-from whoosh.compat import b, bytes_type
from whoosh.system import _INT_SIZE, emptybytes
from whoosh.util.numlists import GrowableArray
@@ -97,7 +96,7 @@ class HashWriter:
2 GB in length.
"""
- def __init__(self, dbfile, magic=b("HSH3"), hashtype=0):
+ def __init__(self, dbfile, magic=b"HSH3", hashtype=0):
"""
:param dbfile: a :class:`~whoosh.filedb.structfile.StructFile` object
to write to.
@@ -135,8 +134,8 @@ def add(self, key, value):
them using :meth:`HashReader.all`.
"""
- assert isinstance(key, bytes_type)
- assert isinstance(value, bytes_type)
+ assert isinstance(key, bytes)
+ assert isinstance(value, bytes)
dbfile = self.dbfile
pos = dbfile.tell()
@@ -224,7 +223,7 @@ class HashReader:
:class:`HashWriter`.
"""
- def __init__(self, dbfile, length=None, magic=b("HSH3"), startoffset=0):
+ def __init__(self, dbfile, length=None, magic=b"HSH3", startoffset=0):
"""
:param dbfile: a :class:`~whoosh.filedb.structfile.StructFile` object
to read from.
@@ -389,7 +388,7 @@ def ranges_for_key(self, key):
with the given key.
"""
- if not isinstance(key, bytes_type):
+ if not isinstance(key, bytes):
raise TypeError(f"Key {key!r} should be bytes")
dbfile = self.dbfile
@@ -544,7 +543,7 @@ def _read_extras(self):
def closest_key_pos(self, key):
# Given a key, return the position of that key OR the next highest key
# if the given key does not exist
- if not isinstance(key, bytes_type):
+ if not isinstance(key, bytes):
raise TypeError(f"Key {key!r} should be bytes")
indexbase = self.indexbase
@@ -674,7 +673,7 @@ def term_get(self, fieldname, btext, default=None):
def closest_term_pos(self, fieldname, key):
# Given a key, return the position of that key OR the next highest key
# if the given key does not exist
- if not isinstance(key, bytes_type):
+ if not isinstance(key, bytes):
raise TypeError(f"Key {key!r} should be bytes")
dbfile = self.dbfile
diff --git a/src/whoosh/filedb/gae.py b/src/whoosh/filedb/gae.py
index b2363c20..bfd1b80c 100644
--- a/src/whoosh/filedb/gae.py
+++ b/src/whoosh/filedb/gae.py
@@ -18,11 +18,11 @@
"""
import time
+from io import BytesIO
from google.appengine.api import memcache # type: ignore @UnresolvedImport
from google.appengine.ext import db # type: ignore @UnresolvedImport
-from whoosh.compat import BytesIO
from whoosh.filedb.filestore import ReadOnlyError, Storage
from whoosh.filedb.structfile import StructFile
from whoosh.index import _DEF_INDEX_NAME, TOC, FileIndex
diff --git a/src/whoosh/filedb/structfile.py b/src/whoosh/filedb/structfile.py
index 9db58ffd..dec60071 100644
--- a/src/whoosh/filedb/structfile.py
+++ b/src/whoosh/filedb/structfile.py
@@ -27,11 +27,10 @@
from array import array
from copy import copy
+from io import BytesIO
+from pickle import dump, load
from struct import calcsize
-from whoosh.compat import BytesIO, array_frombytes, array_tobytes, bytes_type
-from whoosh.compat import dump as dump_pickle
-from whoosh.compat import load as load_pickle
from whoosh.system import (
_FLOAT_SIZE,
_INT_SIZE,
@@ -247,11 +246,11 @@ def read_byte(self):
def write_pickle(self, obj, protocol=-1):
"""Writes a pickled representation of obj to the wrapped file."""
- dump_pickle(obj, self.file, protocol)
+ dump(obj, self.file, protocol)
def read_pickle(self):
"""Reads a pickled object from the wrapped file."""
- return load_pickle(self.file)
+ return load(self.file)
def write_sbyte(self, n):
self.write(pack_sbyte(n))
@@ -287,7 +286,7 @@ def write_array(self, arry):
if self.is_real:
arry.tofile(self.file)
else:
- self.write(array_tobytes(arry))
+ self.write(arry.tobytes())
def read_sbyte(self):
return unpack_sbyte(self.read(1))[0]
@@ -321,7 +320,7 @@ def read_array(self, typecode, length):
if self.is_real:
a.fromfile(self.file, length)
else:
- array_frombytes(a, self.read(length * _SIZEMAP[typecode]))
+ a.frombytes(self.read(length * _SIZEMAP[typecode]))
if IS_LITTLE:
a.byteswap()
return a
@@ -374,11 +373,11 @@ def subset(self, position, length, name=None):
return BufferFile(self.get(position, length), name=name)
def get(self, position, length):
- return bytes_type(self._buf[position : position + length])
+ return bytes(self._buf[position : position + length])
def get_array(self, position, typecode, length):
a = array(typecode)
- array_frombytes(a, self.get(position, length * _SIZEMAP[typecode]))
+ a.frombytes(self.get(position, length * _SIZEMAP[typecode]))
if IS_LITTLE:
a.byteswap()
return a
diff --git a/src/whoosh/formats.py b/src/whoosh/formats.py
index 7f9c39f2..4c7dce8f 100644
--- a/src/whoosh/formats.py
+++ b/src/whoosh/formats.py
@@ -32,9 +32,9 @@
"""
from collections import defaultdict
+from pickle import dumps, loads
from whoosh.analysis import entoken, unstopped
-from whoosh.compat import b, dumps, iteritems, loads
from whoosh.system import (
_FLOAT_SIZE,
_INT_SIZE,
@@ -199,9 +199,7 @@ def word_values(self, value, analyzer, **kwargs):
freqs[t.text] += 1
weights[t.text] += t.boost
- wvs = (
- (w, freq, weights[w] * fb, pack_uint(freq)) for w, freq in iteritems(freqs)
- )
+ wvs = ((w, freq, weights[w] * fb, pack_uint(freq)) for w, freq in freqs.items())
return wvs
def decode_frequency(self, valuestring):
@@ -233,7 +231,7 @@ def word_values(self, value, analyzer, **kwargs):
poses[t.text].append(t.pos)
weights[t.text] += t.boost
- for w, poslist in iteritems(poses):
+ for w, poslist in poses.items():
value = self.encode(poslist)
yield (w, len(poslist), weights[w] * fb, value)
@@ -246,8 +244,8 @@ def encode(self, poslist):
return pack_uint(len(deltas)) + dumps(deltas, 2)
def decode_positions(self, valuestring):
- if not valuestring.endswith(b(".")):
- valuestring += b(".")
+ if not valuestring.endswith(b"."):
+ valuestring += b"."
codes = loads(valuestring[_INT_SIZE:])
position = 0
positions = []
@@ -292,7 +290,7 @@ def word_values(self, value, analyzer, **kwargs):
seen[t.text].append((t.pos, t.startchar, t.endchar))
weights[t.text] += t.boost
- for w, poslist in iteritems(seen):
+ for w, poslist in seen.items():
value = self.encode(poslist)
yield (w, len(poslist), weights[w] * fb, value)
@@ -307,8 +305,8 @@ def encode(self, poslist):
return pack_uint(len(deltas)) + dumps(deltas, 2)
def decode_characters(self, valuestring):
- if not valuestring.endswith(b(".")):
- valuestring += b(".")
+ if not valuestring.endswith(b"."):
+ valuestring += b"."
codes = loads(valuestring[_INT_SIZE:])
position = 0
endchar = 0
@@ -321,8 +319,8 @@ def decode_characters(self, valuestring):
return posns_chars
def decode_positions(self, valuestring):
- if not valuestring.endswith(b(".")):
- valuestring += b(".")
+ if not valuestring.endswith(b"."):
+ valuestring += b"."
codes = loads(valuestring[_INT_SIZE:])
position = 0
posns = []
@@ -362,7 +360,7 @@ def word_values(self, value, analyzer, **kwargs):
boost = t.boost
seen[t.text].append((pos, boost))
- for w, poses in iteritems(seen):
+ for w, poses in seen.items():
value = self.encode(poses)
yield (w, len(poses), sum(p[1] for p in poses) * fb, value)
@@ -377,8 +375,8 @@ def encode(self, poses):
return pack_uint(len(poses)) + pack_float(summedboost) + dumps(codes, 2)
def decode_position_boosts(self, valuestring):
- if not valuestring.endswith(b(".")):
- valuestring += b(".")
+ if not valuestring.endswith(b"."):
+ valuestring += b"."
codes = loads(valuestring[_INT_SIZE + _FLOAT_SIZE :])
position = 0
posns_boosts = []
@@ -388,8 +386,8 @@ def decode_position_boosts(self, valuestring):
return posns_boosts
def decode_positions(self, valuestring):
- if not valuestring.endswith(b(".")):
- valuestring += b(".")
+ if not valuestring.endswith(b"."):
+ valuestring += b"."
codes = loads(valuestring[_INT_SIZE + _FLOAT_SIZE :])
position = 0
posns = []
@@ -427,7 +425,7 @@ def word_values(self, value, analyzer, **kwargs):
for t in tokens(value, analyzer, kwargs):
seen[t.text].append((t.pos, t.startchar, t.endchar, t.boost))
- for w, poses in iteritems(seen):
+ for w, poses in seen.items():
value, summedboost = self.encode(poses)
yield (w, len(poses), summedboost, value)
@@ -452,8 +450,8 @@ def encode(self, poses):
)
def decode_character_boosts(self, valuestring):
- if not valuestring.endswith(b(".")):
- valuestring += b(".")
+ if not valuestring.endswith(b"."):
+ valuestring += b"."
codes = loads(valuestring[_INT_SIZE + _FLOAT_SIZE :])
position = 0
endchar = 0
diff --git a/src/whoosh/highlight.py b/src/whoosh/highlight.py
index 562c68c1..27b1a05a 100644
--- a/src/whoosh/highlight.py
+++ b/src/whoosh/highlight.py
@@ -51,10 +51,10 @@
from collections import deque
from heapq import nlargest
+from html import escape as htmlescape
from itertools import groupby
from whoosh.analysis import Token
-from whoosh.compat import htmlescape
# The default value for the maximum chars to examine when fragmenting
DEFAULT_CHARLIMIT = 2**15
diff --git a/src/whoosh/idsets.py b/src/whoosh/idsets.py
index c2ce3885..9e0eec56 100644
--- a/src/whoosh/idsets.py
+++ b/src/whoosh/idsets.py
@@ -5,8 +5,8 @@
import operator
from array import array
from bisect import bisect_left, bisect_right
+from itertools import zip_longest
-from whoosh.compat import izip, izip_longest, next
from whoosh.util.numeric import bytes_for_bits
# Number of '1' bits in each byte (0-255)
@@ -284,7 +284,7 @@ class DocIdSet:
"""
def __eq__(self, other):
- for a, b in izip(self, other):
+ for a, b in zip(self, other):
if a != b:
return False
return True
@@ -596,7 +596,7 @@ def _zero_extra_bits(self, size):
def _logic(self, obj, op, other):
objbits = obj.bits
for i, (byte1, byte2) in enumerate(
- izip_longest(objbits, other.bits, fillvalue=0)
+ zip_longest(objbits, other.bits, fillvalue=0)
):
value = op(byte1, byte2) & 0xFF
if i >= len(objbits):
@@ -942,7 +942,7 @@ def __len__(self):
return sum(len(idset) for idset in self.idsets)
def __iter__(self):
- for idset, offset in izip(self.idsets, self.offsets):
+ for idset, offset in zip(self.idsets, self.offsets):
for docnum in idset:
yield docnum + offset
diff --git a/src/whoosh/index.py b/src/whoosh/index.py
index eac6c603..56a62556 100644
--- a/src/whoosh/index.py
+++ b/src/whoosh/index.py
@@ -31,12 +31,12 @@
import os.path
+import pickle
import re
import sys
from time import sleep, time
from whoosh import __version__
-from whoosh.compat import pickle, string_type
from whoosh.fields import ensure_schema
from whoosh.legacy import toc_loaders
from whoosh.system import _FLOAT_SIZE, _INT_SIZE, _LONG_SIZE
@@ -408,7 +408,7 @@ def __init__(self, storage, schema=None, indexname=_DEF_INDEX_NAME):
if not isinstance(storage, Storage):
raise ValueError(f"{storage!r} is not a Storage object")
- if not isinstance(indexname, string_type):
+ if not isinstance(indexname, str):
raise ValueError(f"indexname {indexname!r} is not a string")
if schema:
diff --git a/src/whoosh/lang/dmetaphone.py b/src/whoosh/lang/dmetaphone.py
index ac5a78a4..c7ecff2c 100644
--- a/src/whoosh/lang/dmetaphone.py
+++ b/src/whoosh/lang/dmetaphone.py
@@ -6,8 +6,6 @@
import re
-from whoosh.compat import u
-
vowels = frozenset("AEIOUY")
slavo_germ_exp = re.compile("W|K|CZ|WITZ")
silent_starts = re.compile("GN|KN|PN|WR|PS")
@@ -139,7 +137,7 @@ def double_metaphone(text): # noqa: C901, PLR0912, PLR0915
next = ("K", 2)
else: # default for 'C'
next = ("K", 1)
- elif ch == u("\xc7"):
+ elif ch == "\xc7":
next = ("S", 1)
elif ch == "D":
if text[pos : pos + 2] == "DG":
@@ -320,7 +318,7 @@ def double_metaphone(text): # noqa: C901, PLR0912, PLR0915
next = ("N", 2)
else:
next = ("N", 1)
- elif ch == u("\xd1"):
+ elif ch == "\xd1":
next = ("N", 1)
elif ch == "P":
if text[pos + 1] == "H":
diff --git a/src/whoosh/lang/morph_en.py b/src/whoosh/lang/morph_en.py
index 14dd4c97..43b6459b 100644
--- a/src/whoosh/lang/morph_en.py
+++ b/src/whoosh/lang/morph_en.py
@@ -8,8 +8,6 @@ class of Sun's `Minion search engine `_.
import re
-from whoosh.compat import iteritems
-
# Rule exceptions
exceptions = [
@@ -1125,7 +1123,7 @@ def variations(word):
num = int(
[
k
- for k, v in iteritems(match.groupdict())
+ for k, v in match.groupdict().items()
if v is not None and k.startswith("_g")
][0][2:]
)
diff --git a/src/whoosh/lang/phonetic.py b/src/whoosh/lang/phonetic.py
index fcec3c7c..a4adb143 100644
--- a/src/whoosh/lang/phonetic.py
+++ b/src/whoosh/lang/phonetic.py
@@ -4,8 +4,6 @@
import re
-from whoosh.compat import iteritems
-
# This soundex implementation is adapted from the recipe here:
# http://code.activestate.com/recipes/52213/
@@ -86,17 +84,15 @@ def soundex_esp(word):
# Create a dictionary mapping arabic characters to digits
_arabic_codes = {}
-for chars, code in iteritems(
- {
- "\u0627\u0623\u0625\u0622\u062d\u062e\u0647\u0639\u063a\u0634\u0648\u064a": "0",
- "\u0641\u0628": "1",
- "\u062c\u0632\u0633\u0635\u0638\u0642\u0643": "2",
- "\u062a\u062b\u062f\u0630\u0636\u0637": "3",
- "\u0644": "4",
- "\u0645\u0646": "5",
- "\u0631": "6",
- }
-):
+for chars, code in {
+ "\u0627\u0623\u0625\u0622\u062d\u062e\u0647\u0639\u063a\u0634\u0648\u064a": "0",
+ "\u0641\u0628": "1",
+ "\u062c\u0632\u0633\u0635\u0638\u0642\u0643": "2",
+ "\u062a\u062b\u062f\u0630\u0636\u0637": "3",
+ "\u0644": "4",
+ "\u0645\u0646": "5",
+ "\u0631": "6",
+}.items():
for char in chars:
_arabic_codes[char] = code
diff --git a/src/whoosh/lang/snowball/danish.py b/src/whoosh/lang/snowball/danish.py
index 9a4351af..504c471b 100644
--- a/src/whoosh/lang/snowball/danish.py
+++ b/src/whoosh/lang/snowball/danish.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _ScandinavianStemmer
@@ -28,7 +26,7 @@ class DanishStemmer(_ScandinavianStemmer):
"""
# The language's vowels and other important characters are defined.
- __vowels = u("aeiouy\xE6\xE5\xF8")
+ __vowels = "aeiouy\xE6\xE5\xF8"
__consonants = "bcdfghjklmnpqrstvwxz"
__double_consonants = (
"bb",
@@ -52,7 +50,7 @@ class DanishStemmer(_ScandinavianStemmer):
"xx",
"zz",
)
- __s_ending = u("abcdfghjklmnoprtvyz\xE5")
+ __s_ending = "abcdfghjklmnoprtvyz\xE5"
# The different suffixes, divided into the algorithm's steps
# and organized by length, are listed in tuples.
@@ -91,7 +89,7 @@ class DanishStemmer(_ScandinavianStemmer):
"s",
)
__step2_suffixes = ("gd", "dt", "gt", "kt")
- __step3_suffixes = ("elig", u("l\xF8st"), "lig", "els", "ig")
+ __step3_suffixes = ("elig", "l\xF8st", "lig", "els", "ig")
def stem(self, word):
"""
@@ -140,7 +138,7 @@ def stem(self, word):
for suffix in self.__step3_suffixes:
if r1.endswith(suffix):
- if suffix == u("l\xF8st"):
+ if suffix == "l\xF8st":
word = word[:-1]
r1 = r1[:-1]
else:
diff --git a/src/whoosh/lang/snowball/dutch.py b/src/whoosh/lang/snowball/dutch.py
index 8f73195a..0af0769c 100644
--- a/src/whoosh/lang/snowball/dutch.py
+++ b/src/whoosh/lang/snowball/dutch.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _StandardStemmer
@@ -18,7 +16,7 @@ class DutchStemmer(_StandardStemmer):
http://snowball.tartarus.org/algorithms/dutch/stemmer.html
"""
- __vowels = u("aeiouy\xE8")
+ __vowels = "aeiouy\xE8"
__step1_suffixes = ("heden", "ene", "en", "se", "s")
__step3b_suffixes = ("baar", "lijk", "bar", "end", "ing", "ig")
@@ -38,16 +36,16 @@ def stem(self, word):
# Vowel accents are removed.
word = (
- word.replace(u("\xE4"), "a")
- .replace(u("\xE1"), "a")
- .replace(u("\xEB"), "e")
- .replace(u("\xE9"), "e")
- .replace(u("\xED"), "i")
- .replace(u("\xEF"), "i")
- .replace(u("\xF6"), "o")
- .replace(u("\xF3"), "o")
- .replace(u("\xFC"), "u")
- .replace(u("\xFA"), "u")
+ word.replace("\xE4", "a")
+ .replace("\xE1", "a")
+ .replace("\xEB", "e")
+ .replace("\xE9", "e")
+ .replace("\xED", "i")
+ .replace("\xEF", "i")
+ .replace("\xF6", "o")
+ .replace("\xF3", "o")
+ .replace("\xFC", "u")
+ .replace("\xFA", "u")
)
# An initial 'y', a 'y' after a vowel,
diff --git a/src/whoosh/lang/snowball/english.py b/src/whoosh/lang/snowball/english.py
index aae50791..135817ea 100644
--- a/src/whoosh/lang/snowball/english.py
+++ b/src/whoosh/lang/snowball/english.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _StandardStemmer
@@ -163,12 +161,12 @@ def stem(self, word): # noqa: C901, PLR0912
# Map the different apostrophe characters to a single consistent one
word = (
- word.replace(u("\u2019"), u("\x27"))
- .replace(u("\u2018"), u("\x27"))
- .replace(u("\u201B"), u("\x27"))
+ word.replace("\u2019", "\x27")
+ .replace("\u2018", "\x27")
+ .replace("\u201B", "\x27")
)
- if word.startswith(u("\x27")):
+ if word.startswith("\x27"):
word = word[1:]
if word.startswith("y"):
diff --git a/src/whoosh/lang/snowball/finnish.py b/src/whoosh/lang/snowball/finnish.py
index 6119db65..96b467fd 100644
--- a/src/whoosh/lang/snowball/finnish.py
+++ b/src/whoosh/lang/snowball/finnish.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _StandardStemmer
@@ -30,9 +28,9 @@ class FinnishStemmer(_StandardStemmer):
http://snowball.tartarus.org/algorithms/finnish/stemmer.html
"""
- __vowels = u("aeiouy\xE4\xF6")
- __restricted_vowels = u("aeiou\xE4\xF6")
- __long_vowels = ("aa", "ee", "ii", "oo", "uu", u("\xE4\xE4"), u("\xF6\xF6"))
+ __vowels = "aeiouy\xE4\xF6"
+ __restricted_vowels = "aeiou\xE4\xF6"
+ __long_vowels = ("aa", "ee", "ii", "oo", "uu", "\xE4\xE4", "\xF6\xF6")
__consonants = "bcdfghjklmnpqrstvwxz"
__double_consonants = (
"bb",
@@ -58,25 +56,25 @@ class FinnishStemmer(_StandardStemmer):
)
__step1_suffixes = (
"kaan",
- u("k\xE4\xE4n"),
+ "k\xE4\xE4n",
"sti",
"kin",
"han",
- u("h\xE4n"),
+ "h\xE4n",
"ko",
- u("k\xF6"),
+ "k\xF6",
"pa",
- u("p\xE4"),
+ "p\xE4",
)
__step2_suffixes = (
"nsa",
- u("ns\xE4"),
+ "ns\xE4",
"mme",
"nne",
"si",
"ni",
"an",
- u("\xE4n"),
+ "\xE4n",
"en",
)
__step3_suffixes = (
@@ -87,45 +85,45 @@ class FinnishStemmer(_StandardStemmer):
"hen",
"hin",
"hon",
- u("h\xE4n"),
- u("h\xF6n"),
+ "h\xE4n",
+ "h\xF6n",
"den",
"tta",
- u("tt\xE4"),
+ "tt\xE4",
"ssa",
- u("ss\xE4"),
+ "ss\xE4",
"sta",
- u("st\xE4"),
+ "st\xE4",
"lla",
- u("ll\xE4"),
+ "ll\xE4",
"lta",
- u("lt\xE4"),
+ "lt\xE4",
"lle",
"ksi",
"ine",
"ta",
- u("t\xE4"),
+ "t\xE4",
"na",
- u("n\xE4"),
+ "n\xE4",
"a",
- u("\xE4"),
+ "\xE4",
"n",
)
__step4_suffixes = (
"impi",
"impa",
- u("imp\xE4"),
+ "imp\xE4",
"immi",
"imma",
- u("imm\xE4"),
+ "imm\xE4",
"mpi",
"mpa",
- u("mp\xE4"),
+ "mp\xE4",
"mmi",
"mma",
- u("mm\xE4"),
+ "mm\xE4",
"eja",
- u("ej\xE4"),
+ "ej\xE4",
)
def stem(self, word): # noqa: C901
@@ -157,7 +155,7 @@ def stem(self, word): # noqa: C901
r1 = r1[:-3]
r2 = r2[:-3]
else:
- if word[-len(suffix) - 1] in u("ntaeiouy\xE4\xF6"):
+ if word[-len(suffix) - 1] in "ntaeiouy\xE4\xF6":
word = word[: -len(suffix)]
r1 = r1[: -len(suffix)]
r2 = r2[: -len(suffix)]
@@ -196,12 +194,12 @@ def stem(self, word): # noqa: C901
r1 = r1[:-2]
r2 = r2[:-2]
- elif suffix == u("\xE4n"):
- if word[-4:-2] in (u("t\xE4"), u("n\xE4")) or word[-5:-2] in (
- u("ss\xE4"),
- u("st\xE4"),
- u("ll\xE4"),
- u("lt\xE4"),
+ elif suffix == "\xE4n":
+ if word[-4:-2] in ("t\xE4", "n\xE4") or word[-5:-2] in (
+ "ss\xE4",
+ "st\xE4",
+ "ll\xE4",
+ "lt\xE4",
):
word = word[:-2]
r1 = r1[:-2]
@@ -221,14 +219,14 @@ def stem(self, word): # noqa: C901
# STEP 3: Cases
for suffix in self.__step3_suffixes:
if r1.endswith(suffix):
- if suffix in ("han", "hen", "hin", "hon", u("h\xE4n"), u("h\xF6n")):
+ if suffix in ("han", "hen", "hin", "hon", "h\xE4n", "h\xF6n"):
if (
(suffix == "han" and word[-4] == "a")
or (suffix == "hen" and word[-4] == "e")
or (suffix == "hin" and word[-4] == "i")
or (suffix == "hon" and word[-4] == "o")
- or (suffix == u("h\xE4n") and word[-4] == u("\xE4"))
- or (suffix == u("h\xF6n") and word[-4] == u("\xF6"))
+ or (suffix == "h\xE4n" and word[-4] == "\xE4")
+ or (suffix == "h\xF6n" and word[-4] == "\xF6")
):
word = word[:-3]
r1 = r1[:-3]
@@ -256,14 +254,14 @@ def stem(self, word): # noqa: C901
else:
continue
- elif suffix in ("a", u("\xE4")):
+ elif suffix in ("a", "\xE4"):
if word[-2] in self.__vowels and word[-3] in self.__consonants:
word = word[:-1]
r1 = r1[:-1]
r2 = r2[:-1]
step3_success = True
- elif suffix in ("tta", u("tt\xE4")):
+ elif suffix in ("tta", "tt\xE4"):
if word[-4] == "e":
word = word[:-3]
r1 = r1[:-3]
@@ -290,7 +288,7 @@ def stem(self, word): # noqa: C901
# STEP 4: Other endings
for suffix in self.__step4_suffixes:
if r2.endswith(suffix):
- if suffix in ("mpi", "mpa", u("mp\xE4"), "mmi", "mma", u("mm\xE4")):
+ if suffix in ("mpi", "mpa", "mp\xE4", "mmi", "mma", "mm\xE4"):
if word[-5:-3] != "po":
word = word[:-3]
r1 = r1[:-3]
@@ -327,7 +325,7 @@ def stem(self, word): # noqa: C901
word = word[:-1]
r1 = r1[:-1]
- if len(r1) >= 2 and r1[-2] in self.__consonants and r1[-1] in u("a\xE4ei"):
+ if len(r1) >= 2 and r1[-2] in self.__consonants and r1[-1] in "a\xE4ei":
word = word[:-1]
r1 = r1[:-1]
diff --git a/src/whoosh/lang/snowball/french.py b/src/whoosh/lang/snowball/french.py
index c7ddd402..adacf635 100644
--- a/src/whoosh/lang/snowball/french.py
+++ b/src/whoosh/lang/snowball/french.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _StandardStemmer
@@ -23,7 +21,7 @@ class FrenchStemmer(_StandardStemmer):
http://snowball.tartarus.org/algorithms/french/stemmer.html
"""
- __vowels = u("aeiouy\xE2\xE0\xEB\xE9\xEA\xE8\xEF\xEE\xF4\xFB\xF9")
+ __vowels = "aeiouy\xE2\xE0\xEB\xE9\xEA\xE8\xEF\xEE\xF4\xFB\xF9"
__step1_suffixes = (
"issements",
"issement",
@@ -57,13 +55,13 @@ class FrenchStemmer(_StandardStemmer):
"able",
"iste",
"ence",
- u("it\xE9s"),
+ "it\xE9s",
"ives",
"eaux",
"euse",
"ment",
"eux",
- u("it\xE9"),
+ "it\xE9",
"ive",
"ifs",
"aux",
@@ -91,15 +89,15 @@ class FrenchStemmer(_StandardStemmer):
"iront",
"isses",
"issez",
- u("\xEEmes"),
- u("\xEEtes"),
+ "\xEEmes",
+ "\xEEtes",
"irai",
"iras",
"irez",
"isse",
"ies",
"ira",
- u("\xEEt"),
+ "\xEEt",
"ie",
"ir",
"is",
@@ -112,7 +110,7 @@ class FrenchStemmer(_StandardStemmer):
"erions",
"assent",
"assiez",
- u("\xE8rent"),
+ "\xE8rent",
"erais",
"erait",
"eriez",
@@ -125,28 +123,28 @@ class FrenchStemmer(_StandardStemmer):
"erai",
"eras",
"erez",
- u("\xE2mes"),
- u("\xE2tes"),
+ "\xE2mes",
+ "\xE2tes",
"ante",
"ants",
"asse",
- u("\xE9es"),
+ "\xE9es",
"era",
"iez",
"ais",
"ait",
"ant",
- u("\xE9e"),
- u("\xE9s"),
+ "\xE9e",
+ "\xE9s",
"er",
"ez",
- u("\xE2t"),
+ "\xE2t",
"ai",
"as",
- u("\xE9"),
+ "\xE9",
"a",
)
- __step4_suffixes = (u("i\xE8re"), u("I\xE8re"), "ion", "ier", "Ier", "e", u("\xEB"))
+ __step4_suffixes = ("i\xE8re", "I\xE8re", "ion", "ier", "Ier", "e", "\xEB")
def stem(self, word): # noqa: C901
"""
@@ -229,8 +227,8 @@ def stem(self, word): # noqa: C901
if "abl" in r2 or "iqU" in r2:
word = word[:-3]
- elif word[-3:] in (u("i\xE8r"), u("I\xE8r")):
- if u("i\xE8r") in rv or u("I\xE8r") in rv:
+ elif word[-3:] in ("i\xE8r", "I\xE8r"):
+ if "i\xE8r" in rv or "I\xE8r" in rv:
word = "".join((word[:-3], "i"))
elif suffix == "amment" and suffix in rv:
@@ -310,7 +308,7 @@ def stem(self, word): # noqa: C901
word = "".join((word[: -len(suffix)], "ent"))
step1_success = True
- elif suffix in (u("it\xE9"), u("it\xE9s")) and suffix in r2:
+ elif suffix in ("it\xE9", "it\xE9s") and suffix in r2:
word = word[: -len(suffix)]
step1_success = True
@@ -368,7 +366,7 @@ def stem(self, word): # noqa: C901
elif suffix in (
"eraIent",
"erions",
- u("\xE8rent"),
+ "\xE8rent",
"erais",
"erait",
"eriez",
@@ -377,14 +375,14 @@ def stem(self, word): # noqa: C901
"erai",
"eras",
"erez",
- u("\xE9es"),
+ "\xE9es",
"era",
"iez",
- u("\xE9e"),
- u("\xE9s"),
+ "\xE9e",
+ "\xE9s",
"er",
"ez",
- u("\xE9"),
+ "\xE9",
):
word = word[: -len(suffix)]
step2b_success = True
@@ -396,15 +394,15 @@ def stem(self, word): # noqa: C901
"aIent",
"antes",
"asses",
- u("\xE2mes"),
- u("\xE2tes"),
+ "\xE2mes",
+ "\xE2tes",
"ante",
"ants",
"asse",
"ais",
"ait",
"ant",
- u("\xE2t"),
+ "\xE2t",
"ai",
"as",
"a",
@@ -420,12 +418,12 @@ def stem(self, word): # noqa: C901
if step1_success or step2a_success or step2b_success:
if word[-1] == "Y":
word = "".join((word[:-1], "i"))
- elif word[-1] == u("\xE7"):
+ elif word[-1] == "\xE7":
word = "".join((word[:-1], "c"))
# STEP 4: Residual suffixes
else:
- if len(word) >= 2 and word[-1] == "s" and word[-2] not in u("aiou\xE8s"):
+ if len(word) >= 2 and word[-1] == "s" and word[-2] not in "aiou\xE8s":
word = word[:-1]
for suffix in self.__step4_suffixes:
@@ -434,13 +432,13 @@ def stem(self, word): # noqa: C901
if suffix == "ion" and suffix in r2 and rv[-4] in "st":
word = word[:-3]
- elif suffix in ("ier", u("i\xE8re"), "Ier", u("I\xE8re")):
+ elif suffix in ("ier", "i\xE8re", "Ier", "I\xE8re"):
word = "".join((word[: -len(suffix)], "i"))
elif suffix == "e":
word = word[:-1]
- elif suffix == u("\xEB") and word[-3:-1] == "gu":
+ elif suffix == "\xEB" and word[-3:-1] == "gu":
word = word[:-1]
break
@@ -453,7 +451,7 @@ def stem(self, word): # noqa: C901
if word[-i] not in self.__vowels:
i += 1
else:
- if i != 1 and word[-i] in (u("\xE9"), u("\xE8")):
+ if i != 1 and word[-i] in ("\xE9", "\xE8"):
word = "".join((word[:-i], "e", word[-i + 1 :]))
break
diff --git a/src/whoosh/lang/snowball/german.py b/src/whoosh/lang/snowball/german.py
index 263b4972..7caedd66 100644
--- a/src/whoosh/lang/snowball/german.py
+++ b/src/whoosh/lang/snowball/german.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _StandardStemmer
@@ -26,7 +24,7 @@ class GermanStemmer(_StandardStemmer):
"""
- __vowels = u("aeiouy\xE4\xF6\xFC")
+ __vowels = "aeiouy\xE4\xF6\xFC"
__s_ending = "bdfghklmnrt"
__st_ending = "bdfghklmnt"
@@ -46,7 +44,7 @@ def stem(self, word):
"""
word = word.lower()
- word = word.replace(u("\xDF"), "ss")
+ word = word.replace("\xDF", "ss")
# Every occurrence of 'u' and 'y'
# between vowels is put into upper case.
@@ -146,9 +144,9 @@ def stem(self, word):
# Umlaut accents are removed and
# 'u' and 'y' are put back into lower case.
word = (
- word.replace(u("\xE4"), "a")
- .replace(u("\xF6"), "o")
- .replace(u("\xFC"), "u")
+ word.replace("\xE4", "a")
+ .replace("\xF6", "o")
+ .replace("\xFC", "u")
.replace("U", "u")
.replace("Y", "y")
)
diff --git a/src/whoosh/lang/snowball/hungarian.py b/src/whoosh/lang/snowball/hungarian.py
index b3050721..2c34aabe 100644
--- a/src/whoosh/lang/snowball/hungarian.py
+++ b/src/whoosh/lang/snowball/hungarian.py
@@ -1,6 +1,3 @@
-from whoosh.compat import u
-
-
class HungarianStemmer:
"""
@@ -36,7 +33,7 @@ class HungarianStemmer:
"""
- __vowels = u("aeiou\xF6\xFC\xE1\xE9\xED\xF3\xF5\xFA\xFB")
+ __vowels = "aeiou\xF6\xFC\xE1\xE9\xED\xF3\xF5\xFA\xFB"
__digraphs = ("cs", "dz", "dzs", "gy", "ly", "ny", "ty", "zs")
__double_consonants = (
"bb",
@@ -66,30 +63,30 @@ class HungarianStemmer:
__step1_suffixes = ("al", "el")
__step2_suffixes = (
- u("k\xE9ppen"),
- u("onk\xE9nt"),
- u("enk\xE9nt"),
- u("ank\xE9nt"),
- u("k\xE9pp"),
- u("k\xE9nt"),
+ "k\xE9ppen",
+ "onk\xE9nt",
+ "enk\xE9nt",
+ "ank\xE9nt",
+ "k\xE9pp",
+ "k\xE9nt",
"ban",
"ben",
"nak",
"nek",
"val",
"vel",
- u("t\xF3l"),
- u("t\xF5l"),
- u("r\xF3l"),
- u("r\xF5l"),
- u("b\xF3l"),
- u("b\xF5l"),
+ "t\xF3l",
+ "t\xF5l",
+ "r\xF3l",
+ "r\xF5l",
+ "b\xF3l",
+ "b\xF5l",
"hoz",
"hez",
- u("h\xF6z"),
- u("n\xE1l"),
- u("n\xE9l"),
- u("\xE9rt"),
+ "h\xF6z",
+ "n\xE1l",
+ "n\xE9l",
+ "\xE9rt",
"kor",
"ba",
"be",
@@ -99,74 +96,74 @@ class HungarianStemmer:
"at",
"et",
"ot",
- u("\xF6t"),
+ "\xF6t",
"ul",
- u("\xFCl"),
- u("v\xE1"),
- u("v\xE9"),
+ "\xFCl",
+ "v\xE1",
+ "v\xE9",
"en",
"on",
"an",
- u("\xF6n"),
+ "\xF6n",
"n",
"t",
)
- __step3_suffixes = (u("\xE1nk\xE9nt"), u("\xE1n"), u("\xE9n"))
+ __step3_suffixes = ("\xE1nk\xE9nt", "\xE1n", "\xE9n")
__step4_suffixes = (
"astul",
- u("est\xFCl"),
- u("\xE1stul"),
- u("\xE9st\xFCl"),
+ "est\xFCl",
+ "\xE1stul",
+ "\xE9st\xFCl",
"stul",
- u("st\xFCl"),
+ "st\xFCl",
)
- __step5_suffixes = (u("\xE1"), u("\xE9"))
+ __step5_suffixes = ("\xE1", "\xE9")
__step6_suffixes = (
- u("ok\xE9"),
- u("\xF6k\xE9"),
- u("ak\xE9"),
- u("ek\xE9"),
- u("\xE1k\xE9"),
- u("\xE1\xE9i"),
- u("\xE9k\xE9"),
- u("\xE9\xE9i"),
- u("k\xE9"),
- u("\xE9i"),
- u("\xE9\xE9"),
- u("\xE9"),
+ "ok\xE9",
+ "\xF6k\xE9",
+ "ak\xE9",
+ "ek\xE9",
+ "\xE1k\xE9",
+ "\xE1\xE9i",
+ "\xE9k\xE9",
+ "\xE9\xE9i",
+ "k\xE9",
+ "\xE9i",
+ "\xE9\xE9",
+ "\xE9",
)
__step7_suffixes = (
- u("\xE1juk"),
- u("\xE9j\xFCk"),
- u("\xFCnk"),
+ "\xE1juk",
+ "\xE9j\xFCk",
+ "\xFCnk",
"unk",
"juk",
- u("j\xFCk"),
- u("\xE1nk"),
- u("\xE9nk"),
+ "j\xFCk",
+ "\xE1nk",
+ "\xE9nk",
"nk",
"uk",
- u("\xFCk"),
+ "\xFCk",
"em",
"om",
"am",
"od",
"ed",
"ad",
- u("\xF6d"),
+ "\xF6d",
"ja",
"je",
- u("\xE1m"),
- u("\xE1d"),
- u("\xE9m"),
- u("\xE9d"),
+ "\xE1m",
+ "\xE1d",
+ "\xE9m",
+ "\xE9d",
"m",
"d",
"a",
"e",
"o",
- u("\xE1"),
- u("\xE9"),
+ "\xE1",
+ "\xE9",
)
__step8_suffixes = (
"jaitok",
@@ -175,8 +172,8 @@ class HungarianStemmer:
"jeink",
"aitok",
"eitek",
- u("\xE1itok"),
- u("\xE9itek"),
+ "\xE1itok",
+ "\xE9itek",
"jaim",
"jeim",
"jaid",
@@ -186,8 +183,8 @@ class HungarianStemmer:
"itek",
"jeik",
"jaik",
- u("\xE1ink"),
- u("\xE9ink"),
+ "\xE1ink",
+ "\xE9ink",
"aim",
"eim",
"aid",
@@ -197,22 +194,22 @@ class HungarianStemmer:
"ink",
"aik",
"eik",
- u("\xE1im"),
- u("\xE1id"),
- u("\xE1ik"),
- u("\xE9im"),
- u("\xE9id"),
- u("\xE9ik"),
+ "\xE1im",
+ "\xE1id",
+ "\xE1ik",
+ "\xE9im",
+ "\xE9id",
+ "\xE9ik",
"im",
"id",
"ai",
"ei",
"ik",
- u("\xE1i"),
- u("\xE9i"),
+ "\xE1i",
+ "\xE9i",
"i",
)
- __step9_suffixes = (u("\xE1k"), u("\xE9k"), u("\xF6k"), "ok", "ek", "ak", "k")
+ __step9_suffixes = ("\xE1k", "\xE9k", "\xF6k", "ok", "ek", "ak", "k")
def stem(self, word):
"""
@@ -245,11 +242,11 @@ def stem(self, word):
word = word[: -len(suffix)]
r1 = r1[: -len(suffix)]
- if r1.endswith(u("\xE1")):
+ if r1.endswith("\xE1"):
word = "".join((word[:-1], "a"))
r1 = "".join((r1[:-1], "a"))
- elif r1.endswith(u("\xE9")):
+ elif r1.endswith("\xE9"):
word = "".join((word[:-1], "e"))
r1 = "".join((r1[:-1], "e"))
break
@@ -257,7 +254,7 @@ def stem(self, word):
# STEP 3: Remove special cases
for suffix in self.__step3_suffixes:
if r1.endswith(suffix):
- if suffix == u("\xE9n"):
+ if suffix == "\xE9n":
word = "".join((word[:-2], "e"))
r1 = "".join((r1[:-2], "e"))
else:
@@ -268,11 +265,11 @@ def stem(self, word):
# STEP 4: Remove other cases
for suffix in self.__step4_suffixes:
if r1.endswith(suffix):
- if suffix == u("\xE1stul"):
+ if suffix == "\xE1stul":
word = "".join((word[:-5], "a"))
r1 = "".join((r1[:-5], "a"))
- elif suffix == u("\xE9st\xFCl"):
+ elif suffix == "\xE9st\xFCl":
word = "".join((word[:-5], "e"))
r1 = "".join((r1[:-5], "e"))
else:
@@ -294,11 +291,11 @@ def stem(self, word):
# STEP 6: Remove owned
for suffix in self.__step6_suffixes:
if r1.endswith(suffix):
- if suffix in (u("\xE1k\xE9"), u("\xE1\xE9i")):
+ if suffix in ("\xE1k\xE9", "\xE1\xE9i"):
word = "".join((word[:-3], "a"))
r1 = "".join((r1[:-3], "a"))
- elif suffix in (u("\xE9k\xE9"), u("\xE9\xE9i"), u("\xE9\xE9")):
+ elif suffix in ("\xE9k\xE9", "\xE9\xE9i", "\xE9\xE9"):
word = "".join((word[: -len(suffix)], "e"))
r1 = "".join((r1[: -len(suffix)], "e"))
else:
@@ -311,21 +308,21 @@ def stem(self, word):
if word.endswith(suffix):
if r1.endswith(suffix):
if suffix in (
- u("\xE1nk"),
- u("\xE1juk"),
- u("\xE1m"),
- u("\xE1d"),
- u("\xE1"),
+ "\xE1nk",
+ "\xE1juk",
+ "\xE1m",
+ "\xE1d",
+ "\xE1",
):
word = "".join((word[: -len(suffix)], "a"))
r1 = "".join((r1[: -len(suffix)], "a"))
elif suffix in (
- u("\xE9nk"),
- u("\xE9j\xFCk"),
- u("\xE9m"),
- u("\xE9d"),
- u("\xE9"),
+ "\xE9nk",
+ "\xE9j\xFCk",
+ "\xE9m",
+ "\xE9d",
+ "\xE9",
):
word = "".join((word[: -len(suffix)], "e"))
r1 = "".join((r1[: -len(suffix)], "e"))
@@ -339,23 +336,23 @@ def stem(self, word):
if word.endswith(suffix):
if r1.endswith(suffix):
if suffix in (
- u("\xE1im"),
- u("\xE1id"),
- u("\xE1i"),
- u("\xE1ink"),
- u("\xE1itok"),
- u("\xE1ik"),
+ "\xE1im",
+ "\xE1id",
+ "\xE1i",
+ "\xE1ink",
+ "\xE1itok",
+ "\xE1ik",
):
word = "".join((word[: -len(suffix)], "a"))
r1 = "".join((r1[: -len(suffix)], "a"))
elif suffix in (
- u("\xE9im"),
- u("\xE9id"),
- u("\xE9i"),
- u("\xE9ink"),
- u("\xE9itek"),
- u("\xE9ik"),
+ "\xE9im",
+ "\xE9id",
+ "\xE9i",
+ "\xE9ink",
+ "\xE9itek",
+ "\xE9ik",
):
word = "".join((word[: -len(suffix)], "e"))
r1 = "".join((r1[: -len(suffix)], "e"))
@@ -368,9 +365,9 @@ def stem(self, word):
for suffix in self.__step9_suffixes:
if word.endswith(suffix):
if r1.endswith(suffix):
- if suffix == u("\xE1k"):
+ if suffix == "\xE1k":
word = "".join((word[:-2], "a"))
- elif suffix == u("\xE9k"):
+ elif suffix == "\xE9k":
word = "".join((word[:-2], "e"))
else:
word = word[: -len(suffix)]
diff --git a/src/whoosh/lang/snowball/italian.py b/src/whoosh/lang/snowball/italian.py
index 2165a8d5..0fec6abf 100644
--- a/src/whoosh/lang/snowball/italian.py
+++ b/src/whoosh/lang/snowball/italian.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _StandardStemmer
@@ -22,7 +20,7 @@ class ItalianStemmer(_StandardStemmer):
"""
- __vowels = u("aeiou\xE0\xE8\xEC\xF2\xF9")
+ __vowels = "aeiou\xE0\xE8\xEC\xF2\xF9"
__step0_suffixes = (
"gliela",
"gliele",
@@ -94,9 +92,9 @@ class ItalianStemmer(_StandardStemmer):
"ista",
"iste",
"isti",
- u("ist\xE0"),
- u("ist\xE8"),
- u("ist\xEC"),
+ "ist\xE0",
+ "ist\xE8",
+ "ist\xEC",
"ante",
"anti",
"enza",
@@ -109,7 +107,7 @@ class ItalianStemmer(_StandardStemmer):
"osi",
"osa",
"ose",
- u("it\xE0"),
+ "it\xE0",
"ivo",
"ivi",
"iva",
@@ -179,16 +177,16 @@ class ItalianStemmer(_StandardStemmer):
"ava",
"avi",
"avo",
- u("er\xE0"),
+ "er\xE0",
"ere",
- u("er\xF2"),
+ "er\xF2",
"ete",
"eva",
"evi",
"evo",
- u("ir\xE0"),
+ "ir\xE0",
"ire",
- u("ir\xF2"),
+ "ir\xF2",
"ita",
"ite",
"iti",
@@ -221,11 +219,11 @@ def stem(self, word):
# All acute accents are replaced by grave accents.
word = (
- word.replace(u("\xE1"), u("\xE0"))
- .replace(u("\xE9"), u("\xE8"))
- .replace(u("\xED"), u("\xEC"))
- .replace(u("\xF3"), u("\xF2"))
- .replace(u("\xFA"), u("\xF9"))
+ word.replace("\xE1", "\xE0")
+ .replace("\xE9", "\xE8")
+ .replace("\xED", "\xEC")
+ .replace("\xF3", "\xF2")
+ .replace("\xFA", "\xF9")
)
# Every occurrence of 'u' after 'q'
@@ -318,7 +316,7 @@ def stem(self, word):
word = "".join((word[:-2], "te"))
rv = "".join((rv[:-2], "te"))
- elif suffix == u("it\xE0"):
+ elif suffix == "it\xE0":
word = word[:-3]
r2 = r2[:-3]
rv = rv[:-3]
@@ -358,9 +356,7 @@ def stem(self, word):
break
# STEP 3a
- if rv.endswith(
- ("a", "e", "i", "o", u("\xE0"), u("\xE8"), u("\xEC"), u("\xF2"))
- ):
+ if rv.endswith(("a", "e", "i", "o", "\xE0", "\xE8", "\xEC", "\xF2")):
word = word[:-1]
rv = rv[:-1]
diff --git a/src/whoosh/lang/snowball/norwegian.py b/src/whoosh/lang/snowball/norwegian.py
index c011ca94..a68b40b1 100644
--- a/src/whoosh/lang/snowball/norwegian.py
+++ b/src/whoosh/lang/snowball/norwegian.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _ScandinavianStemmer
@@ -24,7 +22,7 @@ class NorwegianStemmer(_ScandinavianStemmer):
"""
- __vowels = u("aeiouy\xE6\xE5\xF8")
+ __vowels = "aeiouy\xE6\xE5\xF8"
__s_ending = "bcdfghjlmnoprtvyz"
__step1_suffixes = (
"hetenes",
diff --git a/src/whoosh/lang/snowball/portugese.py b/src/whoosh/lang/snowball/portugese.py
index bed4e943..87cde22a 100644
--- a/src/whoosh/lang/snowball/portugese.py
+++ b/src/whoosh/lang/snowball/portugese.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _StandardStemmer
@@ -22,7 +20,7 @@ class PortugueseStemmer(_StandardStemmer):
"""
- __vowels = u("aeiou\xE1\xE9\xED\xF3\xFA\xE2\xEA\xF4")
+ __vowels = "aeiou\xE1\xE9\xED\xF3\xFA\xE2\xEA\xF4"
__step1_suffixes = (
"amentos",
"imentos",
@@ -31,28 +29,28 @@ class PortugueseStemmer(_StandardStemmer):
"imento",
"adoras",
"adores",
- u("a\xE7o~es"),
- u("log\xEDas"),
- u("\xEAncias"),
+ "a\xE7o~es",
+ "log\xEDas",
+ "\xEAncias",
"amente",
"idades",
"ismos",
"istas",
"adora",
- u("a\xE7a~o"),
+ "a\xE7a~o",
"antes",
- u("\xE2ncia"),
- u("log\xEDa"),
- u("uci\xF3n"),
- u("\xEAncia"),
+ "\xE2ncia",
+ "log\xEDa",
+ "uci\xF3n",
+ "\xEAncia",
"mente",
"idade",
"ezas",
"icos",
"icas",
"ismo",
- u("\xE1vel"),
- u("\xEDvel"),
+ "\xE1vel",
+ "\xEDvel",
"ista",
"osos",
"osas",
@@ -71,22 +69,22 @@ class PortugueseStemmer(_StandardStemmer):
"ira",
)
__step2_suffixes = (
- u("ar\xEDamos"),
- u("er\xEDamos"),
- u("ir\xEDamos"),
- u("\xE1ssemos"),
- u("\xEAssemos"),
- u("\xEDssemos"),
- u("ar\xEDeis"),
- u("er\xEDeis"),
- u("ir\xEDeis"),
- u("\xE1sseis"),
- u("\xE9sseis"),
- u("\xEDsseis"),
- u("\xE1ramos"),
- u("\xE9ramos"),
- u("\xEDramos"),
- u("\xE1vamos"),
+ "ar\xEDamos",
+ "er\xEDamos",
+ "ir\xEDamos",
+ "\xE1ssemos",
+ "\xEAssemos",
+ "\xEDssemos",
+ "ar\xEDeis",
+ "er\xEDeis",
+ "ir\xEDeis",
+ "\xE1sseis",
+ "\xE9sseis",
+ "\xEDsseis",
+ "\xE1ramos",
+ "\xE9ramos",
+ "\xEDramos",
+ "\xE1vamos",
"aremos",
"eremos",
"iremos",
@@ -111,14 +109,14 @@ class PortugueseStemmer(_StandardStemmer):
"astes",
"estes",
"istes",
- u("\xE1reis"),
+ "\xE1reis",
"areis",
- u("\xE9reis"),
+ "\xE9reis",
"ereis",
- u("\xEDreis"),
+ "\xEDreis",
"ireis",
- u("\xE1veis"),
- u("\xEDamos"),
+ "\xE1veis",
+ "\xEDamos",
"armos",
"ermos",
"irmos",
@@ -146,30 +144,30 @@ class PortugueseStemmer(_StandardStemmer):
"indo",
"adas",
"idas",
- u("ar\xE1s"),
+ "ar\xE1s",
"aras",
- u("er\xE1s"),
+ "er\xE1s",
"eras",
- u("ir\xE1s"),
+ "ir\xE1s",
"avas",
"ares",
"eres",
"ires",
- u("\xEDeis"),
+ "\xEDeis",
"ados",
"idos",
- u("\xE1mos"),
+ "\xE1mos",
"amos",
"emos",
"imos",
"iras",
"ada",
"ida",
- u("ar\xE1"),
+ "ar\xE1",
"ara",
- u("er\xE1"),
+ "er\xE1",
"era",
- u("ir\xE1"),
+ "ir\xE1",
"ava",
"iam",
"ado",
@@ -192,7 +190,7 @@ class PortugueseStemmer(_StandardStemmer):
"iu",
"ou",
)
- __step4_suffixes = ("os", "a", "i", "o", u("\xE1"), u("\xED"), u("\xF3"))
+ __step4_suffixes = ("os", "a", "i", "o", "\xE1", "\xED", "\xF3")
def stem(self, word):
"""
@@ -209,7 +207,7 @@ def stem(self, word):
step1_success = False
step2_success = False
- word = word.replace(u("\xE3"), "a~").replace(u("\xF5"), "o~")
+ word = word.replace("\xE3", "a~").replace("\xF5", "o~")
r1, r2 = self._r1r2_standard(word, self.__vowels)
rv = self._rv_standard(word, self.__vowels)
@@ -250,15 +248,15 @@ def stem(self, word):
elif r2.endswith(suffix):
step1_success = True
- if suffix in (u("log\xEDa"), u("log\xEDas")):
+ if suffix in ("log\xEDa", "log\xEDas"):
word = word[:-2]
rv = rv[:-2]
- elif suffix in (u("uci\xF3n"), "uciones"):
+ elif suffix in ("uci\xF3n", "uciones"):
word = "".join((word[: -len(suffix)], "u"))
rv = "".join((rv[: -len(suffix)], "u"))
- elif suffix in (u("\xEAncia"), u("\xEAncias")):
+ elif suffix in ("\xEAncia", "\xEAncias"):
word = "".join((word[: -len(suffix)], "ente"))
rv = "".join((rv[: -len(suffix)], "ente"))
@@ -267,7 +265,7 @@ def stem(self, word):
r2 = r2[:-5]
rv = rv[:-5]
- if r2.endswith(("ante", "avel", u("\xEDvel"))):
+ if r2.endswith(("ante", "avel", "\xEDvel")):
word = word[:-4]
rv = rv[:-4]
@@ -322,7 +320,7 @@ def stem(self, word):
break
# STEP 5
- if rv.endswith(("e", u("\xE9"), u("\xEA"))):
+ if rv.endswith(("e", "\xE9", "\xEA")):
word = word[:-1]
rv = rv[:-1]
@@ -331,8 +329,8 @@ def stem(self, word):
):
word = word[:-1]
- elif word.endswith(u("\xE7")):
+ elif word.endswith("\xE7"):
word = "".join((word[:-1], "c"))
- word = word.replace("a~", u("\xE3")).replace("o~", u("\xF5"))
+ word = word.replace("a~", "\xE3").replace("o~", "\xF5")
return word
diff --git a/src/whoosh/lang/snowball/romanian.py b/src/whoosh/lang/snowball/romanian.py
index c33b0d90..a580d357 100644
--- a/src/whoosh/lang/snowball/romanian.py
+++ b/src/whoosh/lang/snowball/romanian.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _StandardStemmer
@@ -24,7 +22,7 @@ class RomanianStemmer(_StandardStemmer):
"""
- __vowels = u("aeiou\u0103\xE2\xEE")
+ __vowels = "aeiou\u0103\xE2\xEE"
__step0_suffixes = (
"iilor",
"ului",
@@ -32,8 +30,8 @@ class RomanianStemmer(_StandardStemmer):
"iile",
"ilor",
"atei",
- u("a\u0163ie"),
- u("a\u0163ia"),
+ "a\u0163ie",
+ "a\u0163ia",
"aua",
"ele",
"iua",
@@ -46,48 +44,48 @@ class RomanianStemmer(_StandardStemmer):
__step1_suffixes = (
"abilitate",
"abilitati",
- u("abilit\u0103\u0163i"),
+ "abilit\u0103\u0163i",
"ibilitate",
- u("abilit\u0103i"),
+ "abilit\u0103i",
"ivitate",
"ivitati",
- u("ivit\u0103\u0163i"),
+ "ivit\u0103\u0163i",
"icitate",
"icitati",
- u("icit\u0103\u0163i"),
+ "icit\u0103\u0163i",
"icatori",
- u("ivit\u0103i"),
- u("icit\u0103i"),
+ "ivit\u0103i",
+ "icit\u0103i",
"icator",
- u("a\u0163iune"),
+ "a\u0163iune",
"atoare",
- u("\u0103toare"),
- u("i\u0163iune"),
+ "\u0103toare",
+ "i\u0163iune",
"itoare",
"iciva",
"icive",
"icivi",
- u("iciv\u0103"),
+ "iciv\u0103",
"icala",
"icale",
"icali",
- u("ical\u0103"),
+ "ical\u0103",
"ativa",
"ative",
"ativi",
- u("ativ\u0103"),
+ "ativ\u0103",
"atori",
- u("\u0103tori"),
+ "\u0103tori",
"itiva",
"itive",
"itivi",
- u("itiv\u0103"),
+ "itiv\u0103",
"itori",
"iciv",
"ical",
"ativ",
"ator",
- u("\u0103tor"),
+ "\u0103tor",
"itiv",
"itor",
)
@@ -95,57 +93,57 @@ class RomanianStemmer(_StandardStemmer):
"abila",
"abile",
"abili",
- u("abil\u0103"),
+ "abil\u0103",
"ibila",
"ibile",
"ibili",
- u("ibil\u0103"),
+ "ibil\u0103",
"atori",
"itate",
"itati",
- u("it\u0103\u0163i"),
+ "it\u0103\u0163i",
"abil",
"ibil",
"oasa",
- u("oas\u0103"),
+ "oas\u0103",
"oase",
"anta",
"ante",
"anti",
- u("ant\u0103"),
+ "ant\u0103",
"ator",
- u("it\u0103i"),
+ "it\u0103i",
"iune",
"iuni",
"isme",
"ista",
"iste",
"isti",
- u("ist\u0103"),
- u("i\u015Fti"),
+ "ist\u0103",
+ "i\u015Fti",
"ata",
- u("at\u0103"),
+ "at\u0103",
"ati",
"ate",
"uta",
- u("ut\u0103"),
+ "ut\u0103",
"uti",
"ute",
"ita",
- u("it\u0103"),
+ "it\u0103",
"iti",
"ite",
"ica",
"ice",
"ici",
- u("ic\u0103"),
+ "ic\u0103",
"osi",
- u("o\u015Fi"),
+ "o\u015Fi",
"ant",
"iva",
"ive",
"ivi",
- u("iv\u0103"),
+ "iv\u0103",
"ism",
"ist",
"at",
@@ -156,53 +154,53 @@ class RomanianStemmer(_StandardStemmer):
"iv",
)
__step3_suffixes = (
- u("seser\u0103\u0163i"),
- u("aser\u0103\u0163i"),
- u("iser\u0103\u0163i"),
- u("\xE2ser\u0103\u0163i"),
- u("user\u0103\u0163i"),
- u("seser\u0103m"),
- u("aser\u0103m"),
- u("iser\u0103m"),
- u("\xE2ser\u0103m"),
- u("user\u0103m"),
- u("ser\u0103\u0163i"),
- u("sese\u015Fi"),
- u("seser\u0103"),
- u("easc\u0103"),
- u("ar\u0103\u0163i"),
- u("ur\u0103\u0163i"),
- u("ir\u0103\u0163i"),
- u("\xE2r\u0103\u0163i"),
- u("ase\u015Fi"),
- u("aser\u0103"),
- u("ise\u015Fi"),
- u("iser\u0103"),
- u("\xe2se\u015Fi"),
- u("\xE2ser\u0103"),
- u("use\u015Fi"),
- u("user\u0103"),
- u("ser\u0103m"),
+ "seser\u0103\u0163i",
+ "aser\u0103\u0163i",
+ "iser\u0103\u0163i",
+ "\xE2ser\u0103\u0163i",
+ "user\u0103\u0163i",
+ "seser\u0103m",
+ "aser\u0103m",
+ "iser\u0103m",
+ "\xE2ser\u0103m",
+ "user\u0103m",
+ "ser\u0103\u0163i",
+ "sese\u015Fi",
+ "seser\u0103",
+ "easc\u0103",
+ "ar\u0103\u0163i",
+ "ur\u0103\u0163i",
+ "ir\u0103\u0163i",
+ "\xE2r\u0103\u0163i",
+ "ase\u015Fi",
+ "aser\u0103",
+ "ise\u015Fi",
+ "iser\u0103",
+ "\xe2se\u015Fi",
+ "\xE2ser\u0103",
+ "use\u015Fi",
+ "user\u0103",
+ "ser\u0103m",
"sesem",
"indu",
"\xE2ndu",
- u("eaz\u0103"),
- u("e\u015Fti"),
- u("e\u015Fte"),
- u("\u0103\u015Fti"),
- u("\u0103\u015Fte"),
- u("ea\u0163i"),
- u("ia\u0163i"),
- u("ar\u0103m"),
- u("ur\u0103m"),
- u("ir\u0103m"),
- u("\xE2r\u0103m"),
+ "eaz\u0103",
+ "e\u015Fti",
+ "e\u015Fte",
+ "\u0103\u015Fti",
+ "\u0103\u015Fte",
+ "ea\u0163i",
+ "ia\u0163i",
+ "ar\u0103m",
+ "ur\u0103m",
+ "ir\u0103m",
+ "\xE2r\u0103m",
"asem",
"isem",
"\xE2sem",
"usem",
- u("se\u015Fi"),
- u("ser\u0103"),
+ "se\u015Fi",
+ "ser\u0103",
"sese",
"are",
"ere",
@@ -213,29 +211,29 @@ class RomanianStemmer(_StandardStemmer):
"eze",
"ezi",
"esc",
- u("\u0103sc"),
+ "\u0103sc",
"eam",
"eai",
"eau",
"iam",
"iai",
"iau",
- u("a\u015Fi"),
- u("ar\u0103"),
- u("u\u015Fi"),
- u("ur\u0103"),
- u("i\u015Fi"),
- u("ir\u0103"),
- u("\xE2\u015Fi"),
- u("\xe2r\u0103"),
+ "a\u015Fi",
+ "ar\u0103",
+ "u\u015Fi",
+ "ur\u0103",
+ "i\u015Fi",
+ "ir\u0103",
+ "\xE2\u015Fi",
+ "\xe2r\u0103",
"ase",
"ise",
"\xE2se",
"use",
- u("a\u0163i"),
- u("e\u0163i"),
- u("i\u0163i"),
- u("\xe2\u0163i"),
+ "a\u0163i",
+ "e\u0163i",
+ "i\u0163i",
+ "\xe2\u0163i",
"sei",
"ez",
"am",
@@ -245,7 +243,7 @@ class RomanianStemmer(_StandardStemmer):
"ia",
"ui",
"\xE2i",
- u("\u0103m"),
+ "\u0103m",
"em",
"im",
"\xE2m",
@@ -440,22 +438,22 @@ def stem(self, word):
if word.endswith(suffix):
if suffix in rv:
if suffix in (
- u("seser\u0103\u0163i"),
- u("seser\u0103m"),
- u("ser\u0103\u0163i"),
- u("sese\u015Fi"),
- u("seser\u0103"),
- u("ser\u0103m"),
+ "seser\u0103\u0163i",
+ "seser\u0103m",
+ "ser\u0103\u0163i",
+ "sese\u015Fi",
+ "seser\u0103",
+ "ser\u0103m",
"sesem",
- u("se\u015Fi"),
- u("ser\u0103"),
+ "se\u015Fi",
+ "ser\u0103",
"sese",
- u("a\u0163i"),
- u("e\u0163i"),
- u("i\u0163i"),
- u("\xE2\u0163i"),
+ "a\u0163i",
+ "e\u0163i",
+ "i\u0163i",
+ "\xE2\u0163i",
"sei",
- u("\u0103m"),
+ "\u0103m",
"em",
"im",
"\xE2m",
diff --git a/src/whoosh/lang/snowball/russian.py b/src/whoosh/lang/snowball/russian.py
index 76e0ccb7..56ef75d9 100644
--- a/src/whoosh/lang/snowball/russian.py
+++ b/src/whoosh/lang/snowball/russian.py
@@ -1,6 +1,3 @@
-from whoosh.compat import u
-
-
class RussianStemmer:
"""
The Russian Snowball stemmer.
@@ -710,72 +707,72 @@ def __cyrillic_to_roman(self, word):
"""
word = (
- word.replace(u("\u0410"), "a")
- .replace(u("\u0430"), "a")
- .replace(u("\u0411"), "b")
- .replace(u("\u0431"), "b")
- .replace(u("\u0412"), "v")
- .replace(u("\u0432"), "v")
- .replace(u("\u0413"), "g")
- .replace(u("\u0433"), "g")
- .replace(u("\u0414"), "d")
- .replace(u("\u0434"), "d")
- .replace(u("\u0415"), "e")
- .replace(u("\u0435"), "e")
- .replace(u("\u0401"), "e")
- .replace(u("\u0451"), "e")
- .replace(u("\u0416"), "zh")
- .replace(u("\u0436"), "zh")
- .replace(u("\u0417"), "z")
- .replace(u("\u0437"), "z")
- .replace(u("\u0418"), "i")
- .replace(u("\u0438"), "i")
- .replace(u("\u0419"), "i`")
- .replace(u("\u0439"), "i`")
- .replace(u("\u041A"), "k")
- .replace(u("\u043A"), "k")
- .replace(u("\u041B"), "l")
- .replace(u("\u043B"), "l")
- .replace(u("\u041C"), "m")
- .replace(u("\u043C"), "m")
- .replace(u("\u041D"), "n")
- .replace(u("\u043D"), "n")
- .replace(u("\u041E"), "o")
- .replace(u("\u043E"), "o")
- .replace(u("\u041F"), "p")
- .replace(u("\u043F"), "p")
- .replace(u("\u0420"), "r")
- .replace(u("\u0440"), "r")
- .replace(u("\u0421"), "s")
- .replace(u("\u0441"), "s")
- .replace(u("\u0422"), "t")
- .replace(u("\u0442"), "t")
- .replace(u("\u0423"), "u")
- .replace(u("\u0443"), "u")
- .replace(u("\u0424"), "f")
- .replace(u("\u0444"), "f")
- .replace(u("\u0425"), "kh")
- .replace(u("\u0445"), "kh")
- .replace(u("\u0426"), "t^s")
- .replace(u("\u0446"), "t^s")
- .replace(u("\u0427"), "ch")
- .replace(u("\u0447"), "ch")
- .replace(u("\u0428"), "sh")
- .replace(u("\u0448"), "sh")
- .replace(u("\u0429"), "shch")
- .replace(u("\u0449"), "shch")
- .replace(u("\u042A"), "''")
- .replace(u("\u044A"), "''")
- .replace(u("\u042B"), "y")
- .replace(u("\u044B"), "y")
- .replace(u("\u042C"), "'")
- .replace(u("\u044C"), "'")
- .replace(u("\u042D"), "e`")
- .replace(u("\u044D"), "e`")
- .replace(u("\u042E"), "i^u")
- .replace(u("\u044E"), "i^u")
- .replace(u("\u042F"), "i^a")
- .replace(u("\u044F"), "i^a")
+ word.replace("\u0410", "a")
+ .replace("\u0430", "a")
+ .replace("\u0411", "b")
+ .replace("\u0431", "b")
+ .replace("\u0412", "v")
+ .replace("\u0432", "v")
+ .replace("\u0413", "g")
+ .replace("\u0433", "g")
+ .replace("\u0414", "d")
+ .replace("\u0434", "d")
+ .replace("\u0415", "e")
+ .replace("\u0435", "e")
+ .replace("\u0401", "e")
+ .replace("\u0451", "e")
+ .replace("\u0416", "zh")
+ .replace("\u0436", "zh")
+ .replace("\u0417", "z")
+ .replace("\u0437", "z")
+ .replace("\u0418", "i")
+ .replace("\u0438", "i")
+ .replace("\u0419", "i`")
+ .replace("\u0439", "i`")
+ .replace("\u041A", "k")
+ .replace("\u043A", "k")
+ .replace("\u041B", "l")
+ .replace("\u043B", "l")
+ .replace("\u041C", "m")
+ .replace("\u043C", "m")
+ .replace("\u041D", "n")
+ .replace("\u043D", "n")
+ .replace("\u041E", "o")
+ .replace("\u043E", "o")
+ .replace("\u041F", "p")
+ .replace("\u043F", "p")
+ .replace("\u0420", "r")
+ .replace("\u0440", "r")
+ .replace("\u0421", "s")
+ .replace("\u0441", "s")
+ .replace("\u0422", "t")
+ .replace("\u0442", "t")
+ .replace("\u0423", "u")
+ .replace("\u0443", "u")
+ .replace("\u0424", "f")
+ .replace("\u0444", "f")
+ .replace("\u0425", "kh")
+ .replace("\u0445", "kh")
+ .replace("\u0426", "t^s")
+ .replace("\u0446", "t^s")
+ .replace("\u0427", "ch")
+ .replace("\u0447", "ch")
+ .replace("\u0428", "sh")
+ .replace("\u0448", "sh")
+ .replace("\u0429", "shch")
+ .replace("\u0449", "shch")
+ .replace("\u042A", "''")
+ .replace("\u044A", "''")
+ .replace("\u042B", "y")
+ .replace("\u044B", "y")
+ .replace("\u042C", "'")
+ .replace("\u044C", "'")
+ .replace("\u042D", "e`")
+ .replace("\u044D", "e`")
+ .replace("\u042E", "i^u")
+ .replace("\u044E", "i^u")
+ .replace("\u042F", "i^a")
+ .replace("\u044F", "i^a")
)
return word
@@ -796,38 +793,38 @@ def __roman_to_cyrillic(self, word):
"""
word = (
- word.replace("i^u", u("\u044E"))
- .replace("i^a", u("\u044F"))
- .replace("shch", u("\u0449"))
- .replace("kh", u("\u0445"))
- .replace("t^s", u("\u0446"))
- .replace("ch", u("\u0447"))
- .replace("e`", u("\u044D"))
- .replace("i`", u("\u0439"))
- .replace("sh", u("\u0448"))
- .replace("k", u("\u043A"))
- .replace("e", u("\u0435"))
- .replace("zh", u("\u0436"))
- .replace("a", u("\u0430"))
- .replace("b", u("\u0431"))
- .replace("v", u("\u0432"))
- .replace("g", u("\u0433"))
- .replace("d", u("\u0434"))
- .replace("e", u("\u0435"))
- .replace("z", u("\u0437"))
- .replace("i", u("\u0438"))
- .replace("l", u("\u043B"))
- .replace("m", u("\u043C"))
- .replace("n", u("\u043D"))
- .replace("o", u("\u043E"))
- .replace("p", u("\u043F"))
- .replace("r", u("\u0440"))
- .replace("s", u("\u0441"))
- .replace("t", u("\u0442"))
- .replace("u", u("\u0443"))
- .replace("f", u("\u0444"))
- .replace("''", u("\u044A"))
- .replace("y", u("\u044B"))
- .replace("'", u("\u044C"))
+ word.replace("i^u", "\u044E")
+ .replace("i^a", "\u044F")
+ .replace("shch", "\u0449")
+ .replace("kh", "\u0445")
+ .replace("t^s", "\u0446")
+ .replace("ch", "\u0447")
+ .replace("e`", "\u044D")
+ .replace("i`", "\u0439")
+ .replace("sh", "\u0448")
+ .replace("k", "\u043A")
+ .replace("e", "\u0435")
+ .replace("zh", "\u0436")
+ .replace("a", "\u0430")
+ .replace("b", "\u0431")
+ .replace("v", "\u0432")
+ .replace("g", "\u0433")
+ .replace("d", "\u0434")
+ .replace("e", "\u0435")
+ .replace("z", "\u0437")
+ .replace("i", "\u0438")
+ .replace("l", "\u043B")
+ .replace("m", "\u043C")
+ .replace("n", "\u043D")
+ .replace("o", "\u043E")
+ .replace("p", "\u043F")
+ .replace("r", "\u0440")
+ .replace("s", "\u0441")
+ .replace("t", "\u0442")
+ .replace("u", "\u0443")
+ .replace("f", "\u0444")
+ .replace("''", "\u044A")
+ .replace("y", "\u044B")
+ .replace("'", "\u044C")
)
return word
diff --git a/src/whoosh/lang/snowball/spanish.py b/src/whoosh/lang/snowball/spanish.py
index f1e50ed2..eaa2cce9 100644
--- a/src/whoosh/lang/snowball/spanish.py
+++ b/src/whoosh/lang/snowball/spanish.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _StandardStemmer
@@ -26,7 +24,7 @@ class SpanishStemmer(_StandardStemmer):
"""
- __vowels = u("aeiou\xE1\xE9\xED\xF3\xFA\xFC")
+ __vowels = "aeiou\xE1\xE9\xED\xF3\xFA\xFC"
__step0_suffixes = (
"selas",
"selos",
@@ -52,7 +50,7 @@ class SpanishStemmer(_StandardStemmer):
"adoras",
"adores",
"ancias",
- u("log\xEDas"),
+ "log\xEDas",
"encias",
"amente",
"idades",
@@ -62,11 +60,11 @@ class SpanishStemmer(_StandardStemmer):
"ibles",
"istas",
"adora",
- u("aci\xF3n"),
+ "aci\xF3n",
"antes",
"ancia",
- u("log\xEDa"),
- u("uci\xf3n"),
+ "log\xEDa",
+ "uci\xf3n",
"encia",
"mente",
"anza",
@@ -102,36 +100,36 @@ class SpanishStemmer(_StandardStemmer):
"ya",
"ye",
"yo",
- u("y\xF3"),
+ "y\xF3",
)
__step2b_suffixes = (
- u("ar\xEDamos"),
- u("er\xEDamos"),
- u("ir\xEDamos"),
- u("i\xE9ramos"),
- u("i\xE9semos"),
- u("ar\xEDais"),
+ "ar\xEDamos",
+ "er\xEDamos",
+ "ir\xEDamos",
+ "i\xE9ramos",
+ "i\xE9semos",
+ "ar\xEDais",
"aremos",
- u("er\xEDais"),
+ "er\xEDais",
"eremos",
- u("ir\xEDais"),
+ "ir\xEDais",
"iremos",
"ierais",
"ieseis",
"asteis",
"isteis",
- u("\xE1bamos"),
- u("\xE1ramos"),
- u("\xE1semos"),
- u("ar\xEDan"),
- u("ar\xEDas"),
- u("ar\xE9is"),
- u("er\xEDan"),
- u("er\xEDas"),
- u("er\xE9is"),
- u("ir\xEDan"),
- u("ir\xEDas"),
- u("ir\xE9is"),
+ "\xE1bamos",
+ "\xE1ramos",
+ "\xE1semos",
+ "ar\xEDan",
+ "ar\xEDas",
+ "ar\xE9is",
+ "er\xEDan",
+ "er\xEDas",
+ "er\xE9is",
+ "ir\xEDan",
+ "ir\xEDas",
+ "ir\xE9is",
"ieran",
"iesen",
"ieron",
@@ -141,16 +139,16 @@ class SpanishStemmer(_StandardStemmer):
"abais",
"arais",
"aseis",
- u("\xE9amos"),
- u("ar\xE1n"),
- u("ar\xE1s"),
- u("ar\xEDa"),
- u("er\xE1n"),
- u("er\xE1s"),
- u("er\xEDa"),
- u("ir\xE1n"),
- u("ir\xE1s"),
- u("ir\xEDa"),
+ "\xE9amos",
+ "ar\xE1n",
+ "ar\xE1s",
+ "ar\xEDa",
+ "er\xE1n",
+ "er\xE1s",
+ "er\xEDa",
+ "ir\xE1n",
+ "ir\xE1s",
+ "ir\xEDa",
"iera",
"iese",
"aste",
@@ -165,44 +163,44 @@ class SpanishStemmer(_StandardStemmer):
"idas",
"aras",
"ases",
- u("\xEDais"),
+ "\xEDais",
"ados",
"idos",
"amos",
"imos",
"emos",
- u("ar\xE1"),
- u("ar\xE9"),
- u("er\xE1"),
- u("er\xE9"),
- u("ir\xE1"),
- u("ir\xE9"),
+ "ar\xE1",
+ "ar\xE9",
+ "er\xE1",
+ "er\xE9",
+ "ir\xE1",
+ "ir\xE9",
"aba",
"ada",
"ida",
"ara",
"ase",
- u("\xEDan"),
+ "\xEDan",
"ado",
"ido",
- u("\xEDas"),
- u("\xE1is"),
- u("\xE9is"),
- u("\xEDa"),
+ "\xEDas",
+ "\xE1is",
+ "\xE9is",
+ "\xEDa",
"ad",
"ed",
"id",
"an",
- u("i\xF3"),
+ "i\xF3",
"ar",
"er",
"ir",
"as",
- u("\xEDs"),
+ "\xEDs",
"en",
"es",
)
- __step3_suffixes = ("os", "a", "e", "o", u("\xE1"), u("\xE9"), u("\xED"), u("\xF3"))
+ __step3_suffixes = ("os", "a", "e", "o", "\xE1", "\xE9", "\xED", "\xF3")
def stem(self, word):
"""
@@ -227,36 +225,36 @@ def stem(self, word):
if rv.endswith(suffix):
if rv[: -len(suffix)].endswith(
(
- u("i\xE9ndo"),
- u("\xE1ndo"),
- u("\xE1r"),
- u("\xE9r"),
- u("\xEDr"),
+ "i\xE9ndo",
+ "\xE1ndo",
+ "\xE1r",
+ "\xE9r",
+ "\xEDr",
)
):
word = (
word[: -len(suffix)]
- .replace(u("\xE1"), "a")
- .replace(u("\xE9"), "e")
- .replace(u("\xED"), "i")
+ .replace("\xE1", "a")
+ .replace("\xE9", "e")
+ .replace("\xED", "i")
)
r1 = (
r1[: -len(suffix)]
- .replace(u("\xE1"), "a")
- .replace(u("\xE9"), "e")
- .replace(u("\xED"), "i")
+ .replace("\xE1", "a")
+ .replace("\xE9", "e")
+ .replace("\xED", "i")
)
r2 = (
r2[: -len(suffix)]
- .replace(u("\xE1"), "a")
- .replace(u("\xE9"), "e")
- .replace(u("\xED"), "i")
+ .replace("\xE1", "a")
+ .replace("\xE9", "e")
+ .replace("\xED", "i")
)
rv = (
rv[: -len(suffix)]
- .replace(u("\xE1"), "a")
- .replace(u("\xE9"), "e")
- .replace(u("\xED"), "i")
+ .replace("\xE1", "a")
+ .replace("\xE9", "e")
+ .replace("\xED", "i")
)
elif rv[: -len(suffix)].endswith(
@@ -303,7 +301,7 @@ def stem(self, word):
if suffix in (
"adora",
"ador",
- u("aci\xF3n"),
+ "aci\xF3n",
"adoras",
"adores",
"aciones",
@@ -320,11 +318,11 @@ def stem(self, word):
word = word[:-2]
rv = rv[:-2]
- elif suffix in (u("log\xEDa"), u("log\xEDas")):
+ elif suffix in ("log\xEDa", "log\xEDas"):
word = word.replace(suffix, "log")
rv = rv.replace(suffix, "log")
- elif suffix in (u("uci\xF3n"), "uciones"):
+ elif suffix in ("uci\xF3n", "uciones"):
word = word.replace(suffix, "u")
rv = rv.replace(suffix, "u")
@@ -374,7 +372,7 @@ def stem(self, word):
# STEP 2b: Other verb suffixes
for suffix in self.__step2b_suffixes:
if rv.endswith(suffix):
- if suffix in ("en", "es", u("\xE9is"), "emos"):
+ if suffix in ("en", "es", "\xE9is", "emos"):
word = word[: -len(suffix)]
rv = rv[: -len(suffix)]
@@ -391,7 +389,7 @@ def stem(self, word):
# STEP 3: Residual suffix
for suffix in self.__step3_suffixes:
if rv.endswith(suffix):
- if suffix in ("e", u("\xE9")):
+ if suffix in ("e", "\xE9"):
word = word[: -len(suffix)]
rv = rv[: -len(suffix)]
@@ -407,10 +405,10 @@ def stem(self, word):
break
word = (
- word.replace(u("\xE1"), "a")
- .replace(u("\xE9"), "e")
- .replace(u("\xED"), "i")
- .replace(u("\xF3"), "o")
- .replace(u("\xFA"), "u")
+ word.replace("\xE1", "a")
+ .replace("\xE9", "e")
+ .replace("\xED", "i")
+ .replace("\xF3", "o")
+ .replace("\xFA", "u")
)
return word
diff --git a/src/whoosh/lang/snowball/swedish.py b/src/whoosh/lang/snowball/swedish.py
index cb46fbfd..4f9855d8 100644
--- a/src/whoosh/lang/snowball/swedish.py
+++ b/src/whoosh/lang/snowball/swedish.py
@@ -1,5 +1,3 @@
-from whoosh.compat import u
-
from .bases import _ScandinavianStemmer
@@ -23,7 +21,7 @@ class SwedishStemmer(_ScandinavianStemmer):
http://snowball.tartarus.org/algorithms/swedish/stemmer.html
"""
- __vowels = u("aeiouy\xE4\xE5\xF6")
+ __vowels = "aeiouy\xE4\xE5\xF6"
__s_ending = "bcdfghjklmnoprtvy"
__step1_suffixes = (
"heterna",
@@ -65,7 +63,7 @@ class SwedishStemmer(_ScandinavianStemmer):
"s",
)
__step2_suffixes = ("dd", "gd", "nn", "dt", "gt", "kt", "tt")
- __step3_suffixes = ("fullt", u("l\xF6st"), "els", "lig", "ig")
+ __step3_suffixes = ("fullt", "l\xF6st", "els", "lig", "ig")
def stem(self, word):
"""
@@ -105,7 +103,7 @@ def stem(self, word):
if r1.endswith(suffix):
if suffix in ("els", "lig", "ig"):
word = word[: -len(suffix)]
- elif suffix in ("fullt", u("l\xF6st")):
+ elif suffix in ("fullt", "l\xF6st"):
word = word[:-1]
break
diff --git a/src/whoosh/lang/wordnet.py b/src/whoosh/lang/wordnet.py
index 69fbffb3..8f73f129 100644
--- a/src/whoosh/lang/wordnet.py
+++ b/src/whoosh/lang/wordnet.py
@@ -34,7 +34,6 @@
from collections import defaultdict
-from whoosh.compat import iterkeys, text_type
from whoosh.fields import ID, STORED, Schema
@@ -75,9 +74,9 @@ def make_index(storage, indexname, word2nums, num2words):
schema = Schema(word=ID, syns=STORED)
ix = storage.create_index(schema, indexname=indexname)
w = ix.writer()
- for word in iterkeys(word2nums):
+ for word in word2nums:
syns = synonyms(word2nums, num2words, word)
- w.add_document(word=text_type(word), syns=syns)
+ w.add_document(word=str(word), syns=syns)
w.commit()
return ix
diff --git a/src/whoosh/matching/mcore.py b/src/whoosh/matching/mcore.py
index 6e8112b3..55895b46 100644
--- a/src/whoosh/matching/mcore.py
+++ b/src/whoosh/matching/mcore.py
@@ -49,10 +49,9 @@
method will return ``True``.
"""
+from abc import abstractmethod
from itertools import repeat
-from whoosh.compat import abstractmethod, izip
-
# Exceptions
@@ -507,7 +506,7 @@ def all_items(self):
if values is None:
values = repeat("")
- return izip(self._ids, values)
+ return zip(self._ids, values)
def value(self):
if self._values:
diff --git a/src/whoosh/multiproc.py b/src/whoosh/multiproc.py
index 9b0e44b3..7fd1560b 100644
--- a/src/whoosh/multiproc.py
+++ b/src/whoosh/multiproc.py
@@ -25,11 +25,11 @@
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.
-
+import pickle
+import queue
from multiprocessing import Process, Queue, cpu_count
from whoosh.codec import base
-from whoosh.compat import pickle, queue
from whoosh.externalsort import imerge
from whoosh.util import random_name
from whoosh.writing import SegmentWriter
diff --git a/src/whoosh/qparser/dateparse.py b/src/whoosh/qparser/dateparse.py
index 35552c17..ffbb4fc7 100644
--- a/src/whoosh/qparser/dateparse.py
+++ b/src/whoosh/qparser/dateparse.py
@@ -29,7 +29,6 @@
import sys
from datetime import datetime, timedelta, timezone
-from whoosh.compat import iteritems, string_type
from whoosh.qparser import plugins, syntax
from whoosh.qparser.taggers import Tagger
from whoosh.support.relativedelta import relativedelta
@@ -79,7 +78,7 @@ class ParserBase:
"""Base class for date parser elements."""
def to_parser(self, e):
- if isinstance(e, string_type):
+ if isinstance(e, str):
return Regex(e)
else:
return e
@@ -467,7 +466,7 @@ def parse(self, text, dt, pos=0, debug=-9999):
def extract(self, match):
d = match.groupdict()
- for key, value in iteritems(d):
+ for key, value in d.items():
try:
value = int(value)
d[key] = value
diff --git a/src/whoosh/qparser/default.py b/src/whoosh/qparser/default.py
index 30416872..79ce0aa9 100644
--- a/src/whoosh/qparser/default.py
+++ b/src/whoosh/qparser/default.py
@@ -28,7 +28,6 @@
import sys
from whoosh import query
-from whoosh.compat import text_type
from whoosh.qparser import syntax
from whoosh.qparser.common import QueryParserError, print_debug
@@ -365,7 +364,7 @@ def parse(self, text, normalize=True, debug=False):
:rtype: :class:`whoosh.query.Query`
"""
- if not isinstance(text, text_type):
+ if not isinstance(text, str):
text = text.decode("latin1")
nodes = self.process(text, debug=debug)
diff --git a/src/whoosh/qparser/plugins.py b/src/whoosh/qparser/plugins.py
index 15f32170..e39bd584 100644
--- a/src/whoosh/qparser/plugins.py
+++ b/src/whoosh/qparser/plugins.py
@@ -28,7 +28,6 @@
import copy
from whoosh import query
-from whoosh.compat import iteritems, u
from whoosh.qparser import syntax
from whoosh.qparser.common import attach
from whoosh.qparser.taggers import FnTagger, RegexTagger
@@ -78,9 +77,7 @@ def filters(self, parser):
return ()
def create(self, parser, match):
- # Groupdict keys can be unicode sometimes apparently? Convert them to
- # str for use as keyword arguments. This should be Py3-safe.
- kwargs = {str(k): v for k, v in iteritems(match.groupdict())}
+ kwargs = match.groupdict()
return self.nodetype(**kwargs)
@@ -147,7 +144,7 @@ class WildcardPlugin(TaggingPlugin):
# \u055E = Armenian question mark
# \u061F = Arabic question mark
# \u1367 = Ethiopic question mark
- qmarks = u("?\u055E\u061F\u1367")
+ qmarks = "?\u055E\u061F\u1367"
expr = f"(?P[*{qmarks}])"
def filters(self, parser):
@@ -629,8 +626,7 @@ def _parse_args(self, argstring):
for part in parts:
if "=" in part:
name, value = part.split("=", 1)
- # Wrap with str() because Python 2.5 can't handle unicode kws
- name = str(name.strip())
+ name = name.strip()
else:
name = None
value = part
@@ -1223,7 +1219,7 @@ class FieldAliasPlugin(Plugin):
def __init__(self, fieldmap):
self.fieldmap = fieldmap
self.reverse = {}
- for key, values in iteritems(fieldmap):
+ for key, values in fieldmap.items():
for value in values:
self.reverse[value] = key
@@ -1282,7 +1278,7 @@ def __init__(self, map, group=syntax.OrGroup, mirror=False):
self.group = group
if mirror:
# Add in reversed mappings
- map.update({v: k for k, v in iteritems(map)})
+ map.update({v: k for k, v in map.items()})
def filters(self, parser):
# Run after the fieldname filter (100) but before multifield (110)
diff --git a/src/whoosh/query/compound.py b/src/whoosh/query/compound.py
index fc2787aa..b0dcba87 100644
--- a/src/whoosh/query/compound.py
+++ b/src/whoosh/query/compound.py
@@ -27,7 +27,6 @@
from whoosh import matching
-from whoosh.compat import text_type, u
from whoosh.query import qcore
from whoosh.util import make_binary_tree, make_weighted_tree
@@ -51,14 +50,12 @@ def __repr__(self):
r += ")"
return r
- def __unicode__(self):
- r = u("(")
- r += self.JOINT.join([text_type(s) for s in self.subqueries])
- r += u(")")
+ def __str__(self):
+ r = "("
+ r += self.JOINT.join([str(s) for s in self.subqueries])
+ r += ")"
return r
- __str__ = __unicode__
-
def __eq__(self, other):
return (
other
@@ -248,7 +245,7 @@ class And(CompoundQuery):
>>> Term("content", u"render") & Term("content", u"shade")
"""
- # This is used by the superclass's __unicode__ method.
+ # This is used by the superclass's __str__ method.
JOINT = " AND "
intersect_merge = True
@@ -279,7 +276,7 @@ class Or(CompoundQuery):
>>> Term("content", u"render") | Term("content", u"shade")
"""
- # This is used by the superclass's __unicode__ method.
+ # This is used by the superclass's __str__ method.
JOINT = " OR "
intersect_merge = False
TOO_MANY_CLAUSES = 1024
@@ -308,16 +305,14 @@ def __init__(self, subqueries, boost=1.0, minmatch=0, scale=None):
self.minmatch = minmatch
self.scale = scale
- def __unicode__(self):
- r = u("(")
- r += (self.JOINT).join([text_type(s) for s in self.subqueries])
- r += u(")")
+ def __str__(self):
+ r = "("
+ r += (self.JOINT).join([str(s) for s in self.subqueries])
+ r += ")"
if self.minmatch:
- r += u(">%s") % self.minmatch
+ r += f">{self.minmatch}"
return r
- __str__ = __unicode__
-
def normalize(self):
norm = CompoundQuery.normalize(self)
if norm.__class__ is self.__class__:
@@ -457,16 +452,14 @@ def __init__(self, subqueries, boost=1.0, tiebreak=0.0):
CompoundQuery.__init__(self, subqueries, boost=boost)
self.tiebreak = tiebreak
- def __unicode__(self):
- r = u("DisMax(")
- r += " ".join(sorted(text_type(s) for s in self.subqueries))
- r += u(")")
+ def __str__(self):
+ r = "DisMax("
+ r += " ".join(sorted(str(s) for s in self.subqueries))
+ r += ")"
if self.tiebreak:
- r += u("~") + text_type(self.tiebreak)
+ r += "~" + str(self.tiebreak)
return r
- __str__ = __unicode__
-
def normalize(self):
norm = CompoundQuery.normalize(self)
if norm.__class__ is self.__class__:
diff --git a/src/whoosh/query/positional.py b/src/whoosh/query/positional.py
index bb6b381a..73673666 100644
--- a/src/whoosh/query/positional.py
+++ b/src/whoosh/query/positional.py
@@ -30,7 +30,6 @@
from whoosh import matching
from whoosh.analysis import Token
-from whoosh.compat import u
from whoosh.query import compound, qcore, terms
@@ -173,10 +172,8 @@ def __repr__(self):
self.boost,
)
- def __unicode__(self):
- return u('%s:"%s"') % (self.fieldname, u(" ").join(self.words))
-
- __str__ = __unicode__
+ def __str__(self):
+ return f"{self.fieldname}:\"{' '.join(self.words)}\""
def __hash__(self):
h = hash(self.fieldname) ^ hash(self.slop) ^ hash(self.boost)
diff --git a/src/whoosh/query/qcore.py b/src/whoosh/query/qcore.py
index a827a9bc..7702245c 100644
--- a/src/whoosh/query/qcore.py
+++ b/src/whoosh/query/qcore.py
@@ -28,9 +28,9 @@
import copy
from array import array
+from operator import methodcaller
from whoosh import matching
-from whoosh.compat import methodcaller, u
from whoosh.reading import TermNotFound
# Exceptions
@@ -169,7 +169,7 @@ class Query:
# in this query
error = None
- def __unicode__(self):
+ def __str__(self):
raise NotImplementedError(self.__class__.__name__)
def __getitem__(self, item):
@@ -598,8 +598,8 @@ class _NullQuery(Query):
def __init__(self):
self.error = None
- def __unicode__(self):
- return u("<_NullQuery>")
+ def __str__(self):
+ return "<_NullQuery>"
def __call__(self):
return self
@@ -709,10 +709,8 @@ def __eq__(self, other):
and self.boost == other.boost
)
- def __unicode__(self):
- return u("%s:*") % self.fieldname
-
- __str__ = __unicode__
+ def __str__(self):
+ return f"{self.fieldname}:*"
def __hash__(self):
return hash(self.fieldname)
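
The compat methodcaller shim gives way to the standard-library operator.methodcaller, which builds a callable that invokes a named method on whatever argument it receives. A quick standalone illustration (toy class, nothing Whoosh-specific assumed):

    from operator import methodcaller

    class Q:
        def normalize(self):
            return "normalized"

    normalize = methodcaller("normalize")    # f such that f(q) == q.normalize()
    print(normalize(Q()))                                # normalized
    print(methodcaller("replace", "a", "b")("banana"))   # bbnbnb
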
diff --git a/src/whoosh/query/ranges.py b/src/whoosh/query/ranges.py
index cd96e063..19686ffb 100644
--- a/src/whoosh/query/ranges.py
+++ b/src/whoosh/query/ranges.py
@@ -26,7 +26,6 @@
# policies, either expressed or implied, of Matt Chaput.
-from whoosh.compat import b, u
from whoosh.query import compound, qcore, terms, wrappers
from whoosh.util.times import datetime_to_long
@@ -46,14 +45,12 @@ def __repr__(self):
self.constantscore,
)
- def __unicode__(self):
+ def __str__(self):
startchar = "{" if self.startexcl else "["
endchar = "}" if self.endexcl else "]"
start = "" if self.start is None else self.start
end = "" if self.end is None else self.end
- return u("%s:%s%s TO %s%s") % (self.fieldname, startchar, start, end, endchar)
-
- __str__ = __unicode__
+ return f"{self.fieldname}:{startchar}{start} TO {end}{endchar}"
def __eq__(self, other):
return (
@@ -192,7 +189,7 @@ def __init__(
self.constantscore = constantscore
def normalize(self):
- if self.start in ("", None) and self.end in (u("\uffff"), None):
+ if self.start in ("", None) and self.end in ("\uffff", None):
from whoosh.query import Every
return Every(self.fieldname, boost=self.boost)
@@ -226,7 +223,7 @@ def _btexts(self, ixreader):
endexcl = self.endexcl
if self.start is None:
- start = b("")
+ start = b""
else:
try:
start = field.to_bytes(self.start)
@@ -234,7 +231,7 @@ def _btexts(self, ixreader):
return
if self.end is None:
- end = b("\xFF\xFF\xFF\xFF")
+ end = b"\xFF\xFF\xFF\xFF"
else:
try:
end = field.to_bytes(self.end)
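
The b("") / b("\xFF\xFF\xFF\xFF") helpers become plain bytes literals. They work as open-ended range sentinels because bytes compare lexicographically: the empty byte string sorts before every encoded term, and a run of 0xFF bytes sorts after any term that does not itself begin with 0xFF. A quick check of that ordering (example data is mine, not from the patch):

    start = b""
    end = b"\xff\xff\xff\xff"
    terms = [b"alpha", b"omega", b"zz"]
    print(all(start <= t <= end for t in terms))  # True
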
diff --git a/src/whoosh/query/terms.py b/src/whoosh/query/terms.py
index b7ae92f7..dd08eff5 100644
--- a/src/whoosh/query/terms.py
+++ b/src/whoosh/query/terms.py
@@ -32,7 +32,6 @@
from whoosh import matching
from whoosh.analysis import Token
-from whoosh.compat import bytes_type, text_type, u
from whoosh.lang.morph_en import variations
from whoosh.query import qcore
@@ -43,7 +42,7 @@ class Term(qcore.Query):
>>> Term("content", u"render")
"""
- __inittypes__ = {"fieldname": str, "text": text_type, "boost": float}
+ __inittypes__ = {"fieldname": str, "text": str, "boost": float}
def __init__(self, fieldname, text, boost=1.0, minquality=None):
self.fieldname = fieldname
@@ -67,21 +66,19 @@ def __repr__(self):
r += ")"
return r
- def __unicode__(self):
+ def __str__(self):
text = self.text
- if isinstance(text, bytes_type):
+ if isinstance(text, bytes):
try:
text = text.decode("ascii")
except UnicodeDecodeError:
text = repr(text)
- t = u("%s:%s") % (self.fieldname, text)
+ t = f"{self.fieldname}:{text}"
if self.boost != 1:
- t += u("^") + text_type(self.boost)
+ t += "^" + str(self.boost)
return t
- __str__ = __unicode__
-
def __hash__(self):
return hash(self.fieldname) ^ hash(self.text) ^ hash(self.boost)
@@ -247,7 +244,7 @@ def matcher(self, searcher, context=None):
class PatternQuery(MultiTerm):
"""An intermediate base class for common methods of Prefix and Wildcard."""
- __inittypes__ = {"fieldname": str, "text": text_type, "boost": float}
+ __inittypes__ = {"fieldname": str, "text": str, "boost": float}
def __init__(self, fieldname, text, boost=1.0, constantscore=True):
self.fieldname = fieldname
@@ -317,11 +314,9 @@ class Prefix(PatternQuery):
>>> Prefix("content", u"comp")
"""
- def __unicode__(self):
+ def __str__(self):
return f"{self.fieldname}:{self.text}*"
- __str__ = __unicode__
-
def _btexts(self, ixreader):
return ixreader.expand_prefix(self.fieldname, self.text)
@@ -344,11 +339,9 @@ class Wildcard(PatternQuery):
SPECIAL_CHARS = frozenset("*?[")
- def __unicode__(self):
+ def __str__(self):
return f"{self.fieldname}:{self.text}"
- __str__ = __unicode__
-
def _get_pattern(self):
return fnmatch.translate(self.text)
@@ -390,11 +383,9 @@ class Regex(PatternQuery):
SPECIAL_CHARS = frozenset("{}()[].?*+^$\\")
- def __unicode__(self):
+ def __str__(self):
return f'{self.fieldname}:r"{self.text}"'
- __str__ = __unicode__
-
def _get_pattern(self):
return self.text
@@ -447,7 +438,7 @@ class FuzzyTerm(ExpandingTerm):
__inittypes__ = {
"fieldname": str,
- "text": text_type,
+ "text": str,
"boost": float,
"maxdist": float,
"prefixlength": int,
@@ -498,16 +489,14 @@ def __repr__(self):
self.prefixlength,
)
- def __unicode__(self):
- r = u("%s:%s") % (self.fieldname, self.text) + u("~")
+ def __str__(self):
+ r = f"{self.fieldname}:{self.text}" + "~"
if self.maxdist > 1:
- r += u("%d") % self.maxdist
+ r += "%d" % self.maxdist
if self.boost != 1.0:
- r += u("^%f") % self.boost
+ r += f"^{self.boost:f}"
return r
- __str__ = __unicode__
-
def __hash__(self):
return (
hash(self.fieldname)
@@ -571,10 +560,8 @@ def _btexts(self, ixreader):
if (fieldname, btext) in ixreader:
yield btext
- def __unicode__(self):
- return u("%s:<%s>") % (self.fieldname, self.text)
-
- __str__ = __unicode__
+ def __str__(self):
+ return f"{self.fieldname}:<{self.text}>"
def replace(self, fieldname, oldtext, newtext):
q = copy.copy(self)
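
Term.__str__ keeps the old fallback behaviour: if the stored text is still bytes, try an ASCII decode and fall back to repr() so the query renders either way. A standalone sketch of that fallback, written as a free function so it assumes nothing about Whoosh internals:

    def render_term(fieldname, text, boost=1.0):
        # Mirrors the decode-or-repr fallback in Term.__str__ above.
        if isinstance(text, bytes):
            try:
                text = text.decode("ascii")
            except UnicodeDecodeError:
                text = repr(text)
        out = f"{fieldname}:{text}"
        if boost != 1:
            out += "^" + str(boost)
        return out

    print(render_term("content", b"render"))         # content:render
    print(render_term("content", b"\xc3\xa9clair"))  # content:b'\xc3\xa9clair'
    print(render_term("title", "hello", boost=2.0))  # title:hello^2.0
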
diff --git a/src/whoosh/query/wrappers.py b/src/whoosh/query/wrappers.py
index 531f7a08..02490ab8 100644
--- a/src/whoosh/query/wrappers.py
+++ b/src/whoosh/query/wrappers.py
@@ -29,7 +29,6 @@
from array import array
from whoosh import matching
-from whoosh.compat import text_type, u
from whoosh.query import qcore
@@ -104,10 +103,8 @@ def __eq__(self, other):
def __repr__(self):
return f"{self.__class__.__name__}({repr(self.query)})"
- def __unicode__(self):
- return u("NOT ") + text_type(self.query)
-
- __str__ = __unicode__
+ def __str__(self):
+ return "NOT " + str(self.query)
def __hash__(self):
return hash(self.__class__.__name__) ^ hash(self.query) ^ hash(self.boost)
diff --git a/src/whoosh/reading.py b/src/whoosh/reading.py
index 484fff6c..c3f4b3d9 100644
--- a/src/whoosh/reading.py
+++ b/src/whoosh/reading.py
@@ -28,6 +28,7 @@
"""This module contains classes that allow reading from an index.
"""
+from abc import abstractmethod
from bisect import bisect_right
from heapq import heapify, heappop, heapreplace, nlargest
from math import log
@@ -35,7 +36,6 @@
from cached_property import cached_property
from whoosh import columns
-from whoosh.compat import abstractmethod, iteritems, next, zip_
from whoosh.filedb.filestore import OverlayStorage
from whoosh.matching import MultiMatcher
from whoosh.support.levenshtein import distance
@@ -693,7 +693,7 @@ def stored_fields(self, docnum):
schema = self.schema
sfs = self._perdoc.stored_fields(docnum)
# Double-check with schema to filter out removed fields
- return dict(item for item in iteritems(sfs) if item[0] in schema)
+ return dict(item for item in sfs.items() if item[0] in schema)
# Delegate doc methods to the per-doc reader
@@ -1038,7 +1038,7 @@ def is_atomic(self):
return False
def leaf_readers(self):
- return zip_(self.readers, self.doc_offsets)
+ return list(zip(self.readers, self.doc_offsets))
def add_reader(self, reader):
self.readers.append(reader)
@@ -1138,7 +1138,7 @@ def term_info(self, fieldname, text):
# Get the term infos for the sub-readers containing the term
tis = [
(r.term_info(fieldname, text), offset)
- for r, offset in zip_(self.readers, self.doc_offsets)
+ for r, offset in zip(self.readers, self.doc_offsets)
if term in r
]
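
In Python 3, zip() returns a lazy, single-use iterator, so leaf_readers() wraps it in list(...) to keep handing back a reusable sequence, while the comprehension in term_info() can consume the iterator directly. The difference in a nutshell (toy data, not real readers):

    readers = ["r0", "r1", "r2"]
    offsets = [0, 100, 250]

    pairs = zip(readers, offsets)        # lazy: consumed once
    assert list(pairs) == [("r0", 0), ("r1", 100), ("r2", 250)]
    assert list(pairs) == []             # already exhausted

    pairs = list(zip(readers, offsets))  # materialized: safe to reuse
    assert pairs[1] == ("r1", 100) and len(pairs) == 3
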
diff --git a/src/whoosh/scoring.py b/src/whoosh/scoring.py
index ad515b43..882692cc 100644
--- a/src/whoosh/scoring.py
+++ b/src/whoosh/scoring.py
@@ -32,8 +32,6 @@
from math import log, pi
-from whoosh.compat import iteritems
-
# Base classes
@@ -293,7 +291,7 @@ def __init__(self, B=0.75, K1=1.2, **kwargs):
self.K1 = K1
self._field_B = {}
- for k, v in iteritems(kwargs):
+ for k, v in kwargs.items():
if k.endswith("_B"):
fieldname = k[:-2]
self._field_B[fieldname] = v
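
The kwargs loop above is what lets callers tune the length-normalisation parameter per field by passing <fieldname>_B keyword arguments; only the iteration syntax changes here. Assuming this __init__ belongs to the BM25F weighting class (the hunk does not show the class name), usage would look like:

    from whoosh import scoring

    # B applies to all fields; body_B overrides it for the "body" field only.
    weighting = scoring.BM25F(B=0.75, K1=1.2, body_B=0.9)
    # searcher = ix.searcher(weighting=weighting)   # with an existing index `ix`
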
diff --git a/src/whoosh/searching.py b/src/whoosh/searching.py
index 87af340d..1d3f010b 100644
--- a/src/whoosh/searching.py
+++ b/src/whoosh/searching.py
@@ -34,7 +34,6 @@
from math import ceil
from whoosh import classify, highlight, query, scoring
-from whoosh.compat import iteritems, iterkeys, itervalues
from whoosh.idsets import BitSet, DocIdSet
from whoosh.reading import TermNotFound
@@ -394,13 +393,13 @@ def documents(self, **kw):
)
def _kw_to_text(self, kw):
- for k, v in iteritems(kw):
+ for k, v in kw.items():
field = self.schema[k]
kw[k] = field.to_bytes(v)
def _query_for_kw(self, kw):
subqueries = []
- for key, value in iteritems(kw):
+ for key, value in kw.items():
subqueries.append(query.Term(key, value))
if subqueries:
q = query.And(subqueries).normalize()
@@ -945,7 +944,7 @@ def correct_query(
# Remap correctors dict according to aliases
d = {}
- for fieldname, corr in iteritems(correctors):
+ for fieldname, corr in correctors.items():
fieldname = aliases.get(fieldname, fieldname)
d[fieldname] = corr
correctors = d
@@ -1572,7 +1571,7 @@ def __len__(self):
return len(self.fields())
def __iter__(self):
- return iterkeys(self.fields())
+ return iter(self.fields())
def __getitem__(self, fieldname):
if fieldname in self.fields():
@@ -1598,13 +1597,13 @@ def values(self):
return list(self.fields().values())
def iteritems(self):
- return iteritems(self.fields())
+ return self.fields().items()
def iterkeys(self):
- return iterkeys(self.fields())
+ return self.fields().keys()
def itervalues(self):
- return itervalues(self.fields())
+ return self.fields().values()
def __delitem__(self, key):
raise NotImplementedError("You cannot modify a search result")
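
Python 3 dict views from .keys()/.items()/.values() replace the old iteritems/iterkeys/itervalues helpers. One subtlety: __iter__ must return an actual iterator, not a view, which is why it wraps the mapping in iter(...) above. A small reminder of the distinction:

    fields = {"title": "Hello", "path": "/a/1"}

    keys = fields.keys()       # a view: iterable, reusable, reflects later changes
    print(list(keys))          # ['title', 'path']

    it = iter(fields)          # an iterator, equivalent to iter(fields.keys())
    print(next(it))            # 'title'
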
diff --git a/src/whoosh/sorting.py b/src/whoosh/sorting.py
index cd36fb99..93e0171a 100644
--- a/src/whoosh/sorting.py
+++ b/src/whoosh/sorting.py
@@ -28,8 +28,6 @@
from array import array
from collections import defaultdict
-from whoosh.compat import iteritems, izip, string_type
-
# Faceting objects
@@ -783,7 +781,7 @@ def __repr__(self):
@classmethod
def from_sortedby(cls, sortedby):
multi = cls()
- if isinstance(sortedby, string_type):
+ if isinstance(sortedby, str):
multi._add(sortedby)
elif isinstance(sortedby, (list, tuple)) or hasattr(sortedby, "__iter__"):
for item in sortedby:
@@ -795,7 +793,7 @@ def from_sortedby(cls, sortedby):
def _add(self, item):
if isinstance(item, FacetType):
self.add_facet(item)
- elif isinstance(item, string_type):
+ elif isinstance(item, str):
self.add_field(item)
else:
raise Exception(f"Don't know what to do with facet {item!r}")
@@ -851,7 +849,7 @@ def key_for(self, matcher, docid):
def key_to_name(self, key):
return tuple(
catter.key_to_name(keypart)
- for catter, keypart in izip(self.catters, key)
+ for catter, keypart in zip(self.catters, key)
)
@@ -882,7 +880,7 @@ def from_groupedby(cls, groupedby):
facets = cls()
if isinstance(groupedby, (cls, dict)):
facets.add_facets(groupedby)
- elif isinstance(groupedby, string_type):
+ elif isinstance(groupedby, str):
facets.add_field(groupedby)
elif isinstance(groupedby, FacetType):
facets.add_facet(groupedby.default_name(), groupedby)
@@ -1004,7 +1002,7 @@ def add(self, groupname, docid, sortkey):
def as_dict(self):
d = {}
- for key, items in iteritems(self.dict):
+ for key, items in self.dict.items():
d[key] = [docnum for _, docnum in sorted(items)]
return d
diff --git a/src/whoosh/spelling.py b/src/whoosh/spelling.py
index e929b4f1..15a8220e 100644
--- a/src/whoosh/spelling.py
+++ b/src/whoosh/spelling.py
@@ -33,7 +33,6 @@
from heapq import heappush, heapreplace
from whoosh import highlight
-from whoosh.compat import iteritems
# Corrector objects
@@ -170,7 +169,7 @@ def _suggestions(self, text, maxdist, prefix):
seen[sug] = op(seen[sug], score)
else:
seen[sug] = score
- return iteritems(seen)
+ return seen.items()
# Query correction
diff --git a/src/whoosh/support/charset.py b/src/whoosh/support/charset.py
index 4acee88f..7334d853 100644
--- a/src/whoosh/support/charset.py
+++ b/src/whoosh/support/charset.py
@@ -6,8 +6,6 @@
import re
from collections import defaultdict
-from whoosh.compat import iteritems, izip, range, u, unichr
-
# This is a straightforward accent-folding charset taken from Carlos Bueno's
# article "Accent Folding for Auto-Complete", for use with CharsetFilter.
#
@@ -19,718 +17,718 @@
# http://github.com/aristus/accent-folding/blob/master/accent_fold.py
accent_map = {
- u("H"): u("h"), # H -> h
- u("I"): u("i"), # I -> i
- u("J"): u("j"), # J -> j
- u("N"): u("n"), # N -> n
- u("P"): u("p"), # P -> p
- u("S"): u("s"), # S -> s
- u("T"): u("t"), # T -> t
- u("W"): u("w"), # W -> w
- u("Y"): u("y"), # Y -> y
- u("i"): u("i"), # i -> i
- u("n"): u("n"), # n -> n
- u("p"): u("p"), # p -> p
- u("s"): u("s"), # s -> s
- u("\xc0"): u("a"), # À -> a
- u("\xc1"): u("a"), # Á -> a
- u("\xc2"): u("a"), # Â -> a
- u("\xc3"): u("a"), # Ã -> a
- u("\xc4"): u("a"), # Ä -> a
- u("\xc5"): u("a"), # Å -> a
- u("\xc7"): u("c"), # Ç -> c
- u("\xc8"): u("e"), # È -> e
- u("\xc9"): u("e"), # É -> e
- u("\xca"): u("e"), # Ê -> e
- u("\xcb"): u("e"), # Ë -> e
- u("\xcc"): u("i"), # Ì -> i
- u("\xcd"): u("i"), # Í -> i
- u("\xce"): u("i"), # Î -> i
- u("\xcf"): u("i"), # Ï -> i
- u("\xd1"): u("n"), # Ñ -> n
- u("\xd2"): u("o"), # Ò -> o
- u("\xd3"): u("o"), # Ó -> o
- u("\xd4"): u("o"), # Ô -> o
- u("\xd5"): u("o"), # Õ -> o
- u("\xd6"): u("o"), # Ö -> o
- u("\xd8"): u("o"), # Ø -> o
- u("\xd9"): u("u"), # Ù -> u
- u("\xda"): u("u"), # Ú -> u
- u("\xdb"): u("u"), # Û -> u
- u("\xdc"): u("u"), # Ü -> u
- u("\xdd"): u("y"), # Ý -> y
- u("\xde"): u("t"), # Þ -> t
- u("\xdf"): u("s"), # ß -> s
- u("\xe0"): u("a"), # à -> a
- u("\xe1"): u("a"), # á -> a
- u("\xe2"): u("a"), # â -> a
- u("\xe3"): u("a"), # ã -> a
- u("\xe4"): u("a"), # ä -> a
- u("\xe5"): u("a"), # å -> a
- u("\xe7"): u("c"), # ç -> c
- u("\xe8"): u("e"), # è -> e
- u("\xe9"): u("e"), # é -> e
- u("\xea"): u("e"), # ê -> e
- u("\xeb"): u("e"), # ë -> e
- u("\xec"): u("i"), # ì -> i
- u("\xed"): u("i"), # í -> i
- u("\xee"): u("i"), # î -> i
- u("\xef"): u("i"), # ï -> i
- u("\xf0"): u("d"), # ð -> d
- u("\xf1"): u("n"), # ñ -> n
- u("\xf2"): u("o"), # ò -> o
- u("\xf3"): u("o"), # ó -> o
- u("\xf4"): u("o"), # ô -> o
- u("\xf5"): u("o"), # õ -> o
- u("\xf6"): u("o"), # ö -> o
- u("\xf8"): u("o"), # ø -> o
- u("\xf9"): u("u"), # ù -> u
- u("\xfa"): u("u"), # ú -> u
- u("\xfb"): u("u"), # û -> u
- u("\xfc"): u("u"), # ü -> u
- u("\xfd"): u("y"), # ý -> y
- u("\xfe"): u("t"), # þ -> t
- u("\xff"): u("y"), # ÿ -> y
- u("\u0100"): u("a"), # Ā -> a
- u("\u0101"): u("a"), # ā -> a
- u("\u0102"): u("a"), # Ă -> a
- u("\u0103"): u("a"), # ă -> a
- u("\u0104"): u("a"), # Ą -> a
- u("\u0105"): u("a"), # ą -> a
- u("\u0106"): u("c"), # Ć -> c
- u("\u0107"): u("c"), # ć -> c
- u("\u0108"): u("c"), # Ĉ -> c
- u("\u0109"): u("c"), # ĉ -> c
- u("\u010a"): u("c"), # Ċ -> c
- u("\u010b"): u("c"), # ċ -> c
- u("\u010c"): u("c"), # Č -> c
- u("\u010d"): u("c"), # č -> c
- u("\u010e"): u("d"), # Ď -> d
- u("\u010f"): u("d"), # ď -> d
- u("\u0110"): u("d"), # Đ -> d
- u("\u0111"): u("d"), # đ -> d
- u("\u0112"): u("e"), # Ē -> e
- u("\u0113"): u("e"), # ē -> e
- u("\u0114"): u("e"), # Ĕ -> e
- u("\u0115"): u("e"), # ĕ -> e
- u("\u0116"): u("e"), # Ė -> e
- u("\u0117"): u("e"), # ė -> e
- u("\u0118"): u("e"), # Ę -> e
- u("\u0119"): u("e"), # ę -> e
- u("\u011a"): u("e"), # Ě -> e
- u("\u011b"): u("e"), # ě -> e
- u("\u011c"): u("g"), # Ĝ -> g
- u("\u011d"): u("g"), # ĝ -> g
- u("\u011e"): u("g"), # Ğ -> g
- u("\u011f"): u("g"), # ğ -> g
- u("\u0120"): u("g"), # Ġ -> g
- u("\u0121"): u("g"), # ġ -> g
- u("\u0122"): u("g"), # Ģ -> g
- u("\u0123"): u("g"), # ģ -> g
- u("\u0124"): u("h"), # Ĥ -> h
- u("\u0125"): u("h"), # ĥ -> h
- u("\u0126"): u("h"), # Ħ -> h
- u("\u0127"): u("h"), # ħ -> h
- u("\u0128"): u("i"), # Ĩ -> i
- u("\u0129"): u("i"), # ĩ -> i
- u("\u012a"): u("i"), # Ī -> i
- u("\u012b"): u("i"), # ī -> i
- u("\u012c"): u("i"), # Ĭ -> i
- u("\u012d"): u("i"), # ĭ -> i
- u("\u012e"): u("i"), # Į -> i
- u("\u012f"): u("i"), # į -> i
- u("\u0130"): u("i"), # İ -> i
- u("\u0131"): u("i"), # ı -> i
- u("\u0134"): u("j"), # Ĵ -> j
- u("\u0135"): u("j"), # ĵ -> j
- u("\u0136"): u("k"), # Ķ -> k
- u("\u0137"): u("k"), # ķ -> k
- u("\u0139"): u("a"), # Ĺ -> a
- u("\u013a"): u("l"), # ĺ -> l
- u("\u013b"): u("l"), # Ļ -> l
- u("\u013c"): u("l"), # ļ -> l
- u("\u013d"): u("l"), # Ľ -> l
- u("\u013e"): u("l"), # ľ -> l
- u("\u013f"): u("l"), # Ŀ -> l
- u("\u0140"): u("l"), # ŀ -> l
- u("\u0141"): u("l"), # Ł -> l
- u("\u0142"): u("l"), # ł -> l
- u("\u0143"): u("n"), # Ń -> n
- u("\u0144"): u("n"), # ń -> n
- u("\u0145"): u("n"), # Ņ -> n
- u("\u0146"): u("n"), # ņ -> n
- u("\u0147"): u("n"), # Ň -> n
- u("\u0148"): u("n"), # ň -> n
- u("\u014c"): u("o"), # Ō -> o
- u("\u014d"): u("o"), # ō -> o
- u("\u014e"): u("o"), # Ŏ -> o
- u("\u014f"): u("o"), # ŏ -> o
- u("\u0150"): u("o"), # Ő -> o
- u("\u0151"): u("o"), # ő -> o
- u("\u0154"): u("r"), # Ŕ -> r
- u("\u0155"): u("r"), # ŕ -> r
- u("\u0156"): u("r"), # Ŗ -> r
- u("\u0157"): u("r"), # ŗ -> r
- u("\u0158"): u("r"), # Ř -> r
- u("\u0159"): u("r"), # ř -> r
- u("\u015a"): u("s"), # Ś -> s
- u("\u015b"): u("s"), # ś -> s
- u("\u015c"): u("s"), # Ŝ -> s
- u("\u015d"): u("s"), # ŝ -> s
- u("\u015e"): u("s"), # Ş -> s
- u("\u015f"): u("s"), # ş -> s
- u("\u0160"): u("s"), # Š -> s
- u("\u0161"): u("s"), # š -> s
- u("\u0162"): u("t"), # Ţ -> t
- u("\u0163"): u("t"), # ţ -> t
- u("\u0164"): u("t"), # Ť -> t
- u("\u0165"): u("t"), # ť -> t
- u("\u0166"): u("t"), # Ŧ -> t
- u("\u0167"): u("t"), # ŧ -> t
- u("\u0168"): u("u"), # Ũ -> u
- u("\u0169"): u("u"), # ũ -> u
- u("\u016a"): u("u"), # Ū -> u
- u("\u016b"): u("u"), # ū -> u
- u("\u016c"): u("u"), # Ŭ -> u
- u("\u016d"): u("u"), # ŭ -> u
- u("\u016e"): u("u"), # Ů -> u
- u("\u016f"): u("u"), # ů -> u
- u("\u0170"): u("u"), # Ű -> u
- u("\u0171"): u("u"), # ű -> u
- u("\u0172"): u("u"), # Ų -> u
- u("\u0173"): u("u"), # ų -> u
- u("\u0174"): u("w"), # Ŵ -> w
- u("\u0175"): u("w"), # ŵ -> w
- u("\u0176"): u("y"), # Ŷ -> y
- u("\u0177"): u("y"), # ŷ -> y
- u("\u0178"): u("y"), # Ÿ -> y
- u("\u0179"): u("z"), # Ź -> z
- u("\u017a"): u("z"), # ź -> z
- u("\u017b"): u("z"), # Ż -> z
- u("\u017c"): u("z"), # ż -> z
- u("\u017d"): u("z"), # Ž -> z
- u("\u017e"): u("z"), # ž -> z
- u("\u0180"): u("b"), # ƀ -> b
- u("\u0181"): u("b"), # Ɓ -> b
- u("\u0182"): u("b"), # Ƃ -> b
- u("\u0183"): u("b"), # ƃ -> b
- u("\u0187"): u("c"), # Ƈ -> c
- u("\u0188"): u("c"), # ƈ -> c
- u("\u0189"): u("d"), # Ɖ -> d
- u("\u018a"): u("d"), # Ɗ -> d
- u("\u018b"): u("d"), # Ƌ -> d
- u("\u018c"): u("d"), # ƌ -> d
- u("\u018e"): u("e"), # Ǝ -> e
- u("\u018f"): u("e"), # Ə -> e
- u("\u0191"): u("f"), # Ƒ -> f
- u("\u0192"): u("f"), # ƒ -> f
- u("\u0193"): u("g"), # Ɠ -> g
- u("\u0197"): u("i"), # Ɨ -> i
- u("\u0198"): u("k"), # Ƙ -> k
- u("\u0199"): u("k"), # ƙ -> k
- u("\u019a"): u("l"), # ƚ -> l
- u("\u019d"): u("n"), # Ɲ -> n
- u("\u019e"): u("n"), # ƞ -> n
- u("\u019f"): u("o"), # Ɵ -> o
- u("\u01a0"): u("o"), # Ơ -> o
- u("\u01a1"): u("o"), # ơ -> o
- u("\u01a4"): u("p"), # Ƥ -> p
- u("\u01a5"): u("p"), # ƥ -> p
- u("\u01ab"): u("t"), # ƫ -> t
- u("\u01ac"): u("t"), # Ƭ -> t
- u("\u01ad"): u("t"), # ƭ -> t
- u("\u01ae"): u("t"), # Ʈ -> t
- u("\u01af"): u("u"), # Ư -> u
- u("\u01b0"): u("u"), # ư -> u
- u("\u01b2"): u("v"), # Ʋ -> v
- u("\u01b3"): u("y"), # Ƴ -> y
- u("\u01b4"): u("y"), # ƴ -> y
- u("\u01b5"): u("z"), # Ƶ -> z
- u("\u01b6"): u("z"), # ƶ -> z
- u("\u01ba"): u("z"), # ƺ -> z
- u("\u01cd"): u("a"), # Ǎ -> a
- u("\u01ce"): u("a"), # ǎ -> a
- u("\u01cf"): u("i"), # Ǐ -> i
- u("\u01d0"): u("i"), # ǐ -> i
- u("\u01d1"): u("o"), # Ǒ -> o
- u("\u01d2"): u("o"), # ǒ -> o
- u("\u01d3"): u("u"), # Ǔ -> u
- u("\u01d4"): u("u"), # ǔ -> u
- u("\u01d5"): u("u"), # Ǖ -> u
- u("\u01d6"): u("u"), # ǖ -> u
- u("\u01d7"): u("u"), # Ǘ -> u
- u("\u01d8"): u("u"), # ǘ -> u
- u("\u01d9"): u("u"), # Ǚ -> u
- u("\u01da"): u("u"), # ǚ -> u
- u("\u01db"): u("u"), # Ǜ -> u
- u("\u01dc"): u("u"), # ǜ -> u
- u("\u01dd"): u("e"), # ǝ -> e
- u("\u01de"): u("a"), # Ǟ -> a
- u("\u01df"): u("a"), # ǟ -> a
- u("\u01e0"): u("a"), # Ǡ -> a
- u("\u01e1"): u("a"), # ǡ -> a
- u("\u01e2"): u("a"), # Ǣ -> a
- u("\u01e3"): u("a"), # ǣ -> a
- u("\u01e4"): u("g"), # Ǥ -> g
- u("\u01e5"): u("g"), # ǥ -> g
- u("\u01e6"): u("g"), # Ǧ -> g
- u("\u01e7"): u("g"), # ǧ -> g
- u("\u01e8"): u("k"), # Ǩ -> k
- u("\u01e9"): u("k"), # ǩ -> k
- u("\u01ea"): u("o"), # Ǫ -> o
- u("\u01eb"): u("o"), # ǫ -> o
- u("\u01ec"): u("o"), # Ǭ -> o
- u("\u01ed"): u("o"), # ǭ -> o
- u("\u01ee"): u("z"), # Ǯ -> z
- u("\u01ef"): u("z"), # ǯ -> z
- u("\u01f0"): u("j"), # ǰ -> j
- u("\u01f4"): u("g"), # Ǵ -> g
- u("\u01f5"): u("g"), # ǵ -> g
- u("\u01f8"): u("n"), # Ǹ -> n
- u("\u01f9"): u("n"), # ǹ -> n
- u("\u01fa"): u("a"), # Ǻ -> a
- u("\u01fb"): u("a"), # ǻ -> a
- u("\u01fc"): u("a"), # Ǽ -> a
- u("\u01fd"): u("a"), # ǽ -> a
- u("\u01fe"): u("o"), # Ǿ -> o
- u("\u01ff"): u("o"), # ǿ -> o
- u("\u0200"): u("a"), # Ȁ -> a
- u("\u0201"): u("a"), # ȁ -> a
- u("\u0202"): u("a"), # Ȃ -> a
- u("\u0203"): u("a"), # ȃ -> a
- u("\u0204"): u("e"), # Ȅ -> e
- u("\u0205"): u("e"), # ȅ -> e
- u("\u0206"): u("e"), # Ȇ -> e
- u("\u0207"): u("e"), # ȇ -> e
- u("\u0208"): u("i"), # Ȉ -> i
- u("\u0209"): u("i"), # ȉ -> i
- u("\u020a"): u("i"), # Ȋ -> i
- u("\u020b"): u("i"), # ȋ -> i
- u("\u020c"): u("o"), # Ȍ -> o
- u("\u020d"): u("o"), # ȍ -> o
- u("\u020e"): u("o"), # Ȏ -> o
- u("\u020f"): u("o"), # ȏ -> o
- u("\u0210"): u("r"), # Ȑ -> r
- u("\u0211"): u("r"), # ȑ -> r
- u("\u0212"): u("r"), # Ȓ -> r
- u("\u0213"): u("r"), # ȓ -> r
- u("\u0214"): u("u"), # Ȕ -> u
- u("\u0215"): u("u"), # ȕ -> u
- u("\u0216"): u("u"), # Ȗ -> u
- u("\u0217"): u("u"), # ȗ -> u
- u("\u0218"): u("s"), # Ș -> s
- u("\u0219"): u("s"), # ș -> s
- u("\u021a"): u("t"), # Ț -> t
- u("\u021b"): u("t"), # ț -> t
- u("\u021e"): u("h"), # Ȟ -> h
- u("\u021f"): u("h"), # ȟ -> h
- u("\u0220"): u("n"), # Ƞ -> n
- u("\u0221"): u("d"), # ȡ -> d
- u("\u0224"): u("z"), # Ȥ -> z
- u("\u0225"): u("z"), # ȥ -> z
- u("\u0226"): u("a"), # Ȧ -> a
- u("\u0227"): u("a"), # ȧ -> a
- u("\u0228"): u("e"), # Ȩ -> e
- u("\u0229"): u("e"), # ȩ -> e
- u("\u022a"): u("o"), # Ȫ -> o
- u("\u022b"): u("o"), # ȫ -> o
- u("\u022c"): u("o"), # Ȭ -> o
- u("\u022d"): u("o"), # ȭ -> o
- u("\u022e"): u("o"), # Ȯ -> o
- u("\u022f"): u("o"), # ȯ -> o
- u("\u0230"): u("o"), # Ȱ -> o
- u("\u0231"): u("o"), # ȱ -> o
- u("\u0232"): u("y"), # Ȳ -> y
- u("\u0233"): u("y"), # ȳ -> y
- u("\u0234"): u("l"), # ȴ -> l
- u("\u0235"): u("n"), # ȵ -> n
- u("\u0236"): u("t"), # ȶ -> t
- u("\u0237"): u("j"), # ȷ -> j
- u("\u023a"): u("a"), # Ⱥ -> a
- u("\u023b"): u("c"), # Ȼ -> c
- u("\u023c"): u("c"), # ȼ -> c
- u("\u023d"): u("l"), # Ƚ -> l
- u("\u023e"): u("t"), # Ⱦ -> t
- u("\u0243"): u("b"), # Ƀ -> b
- u("\u0244"): u("u"), # Ʉ -> u
- u("\u0246"): u("e"), # Ɇ -> e
- u("\u0247"): u("e"), # ɇ -> e
- u("\u0248"): u("j"), # Ɉ -> j
- u("\u0249"): u("j"), # ɉ -> j
- u("\u024a"): u("q"), # Ɋ -> q
- u("\u024b"): u("q"), # ɋ -> q
- u("\u024c"): u("r"), # Ɍ -> r
- u("\u024d"): u("r"), # ɍ -> r
- u("\u024e"): u("y"), # Ɏ -> y
- u("\u024f"): u("y"), # ɏ -> y
- u("\u0253"): u("b"), # ɓ -> b
- u("\u0255"): u("c"), # ɕ -> c
- u("\u0256"): u("d"), # ɖ -> d
- u("\u0257"): u("d"), # ɗ -> d
- u("\u025a"): u("e"), # ɚ -> e
- u("\u025d"): u("e"), # ɝ -> e
- u("\u025f"): u("j"), # ɟ -> j
- u("\u0260"): u("g"), # ɠ -> g
- u("\u0268"): u("i"), # ɨ -> i
- u("\u026b"): u("l"), # ɫ -> l
- u("\u026c"): u("l"), # ɬ -> l
- u("\u026d"): u("l"), # ɭ -> l
- u("\u0271"): u("m"), # ɱ -> m
- u("\u0272"): u("n"), # ɲ -> n
- u("\u0273"): u("n"), # ɳ -> n
- u("\u0275"): u("o"), # ɵ -> o
- u("\u027c"): u("r"), # ɼ -> r
- u("\u027d"): u("r"), # ɽ -> r
- u("\u027e"): u("r"), # ɾ -> r
- u("\u0282"): u("s"), # ʂ -> s
- u("\u0284"): u("j"), # ʄ -> j
- u("\u0288"): u("t"), # ʈ -> t
- u("\u0289"): u("u"), # ʉ -> u
- u("\u028b"): u("v"), # ʋ -> v
- u("\u028f"): u("y"), # ʏ -> y
- u("\u0290"): u("z"), # ʐ -> z
- u("\u0291"): u("z"), # ʑ -> z
- u("\u029d"): u("j"), # ʝ -> j
- u("\u02a0"): u("q"), # ʠ -> q
- u("\u0303"): u("p"), # ̃ -> p
- u("\u0308"): u("t"), # ̈ -> t
- u("\u030a"): u("y"), # ̊ -> y
- u("\u030c"): u("j"), # ̌ -> j
- u("\u0323"): u("l"), # ̣ -> l
- u("\u0329"): u("s"), # ̩ -> s
- u("\u0331"): u("h"), # ̱ -> h
- u("\u1d6c"): u("b"), # ᵬ -> b
- u("\u1d6d"): u("d"), # ᵭ -> d
- u("\u1d6e"): u("f"), # ᵮ -> f
- u("\u1d72"): u("r"), # ᵲ -> r
- u("\u1d73"): u("r"), # ᵳ -> r
- u("\u1d75"): u("t"), # ᵵ -> t
- u("\u1e00"): u("a"), # Ḁ -> a
- u("\u1e01"): u("a"), # ḁ -> a
- u("\u1e02"): u("b"), # Ḃ -> b
- u("\u1e03"): u("b"), # ḃ -> b
- u("\u1e04"): u("b"), # Ḅ -> b
- u("\u1e05"): u("b"), # ḅ -> b
- u("\u1e06"): u("b"), # Ḇ -> b
- u("\u1e07"): u("b"), # ḇ -> b
- u("\u1e08"): u("c"), # Ḉ -> c
- u("\u1e09"): u("c"), # ḉ -> c
- u("\u1e0a"): u("d"), # Ḋ -> d
- u("\u1e0b"): u("d"), # ḋ -> d
- u("\u1e0c"): u("d"), # Ḍ -> d
- u("\u1e0d"): u("d"), # ḍ -> d
- u("\u1e0e"): u("d"), # Ḏ -> d
- u("\u1e0f"): u("d"), # ḏ -> d
- u("\u1e10"): u("d"), # Ḑ -> d
- u("\u1e11"): u("d"), # ḑ -> d
- u("\u1e12"): u("d"), # Ḓ -> d
- u("\u1e13"): u("d"), # ḓ -> d
- u("\u1e14"): u("e"), # Ḕ -> e
- u("\u1e15"): u("e"), # ḕ -> e
- u("\u1e16"): u("e"), # Ḗ -> e
- u("\u1e17"): u("e"), # ḗ -> e
- u("\u1e18"): u("e"), # Ḙ -> e
- u("\u1e19"): u("e"), # ḙ -> e
- u("\u1e1a"): u("e"), # Ḛ -> e
- u("\u1e1b"): u("e"), # ḛ -> e
- u("\u1e1c"): u("e"), # Ḝ -> e
- u("\u1e1d"): u("e"), # ḝ -> e
- u("\u1e1e"): u("f"), # Ḟ -> f
- u("\u1e1f"): u("f"), # ḟ -> f
- u("\u1e20"): u("g"), # Ḡ -> g
- u("\u1e21"): u("g"), # ḡ -> g
- u("\u1e22"): u("h"), # Ḣ -> h
- u("\u1e23"): u("h"), # ḣ -> h
- u("\u1e24"): u("h"), # Ḥ -> h
- u("\u1e25"): u("h"), # ḥ -> h
- u("\u1e26"): u("h"), # Ḧ -> h
- u("\u1e27"): u("h"), # ḧ -> h
- u("\u1e28"): u("h"), # Ḩ -> h
- u("\u1e29"): u("h"), # ḩ -> h
- u("\u1e2a"): u("h"), # Ḫ -> h
- u("\u1e2b"): u("h"), # ḫ -> h
- u("\u1e2c"): u("i"), # Ḭ -> i
- u("\u1e2d"): u("i"), # ḭ -> i
- u("\u1e2e"): u("i"), # Ḯ -> i
- u("\u1e2f"): u("i"), # ḯ -> i
- u("\u1e30"): u("k"), # Ḱ -> k
- u("\u1e31"): u("k"), # ḱ -> k
- u("\u1e32"): u("k"), # Ḳ -> k
- u("\u1e33"): u("k"), # ḳ -> k
- u("\u1e34"): u("k"), # Ḵ -> k
- u("\u1e35"): u("k"), # ḵ -> k
- u("\u1e36"): u("l"), # Ḷ -> l
- u("\u1e37"): u("l"), # ḷ -> l
- u("\u1e38"): u("l"), # Ḹ -> l
- u("\u1e39"): u("l"), # ḹ -> l
- u("\u1e3a"): u("l"), # Ḻ -> l
- u("\u1e3b"): u("l"), # ḻ -> l
- u("\u1e3c"): u("l"), # Ḽ -> l
- u("\u1e3d"): u("l"), # ḽ -> l
- u("\u1e3e"): u("m"), # Ḿ -> m
- u("\u1e3f"): u("m"), # ḿ -> m
- u("\u1e40"): u("m"), # Ṁ -> m
- u("\u1e41"): u("m"), # ṁ -> m
- u("\u1e42"): u("m"), # Ṃ -> m
- u("\u1e43"): u("m"), # ṃ -> m
- u("\u1e44"): u("n"), # Ṅ -> n
- u("\u1e45"): u("n"), # ṅ -> n
- u("\u1e46"): u("n"), # Ṇ -> n
- u("\u1e47"): u("n"), # ṇ -> n
- u("\u1e48"): u("n"), # Ṉ -> n
- u("\u1e49"): u("n"), # ṉ -> n
- u("\u1e4a"): u("n"), # Ṋ -> n
- u("\u1e4b"): u("n"), # ṋ -> n
- u("\u1e4c"): u("o"), # Ṍ -> o
- u("\u1e4d"): u("o"), # ṍ -> o
- u("\u1e4e"): u("o"), # Ṏ -> o
- u("\u1e4f"): u("o"), # ṏ -> o
- u("\u1e50"): u("o"), # Ṑ -> o
- u("\u1e51"): u("o"), # ṑ -> o
- u("\u1e52"): u("o"), # Ṓ -> o
- u("\u1e53"): u("o"), # ṓ -> o
- u("\u1e54"): u("p"), # Ṕ -> p
- u("\u1e55"): u("p"), # ṕ -> p
- u("\u1e56"): u("p"), # Ṗ -> p
- u("\u1e57"): u("p"), # ṗ -> p
- u("\u1e58"): u("r"), # Ṙ -> r
- u("\u1e59"): u("r"), # ṙ -> r
- u("\u1e5a"): u("r"), # Ṛ -> r
- u("\u1e5b"): u("r"), # ṛ -> r
- u("\u1e5c"): u("r"), # Ṝ -> r
- u("\u1e5d"): u("r"), # ṝ -> r
- u("\u1e5e"): u("r"), # Ṟ -> r
- u("\u1e5f"): u("r"), # ṟ -> r
- u("\u1e60"): u("s"), # Ṡ -> s
- u("\u1e61"): u("s"), # ṡ -> s
- u("\u1e62"): u("s"), # Ṣ -> s
- u("\u1e63"): u("s"), # ṣ -> s
- u("\u1e64"): u("s"), # Ṥ -> s
- u("\u1e65"): u("s"), # ṥ -> s
- u("\u1e66"): u("s"), # Ṧ -> s
- u("\u1e67"): u("s"), # ṧ -> s
- u("\u1e68"): u("s"), # Ṩ -> s
- u("\u1e69"): u("s"), # ṩ -> s
- u("\u1e6a"): u("t"), # Ṫ -> t
- u("\u1e6b"): u("t"), # ṫ -> t
- u("\u1e6c"): u("t"), # Ṭ -> t
- u("\u1e6d"): u("t"), # ṭ -> t
- u("\u1e6e"): u("t"), # Ṯ -> t
- u("\u1e6f"): u("t"), # ṯ -> t
- u("\u1e70"): u("t"), # Ṱ -> t
- u("\u1e71"): u("t"), # ṱ -> t
- u("\u1e72"): u("u"), # Ṳ -> u
- u("\u1e73"): u("u"), # ṳ -> u
- u("\u1e74"): u("u"), # Ṵ -> u
- u("\u1e75"): u("u"), # ṵ -> u
- u("\u1e76"): u("u"), # Ṷ -> u
- u("\u1e77"): u("u"), # ṷ -> u
- u("\u1e78"): u("u"), # Ṹ -> u
- u("\u1e79"): u("u"), # ṹ -> u
- u("\u1e7a"): u("u"), # Ṻ -> u
- u("\u1e7b"): u("u"), # ṻ -> u
- u("\u1e7c"): u("v"), # Ṽ -> v
- u("\u1e7d"): u("v"), # ṽ -> v
- u("\u1e7e"): u("v"), # Ṿ -> v
- u("\u1e7f"): u("v"), # ṿ -> v
- u("\u1e80"): u("w"), # Ẁ -> w
- u("\u1e81"): u("w"), # ẁ -> w
- u("\u1e82"): u("w"), # Ẃ -> w
- u("\u1e83"): u("w"), # ẃ -> w
- u("\u1e84"): u("w"), # Ẅ -> w
- u("\u1e85"): u("w"), # ẅ -> w
- u("\u1e86"): u("w"), # Ẇ -> w
- u("\u1e87"): u("w"), # ẇ -> w
- u("\u1e88"): u("w"), # Ẉ -> w
- u("\u1e89"): u("w"), # ẉ -> w
- u("\u1e8a"): u("x"), # Ẋ -> x
- u("\u1e8b"): u("x"), # ẋ -> x
- u("\u1e8c"): u("x"), # Ẍ -> x
- u("\u1e8d"): u("x"), # ẍ -> x
- u("\u1e8e"): u("y"), # Ẏ -> y
- u("\u1e8f"): u("y"), # ẏ -> y
- u("\u1e90"): u("z"), # Ẑ -> z
- u("\u1e91"): u("z"), # ẑ -> z
- u("\u1e92"): u("z"), # Ẓ -> z
- u("\u1e93"): u("z"), # ẓ -> z
- u("\u1e94"): u("z"), # Ẕ -> z
- u("\u1e95"): u("z"), # ẕ -> z
- u("\u1e96"): u("h"), # ẖ -> h
- u("\u1e97"): u("t"), # ẗ -> t
- u("\u1e98"): u("w"), # ẘ -> w
- u("\u1e99"): u("y"), # ẙ -> y
- u("\u1e9a"): u("a"), # ẚ -> a
- u("\u1e9b"): u("s"), # ẛ -> s
- u("\u1ea0"): u("a"), # Ạ -> a
- u("\u1ea1"): u("a"), # ạ -> a
- u("\u1ea2"): u("a"), # Ả -> a
- u("\u1ea3"): u("a"), # ả -> a
- u("\u1ea4"): u("a"), # Ấ -> a
- u("\u1ea5"): u("a"), # ấ -> a
- u("\u1ea6"): u("a"), # Ầ -> a
- u("\u1ea7"): u("a"), # ầ -> a
- u("\u1ea8"): u("a"), # Ẩ -> a
- u("\u1ea9"): u("a"), # ẩ -> a
- u("\u1eaa"): u("a"), # Ẫ -> a
- u("\u1eab"): u("a"), # ẫ -> a
- u("\u1eac"): u("a"), # Ậ -> a
- u("\u1ead"): u("a"), # ậ -> a
- u("\u1eae"): u("a"), # Ắ -> a
- u("\u1eaf"): u("a"), # ắ -> a
- u("\u1eb0"): u("a"), # Ằ -> a
- u("\u1eb1"): u("a"), # ằ -> a
- u("\u1eb2"): u("a"), # Ẳ -> a
- u("\u1eb3"): u("a"), # ẳ -> a
- u("\u1eb4"): u("a"), # Ẵ -> a
- u("\u1eb5"): u("a"), # ẵ -> a
- u("\u1eb6"): u("a"), # Ặ -> a
- u("\u1eb7"): u("a"), # ặ -> a
- u("\u1eb8"): u("e"), # Ẹ -> e
- u("\u1eb9"): u("e"), # ẹ -> e
- u("\u1eba"): u("e"), # Ẻ -> e
- u("\u1ebb"): u("e"), # ẻ -> e
- u("\u1ebc"): u("e"), # Ẽ -> e
- u("\u1ebd"): u("e"), # ẽ -> e
- u("\u1ebe"): u("e"), # Ế -> e
- u("\u1ebf"): u("e"), # ế -> e
- u("\u1ec0"): u("e"), # Ề -> e
- u("\u1ec1"): u("e"), # ề -> e
- u("\u1ec2"): u("e"), # Ể -> e
- u("\u1ec3"): u("e"), # ể -> e
- u("\u1ec4"): u("e"), # Ễ -> e
- u("\u1ec5"): u("e"), # ễ -> e
- u("\u1ec6"): u("e"), # Ệ -> e
- u("\u1ec7"): u("e"), # ệ -> e
- u("\u1ec8"): u("i"), # Ỉ -> i
- u("\u1ec9"): u("i"), # ỉ -> i
- u("\u1eca"): u("i"), # Ị -> i
- u("\u1ecb"): u("i"), # ị -> i
- u("\u1ecc"): u("o"), # Ọ -> o
- u("\u1ecd"): u("o"), # ọ -> o
- u("\u1ece"): u("o"), # Ỏ -> o
- u("\u1ecf"): u("o"), # ỏ -> o
- u("\u1ed0"): u("o"), # Ố -> o
- u("\u1ed1"): u("o"), # ố -> o
- u("\u1ed2"): u("o"), # Ồ -> o
- u("\u1ed3"): u("o"), # ồ -> o
- u("\u1ed4"): u("o"), # Ổ -> o
- u("\u1ed5"): u("o"), # ổ -> o
- u("\u1ed6"): u("o"), # Ỗ -> o
- u("\u1ed7"): u("o"), # ỗ -> o
- u("\u1ed8"): u("o"), # Ộ -> o
- u("\u1ed9"): u("o"), # ộ -> o
- u("\u1eda"): u("o"), # Ớ -> o
- u("\u1edb"): u("o"), # ớ -> o
- u("\u1edc"): u("o"), # Ờ -> o
- u("\u1edd"): u("o"), # ờ -> o
- u("\u1ede"): u("o"), # Ở -> o
- u("\u1edf"): u("o"), # ở -> o
- u("\u1ee0"): u("o"), # Ỡ -> o
- u("\u1ee1"): u("o"), # ỡ -> o
- u("\u1ee2"): u("o"), # Ợ -> o
- u("\u1ee3"): u("o"), # ợ -> o
- u("\u1ee4"): u("u"), # Ụ -> u
- u("\u1ee5"): u("u"), # ụ -> u
- u("\u1ee6"): u("u"), # Ủ -> u
- u("\u1ee7"): u("u"), # ủ -> u
- u("\u1ee8"): u("u"), # Ứ -> u
- u("\u1ee9"): u("u"), # ứ -> u
- u("\u1eea"): u("u"), # Ừ -> u
- u("\u1eeb"): u("u"), # ừ -> u
- u("\u1eec"): u("u"), # Ử -> u
- u("\u1eed"): u("u"), # ử -> u
- u("\u1eee"): u("u"), # Ữ -> u
- u("\u1eef"): u("u"), # ữ -> u
- u("\u1ef0"): u("u"), # Ự -> u
- u("\u1ef1"): u("u"), # ự -> u
- u("\u1ef2"): u("y"), # Ỳ -> y
- u("\u1ef3"): u("y"), # ỳ -> y
- u("\u1ef4"): u("y"), # Ỵ -> y
- u("\u1ef5"): u("y"), # ỵ -> y
- u("\u1ef6"): u("y"), # Ỷ -> y
- u("\u1ef7"): u("y"), # ỷ -> y
- u("\u1ef8"): u("y"), # Ỹ -> y
- u("\u1ef9"): u("y"), # ỹ -> y
- u("\u2c60"): u("l"), # Ⱡ -> l
- u("\u2c61"): u("l"), # ⱡ -> l
- u("\u2c62"): u("l"), # Ɫ -> l
- u("\u2c63"): u("p"), # Ᵽ -> p
- u("\u2c64"): u("r"), # Ɽ -> r
- u("\u2c65"): u("a"), # ⱥ -> a
- u("\u2c66"): u("t"), # ⱦ -> t
- u("\u2c67"): u("h"), # Ⱨ -> h
- u("\u2c68"): u("h"), # ⱨ -> h
- u("\u2c69"): u("k"), # Ⱪ -> k
- u("\u2c6a"): u("k"), # ⱪ -> k
- u("\u2c6b"): u("z"), # Ⱬ -> z
- u("\u2c6c"): u("z"), # ⱬ -> z
- u("\uff10"): u("0"), # 0 -> 0
- u("\uff11"): u("1"), # 1 -> 1
- u("\uff12"): u("2"), # 2 -> 2
- u("\uff13"): u("3"), # 3 -> 3
- u("\uff14"): u("4"), # 4 -> 4
- u("\uff15"): u("5"), # 5 -> 5
- u("\uff16"): u("6"), # 6 -> 6
- u("\uff17"): u("7"), # 7 -> 7
- u("\uff18"): u("8"), # 8 -> 8
- u("\uff19"): u("9"), # 9 -> 9
- u("\uff21"): u("A"), # A -> A
- u("\uff22"): u("B"), # B -> B
- u("\uff23"): u("C"), # C -> C
- u("\uff24"): u("D"), # D -> D
- u("\uff25"): u("E"), # E -> E
- u("\uff26"): u("F"), # F -> F
- u("\uff27"): u("G"), # G -> G
- u("\uff28"): u("H"), # H -> H
- u("\uff29"): u("I"), # I -> I
- u("\uff2a"): u("J"), # J -> J
- u("\uff2b"): u("K"), # K -> K
- u("\uff2c"): u("L"), # L -> L
- u("\uff2d"): u("M"), # M -> M
- u("\uff2e"): u("N"), # N -> N
- u("\uff2f"): u("O"), # O -> O
- u("\uff30"): u("P"), # P -> P
- u("\uff31"): u("Q"), # Q -> Q
- u("\uff32"): u("R"), # R -> R
- u("\uff33"): u("S"), # S -> S
- u("\uff34"): u("T"), # T -> T
- u("\uff35"): u("U"), # U -> U
- u("\uff36"): u("V"), # V -> V
- u("\uff37"): u("W"), # W -> W
- u("\uff38"): u("X"), # X -> X
- u("\uff39"): u("Y"), # Y -> Y
- u("\uff3a"): u("Z"), # Z -> Z
- u("\uff41"): u("a"), # a -> a
- u("\uff42"): u("b"), # b -> b
- u("\uff43"): u("c"), # c -> c
- u("\uff44"): u("d"), # d -> d
- u("\uff45"): u("e"), # e -> e
- u("\uff46"): u("f"), # f -> f
- u("\uff47"): u("g"), # g -> g
- u("\uff48"): u("h"), # h -> h
- u("\uff49"): u("i"), # i -> i
- u("\uff4a"): u("j"), # j -> j
- u("\uff4b"): u("k"), # k -> k
- u("\uff4c"): u("l"), # l -> l
- u("\uff4d"): u("m"), # m -> m
- u("\uff4e"): u("n"), # n -> n
- u("\uff4f"): u("o"), # o -> o
- u("\uff50"): u("p"), # p -> p
- u("\uff51"): u("q"), # q -> q
- u("\uff52"): u("r"), # r -> r
- u("\uff53"): u("s"), # s -> s
- u("\uff54"): u("t"), # t -> t
- u("\uff55"): u("u"), # u -> u
- u("\uff56"): u("v"), # v -> v
- u("\uff57"): u("w"), # w -> w
- u("\uff58"): u("x"), # x -> x
- u("\uff59"): u("y"), # y -> y
- u("\uff5a"): u("z"), # z -> z
+ "H": "h", # H -> h
+ "I": "i", # I -> i
+ "J": "j", # J -> j
+ "N": "n", # N -> n
+ "P": "p", # P -> p
+ "S": "s", # S -> s
+ "T": "t", # T -> t
+ "W": "w", # W -> w
+ "Y": "y", # Y -> y
+ "i": "i", # i -> i
+ "n": "n", # n -> n
+ "p": "p", # p -> p
+ "s": "s", # s -> s
+ "\xc0": "a", # À -> a
+ "\xc1": "a", # Á -> a
+ "\xc2": "a", # Â -> a
+ "\xc3": "a", # Ã -> a
+ "\xc4": "a", # Ä -> a
+ "\xc5": "a", # Å -> a
+ "\xc7": "c", # Ç -> c
+ "\xc8": "e", # È -> e
+ "\xc9": "e", # É -> e
+ "\xca": "e", # Ê -> e
+ "\xcb": "e", # Ë -> e
+ "\xcc": "i", # Ì -> i
+ "\xcd": "i", # Í -> i
+ "\xce": "i", # Î -> i
+ "\xcf": "i", # Ï -> i
+ "\xd1": "n", # Ñ -> n
+ "\xd2": "o", # Ò -> o
+ "\xd3": "o", # Ó -> o
+ "\xd4": "o", # Ô -> o
+ "\xd5": "o", # Õ -> o
+ "\xd6": "o", # Ö -> o
+ "\xd8": "o", # Ø -> o
+ "\xd9": "u", # Ù -> u
+ "\xda": "u", # Ú -> u
+ "\xdb": "u", # Û -> u
+ "\xdc": "u", # Ü -> u
+ "\xdd": "y", # Ý -> y
+ "\xde": "t", # Þ -> t
+ "\xdf": "s", # ß -> s
+ "\xe0": "a", # à -> a
+ "\xe1": "a", # á -> a
+ "\xe2": "a", # â -> a
+ "\xe3": "a", # ã -> a
+ "\xe4": "a", # ä -> a
+ "\xe5": "a", # å -> a
+ "\xe7": "c", # ç -> c
+ "\xe8": "e", # è -> e
+ "\xe9": "e", # é -> e
+ "\xea": "e", # ê -> e
+ "\xeb": "e", # ë -> e
+ "\xec": "i", # ì -> i
+ "\xed": "i", # í -> i
+ "\xee": "i", # î -> i
+ "\xef": "i", # ï -> i
+ "\xf0": "d", # ð -> d
+ "\xf1": "n", # ñ -> n
+ "\xf2": "o", # ò -> o
+ "\xf3": "o", # ó -> o
+ "\xf4": "o", # ô -> o
+ "\xf5": "o", # õ -> o
+ "\xf6": "o", # ö -> o
+ "\xf8": "o", # ø -> o
+ "\xf9": "u", # ù -> u
+ "\xfa": "u", # ú -> u
+ "\xfb": "u", # û -> u
+ "\xfc": "u", # ü -> u
+ "\xfd": "y", # ý -> y
+ "\xfe": "t", # þ -> t
+ "\xff": "y", # ÿ -> y
+ "\u0100": "a", # Ā -> a
+ "\u0101": "a", # ā -> a
+ "\u0102": "a", # Ă -> a
+ "\u0103": "a", # ă -> a
+ "\u0104": "a", # Ą -> a
+ "\u0105": "a", # ą -> a
+ "\u0106": "c", # Ć -> c
+ "\u0107": "c", # ć -> c
+ "\u0108": "c", # Ĉ -> c
+ "\u0109": "c", # ĉ -> c
+ "\u010a": "c", # Ċ -> c
+ "\u010b": "c", # ċ -> c
+ "\u010c": "c", # Č -> c
+ "\u010d": "c", # č -> c
+ "\u010e": "d", # Ď -> d
+ "\u010f": "d", # ď -> d
+ "\u0110": "d", # Đ -> d
+ "\u0111": "d", # đ -> d
+ "\u0112": "e", # Ē -> e
+ "\u0113": "e", # ē -> e
+ "\u0114": "e", # Ĕ -> e
+ "\u0115": "e", # ĕ -> e
+ "\u0116": "e", # Ė -> e
+ "\u0117": "e", # ė -> e
+ "\u0118": "e", # Ę -> e
+ "\u0119": "e", # ę -> e
+ "\u011a": "e", # Ě -> e
+ "\u011b": "e", # ě -> e
+ "\u011c": "g", # Ĝ -> g
+ "\u011d": "g", # ĝ -> g
+ "\u011e": "g", # Ğ -> g
+ "\u011f": "g", # ğ -> g
+ "\u0120": "g", # Ġ -> g
+ "\u0121": "g", # ġ -> g
+ "\u0122": "g", # Ģ -> g
+ "\u0123": "g", # ģ -> g
+ "\u0124": "h", # Ĥ -> h
+ "\u0125": "h", # ĥ -> h
+ "\u0126": "h", # Ħ -> h
+ "\u0127": "h", # ħ -> h
+ "\u0128": "i", # Ĩ -> i
+ "\u0129": "i", # ĩ -> i
+ "\u012a": "i", # Ī -> i
+ "\u012b": "i", # ī -> i
+ "\u012c": "i", # Ĭ -> i
+ "\u012d": "i", # ĭ -> i
+ "\u012e": "i", # Į -> i
+ "\u012f": "i", # į -> i
+ "\u0130": "i", # İ -> i
+ "\u0131": "i", # ı -> i
+ "\u0134": "j", # Ĵ -> j
+ "\u0135": "j", # ĵ -> j
+ "\u0136": "k", # Ķ -> k
+ "\u0137": "k", # ķ -> k
+ "\u0139": "a", # Ĺ -> a
+ "\u013a": "l", # ĺ -> l
+ "\u013b": "l", # Ļ -> l
+ "\u013c": "l", # ļ -> l
+ "\u013d": "l", # Ľ -> l
+ "\u013e": "l", # ľ -> l
+ "\u013f": "l", # Ŀ -> l
+ "\u0140": "l", # ŀ -> l
+ "\u0141": "l", # Ł -> l
+ "\u0142": "l", # ł -> l
+ "\u0143": "n", # Ń -> n
+ "\u0144": "n", # ń -> n
+ "\u0145": "n", # Ņ -> n
+ "\u0146": "n", # ņ -> n
+ "\u0147": "n", # Ň -> n
+ "\u0148": "n", # ň -> n
+ "\u014c": "o", # Ō -> o
+ "\u014d": "o", # ō -> o
+ "\u014e": "o", # Ŏ -> o
+ "\u014f": "o", # ŏ -> o
+ "\u0150": "o", # Ő -> o
+ "\u0151": "o", # ő -> o
+ "\u0154": "r", # Ŕ -> r
+ "\u0155": "r", # ŕ -> r
+ "\u0156": "r", # Ŗ -> r
+ "\u0157": "r", # ŗ -> r
+ "\u0158": "r", # Ř -> r
+ "\u0159": "r", # ř -> r
+ "\u015a": "s", # Ś -> s
+ "\u015b": "s", # ś -> s
+ "\u015c": "s", # Ŝ -> s
+ "\u015d": "s", # ŝ -> s
+ "\u015e": "s", # Ş -> s
+ "\u015f": "s", # ş -> s
+ "\u0160": "s", # Š -> s
+ "\u0161": "s", # š -> s
+ "\u0162": "t", # Ţ -> t
+ "\u0163": "t", # ţ -> t
+ "\u0164": "t", # Ť -> t
+ "\u0165": "t", # ť -> t
+ "\u0166": "t", # Ŧ -> t
+ "\u0167": "t", # ŧ -> t
+ "\u0168": "u", # Ũ -> u
+ "\u0169": "u", # ũ -> u
+ "\u016a": "u", # Ū -> u
+ "\u016b": "u", # ū -> u
+ "\u016c": "u", # Ŭ -> u
+ "\u016d": "u", # ŭ -> u
+ "\u016e": "u", # Ů -> u
+ "\u016f": "u", # ů -> u
+ "\u0170": "u", # Ű -> u
+ "\u0171": "u", # ű -> u
+ "\u0172": "u", # Ų -> u
+ "\u0173": "u", # ų -> u
+ "\u0174": "w", # Ŵ -> w
+ "\u0175": "w", # ŵ -> w
+ "\u0176": "y", # Ŷ -> y
+ "\u0177": "y", # ŷ -> y
+ "\u0178": "y", # Ÿ -> y
+ "\u0179": "z", # Ź -> z
+ "\u017a": "z", # ź -> z
+ "\u017b": "z", # Ż -> z
+ "\u017c": "z", # ż -> z
+ "\u017d": "z", # Ž -> z
+ "\u017e": "z", # ž -> z
+ "\u0180": "b", # ƀ -> b
+ "\u0181": "b", # Ɓ -> b
+ "\u0182": "b", # Ƃ -> b
+ "\u0183": "b", # ƃ -> b
+ "\u0187": "c", # Ƈ -> c
+ "\u0188": "c", # ƈ -> c
+ "\u0189": "d", # Ɖ -> d
+ "\u018a": "d", # Ɗ -> d
+ "\u018b": "d", # Ƌ -> d
+ "\u018c": "d", # ƌ -> d
+ "\u018e": "e", # Ǝ -> e
+ "\u018f": "e", # Ə -> e
+ "\u0191": "f", # Ƒ -> f
+ "\u0192": "f", # ƒ -> f
+ "\u0193": "g", # Ɠ -> g
+ "\u0197": "i", # Ɨ -> i
+ "\u0198": "k", # Ƙ -> k
+ "\u0199": "k", # ƙ -> k
+ "\u019a": "l", # ƚ -> l
+ "\u019d": "n", # Ɲ -> n
+ "\u019e": "n", # ƞ -> n
+ "\u019f": "o", # Ɵ -> o
+ "\u01a0": "o", # Ơ -> o
+ "\u01a1": "o", # ơ -> o
+ "\u01a4": "p", # Ƥ -> p
+ "\u01a5": "p", # ƥ -> p
+ "\u01ab": "t", # ƫ -> t
+ "\u01ac": "t", # Ƭ -> t
+ "\u01ad": "t", # ƭ -> t
+ "\u01ae": "t", # Ʈ -> t
+ "\u01af": "u", # Ư -> u
+ "\u01b0": "u", # ư -> u
+ "\u01b2": "v", # Ʋ -> v
+ "\u01b3": "y", # Ƴ -> y
+ "\u01b4": "y", # ƴ -> y
+ "\u01b5": "z", # Ƶ -> z
+ "\u01b6": "z", # ƶ -> z
+ "\u01ba": "z", # ƺ -> z
+ "\u01cd": "a", # Ǎ -> a
+ "\u01ce": "a", # ǎ -> a
+ "\u01cf": "i", # Ǐ -> i
+ "\u01d0": "i", # ǐ -> i
+ "\u01d1": "o", # Ǒ -> o
+ "\u01d2": "o", # ǒ -> o
+ "\u01d3": "u", # Ǔ -> u
+ "\u01d4": "u", # ǔ -> u
+ "\u01d5": "u", # Ǖ -> u
+ "\u01d6": "u", # ǖ -> u
+ "\u01d7": "u", # Ǘ -> u
+ "\u01d8": "u", # ǘ -> u
+ "\u01d9": "u", # Ǚ -> u
+ "\u01da": "u", # ǚ -> u
+ "\u01db": "u", # Ǜ -> u
+ "\u01dc": "u", # ǜ -> u
+ "\u01dd": "e", # ǝ -> e
+ "\u01de": "a", # Ǟ -> a
+ "\u01df": "a", # ǟ -> a
+ "\u01e0": "a", # Ǡ -> a
+ "\u01e1": "a", # ǡ -> a
+ "\u01e2": "a", # Ǣ -> a
+ "\u01e3": "a", # ǣ -> a
+ "\u01e4": "g", # Ǥ -> g
+ "\u01e5": "g", # ǥ -> g
+ "\u01e6": "g", # Ǧ -> g
+ "\u01e7": "g", # ǧ -> g
+ "\u01e8": "k", # Ǩ -> k
+ "\u01e9": "k", # ǩ -> k
+ "\u01ea": "o", # Ǫ -> o
+ "\u01eb": "o", # ǫ -> o
+ "\u01ec": "o", # Ǭ -> o
+ "\u01ed": "o", # ǭ -> o
+ "\u01ee": "z", # Ǯ -> z
+ "\u01ef": "z", # ǯ -> z
+ "\u01f0": "j", # ǰ -> j
+ "\u01f4": "g", # Ǵ -> g
+ "\u01f5": "g", # ǵ -> g
+ "\u01f8": "n", # Ǹ -> n
+ "\u01f9": "n", # ǹ -> n
+ "\u01fa": "a", # Ǻ -> a
+ "\u01fb": "a", # ǻ -> a
+ "\u01fc": "a", # Ǽ -> a
+ "\u01fd": "a", # ǽ -> a
+ "\u01fe": "o", # Ǿ -> o
+ "\u01ff": "o", # ǿ -> o
+ "\u0200": "a", # Ȁ -> a
+ "\u0201": "a", # ȁ -> a
+ "\u0202": "a", # Ȃ -> a
+ "\u0203": "a", # ȃ -> a
+ "\u0204": "e", # Ȅ -> e
+ "\u0205": "e", # ȅ -> e
+ "\u0206": "e", # Ȇ -> e
+ "\u0207": "e", # ȇ -> e
+ "\u0208": "i", # Ȉ -> i
+ "\u0209": "i", # ȉ -> i
+ "\u020a": "i", # Ȋ -> i
+ "\u020b": "i", # ȋ -> i
+ "\u020c": "o", # Ȍ -> o
+ "\u020d": "o", # ȍ -> o
+ "\u020e": "o", # Ȏ -> o
+ "\u020f": "o", # ȏ -> o
+ "\u0210": "r", # Ȑ -> r
+ "\u0211": "r", # ȑ -> r
+ "\u0212": "r", # Ȓ -> r
+ "\u0213": "r", # ȓ -> r
+ "\u0214": "u", # Ȕ -> u
+ "\u0215": "u", # ȕ -> u
+ "\u0216": "u", # Ȗ -> u
+ "\u0217": "u", # ȗ -> u
+ "\u0218": "s", # Ș -> s
+ "\u0219": "s", # ș -> s
+ "\u021a": "t", # Ț -> t
+ "\u021b": "t", # ț -> t
+ "\u021e": "h", # Ȟ -> h
+ "\u021f": "h", # ȟ -> h
+ "\u0220": "n", # Ƞ -> n
+ "\u0221": "d", # ȡ -> d
+ "\u0224": "z", # Ȥ -> z
+ "\u0225": "z", # ȥ -> z
+ "\u0226": "a", # Ȧ -> a
+ "\u0227": "a", # ȧ -> a
+ "\u0228": "e", # Ȩ -> e
+ "\u0229": "e", # ȩ -> e
+ "\u022a": "o", # Ȫ -> o
+ "\u022b": "o", # ȫ -> o
+ "\u022c": "o", # Ȭ -> o
+ "\u022d": "o", # ȭ -> o
+ "\u022e": "o", # Ȯ -> o
+ "\u022f": "o", # ȯ -> o
+ "\u0230": "o", # Ȱ -> o
+ "\u0231": "o", # ȱ -> o
+ "\u0232": "y", # Ȳ -> y
+ "\u0233": "y", # ȳ -> y
+ "\u0234": "l", # ȴ -> l
+ "\u0235": "n", # ȵ -> n
+ "\u0236": "t", # ȶ -> t
+ "\u0237": "j", # ȷ -> j
+ "\u023a": "a", # Ⱥ -> a
+ "\u023b": "c", # Ȼ -> c
+ "\u023c": "c", # ȼ -> c
+ "\u023d": "l", # Ƚ -> l
+ "\u023e": "t", # Ⱦ -> t
+ "\u0243": "b", # Ƀ -> b
+ "\u0244": "u", # Ʉ -> u
+ "\u0246": "e", # Ɇ -> e
+ "\u0247": "e", # ɇ -> e
+ "\u0248": "j", # Ɉ -> j
+ "\u0249": "j", # ɉ -> j
+ "\u024a": "q", # Ɋ -> q
+ "\u024b": "q", # ɋ -> q
+ "\u024c": "r", # Ɍ -> r
+ "\u024d": "r", # ɍ -> r
+ "\u024e": "y", # Ɏ -> y
+ "\u024f": "y", # ɏ -> y
+ "\u0253": "b", # ɓ -> b
+ "\u0255": "c", # ɕ -> c
+ "\u0256": "d", # ɖ -> d
+ "\u0257": "d", # ɗ -> d
+ "\u025a": "e", # ɚ -> e
+ "\u025d": "e", # ɝ -> e
+ "\u025f": "j", # ɟ -> j
+ "\u0260": "g", # ɠ -> g
+ "\u0268": "i", # ɨ -> i
+ "\u026b": "l", # ɫ -> l
+ "\u026c": "l", # ɬ -> l
+ "\u026d": "l", # ɭ -> l
+ "\u0271": "m", # ɱ -> m
+ "\u0272": "n", # ɲ -> n
+ "\u0273": "n", # ɳ -> n
+ "\u0275": "o", # ɵ -> o
+ "\u027c": "r", # ɼ -> r
+ "\u027d": "r", # ɽ -> r
+ "\u027e": "r", # ɾ -> r
+ "\u0282": "s", # ʂ -> s
+ "\u0284": "j", # ʄ -> j
+ "\u0288": "t", # ʈ -> t
+ "\u0289": "u", # ʉ -> u
+ "\u028b": "v", # ʋ -> v
+ "\u028f": "y", # ʏ -> y
+ "\u0290": "z", # ʐ -> z
+ "\u0291": "z", # ʑ -> z
+ "\u029d": "j", # ʝ -> j
+ "\u02a0": "q", # ʠ -> q
+ "\u0303": "p", # ̃ -> p
+ "\u0308": "t", # ̈ -> t
+ "\u030a": "y", # ̊ -> y
+ "\u030c": "j", # ̌ -> j
+ "\u0323": "l", # ̣ -> l
+ "\u0329": "s", # ̩ -> s
+ "\u0331": "h", # ̱ -> h
+ "\u1d6c": "b", # ᵬ -> b
+ "\u1d6d": "d", # ᵭ -> d
+ "\u1d6e": "f", # ᵮ -> f
+ "\u1d72": "r", # ᵲ -> r
+ "\u1d73": "r", # ᵳ -> r
+ "\u1d75": "t", # ᵵ -> t
+ "\u1e00": "a", # Ḁ -> a
+ "\u1e01": "a", # ḁ -> a
+ "\u1e02": "b", # Ḃ -> b
+ "\u1e03": "b", # ḃ -> b
+ "\u1e04": "b", # Ḅ -> b
+ "\u1e05": "b", # ḅ -> b
+ "\u1e06": "b", # Ḇ -> b
+ "\u1e07": "b", # ḇ -> b
+ "\u1e08": "c", # Ḉ -> c
+ "\u1e09": "c", # ḉ -> c
+ "\u1e0a": "d", # Ḋ -> d
+ "\u1e0b": "d", # ḋ -> d
+ "\u1e0c": "d", # Ḍ -> d
+ "\u1e0d": "d", # ḍ -> d
+ "\u1e0e": "d", # Ḏ -> d
+ "\u1e0f": "d", # ḏ -> d
+ "\u1e10": "d", # Ḑ -> d
+ "\u1e11": "d", # ḑ -> d
+ "\u1e12": "d", # Ḓ -> d
+ "\u1e13": "d", # ḓ -> d
+ "\u1e14": "e", # Ḕ -> e
+ "\u1e15": "e", # ḕ -> e
+ "\u1e16": "e", # Ḗ -> e
+ "\u1e17": "e", # ḗ -> e
+ "\u1e18": "e", # Ḙ -> e
+ "\u1e19": "e", # ḙ -> e
+ "\u1e1a": "e", # Ḛ -> e
+ "\u1e1b": "e", # ḛ -> e
+ "\u1e1c": "e", # Ḝ -> e
+ "\u1e1d": "e", # ḝ -> e
+ "\u1e1e": "f", # Ḟ -> f
+ "\u1e1f": "f", # ḟ -> f
+ "\u1e20": "g", # Ḡ -> g
+ "\u1e21": "g", # ḡ -> g
+ "\u1e22": "h", # Ḣ -> h
+ "\u1e23": "h", # ḣ -> h
+ "\u1e24": "h", # Ḥ -> h
+ "\u1e25": "h", # ḥ -> h
+ "\u1e26": "h", # Ḧ -> h
+ "\u1e27": "h", # ḧ -> h
+ "\u1e28": "h", # Ḩ -> h
+ "\u1e29": "h", # ḩ -> h
+ "\u1e2a": "h", # Ḫ -> h
+ "\u1e2b": "h", # ḫ -> h
+ "\u1e2c": "i", # Ḭ -> i
+ "\u1e2d": "i", # ḭ -> i
+ "\u1e2e": "i", # Ḯ -> i
+ "\u1e2f": "i", # ḯ -> i
+ "\u1e30": "k", # Ḱ -> k
+ "\u1e31": "k", # ḱ -> k
+ "\u1e32": "k", # Ḳ -> k
+ "\u1e33": "k", # ḳ -> k
+ "\u1e34": "k", # Ḵ -> k
+ "\u1e35": "k", # ḵ -> k
+ "\u1e36": "l", # Ḷ -> l
+ "\u1e37": "l", # ḷ -> l
+ "\u1e38": "l", # Ḹ -> l
+ "\u1e39": "l", # ḹ -> l
+ "\u1e3a": "l", # Ḻ -> l
+ "\u1e3b": "l", # ḻ -> l
+ "\u1e3c": "l", # Ḽ -> l
+ "\u1e3d": "l", # ḽ -> l
+ "\u1e3e": "m", # Ḿ -> m
+ "\u1e3f": "m", # ḿ -> m
+ "\u1e40": "m", # Ṁ -> m
+ "\u1e41": "m", # ṁ -> m
+ "\u1e42": "m", # Ṃ -> m
+ "\u1e43": "m", # ṃ -> m
+ "\u1e44": "n", # Ṅ -> n
+ "\u1e45": "n", # ṅ -> n
+ "\u1e46": "n", # Ṇ -> n
+ "\u1e47": "n", # ṇ -> n
+ "\u1e48": "n", # Ṉ -> n
+ "\u1e49": "n", # ṉ -> n
+ "\u1e4a": "n", # Ṋ -> n
+ "\u1e4b": "n", # ṋ -> n
+ "\u1e4c": "o", # Ṍ -> o
+ "\u1e4d": "o", # ṍ -> o
+ "\u1e4e": "o", # Ṏ -> o
+ "\u1e4f": "o", # ṏ -> o
+ "\u1e50": "o", # Ṑ -> o
+ "\u1e51": "o", # ṑ -> o
+ "\u1e52": "o", # Ṓ -> o
+ "\u1e53": "o", # ṓ -> o
+ "\u1e54": "p", # Ṕ -> p
+ "\u1e55": "p", # ṕ -> p
+ "\u1e56": "p", # Ṗ -> p
+ "\u1e57": "p", # ṗ -> p
+ "\u1e58": "r", # Ṙ -> r
+ "\u1e59": "r", # ṙ -> r
+ "\u1e5a": "r", # Ṛ -> r
+ "\u1e5b": "r", # ṛ -> r
+ "\u1e5c": "r", # Ṝ -> r
+ "\u1e5d": "r", # ṝ -> r
+ "\u1e5e": "r", # Ṟ -> r
+ "\u1e5f": "r", # ṟ -> r
+ "\u1e60": "s", # Ṡ -> s
+ "\u1e61": "s", # ṡ -> s
+ "\u1e62": "s", # Ṣ -> s
+ "\u1e63": "s", # ṣ -> s
+ "\u1e64": "s", # Ṥ -> s
+ "\u1e65": "s", # ṥ -> s
+ "\u1e66": "s", # Ṧ -> s
+ "\u1e67": "s", # ṧ -> s
+ "\u1e68": "s", # Ṩ -> s
+ "\u1e69": "s", # ṩ -> s
+ "\u1e6a": "t", # Ṫ -> t
+ "\u1e6b": "t", # ṫ -> t
+ "\u1e6c": "t", # Ṭ -> t
+ "\u1e6d": "t", # ṭ -> t
+ "\u1e6e": "t", # Ṯ -> t
+ "\u1e6f": "t", # ṯ -> t
+ "\u1e70": "t", # Ṱ -> t
+ "\u1e71": "t", # ṱ -> t
+ "\u1e72": "u", # Ṳ -> u
+ "\u1e73": "u", # ṳ -> u
+ "\u1e74": "u", # Ṵ -> u
+ "\u1e75": "u", # ṵ -> u
+ "\u1e76": "u", # Ṷ -> u
+ "\u1e77": "u", # ṷ -> u
+ "\u1e78": "u", # Ṹ -> u
+ "\u1e79": "u", # ṹ -> u
+ "\u1e7a": "u", # Ṻ -> u
+ "\u1e7b": "u", # ṻ -> u
+ "\u1e7c": "v", # Ṽ -> v
+ "\u1e7d": "v", # ṽ -> v
+ "\u1e7e": "v", # Ṿ -> v
+ "\u1e7f": "v", # ṿ -> v
+ "\u1e80": "w", # Ẁ -> w
+ "\u1e81": "w", # ẁ -> w
+ "\u1e82": "w", # Ẃ -> w
+ "\u1e83": "w", # ẃ -> w
+ "\u1e84": "w", # Ẅ -> w
+ "\u1e85": "w", # ẅ -> w
+ "\u1e86": "w", # Ẇ -> w
+ "\u1e87": "w", # ẇ -> w
+ "\u1e88": "w", # Ẉ -> w
+ "\u1e89": "w", # ẉ -> w
+ "\u1e8a": "x", # Ẋ -> x
+ "\u1e8b": "x", # ẋ -> x
+ "\u1e8c": "x", # Ẍ -> x
+ "\u1e8d": "x", # ẍ -> x
+ "\u1e8e": "y", # Ẏ -> y
+ "\u1e8f": "y", # ẏ -> y
+ "\u1e90": "z", # Ẑ -> z
+ "\u1e91": "z", # ẑ -> z
+ "\u1e92": "z", # Ẓ -> z
+ "\u1e93": "z", # ẓ -> z
+ "\u1e94": "z", # Ẕ -> z
+ "\u1e95": "z", # ẕ -> z
+ "\u1e96": "h", # ẖ -> h
+ "\u1e97": "t", # ẗ -> t
+ "\u1e98": "w", # ẘ -> w
+ "\u1e99": "y", # ẙ -> y
+ "\u1e9a": "a", # ẚ -> a
+ "\u1e9b": "s", # ẛ -> s
+ "\u1ea0": "a", # Ạ -> a
+ "\u1ea1": "a", # ạ -> a
+ "\u1ea2": "a", # Ả -> a
+ "\u1ea3": "a", # ả -> a
+ "\u1ea4": "a", # Ấ -> a
+ "\u1ea5": "a", # ấ -> a
+ "\u1ea6": "a", # Ầ -> a
+ "\u1ea7": "a", # ầ -> a
+ "\u1ea8": "a", # Ẩ -> a
+ "\u1ea9": "a", # ẩ -> a
+ "\u1eaa": "a", # Ẫ -> a
+ "\u1eab": "a", # ẫ -> a
+ "\u1eac": "a", # Ậ -> a
+ "\u1ead": "a", # ậ -> a
+ "\u1eae": "a", # Ắ -> a
+ "\u1eaf": "a", # ắ -> a
+ "\u1eb0": "a", # Ằ -> a
+ "\u1eb1": "a", # ằ -> a
+ "\u1eb2": "a", # Ẳ -> a
+ "\u1eb3": "a", # ẳ -> a
+ "\u1eb4": "a", # Ẵ -> a
+ "\u1eb5": "a", # ẵ -> a
+ "\u1eb6": "a", # Ặ -> a
+ "\u1eb7": "a", # ặ -> a
+ "\u1eb8": "e", # Ẹ -> e
+ "\u1eb9": "e", # ẹ -> e
+ "\u1eba": "e", # Ẻ -> e
+ "\u1ebb": "e", # ẻ -> e
+ "\u1ebc": "e", # Ẽ -> e
+ "\u1ebd": "e", # ẽ -> e
+ "\u1ebe": "e", # Ế -> e
+ "\u1ebf": "e", # ế -> e
+ "\u1ec0": "e", # Ề -> e
+ "\u1ec1": "e", # ề -> e
+ "\u1ec2": "e", # Ể -> e
+ "\u1ec3": "e", # ể -> e
+ "\u1ec4": "e", # Ễ -> e
+ "\u1ec5": "e", # ễ -> e
+ "\u1ec6": "e", # Ệ -> e
+ "\u1ec7": "e", # ệ -> e
+ "\u1ec8": "i", # Ỉ -> i
+ "\u1ec9": "i", # ỉ -> i
+ "\u1eca": "i", # Ị -> i
+ "\u1ecb": "i", # ị -> i
+ "\u1ecc": "o", # Ọ -> o
+ "\u1ecd": "o", # ọ -> o
+ "\u1ece": "o", # Ỏ -> o
+ "\u1ecf": "o", # ỏ -> o
+ "\u1ed0": "o", # Ố -> o
+ "\u1ed1": "o", # ố -> o
+ "\u1ed2": "o", # Ồ -> o
+ "\u1ed3": "o", # ồ -> o
+ "\u1ed4": "o", # Ổ -> o
+ "\u1ed5": "o", # ổ -> o
+ "\u1ed6": "o", # Ỗ -> o
+ "\u1ed7": "o", # ỗ -> o
+ "\u1ed8": "o", # Ộ -> o
+ "\u1ed9": "o", # ộ -> o
+ "\u1eda": "o", # Ớ -> o
+ "\u1edb": "o", # ớ -> o
+ "\u1edc": "o", # Ờ -> o
+ "\u1edd": "o", # ờ -> o
+ "\u1ede": "o", # Ở -> o
+ "\u1edf": "o", # ở -> o
+ "\u1ee0": "o", # Ỡ -> o
+ "\u1ee1": "o", # ỡ -> o
+ "\u1ee2": "o", # Ợ -> o
+ "\u1ee3": "o", # ợ -> o
+ "\u1ee4": "u", # Ụ -> u
+ "\u1ee5": "u", # ụ -> u
+ "\u1ee6": "u", # Ủ -> u
+ "\u1ee7": "u", # ủ -> u
+ "\u1ee8": "u", # Ứ -> u
+ "\u1ee9": "u", # ứ -> u
+ "\u1eea": "u", # Ừ -> u
+ "\u1eeb": "u", # ừ -> u
+ "\u1eec": "u", # Ử -> u
+ "\u1eed": "u", # ử -> u
+ "\u1eee": "u", # Ữ -> u
+ "\u1eef": "u", # ữ -> u
+ "\u1ef0": "u", # Ự -> u
+ "\u1ef1": "u", # ự -> u
+ "\u1ef2": "y", # Ỳ -> y
+ "\u1ef3": "y", # ỳ -> y
+ "\u1ef4": "y", # Ỵ -> y
+ "\u1ef5": "y", # ỵ -> y
+ "\u1ef6": "y", # Ỷ -> y
+ "\u1ef7": "y", # ỷ -> y
+ "\u1ef8": "y", # Ỹ -> y
+ "\u1ef9": "y", # ỹ -> y
+ "\u2c60": "l", # Ⱡ -> l
+ "\u2c61": "l", # ⱡ -> l
+ "\u2c62": "l", # Ɫ -> l
+ "\u2c63": "p", # Ᵽ -> p
+ "\u2c64": "r", # Ɽ -> r
+ "\u2c65": "a", # ⱥ -> a
+ "\u2c66": "t", # ⱦ -> t
+ "\u2c67": "h", # Ⱨ -> h
+ "\u2c68": "h", # ⱨ -> h
+ "\u2c69": "k", # Ⱪ -> k
+ "\u2c6a": "k", # ⱪ -> k
+ "\u2c6b": "z", # Ⱬ -> z
+ "\u2c6c": "z", # ⱬ -> z
+ "\uff10": "0", # 0 -> 0
+ "\uff11": "1", # 1 -> 1
+ "\uff12": "2", # 2 -> 2
+ "\uff13": "3", # 3 -> 3
+ "\uff14": "4", # 4 -> 4
+ "\uff15": "5", # 5 -> 5
+ "\uff16": "6", # 6 -> 6
+ "\uff17": "7", # 7 -> 7
+ "\uff18": "8", # 8 -> 8
+ "\uff19": "9", # 9 -> 9
+ "\uff21": "A", # A -> A
+ "\uff22": "B", # B -> B
+ "\uff23": "C", # C -> C
+ "\uff24": "D", # D -> D
+ "\uff25": "E", # E -> E
+ "\uff26": "F", # F -> F
+ "\uff27": "G", # G -> G
+ "\uff28": "H", # H -> H
+ "\uff29": "I", # I -> I
+ "\uff2a": "J", # J -> J
+ "\uff2b": "K", # K -> K
+ "\uff2c": "L", # L -> L
+ "\uff2d": "M", # M -> M
+ "\uff2e": "N", # N -> N
+ "\uff2f": "O", # O -> O
+ "\uff30": "P", # P -> P
+ "\uff31": "Q", # Q -> Q
+ "\uff32": "R", # R -> R
+ "\uff33": "S", # S -> S
+ "\uff34": "T", # T -> T
+ "\uff35": "U", # U -> U
+ "\uff36": "V", # V -> V
+ "\uff37": "W", # W -> W
+ "\uff38": "X", # X -> X
+ "\uff39": "Y", # Y -> Y
+ "\uff3a": "Z", # Z -> Z
+ "\uff41": "a", # a -> a
+ "\uff42": "b", # b -> b
+ "\uff43": "c", # c -> c
+ "\uff44": "d", # d -> d
+ "\uff45": "e", # e -> e
+ "\uff46": "f", # f -> f
+ "\uff47": "g", # g -> g
+ "\uff48": "h", # h -> h
+ "\uff49": "i", # i -> i
+ "\uff4a": "j", # j -> j
+ "\uff4b": "k", # k -> k
+ "\uff4c": "l", # l -> l
+ "\uff4d": "m", # m -> m
+ "\uff4e": "n", # n -> n
+ "\uff4f": "o", # o -> o
+ "\uff50": "p", # p -> p
+ "\uff51": "q", # q -> q
+ "\uff52": "r", # r -> r
+ "\uff53": "s", # s -> s
+ "\uff54": "t", # t -> t
+ "\uff55": "u", # u -> u
+ "\uff56": "v", # v -> v
+ "\uff57": "w", # w -> w
+ "\uff58": "x", # x -> x
+ "\uff59": "y", # y -> y
+ "\uff5a": "z", # z -> z
}
# The unicode.translate() method actually requires a dictionary mapping
# character *numbers* to characters, for some reason.
-accent_map = {ord(k): v for k, v in iteritems(accent_map)}
+accent_map = {ord(k): v for k, v in accent_map.items()}
# This Sphinx charset table taken from http://speeple.com/unicode-maps.txt
@@ -1325,10 +1323,10 @@ def charset_table_to_dict(tablestring):
end2 = charspec_to_int(match.group(4))
assert (end1 - start1) == (end2 - start2)
try:
- for fromord, tooord in izip(
+ for fromord, tooord in zip(
range(start1, end1 + 1), range(start2, end2 + 1)
):
- map[fromord] = unichr(tooord)
+ map[fromord] = chr(tooord)
except ValueError:
pass
continue
@@ -1338,7 +1336,7 @@ def charset_table_to_dict(tablestring):
fromord = charspec_to_int(match.group(1))
toord = charspec_to_int(match.group(2))
try:
- map[fromord] = unichr(toord)
+ map[fromord] = chr(toord)
except ValueError:
pass
continue
@@ -1347,7 +1345,7 @@ def charset_table_to_dict(tablestring):
if match:
ord = charspec_to_int(match.group(0))
try:
- map[ord] = unichr(ord)
+ map[ord] = chr(ord)
except ValueError:
pass
continue
@@ -1358,7 +1356,7 @@ def charset_table_to_dict(tablestring):
end = charspec_to_int(match.group(2))
try:
for ord in range(start, end + 1):
- map[ord] = unichr(ord)
+ map[ord] = chr(ord)
except ValueError:
pass
continue
@@ -1370,8 +1368,8 @@ def charset_table_to_dict(tablestring):
assert toord - fromord % 2 == 0
for ord in range(fromord, toord + 1, 2):
try:
- map[ord] = unichr(ord + 1)
- map[ord + 1] = unichr(ord + 1)
+ map[ord] = chr(ord + 1)
+ map[ord + 1] = chr(ord + 1)
except ValueError:
pass
continue
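
str.translate() wants a mapping of code points (integers) to replacement strings, which is why the module re-keys accent_map with ord() immediately after the table. Usage is unchanged with the plain-dict version above; a tiny excerpt shows the idea (the sample string is mine):

    accent_map = {"\xe9": "e", "\xc7": "c"}   # two entries from the table above
    accent_map = {ord(k): v for k, v in accent_map.items()}

    print("Ça coûte 5€, café".translate(accent_map))  # ca coûte 5€, cafe
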
diff --git a/src/whoosh/support/unicode.py b/src/whoosh/support/unicode.py
index 4010744b..52ec95d3 100644
--- a/src/whoosh/support/unicode.py
+++ b/src/whoosh/support/unicode.py
@@ -1,8 +1,6 @@
import re
from bisect import bisect_right
-from whoosh.compat import text_type, u
-
# http://unicode.org/Public/UNIDATA/Blocks.txt
_blockdata = """
# Blocks-5.1.0.txt
@@ -241,13 +239,13 @@ def blockname(ch):
>>> blockname(u'a')
'Basic Latin'
- >>> blockname(unichr(0x0b80))
+ >>> blockname(chr(0x0b80))
'Tamil'
- >>> block(unichr(2048))
+ >>> blockname(chr(2048))
None
"""
- assert isinstance(ch, text_type) and len(ch) == 1, repr(ch)
+ assert isinstance(ch, str) and len(ch) == 1, repr(ch)
cp = ord(ch)
i = bisect_right(_starts, cp) - 1
end = _ends[i]
@@ -261,9 +259,9 @@ def blocknum(ch):
>>> blocknum(u'a')
0
- >>> blocknum(unichr(0x0b80))
+ >>> blocknum(chr(0x0b80))
22
- >>> blocknum(unichr(2048))
+ >>> blocknum(chr(2048))
None
"""
@@ -275,7 +273,7 @@ def blocknum(ch):
return i
-digits = u(
+digits = (
"0123456789\xb2\xb3\xb9\u0660\u0661\u0662\u0663\u0664\u0665\u0666"
"\u0667\u0668\u0669\u06f0\u06f1\u06f2\u06f3\u06f4\u06f5\u06f6\u06f7"
"\u06f8\u06f9\u07c0\u07c1\u07c2\u07c3\u07c4\u07c5\u07c6\u07c7\u07c8"
@@ -317,7 +315,7 @@ def blocknum(ch):
"\uabf2\uabf3\uabf4\uabf5\uabf6\uabf7\uabf8\uabf9\uff10\uff11\uff12"
"\uff13\uff14\uff15\uff16\uff17\uff18\uff19"
)
-lowercase = u(
+lowercase = (
"abcdefghijklmnopqrstuvwxyz\xaa\xb5\xba\xdf\xe0\xe1\xe2\xe3\xe4"
"\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3"
"\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff\u0101\u0103\u0105"
@@ -438,7 +436,7 @@ def blocknum(ch):
"\uff4e\uff4f\uff50\uff51\uff52\uff53\uff54\uff55\uff56\uff57"
"\uff58\uff59\uff5a"
)
-uppercase = u(
+uppercase = (
"ABCDEFGHIJKLMNOPQRSTUVWXYZ\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8"
"\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd8"
"\xd9\xda\xdb\xdc\xdd\xde\u0100\u0102\u0104\u0106\u0108\u010a"
diff --git a/src/whoosh/util/__init__.py b/src/whoosh/util/__init__.py
index baddf02e..81593209 100644
--- a/src/whoosh/util/__init__.py
+++ b/src/whoosh/util/__init__.py
@@ -49,11 +49,7 @@ def random_name(size=28):
def random_bytes(size=28):
- gen = (random.randint(0, 255) for _ in range(size))
- if sys.version_info[0] >= 3:
- return bytes(gen)
- else:
- return array("B", gen).tostring()
+ return bytes(random.randint(0, 255) for _ in range(size))
def make_binary_tree(fn, args, **kwargs):
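
On Python 3, bytes() accepts an iterable of ints in range(256) directly, so the version check and the array("B", ...).tostring() fallback collapse into a single expression. The same helper, shown standalone (Python 3.9+ also offers random.randbytes(size), but the generator form works on every 3.x):

    import random

    def random_bytes(size=28):
        # bytes() consumes an iterable of ints in range(256).
        return bytes(random.randint(0, 255) for _ in range(size))

    buf = random_bytes(8)
    print(len(buf), type(buf))   # 8 <class 'bytes'>
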
diff --git a/src/whoosh/util/cache.py b/src/whoosh/util/cache.py
index 6e8b7a09..d38e8d53 100644
--- a/src/whoosh/util/cache.py
+++ b/src/whoosh/util/cache.py
@@ -27,19 +27,10 @@
import functools
+from collections import Counter
from heapq import nsmallest
from operator import itemgetter
-from whoosh.compat import iteritems
-
-try:
- from collections import Counter
-except ImportError:
-
- class Counter(dict):
- def __missing__(self, key):
- return 0
-
def unbound_cache(func):
"""Caching decorator with an unbounded cache size."""
@@ -86,7 +77,7 @@ def wrapper(*args):
stats[1] += 1 # Miss
if len(data) == maxsize:
for k, _ in nsmallest(
- maxsize // 10 or 1, iteritems(usecount), key=itemgetter(1)
+ maxsize // 10 or 1, usecount.items(), key=itemgetter(1)
):
del data[k]
del usecount[k]
diff --git a/src/whoosh/util/numeric.py b/src/whoosh/util/numeric.py
index af813f49..8ae440ca 100644
--- a/src/whoosh/util/numeric.py
+++ b/src/whoosh/util/numeric.py
@@ -31,7 +31,6 @@
from bisect import bisect_left
from struct import pack, unpack
-from whoosh.compat import b, long_type
from whoosh.system import (
pack_byte,
pack_double,
@@ -51,7 +50,7 @@
unpack_ushort,
)
-NaN = struct.unpack("<f", b("\x00\x00\xc0\xff"))[0]
+NaN = struct.unpack("<f", b"\x00\x00\xc0\xff")[0]
        i = i >> 7
    a.append(i)
-    return array_tobytes(a)
+    return a.tobytes()
_varint_cache_size = 512
diff --git a/src/whoosh/writing.py b/src/whoosh/writing.py
index 54b76f92..6981a4cf 100644
--- a/src/whoosh/writing.py
+++ b/src/whoosh/writing.py
@@ -28,11 +28,11 @@
import threading
import time
+from abc import abstractmethod
from bisect import bisect_right
from contextlib import contextmanager
from whoosh import columns
-from whoosh.compat import abstractmethod, bytes_type
from whoosh.externalsort import SortingPool
from whoosh.fields import UnknownFieldError
from whoosh.index import LockError
@@ -159,9 +159,9 @@ def _remove_run(self, path):
return self.tempstore.delete_file(path)
def add(self, item):
- assert isinstance(item[1], bytes_type), f"tbytes={item[1]!r}"
+ assert isinstance(item[1], bytes), f"tbytes={item[1]!r}"
if item[4] is not None:
- assert isinstance(item[4], bytes_type), f"vbytes={item[4]!r}"
+ assert isinstance(item[4], bytes), f"vbytes={item[4]!r}"
self.fieldnames.add(item[0])
size = (
28
diff --git a/stress/test_bigindex.py b/stress/test_bigindex.py
index 80ca84a3..0bc21b1e 100644
--- a/stress/test_bigindex.py
+++ b/stress/test_bigindex.py
@@ -1,7 +1,6 @@
import random
from whoosh import fields
-from whoosh.compat import text_type, u
from whoosh.util import now
from whoosh.util.testing import TempIndex
@@ -27,7 +26,7 @@ def test_20000_single():
t = now()
for i in range(20000):
w = ix.writer()
- w.add_document(id=text_type(i), text=u(" ").join(random.sample(domain, 5)))
+ w.add_document(id=str(i), text=" ".join(random.sample(domain, 5)))
w.commit()
print("Write single:", now() - t)
@@ -59,7 +58,7 @@ def test_20000_buffered():
t = now()
w = BufferedWriter(ix, limit=100, period=None)
for i in range(20000):
- w.add_document(id=text_type(i), text=u(" ").join(random.sample(domain, 5)))
+ w.add_document(id=str(i), text=" ".join(random.sample(domain, 5)))
w.close()
print("Write buffered:", now() - t)
@@ -89,7 +88,7 @@ def test_20000_batch():
t = now()
w = ix.writer()
for i in range(20000):
- w.add_document(id=text_type(i), text=u(" ").join(random.sample(domain, 5)))
+ w.add_document(id=str(i), text=" ".join(random.sample(domain, 5)))
if not i % 100:
w.commit()
w = ix.writer()
diff --git a/stress/test_bigsort.py b/stress/test_bigsort.py
index b25b9007..bcc7b11e 100644
--- a/stress/test_bigsort.py
+++ b/stress/test_bigsort.py
@@ -4,7 +4,6 @@
from datetime import datetime, timezone
from whoosh import fields, index, query
-from whoosh.compat import text_type
from whoosh.util import now
@@ -27,7 +26,7 @@ def test_bigsort():
dt = datetime.fromtimestamp(
random.randint(15839593, 1294102139), tz=timezone.utc
)
- w.add_document(id=text_type(i), date=dt)
+ w.add_document(id=str(i), date=dt)
w.commit()
print("Writing took ", now() - t)
diff --git a/stress/test_bigtable.py b/stress/test_bigtable.py
index 0040d53d..152c2f82 100644
--- a/stress/test_bigtable.py
+++ b/stress/test_bigtable.py
@@ -1,7 +1,6 @@
from random import randint, shuffle
from nose.tools import assert_equal # type: ignore @UnresolvedImport
-from whoosh.compat import iteritems
from whoosh.filedb.filetables import HashReader, HashWriter
from whoosh.util.testing import TempStorage
@@ -16,7 +15,7 @@ def randstring(min, max):
samp = {randstring(1, 50): randstring(1, 50) for _ in range(count)}
fhw = HashWriter(st.create_file("big.hsh"))
- fhw.add_all(iteritems(samp))
+ fhw.add_all(samp.items())
fhw.close()
fhr = HashReader(st.open_file("big.hsh"))
@@ -25,7 +24,7 @@ def randstring(min, max):
for key in keys:
assert_equal(samp[key], fhr[key])
- set1 = set(iteritems(samp))
+ set1 = set(samp.items())
set2 = set(fhr.items())
assert_equal(set1, set2)
diff --git a/stress/test_threading.py b/stress/test_threading.py
index c02b3d47..b0636f63 100644
--- a/stress/test_threading.py
+++ b/stress/test_threading.py
@@ -3,7 +3,6 @@
import time
from whoosh import fields, query
-from whoosh.compat import text_type, u
from whoosh.util.testing import TempStorage
@@ -48,10 +47,8 @@ def run(self):
print(i)
w = ix.writer()
for _ in range(random.randint(1, 100)):
- content = u(" ").join(
- random.sample(domain, random.randint(5, 20))
- )
- w.add_document(id=text_type(num), content=content)
+ content = " ".join(random.sample(domain, random.randint(5, 20)))
+ w.add_document(id=str(num), content=content)
num += 1
w.commit()
diff --git a/stress/test_update.py b/stress/test_update.py
index 63524989..28658a23 100644
--- a/stress/test_update.py
+++ b/stress/test_update.py
@@ -2,7 +2,6 @@
from nose.tools import assert_equal
from whoosh import fields, query
-from whoosh.compat import text_type
from whoosh.util.testing import TempIndex
@@ -12,7 +11,7 @@ def test_many_updates():
for _ in range(10000):
num = random.randint(0, 5000)
w = ix.writer()
- w.update_document(key=text_type(num))
+ w.update_document(key=str(num))
w.commit()
with ix.searcher() as s:
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 394ce032..2f643834 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -1,11 +1,12 @@
+from pickle import dumps
+
import pytest
from whoosh import analysis, fields, qparser
-from whoosh.compat import b, dumps, u, unichr
from whoosh.filedb.filestore import RamStorage
def test_regextokenizer():
- value = u("AAAaaaBBBbbbCCCcccDDDddd")
+ value = "AAAaaaBBBbbbCCCcccDDDddd"
rex = analysis.RegexTokenizer("[A-Z]+")
assert [t.text for t in rex(value)] == ["AAA", "BBB", "CCC", "DDD"]
@@ -15,7 +16,7 @@ def test_regextokenizer():
def test_path_tokenizer():
- value = u("/alfa/bravo/charlie/delta/")
+ value = "/alfa/bravo/charlie/delta/"
pt = analysis.PathTokenizer()
assert [t.text for t in pt(value)] == [
"/alfa",
@@ -32,14 +33,14 @@ def test_path_tokenizer2():
index = st.create_index(schema)
with index.writer() as writer:
- writer.add_document(path=u("/alfa/brvo/charlie/delta/"))
- writer.add_document(path=u("/home/user/file.txt"))
+ writer.add_document(path="/alfa/brvo/charlie/delta/")
+ writer.add_document(path="/home/user/file.txt")
assert not index.is_empty()
with index.reader() as reader:
items = list(reader.all_terms())
assert "path" in [field for field, value in items]
- assert b("/alfa") in [value for field, value in items]
+ assert b"/alfa" in [value for field, value in items]
def test_composition1():
@@ -47,7 +48,7 @@ def test_composition1():
assert ca.__class__.__name__ == "CompositeAnalyzer"
assert ca[0].__class__.__name__ == "RegexTokenizer"
assert ca[1].__class__.__name__ == "LowercaseFilter"
- assert [t.text for t in ca(u("ABC 123"))] == ["abc", "123"]
+ assert [t.text for t in ca("ABC 123")] == ["abc", "123"]
def test_composition2():
@@ -58,7 +59,7 @@ def test_composition2():
assert sa[0].__class__.__name__ == "RegexTokenizer"
assert sa[1].__class__.__name__ == "LowercaseFilter"
assert sa[2].__class__.__name__ == "StopFilter"
- assert [t.text for t in sa(u("The ABC 123"))], ["abc", "123"]
+    assert [t.text for t in sa("The ABC 123")] == ["abc", "123"]
def test_composition3():
@@ -84,8 +85,8 @@ def test_shared_composition():
ana1 = shared | analysis.NgramFilter(3)
ana2 = shared | analysis.DoubleMetaphoneFilter()
- assert [t.text for t in ana1(u("hello"))] == ["hel", "ell", "llo"]
- assert [t.text for t in ana2(u("hello"))] == ["HL"]
+ assert [t.text for t in ana1("hello")] == ["hel", "ell", "llo"]
+ assert [t.text for t in ana2("hello")] == ["HL"]
def test_multifilter():
@@ -93,13 +94,13 @@ def test_multifilter():
f2 = analysis.PassFilter()
mf = analysis.MultiFilter(a=f1, b=f2)
ana = analysis.RegexTokenizer(r"\S+") | mf
- text = u("ALFA BRAVO CHARLIE")
+ text = "ALFA BRAVO CHARLIE"
assert [t.text for t in ana(text, mode="a")] == ["alfa", "bravo", "charlie"]
assert [t.text for t in ana(text, mode="b")] == ["ALFA", "BRAVO", "CHARLIE"]
def test_tee_filter():
- target = u("Alfa Bravo Charlie")
+ target = "Alfa Bravo Charlie"
f1 = analysis.LowercaseFilter()
f2 = analysis.ReverseTextFilter()
ana = analysis.RegexTokenizer(r"\S+") | analysis.TeeFilter(f1, f2)
@@ -135,10 +136,10 @@ def test_intraword():
def check(text, ls):
assert [(t.pos, t.text) for t in ana(text)] == ls
- check(u("PowerShot)"), [(0, "Power"), (1, "Shot"), (1, "PowerShot")])
- check(u("A's+B's&C's"), [(0, "A"), (1, "B"), (2, "C"), (2, "ABC")])
+ check("PowerShot)", [(0, "Power"), (1, "Shot"), (1, "PowerShot")])
+ check("A's+B's&C's", [(0, "A"), (1, "B"), (2, "C"), (2, "ABC")])
check(
- u("Super-Duper-XL500-42-AutoCoder!"),
+ "Super-Duper-XL500-42-AutoCoder!",
[
(0, "Super"),
(1, "Duper"),
@@ -158,7 +159,7 @@ def test_intraword_chars():
iwf = analysis.IntraWordFilter(mergewords=True, mergenums=True)
ana = analysis.RegexTokenizer(r"\S+") | iwf | analysis.LowercaseFilter()
- target = u("WiKiWo-rd")
+ target = "WiKiWo-rd"
tokens = [(t.text, t.startchar, t.endchar) for t in ana(target, chars=True)]
assert tokens == [
("wi", 0, 2),
@@ -168,7 +169,7 @@ def test_intraword_chars():
("wikiword", 0, 9),
]
- target = u("Zo WiKiWo-rd")
+ target = "Zo WiKiWo-rd"
tokens = [(t.text, t.startchar, t.endchar) for t in ana(target, chars=True)]
assert tokens == [
("zo", 0, 2),
@@ -184,7 +185,7 @@ def test_intraword_possessive():
iwf = analysis.IntraWordFilter(mergewords=True, mergenums=True)
ana = analysis.RegexTokenizer(r"\S+") | iwf | analysis.LowercaseFilter()
- target = u("O'Malley's-Bar")
+ target = "O'Malley's-Bar"
tokens = [(t.text, t.startchar, t.endchar) for t in ana(target, chars=True)]
assert tokens == [
("o", 0, 1),
@@ -195,11 +196,11 @@ def test_intraword_possessive():
def test_word_segments():
- wordset = set(u("alfa bravo charlie delta").split())
+ wordset = set("alfa bravo charlie delta".split())
cwf = analysis.CompoundWordFilter(wordset, keep_compound=True)
ana = analysis.RegexTokenizer(r"\S+") | cwf
- target = u("alfacharlie bravodelta delto bravo subalfa")
+ target = "alfacharlie bravodelta delto bravo subalfa"
tokens = [t.text for t in ana(target)]
assert tokens == [
"alfacharlie",
@@ -215,34 +216,34 @@ def test_word_segments():
cwf = analysis.CompoundWordFilter(wordset, keep_compound=False)
ana = analysis.RegexTokenizer(r"\S+") | cwf
- target = u("alfacharlie bravodelta delto bravo subalfa")
+ target = "alfacharlie bravodelta delto bravo subalfa"
tokens = [t.text for t in ana(target)]
assert tokens == ["alfa", "charlie", "bravo", "delta", "delto", "bravo", "subalfa"]
def test_biword():
ana = analysis.RegexTokenizer(r"\w+") | analysis.BiWordFilter()
- result = [t.copy() for t in ana(u("the sign of four"), chars=True, positions=True)]
+ result = [t.copy() for t in ana("the sign of four", chars=True, positions=True)]
assert ["the-sign", "sign-of", "of-four"] == [t.text for t in result]
assert [(0, 8), (4, 11), (9, 16)] == [(t.startchar, t.endchar) for t in result]
assert [0, 1, 2] == [t.pos for t in result]
- result = [t.copy() for t in ana(u("single"))]
+ result = [t.copy() for t in ana("single")]
assert len(result) == 1
assert result[0].text == "single"
def test_shingles():
ana = analysis.RegexTokenizer(r"\w+") | analysis.ShingleFilter(3, " ")
- source = u("better a witty fool than a foolish wit")
+ source = "better a witty fool than a foolish wit"
results = [t.copy() for t in ana(source, positions=True, chars=True)]
assert [t.text for t in results] == [
- u("better a witty"),
- u("a witty fool"),
- u("witty fool than"),
- u("fool than a"),
- u("than a foolish"),
- u("a foolish wit"),
+ "better a witty",
+ "a witty fool",
+ "witty fool than",
+ "fool than a",
+ "than a foolish",
+ "a foolish wit",
]
assert [t.pos for t in results] == list(range(len(results)))
for t in results:
@@ -252,14 +253,14 @@ def test_shingles():
def test_unicode_blocks():
from whoosh.support.unicode import blockname, blocknum, blocks
- assert blockname(u("a")) == "Basic Latin"
- assert blockname(unichr(0x0B80)) == "Tamil"
- assert blockname(unichr(2048)) is None
- assert blocknum(u("a")) == 0
- assert blocknum(unichr(0x0B80)) == 22
- assert blocknum(unichr(2048)) is None
- assert blocknum(u("a")) == blocks.Basic_Latin # @UndefinedVariable
- assert blocknum(unichr(0x0B80)) == blocks.Tamil # @UndefinedVariable
+ assert blockname("a") == "Basic Latin"
+ assert blockname(chr(0x0B80)) == "Tamil"
+ assert blockname(chr(2048)) is None
+ assert blocknum("a") == 0
+ assert blocknum(chr(0x0B80)) == 22
+ assert blocknum(chr(2048)) is None
+ assert blocknum("a") == blocks.Basic_Latin # @UndefinedVariable
+ assert blocknum(chr(0x0B80)) == blocks.Tamil # @UndefinedVariable
def test_double_metaphone():
@@ -311,7 +312,7 @@ def test_double_metaphone():
| analysis.LowercaseFilter()
| analysis.DoubleMetaphoneFilter()
)
- results = [(t.text, t.boost) for t in mf(u("Spruce View"))]
+ results = [(t.text, t.boost) for t in mf("Spruce View")]
assert results == [("SPRS", 1.0), ("F", 1.0), ("FF", 0.5)]
mf = (
@@ -319,7 +320,7 @@ def test_double_metaphone():
| analysis.LowercaseFilter()
| analysis.DoubleMetaphoneFilter(combine=True)
)
- results = [(t.text, t.boost) for t in mf(u("Spruce View"))]
+ results = [(t.text, t.boost) for t in mf("Spruce View")]
assert results == [
("spruce", 1.0),
("SPRS", 1.0),
@@ -329,13 +330,13 @@ def test_double_metaphone():
]
namefield = fields.TEXT(analyzer=mf)
- texts = list(namefield.process_text(u("Spruce View"), mode="query"))
- assert texts == [u("spruce"), "SPRS", u("view"), "F", "FF"]
+ texts = list(namefield.process_text("Spruce View", mode="query"))
+ assert texts == ["spruce", "SPRS", "view", "F", "FF"]
def test_substitution():
mf = analysis.RegexTokenizer(r"\S+") | analysis.SubstitutionFilter("-", "")
- assert [t.text for t in mf(u("one-two th-re-ee four"))] == [
+ assert [t.text for t in mf("one-two th-re-ee four")] == [
"onetwo",
"threee",
"four",
@@ -344,12 +345,12 @@ def test_substitution():
mf = analysis.RegexTokenizer(r"\S+") | analysis.SubstitutionFilter(
"([^=]*)=(.*)", r"\2=\1"
)
- assert [t.text for t in mf(u("a=b c=d ef"))] == ["b=a", "d=c", "ef"]
+ assert [t.text for t in mf("a=b c=d ef")] == ["b=a", "d=c", "ef"]
def test_delimited_attribute():
ana = analysis.RegexTokenizer(r"\S+") | analysis.DelimitedAttributeFilter()
- results = [(t.text, t.boost) for t in ana(u("image render^2 file^0.5"))]
+ results = [(t.text, t.boost) for t in ana("image render^2 file^0.5")]
assert results == [("image", 1.0), ("render", 2.0), ("file", 0.5)]
@@ -415,11 +416,11 @@ def test_porter2():
#
# ix = st.create_index(schema)
# with ix.writer() as w:
-# w.add_document(text=u("rains falling strangely"))
+# w.add_document(text="rains falling strangely")
#
# ix = st.open_index()
# with ix.writer() as w:
-# w.add_document(text=u("pains stalling strongly"))
+# w.add_document(text="pains stalling strongly")
#
# ix = st.open_index()
# with ix.reader() as r:
@@ -428,9 +429,7 @@ def test_porter2():
def test_url():
- sample = u(
- "Visit https://github.com/sygil-dev/whoosh-reloaded or urn:isbn:5930502 or http://www.apple.com/."
- )
+ sample = "Visit https://github.com/sygil-dev/whoosh-reloaded or urn:isbn:5930502 or http://www.apple.com/."
anas = [
analysis.SimpleAnalyzer(analysis.url_pattern),
@@ -439,12 +438,12 @@ def test_url():
for ana in anas:
ts = [t.text for t in ana(sample)]
assert ts == [
- u("visit"),
- u("https://github.com/sygil-dev/whoosh-reloaded"),
- u("or"),
- u("urn:isbn:5930502"),
- u("or"),
- u("http://www.apple.com/"),
+ "visit",
+ "https://github.com/sygil-dev/whoosh-reloaded",
+ "or",
+ "urn:isbn:5930502",
+ "or",
+ "http://www.apple.com/",
]
@@ -459,12 +458,12 @@ def test_name_field():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("one"), name=u("Leif Ericson"))
+ w.add_document(id="one", name="Leif Ericson")
w.commit()
s = ix.searcher()
qp = qparser.QueryParser("name", schema)
- q = qp.parse(u("leaf eriksen"), normalize=False)
+ q = qp.parse("leaf eriksen", normalize=False)
r = s.search(q)
assert len(r) == 1
@@ -474,11 +473,11 @@ def test_start_pos():
ana = analysis.RegexTokenizer(r"\S+") | analysis.LowercaseFilter()
kw = {"positions": True}
- tks = formats.tokens(u("alfa bravo charlie delta"), ana, kw)
+ tks = formats.tokens("alfa bravo charlie delta", ana, kw)
assert [t.pos for t in tks] == [0, 1, 2, 3]
kw["start_pos"] = 3
- ts = [t.copy() for t in formats.tokens(u("A B C D").split(), ana, kw)]
+ ts = [t.copy() for t in formats.tokens("A B C D".split(), ana, kw)]
assert " ".join([t.text for t in ts]) == "A B C D"
assert [t.pos for t in ts] == [3, 4, 5, 6]
@@ -487,16 +486,16 @@ def test_frowny_face():
# See https://bitbucket.org/mchaput/whoosh/issue/166/
ana = analysis.RegexTokenizer(r"\S+") | analysis.IntraWordFilter()
# text is all delimiters
- tokens = [t.text for t in ana(u(":-("))]
+ tokens = [t.text for t in ana(":-(")]
assert tokens == []
# text has consecutive delimiters
- tokens = [t.text for t in ana(u("LOL:)"))]
+ tokens = [t.text for t in ana("LOL:)")]
assert tokens == ["LOL"]
def test_ngrams():
- s = u("abcdefg h ij klm")
+ s = "abcdefg h ij klm"
tk = analysis.RegexTokenizer(r"\S+")
def dotest(f):
@@ -529,18 +528,18 @@ def dotest(f):
@pytest.mark.skipif("sys.version_info < (2,6)")
def test_language_analyzer():
domain = [
- ("da", u("Jeg gik mig over s\xf8 og land"), [u("gik"), u("s\xf8"), u("land")]),
+ ("da", "Jeg gik mig over s\xf8 og land", ["gik", "s\xf8", "land"]),
(
"nl",
- u("Daar komt een muisje aangelopen"),
- [u("komt"), u("muisj"), u("aangelop")],
+ "Daar komt een muisje aangelopen",
+ ["komt", "muisj", "aangelop"],
),
(
"de",
- u("Berlin war ihm zu gro\xdf, da baut' er sich ein Schlo\xdf."),
- [u("berlin"), u("gross"), u("baut"), u("schloss")],
+ "Berlin war ihm zu gro\xdf, da baut' er sich ein Schlo\xdf.",
+ ["berlin", "gross", "baut", "schloss"],
),
- ("es", u("Por el mar corren las liebres"), ["mar", "corr", "liebr"]),
+ ("es", "Por el mar corren las liebres", ["mar", "corr", "liebr"]),
]
for lang, source, target in domain:
@@ -574,9 +573,7 @@ def test_shingle_stopwords():
| analysis.ShingleFilter(size=3)
)
- texts = [
- t.text for t in ana(u("some other stuff and then some things To Check "))
- ]
+ texts = [t.text for t in ana("some other stuff and then some things To Check ")]
assert texts == [
"some-other-stuff",
"other-stuff-and",
@@ -594,9 +591,7 @@ def test_shingle_stopwords():
| analysis.ShingleFilter(size=3)
)
- texts = [
- t.text for t in ana(u("some other stuff and then some things To Check "))
- ]
+ texts = [t.text for t in ana("some other stuff and then some things To Check ")]
assert texts == [
"some-other-stuff",
"other-stuff-then",
@@ -614,7 +609,7 @@ def test_biword_stopwords():
| analysis.BiWordFilter()
)
- texts = [t.text for t in ana(u("stuff and then some"))]
+ texts = [t.text for t in ana("stuff and then some")]
assert texts == ["stuff-and", "and-then", "then-some"]
# Use a stop list here
@@ -625,18 +620,18 @@ def test_biword_stopwords():
| analysis.BiWordFilter()
)
- texts = [t.text for t in ana(u("stuff and then some"))]
+ texts = [t.text for t in ana("stuff and then some")]
assert texts == ["stuff-then", "then-some"]
@pytest.mark.skipif("sys.version_info < (2,6)")
def test_stop_lang():
stopper = analysis.RegexTokenizer() | analysis.StopFilter()
- ls = [token.text for token in stopper(u("this is a test"))]
- assert ls == [u("test")]
+ ls = [token.text for token in stopper("this is a test")]
+ assert ls == ["test"]
es_stopper = analysis.RegexTokenizer() | analysis.StopFilter(lang="es")
- ls = [token.text for token in es_stopper(u("el lapiz es en la mesa"))]
+ ls = [token.text for token in es_stopper("el lapiz es en la mesa")]
assert ls == ["lapiz", "mesa"]
diff --git a/tests/test_automata.py b/tests/test_automata.py
index d66fa021..bb7ae192 100644
--- a/tests/test_automata.py
+++ b/tests/test_automata.py
@@ -1,9 +1,9 @@
import gzip
import os.path
from bisect import bisect_left
+from itertools import permutations
from whoosh.automata import fsa, glob, lev
-from whoosh.compat import permutations
from whoosh.support.levenshtein import levenshtein
diff --git a/tests/test_classify.py b/tests/test_classify.py
index 89d008d3..d2a43c8d 100644
--- a/tests/test_classify.py
+++ b/tests/test_classify.py
@@ -1,33 +1,18 @@
from whoosh import analysis, classify, fields, formats, query, reading
-from whoosh.compat import text_type, u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.testing import TempIndex
domain = [
- u(
- "A volume that is a signed distance field used for collision calculations. The turbulence is damped near the collision object to prevent particles from passing through."
- ),
- u(
- "When particles cross the SDF boundary they have their velocities reversed according to the SDF normal and are pushed outside of the SDF."
- ),
- u(
- "The distance at which the particles start to slow down due to a collision object."
- ),
- u(
- "There are several different ways to update a particle system in response to an external velocity field. They are broadly categorized as Force, Velocity, and Position updates."
- ),
- u(
- "Instead of applying a force in the direction of the velocity field, the force is applied relative to the difference between the particle's velocity and the velocity field. This effectively adds an implicit drag that causes the particles to match the velocity field."
- ),
- u(
- "In Velocity Blend mode, the amount to mix in the field velocity every timestep."
- ),
- u(
- "In Velocity Blend mode, the amount to add the curlnoise velocity to the particle's velocity. This can be useful in addition to advectbyvolume to layer turbulence on a velocity field."
- ),
+ "A volume that is a signed distance field used for collision calculations. The turbulence is damped near the collision object to prevent particles from passing through.",
+ "When particles cross the SDF boundary they have their velocities reversed according to the SDF normal and are pushed outside of the SDF.",
+ "The distance at which the particles start to slow down due to a collision object.",
+ "There are several different ways to update a particle system in response to an external velocity field. They are broadly categorized as Force, Velocity, and Position updates.",
+ "Instead of applying a force in the direction of the velocity field, the force is applied relative to the difference between the particle's velocity and the velocity field. This effectively adds an implicit drag that causes the particles to match the velocity field.",
+ "In Velocity Blend mode, the amount to mix in the field velocity every timestep.",
+ "In Velocity Blend mode, the amount to add the curlnoise velocity to the particle's velocity. This can be useful in addition to advectbyvolume to layer turbulence on a velocity field.",
]
-text = u("How do I use a velocity field for particles")
+text = "How do I use a velocity field for particles"
def create_index():
@@ -45,7 +30,7 @@ def create_index():
from string import ascii_lowercase
for letter, content in zip(ascii_lowercase, domain):
- w.add_document(path=u("/%s") % letter, content=content, extra=u(""))
+ w.add_document(path=f"/{letter}", content=content, extra="")
w.commit()
return ix
@@ -71,7 +56,7 @@ def test_keyterms(model=classify.Bo1Model):
with ix.searcher() as s:
docnum = s.document_number(path="/a")
keys = list(s.key_terms([docnum], "content", numterms=3, model=model))
- assert [t[0] for t in keys] == [u("collision"), u("calculations"), u("damped")]
+ assert [t[0] for t in keys] == ["collision", "calculations", "damped"]
keys = list(s.key_terms([docnum], "extra", numterms=3, model=model))
assert keys == []
@@ -87,22 +72,22 @@ def test_keyterms_from_text(model=classify.Bo2Model):
def test_more_like_this(model=classify.Bo2Model):
docs = [
- u("alfa bravo charlie delta echo foxtrot golf"),
- u("delta echo foxtrot golf hotel india juliet"),
- u("echo foxtrot golf hotel india juliet kilo"),
- u("foxtrot golf hotel india juliet kilo lima"),
- u("golf hotel india juliet kilo lima mike"),
- u("foxtrot golf hotel india alfa bravo charlie"),
+ "alfa bravo charlie delta echo foxtrot golf",
+ "delta echo foxtrot golf hotel india juliet",
+ "echo foxtrot golf hotel india juliet kilo",
+ "foxtrot golf hotel india juliet kilo lima",
+ "golf hotel india juliet kilo lima mike",
+ "foxtrot golf hotel india alfa bravo charlie",
]
def _check(schema, **kwargs):
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- for i, text in enumerate(docs):
- w.add_document(id=text_type(i + 1), text=text)
+ for i, text in enumerate(docs, 1):
+ w.add_document(id=str(i), text=text)
with ix.searcher() as s:
- docnum = s.document_number(id=u("1"))
+ docnum = s.document_number(id="1")
r = s.more_like(docnum, "text", model=model, **kwargs)
assert [hit["id"] for hit in r] == ["6", "2", "3"]
@@ -124,13 +109,13 @@ def test_more_like(model=classify.Bo2Model):
schema = fields.Schema(id=fields.ID(stored=True), text=fields.TEXT(stored=True))
with TempIndex(schema, "morelike") as ix:
with ix.writer() as w:
- w.add_document(id=u("1"), text=u("alfa bravo charlie"))
- w.add_document(id=u("2"), text=u("bravo charlie delta"))
- w.add_document(id=u("3"), text=u("echo"))
- w.add_document(id=u("4"), text=u("delta echo foxtrot"))
- w.add_document(id=u("5"), text=u("echo echo echo"))
- w.add_document(id=u("6"), text=u("foxtrot golf hotel"))
- w.add_document(id=u("7"), text=u("golf hotel india"))
+ w.add_document(id="1", text="alfa bravo charlie")
+ w.add_document(id="2", text="bravo charlie delta")
+ w.add_document(id="3", text="echo")
+ w.add_document(id="4", text="delta echo foxtrot")
+ w.add_document(id="5", text="echo echo echo")
+ w.add_document(id="6", text="foxtrot golf hotel")
+ w.add_document(id="7", text="golf hotel india")
with ix.searcher() as s:
docnum = s.document_number(id="3")
@@ -143,7 +128,7 @@ def test_empty_more_like(model=classify.Bo1Model):
with TempIndex(schema, "emptymore") as ix:
with ix.searcher() as s:
assert s.doc_count() == 0
- q = query.Term("a", u("b"))
+ q = query.Term("a", "b")
r = s.search(q)
assert r.scored_length() == 0
assert r.key_terms("text", model=model) == []
diff --git a/tests/test_codecs.py b/tests/test_codecs.py
index eef187d5..b757bd0b 100644
--- a/tests/test_codecs.py
+++ b/tests/test_codecs.py
@@ -7,7 +7,6 @@
from whoosh.codec.memory import MemoryCodec, MemSegment, MemTermsReader, MemWriter
from whoosh.codec.plaintext import PlainTextCodec
from whoosh.codec.whoosh3 import W3Codec
-from whoosh.compat import array_tobytes, b, text_type, u
from whoosh.fields import TEXT, Schema
from whoosh.filedb.filestore import RamStorage
from whoosh.reading import TermNotFound
@@ -15,6 +14,16 @@
from whoosh.util.testing import TempStorage
+def b(s):
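+    # Local stand-in for the b() helper formerly imported from whoosh.compat.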
+ return s.encode("latin-1")
+
+
+def u(s):
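+    # Local stand-in for the removed whoosh.compat.u(): bytes are decoded, str passes through.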
+ return s.decode("ascii") if isinstance(s, bytes) else s
+
+
def _make_codec(**kwargs):
st = RamStorage()
codec = default_codec(**kwargs)
@@ -37,24 +44,24 @@ def test_termkey():
tw = codec.field_writer(st, seg)
fieldobj = fields.TEXT()
tw.start_field("alfa", fieldobj)
- tw.start_term(b("bravo"))
- tw.add(0, 1.0, b(""), 3)
+ tw.start_term(b"bravo")
+ tw.add(0, 1.0, b"", 3)
tw.finish_term()
- tw.start_term(b("\xc3\xa6\xc3\xaf\xc5\xc3\xba"))
- tw.add(0, 4.0, b(""), 3)
+ tw.start_term(b"\xc3\xa6\xc3\xaf\xc5\xc3\xba")
+ tw.add(0, 4.0, b"", 3)
tw.finish_term()
tw.finish_field()
tw.start_field("text", fieldobj)
- tw.start_term(b("\xe6\xa5\xe6\xac\xe8\xaa"))
- tw.add(0, 7.0, b(""), 9)
+ tw.start_term(b"\xe6\xa5\xe6\xac\xe8\xaa")
+ tw.add(0, 7.0, b"", 9)
tw.finish_term()
tw.finish_field()
tw.close()
tr = codec.terms_reader(st, seg)
- assert ("alfa", b("bravo")) in tr
- assert ("alfa", b("\xc3\xa6\xc3\xaf\xc5\xc3\xba")) in tr
- assert ("text", b("\xe6\xa5\xe6\xac\xe8\xaa")) in tr
+ assert ("alfa", b"bravo") in tr
+ assert ("alfa", b"\xc3\xa6\xc3\xaf\xc5\xc3\xba") in tr
+ assert ("text", b"\xe6\xa5\xe6\xac\xe8\xaa") in tr
tr.close()
@@ -64,7 +71,7 @@ def random_fieldname():
def random_btext():
a = array("H", (random.randint(0, 0xD7FF) for _ in range(1, 20)))
- return array_tobytes(a).decode("utf-16")
+ return a.tobytes().decode("utf-16")
domain = sorted(
{(random_fieldname(), random_btext().encode("utf-8")) for _ in range(1000)}
@@ -84,7 +91,7 @@ def random_btext():
tw.start_field(fieldname, fieldobj)
lastfield = fieldname
tw.start_term(text)
- tw.add(0, 1.0, b(""), 1)
+ tw.add(0, 1.0, b"", 1)
tw.finish_term()
if lastfield:
tw.finish_field()
@@ -157,9 +164,7 @@ def test_termindex():
schema = fields.Schema(a=fields.TEXT, b=fields.TEXT)
tw = codec.field_writer(st, seg)
- postings = (
- (fname, b(text), 0, i, b("")) for (i, (fname, text)) in enumerate(terms)
- )
+ postings = ((fname, b(text), 0, i, b"") for (i, (fname, text)) in enumerate(terms))
tw.add_postings(schema, FakeLengths(), postings)
tw.close()
@@ -224,9 +229,7 @@ def test_vector():
dw = codec.per_document_writer(st, seg)
dw.start_doc(0)
dw.add_field("title", field, None, 1)
- dw.add_vector_items(
- "title", field, [(u("alfa"), 1.0, b("t1")), (u("bravo"), 2.0, b("t2"))]
- )
+ dw.add_vector_items("title", field, [("alfa", 1.0, b"t1"), ("bravo", 2.0, b"t2")])
dw.finish_doc()
dw.close()
seg.set_doc_count(1)
@@ -240,13 +243,13 @@ def test_vector():
while m.is_active():
ps.append((m.id(), m.weight(), m.value()))
m.next()
- assert ps == [(u("alfa"), 1.0, b("t1")), (u("bravo"), 2.0, b("t2"))]
+ assert ps == [("alfa", 1.0, b"t1"), ("bravo", 2.0, b"t2")]
def test_vector_values():
field = fields.TEXT(vector=formats.Frequency())
st, codec, seg = _make_codec()
- content = u("alfa bravo charlie alfa")
+ content = "alfa bravo charlie alfa"
dw = codec.per_document_writer(st, seg)
dw.start_doc(0)
@@ -307,15 +310,15 @@ def test_fieldwriter_single_term():
fw = codec.field_writer(st, seg)
fw.start_field("text", field)
- fw.start_term(b("alfa"))
- fw.add(0, 1.5, b("test"), 1)
+ fw.start_term(b"alfa")
+ fw.add(0, 1.5, b"test", 1)
fw.finish_term()
fw.finish_field()
fw.close()
tr = codec.terms_reader(st, seg)
- assert ("text", b("alfa")) in tr
- ti = tr.term_info("text", b("alfa"))
+ assert ("text", b"alfa") in tr
+ ti = tr.term_info("text", b"alfa")
assert math.isclose(ti.weight(), 1.5)
assert ti.doc_frequency() == 1
assert ti.min_length() == 1
@@ -333,21 +336,21 @@ def test_fieldwriter_two_terms():
fw = codec.field_writer(st, seg)
fw.start_field("text", field)
- fw.start_term(b("alfa"))
- fw.add(0, 2.0, b("test1"), 2)
- fw.add(1, 1.0, b("test2"), 1)
+ fw.start_term(b"alfa")
+ fw.add(0, 2.0, b"test1", 2)
+ fw.add(1, 1.0, b"test2", 1)
fw.finish_term()
- fw.start_term(b("bravo"))
- fw.add(0, 3.0, b("test3"), 3)
- fw.add(2, 2.0, b("test4"), 2)
+ fw.start_term(b"bravo")
+ fw.add(0, 3.0, b"test3", 3)
+ fw.add(2, 2.0, b"test4", 2)
fw.finish_term()
fw.finish_field()
fw.close()
tr = codec.terms_reader(st, seg)
- assert ("text", b("alfa")) in tr
+ assert ("text", b"alfa") in tr
- ti = tr.term_info("text", b("alfa"))
+ ti = tr.term_info("text", b"alfa")
assert math.isclose(ti.weight(), 3.0)
assert ti.doc_frequency() == 2
assert ti.min_length() == 1
@@ -355,9 +358,9 @@ def test_fieldwriter_two_terms():
assert math.isclose(ti.max_weight(), 2.0)
assert ti.min_id() == 0
assert ti.max_id() == 1
- assert ("text", b("bravo")) in tr
+ assert ("text", b"bravo") in tr
- ti = tr.term_info("text", b("bravo"))
+ ti = tr.term_info("text", b"bravo")
assert math.isclose(ti.weight(), 5.0)
assert ti.doc_frequency() == 2
assert ti.min_length() == 2
@@ -366,7 +369,7 @@ def test_fieldwriter_two_terms():
assert ti.min_id() == 0
assert ti.max_id() == 2
- m = tr.matcher("text", b("bravo"), field.format)
+ m = tr.matcher("text", b"bravo", field.format)
assert list(m.all_ids()) == [0, 2]
@@ -378,18 +381,18 @@ def test_fieldwriter_multiblock():
fw = codec.field_writer(st, seg)
fw.start_field("text", field)
- fw.start_term(b("alfa"))
- fw.add(0, 2.0, b("test1"), 2)
- fw.add(1, 5.0, b("test2"), 5)
- fw.add(2, 3.0, b("test3"), 3)
- fw.add(3, 4.0, b("test4"), 4)
- fw.add(4, 1.0, b("test5"), 1)
+ fw.start_term(b"alfa")
+ fw.add(0, 2.0, b"test1", 2)
+ fw.add(1, 5.0, b"test2", 5)
+ fw.add(2, 3.0, b"test3", 3)
+ fw.add(3, 4.0, b"test4", 4)
+ fw.add(4, 1.0, b"test5", 1)
fw.finish_term()
fw.finish_field()
fw.close()
tr = codec.terms_reader(st, seg)
- ti = tr.term_info("text", b("alfa"))
+ ti = tr.term_info("text", b"alfa")
assert math.isclose(ti.weight(), 15.0)
assert ti.doc_frequency() == 5
assert ti.min_length() == 1
@@ -399,23 +402,23 @@ def test_fieldwriter_multiblock():
assert ti.max_id() == 4
ps = []
- m = tr.matcher("text", b("alfa"), field.format)
+ m = tr.matcher("text", b"alfa", field.format)
while m.is_active():
ps.append((m.id(), m.weight(), m.value()))
m.next()
assert ps == [
- (0, 2.0, b("test1")),
- (1, 5.0, b("test2")),
- (2, 3.0, b("test3")),
- (3, 4.0, b("test4")),
- (4, 1.0, b("test5")),
+ (0, 2.0, b"test1"),
+ (1, 5.0, b"test2"),
+ (2, 3.0, b"test3"),
+ (3, 4.0, b"test4"),
+ (4, 1.0, b"test5"),
]
def test_term_values():
field = fields.TEXT(phrase=False)
st, codec, seg = _make_codec()
- content = u("alfa bravo charlie alfa")
+ content = "alfa bravo charlie alfa"
fw = codec.field_writer(st, seg)
fw.start_field("f1", field)
@@ -429,9 +432,9 @@ def test_term_values():
tr = codec.terms_reader(st, seg)
ps = [(term, ti.weight(), ti.doc_frequency()) for term, ti in tr.items()]
assert ps == [
- (("f1", b("alfa")), 2.0, 1),
- (("f1", b("bravo")), 1.0, 1),
- (("f1", b("charlie")), 1.0, 1),
+ (("f1", b"alfa"), 2.0, 1),
+ (("f1", b"bravo"), 1.0, 1),
+ (("f1", b"charlie"), 1.0, 1),
]
@@ -461,15 +464,15 @@ def test_skip():
fieldobj = fields.TEXT()
fw = codec.field_writer(st, seg)
fw.start_field("f1", fieldobj)
- fw.start_term(b("test"))
+ fw.start_term(b"test")
for n in _docnums:
- fw.add(n, 1.0, b(""), None)
+ fw.add(n, 1.0, b"", None)
fw.finish_term()
fw.finish_field()
fw.close()
tr = codec.terms_reader(st, seg)
- m = tr.matcher("f1", b("test"), fieldobj.format)
+ m = tr.matcher("f1", b"test", fieldobj.format)
assert m.id() == 1
m.skip_to(220)
assert m.id() == 283
@@ -487,11 +490,11 @@ def test_skip():
#
# fw = codec.field_writer(st, seg)
# fw.start_field("text", field)
-# fw.start_term(b("special"))
-# fw.add(0, 1.0, b("test1"), 1)
+# fw.start_term(b"special")
+# fw.add(0, 1.0, b"test1", 1)
# fw.finish_term()
-# fw.start_term(b("specific"))
-# fw.add(1, 1.0, b("test2"), 1)
+# fw.start_term(b"specific")
+# fw.add(1, 1.0, b"test2", 1)
# fw.finish_term()
# fw.finish_field()
# fw.close()
@@ -500,7 +503,7 @@ def test_skip():
# assert gr.has_root("text")
# cur = gr.cursor("text")
# strings = list(cur.flatten_strings())
-# assert type(strings[0]) == text_type
+# assert type(strings[0]) == str
# assert strings == ["special", "specific"]
#
#
@@ -512,19 +515,19 @@ def test_skip():
#
# fw = codec.field_writer(st, seg)
# fw.start_field("text", field)
-# fw.start_term(b("special"))
-# fw.add(0, 1.0, b("test1"), 1)
+# fw.start_term(b"special")
+# fw.add(0, 1.0, b"test1", 1)
# fw.finish_term()
-# fw.start_term(b("specific"))
-# fw.add(1, 1.0, b("test2"), 1)
+# fw.start_term(b"specific")
+# fw.add(1, 1.0, b"test2", 1)
# fw.finish_term()
-# fw.add_spell_word("text", u("specials"))
-# fw.add_spell_word("text", u("specifically"))
+# fw.add_spell_word("text", "specials")
+# fw.add_spell_word("text", "specifically")
# fw.finish_field()
# fw.close()
#
# tr = codec.terms_reader(st, seg)
-# assert list(tr.terms()) == [("text", b("special")), ("text", b("specific"))]
+# assert list(tr.terms()) == [("text", b"special"), ("text", b"specific")]
#
# cur = codec.graph_reader(st, seg).cursor("text")
# assert list(cur.flatten_strings()) == ["specials", "specifically"]
@@ -546,17 +549,17 @@ def test_plaintext_codec():
ix = st.create_index(schema)
with ix.writer(codec=W3Codec()) as w:
w.add_document(
- a=u("alfa bravo charlie"), b="hello", c=100, d=u("quelling whining echoing")
+ a="alfa bravo charlie", b="hello", c=100, d="quelling whining echoing"
)
w.add_document(
- a=u("bravo charlie delta"), b=1000, c=200, d=u("rolling timing yelling")
+ a="bravo charlie delta", b=1000, c=200, d="rolling timing yelling"
)
- w.add_document(a=u(cde), b=5.5, c=300, d=u("using opening pulling"))
+ w.add_document(a=u(cde), b=5.5, c=300, d="using opening pulling")
w.add_document(
- a=u("delta echo foxtrot"), b=True, c=-100, d=u("aching selling dipping")
+ a="delta echo foxtrot", b=True, c=-100, d="aching selling dipping"
)
w.add_document(
- a=u("echo foxtrot india"), b=None, c=-200, d=u("filling going hopping")
+ a="echo foxtrot india", b=None, c=-200, d="filling going hopping"
)
with ix.reader() as r:
@@ -620,17 +623,17 @@ def test_memory_codec():
codec = MemoryCodec()
with codec.writer(schema) as w:
w.add_document(
- a=u("alfa bravo charlie"), b="hello", c=100, d=u("quelling whining echoing")
+ a="alfa bravo charlie", b="hello", c=100, d="quelling whining echoing"
)
w.add_document(
- a=u("bravo charlie delta"), b=1000, c=200, d=u("rolling timing yelling")
+ a="bravo charlie delta", b=1000, c=200, d="rolling timing yelling"
)
- w.add_document(a=u(cde), b=5.5, c=300, d=u("using opening pulling"))
+ w.add_document(a=u(cde), b=5.5, c=300, d="using opening pulling")
w.add_document(
- a=u("delta echo foxtrot"), b=True, c=-100, d=u("aching selling dipping")
+ a="delta echo foxtrot", b=True, c=-100, d="aching selling dipping"
)
w.add_document(
- a=u("echo foxtrot india"), b=None, c=-200, d=u("filling going hopping")
+ a="echo foxtrot india", b=None, c=-200, d="filling going hopping"
)
reader = codec.reader(schema)
diff --git a/tests/test_collector.py b/tests/test_collector.py
index 16260a06..fc5a7b12 100644
--- a/tests/test_collector.py
+++ b/tests/test_collector.py
@@ -1,6 +1,5 @@
import pytest
from whoosh import collectors, fields, query, searching
-from whoosh.compat import u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.testing import TempIndex
@@ -9,14 +8,14 @@ def test_add():
schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=1, text=u("alfa bravo charlie"))
- w.add_document(id=2, text=u("alfa bravo delta"))
- w.add_document(id=3, text=u("alfa charlie echo"))
+ w.add_document(id=1, text="alfa bravo charlie")
+ w.add_document(id=2, text="alfa bravo delta")
+ w.add_document(id=3, text="alfa charlie echo")
w.commit()
with ix.searcher() as s:
- assert s.doc_frequency("text", u("charlie")) == 2
- r = s.search(query.Term("text", u("charlie")))
+ assert s.doc_frequency("text", "charlie") == 2
+ r = s.search(query.Term("text", "charlie"))
assert [hit["id"] for hit in r] == [1, 3]
assert len(r) == 2
@@ -25,12 +24,12 @@ def test_filter_that_matches_no_document():
schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=1, text=u("alfa bravo charlie"))
- w.add_document(id=2, text=u("alfa bravo delta"))
+ w.add_document(id=1, text="alfa bravo charlie")
+ w.add_document(id=2, text="alfa bravo delta")
w.commit()
with ix.searcher() as s:
- r = s.search(query.Every(), filter=query.Term("text", u("echo")))
+ r = s.search(query.Every(), filter=query.Term("text", "echo"))
assert [hit["id"] for hit in r] == []
assert len(r) == 0
@@ -40,7 +39,7 @@ def test_timelimit():
ix = RamStorage().create_index(schema)
w = ix.writer()
for _ in range(50):
- w.add_document(text=u("alfa"))
+ w.add_document(text="alfa")
w.commit()
import time
@@ -57,7 +56,7 @@ def matcher(self, searcher, context=None):
return SlowMatcher(self.child.matcher(searcher, context))
with ix.searcher() as s:
- oq = query.Term("text", u("alfa"))
+ oq = query.Term("text", "alfa")
sq = SlowQuery(oq)
col = collectors.TimeLimitCollector(s.collector(limit=None), timelimit=0.1)
@@ -111,7 +110,7 @@ def matcher(self, searcher, context=None):
schema = fields.Schema(text=fields.TEXT)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(text=u("Hello"))
+ w.add_document(text="Hello")
with ix.searcher() as s:
q = SlowQuery()
@@ -253,17 +252,15 @@ def test_filter_results_count():
)
with TempIndex(schema) as ix:
with ix.writer() as w:
- w.add_document(
- id=1, django_ct=u("app.model1"), text=u("alfa bravo charlie")
- )
- w.add_document(id=2, django_ct=u("app.model1"), text=u("alfa bravo delta"))
- w.add_document(id=3, django_ct=u("app.model2"), text=u("alfa charlie echo"))
+ w.add_document(id=1, django_ct="app.model1", text="alfa bravo charlie")
+ w.add_document(id=2, django_ct="app.model1", text="alfa bravo delta")
+ w.add_document(id=3, django_ct="app.model2", text="alfa charlie echo")
with ix.searcher() as s:
- q = query.Term("django_ct", u("app.model1"))
+ q = query.Term("django_ct", "app.model1")
r1 = s.search(q, limit=None)
assert len(r1) == 2
- q = query.Term("text", u("alfa"))
+ q = query.Term("text", "alfa")
r2 = s.search(q, filter=r1, limit=1)
assert len(r2) == 2
diff --git a/tests/test_columns.py b/tests/test_columns.py
index 55a5cec7..c187ec97 100644
--- a/tests/test_columns.py
+++ b/tests/test_columns.py
@@ -1,11 +1,12 @@
import inspect
import random
import sys
+from io import BytesIO
+from pickle import dumps, loads
import pytest
from whoosh import columns, fields, query
from whoosh.codec.whoosh3 import W3Codec
-from whoosh.compat import BytesIO, b, bytes_type, dumps, izip, loads, text_type, u
from whoosh.filedb import compound
from whoosh.filedb.filestore import RamStorage
from whoosh.matching import ConstantScoreMatcher
@@ -13,6 +14,11 @@
from whoosh.util.testing import TempIndex, TempStorage
+def b(s):
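+    # Local stand-in for the b() helper formerly imported from whoosh.compat.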
+ return s.encode("latin-1")
+
+
def test_pickleability():
# Ignore base classes
ignore = (columns.Column, columns.WrappedColumn, columns.ListColumn)
@@ -65,9 +70,9 @@ def test_multistream():
f = st.open_file("test")
msr = compound.CompoundStorage(f)
- assert msr.open_file("a").read() == b("123456789abc")
- assert msr.open_file("b").read() == b("abcdefghijk")
- assert msr.open_file("c").read() == b("AaBbCcDdEeFfGgHh")
+ assert msr.open_file("a").read() == b"123456789abc"
+ assert msr.open_file("b").read() == b"abcdefghijk"
+ assert msr.open_file("c").read() == b"AaBbCcDdEeFfGgHh"
def test_random_multistream():
@@ -77,21 +82,14 @@ def randstring(n):
s = "".join(random.choice(letters) for _ in range(n))
return s.encode("latin1")
- domain = {}
- for _ in range(100):
- name = randstring(random.randint(5, 10))
- value = randstring(2500)
- domain[name] = value
-
+ domain = {randstring(random.randint(5, 10)): randstring(2500) for _ in range(100)}
outfiles = {name: BytesIO(value) for name, value in domain.items()}
with TempStorage() as st:
msw = compound.CompoundWriter(st, buffersize=1024)
- mfiles = {}
- for name in domain:
- mfiles[name] = msw.create_file(name)
+ mfiles = {name: msw.create_file(name) for name in domain}
while outfiles:
- name = random.choice(list(outfiles.keys()))
+ name = random.choice(list(outfiles))
v = outfiles[name].read(1000)
mfiles[name].write(v)
if len(v) < 1000:
@@ -110,7 +108,7 @@ def _rt(c, values, default):
# Continuous
st = RamStorage()
f = st.create_file("test1")
- f.write(b("hello"))
+ f.write(b"hello")
w = c.writer(f)
for docnum, v in enumerate(values):
w.add(docnum, v)
@@ -130,9 +128,9 @@ def _rt(c, values, default):
target = [default] * doccount
f = st.create_file("test2")
- f.write(b("hello"))
+ f.write(b"hello")
w = c.writer(f)
- for docnum, v in izip(range(10, doccount, 7), values):
+ for docnum, v in zip(range(10, doccount, 7), values):
target[docnum] = v
w.add(docnum, v)
w.finish(doccount)
@@ -151,21 +149,21 @@ def _rt(c, values, default):
def test_roundtrip():
- _rt(columns.VarBytesColumn(), [b("a"), b("ccc"), b("bbb"), b("e"), b("dd")], b(""))
+ _rt(columns.VarBytesColumn(), [b"a", b"ccc", b"bbb", b"e", b"dd"], b"")
_rt(
columns.FixedBytesColumn(5),
- [b("aaaaa"), b("eeeee"), b("ccccc"), b("bbbbb"), b("eeeee")],
- b("\x00") * 5,
+ [b"aaaaa", b"eeeee", b"ccccc", b"bbbbb", b"eeeee"],
+ b"\x00" * 5,
)
_rt(
columns.RefBytesColumn(),
- [b("a"), b("ccc"), b("bb"), b("ccc"), b("a"), b("bb")],
- b(""),
+ [b"a", b"ccc", b"bb", b"ccc", b"a", b"bb"],
+ b"",
)
_rt(
columns.RefBytesColumn(3),
- [b("aaa"), b("bbb"), b("ccc"), b("aaa"), b("bbb"), b("ccc")],
- b("\x00") * 3,
+ [b"aaa", b"bbb", b"ccc", b"aaa", b"bbb", b"ccc"],
+ b"\x00" * 3,
)
_rt(
columns.StructColumn("ifH", (0, 0.0, 0)),
@@ -199,11 +197,11 @@ def test_roundtrip():
_rt(c, [None, True, False, 100, -7, "hello"], None)
c = columns.VarBytesListColumn()
- _rt(c, [[b("garnet"), b("amethyst")], [b("pearl")]], [])
+ _rt(c, [[b"garnet", b"amethyst"], [b"pearl"]], [])
_c = columns.VarBytesListColumn()
c = columns.FixedBytesListColumn(4)
- _rt(c, [[b("garn"), b("amet")], [b("pear")]], [])
+ _rt(c, [[b"garn", b"amet"], [b"pear"]], [])
def test_multivalue():
@@ -212,8 +210,8 @@ def test_multivalue():
)
ix = RamStorage().create_index(schema)
with ix.writer(codec=W3Codec()) as w:
- w.add_document(s=u("alfa foxtrot charlie").split(), n=[100, 200, 300])
- w.add_document(s=u("juliet bravo india").split(), n=[10, 20, 30])
+ w.add_document(s="alfa foxtrot charlie".split(), n=[100, 200, 300])
+ w.add_document(s="juliet bravo india".split(), n=[10, 20, 30])
with ix.reader() as r:
scr = r.column_reader("s")
@@ -228,23 +226,23 @@ def test_column_field():
a=fields.TEXT(sortable=True), b=fields.COLUMN(columns.RefBytesColumn())
)
with TempIndex(schema, "columnfield") as ix:
- cd = b("charlie delta")
+ cd = b"charlie delta"
with ix.writer(codec=W3Codec()) as w:
- w.add_document(a=u("alfa bravo"), b=cd)
- w.add_document(a=u("bravo charlie"), b=b("delta echo"))
- w.add_document(a=u("charlie delta"), b=b("echo foxtrot"))
+ w.add_document(a="alfa bravo", b=cd)
+ w.add_document(a="bravo charlie", b=b"delta echo")
+ w.add_document(a="charlie delta", b=b"echo foxtrot")
with ix.reader() as r:
assert r.has_column("a")
assert r.has_column("b")
cra = r.column_reader("a")
- assert cra[0] == u("alfa bravo")
- assert type(cra[0]) == text_type
+ assert cra[0] == "alfa bravo"
+ assert type(cra[0]) == str
crb = r.column_reader("b")
assert crb[0] == cd
- assert type(crb[0]) == bytes_type
+ assert type(crb[0]) == bytes
def test_column_query():
@@ -253,25 +251,25 @@ def test_column_query():
)
with TempIndex(schema, "ColumnQuery") as ix:
with ix.writer(codec=W3Codec()) as w:
- w.add_document(id=1, a=u("alfa"), b=10)
- w.add_document(id=2, a=u("bravo"), b=20)
- w.add_document(id=3, a=u("charlie"), b=30)
- w.add_document(id=4, a=u("delta"), b=40)
- w.add_document(id=5, a=u("echo"), b=50)
- w.add_document(id=6, a=u("foxtrot"), b=60)
+ w.add_document(id=1, a="alfa", b=10)
+ w.add_document(id=2, a="bravo", b=20)
+ w.add_document(id=3, a="charlie", b=30)
+ w.add_document(id=4, a="delta", b=40)
+ w.add_document(id=5, a="echo", b=50)
+ w.add_document(id=6, a="foxtrot", b=60)
with ix.searcher() as s:
def check(q):
return [s.stored_fields(docnum)["id"] for docnum in q.docs(s)]
- q = ColumnQuery("a", u("bravo"))
+ q = ColumnQuery("a", "bravo")
assert check(q) == [2]
q = ColumnQuery("b", 30)
assert check(q) == [3]
- q = ColumnQuery("a", lambda v: v != u("delta"))
+ q = ColumnQuery("a", lambda v: v != "delta")
assert check(q) == [1, 2, 3, 5, 6]
q = ColumnQuery("b", lambda v: v > 30)
@@ -302,7 +300,7 @@ def rw(size):
if i <= 65535 - 1:
assert v == hex(i).encode("latin1")
else:
- assert v == b("")
+ assert v == b""
f.close()
rw(255)
@@ -320,7 +318,7 @@ def rw(size):
def test_varbytes_offsets():
- values = u("alfa bravo charlie delta echo foxtrot golf hotel").split()
+ values = "alfa bravo charlie delta echo foxtrot golf hotel".split()
vlen = len(values)
# Without offsets:
diff --git a/tests/test_compound.py b/tests/test_compound.py
index 38de3f22..6a6539eb 100644
--- a/tests/test_compound.py
+++ b/tests/test_compound.py
@@ -1,4 +1,3 @@
-from whoosh.compat import b
from whoosh.filedb.compound import CompoundStorage
from whoosh.filedb.filestore import RamStorage
from whoosh.util.testing import TempStorage
@@ -26,17 +25,17 @@ def _test_simple_compound(st):
with f.open_file("a") as af:
for x in alist:
assert x == af.read_int()
- assert af.read() == b("")
+ assert af.read() == b""
with f.open_file("b") as bf:
for x in blist:
assert x == bf.read_varint()
- assert bf.read() == b("")
+ assert bf.read() == b""
with f.open_file("c") as cf:
for x in clist:
assert x == cf.read_int()
- assert cf.read() == b("")
+ assert cf.read() == b""
def test_simple_compound_mmap():
diff --git a/tests/test_fields.py b/tests/test_fields.py
index 47acff8e..aba2e903 100644
--- a/tests/test_fields.py
+++ b/tests/test_fields.py
@@ -2,7 +2,6 @@
import pytest
from whoosh import fields, qparser, query
-from whoosh.compat import b, u
from whoosh.filedb.filestore import RamStorage
from whoosh.util import times
from whoosh.util.testing import TempIndex
@@ -136,24 +135,24 @@ def test_index_numeric():
w.add_document(a=1, b=1)
with ix.searcher() as s:
assert list(s.lexicon("a")) == [
- b("\x00\x00\x00\x00\x01"),
- b("\x04\x00\x00\x00\x00"),
- b("\x08\x00\x00\x00\x00"),
- b("\x0c\x00\x00\x00\x00"),
- b("\x10\x00\x00\x00\x00"),
- b("\x14\x00\x00\x00\x00"),
- b("\x18\x00\x00\x00\x00"),
- b("\x1c\x00\x00\x00\x00"),
+ b"\x00\x00\x00\x00\x01",
+ b"\x04\x00\x00\x00\x00",
+ b"\x08\x00\x00\x00\x00",
+ b"\x0c\x00\x00\x00\x00",
+ b"\x10\x00\x00\x00\x00",
+ b"\x14\x00\x00\x00\x00",
+ b"\x18\x00\x00\x00\x00",
+ b"\x1c\x00\x00\x00\x00",
]
assert list(s.lexicon("b")) == [
- b("\x00\x80\x00\x00\x01"),
- b("\x04\x08\x00\x00\x00"),
- b("\x08\x00\x80\x00\x00"),
- b("\x0c\x00\x08\x00\x00"),
- b("\x10\x00\x00\x80\x00"),
- b("\x14\x00\x00\x08\x00"),
- b("\x18\x00\x00\x00\x80"),
- b("\x1c\x00\x00\x00\x08"),
+ b"\x00\x80\x00\x00\x01",
+ b"\x04\x08\x00\x00\x00",
+ b"\x08\x00\x80\x00\x00",
+ b"\x0c\x00\x08\x00\x00",
+ b"\x10\x00\x00\x80\x00",
+ b"\x14\x00\x00\x08\x00",
+ b"\x18\x00\x00\x00\x80",
+ b"\x1c\x00\x00\x00\x08",
]
@@ -166,17 +165,17 @@ def test_numeric():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("a"), integer=5820, floating=1.2)
- w.add_document(id=u("b"), integer=22, floating=2.3)
- w.add_document(id=u("c"), integer=78, floating=3.4)
- w.add_document(id=u("d"), integer=13, floating=4.5)
- w.add_document(id=u("e"), integer=9, floating=5.6)
+ w.add_document(id="a", integer=5820, floating=1.2)
+ w.add_document(id="b", integer=22, floating=2.3)
+ w.add_document(id="c", integer=78, floating=3.4)
+ w.add_document(id="d", integer=13, floating=4.5)
+ w.add_document(id="e", integer=9, floating=5.6)
w.commit()
with ix.searcher() as s:
qp = qparser.QueryParser("integer", schema)
- q = qp.parse(u("5820"))
+ q = qp.parse("5820")
r = s.search(q)
assert len(r) == 1
assert r[0]["id"] == "a"
@@ -204,20 +203,20 @@ def test_decimal_numeric():
# assert f.from_text(f.to_text(Decimal("123.56"))), Decimal("123.56"))
w = ix.writer()
- w.add_document(id=u("a"), deci=Decimal("123.56"))
- w.add_document(id=u("b"), deci=Decimal("0.536255"))
- w.add_document(id=u("c"), deci=Decimal("2.5255"))
- w.add_document(id=u("d"), deci=Decimal("58"))
+ w.add_document(id="a", deci=Decimal("123.56"))
+ w.add_document(id="b", deci=Decimal("0.536255"))
+ w.add_document(id="c", deci=Decimal("2.5255"))
+ w.add_document(id="d", deci=Decimal("58"))
w.commit()
with ix.searcher() as s:
qp = qparser.QueryParser("deci", schema)
- q = qp.parse(u("123.56"))
+ q = qp.parse("123.56")
r = s.search(q)
assert len(r) == 1
assert r[0]["id"] == "a"
- r = s.search(qp.parse(u("0.536255")))
+ r = s.search(qp.parse("0.536255"))
assert len(r) == 1
assert r[0]["id"] == "b"
@@ -226,20 +225,20 @@ def test_numeric_parsing():
schema = fields.Schema(id=fields.ID(stored=True), number=fields.NUMERIC)
qp = qparser.QueryParser("number", schema)
- q = qp.parse(u("[10 to *]"))
+ q = qp.parse("[10 to *]")
assert q == query.NullQuery
- q = qp.parse(u("[to 400]"))
+ q = qp.parse("[to 400]")
assert q.__class__ is query.NumericRange
assert q.start is None
assert q.end == 400
- q = qp.parse(u("[10 to]"))
+ q = qp.parse("[10 to]")
assert q.__class__ is query.NumericRange
assert q.start == 10
assert q.end is None
- q = qp.parse(u("[10 to 400]"))
+ q = qp.parse("[10 to 400]")
assert q.__class__ is query.NumericRange
assert q.start == 10
assert q.end == 400
@@ -391,7 +390,7 @@ def test_datetime():
for month in range(1, 12):
for day in range(1, 28):
w.add_document(
- id=u("%s-%s") % (month, day),
+ id=f"{month}-{day}",
date=datetime(2010, month, day, 14, 0, 0, tzinfo=timezone.utc),
)
w.commit()
@@ -409,7 +408,7 @@ def test_datetime():
r = s.search(qp.parse("date:'2010 02'"))
assert len(r) == 27
- q = qp.parse(u("date:[2010-05 to 2010-08]"))
+ q = qp.parse("date:[2010-05 to 2010-08]")
startdt = datetime(2010, 5, 1, 0, 0, 0, 0, tzinfo=timezone.utc)
enddt = datetime(2010, 8, 31, 23, 59, 59, 999999, tzinfo=timezone.utc)
assert q.__class__ is query.NumericRange
@@ -422,11 +421,11 @@ def test_boolean():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("a"), done=True)
- w.add_document(id=u("b"), done=False)
- w.add_document(id=u("c"), done=True)
- w.add_document(id=u("d"), done=False)
- w.add_document(id=u("e"), done=True)
+ w.add_document(id="a", done=True)
+ w.add_document(id="b", done=False)
+ w.add_document(id="c", done=True)
+ w.add_document(id="d", done=False)
+ w.add_document(id="e", done=True)
w.commit()
with ix.searcher() as s:
@@ -443,7 +442,7 @@ def test_boolean():
q = qp.parse("done:false")
assert q.__class__ == query.Term
assert q.text is False
- assert schema["done"].to_bytes(False) == b("f")
+ assert schema["done"].to_bytes(False) == b"f"
r = s.search(q)
assert sorted([d["id"] for d in r]) == ["b", "d"]
assert not any(d["done"] for d in r)
@@ -457,15 +456,15 @@ def test_boolean2():
schema = fields.Schema(t=fields.TEXT(stored=True), b=fields.BOOLEAN(stored=True))
ix = RamStorage().create_index(schema)
writer = ix.writer()
- writer.add_document(t=u("some kind of text"), b=False)
- writer.add_document(t=u("some other kind of text"), b=False)
- writer.add_document(t=u("some more text"), b=False)
- writer.add_document(t=u("some again"), b=True)
+ writer.add_document(t="some kind of text", b=False)
+ writer.add_document(t="some other kind of text", b=False)
+ writer.add_document(t="some more text", b=False)
+ writer.add_document(t="some again", b=True)
writer.commit()
with ix.searcher() as s:
- qf = qparser.QueryParser("b", None).parse(u("f"))
- qt = qparser.QueryParser("b", None).parse(u("t"))
+ qf = qparser.QueryParser("b", None).parse("f")
+ qt = qparser.QueryParser("b", None).parse("t")
r = s.search(qf)
assert len(r) == 3
@@ -482,8 +481,8 @@ def test_boolean3():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(t=u("with hardcopy"), b=True, c=u("alfa"))
- w.add_document(t=u("no hardcopy"), b=False, c=u("bravo"))
+ w.add_document(t="with hardcopy", b=True, c="alfa")
+ w.add_document(t="no hardcopy", b=False, c="bravo")
with ix.searcher() as s:
q = query.Term("b", schema["b"].to_bytes(True))
@@ -499,10 +498,10 @@ def test_boolean_strings():
w.add_document(i=1, b="True")
w.add_document(i=2, b="false")
w.add_document(i=3, b="False")
- w.add_document(i=4, b=u("true"))
- w.add_document(i=5, b=u("True"))
- w.add_document(i=6, b=u("false"))
- w.add_document(i=7, b=u("False"))
+ w.add_document(i=4, b="true")
+ w.add_document(i=5, b="True")
+ w.add_document(i=6, b="false")
+ w.add_document(i=7, b="False")
with ix.searcher() as s:
qp = qparser.QueryParser("b", ix.schema)
@@ -579,15 +578,15 @@ def test_boolean_multifield():
)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(name=u("audi"), bit=True)
- w.add_document(name=u("vw"), bit=False)
- w.add_document(name=u("porsche"), bit=False)
- w.add_document(name=u("ferrari"), bit=True)
- w.add_document(name=u("citroen"), bit=False)
+ w.add_document(name="audi", bit=True)
+ w.add_document(name="vw", bit=False)
+ w.add_document(name="porsche", bit=False)
+ w.add_document(name="ferrari", bit=True)
+ w.add_document(name="citroen", bit=False)
with ix.searcher() as s:
qp = qparser.MultifieldParser(["name", "bit"], schema)
- q = qp.parse(u("boop"))
+ q = qp.parse("boop")
r = s.search(q)
assert sorted(hit["name"] for hit in r) == ["audi", "ferrari"]
@@ -599,13 +598,13 @@ def test_idlist():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(paths=u("here there everywhere"))
- w.add_document(paths=u("here"))
- w.add_document(paths=u("there"))
+ w.add_document(paths="here there everywhere")
+ w.add_document(paths="here")
+ w.add_document(paths="there")
with ix.searcher() as s:
qp = qparser.QueryParser("paths", schema)
- q = qp.parse(u("here"))
+ q = qp.parse("here")
r = s.search(q)
assert sorted(hit["paths"] for hit in r) == ["here", "here there everywhere"]
@@ -617,7 +616,7 @@ def test_missing_field():
with ix.searcher() as s:
with pytest.raises(KeyError):
- s.document_numbers(id=u("test"))
+ s.document_numbers(id="test")
def test_token_boost():
@@ -625,11 +624,11 @@ def test_token_boost():
ana = RegexTokenizer() | DoubleMetaphoneFilter()
field = fields.TEXT(analyzer=ana, phrase=False)
- results = sorted(field.index(u("spruce view")))
+ results = sorted(field.index("spruce view"))
assert results == [
- (b("F"), 1, 1.0, b("\x00\x00\x00\x01")),
- (b("FF"), 1, 0.5, b("\x00\x00\x00\x01")),
- (b("SPRS"), 1, 1.0, b("\x00\x00\x00\x01")),
+ (b"F", 1, 1.0, b"\x00\x00\x00\x01"),
+ (b"FF", 1, 0.5, b"\x00\x00\x00\x01"),
+ (b"SPRS", 1, 1.0, b"\x00\x00\x00\x01"),
]
@@ -644,8 +643,9 @@ def test_pickle_idlist():
def test_pickle_schema():
+ from pickle import dumps
+
from whoosh import analysis
- from whoosh.compat import dumps
from whoosh.support.charset import accent_map
freetext_analyzer = analysis.StemmingAnalyzer() | analysis.CharsetFilter(accent_map)
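For reference, the whoosh.compat helpers being dropped across these test files were thin Python 2/3 shims; on Python 3 they reduce to built-ins and the standard library. A rough sketch of the assumed equivalences (the historical compat module may have covered more edge cases):

    from itertools import permutations        # was: whoosh.compat.permutations
    from pickle import dumps, loads           # was: whoosh.compat.dumps / loads
    from urllib.request import urlretrieve    # was: whoosh.compat.urlretrieve

    text_type = str                           # was: whoosh.compat.text_type
    izip = zip                                # was: whoosh.compat.izip

    def u(s):
        # u() is effectively a no-op for str on Python 3; decode bytes defensively.
        return s.decode("ascii") if isinstance(s, bytes) else s

    def b(s):
        # b() produced a bytes value under Python 2; encode on Python 3.
        return s.encode("latin-1") if isinstance(s, str) else s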
diff --git a/tests/test_flexible.py b/tests/test_flexible.py
index 3eb6c257..92cdab1e 100644
--- a/tests/test_flexible.py
+++ b/tests/test_flexible.py
@@ -1,5 +1,4 @@
from whoosh import fields
-from whoosh.compat import b, u
from whoosh.util.testing import TempIndex
@@ -7,16 +6,16 @@ def test_addfield():
schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
with TempIndex(schema, "addfield") as ix:
w = ix.writer()
- w.add_document(id=u("a"), content=u("alfa"))
- w.add_document(id=u("b"), content=u("bravo"))
- w.add_document(id=u("c"), content=u("charlie"))
+ w.add_document(id="a", content="alfa")
+ w.add_document(id="b", content="bravo")
+ w.add_document(id="c", content="charlie")
w.commit()
ix.add_field("added", fields.KEYWORD(stored=True))
w = ix.writer()
- w.add_document(id=u("d"), content=u("delta"), added=u("fourth"))
- w.add_document(id=u("e"), content=u("echo"), added=u("fifth"))
+ w.add_document(id="d", content="delta", added="fourth")
+ w.add_document(id="e", content="echo", added="fifth")
w.commit(merge=False)
with ix.searcher() as s:
@@ -29,21 +28,21 @@ def test_addfield_spelling():
schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
with TempIndex(schema, "addfield") as ix:
w = ix.writer()
- w.add_document(id=u("a"), content=u("alfa"))
- w.add_document(id=u("b"), content=u("bravo"))
- w.add_document(id=u("c"), content=u("charlie"))
+ w.add_document(id="a", content="alfa")
+ w.add_document(id="b", content="bravo")
+ w.add_document(id="c", content="charlie")
w.commit()
ix.add_field("added", fields.KEYWORD(stored=True))
w = ix.writer()
- w.add_document(id=u("d"), content=u("delta"), added=u("fourth"))
- w.add_document(id=u("e"), content=u("echo"), added=u("fifth"))
+ w.add_document(id="d", content="delta", added="fourth")
+ w.add_document(id="e", content="echo", added="fifth")
w.commit(merge=False)
with ix.searcher() as s:
- assert s.document(id=u("d")) == {"id": "d", "added": "fourth"}
- assert s.document(id=u("b")) == {"id": "b"}
+ assert s.document(id="d") == {"id": "d", "added": "fourth"}
+ assert s.document(id="b") == {"id": "b"}
def test_removefield():
@@ -52,13 +51,13 @@ def test_removefield():
)
with TempIndex(schema, "removefield") as ix:
w = ix.writer()
- w.add_document(id=u("b"), content=u("bravo"), city=u("baghdad"))
- w.add_document(id=u("c"), content=u("charlie"), city=u("cairo"))
- w.add_document(id=u("d"), content=u("delta"), city=u("dakar"))
+ w.add_document(id="b", content="bravo", city="baghdad")
+ w.add_document(id="c", content="charlie", city="cairo")
+ w.add_document(id="d", content="delta", city="dakar")
w.commit()
with ix.searcher() as s:
- assert s.document(id=u("c")) == {"id": "c", "city": "cairo"}
+ assert s.document(id="c") == {"id": "c", "city": "cairo"}
w = ix.writer()
w.remove_field("content")
@@ -70,8 +69,8 @@ def test_removefield():
assert ixschema.stored_names() == ["id"]
with ix.searcher() as s:
- assert ("content", b("charlie")) not in s.reader()
- assert s.document(id=u("c")) == {"id": u("c")}
+ assert ("content", b"charlie") not in s.reader()
+ assert s.document(id="c") == {"id": "c"}
def test_optimize_away():
@@ -80,13 +79,13 @@ def test_optimize_away():
)
with TempIndex(schema, "optimizeaway") as ix:
w = ix.writer()
- w.add_document(id=u("b"), content=u("bravo"), city=u("baghdad"))
- w.add_document(id=u("c"), content=u("charlie"), city=u("cairo"))
- w.add_document(id=u("d"), content=u("delta"), city=u("dakar"))
+ w.add_document(id="b", content="bravo", city="baghdad")
+ w.add_document(id="c", content="charlie", city="cairo")
+ w.add_document(id="d", content="delta", city="dakar")
w.commit()
with ix.searcher() as s:
- assert s.document(id=u("c")) == {"id": "c", "city": "cairo"}
+ assert s.document(id="c") == {"id": "c", "city": "cairo"}
w = ix.writer()
w.remove_field("content")
@@ -94,8 +93,8 @@ def test_optimize_away():
w.commit(optimize=True)
with ix.searcher() as s:
- assert ("content", u("charlie")) not in s.reader()
- assert s.document(id=u("c")) == {"id": u("c")}
+ assert ("content", "charlie") not in s.reader()
+ assert s.document(id="c") == {"id": "c"}
if __name__ == "__main__":
diff --git a/tests/test_highlighting.py b/tests/test_highlighting.py
index e47b0d9b..eee21cd0 100644
--- a/tests/test_highlighting.py
+++ b/tests/test_highlighting.py
@@ -2,11 +2,14 @@
# from jieba.analyse import ChineseAnalyzer
from whoosh import analysis, fields, highlight, qparser, query
-from whoosh.compat import u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.testing import TempIndex
-_doc = u("alfa bravo charlie delta echo foxtrot golf hotel india juliet " + "kilo lima")
+_doc = "alfa bravo charlie delta echo foxtrot golf hotel india juliet kilo lima"
+
+
+def u(s):
+ return s.decode("ascii") if isinstance(s, bytes) else s
def test_null_fragment():
@@ -34,8 +37,8 @@ def search(searcher, query_string):
ix = RamStorage().create_index(schema)
w = ix.writer()
w.add_document(
- id=u("1"),
- title=u("strict phrase highlights phrase terms but not individual terms"),
+ id="1",
+ title="strict phrase highlights phrase terms but not individual terms",
)
w.commit()
@@ -81,7 +84,7 @@ def search(searcher, query_string):
def test_sentence_fragment():
- text = u(
+ text = (
"This is the first sentence. This one doesn't have the word. "
+ "This sentence is the second. Third sentence here."
)
@@ -131,7 +134,7 @@ def test_html_escape():
sa = analysis.StandardAnalyzer()
wf = highlight.WholeFragmenter()
hf = highlight.HtmlFormatter()
-    htext = highlight.highlight(u('alfa <bravo "charlie"> delta'), terms, sa, wf, hf)
+    htext = highlight.highlight('alfa <bravo "charlie"> delta', terms, sa, wf, hf)
assert (
htext
        == 'alfa &lt;<strong class="match term0">bravo</strong> "charlie"&gt; delta'
@@ -155,17 +158,17 @@ def test_workflow_easy():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), title=u("The man who wasn't there"))
- w.add_document(id=u("2"), title=u("The dog who barked at midnight"))
- w.add_document(id=u("3"), title=u("The invisible man"))
- w.add_document(id=u("4"), title=u("The girl with the dragon tattoo"))
- w.add_document(id=u("5"), title=u("The woman who disappeared"))
+ w.add_document(id="1", title="The man who wasn't there")
+ w.add_document(id="2", title="The dog who barked at midnight")
+ w.add_document(id="3", title="The invisible man")
+ w.add_document(id="4", title="The girl with the dragon tattoo")
+ w.add_document(id="5", title="The woman who disappeared")
w.commit()
with ix.searcher() as s:
# Parse the user query
parser = qparser.QueryParser("title", schema=ix.schema)
- q = parser.parse(u("man"))
+ q = parser.parse("man")
r = s.search(q, terms=True)
assert len(r) == 2
@@ -180,17 +183,17 @@ def test_workflow_manual():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), title=u("The man who wasn't there"))
- w.add_document(id=u("2"), title=u("The dog who barked at midnight"))
- w.add_document(id=u("3"), title=u("The invisible man"))
- w.add_document(id=u("4"), title=u("The girl with the dragon tattoo"))
- w.add_document(id=u("5"), title=u("The woman who disappeared"))
+ w.add_document(id="1", title="The man who wasn't there")
+ w.add_document(id="2", title="The dog who barked at midnight")
+ w.add_document(id="3", title="The invisible man")
+ w.add_document(id="4", title="The girl with the dragon tattoo")
+ w.add_document(id="5", title="The woman who disappeared")
w.commit()
with ix.searcher() as s:
# Parse the user query
parser = qparser.QueryParser("title", schema=ix.schema)
- q = parser.parse(u("man"))
+ q = parser.parse("man")
# Extract the terms the user used in the field we're interested in
terms = [text for fieldname, text in q.all_terms() if fieldname == "title"]
@@ -223,7 +226,7 @@ def test_unstored():
schema = fields.Schema(text=fields.TEXT, tags=fields.KEYWORD)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(text=u("alfa bravo charlie"), tags=u("delta echo"))
+ w.add_document(text="alfa bravo charlie", tags="delta echo")
w.commit()
hit = ix.searcher().search(query.Term("text", "bravo"))[0]
@@ -240,7 +243,7 @@ def test_multifilter():
schema = fields.Schema(text=fields.TEXT(analyzer=ana, stored=True))
with TempIndex(schema) as ix:
w = ix.writer()
- w.add_document(text=u("Our BabbleTron5000 is great"))
+ w.add_document(text="Our BabbleTron5000 is great")
w.commit()
with ix.searcher() as s:
@@ -253,7 +256,7 @@ def test_multifilter():
def test_pinpoint():
- domain = u(
+ domain = (
"alfa bravo charlie delta echo foxtrot golf hotel india juliet "
"kilo lima mike november oskar papa quebec romeo sierra tango"
)
@@ -291,11 +294,11 @@ def test_highlight_wildcards():
schema = fields.Schema(text=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(text=u("alfa bravo charlie delta cookie echo"))
+ w.add_document(text="alfa bravo charlie delta cookie echo")
with ix.searcher() as s:
qp = qparser.QueryParser("text", ix.schema)
- q = qp.parse(u("c*"))
+ q = qp.parse("c*")
r = s.search(q)
assert r.scored_length() == 1
r.formatter = highlight.UppercaseFormatter()
@@ -307,11 +310,11 @@ def test_highlight_ngrams():
schema = fields.Schema(text=fields.NGRAMWORDS(stored=True))
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(text=u("Multiplication and subtraction are good"))
+ w.add_document(text="Multiplication and subtraction are good")
with ix.searcher() as s:
qp = qparser.QueryParser("text", ix.schema)
- q = qp.parse(u("multiplication"))
+ q = qp.parse("multiplication")
r = s.search(q)
assert r.scored_length() == 1
@@ -324,8 +327,8 @@ def test_highlight_ngrams():
def test_issue324():
sa = analysis.StemmingAnalyzer()
result = highlight.highlight(
- u("Indexed!\n1"),
- [u("index")],
+ "Indexed!\n1",
+ ["index"],
sa,
fragmenter=highlight.ContextFragmenter(),
formatter=highlight.UppercaseFormatter(),
@@ -337,25 +340,23 @@ def test_whole_noterms():
schema = fields.Schema(text=fields.TEXT(stored=True), tag=fields.KEYWORD)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(
- text=u("alfa bravo charlie delta echo foxtrot golf"), tag=u("foo")
- )
+ w.add_document(text="alfa bravo charlie delta echo foxtrot golf", tag="foo")
with ix.searcher() as s:
- r = s.search(query.Term("text", u("delta")))
+ r = s.search(query.Term("text", "delta"))
assert len(r) == 1
r.fragmenter = highlight.WholeFragmenter()
r.formatter = highlight.UppercaseFormatter()
hi = r[0].highlights("text")
- assert hi == u("alfa bravo charlie DELTA echo foxtrot golf")
+ assert hi == "alfa bravo charlie DELTA echo foxtrot golf"
- r = s.search(query.Term("tag", u("foo")))
+ r = s.search(query.Term("tag", "foo"))
assert len(r) == 1
r.fragmenter = highlight.WholeFragmenter()
r.formatter = highlight.UppercaseFormatter()
hi = r[0].highlights("text")
- assert hi == u("")
+ assert hi == ""
hi = r[0].highlights("text", minscore=0)
- assert hi == u("alfa bravo charlie delta echo foxtrot golf")
+ assert hi == "alfa bravo charlie delta echo foxtrot golf"
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index 01c1fb07..363c8a17 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -1,10 +1,10 @@
import random
from collections import defaultdict
from datetime import datetime, timezone
+from itertools import permutations
import pytest
from whoosh import __version__, analysis, fields, index, qparser, query
-from whoosh.compat import b, permutations, text_type, u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.numeric import byte_to_length, length_to_byte
from whoosh.util.testing import TempIndex, TempStorage
@@ -25,20 +25,20 @@ def test_creation():
ix = st.create_index(s)
w = ix.writer()
w.add_document(
- title=u("First"),
- content=u("This is the first document"),
- path=u("/a"),
- tags=u("first second third"),
- quick=u("First document"),
- note=u("This is the first document"),
+ title="First",
+ content="This is the first document",
+ path="/a",
+ tags="first second third",
+ quick="First document",
+ note="This is the first document",
)
w.add_document(
- content=u("Let's try this again"),
- title=u("Second"),
- path=u("/b"),
- tags=u("Uno Dos Tres"),
- quick=u("Second document"),
- note=u("This is the second document"),
+ content="Let's try this again",
+ title="Second",
+ path="/b",
+ tags="Uno Dos Tres",
+ quick="Second document",
+ note="This is the second document",
)
w.commit()
@@ -47,9 +47,9 @@ def test_empty_commit():
s = fields.Schema(id=fields.ID(stored=True))
with TempIndex(s, "emptycommit") as ix:
w = ix.writer()
- w.add_document(id=u("1"))
- w.add_document(id=u("2"))
- w.add_document(id=u("3"))
+ w.add_document(id="1")
+ w.add_document(id="2")
+ w.add_document(id="3")
w.commit()
w = ix.writer()
@@ -70,7 +70,7 @@ def test_version_in():
assert v[1] == index._CURRENT_TOC_VERSION
with ix.writer() as w:
- w.add_document(text=u("alfa"))
+ w.add_document(text="alfa")
assert not ix.is_empty()
@@ -78,20 +78,20 @@ def test_version_in():
def test_simple_indexing():
schema = fields.Schema(text=fields.TEXT, id=fields.STORED)
domain = (
- u("alfa"),
- u("bravo"),
- u("charlie"),
- u("delta"),
- u("echo"),
- u("foxtrot"),
- u("golf"),
- u("hotel"),
- u("india"),
- u("juliet"),
- u("kilo"),
- u("lima"),
- u("mike"),
- u("november"),
+ "alfa",
+ "bravo",
+ "charlie",
+ "delta",
+ "echo",
+ "foxtrot",
+ "golf",
+ "hotel",
+ "india",
+ "juliet",
+ "kilo",
+ "lima",
+ "mike",
+ "november",
)
docs = defaultdict(list)
with TempIndex(schema, "simple") as ix:
@@ -100,7 +100,7 @@ def test_simple_indexing():
smp = random.sample(domain, 5)
for word in smp:
docs[word].append(i)
- w.add_document(text=u(" ").join(smp), id=i)
+ w.add_document(text=" ".join(smp), id=i)
with ix.searcher() as s:
for word in domain:
@@ -119,12 +119,12 @@ def test_integrity():
ix = st.create_index(s)
w = ix.writer()
- w.add_document(name=u("Yellow brown"), value=u("Blue red green purple?"))
- w.add_document(name=u("Alpha beta"), value=u("Gamma delta epsilon omega."))
+ w.add_document(name="Yellow brown", value="Blue red green purple?")
+ w.add_document(name="Alpha beta", value="Gamma delta epsilon omega.")
w.commit()
w = ix.writer()
- w.add_document(name=u("One two"), value=u("Three four five."))
+ w.add_document(name="One two", value="Three four five.")
w.commit()
tr = ix.reader()
@@ -139,12 +139,12 @@ def test_lengths():
)
with TempIndex(s, "testlengths") as ix:
w = ix.writer()
- items = u("ABCDEFG")
+ items = "ABCDEFG"
from itertools import cycle, islice
lengths = [10, 20, 2, 102, 45, 3, 420, 2]
for length in lengths:
- w.add_document(f2=u(" ").join(islice(cycle(items), length)))
+ w.add_document(f2=" ".join(islice(cycle(items), length)))
w.commit()
with ix.reader() as dr:
@@ -155,7 +155,7 @@ def test_lengths():
def test_many_lengths():
- domain = u("alfa bravo charlie delta echo").split()
+ domain = "alfa bravo charlie delta echo".split()
schema = fields.Schema(text=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
@@ -180,9 +180,9 @@ def test_lengths_ram():
st = RamStorage()
ix = st.create_index(s)
w = ix.writer()
- w.add_document(f1=u("A B C D E"), f2=u("X Y Z"))
- w.add_document(f1=u("B B B B C D D Q"), f2=u("Q R S T"))
- w.add_document(f1=u("D E F"), f2=u("U V A B C D E"))
+ w.add_document(f1="A B C D E", f2="X Y Z")
+ w.add_document(f1="B B B B C D D Q", f2="Q R S T")
+ w.add_document(f1="D E F", f2="U V A B C D E")
w.commit()
dr = ix.reader()
@@ -207,22 +207,22 @@ def test_merged_lengths():
)
with TempIndex(s, "mergedlengths") as ix:
w = ix.writer()
- w.add_document(f1=u("A B C"), f2=u("X"))
- w.add_document(f1=u("B C D E"), f2=u("Y Z"))
+ w.add_document(f1="A B C", f2="X")
+ w.add_document(f1="B C D E", f2="Y Z")
w.commit()
w = ix.writer()
- w.add_document(f1=u("A"), f2=u("B C D E X Y"))
- w.add_document(f1=u("B C"), f2=u("X"))
+ w.add_document(f1="A", f2="B C D E X Y")
+ w.add_document(f1="B C", f2="X")
w.commit(merge=False)
w = ix.writer()
- w.add_document(f1=u("A B X Y Z"), f2=u("B C"))
- w.add_document(f1=u("Y X"), f2=u("A B"))
+ w.add_document(f1="A B X Y Z", f2="B C")
+ w.add_document(f1="Y X", f2="A B")
w.commit(merge=False)
with ix.reader() as dr:
- assert dr.stored_fields(0)["f1"] == u("A B C")
+ assert dr.stored_fields(0)["f1"] == "A B C"
assert dr.doc_field_length(0, "f1") == 3
assert dr.doc_field_length(2, "f2") == 6
assert dr.doc_field_length(4, "f1") == 5
@@ -234,36 +234,36 @@ def test_frequency_keyword():
ix = st.create_index(s)
w = ix.writer()
- w.add_document(content=u("A B C D E"))
- w.add_document(content=u("B B B B C D D"))
- w.add_document(content=u("D E F"))
+ w.add_document(content="A B C D E")
+ w.add_document(content="B B B B C D D")
+ w.add_document(content="D E F")
w.commit()
with ix.reader() as tr:
- assert tr.doc_frequency("content", u("B")) == 2
- assert tr.frequency("content", u("B")) == 5
- assert tr.doc_frequency("content", u("E")) == 2
- assert tr.frequency("content", u("E")) == 2
- assert tr.doc_frequency("content", u("A")) == 1
- assert tr.frequency("content", u("A")) == 1
- assert tr.doc_frequency("content", u("D")) == 3
- assert tr.frequency("content", u("D")) == 4
- assert tr.doc_frequency("content", u("F")) == 1
- assert tr.frequency("content", u("F")) == 1
- assert tr.doc_frequency("content", u("Z")) == 0
- assert tr.frequency("content", u("Z")) == 0
+ assert tr.doc_frequency("content", "B") == 2
+ assert tr.frequency("content", "B") == 5
+ assert tr.doc_frequency("content", "E") == 2
+ assert tr.frequency("content", "E") == 2
+ assert tr.doc_frequency("content", "A") == 1
+ assert tr.frequency("content", "A") == 1
+ assert tr.doc_frequency("content", "D") == 3
+ assert tr.frequency("content", "D") == 4
+ assert tr.doc_frequency("content", "F") == 1
+ assert tr.frequency("content", "F") == 1
+ assert tr.doc_frequency("content", "Z") == 0
+ assert tr.frequency("content", "Z") == 0
stats = [
(fname, text, ti.doc_frequency(), ti.weight()) for (fname, text), ti in tr
]
assert stats == [
- ("content", b("A"), 1, 1),
- ("content", b("B"), 2, 5),
- ("content", b("C"), 2, 2),
- ("content", b("D"), 3, 4),
- ("content", b("E"), 2, 2),
- ("content", b("F"), 1, 1),
+ ("content", b"A", 1, 1),
+ ("content", b"B", 2, 5),
+ ("content", b"C", 2, 2),
+ ("content", b"D", 3, 4),
+ ("content", b"E", 2, 2),
+ ("content", b"F", 1, 1),
]
@@ -273,36 +273,36 @@ def test_frequency_text():
ix = st.create_index(s)
w = ix.writer()
- w.add_document(content=u("alfa bravo charlie delta echo"))
- w.add_document(content=u("bravo bravo bravo bravo charlie delta delta"))
- w.add_document(content=u("delta echo foxtrot"))
+ w.add_document(content="alfa bravo charlie delta echo")
+ w.add_document(content="bravo bravo bravo bravo charlie delta delta")
+ w.add_document(content="delta echo foxtrot")
w.commit()
with ix.reader() as tr:
- assert tr.doc_frequency("content", u("bravo")) == 2
- assert tr.frequency("content", u("bravo")) == 5
- assert tr.doc_frequency("content", u("echo")) == 2
- assert tr.frequency("content", u("echo")) == 2
- assert tr.doc_frequency("content", u("alfa")) == 1
- assert tr.frequency("content", u("alfa")) == 1
- assert tr.doc_frequency("content", u("delta")) == 3
- assert tr.frequency("content", u("delta")) == 4
- assert tr.doc_frequency("content", u("foxtrot")) == 1
- assert tr.frequency("content", u("foxtrot")) == 1
- assert tr.doc_frequency("content", u("zulu")) == 0
- assert tr.frequency("content", u("zulu")) == 0
+ assert tr.doc_frequency("content", "bravo") == 2
+ assert tr.frequency("content", "bravo") == 5
+ assert tr.doc_frequency("content", "echo") == 2
+ assert tr.frequency("content", "echo") == 2
+ assert tr.doc_frequency("content", "alfa") == 1
+ assert tr.frequency("content", "alfa") == 1
+ assert tr.doc_frequency("content", "delta") == 3
+ assert tr.frequency("content", "delta") == 4
+ assert tr.doc_frequency("content", "foxtrot") == 1
+ assert tr.frequency("content", "foxtrot") == 1
+ assert tr.doc_frequency("content", "zulu") == 0
+ assert tr.frequency("content", "zulu") == 0
stats = [
(fname, text, ti.doc_frequency(), ti.weight()) for (fname, text), ti in tr
]
assert stats == [
- ("content", b("alfa"), 1, 1),
- ("content", b("bravo"), 2, 5),
- ("content", b("charlie"), 2, 2),
- ("content", b("delta"), 3, 4),
- ("content", b("echo"), 2, 2),
- ("content", b("foxtrot"), 1, 1),
+ ("content", b"alfa", 1, 1),
+ ("content", b"bravo", 2, 5),
+ ("content", b"charlie", 2, 2),
+ ("content", b"delta", 3, 4),
+ ("content", b"echo", 2, 2),
+ ("content", b"foxtrot", 1, 1),
]
@@ -310,36 +310,28 @@ def test_deletion():
s = fields.Schema(key=fields.ID, name=fields.TEXT, value=fields.TEXT)
with TempIndex(s, "deletion") as ix:
w = ix.writer()
- w.add_document(
- key=u("A"), name=u("Yellow brown"), value=u("Blue red green purple?")
- )
- w.add_document(
- key=u("B"), name=u("Alpha beta"), value=u("Gamma delta epsilon omega.")
- )
- w.add_document(key=u("C"), name=u("One two"), value=u("Three four five."))
+ w.add_document(key="A", name="Yellow brown", value="Blue red green purple?")
+ w.add_document(key="B", name="Alpha beta", value="Gamma delta epsilon omega.")
+ w.add_document(key="C", name="One two", value="Three four five.")
w.commit()
w = ix.writer()
- assert w.delete_by_term("key", u("B")) == 1
+ assert w.delete_by_term("key", "B") == 1
w.commit(merge=False)
assert ix.doc_count_all() == 3
assert ix.doc_count() == 2
w = ix.writer()
- w.add_document(
- key=u("A"), name=u("Yellow brown"), value=u("Blue red green purple?")
- )
- w.add_document(
- key=u("B"), name=u("Alpha beta"), value=u("Gamma delta epsilon omega.")
- )
- w.add_document(key=u("C"), name=u("One two"), value=u("Three four five."))
+ w.add_document(key="A", name="Yellow brown", value="Blue red green purple?")
+ w.add_document(key="B", name="Alpha beta", value="Gamma delta epsilon omega.")
+ w.add_document(key="C", name="One two", value="Three four five.")
w.commit()
# This will match both documents with key == B, one of which is already
# deleted. This should not raise an error.
w = ix.writer()
- assert w.delete_by_term("key", u("B")) == 1
+ assert w.delete_by_term("key", "B") == 1
w.commit()
ix.optimize()
@@ -355,14 +347,14 @@ def test_writer_reuse():
ix = RamStorage().create_index(s)
w = ix.writer()
- w.add_document(key=u("A"))
- w.add_document(key=u("B"))
- w.add_document(key=u("C"))
+ w.add_document(key="A")
+ w.add_document(key="B")
+ w.add_document(key="C")
w.commit()
# You can't re-use a commited/canceled writer
- pytest.raises(IndexingError, w.add_document, key=u("D"))
- pytest.raises(IndexingError, w.update_document, key=u("B"))
+ pytest.raises(IndexingError, w.add_document, key="D")
+ pytest.raises(IndexingError, w.update_document, key="B")
pytest.raises(IndexingError, w.delete_document, 0)
pytest.raises(IndexingError, w.add_reader, None)
pytest.raises(IndexingError, w.add_field, "name", fields.ID)
@@ -373,9 +365,9 @@ def test_writer_reuse():
def test_update():
# Test update with multiple unique keys
SAMPLE_DOCS = [
- {"id": u("test1"), "path": u("/test/1"), "text": u("Hello")},
- {"id": u("test2"), "path": u("/test/2"), "text": u("There")},
- {"id": u("test3"), "path": u("/test/3"), "text": u("Reader")},
+ {"id": "test1", "path": "/test/1", "text": "Hello"},
+ {"id": "test2", "path": "/test/2", "text": "There"},
+ {"id": "test3", "path": "/test/3", "text": "Reader"},
]
schema = fields.Schema(
@@ -390,7 +382,7 @@ def test_update():
w.add_document(**doc)
with ix.writer() as w:
- w.update_document(id=u("test2"), path=u("test/1"), text=u("Replacement"))
+ w.update_document(id="test2", path="test/1", text="Replacement")
def test_update2():
@@ -402,7 +394,7 @@ def test_update2():
random.shuffle(nums)
for i, n in enumerate(nums):
w = ix.writer()
- w.update_document(key=text_type(n % 10), p=text_type(i))
+ w.update_document(key=str(n % 10), p=str(i))
w.commit()
with ix.searcher() as s:
@@ -420,7 +412,7 @@ def test_update_numeric():
random.shuffle(nums)
for num in nums:
with ix.writer() as w:
- w.update_document(num=num, text=text_type(num))
+ w.update_document(num=num, text=str(num))
with ix.searcher() as s:
results = [d["text"] for _, d in s.iter_docs()]
@@ -430,13 +422,11 @@ def test_update_numeric():
def test_reindex():
sample_docs = [
- {"id": u("test1"), "text": u("This is a document. Awesome, is it not?")},
- {"id": u("test2"), "text": u("Another document. Astounding!")},
+ {"id": "test1", "text": "This is a document. Awesome, is it not?"},
+ {"id": "test2", "text": "Another document. Astounding!"},
{
- "id": u("test3"),
- "text": u(
- "A fascinating article on the behavior of domestic " "steak knives."
- ),
+ "id": "test3",
+ "text": ("A fascinating article on the behavior of domestic steak knives."),
},
]
@@ -459,18 +449,18 @@ def reindex():
def test_noscorables1():
values = [
- u("alfa"),
- u("bravo"),
- u("charlie"),
- u("delta"),
- u("echo"),
- u("foxtrot"),
- u("golf"),
- u("hotel"),
- u("india"),
- u("juliet"),
- u("kilo"),
- u("lima"),
+ "alfa",
+ "bravo",
+ "charlie",
+ "delta",
+ "echo",
+ "foxtrot",
+ "golf",
+ "hotel",
+ "india",
+ "juliet",
+ "kilo",
+ "lima",
]
from random import choice, randint, sample
@@ -481,7 +471,7 @@ def test_noscorables1():
w = ix.writer()
for _ in range(times):
w.add_document(
- id=choice(values), tags=u(" ").join(sample(values, randint(2, 7)))
+ id=choice(values), tags=" ".join(sample(values, randint(2, 7)))
)
w.commit()
@@ -493,7 +483,7 @@ def test_noscorables2():
schema = fields.Schema(field=fields.ID)
with TempIndex(schema, "noscorables2") as ix:
writer = ix.writer()
- writer.add_document(field=u("foo"))
+ writer.add_document(field="foo")
writer.commit()
@@ -504,42 +494,42 @@ def test_multi():
with TempIndex(schema, "multi") as ix:
writer = ix.writer()
# Deleted 1
- writer.add_document(id=u("1"), content=u("alfa bravo charlie"))
+ writer.add_document(id="1", content="alfa bravo charlie")
# Deleted 1
- writer.add_document(id=u("2"), content=u("bravo charlie delta echo"))
+ writer.add_document(id="2", content="bravo charlie delta echo")
# Deleted 2
- writer.add_document(id=u("3"), content=u("charlie delta echo foxtrot"))
+ writer.add_document(id="3", content="charlie delta echo foxtrot")
writer.commit()
writer = ix.writer()
writer.delete_by_term("id", "1")
writer.delete_by_term("id", "2")
- writer.add_document(id=u("4"), content=u("apple bear cherry donut"))
- writer.add_document(id=u("5"), content=u("bear cherry donut eggs"))
+ writer.add_document(id="4", content="apple bear cherry donut")
+ writer.add_document(id="5", content="bear cherry donut eggs")
# Deleted 2
- writer.add_document(id=u("6"), content=u("delta echo foxtrot golf"))
+ writer.add_document(id="6", content="delta echo foxtrot golf")
# no d
- writer.add_document(id=u("7"), content=u("echo foxtrot golf hotel"))
+ writer.add_document(id="7", content="echo foxtrot golf hotel")
writer.commit(merge=False)
writer = ix.writer()
writer.delete_by_term("id", "3")
writer.delete_by_term("id", "6")
- writer.add_document(id=u("8"), content=u("cherry donut eggs falafel"))
- writer.add_document(id=u("9"), content=u("donut eggs falafel grape"))
- writer.add_document(id=u("A"), content=u(" foxtrot golf hotel india"))
+ writer.add_document(id="8", content="cherry donut eggs falafel")
+ writer.add_document(id="9", content="donut eggs falafel grape")
+ writer.add_document(id="A", content=" foxtrot golf hotel india")
writer.commit(merge=False)
assert ix.doc_count() == 6
with ix.searcher() as s:
- r = s.search(query.Prefix("content", u("d")), optimize=False)
+ r = s.search(query.Prefix("content", "d"), optimize=False)
assert sorted([d["id"] for d in r]) == ["4", "5", "8", "9"]
- r = s.search(query.Prefix("content", u("d")))
+ r = s.search(query.Prefix("content", "d"))
assert sorted([d["id"] for d in r]) == ["4", "5", "8", "9"]
- r = s.search(query.Prefix("content", u("d")), limit=None)
+ r = s.search(query.Prefix("content", "d"), limit=None)
assert sorted([d["id"] for d in r]) == ["4", "5", "8", "9"]
@@ -547,9 +537,9 @@ def test_deleteall():
schema = fields.Schema(text=fields.TEXT)
with TempIndex(schema, "deleteall") as ix:
w = ix.writer()
- domain = u("alfa bravo charlie delta echo").split()
+ domain = "alfa bravo charlie delta echo".split()
for i, ls in enumerate(permutations(domain)):
- w.add_document(text=u(" ").join(ls))
+ w.add_document(text=" ".join(ls))
if not i % 10:
w.commit()
w = ix.writer()
@@ -564,9 +554,7 @@ def test_deleteall():
with ix.searcher() as s:
r = s.search(
- query.Or(
- [query.Term("text", u("alfa")), query.Term("text", u("bravo"))]
- )
+ query.Or([query.Term("text", "alfa"), query.Term("text", "bravo")])
)
assert len(r) == 0
@@ -581,7 +569,7 @@ def test_simple_stored():
schema = fields.Schema(a=fields.ID(stored=True), b=fields.ID(stored=False))
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(a=u("alfa"), b=u("bravo"))
+ w.add_document(a="alfa", b="bravo")
with ix.searcher() as s:
sf = s.stored_fields(0)
assert sf == {"a": "alfa"}
@@ -591,11 +579,11 @@ def test_single():
schema = fields.Schema(id=fields.ID(stored=True), text=fields.TEXT)
with TempIndex(schema, "single") as ix:
w = ix.writer()
- w.add_document(id=u("1"), text=u("alfa"))
+ w.add_document(id="1", text="alfa")
w.commit()
with ix.searcher() as s:
- assert ("text", u("alfa")) in s.reader()
+ assert ("text", "alfa") in s.reader()
assert list(s.documents(id="1")) == [{"id": "1"}]
assert list(s.documents(text="alfa")) == [{"id": "1"}]
assert list(s.all_stored_fields()) == [{"id": "1"}]
@@ -607,13 +595,13 @@ def test_indentical_fields():
)
with TempIndex(schema, "identifields") as ix:
w = ix.writer()
- w.add_document(id=1, f1=u("alfa"), f2=u("alfa"), f3=u("alfa"))
+ w.add_document(id=1, f1="alfa", f2="alfa", f3="alfa")
w.commit()
with ix.searcher() as s:
- assert list(s.lexicon("f1")) == [b("alfa")]
- assert list(s.lexicon("f2")) == [b("alfa")]
- assert list(s.lexicon("f3")) == [b("alfa")]
+ assert list(s.lexicon("f1")) == [b"alfa"]
+ assert list(s.lexicon("f2")) == [b"alfa"]
+ assert list(s.lexicon("f3")) == [b"alfa"]
assert list(s.documents(f1="alfa")) == [{"id": 1}]
assert list(s.documents(f2="alfa")) == [{"id": 1}]
assert list(s.documents(f3="alfa")) == [{"id": 1}]
@@ -638,7 +626,7 @@ def test_multivalue():
],
num=[1, 2, 3, 12],
)
- w.add_document(txt=u("a b c").split())
+ w.add_document(txt="a b c".split())
with ix.reader() as r:
assert ("num", 3) in r
@@ -663,8 +651,8 @@ def stem_piglatin(w):
# Fake documents
corpus = [
- (u("eng"), u("Such stuff as dreams are made on")),
- (u("pig"), u("Otay ebay, roay otnay otay ebay")),
+ ("eng", "Such stuff as dreams are made on"),
+ ("pig", "Otay ebay, roay otnay otay ebay"),
]
schema = fields.Schema(
@@ -705,9 +693,9 @@ def test_doc_boost():
schema = fields.Schema(id=fields.STORED, a=fields.TEXT, b=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=0, a=u("alfa alfa alfa"), b=u("bravo"))
- w.add_document(id=1, a=u("alfa"), b=u("bear"), _a_boost=5.0)
- w.add_document(id=2, a=u("alfa alfa alfa alfa"), _boost=0.5)
+ w.add_document(id=0, a="alfa alfa alfa", b="bravo")
+ w.add_document(id=1, a="alfa", b="bear", _a_boost=5.0)
+ w.add_document(id=2, a="alfa alfa alfa alfa", _boost=0.5)
w.commit()
with ix.searcher() as s:
@@ -715,8 +703,8 @@ def test_doc_boost():
assert [hit["id"] for hit in r] == [1, 0, 2]
w = ix.writer()
- w.add_document(id=3, a=u("alfa"), b=u("bottle"))
- w.add_document(id=4, b=u("bravo"), _b_boost=2.0)
+ w.add_document(id=3, a="alfa", b="bottle")
+ w.add_document(id=4, b="bravo", _b_boost=2.0)
w.commit(merge=False)
with ix.searcher() as s:
@@ -733,16 +721,16 @@ def test_globfield_length_merge():
with TempIndex(schema, "globlenmerge") as ix:
with ix.writer() as w:
w.add_document(
- title=u("First document"),
- path=u("/a"),
- content_text=u("This is the first document we've added!"),
+ title="First document",
+ path="/a",
+ content_text="This is the first document we've added!",
)
with ix.writer() as w:
w.add_document(
- title=u("Second document"),
- path=u("/b"),
- content_text=u("The second document is even more interesting!"),
+ title="Second document",
+ path="/b",
+ content_text="The second document is even more interesting!",
)
with ix.searcher() as s:
@@ -788,14 +776,14 @@ def test_index_decimals():
with ix.writer() as w:
with pytest.raises(TypeError):
- w.add_document(name=u("hello"), num=Decimal("3.2"))
+ w.add_document(name="hello", num=Decimal("3.2"))
schema = fields.Schema(
name=fields.KEYWORD(stored=True), num=fields.NUMERIC(Decimal, decimal_places=5)
)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(name=u("hello"), num=Decimal("3.2"))
+ w.add_document(name="hello", num=Decimal("3.2"))
def test_stored_tuple():
diff --git a/tests/test_matching.py b/tests/test_matching.py
index 295d80df..22a2c25b 100644
--- a/tests/test_matching.py
+++ b/tests/test_matching.py
@@ -1,7 +1,7 @@
+from itertools import permutations
from random import choice, randint, sample
from whoosh import fields, matching, qparser, query
-from whoosh.compat import b, permutations, u
from whoosh.filedb.filestore import RamStorage
from whoosh.query import And, Term
from whoosh.scoring import WeightScorer
@@ -258,41 +258,41 @@ def test_intersection():
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(key=u("a"), value=u("alpha bravo charlie delta"))
- w.add_document(key=u("b"), value=u("echo foxtrot alpha bravo"))
- w.add_document(key=u("c"), value=u("charlie delta golf hotel"))
+ w.add_document(key="a", value="alpha bravo charlie delta")
+ w.add_document(key="b", value="echo foxtrot alpha bravo")
+ w.add_document(key="c", value="charlie delta golf hotel")
w.commit()
w = ix.writer()
- w.add_document(key=u("d"), value=u("india alpha bravo charlie"))
- w.add_document(key=u("e"), value=u("delta bravo india bravo"))
+ w.add_document(key="d", value="india alpha bravo charlie")
+ w.add_document(key="e", value="delta bravo india bravo")
w.commit()
with ix.searcher() as s:
- q = And([Term("value", u("bravo")), Term("value", u("delta"))])
+ q = And([Term("value", "bravo"), Term("value", "delta")])
m = q.matcher(s)
assert _keys(s, m.all_ids()) == ["a", "e"]
- q = And([Term("value", u("bravo")), Term("value", u("alpha"))])
+ q = And([Term("value", "bravo"), Term("value", "alpha")])
m = q.matcher(s)
assert _keys(s, m.all_ids()) == ["a", "b", "d"]
def test_random_intersections():
domain = [
- u("alpha"),
- u("bravo"),
- u("charlie"),
- u("delta"),
- u("echo"),
- u("foxtrot"),
- u("golf"),
- u("hotel"),
- u("india"),
- u("juliet"),
- u("kilo"),
- u("lima"),
- u("mike"),
+ "alpha",
+ "bravo",
+ "charlie",
+ "delta",
+ "echo",
+ "foxtrot",
+ "golf",
+ "hotel",
+ "india",
+ "juliet",
+ "kilo",
+ "lima",
+ "mike",
]
segments = 5
docsperseg = 50
@@ -311,7 +311,7 @@ def test_random_intersections():
for j in range(docsperseg):
docnum = i * docsperseg + j
# Create a string of random words
- doc = u(" ").join(choice(domain) for _ in range(randint(*fieldlimits)))
+ doc = " ".join(choice(domain) for _ in range(randint(*fieldlimits)))
# Add the string to the index
w.add_document(key=docnum, value=doc)
# Add a (docnum, string) tuple to the documents list
@@ -455,7 +455,7 @@ def test_random_andnot():
def test_current_terms():
- domain = u("alfa bravo charlie delta").split()
+ domain = "alfa bravo charlie delta".split()
schema = fields.Schema(text=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)
w = ix.writer()
@@ -469,8 +469,8 @@ def test_current_terms():
while m.is_active():
assert sorted(m.matching_terms()) == [
- ("text", b("alfa")),
- ("text", b("charlie")),
+ ("text", b"alfa"),
+ ("text", b"charlie"),
]
m.next()
@@ -482,15 +482,15 @@ def test_dismax():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id=u("1"), title="alfa", body="bravo")
- w.add_document(id=u("1"), title="charlie", body="bravo")
- w.add_document(id=u("1"), title="alfa", body="alfa")
+ w.add_document(id="1", title="alfa", body="bravo")
+ w.add_document(id="1", title="charlie", body="bravo")
+ w.add_document(id="1", title="alfa", body="alfa")
with ix.searcher() as s:
qp = qparser.MultifieldParser(["title", "body"], schema)
dp = qparser.DisMaxParser({"body": 1.0, "title": 2.5}, None)
- query_text = u("alfa OR bravo")
+ query_text = "alfa OR bravo"
qqp = qp.parse(query_text)
qdp = dp.parse(query_text)
rq = s.search(qqp, limit=1)
@@ -510,7 +510,7 @@ def test_exclusion():
# Make 39 documents with dates != dt1 and then make a last document
# with feed == dt1.
for i in range(40):
- w.add_document(id=u(str(i)), date=(dt2 if i >= 1 else dt1))
+ w.add_document(id=str(i), date=(dt2 if i >= 1 else dt1))
with ix.searcher() as s:
qp = qparser.QueryParser("id", schema)
diff --git a/tests/test_misc.py b/tests/test_misc.py
index 5a4f2224..d24ae793 100644
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@@ -2,7 +2,6 @@
import threading
import time
-from whoosh.compat import u
from whoosh.util.filelock import try_for
from whoosh.util.numeric import byte_to_length, length_to_byte
from whoosh.util.testing import TempStorage
@@ -34,8 +33,8 @@ def test_storage_creation():
ix = st.create_index(schema)
with ix.writer() as w:
- w.add_document(text=u("alfa bravo"))
- w.add_document(text=u("bracho charlie"))
+ w.add_document(text="alfa bravo")
+ w.add_document(text="bracho charlie")
st.destroy()
assert not os.path.exists(dirpath)
diff --git a/tests/test_mpwriter.py b/tests/test_mpwriter.py
index 0703a849..ab2d4a02 100644
--- a/tests/test_mpwriter.py
+++ b/tests/test_mpwriter.py
@@ -1,9 +1,9 @@
import random
from collections import deque
+from itertools import permutations
import pytest
from whoosh import fields, query
-from whoosh.compat import izip, permutations, text_type, u
from whoosh.util.numeric import byte_to_length, length_to_byte
from whoosh.util.testing import TempIndex
@@ -38,7 +38,7 @@ def _do_basic(writerclass):
docs = []
# A ring buffer for creating string values
buf = deque()
- for ls in permutations(u("abcd")):
+ for ls in permutations("abcd"):
word = "".join(ls)
# Remember this word is in the index (to check lexicon)
words.append(word)
@@ -71,7 +71,7 @@ def _do_basic(writerclass):
r = s.reader()
# Check the lexicon
- for word, term in izip(words, r.field_terms("text")):
+ for word, term in zip(words, r.field_terms("text")):
assert word == term
# Check the doc count
assert r.doc_count_all() == len(docs)
@@ -151,23 +151,23 @@ def _do_merge(writerclass):
with TempIndex(schema) as ix:
w = ix.writer()
for key in "abc":
- w.add_document(key=u(key), value=u(domain[key]))
+ w.add_document(key=key, value=domain[key])
w.commit()
w = ix.writer()
for key in "def":
- w.add_document(key=u(key), value=u(domain[key]))
+ w.add_document(key=key, value=domain[key])
w.commit(merge=False)
w = writerclass(ix, procs=3)
del domain["b"]
- w.delete_by_term("key", u("b"))
+ w.delete_by_term("key", "b")
domain["e"] = "xx yy zz"
- w.update_document(key=u("e"), value=u(domain["e"]))
+ w.update_document(key="e", value=domain["e"])
for key in "ghijk":
- w.add_document(key=u(key), value=u(domain[key]))
+ w.add_document(key=key, value=domain[key])
w.commit(optimize=True)
assert len(ix._segments()) == 1
@@ -223,9 +223,9 @@ def test_no_score_no_store():
schema = fields.Schema(a=fields.ID, b=fields.KEYWORD)
domain = {}
- keys = list(u("abcdefghijklmnopqrstuvwx"))
+ keys = list("abcdefghijklmnopqrstuvwx")
random.shuffle(keys)
- words = u("alfa bravo charlie delta").split()
+ words = "alfa bravo charlie delta".split()
for i, key in enumerate(keys):
domain[key] = words[i % len(words)]
@@ -245,14 +245,14 @@ def test_multisegment():
from whoosh.multiproc import MpWriter
schema = fields.Schema(a=fields.TEXT(stored=True, spelling=True, vector=True))
- words = u("alfa bravo charlie delta echo").split()
+ words = "alfa bravo charlie delta echo".split()
with TempIndex(schema) as ix:
with ix.writer(procs=3, multisegment=True, batchsize=10) as w:
assert w.__class__ == MpWriter
assert w.multisegment
for ls in permutations(words, 3):
- w.add_document(a=u(" ").join(ls))
+ w.add_document(a=" ".join(ls))
assert len(ix._segments()) == 3
@@ -269,7 +269,7 @@ def test_batchsize_eq_doccount():
with TempIndex(schema) as ix:
with ix.writer(procs=4, batchsize=10) as w:
for i in range(10):
- w.add_document(a=u(str(i)))
+ w.add_document(a=str(i))
def test_finish_segment():
@@ -282,6 +282,6 @@ def test_finish_segment():
w = MpWriter(ix, procs=2, batchsize=1, multisegment=False, limitmb=0.00001)
for i in range(100):
- w.add_document(a=text_type(i) * 10)
+ w.add_document(a=str(i) * 10)
w.commit()
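The multiprocessing writer tests follow the same substitution pattern: izip becomes the built-in zip, permutations now comes from itertools, and text_type is simply str. As a quick sanity check of the permutation-based corpus used above (a standalone sketch, independent of the index itself):

    from itertools import permutations

    words = "alfa bravo charlie delta echo".split()
    docs = [" ".join(ls) for ls in permutations(words, 3)]
    assert len(docs) == 60  # 5 * 4 * 3 ordered three-word selections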
diff --git a/tests/test_nested.py b/tests/test_nested.py
index 41dc704c..046b22ff 100644
--- a/tests/test_nested.py
+++ b/tests/test_nested.py
@@ -1,5 +1,4 @@
from whoosh import fields, query, sorting
-from whoosh.compat import u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.testing import TempIndex
@@ -14,21 +13,21 @@ def test_nested_parent():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
with w.group():
- w.add_document(name=u("iPad"), type=u("product"))
- w.add_document(part=u("screen"), price=100)
- w.add_document(part=u("battery"), price=50)
- w.add_document(part=u("case"), price=20)
+ w.add_document(name="iPad", type="product")
+ w.add_document(part="screen", price=100)
+ w.add_document(part="battery", price=50)
+ w.add_document(part="case", price=20)
with w.group():
- w.add_document(name=u("iPhone"), type=u("product"))
- w.add_document(part=u("screen"), price=60)
- w.add_document(part=u("battery"), price=30)
- w.add_document(part=u("case"), price=10)
+ w.add_document(name="iPhone", type="product")
+ w.add_document(part="screen", price=60)
+ w.add_document(part="battery", price=30)
+ w.add_document(part="case", price=10)
with w.group():
- w.add_document(name=u("Mac mini"), type=u("product"))
- w.add_document(part=u("hard drive"), price=50)
- w.add_document(part=u("case"), price=50)
+ w.add_document(name="Mac mini", type="product")
+ w.add_document(part="hard drive", price=50)
+ w.add_document(part="case", price=50)
with ix.searcher() as s:
price = s.schema["price"]
@@ -48,22 +47,21 @@ def test_scoring():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
with w.group():
- w.add_document(kind=u("class"), name=u("Index"))
- w.add_document(kind=u("method"), name=u("add document"))
- w.add_document(kind=u("method"), name=u("add reader"))
- w.add_document(kind=u("method"), name=u("close"))
+ w.add_document(kind="class", name="Index")
+ w.add_document(kind="method", name="add document")
+ w.add_document(kind="method", name="add reader")
+ w.add_document(kind="method", name="close")
with w.group():
- w.add_document(kind=u("class"), name=u("Accumulator"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("get result"))
+ w.add_document(kind="class", name="Accumulator")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="get result")
with w.group():
- w.add_document(kind=u("class"), name=u("Calculator"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("add all"))
- w.add_document(kind=u("method"), name=u("add some"))
- w.add_document(kind=u("method"), name=u("multiply"))
- w.add_document(kind=u("method"), name=u("close"))
-
+ w.add_document(kind="class", name="Calculator")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="add all")
+ w.add_document(kind="method", name="add some")
+ w.add_document(kind="method", name="multiply")
+ w.add_document(kind="method", name="close")
with ix.searcher() as s:
q = query.NestedParent(query.Term("kind", "class"), query.Term("name", "add"))
r = s.search(q)
@@ -77,25 +75,25 @@ def test_missing():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
with w.group():
- w.add_document(kind=u("class"), name=u("Index"))
- w.add_document(kind=u("method"), name=u("add document"))
- w.add_document(kind=u("method"), name=u("add reader"))
- w.add_document(kind=u("method"), name=u("close"))
+ w.add_document(kind="class", name="Index")
+ w.add_document(kind="method", name="add document")
+ w.add_document(kind="method", name="add reader")
+ w.add_document(kind="method", name="close")
with w.group():
- w.add_document(kind=u("class"), name=u("Accumulator"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("get result"))
+ w.add_document(kind="class", name="Accumulator")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="get result")
with w.group():
- w.add_document(kind=u("class"), name=u("Calculator"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("add all"))
- w.add_document(kind=u("method"), name=u("add some"))
- w.add_document(kind=u("method"), name=u("multiply"))
- w.add_document(kind=u("method"), name=u("close"))
+ w.add_document(kind="class", name="Calculator")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="add all")
+ w.add_document(kind="method", name="add some")
+ w.add_document(kind="method", name="multiply")
+ w.add_document(kind="method", name="close")
with w.group():
- w.add_document(kind=u("class"), name=u("Deleter"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("delete"))
+ w.add_document(kind="class", name="Deleter")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="delete")
with ix.searcher() as s:
q = query.NestedParent(query.Term("kind", "class"), query.Term("name", "add"))
@@ -127,25 +125,25 @@ def test_nested_delete():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
with w.group():
- w.add_document(kind=u("class"), name=u("Index"))
- w.add_document(kind=u("method"), name=u("add document"))
- w.add_document(kind=u("method"), name=u("add reader"))
- w.add_document(kind=u("method"), name=u("close"))
+ w.add_document(kind="class", name="Index")
+ w.add_document(kind="method", name="add document")
+ w.add_document(kind="method", name="add reader")
+ w.add_document(kind="method", name="close")
with w.group():
- w.add_document(kind=u("class"), name=u("Accumulator"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("get result"))
+ w.add_document(kind="class", name="Accumulator")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="get result")
with w.group():
- w.add_document(kind=u("class"), name=u("Calculator"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("add all"))
- w.add_document(kind=u("method"), name=u("add some"))
- w.add_document(kind=u("method"), name=u("multiply"))
- w.add_document(kind=u("method"), name=u("close"))
+ w.add_document(kind="class", name="Calculator")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="add all")
+ w.add_document(kind="method", name="add some")
+ w.add_document(kind="method", name="multiply")
+ w.add_document(kind="method", name="close")
with w.group():
- w.add_document(kind=u("class"), name=u("Deleter"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("delete"))
+ w.add_document(kind="class", name="Deleter")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="delete")
# Delete "Accumulator" class
with ix.writer() as w:
@@ -194,25 +192,25 @@ def test_all_parents_deleted():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
with w.group():
- w.add_document(kind=u("class"), name=u("Index"))
- w.add_document(kind=u("method"), name=u("add document"))
- w.add_document(kind=u("method"), name=u("add reader"))
- w.add_document(kind=u("method"), name=u("close"))
+ w.add_document(kind="class", name="Index")
+ w.add_document(kind="method", name="add document")
+ w.add_document(kind="method", name="add reader")
+ w.add_document(kind="method", name="close")
with w.group():
- w.add_document(kind=u("class"), name=u("Accumulator"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("get result"))
+ w.add_document(kind="class", name="Accumulator")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="get result")
with w.group():
- w.add_document(kind=u("class"), name=u("Calculator"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("add all"))
- w.add_document(kind=u("method"), name=u("add some"))
- w.add_document(kind=u("method"), name=u("multiply"))
- w.add_document(kind=u("method"), name=u("close"))
+ w.add_document(kind="class", name="Calculator")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="add all")
+ w.add_document(kind="method", name="add some")
+ w.add_document(kind="method", name="multiply")
+ w.add_document(kind="method", name="close")
with w.group():
- w.add_document(kind=u("class"), name=u("Deleter"))
- w.add_document(kind=u("method"), name=u("add"))
- w.add_document(kind=u("method"), name=u("delete"))
+ w.add_document(kind="class", name="Deleter")
+ w.add_document(kind="method", name="add")
+ w.add_document(kind="method", name="delete")
with ix.writer() as w:
w.delete_by_term("name", "Index")
@@ -230,21 +228,21 @@ def test_everything_is_a_parent():
schema = fields.Schema(
id=fields.STORED, kind=fields.ID, name=fields.ID(stored=True)
)
- k = u("alfa")
+ k = "alfa"
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id=0, kind=k, name=u("one"))
- w.add_document(id=1, kind=k, name=u("two"))
- w.add_document(id=2, kind=k, name=u("three"))
- w.add_document(id=3, kind=k, name=u("four"))
- w.add_document(id=4, kind=k, name=u("one"))
- w.add_document(id=5, kind=k, name=u("two"))
- w.add_document(id=6, kind=k, name=u("three"))
- w.add_document(id=7, kind=k, name=u("four"))
- w.add_document(id=8, kind=k, name=u("one"))
- w.add_document(id=9, kind=k, name=u("two"))
- w.add_document(id=10, kind=k, name=u("three"))
- w.add_document(id=11, kind=k, name=u("four"))
+ w.add_document(id=0, kind=k, name="one")
+ w.add_document(id=1, kind=k, name="two")
+ w.add_document(id=2, kind=k, name="three")
+ w.add_document(id=3, kind=k, name="four")
+ w.add_document(id=4, kind=k, name="one")
+ w.add_document(id=5, kind=k, name="two")
+ w.add_document(id=6, kind=k, name="three")
+ w.add_document(id=7, kind=k, name="four")
+ w.add_document(id=8, kind=k, name="one")
+ w.add_document(id=9, kind=k, name="two")
+ w.add_document(id=10, kind=k, name="three")
+ w.add_document(id=11, kind=k, name="four")
with ix.searcher() as s:
pq = query.Term("kind", k)
@@ -258,21 +256,21 @@ def test_no_parents():
schema = fields.Schema(
id=fields.STORED, kind=fields.ID, name=fields.ID(stored=True)
)
- k = u("alfa")
+ k = "alfa"
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id=0, kind=k, name=u("one"))
- w.add_document(id=1, kind=k, name=u("two"))
- w.add_document(id=2, kind=k, name=u("three"))
- w.add_document(id=3, kind=k, name=u("four"))
- w.add_document(id=4, kind=k, name=u("one"))
- w.add_document(id=5, kind=k, name=u("two"))
- w.add_document(id=6, kind=k, name=u("three"))
- w.add_document(id=7, kind=k, name=u("four"))
- w.add_document(id=8, kind=k, name=u("one"))
- w.add_document(id=9, kind=k, name=u("two"))
- w.add_document(id=10, kind=k, name=u("three"))
- w.add_document(id=11, kind=k, name=u("four"))
+ w.add_document(id=0, kind=k, name="one")
+ w.add_document(id=1, kind=k, name="two")
+ w.add_document(id=2, kind=k, name="three")
+ w.add_document(id=3, kind=k, name="four")
+ w.add_document(id=4, kind=k, name="one")
+ w.add_document(id=5, kind=k, name="two")
+ w.add_document(id=6, kind=k, name="three")
+ w.add_document(id=7, kind=k, name="four")
+ w.add_document(id=8, kind=k, name="one")
+ w.add_document(id=9, kind=k, name="two")
+ w.add_document(id=10, kind=k, name="three")
+ w.add_document(id=11, kind=k, name="four")
with ix.searcher() as s:
pq = query.Term("kind", "bravo")
@@ -331,20 +329,20 @@ def test_nested_children():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
with w.group():
- w.add_document(t=u("album"), album_name=u("alfa bravo charlie"))
- w.add_document(t=u("track"), track=1, song_name=u("delta echo foxtrot"))
- w.add_document(t=u("track"), track=2, song_name=u("golf hotel india"))
- w.add_document(t=u("track"), track=3, song_name=u("juliet kilo lima"))
+ w.add_document(t="album", album_name="alfa bravo charlie")
+ w.add_document(t="track", track=1, song_name="delta echo foxtrot")
+ w.add_document(t="track", track=2, song_name="golf hotel india")
+ w.add_document(t="track", track=3, song_name="juliet kilo lima")
with w.group():
- w.add_document(t=u("album"), album_name=u("mike november oskar"))
- w.add_document(t=u("track"), track=1, song_name=u("papa quebec romeo"))
- w.add_document(t=u("track"), track=2, song_name=u("sierra tango ultra"))
- w.add_document(t=u("track"), track=3, song_name=u("victor whiskey xray"))
+ w.add_document(t="album", album_name="mike november oskar")
+ w.add_document(t="track", track=1, song_name="papa quebec romeo")
+ w.add_document(t="track", track=2, song_name="sierra tango ultra")
+ w.add_document(t="track", track=3, song_name="victor whiskey xray")
with w.group():
- w.add_document(t=u("album"), album_name=u("yankee zulu one"))
- w.add_document(t=u("track"), track=1, song_name=u("two three four"))
- w.add_document(t=u("track"), track=2, song_name=u("five six seven"))
- w.add_document(t=u("track"), track=3, song_name=u("eight nine ten"))
+ w.add_document(t="album", album_name="yankee zulu one")
+ w.add_document(t="track", track=1, song_name="two three four")
+ w.add_document(t="track", track=2, song_name="five six seven")
+ w.add_document(t="track", track=3, song_name="eight nine ten")
with ix.searcher() as s:
pq = query.Term("t", "album")
diff --git a/tests/test_parse_plugins.py b/tests/test_parse_plugins.py
index 4e391fa6..d5b398c5 100644
--- a/tests/test_parse_plugins.py
+++ b/tests/test_parse_plugins.py
@@ -2,12 +2,15 @@
from datetime import datetime, timezone
from whoosh import analysis, fields, formats, qparser, query
-from whoosh.compat import text_type, u
from whoosh.filedb.filestore import RamStorage
from whoosh.qparser import dateparse, default, plugins, syntax
from whoosh.util.times import adatetime
+def u(s):
+ return s.decode("ascii") if isinstance(s, bytes) else s
+
+
def _plugin_classes(ignore):
# Get all the subclasses of Plugin in whoosh.qparser.plugins
return [
@@ -55,7 +58,7 @@ def test_field_alias():
qp = qparser.QueryParser("content", None)
qp.add_plugin(plugins.FieldAliasPlugin({"title": ("article", "caption")}))
q = qp.parse("alfa title:bravo article:charlie caption:delta")
- assert text_type(q) == u(
+ assert str(q) == (
"(content:alfa AND title:bravo AND title:charlie AND title:delta)"
)
@@ -72,42 +75,42 @@ def cb(arg):
basedate = datetime(2010, 9, 20, 15, 16, 6, 454000, tzinfo=timezone.utc)
qp.add_plugin(dateparse.DateParserPlugin(basedate, callback=cb))
- q = qp.parse(u("hello date:'last tuesday'"))
+ q = qp.parse("hello date:'last tuesday'")
assert q.__class__ == query.And
assert q[1].__class__ == query.DateRange
assert q[1].startdate == adatetime(2010, 9, 14).floor()
assert q[1].enddate == adatetime(2010, 9, 14).ceil()
- q = qp.parse(u("date:'3am to 5pm'"))
+ q = qp.parse("date:'3am to 5pm'")
assert q.__class__ == query.DateRange
assert q.startdate == adatetime(2010, 9, 20, 3).floor()
assert q.enddate == adatetime(2010, 9, 20, 17).ceil()
- q = qp.parse(u("date:blah"))
+ q = qp.parse("date:blah")
assert q == query.NullQuery
assert errs[0] == "blah"
- q = qp.parse(u("hello date:blarg"))
- assert q.__unicode__() == "(text:hello AND <_NullQuery>)"
+ q = qp.parse("hello date:blarg")
+ assert str(q) == "(text:hello AND <_NullQuery>)"
assert q[1].error == "blarg"
assert errs[1] == "blarg"
- q = qp.parse(u("hello date:20055x10"))
- assert q.__unicode__() == "(text:hello AND <_NullQuery>)"
+ q = qp.parse("hello date:20055x10")
+ assert str(q) == "(text:hello AND <_NullQuery>)"
assert q[1].error == "20055x10"
assert errs[2] == "20055x10"
- q = qp.parse(u("hello date:'2005 19 32'"))
- assert q.__unicode__() == "(text:hello AND <_NullQuery>)"
+ q = qp.parse("hello date:'2005 19 32'")
+ assert str(q) == "(text:hello AND <_NullQuery>)"
assert q[1].error == "2005 19 32"
assert errs[3] == "2005 19 32"
- q = qp.parse(u("date:'march 24 to dec 12'"))
+ q = qp.parse("date:'march 24 to dec 12'")
assert q.__class__ == query.DateRange
assert q.startdate == adatetime(2010, 3, 24).floor()
assert q.enddate == adatetime(2010, 12, 12).ceil()
- q = qp.parse(u("date:('30 june' OR '10 july') quick"))
+ q = qp.parse("date:('30 june' OR '10 july') quick")
assert q.__class__ == query.And
assert len(q) == 2
assert q[0].__class__ == query.Or
@@ -121,32 +124,32 @@ def test_date_range():
basedate = datetime(2010, 9, 20, 15, 16, 6, 454000, tzinfo=timezone.utc)
qp.add_plugin(dateparse.DateParserPlugin(basedate))
- q = qp.parse(u("date:['30 march' to 'next wednesday']"))
+ q = qp.parse("date:['30 march' to 'next wednesday']")
assert q.__class__ == query.DateRange
assert q.startdate == adatetime(2010, 3, 30).floor()
assert q.enddate == adatetime(2010, 9, 22).ceil()
- q = qp.parse(u("date:[to 'next wednesday']"))
+ q = qp.parse("date:[to 'next wednesday']")
assert q.__class__ == query.DateRange
assert q.startdate is None
assert q.enddate == adatetime(2010, 9, 22).ceil()
- q = qp.parse(u("date:['30 march' to]"))
+ q = qp.parse("date:['30 march' to]")
assert q.__class__ == query.DateRange
assert q.startdate == adatetime(2010, 3, 30).floor()
assert q.enddate is None
- q = qp.parse(u("date:[30 march to next wednesday]"))
+ q = qp.parse("date:[30 march to next wednesday]")
assert q.__class__ == query.DateRange
assert q.startdate == adatetime(2010, 3, 30).floor()
assert q.enddate == adatetime(2010, 9, 22).ceil()
- q = qp.parse(u("date:[to next wednesday]"))
+ q = qp.parse("date:[to next wednesday]")
assert q.__class__ == query.DateRange
assert q.startdate is None
assert q.enddate == adatetime(2010, 9, 22).ceil()
- q = qp.parse(u("date:[30 march to]"))
+ q = qp.parse("date:[30 march to]")
assert q.__class__ == query.DateRange
assert q.startdate == adatetime(2010, 3, 30).floor()
assert q.enddate is None
@@ -193,7 +196,7 @@ def test_free_dates():
basedate = datetime(2010, 9, 20, 15, 16, 6, 454000, tzinfo=timezone.utc)
qp.add_plugin(dateparse.DateParserPlugin(basedate, free=True))
- q = qp.parse(u("hello date:last tuesday"))
+ q = qp.parse("hello date:last tuesday")
assert q.__class__ == query.And
assert len(q) == 2
assert q[0].__class__ == query.Term
@@ -202,7 +205,7 @@ def test_free_dates():
assert q[1].startdate == adatetime(2010, 9, 14).floor()
assert q[1].enddate == adatetime(2010, 9, 14).ceil()
- q = qp.parse(u("date:mar 29 1972 hello"))
+ q = qp.parse("date:mar 29 1972 hello")
assert q.__class__ == query.And
assert len(q) == 2
assert q[0].__class__ == query.DateRange
@@ -211,12 +214,12 @@ def test_free_dates():
assert q[1].__class__ == query.Term
assert q[1].text == "hello"
- q = qp.parse(u("date:2005 march 2"))
+ q = qp.parse("date:2005 march 2")
assert q.__class__ == query.DateRange
assert q.startdate == adatetime(2005, 3, 2).floor()
assert q.enddate == adatetime(2005, 3, 2).ceil()
- q = qp.parse(u("date:'2005' march 2"))
+ q = qp.parse("date:'2005' march 2")
assert q.__class__ == query.And
assert len(q) == 3
assert q[0].__class__ == query.DateRange
@@ -226,17 +229,17 @@ def test_free_dates():
assert q[1].fieldname == "text"
assert q[1].text == "march"
- q = qp.parse(u("date:march 24 to dec 12"))
+ q = qp.parse("date:march 24 to dec 12")
assert q.__class__ == query.DateRange
assert q.startdate == adatetime(2010, 3, 24).floor()
assert q.enddate == adatetime(2010, 12, 12).ceil()
- q = qp.parse(u("date:5:10pm"))
+ q = qp.parse("date:5:10pm")
assert q.__class__ == query.DateRange
assert q.startdate == adatetime(2010, 9, 20, 17, 10).floor()
assert q.enddate == adatetime(2010, 9, 20, 17, 10).ceil()
- q = qp.parse(u("(date:30 june OR date:10 july) quick"))
+ q = qp.parse("(date:30 june OR date:10 july) quick")
assert q.__class__ == query.And
assert len(q) == 2
assert q[0].__class__ == query.Or
@@ -249,9 +252,9 @@ def test_prefix_plugin():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), text=u("alfa"))
- w.add_document(id=u("2"), text=u("bravo"))
- w.add_document(id=u("3"), text=u("buono"))
+ w.add_document(id="1", text="alfa")
+ w.add_document(id="2", text="bravo")
+ w.add_document(id="3", text="buono")
w.commit()
with ix.searcher() as s:
@@ -259,11 +262,11 @@ def test_prefix_plugin():
qp.remove_plugin_class(plugins.WildcardPlugin)
qp.add_plugin(plugins.PrefixPlugin)
- q = qp.parse(u("b*"))
+ q = qp.parse("b*")
r = s.search(q, limit=None)
assert len(r) == 2
- q = qp.parse(u("br*"))
+ q = qp.parse("br*")
r = s.search(q, limit=None)
assert len(r) == 1
@@ -299,37 +302,34 @@ def test_custom_tokens():
def test_copyfield():
qp = qparser.QueryParser("a", None)
qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, None))
- assert text_type(qp.parse("hello b:matt")) == "(a:hello AND b:matt AND c:matt)"
+ assert str(qp.parse("hello b:matt")) == "(a:hello AND b:matt AND c:matt)"
qp = qparser.QueryParser("a", None)
qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, syntax.AndMaybeGroup))
- assert (
- text_type(qp.parse("hello b:matt")) == "(a:hello AND (b:matt ANDMAYBE c:matt))"
- )
+ assert str(qp.parse("hello b:matt")) == "(a:hello AND (b:matt ANDMAYBE c:matt))"
qp = qparser.QueryParser("a", None)
qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, syntax.RequireGroup))
assert (
- text_type(qp.parse("hello (there OR b:matt)"))
+ str(qp.parse("hello (there OR b:matt)"))
== "(a:hello AND (a:there OR (b:matt REQUIRE c:matt)))"
)
qp = qparser.QueryParser("a", None)
qp.add_plugin(plugins.CopyFieldPlugin({"a": "c"}, syntax.OrGroup))
assert (
- text_type(qp.parse("hello there"))
+ str(qp.parse("hello there"))
== "((a:hello OR c:hello) AND (a:there OR c:there))"
)
qp = qparser.QueryParser("a", None)
qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, mirror=True))
- assert text_type(qp.parse("hello c:matt")) == "(a:hello AND (c:matt OR b:matt))"
+ assert str(qp.parse("hello c:matt")) == "(a:hello AND (c:matt OR b:matt))"
qp = qparser.QueryParser("a", None)
qp.add_plugin(plugins.CopyFieldPlugin({"c": "a"}, mirror=True))
assert (
- text_type(qp.parse("hello c:matt"))
- == "((a:hello OR c:hello) AND (c:matt OR a:matt))"
+ str(qp.parse("hello c:matt")) == "((a:hello OR c:hello) AND (c:matt OR a:matt))"
)
ana = analysis.RegexAnalyzer(r"\w+") | analysis.DoubleMetaphoneFilter()
@@ -342,7 +342,7 @@ def test_copyfield():
"((name:spruce OR name_phone:SPRS) "
"AND (name:view OR name_phone:F OR name_phone:FF))"
)
- assert text_type(qp.parse(u("spruce view"))) == target
+ assert str(qp.parse("spruce view")) == target
def test_gtlt():
@@ -357,7 +357,7 @@ def test_gtlt():
qp.add_plugin(plugins.GtLtPlugin())
qp.add_plugin(dateparse.DateParserPlugin())
- q = qp.parse(u("a:hello b:>100 c:<=z there"))
+ q = qp.parse("a:hello b:>100 c:<=z there")
assert q.__class__ == query.And
assert len(q) == 4
assert q[0] == query.Term("a", "hello")
@@ -365,7 +365,7 @@ def test_gtlt():
assert q[2] == query.TermRange("c", None, "z")
assert q[3] == query.Term("a", "there")
- q = qp.parse(u("hello e:>'29 mar 2001' there"))
+ q = qp.parse("hello e:>'29 mar 2001' there")
assert q.__class__ == query.And
assert len(q) == 3
assert q[0] == query.Term("a", "hello")
@@ -375,13 +375,13 @@ def test_gtlt():
)
assert q[2] == query.Term("a", "there")
- q = qp.parse(u("a:> alfa c:<= bravo"))
- assert text_type(q) == "(a:a: AND a:alfa AND a:c: AND a:bravo)"
+ q = qp.parse("a:> alfa c:<= bravo")
+ assert str(q) == "(a:a: AND a:alfa AND a:c: AND a:bravo)"
qp.remove_plugin_class(plugins.FieldsPlugin)
qp.remove_plugin_class(plugins.RangePlugin)
- q = qp.parse(u("hello a:>500 there"))
- assert text_type(q) == "(a:hello AND a:a: AND a:500 AND a:there)"
+ q = qp.parse("hello a:>500 there")
+ assert str(q) == "(a:hello AND a:a: AND a:500 AND a:there)"
def test_regex():
@@ -389,11 +389,11 @@ def test_regex():
qp = qparser.QueryParser("a", schema)
qp.add_plugin(plugins.RegexPlugin())
- q = qp.parse(u("a:foo-bar b:foo-bar"))
- assert q.__unicode__() == "(a:foo-bar AND b:foo AND b:bar)"
+ q = qp.parse("a:foo-bar b:foo-bar")
+ assert str(q) == "(a:foo-bar AND b:foo AND b:bar)"
- q = qp.parse(u('a:r"foo-bar" b:r"foo-bar"'))
- assert q.__unicode__() == '(a:r"foo-bar" AND b:r"foo-bar")'
+ q = qp.parse('a:r"foo-bar" b:r"foo-bar"')
+ assert str(q) == '(a:r"foo-bar" AND b:r"foo-bar")'
def test_pseudofield():
@@ -407,8 +407,8 @@ def regex_maker(node):
qp = qparser.QueryParser("a", schema)
qp.add_plugin(qparser.PseudoFieldPlugin({"regex": regex_maker}))
- q = qp.parse(u("alfa regex:br.vo"))
- assert q.__unicode__() == '(a:alfa AND content:r"br.vo")'
+ q = qp.parse("alfa regex:br.vo")
+ assert str(q) == '(a:alfa AND content:r"br.vo")'
def rev_text(node):
if node.has_text:
@@ -429,8 +429,8 @@ def rev_text(node):
qp = qparser.QueryParser("content", schema)
qp.add_plugin(qparser.PseudoFieldPlugin({"reverse": rev_text}))
- q = qp.parse(u("alfa reverse:bravo"))
- assert q.__unicode__() == "(content:alfa AND (reverse:bravo OR reverse:ovarb))"
+ q = qp.parse("alfa reverse:bravo")
+ assert str(q) == "(content:alfa AND (reverse:bravo OR reverse:ovarb))"
def test_fuzzy_plugin():
@@ -493,23 +493,21 @@ def test_fuzzy_prefix():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
# Match -> first
- w.add_document(
- title=u("First"), content=u("This is the first document we've added!")
- )
+ w.add_document(title="First", content="This is the first document we've added!")
# No match
w.add_document(
- title=u("Second"),
- content=u("The second one is even more interesting! filst"),
+ title="Second",
+ content="The second one is even more interesting! filst",
)
# Match -> first
- w.add_document(title=u("Third"), content=u("The world first line we've added!"))
+ w.add_document(title="Third", content="The world first line we've added!")
# Match -> zeroth
w.add_document(
- title=u("Fourth"),
- content=u("The second one is alaways comes after zeroth!"),
+ title="Fourth",
+ content="The second one is alaways comes after zeroth!",
)
# Match -> fire is within 2 edits (transpose + delete) of first
- w.add_document(title=u("Fifth"), content=u("The fire is beautiful"))
+ w.add_document(title="Fifth", content="The fire is beautiful")
from whoosh.qparser import FuzzyTermPlugin, QueryParser
@@ -543,13 +541,11 @@ def __init__(self, children, *args, **kwargs):
def __hash__(self):
return hash(tuple(self.children)) ^ hash(self.args)
- def __unicode__(self):
+ def __str__(self):
qs = "|".join(str(q) for q in self.children)
args = ",".join(self.args)
kwargs = ",".join(sorted("%s:%s" % item for item in self.kwargs.items()))
- return u("<%s %s %s>") % (qs, args, kwargs)
-
- __str__ = __unicode__
+ return f"<{qs} {args} {kwargs}>"
def fuzzy(qs, prefix=0, maxdist=2):
prefix = int(prefix)
@@ -624,22 +620,22 @@ def test_sequence_plugin():
qp.add_plugin(plugins.FuzzyTermPlugin())
qp.add_plugin(plugins.SequencePlugin())
- q = qp.parse(u('alfa "bravo charlie~2 (delta OR echo)" foxtrot'))
+ q = qp.parse('alfa "bravo charlie~2 (delta OR echo)" foxtrot')
assert (
- q.__unicode__()
+ str(q)
== "(f:alfa AND (f:bravo NEAR f:charlie~2 NEAR (f:delta OR f:echo)) AND f:foxtrot)"
)
assert q[1].__class__ == query.Sequence
- q = qp.parse(u('alfa "bravo charlie~2 d?lt*'))
+ q = qp.parse('alfa "bravo charlie~2 d?lt*')
assert q[0].text == "alfa"
assert q[1].text == "bravo"
assert q[2].__class__ == query.FuzzyTerm
assert q[3].__class__ == query.Wildcard
- q = qp.parse(u('alfa "bravo charlie~2" d?lt* "[a TO z] [0 TO 9]" echo'))
+ q = qp.parse('alfa "bravo charlie~2" d?lt* "[a TO z] [0 TO 9]" echo')
assert (
- q.__unicode__()
+ str(q)
== "(f:alfa AND (f:bravo NEAR f:charlie~2) AND f:d?lt* AND (f:[a TO z] NEAR f:[0 TO 9]) AND f:echo)"
)
assert q[0].text == "alfa"
@@ -650,7 +646,7 @@ def test_sequence_plugin():
assert q[3][1].__class__ == query.TermRange
assert q[4].text == "echo"
- q = qp.parse(u('alfa "bravo charlie~3"~2 delta'))
+ q = qp.parse('alfa "bravo charlie~3"~2 delta')
assert q[1].__class__ == query.Sequence
assert q[1].slop == 2
assert q[1][1].__class__ == query.FuzzyTerm
@@ -662,12 +658,10 @@ def test_sequence_andmaybe():
qp.remove_plugin_class(plugins.PhrasePlugin)
qp.add_plugins([plugins.FuzzyTermPlugin(), plugins.SequencePlugin()])
- q = qp.parse(u('Dahmen ANDMAYBE "Besov Spaces"'))
+ q = qp.parse('Dahmen ANDMAYBE "Besov Spaces"')
assert isinstance(q, query.AndMaybe)
- assert q[0] == query.Term("f", u("Dahmen"))
- assert q[1] == query.Sequence(
- [query.Term("f", u("Besov")), query.Term("f", u("Spaces"))]
- )
+ assert q[0] == query.Term("f", "Dahmen")
+ assert q[1] == query.Sequence([query.Term("f", "Besov"), query.Term("f", "Spaces")])
def test_sequence_complex():
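
Two substitutions recur throughout the test_parse_plugins.py hunks above: the module keeps a tiny local u() helper so fixtures may be passed as either bytes or str, and the custom query class drops __unicode__ in favour of a plain __str__ with an f-string. A minimal, self-contained sketch of both; the ToyQuery class and its values are illustrative only, not Whoosh code:

```python
def u(s):
    # Same behaviour as the helper the test module defines: str passes
    # through unchanged, bytes are decoded as ASCII (Python 3 str is Unicode).
    return s.decode("ascii") if isinstance(s, bytes) else s


class ToyQuery:
    """Illustrative stand-in showing the __unicode__ -> __str__ migration."""

    def __init__(self, children, *args, **kwargs):
        self.children = list(children)
        self.args = args
        self.kwargs = kwargs

    def __str__(self):
        # The f-string replaces the old u("<%s %s %s>") % (...) formatting.
        qs = "|".join(str(q) for q in self.children)
        args = ",".join(self.args)
        kwargs = ",".join(sorted(f"{k}:{v}" for k, v in self.kwargs.items()))
        return f"<{qs} {args} {kwargs}>"


assert u("alfa") == u(b"alfa") == "alfa"
assert str(ToyQuery(["alfa", "bravo"], "x", slop=2)) == "<alfa|bravo x slop:2>"
```
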
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index 66c56ee7..dd1d22b1 100644
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -1,5 +1,4 @@
from whoosh import analysis, fields, query
-from whoosh.compat import text_type, u
from whoosh.qparser import default, plugins
@@ -161,20 +160,20 @@ def test_boost():
def test_empty_querystring():
s = fields.Schema(content=fields.TEXT, title=fields.TEXT, id=fields.ID)
qp = default.QueryParser("content", s)
- q = qp.parse(u(""))
+ q = qp.parse("")
assert q == query.NullQuery
def test_fields():
s = fields.Schema(content=fields.TEXT, title=fields.TEXT, id=fields.ID)
qp = default.QueryParser("content", s)
- q = qp.parse(u("test"))
+ q = qp.parse("test")
assert q.__class__ == query.Term
assert q.fieldname == "content"
assert q.text == "test"
mq = default.MultifieldParser(("title", "content"), s)
- q = mq.parse(u("test"))
+ q = mq.parse("test")
assert q.__class__ == query.Or
assert q[0].__class__ == query.Term
assert q[1].__class__ == query.Term
@@ -183,7 +182,7 @@ def test_fields():
assert q[0].text == "test"
assert q[1].text == "test"
- q = mq.parse(u("title:test"))
+ q = mq.parse("title:test")
assert q.__class__ == query.Term
assert q.fieldname == "title"
assert q.text == "test"
@@ -194,12 +193,12 @@ def test_multifield():
content=fields.TEXT, title=fields.TEXT, cat=fields.KEYWORD, date=fields.DATETIME
)
- qs = u("a (b c cat:d) OR (b c cat:e)")
+ qs = "a (b c cat:d) OR (b c cat:e)"
qp = default.MultifieldParser(["x", "y"], schema)
q = qp.parse(qs)
assert (
- text_type(q)
+ str(q)
== "((x:a OR y:a) AND (((x:b OR y:b) AND (x:c OR y:c) AND cat:d) OR ((x:b OR y:b) AND (x:c OR y:c) AND cat:e)))"
)
@@ -207,40 +206,38 @@ def test_multifield():
def test_fieldname_chars():
s = fields.Schema(abc123=fields.TEXT, nisbah=fields.KEYWORD)
qp = default.QueryParser("content", s)
- fieldmap = {"nisbah": [u("\u0646\u0633\u0628\u0629")], "abc123": ["xyz"]}
+ fieldmap = {"nisbah": ["\u0646\u0633\u0628\u0629"], "abc123": ["xyz"]}
qp.add_plugin(plugins.FieldAliasPlugin(fieldmap))
- q = qp.parse(u("abc123:456"))
+ q = qp.parse("abc123:456")
assert q.__class__ == query.Term
- assert q.fieldname == u("abc123")
- assert q.text == u("456")
+ assert q.fieldname == "abc123"
+ assert q.text == "456"
- q = qp.parse(u("abc123:456 def"))
- assert text_type(q) == u("(abc123:456 AND content:def)")
+ q = qp.parse("abc123:456 def")
+ assert str(q) == "(abc123:456 AND content:def)"
q = qp.parse(
- u(
- "\u0646\u0633\u0628\u0629:\u0627\u0644\u0641\u0644\u0633"
- "\u0637\u064a\u0646\u064a"
- )
+ "\u0646\u0633\u0628\u0629:\u0627\u0644\u0641\u0644\u0633"
+ "\u0637\u064a\u0646\u064a"
)
assert q.__class__ == query.Term
- assert q.fieldname == u("nisbah")
- assert q.text == u("\u0627\u0644\u0641\u0644\u0633\u0637\u064a\u0646\u064a")
+ assert q.fieldname == "nisbah"
+ assert q.text == "\u0627\u0644\u0641\u0644\u0633\u0637\u064a\u0646\u064a"
- q = qp.parse(u("abc123 (xyz:123 OR qrs)"))
- assert text_type(q) == "(content:abc123 AND (abc123:123 OR content:qrs))"
+ q = qp.parse("abc123 (xyz:123 OR qrs)")
+ assert str(q) == "(content:abc123 AND (abc123:123 OR content:qrs))"
def test_colonspace():
s = fields.Schema(content=fields.TEXT, url=fields.ID)
qp = default.QueryParser("content", s)
- q = qp.parse(u("url:test"))
+ q = qp.parse("url:test")
assert q.__class__ == query.Term
assert q.fieldname == "url"
assert q.text == "test"
- q = qp.parse(u("url: test"))
+ q = qp.parse("url: test")
assert q.__class__ == query.And
assert q[0].__class__ == query.Term
assert q[1].__class__ == query.Term
@@ -249,14 +246,14 @@ def test_colonspace():
assert q[0].text == "url"
assert q[1].text == "test"
- q = qp.parse(u("url:"))
+ q = qp.parse("url:")
assert q.__class__ == query.Term
assert q.fieldname == "content"
assert q.text == "url"
s = fields.Schema(foo=fields.KEYWORD)
qp = default.QueryParser("foo", s)
- q = qp.parse(u("blah:"))
+ q = qp.parse("blah:")
assert q.__class__ == query.Term
assert q.fieldname == "foo"
assert q.text == "blah:"
@@ -265,7 +262,7 @@ def test_colonspace():
def test_andor():
qp = default.QueryParser("a", None)
q = qp.parse("a AND b OR c AND d OR e AND f")
- assert text_type(q) == "((a:a AND a:b) OR (a:c AND a:d) OR (a:e AND a:f))"
+ assert str(q) == "((a:a AND a:b) OR (a:c AND a:d) OR (a:e AND a:f))"
q = qp.parse("aORb")
assert q == query.Term("a", "aORb")
@@ -281,34 +278,34 @@ def test_andor():
def test_andnot():
qp = default.QueryParser("content", None)
- q = qp.parse(u("this ANDNOT that"))
+ q = qp.parse("this ANDNOT that")
assert q.__class__ == query.AndNot
assert q.a.__class__ == query.Term
assert q.b.__class__ == query.Term
assert q.a.text == "this"
assert q.b.text == "that"
- q = qp.parse(u("foo ANDNOT bar baz"))
+ q = qp.parse("foo ANDNOT bar baz")
assert q.__class__ == query.And
assert len(q) == 2
assert q[0].__class__ == query.AndNot
assert q[1].__class__ == query.Term
- q = qp.parse(u("foo fie ANDNOT bar baz"))
+ q = qp.parse("foo fie ANDNOT bar baz")
assert q.__class__ == query.And
assert len(q) == 3
assert q[0].__class__ == query.Term
assert q[1].__class__ == query.AndNot
assert q[2].__class__ == query.Term
- q = qp.parse(u("a AND b ANDNOT c"))
+ q = qp.parse("a AND b ANDNOT c")
assert q.__class__ == query.AndNot
- assert text_type(q) == "((content:a AND content:b) ANDNOT content:c)"
+ assert str(q) == "((content:a AND content:b) ANDNOT content:c)"
def test_boost_query():
qp = default.QueryParser("content", None)
- q = qp.parse(u("this^3 fn:that^0.5 5.67 hi^5x"))
+ q = qp.parse("this^3 fn:that^0.5 5.67 hi^5x")
assert q[0].boost == 3.0
assert q[1].boost == 0.5
assert q[1].fieldname == "fn"
@@ -325,7 +322,7 @@ def test_boost_query():
def test_boosts():
qp = default.QueryParser("t", None)
q = qp.parse("alfa ((bravo^2)^3)^4 charlie")
- assert q.__unicode__() == "(t:alfa AND t:bravo^24.0 AND t:charlie)"
+ assert str(q) == "(t:alfa AND t:bravo^24.0 AND t:charlie)"
def test_wild():
@@ -343,7 +340,7 @@ def test_wild():
#
qp = default.QueryParser("content", None)
- q = qp.parse(u("hello *the?e* ?star*s? test"))
+ q = qp.parse("hello *the?e* ?star*s? test")
assert len(q) == 4
assert q[0].__class__ == query.Term
assert q[0].text == "hello"
@@ -356,7 +353,7 @@ def test_wild():
#
qp = default.QueryParser("content", None)
- q = qp.parse(u("*the?e*"))
+ q = qp.parse("*the?e*")
assert q.__class__ == query.Wildcard
assert q.text == "*the?e*"
@@ -364,7 +361,7 @@ def test_wild():
def test_parse_fieldname_underscores():
s = fields.Schema(my_name=fields.ID(stored=True), my_value=fields.TEXT)
qp = default.QueryParser("my_value", schema=s)
- q = qp.parse(u("my_name:Green"))
+ q = qp.parse("my_name:Green")
assert q.__class__ == query.Term
assert q.fieldname == "my_name"
assert q.text == "Green"
@@ -372,11 +369,11 @@ def test_parse_fieldname_underscores():
def test_endstar():
qp = default.QueryParser("text", None)
- q = qp.parse(u("word*"))
+ q = qp.parse("word*")
assert q.__class__ == query.Prefix
assert q.text == "word"
- q = qp.parse(u("first* second"))
+ q = qp.parse("first* second")
assert q[0].__class__ == query.Prefix
assert q[0].text == "first"
@@ -463,32 +460,32 @@ def test_singlequotes_query():
# assert q.__class__ == query.Term
# assert q.text == "http://www.example.com"
#
-# q = qp.parse(u("\u005c\u005c"))
+# q = qp.parse("\u005c\u005c")
# assert q.__class__ == query.Term
# assert q.text == "\\"
# def test_escaping_wildcards():
# qp = default.QueryParser("text", None)
#
-# q = qp.parse(u("a*b*c?d"))
+# q = qp.parse("a*b*c?d")
# assert q.__class__ == query.Wildcard
# assert q.text == "a*b*c?d"
#
-# q = qp.parse(u("a*b\u005c*c?d"))
+# q = qp.parse("a*b\u005c*c?d")
# assert q.__class__ == query.Wildcard
# assert q.text == "a*b*c?d"
#
-# q = qp.parse(u("a*b\u005c\u005c*c?d"))
+# q = qp.parse("a*b\u005c\u005c*c?d")
# assert q.__class__ == query.Wildcard
-# assert q.text, u('a*b\u005c*c?d'))
+# assert q.text == 'a*b\u005c*c?d'
#
-# q = qp.parse(u("ab*"))
+# q = qp.parse("ab*")
# assert q.__class__ == query.Prefix
-# assert q.text, u("ab"))
+# assert q.text, "ab")
#
-# q = qp.parse(u("ab\u005c\u005c*"))
+# q = qp.parse("ab\u005c\u005c*")
# assert q.__class__ == query.Wildcard
-# assert q.text, u("ab\u005c*"))
+# assert q.text, "ab\u005c*")
def test_phrase_phrase():
@@ -506,16 +503,16 @@ def test_phrase_phrase():
def test_weird_characters():
qp = default.QueryParser("content", None)
- q = qp.parse(u(".abcd@gmail.com"))
+ q = qp.parse(".abcd@gmail.com")
assert q.__class__ == query.Term
assert q.text == ".abcd@gmail.com"
- q = qp.parse(u("r*"))
+ q = qp.parse("r*")
assert q.__class__ == query.Prefix
assert q.text == "r"
- q = qp.parse(u("."))
+ q = qp.parse(".")
assert q.__class__ == query.Term
assert q.text == "."
- q = qp.parse(u("?"))
+ q = qp.parse("?")
assert q.__class__ == query.Wildcard
assert q.text == "?"
@@ -523,38 +520,38 @@ def test_weird_characters():
def test_euro_chars():
schema = fields.Schema(text=fields.TEXT)
qp = default.QueryParser("text", schema)
- q = qp.parse(u("stra\xdfe"))
+ q = qp.parse("stra\xdfe")
assert q.__class__ == query.Term
- assert q.text == u("stra\xdfe")
+ assert q.text == "stra\xdfe"
def test_star():
schema = fields.Schema(text=fields.TEXT(stored=True))
qp = default.QueryParser("text", schema)
- q = qp.parse(u("*"))
+ q = qp.parse("*")
assert q.__class__ == query.Every
assert q.fieldname == "text"
- q = qp.parse(u("*h?ll*"))
+ q = qp.parse("*h?ll*")
assert q.__class__ == query.Wildcard
assert q.text == "*h?ll*"
- q = qp.parse(u("h?pe"))
+ q = qp.parse("h?pe")
assert q.__class__ == query.Wildcard
assert q.text == "h?pe"
- q = qp.parse(u("*? blah"))
+ q = qp.parse("*? blah")
assert q.__class__ == query.And
assert q[0].__class__ == query.Wildcard
assert q[0].text == "*?"
assert q[1].__class__ == query.Term
assert q[1].text == "blah"
- q = qp.parse(u("*ending"))
+ q = qp.parse("*ending")
assert q.__class__ == query.Wildcard
assert q.text == "*ending"
- q = qp.parse(u("*q"))
+ q = qp.parse("*q")
assert q.__class__ == query.Wildcard
assert q.text == "*q"
@@ -563,13 +560,13 @@ def test_star_field():
schema = fields.Schema(text=fields.TEXT)
qp = default.QueryParser("text", schema)
- q = qp.parse(u("*:*"))
+ q = qp.parse("*:*")
assert q.__class__ == query.Every
assert q.fieldname is None
# This gets parsed to a term with text="*:test" which is then analyzed down
# to just "test"
- q = qp.parse(u("*:test"))
+ q = qp.parse("*:test")
assert q.__class__ == query.Term
assert q.fieldname == "text"
assert q.text == "test"
@@ -579,49 +576,49 @@ def test_range_query():
schema = fields.Schema(name=fields.ID(stored=True), text=fields.TEXT(stored=True))
qp = default.QueryParser("text", schema)
- q = qp.parse(u("[alfa to bravo}"))
+ q = qp.parse("[alfa to bravo}")
assert q.__class__ == query.TermRange
assert q.start == "alfa"
assert q.end == "bravo"
assert q.startexcl is False
assert q.endexcl is True
- q = qp.parse(u("['hello there' to 'what ever']"))
+ q = qp.parse("['hello there' to 'what ever']")
assert q.__class__ == query.TermRange
assert q.start == "hello there"
assert q.end == "what ever"
assert q.startexcl is False
assert q.endexcl is False
- q = qp.parse(u("name:{'to' to 'b'}"))
+ q = qp.parse("name:{'to' to 'b'}")
assert q.__class__ == query.TermRange
assert q.start == "to"
assert q.end == "b"
assert q.startexcl is True
assert q.endexcl is True
- q = qp.parse(u("name:{'a' to 'to']"))
+ q = qp.parse("name:{'a' to 'to']")
assert q.__class__ == query.TermRange
assert q.start == "a"
assert q.end == "to"
assert q.startexcl is True
assert q.endexcl is False
- q = qp.parse(u("name:[a to to]"))
+ q = qp.parse("name:[a to to]")
assert q.__class__ == query.TermRange
assert q.start == "a"
assert q.end == "to"
- q = qp.parse(u("name:[to to b]"))
+ q = qp.parse("name:[to to b]")
assert q.__class__ == query.TermRange
assert q.start == "to"
assert q.end == "b"
- q = qp.parse(u("[alfa to alfa]"))
+ q = qp.parse("[alfa to alfa]")
assert q.__class__ == query.Term
assert q.text == "alfa"
- q = qp.parse(u("Ind* AND name:[d TO]"))
+ q = qp.parse("Ind* AND name:[d TO]")
assert q.__class__ == query.And
assert q[0].__class__ == query.Prefix
assert q[1].__class__ == query.TermRange
@@ -629,7 +626,7 @@ def test_range_query():
assert q[1].start == "d"
assert q[1].fieldname == "name"
- q = qp.parse(u("name:[d TO]"))
+ q = qp.parse("name:[d TO]")
assert q.__class__ == query.TermRange
assert q.start == "d"
assert q.fieldname == "name"
@@ -666,11 +663,11 @@ def test_regressions():
# From 0.3.18, these used to require escaping. Mostly good for
# regression testing.
- assert qp.parse(u("re-inker")) == query.Term("f", "re-inker")
- assert qp.parse(u("0.7 wire")) == query.And(
+ assert qp.parse("re-inker") == query.Term("f", "re-inker")
+ assert qp.parse("0.7 wire") == query.And(
[query.Term("f", "0.7"), query.Term("f", "wire")]
)
- assert qp.parse(u("daler-rowney pearl 'bell bronze'")) == query.And(
+ assert qp.parse("daler-rowney pearl 'bell bronze'") == query.And(
[
query.Term("f", "daler-rowney"),
query.Term("f", "pearl"),
@@ -678,7 +675,7 @@ def test_regressions():
]
)
- q = qp.parse(u('22" BX'))
+ q = qp.parse('22" BX')
assert q, query.And([query.Term("f", '22"') == query.Term("f", "BX")])
@@ -687,7 +684,7 @@ def test_empty_ranges():
qp = default.QueryParser("text", schema)
for fname in ("name", "date"):
- q = qp.parse(u("%s:[to]") % fname)
+ q = qp.parse(f"{fname}:[to]")
assert q.__class__ == query.Every
@@ -732,7 +729,7 @@ def test_nonexistant_fieldnames():
schema = fields.Schema(id=fields.ID, text=fields.TEXT(analyzer=a))
qp = default.QueryParser("text", schema)
- q = qp.parse(u("id:/code http://localhost/"))
+ q = qp.parse("id:/code http://localhost/")
assert q.__class__ == query.And
assert q[0].__class__ == query.Term
assert q[0].fieldname == "id"
@@ -745,7 +742,7 @@ def test_nonexistant_fieldnames():
def test_stopped():
schema = fields.Schema(text=fields.TEXT)
qp = default.QueryParser("text", schema)
- q = qp.parse(u("a b"))
+ q = qp.parse("a b")
assert q == query.NullQuery
@@ -753,49 +750,49 @@ def test_analyzing_terms():
ana = analysis.StemmingAnalyzer()
schema = fields.Schema(text=fields.TEXT(analyzer=ana))
qp = default.QueryParser("text", schema)
- q = qp.parse(u("Indexed!"))
+ q = qp.parse("Indexed!")
assert q.__class__ == query.Term
assert q.text == "index"
def test_simple_parsing():
parser = default.SimpleParser("x", None)
- q = parser.parse(u("alfa bravo charlie delta"))
- assert text_type(q) == "(x:alfa OR x:bravo OR x:charlie OR x:delta)"
+ q = parser.parse("alfa bravo charlie delta")
+ assert str(q) == "(x:alfa OR x:bravo OR x:charlie OR x:delta)"
- q = parser.parse(u("alfa +bravo charlie delta"))
- assert text_type(q) == "(x:bravo ANDMAYBE (x:alfa OR x:charlie OR x:delta))"
+ q = parser.parse("alfa +bravo charlie delta")
+ assert str(q) == "(x:bravo ANDMAYBE (x:alfa OR x:charlie OR x:delta))"
- q = parser.parse(u("alfa +bravo -charlie delta"))
- assert text_type(q) == "((x:bravo ANDMAYBE (x:alfa OR x:delta)) ANDNOT x:charlie)"
+ q = parser.parse("alfa +bravo -charlie delta")
+ assert str(q) == "((x:bravo ANDMAYBE (x:alfa OR x:delta)) ANDNOT x:charlie)"
- q = parser.parse(u("- alfa +bravo + delta"))
- assert text_type(q) == "((x:bravo AND x:delta) ANDNOT x:alfa)"
+ q = parser.parse("- alfa +bravo + delta")
+ assert str(q) == "((x:bravo AND x:delta) ANDNOT x:alfa)"
def test_dismax():
parser = default.DisMaxParser({"body": 0.8, "title": 2.5}, None)
- q = parser.parse(u("alfa bravo charlie"))
+ q = parser.parse("alfa bravo charlie")
assert (
- text_type(q)
+ str(q)
== "(DisMax(body:alfa^0.8 title:alfa^2.5) OR DisMax(body:bravo^0.8 title:bravo^2.5) OR DisMax(body:charlie^0.8 title:charlie^2.5))"
)
- q = parser.parse(u("alfa +bravo charlie"))
+ q = parser.parse("alfa +bravo charlie")
assert (
- text_type(q)
+ str(q)
== "(DisMax(body:bravo^0.8 title:bravo^2.5) ANDMAYBE (DisMax(body:alfa^0.8 title:alfa^2.5) OR DisMax(body:charlie^0.8 title:charlie^2.5)))"
)
- q = parser.parse(u("alfa -bravo charlie"))
+ q = parser.parse("alfa -bravo charlie")
assert (
- text_type(q)
+ str(q)
== "((DisMax(body:alfa^0.8 title:alfa^2.5) OR DisMax(body:charlie^0.8 title:charlie^2.5)) ANDNOT DisMax(body:bravo^0.8 title:bravo^2.5))"
)
- q = parser.parse(u("alfa -bravo +charlie"))
+ q = parser.parse("alfa -bravo +charlie")
assert (
- text_type(q)
+ str(q)
== "((DisMax(body:charlie^0.8 title:charlie^2.5) ANDMAYBE DisMax(body:alfa^0.8 title:alfa^2.5)) ANDNOT DisMax(body:bravo^0.8 title:bravo^2.5))"
)
@@ -809,11 +806,8 @@ def test_many_clauses():
def test_roundtrip():
parser = default.QueryParser("a", None)
- q = parser.parse(u("a OR ((b AND c AND d AND e) OR f OR g) ANDNOT h"))
- assert (
- text_type(q)
- == "((a:a OR (a:b AND a:c AND a:d AND a:e) OR a:f OR a:g) ANDNOT a:h)"
- )
+ q = parser.parse("a OR ((b AND c AND d AND e) OR f OR g) ANDNOT h")
+ assert str(q) == "((a:a OR (a:b AND a:c AND a:d AND a:e) OR a:f OR a:g) ANDNOT a:h)"
def test_ngrams():
@@ -821,7 +815,7 @@ def test_ngrams():
parser = default.QueryParser("grams", schema)
parser.remove_plugin_class(plugins.WhitespacePlugin)
- q = parser.parse(u("Hello There"))
+ q = parser.parse("Hello There")
assert q.__class__ == query.And
assert len(q) == 8
assert [sq.text for sq in q] == [
@@ -840,7 +834,7 @@ def test_ngramwords():
schema = fields.Schema(grams=fields.NGRAMWORDS(queryor=True))
parser = default.QueryParser("grams", schema)
- q = parser.parse(u("Hello Tom"))
+ q = parser.parse("Hello Tom")
assert q.__class__ == query.And
assert q[0].__class__ == query.Or
assert q[1].__class__ == query.Term
@@ -854,7 +848,7 @@ def test_multitoken_default():
assert textfield.multitoken_query == "default"
schema = fields.Schema(text=textfield)
parser = default.QueryParser("text", schema)
- qstring = u("chaw-bacon")
+ qstring = "chaw-bacon"
texts = list(schema["text"].process_text(qstring))
assert texts == ["chaw", "bacon"]
@@ -873,7 +867,7 @@ def test_multitoken_or():
textfield.multitoken_query = "or"
schema = fields.Schema(text=textfield)
parser = default.QueryParser("text", schema)
- qstring = u("chaw-bacon")
+ qstring = "chaw-bacon"
texts = list(schema["text"].process_text(qstring))
assert texts == ["chaw", "bacon"]
@@ -892,7 +886,7 @@ def test_multitoken_phrase():
textfield.multitoken_query = "phrase"
schema = fields.Schema(text=textfield)
parser = default.QueryParser("text", schema)
- qstring = u("chaw-bacon")
+ qstring = "chaw-bacon"
texts = list(schema["text"].process_text(qstring))
assert texts == ["chaw", "bacon"]
@@ -904,24 +898,24 @@ def test_multitoken_phrase():
def test_singlequote_multitoken():
schema = fields.Schema(text=fields.TEXT(multitoken_query="or"))
parser = default.QueryParser("text", schema)
- q = parser.parse(u("foo bar"))
- assert q.__unicode__() == "(text:foo AND text:bar)"
+ q = parser.parse("foo bar")
+ assert str(q) == "(text:foo AND text:bar)"
- q = parser.parse(u("'foo bar'")) # single quotes
- assert q.__unicode__() == "(text:foo OR text:bar)"
+ q = parser.parse("'foo bar'") # single quotes
+ assert str(q) == "(text:foo OR text:bar)"
def test_operator_queries():
qp = default.QueryParser("f", None)
q = qp.parse("a AND b OR c AND d")
- assert text_type(q) == "((f:a AND f:b) OR (f:c AND f:d))"
+ assert str(q) == "((f:a AND f:b) OR (f:c AND f:d))"
q = qp.parse("a OR b OR c OR d")
- assert text_type(q) == "(f:a OR f:b OR f:c OR f:d)"
+ assert str(q) == "(f:a OR f:b OR f:c OR f:d)"
q = qp.parse("a ANDMAYBE b ANDNOT c REQUIRE d")
- assert text_type(q) == "((f:a ANDMAYBE (f:b ANDNOT f:c)) REQUIRE f:d)"
+ assert str(q) == "((f:a ANDMAYBE (f:b ANDNOT f:c)) REQUIRE f:d)"
# def test_associativity():
@@ -936,39 +930,39 @@ def test_operator_queries():
#
# p = make_parser(left_andmaybe)
# q = p.parse("a ANDMAYBE b ANDMAYBE c ANDMAYBE d")
-# assert text_type(q), "(((f:a ANDMAYBE f:b) ANDMAYBE f:c) ANDMAYBE f:d)")
+# assert str(q), "(((f:a ANDMAYBE f:b) ANDMAYBE f:c) ANDMAYBE f:d)")
#
# p = make_parser(right_andmaybe)
# q = p.parse("a ANDMAYBE b ANDMAYBE c ANDMAYBE d")
-# assert text_type(q), "(f:a ANDMAYBE (f:b ANDMAYBE (f:c ANDMAYBE f:d)))")
+# assert str(q), "(f:a ANDMAYBE (f:b ANDMAYBE (f:c ANDMAYBE f:d)))")
#
# p = make_parser(not_)
# q = p.parse("a NOT b NOT c NOT d", normalize=False)
-# assert text_type(q), "(f:a AND NOT f:b AND NOT f:c AND NOT f:d)")
+# assert str(q), "(f:a AND NOT f:b AND NOT f:c AND NOT f:d)")
#
# p = make_parser(left_andmaybe)
# q = p.parse("(a ANDMAYBE b) ANDMAYBE (c ANDMAYBE d)")
-# assert text_type(q), "((f:a ANDMAYBE f:b) ANDMAYBE (f:c ANDMAYBE f:d))")
+# assert str(q), "((f:a ANDMAYBE f:b) ANDMAYBE (f:c ANDMAYBE f:d))")
#
# p = make_parser(right_andmaybe)
# q = p.parse("(a ANDMAYBE b) ANDMAYBE (c ANDMAYBE d)")
-# assert text_type(q), "((f:a ANDMAYBE f:b) ANDMAYBE (f:c ANDMAYBE f:d))")
+# assert str(q), "((f:a ANDMAYBE f:b) ANDMAYBE (f:c ANDMAYBE f:d))")
def test_not_assoc():
qp = default.QueryParser("text", None)
- q = qp.parse(u("a AND NOT b OR c"))
- assert text_type(q) == "((text:a AND NOT text:b) OR text:c)"
+ q = qp.parse("a AND NOT b OR c")
+ assert str(q) == "((text:a AND NOT text:b) OR text:c)"
qp = default.QueryParser("text", None)
- q = qp.parse(u("a NOT (b OR c)"))
- assert text_type(q) == "(text:a AND NOT (text:b OR text:c))"
+ q = qp.parse("a NOT (b OR c)")
+ assert str(q) == "(text:a AND NOT (text:b OR text:c))"
def test_fieldname_space():
qp = default.QueryParser("a", None)
q = qp.parse("Man Ray: a retrospective")
- assert text_type(q) == "(a:Man AND a:Ray: AND a:a AND a:retrospective)"
+ assert str(q) == "(a:Man AND a:Ray: AND a:a AND a:retrospective)"
def test_fieldname_fieldname():
@@ -981,16 +975,16 @@ def test_paren_fieldname():
schema = fields.Schema(kind=fields.ID, content=fields.TEXT)
qp = default.QueryParser("content", schema)
- q = qp.parse(u("(kind:1d565 OR kind:7c584) AND (stuff)"))
- assert text_type(q) == "((kind:1d565 OR kind:7c584) AND content:stuff)"
+ q = qp.parse("(kind:1d565 OR kind:7c584) AND (stuff)")
+ assert str(q) == "((kind:1d565 OR kind:7c584) AND content:stuff)"
- q = qp.parse(u("kind:(1d565 OR 7c584) AND (stuff)"))
- assert text_type(q) == "((kind:1d565 OR kind:7c584) AND content:stuff)"
+ q = qp.parse("kind:(1d565 OR 7c584) AND (stuff)")
+ assert str(q) == "((kind:1d565 OR kind:7c584) AND content:stuff)"
def test_star_paren():
qp = default.QueryParser("content", None)
- q = qp.parse(u("(*john*) AND (title:blog)"))
+ q = qp.parse("(*john*) AND (title:blog)")
assert q.__class__ == query.And
assert q[0].__class__ == query.Wildcard
@@ -1006,7 +1000,7 @@ def test_dash():
schema = fields.Schema(
title=fields.TEXT(analyzer=ana), text=fields.TEXT(analyzer=ana), time=fields.ID
)
- qtext = u("*Ben-Hayden*")
+ qtext = "*Ben-Hayden*"
qp = default.QueryParser("text", schema)
q = qp.parse(qtext)
@@ -1016,10 +1010,7 @@ def test_dash():
qp = default.MultifieldParser(["title", "text", "time"], schema)
q = qp.parse(qtext)
- assert (
- q.__unicode__()
- == "(title:*ben-hayden* OR text:*ben-hayden* OR time:*Ben-Hayden*)"
- )
+ assert str(q) == "(title:*ben-hayden* OR text:*ben-hayden* OR time:*Ben-Hayden*)"
def test_bool_True():
@@ -1039,17 +1030,17 @@ def test_not_order():
)
qp = default.QueryParser("count", schema)
- q1 = qp.parse(u("(NOT (count:0) AND cats:1)"))
+ q1 = qp.parse("(NOT (count:0) AND cats:1)")
assert q1.__class__ == query.And
assert q1[0].__class__ == query.Not
assert q1[1].__class__ == query.Term
- assert q1.__unicode__() == "(NOT count:0 AND cats:1)"
+ assert str(q1) == "(NOT count:0 AND cats:1)"
- q2 = qp.parse(u("(cats:1 AND NOT (count:0))"))
+ q2 = qp.parse("(cats:1 AND NOT (count:0))")
assert q2.__class__ == query.And
assert q2[0].__class__ == query.Term
assert q2[1].__class__ == query.Not
- assert q2.__unicode__() == "(cats:1 AND NOT count:0)"
+ assert str(q2) == "(cats:1 AND NOT count:0)"
def test_spacespace_and():
@@ -1071,33 +1062,33 @@ def test_spacespace_and():
def test_unicode_num():
schema = fields.Schema(num=fields.NUMERIC)
- parser = default.QueryParser(u("num"), schema=schema)
- q = parser.parse(u("num:1"))
+ parser = default.QueryParser("num", schema=schema)
+ q = parser.parse("num:1")
- _ = text_type(q)
+ _ = str(q)
def test_phrase_andmaybe():
qp = default.QueryParser("f", None)
- q = qp.parse(u('Dahmen ANDMAYBE "Besov Spaces"'))
+ q = qp.parse('Dahmen ANDMAYBE "Besov Spaces"')
assert isinstance(q, query.AndMaybe)
- assert q[0] == query.Term("f", u("Dahmen"))
- assert q[1] == query.Phrase("f", [u("Besov"), u("Spaces")])
+ assert q[0] == query.Term("f", "Dahmen")
+ assert q[1] == query.Phrase("f", ["Besov", "Spaces"])
def test_phrase_boost():
qp = default.QueryParser("f", None)
- q = qp.parse(u('Dahmen ANDMAYBE "Besov Spaces"^9'))
+ q = qp.parse('Dahmen ANDMAYBE "Besov Spaces"^9')
assert isinstance(q, query.AndMaybe)
- assert q[0] == query.Term("f", u("Dahmen"))
- assert q[1] == query.Phrase("f", [u("Besov"), u("Spaces")], boost=9)
+ assert q[0] == query.Term("f", "Dahmen")
+ assert q[1] == query.Phrase("f", ["Besov", "Spaces"], boost=9)
def test_andmaybe_none():
schema = fields.Schema(f=fields.TEXT, year=fields.NUMERIC)
qp = default.QueryParser("f", schema)
- _ = qp.parse(u("Dahmen ANDMAYBE @year:[2000 TO]"))
+ _ = qp.parse("Dahmen ANDMAYBE @year:[2000 TO]")
def test_quoted_prefix():
@@ -1106,7 +1097,7 @@ def test_quoted_prefix():
expr = r"(^|(?<=[ (]))(?P\w+|[*]):"
qp.replace_plugin(plugins.FieldsPlugin(expr))
- q = qp.parse(u("foo url:https://apple.com:8080/bar* baz"))
+ q = qp.parse("foo url:https://apple.com:8080/bar* baz")
assert isinstance(q, query.And)
assert q[0] == query.Term("f", "foo")
assert q[1] == query.Prefix("url", "https://apple.com:8080/bar")
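
Throughout test_parsing.py the expected values are compared with str(q); on Python 3 this is the same text that text_type(q) and q.__unicode__() produced before. A short sketch of the pattern, assuming Whoosh is importable (the field name and query string are examples only):

```python
from whoosh.qparser import default

# Parse against a default field with no schema, as the tests above do,
# then compare the query's string form directly.
qp = default.QueryParser("content", None)
q = qp.parse("alfa AND bravo")
assert str(q) == "(content:alfa AND content:bravo)"
```
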
diff --git a/tests/test_postings.py b/tests/test_postings.py
index 6d836a19..8d87c97b 100644
--- a/tests/test_postings.py
+++ b/tests/test_postings.py
@@ -1,6 +1,5 @@
from whoosh import analysis, fields
from whoosh.codec import default_codec
-from whoosh.compat import u
from whoosh.formats import (
CharacterBoosts,
Characters,
@@ -38,7 +37,7 @@ def _roundtrip(content, format_, astype, ana=None):
def test_existence_postings():
- content = u("alfa bravo charlie")
+ content = "alfa bravo charlie"
assert _roundtrip(content, Existence(), "frequency") == [
("alfa", 1),
("bravo", 1),
@@ -47,7 +46,7 @@ def test_existence_postings():
def test_frequency_postings():
- content = u("alfa bravo charlie bravo alfa alfa")
+ content = "alfa bravo charlie bravo alfa alfa"
assert _roundtrip(content, Frequency(), "frequency") == [
("alfa", 3),
("bravo", 2),
@@ -56,7 +55,7 @@ def test_frequency_postings():
def test_position_postings():
- content = u("alfa bravo charlie bravo alfa alfa")
+ content = "alfa bravo charlie bravo alfa alfa"
assert _roundtrip(content, Positions(), "positions") == [
("alfa", [0, 4, 5]),
("bravo", [1, 3]),
@@ -70,7 +69,7 @@ def test_position_postings():
def test_character_postings():
- content = u("alfa bravo charlie bravo alfa alfa")
+ content = "alfa bravo charlie bravo alfa alfa"
assert _roundtrip(content, Characters(), "characters") == [
("alfa", [(0, 0, 4), (4, 25, 29), (5, 30, 34)]),
("bravo", [(1, 5, 10), (3, 19, 24)]),
@@ -91,7 +90,7 @@ def test_character_postings():
def test_posboost_postings():
pbs = PositionBoosts()
ana = analysis.RegexTokenizer(r"\S+") | analysis.DelimitedAttributeFilter()
- content = u("alfa^2 bravo^0.1 charlie^2 bravo^0.5 alfa alfa")
+ content = "alfa^2 bravo^0.1 charlie^2 bravo^0.5 alfa alfa"
assert _roundtrip(content, pbs, "position_boosts", ana) == [
("alfa", [(0, 2), (4, 1), (5, 1)]),
("bravo", [(1, 0.1), (3, 0.5)]),
@@ -112,7 +111,7 @@ def test_posboost_postings():
def test_charboost_postings():
cbs = CharacterBoosts()
ana = analysis.RegexTokenizer(r"\S+") | analysis.DelimitedAttributeFilter()
- content = u("alfa^2 bravo^0.1 charlie^2 bravo^0.5 alfa alfa")
+ content = "alfa^2 bravo^0.1 charlie^2 bravo^0.5 alfa alfa"
assert _roundtrip(content, cbs, "character_boosts", ana) == [
("alfa", [(0, 0, 4, 2), (4, 37, 41, 1), (5, 42, 46, 1)]),
("bravo", [(1, 7, 12, 0.1), (3, 27, 32, 0.5)]),
diff --git a/tests/test_quality.py b/tests/test_quality.py
index 845f9d1f..5bb71c66 100644
--- a/tests/test_quality.py
+++ b/tests/test_quality.py
@@ -1,7 +1,6 @@
import random
from whoosh import fields, matching, scoring
-from whoosh.compat import u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.numeric import byte_to_length, length_to_byte
@@ -16,7 +15,7 @@ def test_max_field_length():
ix = st.create_index(schema)
for i in range(1, 200, 7):
w = ix.writer()
- w.add_document(t=u(" ").join(["word"] * i))
+ w.add_document(t=" ".join(["word"] * i))
w.commit()
with ix.reader() as r:
@@ -34,7 +33,7 @@ def test_minmax_field_length():
count = random.randint(1, 100)
least = min(count, least)
most = max(count, most)
- w.add_document(t=u(" ").join(["word"] * count))
+ w.add_document(t=" ".join(["word"] * count))
w.commit()
with ix.reader() as r:
@@ -46,46 +45,46 @@ def test_term_stats():
schema = fields.Schema(t=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(t=u("alfa bravo charlie delta echo"))
- w.add_document(t=u("bravo charlie delta echo foxtrot"))
- w.add_document(t=u("charlie delta echo foxtrot golf"))
- w.add_document(t=u("delta echo foxtrot"))
- w.add_document(t=u("echo foxtrot golf hotel india juliet"))
- w.add_document(t=u("foxtrot alfa alfa alfa"))
+ w.add_document(t="alfa bravo charlie delta echo")
+ w.add_document(t="bravo charlie delta echo foxtrot")
+ w.add_document(t="charlie delta echo foxtrot golf")
+ w.add_document(t="delta echo foxtrot")
+ w.add_document(t="echo foxtrot golf hotel india juliet")
+ w.add_document(t="foxtrot alfa alfa alfa")
w.commit()
with ix.reader() as r:
- ti = r.term_info("t", u("alfa"))
+ ti = r.term_info("t", "alfa")
assert ti.weight() == 4.0
assert ti.doc_frequency() == 2
assert ti.min_length() == 4
assert ti.max_length() == 5
assert ti.max_weight() == 3.0
- assert r.term_info("t", u("echo")).min_length() == 3
+ assert r.term_info("t", "echo").min_length() == 3
assert r.doc_field_length(3, "t") == 3
assert r.min_field_length("t") == 3
assert r.max_field_length("t") == 6
w = ix.writer()
- w.add_document(t=u("alfa"))
- w.add_document(t=u("bravo charlie"))
- w.add_document(t=u("echo foxtrot tango bravo"))
- w.add_document(t=u("golf hotel"))
- w.add_document(t=u("india"))
- w.add_document(t=u("juliet alfa bravo charlie delta echo foxtrot"))
+ w.add_document(t="alfa")
+ w.add_document(t="bravo charlie")
+ w.add_document(t="echo foxtrot tango bravo")
+ w.add_document(t="golf hotel")
+ w.add_document(t="india")
+ w.add_document(t="juliet alfa bravo charlie delta echo foxtrot")
w.commit(merge=False)
with ix.reader() as r:
- ti = r.term_info("t", u("alfa"))
+ ti = r.term_info("t", "alfa")
assert ti.weight() == 6.0
assert ti.doc_frequency() == 4
assert ti.min_length() == 1
assert ti.max_length() == 7
assert ti.max_weight() == 3.0
- assert r.term_info("t", u("echo")).min_length() == 3
+ assert r.term_info("t", "echo").min_length() == 3
assert r.min_field_length("t") == 1
assert r.max_field_length("t") == 7
@@ -95,43 +94,43 @@ def test_min_max_id():
schema = fields.Schema(id=fields.STORED, t=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=0, t=u("alfa bravo charlie"))
- w.add_document(id=1, t=u("bravo charlie delta"))
- w.add_document(id=2, t=u("charlie delta echo"))
- w.add_document(id=3, t=u("delta echo foxtrot"))
- w.add_document(id=4, t=u("echo foxtrot golf"))
+ w.add_document(id=0, t="alfa bravo charlie")
+ w.add_document(id=1, t="bravo charlie delta")
+ w.add_document(id=2, t="charlie delta echo")
+ w.add_document(id=3, t="delta echo foxtrot")
+ w.add_document(id=4, t="echo foxtrot golf")
w.commit()
with ix.reader() as r:
- ti = r.term_info("t", u("delta"))
+ ti = r.term_info("t", "delta")
assert ti.min_id() == 1
assert ti.max_id() == 3
- ti = r.term_info("t", u("alfa"))
+ ti = r.term_info("t", "alfa")
assert ti.min_id() == 0
assert ti.max_id() == 0
- ti = r.term_info("t", u("foxtrot"))
+ ti = r.term_info("t", "foxtrot")
assert ti.min_id() == 3
assert ti.max_id() == 4
w = ix.writer()
- w.add_document(id=5, t=u("foxtrot golf hotel"))
- w.add_document(id=6, t=u("golf hotel alfa"))
- w.add_document(id=7, t=u("hotel alfa bravo"))
- w.add_document(id=8, t=u("alfa bravo charlie"))
+ w.add_document(id=5, t="foxtrot golf hotel")
+ w.add_document(id=6, t="golf hotel alfa")
+ w.add_document(id=7, t="hotel alfa bravo")
+ w.add_document(id=8, t="alfa bravo charlie")
w.commit(merge=False)
with ix.reader() as r:
- ti = r.term_info("t", u("delta"))
+ ti = r.term_info("t", "delta")
assert ti.min_id() == 1
assert ti.max_id() == 3
- ti = r.term_info("t", u("alfa"))
+ ti = r.term_info("t", "alfa")
assert ti.min_id() == 0
assert ti.max_id() == 8
- ti = r.term_info("t", u("foxtrot"))
+ ti = r.term_info("t", "foxtrot")
assert ti.min_id() == 3
assert ti.max_id() == 5
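
The reader statistics exercised in test_quality.py are available on any open reader. A minimal sketch of the same calls against a throwaway RAM index; the document contents here are examples only:

```python
from whoosh import fields
from whoosh.filedb.filestore import RamStorage

schema = fields.Schema(t=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
w.add_document(t="alfa bravo charlie")
w.add_document(t="alfa alfa delta")
w.commit()

with ix.reader() as r:
    ti = r.term_info("t", "alfa")
    # Per-term statistics: total weight, document frequency, and the
    # shortest/longest field lengths among documents containing the term.
    print(ti.weight(), ti.doc_frequency(), ti.min_length(), ti.max_length())
    # Field-wide statistics across the whole index.
    print(r.min_field_length("t"), r.max_field_length("t"))
```
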
diff --git a/tests/test_queries.py b/tests/test_queries.py
index b9a6090e..ab29cea1 100644
--- a/tests/test_queries.py
+++ b/tests/test_queries.py
@@ -2,7 +2,6 @@
import pytest
from whoosh import fields, qparser, query
-from whoosh.compat import b, u
from whoosh.filedb.filestore import RamStorage
from whoosh.qparser import QueryParser
from whoosh.query import (
@@ -29,8 +28,12 @@
from whoosh.util.testing import TempIndex
+def u(s):
+ return s.decode("ascii") if isinstance(s, bytes) else s
+
+
def test_all_terms():
- q = QueryParser("a", None).parse(u('hello b:there c:"my friend"'))
+ q = QueryParser("a", None).parse('hello b:there c:"my friend"')
ts = q.all_terms(phrases=False)
assert sorted(ts) == [("a", "hello"), ("b", "there")]
ts = q.all_terms(phrases=True)
@@ -42,21 +45,21 @@ def test_existing_terms():
ix = RamStorage().create_index(s)
w = ix.writer()
- w.add_document(key=u("a"), value=u("alfa bravo charlie delta echo"))
- w.add_document(key=u("b"), value=u("foxtrot golf hotel india juliet"))
+ w.add_document(key="a", value="alfa bravo charlie delta echo")
+ w.add_document(key="b", value="foxtrot golf hotel india juliet")
w.commit()
r = ix.reader()
- q = QueryParser("value", None).parse(u('alfa hotel tango "sierra bravo"'))
+ q = QueryParser("value", None).parse('alfa hotel tango "sierra bravo"')
ts = q.existing_terms(r, phrases=False)
- assert sorted(ts) == [("value", b("alfa")), ("value", b("hotel"))]
+ assert sorted(ts) == [("value", b"alfa"), ("value", b"hotel")]
ts = q.existing_terms(r)
assert sorted(ts) == [
- ("value", b("alfa")),
- ("value", b("bravo")),
- ("value", b("hotel")),
+ ("value", b"alfa"),
+ ("value", b"bravo"),
+ ("value", b"hotel"),
]
@@ -65,8 +68,8 @@ def test_wildcard_existing_terms():
ix = RamStorage().create_index(s)
w = ix.writer()
- w.add_document(key=u("a"), value=u("alfa bravo bear charlie delta"))
- w.add_document(key=u("a"), value=u("boggle echo render rendering renders"))
+ w.add_document(key="a", value="alfa bravo bear charlie delta")
+ w.add_document(key="a", value="boggle echo render rendering renders")
w.commit()
r = ix.reader()
qp = QueryParser("value", ix.schema)
@@ -76,25 +79,25 @@ def words(terms):
for t in terms:
assert t[0] == "value"
z.append(t[1])
- return b(" ").join(sorted(z))
+ return b" ".join(sorted(z))
- q = qp.parse(u("b*"))
+ q = qp.parse("b*")
ts = q.existing_terms(r)
assert ts == set()
ts = q.existing_terms(r, expand=True)
- assert words(ts) == b("bear boggle bravo")
+ assert words(ts) == b"bear boggle bravo"
- q = qp.parse(u("[a TO f]"))
+ q = qp.parse("[a TO f]")
ts = q.existing_terms(r)
assert ts == set()
ts = q.existing_terms(r, expand=True)
- assert words(ts) == b("alfa bear boggle bravo charlie delta echo")
+ assert words(ts) == b"alfa bear boggle bravo charlie delta echo"
q = query.Variations("value", "render")
ts = q.existing_terms(r, expand=False)
- assert ts == {("value", b("render"))}
+ assert ts == {("value", b"render")}
ts = q.existing_terms(r, expand=True)
- assert words(ts) == b("render rendering renders")
+ assert words(ts) == b"render rendering renders"
def test_replace():
@@ -120,12 +123,10 @@ def visit(q):
return q
return q.apply(visit)
- before = And(
- [Not(Term("a", u("b"))), Variations("a", u("c")), Not(FuzzyTerm("a", u("d")))]
- )
+ before = And([Not(Term("a", "b")), Variations("a", "c"), Not(FuzzyTerm("a", "d"))])
after = visit(before)
assert after == And(
- [Not(Term("a", u("B"))), Variations("a", u("C")), Not(FuzzyTerm("a", u("D")))]
+ [Not(Term("a", "B")), Variations("a", "C"), Not(FuzzyTerm("a", "D"))]
)
def term2var(q):
@@ -152,23 +153,23 @@ def boost_phrases(q):
before = And(
[
- Term("a", u("b")),
- Or([Term("c", u("d")), Phrase("a", [u("e"), u("f")])]),
- Phrase("a", [u("g"), u("h")], boost=0.25),
+ Term("a", "b"),
+ Or([Term("c", "d"), Phrase("a", ["e", "f"])]),
+ Phrase("a", ["g", "h"], boost=0.25),
]
)
after = before.accept(boost_phrases)
assert after == And(
[
- Term("a", u("b")),
- Or([Term("c", u("d")), Phrase("a", [u("e"), u("f")], boost=2.0)]),
- Phrase("a", [u("g"), u("h")], boost=0.5),
+ Term("a", "b"),
+ Or([Term("c", "d"), Phrase("a", ["e", "f"], boost=2.0)]),
+ Phrase("a", ["g", "h"], boost=0.5),
]
)
- before = Phrase("a", [u("b"), u("c")], boost=2.5)
+ before = Phrase("a", ["b", "c"], boost=2.5)
after = before.accept(boost_phrases)
- assert after == Phrase("a", [u("b"), u("c")], boost=5.0)
+ assert after == Phrase("a", ["b", "c"], boost=5.0)
def test_simplify():
@@ -176,8 +177,8 @@ def test_simplify():
ix = RamStorage().create_index(s)
w = ix.writer()
- w.add_document(k=u("1"), v=u("aardvark apple allan alfa bear bee"))
- w.add_document(k=u("2"), v=u("brie glue geewhiz goop julia"))
+ w.add_document(k="1", v="aardvark apple allan alfa bear bee")
+ w.add_document(k="2", v="brie glue geewhiz goop julia")
w.commit()
r = ix.reader()
@@ -198,24 +199,22 @@ def test_simplify():
def test_merge_ranges():
- q = And([TermRange("f1", u("a"), None), TermRange("f1", None, u("z"))])
- assert q.normalize() == TermRange("f1", u("a"), u("z"))
+ q = And([TermRange("f1", "a", None), TermRange("f1", None, "z")])
+ assert q.normalize() == TermRange("f1", "a", "z")
- q = And(
- [NumericRange("f1", None, u("aaaaa")), NumericRange("f1", u("zzzzz"), None)]
- )
+ q = And([NumericRange("f1", None, "aaaaa"), NumericRange("f1", "zzzzz", None)])
assert q.normalize() == q
- q = And([TermRange("f1", u("a"), u("z")), TermRange("f1", "b", "x")])
- assert q.normalize() == TermRange("f1", u("a"), u("z"))
+ q = And([TermRange("f1", "a", "z"), TermRange("f1", "b", "x")])
+ assert q.normalize() == TermRange("f1", "a", "z")
- q = And([TermRange("f1", u("a"), u("m")), TermRange("f1", u("f"), u("q"))])
- assert q.normalize() == TermRange("f1", u("f"), u("m"))
+ q = And([TermRange("f1", "a", "m"), TermRange("f1", "f", "q")])
+ assert q.normalize() == TermRange("f1", "f", "m")
- q = Or([TermRange("f1", u("a"), u("m")), TermRange("f1", u("f"), u("q"))])
- assert q.normalize() == TermRange("f1", u("a"), u("q"))
+ q = Or([TermRange("f1", "a", "m"), TermRange("f1", "f", "q")])
+ assert q.normalize() == TermRange("f1", "a", "q")
- q = Or([TermRange("f1", u("m"), None), TermRange("f1", None, u("n"))])
+ q = Or([TermRange("f1", "m", None), TermRange("f1", None, "n")])
assert q.normalize() == Every("f1")
q = And([Every("f1"), Term("f1", "a"), Variations("f1", "b")])
@@ -223,23 +222,23 @@ def test_merge_ranges():
q = Or(
[
- Term("f1", u("q")),
- TermRange("f1", u("m"), None),
- TermRange("f1", None, u("n")),
+ Term("f1", "q"),
+ TermRange("f1", "m", None),
+ TermRange("f1", None, "n"),
]
)
assert q.normalize() == Every("f1")
- q = And([Or([Term("f1", u("a")), Term("f1", u("b"))]), Every("f1")])
+ q = And([Or([Term("f1", "a"), Term("f1", "b")]), Every("f1")])
assert q.normalize() == Every("f1")
- q = And([Term("f1", u("a")), And([Or([Every("f1")])])])
+ q = And([Term("f1", "a"), And([Or([Every("f1")])])])
assert q.normalize() == Every("f1")
def test_normalize_compound():
def oq():
- return Or([Term("a", u("a")), Term("a", u("b"))])
+ return Or([Term("a", "a"), Term("a", "b")])
def nq(level):
if level == 0:
@@ -249,27 +248,25 @@ def nq(level):
q = nq(5)
q = q.normalize()
- assert q == Or([Term("a", u("a")), Term("a", u("b"))])
+ assert q == Or([Term("a", "a"), Term("a", "b")])
def test_duplicates():
- q = And([Term("a", u("b")), Term("a", u("b"))])
- assert q.normalize() == Term("a", u("b"))
+ q = And([Term("a", "b"), Term("a", "b")])
+ assert q.normalize() == Term("a", "b")
- q = And([Prefix("a", u("b")), Prefix("a", u("b"))])
- assert q.normalize() == Prefix("a", u("b"))
+ q = And([Prefix("a", "b"), Prefix("a", "b")])
+ assert q.normalize() == Prefix("a", "b")
- q = And(
- [Variations("a", u("b")), And([Variations("a", u("b")), Term("a", u("b"))])]
- )
- assert q.normalize() == And([Variations("a", u("b")), Term("a", u("b"))])
+ q = And([Variations("a", "b"), And([Variations("a", "b"), Term("a", "b")])])
+ assert q.normalize() == And([Variations("a", "b"), Term("a", "b")])
- q = And([Term("a", u("b")), Prefix("a", u("b")), Term("a", u("b"), boost=1.1)])
+ q = And([Term("a", "b"), Prefix("a", "b"), Term("a", "b", boost=1.1)])
assert q.normalize() == q
# Wildcard without * or ? normalizes to Term
- q = And([Wildcard("a", u("b")), And([Wildcard("a", u("b")), Term("a", u("b"))])])
- assert q.normalize() == Term("a", u("b"))
+ q = And([Wildcard("a", "b"), And([Wildcard("a", "b"), Term("a", "b")])])
+ assert q.normalize() == Term("a", "b")
# TODO: FIX THIS
@@ -282,36 +279,36 @@ def do(q1, q2):
assert hash(q1) == hash(q1a)
assert q1 != q2
- do(Term("a", u("b"), boost=1.1), Term("a", u("b"), boost=1.5))
+ do(Term("a", "b", boost=1.1), Term("a", "b", boost=1.5))
do(
- And([Term("a", u("b")), Term("c", u("d"))], boost=1.1),
- And([Term("a", u("b")), Term("c", u("d"))], boost=1.5),
+ And([Term("a", "b"), Term("c", "d")], boost=1.1),
+ And([Term("a", "b"), Term("c", "d")], boost=1.5),
)
do(
- Or([Term("a", u("b"), boost=1.1), Term("c", u("d"))]),
- Or([Term("a", u("b"), boost=1.8), Term("c", u("d"))], boost=1.5),
+ Or([Term("a", "b", boost=1.1), Term("c", "d")]),
+ Or([Term("a", "b", boost=1.8), Term("c", "d")], boost=1.5),
)
do(
- DisjunctionMax([Term("a", u("b"), boost=1.8), Term("c", u("d"))]),
- DisjunctionMax([Term("a", u("b"), boost=1.1), Term("c", u("d"))], boost=1.5),
+ DisjunctionMax([Term("a", "b", boost=1.8), Term("c", "d")]),
+ DisjunctionMax([Term("a", "b", boost=1.1), Term("c", "d")], boost=1.5),
)
- do(Not(Term("a", u("b"), boost=1.1)), Not(Term("a", u("b"), boost=1.5)))
- do(Prefix("a", u("b"), boost=1.1), Prefix("a", u("b"), boost=1.5))
- do(Wildcard("a", u("b*x?"), boost=1.1), Wildcard("a", u("b*x?"), boost=1.5))
+ do(Not(Term("a", "b", boost=1.1)), Not(Term("a", "b", boost=1.5)))
+ do(Prefix("a", "b", boost=1.1), Prefix("a", "b", boost=1.5))
+ do(Wildcard("a", "b*x?", boost=1.1), Wildcard("a", "b*x?", boost=1.5))
do(
- FuzzyTerm("a", u("b"), constantscore=True),
- FuzzyTerm("a", u("b"), constantscore=False),
+ FuzzyTerm("a", "b", constantscore=True),
+ FuzzyTerm("a", "b", constantscore=False),
)
- do(FuzzyTerm("a", u("b"), boost=1.1), FuzzyTerm("a", u("b"), boost=1.5))
- do(TermRange("a", u("b"), u("c")), TermRange("a", u("b"), u("d")))
- do(TermRange("a", None, u("c")), TermRange("a", None, None))
+ do(FuzzyTerm("a", "b", boost=1.1), FuzzyTerm("a", "b", boost=1.5))
+ do(TermRange("a", "b", "c"), TermRange("a", "b", "d"))
+ do(TermRange("a", None, "c"), TermRange("a", None, None))
do(
- TermRange("a", u("b"), u("c"), boost=1.1),
- TermRange("a", u("b"), u("c"), boost=1.5),
+ TermRange("a", "b", "c", boost=1.1),
+ TermRange("a", "b", "c", boost=1.5),
)
do(
- TermRange("a", u("b"), u("c"), constantscore=True),
- TermRange("a", u("b"), u("c"), constantscore=False),
+ TermRange("a", "b", "c", constantscore=True),
+ TermRange("a", "b", "c", constantscore=False),
)
do(NumericRange("a", 1, 5), NumericRange("a", 1, 6))
do(NumericRange("a", None, 5), NumericRange("a", None, None))
@@ -321,75 +318,75 @@ def do(q1, q2):
NumericRange("a", 3, 6, constantscore=False),
)
# do(DateRange)
- do(Variations("a", u("render")), Variations("a", u("renders")))
+ do(Variations("a", "render"), Variations("a", "renders"))
do(
- Variations("a", u("render"), boost=1.1),
- Variations("a", u("renders"), boost=1.5),
+ Variations("a", "render", boost=1.1),
+ Variations("a", "renders", boost=1.5),
)
- do(Phrase("a", [u("b"), u("c"), u("d")]), Phrase("a", [u("b"), u("c"), u("e")]))
+ do(Phrase("a", ["b", "c", "d"]), Phrase("a", ["b", "c", "e"]))
do(
- Phrase("a", [u("b"), u("c"), u("d")], boost=1.1),
- Phrase("a", [u("b"), u("c"), u("d")], boost=1.5),
+ Phrase("a", ["b", "c", "d"], boost=1.1),
+ Phrase("a", ["b", "c", "d"], boost=1.5),
)
do(
- Phrase("a", [u("b"), u("c"), u("d")], slop=1),
- Phrase("a", [u("b"), u("c"), u("d")], slop=2),
+ Phrase("a", ["b", "c", "d"], slop=1),
+ Phrase("a", ["b", "c", "d"], slop=2),
)
# do(Ordered)
do(Every(), Every("a"))
do(Every("a"), Every("b"))
do(Every("a", boost=1.1), Every("a", boost=1.5))
- do(NullQuery, Term("a", u("b")))
- do(ConstantScoreQuery(Term("a", u("b"))), ConstantScoreQuery(Term("a", u("c"))))
+ do(NullQuery, Term("a", "b"))
+ do(ConstantScoreQuery(Term("a", "b")), ConstantScoreQuery(Term("a", "c")))
do(
- ConstantScoreQuery(Term("a", u("b")), score=2.0),
- ConstantScoreQuery(Term("a", u("c")), score=2.1),
+ ConstantScoreQuery(Term("a", "b"), score=2.0),
+ ConstantScoreQuery(Term("a", "c"), score=2.1),
)
do(
- Require(Term("a", u("b")), Term("c", u("d"))),
- Require(Term("a", u("b"), boost=1.1), Term("c", u("d"))),
+ Require(Term("a", "b"), Term("c", "d")),
+ Require(Term("a", "b", boost=1.1), Term("c", "d")),
)
# do(Require)
# do(AndMaybe)
# do(AndNot)
# do(Otherwise)
- do(SpanFirst(Term("a", u("b")), limit=1), SpanFirst(Term("a", u("b")), limit=2))
+ do(SpanFirst(Term("a", "b"), limit=1), SpanFirst(Term("a", "b"), limit=2))
do(
- SpanNear(Term("a", u("b")), Term("c", u("d"))),
- SpanNear(Term("a", u("b")), Term("c", u("e"))),
+ SpanNear(Term("a", "b"), Term("c", "d")),
+ SpanNear(Term("a", "b"), Term("c", "e")),
)
do(
- SpanNear(Term("a", u("b")), Term("c", u("d")), slop=1),
- SpanNear(Term("a", u("b")), Term("c", u("d")), slop=2),
+ SpanNear(Term("a", "b"), Term("c", "d"), slop=1),
+ SpanNear(Term("a", "b"), Term("c", "d"), slop=2),
)
do(
- SpanNear(Term("a", u("b")), Term("c", u("d")), mindist=1),
- SpanNear(Term("a", u("b")), Term("c", u("d")), mindist=2),
+ SpanNear(Term("a", "b"), Term("c", "d"), mindist=1),
+ SpanNear(Term("a", "b"), Term("c", "d"), mindist=2),
)
do(
- SpanNear(Term("a", u("b")), Term("c", u("d")), ordered=True),
- SpanNear(Term("a", u("b")), Term("c", u("d")), ordered=False),
+ SpanNear(Term("a", "b"), Term("c", "d"), ordered=True),
+ SpanNear(Term("a", "b"), Term("c", "d"), ordered=False),
)
do(
- SpanNot(Term("a", u("b")), Term("a", u("c"))),
- SpanNot(Term("a", u("b")), Term("a", u("d"))),
+ SpanNot(Term("a", "b"), Term("a", "c")),
+ SpanNot(Term("a", "b"), Term("a", "d")),
)
do(
- SpanOr([Term("a", u("b")), Term("a", u("c")), Term("a", u("d"))]),
- SpanOr([Term("a", u("b")), Term("a", u("c")), Term("a", u("e"))]),
+ SpanOr([Term("a", "b"), Term("a", "c"), Term("a", "d")]),
+ SpanOr([Term("a", "b"), Term("a", "c"), Term("a", "e")]),
)
do(
- SpanContains(Term("a", u("b")), Term("a", u("c"))),
- SpanContains(Term("a", u("b")), Term("a", u("d"))),
+ SpanContains(Term("a", "b"), Term("a", "c")),
+ SpanContains(Term("a", "b"), Term("a", "d")),
)
# do(SpanBefore)
# do(SpanCondition)
def test_requires():
- a = Term("f", u("a"))
- b = Term("f", u("b"))
+ a = Term("f", "a")
+ b = Term("f", "b")
assert And([a, b]).requires() == {a, b}
assert Or([a, b]).requires() == set()
assert AndMaybe(a, b).requires() == {a}
@@ -409,15 +406,15 @@ def test_highlight_daterange():
w = ix.writer()
w.update_document(
- id=u("1"),
- title=u("Life Aquatic"),
- content=u("A nautic film crew sets out to kill a gigantic shark."),
+ id="1",
+ title="Life Aquatic",
+ content="A nautic film crew sets out to kill a gigantic shark.",
released=datetime(2004, 12, 25, tzinfo=timezone.utc),
)
w.update_document(
- id=u("2"),
- title=u("Darjeeling Limited"),
- content=u(
+ id="2",
+ title="Darjeeling Limited",
+ content=(
"Three brothers meet in India for a life changing train " + "journey."
),
released=datetime(2007, 10, 27, tzinfo=timezone.utc),
@@ -425,7 +422,7 @@ def test_highlight_daterange():
w.commit()
s = ix.searcher()
- r = s.search(Term("content", u("train")), terms=True)
+ r = s.search(Term("content", "train"), terms=True)
assert len(r) == 1
assert r[0]["id"] == "2"
assert (
@@ -439,7 +436,7 @@ def test_highlight_daterange():
def test_patterns():
- domain = u(
+ domain = (
"aaron able acre adage aether after ago ahi aim ajax akimbo "
"alembic all amiga amount ampere"
).split()
@@ -452,33 +449,33 @@ def test_patterns():
with ix.reader() as r:
assert list(r.field_terms("word")) == domain
- assert list(r.expand_prefix("word", "al")) == [b("alembic"), b("all")]
+ assert list(r.expand_prefix("word", "al")) == [b"alembic", b"all"]
q = query.Prefix("word", "al")
- assert q.simplify(r).__unicode__() == "(word:alembic OR word:all)"
+ assert str(q.simplify(r)) == "(word:alembic OR word:all)"
q = query.Wildcard("word", "a*[ae]")
assert (
- q.simplify(r).__unicode__()
+ str(q.simplify(r))
== "(word:able OR word:acre OR word:adage OR word:amiga OR word:ampere)"
)
assert q._find_prefix(q.text) == "a"
q = query.Regex("word", "am.*[ae]")
- assert q.simplify(r).__unicode__() == "(word:amiga OR word:ampere)"
+ assert str(q.simplify(r)) == "(word:amiga OR word:ampere)"
assert q._find_prefix(q.text) == "am"
q = query.Regex("word", "able|ago")
- assert q.simplify(r).__unicode__() == "(word:able OR word:ago)"
+ assert str(q.simplify(r)) == "(word:able OR word:ago)"
assert q._find_prefix(q.text) == ""
    # special case: ? may mean "zero occurrences"
q = query.Regex("word", "ah?i")
- assert q.simplify(r).__unicode__() == "(word:ahi OR word:aim)"
+ assert str(q.simplify(r)) == "(word:ahi OR word:aim)"
assert q._find_prefix(q.text) == "a"
    # special case: * may mean "zero occurrences"
q = query.Regex("word", "ah*i")
- assert q.simplify(r).__unicode__() == "(word:ahi OR word:aim)"
+ assert str(q.simplify(r)) == "(word:ahi OR word:aim)"
assert q._find_prefix(q.text) == "a"
@@ -488,7 +485,7 @@ def test_or_nots1():
st = RamStorage()
ix = st.create_index(schema)
with ix.writer() as w:
- w.add_document(a=u("alfa"), b=u("charlie"))
+ w.add_document(a="alfa", b="charlie")
with ix.searcher() as s:
q = query.And(
@@ -512,7 +509,7 @@ def test_or_nots2():
st = RamStorage()
ix = st.create_index(schema)
with ix.writer() as w:
- w.add_document(b=u("bravo"))
+ w.add_document(b="bravo")
with ix.searcher() as s:
q = query.Or([query.Term("a", "alfa"), query.Not(query.Term("b", "alfa"))])
@@ -526,9 +523,9 @@ def test_or_nots3():
)
with TempIndex(schema, "ornot") as ix:
w = ix.writer()
- w.add_document(title=u("a1"), itemtype=u("a"))
- w.add_document(title=u("a2"), itemtype=u("a"))
- w.add_document(title=u("b1"), itemtype=u("b"))
+ w.add_document(title="a1", itemtype="a")
+ w.add_document(title="a2", itemtype="a")
+ w.add_document(title="b1", itemtype="b")
w.commit()
q = Term("itemtype", "a") | Not(Term("itemtype", "a"))
@@ -544,16 +541,16 @@ def test_ornot_andnot():
ix = st.create_index(schema)
with ix.writer() as w:
- w.add_document(id=0, a=u("word1 word1"))
- w.add_document(id=1, a=u("word1 word2"))
- w.add_document(id=2, a=u("word1 foo"))
- w.add_document(id=3, a=u("foo word2"))
- w.add_document(id=4, a=u("foo bar"))
+ w.add_document(id=0, a="word1 word1")
+ w.add_document(id=1, a="word1 word2")
+ w.add_document(id=2, a="word1 foo")
+ w.add_document(id=3, a="foo word2")
+ w.add_document(id=4, a="foo bar")
with ix.searcher() as s:
qp = qparser.QueryParser("a", ix.schema)
- q1 = qp.parse(u("NOT word1 NOT word2"))
- q2 = qp.parse(u("NOT (word1 OR word2)"))
+ q1 = qp.parse("NOT word1 NOT word2")
+ q2 = qp.parse("NOT (word1 OR word2)")
r1 = [hit["id"] for hit in s.search(q1, sortedby="id")]
r2 = [hit["id"] for hit in s.search(q2, sortedby="id")]
@@ -576,11 +573,11 @@ def test_issue_355():
with ix.searcher() as s:
# Passing a bytestring for a numeric field
- q = Term("seats", b("maker"))
+ q = Term("seats", b"maker")
r1 = [hit["seats"] for hit in s.search(q, limit=5)]
# Passing a unicode string for a numeric field
- q = Term("seats", u("maker"))
+ q = Term("seats", "maker")
r2 = [hit["seats"] for hit in s.search(q, limit=5)]
# Passing a value too large for the numeric field
@@ -594,16 +591,14 @@ def test_sequence():
schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id=0, text=u("alfa bravo charlie delta echo"))
- w.add_document(id=1, text=u("bravo charlie delta echo alfa"))
- w.add_document(id=2, text=u("charlie delta echo bravo"))
- w.add_document(id=3, text=u("delta echo charlie"))
- w.add_document(id=4, text=u("echo delta"))
+ w.add_document(id=0, text="alfa bravo charlie delta echo")
+ w.add_document(id=1, text="bravo charlie delta echo alfa")
+ w.add_document(id=2, text="charlie delta echo bravo")
+ w.add_document(id=3, text="delta echo charlie")
+ w.add_document(id=4, text="echo delta")
with ix.searcher() as s:
- seq = query.Sequence(
- [query.Term("text", u("echo")), query.Term("text", u("alfa"))]
- )
+ seq = query.Sequence([query.Term("text", "echo"), query.Term("text", "alfa")])
q = query.And([query.Term("text", "bravo"), seq])
r = s.search(q, limit=4)
@@ -615,14 +610,14 @@ def test_andmaybe():
schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id=0, text=u("alfa bravo charlie delta echo"))
- w.add_document(id=1, text=u("bravo charlie delta echo alfa"))
- w.add_document(id=2, text=u("charlie delta echo bravo"))
- w.add_document(id=3, text=u("delta echo charlie"))
- w.add_document(id=4, text=u("echo delta"))
+ w.add_document(id=0, text="alfa bravo charlie delta echo")
+ w.add_document(id=1, text="bravo charlie delta echo alfa")
+ w.add_document(id=2, text="charlie delta echo bravo")
+ w.add_document(id=3, text="delta echo charlie")
+ w.add_document(id=4, text="echo delta")
qp = qparser.QueryParser("text", schema)
- q = qp.parse(u('bravo ANDMAYBE "echo alfa"'))
+ q = qp.parse('bravo ANDMAYBE "echo alfa"')
with ix.searcher() as s:
r = s.search(q)
diff --git a/tests/test_reading.py b/tests/test_reading.py
index b7a4a9df..347edae9 100644
--- a/tests/test_reading.py
+++ b/tests/test_reading.py
@@ -4,7 +4,6 @@
import pytest
from whoosh import fields, formats, reading
-from whoosh.compat import b, u
from whoosh.filedb.filestore import RamStorage
from whoosh.reading import SegmentReader
from whoosh.util.testing import TempIndex
@@ -22,11 +21,11 @@ def _create_index():
def _one_segment_index():
ix = _create_index()
w = ix.writer()
- w.add_document(f1=u("A B C"), f2=u("1 2 3"), f3=u("X Y Z"))
- w.add_document(f1=u("D E F"), f2=u("4 5 6"), f3=u("Q R S"))
- w.add_document(f1=u("A E C"), f2=u("1 4 6"), f3=u("X Q S"))
- w.add_document(f1=u("A A A"), f2=u("2 3 5"), f3=u("Y R Z"))
- w.add_document(f1=u("A B"), f2=u("1 2"), f3=u("X Y"))
+ w.add_document(f1="A B C", f2="1 2 3", f3="X Y Z")
+ w.add_document(f1="D E F", f2="4 5 6", f3="Q R S")
+ w.add_document(f1="A E C", f2="1 4 6", f3="X Q S")
+ w.add_document(f1="A A A", f2="2 3 5", f3="Y R Z")
+ w.add_document(f1="A B", f2="1 2", f3="X Y")
w.commit()
return ix
@@ -35,17 +34,17 @@ def _one_segment_index():
def _multi_segment_index():
ix = _create_index()
w = ix.writer()
- w.add_document(f1=u("A B C"), f2=u("1 2 3"), f3=u("X Y Z"))
- w.add_document(f1=u("D E F"), f2=u("4 5 6"), f3=u("Q R S"))
+ w.add_document(f1="A B C", f2="1 2 3", f3="X Y Z")
+ w.add_document(f1="D E F", f2="4 5 6", f3="Q R S")
w.commit()
w = ix.writer()
- w.add_document(f1=u("A E C"), f2=u("1 4 6"), f3=u("X Q S"))
- w.add_document(f1=u("A A A"), f2=u("2 3 5"), f3=u("Y R Z"))
+ w.add_document(f1="A E C", f2="1 4 6", f3="X Q S")
+ w.add_document(f1="A A A", f2="2 3 5", f3="Y R Z")
w.commit(merge=False)
w = ix.writer()
- w.add_document(f1=u("A B"), f2=u("1 2"), f3=u("X Y"))
+ w.add_document(f1="A B", f2="1 2", f3="X Y")
w.commit(merge=False)
return ix
@@ -61,24 +60,24 @@ def _fstats(r):
def test_readers():
target = [
- ("f1", b("A"), 4, 6),
- ("f1", b("B"), 2, 2),
- ("f1", b("C"), 2, 2),
- ("f1", b("D"), 1, 1),
- ("f1", b("E"), 2, 2),
- ("f1", b("F"), 1, 1),
- ("f2", b("1"), 3, 3),
- ("f2", b("2"), 3, 3),
- ("f2", b("3"), 2, 2),
- ("f2", b("4"), 2, 2),
- ("f2", b("5"), 2, 2),
- ("f2", b("6"), 2, 2),
- ("f3", b("Q"), 2, 2),
- ("f3", b("R"), 2, 2),
- ("f3", b("S"), 2, 2),
- ("f3", b("X"), 3, 3),
- ("f3", b("Y"), 3, 3),
- ("f3", b("Z"), 2, 2),
+ ("f1", b"A", 4, 6),
+ ("f1", b"B", 2, 2),
+ ("f1", b"C", 2, 2),
+ ("f1", b"D", 1, 1),
+ ("f1", b"E", 2, 2),
+ ("f1", b"F", 1, 1),
+ ("f2", b"1", 3, 3),
+ ("f2", b"2", 3, 3),
+ ("f2", b"3", 2, 2),
+ ("f2", b"4", 2, 2),
+ ("f2", b"5", 2, 2),
+ ("f2", b"6", 2, 2),
+ ("f3", b"Q", 2, 2),
+ ("f3", b"R", 2, 2),
+ ("f3", b"S", 2, 2),
+ ("f3", b"X", 3, 3),
+ ("f3", b"Y", 3, 3),
+ ("f3", b"Z", 2, 2),
]
target = sorted(target)
@@ -109,11 +108,11 @@ def test_term_inspection():
with TempIndex(schema) as ix:
with ix.writer() as w:
w.add_document(
- title=u("My document"),
- content=u("AA AA BB BB CC AA AA AA BB BB CC DD EE EE"),
+ title="My document",
+ content="AA AA BB BB CC AA AA AA BB BB CC DD EE EE",
)
w.add_document(
- title=u("My other document"), content=u("AA AB BB CC EE EE AX AX DD")
+ title="My other document", content="AA AB BB CC EE EE AX AX DD"
)
with ix.reader() as r:
@@ -121,52 +120,52 @@ def test_term_inspection():
assert cterms == "aa ab ax bb cc dd ee"
a_exp = list(r.expand_prefix("content", "a"))
- assert a_exp == [b("aa"), b("ab"), b("ax")]
+ assert a_exp == [b"aa", b"ab", b"ax"]
assert set(r.all_terms()) == {
- ("content", b("aa")),
- ("content", b("ab")),
- ("content", b("ax")),
- ("content", b("bb")),
- ("content", b("cc")),
- ("content", b("dd")),
- ("content", b("ee")),
- ("title", b("document")),
- ("title", b("my")),
- ("title", b("other")),
+ ("content", b"aa"),
+ ("content", b"ab"),
+ ("content", b"ax"),
+ ("content", b"bb"),
+ ("content", b"cc"),
+ ("content", b"dd"),
+ ("content", b"ee"),
+ ("title", b"document"),
+ ("title", b"my"),
+ ("title", b"other"),
}
# (text, doc_freq, index_freq)
cstats = _fstats(r.iter_field("content"))
assert cstats == [
- (b("aa"), 2, 6),
- (b("ab"), 1, 1),
- (b("ax"), 1, 2),
- (b("bb"), 2, 5),
- (b("cc"), 2, 3),
- (b("dd"), 2, 2),
- (b("ee"), 2, 4),
+ (b"aa", 2, 6),
+ (b"ab", 1, 1),
+ (b"ax", 1, 2),
+ (b"bb", 2, 5),
+ (b"cc", 2, 3),
+ (b"dd", 2, 2),
+ (b"ee", 2, 4),
]
prestats = _fstats(r.iter_field("content", prefix="c"))
- assert prestats == [(b("cc"), 2, 3), (b("dd"), 2, 2), (b("ee"), 2, 4)]
+ assert prestats == [(b"cc", 2, 3), (b"dd", 2, 2), (b"ee", 2, 4)]
assert list(r.most_frequent_terms("content")) == [
- (6, b("aa")),
- (5, b("bb")),
- (4, b("ee")),
- (3, b("cc")),
- (2, b("dd")),
+ (6, b"aa"),
+ (5, b"bb"),
+ (4, b"ee"),
+ (3, b"cc"),
+ (2, b"dd"),
]
assert list(r.most_frequent_terms("content", prefix="a")) == [
- (6, b("aa")),
- (2, b("ax")),
- (1, b("ab")),
+ (6, b"aa"),
+ (2, b"ax"),
+ (1, b"ab"),
]
assert list(r.most_distinctive_terms("content", 3)) == [
- (1.3862943611198906, b("ax")),
- (0.6931471805599453, b("ab")),
- (0.0, b("ee")),
+ (1.3862943611198906, b"ax"),
+ (0.6931471805599453, b"ab"),
+ (0.0, b"ee"),
]
@@ -179,21 +178,19 @@ def test_vector_postings():
ix = st.create_index(s)
writer = ix.writer()
- writer.add_document(
- id=u("1"), content=u("the quick brown fox jumped over the " + "lazy dogs")
- )
+ writer.add_document(id="1", content="the quick brown fox jumped over the lazy dogs")
writer.commit()
r = ix.reader()
terms = list(r.vector_as("weight", 0, "content"))
assert terms == [
- (u("brown"), 1.0),
- (u("dogs"), 1.0),
- (u("fox"), 1.0),
- (u("jumped"), 1.0),
- (u("lazy"), 1.0),
- (u("over"), 1.0),
- (u("quick"), 1.0),
+ ("brown", 1.0),
+ ("dogs", 1.0),
+ ("fox", 1.0),
+ ("jumped", 1.0),
+ ("lazy", 1.0),
+ ("over", 1.0),
+ ("quick", 1.0),
]
@@ -208,17 +205,17 @@ def test_stored_fields():
ix = st.create_index(s)
writer = ix.writer()
- writer.add_document(a=u("1"), b="a", c=u("zulu"), d=u("Alfa"))
- writer.add_document(a=u("2"), b="b", c=u("yankee"), d=u("Bravo"))
- writer.add_document(a=u("3"), b="c", c=u("xray"), d=u("Charlie"))
+ writer.add_document(a="1", b="a", c="zulu", d="Alfa")
+ writer.add_document(a="2", b="b", c="yankee", d="Bravo")
+ writer.add_document(a="3", b="c", c="xray", d="Charlie")
writer.commit()
with ix.searcher() as sr:
- assert sr.stored_fields(0) == {"a": u("1"), "b": "a", "d": u("Alfa")}
- assert sr.stored_fields(2) == {"a": u("3"), "b": "c", "d": u("Charlie")}
+ assert sr.stored_fields(0) == {"a": "1", "b": "a", "d": "Alfa"}
+ assert sr.stored_fields(2) == {"a": "3", "b": "c", "d": "Charlie"}
- assert sr.document(a=u("1")) == {"a": u("1"), "b": "a", "d": u("Alfa")}
- assert sr.document(a=u("2")) == {"a": u("2"), "b": "b", "d": u("Bravo")}
+ assert sr.document(a="1") == {"a": "1", "b": "a", "d": "Alfa"}
+ assert sr.document(a="2") == {"a": "2", "b": "b", "d": "Bravo"}
def test_stored_fields2():
@@ -236,22 +233,22 @@ def test_stored_fields2():
writer = ix.writer()
writer.add_document(
- content=u("Content of this document."),
- title=u("This is the title"),
- summary=u("This is the summary"),
- path=u("/main"),
+ content="Content of this document.",
+ title="This is the title",
+ summary="This is the summary",
+ path="/main",
)
writer.add_document(
- content=u("Second document."),
- title=u("Second title"),
- summary=u("Summary numero due"),
- path=u("/second"),
+ content="Second document.",
+ title="Second title",
+ summary="Summary numero due",
+ path="/second",
)
writer.add_document(
- content=u("Third document."),
- title=u("Title 3"),
- summary=u("Summary treo"),
- path=u("/san"),
+ content="Third document.",
+ title="Title 3",
+ summary="Summary treo",
+ path="/san",
)
writer.commit()
@@ -275,10 +272,10 @@ def test_all_stored_fields():
schema = fields.Schema(a=fields.ID(stored=True), b=fields.STORED)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(a=u("alfa"), b=u("bravo"))
- w.add_document(a=u("apple"), b=u("bear"))
- w.add_document(a=u("alpaca"), b=u("beagle"))
- w.add_document(a=u("aim"), b=u("box"))
+ w.add_document(a="alfa", b="bravo")
+ w.add_document(a="apple", b="bear")
+ w.add_document(a="alpaca", b="beagle")
+ w.add_document(a="aim", b="box")
w = ix.writer()
w.delete_by_term("a", "apple")
@@ -297,37 +294,37 @@ def test_first_id():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(path=u("/a"))
- w.add_document(path=u("/b"))
- w.add_document(path=u("/c"))
+ w.add_document(path="/a")
+ w.add_document(path="/b")
+ w.add_document(path="/c")
w.commit()
r = ix.reader()
- docid = r.first_id("path", u("/b"))
+ docid = r.first_id("path", "/b")
assert r.stored_fields(docid) == {"path": "/b"}
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(path=u("/a"))
- w.add_document(path=u("/b"))
- w.add_document(path=u("/c"))
+ w.add_document(path="/a")
+ w.add_document(path="/b")
+ w.add_document(path="/c")
w.commit(merge=False)
w = ix.writer()
- w.add_document(path=u("/d"))
- w.add_document(path=u("/e"))
- w.add_document(path=u("/f"))
+ w.add_document(path="/d")
+ w.add_document(path="/e")
+ w.add_document(path="/f")
w.commit(merge=False)
w = ix.writer()
- w.add_document(path=u("/g"))
- w.add_document(path=u("/h"))
- w.add_document(path=u("/i"))
+ w.add_document(path="/g")
+ w.add_document(path="/h")
+ w.add_document(path="/i")
w.commit(merge=False)
r = ix.reader()
assert r.__class__ == reading.MultiReader
- docid = r.first_id("path", u("/e"))
+ docid = r.first_id("path", "/e")
assert r.stored_fields(docid) == {"path": "/e"}
with pytest.raises(NotImplementedError):
@@ -346,7 +343,7 @@ def run(self):
class RecoverWriter(threading.Thread):
- domain = u("alfa bravo charlie deleta echo foxtrot golf hotel india")
+ domain = "alfa bravo charlie deleta echo foxtrot golf hotel india"
domain = domain.split()
def __init__(self, ix):
@@ -375,9 +372,9 @@ def test_delete_recovery():
def test_nonexclusive_read():
schema = fields.Schema(text=fields.TEXT)
with TempIndex(schema, "readlock") as ix:
- for num in u("one two three four five").split():
+ for num in "one two three four five".split():
w = ix.writer()
- w.add_document(text=u("Test document %s") % num)
+ w.add_document(text=f"Test document {num}")
w.commit(merge=False)
def fn():
@@ -500,54 +497,54 @@ def _check_inspection_results(ix):
assert cterms == "aa aé aú bb cc dd ee"
a_exp = list(r.expand_prefix("content", "a"))
- assert a_exp == [b("aa"), AE, AU]
+ assert a_exp == [b"aa", AE, AU]
tset = set(r.all_terms())
assert tset == {
- ("content", b("aa")),
+ ("content", b"aa"),
("content", AE),
("content", AU),
- ("content", b("bb")),
- ("content", b("cc")),
- ("content", b("dd")),
- ("content", b("ee")),
- ("title", b("document")),
- ("title", b("my")),
- ("title", b("other")),
+ ("content", b"bb"),
+ ("content", b"cc"),
+ ("content", b"dd"),
+ ("content", b"ee"),
+ ("title", b"document"),
+ ("title", b"my"),
+ ("title", b"other"),
}
# (text, doc_freq, index_freq)
assert _fstats(r.iter_field("content")) == [
- (b("aa"), 2, 6),
+ (b"aa", 2, 6),
(AE, 1, 1),
(AU, 1, 2),
- (b("bb"), 2, 5),
- (b("cc"), 2, 3),
- (b("dd"), 2, 2),
- (b("ee"), 2, 4),
+ (b"bb", 2, 5),
+ (b"cc", 2, 3),
+ (b"dd", 2, 2),
+ (b"ee", 2, 4),
]
assert _fstats(r.iter_field("content", prefix="c")) == [
- (b("cc"), 2, 3),
- (b("dd"), 2, 2),
- (b("ee"), 2, 4),
+ (b"cc", 2, 3),
+ (b"dd", 2, 2),
+ (b"ee", 2, 4),
]
assert list(r.most_frequent_terms("content")) == [
- (6, b("aa")),
- (5, b("bb")),
- (4, b("ee")),
- (3, b("cc")),
- (2, b("dd")),
+ (6, b"aa"),
+ (5, b"bb"),
+ (4, b"ee"),
+ (3, b"cc"),
+ (2, b"dd"),
]
assert list(r.most_frequent_terms("content", prefix="a")) == [
- (6, b("aa")),
+ (6, b"aa"),
(2, AU),
(1, AE),
]
assert list(r.most_distinctive_terms("content", 3)) == [
(1.3862943611198906, AU),
(0.6931471805599453, AE),
- (0.0, b("ee")),
+ (0.0, b"ee"),
]
diff --git a/tests/test_results.py b/tests/test_results.py
index 3494ea15..6a586fe1 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -1,7 +1,8 @@
+from itertools import permutations
+
import pytest
from whoosh import analysis, fields, formats, highlight, qparser, query
from whoosh.codec.whoosh3 import W3Codec
-from whoosh.compat import permutations, text_type, u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.testing import TempIndex, TempStorage
@@ -14,20 +15,20 @@ def test_score_retrieval():
ix = storage.create_index(schema)
writer = ix.writer()
writer.add_document(
- title=u("Miss Mary"),
- content=u("Mary had a little white lamb its fleece" " was white as snow"),
+ title="Miss Mary",
+ content="Mary had a little white lamb its fleece was white as snow",
)
writer.add_document(
- title=u("Snow White"),
- content=u("Snow white lived in the forest with seven" " dwarfs"),
+ title="Snow White",
+ content="Snow white lived in the forest with seven dwarfs",
)
writer.commit()
with ix.searcher() as s:
results = s.search(query.Term("content", "white"))
assert len(results) == 2
- assert results[0]["title"] == u("Miss Mary")
- assert results[1]["title"] == u("Snow White")
+ assert results[0]["title"] == "Miss Mary"
+ assert results[1]["title"] == "Snow White"
assert results.score(0) is not None
assert results.score(0) != 0
assert results.score(0) != 1
@@ -39,14 +40,14 @@ def test_resultcopy():
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(a=u("alfa bravo charlie"))
- w.add_document(a=u("bravo charlie delta"))
- w.add_document(a=u("charlie delta echo"))
- w.add_document(a=u("delta echo foxtrot"))
+ w.add_document(a="alfa bravo charlie")
+ w.add_document(a="bravo charlie delta")
+ w.add_document(a="charlie delta echo")
+ w.add_document(a="delta echo foxtrot")
w.commit()
with ix.searcher() as s:
- r = s.search(qparser.QueryParser("a", None).parse(u("charlie")))
+ r = s.search(qparser.QueryParser("a", None).parse("charlie"))
assert len(r) == 3
rcopy = r.copy()
assert r.top_n == rcopy.top_n
@@ -57,16 +58,16 @@ def test_resultslength():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), value=u("alfa alfa alfa alfa alfa"))
- w.add_document(id=u("2"), value=u("alfa alfa alfa alfa"))
- w.add_document(id=u("3"), value=u("alfa alfa alfa"))
- w.add_document(id=u("4"), value=u("alfa alfa"))
- w.add_document(id=u("5"), value=u("alfa"))
- w.add_document(id=u("6"), value=u("bravo"))
+ w.add_document(id="1", value="alfa alfa alfa alfa alfa")
+ w.add_document(id="2", value="alfa alfa alfa alfa")
+ w.add_document(id="3", value="alfa alfa alfa")
+ w.add_document(id="4", value="alfa alfa")
+ w.add_document(id="5", value="alfa")
+ w.add_document(id="6", value="bravo")
w.commit()
with ix.searcher() as s:
- r = s.search(query.Term("value", u("alfa")), limit=3)
+ r = s.search(query.Term("value", "alfa"), limit=3)
assert len(r) == 5
assert r.scored_length() == 3
assert r[10:] == []
@@ -76,14 +77,14 @@ def test_combine():
schema = fields.Schema(id=fields.ID(stored=True), value=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), value=u("alfa bravo charlie all"))
- w.add_document(id=u("2"), value=u("bravo charlie echo all"))
- w.add_document(id=u("3"), value=u("charlie echo foxtrot all"))
- w.add_document(id=u("4"), value=u("echo foxtrot india all"))
- w.add_document(id=u("5"), value=u("foxtrot india juliet all"))
- w.add_document(id=u("6"), value=u("india juliet alfa all"))
- w.add_document(id=u("7"), value=u("juliet alfa bravo all"))
- w.add_document(id=u("8"), value=u("charlie charlie charlie all"))
+ w.add_document(id="1", value="alfa bravo charlie all")
+ w.add_document(id="2", value="bravo charlie echo all")
+ w.add_document(id="3", value="charlie echo foxtrot all")
+ w.add_document(id="4", value="echo foxtrot india all")
+ w.add_document(id="5", value="foxtrot india juliet all")
+ w.add_document(id="6", value="india juliet alfa all")
+ w.add_document(id="7", value="juliet alfa bravo all")
+ w.add_document(id="8", value="charlie charlie charlie all")
w.commit()
with ix.searcher() as s:
@@ -98,26 +99,26 @@ def check(r1, methodname, r2, ids):
def rfor(t):
return s.search(query.Term("value", t))
- assert idsof(rfor(u("foxtrot"))) == "345"
- check(rfor(u("foxtrot")), "extend", rfor("charlie"), "345812")
- check(rfor(u("foxtrot")), "filter", rfor("juliet"), "5")
- check(rfor(u("charlie")), "filter", rfor("foxtrot"), "3")
- check(rfor(u("all")), "filter", rfor("foxtrot"), "345")
- check(rfor(u("all")), "upgrade", rfor("india"), "45612378")
- check(rfor(u("charlie")), "upgrade_and_extend", rfor("echo"), "23814")
+ assert idsof(rfor("foxtrot")) == "345"
+ check(rfor("foxtrot"), "extend", rfor("charlie"), "345812")
+ check(rfor("foxtrot"), "filter", rfor("juliet"), "5")
+ check(rfor("charlie"), "filter", rfor("foxtrot"), "3")
+ check(rfor("all"), "filter", rfor("foxtrot"), "345")
+ check(rfor("all"), "upgrade", rfor("india"), "45612378")
+ check(rfor("charlie"), "upgrade_and_extend", rfor("echo"), "23814")
def test_results_filter():
schema = fields.Schema(id=fields.STORED, words=fields.KEYWORD(stored=True))
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id="1", words=u("bravo top"))
- w.add_document(id="2", words=u("alfa top"))
- w.add_document(id="3", words=u("alfa top"))
- w.add_document(id="4", words=u("alfa bottom"))
- w.add_document(id="5", words=u("bravo bottom"))
- w.add_document(id="6", words=u("charlie bottom"))
- w.add_document(id="7", words=u("charlie bottom"))
+ w.add_document(id="1", words="bravo top")
+ w.add_document(id="2", words="alfa top")
+ w.add_document(id="3", words="alfa top")
+ w.add_document(id="4", words="alfa bottom")
+ w.add_document(id="5", words="bravo bottom")
+ w.add_document(id="6", words="charlie bottom")
+ w.add_document(id="7", words="charlie bottom")
w.commit()
with ix.searcher() as s:
@@ -126,8 +127,8 @@ def check(r, target):
result = "".join(s.stored_fields(d)["id"] for d in r.docs())
assert result == target
- r = s.search(query.Term("words", u("alfa")))
- r.filter(s.search(query.Term("words", u("bottom"))))
+ r = s.search(query.Term("words", "alfa"))
+ r.filter(s.search(query.Term("words", "bottom")))
check(r, "4")
@@ -176,20 +177,20 @@ def test_extend_empty():
schema = fields.Schema(id=fields.STORED, words=fields.KEYWORD)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=1, words=u("alfa bravo charlie"))
- w.add_document(id=2, words=u("bravo charlie delta"))
- w.add_document(id=3, words=u("charlie delta echo"))
- w.add_document(id=4, words=u("delta echo foxtrot"))
- w.add_document(id=5, words=u("echo foxtrot golf"))
+ w.add_document(id=1, words="alfa bravo charlie")
+ w.add_document(id=2, words="bravo charlie delta")
+ w.add_document(id=3, words="charlie delta echo")
+ w.add_document(id=4, words="delta echo foxtrot")
+ w.add_document(id=5, words="echo foxtrot golf")
w.commit()
with ix.searcher() as s:
# Get an empty results object
- r1 = s.search(query.Term("words", u("hotel")))
+ r1 = s.search(query.Term("words", "hotel"))
# Copy it
r1c = r1.copy()
# Get a non-empty results object
- r2 = s.search(query.Term("words", u("delta")))
+ r2 = s.search(query.Term("words", "delta"))
# Copy it
r2c = r2.copy()
# Extend r1 with r2
@@ -202,21 +203,21 @@ def test_extend_filtered():
schema = fields.Schema(id=fields.STORED, text=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=1, text=u("alfa bravo charlie"))
- w.add_document(id=2, text=u("bravo charlie delta"))
- w.add_document(id=3, text=u("juliet delta echo"))
- w.add_document(id=4, text=u("delta bravo alfa"))
- w.add_document(id=5, text=u("foxtrot sierra tango"))
+ w.add_document(id=1, text="alfa bravo charlie")
+ w.add_document(id=2, text="bravo charlie delta")
+ w.add_document(id=3, text="juliet delta echo")
+ w.add_document(id=4, text="delta bravo alfa")
+ w.add_document(id=5, text="foxtrot sierra tango")
w.commit()
hits = lambda result: [hit["id"] for hit in result]
with ix.searcher() as s:
- r1 = s.search(query.Term("text", u("alfa")), filter={1, 4})
+ r1 = s.search(query.Term("text", "alfa"), filter={1, 4})
assert r1.allowed == {1, 4}
assert len(r1.top_n) == 0
- r2 = s.search(query.Term("text", u("bravo")))
+ r2 = s.search(query.Term("text", "bravo"))
assert len(r2.top_n) == 3
assert hits(r2) == [1, 2, 4]
@@ -235,16 +236,16 @@ def test_pages():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), c=u("alfa alfa alfa alfa alfa alfa"))
- w.add_document(id=u("2"), c=u("alfa alfa alfa alfa alfa"))
- w.add_document(id=u("3"), c=u("alfa alfa alfa alfa"))
- w.add_document(id=u("4"), c=u("alfa alfa alfa"))
- w.add_document(id=u("5"), c=u("alfa alfa"))
- w.add_document(id=u("6"), c=u("alfa"))
+ w.add_document(id="1", c="alfa alfa alfa alfa alfa alfa")
+ w.add_document(id="2", c="alfa alfa alfa alfa alfa")
+ w.add_document(id="3", c="alfa alfa alfa alfa")
+ w.add_document(id="4", c="alfa alfa alfa")
+ w.add_document(id="5", c="alfa alfa")
+ w.add_document(id="6", c="alfa")
w.commit()
with ix.searcher(weighting=Frequency) as s:
- q = query.Term("c", u("alfa"))
+ q = query.Term("c", "alfa")
r = s.search(q)
assert [d["id"] for d in r] == ["1", "2", "3", "4", "5", "6"]
r = s.search_page(q, 2, pagelen=2)
@@ -263,17 +264,17 @@ def test_pages_with_filter():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), type=u("odd"), c=u("alfa alfa alfa alfa alfa alfa"))
- w.add_document(id=u("2"), type=u("even"), c=u("alfa alfa alfa alfa alfa"))
- w.add_document(id=u("3"), type=u("odd"), c=u("alfa alfa alfa alfa"))
- w.add_document(id=u("4"), type=u("even"), c=u("alfa alfa alfa"))
- w.add_document(id=u("5"), type=u("odd"), c=u("alfa alfa"))
- w.add_document(id=u("6"), type=u("even"), c=u("alfa"))
+ w.add_document(id="1", type="odd", c="alfa alfa alfa alfa alfa alfa")
+ w.add_document(id="2", type="even", c="alfa alfa alfa alfa alfa")
+ w.add_document(id="3", type="odd", c="alfa alfa alfa alfa")
+ w.add_document(id="4", type="even", c="alfa alfa alfa")
+ w.add_document(id="5", type="odd", c="alfa alfa")
+ w.add_document(id="6", type="even", c="alfa")
w.commit()
with ix.searcher(weighting=Frequency) as s:
- q = query.Term("c", u("alfa"))
- filterq = query.Term("type", u("even"))
+ q = query.Term("c", "alfa")
+ filterq = query.Term("type", "even")
r = s.search(q, filter=filterq)
assert [d["id"] for d in r] == ["2", "4", "6"]
r = s.search_page(q, 2, pagelen=2, filter=filterq)
@@ -284,7 +285,7 @@ def test_extra_slice():
schema = fields.Schema(key=fields.ID(stored=True))
ix = RamStorage().create_index(schema)
w = ix.writer()
- for char in u("abcdefghijklmnopqrstuvwxyz"):
+ for char in "abcdefghijklmnopqrstuvwxyz":
w.add_document(key=char)
w.commit()
@@ -302,7 +303,7 @@ def test_page_counts():
w = ix.writer()
for i in range(10):
- w.add_document(id=text_type(i))
+ w.add_document(id=str(i))
w.commit()
with ix.searcher(weighting=Frequency) as s:
@@ -340,11 +341,11 @@ def test_resultspage():
domain = ("alfa", "bravo", "bravo", "charlie", "delta")
w = ix.writer()
for i, lst in enumerate(permutations(domain, 3)):
- w.add_document(id=text_type(i), content=u(" ").join(lst))
+ w.add_document(id=str(i), content=" ".join(lst))
w.commit()
with ix.searcher() as s:
- q = query.Term("content", u("bravo"))
+ q = query.Term("content", "bravo")
r = s.search(q, limit=10)
tops = list(r)
@@ -376,7 +377,7 @@ def test_highlight_setters():
schema = fields.Schema(text=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(text=u("Hello"))
+ w.add_document(text="Hello")
w.commit()
r = ix.searcher().search(query.Term("text", "hello"))
@@ -393,27 +394,27 @@ def test_snippets():
ix = RamStorage().create_index(schema)
w = ix.writer()
w.add_document(
- text=u(
+ text=(
"Lay out the rough animation by creating the important poses where they occur on the timeline."
)
)
w.add_document(
- text=u(
+ text=(
"Set key frames on everything that's key-able. This is for control and predictability: you don't want to accidentally leave something un-keyed. This is also much faster than selecting the parameters to key."
)
)
w.add_document(
- text=u(
+ text=(
"Use constant (straight) or sometimes linear transitions between keyframes in the channel editor. This makes the character jump between poses."
)
)
w.add_document(
- text=u(
+ text=(
"Keying everything gives quick, immediate results. But it can become difficult to tweak the animation later, especially for complex characters."
)
)
w.add_document(
- text=u(
+ text=(
"Copy the current pose to create the next one: pose the character, key everything, then copy the keyframe in the playbar to another frame, and key everything at that frame."
)
)
@@ -427,7 +428,7 @@ def test_snippets():
with ix.searcher() as s:
qp = qparser.QueryParser("text", ix.schema)
- q = qp.parse(u("key"))
+ q = qp.parse("key")
r = s.search(q, terms=True)
r.fragmenter = highlight.SentenceFragmenter()
r.formatter = highlight.UppercaseFormatter()
@@ -444,17 +445,17 @@ def test_keyterms():
st = RamStorage()
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(path=u("a"), content=u("This is some generic content"))
- w.add_document(path=u("b"), content=u("This is some distinctive content"))
+ w.add_document(path="a", content="This is some generic content")
+ w.add_document(path="b", content="This is some distinctive content")
w.commit()
with ix.searcher() as s:
- docnum = s.document_number(path=u("b"))
+ docnum = s.document_number(path="b")
keyterms = list(s.key_terms([docnum], "content"))
assert len(keyterms) > 0
assert keyterms[0][0] == "distinctive"
- r = s.search(query.Term("path", u("b")))
+ r = s.search(query.Term("path", "b"))
keyterms2 = list(r.key_terms("content"))
assert len(keyterms2) > 0
assert keyterms2[0][0] == "distinctive"
@@ -465,20 +466,18 @@ def test_lengths():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=1, text=u("alfa bravo charlie delta echo"))
- w.add_document(id=2, text=u("bravo charlie delta echo foxtrot"))
- w.add_document(id=3, text=u("charlie needle echo foxtrot golf"))
- w.add_document(id=4, text=u("delta echo foxtrot golf hotel"))
- w.add_document(id=5, text=u("echo needle needle hotel india"))
- w.add_document(id=6, text=u("foxtrot golf hotel india juliet"))
- w.add_document(id=7, text=u("golf needle india juliet kilo"))
- w.add_document(id=8, text=u("hotel india juliet needle lima"))
+ w.add_document(id=1, text="alfa bravo charlie delta echo")
+ w.add_document(id=2, text="bravo charlie delta echo foxtrot")
+ w.add_document(id=3, text="charlie needle echo foxtrot golf")
+ w.add_document(id=4, text="delta echo foxtrot golf hotel")
+ w.add_document(id=5, text="echo needle needle hotel india")
+ w.add_document(id=6, text="foxtrot golf hotel india juliet")
+ w.add_document(id=7, text="golf needle india juliet kilo")
+ w.add_document(id=8, text="hotel india juliet needle lima")
w.commit()
with ix.searcher() as s:
- q = query.Or(
- [query.Term("text", u("needle")), query.Term("text", u("charlie"))]
- )
+ q = query.Or([query.Term("text", "needle"), query.Term("text", "charlie")])
r = s.search(q, limit=2)
assert not r.has_exact_length()
assert r.estimated_length() == 7
@@ -493,14 +492,14 @@ def test_lengths2():
count = 0
for _ in range(3):
w = ix.writer()
- for ls in permutations(u("alfa bravo charlie").split()):
+ for ls in permutations("alfa bravo charlie".split()):
if "bravo" in ls and "charlie" in ls:
count += 1
- w.add_document(text=u(" ").join(ls))
+ w.add_document(text=" ".join(ls))
w.commit(merge=False)
with ix.searcher() as s:
- q = query.Or([query.Term("text", u("bravo")), query.Term("text", u("charlie"))])
+ q = query.Or([query.Term("text", "bravo"), query.Term("text", "charlie")])
r = s.search(q, limit=None)
assert len(r) == count
@@ -511,16 +510,16 @@ def test_lengths2():
def test_stability():
schema = fields.Schema(text=fields.TEXT)
ix = RamStorage().create_index(schema)
- domain = u("alfa bravo charlie delta").split()
+ domain = "alfa bravo charlie delta".split()
w = ix.writer()
for ls in permutations(domain, 3):
- w.add_document(text=u(" ").join(ls))
+ w.add_document(text=" ".join(ls))
w.commit()
with ix.searcher() as s:
- q = query.Term("text", u("bravo"))
+ q = query.Term("text", "bravo")
last = []
- for i in range(s.doc_frequency("text", u("bravo"))):
+ for i in range(s.doc_frequency("text", "bravo")):
# Only un-optimized results are stable
r = s.search(q, limit=i + 1, optimize=False)
docnums = [hit.docnum for hit in r]
@@ -532,14 +531,14 @@ def test_terms():
schema = fields.Schema(text=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(text=u("alfa sierra tango"))
- w.add_document(text=u("bravo charlie delta"))
- w.add_document(text=u("charlie delta echo"))
- w.add_document(text=u("delta echo foxtrot"))
+ w.add_document(text="alfa sierra tango")
+ w.add_document(text="bravo charlie delta")
+ w.add_document(text="charlie delta echo")
+ w.add_document(text="delta echo foxtrot")
w.commit()
qp = qparser.QueryParser("text", ix.schema)
- q = qp.parse(u("(bravo AND charlie) OR foxtrot OR missing"))
+ q = qp.parse("(bravo AND charlie) OR foxtrot OR missing")
r = ix.searcher().search(q, terms=True)
fieldobj = schema["text"]
@@ -559,7 +558,7 @@ def test_hit_column():
schema = fields.Schema(text=fields.TEXT())
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(text=u("alfa bravo charlie"))
+ w.add_document(text="alfa bravo charlie")
with ix.searcher() as s:
r = s.search(query.Term("text", "alfa"))
@@ -572,13 +571,13 @@ def test_hit_column():
schema = fields.Schema(text=fields.TEXT(sortable=True))
ix = RamStorage().create_index(schema)
with ix.writer(codec=W3Codec()) as w:
- w.add_document(text=u("alfa bravo charlie"))
+ w.add_document(text="alfa bravo charlie")
with ix.searcher() as s:
r = s.search(query.Term("text", "alfa"))
assert len(r) == 1
hit = r[0]
- assert hit["text"] == u("alfa bravo charlie")
+ assert hit["text"] == "alfa bravo charlie"
def test_closed_searcher():
@@ -619,35 +618,35 @@ def test_paged_highlights():
schema = fields.Schema(text=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(text=u("alfa bravo charlie delta echo foxtrot"))
- w.add_document(text=u("bravo charlie delta echo foxtrot golf"))
- w.add_document(text=u("charlie delta echo foxtrot golf hotel"))
- w.add_document(text=u("delta echo foxtrot golf hotel india"))
- w.add_document(text=u("echo foxtrot golf hotel india juliet"))
- w.add_document(text=u("foxtrot golf hotel india juliet kilo"))
+ w.add_document(text="alfa bravo charlie delta echo foxtrot")
+ w.add_document(text="bravo charlie delta echo foxtrot golf")
+ w.add_document(text="charlie delta echo foxtrot golf hotel")
+ w.add_document(text="delta echo foxtrot golf hotel india")
+ w.add_document(text="echo foxtrot golf hotel india juliet")
+ w.add_document(text="foxtrot golf hotel india juliet kilo")
with ix.searcher() as s:
- q = query.Term("text", u("alfa"))
+ q = query.Term("text", "alfa")
page = s.search_page(q, 1, pagelen=3)
page.results.fragmenter = highlight.WholeFragmenter()
page.results.formatter = highlight.UppercaseFormatter()
hi = page[0].highlights("text")
- assert hi == u("ALFA bravo charlie delta echo foxtrot")
+ assert hi == "ALFA bravo charlie delta echo foxtrot"
def test_phrase_keywords():
schema = fields.Schema(text=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(text=u("alfa bravo charlie delta"))
- w.add_document(text=u("bravo charlie delta echo"))
- w.add_document(text=u("charlie delta echo foxtrot"))
- w.add_document(text=u("delta echo foxtrot alfa"))
- w.add_document(text=u("echo foxtrot alfa bravo"))
+ w.add_document(text="alfa bravo charlie delta")
+ w.add_document(text="bravo charlie delta echo")
+ w.add_document(text="charlie delta echo foxtrot")
+ w.add_document(text="delta echo foxtrot alfa")
+ w.add_document(text="echo foxtrot alfa bravo")
with ix.searcher() as s:
- q = query.Phrase("text", u("alfa bravo").split())
+ q = query.Phrase("text", "alfa bravo".split())
r = s.search(q)
assert len(r) == 2
kts = " ".join(t for t, score in r.key_terms("text"))
@@ -658,8 +657,8 @@ def test_every_keywords():
schema = fields.Schema(title=fields.TEXT, content=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(title=u("alfa"), content=u("bravo"))
- w.add_document(title=u("charlie"), content=u("delta"))
+ w.add_document(title="alfa", content="bravo")
+ w.add_document(title="charlie", content="delta")
with ix.searcher() as s:
q = qparser.QueryParser("content", ix.schema).parse("*")
@@ -678,10 +677,10 @@ def test_filter_by_result():
)
with TempIndex(schema, "filter") as ix:
- words = u("foo bar baz qux barney").split()
+ words = "foo bar baz qux barney".split()
with ix.writer() as w:
for x in range(100):
- t = u("even" if x % 2 == 0 else "odd")
+ t = "even" if x % 2 == 0 else "odd"
c = words[x % len(words)]
w.add_document(title=t, content=c)
diff --git a/tests/test_searching.py b/tests/test_searching.py
index aa8410c0..4caaf95b 100644
--- a/tests/test_searching.py
+++ b/tests/test_searching.py
@@ -1,10 +1,10 @@
import copy
from datetime import datetime, timedelta, timezone
+from itertools import permutations, zip_longest
import pytest
from whoosh import analysis, fields, index, qparser, query, scoring
from whoosh.codec.whoosh3 import W3Codec
-from whoosh.compat import b, izip_longest, permutations, text_type, u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.testing import TempIndex
@@ -15,16 +15,12 @@ def make_index():
ix = st.create_index(s)
w = ix.writer()
+ w.add_document(key="A", name="Yellow brown", value="Blue red green render purple?")
+ w.add_document(key="B", name="Alpha beta", value="Gamma delta epsilon omega.")
+ w.add_document(key="C", name="One two", value="Three rendered four five.")
+ w.add_document(key="D", name="Quick went", value="Every red town.")
w.add_document(
- key=u("A"), name=u("Yellow brown"), value=u("Blue red green render purple?")
- )
- w.add_document(
- key=u("B"), name=u("Alpha beta"), value=u("Gamma delta epsilon omega.")
- )
- w.add_document(key=u("C"), name=u("One two"), value=u("Three rendered four five."))
- w.add_document(key=u("D"), name=u("Quick went"), value=u("Every red town."))
- w.add_document(
- key=u("E"), name=u("Yellow uptown"), value=u("Interest rendering outer photo!")
+ key="E", name="Yellow uptown", value="Interest rendering outer photo!"
)
w.commit()
@@ -61,43 +57,43 @@ def test_docs_method():
def test_term():
- _run_query(query.Term("name", u("yellow")), [u("A"), u("E")])
- _run_query(query.Term("value", u("zeta")), [])
- _run_query(query.Term("value", u("red")), [u("A"), u("D")])
+ _run_query(query.Term("name", "yellow"), ["A", "E"])
+ _run_query(query.Term("value", "zeta"), [])
+ _run_query(query.Term("value", "red"), ["A", "D"])
def test_require():
_run_query(
- query.Require(query.Term("value", u("red")), query.Term("name", u("yellow"))),
- [u("A")],
+ query.Require(query.Term("value", "red"), query.Term("name", "yellow")),
+ ["A"],
)
def test_and():
_run_query(
- query.And([query.Term("value", u("red")), query.Term("name", u("yellow"))]),
- [u("A")],
+ query.And([query.Term("value", "red"), query.Term("name", "yellow")]),
+ ["A"],
)
# Missing
_run_query(
- query.And([query.Term("value", u("ochre")), query.Term("name", u("glonk"))]), []
+ query.And([query.Term("value", "ochre"), query.Term("name", "glonk")]), []
)
def test_or():
_run_query(
- query.Or([query.Term("value", u("red")), query.Term("name", u("yellow"))]),
- [u("A"), u("D"), u("E")],
+ query.Or([query.Term("value", "red"), query.Term("name", "yellow")]),
+ ["A", "D", "E"],
)
# Missing
_run_query(
- query.Or([query.Term("value", u("ochre")), query.Term("name", u("glonk"))]), []
+ query.Or([query.Term("value", "ochre"), query.Term("name", "glonk")]), []
)
_run_query(query.Or([]), [])
def test_ors():
- domain = u("alfa bravo charlie delta").split()
+ domain = "alfa bravo charlie delta".split()
s = fields.Schema(num=fields.STORED, text=fields.TEXT)
st = RamStorage()
ix = st.create_index(s)
@@ -114,7 +110,7 @@ def test_ors():
q.binary_matcher = True
r2 = [(hit.docnum, hit.score) for hit in s.search(q, limit=None)]
- for item1, item2 in izip_longest(r1, r2):
+ for item1, item2 in zip_longest(r1, r2):
assert item1[0] == item2[0]
assert item1[1] == item2[1]
@@ -123,25 +119,23 @@ def test_not():
_run_query(
query.And(
[
- query.Or(
- [query.Term("value", u("red")), query.Term("name", u("yellow"))]
- ),
- query.Not(query.Term("name", u("quick"))),
+ query.Or([query.Term("value", "red"), query.Term("name", "yellow")]),
+ query.Not(query.Term("name", "quick")),
]
),
- [u("A"), u("E")],
+ ["A", "E"],
)
def test_topnot():
- _run_query(query.Not(query.Term("value", "red")), [u("B"), "C", "E"])
- _run_query(query.Not(query.Term("name", "yellow")), [u("B"), u("C"), u("D")])
+ _run_query(query.Not(query.Term("value", "red")), ["B", "C", "E"])
+ _run_query(query.Not(query.Term("name", "yellow")), ["B", "C", "D"])
def test_andnot():
_run_query(
- query.AndNot(query.Term("name", u("yellow")), query.Term("value", u("purple"))),
- [u("E")],
+ query.AndNot(query.Term("name", "yellow"), query.Term("value", "purple")),
+ ["E"],
)
@@ -149,24 +143,24 @@ def test_andnot2():
schema = fields.Schema(a=fields.ID(stored=True))
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(a=u("bravo"))
- w.add_document(a=u("echo"))
- w.add_document(a=u("juliet"))
+ w.add_document(a="bravo")
+ w.add_document(a="echo")
+ w.add_document(a="juliet")
w.commit()
w = ix.writer()
- w.add_document(a=u("kilo"))
- w.add_document(a=u("foxtrot"))
- w.add_document(a=u("charlie"))
+ w.add_document(a="kilo")
+ w.add_document(a="foxtrot")
+ w.add_document(a="charlie")
w.commit(merge=False)
w = ix.writer()
- w.delete_by_term("a", u("echo"))
- w.add_document(a=u("alfa"))
- w.add_document(a=u("india"))
- w.add_document(a=u("delta"))
+ w.delete_by_term("a", "echo")
+ w.add_document(a="alfa")
+ w.add_document(a="india")
+ w.add_document(a="delta")
w.commit(merge=False)
with ix.searcher() as s:
- q = query.TermRange("a", u("bravo"), u("k"))
+ q = query.TermRange("a", "bravo", "k")
qr = [hit["a"] for hit in s.search(q)]
assert " ".join(sorted(qr)) == "bravo charlie delta foxtrot india juliet"
@@ -183,15 +177,15 @@ def test_andnot2():
def test_variations():
- _run_query(query.Variations("value", u("render")), [u("A"), u("C"), u("E")])
+ _run_query(query.Variations("value", "render"), ["A", "C", "E"])
def test_wildcard():
_run_query(
query.Or(
- [query.Wildcard("value", u("*red*")), query.Wildcard("name", u("*yellow*"))]
+ [query.Wildcard("value", "*red*"), query.Wildcard("name", "*yellow*")]
),
- [u("A"), u("C"), u("D"), u("E")],
+ ["A", "C", "D", "E"],
)
# Missing
_run_query(query.Wildcard("value", "glonk*"), [])
@@ -202,11 +196,11 @@ def test_not2():
storage = RamStorage()
ix = storage.create_index(schema)
writer = ix.writer()
- writer.add_document(name=u("a"), value=u("alfa bravo charlie delta echo"))
- writer.add_document(name=u("b"), value=u("bravo charlie delta echo foxtrot"))
- writer.add_document(name=u("c"), value=u("charlie delta echo foxtrot golf"))
- writer.add_document(name=u("d"), value=u("delta echo golf hotel india"))
- writer.add_document(name=u("e"), value=u("echo golf hotel india juliet"))
+ writer.add_document(name="a", value="alfa bravo charlie delta echo")
+ writer.add_document(name="b", value="bravo charlie delta echo foxtrot")
+ writer.add_document(name="c", value="charlie delta echo foxtrot golf")
+ writer.add_document(name="d", value="delta echo golf hotel india")
+ writer.add_document(name="e", value="echo golf hotel india juliet")
writer.commit()
with ix.searcher() as s:
@@ -217,7 +211,7 @@ def test_not2():
results = s.search(p.parse("echo NOT bravo"))
assert sorted([d["name"] for d in results]) == ["c", "d", "e"]
- ix.delete_by_term("value", u("bravo"))
+ ix.delete_by_term("value", "bravo")
with ix.searcher() as s:
results = s.search(p.parse("echo NOT charlie"))
@@ -230,12 +224,12 @@ def test_not2():
# ix = st.create_index(schema)
#
# w = ix.writer()
-# w.add_document(k=1, v=u("alfa bravo charlie delta echo"))
-# w.add_document(k=2, v=u("bravo charlie delta echo foxtrot"))
-# w.add_document(k=3, v=u("charlie delta echo foxtrot golf"))
-# w.add_document(k=4, v=u("delta echo foxtrot golf hotel"))
-# w.add_document(k=5, v=u("echo foxtrot golf hotel india"))
-# w.add_document(k=6, v=u("foxtrot golf hotel india juliet"))
+# w.add_document(k=1, v="alfa bravo charlie delta echo")
+# w.add_document(k=2, v="bravo charlie delta echo foxtrot")
+# w.add_document(k=3, v="charlie delta echo foxtrot golf")
+# w.add_document(k=4, v="delta echo foxtrot golf hotel")
+# w.add_document(k=5, v="echo foxtrot golf hotel india")
+# w.add_document(k=6, v="foxtrot golf hotel india juliet")
# w.commit()
#
# s = ix.searcher()
@@ -250,17 +244,17 @@ def test_range():
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(id=u("A"), content=u("alfa bravo charlie delta echo"))
- w.add_document(id=u("B"), content=u("bravo charlie delta echo foxtrot"))
- w.add_document(id=u("C"), content=u("charlie delta echo foxtrot golf"))
- w.add_document(id=u("D"), content=u("delta echo foxtrot golf hotel"))
- w.add_document(id=u("E"), content=u("echo foxtrot golf hotel india"))
+ w.add_document(id="A", content="alfa bravo charlie delta echo")
+ w.add_document(id="B", content="bravo charlie delta echo foxtrot")
+ w.add_document(id="C", content="charlie delta echo foxtrot golf")
+ w.add_document(id="D", content="delta echo foxtrot golf hotel")
+ w.add_document(id="E", content="echo foxtrot golf hotel india")
w.commit()
with ix.searcher() as s:
qp = qparser.QueryParser("content", schema)
- q = qp.parse(u("charlie [delta TO foxtrot]"))
+ q = qp.parse("charlie [delta TO foxtrot]")
assert q.__class__ == query.And
assert q[0].__class__ == query.Term
assert q[1].__class__ == query.TermRange
@@ -269,9 +263,9 @@ def test_range():
assert not q[1].startexcl
assert not q[1].endexcl
ids = sorted([d["id"] for d in s.search(q)])
- assert ids == [u("A"), u("B"), u("C")]
+ assert ids == ["A", "B", "C"]
- q = qp.parse(u("foxtrot {echo TO hotel]"))
+ q = qp.parse("foxtrot {echo TO hotel]")
assert q.__class__ == query.And
assert q[0].__class__ == query.Term
assert q[1].__class__ == query.TermRange
@@ -280,19 +274,19 @@ def test_range():
assert q[1].startexcl
assert not q[1].endexcl
ids = sorted([d["id"] for d in s.search(q)])
- assert ids == [u("B"), u("C"), u("D"), u("E")]
+ assert ids == ["B", "C", "D", "E"]
- q = qp.parse(u("{bravo TO delta}"))
+ q = qp.parse("{bravo TO delta}")
assert q.__class__ == query.TermRange
assert q.start == "bravo"
assert q.end == "delta"
assert q.startexcl
assert q.endexcl
ids = sorted([d["id"] for d in s.search(q)])
- assert ids == [u("A"), u("B"), u("C")]
+ assert ids == ["A", "B", "C"]
# Shouldn't match anything
- q = qp.parse(u("[1 to 10]"))
+ q = qp.parse("[1 to 10]")
assert q.__class__ == query.TermRange
assert len(s.search(q)) == 0
@@ -302,7 +296,7 @@ def test_range_clusiveness():
st = RamStorage()
ix = st.create_index(schema)
w = ix.writer()
- for letter in u("abcdefg"):
+ for letter in "abcdefg":
w.add_document(id=letter)
w.commit()
@@ -324,7 +318,7 @@ def test_open_ranges():
st = RamStorage()
ix = st.create_index(schema)
w = ix.writer()
- for letter in u("abcdefg"):
+ for letter in "abcdefg":
w.add_document(id=letter)
w.commit()
@@ -336,12 +330,12 @@ def check(qstring, result):
r = "".join(sorted([d["id"] for d in s.search(q)]))
assert r == result
- check(u("[b TO]"), "bcdefg")
- check(u("[TO e]"), "abcde")
- check(u("[b TO d]"), "bcd")
- check(u("{b TO]"), "cdefg")
- check(u("[TO e}"), "abcd")
- check(u("{b TO d}"), "c")
+ check("[b TO]", "bcdefg")
+ check("[TO e]", "abcde")
+ check("[b TO d]", "bcd")
+ check("{b TO]", "cdefg")
+ check("[TO e}", "abcd")
+ check("{b TO d}", "c")
def test_open_numeric_ranges():
@@ -418,7 +412,7 @@ def test_open_date_ranges():
def test_negated_unlimited_ranges():
- # Whoosh should treat u("[to]") as if it was "*"
+ # Whoosh should treat "[to]" as if it was "*"
schema = fields.Schema(
id=fields.ID(stored=True), num=fields.NUMERIC, date=fields.DATETIME
)
@@ -426,7 +420,7 @@ def test_negated_unlimited_ranges():
w = ix.writer()
from string import ascii_letters
- domain = text_type(ascii_letters)
+ domain = ascii_letters
dt = datetime.now(tz=timezone.utc)
for i, letter in enumerate(domain):
@@ -436,14 +430,14 @@ def test_negated_unlimited_ranges():
with ix.searcher() as s:
qp = qparser.QueryParser("id", schema)
- nq = qp.parse(u("NOT [to]"))
+ nq = qp.parse("NOT [to]")
assert nq.__class__ == query.Not
q = nq.query
assert q.__class__ == query.Every
assert "".join(h["id"] for h in s.search(q, limit=None)) == domain
assert not list(nq.docs(s))
- nq = qp.parse(u("NOT num:[to]"))
+ nq = qp.parse("NOT num:[to]")
assert nq.__class__ == query.Not
q = nq.query
assert q.__class__ == query.NumericRange
@@ -452,7 +446,7 @@ def test_negated_unlimited_ranges():
assert "".join(h["id"] for h in s.search(q, limit=None)) == domain
assert not list(nq.docs(s))
- nq = qp.parse(u("NOT date:[to]"))
+ nq = qp.parse("NOT date:[to]")
assert nq.__class__ == query.Not
q = nq.query
assert q.__class__ == query.Every
@@ -466,14 +460,14 @@ def test_keyword_or():
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(a=u("First"), b=u("ccc ddd"))
- w.add_document(a=u("Second"), b=u("aaa ddd"))
- w.add_document(a=u("Third"), b=u("ccc eee"))
+ w.add_document(a="First", b="ccc ddd")
+ w.add_document(a="Second", b="aaa ddd")
+ w.add_document(a="Third", b="ccc eee")
w.commit()
qp = qparser.QueryParser("b", schema)
with ix.searcher() as s:
- qr = qp.parse(u("b:ccc OR b:eee"))
+ qr = qp.parse("b:ccc OR b:eee")
assert qr.__class__ == query.Or
r = s.search(qr)
assert len(r) == 2
@@ -485,22 +479,22 @@ def test_merged():
schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
with TempIndex(schema) as ix:
with ix.writer() as w:
- w.add_document(id=u("alfa"), content=u("alfa"))
- w.add_document(id=u("bravo"), content=u("bravo"))
+ w.add_document(id="alfa", content="alfa")
+ w.add_document(id="bravo", content="bravo")
with ix.searcher() as s:
- r = s.search(query.Term("content", u("bravo")))
+ r = s.search(query.Term("content", "bravo"))
assert len(r) == 1
assert r[0]["id"] == "bravo"
with ix.writer() as w:
- w.add_document(id=u("charlie"), content=u("charlie"))
+ w.add_document(id="charlie", content="charlie")
w.optimize = True
assert len(ix._segments()) == 1
with ix.searcher() as s:
- r = s.search(query.Term("content", u("bravo")))
+ r = s.search(query.Term("content", "bravo"))
assert len(r) == 1
assert r[0]["id"] == "bravo"
@@ -510,41 +504,41 @@ def test_multireader():
st = RamStorage()
ix = st.create_index(sc)
w = ix.writer()
- w.add_document(id=u("alfa"), content=u("alfa"))
- w.add_document(id=u("bravo"), content=u("bravo"))
- w.add_document(id=u("charlie"), content=u("charlie"))
- w.add_document(id=u("delta"), content=u("delta"))
- w.add_document(id=u("echo"), content=u("echo"))
- w.add_document(id=u("foxtrot"), content=u("foxtrot"))
- w.add_document(id=u("golf"), content=u("golf"))
- w.add_document(id=u("hotel"), content=u("hotel"))
- w.add_document(id=u("india"), content=u("india"))
+ w.add_document(id="alfa", content="alfa")
+ w.add_document(id="bravo", content="bravo")
+ w.add_document(id="charlie", content="charlie")
+ w.add_document(id="delta", content="delta")
+ w.add_document(id="echo", content="echo")
+ w.add_document(id="foxtrot", content="foxtrot")
+ w.add_document(id="golf", content="golf")
+ w.add_document(id="hotel", content="hotel")
+ w.add_document(id="india", content="india")
w.commit()
with ix.searcher() as s:
- r = s.search(query.Term("content", u("bravo")))
+ r = s.search(query.Term("content", "bravo"))
assert len(r) == 1
assert r[0]["id"] == "bravo"
w = ix.writer()
- w.add_document(id=u("juliet"), content=u("juliet"))
- w.add_document(id=u("kilo"), content=u("kilo"))
- w.add_document(id=u("lima"), content=u("lima"))
- w.add_document(id=u("mike"), content=u("mike"))
- w.add_document(id=u("november"), content=u("november"))
- w.add_document(id=u("oscar"), content=u("oscar"))
- w.add_document(id=u("papa"), content=u("papa"))
- w.add_document(id=u("quebec"), content=u("quebec"))
- w.add_document(id=u("romeo"), content=u("romeo"))
+ w.add_document(id="juliet", content="juliet")
+ w.add_document(id="kilo", content="kilo")
+ w.add_document(id="lima", content="lima")
+ w.add_document(id="mike", content="mike")
+ w.add_document(id="november", content="november")
+ w.add_document(id="oscar", content="oscar")
+ w.add_document(id="papa", content="papa")
+ w.add_document(id="quebec", content="quebec")
+ w.add_document(id="romeo", content="romeo")
w.commit()
assert len(ix._segments()) == 2
# r = ix.reader()
# assert r.__class__.__name__ == "MultiReader"
- # pr = r.postings("content", u("bravo"))
+ # pr = r.postings("content", "bravo")
with ix.searcher() as s:
- r = s.search(query.Term("content", u("bravo")))
+ r = s.search(query.Term("content", "bravo"))
assert len(r) == 1
assert r[0]["id"] == "bravo"
@@ -554,14 +548,14 @@ def test_posting_phrase():
storage = RamStorage()
ix = storage.create_index(schema)
writer = ix.writer()
- writer.add_document(name=u("A"), value=u("Little Miss Muffet sat on a tuffet"))
- writer.add_document(name=u("B"), value=u("Miss Little Muffet tuffet"))
- writer.add_document(name=u("C"), value=u("Miss Little Muffet tuffet sat"))
+ writer.add_document(name="A", value="Little Miss Muffet sat on a tuffet")
+ writer.add_document(name="B", value="Miss Little Muffet tuffet")
+ writer.add_document(name="C", value="Miss Little Muffet tuffet sat")
writer.add_document(
- name=u("D"),
- value=u("Gibberish blonk falunk miss muffet sat " + "tuffet garbonzo"),
+ name="D",
+ value="Gibberish blonk falunk miss muffet sat " + "tuffet garbonzo",
)
- writer.add_document(name=u("E"), value=u("Blah blah blah pancakes"))
+ writer.add_document(name="E", value="Blah blah blah pancakes")
writer.commit()
with ix.searcher() as s:
@@ -569,9 +563,7 @@ def test_posting_phrase():
def names(results):
return sorted([fields["name"] for fields in results])
- q = query.Phrase(
- "value", [u("little"), u("miss"), u("muffet"), u("sat"), u("tuffet")]
- )
+ q = query.Phrase("value", ["little", "miss", "muffet", "sat", "tuffet"])
m = q.matcher(s)
assert m.__class__.__name__ == "SpanNear2Matcher"
@@ -579,21 +571,21 @@ def names(results):
assert names(r) == ["A"]
assert len(r) == 1
- q = query.Phrase("value", [u("miss"), u("muffet"), u("sat"), u("tuffet")])
+ q = query.Phrase("value", ["miss", "muffet", "sat", "tuffet"])
assert names(s.search(q)) == ["A", "D"]
- q = query.Phrase("value", [u("falunk"), u("gibberish")])
+ q = query.Phrase("value", ["falunk", "gibberish"])
r = s.search(q)
assert not names(r)
assert len(r) == 0
- q = query.Phrase("value", [u("gibberish"), u("falunk")], slop=2)
+ q = query.Phrase("value", ["gibberish", "falunk"], slop=2)
assert names(s.search(q)) == ["D"]
- q = query.Phrase("value", [u("blah")] * 4)
+ q = query.Phrase("value", ["blah"] * 4)
assert not names(s.search(q)) # blah blah blah blah
- q = query.Phrase("value", [u("blah")] * 3)
+ q = query.Phrase("value", ["blah"] * 3)
m = q.matcher(s)
assert names(s.search(q)) == ["E"]
@@ -603,17 +595,17 @@ def test_phrase_score():
storage = RamStorage()
ix = storage.create_index(schema)
writer = ix.writer()
- writer.add_document(name=u("A"), value=u("Little Miss Muffet sat on a tuffet"))
+ writer.add_document(name="A", value="Little Miss Muffet sat on a tuffet")
writer.add_document(
- name=u("D"),
- value=u("Gibberish blonk falunk miss muffet sat " + "tuffet garbonzo"),
+ name="D",
+ value="Gibberish blonk falunk miss muffet sat " + "tuffet garbonzo",
)
- writer.add_document(name=u("E"), value=u("Blah blah blah pancakes"))
- writer.add_document(name=u("F"), value=u("Little miss muffet little miss muffet"))
+ writer.add_document(name="E", value="Blah blah blah pancakes")
+ writer.add_document(name="F", value="Little miss muffet little miss muffet")
writer.commit()
with ix.searcher() as s:
- q = query.Phrase("value", [u("little"), u("miss"), u("muffet")])
+ q = query.Phrase("value", ["little", "miss", "muffet"])
m = q.matcher(s)
assert m.id() == 0
score1 = m.weight()
@@ -628,18 +620,18 @@ def test_stop_phrase():
storage = RamStorage()
ix = storage.create_index(schema)
writer = ix.writer()
- writer.add_document(title=u("Richard of York"))
- writer.add_document(title=u("Lily the Pink"))
+ writer.add_document(title="Richard of York")
+ writer.add_document(title="Lily the Pink")
writer.commit()
with ix.searcher() as s:
qp = qparser.QueryParser("title", schema)
- q = qp.parse(u("richard of york"))
- assert q.__unicode__() == "(title:richard AND title:york)"
+ q = qp.parse("richard of york")
+ assert str(q) == "(title:richard AND title:york)"
assert len(s.search(q)) == 1
- # q = qp.parse(u("lily the pink"))
+ # q = qp.parse("lily the pink")
# assert len(s.search(q)), 1)
- assert len(s.find("title", u("lily the pink"))) == 1
+ assert len(s.find("title", "lily the pink")) == 1
def test_phrase_order():
@@ -650,7 +642,7 @@ def test_phrase_order():
writer = ix.writer()
for ls in permutations(["ape", "bay", "can", "day"], 4):
- writer.add_document(text=u(" ").join(ls))
+ writer.add_document(text=" ".join(ls))
writer.commit()
with ix.searcher() as s:
@@ -660,7 +652,7 @@ def result(q):
return sorted([d["text"] for d in r])
q = query.Phrase("text", ["bay", "can", "day"])
- assert result(q) == [u("ape bay can day"), u("bay can day ape")]
+ assert result(q) == ["ape bay can day", "bay can day ape"]
def test_phrase_sameword():
@@ -669,8 +661,8 @@ def test_phrase_sameword():
ix = storage.create_index(schema)
writer = ix.writer()
- writer.add_document(id=1, text=u("The film Linda Linda Linda is good"))
- writer.add_document(id=2, text=u("The model Linda Evangelista is pretty"))
+ writer.add_document(id=1, text="The film Linda Linda Linda is good")
+ writer.add_document(id=2, text="The model Linda Evangelista is pretty")
writer.commit()
with ix.searcher() as s:
@@ -683,12 +675,12 @@ def test_phrase_multi():
schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
ix = RamStorage().create_index(schema)
- domain = u("alfa bravo charlie delta echo").split()
+ domain = "alfa bravo charlie delta echo".split()
w = None
for i, ls in enumerate(permutations(domain)):
if w is None:
w = ix.writer()
- w.add_document(id=i, text=u(" ").join(ls))
+ w.add_document(id=i, text=" ".join(ls))
if not i % 30:
w.commit()
w = None
@@ -706,21 +698,21 @@ def test_missing_field_scoring():
)
with TempIndex(schema) as ix:
with ix.writer() as w:
- w.add_document(name=u("Frank"), hobbies=u("baseball, basketball"))
+ w.add_document(name="Frank", hobbies="baseball, basketball")
with ix.reader() as r:
assert r.field_length("hobbies") == 2
assert r.field_length("name") == 1
with ix.writer() as w:
- w.add_document(name=u("Jonny"))
+ w.add_document(name="Jonny")
with ix.searcher() as s:
assert s.field_length("hobbies") == 2
assert s.field_length("name") == 2
parser = qparser.MultifieldParser(["name", "hobbies"], schema)
- q = parser.parse(u("baseball"))
+ q = parser.parse("baseball")
result = s.search(q)
assert len(result) == 1
@@ -731,20 +723,20 @@ def test_search_fieldname_underscores():
ix = st.create_index(s)
w = ix.writer()
- w.add_document(my_name=u("Green"), my_value=u("It's not easy being green"))
- w.add_document(my_name=u("Red"), my_value=u("Hopping mad like a playground ball"))
+ w.add_document(my_name="Green", my_value="It's not easy being green")
+ w.add_document(my_name="Red", my_value="Hopping mad like a playground ball")
w.commit()
qp = qparser.QueryParser("my_value", schema=s)
with ix.searcher() as s:
- r = s.search(qp.parse(u("my_name:Green")))
+ r = s.search(qp.parse("my_name:Green"))
assert r[0]["my_name"] == "Green"
def test_short_prefix():
s = fields.Schema(name=fields.ID, value=fields.TEXT)
qp = qparser.QueryParser("value", schema=s)
- q = qp.parse(u("s*"))
+ q = qp.parse("s*")
assert q.__class__.__name__ == "Prefix"
assert q.text == "s"
@@ -757,10 +749,10 @@ def test_weighting():
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), n_comments=5)
- w.add_document(id=u("2"), n_comments=12)
- w.add_document(id=u("3"), n_comments=2)
- w.add_document(id=u("4"), n_comments=7)
+ w.add_document(id="1", n_comments=5)
+ w.add_document(id="2", n_comments=12)
+ w.add_document(id="3", n_comments=2)
+ w.add_document(id="4", n_comments=7)
w.commit()
# Fake Weighting implementation
@@ -778,7 +770,7 @@ def score(self, matcher):
return ncomments
with ix.searcher(weighting=CommentWeighting()) as s:
- q = query.TermRange("id", u("1"), u("4"), constantscore=False)
+ q = query.TermRange("id", "1", "4", constantscore=False)
r = s.search(q)
ids = [fs["id"] for fs in r]
@@ -793,9 +785,9 @@ def test_dismax():
w = ix.writer()
w.add_document(
id=1,
- f1=u("alfa bravo charlie delta"),
- f2=u("alfa alfa alfa"),
- f3=u("alfa echo foxtrot hotel india"),
+ f1="alfa bravo charlie delta",
+ f2="alfa alfa alfa",
+ f3="alfa echo foxtrot hotel india",
)
w.commit()
@@ -820,12 +812,12 @@ def test_deleted_wildcard():
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(id=u("alfa"))
- w.add_document(id=u("bravo"))
- w.add_document(id=u("charlie"))
- w.add_document(id=u("delta"))
- w.add_document(id=u("echo"))
- w.add_document(id=u("foxtrot"))
+ w.add_document(id="alfa")
+ w.add_document(id="bravo")
+ w.add_document(id="charlie")
+ w.add_document(id="delta")
+ w.add_document(id="echo")
+ w.add_document(id="foxtrot")
w.commit()
w = ix.writer()
@@ -845,11 +837,11 @@ def test_missing_wildcard():
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), f1=u("alfa"), f2=u("apple"))
- w.add_document(id=u("2"), f1=u("bravo"))
- w.add_document(id=u("3"), f1=u("charlie"), f2=u("candy"))
- w.add_document(id=u("4"), f2=u("donut"))
- w.add_document(id=u("5"))
+ w.add_document(id="1", f1="alfa", f2="apple")
+ w.add_document(id="2", f1="bravo")
+ w.add_document(id="3", f1="charlie", f2="candy")
+ w.add_document(id="4", f2="donut")
+ w.add_document(id="5")
w.commit()
with ix.searcher() as s:
@@ -873,10 +865,10 @@ def test_finalweighting():
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), summary=u("alfa bravo"), n_comments=5)
- w.add_document(id=u("2"), summary=u("alfa"), n_comments=12)
- w.add_document(id=u("3"), summary=u("bravo"), n_comments=2)
- w.add_document(id=u("4"), summary=u("bravo bravo"), n_comments=7)
+ w.add_document(id="1", summary="alfa bravo", n_comments=5)
+ w.add_document(id="2", summary="alfa", n_comments=12)
+ w.add_document(id="3", summary="bravo", n_comments=2)
+ w.add_document(id="4", summary="bravo bravo", n_comments=7)
w.commit()
class CommentWeighting(Frequency):
@@ -899,16 +891,16 @@ def test_outofdate():
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"))
- w.add_document(id=u("2"))
+ w.add_document(id="1")
+ w.add_document(id="2")
w.commit()
s = ix.searcher()
assert s.up_to_date()
w = ix.writer()
- w.add_document(id=u("3"))
- w.add_document(id=u("4"))
+ w.add_document(id="3")
+ w.add_document(id="4")
assert s.up_to_date()
w.commit()
@@ -924,18 +916,18 @@ def test_find_missing():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=u("1"), text=u("alfa"))
- w.add_document(id=u("2"), text=u("bravo"))
- w.add_document(text=u("charlie"))
- w.add_document(id=u("4"), text=u("delta"))
- w.add_document(text=u("echo"))
- w.add_document(id=u("6"), text=u("foxtrot"))
- w.add_document(text=u("golf"))
+ w.add_document(id="1", text="alfa")
+ w.add_document(id="2", text="bravo")
+ w.add_document(text="charlie")
+ w.add_document(id="4", text="delta")
+ w.add_document(text="echo")
+ w.add_document(id="6", text="foxtrot")
+ w.add_document(text="golf")
w.commit()
with ix.searcher() as s:
qp = qparser.QueryParser("text", schema)
- q = qp.parse(u("NOT id:*"))
+ q = qp.parse("NOT id:*")
r = s.search(q, limit=None)
assert [h["text"] for h in r] == ["charlie", "echo", "golf"]
@@ -946,44 +938,44 @@ def test_ngram_phrase():
ix = RamStorage().create_index(schema)
writer = ix.writer()
writer.add_document(
- text=u(
+ text=(
"\u9AD8\u6821\u307E\u3067\u306F\u6771\u4EAC"
"\u3067\u3001\u5927\u5B66\u304B\u3089\u306F"
"\u4EAC\u5927\u3067\u3059\u3002"
),
- path=u("sample"),
+ path="sample",
)
writer.commit()
with ix.searcher() as s:
p = qparser.QueryParser("text", schema)
- q = p.parse(u("\u6771\u4EAC\u5927\u5B66"))
+ q = p.parse("\u6771\u4EAC\u5927\u5B66")
assert len(s.search(q)) == 1
- q = p.parse(u('"\u6771\u4EAC\u5927\u5B66"'))
+ q = p.parse('"\u6771\u4EAC\u5927\u5B66"')
assert len(s.search(q)) == 0
- q = p.parse(u('"\u306F\u6771\u4EAC\u3067"'))
+ q = p.parse('"\u306F\u6771\u4EAC\u3067"')
assert len(s.search(q)) == 1
def test_ordered():
- domain = u("alfa bravo charlie delta echo foxtrot").split(" ")
+ domain = "alfa bravo charlie delta echo foxtrot".split(" ")
schema = fields.Schema(f=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)
writer = ix.writer()
for ls in permutations(domain):
- writer.add_document(f=u(" ").join(ls))
+ writer.add_document(f=" ".join(ls))
writer.commit()
with ix.searcher() as s:
q = query.Ordered(
[
- query.Term("f", u("alfa")),
- query.Term("f", u("charlie")),
- query.Term("f", u("echo")),
+ query.Term("f", "alfa"),
+ query.Term("f", "charlie"),
+ query.Term("f", "echo"),
]
)
r = s.search(q)
@@ -1002,20 +994,20 @@ def test_otherwise():
schema = fields.Schema(id=fields.STORED, f=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=1, f=u("alfa one two"))
- w.add_document(id=2, f=u("alfa three four"))
- w.add_document(id=3, f=u("bravo four five"))
- w.add_document(id=4, f=u("bravo six seven"))
+ w.add_document(id=1, f="alfa one two")
+ w.add_document(id=2, f="alfa three four")
+ w.add_document(id=3, f="bravo four five")
+ w.add_document(id=4, f="bravo six seven")
w.commit()
with ix.searcher() as s:
- q = query.Otherwise(query.Term("f", u("alfa")), query.Term("f", u("six")))
+ q = query.Otherwise(query.Term("f", "alfa"), query.Term("f", "six"))
assert [d["id"] for d in s.search(q)] == [1, 2]
- q = query.Otherwise(query.Term("f", u("tango")), query.Term("f", u("four")))
+ q = query.Otherwise(query.Term("f", "tango"), query.Term("f", "four"))
assert [d["id"] for d in s.search(q)] == [2, 3]
- q = query.Otherwise(query.Term("f", u("tango")), query.Term("f", u("nine")))
+ q = query.Otherwise(query.Term("f", "tango"), query.Term("f", "nine"))
assert [d["id"] for d in s.search(q)] == []
@@ -1023,10 +1015,10 @@ def test_fuzzyterm():
schema = fields.Schema(id=fields.STORED, f=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=1, f=u("alfa bravo charlie delta"))
- w.add_document(id=2, f=u("bravo charlie delta echo"))
- w.add_document(id=3, f=u("charlie delta echo foxtrot"))
- w.add_document(id=4, f=u("delta echo foxtrot golf"))
+ w.add_document(id=1, f="alfa bravo charlie delta")
+ w.add_document(id=2, f="bravo charlie delta echo")
+ w.add_document(id=3, f="charlie delta echo foxtrot")
+ w.add_document(id=4, f="delta echo foxtrot golf")
w.commit()
with ix.searcher() as s:
@@ -1038,14 +1030,14 @@ def test_fuzzyterm2():
schema = fields.Schema(id=fields.STORED, f=fields.TEXT(spelling=True))
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=1, f=u("alfa bravo charlie delta"))
- w.add_document(id=2, f=u("bravo charlie delta echo"))
- w.add_document(id=3, f=u("charlie delta echo foxtrot"))
- w.add_document(id=4, f=u("delta echo foxtrot golf"))
+ w.add_document(id=1, f="alfa bravo charlie delta")
+ w.add_document(id=2, f="bravo charlie delta echo")
+ w.add_document(id=3, f="charlie delta echo foxtrot")
+ w.add_document(id=4, f="delta echo foxtrot golf")
w.commit()
with ix.searcher() as s:
- assert list(s.reader().terms_within("f", u("brave"), 1)) == ["bravo"]
+ assert list(s.reader().terms_within("f", "brave", 1)) == ["bravo"]
q = query.FuzzyTerm("f", "brave")
assert [d["id"] for d in s.search(q)] == [1, 2]
@@ -1055,11 +1047,11 @@ def test_multireader_not():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=0, f=u("alfa bravo chralie"))
- w.add_document(id=1, f=u("bravo chralie delta"))
- w.add_document(id=2, f=u("charlie delta echo"))
- w.add_document(id=3, f=u("delta echo foxtrot"))
- w.add_document(id=4, f=u("echo foxtrot golf"))
+ w.add_document(id=0, f="alfa bravo chralie")
+ w.add_document(id=1, f="bravo chralie delta")
+ w.add_document(id=2, f="charlie delta echo")
+ w.add_document(id=3, f="delta echo foxtrot")
+ w.add_document(id=4, f="echo foxtrot golf")
w.commit()
with ix.searcher() as s:
@@ -1069,16 +1061,16 @@ def test_multireader_not():
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=5, f=u("alfa bravo chralie"))
- w.add_document(id=6, f=u("bravo chralie delta"))
+ w.add_document(id=5, f="alfa bravo chralie")
+ w.add_document(id=6, f="bravo chralie delta")
w.commit(merge=False)
w = ix.writer()
- w.add_document(id=7, f=u("charlie delta echo"))
- w.add_document(id=8, f=u("delta echo foxtrot"))
+ w.add_document(id=7, f="charlie delta echo")
+ w.add_document(id=8, f="delta echo foxtrot")
w.commit(merge=False)
w = ix.writer()
- w.add_document(id=9, f=u("echo foxtrot golf"))
- w.add_document(id=10, f=u("foxtrot golf delta"))
+ w.add_document(id=9, f="echo foxtrot golf")
+ w.add_document(id=10, f="foxtrot golf delta")
w.commit(merge=False)
assert len(ix._segments()) > 1
@@ -1093,18 +1085,18 @@ def test_boost_phrase():
title=fields.TEXT(field_boost=5.0, stored=True), text=fields.TEXT
)
ix = RamStorage().create_index(schema)
- domain = u("alfa bravo charlie delta").split()
+ domain = "alfa bravo charlie delta".split()
w = ix.writer()
for ls in permutations(domain):
- t = u(" ").join(ls)
+ t = " ".join(ls)
w.add_document(title=t, text=t)
w.commit()
q = query.Or(
[
- query.Term("title", u("alfa")),
- query.Term("title", u("bravo")),
- query.Phrase("text", [u("bravo"), u("charlie"), u("delta")]),
+ query.Term("title", "alfa"),
+ query.Term("title", "bravo"),
+ query.Phrase("text", ["bravo", "charlie", "delta"]),
]
)
@@ -1128,19 +1120,19 @@ def test_filter():
schema = fields.Schema(id=fields.STORED, path=fields.ID, text=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=1, path=u("/a/1"), text=u("alfa bravo charlie"))
- w.add_document(id=2, path=u("/b/1"), text=u("bravo charlie delta"))
- w.add_document(id=3, path=u("/c/1"), text=u("charlie delta echo"))
+ w.add_document(id=1, path="/a/1", text="alfa bravo charlie")
+ w.add_document(id=2, path="/b/1", text="bravo charlie delta")
+ w.add_document(id=3, path="/c/1", text="charlie delta echo")
w.commit(merge=False)
w = ix.writer()
- w.add_document(id=4, path=u("/a/2"), text=u("delta echo alfa"))
- w.add_document(id=5, path=u("/b/2"), text=u("echo alfa bravo"))
- w.add_document(id=6, path=u("/c/2"), text=u("alfa bravo charlie"))
+ w.add_document(id=4, path="/a/2", text="delta echo alfa")
+ w.add_document(id=5, path="/b/2", text="echo alfa bravo")
+ w.add_document(id=6, path="/c/2", text="alfa bravo charlie")
w.commit(merge=False)
w = ix.writer()
- w.add_document(id=7, path=u("/a/3"), text=u("bravo charlie delta"))
- w.add_document(id=8, path=u("/b/3"), text=u("charlie delta echo"))
- w.add_document(id=9, path=u("/c/3"), text=u("delta echo alfa"))
+ w.add_document(id=7, path="/a/3", text="bravo charlie delta")
+ w.add_document(id=8, path="/b/3", text="charlie delta echo")
+ w.add_document(id=9, path="/c/3", text="delta echo alfa")
w.commit(merge=False)
with ix.searcher() as s:
@@ -1161,13 +1153,13 @@ def test_fieldboost():
schema = fields.Schema(id=fields.STORED, a=fields.TEXT, b=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=0, a=u("alfa bravo charlie"), b=u("echo foxtrot india"))
- w.add_document(id=1, a=u("delta bravo charlie"), b=u("alfa alfa alfa"))
- w.add_document(id=2, a=u("alfa alfa alfa"), b=u("echo foxtrot india"))
- w.add_document(id=3, a=u("alfa sierra romeo"), b=u("alfa tango echo"))
- w.add_document(id=4, a=u("bravo charlie delta"), b=u("alfa foxtrot india"))
- w.add_document(id=5, a=u("alfa alfa echo"), b=u("tango tango tango"))
- w.add_document(id=6, a=u("alfa bravo echo"), b=u("alfa alfa tango"))
+ w.add_document(id=0, a="alfa bravo charlie", b="echo foxtrot india")
+ w.add_document(id=1, a="delta bravo charlie", b="alfa alfa alfa")
+ w.add_document(id=2, a="alfa alfa alfa", b="echo foxtrot india")
+ w.add_document(id=3, a="alfa sierra romeo", b="alfa tango echo")
+ w.add_document(id=4, a="bravo charlie delta", b="alfa foxtrot india")
+ w.add_document(id=5, a="alfa alfa echo", b="tango tango tango")
+ w.add_document(id=6, a="alfa bravo echo", b="alfa alfa tango")
w.commit()
def field_booster(fieldname, factor=2.0):
@@ -1184,9 +1176,9 @@ def booster_fn(obj):
return booster_fn
with ix.searcher() as s:
- q = query.Or([query.Term("a", u("alfa")), query.Term("b", u("alfa"))])
+ q = query.Or([query.Term("a", "alfa"), query.Term("b", "alfa")])
q = q.accept(field_booster("a", 100.0))
- assert text_type(q) == text_type("(a:alfa^100.0 OR b:alfa)")
+ assert str(q) == "(a:alfa^100.0 OR b:alfa)"
r = s.search(q)
assert [hit["id"] for hit in r] == [2, 5, 6, 3, 0, 1, 4]
@@ -1198,12 +1190,12 @@ def test_andmaybe_quality():
ix = RamStorage().create_index(schema)
domain = [
- (u("Alpha Bravo Charlie Delta"), 2000),
- (u("Echo Bravo Foxtrot"), 2000),
- (u("Bravo Golf Hotel"), 2002),
- (u("Bravo India"), 2002),
- (u("Juliet Kilo Bravo"), 2004),
- (u("Lima Bravo Mike"), 2004),
+ ("Alpha Bravo Charlie Delta", 2000),
+ ("Echo Bravo Foxtrot", 2000),
+ ("Bravo Golf Hotel", 2002),
+ ("Bravo India", 2002),
+ ("Juliet Kilo Bravo", 2004),
+ ("Lima Bravo Mike", 2004),
]
w = ix.writer()
for title, year in domain:
@@ -1212,7 +1204,7 @@ def test_andmaybe_quality():
with ix.searcher() as s:
qp = qparser.QueryParser("title", ix.schema)
- q = qp.parse(u("title:bravo ANDMAYBE year:2004"))
+ q = qp.parse("title:bravo ANDMAYBE year:2004")
titles = [hit["title"] for hit in s.search(q, limit=None)[:2]]
assert "Juliet Kilo Bravo" in titles
@@ -1225,15 +1217,15 @@ def test_collect_limit():
schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id="a", text=u("alfa bravo charlie delta echo"))
- w.add_document(id="b", text=u("bravo charlie delta echo foxtrot"))
- w.add_document(id="c", text=u("charlie delta echo foxtrot golf"))
- w.add_document(id="d", text=u("delta echo foxtrot golf hotel"))
- w.add_document(id="e", text=u("echo foxtrot golf hotel india"))
+ w.add_document(id="a", text="alfa bravo charlie delta echo")
+ w.add_document(id="b", text="bravo charlie delta echo foxtrot")
+ w.add_document(id="c", text="charlie delta echo foxtrot golf")
+ w.add_document(id="d", text="delta echo foxtrot golf hotel")
+ w.add_document(id="e", text="echo foxtrot golf hotel india")
w.commit()
with ix.searcher() as s:
- r = s.search(query.Term("text", u("golf")), limit=10)
+ r = s.search(query.Term("text", "golf"), limit=10)
assert len(r) == 3
count = 0
for _ in r:
@@ -1241,15 +1233,15 @@ def test_collect_limit():
assert count == 3
w = ix.writer()
- w.add_document(id="f", text=u("foxtrot golf hotel india juliet"))
- w.add_document(id="g", text=u("golf hotel india juliet kilo"))
- w.add_document(id="h", text=u("hotel india juliet kilo lima"))
- w.add_document(id="i", text=u("india juliet kilo lima mike"))
- w.add_document(id="j", text=u("juliet kilo lima mike november"))
+ w.add_document(id="f", text="foxtrot golf hotel india juliet")
+ w.add_document(id="g", text="golf hotel india juliet kilo")
+ w.add_document(id="h", text="hotel india juliet kilo lima")
+ w.add_document(id="i", text="india juliet kilo lima mike")
+ w.add_document(id="j", text="juliet kilo lima mike november")
w.commit(merge=False)
with ix.searcher() as s:
- r = s.search(query.Term("text", u("golf")), limit=20)
+ r = s.search(query.Term("text", "golf"), limit=20)
assert len(r) == 5
count = 0
for _ in r:
@@ -1261,14 +1253,14 @@ def test_scorer():
schema = fields.Schema(key=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(key=u("alfa alfa alfa"))
- w.add_document(key=u("alfa alfa alfa alfa"))
- w.add_document(key=u("alfa alfa"))
+ w.add_document(key="alfa alfa alfa")
+ w.add_document(key="alfa alfa alfa alfa")
+ w.add_document(key="alfa alfa")
w.commit()
w = ix.writer()
- w.add_document(key=u("alfa alfa alfa alfa alfa alfa"))
- w.add_document(key=u("alfa"))
- w.add_document(key=u("alfa alfa alfa alfa alfa"))
+ w.add_document(key="alfa alfa alfa alfa alfa alfa")
+ w.add_document(key="alfa")
+ w.add_document(key="alfa alfa alfa alfa alfa")
w.commit(merge=False)
@@ -1289,14 +1281,14 @@ def test_pos_scorer():
schema = fields.Schema(id=fields.STORED, key=fields.TEXT(analyzer=ana))
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=0, key=u("0 0 1 0 0 0"))
- w.add_document(id=1, key=u("0 0 0 1 0 0"))
- w.add_document(id=2, key=u("0 1 0 0 0 0"))
+ w.add_document(id=0, key="0 0 1 0 0 0")
+ w.add_document(id=1, key="0 0 0 1 0 0")
+ w.add_document(id=2, key="0 1 0 0 0 0")
w.commit()
w = ix.writer()
- w.add_document(id=3, key=u("0 0 0 0 0 1"))
- w.add_document(id=4, key=u("1 0 0 0 0 0"))
- w.add_document(id=5, key=u("0 0 0 0 1 0"))
+ w.add_document(id=3, key="0 0 0 0 0 1")
+ w.add_document(id=4, key="1 0 0 0 0 0")
+ w.add_document(id=5, key="0 0 0 0 1 0")
w.commit(merge=False)
def pos_score_fn(searcher, fieldname, text, matcher):
@@ -1315,10 +1307,10 @@ def pos_score_fn(searcher, fieldname, text, matcher):
# ix = RamStorage().create_index(schema)
# with ix.writer() as w:
# for i in range(200):
-# text = u("a%s" % i)
+# text = "a%s" % i
# w.add_document(id=i, text=text)
#
-# q = query.Prefix("text", u("a"))
+# q = query.Prefix("text", "a")
# q.TOO_MANY_CLAUSES = 100
#
# with ix.searcher() as s:
@@ -1352,7 +1344,7 @@ def test_collapse():
ix = RamStorage().create_index(schema)
with ix.writer(codec=W3Codec()) as w:
for id, text, size, tag in domain:
- w.add_document(id=u(id), text=u(text), size=size, tag=u(tag))
+ w.add_document(id=id, text=text, size=size, tag=tag)
with ix.searcher() as s:
q = query.Term("text", "blah")
@@ -1402,7 +1394,7 @@ def test_collapse_nocolumn():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
for id, text, size, tag in domain:
- w.add_document(id=u(id), text=u(text), size=size, tag=u(tag))
+ w.add_document(id=id, text=text, size=size, tag=tag)
with ix.searcher() as s:
q = query.Term("text", "blah")
@@ -1432,7 +1424,7 @@ def test_collapse_nocolumn():
def test_collapse_length():
- domain = u(
+ domain = (
"alfa apple agnostic aplomb arc "
"bravo big braid beer "
"charlie crouch car "
@@ -1468,7 +1460,7 @@ def check(r):
def test_collapse_length_nocolumn():
- domain = u(
+ domain = (
"alfa apple agnostic aplomb arc "
"bravo big braid beer "
"charlie crouch car "
@@ -1514,17 +1506,17 @@ def test_collapse_order():
)
ix = RamStorage().create_index(schema)
with ix.writer(codec=W3Codec()) as w:
- w.add_document(id="a", price=10, rating=1, tag=u("x"))
- w.add_document(id="b", price=80, rating=3, tag=u("y"))
- w.add_document(id="c", price=60, rating=1, tag=u("z"))
+ w.add_document(id="a", price=10, rating=1, tag="x")
+ w.add_document(id="b", price=80, rating=3, tag="y")
+ w.add_document(id="c", price=60, rating=1, tag="z")
w.add_document(id="d", price=30, rating=2)
- w.add_document(id="e", price=50, rating=3, tag=u("x"))
- w.add_document(id="f", price=20, rating=1, tag=u("y"))
- w.add_document(id="g", price=50, rating=2, tag=u("z"))
+ w.add_document(id="e", price=50, rating=3, tag="x")
+ w.add_document(id="f", price=20, rating=1, tag="y")
+ w.add_document(id="g", price=50, rating=2, tag="z")
w.add_document(id="h", price=90, rating=5)
- w.add_document(id="i", price=50, rating=5, tag=u("x"))
- w.add_document(id="j", price=40, rating=1, tag=u("y"))
- w.add_document(id="k", price=50, rating=4, tag=u("z"))
+ w.add_document(id="i", price=50, rating=5, tag="x")
+ w.add_document(id="j", price=40, rating=1, tag="y")
+ w.add_document(id="k", price=50, rating=4, tag="z")
w.add_document(id="l", price=70, rating=2)
with ix.searcher() as s:
@@ -1556,17 +1548,17 @@ def test_collapse_order_nocolumn():
)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id="a", price=10, rating=1, tag=u("x"))
- w.add_document(id="b", price=80, rating=3, tag=u("y"))
- w.add_document(id="c", price=60, rating=1, tag=u("z"))
+ w.add_document(id="a", price=10, rating=1, tag="x")
+ w.add_document(id="b", price=80, rating=3, tag="y")
+ w.add_document(id="c", price=60, rating=1, tag="z")
w.add_document(id="d", price=30, rating=2)
- w.add_document(id="e", price=50, rating=3, tag=u("x"))
- w.add_document(id="f", price=20, rating=1, tag=u("y"))
- w.add_document(id="g", price=50, rating=2, tag=u("z"))
+ w.add_document(id="e", price=50, rating=3, tag="x")
+ w.add_document(id="f", price=20, rating=1, tag="y")
+ w.add_document(id="g", price=50, rating=2, tag="z")
w.add_document(id="h", price=90, rating=5)
- w.add_document(id="i", price=50, rating=5, tag=u("x"))
- w.add_document(id="j", price=40, rating=1, tag=u("y"))
- w.add_document(id="k", price=50, rating=4, tag=u("z"))
+ w.add_document(id="i", price=50, rating=5, tag="x")
+ w.add_document(id="j", price=40, rating=1, tag="y")
+ w.add_document(id="k", price=50, rating=4, tag="z")
w.add_document(id="l", price=70, rating=2)
with ix.searcher() as s:
@@ -1593,16 +1585,16 @@ def test_coord():
schema = fields.Schema(id=fields.STORED, hits=fields.STORED, tags=fields.KEYWORD)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id=0, hits=0, tags=u("blah blah blah blah"))
- w.add_document(id=1, hits=0, tags=u("echo echo blah blah"))
- w.add_document(id=2, hits=1, tags=u("bravo charlie delta echo"))
- w.add_document(id=3, hits=2, tags=u("charlie delta echo foxtrot"))
- w.add_document(id=4, hits=3, tags=u("delta echo foxtrot golf"))
- w.add_document(id=5, hits=3, tags=u("echo foxtrot golf hotel"))
- w.add_document(id=6, hits=2, tags=u("foxtrot golf hotel india"))
- w.add_document(id=7, hits=1, tags=u("golf hotel india juliet"))
- w.add_document(id=8, hits=0, tags=u("foxtrot foxtrot foo foo"))
- w.add_document(id=9, hits=0, tags=u("foo foo foo foo"))
+ w.add_document(id=0, hits=0, tags="blah blah blah blah")
+ w.add_document(id=1, hits=0, tags="echo echo blah blah")
+ w.add_document(id=2, hits=1, tags="bravo charlie delta echo")
+ w.add_document(id=3, hits=2, tags="charlie delta echo foxtrot")
+ w.add_document(id=4, hits=3, tags="delta echo foxtrot golf")
+ w.add_document(id=5, hits=3, tags="echo foxtrot golf hotel")
+ w.add_document(id=6, hits=2, tags="foxtrot golf hotel india")
+ w.add_document(id=7, hits=1, tags="golf hotel india juliet")
+ w.add_document(id=8, hits=0, tags="foxtrot foxtrot foo foo")
+ w.add_document(id=9, hits=0, tags="foo foo foo foo")
og = qparser.OrGroup.factory(0.99)
qp = qparser.QueryParser("tags", schema, group=og)
@@ -1622,7 +1614,7 @@ def test_keyword_search():
schema = fields.Schema(tags=fields.KEYWORD)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(tags=u("keyword1 keyword2 keyword3 keyword4 keyword5"))
+ w.add_document(tags="keyword1 keyword2 keyword3 keyword4 keyword5")
with ix.searcher() as s:
r = s.search_page(query.Term("tags", "keyword3"), 1)
@@ -1635,23 +1627,21 @@ def test_groupedby_with_terms():
with ix.writer() as w:
w.add_document(
- organism=u("mus"),
- content=u("IPFSTD1 IPFSTD_kdwq134 Kaminski-all Study00:00:00"),
- )
- w.add_document(
- organism=u("mus"), content=u("IPFSTD1 IPFSTD_kdwq134 Kaminski-all Study")
+ organism="mus",
+ content="IPFSTD1 IPFSTD_kdwq134 Kaminski-all Study00:00:00",
)
w.add_document(
- organism=u("hs"), content=u("This is the first document we've added!")
+ organism="mus", content="IPFSTD1 IPFSTD_kdwq134 Kaminski-all Study"
)
+ w.add_document(organism="hs", content="This is the first document we've added!")
with ix.searcher() as s:
- q = qparser.QueryParser("content", schema=ix.schema).parse(u("IPFSTD1"))
+ q = qparser.QueryParser("content", schema=ix.schema).parse("IPFSTD1")
r = s.search(q, groupedby=["organism"], terms=True)
assert len(r) == 2
assert r.groups("organism") == {"mus": [1, 0]}
assert r.has_matched_terms()
- assert r.matched_terms() == {("content", b("ipfstd1"))}
+ assert r.matched_terms() == {("content", b"ipfstd1")}
def test_buffered_refresh():
@@ -1661,8 +1651,8 @@ def test_buffered_refresh():
ix = RamStorage().create_index(schema)
with writing.BufferedWriter(ix, period=1000) as writer:
- writer.add_document(foo=u("1"))
- writer.add_document(foo=u("2"))
+ writer.add_document(foo="1")
+ writer.add_document(foo="2")
with writer.searcher() as searcher:
assert searcher.doc_count() == 2
@@ -1675,19 +1665,19 @@ def test_score_length():
schema = fields.Schema(a=fields.TEXT, b=fields.TEXT)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(a=u("alfa bravo charlie"))
- w.add_document(b=u("delta echo foxtrot"))
- w.add_document(a=u("golf hotel india"))
+ w.add_document(a="alfa bravo charlie")
+ w.add_document(b="delta echo foxtrot")
+ w.add_document(a="golf hotel india")
with ix.writer() as w:
w.merge = False
- w.add_document(b=u("juliet kilo lima"))
+ w.add_document(b="juliet kilo lima")
# In the second segment, there is an "a" field here, but in the
# corresponding document in the first segment, the field doesn't exist,
# so if the scorer is getting segment offsets wrong, scoring this
# document will error
- w.add_document(a=u("mike november oskar"))
- w.add_document(b=u("papa quebec romeo"))
+ w.add_document(a="mike november oskar")
+ w.add_document(b="papa quebec romeo")
with ix.searcher() as s:
assert not s.is_atomic()
@@ -1702,18 +1692,18 @@ def test_terms_with_filter():
schema = fields.Schema(text=fields.TEXT)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(text=u("alfa bravo charlie delta"))
- w.add_document(text=u("bravo charlie delta echo"))
- w.add_document(text=u("charlie delta echo foxtrot"))
- w.add_document(text=u("delta echo foxtrot golf"))
- w.add_document(text=u("echo foxtrot golf hotel"))
- w.add_document(text=u("foxtrot golf hotel alfa"))
- w.add_document(text=u("golf hotel alfa bravo"))
- w.add_document(text=u("hotel alfa bravo charlie"))
+ w.add_document(text="alfa bravo charlie delta")
+ w.add_document(text="bravo charlie delta echo")
+ w.add_document(text="charlie delta echo foxtrot")
+ w.add_document(text="delta echo foxtrot golf")
+ w.add_document(text="echo foxtrot golf hotel")
+ w.add_document(text="foxtrot golf hotel alfa")
+ w.add_document(text="golf hotel alfa bravo")
+ w.add_document(text="hotel alfa bravo charlie")
with ix.searcher() as s:
workingset = {1, 2, 3}
- q = query.Term("text", u("foxtrot"))
+ q = query.Term("text", "foxtrot")
r = s.search_page(q, pagenum=1, pagelen=5, terms=True, filter=workingset)
assert r.scored_length() == 2
@@ -1724,10 +1714,10 @@ def test_terms_to_bytes():
schema = fields.Schema(a=fields.TEXT, b=fields.NUMERIC, id=fields.STORED)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id=0, a=u("alfa bravo"), b=100)
- w.add_document(id=1, a=u("bravo charlie"), b=200)
- w.add_document(id=2, a=u("charlie delta"), b=100)
- w.add_document(id=3, a=u("delta echo"), b=200)
+ w.add_document(id=0, a="alfa bravo", b=100)
+ w.add_document(id=1, a="bravo charlie", b=200)
+ w.add_document(id=2, a="charlie delta", b=100)
+ w.add_document(id=3, a="delta echo", b=200)
with ix.searcher() as s:
t1 = query.Term("b", 200)
@@ -1747,25 +1737,25 @@ def test_issue_334():
with ix.writer() as w:
with w.group():
- w.add_document(kind=u("class"), name=u("Index"))
- w.add_document(kind=u("method"), name=u("add document"), returns=u("void"))
- w.add_document(kind=u("method"), name=u("add reader"), returns=u("void"))
- w.add_document(kind=u("method"), name=u("close"), returns=u("void"))
+ w.add_document(kind="class", name="Index")
+ w.add_document(kind="method", name="add document", returns="void")
+ w.add_document(kind="method", name="add reader", returns="void")
+ w.add_document(kind="method", name="close", returns="void")
with w.group():
- w.add_document(kind=u("class"), name=u("Accumulator"))
- w.add_document(kind=u("method"), name=u("add"), returns=u("void"))
- w.add_document(kind=u("method"), name=u("get result"), returns=u("number"))
+ w.add_document(kind="class", name="Accumulator")
+ w.add_document(kind="method", name="add", returns="void")
+ w.add_document(kind="method", name="get result", returns="number")
with w.group():
- w.add_document(kind=u("class"), name=u("Calculator"))
- w.add_document(kind=u("method"), name=u("add"), returns=u("number"))
- w.add_document(kind=u("method"), name=u("add all"), returns=u("number"))
- w.add_document(kind=u("method"), name=u("add some"), returns=u("number"))
- w.add_document(kind=u("method"), name=u("multiply"), returns=u("number"))
- w.add_document(kind=u("method"), name=u("close"), returns=u("void"))
+ w.add_document(kind="class", name="Calculator")
+ w.add_document(kind="method", name="add", returns="number")
+ w.add_document(kind="method", name="add all", returns="number")
+ w.add_document(kind="method", name="add some", returns="number")
+ w.add_document(kind="method", name="multiply", returns="number")
+ w.add_document(kind="method", name="close", returns="void")
with w.group():
- w.add_document(kind=u("class"), name=u("Deleter"))
- w.add_document(kind=u("method"), name=u("add"), returns=u("void"))
- w.add_document(kind=u("method"), name=u("delete"), returns=u("void"))
+ w.add_document(kind="class", name="Deleter")
+ w.add_document(kind="method", name="add", returns="void")
+ w.add_document(kind="method", name="delete", returns="void")
with ix.searcher() as s:
pq = query.Term("kind", "class")
@@ -1774,7 +1764,7 @@ def test_issue_334():
q = query.NestedChildren(pq, cq) & query.Term("returns", "void")
r = s.search(q)
assert len(r) == 1
- assert r[0]["name"] == u("close")
+ assert r[0]["name"] == "close"
def test_find_decimals():
@@ -1786,12 +1776,12 @@ def test_find_decimals():
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(name=u("alfa"), num=Decimal("1.5"))
- w.add_document(name=u("bravo"), num=Decimal("2.1"))
- w.add_document(name=u("charlie"), num=Decimal("5.3"))
- w.add_document(name=u("delta"), num=Decimal(3))
- w.add_document(name=u("echo"), num=Decimal("3.00001"))
- w.add_document(name=u("foxtrot"), num=Decimal("3"))
+ w.add_document(name="alfa", num=Decimal("1.5"))
+ w.add_document(name="bravo", num=Decimal("2.1"))
+ w.add_document(name="charlie", num=Decimal("5.3"))
+ w.add_document(name="delta", num=Decimal(3))
+ w.add_document(name="echo", num=Decimal("3.00001"))
+ w.add_document(name="foxtrot", num=Decimal("3"))
qp = qparser.QueryParser("name", ix.schema)
q = qp.parse("num:3.0")
@@ -1812,7 +1802,7 @@ def test_limit_scores():
count = 0
for words in permutations(domain, 4):
count += 1
- w.add_document(desc=" ".join(words), parent=text_type(count))
+ w.add_document(desc=" ".join(words), parent=str(count))
with ix.searcher() as s:
q = query.And(
diff --git a/tests/test_sorting.py b/tests/test_sorting.py
index 4196222d..6ebccca2 100644
--- a/tests/test_sorting.py
+++ b/tests/test_sorting.py
@@ -1,8 +1,8 @@
import random
from datetime import datetime, timedelta, timezone
+from itertools import permutations
from whoosh import columns, fields, query, sorting
-from whoosh.compat import permutations, u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.testing import TempIndex
@@ -26,16 +26,20 @@ def run(self):
assert result == "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
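+# Replaces the u() helper formerly imported from whoosh.compat: pass str through, decode bytes as ASCII.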
+def u(s):
+ return s.decode("ascii") if isinstance(s, bytes) else s
+
+
docs = (
- {"id": u("zulu"), "num": 100, "tag": u("one"), "frac": 0.75},
- {"id": u("xray"), "num": -5, "tag": u("three"), "frac": 2.0},
- {"id": u("yankee"), "num": 3, "tag": u("two"), "frac": 5.5},
- {"id": u("alfa"), "num": 7, "tag": u("three"), "frac": 2.25},
- {"id": u("tango"), "num": 2, "tag": u("two"), "frac": 1.75},
- {"id": u("foxtrot"), "num": -800, "tag": u("two"), "frac": 3.25},
- {"id": u("sierra"), "num": 1, "tag": u("one"), "frac": 4.75},
- {"id": u("whiskey"), "num": 0, "tag": u("three"), "frac": 5.25},
- {"id": u("bravo"), "num": 582045, "tag": u("three"), "frac": 1.25},
+ {"id": "zulu", "num": 100, "tag": "one", "frac": 0.75},
+ {"id": "xray", "num": -5, "tag": "three", "frac": 2.0},
+ {"id": "yankee", "num": 3, "tag": "two", "frac": 5.5},
+ {"id": "alfa", "num": 7, "tag": "three", "frac": 2.25},
+ {"id": "tango", "num": 2, "tag": "two", "frac": 1.75},
+ {"id": "foxtrot", "num": -800, "tag": "two", "frac": 3.25},
+ {"id": "sierra", "num": 1, "tag": "one", "frac": 4.75},
+ {"id": "whiskey", "num": 0, "tag": "three", "frac": 5.25},
+ {"id": "bravo", "num": 582045, "tag": "three", "frac": 1.25},
)
@@ -52,7 +56,7 @@ def get_schema():
def make_single_index(ix):
w = ix.writer()
for doc in docs:
- w.add_document(ev=u("a"), **doc)
+ w.add_document(ev="a", **doc)
w.commit()
@@ -60,13 +64,13 @@ def make_multi_index(ix):
for i in range(0, len(docs), 3):
w = ix.writer()
for doc in docs[i : i + 3]:
- w.add_document(ev=u("a"), **doc)
+ w.add_document(ev="a", **doc)
w.commit(merge=False)
def try_sort(sortedby, key, q=None, limit=None, reverse=False):
if q is None:
- q = query.Term("ev", u("a"))
+ q = query.Term("ev", "a")
correct = [d["id"] for d in sorted(docs, key=key, reverse=reverse)][:limit]
schema = get_schema()
@@ -119,7 +123,7 @@ def test_empty_field():
def test_page_sorted():
schema = fields.Schema(key=fields.ID(stored=True))
with TempIndex(schema, "pagesorted") as ix:
- domain = list(u("abcdefghijklmnopqrstuvwxyz"))
+ domain = list("abcdefghijklmnopqrstuvwxyz")
random.shuffle(domain)
w = ix.writer()
@@ -145,21 +149,21 @@ def test_score_facet():
schema = fields.Schema(id=fields.STORED, a=fields.TEXT, b=fields.TEXT, c=fields.ID)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=1, a=u("alfa alfa bravo"), b=u("bottle"), c=u("c"))
- w.add_document(id=2, a=u("alfa alfa alfa"), b=u("bottle"), c=u("c"))
+ w.add_document(id=1, a="alfa alfa bravo", b="bottle", c="c")
+ w.add_document(id=2, a="alfa alfa alfa", b="bottle", c="c")
w.commit()
w = ix.writer()
- w.add_document(id=3, a=u("alfa bravo bravo"), b=u("bottle"), c=u("c"))
- w.add_document(id=4, a=u("alfa bravo alfa"), b=u("apple"), c=u("c"))
+ w.add_document(id=3, a="alfa bravo bravo", b="bottle", c="c")
+ w.add_document(id=4, a="alfa bravo alfa", b="apple", c="c")
w.commit(merge=False)
w = ix.writer()
- w.add_document(id=5, a=u("alfa bravo bravo"), b=u("apple"), c=u("c"))
- w.add_document(id=6, a=u("alfa alfa alfa"), b=u("apple"), c=u("c"))
+ w.add_document(id=5, a="alfa bravo bravo", b="apple", c="c")
+ w.add_document(id=6, a="alfa alfa alfa", b="apple", c="c")
w.commit(merge=False)
with ix.searcher() as s:
facet = sorting.MultiFacet(["b", sorting.ScoreFacet()])
- r = s.search(q=query.Term("a", u("alfa")), sortedby=facet)
+ r = s.search(q=query.Term("a", "alfa"), sortedby=facet)
assert [h["id"] for h in r] == [6, 4, 5, 2, 1, 3]
@@ -173,7 +177,7 @@ def test_function_facet():
for w2 in domain:
for w3 in domain:
for w4 in domain:
- w.add_document(id=count, text=u(" ").join((w1, w2, w3, w4)))
+ w.add_document(id=count, text=" ".join((w1, w2, w3, w4)))
count += 1
w.commit()
@@ -184,7 +188,7 @@ def fn(searcher, docnum):
return 0 - (1.0 / (abs(v.get("alfa", 0) - v.get("bravo", 0)) + 1.0))
with ix.searcher() as s:
- q = query.And([query.Term("text", u("alfa")), query.Term("text", u("bravo"))])
+ q = query.And([query.Term("text", "alfa"), query.Term("text", "bravo")])
fnfacet = sorting.FunctionFacet(fn)
r = s.search(q, sortedby=fnfacet)
@@ -219,7 +223,7 @@ def test_numeric_field_facet():
def test_query_facet():
schema = fields.Schema(id=fields.STORED, v=fields.ID)
ix = RamStorage().create_index(schema)
- for i, ltr in enumerate(u("iacgbehdf")):
+ for i, ltr in enumerate("iacgbehdf"):
w = ix.writer()
w.add_document(id=i, v=ltr)
w.commit(merge=False)
@@ -245,7 +249,7 @@ def test_query_facet():
def test_query_facet_overlap():
- domain = u("abcdefghi")
+ domain = "abcdefghi"
schema = fields.Schema(
v=fields.KEYWORD(stored=True), num=fields.NUMERIC(stored=True)
)
@@ -277,10 +281,10 @@ def test_missing_field_facet():
schema = fields.Schema(id=fields.STORED, tag=fields.ID)
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(id=0, tag=u("alfa"))
- w.add_document(id=1, tag=u("alfa"))
+ w.add_document(id=0, tag="alfa")
+ w.add_document(id=1, tag="alfa")
w.add_document(id=2)
- w.add_document(id=3, tag=u("bravo"))
+ w.add_document(id=3, tag="bravo")
w.add_document(id=4)
w.commit()
@@ -309,11 +313,11 @@ def test_missing_overlap():
schema = fields.Schema(a=fields.NUMERIC(stored=True), b=fields.KEYWORD(stored=True))
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(a=0, b=u("one two"))
+ w.add_document(a=0, b="one two")
w.add_document(a=1)
- w.add_document(a=2, b=u("two three"))
+ w.add_document(a=2, b="two three")
w.add_document(a=3)
- w.add_document(a=4, b=u("three four"))
+ w.add_document(a=4, b="three four")
with ix.searcher() as s:
facet = sorting.FieldFacet("b", allow_overlap=True)
@@ -502,11 +506,11 @@ def test_overlapping_vector():
schema = fields.Schema(id=fields.STORED, tags=fields.KEYWORD(vector=True))
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id=0, tags=u("alfa bravo charlie"))
- w.add_document(id=1, tags=u("bravo charlie delta"))
- w.add_document(id=2, tags=u("charlie delta echo"))
- w.add_document(id=3, tags=u("delta echo alfa"))
- w.add_document(id=4, tags=u("echo alfa bravo"))
+ w.add_document(id=0, tags="alfa bravo charlie")
+ w.add_document(id=1, tags="bravo charlie delta")
+ w.add_document(id=2, tags="charlie delta echo")
+ w.add_document(id=3, tags="delta echo alfa")
+ w.add_document(id=4, tags="echo alfa bravo")
with ix.searcher() as s:
of = sorting.FieldFacet("tags", allow_overlap=True)
@@ -538,11 +542,11 @@ def test_overlapping_lists():
schema = fields.Schema(id=fields.STORED, tags=fields.KEYWORD)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id=0, tags=u("alfa bravo charlie"))
- w.add_document(id=1, tags=u("bravo charlie delta"))
- w.add_document(id=2, tags=u("charlie delta echo"))
- w.add_document(id=3, tags=u("delta echo alfa"))
- w.add_document(id=4, tags=u("echo alfa bravo"))
+ w.add_document(id=0, tags="alfa bravo charlie")
+ w.add_document(id=1, tags="bravo charlie delta")
+ w.add_document(id=2, tags="charlie delta echo")
+ w.add_document(id=3, tags="delta echo alfa")
+ w.add_document(id=4, tags="echo alfa bravo")
with ix.searcher() as s:
of = sorting.FieldFacet("tags", allow_overlap=True)
@@ -578,9 +582,9 @@ def check(method):
results = s.search(query.Every(), groupedby="tag")
groups = results.groups()
assert sorted(groups.items()) == [
- (u("one"), [0, 6]),
- (u("three"), [1, 3, 7, 8]),
- (u("two"), [2, 4, 5]),
+ ("one", [0, 6]),
+ ("three", [1, 3, 7, 8]),
+ ("two", [2, 4, 5]),
]
check(make_single_index)
@@ -591,20 +595,20 @@ def test_multifacet():
schema = fields.Schema(tag=fields.ID(stored=True), size=fields.ID(stored=True))
with TempIndex(schema, "multifacet") as ix:
w = ix.writer()
- w.add_document(tag=u("alfa"), size=u("small"))
- w.add_document(tag=u("bravo"), size=u("medium"))
- w.add_document(tag=u("alfa"), size=u("large"))
- w.add_document(tag=u("bravo"), size=u("small"))
- w.add_document(tag=u("alfa"), size=u("medium"))
- w.add_document(tag=u("bravo"), size=u("medium"))
+ w.add_document(tag="alfa", size="small")
+ w.add_document(tag="bravo", size="medium")
+ w.add_document(tag="alfa", size="large")
+ w.add_document(tag="bravo", size="small")
+ w.add_document(tag="alfa", size="medium")
+ w.add_document(tag="bravo", size="medium")
w.commit()
correct = {
- (u("bravo"), u("medium")): [1, 5],
- (u("alfa"), u("large")): [2],
- (u("alfa"), u("medium")): [4],
- (u("alfa"), u("small")): [0],
- (u("bravo"), u("small")): [3],
+ ("bravo", "medium"): [1, 5],
+ ("alfa", "large"): [2],
+ ("alfa", "medium"): [4],
+ ("alfa", "small"): [0],
+ ("bravo", "small"): [3],
}
with ix.searcher() as s:
@@ -616,8 +620,8 @@ def test_multifacet():
def test_sort_filter():
schema = fields.Schema(group=fields.ID(stored=True), key=fields.ID(stored=True))
- groups = u("alfa bravo charlie").split()
- keys = u("abcdefghijklmnopqrstuvwxyz")
+ groups = "alfa bravo charlie".split()
+ keys = "abcdefghijklmnopqrstuvwxyz"
source = []
for i in range(100):
key = keys[i % len(keys)]
@@ -638,7 +642,7 @@ def test_sort_filter():
w = ix.writer()
w.commit()
- fq = query.Term("group", u("bravo"))
+ fq = query.Term("group", "bravo")
with ix.searcher() as s:
r = s.search(query.Every(), sortedby=("key", "group"), filter=fq, limit=20)
@@ -646,7 +650,7 @@ def test_sort_filter():
d for d in source if d["group"] == "bravo"
][:20]
- fq = query.Term("group", u("bravo"))
+ fq = query.Term("group", "bravo")
r = s.search(
query.Every(), sortedby=("key", "group"), filter=fq, limit=None
)
@@ -662,7 +666,7 @@ def test_sort_filter():
d for d in source if d["group"] == "bravo"
][:20]
- fq = query.Term("group", u("bravo"))
+ fq = query.Term("group", "bravo")
r = s.search(
query.Every(), sortedby=("key", "group"), filter=fq, limit=None
)
@@ -681,7 +685,7 @@ def test_sorting_function():
for w2 in domain:
for w3 in domain:
for w4 in domain:
- w.add_document(id=count, text=u(" ").join((w1, w2, w3, w4)))
+ w.add_document(id=count, text=" ".join((w1, w2, w3, w4)))
count += 1
w.commit()
@@ -694,7 +698,7 @@ def fn(searcher, docnum):
fnfacet = sorting.FunctionFacet(fn)
with ix.searcher() as s:
- q = query.And([query.Term("text", u("alfa")), query.Term("text", u("bravo"))])
+ q = query.And([query.Term("text", "alfa"), query.Term("text", "bravo")])
results = s.search(q, sortedby=fnfacet)
r = [hit["text"] for hit in results]
for t in r[:10]:
@@ -757,12 +761,12 @@ def test_sorted_groups():
schema = fields.Schema(a=fields.STORED, b=fields.TEXT, c=fields.ID)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(a=0, b=u("blah"), c=u("apple"))
- w.add_document(a=1, b=u("blah blah"), c=u("bear"))
- w.add_document(a=2, b=u("blah blah blah"), c=u("apple"))
- w.add_document(a=3, b=u("blah blah blah blah"), c=u("bear"))
- w.add_document(a=4, b=u("blah blah blah blah blah"), c=u("apple"))
- w.add_document(a=5, b=u("blah blah blah blah blah blah"), c=u("bear"))
+ w.add_document(a=0, b="blah", c="apple")
+ w.add_document(a=1, b="blah blah", c="bear")
+ w.add_document(a=2, b="blah blah blah", c="apple")
+ w.add_document(a=3, b="blah blah blah blah", c="bear")
+ w.add_document(a=4, b="blah blah blah blah blah", c="apple")
+ w.add_document(a=5, b="blah blah blah blah blah blah", c="bear")
with ix.searcher() as s:
q = query.Term("b", "blah")
@@ -776,13 +780,13 @@ def test_group_types():
schema = fields.Schema(a=fields.STORED, b=fields.TEXT, c=fields.ID)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(a=0, b=u("blah"), c=u("apple"))
- w.add_document(a=1, b=u("blah blah"), c=u("bear"))
- w.add_document(a=2, b=u("blah blah blah"), c=u("apple"))
- w.add_document(a=3, b=u("blah blah blah blah"), c=u("bear"))
- w.add_document(a=4, b=u("blah blah blah blah blah"), c=u("apple"))
- w.add_document(a=5, b=u("blah blah blah blah blah blah"), c=u("bear"))
- w.add_document(a=6, b=u("blah blah blah blah blah blah blah"), c=u("apple"))
+ w.add_document(a=0, b="blah", c="apple")
+ w.add_document(a=1, b="blah blah", c="bear")
+ w.add_document(a=2, b="blah blah blah", c="apple")
+ w.add_document(a=3, b="blah blah blah blah", c="bear")
+ w.add_document(a=4, b="blah blah blah blah blah", c="apple")
+ w.add_document(a=5, b="blah blah blah blah blah blah", c="bear")
+ w.add_document(a=6, b="blah blah blah blah blah blah blah", c="apple")
with ix.searcher() as s:
q = query.Term("b", "blah")
@@ -815,24 +819,24 @@ def test_nocachefield_segments():
schema = fields.Schema(a=fields.ID(stored=True))
ix = RamStorage().create_index(schema)
w = ix.writer()
- w.add_document(a=u("bravo"))
- w.add_document(a=u("echo"))
- w.add_document(a=u("juliet"))
+ w.add_document(a="bravo")
+ w.add_document(a="echo")
+ w.add_document(a="juliet")
w.commit()
w = ix.writer()
- w.add_document(a=u("kilo"))
- w.add_document(a=u("foxtrot"))
- w.add_document(a=u("charlie"))
+ w.add_document(a="kilo")
+ w.add_document(a="foxtrot")
+ w.add_document(a="charlie")
w.commit(merge=False)
w = ix.writer()
- w.delete_by_term("a", u("echo"))
- w.add_document(a=u("alfa"))
- w.add_document(a=u("india"))
- w.add_document(a=u("delta"))
+ w.delete_by_term("a", "echo")
+ w.add_document(a="alfa")
+ w.add_document(a="india")
+ w.add_document(a="delta")
w.commit(merge=False)
with ix.searcher() as s:
- q = query.TermRange("a", u("bravo"), u("k"))
+ q = query.TermRange("a", "bravo", "k")
facet = sorting.FieldFacet("a", reverse=True)
r = s.search(q, sortedby=facet)
@@ -845,29 +849,27 @@ def test_nocachefield_segments():
"bravo",
]
- mq = query.Or([query.Term("a", u("bravo")), query.Term("a", u("delta"))])
+ mq = query.Or([query.Term("a", "bravo"), query.Term("a", "delta")])
anq = query.AndNot(q, mq)
r = s.search(anq, sortedby=facet)
assert [hit["a"] for hit in r] == ["juliet", "india", "foxtrot", "charlie"]
- mq = query.Or([query.Term("a", u("bravo")), query.Term("a", u("delta"))])
+ mq = query.Or([query.Term("a", "bravo"), query.Term("a", "delta")])
r = s.search(q, mask=mq, sortedby=facet)
assert [hit["a"] for hit in r] == ["juliet", "india", "foxtrot", "charlie"]
fq = query.Or(
[
- query.Term("a", u("alfa")),
- query.Term("a", u("charlie")),
- query.Term("a", u("echo")),
- query.Term("a", u("india")),
+ query.Term("a", "alfa"),
+ query.Term("a", "charlie"),
+ query.Term("a", "echo"),
+ query.Term("a", "india"),
]
)
r = s.search(query.Every(), filter=fq, sortedby=facet)
assert [hit["a"] for hit in r] == ["india", "charlie", "alfa"]
- nq = query.Not(
- query.Or([query.Term("a", u("alfa")), query.Term("a", u("india"))])
- )
+ nq = query.Not(query.Or([query.Term("a", "alfa"), query.Term("a", "india")]))
r = s.search(query.Every(), filter=nq, sortedby=facet)
assert [hit["a"] for hit in r] == [
"kilo",
@@ -976,12 +978,12 @@ def test(ix):
def test_filtered_grouped():
schema = fields.Schema(tag=fields.ID, text=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)
- domain = u("alfa bravo charlie delta echo foxtrot").split()
+ domain = "alfa bravo charlie delta echo foxtrot".split()
with ix.writer() as w:
for i, ls in enumerate(permutations(domain, 3)):
- tag = u(str(i % 3))
- w.add_document(tag=tag, text=u(" ").join(ls))
+ tag = str(i % 3)
+ w.add_document(tag=tag, text=" ".join(ls))
with ix.searcher() as s:
f = query.And([query.Term("text", "charlie"), query.Term("text", "delta")])
@@ -994,15 +996,15 @@ def test_add_sortable():
schema = fields.Schema(chapter=fields.ID(stored=True), price=fields.NUMERIC)
ix = st.create_index(schema)
with ix.writer() as w:
- w.add_document(chapter=u("alfa"), price=100)
- w.add_document(chapter=u("bravo"), price=200)
- w.add_document(chapter=u("charlie"), price=300)
- w.add_document(chapter=u("delta"), price=400)
+ w.add_document(chapter="alfa", price=100)
+ w.add_document(chapter="bravo", price=200)
+ w.add_document(chapter="charlie", price=300)
+ w.add_document(chapter="delta", price=400)
with ix.writer() as w:
- w.add_document(chapter=u("bravo"), price=500)
- w.add_document(chapter=u("alfa"), price=600)
- w.add_document(chapter=u("delta"), price=100)
- w.add_document(chapter=u("charlie"), price=200)
+ w.add_document(chapter="bravo", price=500)
+ w.add_document(chapter="alfa", price=600)
+ w.add_document(chapter="delta", price=100)
+ w.add_document(chapter="charlie", price=200)
w.merge = False
with ix.reader() as r:
@@ -1043,29 +1045,29 @@ def test_missing_column():
schema = fields.Schema(id=fields.STORED, tags=fields.KEYWORD)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
- w.add_document(id=0, tags=u("alfa bravo charlie"))
- w.add_document(id=1, tags=u("bravo charlie delta"))
- w.add_document(id=2, tags=u("charlie delta echo"))
+ w.add_document(id=0, tags="alfa bravo charlie")
+ w.add_document(id=1, tags="bravo charlie delta")
+ w.add_document(id=2, tags="charlie delta echo")
w.merge = False
with ix.writer() as w:
w.add_field("age", fields.NUMERIC(sortable=True))
- w.add_document(id=3, tags=u("delta echo foxtrot"), age=10)
- w.add_document(id=4, tags=u("echo foxtrot golf"), age=5)
- w.add_document(id=5, tags=u("foxtrot golf alfa"), age=20)
+ w.add_document(id=3, tags="delta echo foxtrot", age=10)
+ w.add_document(id=4, tags="echo foxtrot golf", age=5)
+ w.add_document(id=5, tags="foxtrot golf alfa", age=20)
w.merge = False
with ix.writer() as w:
- w.add_document(id=6, tags=u("golf alfa bravo"), age=2)
- w.add_document(id=7, tags=u("alfa hotel india"), age=50)
- w.add_document(id=8, tags=u("hotel india bravo"), age=15)
+ w.add_document(id=6, tags="golf alfa bravo", age=2)
+ w.add_document(id=7, tags="alfa hotel india", age=50)
+ w.add_document(id=8, tags="hotel india bravo", age=15)
w.merge = False
with ix.searcher() as s:
assert not s.is_atomic()
- q = query.Term("tags", u("alfa"))
+ q = query.Term("tags", "alfa")
# Have to use yucky low-level collector API to make sure we used a
# ColumnCategorizer to do the sorting
@@ -1086,9 +1088,9 @@ def test_compound_sort():
schema = fields.Schema(a=fspec, b=fspec, c=fspec)
ix = RamStorage().create_index(schema)
- alist = u("alfa bravo alfa bravo alfa bravo alfa bravo alfa bravo").split()
- blist = u("alfa bravo charlie alfa bravo charlie alfa bravo charlie alfa").split()
- clist = u("alfa bravo charlie delta echo foxtrot golf hotel india juliet").split()
+ alist = "alfa bravo alfa bravo alfa bravo alfa bravo alfa bravo".split()
+ blist = "alfa bravo charlie alfa bravo charlie alfa bravo charlie alfa".split()
+ clist = "alfa bravo charlie delta echo foxtrot golf hotel india juliet".split()
assert all(len(ls) == 10 for ls in (alist, blist, clist))
with ix.writer() as w:
diff --git a/tests/test_spans.py b/tests/test_spans.py
index ffb2e269..01c78731 100644
--- a/tests/test_spans.py
+++ b/tests/test_spans.py
@@ -1,5 +1,6 @@
+from itertools import permutations
+
from whoosh import analysis, fields, formats
-from whoosh.compat import permutations, u
from whoosh.filedb.filestore import RamStorage
from whoosh.query import And, Or, Phrase, Term, spans
from whoosh.util.testing import TempIndex
@@ -23,7 +24,7 @@ def get_index():
w = _ix.writer()
for ls in permutations(domain, 4):
- w.add_document(text=u(" ").join(ls), _stored_text=ls)
+ w.add_document(text=" ".join(ls), _stored_text=ls)
w.commit()
return _ix
@@ -38,7 +39,7 @@ def test_multimatcher():
for _ in range(3):
w = ix.writer()
for ls in permutations(domain):
- w.add_document(content=u(" ").join(ls))
+ w.add_document(content=" ".join(ls))
w.commit(merge=False)
q = Term("content", "bravo")
@@ -61,7 +62,7 @@ def test_excludematcher():
for _ in range(3):
w = ix.writer()
for ls in permutations(domain):
- w.add_document(content=u(" ").join(ls))
+ w.add_document(content=" ".join(ls))
w.commit(merge=False)
w = ix.writer()
@@ -189,10 +190,10 @@ def test_near_unordered():
st = RamStorage()
ix = st.create_index(schema)
w = ix.writer()
- w.add_document(text=u("alfa bravo charlie delta echo"))
- w.add_document(text=u("alfa bravo delta echo charlie"))
- w.add_document(text=u("alfa charlie bravo delta echo"))
- w.add_document(text=u("echo delta alfa foxtrot"))
+ w.add_document(text="alfa bravo charlie delta echo")
+ w.add_document(text="alfa bravo delta echo charlie")
+ w.add_document(text="alfa charlie bravo delta echo")
+ w.add_document(text="echo delta alfa foxtrot")
w.commit()
with ix.searcher() as s:
@@ -201,8 +202,8 @@ def test_near_unordered():
)
r = sorted(d["text"] for d in s.search(q))
assert r == [
- u("alfa bravo charlie delta echo"),
- u("alfa charlie bravo delta echo"),
+ "alfa bravo charlie delta echo",
+ "alfa charlie bravo delta echo",
]
@@ -213,7 +214,7 @@ def test_span_near_tree():
ix = st.create_index(schema)
w = ix.writer()
w.add_document(
- text=u(
+ text=(
"The Lucene library is by Doug Cutting and Whoosh was made by Matt Chaput"
)
)
diff --git a/tests/test_spelling.py b/tests/test_spelling.py
index 8a313eb1..3f773785 100644
--- a/tests/test_spelling.py
+++ b/tests/test_spelling.py
@@ -1,20 +1,24 @@
import gzip
from whoosh import analysis, fields, highlight, query, spelling
-from whoosh.compat import u
from whoosh.qparser import QueryParser
from whoosh.support.levenshtein import levenshtein
from whoosh.util.testing import TempIndex
+
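+# Local stand-in for the removed whoosh.compat.u: decode bytes to str, pass str through.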
+def u(s):
+ return s.decode("ascii") if isinstance(s, bytes) else s
+
+
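+# Note: the old implicit string concatenation had no separating spaces, so the
+# previous list contained fused tokens such as "koalaready"; the triple-quoted
+# block below splits cleanly into individual words.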
_wordlist = sorted(
- u(
- "render animation animate shader shading zebra koala"
- "ready kismet reaction page delete quick fox jumped"
- "over lazy dog wicked erase red team yellow under interest"
- "open print acrid sear deaf feed grow heal jolly kilt"
- "low zone xylophone crown vale brown neat meat reduction"
- "blunder preaction lamppost"
- ).split()
+ """
+ render animation animate shader shading zebra koala
+ ready kismet reaction page delete quick fox jumped
+ over lazy dog wicked erase red team yellow under interest
+ open print acrid sear deaf feed grow heal jolly kilt
+ low zone xylophone crown vale brown neat meat reduction
+ blunder preaction lamppost
+ """.split()
)
@@ -168,7 +172,7 @@ def test_correct_query():
q = qp.parse(qtext, ix.schema)
c = s.correct_query(q, qtext)
assert (
- c.query.__unicode__()
+ str(c.query)
== '(a:alfa AND b:"brovo november" AND a:delta AND a:detail)'
)
assert c.string == 'alfa b:("brovo november" a:delta) detail'
diff --git a/tests/test_tables.py b/tests/test_tables.py
index 480f3633..f0ff719b 100644
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -1,6 +1,5 @@
import random
-from whoosh.compat import b, iteritems
from whoosh.filedb.filestore import RamStorage
from whoosh.filedb.filetables import (
HashReader,
@@ -11,6 +10,10 @@
from whoosh.util.testing import TempStorage
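+# Local stand-in for the removed whoosh.compat.b: encode str to latin-1 bytes.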
+def b(s):
+ return s.encode("latin-1")
+
+
def test_hash_single():
st = RamStorage()
hw = HashWriter(st.create_file("test.hsh"))
@@ -103,7 +106,7 @@ def randstring():
samp = {randstring(): randstring() for _ in range(times)}
hw = HashWriter(st.create_file("test.hsh"))
- for k, v in iteritems(samp):
+ for k, v in samp.items():
hw.add(k, v)
hw.close()
diff --git a/tests/test_vectors.py b/tests/test_vectors.py
index 339859f3..58ac582e 100644
--- a/tests/test_vectors.py
+++ b/tests/test_vectors.py
@@ -1,9 +1,12 @@
from whoosh import fields, formats
-from whoosh.compat import u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.testing import TempIndex
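+# Local stand-in for the removed whoosh.compat.u: decode bytes to str, pass str through.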
+def u(s):
+ return s.decode("ascii") if isinstance(s, bytes) else s
+
+
def test_single_term():
schema = fields.Schema(text=fields.TEXT(vector=True))
ix = RamStorage().create_index(schema)
diff --git a/tests/test_weightings.py b/tests/test_weightings.py
index 5a275355..94aa037a 100644
--- a/tests/test_weightings.py
+++ b/tests/test_weightings.py
@@ -1,12 +1,16 @@
import inspect
import sys
+from itertools import permutations
from random import choice, randint
from whoosh import fields, query, scoring
-from whoosh.compat import permutations, u
from whoosh.filedb.filestore import RamStorage
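+# Local stand-in for the removed whoosh.compat.u: decode bytes to str, pass str through.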
+def u(s):
+ return s.decode("ascii") if isinstance(s, bytes) else s
+
+
def _weighting_classes(ignore):
# Get all the subclasses of Weighting in whoosh.scoring
return [
diff --git a/tests/test_writing.py b/tests/test_writing.py
index f3244159..0014c98b 100644
--- a/tests/test_writing.py
+++ b/tests/test_writing.py
@@ -4,11 +4,14 @@
import pytest
from whoosh import analysis, fields, query, writing
-from whoosh.compat import b, text_type, u
from whoosh.filedb.filestore import RamStorage
from whoosh.util.testing import TempIndex
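+# Local stand-in for the removed whoosh.compat.u: decode bytes to str, pass str through.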
+def u(s):
+ return s.decode("ascii") if isinstance(s, bytes) else s
+
+
def test_no_stored():
schema = fields.Schema(id=fields.ID, text=fields.TEXT)
with TempIndex(schema, "nostored") as ix:
@@ -26,7 +29,7 @@ def test_no_stored():
w = ix.writer()
for i in range(20):
- w.add_document(id=text_type(i), text=" ".join(random.sample(domain, 5)))
+ w.add_document(id=str(i), text=" ".join(random.sample(domain, 5)))
w.commit()
with ix.reader() as r:
@@ -55,7 +58,7 @@ def test_asyncwriter():
for i in range(20):
w = writing.AsyncWriter(ix)
writers.append(w)
- w.add_document(id=text_type(i), text=" ".join(random.sample(domain, 5)))
+ w.add_document(id=str(i), text=" ".join(random.sample(domain, 5)))
w.commit()
# Wait for all writers to finish before checking the results
@@ -90,7 +93,7 @@ def test_asyncwriter_no_stored():
for i in range(20):
w = writing.AsyncWriter(ix)
writers.append(w)
- w.add_document(id=text_type(i), text=" ".join(random.sample(domain, 5)))
+ w.add_document(id=str(i), text=" ".join(random.sample(domain, 5)))
w.commit()
# Wait for all writers to finish before checking the results
@@ -122,7 +125,7 @@ def test_buffered():
ix, period=None, limit=10, commitargs={"merge": False}
)
for i in range(20):
- w.add_document(id=text_type(i), text=" ".join(random.sample(domain, 5)))
+ w.add_document(id=str(i), text=" ".join(random.sample(domain, 5)))
time.sleep(0.1)
w.close()
@@ -162,7 +165,7 @@ def test_buffered_update():
w = writing.BufferedWriter(ix, period=None, limit=5)
for i in range(10):
for char in "abc":
- fs = {"id": char, "payload": text_type(i) + char}
+ fs = {"id": char, "payload": str(i) + char}
w.update_document(**fs)
with w.reader() as r:
@@ -446,7 +449,7 @@ def test_clear():
with ix.searcher() as s:
assert s.doc_count_all() == 1
- assert list(s.reader().lexicon("a")) == [b("bar"), b("baz"), b("foo")]
+ assert list(s.reader().lexicon("a")) == [b"bar", b"baz", b"foo"]
def test_spellable_list():