diff --git a/.gitignore b/.gitignore
index 1d3379d..2e9d7ce 100644
--- a/.gitignore
+++ b/.gitignore
@@ -83,3 +83,7 @@ doc/cdoc/build
ehthumbs.db
Icon?
Thumbs.db
+
+# Test generated files #
+########################
+.python-version
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..1c4cac6
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,23 @@
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v5.0.0
+ hooks:
+ - id: check-yaml
+ - id: end-of-file-fixer
+ - id: trailing-whitespace
+ - id: fix-byte-order-marker
+ - id: destroyed-symlinks
+ - id: fix-encoding-pragma
+ args: ["--remove"]
+ - id: mixed-line-ending
+ - id: name-tests-test
+ args: ["--pytest-test-first"]
+ - id: pretty-format-json
+ args: ["--autofix", "--no-ensure-ascii"]
+ exclude: ".ipynb"
+
+- repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.7.3
+ hooks:
+ - id: ruff-format
+ types_or: [ python, pyi, jupyter ]
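
The hooks configured above are driven by the pre-commit CLI. A minimal sketch of wiring them up, assuming pre-commit is already installed (pip install pre-commit) and this is run from the repository root:

    import subprocess

    # Register the hooks from .pre-commit-config.yaml as a git pre-commit hook.
    subprocess.run(["pre-commit", "install"], check=True)
    # Run every configured hook (check-yaml, ruff-format, etc.) over the whole repo once.
    subprocess.run(["pre-commit", "run", "--all-files"], check=True)
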
diff --git a/TODO b/TODO
index 60a701e..2b18b11 100644
--- a/TODO
+++ b/TODO
@@ -104,7 +104,7 @@ the cheap trick way of doing it is:
def arima(n, m):
return ArimaModelType(n, m)
and then in the factor type sniffing code detect these things and
-separate them out from "real" factors.
+separate them out from "real" factors.
* make sure that pickling works
- And make sure that if we allow it at all, then it's sustainable!
diff --git a/doc/R-comparison.rst b/doc/R-comparison.rst
index 8e74f49..fb5905c 100644
--- a/doc/R-comparison.rst
+++ b/doc/R-comparison.rst
@@ -105,7 +105,7 @@ Differences from R:
# R:
> qr(model.matrix(~ 1 + a:b))$rank
[1] 4
-
+
However, the matrix produced for this formula has 5 columns, meaning
that it contains redundant overspecification:
@@ -149,7 +149,7 @@ Differences from R:
use a full-rank encoding for ``b``. Therefore, we *should* use a
full-rank encoding for ``b``, and produce a model matrix with 6
columns. But in fact, R gives us only 4:
-
+
.. code-block:: rconsole
# R:
diff --git a/doc/_examples/example_lm.py b/doc/_examples/example_lm.py
index eb56afc..4f85a35 100644
--- a/doc/_examples/example_lm.py
+++ b/doc/_examples/example_lm.py
@@ -1,9 +1,11 @@
import numpy as np
from patsy import dmatrices, build_design_matrices
+
class LM(object):
"""An example ordinary least squares linear model class, analogous to R's
lm() function. Don't use this in real life, it isn't properly tested."""
+
def __init__(self, formula_like, data={}):
y, x = dmatrices(formula_like, data, 1)
self.nobs = x.shape[0]
@@ -12,27 +14,27 @@ def __init__(self, formula_like, data={}):
self._x_design_info = x.design_info
def __repr__(self):
- summary = ("Ordinary least-squares regression\n"
- " Model: %s ~ %s\n"
- " Regression (beta) coefficients:\n"
- % (self._y_design_info.describe(),
- self._x_design_info.describe()))
+ summary = (
+ "Ordinary least-squares regression\n"
+ " Model: %s ~ %s\n"
+ " Regression (beta) coefficients:\n"
+ % (self._y_design_info.describe(), self._x_design_info.describe())
+ )
for name, value in zip(self._x_design_info.column_names, self.betas):
summary += " %s: %0.3g\n" % (name, value[0])
return summary
def predict(self, new_data):
- (new_x,) = build_design_matrices([self._x_design_info],
- new_data)
+ (new_x,) = build_design_matrices([self._x_design_info], new_data)
return np.dot(new_x, self.betas)
def loglik(self, new_data):
- (new_y, new_x) = build_design_matrices([self._y_design_info,
- self._x_design_info],
- new_data)
+ (new_y, new_x) = build_design_matrices(
+ [self._y_design_info, self._x_design_info], new_data
+ )
new_pred = np.dot(new_x, self.betas)
sigma2 = self.rss / self.nobs
# It'd be more elegant to use scipy.stats.norm.logpdf here, but adding
# a dependency on scipy makes the docs build more complicated:
Z = -0.5 * np.log(2 * np.pi * sigma2)
- return Z + -0.5 * (new_y - new_x) ** 2/sigma2
+ return Z + -0.5 * (new_y - new_pred) ** 2 / sigma2
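
A minimal sketch of exercising the LM example class above, assuming numpy and patsy are installed and that the elided parts of __init__ (which fit self.betas and self.rss by least squares in the full example file) are in scope; the data values here are illustrative:

    import numpy as np

    data = {"x": np.array([1.0, 2.0, 3.0, 4.0]), "y": np.array([2.1, 3.9, 6.2, 8.1])}
    m = LM("y ~ x", data)           # fit y = b0 + b1*x by ordinary least squares
    print(m)                        # formula plus the fitted beta coefficients
    print(m.predict({"x": [5.0]})) # reuse the saved design info on new data
    print(m.loglik(data))          # per-observation Gaussian log-likelihood
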
diff --git a/doc/_examples/example_treatment.py b/doc/_examples/example_treatment.py
index ddc88d5..387c4d9 100644
--- a/doc/_examples/example_treatment.py
+++ b/doc/_examples/example_treatment.py
@@ -1,18 +1,26 @@
import numpy as np
+
class MyTreat(object):
def __init__(self, reference=0):
self.reference = reference
def code_with_intercept(self, levels):
- return ContrastMatrix(np.eye(len(levels)),
- ["[My.%s]" % (level,) for level in levels])
+ return ContrastMatrix(
+ np.eye(len(levels)), ["[My.%s]" % (level,) for level in levels]
+ )
def code_without_intercept(self, levels):
eye = np.eye(len(levels) - 1)
- contrasts = np.vstack((eye[:self.reference, :],
- np.zeros((1, len(levels) - 1)),
- eye[self.reference:, :]))
- suffixes = ["[MyT.%s]" % (level,) for level in
- levels[:self.reference] + levels[self.reference + 1:]]
+ contrasts = np.vstack(
+ (
+ eye[: self.reference, :],
+ np.zeros((1, len(levels) - 1)),
+ eye[self.reference :, :],
+ )
+ )
+ suffixes = [
+ "[MyT.%s]" % (level,)
+ for level in levels[: self.reference] + levels[self.reference + 1 :]
+ ]
return ContrastMatrix(contrasts, suffixes)
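
A short sketch of what this coding produces, assuming the MyTreat class above is in scope and ContrastMatrix has been imported from patsy.contrasts (the example leaves that import implicit):

    from patsy.contrasts import ContrastMatrix

    t = MyTreat(reference=1)
    cm = t.code_without_intercept(["a", "b", "c"])
    print(cm.matrix)    # [[1, 0], [0, 0], [0, 1]]: reference level "b" codes as all zeros
    print(cm.suffixes)  # ['[MyT.a]', '[MyT.c]']
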
diff --git a/doc/_static/facebox.css b/doc/_static/facebox.css
index 3f33b9f..4cacbac 100644
--- a/doc/_static/facebox.css
+++ b/doc/_static/facebox.css
@@ -77,4 +77,4 @@
.facebox_overlayBG {
background-color: #000;
z-index: 99;
-}
\ No newline at end of file
+}
diff --git a/doc/_static/show-code.js b/doc/_static/show-code.js
index fbff113..7bd102c 100644
--- a/doc/_static/show-code.js
+++ b/doc/_static/show-code.js
@@ -25,13 +25,13 @@ function scrapeText(codebox){
return newlines.join('\\n');
}
-$(document).ready(
+$(document).ready(
function() {
// grab all code boxes
var ipythoncode = $(".highlight-ipython");
$.each(ipythoncode, function() {
var code = scrapeText($(this).text());
- // give them a facebox pop-up with plain text code
+ // give them a facebox pop-up with plain text code
$(this).append('View Code');
$(this,"textarea").select();
});
diff --git a/doc/categorical-coding.rst b/doc/categorical-coding.rst
index f470616..8c0a6a4 100644
--- a/doc/categorical-coding.rst
+++ b/doc/categorical-coding.rst
@@ -78,7 +78,7 @@ As an example, here's a simplified version of the built-in
:class:`Treatment` coding object:
.. literalinclude:: _examples/example_treatment.py
-
+
.. ipython:: python
:suppress:
diff --git a/doc/conf.py b/doc/conf.py
index c9fffff..2ad5d64 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,24 +1,26 @@
-# -*- coding: utf-8 -*-
-
# General information about the project.
-project = 'patsy'
-copyright = '2011-2015, Nathaniel J. Smith'
+project = "patsy"
+copyright = "2011-2015, Nathaniel J. Smith"
import sys
+
print("python exec:", sys.executable)
print("sys.path:", sys.path)
try:
import numpy
+
print("numpy: %s, %s" % (numpy.__version__, numpy.__file__))
except ImportError:
print("no numpy")
try:
import matplotlib
+
print("matplotlib: %s, %s" % (matplotlib.__version__, matplotlib.__file__))
except ImportError:
print("no matplotlib")
try:
import IPython
+
print("ipython: %s, %s" % (IPython.__version__, IPython.__file__))
except ImportError:
print("no ipython")
@@ -29,8 +31,10 @@
#
# The short X.Y version.
import sys, os
+
sys.path.insert(0, os.getcwd() + "/..")
import patsy
+
version = patsy.__version__
# The full version, including alpha/beta/rc tags.
release = version
@@ -52,17 +56,21 @@
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.append(os.path.abspath('sphinxext'))
+sys.path.append(os.path.abspath("sphinxext"))
# -- General configuration -----------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.imgmath',
- 'sphinx.ext.intersphinx',
- 'IPython.sphinxext.ipython_directive',
- 'IPython.sphinxext.ipython_console_highlighting',
- ]
+extensions = [
+ "sphinx.ext.autodoc",
+ "sphinx.ext.doctest",
+ "sphinx.ext.imgmath",
+ "sphinx.ext.intersphinx",
+ "IPython.sphinxext.ipython_directive",
+ "IPython.sphinxext.ipython_console_highlighting",
+]
+
# Undocumented trick: if we def setup here in conf.py, it gets called just
# like an extension's setup function.
@@ -71,171 +79,170 @@ def setup(app):
app.add_javascript("facebox.js")
app.add_stylesheet("facebox.css")
+
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = ".rst"
# The encoding of source files.
-#source_encoding = 'utf-8'
+# source_encoding = 'utf-8'
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
-#language = None
+# language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
-#today = ''
+# today = ''
# Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
# List of documents that shouldn't be included in the build.
-#unused_docs = []
+# unused_docs = []
# List of directories, relative to source directory, that shouldn't be searched
# for source files.
-exclude_trees = ['_build']
+exclude_trees = ["_build"]
# The reST default role (used for this markup: `text`) to use for all documents.
-#default_role = None
+# default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
-#show_authors = False
+# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. Major themes that come with
# Sphinx are currently 'default' and 'sphinxdoc'.
-html_theme = 'default'
+html_theme = "default"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
-#html_theme_options = {}
+# html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
+# html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
-#html_title = None
+# html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
-#html_short_title = None
+# html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
-#html_logo = None
+# html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
-#html_favicon = None
+# html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
-#html_use_smartypants = True
+# html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
# If false, no module index is generated.
-#html_use_modindex = True
+# html_use_modindex = True
# If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
# If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
# If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = ''
+# html_file_suffix = ''
# Output file base name for HTML help builder.
-htmlhelp_basename = 'patsydoc'
+htmlhelp_basename = "patsydoc"
# -- Options for LaTeX output --------------------------------------------------
# The paper size ('letter' or 'a4').
-#latex_paper_size = 'letter'
+# latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
-#latex_font_size = '10pt'
+# latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
- ('index', 'patsy.tex', u'patsy Documentation',
- u'Nathaniel J. Smith', 'manual'),
+ ("index", "patsy.tex", "patsy Documentation", "Nathaniel J. Smith", "manual"),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
-#latex_logo = None
+# latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
# Additional stuff for the LaTeX preamble.
-#latex_preamble = ''
+# latex_preamble = ''
# Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
# If false, no module index is generated.
-#latex_use_modindex = True
+# latex_use_modindex = True
# -- Custom extra options
autoclass_content = "both"
-intersphinx_mapping = {"python": ("http://docs.python.org", None),
- "numpy": ("http://docs.scipy.org/doc/numpy",
- None),
- "pandas": ('http://pandas.pydata.org/pandas-docs/stable/',
- None),
- }
+intersphinx_mapping = {
+ "python": ("http://docs.python.org", None),
+ "numpy": ("http://docs.scipy.org/doc/numpy", None),
+ "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
+}
autodoc_member_order = "source"
diff --git a/doc/expert-model-specification.rst b/doc/expert-model-specification.rst
index 16c5779..4595bac 100644
--- a/doc/expert-model-specification.rst
+++ b/doc/expert-model-specification.rst
@@ -251,7 +251,7 @@ Put together, it looks something like this:
.. code-block:: python
class MyAlternativeFactor(object):
- # A factor object that simply returns the design
+ # A factor object that simply returns the design
def __init__(self, alternative_formula, side):
self.alternative_formula = alternative_formula
self.side = side
diff --git a/doc/formulas.rst b/doc/formulas.rst
index 421c901..e644dc1 100644
--- a/doc/formulas.rst
+++ b/doc/formulas.rst
@@ -16,7 +16,7 @@ and interpreted. Here's the picture you'll want to keep in mind:
.. figure:: figures/formula-structure.png
:align: center
-
+
The pieces that make up a formula.
Say we have a formula like::
@@ -493,7 +493,7 @@ Then:
more fundamental idea, that when we write:
y ~ a:b
-
+
we mean that the value of `y` can vary depending on every possible
*combination* of `a` and `b`.
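
A small sketch of the interaction semantics described in that passage, assuming patsy is installed; the level names are illustrative:

    from patsy import dmatrix

    data = {"a": ["a1", "a1", "a2", "a2"], "b": ["b1", "b2", "b1", "b2"]}
    # a:b produces one column per combination of the levels of a and b:
    print(dmatrix("a:b", data))
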
diff --git a/doc/library-developers.rst b/doc/library-developers.rst
index 478d302..1bf282c 100644
--- a/doc/library-developers.rst
+++ b/doc/library-developers.rst
@@ -128,9 +128,9 @@ And here's how it can be used:
# Old and boring approach (but it still works):
X = np.column_stack(([1] * len(data["y"]), data["x"]))
LM((data["y"], X))
-
+
# Fancy new way:
- m = LM("y ~ x", data)
+ m = LM("y ~ x", data)
m
m.predict({"x": [10, 20, 30]})
m.loglik(data)
diff --git a/patsy/__init__.py b/patsy/__init__.py
index 1617052..50431ec 100644
--- a/patsy/__init__.py
+++ b/patsy/__init__.py
@@ -10,15 +10,23 @@
# Do this first, to make it easy to check for warnings while testing:
import os
+
if os.environ.get("PATSY_FORCE_NO_WARNINGS"):
import warnings
+
warnings.filterwarnings("error", module="^patsy")
- warnings.filterwarnings("ignore", "is_categorical_dtype is deprecated", DeprecationWarning, module="^patsy")
+ warnings.filterwarnings(
+ "ignore",
+ "is_categorical_dtype is deprecated",
+ DeprecationWarning,
+ module="^patsy",
+ )
del warnings
del os
import patsy.origin
+
class PatsyError(Exception):
"""This is the main error type raised by Patsy functions.
@@ -35,6 +43,7 @@ class PatsyError(Exception):
``.message`` and ``.origin`` attributes directly. (The latter may be
None.)
"""
+
def __init__(self, message, origin=None):
Exception.__init__(self, message)
self.message = message
@@ -45,8 +54,7 @@ def __str__(self):
if self.origin is None:
return self.message
else:
- return ("%s\n%s"
- % (self.message, self.origin.caretize(indent=4)))
+ return "%s\n%s" % (self.message, self.origin.caretize(indent=4))
def set_origin(self, origin):
# This is useful to modify an exception to add origin information as
@@ -60,56 +68,72 @@ def set_origin(self, origin):
origin = None
self.origin = origin
+
__all__ = ["PatsyError"]
# We make a rich API available for explicit use. To see what exactly is
# exported, check each module's __all__, or import this module and look at its
# __all__.
+
def _reexport(mod):
__all__.extend(mod.__all__)
for var in mod.__all__:
globals()[var] = getattr(mod, var)
+
# This used to have less copy-paste, but explicit import statements make
# packaging tools like py2exe and py2app happier. Sigh.
import patsy.highlevel
+
_reexport(patsy.highlevel)
import patsy.build
+
_reexport(patsy.build)
import patsy.constraint
+
_reexport(patsy.constraint)
import patsy.contrasts
+
_reexport(patsy.contrasts)
import patsy.desc
+
_reexport(patsy.desc)
import patsy.design_info
+
_reexport(patsy.design_info)
import patsy.eval
+
_reexport(patsy.eval)
import patsy.origin
+
_reexport(patsy.origin)
import patsy.state
+
_reexport(patsy.state)
import patsy.user_util
+
_reexport(patsy.user_util)
import patsy.missing
+
_reexport(patsy.missing)
import patsy.splines
+
_reexport(patsy.splines)
import patsy.mgcv_cubic_splines
+
_reexport(patsy.mgcv_cubic_splines)
# XX FIXME: we aren't exporting any of the explicit parsing interface
diff --git a/patsy/build.py b/patsy/build.py
index 6f9067e..b6d6475 100644
--- a/patsy/build.py
+++ b/patsy/build.py
@@ -11,14 +11,14 @@
import numpy as np
from patsy import PatsyError
-from patsy.categorical import (guess_categorical,
- CategoricalSniffer,
- categorical_to_int)
-from patsy.util import (atleast_2d_column_default,
- have_pandas, asarray_or_pandas,
- safe_issubdtype)
-from patsy.design_info import (DesignMatrix, DesignInfo,
- FactorInfo, SubtermInfo)
+from patsy.categorical import guess_categorical, CategoricalSniffer, categorical_to_int
+from patsy.util import (
+ atleast_2d_column_default,
+ have_pandas,
+ asarray_or_pandas,
+ safe_issubdtype,
+)
+from patsy.design_info import DesignMatrix, DesignInfo, FactorInfo, SubtermInfo
from patsy.redundancy import pick_contrasts_for_term
from patsy.eval import EvalEnvironment
from patsy.contrasts import code_contrast_matrix, Treatment
@@ -28,6 +28,7 @@
if have_pandas:
import pandas
+
class _MockFactor(object):
def __init__(self, name="MOCKMOCK"):
self._name = name
@@ -38,15 +39,19 @@ def eval(self, state, env):
def name(self):
return self._name
+
def _max_allowed_dim(dim, arr, factor):
if arr.ndim > dim:
- msg = ("factor '%s' evaluates to an %s-dimensional array; I only "
- "handle arrays with dimension <= %s"
- % (factor.name(), arr.ndim, dim))
+ msg = (
+ "factor '%s' evaluates to an %s-dimensional array; I only "
+ "handle arrays with dimension <= %s" % (factor.name(), arr.ndim, dim)
+ )
raise PatsyError(msg, factor)
+
def test__max_allowed_dim():
import pytest
+
f = _MockFactor()
_max_allowed_dim(1, np.array(1), f)
_max_allowed_dim(1, np.array([1]), f)
@@ -57,6 +62,7 @@ def test__max_allowed_dim():
_max_allowed_dim(2, np.array([[1]]), f)
pytest.raises(PatsyError, _max_allowed_dim, 2, np.array([[[1]]]), f)
+
def _eval_factor(factor_info, data, NA_action):
factor = factor_info.factor
result = factor.eval(factor_info.state, data)
@@ -65,28 +71,32 @@ def _eval_factor(factor_info, data, NA_action):
result = atleast_2d_column_default(result, preserve_pandas=True)
_max_allowed_dim(2, result, factor)
if result.shape[1] != factor_info.num_columns:
- raise PatsyError("when evaluating factor %s, I got %s columns "
- "instead of the %s I was expecting"
- % (factor.name(),
- factor_info.num_columns,
- result.shape[1]),
- factor)
+ raise PatsyError(
+ "when evaluating factor %s, I got %s columns "
+ "instead of the %s I was expecting"
+ % (factor.name(), factor_info.num_columns, result.shape[1]),
+ factor,
+ )
if not safe_issubdtype(np.asarray(result).dtype, np.number):
- raise PatsyError("when evaluating numeric factor %s, "
- "I got non-numeric data of type '%s'"
- % (factor.name(), result.dtype),
- factor)
+ raise PatsyError(
+ "when evaluating numeric factor %s, "
+ "I got non-numeric data of type '%s'" % (factor.name(), result.dtype),
+ factor,
+ )
return result, NA_action.is_numerical_NA(result)
# returns either a 1d ndarray or a pandas.Series, plus is_NA mask
else:
assert factor_info.type == "categorical"
- result = categorical_to_int(result, factor_info.categories, NA_action,
- origin=factor_info.factor)
+ result = categorical_to_int(
+ result, factor_info.categories, NA_action, origin=factor_info.factor
+ )
assert result.ndim == 1
return result, np.asarray(result == -1)
+
def test__eval_factor_numerical():
import pytest
+
naa = NAAction()
f = _MockFactor()
@@ -102,11 +112,8 @@ def test__eval_factor_numerical():
pytest.raises(PatsyError, _eval_factor, fi1, {"mock": [[1, 2]]}, naa)
pytest.raises(PatsyError, _eval_factor, fi1, {"mock": ["a", "b"]}, naa)
pytest.raises(PatsyError, _eval_factor, fi1, {"mock": [True, False]}, naa)
- fi2 = FactorInfo(_MockFactor(), "numerical",
- {}, num_columns=2, categories=None)
- eval123321, is_NA = _eval_factor(fi2,
- {"mock": [[1, 3], [2, 2], [3, 1]]},
- naa)
+ fi2 = FactorInfo(_MockFactor(), "numerical", {}, num_columns=2, categories=None)
+ eval123321, is_NA = _eval_factor(fi2, {"mock": [[1, 3], [2, 2], [3, 1]]}, naa)
assert eval123321.shape == (3, 2)
assert np.all(eval123321 == [[1, 3], [2, 2], [3, 1]])
assert is_NA.shape == (3,)
@@ -114,79 +121,84 @@ def test__eval_factor_numerical():
pytest.raises(PatsyError, _eval_factor, fi2, {"mock": [1, 2, 3]}, naa)
pytest.raises(PatsyError, _eval_factor, fi2, {"mock": [[1, 2, 3]]}, naa)
- ev_nan, is_NA = _eval_factor(fi1, {"mock": [1, 2, np.nan]},
- NAAction(NA_types=["NaN"]))
+ ev_nan, is_NA = _eval_factor(
+ fi1, {"mock": [1, 2, np.nan]}, NAAction(NA_types=["NaN"])
+ )
assert np.array_equal(is_NA, [False, False, True])
- ev_nan, is_NA = _eval_factor(fi1, {"mock": [1, 2, np.nan]},
- NAAction(NA_types=[]))
+ ev_nan, is_NA = _eval_factor(fi1, {"mock": [1, 2, np.nan]}, NAAction(NA_types=[]))
assert np.array_equal(is_NA, [False, False, False])
if have_pandas:
- eval_ser, _ = _eval_factor(fi1,
- {"mock":
- pandas.Series([1, 2, 3],
- index=[10, 20, 30])},
- naa)
+ eval_ser, _ = _eval_factor(
+ fi1, {"mock": pandas.Series([1, 2, 3], index=[10, 20, 30])}, naa
+ )
assert isinstance(eval_ser, pandas.DataFrame)
assert np.array_equal(eval_ser, [[1], [2], [3]])
assert np.array_equal(eval_ser.index, [10, 20, 30])
- eval_df1, _ = _eval_factor(fi1,
- {"mock":
- pandas.DataFrame([[2], [1], [3]],
- index=[20, 10, 30])},
- naa)
+ eval_df1, _ = _eval_factor(
+ fi1, {"mock": pandas.DataFrame([[2], [1], [3]], index=[20, 10, 30])}, naa
+ )
assert isinstance(eval_df1, pandas.DataFrame)
assert np.array_equal(eval_df1, [[2], [1], [3]])
assert np.array_equal(eval_df1.index, [20, 10, 30])
- eval_df2, _ = _eval_factor(fi2,
- {"mock":
- pandas.DataFrame([[2, 3], [1, 4], [3, -1]],
- index=[20, 30, 10])},
- naa)
+ eval_df2, _ = _eval_factor(
+ fi2,
+ {"mock": pandas.DataFrame([[2, 3], [1, 4], [3, -1]], index=[20, 30, 10])},
+ naa,
+ )
assert isinstance(eval_df2, pandas.DataFrame)
assert np.array_equal(eval_df2, [[2, 3], [1, 4], [3, -1]])
assert np.array_equal(eval_df2.index, [20, 30, 10])
- pytest.raises(PatsyError,
- _eval_factor, fi2,
- {"mock": pandas.Series([1, 2, 3], index=[10, 20, 30])},
- naa)
- pytest.raises(PatsyError,
- _eval_factor, fi1,
- {"mock":
- pandas.DataFrame([[2, 3], [1, 4], [3, -1]],
- index=[20, 30, 10])},
- naa)
+ pytest.raises(
+ PatsyError,
+ _eval_factor,
+ fi2,
+ {"mock": pandas.Series([1, 2, 3], index=[10, 20, 30])},
+ naa,
+ )
+ pytest.raises(
+ PatsyError,
+ _eval_factor,
+ fi1,
+ {"mock": pandas.DataFrame([[2, 3], [1, 4], [3, -1]], index=[20, 30, 10])},
+ naa,
+ )
+
def test__eval_factor_categorical():
import pytest
from patsy.categorical import C
+
naa = NAAction()
f = _MockFactor()
- fi1 = FactorInfo(f, "categorical",
- {}, num_columns=None, categories=("a", "b"))
+ fi1 = FactorInfo(f, "categorical", {}, num_columns=None, categories=("a", "b"))
assert fi1.factor is f
cat1, _ = _eval_factor(fi1, {"mock": ["b", "a", "b"]}, naa)
assert cat1.shape == (3,)
assert np.all(cat1 == [1, 0, 1])
pytest.raises(PatsyError, _eval_factor, fi1, {"mock": ["c"]}, naa)
pytest.raises(PatsyError, _eval_factor, fi1, {"mock": C(["a", "c"])}, naa)
- pytest.raises(PatsyError, _eval_factor, fi1,
- {"mock": C(["a", "b"], levels=["b", "a"])}, naa)
+ pytest.raises(
+ PatsyError, _eval_factor, fi1, {"mock": C(["a", "b"], levels=["b", "a"])}, naa
+ )
pytest.raises(PatsyError, _eval_factor, fi1, {"mock": [1, 0, 1]}, naa)
bad_cat = np.asarray(["b", "a", "a", "b"])
bad_cat.resize((2, 2))
pytest.raises(PatsyError, _eval_factor, fi1, {"mock": bad_cat}, naa)
- cat1_NA, is_NA = _eval_factor(fi1, {"mock": ["a", None, "b"]},
- NAAction(NA_types=["None"]))
+ cat1_NA, is_NA = _eval_factor(
+ fi1, {"mock": ["a", None, "b"]}, NAAction(NA_types=["None"])
+ )
assert np.array_equal(is_NA, [False, True, False])
assert np.array_equal(cat1_NA, [0, -1, 1])
- pytest.raises(PatsyError, _eval_factor, fi1,
- {"mock": ["a", None, "b"]}, NAAction(NA_types=[]))
+ pytest.raises(
+ PatsyError, _eval_factor, fi1, {"mock": ["a", None, "b"]}, NAAction(NA_types=[])
+ )
- fi2 = FactorInfo(_MockFactor(), "categorical", {},
- num_columns=None, categories=[False, True])
+ fi2 = FactorInfo(
+ _MockFactor(), "categorical", {}, num_columns=None, categories=[False, True]
+ )
cat2, _ = _eval_factor(fi2, {"mock": [True, False, False, True]}, naa)
assert cat2.shape == (4,)
assert np.all(cat2 == [1, 0, 0, 1])
@@ -203,22 +215,27 @@ def test__eval_factor_categorical():
assert np.array_equal(cat_sbool, [1, 0])
assert np.array_equal(cat_sbool.index, [11, 21])
+
def _column_combinations(columns_per_factor):
# For consistency with R, the left-most item iterates fastest:
iterators = [range(n) for n in reversed(columns_per_factor)]
for reversed_combo in itertools.product(*iterators):
yield reversed_combo[::-1]
+
def test__column_combinations():
- assert list(_column_combinations([2, 3])) == [(0, 0),
- (1, 0),
- (0, 1),
- (1, 1),
- (0, 2),
- (1, 2)]
+ assert list(_column_combinations([2, 3])) == [
+ (0, 0),
+ (1, 0),
+ (0, 1),
+ (1, 1),
+ (0, 2),
+ (1, 2),
+ ]
assert list(_column_combinations([3])) == [(0,), (1,), (2,)]
assert list(_column_combinations([])) == [()]
+
def _subterm_column_combinations(factor_infos, subterm):
columns_per_factor = []
for factor in subterm.factors:
@@ -229,17 +246,18 @@ def _subterm_column_combinations(factor_infos, subterm):
columns_per_factor.append(columns)
return _column_combinations(columns_per_factor)
+
def _subterm_column_names_iter(factor_infos, subterm):
total = 0
for i, column_idxs in enumerate(
- _subterm_column_combinations(factor_infos, subterm)):
+ _subterm_column_combinations(factor_infos, subterm)
+ ):
name_pieces = []
for factor, column_idx in zip(subterm.factors, column_idxs):
fi = factor_infos[factor]
if fi.type == "numerical":
if fi.num_columns > 1:
- name_pieces.append("%s[%s]"
- % (factor.name(), column_idx))
+ name_pieces.append("%s[%s]" % (factor.name(), column_idx))
else:
assert column_idx == 0
name_pieces.append(factor.name())
@@ -255,94 +273,116 @@ def _subterm_column_names_iter(factor_infos, subterm):
total += 1
assert total == subterm.num_columns
+
def _build_subterm(subterm, factor_infos, factor_values, out):
assert subterm.num_columns == out.shape[1]
out[...] = 1
for i, column_idxs in enumerate(
- _subterm_column_combinations(factor_infos, subterm)):
+ _subterm_column_combinations(factor_infos, subterm)
+ ):
for factor, column_idx in zip(subterm.factors, column_idxs):
if factor_infos[factor].type == "categorical":
contrast = subterm.contrast_matrices[factor]
if np.any(factor_values[factor] < 0):
- raise PatsyError("can't build a design matrix "
- "containing missing values", factor)
- out[:, i] *= contrast.matrix[factor_values[factor],
- column_idx]
+ raise PatsyError(
+ "can't build a design matrix " "containing missing values",
+ factor,
+ )
+ out[:, i] *= contrast.matrix[factor_values[factor], column_idx]
else:
assert factor_infos[factor].type == "numerical"
- assert (factor_values[factor].shape[1]
- == factor_infos[factor].num_columns)
+ assert (
+ factor_values[factor].shape[1] == factor_infos[factor].num_columns
+ )
out[:, i] *= factor_values[factor][:, column_idx]
+
def test__subterm_column_names_iter_and__build_subterm():
import pytest
from patsy.contrasts import ContrastMatrix
from patsy.categorical import C
+
f1 = _MockFactor("f1")
f2 = _MockFactor("f2")
f3 = _MockFactor("f3")
- contrast = ContrastMatrix(np.array([[0, 0.5],
- [3, 0]]),
- ["[c1]", "[c2]"])
-
- factor_infos1 = {f1: FactorInfo(f1, "numerical", {},
- num_columns=1, categories=None),
- f2: FactorInfo(f2, "categorical", {},
- num_columns=None, categories=["a", "b"]),
- f3: FactorInfo(f3, "numerical", {},
- num_columns=1, categories=None),
- }
+ contrast = ContrastMatrix(np.array([[0, 0.5], [3, 0]]), ["[c1]", "[c2]"])
+
+ factor_infos1 = {
+ f1: FactorInfo(f1, "numerical", {}, num_columns=1, categories=None),
+ f2: FactorInfo(f2, "categorical", {}, num_columns=None, categories=["a", "b"]),
+ f3: FactorInfo(f3, "numerical", {}, num_columns=1, categories=None),
+ }
contrast_matrices = {f2: contrast}
subterm1 = SubtermInfo([f1, f2, f3], contrast_matrices, 2)
- assert (list(_subterm_column_names_iter(factor_infos1, subterm1))
- == ["f1:f2[c1]:f3", "f1:f2[c2]:f3"])
+ assert list(_subterm_column_names_iter(factor_infos1, subterm1)) == [
+ "f1:f2[c1]:f3",
+ "f1:f2[c2]:f3",
+ ]
mat = np.empty((3, 2))
- _build_subterm(subterm1, factor_infos1,
- {f1: atleast_2d_column_default([1, 2, 3]),
- f2: np.asarray([0, 0, 1]),
- f3: atleast_2d_column_default([7.5, 2, -12])},
- mat)
- assert np.allclose(mat, [[0, 0.5 * 1 * 7.5],
- [0, 0.5 * 2 * 2],
- [3 * 3 * -12, 0]])
+ _build_subterm(
+ subterm1,
+ factor_infos1,
+ {
+ f1: atleast_2d_column_default([1, 2, 3]),
+ f2: np.asarray([0, 0, 1]),
+ f3: atleast_2d_column_default([7.5, 2, -12]),
+ },
+ mat,
+ )
+ assert np.allclose(mat, [[0, 0.5 * 1 * 7.5], [0, 0.5 * 2 * 2], [3 * 3 * -12, 0]])
# Check that missing categorical values blow up
- pytest.raises(PatsyError, _build_subterm, subterm1, factor_infos1,
- {f1: atleast_2d_column_default([1, 2, 3]),
- f2: np.asarray([0, -1, 1]),
- f3: atleast_2d_column_default([7.5, 2, -12])},
- mat)
+ pytest.raises(
+ PatsyError,
+ _build_subterm,
+ subterm1,
+ factor_infos1,
+ {
+ f1: atleast_2d_column_default([1, 2, 3]),
+ f2: np.asarray([0, -1, 1]),
+ f3: atleast_2d_column_default([7.5, 2, -12]),
+ },
+ mat,
+ )
factor_infos2 = dict(factor_infos1)
- factor_infos2[f1] = FactorInfo(f1, "numerical", {},
- num_columns=2, categories=None)
+ factor_infos2[f1] = FactorInfo(f1, "numerical", {}, num_columns=2, categories=None)
subterm2 = SubtermInfo([f1, f2, f3], contrast_matrices, 4)
- assert (list(_subterm_column_names_iter(factor_infos2, subterm2))
- == ["f1[0]:f2[c1]:f3",
- "f1[1]:f2[c1]:f3",
- "f1[0]:f2[c2]:f3",
- "f1[1]:f2[c2]:f3"])
+ assert list(_subterm_column_names_iter(factor_infos2, subterm2)) == [
+ "f1[0]:f2[c1]:f3",
+ "f1[1]:f2[c1]:f3",
+ "f1[0]:f2[c2]:f3",
+ "f1[1]:f2[c2]:f3",
+ ]
mat2 = np.empty((3, 4))
- _build_subterm(subterm2, factor_infos2,
- {f1: atleast_2d_column_default([[1, 2], [3, 4], [5, 6]]),
- f2: np.asarray([0, 0, 1]),
- f3: atleast_2d_column_default([7.5, 2, -12])},
- mat2)
- assert np.allclose(mat2, [[0, 0, 0.5 * 1 * 7.5, 0.5 * 2 * 7.5],
- [0, 0, 0.5 * 3 * 2, 0.5 * 4 * 2],
- [3 * 5 * -12, 3 * 6 * -12, 0, 0]])
-
+ _build_subterm(
+ subterm2,
+ factor_infos2,
+ {
+ f1: atleast_2d_column_default([[1, 2], [3, 4], [5, 6]]),
+ f2: np.asarray([0, 0, 1]),
+ f3: atleast_2d_column_default([7.5, 2, -12]),
+ },
+ mat2,
+ )
+ assert np.allclose(
+ mat2,
+ [
+ [0, 0, 0.5 * 1 * 7.5, 0.5 * 2 * 7.5],
+ [0, 0, 0.5 * 3 * 2, 0.5 * 4 * 2],
+ [3 * 5 * -12, 3 * 6 * -12, 0, 0],
+ ],
+ )
subterm_int = SubtermInfo([], {}, 1)
assert list(_subterm_column_names_iter({}, subterm_int)) == ["Intercept"]
mat3 = np.empty((3, 1))
- _build_subterm(subterm_int, {},
- {f1: [1, 2, 3], f2: [1, 2, 3], f3: [1, 2, 3]},
- mat3)
+ _build_subterm(subterm_int, {}, {f1: [1, 2, 3], f2: [1, 2, 3], f3: [1, 2, 3]}, mat3)
assert np.allclose(mat3, 1)
+
def _factors_memorize(factors, data_iter_maker, eval_env):
# First, start off the memorization process by setting up each factor's
# state and finding out how many passes it will need:
@@ -372,6 +412,7 @@ def _factors_memorize(factors, data_iter_maker, eval_env):
which_pass += 1
return factor_states
+
def test__factors_memorize():
class MockFactor(object):
def __init__(self, requested_passes, token):
@@ -396,12 +437,15 @@ def memorize_finish(self, state, which_pass):
class Data(object):
CHUNKS = 3
+
def __init__(self):
self.calls = 0
self.data = [{"chunk": i} for i in range(self.CHUNKS)]
+
def __call__(self):
self.calls += 1
return iter(self.data)
+
data = Data()
f0 = MockFactor(0, "f0")
f1 = MockFactor(1, "f1")
@@ -415,24 +459,29 @@ def __call__(self):
f0: {
"calls": [],
"token": "f0",
- },
+ },
f1: {
"calls": mem_chunks0 + [("memorize_finish", 0)],
"token": "f1",
- },
+ },
f2a: {
- "calls": mem_chunks0 + [("memorize_finish", 0)]
- + mem_chunks1 + [("memorize_finish", 1)],
+ "calls": mem_chunks0
+ + [("memorize_finish", 0)]
+ + mem_chunks1
+ + [("memorize_finish", 1)],
"token": "f2a",
- },
+ },
f2b: {
- "calls": mem_chunks0 + [("memorize_finish", 0)]
- + mem_chunks1 + [("memorize_finish", 1)],
+ "calls": mem_chunks0
+ + [("memorize_finish", 0)]
+ + mem_chunks1
+ + [("memorize_finish", 1)],
"token": "f2b",
- },
- }
+ },
+ }
assert factor_states == expected
+
def _examine_factor_types(factors, factor_states, data_iter_maker, NA_action):
num_column_counts = {}
cat_sniffers = {}
@@ -442,8 +491,7 @@ def _examine_factor_types(factors, factor_states, data_iter_maker, NA_action):
value = factor.eval(factor_states[factor], data)
if factor in cat_sniffers or guess_categorical(value):
if factor not in cat_sniffers:
- cat_sniffers[factor] = CategoricalSniffer(NA_action,
- factor.origin)
+ cat_sniffers[factor] = CategoricalSniffer(NA_action, factor.origin)
done = cat_sniffers[factor].sniff(value)
if done:
examine_needed.remove(factor)
@@ -462,12 +510,15 @@ def _examine_factor_types(factors, factor_states, data_iter_maker, NA_action):
cat_levels_contrasts[factor] = sniffer.levels_contrast()
return (num_column_counts, cat_levels_contrasts)
+
def test__examine_factor_types():
from patsy.categorical import C
+
class MockFactor(object):
def __init__(self):
# You should check this using 'is', not '=='
from patsy.origin import Origin
+
self.origin = Origin("MOCK", 1, 2)
def eval(self, state, data):
@@ -493,6 +544,7 @@ def next(self):
if self.i > 1:
raise StopIteration
return self.i
+
__next__ = next
num_1dim = MockFactor()
@@ -507,20 +559,21 @@ def next(self):
num_1dim: ([1, 2, 3], [4, 5, 6]),
num_1col: ([[1], [2], [3]], [[4], [5], [6]]),
num_4col: (np.zeros((3, 4)), np.ones((3, 4))),
- categ_1col: (C(["a", "b", "c"], levels=("a", "b", "c"),
- contrast="MOCK CONTRAST"),
- C(["c", "b", "a"], levels=("a", "b", "c"),
- contrast="MOCK CONTRAST")),
+ categ_1col: (
+ C(["a", "b", "c"], levels=("a", "b", "c"), contrast="MOCK CONTRAST"),
+ C(["c", "b", "a"], levels=("a", "b", "c"), contrast="MOCK CONTRAST"),
+ ),
bool_1col: ([True, True, False], [False, True, True]),
# It has to read through all the data to see all the possible levels:
string_1col: (["a", "a", "a"], ["c", "b", "a"]),
object_1col: ([object_levels[0]] * 3, object_levels),
- }
+ }
it = DataIterMaker()
- (num_column_counts, cat_levels_contrasts,
- ) = _examine_factor_types(factor_states.keys(), factor_states, it,
- NAAction())
+ (
+ num_column_counts,
+ cat_levels_contrasts,
+ ) = _examine_factor_types(factor_states.keys(), factor_states, it, NAAction())
assert it.i == 2
iterations = 0
assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4}
@@ -529,20 +582,21 @@ def next(self):
bool_1col: ((False, True), None),
string_1col: (("a", "b", "c"), None),
object_1col: (tuple(sorted(object_levels, key=id)), None),
- }
+ }
# Check that it doesn't read through all the data if that's not necessary:
it = DataIterMaker()
no_read_necessary = [num_1dim, num_1col, num_4col, categ_1col, bool_1col]
- (num_column_counts, cat_levels_contrasts,
- ) = _examine_factor_types(no_read_necessary, factor_states, it,
- NAAction())
+ (
+ num_column_counts,
+ cat_levels_contrasts,
+ ) = _examine_factor_types(no_read_necessary, factor_states, it, NAAction())
assert it.i == 0
assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4}
assert cat_levels_contrasts == {
categ_1col: (("a", "b", "c"), "MOCK CONTRAST"),
bool_1col: ((False, True), None),
- }
+ }
# Illegal inputs:
bool_3col = MockFactor()
@@ -555,21 +609,22 @@ def next(self):
num_3dim: (np.zeros((3, 3, 3)), np.ones((3, 3, 3))),
string_3col: ([["a", "b", "c"]], [["b", "c", "a"]]),
object_3col: ([[[object()]]], [[[object()]]]),
- }
+ }
import pytest
+
for illegal_factor in illegal_factor_states:
it = DataIterMaker()
try:
- _examine_factor_types([illegal_factor], illegal_factor_states, it,
- NAAction())
+ _examine_factor_types(
+ [illegal_factor], illegal_factor_states, it, NAAction()
+ )
except PatsyError as e:
assert e.origin is illegal_factor.origin
else:
assert False
-def _make_subterm_infos(terms,
- num_column_counts,
- cat_levels_contrasts):
+
+def _make_subterm_infos(terms, num_column_counts, cat_levels_contrasts):
# Sort each term into a bucket based on the set of numeric factors it
# contains:
term_buckets = OrderedDict()
@@ -600,9 +655,9 @@ def _make_subterm_infos(terms,
used_subterms = set()
for term in bucket_terms:
subterm_infos = []
- factor_codings = pick_contrasts_for_term(term,
- num_column_counts,
- used_subterms)
+ factor_codings = pick_contrasts_for_term(
+ term, num_column_counts, used_subterms
+ )
# Construct one SubtermInfo for each subterm
for factor_coding in factor_codings:
subterm_factors = []
@@ -622,20 +677,20 @@ def _make_subterm_infos(terms,
levels, contrast = cat_levels_contrasts[factor]
# This is where the default coding is set to
# Treatment:
- coded = code_contrast_matrix(factor_coding[factor],
- levels, contrast,
- default=Treatment)
+ coded = code_contrast_matrix(
+ factor_coding[factor], levels, contrast, default=Treatment
+ )
contrast_matrices[factor] = coded
subterm_columns *= coded.matrix.shape[1]
- subterm_infos.append(SubtermInfo(subterm_factors,
- contrast_matrices,
- subterm_columns))
+ subterm_infos.append(
+ SubtermInfo(subterm_factors, contrast_matrices, subterm_columns)
+ )
term_to_subterm_infos[term] = subterm_infos
assert new_term_order == list(term_to_subterm_infos)
return term_to_subterm_infos
-def design_matrix_builders(termlists, data_iter_maker, eval_env,
- NA_action="drop"):
+
+def design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action="drop"):
"""Construct several :class:`DesignInfo` objects from termlists.
This is one of Patsy's fundamental functions. This function and
@@ -688,36 +743,38 @@ def design_matrix_builders(termlists, data_iter_maker, eval_env,
factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)
# Now all the factors have working eval methods, so we can evaluate them
# on some data to find out what type of data they return.
- (num_column_counts,
- cat_levels_contrasts) = _examine_factor_types(all_factors,
- factor_states,
- data_iter_maker,
- NA_action)
+ (num_column_counts, cat_levels_contrasts) = _examine_factor_types(
+ all_factors, factor_states, data_iter_maker, NA_action
+ )
# Now we need the factor infos, which encapsulate the knowledge of
# how to turn any given factor into a chunk of data:
factor_infos = {}
for factor in all_factors:
if factor in num_column_counts:
- fi = FactorInfo(factor,
- "numerical",
- factor_states[factor],
- num_columns=num_column_counts[factor],
- categories=None)
+ fi = FactorInfo(
+ factor,
+ "numerical",
+ factor_states[factor],
+ num_columns=num_column_counts[factor],
+ categories=None,
+ )
else:
assert factor in cat_levels_contrasts
categories = cat_levels_contrasts[factor][0]
- fi = FactorInfo(factor,
- "categorical",
- factor_states[factor],
- num_columns=None,
- categories=categories)
+ fi = FactorInfo(
+ factor,
+ "categorical",
+ factor_states[factor],
+ num_columns=None,
+ categories=categories,
+ )
factor_infos[factor] = fi
# And now we can construct the DesignInfo for each termlist:
design_infos = []
for termlist in termlists:
- term_to_subterm_infos = _make_subterm_infos(termlist,
- num_column_counts,
- cat_levels_contrasts)
+ term_to_subterm_infos = _make_subterm_infos(
+ termlist, num_column_counts, cat_levels_contrasts
+ )
assert isinstance(term_to_subterm_infos, OrderedDict)
assert frozenset(term_to_subterm_infos) == frozenset(termlist)
this_design_factor_infos = {}
@@ -727,14 +784,18 @@ def design_matrix_builders(termlists, data_iter_maker, eval_env,
column_names = []
for subterms in term_to_subterm_infos.values():
for subterm in subterms:
- for column_name in _subterm_column_names_iter(
- factor_infos, subterm):
+ for column_name in _subterm_column_names_iter(factor_infos, subterm):
column_names.append(column_name)
- design_infos.append(DesignInfo(column_names,
- factor_infos=this_design_factor_infos,
- term_codings=term_to_subterm_infos))
+ design_infos.append(
+ DesignInfo(
+ column_names,
+ factor_infos=this_design_factor_infos,
+ term_codings=term_to_subterm_infos,
+ )
+ )
return design_infos
+
def _build_design_matrix(design_info, factor_info_to_values, dtype):
factor_to_values = {}
need_reshape = False
@@ -764,12 +825,12 @@ def _build_design_matrix(design_info, factor_info_to_values, dtype):
for subterm in subterms:
end_column = start_column + subterm.num_columns
m_slice = m[:, start_column:end_column]
- _build_subterm(subterm, design_info.factor_infos,
- factor_to_values, m_slice)
+ _build_subterm(subterm, design_info.factor_infos, factor_to_values, m_slice)
start_column = end_column
assert start_column == m.shape[1]
return need_reshape, m
+
class _CheckMatch(object):
def __init__(self, name, eq_fn):
self._name = name
@@ -785,18 +846,21 @@ def check(self, seen_value, desc, origin):
self._value_origin = origin
else:
if not self._eq_fn(self.value, seen_value):
- msg = ("%s mismatch between %s and %s"
- % (self._name, self._value_desc, desc))
+ msg = "%s mismatch between %s and %s" % (
+ self._name,
+ self._value_desc,
+ desc,
+ )
if isinstance(self.value, int):
msg += " (%r versus %r)" % (self.value, seen_value)
# XX FIXME: this is a case where having discontiguous Origins
# would be useful...
raise PatsyError(msg, origin)
-def build_design_matrices(design_infos, data,
- NA_action="drop",
- return_type="matrix",
- dtype=np.dtype(float)):
+
+def build_design_matrices(
+ design_infos, data, NA_action="drop", return_type="matrix", dtype=np.dtype(float)
+):
"""Construct several design matrices from :class:`DesignMatrixBuilder`
objects.
@@ -865,11 +929,14 @@ def build_design_matrices(design_infos, data,
if isinstance(NA_action, str):
NA_action = NAAction(NA_action)
if return_type == "dataframe" and not have_pandas:
- raise PatsyError("pandas.DataFrame was requested, but pandas "
- "is not installed")
+ raise PatsyError(
+ "pandas.DataFrame was requested, but pandas " "is not installed"
+ )
if return_type not in ("matrix", "dataframe"):
- raise PatsyError("unrecognized output type %r, should be "
- "'matrix' or 'dataframe'" % (return_type,))
+ raise PatsyError(
+ "unrecognized output type %r, should be "
+ "'matrix' or 'dataframe'" % (return_type,)
+ )
# Evaluate factors
factor_info_to_values = {}
factor_info_to_isNAs = {}
@@ -890,8 +957,7 @@ def build_design_matrices(design_infos, data,
name = factor_info.factor.name()
origin = factor_info.factor.origin
rows_checker.check(value.shape[0], name, origin)
- if (have_pandas
- and isinstance(value, (pandas.Series, pandas.DataFrame))):
+ if have_pandas and isinstance(value, (pandas.Series, pandas.DataFrame)):
index_checker.check(value.index, name, origin)
# Strategy: we work with raw ndarrays for doing the actual
# combining; DesignMatrixBuilder objects never sees pandas
@@ -904,8 +970,7 @@ def build_design_matrices(design_infos, data,
# Handle NAs
values = list(factor_info_to_values.values())
is_NAs = list(factor_info_to_isNAs.values())
- origins = [factor_info.factor.origin
- for factor_info in factor_info_to_values]
+ origins = [factor_info.factor.origin for factor_info in factor_info_to_values]
pandas_index = index_checker.value
num_rows = rows_checker.value
# num_rows is None iff evaluator_to_values (and associated sets like
@@ -927,9 +992,7 @@ def build_design_matrices(design_infos, data,
# Build factor values into matrices
results = []
for design_info in design_infos:
- results.append(_build_design_matrix(design_info,
- factor_info_to_values,
- dtype))
+ results.append(_build_design_matrix(design_info, factor_info_to_values, dtype))
matrices = []
for need_reshape, matrix in results:
if need_reshape:
@@ -939,25 +1002,27 @@ def build_design_matrices(design_infos, data,
# we can figure out what that is...
assert matrix.shape[0] == 1
if num_rows is not None:
- matrix = DesignMatrix(np.repeat(matrix, num_rows, axis=0),
- matrix.design_info)
+ matrix = DesignMatrix(
+ np.repeat(matrix, num_rows, axis=0), matrix.design_info
+ )
else:
raise PatsyError(
"No design matrix has any non-trivial factors, "
"the data object is not a DataFrame. "
"I can't tell how many rows the design matrix should "
"have!"
- )
+ )
matrices.append(matrix)
if return_type == "dataframe":
assert have_pandas
for i, matrix in enumerate(matrices):
di = matrix.design_info
- matrices[i] = pandas.DataFrame(matrix,
- columns=di.column_names,
- index=pandas_index)
+ matrices[i] = pandas.DataFrame(
+ matrix, columns=di.column_names, index=pandas_index
+ )
matrices[i].design_info = di
return matrices
+
# It should be possible to do just the factors -> factor_infos stuff
# alone, since that, well, makes logical sense to do.
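
To make the division of labor in build.py concrete: design_matrix_builders() sniffs factor types and assembles the FactorInfo/SubtermInfo metadata, and build_design_matrices() applies that metadata to (possibly new) data. A minimal sketch through the public wrappers, assuming patsy is installed; the formula and values are illustrative:

    import numpy as np
    from patsy import dmatrix, build_design_matrices

    mat = dmatrix("x + np.log(x)", {"x": [1.0, 2.0, 3.0]})
    # mat.design_info carries the metadata assembled above; reusing it
    # guarantees that new data is coded into exactly the same columns:
    (new_mat,) = build_design_matrices([mat.design_info], {"x": [10.0]})
    print(np.asarray(new_mat))
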
diff --git a/patsy/builtins.py b/patsy/builtins.py
index 9a1e3b5..fb4b319 100644
--- a/patsy/builtins.py
+++ b/patsy/builtins.py
@@ -11,20 +11,26 @@
__all__ = ["I", "Q"]
from patsy.contrasts import ContrastMatrix, Treatment, Poly, Sum, Helmert, Diff
+
__all__ += ["ContrastMatrix", "Treatment", "Poly", "Sum", "Helmert", "Diff"]
from patsy.categorical import C
+
__all__ += ["C"]
from patsy.state import center, standardize, scale
+
__all__ += ["center", "standardize", "scale"]
from patsy.splines import bs
+
__all__ += ["bs"]
from patsy.mgcv_cubic_splines import cr, cc, te
+
__all__ += ["cr", "cc", "te"]
+
def I(x):
"""The identity function. Simply returns its input unchanged.
@@ -42,10 +48,12 @@ def I(x):
``x2``."""
return x
+
def test_I():
assert I(1) == 1
assert I(None) is None
+
def Q(name):
"""A way to 'quote' variable names, especially ones that do not otherwise
meet Python's variable name rules.
@@ -82,16 +90,18 @@ def Q(name):
y ~ np.sqrt(Q("weight.in.kg"))
"""
from patsy.eval import EvalEnvironment
+
env = EvalEnvironment.capture(1)
try:
return env.namespace[name]
except KeyError:
raise NameError("no data named %r found" % (name,))
+
def test_Q():
a = 1
assert Q("a") == 1
assert Q("Q") is Q
import pytest
- pytest.raises(NameError, Q, "asdfsadfdsad")
+ pytest.raises(NameError, Q, "asdfsadfdsad")
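
A minimal sketch of Q() inside a formula, assuming patsy is installed; the awkward column name is borrowed from the docstring above:

    from patsy import dmatrix

    data = {"weight.in.kg": [70.0, 80.0, 65.0]}
    # "weight.in.kg" is not a valid Python identifier, so it must be quoted:
    print(dmatrix('Q("weight.in.kg")', data))
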
diff --git a/patsy/categorical.py b/patsy/categorical.py
index b552f42..7d5be9c 100644
--- a/patsy/categorical.py
+++ b/patsy/categorical.py
@@ -2,8 +2,7 @@
# Copyright (C) 2011-2013 Nathaniel Smith
# See file LICENSE.txt for license information.
-__all__ = ["C", "guess_categorical", "CategoricalSniffer",
- "categorical_to_int"]
+__all__ = ["C", "guess_categorical", "CategoricalSniffer", "categorical_to_int"]
# How we handle categorical data: the big picture
# -----------------------------------------------
@@ -36,21 +35,26 @@
import numpy as np
from patsy import PatsyError
-from patsy.util import (SortAnythingKey,
- safe_scalar_isnan,
- iterable,
- have_pandas, have_pandas_categorical,
- have_pandas_categorical_dtype,
- safe_is_pandas_categorical,
- pandas_Categorical_from_codes,
- pandas_Categorical_categories,
- pandas_Categorical_codes,
- safe_issubdtype,
- no_pickling, assert_no_pickling)
+from patsy.util import (
+ SortAnythingKey,
+ safe_scalar_isnan,
+ iterable,
+ have_pandas,
+ have_pandas_categorical,
+ have_pandas_categorical_dtype,
+ safe_is_pandas_categorical,
+ pandas_Categorical_from_codes,
+ pandas_Categorical_categories,
+ pandas_Categorical_codes,
+ safe_issubdtype,
+ no_pickling,
+ assert_no_pickling,
+)
if have_pandas:
import pandas
+
# Objects of this type will always be treated as categorical, with the
# specified levels and contrast (if given).
class _CategoricalBox(object):
@@ -61,6 +65,7 @@ def __init__(self, data, contrast, levels):
__getstate__ = no_pickling
+
def C(data, contrast=None, levels=None):
"""
Marks some `data` as being categorical, and specifies how to interpret
@@ -101,6 +106,7 @@ def C(data, contrast=None, levels=None):
data = data.data
return _CategoricalBox(data, contrast, levels)
+
def test_C():
c1 = C("asdf")
assert isinstance(c1, _CategoricalBox)
@@ -122,6 +128,7 @@ def test_C():
assert_no_pickling(c4)
+
def guess_categorical(data):
if safe_is_pandas_categorical(data):
return True
@@ -132,6 +139,7 @@ def guess_categorical(data):
return False
return True
+
def test_guess_categorical():
if have_pandas_categorical:
c = pandas.Categorical([1, 2, 3])
@@ -148,6 +156,7 @@ def test_guess_categorical():
assert not guess_categorical([1.0, 2.0, 3.0])
assert not guess_categorical([1.0, 2.0, 3.0, np.nan])
+
def _categorical_shape_fix(data):
# helper function
# data should not be a _CategoricalBox or pandas Categorical or anything
@@ -157,11 +166,11 @@ def _categorical_shape_fix(data):
raise PatsyError("categorical data cannot be >1-dimensional")
# coerce scalars into 1d, which is consistent with what we do for numeric
# factors. (See statsmodels/statsmodels#1881)
- if (not iterable(data)
- or isinstance(data, (str, bytes))):
+ if not iterable(data) or isinstance(data, (str, bytes)):
data = [data]
return data
+
class CategoricalSniffer(object):
def __init__(self, NA_action, origin=None):
self._NA_action = NA_action
@@ -210,17 +219,21 @@ def sniff(self, data):
try:
self._level_set.add(value)
except TypeError:
- raise PatsyError("Error interpreting categorical data: "
- "all items must be hashable",
- self._origin)
+ raise PatsyError(
+ "Error interpreting categorical data: "
+ "all items must be hashable",
+ self._origin,
+ )
# If everything we've seen is boolean, assume that everything else
# would be too. Otherwise we need to keep looking.
return self._level_set == set([True, False])
__getstate__ = no_pickling
+
def test_CategoricalSniffer():
from patsy.missing import NAAction
+
def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
sniffer = CategoricalSniffer(NAAction(NA_types=NA_types))
for data in datas:
@@ -236,19 +249,24 @@ def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
# We make sure to test with both boxed and unboxed pandas objects,
# because we used to have a bug where boxed pandas objects would be
# treated as categorical, but their levels would be lost...
- preps = [lambda x: x,
- C]
+ preps = [lambda x: x, C]
if have_pandas_categorical_dtype:
- preps += [pandas.Series,
- lambda x: C(pandas.Series(x))]
+ preps += [pandas.Series, lambda x: C(pandas.Series(x))]
for prep in preps:
- t([], [prep(pandas.Categorical([1, 2, None]))],
- True, (1, 2))
+ t([], [prep(pandas.Categorical([1, 2, None]))], True, (1, 2))
# check order preservation
- t([], [prep(pandas_Categorical_from_codes([1, 0], ["a", "b"]))],
- True, ("a", "b"))
- t([], [prep(pandas_Categorical_from_codes([1, 0], ["b", "a"]))],
- True, ("b", "a"))
+ t(
+ [],
+ [prep(pandas_Categorical_from_codes([1, 0], ["a", "b"]))],
+ True,
+ ("a", "b"),
+ )
+ t(
+ [],
+ [prep(pandas_Categorical_from_codes([1, 0], ["b", "a"]))],
+ True,
+ ("b", "a"),
+ )
# check that if someone sticks a .contrast field onto our object
obj = prep(pandas.Categorical(["a", "b"]))
obj.contrast = "CONTRAST"
@@ -260,8 +278,7 @@ def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
t([], [C([1, 2], levels=[3, 2, 1]), C([4, 2])], True, (3, 2, 1))
# do some actual sniffing with NAs in
- t(["None", "NaN"], [C([1, np.nan]), C([10, None])],
- False, (1, 10))
+ t(["None", "NaN"], [C([1, np.nan]), C([10, None])], False, (1, 10))
# But 'None' can be a type if we don't make it represent NA:
sniffer = CategoricalSniffer(NAAction(NA_types=["NaN"]))
sniffer.sniff(C([1, np.nan, None]))
@@ -273,17 +290,18 @@ def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
assert set(levels) == set([None, 1])
# bool special cases
- t(["None", "NaN"], [C([True, np.nan, None])],
- True, (False, True))
- t([], [C([10, 20]), C([False]), C([30, 40])],
- False, (False, True, 10, 20, 30, 40))
+ t(["None", "NaN"], [C([True, np.nan, None])], True, (False, True))
+ t([], [C([10, 20]), C([False]), C([30, 40])], False, (False, True, 10, 20, 30, 40))
# exercise the fast-path
- t([], [np.asarray([True, False]), ["foo"]],
- True, (False, True))
+ t([], [np.asarray([True, False]), ["foo"]], True, (False, True))
# check tuples too
- t(["None", "NaN"], [C([("b", 2), None, ("a", 1), np.nan, ("c", None)])],
- False, (("a", 1), ("b", 2), ("c", None)))
+ t(
+ ["None", "NaN"],
+ [C([("b", 2), None, ("a", 1), np.nan, ("c", None)])],
+ False,
+ (("a", 1), ("b", 2), ("c", None)),
+ )
# contrasts
t([], [C([10, 20], contrast="FOO")], False, (10, 20), "FOO")
@@ -304,6 +322,7 @@ def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
# >1d is illegal
pytest.raises(PatsyError, sniffer.sniff, np.asarray([["b"]]))
+
# returns either a 1d ndarray or a pandas.Series
def categorical_to_int(data, levels, NA_action, origin=None):
assert isinstance(levels, tuple)
@@ -312,16 +331,21 @@ def categorical_to_int(data, levels, NA_action, origin=None):
if safe_is_pandas_categorical(data):
data_levels_tuple = tuple(pandas_Categorical_categories(data))
if not data_levels_tuple == levels:
- raise PatsyError("mismatching levels: expected %r, got %r"
- % (levels, data_levels_tuple), origin)
+ raise PatsyError(
+ "mismatching levels: expected %r, got %r" % (levels, data_levels_tuple),
+ origin,
+ )
# pandas.Categorical also uses -1 to indicate NA, and we don't try to
# second-guess its NA detection, so we can just pass it back.
return pandas_Categorical_codes(data)
if isinstance(data, _CategoricalBox):
if data.levels is not None and tuple(data.levels) != levels:
- raise PatsyError("mismatching levels: expected %r, got %r"
- % (levels, tuple(data.levels)), origin)
+ raise PatsyError(
+ "mismatching levels: expected %r, got %r"
+ % (levels, tuple(data.levels)),
+ origin,
+ )
data = data.data
data = _categorical_shape_fix(data)
@@ -329,8 +353,9 @@ def categorical_to_int(data, levels, NA_action, origin=None):
try:
level_to_int = dict(zip(levels, range(len(levels))))
except TypeError:
- raise PatsyError("Error interpreting categorical data: "
- "all items must be hashable", origin)
+ raise PatsyError(
+ "Error interpreting categorical data: " "all items must be hashable", origin
+ )
# fastpath to avoid doing an item-by-item iteration over boolean arrays,
# as requested by #44
@@ -350,42 +375,52 @@ def categorical_to_int(data, levels, NA_action, origin=None):
if len(levels) <= SHOW_LEVELS:
level_strs += [repr(level) for level in levels]
else:
- level_strs += [repr(level)
- for level in levels[:SHOW_LEVELS//2]]
+ level_strs += [repr(level) for level in levels[: SHOW_LEVELS // 2]]
level_strs.append("...")
- level_strs += [repr(level)
- for level in levels[-SHOW_LEVELS//2:]]
+ level_strs += [repr(level) for level in levels[-SHOW_LEVELS // 2 :]]
level_str = "[%s]" % (", ".join(level_strs))
- raise PatsyError("Error converting data to categorical: "
- "observation with value %r does not match "
- "any of the expected levels (expected: %s)"
- % (value, level_str), origin)
+ raise PatsyError(
+ "Error converting data to categorical: "
+ "observation with value %r does not match "
+ "any of the expected levels (expected: %s)" % (value, level_str),
+ origin,
+ )
except TypeError:
- raise PatsyError("Error converting data to categorical: "
- "encountered unhashable value %r"
- % (value,), origin)
+ raise PatsyError(
+ "Error converting data to categorical: "
+ "encountered unhashable value %r" % (value,),
+ origin,
+ )
if have_pandas and isinstance(data, pandas.Series):
out = pandas.Series(out, index=data.index)
return out
+
def test_categorical_to_int():
import pytest
from patsy.missing import NAAction
+
if have_pandas:
s = pandas.Series(["a", "b", "c"], index=[10, 20, 30])
c_pandas = categorical_to_int(s, ("a", "b", "c"), NAAction())
assert np.all(c_pandas == [0, 1, 2])
assert np.all(c_pandas.index == [10, 20, 30])
# Input must be 1-dimensional
- pytest.raises(PatsyError,
- categorical_to_int,
- pandas.DataFrame({10: s}), ("a", "b", "c"), NAAction())
+ pytest.raises(
+ PatsyError,
+ categorical_to_int,
+ pandas.DataFrame({10: s}),
+ ("a", "b", "c"),
+ NAAction(),
+ )
if have_pandas_categorical:
constructors = [pandas_Categorical_from_codes]
if have_pandas_categorical_dtype:
+
def Series_from_codes(codes, categories):
c = pandas_Categorical_from_codes(codes, categories)
return pandas.Series(c)
+
constructors.append(Series_from_codes)
for con in constructors:
cat = con([1, 0, -1], ("a", "b"))
@@ -393,20 +428,23 @@ def Series_from_codes(codes, categories):
assert np.all(conv == [1, 0, -1])
# Trust pandas NA marking
cat2 = con([1, 0, -1], ("a", "None"))
- conv2 = categorical_to_int(cat, ("a", "b"),
- NAAction(NA_types=["None"]))
+        conv2 = categorical_to_int(cat2, ("a", "None"), NAAction(NA_types=["None"]))
assert np.all(conv2 == [1, 0, -1])
# But levels must match
- pytest.raises(PatsyError,
- categorical_to_int,
- con([1, 0], ("a", "b")),
- ("a", "c"),
- NAAction())
- pytest.raises(PatsyError,
- categorical_to_int,
- con([1, 0], ("a", "b")),
- ("b", "a"),
- NAAction())
+ pytest.raises(
+ PatsyError,
+ categorical_to_int,
+ con([1, 0], ("a", "b")),
+ ("a", "c"),
+ NAAction(),
+ )
+ pytest.raises(
+ PatsyError,
+ categorical_to_int,
+ con([1, 0], ("a", "b")),
+ ("b", "a"),
+ NAAction(),
+ )
def t(data, levels, expected, NA_action=NAAction()):
got = categorical_to_int(data, levels, NA_action)
@@ -422,16 +460,21 @@ def t(data, levels, expected, NA_action=NAAction()):
t(["a", "b", "a"], ("a", "d", "z", "b"), [0, 3, 0])
t([("a", 1), ("b", 0), ("a", 1)], (("a", 1), ("b", 0)), [0, 1, 0])
- pytest.raises(PatsyError, categorical_to_int,
- ["a", "b", "a"], ("a", "c"), NAAction())
+ pytest.raises(
+ PatsyError, categorical_to_int, ["a", "b", "a"], ("a", "c"), NAAction()
+ )
t(C(["a", "b", "a"]), ("a", "b"), [0, 1, 0])
t(C(["a", "b", "a"]), ("b", "a"), [1, 0, 1])
t(C(["a", "b", "a"], levels=["b", "a"]), ("b", "a"), [1, 0, 1])
# Mismatch between C() levels and expected levels
- pytest.raises(PatsyError, categorical_to_int,
- C(["a", "b", "a"], levels=["a", "b"]),
- ("b", "a"), NAAction())
+ pytest.raises(
+ PatsyError,
+ categorical_to_int,
+ C(["a", "b", "a"], levels=["a", "b"]),
+ ("b", "a"),
+ NAAction(),
+ )
# ndim == 0 is okay
t("a", ("a", "b"), [0])
@@ -439,26 +482,47 @@ def t(data, levels, expected, NA_action=NAAction()):
t(True, (False, True), [1])
# ndim == 2 is disallowed
- pytest.raises(PatsyError, categorical_to_int,
- np.asarray([["a", "b"], ["b", "a"]]),
- ("a", "b"), NAAction())
+ pytest.raises(
+ PatsyError,
+ categorical_to_int,
+ np.asarray([["a", "b"], ["b", "a"]]),
+ ("a", "b"),
+ NAAction(),
+ )
# levels must be hashable
- pytest.raises(PatsyError, categorical_to_int,
- ["a", "b"], ("a", "b", {}), NAAction())
- pytest.raises(PatsyError, categorical_to_int,
- ["a", "b", {}], ("a", "b"), NAAction())
-
- t(["b", None, np.nan, "a"], ("a", "b"), [1, -1, -1, 0],
- NAAction(NA_types=["None", "NaN"]))
- t(["b", None, np.nan, "a"], ("a", "b", None), [1, -1, -1, 0],
- NAAction(NA_types=["None", "NaN"]))
- t(["b", None, np.nan, "a"], ("a", "b", None), [1, 2, -1, 0],
- NAAction(NA_types=["NaN"]))
+ pytest.raises(
+ PatsyError, categorical_to_int, ["a", "b"], ("a", "b", {}), NAAction()
+ )
+ pytest.raises(
+ PatsyError, categorical_to_int, ["a", "b", {}], ("a", "b"), NAAction()
+ )
+
+ t(
+ ["b", None, np.nan, "a"],
+ ("a", "b"),
+ [1, -1, -1, 0],
+ NAAction(NA_types=["None", "NaN"]),
+ )
+ t(
+ ["b", None, np.nan, "a"],
+ ("a", "b", None),
+ [1, -1, -1, 0],
+ NAAction(NA_types=["None", "NaN"]),
+ )
+ t(
+ ["b", None, np.nan, "a"],
+ ("a", "b", None),
+ [1, 2, -1, 0],
+ NAAction(NA_types=["NaN"]),
+ )
# Smoke test for the branch that formats the ellipsized list of levels in
# the error message:
- pytest.raises(PatsyError, categorical_to_int,
- ["a", "b", "q"],
- ("a", "b", "c", "d", "e", "f", "g", "h"),
- NAAction())
+ pytest.raises(
+ PatsyError,
+ categorical_to_int,
+ ["a", "b", "q"],
+ ("a", "b", "c", "d", "e", "f", "g", "h"),
+ NAAction(),
+ )
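
The reformatted tests above pin down the contract of categorical_to_int: each
value maps to its index in the levels tuple, and anything the NAAction treats
as missing comes out as the sentinel code -1. A minimal sketch of that
behavior, not part of the diff itself; the values mirror the tests above:

    from patsy.categorical import categorical_to_int
    from patsy.missing import NAAction

    # "b" -> 1, "a" -> 0, None -> -1 (NA sentinel)
    codes = categorical_to_int(["b", None, "a"], ("a", "b"), NAAction(NA_types=["None"]))
    assert list(codes) == [1, -1, 0]
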
diff --git a/patsy/compat.py b/patsy/compat.py
index 882e13e..5d56d22 100644
--- a/patsy/compat.py
+++ b/patsy/compat.py
@@ -9,6 +9,7 @@
##### Numpy
import os
+
# To force use of the compat code, set this env var to a non-empty value:
optional_dep_ok = not os.environ.get("PATSY_AVOID_OPTIONAL_DEPENDENCIES")
@@ -23,6 +24,7 @@
# OrderedDict is only available in Python 2.7+. compat_ordereddict.py has
# comments at the top.
import collections
+
if optional_dep_ok and hasattr(collections, "OrderedDict"):
from collections import OrderedDict
else:
@@ -32,11 +34,10 @@
import sys
from patsy import PatsyError
+
def call_and_wrap_exc(msg, origin, f, *args, **kwargs):
try:
return f(*args, **kwargs)
except Exception as e:
- new_exc = PatsyError("%s: %s: %s"
- % (msg, e.__class__.__name__, e),
- origin)
+ new_exc = PatsyError("%s: %s: %s" % (msg, e.__class__.__name__, e), origin)
raise new_exc from e
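
call_and_wrap_exc is the whole of patsy's exception-translation shim: it runs
f(*args, **kwargs) and chains any exception into a PatsyError carrying the
formula origin. A small sketch, assuming only that patsy is importable; the
message text follows the "%s: %s: %s" template above:

    from patsy import PatsyError
    from patsy.compat import call_and_wrap_exc

    def explode():
        raise ValueError("boom")

    try:
        call_and_wrap_exc("evaluating factor", None, explode)
    except PatsyError as err:
        assert "ValueError: boom" in str(err)  # original error chained via __cause__
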
diff --git a/patsy/compat_ordereddict.py b/patsy/compat_ordereddict.py
index 9fd11f7..644a662 100644
--- a/patsy/compat_ordereddict.py
+++ b/patsy/compat_ordereddict.py
@@ -1,27 +1,29 @@
# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
# Passes Python2.7's test suite and incorporates all the latest updates.
-#Author: Raymond Hettinger
-#License: MIT License
-#http://code.activestate.com/recipes/576693/ revision 9, downloaded 2012-03-28
+# Author: Raymond Hettinger
+# License: MIT License
+# http://code.activestate.com/recipes/576693/ revision 9, downloaded 2012-03-28
try:
from thread import get_ident as _get_ident
except ImportError:
# Hacked by njs -- I don't have dummy_thread and py3 doesn't have thread,
# so the import fails when nosetests3 tries to load this file.
- #from dummy_thread import get_ident as _get_ident
+ # from dummy_thread import get_ident as _get_ident
def _get_ident():
return ""
+
try:
from _abcoll import KeysView, ValuesView, ItemsView
except ImportError:
pass
-class OrderedDict(dict): # pragma: no cover
- 'Dictionary that remembers insertion order'
+class OrderedDict(dict): # pragma: no cover
+ "Dictionary that remembers insertion order"
+
# An inherited dict maps keys to values.
# The inherited dict provides __getitem__, __len__, __contains__, and get.
# The remaining methods are order-aware.
@@ -33,23 +35,23 @@ class OrderedDict(dict): # pragma: no cover
# Each link is stored as a list of length three: [PREV, NEXT, KEY].
def __init__(self, *args, **kwds):
- '''Initialize an ordered dictionary. Signature is the same as for
+ """Initialize an ordered dictionary. Signature is the same as for
regular dictionaries, but keyword arguments are not recommended
because their insertion order is arbitrary.
- '''
+ """
if len(args) > 1:
- raise TypeError('expected at most 1 arguments, got %d' % len(args))
+ raise TypeError("expected at most 1 arguments, got %d" % len(args))
try:
self.__root
except AttributeError:
- self.__root = root = [] # sentinel node
+ self.__root = root = [] # sentinel node
root[:] = [root, root, None]
self.__map = {}
self.__update(*args, **kwds)
def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
- 'od.__setitem__(i, y) <==> od[i]=y'
+ "od.__setitem__(i, y) <==> od[i]=y"
# Setting a new item creates a new link which goes at the end of the linked
# list, and the inherited dictionary is updated with the new key/value pair.
if key not in self:
@@ -59,7 +61,7 @@ def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
dict_setitem(self, key, value)
def __delitem__(self, key, dict_delitem=dict.__delitem__):
- 'od.__delitem__(y) <==> del od[y]'
+ "od.__delitem__(y) <==> del od[y]"
# Deleting an existing item uses self.__map to find the link which is
# then removed by updating the links in the predecessor and successor nodes.
dict_delitem(self, key)
@@ -68,7 +70,7 @@ def __delitem__(self, key, dict_delitem=dict.__delitem__):
link_next[0] = link_prev
def __iter__(self):
- 'od.__iter__() <==> iter(od)'
+ "od.__iter__() <==> iter(od)"
root = self.__root
curr = root[1]
while curr is not root:
@@ -76,7 +78,7 @@ def __iter__(self):
curr = curr[1]
def __reversed__(self):
- 'od.__reversed__() <==> reversed(od)'
+ "od.__reversed__() <==> reversed(od)"
root = self.__root
curr = root[0]
while curr is not root:
@@ -84,7 +86,7 @@ def __reversed__(self):
curr = curr[0]
def clear(self):
- 'od.clear() -> None. Remove all items from od.'
+ "od.clear() -> None. Remove all items from od."
try:
for node in self.__map.itervalues():
del node[:]
@@ -96,12 +98,12 @@ def clear(self):
dict.clear(self)
def popitem(self, last=True):
- '''od.popitem() -> (k, v), return and remove a (key, value) pair.
+ """od.popitem() -> (k, v), return and remove a (key, value) pair.
Pairs are returned in LIFO order if last is true or FIFO order if false.
- '''
+ """
if not self:
- raise KeyError('dictionary is empty')
+ raise KeyError("dictionary is empty")
root = self.__root
if last:
link = root[0]
@@ -121,45 +123,47 @@ def popitem(self, last=True):
# -- the following methods do not depend on the internal structure --
def keys(self):
- 'od.keys() -> list of keys in od'
+ "od.keys() -> list of keys in od"
return list(self)
def values(self):
- 'od.values() -> list of values in od'
+ "od.values() -> list of values in od"
return [self[key] for key in self]
def items(self):
- 'od.items() -> list of (key, value) pairs in od'
+ "od.items() -> list of (key, value) pairs in od"
return [(key, self[key]) for key in self]
def iterkeys(self):
- 'od.iterkeys() -> an iterator over the keys in od'
+ "od.iterkeys() -> an iterator over the keys in od"
return iter(self)
def itervalues(self):
- 'od.itervalues -> an iterator over the values in od'
+ "od.itervalues -> an iterator over the values in od"
for k in self:
yield self[k]
def iteritems(self):
- 'od.iteritems -> an iterator over the (key, value) items in od'
+ "od.iteritems -> an iterator over the (key, value) items in od"
for k in self:
yield (k, self[k])
def update(*args, **kwds):
- '''od.update(E, **F) -> None. Update od from dict/iterable E and F.
+ """od.update(E, **F) -> None. Update od from dict/iterable E and F.
If E is a dict instance, does: for k in E: od[k] = E[k]
If E has a .keys() method, does: for k in E.keys(): od[k] = E[k]
Or if E is an iterable of items, does: for k, v in E: od[k] = v
In either case, this is followed by: for k, v in F.items(): od[k] = v
- '''
+ """
if len(args) > 2:
- raise TypeError('update() takes at most 2 positional '
- 'arguments (%d given)' % (len(args),))
+ raise TypeError(
+ "update() takes at most 2 positional "
+ "arguments (%d given)" % (len(args),)
+ )
elif not args:
- raise TypeError('update() takes at least 1 argument (0 given)')
+ raise TypeError("update() takes at least 1 argument (0 given)")
self = args[0]
# Make progressively weaker assumptions about "other"
other = ()
@@ -168,7 +172,7 @@ def update(*args, **kwds):
if isinstance(other, dict):
for key in other:
self[key] = other[key]
- elif hasattr(other, 'keys'):
+ elif hasattr(other, "keys"):
for key in other.keys():
self[key] = other[key]
else:
@@ -182,10 +186,10 @@ def update(*args, **kwds):
__marker = object()
def pop(self, key, default=__marker):
- '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
+ """od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
If key is not found, d is returned if given, otherwise KeyError is raised.
- '''
+ """
if key in self:
result = self[key]
del self[key]
@@ -195,27 +199,27 @@ def pop(self, key, default=__marker):
return default
def setdefault(self, key, default=None):
- 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
+ "od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od"
if key in self:
return self[key]
self[key] = default
return default
def __repr__(self, _repr_running={}):
- 'od.__repr__() <==> repr(od)'
+ "od.__repr__() <==> repr(od)"
call_key = id(self), _get_ident()
if call_key in _repr_running:
- return '...'
+ return "..."
_repr_running[call_key] = 1
try:
if not self:
- return '%s()' % (self.__class__.__name__,)
- return '%s(%r)' % (self.__class__.__name__, self.items())
+ return "%s()" % (self.__class__.__name__,)
+ return "%s(%r)" % (self.__class__.__name__, self.items())
finally:
del _repr_running[call_key]
def __reduce__(self):
- 'Return state information for pickling'
+ "Return state information for pickling"
items = [[k, self[k]] for k in self]
inst_dict = vars(self).copy()
for k in vars(OrderedDict()):
@@ -225,27 +229,27 @@ def __reduce__(self):
return self.__class__, (items,)
def copy(self):
- 'od.copy() -> a shallow copy of od'
+ "od.copy() -> a shallow copy of od"
return self.__class__(self)
@classmethod
def fromkeys(cls, iterable, value=None):
- '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
+ """OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
and values equal to v (which defaults to None).
- '''
+ """
d = cls()
for key in iterable:
d[key] = value
return d
def __eq__(self, other):
- '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
+ """od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
while comparison to a regular mapping is order-insensitive.
- '''
+ """
if isinstance(other, OrderedDict):
- return len(self)==len(other) and self.items() == other.items()
+ return len(self) == len(other) and self.items() == other.items()
return dict.__eq__(self, other)
def __ne__(self, other):
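
The __eq__ docstring above is the one subtle part of the backport: comparison
is order-sensitive against another OrderedDict but order-insensitive against a
plain mapping. Illustrated below (this holds for the stdlib class as well):

    from patsy.compat import OrderedDict

    a = OrderedDict([("x", 1), ("y", 2)])
    b = OrderedDict([("y", 2), ("x", 1)])
    assert a != b                  # order matters between OrderedDicts
    assert a == {"y": 2, "x": 1}   # plain dicts compare order-insensitively
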
diff --git a/patsy/constraint.py b/patsy/constraint.py
index 012b226..6147a70 100644
--- a/patsy/constraint.py
+++ b/patsy/constraint.py
@@ -8,6 +8,7 @@
__all__ = ["LinearConstraint"]
import re
+
try:
from collections.abc import Mapping
except ImportError:
@@ -15,9 +16,13 @@
import numpy as np
from patsy import PatsyError
from patsy.origin import Origin
-from patsy.util import (atleast_2d_column_default,
- repr_pretty_delegate, repr_pretty_impl,
- no_pickling, assert_no_pickling)
+from patsy.util import (
+ atleast_2d_column_default,
+ repr_pretty_delegate,
+ repr_pretty_impl,
+ no_pickling,
+ assert_no_pickling,
+)
from patsy.infix_parser import Token, Operator, infix_parse
from patsy.parse_formula import _parsing_error_test
@@ -44,6 +49,7 @@ class LinearConstraint(object):
A list of strings giving the names of the variables being
constrained. (Used only for consistency checking.)
"""
+
def __init__(self, variable_names, coefs, constants=None):
self.variable_names = list(variable_names)
self.coefs = np.atleast_2d(np.asarray(coefs, dtype=float))
@@ -61,10 +67,12 @@ def __init__(self, variable_names, coefs, constants=None):
raise ValueError("shape mismatch between coefs and constants")
__repr__ = repr_pretty_delegate
+
def _repr_pretty_(self, p, cycle):
assert not cycle
- return repr_pretty_impl(p, self,
- [self.variable_names, self.coefs, self.constants])
+ return repr_pretty_impl(
+ p, self, [self.variable_names, self.coefs, self.constants]
+ )
__getstate__ = no_pickling
@@ -87,6 +95,7 @@ def combine(cls, constraints):
constants = np.vstack([c.constants for c in constraints])
return cls(variable_names, coefs, constants)
+
def test_LinearConstraint():
try:
from numpy.testing import assert_equal
@@ -104,7 +113,6 @@ def test_LinearConstraint():
assert lc.coefs.dtype == np.dtype(float)
assert lc.constants.dtype == np.dtype(float)
-
# statsmodels wants to be able to create degenerate constraints like this,
# see:
# https://github.com/pydata/patsy/issues/89
@@ -113,20 +121,25 @@ def test_LinearConstraint():
assert_equal(lc.coefs, [[0]])
import pytest
+
pytest.raises(ValueError, LinearConstraint, ["a"], [[1, 2]])
pytest.raises(ValueError, LinearConstraint, ["a"], [[[1]]])
pytest.raises(ValueError, LinearConstraint, ["a"], [[1, 2]], [3, 4])
pytest.raises(ValueError, LinearConstraint, ["a", "b"], [[1, 2]], [3, 4])
pytest.raises(ValueError, LinearConstraint, ["a"], [[1]], [[]])
pytest.raises(ValueError, LinearConstraint, ["a", "b"], [])
- pytest.raises(ValueError, LinearConstraint, ["a", "b"],
- np.zeros((0, 2)))
+ pytest.raises(ValueError, LinearConstraint, ["a", "b"], np.zeros((0, 2)))
assert_no_pickling(lc)
+
def test_LinearConstraint_combine():
- comb = LinearConstraint.combine([LinearConstraint(["a", "b"], [1, 0]),
- LinearConstraint(["a", "b"], [0, 1], [1])])
+ comb = LinearConstraint.combine(
+ [
+ LinearConstraint(["a", "b"], [1, 0]),
+ LinearConstraint(["a", "b"], [0, 1], [1]),
+ ]
+ )
assert comb.variable_names == ["a", "b"]
try:
from numpy.testing import assert_equal
@@ -136,38 +149,40 @@ def test_LinearConstraint_combine():
assert_equal(comb.constants, [[0], [1]])
import pytest
+
pytest.raises(ValueError, LinearConstraint.combine, [])
- pytest.raises(ValueError, LinearConstraint.combine,
- [LinearConstraint(["a"], [1]), LinearConstraint(["b"], [1])])
+ pytest.raises(
+ ValueError,
+ LinearConstraint.combine,
+ [LinearConstraint(["a"], [1]), LinearConstraint(["b"], [1])],
+ )
_ops = [
Operator(",", 2, -100),
-
Operator("=", 2, 0),
-
Operator("+", 1, 100),
Operator("-", 1, 100),
Operator("+", 2, 100),
Operator("-", 2, 100),
-
Operator("*", 2, 200),
Operator("/", 2, 200),
- ]
+]
_atomic = ["NUMBER", "VARIABLE"]
+
def _token_maker(type, string):
def make_token(scanner, token_string):
if type == "__OP__":
actual_type = token_string
else:
actual_type = type
- return Token(actual_type,
- Origin(string, *scanner.match.span()),
- token_string)
+ return Token(actual_type, Origin(string, *scanner.match.span()), token_string)
+
return make_token
+
def _tokenize_constraint(string, variable_names):
lparen_re = r"\("
rparen_re = r"\)"
@@ -186,29 +201,33 @@ def _tokenize_constraint(string, variable_names):
(variable_re, _token_maker("VARIABLE", string)),
(num_re, _token_maker("NUMBER", string)),
(whitespace_re, None),
- ]
+ ]
scanner = re.Scanner(lexicon)
tokens, leftover = scanner.scan(string)
if leftover:
offset = len(string) - len(leftover)
- raise PatsyError("unrecognized token in constraint",
- Origin(string, offset, offset + 1))
+ raise PatsyError(
+ "unrecognized token in constraint", Origin(string, offset, offset + 1)
+ )
return tokens
+
def test__tokenize_constraint():
code = "2 * (a + b) = q"
tokens = _tokenize_constraint(code, ["a", "b", "q"])
- expecteds = [("NUMBER", 0, 1, "2"),
- ("*", 2, 3, "*"),
- (Token.LPAREN, 4, 5, "("),
- ("VARIABLE", 5, 6, "a"),
- ("+", 7, 8, "+"),
- ("VARIABLE", 9, 10, "b"),
- (Token.RPAREN, 10, 11, ")"),
- ("=", 12, 13, "="),
- ("VARIABLE", 14, 15, "q")]
+ expecteds = [
+ ("NUMBER", 0, 1, "2"),
+ ("*", 2, 3, "*"),
+ (Token.LPAREN, 4, 5, "("),
+ ("VARIABLE", 5, 6, "a"),
+ ("+", 7, 8, "+"),
+ ("VARIABLE", 9, 10, "b"),
+ (Token.RPAREN, 10, 11, ")"),
+ ("=", 12, 13, "="),
+ ("VARIABLE", 14, 15, "q"),
+ ]
for got, expected in zip(tokens, expecteds):
assert isinstance(got, Token)
assert got.type == expected[0]
@@ -216,6 +235,7 @@ def test__tokenize_constraint():
assert got.extra == expected[3]
import pytest
+
pytest.raises(PatsyError, _tokenize_constraint, "1 + @b", ["b"])
# Shouldn't raise an error:
_tokenize_constraint("1 + @b", ["@b"])
@@ -233,9 +253,10 @@ def test__tokenize_constraint():
assert [t.type for t in tokens] == ["NUMBER", "*", "VARIABLE", ","]
assert [t.extra for t in tokens] == ["2", "*", "a[1,1]", ","]
+
def parse_constraint(string, variable_names):
- return infix_parse(_tokenize_constraint(string, variable_names),
- _ops, _atomic)
+ return infix_parse(_tokenize_constraint(string, variable_names), _ops, _atomic)
+
class _EvalConstraint(object):
def __init__(self, variable_names):
@@ -253,7 +274,7 @@ def __init__(self, variable_names):
("/", 2): self._eval_binary_div,
("=", 2): self._eval_binary_eq,
(",", 2): self._eval_binary_comma,
- }
+ }
# General scheme: there are 2 types we deal with:
# - linear combinations ("lincomb"s) of variables and constants,
@@ -263,7 +284,7 @@ def __init__(self, variable_names):
# - LinearConstraint objects
def is_constant(self, coefs):
- return np.all(coefs[:self._N] == 0)
+ return np.all(coefs[: self._N] == 0)
def _eval_variable(self, tree):
var = tree.token.extra
@@ -292,8 +313,9 @@ def _eval_binary_div(self, tree):
left = self.eval(tree.args[0])
right = self.eval(tree.args[1])
if not self.is_constant(right):
- raise PatsyError("Can't divide by a variable in a linear "
- "constraint", tree.args[1])
+ raise PatsyError(
+ "Can't divide by a variable in a linear " "constraint", tree.args[1]
+ )
return left / right[-1]
def _eval_binary_multiply(self, tree):
@@ -304,8 +326,9 @@ def _eval_binary_multiply(self, tree):
elif self.is_constant(right):
return left * right[-1]
else:
- raise PatsyError("Can't multiply one variable by another "
- "in a linear constraint", tree)
+ raise PatsyError(
+ "Can't multiply one variable by another " "in a linear constraint", tree
+ )
def _eval_binary_eq(self, tree):
# Handle "a1 = a2 = a3", which is parsed as "(a1 = a2) = a3"
@@ -319,7 +342,7 @@ def _eval_binary_eq(self, tree):
args[i] = arg.args[1 - i]
left = self.eval(args[0])
right = self.eval(args[1])
- coefs = left[:self._N] - right[:self._N]
+ coefs = left[: self._N] - right[: self._N]
if np.all(coefs == 0):
raise PatsyError("no variables appear in constraint", tree)
constant = -left[-1] + right[-1]
@@ -342,35 +365,33 @@ def eval(self, tree, constraint=False):
return val
else:
assert val.size == self._N + 1
- if np.all(val[:self._N] == 0):
- raise PatsyError("term is constant, with no variables",
- tree)
- return LinearConstraint(self._variable_names,
- val[:self._N],
- -val[-1])
+ if np.all(val[: self._N] == 0):
+ raise PatsyError("term is constant, with no variables", tree)
+ return LinearConstraint(self._variable_names, val[: self._N], -val[-1])
else:
# Force it to *not* be a constraint
if isinstance(val, LinearConstraint):
raise PatsyError("unexpected constraint object", tree)
return val
+
def linear_constraint(constraint_like, variable_names):
"""This is the internal interface implementing
DesignInfo.linear_constraint, see there for docs."""
if isinstance(constraint_like, LinearConstraint):
if constraint_like.variable_names != variable_names:
- raise ValueError("LinearConstraint has wrong variable_names "
- "(got %r, expected %r)"
- % (constraint_like.variable_names,
- variable_names))
+ raise ValueError(
+ "LinearConstraint has wrong variable_names "
+ "(got %r, expected %r)"
+ % (constraint_like.variable_names, variable_names)
+ )
return constraint_like
if isinstance(constraint_like, Mapping):
# Simple conjunction-of-equality constraints can be specified as
# dicts. {"x": 1, "y": 2} -> tests x = 1 and y = 2. Keys can be
# either variable names, or variable indices.
- coefs = np.zeros((len(constraint_like), len(variable_names)),
- dtype=float)
+ coefs = np.zeros((len(constraint_like), len(variable_names)), dtype=float)
constants = np.zeros(len(constraint_like))
used = set()
for i, (name, value) in enumerate(constraint_like.items()):
@@ -379,11 +400,9 @@ def linear_constraint(constraint_like, variable_names):
elif isinstance(name, int):
idx = name
else:
- raise ValueError("unrecognized variable name/index %r"
- % (name,))
+ raise ValueError("unrecognized variable name/index %r" % (name,))
if idx in used:
- raise ValueError("duplicated constraint on %r"
- % (variable_names[idx],))
+ raise ValueError("duplicated constraint on %r" % (variable_names[idx],))
used.add(idx)
coefs[i, idx] = 1
constants[i] = value
@@ -393,9 +412,11 @@ def linear_constraint(constraint_like, variable_names):
constraint_like = [constraint_like]
# fall-through
- if (isinstance(constraint_like, list)
+ if (
+ isinstance(constraint_like, list)
and constraint_like
- and isinstance(constraint_like[0], str)):
+ and isinstance(constraint_like[0], str)
+ ):
constraints = []
for code in constraint_like:
if not isinstance(code, str):
@@ -435,24 +456,22 @@ def _check_lincon(input, varnames, coefs, constants):
def test_linear_constraint():
import pytest
from patsy.compat import OrderedDict
+
t = _check_lincon
t(LinearConstraint(["a", "b"], [2, 3]), ["a", "b"], [[2, 3]], [[0]])
- pytest.raises(ValueError, linear_constraint,
- LinearConstraint(["b", "a"], [2, 3]),
- ["a", "b"])
+ pytest.raises(
+ ValueError, linear_constraint, LinearConstraint(["b", "a"], [2, 3]), ["a", "b"]
+ )
t({"a": 2}, ["a", "b"], [[1, 0]], [[2]])
- t(OrderedDict([("a", 2), ("b", 3)]),
- ["a", "b"], [[1, 0], [0, 1]], [[2], [3]])
- t(OrderedDict([("a", 2), ("b", 3)]),
- ["b", "a"], [[0, 1], [1, 0]], [[2], [3]])
+ t(OrderedDict([("a", 2), ("b", 3)]), ["a", "b"], [[1, 0], [0, 1]], [[2], [3]])
+ t(OrderedDict([("a", 2), ("b", 3)]), ["b", "a"], [[0, 1], [1, 0]], [[2], [3]])
t({0: 2}, ["a", "b"], [[1, 0]], [[2]])
t(OrderedDict([(0, 2), (1, 3)]), ["a", "b"], [[1, 0], [0, 1]], [[2], [3]])
- t(OrderedDict([("a", 2), (1, 3)]),
- ["a", "b"], [[1, 0], [0, 1]], [[2], [3]])
+ t(OrderedDict([("a", 2), (1, 3)]), ["a", "b"], [[1, 0], [0, 1]], [[2], [3]])
pytest.raises(ValueError, linear_constraint, {"q": 1}, ["a", "b"])
pytest.raises(ValueError, linear_constraint, {"a": 1, 0: 2}, ["a", "b"])
@@ -472,37 +491,46 @@ def test_linear_constraint():
pytest.raises(ValueError, linear_constraint, ["a", {"b": 0}], ["a", "b"])
# Actual evaluator tests
- t("2 * (a + b/3) + b + 2*3/4 = 1 + 2*3", ["a", "b"],
- [[2, 2.0/3 + 1]], [[7 - 6.0/4]])
+ t(
+ "2 * (a + b/3) + b + 2*3/4 = 1 + 2*3",
+ ["a", "b"],
+ [[2, 2.0 / 3 + 1]],
+ [[7 - 6.0 / 4]],
+ )
t("+2 * -a", ["a", "b"], [[-2, 0]], [[0]])
t("a - b, a + b = 2", ["a", "b"], [[1, -1], [1, 1]], [[0], [2]])
- t("a = 1, a = 2, a = 3", ["a", "b"],
- [[1, 0], [1, 0], [1, 0]], [[1], [2], [3]])
+ t("a = 1, a = 2, a = 3", ["a", "b"], [[1, 0], [1, 0], [1, 0]], [[1], [2], [3]])
t("a * 2", ["a", "b"], [[2, 0]], [[0]])
t("-a = 1", ["a", "b"], [[-1, 0]], [[1]])
t("(2 + a - a) * b", ["a", "b"], [[0, 2]], [[0]])
t("a = 1 = b", ["a", "b"], [[1, 0], [0, -1]], [[1], [-1]])
t("a = (1 = b)", ["a", "b"], [[0, -1], [1, 0]], [[-1], [1]])
- t("a = 1, a = b = c", ["a", "b", "c"],
- [[1, 0, 0], [1, -1, 0], [0, 1, -1]], [[1], [0], [0]])
+ t(
+ "a = 1, a = b = c",
+ ["a", "b", "c"],
+ [[1, 0, 0], [1, -1, 0], [0, 1, -1]],
+ [[1], [0], [0]],
+ )
# One should never do this of course, but test that it works anyway...
t("a + 1 = 2", ["a", "a + 1"], [[0, 1]], [[2]])
t(([10, 20], [30]), ["a", "b"], [[10, 20]], [[30]])
- t(([[10, 20], [20, 40]], [[30], [35]]), ["a", "b"],
- [[10, 20], [20, 40]], [[30], [35]])
+ t(
+ ([[10, 20], [20, 40]], [[30], [35]]),
+ ["a", "b"],
+ [[10, 20], [20, 40]],
+ [[30], [35]],
+ )
# wrong-length tuple
- pytest.raises(ValueError, linear_constraint,
- ([1, 0], [0], [0]), ["a", "b"])
+ pytest.raises(ValueError, linear_constraint, ([1, 0], [0], [0]), ["a", "b"])
pytest.raises(ValueError, linear_constraint, ([1, 0],), ["a", "b"])
t([10, 20], ["a", "b"], [[10, 20]], [[0]])
t([[10, 20], [20, 40]], ["a", "b"], [[10, 20], [20, 40]], [[0], [0]])
t(np.array([10, 20]), ["a", "b"], [[10, 20]], [[0]])
- t(np.array([[10, 20], [20, 40]]), ["a", "b"],
- [[10, 20], [20, 40]], [[0], [0]])
+ t(np.array([[10, 20], [20, 40]]), ["a", "b"], [[10, 20], [20, 40]], [[0], [0]])
# unknown object type
pytest.raises(ValueError, linear_constraint, None, ["a", "b"])
@@ -529,4 +557,5 @@ def test_linear_constraint():
def test_eval_errors():
def doit(bad_code):
return linear_constraint(bad_code, ["a", "b", "c"])
+
_parsing_error_test(doit, _parse_eval_error_tests)
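
The test suite above doubles as a reference for the constraint mini-language
that linear_constraint accepts: comma-separated equations over the variable
names, where a term with no "=" is tested against zero. A compact sketch, with
the expected values copied from test_linear_constraint above:

    from patsy.constraint import linear_constraint

    lc = linear_constraint("a - b, a + b = 2", ["a", "b"])
    # lc.coefs     == [[1., -1.], [1., 1.]]
    # lc.constants == [[0.], [2.]]
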
diff --git a/patsy/contrasts.py b/patsy/contrasts.py
index c3e6921..0ac9ac7 100644
--- a/patsy/contrasts.py
+++ b/patsy/contrasts.py
@@ -10,9 +10,14 @@
import numpy as np
from patsy import PatsyError
-from patsy.util import (repr_pretty_delegate, repr_pretty_impl,
- safe_issubdtype,
- no_pickling, assert_no_pickling)
+from patsy.util import (
+ repr_pretty_delegate,
+ repr_pretty_impl,
+ safe_issubdtype,
+ no_pickling,
+ assert_no_pickling,
+)
+
class ContrastMatrix:
"""A simple container for a matrix used for coding categorical factors.
@@ -33,6 +38,7 @@ class ContrastMatrix:
final column names. E.g. for treatment coding the entries will look
like ``"[T.level1]"``.
"""
+
def __init__(self, matrix, column_suffixes):
self.matrix = np.asarray(matrix)
self.column_suffixes = column_suffixes
@@ -40,11 +46,13 @@ def __init__(self, matrix, column_suffixes):
raise PatsyError("matrix and column_suffixes don't conform")
__repr__ = repr_pretty_delegate
+
def _repr_pretty_(self, p, cycle):
repr_pretty_impl(p, self, [self.matrix, self.column_suffixes])
__getstate__ = no_pickling
+
def test_ContrastMatrix():
cm = ContrastMatrix([[1, 0], [0, 1]], ["a", "b"])
assert np.array_equal(cm.matrix, np.eye(2))
@@ -53,10 +61,12 @@ def test_ContrastMatrix():
repr(cm)
import pytest
+
pytest.raises(PatsyError, ContrastMatrix, [[1], [0]], ["a", "b"])
assert_no_pickling(cm)
+
# This always produces an object of the type that Python calls 'str' (whether
# that be a Python 2 string-of-bytes or a Python 3 string-of-unicode). It does
# *not* make any particular guarantees about being reversible or having other
@@ -73,32 +83,38 @@ def _obj_to_readable_str(obj):
else:
return repr(obj)
+
def test__obj_to_readable_str():
def t(obj, expected):
got = _obj_to_readable_str(obj)
assert type(got) is str
assert got == expected
+
t(1, "1")
t(1.0, "1.0")
t("asdf", "asdf")
- t(u"asdf", "asdf")
+ t("asdf", "asdf")
# we can use "foo".encode here b/c this is python 3!
# a utf-8 encoded euro-sign comes out as a real euro sign.
- t("\u20ac".encode("utf-8"), u"\u20ac")
+ t("\u20ac".encode("utf-8"), "\u20ac")
    # but an iso-8859-15 euro sign can't be decoded, and we fall back on
# repr()
t("\u20ac".encode("iso-8859-15"), "b'\\xa4'")
+
def _name_levels(prefix, levels):
return ["[%s%s]" % (prefix, _obj_to_readable_str(level)) for level in levels]
+
def test__name_levels():
assert _name_levels("a", ["b", "c"]) == ["[ab]", "[ac]"]
+
def _dummy_code(levels):
return ContrastMatrix(np.eye(len(levels)), _name_levels("", levels))
+
def _get_level(levels, level_ref):
if level_ref in levels:
return levels.index(level_ref)
@@ -106,11 +122,11 @@ def _get_level(levels, level_ref):
if level_ref < 0:
level_ref += len(levels)
if not (0 <= level_ref < len(levels)):
- raise PatsyError("specified level %r is out of range"
- % (level_ref,))
+ raise PatsyError("specified level %r is out of range" % (level_ref,))
return level_ref
raise PatsyError("specified level %r not found" % (level_ref,))
+
def test__get_level():
assert _get_level(["a", "b", "c"], 0) == 0
assert _get_level(["a", "b", "c"], -1) == 2
@@ -118,6 +134,7 @@ def test__get_level():
# For integer levels, we check identity before treating it as an index
assert _get_level([2, 1, 0], 0) == 2
import pytest
+
pytest.raises(PatsyError, _get_level, ["a", "b"], 2)
pytest.raises(PatsyError, _get_level, ["a", "b"], -3)
pytest.raises(PatsyError, _get_level, ["a", "b"], "c")
@@ -153,6 +170,7 @@ class Treatment:
using ``Treatment(reference=-1)`` will produce contrasts that are
"equivalent to those produced by many (but not all) SAS procedures".
"""
+
def __init__(self, reference=None):
self.reference = reference
@@ -165,14 +183,15 @@ def code_without_intercept(self, levels):
else:
reference = _get_level(levels, self.reference)
eye = np.eye(len(levels) - 1)
- contrasts = np.vstack((eye[:reference, :],
- np.zeros((1, len(levels) - 1)),
- eye[reference:, :]))
- names = _name_levels("T.", levels[:reference] + levels[reference + 1:])
+ contrasts = np.vstack(
+ (eye[:reference, :], np.zeros((1, len(levels) - 1)), eye[reference:, :])
+ )
+ names = _name_levels("T.", levels[:reference] + levels[reference + 1 :])
return ContrastMatrix(contrasts, names)
__getstate__ = no_pickling
+
def test_Treatment():
t1 = Treatment()
matrix = t1.code_with_intercept(["a", "b", "c"])
@@ -196,6 +215,7 @@ def test_Treatment():
assert matrix.column_suffixes == ["[T.1]", "[T.0]"]
assert np.allclose(matrix.matrix, [[0, 0], [1, 0], [0, 1]])
+
class Poly(object):
"""Orthogonal polynomial contrast coding.
@@ -230,6 +250,7 @@ class Poly(object):
rank encodings are always dummy-coded, regardless of what contrast you
have set.)
"""
+
def __init__(self, scores=None):
self.scores = scores
@@ -240,9 +261,10 @@ def _code_either(self, intercept, levels):
scores = np.arange(n)
scores = np.asarray(scores, dtype=float)
if len(scores) != n:
- raise PatsyError("number of levels (%s) does not match"
- " number of scores (%s)"
- % (n, len(scores)))
+ raise PatsyError(
+ "number of levels (%s) does not match"
+ " number of scores (%s)" % (n, len(scores))
+ )
# Strategy: just make a matrix whose columns are naive linear,
# quadratic, etc., functions of the raw scores, and then use 'qr' to
# orthogonalize each column against those to its left.
@@ -250,7 +272,7 @@ def _code_either(self, intercept, levels):
raw_poly = scores.reshape((-1, 1)) ** np.arange(n).reshape((1, -1))
q, r = np.linalg.qr(raw_poly)
q *= np.sign(np.diag(r))
- q /= np.sqrt(np.sum(q ** 2, axis=1))
+ q /= np.sqrt(np.sum(q**2, axis=1))
# The constant term is always all 1's -- we don't normalize it.
q[:, 0] = 1
names = [".Constant", ".Linear", ".Quadratic", ".Cubic"]
@@ -271,33 +293,44 @@ def code_without_intercept(self, levels):
__getstate__ = no_pickling
+
def test_Poly():
t1 = Poly()
matrix = t1.code_with_intercept(["a", "b", "c"])
assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic"]
# Values from R 'options(digits=15); contr.poly(3)'
- expected = [[1, -7.07106781186548e-01, 0.408248290463863],
- [1, 0, -0.816496580927726],
- [1, 7.07106781186547e-01, 0.408248290463863]]
+ expected = [
+ [1, -7.07106781186548e-01, 0.408248290463863],
+ [1, 0, -0.816496580927726],
+ [1, 7.07106781186547e-01, 0.408248290463863],
+ ]
print(matrix.matrix)
assert np.allclose(matrix.matrix, expected)
matrix = t1.code_without_intercept(["a", "b", "c"])
assert matrix.column_suffixes == [".Linear", ".Quadratic"]
# Values from R 'options(digits=15); contr.poly(3)'
print(matrix.matrix)
- assert np.allclose(matrix.matrix,
- [[-7.07106781186548e-01, 0.408248290463863],
- [0, -0.816496580927726],
- [7.07106781186547e-01, 0.408248290463863]])
+ assert np.allclose(
+ matrix.matrix,
+ [
+ [-7.07106781186548e-01, 0.408248290463863],
+ [0, -0.816496580927726],
+ [7.07106781186547e-01, 0.408248290463863],
+ ],
+ )
matrix = Poly(scores=[0, 10, 11]).code_with_intercept(["a", "b", "c"])
assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic"]
# Values from R 'options(digits=15); contr.poly(3, scores=c(0, 10, 11))'
print(matrix.matrix)
- assert np.allclose(matrix.matrix,
- [[1, -0.813733471206735, 0.0671156055214024],
- [1, 0.348742916231458, -0.7382716607354268],
- [1, 0.464990554975277, 0.6711560552140243]])
+ assert np.allclose(
+ matrix.matrix,
+ [
+ [1, -0.813733471206735, 0.0671156055214024],
+ [1, 0.348742916231458, -0.7382716607354268],
+ [1, 0.464990554975277, 0.6711560552140243],
+ ],
+ )
# we had an integer/float handling bug for score vectors whose mean was
# non-integer, so check one of those:
@@ -305,19 +338,28 @@ def test_Poly():
assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic"]
# Values from R 'options(digits=15); contr.poly(3, scores=c(0, 10, 12))'
print(matrix.matrix)
- assert np.allclose(matrix.matrix,
- [[1, -0.806559132617443, 0.127000127000191],
- [1, 0.293294230042706, -0.762000762001143],
- [1, 0.513264902574736, 0.635000635000952]])
+ assert np.allclose(
+ matrix.matrix,
+ [
+ [1, -0.806559132617443, 0.127000127000191],
+ [1, 0.293294230042706, -0.762000762001143],
+ [1, 0.513264902574736, 0.635000635000952],
+ ],
+ )
import pytest
- pytest.raises(PatsyError,
- Poly(scores=[0, 1]).code_with_intercept,
- ["a", "b", "c"])
+
+ pytest.raises(PatsyError, Poly(scores=[0, 1]).code_with_intercept, ["a", "b", "c"])
matrix = t1.code_with_intercept(list(range(6)))
- assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic",
- ".Cubic", "^4", "^5"]
+ assert matrix.column_suffixes == [
+ ".Constant",
+ ".Linear",
+ ".Quadratic",
+ ".Cubic",
+ "^4",
+ "^5",
+ ]
class Sum(object):
@@ -349,6 +391,7 @@ class Sum(object):
This is equivalent to R's `contr.sum`.
"""
+
def __init__(self, omit=None):
self.omit = omit
@@ -366,24 +409,24 @@ def _sum_contrast(self, levels):
out = np.empty((n, n - 1))
out[:omit_i, :] = eye[:omit_i, :]
out[omit_i, :] = -1
- out[omit_i + 1:, :] = eye[omit_i:, :]
+ out[omit_i + 1 :, :] = eye[omit_i:, :]
return out
def code_with_intercept(self, levels):
contrast = self.code_without_intercept(levels)
- matrix = np.column_stack((np.ones(len(levels)),
- contrast.matrix))
+ matrix = np.column_stack((np.ones(len(levels)), contrast.matrix))
column_suffixes = ["[mean]"] + contrast.column_suffixes
return ContrastMatrix(matrix, column_suffixes)
def code_without_intercept(self, levels):
matrix = self._sum_contrast(levels)
omit_i = self._omit_i(levels)
- included_levels = levels[:omit_i] + levels[omit_i + 1:]
+ included_levels = levels[:omit_i] + levels[omit_i + 1 :]
return ContrastMatrix(matrix, _name_levels("S.", included_levels))
__getstate__ = no_pickling
+
def test_Sum():
t1 = Sum()
matrix = t1.code_with_intercept(["a", "b", "c"])
@@ -421,6 +464,7 @@ def test_Sum():
assert matrix.column_suffixes == ["[S.b]", "[S.c]"]
assert np.allclose(matrix.matrix, [[-1, -1], [1, 0], [0, 1]])
+
class Helmert(object):
"""Helmert contrasts.
@@ -444,59 +488,58 @@ class Helmert(object):
This is equivalent to R's `contr.helmert`.
"""
+
def _helmert_contrast(self, levels):
n = len(levels)
- #http://www.ats.ucla.edu/stat/sas/webbooks/reg/chapter5/sasreg5.htm#HELMERT
- #contr = np.eye(n - 1)
- #int_range = np.arange(n - 1., 1, -1)
- #denom = np.repeat(int_range, np.arange(n - 2, 0, -1))
- #contr[np.tril_indices(n - 1, -1)] = -1. / denom
-
- #http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm#HELMERT
- #contr = np.zeros((n - 1., n - 1))
- #int_range = np.arange(n, 1, -1)
- #denom = np.repeat(int_range[:-1], np.arange(n - 2, 0, -1))
- #contr[np.diag_indices(n - 1)] = (int_range - 1.) / int_range
- #contr[np.tril_indices(n - 1, -1)] = -1. / denom
- #contr = np.vstack((contr, -1./int_range))
-
- #r-like
+ # http://www.ats.ucla.edu/stat/sas/webbooks/reg/chapter5/sasreg5.htm#HELMERT
+ # contr = np.eye(n - 1)
+ # int_range = np.arange(n - 1., 1, -1)
+ # denom = np.repeat(int_range, np.arange(n - 2, 0, -1))
+ # contr[np.tril_indices(n - 1, -1)] = -1. / denom
+
+ # http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm#HELMERT
+ # contr = np.zeros((n - 1., n - 1))
+ # int_range = np.arange(n, 1, -1)
+ # denom = np.repeat(int_range[:-1], np.arange(n - 2, 0, -1))
+ # contr[np.diag_indices(n - 1)] = (int_range - 1.) / int_range
+ # contr[np.tril_indices(n - 1, -1)] = -1. / denom
+ # contr = np.vstack((contr, -1./int_range))
+
+ # r-like
contr = np.zeros((n, n - 1))
contr[1:][np.diag_indices(n - 1)] = np.arange(1, n)
contr[np.triu_indices(n - 1)] = -1
return contr
def code_with_intercept(self, levels):
- contrast = np.column_stack((np.ones(len(levels)),
- self._helmert_contrast(levels)))
+ contrast = np.column_stack(
+ (np.ones(len(levels)), self._helmert_contrast(levels))
+ )
column_suffixes = _name_levels("H.", ["intercept"] + list(levels[1:]))
return ContrastMatrix(contrast, column_suffixes)
def code_without_intercept(self, levels):
contrast = self._helmert_contrast(levels)
- return ContrastMatrix(contrast,
- _name_levels("H.", levels[1:]))
+ return ContrastMatrix(contrast, _name_levels("H.", levels[1:]))
__getstate__ = no_pickling
+
def test_Helmert():
t1 = Helmert()
for levels in (["a", "b", "c", "d"], ("a", "b", "c", "d")):
matrix = t1.code_with_intercept(levels)
- assert matrix.column_suffixes == ["[H.intercept]",
- "[H.b]",
- "[H.c]",
- "[H.d]"]
- assert np.allclose(matrix.matrix, [[1, -1, -1, -1],
- [1, 1, -1, -1],
- [1, 0, 2, -1],
- [1, 0, 0, 3]])
+ assert matrix.column_suffixes == ["[H.intercept]", "[H.b]", "[H.c]", "[H.d]"]
+ assert np.allclose(
+ matrix.matrix,
+ [[1, -1, -1, -1], [1, 1, -1, -1], [1, 0, 2, -1], [1, 0, 0, 3]],
+ )
matrix = t1.code_without_intercept(levels)
assert matrix.column_suffixes == ["[H.b]", "[H.c]", "[H.d]"]
- assert np.allclose(matrix.matrix, [[-1, -1, -1],
- [1, -1, -1],
- [0, 2, -1],
- [0, 0, 3]])
+ assert np.allclose(
+ matrix.matrix, [[-1, -1, -1], [1, -1, -1], [0, 2, -1], [0, 0, 3]]
+ )
+
class Diff(object):
"""Backward difference coding.
@@ -517,27 +560,28 @@ class Diff(object):
# Full rank
dmatrix("0 + C(a, Diff)", balanced(a=3))
"""
+
def _diff_contrast(self, levels):
nlevels = len(levels)
- contr = np.zeros((nlevels, nlevels-1))
+ contr = np.zeros((nlevels, nlevels - 1))
int_range = np.arange(1, nlevels)
upper_int = np.repeat(int_range, int_range)
- row_i, col_i = np.triu_indices(nlevels-1)
+ row_i, col_i = np.triu_indices(nlevels - 1)
# we want to iterate down the columns not across the rows
# it would be nice if the index functions had a row/col order arg
col_order = np.argsort(col_i)
- contr[row_i[col_order],
- col_i[col_order]] = (upper_int-nlevels)/float(nlevels)
+ contr[row_i[col_order], col_i[col_order]] = (upper_int - nlevels) / float(
+ nlevels
+ )
lower_int = np.repeat(int_range, int_range[::-1])
- row_i, col_i = np.tril_indices(nlevels-1)
+ row_i, col_i = np.tril_indices(nlevels - 1)
# we want to iterate down the columns not across the rows
col_order = np.argsort(col_i)
- contr[row_i[col_order]+1, col_i[col_order]] = lower_int/float(nlevels)
+ contr[row_i[col_order] + 1, col_i[col_order]] = lower_int / float(nlevels)
return contr
def code_with_intercept(self, levels):
- contrast = np.column_stack((np.ones(len(levels)),
- self._diff_contrast(levels)))
+ contrast = np.column_stack((np.ones(len(levels)), self._diff_contrast(levels)))
return ContrastMatrix(contrast, _name_levels("D.", levels))
def code_without_intercept(self, levels):
@@ -546,21 +590,32 @@ def code_without_intercept(self, levels):
__getstate__ = no_pickling
+
def test_diff():
t1 = Diff()
matrix = t1.code_with_intercept(["a", "b", "c", "d"])
- assert matrix.column_suffixes == ["[D.a]", "[D.b]", "[D.c]",
- "[D.d]"]
- assert np.allclose(matrix.matrix, [[1, -3/4., -1/2., -1/4.],
- [1, 1/4., -1/2., -1/4.],
- [1, 1/4., 1./2, -1/4.],
- [1, 1/4., 1/2., 3/4.]])
+ assert matrix.column_suffixes == ["[D.a]", "[D.b]", "[D.c]", "[D.d]"]
+ assert np.allclose(
+ matrix.matrix,
+ [
+ [1, -3 / 4.0, -1 / 2.0, -1 / 4.0],
+ [1, 1 / 4.0, -1 / 2.0, -1 / 4.0],
+ [1, 1 / 4.0, 1.0 / 2, -1 / 4.0],
+ [1, 1 / 4.0, 1 / 2.0, 3 / 4.0],
+ ],
+ )
matrix = t1.code_without_intercept(["a", "b", "c", "d"])
assert matrix.column_suffixes == ["[D.a]", "[D.b]", "[D.c]"]
- assert np.allclose(matrix.matrix, [[-3/4., -1/2., -1/4.],
- [1/4., -1/2., -1/4.],
- [1/4., 2./4, -1/4.],
- [1/4., 1/2., 3/4.]])
+ assert np.allclose(
+ matrix.matrix,
+ [
+ [-3 / 4.0, -1 / 2.0, -1 / 4.0],
+ [1 / 4.0, -1 / 2.0, -1 / 4.0],
+ [1 / 4.0, 2.0 / 4, -1 / 4.0],
+ [1 / 4.0, 1 / 2.0, 3 / 4.0],
+ ],
+ )
+
# contrast can be:
# -- a ContrastMatrix
@@ -578,10 +633,10 @@ def code_contrast_matrix(intercept, levels, contrast, default=None):
return contrast
as_array = np.asarray(contrast)
if safe_issubdtype(as_array.dtype, np.number):
- return ContrastMatrix(as_array,
- _name_levels("custom", range(as_array.shape[1])))
+ return ContrastMatrix(
+ as_array, _name_levels("custom", range(as_array.shape[1]))
+ )
if intercept:
return contrast.code_with_intercept(levels)
else:
return contrast.code_without_intercept(levels)
-
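
All of the coding classes above share the same two-method interface,
code_with_intercept(levels) and code_without_intercept(levels), each returning
a ContrastMatrix. A short sketch using the default Treatment coding; the
reference level "a" codes to all zeros, matching test_Treatment above:

    from patsy.contrasts import Treatment

    cm = Treatment().code_without_intercept(["a", "b", "c"])
    assert cm.column_suffixes == ["[T.b]", "[T.c]"]
    # cm.matrix == [[0., 0.], [1., 0.], [0., 1.]]
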
diff --git a/patsy/desc.py b/patsy/desc.py
index 4545de0..6f9d1af 100644
--- a/patsy/desc.py
+++ b/patsy/desc.py
@@ -16,6 +16,7 @@
# These are made available in the patsy.* namespace
__all__ = ["Term", "ModelDesc", "INTERCEPT"]
+
# One might think it would make more sense for 'factors' to be a set, rather
# than a tuple-with-guaranteed-unique-entries-that-compares-like-a-set. The
# reason we do it this way is that it preserves the order that the user typed
@@ -32,17 +33,19 @@ class Term(object):
Terms are hashable and compare by value.
Attributes:
-
+
.. attribute:: factors
A tuple of factor objects.
"""
+
def __init__(self, factors):
self.factors = tuple(uniqueify_list(factors))
def __eq__(self, other):
- return (isinstance(other, Term)
- and frozenset(other.factors) == frozenset(self.factors))
+ return isinstance(other, Term) and frozenset(other.factors) == frozenset(
+ self.factors
+ )
def __ne__(self, other):
return not self == other
@@ -51,6 +54,7 @@ def __hash__(self):
return hash((Term, frozenset(self.factors)))
__repr__ = repr_pretty_delegate
+
def _repr_pretty_(self, p, cycle):
assert not cycle
repr_pretty_impl(p, self, [list(self.factors)])
@@ -64,8 +68,10 @@ def name(self):
__getstate__ = no_pickling
+
INTERCEPT = Term([])
+
class _MockFactor(object):
def __init__(self, name):
self._name = name
@@ -73,6 +79,7 @@ def __init__(self, name):
def name(self):
return self._name
+
def test_Term():
assert Term([1, 2, 1]).factors == (1, 2)
assert Term([1, 2]) == Term([2, 1])
@@ -85,6 +92,7 @@ def test_Term():
assert_no_pickling(Term([]))
+
class ModelDesc(object):
"""A simple container representing the termlists parsed from a formula.
@@ -103,17 +111,21 @@ class ModelDesc(object):
Two termlists representing the left- and right-hand sides of a
formula, suitable for passing to :func:`design_matrix_builders`.
"""
+
def __init__(self, lhs_termlist, rhs_termlist):
self.lhs_termlist = uniqueify_list(lhs_termlist)
self.rhs_termlist = uniqueify_list(rhs_termlist)
__repr__ = repr_pretty_delegate
+
def _repr_pretty_(self, p, cycle):
assert not cycle
- return repr_pretty_impl(p, self,
- [],
- [("lhs_termlist", self.lhs_termlist),
- ("rhs_termlist", self.rhs_termlist)])
+ return repr_pretty_impl(
+ p,
+ self,
+ [],
+ [("lhs_termlist", self.lhs_termlist), ("rhs_termlist", self.rhs_termlist)],
+ )
def describe(self):
"""Returns a human-readable representation of this :class:`ModelDesc`
@@ -125,11 +137,13 @@ def describe(self):
was created by parsing a formula, then it should work in
practice. If you *really* have to.
"""
+
def term_code(term):
if term == INTERCEPT:
return "1"
else:
return term.name()
+
result = " + ".join([term_code(term) for term in self.lhs_termlist])
if result:
result += " ~ "
@@ -141,11 +155,12 @@ def term_code(term):
term_names = []
if INTERCEPT not in self.rhs_termlist:
term_names.append("0")
- term_names += [term_code(term) for term in self.rhs_termlist
- if term != INTERCEPT]
+ term_names += [
+ term_code(term) for term in self.rhs_termlist if term != INTERCEPT
+ ]
result += " + ".join(term_names)
return result
-
+
@classmethod
def from_formula(cls, tree_or_string):
"""Construct a :class:`ModelDesc` from a formula string.
@@ -165,6 +180,7 @@ def from_formula(cls, tree_or_string):
__getstate__ = no_pickling
+
def test_ModelDesc():
f1 = _MockFactor("a")
f2 = _MockFactor("b")
@@ -179,52 +195,63 @@ def test_ModelDesc():
assert ModelDesc([], []).describe() == "~ 0"
assert ModelDesc([INTERCEPT], []).describe() == "1 ~ 0"
assert ModelDesc([INTERCEPT], [INTERCEPT]).describe() == "1 ~ 1"
- assert (ModelDesc([INTERCEPT], [INTERCEPT, Term([f2])]).describe()
- == "1 ~ b")
+ assert ModelDesc([INTERCEPT], [INTERCEPT, Term([f2])]).describe() == "1 ~ b"
+
def test_ModelDesc_from_formula():
for input in ("y ~ x", parse_formula("y ~ x")):
md = ModelDesc.from_formula(input)
- assert md.lhs_termlist == [Term([EvalFactor("y")]),]
+ assert md.lhs_termlist == [
+ Term([EvalFactor("y")]),
+ ]
assert md.rhs_termlist == [INTERCEPT, Term([EvalFactor("x")])]
+
class IntermediateExpr(object):
"This class holds an intermediate result while we're evaluating a tree."
+
def __init__(self, intercept, intercept_origin, intercept_removed, terms):
self.intercept = intercept
self.intercept_origin = intercept_origin
- self.intercept_removed =intercept_removed
+ self.intercept_removed = intercept_removed
self.terms = tuple(uniqueify_list(terms))
if self.intercept:
assert self.intercept_origin
assert not (self.intercept and self.intercept_removed)
__repr__ = repr_pretty_delegate
- def _pretty_repr_(self, p, cycle): # pragma: no cover
+
+    def _repr_pretty_(self, p, cycle):  # pragma: no cover
assert not cycle
- return repr_pretty_impl(p, self,
- [self.intercept, self.intercept_origin,
- self.intercept_removed, self.terms])
+ return repr_pretty_impl(
+ p,
+ self,
+ [self.intercept, self.intercept_origin, self.intercept_removed, self.terms],
+ )
__getstate__ = no_pickling
+
def _maybe_add_intercept(doit, terms):
if doit:
return (INTERCEPT,) + terms
else:
return terms
+
def _eval_any_tilde(evaluator, tree):
- exprs = [evaluator.eval(arg) for arg in tree.args]
+ exprs = [evaluator.eval(arg) for arg in tree.args]
if len(exprs) == 1:
# Formula was like: "~ foo"
# We pretend that instead it was like: "0 ~ foo"
exprs.insert(0, IntermediateExpr(False, None, True, []))
assert len(exprs) == 2
# Note that only the RHS gets an implicit intercept:
- return ModelDesc(_maybe_add_intercept(exprs[0].intercept, exprs[0].terms),
- _maybe_add_intercept(not exprs[1].intercept_removed,
- exprs[1].terms))
+ return ModelDesc(
+ _maybe_add_intercept(exprs[0].intercept, exprs[0].terms),
+ _maybe_add_intercept(not exprs[1].intercept_removed, exprs[1].terms),
+ )
+
def _eval_binary_plus(evaluator, tree):
left_expr = evaluator.eval(tree.args[0])
@@ -233,38 +260,48 @@ def _eval_binary_plus(evaluator, tree):
else:
right_expr = evaluator.eval(tree.args[1])
if right_expr.intercept:
- return IntermediateExpr(True, right_expr.intercept_origin, False,
- left_expr.terms + right_expr.terms)
+ return IntermediateExpr(
+ True,
+ right_expr.intercept_origin,
+ False,
+ left_expr.terms + right_expr.terms,
+ )
else:
- return IntermediateExpr(left_expr.intercept,
- left_expr.intercept_origin,
- left_expr.intercept_removed,
- left_expr.terms + right_expr.terms)
-
+ return IntermediateExpr(
+ left_expr.intercept,
+ left_expr.intercept_origin,
+ left_expr.intercept_removed,
+ left_expr.terms + right_expr.terms,
+ )
+
def _eval_binary_minus(evaluator, tree):
left_expr = evaluator.eval(tree.args[0])
if tree.args[1].type == "ZERO":
- return IntermediateExpr(True, tree.args[1], False,
- left_expr.terms)
+ return IntermediateExpr(True, tree.args[1], False, left_expr.terms)
elif tree.args[1].type == "ONE":
return IntermediateExpr(False, None, True, left_expr.terms)
else:
right_expr = evaluator.eval(tree.args[1])
- terms = [term for term in left_expr.terms
- if term not in right_expr.terms]
+ terms = [term for term in left_expr.terms if term not in right_expr.terms]
if right_expr.intercept:
return IntermediateExpr(False, None, True, terms)
else:
- return IntermediateExpr(left_expr.intercept,
- left_expr.intercept_origin,
- left_expr.intercept_removed,
- terms)
+ return IntermediateExpr(
+ left_expr.intercept,
+ left_expr.intercept_origin,
+ left_expr.intercept_removed,
+ terms,
+ )
+
def _check_interactable(expr):
if expr.intercept:
- raise PatsyError("intercept term cannot interact with "
- "anything else", expr.intercept_origin)
+ raise PatsyError(
+ "intercept term cannot interact with " "anything else",
+ expr.intercept_origin,
+ )
+
def _interaction(left_expr, right_expr):
for expr in (left_expr, right_expr):
@@ -275,12 +312,13 @@ def _interaction(left_expr, right_expr):
terms.append(Term(l_term.factors + r_term.factors))
return IntermediateExpr(False, None, False, terms)
+
def _eval_binary_prod(evaluator, tree):
exprs = [evaluator.eval(arg) for arg in tree.args]
- return IntermediateExpr(False, None, False,
- exprs[0].terms
- + exprs[1].terms
- + _interaction(*exprs).terms)
+ return IntermediateExpr(
+ False, None, False, exprs[0].terms + exprs[1].terms + _interaction(*exprs).terms
+ )
+
# Division (nesting) is right-ward distributive:
# a / (b + c) -> a/b + a/c -> a + a:b + a:c
@@ -299,16 +337,17 @@ def _eval_binary_div(evaluator, tree):
left_factors = []
for term in left_expr.terms:
left_factors += list(term.factors)
- left_combined_expr = IntermediateExpr(False, None, False,
- [Term(left_factors)])
+ left_combined_expr = IntermediateExpr(False, None, False, [Term(left_factors)])
# Then interact it with everything on the right:
terms += list(_interaction(left_combined_expr, right_expr).terms)
return IntermediateExpr(False, None, False, terms)
+
def _eval_binary_interact(evaluator, tree):
exprs = [evaluator.eval(arg) for arg in tree.args]
return _interaction(*exprs)
+
def _eval_binary_power(evaluator, tree):
left_expr = evaluator.eval(tree.args[0])
_check_interactable(left_expr)
@@ -330,9 +369,11 @@ def _eval_binary_power(evaluator, tree):
all_terms = all_terms + big_expr.terms
return IntermediateExpr(False, None, False, all_terms)
+
def _eval_unary_plus(evaluator, tree):
return evaluator.eval(tree.args[0])
+
def _eval_unary_minus(evaluator, tree):
if tree.args[0].type == "ZERO":
return IntermediateExpr(True, tree.origin, False, [])
@@ -341,20 +382,24 @@ def _eval_unary_minus(evaluator, tree):
else:
raise PatsyError("Unary minus can only be applied to 1 or 0", tree)
+
def _eval_zero(evaluator, tree):
return IntermediateExpr(False, None, True, [])
-
+
+
def _eval_one(evaluator, tree):
return IntermediateExpr(True, tree.origin, False, [])
+
def _eval_number(evaluator, tree):
- raise PatsyError("numbers besides '0' and '1' are "
- "only allowed with **", tree)
+ raise PatsyError("numbers besides '0' and '1' are " "only allowed with **", tree)
+
def _eval_python_expr(evaluator, tree):
factor = EvalFactor(tree.token.extra, origin=tree.origin)
return IntermediateExpr(False, None, False, [Term([factor])])
+
class Evaluator(object):
def __init__(self):
self._evaluators = {}
@@ -391,21 +436,26 @@ def eval(self, tree, require_evalexpr=True):
assert isinstance(tree, ParseNode)
key = (tree.type, len(tree.args))
if key not in self._evaluators:
- raise PatsyError("I don't know how to evaluate this "
- "'%s' operator" % (tree.type,),
- tree.token)
+ raise PatsyError(
+ "I don't know how to evaluate this " "'%s' operator" % (tree.type,),
+ tree.token,
+ )
result = self._evaluators[key](self, tree)
if require_evalexpr and not isinstance(result, IntermediateExpr):
if isinstance(result, ModelDesc):
- raise PatsyError("~ can only be used once, and "
- "only at the top level",
- tree)
+ raise PatsyError(
+ "~ can only be used once, and " "only at the top level", tree
+ )
else:
- raise PatsyError("custom operator returned an "
- "object that I don't know how to "
- "handle", tree)
+ raise PatsyError(
+ "custom operator returned an "
+ "object that I don't know how to "
+ "handle",
+ tree,
+ )
return result
+
#############
_eval_tests = {
@@ -413,7 +463,6 @@ def eval(self, tree, require_evalexpr=True):
" ": (True, []),
" \n ": (True, []),
"a": (True, ["a"]),
-
"1": (True, []),
"0": (False, []),
"- 1": (False, []),
@@ -424,30 +473,23 @@ def eval(self, tree, require_evalexpr=True):
"1 + 0": (False, []),
"1 - 0": (True, []),
"0 - 1": (False, []),
-
"1 + a": (True, ["a"]),
"0 + a": (False, ["a"]),
"a - 1": (False, ["a"]),
"a - 0": (True, ["a"]),
"1 - a": (True, []),
-
"a + b": (True, ["a", "b"]),
"(a + b)": (True, ["a", "b"]),
"a + ((((b))))": (True, ["a", "b"]),
"a + ((((+b))))": (True, ["a", "b"]),
"a + ((((b - a))))": (True, ["a", "b"]),
-
"a + a + a": (True, ["a"]),
-
"a + (b - a)": (True, ["a", "b"]),
-
"a + np.log(a, base=10)": (True, ["a", "np.log(a, base=10)"]),
# Note different spacing:
"a + np.log(a, base=10) - np . log(a , base = 10)": (True, ["a"]),
-
"a + (I(b) + c)": (True, ["a", "I(b)", "c"]),
"a + I(b + c)": (True, ["a", "I(b + c)"]),
-
"a:b": (True, [("a", "b")]),
"a:b:a": (True, [("a", "b")]),
"a:(b + c)": (True, [("a", "b"), ("a", "c")]),
@@ -456,13 +498,10 @@ def eval(self, tree, require_evalexpr=True):
"c + a:c + a:(b - c)": (True, ["c", ("a", "c"), ("a", "b")]),
"(a - b):c": (True, [("a", "c")]),
"b + b:c + (a - b):c": (True, ["b", ("b", "c"), ("a", "c")]),
-
"a:b - a:b": (True, []),
"a:b - b:a": (True, []),
-
"1 - (a + b)": (True, []),
"a + b - (a + b)": (True, []),
-
"a * b": (True, ["a", "b", ("a", "b")]),
"a * b * a": (True, ["a", "b", ("a", "b")]),
"a * (b + c)": (True, ["a", "b", "c", ("a", "b"), ("a", "c")]),
@@ -471,29 +510,50 @@ def eval(self, tree, require_evalexpr=True):
"c + a:c + a * (b - c)": (True, ["c", ("a", "c"), "a", "b", ("a", "b")]),
"(a - b) * c": (True, ["a", "c", ("a", "c")]),
"b + b:c + (a - b) * c": (True, ["b", ("b", "c"), "a", "c", ("a", "c")]),
-
"a/b": (True, ["a", ("a", "b")]),
"(a + b)/c": (True, ["a", "b", ("a", "b", "c")]),
"b + b:c + (a - b)/c": (True, ["b", ("b", "c"), "a", ("a", "c")]),
"a/(b + c)": (True, ["a", ("a", "b"), ("a", "c")]),
-
"a ** 2": (True, ["a"]),
- "(a + b + c + d) ** 2": (True, ["a", "b", "c", "d",
- ("a", "b"), ("a", "c"), ("a", "d"),
- ("b", "c"), ("b", "d"), ("c", "d")]),
- "(a + b + c + d) ** 3": (True, ["a", "b", "c", "d",
- ("a", "b"), ("a", "c"), ("a", "d"),
- ("b", "c"), ("b", "d"), ("c", "d"),
- ("a", "b", "c"), ("a", "b", "d"),
- ("a", "c", "d"), ("b", "c", "d")]),
-
+ "(a + b + c + d) ** 2": (
+ True,
+ [
+ "a",
+ "b",
+ "c",
+ "d",
+ ("a", "b"),
+ ("a", "c"),
+ ("a", "d"),
+ ("b", "c"),
+ ("b", "d"),
+ ("c", "d"),
+ ],
+ ),
+ "(a + b + c + d) ** 3": (
+ True,
+ [
+ "a",
+ "b",
+ "c",
+ "d",
+ ("a", "b"),
+ ("a", "c"),
+ ("a", "d"),
+ ("b", "c"),
+ ("b", "d"),
+ ("c", "d"),
+ ("a", "b", "c"),
+ ("a", "b", "d"),
+ ("a", "c", "d"),
+ ("b", "c", "d"),
+ ],
+ ),
"a + +a": (True, ["a"]),
-
"~ a + b": (True, ["a", "b"]),
"~ a*b": (True, ["a", "b", ("a", "b")]),
"~ a*b + 0": (False, ["a", "b", ("a", "b")]),
"~ -1": (False, []),
-
"0 ~ a + b": (True, ["a", "b"]),
"1 ~ a + b": (True, [], True, ["a", "b"]),
"y ~ a + b": (False, ["y"], True, ["a", "b"]),
@@ -501,7 +561,6 @@ def eval(self, tree, require_evalexpr=True):
"0 + y * z ~ a + b": (False, ["y", "z", ("y", "z")], True, ["a", "b"]),
"-1 ~ 1": (False, [], True, []),
"1 + y ~ a + b": (True, ["y"], True, ["a", "b"]),
-
# Check precedence:
"a + b * c": (True, ["a", "b", "c", ("b", "c")]),
"a * b + c": (True, ["a", "b", ("a", "b"), "c"]),
@@ -510,15 +569,14 @@ def eval(self, tree, require_evalexpr=True):
"a / b + c": (True, ["a", ("a", "b"), "c"]),
"a*b:c": (True, ["a", ("b", "c"), ("a", "b", "c")]),
"a:b*c": (True, [("a", "b"), "c", ("a", "b", "c")]),
-
# Intercept handling:
"~ 1 + 1 + 0 + 1": (True, []),
"~ 0 + 1 + 0": (False, []),
"~ 0 - 1 - 1 + 0 + 1": (True, []),
"~ 1 - 1": (False, []),
"~ 0 + a + 1": (True, ["a"]),
- "~ 1 + (a + 0)": (True, ["a"]), # This is correct, but perhaps surprising!
- "~ 0 + (a + 1)": (True, ["a"]), # Also correct!
+ "~ 1 + (a + 0)": (True, ["a"]), # This is correct, but perhaps surprising!
+ "~ 0 + (a + 1)": (True, ["a"]), # Also correct!
"~ 1 - (a + 1)": (False, []),
}
@@ -526,60 +584,46 @@ def eval(self, tree, require_evalexpr=True):
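+ # In each entry, <...> brackets the span that the reported error's origin
+ # should cover; _parsing_error_test strips the markers before parsing.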
_eval_error_tests = [
"a <+>",
"a + <(>",
-
"b + <(-a)>",
-
"a:<1>",
"(a + <1>)*b",
-
"a + <2>",
"a + <1.0>",
# eh, catching this is a hassle, we'll just leave the user some rope if
# they really want it:
- #"a + <0x1>",
-
+ # "a + <0x1>",
"a ** ",
"a ** <(1 + 1)>",
"a ** <1.5>",
-
"a + b <# asdf>",
-
"<)>",
"a + <)>",
"<*> a",
"a + <*>",
-
"a + ",
"a + ",
"a + ",
-
"a + <[bar>",
"a + <{bar>",
-
"a + <{bar[]>",
-
"a + foo<]>bar",
"a + foo[]<]>bar",
"a + foo{}<}>bar",
"a + foo<)>bar",
-
"a + b<)>",
"(a) <.>",
-
"<(>a + b",
-
" ~ b",
"y ~ <(a ~ b)>",
"<~ a> ~ b",
"~ <(a ~ b)>",
-
"1 + <-(a + b)>",
-
"<- a>",
"a + <-a**2>",
]
-def _assert_terms_match(terms, expected_intercept, expecteds): # pragma: no cover
+
+def _assert_terms_match(terms, expected_intercept, expecteds): # pragma: no cover
if expected_intercept:
expecteds = [()] + expecteds
assert len(terms) == len(expecteds)
@@ -591,7 +635,8 @@ def _assert_terms_match(terms, expected_intercept, expecteds): # pragma: no cove
else:
assert term == expected
-def _do_eval_formula_tests(tests): # pragma: no cover
+
+def _do_eval_formula_tests(tests): # pragma: no cover
for code, result in tests.items():
if len(result) == 2:
result = (False, []) + result
@@ -600,24 +645,24 @@ def _do_eval_formula_tests(tests): # pragma: no cover
print(result)
print(model_desc)
lhs_intercept, lhs_termlist, rhs_intercept, rhs_termlist = result
- _assert_terms_match(model_desc.lhs_termlist,
- lhs_intercept, lhs_termlist)
- _assert_terms_match(model_desc.rhs_termlist,
- rhs_intercept, rhs_termlist)
+ _assert_terms_match(model_desc.lhs_termlist, lhs_intercept, lhs_termlist)
+ _assert_terms_match(model_desc.rhs_termlist, rhs_intercept, rhs_termlist)
+
def test_eval_formula():
_do_eval_formula_tests(_eval_tests)
+
def test_eval_formula_error_reporting():
from patsy.parse_formula import _parsing_error_test
+
parse_fn = lambda formula: ModelDesc.from_formula(formula)
_parsing_error_test(parse_fn, _eval_error_tests)
+
def test_formula_factor_origin():
from patsy.origin import Origin
+
desc = ModelDesc.from_formula("a + b")
- assert (desc.rhs_termlist[1].factors[0].origin
- == Origin("a + b", 0, 1))
- assert (desc.rhs_termlist[2].factors[0].origin
- == Origin("a + b", 4, 5))
-
+ assert desc.rhs_termlist[1].factors[0].origin == Origin("a + b", 0, 1)
+ assert desc.rhs_termlist[2].factors[0].origin == Origin("a + b", 4, 5)
diff --git a/patsy/design_info.py b/patsy/design_info.py
index e27d382..12a9510 100644
--- a/patsy/design_info.py
+++ b/patsy/design_info.py
@@ -27,13 +27,18 @@
from patsy import PatsyError
from patsy.util import atleast_2d_column_default
from patsy.compat import OrderedDict
-from patsy.util import (repr_pretty_delegate, repr_pretty_impl,
- safe_issubdtype,
- no_pickling, assert_no_pickling)
+from patsy.util import (
+ repr_pretty_delegate,
+ repr_pretty_impl,
+ safe_issubdtype,
+ no_pickling,
+ assert_no_pickling,
+)
from patsy.constraint import linear_constraint
from patsy.contrasts import ContrastMatrix
from patsy.desc import ModelDesc, Term
+
class FactorInfo:
"""A FactorInfo object is a simple class that provides some metadata about
the role of a factor within a model. :attr:`DesignInfo.factor_infos` is
@@ -71,46 +76,49 @@ class FactorInfo:
``None``.
"""
- def __init__(self, factor, type, state,
- num_columns=None, categories=None):
+ def __init__(self, factor, type, state, num_columns=None, categories=None):
self.factor = factor
self.type = type
if self.type not in ["numerical", "categorical"]:
- raise ValueError("FactorInfo.type must be "
- "'numerical' or 'categorical', not %r"
- % (self.type,))
+ raise ValueError(
+ "FactorInfo.type must be "
+ "'numerical' or 'categorical', not %r" % (self.type,)
+ )
self.state = state
if self.type == "numerical":
if not isinstance(num_columns, int):
- raise ValueError("For numerical factors, num_columns "
- "must be an integer")
+ raise ValueError(
+ "For numerical factors, num_columns " "must be an integer"
+ )
if categories is not None:
- raise ValueError("For numerical factors, categories "
- "must be None")
+ raise ValueError("For numerical factors, categories " "must be None")
else:
assert self.type == "categorical"
if num_columns is not None:
- raise ValueError("For categorical factors, num_columns "
- "must be None")
+ raise ValueError("For categorical factors, num_columns " "must be None")
categories = tuple(categories)
self.num_columns = num_columns
self.categories = categories
__repr__ = repr_pretty_delegate
+
def _repr_pretty_(self, p, cycle):
assert not cycle
+
class FactorState(object):
def __repr__(self):
return ""
- kwlist = [("factor", self.factor),
- ("type", self.type),
- # Don't put the state in people's faces, it will
- # just encourage them to pay attention to the
- # contents :-). Plus it's a bunch of gobbledygook
- # they don't care about. They can always look at
- # self.state if they want to know...
- ("state", FactorState()),
- ]
+
+ kwlist = [
+ ("factor", self.factor),
+ ("type", self.type),
+ # Don't put the state in people's faces, it will
+ # just encourage them to pay attention to the
+ # contents :-). Plus it's a bunch of gobbledygook
+ # they don't care about. They can always look at
+ # self.state if they want to know...
+ ("state", FactorState()),
+ ]
if self.type == "numerical":
kwlist.append(("num_columns", self.num_columns))
else:
@@ -119,6 +127,7 @@ def __repr__(self):
__getstate__ = no_pickling
+
def test_FactorInfo():
fi1 = FactorInfo("asdf", "numerical", {"a": 1}, num_columns=10)
assert fi1.factor == "asdf"
@@ -141,19 +150,18 @@ def test_FactorInfo():
repr(fi2)
import pytest
+
pytest.raises(ValueError, FactorInfo, "asdf", "non-numerical", {})
pytest.raises(ValueError, FactorInfo, "asdf", "numerical", {})
- pytest.raises(ValueError, FactorInfo, "asdf", "numerical", {},
- num_columns="asdf")
- pytest.raises(ValueError, FactorInfo, "asdf", "numerical", {},
- num_columns=1, categories=1)
+ pytest.raises(ValueError, FactorInfo, "asdf", "numerical", {}, num_columns="asdf")
+ pytest.raises(
+ ValueError, FactorInfo, "asdf", "numerical", {}, num_columns=1, categories=1
+ )
pytest.raises(TypeError, FactorInfo, "asdf", "categorical", {})
- pytest.raises(ValueError, FactorInfo, "asdf", "categorical", {},
- num_columns=1)
- pytest.raises(TypeError, FactorInfo, "asdf", "categorical", {},
- categories=1)
+ pytest.raises(ValueError, FactorInfo, "asdf", "categorical", {}, num_columns=1)
+ pytest.raises(TypeError, FactorInfo, "asdf", "categorical", {}, categories=1)
class SubtermInfo:
@@ -210,23 +218,32 @@ def __init__(self, factors, contrast_matrices, num_columns):
if factor not in factor_set:
raise ValueError("Unexpected factor in contrast_matrices dict")
if not isinstance(contrast_matrix, ContrastMatrix):
- raise ValueError("Expected a ContrastMatrix, not %r"
- % (contrast_matrix,))
+ raise ValueError(
+ "Expected a ContrastMatrix, not %r" % (contrast_matrix,)
+ )
self.contrast_matrices = contrast_matrices
if not isinstance(num_columns, int):
raise ValueError("num_columns must be an integer")
self.num_columns = num_columns
__repr__ = repr_pretty_delegate
+
def _repr_pretty_(self, p, cycle):
assert not cycle
- repr_pretty_impl(p, self, [],
- [("factors", self.factors),
- ("contrast_matrices", self.contrast_matrices),
- ("num_columns", self.num_columns)])
+ repr_pretty_impl(
+ p,
+ self,
+ [],
+ [
+ ("factors", self.factors),
+ ("contrast_matrices", self.contrast_matrices),
+ ("num_columns", self.num_columns),
+ ],
+ )
__getstate__ = no_pickling
+
def test_SubtermInfo():
cm = ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])
s = SubtermInfo(["a", "x"], {"a": cm}, 4)
@@ -238,12 +255,14 @@ def test_SubtermInfo():
repr(s)
import pytest
+
pytest.raises(TypeError, SubtermInfo, 1, {}, 1)
pytest.raises(ValueError, SubtermInfo, ["a", "x"], 1, 1)
pytest.raises(ValueError, SubtermInfo, ["a", "x"], {"z": cm}, 1)
pytest.raises(ValueError, SubtermInfo, ["a", "x"], {"a": 1}, 1)
pytest.raises(ValueError, SubtermInfo, ["a", "x"], {}, 1.5)
+
class DesignInfo(object):
"""A DesignInfo object holds metadata about a design matrix.
@@ -254,14 +273,16 @@ class DesignInfo(object):
"""
- def __init__(self, column_names,
- factor_infos=None, term_codings=None):
- self.column_name_indexes = OrderedDict(zip(column_names,
- range(len(column_names))))
+ def __init__(self, column_names, factor_infos=None, term_codings=None):
+ self.column_name_indexes = OrderedDict(
+ zip(column_names, range(len(column_names)))
+ )
if (factor_infos is None) != (term_codings is None):
- raise ValueError("Must specify either both or neither of "
- "factor_infos= and term_codings=")
+ raise ValueError(
+ "Must specify either both or neither of "
+ "factor_infos= and term_codings="
+ )
self.factor_infos = factor_infos
self.term_codings = term_codings
@@ -283,8 +304,7 @@ def __init__(self, column_names,
term_factors = set(term.factors)
for subterm in subterms:
if not isinstance(subterm, SubtermInfo):
- raise ValueError("expected SubtermInfo, "
- "not %r" % (subterm,))
+ raise ValueError("expected SubtermInfo, " "not %r" % (subterm,))
if not term_factors.issuperset(subterm.factors):
raise ValueError("unexpected factors in subterm")
@@ -292,12 +312,14 @@ def __init__(self, column_names,
for term in self.term_codings:
all_factors.update(term.factors)
if all_factors != set(self.factor_infos):
- raise ValueError("Provided Term objects and factor_infos "
- "do not match")
+ raise ValueError(
+ "Provided Term objects and factor_infos " "do not match"
+ )
for factor, factor_info in self.factor_infos.items():
if not isinstance(factor_info, FactorInfo):
- raise ValueError("expected FactorInfo object, not %r"
- % (factor_info,))
+ raise ValueError(
+ "expected FactorInfo object, not %r" % (factor_info,)
+ )
if factor != factor_info.factor:
raise ValueError("mismatched factor_info.factor")
@@ -313,13 +335,17 @@ def __init__(self, column_names,
assert fi.type == "categorical"
cm = subterm.contrast_matrices[factor].matrix
if cm.shape[0] != len(fi.categories):
- raise ValueError("Mismatched contrast matrix "
- "for factor %r" % (factor,))
+ raise ValueError(
+ "Mismatched contrast matrix "
+ "for factor %r" % (factor,)
+ )
cat_factors.add(factor)
exp_cols *= cm.shape[1]
if cat_factors != set(subterm.contrast_matrices):
- raise ValueError("Mismatch between contrast_matrices "
- "and categorical factors")
+ raise ValueError(
+ "Mismatch between contrast_matrices "
+ "and categorical factors"
+ )
if exp_cols != subterm.num_columns:
raise ValueError("Unexpected num_columns")
@@ -341,11 +367,12 @@ def __init__(self, column_names,
self.term_slices[term] = slice(idx, idx + term_columns)
idx += term_columns
if idx != len(self.column_names):
- raise ValueError("mismatch between column_names and columns "
- "coded by given terms")
+ raise ValueError(
+ "mismatch between column_names and columns " "coded by given terms"
+ )
self.term_name_slices = OrderedDict(
- [(term.name(), slice_)
- for (term, slice_) in self.term_slices.items()])
+ [(term.name(), slice_) for (term, slice_) in self.term_slices.items()]
+ )
# Guarantees:
# term_name_slices is never None
@@ -356,8 +383,9 @@ def __init__(self, column_names,
# term_name_slices.
assert self.term_name_slices is not None
if self.term_slices is not None:
- assert (list(self.term_slices.values())
- == list(self.term_name_slices.values()))
+ assert list(self.term_slices.values()) == list(
+ self.term_name_slices.values()
+ )
# These checks probably aren't necessary anymore now that we always
# generate the slices ourselves, but we'll leave them in just to be
# safe.
@@ -377,12 +405,15 @@ def __init__(self, column_names,
raise ValueError("term/column name collision")
__repr__ = repr_pretty_delegate
+
def _repr_pretty_(self, p, cycle):
assert not cycle
- repr_pretty_impl(p, self,
- [self.column_names],
- [("factor_infos", self.factor_infos),
- ("term_codings", self.term_codings)])
+ repr_pretty_impl(
+ p,
+ self,
+ [self.column_names],
+ [("factor_infos", self.factor_infos), ("term_codings", self.term_codings)],
+ )
@property
def column_names(self):
@@ -404,22 +435,30 @@ def term_names(self):
@property
def builder(self):
".. deprecated:: 0.4.0"
- warnings.warn(DeprecationWarning(
- "The DesignInfo.builder attribute is deprecated starting in "
- "patsy v0.4.0; distinct builder objects have been eliminated "
- "and design_info.builder is now just a long-winded way of "
- "writing 'design_info' (i.e. the .builder attribute just "
- "returns self)"), stacklevel=2)
+ warnings.warn(
+ DeprecationWarning(
+ "The DesignInfo.builder attribute is deprecated starting in "
+ "patsy v0.4.0; distinct builder objects have been eliminated "
+ "and design_info.builder is now just a long-winded way of "
+ "writing 'design_info' (i.e. the .builder attribute just "
+ "returns self)"
+ ),
+ stacklevel=2,
+ )
return self
@property
def design_info(self):
".. deprecated:: 0.4.0"
- warnings.warn(DeprecationWarning(
- "Starting in patsy v0.4.0, the DesignMatrixBuilder class has "
- "been merged into the DesignInfo class. So there's no need to "
- "use builder.design_info to access the DesignInfo; 'builder' "
- "already *is* a DesignInfo."), stacklevel=2)
+ warnings.warn(
+ DeprecationWarning(
+ "Starting in patsy v0.4.0, the DesignMatrixBuilder class has "
+ "been merged into the DesignInfo class. So there's no need to "
+ "use builder.design_info to access the DesignInfo; 'builder' "
+ "already *is* a DesignInfo."
+ ),
+ stacklevel=2,
+ )
return self
def slice(self, columns_specifier):
@@ -459,16 +498,14 @@ def slice(self, columns_specifier):
return columns_specifier
if np.issubdtype(type(columns_specifier), np.integer):
return slice(columns_specifier, columns_specifier + 1)
- if (self.term_slices is not None
- and columns_specifier in self.term_slices):
+ if self.term_slices is not None and columns_specifier in self.term_slices:
return self.term_slices[columns_specifier]
if columns_specifier in self.term_name_slices:
return self.term_name_slices[columns_specifier]
if columns_specifier in self.column_name_indexes:
idx = self.column_name_indexes[columns_specifier]
return slice(idx, idx + 1)
- raise PatsyError("unknown column specified '%s'"
- % (columns_specifier,))
+ raise PatsyError("unknown column specified '%s'" % (columns_specifier,))
def linear_constraint(self, constraint_likes):
"""Construct a linear constraint in matrix form from a (possibly
@@ -641,9 +678,11 @@ def subset(self, which_terms):
for f in term.factors:
new_factor_infos[f] = self.factor_infos[f]
new_term_codings[term] = self.term_codings[term]
- return DesignInfo(new_column_names,
- factor_infos=new_factor_infos,
- term_codings=new_term_codings)
+ return DesignInfo(
+ new_column_names,
+ factor_infos=new_factor_infos,
+ term_codings=new_term_codings,
+ )
@classmethod
def from_array(cls, array_like, default_column_prefix="column"):
@@ -663,41 +702,44 @@ def from_array(cls, array_like, default_column_prefix="column"):
then this will be used to construct them.
:returns: a DesignInfo object
"""
- if hasattr(array_like, "design_info") and isinstance(array_like.design_info, cls):
+ if hasattr(array_like, "design_info") and isinstance(
+ array_like.design_info, cls
+ ):
return array_like.design_info
arr = atleast_2d_column_default(array_like, preserve_pandas=True)
if arr.ndim > 2:
raise ValueError("design matrix can't have >2 dimensions")
columns = getattr(arr, "columns", range(arr.shape[1]))
- if (hasattr(columns, "dtype")
- and not safe_issubdtype(columns.dtype, np.integer)):
+ if hasattr(columns, "dtype") and not safe_issubdtype(columns.dtype, np.integer):
column_names = [str(obj) for obj in columns]
else:
- column_names = ["%s%s" % (default_column_prefix, i)
- for i in columns]
+ column_names = ["%s%s" % (default_column_prefix, i) for i in columns]
return DesignInfo(column_names)
__getstate__ = no_pickling
+
def test_DesignInfo():
import pytest
+
class _MockFactor(object):
def __init__(self, name):
self._name = name
def name(self):
return self._name
+
f_x = _MockFactor("x")
f_y = _MockFactor("y")
t_x = Term([f_x])
t_y = Term([f_y])
- factor_infos = {f_x:
- FactorInfo(f_x, "numerical", {}, num_columns=3),
- f_y:
- FactorInfo(f_y, "numerical", {}, num_columns=1),
- }
- term_codings = OrderedDict([(t_x, [SubtermInfo([f_x], {}, 3)]),
- (t_y, [SubtermInfo([f_y], {}, 1)])])
+ factor_infos = {
+ f_x: FactorInfo(f_x, "numerical", {}, num_columns=3),
+ f_y: FactorInfo(f_y, "numerical", {}, num_columns=1),
+ }
+ term_codings = OrderedDict(
+ [(t_x, [SubtermInfo([f_x], {}, 3)]), (t_y, [SubtermInfo([f_y], {}, 1)])]
+ )
di = DesignInfo(["x1", "x2", "x3", "y"], factor_infos, term_codings)
assert di.column_names == ["x1", "x2", "x3", "y"]
assert di.term_names == ["x", "y"]
@@ -729,10 +771,12 @@ def name(self):
assert di.term_names == ["a1", "a2", "a3", "b"]
assert di.terms is None
assert di.column_name_indexes == {"a1": 0, "a2": 1, "a3": 2, "b": 3}
- assert di.term_name_slices == {"a1": slice(0, 1),
- "a2": slice(1, 2),
- "a3": slice(2, 3),
- "b": slice(3, 4)}
+ assert di.term_name_slices == {
+ "a1": slice(0, 1),
+ "a2": slice(1, 2),
+ "a3": slice(2, 3),
+ "b": slice(3, 4),
+ }
assert di.term_slices is None
assert di.describe() == "a1 + a2 + a3 + b"
@@ -747,137 +791,211 @@ def name(self):
# Failure modes
# must specify either both or neither of factor_infos and term_codings:
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y"], factor_infos=factor_infos)
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y"], term_codings=term_codings)
+ pytest.raises(
+ ValueError, DesignInfo, ["x1", "x2", "x3", "y"], factor_infos=factor_infos
+ )
+ pytest.raises(
+ ValueError, DesignInfo, ["x1", "x2", "x3", "y"], term_codings=term_codings
+ )
# factor_infos must be a dict
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y"], list(factor_infos), term_codings)
+ pytest.raises(
+ ValueError,
+ DesignInfo,
+ ["x1", "x2", "x3", "y"],
+ list(factor_infos),
+ term_codings,
+ )
# wrong number of column names:
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y1", "y2"], factor_infos, term_codings)
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3"], factor_infos, term_codings)
+ pytest.raises(
+ ValueError,
+ DesignInfo,
+ ["x1", "x2", "x3", "y1", "y2"],
+ factor_infos,
+ term_codings,
+ )
+ pytest.raises(
+ ValueError, DesignInfo, ["x1", "x2", "x3"], factor_infos, term_codings
+ )
# name overlap problems
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "y", "y2"], factor_infos, term_codings)
+ pytest.raises(
+ ValueError, DesignInfo, ["x1", "x2", "y", "y2"], factor_infos, term_codings
+ )
# duplicate name
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x1", "x1", "y"], factor_infos, term_codings)
+ pytest.raises(
+ ValueError, DesignInfo, ["x1", "x1", "x1", "y"], factor_infos, term_codings
+ )
# f_y is in factor_infos, but not mentioned in any term
term_codings_x_only = OrderedDict(term_codings)
del term_codings_x_only[t_y]
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3"], factor_infos, term_codings_x_only)
+ pytest.raises(
+ ValueError, DesignInfo, ["x1", "x2", "x3"], factor_infos, term_codings_x_only
+ )
# f_a is in a term, but not in factor_infos
f_a = _MockFactor("a")
t_a = Term([f_a])
term_codings_with_a = OrderedDict(term_codings)
term_codings_with_a[t_a] = [SubtermInfo([f_a], {}, 1)]
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y", "a"],
- factor_infos, term_codings_with_a)
+ pytest.raises(
+ ValueError,
+ DesignInfo,
+ ["x1", "x2", "x3", "y", "a"],
+ factor_infos,
+ term_codings_with_a,
+ )
# bad factor_infos
not_factor_infos = dict(factor_infos)
not_factor_infos[f_x] = "what is this I don't even"
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y"], not_factor_infos, term_codings)
+ pytest.raises(
+ ValueError, DesignInfo, ["x1", "x2", "x3", "y"], not_factor_infos, term_codings
+ )
mismatch_factor_infos = dict(factor_infos)
mismatch_factor_infos[f_x] = FactorInfo(f_a, "numerical", {}, num_columns=3)
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y"], mismatch_factor_infos, term_codings)
+ pytest.raises(
+ ValueError,
+ DesignInfo,
+ ["x1", "x2", "x3", "y"],
+ mismatch_factor_infos,
+ term_codings,
+ )
# bad term_codings
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y"], factor_infos, dict(term_codings))
+ pytest.raises(
+ ValueError,
+ DesignInfo,
+ ["x1", "x2", "x3", "y"],
+ factor_infos,
+ dict(term_codings),
+ )
not_term_codings = OrderedDict(term_codings)
not_term_codings["this is a string"] = term_codings[t_x]
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y"], factor_infos, not_term_codings)
+ pytest.raises(
+ ValueError, DesignInfo, ["x1", "x2", "x3", "y"], factor_infos, not_term_codings
+ )
non_list_term_codings = OrderedDict(term_codings)
non_list_term_codings[t_y] = tuple(term_codings[t_y])
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y"], factor_infos, non_list_term_codings)
+ pytest.raises(
+ ValueError,
+ DesignInfo,
+ ["x1", "x2", "x3", "y"],
+ factor_infos,
+ non_list_term_codings,
+ )
non_subterm_term_codings = OrderedDict(term_codings)
non_subterm_term_codings[t_y][0] = "not a SubtermInfo"
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y"], factor_infos, non_subterm_term_codings)
+ pytest.raises(
+ ValueError,
+ DesignInfo,
+ ["x1", "x2", "x3", "y"],
+ factor_infos,
+ non_subterm_term_codings,
+ )
bad_subterm = OrderedDict(term_codings)
# f_x is a factor in this model, but it is not a factor in t_y
term_codings[t_y][0] = SubtermInfo([f_x], {}, 1)
- pytest.raises(ValueError, DesignInfo,
- ["x1", "x2", "x3", "y"], factor_infos, bad_subterm)
+ pytest.raises(
+ ValueError, DesignInfo, ["x1", "x2", "x3", "y"], factor_infos, bad_subterm
+ )
# contrast matrix has wrong number of rows
- factor_codings_a = {f_a:
- FactorInfo(f_a, "categorical", {},
- categories=["a1", "a2"])}
- term_codings_a_bad_rows = OrderedDict([
- (t_a,
- [SubtermInfo([f_a],
- {f_a: ContrastMatrix(np.ones((3, 2)),
- ["[1]", "[2]"])},
- 2)])])
- pytest.raises(ValueError, DesignInfo,
- ["a[1]", "a[2]"],
- factor_codings_a,
- term_codings_a_bad_rows)
+ factor_codings_a = {
+ f_a: FactorInfo(f_a, "categorical", {}, categories=["a1", "a2"])
+ }
+ term_codings_a_bad_rows = OrderedDict(
+ [
+ (
+ t_a,
+ [
+ SubtermInfo(
+ [f_a], {f_a: ContrastMatrix(np.ones((3, 2)), ["[1]", "[2]"])}, 2
+ )
+ ],
+ )
+ ]
+ )
+ pytest.raises(
+ ValueError,
+ DesignInfo,
+ ["a[1]", "a[2]"],
+ factor_codings_a,
+ term_codings_a_bad_rows,
+ )
# have a contrast matrix for a non-categorical factor
t_ax = Term([f_a, f_x])
- factor_codings_ax = {f_a:
- FactorInfo(f_a, "categorical", {},
- categories=["a1", "a2"]),
- f_x:
- FactorInfo(f_x, "numerical", {},
- num_columns=2)}
- term_codings_ax_extra_cm = OrderedDict([
- (t_ax,
- [SubtermInfo([f_a, f_x],
- {f_a: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]),
- f_x: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])},
- 4)])])
- pytest.raises(ValueError, DesignInfo,
- ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
- factor_codings_ax,
- term_codings_ax_extra_cm)
+ factor_codings_ax = {
+ f_a: FactorInfo(f_a, "categorical", {}, categories=["a1", "a2"]),
+ f_x: FactorInfo(f_x, "numerical", {}, num_columns=2),
+ }
+ term_codings_ax_extra_cm = OrderedDict(
+ [
+ (
+ t_ax,
+ [
+ SubtermInfo(
+ [f_a, f_x],
+ {
+ f_a: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]),
+ f_x: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]),
+ },
+ 4,
+ )
+ ],
+ )
+ ]
+ )
+ pytest.raises(
+ ValueError,
+ DesignInfo,
+ ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
+ factor_codings_ax,
+ term_codings_ax_extra_cm,
+ )
# no contrast matrix for a categorical factor
- term_codings_ax_missing_cm = OrderedDict([
- (t_ax,
- [SubtermInfo([f_a, f_x],
- {},
- 4)])])
+ term_codings_ax_missing_cm = OrderedDict([(t_ax, [SubtermInfo([f_a, f_x], {}, 4)])])
# This actually fails before it hits the relevant check with a KeyError,
# but that's okay... the previous test still exercises the check.
- pytest.raises((ValueError, KeyError), DesignInfo,
- ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
- factor_codings_ax,
- term_codings_ax_missing_cm)
+ pytest.raises(
+ (ValueError, KeyError),
+ DesignInfo,
+ ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
+ factor_codings_ax,
+ term_codings_ax_missing_cm,
+ )
# subterm num_columns doesn't match the value computed from the individual
# factors
- term_codings_ax_wrong_subterm_columns = OrderedDict([
- (t_ax,
- [SubtermInfo([f_a, f_x],
- {f_a: ContrastMatrix(np.ones((2, 3)),
- ["[1]", "[2]", "[3]"])},
- # should be 2 * 3 = 6
- 5)])])
- pytest.raises(ValueError, DesignInfo,
- ["a[1]:x[1]", "a[2]:x[1]", "a[3]:x[1]",
- "a[1]:x[2]", "a[2]:x[2]", "a[3]:x[2]"],
- factor_codings_ax,
- term_codings_ax_wrong_subterm_columns)
+ term_codings_ax_wrong_subterm_columns = OrderedDict(
+ [
+ (
+ t_ax,
+ [
+ SubtermInfo(
+ [f_a, f_x],
+ {f_a: ContrastMatrix(np.ones((2, 3)), ["[1]", "[2]", "[3]"])},
+ # should be 2 * 3 = 6
+ 5,
+ )
+ ],
+ )
+ ]
+ )
+ pytest.raises(
+ ValueError,
+ DesignInfo,
+ ["a[1]:x[1]", "a[2]:x[1]", "a[3]:x[1]", "a[1]:x[2]", "a[2]:x[2]", "a[3]:x[2]"],
+ factor_codings_ax,
+ term_codings_ax_wrong_subterm_columns,
+ )
+
def test_DesignInfo_from_array():
di = DesignInfo.from_array([1, 2, 3])
@@ -886,8 +1004,7 @@ def test_DesignInfo_from_array():
assert di2.column_names == ["column0", "column1"]
di3 = DesignInfo.from_array([1, 2, 3], default_column_prefix="x")
assert di3.column_names == ["x0"]
- di4 = DesignInfo.from_array([[1, 2], [2, 3], [3, 4]],
- default_column_prefix="x")
+ di4 = DesignInfo.from_array([[1, 2], [2, 3], [3, 4]], default_column_prefix="x")
assert di4.column_names == ["x0", "x1"]
m = DesignMatrix([1, 2, 3], di3)
assert DesignInfo.from_array(m) is di3
@@ -897,24 +1014,26 @@ def test_DesignInfo_from_array():
assert di_weird.column_names == ["column0"]
import pytest
+
pytest.raises(ValueError, DesignInfo.from_array, np.ones((2, 2, 2)))
from patsy.util import have_pandas
+
if have_pandas:
import pandas
+
# with named columns
- di5 = DesignInfo.from_array(pandas.DataFrame([[1, 2]],
- columns=["a", "b"]))
+ di5 = DesignInfo.from_array(pandas.DataFrame([[1, 2]], columns=["a", "b"]))
assert di5.column_names == ["a", "b"]
# with irregularly numbered columns
- di6 = DesignInfo.from_array(pandas.DataFrame([[1, 2]],
- columns=[0, 10]))
+ di6 = DesignInfo.from_array(pandas.DataFrame([[1, 2]], columns=[0, 10]))
assert di6.column_names == ["column0", "column10"]
# with .design_info attr
df = pandas.DataFrame([[1, 2]])
df.design_info = di6
assert DesignInfo.from_array(df) is di6
+
def test_DesignInfo_linear_constraint():
di = DesignInfo(["a1", "a2", "a3", "b"])
con = di.linear_constraint(["2 * a1 = b + 1", "a3"])
@@ -922,17 +1041,21 @@ def test_DesignInfo_linear_constraint():
assert np.all(con.coefs == [[2, 0, 0, -1], [0, 0, 1, 0]])
assert np.all(con.constants == [[1], [0]])
+
def test_DesignInfo_deprecated_attributes():
d = DesignInfo(["a1", "a2"])
+
def check(attr):
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
assert getattr(d, attr) is d
assert len(w) == 1
assert w[0].category is DeprecationWarning
+
check("builder")
check("design_info")
+
# Idea: format with a reasonable amount of precision, then if that turns out
# to be higher than necessary, remove as many zeros as we can. But only do
# this while we can do it to *all* the ordinarily-formatted numbers, to keep
@@ -945,8 +1068,12 @@ def _format_float_column(precision, col):
col_strs = np.array([format_str % (x,) for x in col], dtype=object)
# Really every item should have a decimal, but just in case, we don't want
# to strip zeros off the end of "10" or something like that.
- mask = np.array([simple_float_chars.issuperset(col_str) and "." in col_str
- for col_str in col_strs])
+ mask = np.array(
+ [
+ simple_float_chars.issuperset(col_str) and "." in col_str
+ for col_str in col_strs
+ ]
+ )
mask_idxes = np.nonzero(mask)[0]
strip_char = "0"
if np.any(mask):
@@ -961,11 +1088,13 @@ def _format_float_column(precision, col):
break
return col_strs
+
def test__format_float_column():
def t(precision, numbers, expected):
got = _format_float_column(precision, np.asarray(numbers))
print(got, expected)
assert np.array_equal(got, expected)
+
# This acts weird on old python versions (e.g. it can be "-nan"), so don't
# hardcode it:
nan_string = "%.3f" % (np.nan,)
@@ -974,6 +1103,7 @@ def t(precision, numbers, expected):
t(3, [1.0001, 2, 3, np.nan], ["1", "2", "3", nan_string])
t(4, [1.0001, 2, 3, np.nan], ["1.0001", "2.0000", "3.0000", nan_string])
+
# http://docs.scipy.org/doc/numpy/user/basics.subclassing.html#slightly-more-realistic-example-attribute-added-to-existing-array
class DesignMatrix(np.ndarray):
"""A simple numpy array subclass that carries design matrix metadata.
@@ -997,8 +1127,7 @@ class DesignMatrix(np.ndarray):
present only on "real" DesignMatrix objects.
"""
- def __new__(cls, input_array, design_info=None,
- default_column_prefix="column"):
+ def __new__(cls, input_array, design_info=None, default_column_prefix="column"):
"""Create a DesignMatrix, or cast an existing matrix to a DesignMatrix.
A call like::
@@ -1022,8 +1151,9 @@ def __new__(cls, input_array, design_info=None,
# from turning non-design-matrix arrays into DesignMatrix
# instances. (E.g., my_dm.diagonal() will return a DesignMatrix
# object, but one without a design_info attribute.)
- if (isinstance(input_array, DesignMatrix)
- and hasattr(input_array, "design_info")):
+ if isinstance(input_array, DesignMatrix) and hasattr(
+ input_array, "design_info"
+ ):
return input_array
self = atleast_2d_column_default(input_array).view(cls)
# Upcast integer to floating point
@@ -1035,15 +1165,17 @@ def __new__(cls, input_array, design_info=None,
if design_info is None:
design_info = DesignInfo.from_array(self, default_column_prefix)
if len(design_info.column_names) != self.shape[1]:
- raise ValueError("wrong number of column names for design matrix "
- "(got %s, wanted %s)"
- % (len(design_info.column_names), self.shape[1]))
+ raise ValueError(
+ "wrong number of column names for design matrix "
+ "(got %s, wanted %s)" % (len(design_info.column_names), self.shape[1])
+ )
self.design_info = design_info
if not safe_issubdtype(self.dtype, np.floating):
raise ValueError("design matrix must be real-valued floating point")
return self
__repr__ = repr_pretty_delegate
+
def _repr_pretty_(self, p, cycle):
if not hasattr(self, "design_info"):
# Not a real DesignMatrix
@@ -1064,26 +1196,32 @@ def _repr_pretty_(self, p, cycle):
names = self.design_info.column_names
column_name_widths = [len(name) for name in names]
- min_total_width = (INDENT + SEP * (self.shape[1] - 1)
- + np.sum(column_name_widths))
+ min_total_width = (
+ INDENT + SEP * (self.shape[1] - 1) + np.sum(column_name_widths)
+ )
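+ # If even the header row alone would overflow MAX_TOTAL_WIDTH, don't
+ # bother formatting the numbers at all.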
if min_total_width <= MAX_TOTAL_WIDTH:
printable_part = np.asarray(self)[:MAX_ROWS, :]
- formatted_cols = [_format_float_column(PRECISION,
- printable_part[:, i])
- for i in range(self.shape[1])]
+ formatted_cols = [
+ _format_float_column(PRECISION, printable_part[:, i])
+ for i in range(self.shape[1])
+ ]
+
def max_width(col):
assert col.ndim == 1
if not col.shape[0]:
return 0
else:
return max([len(s) for s in col])
+
column_num_widths = [max_width(col) for col in formatted_cols]
- column_widths = [max(name_width, num_width)
- for (name_width, num_width)
- in zip(column_name_widths, column_num_widths)]
- total_width = (INDENT + SEP * (self.shape[1] - 1)
- + np.sum(column_widths))
- print_numbers = (total_width < MAX_TOTAL_WIDTH)
+ column_widths = [
+ max(name_width, num_width)
+ for (name_width, num_width) in zip(
+ column_name_widths, column_num_widths
+ )
+ ]
+ total_width = INDENT + SEP * (self.shape[1] - 1) + np.sum(column_widths)
+ print_numbers = total_width < MAX_TOTAL_WIDTH
else:
print_numbers = False
@@ -1094,8 +1232,7 @@ def max_width(col):
sep = " " * SEP
# list() is for Py3 compatibility
for row in [names] + list(zip(*formatted_cols)):
- cells = [cell.rjust(width)
- for (width, cell) in zip(column_widths, row)]
+ cells = [cell.rjust(width) for (width, cell) in zip(column_widths, row)]
p.text(sep.join(cells))
p.text("\n" + " " * p.indentation)
if MAX_ROWS < self.shape[0]:
@@ -1134,6 +1271,7 @@ def max_width(col):
__reduce__ = no_pickling
+
def test_design_matrix():
import pytest
@@ -1145,8 +1283,7 @@ def test_design_matrix():
pytest.raises(ValueError, DesignMatrix, [[12, 14, 16, 18]], bad_di)
mm2 = DesignMatrix([[12, 14, 16, 18]])
- assert mm2.design_info.column_names == ["column0", "column1", "column2",
- "column3"]
+ assert mm2.design_info.column_names == ["column0", "column1", "column2", "column3"]
mm3 = DesignMatrix([12, 14, 16, 18])
assert mm3.shape == (4, 1)
diff --git a/patsy/eval.py b/patsy/eval.py
index 6b4f5ea..12ce0d4 100644
--- a/patsy/eval.py
+++ b/patsy/eval.py
@@ -20,10 +20,10 @@
import numbers
from patsy import PatsyError
from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling
-from patsy.tokens import (pretty_untokenize, normalize_token_spacing,
- python_tokenize)
+from patsy.tokens import pretty_untokenize, normalize_token_spacing, python_tokenize
from patsy.compat import call_and_wrap_exc
+
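+ # OR together the compiler flags of every available __future__ feature, so
+ # we can mask out the non-__future__ bits of a frame's co_flags.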
def _all_future_flags():
flags = 0
for feature_name in __future__.all_feature_names:
@@ -36,8 +36,10 @@ def _all_future_flags():
flags |= feature.compiler_flag
return flags
+
_ALL_FUTURE_FLAGS = _all_future_flags()
+
# This is just a minimal dict-like object that does lookup in a 'stack' of
# dicts -- first it checks the first, then the second, etc. Assignments go
# into an internal, zeroth dict.
@@ -85,6 +87,7 @@ def test_VarLookupDict():
assert "a" in ds
assert "c" not in ds
import pytest
+
pytest.raises(KeyError, ds.__getitem__, "c")
ds["a"] = 10
assert ds["a"] == 10
@@ -94,6 +97,7 @@ def test_VarLookupDict():
assert_no_pickling(ds)
+
def ast_names(code):
"""Iterator that yields all the (ast) names in a Python expression.
@@ -106,34 +110,44 @@ def ast_names(code):
for node in ast.walk(ast.parse(code)):
if isinstance(node, disallowed_ast_nodes):
- raise PatsyError("Lambda, list/dict/set comprehension, generator "
- "expression in patsy formula not currently supported.")
+ raise PatsyError(
+ "Lambda, list/dict/set comprehension, generator "
+ "expression in patsy formula not currently supported."
+ )
if isinstance(node, ast.Name):
yield node.id
+
def test_ast_names():
- test_data = [('np.log(x)', ['np', 'x']),
- ('x', ['x']),
- ('center(x + 1)', ['center', 'x']),
- ('dt.date.dt.month', ['dt'])]
+ test_data = [
+ ("np.log(x)", ["np", "x"]),
+ ("x", ["x"]),
+ ("center(x + 1)", ["center", "x"]),
+ ("dt.date.dt.month", ["dt"]),
+ ]
for code, expected in test_data:
assert set(ast_names(code)) == set(expected)
+
def test_ast_names_disallowed_nodes():
import pytest
+
def list_ast_names(code):
return list(ast_names(code))
+
pytest.raises(PatsyError, list_ast_names, "lambda x: x + y")
pytest.raises(PatsyError, list_ast_names, "[x + 1 for x in range(10)]")
pytest.raises(PatsyError, list_ast_names, "(x + 1 for x in range(10))")
pytest.raises(PatsyError, list_ast_names, "{x: True for x in range(10)}")
pytest.raises(PatsyError, list_ast_names, "{x + 1 for x in range(10)}")
+
class EvalEnvironment(object):
"""Represents a Python execution environment.
Encapsulates a namespace for variable lookup and set of __future__
flags."""
+
def __init__(self, namespaces, flags=0):
assert not flags & ~_ALL_FUTURE_FLAGS
self._namespaces = list(namespaces)
@@ -150,8 +164,7 @@ def with_outer_namespace(self, outer_namespace):
This namespace will be used only for variables that are not found in
any existing namespace, i.e., it is "outside" them all."""
- return self.__class__(self._namespaces + [outer_namespace],
- self.flags)
+ return self.__class__(self._namespaces + [outer_namespace], self.flags)
def eval(self, expr, source_name="<string>", inner_namespace={}):
"""Evaluate some Python code in the encapsulated environment.
@@ -163,8 +176,7 @@ def eval(self, expr, source_name="", inner_namespace={}):
:returns: The value of `expr`.
"""
code = compile(expr, source_name, "eval", self.flags, False)
- return eval(code, {}, VarLookupDict([inner_namespace]
- + self._namespaces))
+ return eval(code, {}, VarLookupDict([inner_namespace] + self._namespaces))
@classmethod
def capture(cls, eval_env=0, reference=0):
@@ -216,16 +228,20 @@ def my_model(formula_like, data, eval_env=0):
elif isinstance(eval_env, numbers.Integral):
depth = eval_env + reference
else:
- raise TypeError("Parameter 'eval_env' must be either an integer "
- "or an instance of patsy.EvalEnvironment.")
+ raise TypeError(
+ "Parameter 'eval_env' must be either an integer "
+ "or an instance of patsy.EvalEnvironment."
+ )
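+ # Walk `depth` frames up from our direct caller (depth 0 = the caller
+ # itself) and capture that frame's locals, globals, and __future__ flags.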
frame = inspect.currentframe()
try:
for i in range(depth + 1):
if frame is None:
raise ValueError("call-stack is not that deep!")
frame = frame.f_back
- return cls([frame.f_locals, frame.f_globals],
- frame.f_code.co_flags & _ALL_FUTURE_FLAGS)
+ return cls(
+ [frame.f_locals, frame.f_globals],
+ frame.f_code.co_flags & _ALL_FUTURE_FLAGS,
+ )
# The try/finally is important to avoid a potential reference cycle --
# any exception traceback will carry a reference to *our* frame, which
# contains a reference to our local variables, which would otherwise
@@ -245,37 +261,42 @@ def _namespace_ids(self):
return [id(n) for n in self._namespaces]
def __eq__(self, other):
- return (isinstance(other, EvalEnvironment)
- and self.flags == other.flags
- and self._namespace_ids() == other._namespace_ids())
+ return (
+ isinstance(other, EvalEnvironment)
+ and self.flags == other.flags
+ and self._namespace_ids() == other._namespace_ids()
+ )
def __ne__(self, other):
return not self == other
def __hash__(self):
- return hash((EvalEnvironment,
- self.flags,
- tuple(self._namespace_ids())))
+ return hash((EvalEnvironment, self.flags, tuple(self._namespace_ids())))
__getstate__ = no_pickling
-def _a(): # pragma: no cover
+
+def _a(): # pragma: no cover
_a = 1
return _b()
-def _b(): # pragma: no cover
+
+def _b(): # pragma: no cover
_b = 1
return _c()
-def _c(): # pragma: no cover
+
+def _c(): # pragma: no cover
_c = 1
- return [EvalEnvironment.capture(),
- EvalEnvironment.capture(0),
- EvalEnvironment.capture(1),
- EvalEnvironment.capture(0, reference=1),
- EvalEnvironment.capture(2),
- EvalEnvironment.capture(0, 2),
- ]
+ return [
+ EvalEnvironment.capture(),
+ EvalEnvironment.capture(0),
+ EvalEnvironment.capture(1),
+ EvalEnvironment.capture(0, reference=1),
+ EvalEnvironment.capture(2),
+ EvalEnvironment.capture(0, 2),
+ ]
+
def test_EvalEnvironment_capture_namespace():
c0, c, b1, b2, a1, a2 = _a()
@@ -294,7 +315,8 @@ def test_EvalEnvironment_capture_namespace():
assert b1.namespace["_c"] is _c
assert b2.namespace["_c"] is _c
import pytest
- pytest.raises(ValueError, EvalEnvironment.capture, 10 ** 6)
+
+ pytest.raises(ValueError, EvalEnvironment.capture, 10**6)
assert EvalEnvironment.capture(b1) is b1
@@ -302,24 +324,28 @@ def test_EvalEnvironment_capture_namespace():
assert_no_pickling(EvalEnvironment.capture())
+
def test_EvalEnvironment_capture_flags():
# This is the only __future__ feature currently usable in Python
# 3... fortunately it is probably not going anywhere.
TEST_FEATURE = "barry_as_FLUFL"
test_flag = getattr(__future__, TEST_FEATURE).compiler_flag
assert test_flag & _ALL_FUTURE_FLAGS
- source = ("def f():\n"
- " in_f = 'hi from f'\n"
- " global RETURN_INNER, RETURN_OUTER, RETURN_INNER_FROM_OUTER\n"
- " RETURN_INNER = EvalEnvironment.capture(0)\n"
- " RETURN_OUTER = call_capture_0()\n"
- " RETURN_INNER_FROM_OUTER = call_capture_1()\n"
- "f()\n")
+ source = (
+ "def f():\n"
+ " in_f = 'hi from f'\n"
+ " global RETURN_INNER, RETURN_OUTER, RETURN_INNER_FROM_OUTER\n"
+ " RETURN_INNER = EvalEnvironment.capture(0)\n"
+ " RETURN_OUTER = call_capture_0()\n"
+ " RETURN_INNER_FROM_OUTER = call_capture_1()\n"
+ "f()\n"
+ )
code = compile(source, "<test string>", "exec", 0, 1)
- env = {"EvalEnvironment": EvalEnvironment,
- "call_capture_0": lambda: EvalEnvironment.capture(0),
- "call_capture_1": lambda: EvalEnvironment.capture(1),
- }
+ env = {
+ "EvalEnvironment": EvalEnvironment,
+ "call_capture_0": lambda: EvalEnvironment.capture(0),
+ "call_capture_1": lambda: EvalEnvironment.capture(1),
+ }
env2 = dict(env)
exec(code, env)
assert env["RETURN_INNER"].namespace["in_f"] == "hi from f"
@@ -329,9 +355,13 @@ def test_EvalEnvironment_capture_flags():
assert env["RETURN_OUTER"].flags & _ALL_FUTURE_FLAGS == 0
assert env["RETURN_INNER_FROM_OUTER"].flags & _ALL_FUTURE_FLAGS == 0
- code2 = compile(("from __future__ import %s\n" % (TEST_FEATURE,))
- + source,
- "", "exec", 0, 1)
+ code2 = compile(
+ ("from __future__ import %s\n" % (TEST_FEATURE,)) + source,
+ "",
+ "exec",
+ 0,
+ 1,
+ )
exec(code2, env2)
assert env2["RETURN_INNER"].namespace["in_f"] == "hi from f"
assert env2["RETURN_INNER_FROM_OUTER"].namespace["in_f"] == "hi from f"
@@ -340,11 +370,13 @@ def test_EvalEnvironment_capture_flags():
assert env2["RETURN_OUTER"].flags & _ALL_FUTURE_FLAGS == 0
assert env2["RETURN_INNER_FROM_OUTER"].flags & _ALL_FUTURE_FLAGS == test_flag
+
def test_EvalEnvironment_eval_namespace():
env = EvalEnvironment([{"a": 1}])
assert env.eval("2 * a") == 2
assert env.eval("2 * a", inner_namespace={"a": 2}) == 4
import pytest
+
pytest.raises(NameError, env.eval, "2 * b")
a = 3
env2 = EvalEnvironment.capture(0)
@@ -354,6 +386,7 @@ def test_EvalEnvironment_eval_namespace():
assert env3.eval("2 * a") == 2
assert env3.eval("2 * b") == 6
+
def test_EvalEnvironment_eval_flags():
import pytest
@@ -374,12 +407,14 @@ def test_EvalEnvironment_eval_flags():
assert env2.subset(["a"]).flags == test_flag
assert env2.with_outer_namespace({"b": 10}).flags == test_flag
+
def test_EvalEnvironment_subset():
env = EvalEnvironment([{"a": 1}, {"b": 2}, {"c": 3}])
subset_a = env.subset(["a"])
assert subset_a.eval("a") == 1
import pytest
+
pytest.raises(NameError, subset_a.eval, "b")
pytest.raises(NameError, subset_a.eval, "c")
@@ -387,6 +422,7 @@ def test_EvalEnvironment_subset():
assert subset_bc.eval("b * c") == 6
pytest.raises(NameError, subset_bc.eval, "a")
+
def test_EvalEnvironment_eq():
# Two environments are eq only if they refer to exactly the same
# global/local dicts
@@ -399,6 +435,7 @@ def test_EvalEnvironment_eq():
env4 = capture_local_env()
assert env3 != env4
+
_builtins_dict = {}
exec("from patsy.builtins import *", {}, _builtins_dict)
# This is purely to make the existence of patsy.builtins visible to systems
@@ -406,6 +443,7 @@ def test_EvalEnvironment_eq():
# that patsy.builtins will be present in sys.modules in any case.
import patsy.builtins
+
class EvalFactor(object):
def __init__(self, code, origin=None):
"""A factor class that executes arbitrary Python code and supports
@@ -440,8 +478,7 @@ def __repr__(self):
return "%s(%r)" % (self.__class__.__name__, self.code)
def __eq__(self, other):
- return (isinstance(other, EvalFactor)
- and self.code == other.code)
+ return isinstance(other, EvalFactor) and self.code == other.code
def __ne__(self, other):
return not self == other
@@ -456,13 +493,13 @@ def memorize_passes_needed(self, state, eval_env):
eval_env = eval_env.with_outer_namespace(_builtins_dict)
env_namespace = eval_env.namespace
- subset_names = [name for name in ast_names(self.code)
- if name in env_namespace]
+ subset_names = [name for name in ast_names(self.code) if name in env_namespace]
eval_env = eval_env.subset(subset_names)
state["eval_env"] = eval_env
# example code: == "2 * center(x)"
i = [0]
+
def new_name_maker(token):
value = eval_env.namespace.get(token)
if hasattr(value, "__patsy_stateful_transform__"):
@@ -473,14 +510,17 @@ def new_name_maker(token):
return obj_name + ".transform"
else:
return token
+
# example eval_code: == "2 * _patsy_stobj0__center__.transform(x)"
eval_code = replace_bare_funcalls(self.code, new_name_maker)
state["eval_code"] = eval_code
# paranoia: verify that none of our new names appeared anywhere in the
# original code
if has_bare_variable_reference(state["transforms"], self.code):
- raise PatsyError("names of this form are reserved for "
- "internal use (%s)" % (token,), token.origin)
+ raise PatsyError(
+ "names of this form are reserved for " "internal use (%s)" % (token,),
+ token.origin,
+ )
# Pull out all the '_patsy_stobj0__center__.transform(x)' pieces
# to make '_patsy_stobj0__center__.memorize_chunk(x)' pieces
state["memorize_code"] = {}
@@ -491,9 +531,11 @@ def new_name_maker(token):
transform_call_name, transform_call_code = transform_call
assert transform_call_name == obj_name + ".transform"
assert transform_call_code.startswith(transform_call_name + "(")
- memorize_code = (obj_name
- + ".memorize_chunk"
- + transform_call_code[len(transform_call_name):])
+ memorize_code = (
+ obj_name
+ + ".memorize_chunk"
+ + transform_call_code[len(transform_call_name) :]
+ )
state["memorize_code"][obj_name] = memorize_code
# Then sort the codes into bins, so that every item in bin number i
# depends only on items in bin (i-1) or less. (By 'depends', we mean
@@ -529,29 +571,28 @@ def new_name_maker(token):
def _eval(self, code, memorize_state, data):
inner_namespace = VarLookupDict([data, memorize_state["transforms"]])
- return call_and_wrap_exc("Error evaluating factor",
- self,
- memorize_state["eval_env"].eval,
- code,
- inner_namespace=inner_namespace)
+ return call_and_wrap_exc(
+ "Error evaluating factor",
+ self,
+ memorize_state["eval_env"].eval,
+ code,
+ inner_namespace=inner_namespace,
+ )
def memorize_chunk(self, state, which_pass, data):
for obj_name in state["pass_bins"][which_pass]:
- self._eval(state["memorize_code"][obj_name],
- state,
- data)
+ self._eval(state["memorize_code"][obj_name], state, data)
def memorize_finish(self, state, which_pass):
for obj_name in state["pass_bins"][which_pass]:
state["transforms"][obj_name].memorize_finish()
def eval(self, memorize_state, data):
- return self._eval(memorize_state["eval_code"],
- memorize_state,
- data)
+ return self._eval(memorize_state["eval_code"], memorize_state, data)
__getstate__ = no_pickling
+
def test_EvalFactor_basics():
e = EvalFactor("a+b")
assert e.code == "a + b"
@@ -564,8 +605,10 @@ def test_EvalFactor_basics():
assert_no_pickling(e)
+
def test_EvalFactor_memorize_passes_needed():
from patsy.state import stateful_transform
+
foo = stateful_transform(lambda: "FOO-OBJ")
bar = stateful_transform(lambda: "BAR-OBJ")
quux = stateful_transform(lambda: "QUUX-OBJ")
@@ -581,30 +624,30 @@ def test_EvalFactor_memorize_passes_needed():
assert state["eval_env"].namespace[name] is locals()[name]
for name in ["w", "x", "y", "z", "e", "state"]:
assert name not in state["eval_env"].namespace
- assert state["transforms"] == {"_patsy_stobj0__foo__": "FOO-OBJ",
- "_patsy_stobj1__bar__": "BAR-OBJ",
- "_patsy_stobj2__foo__": "FOO-OBJ",
- "_patsy_stobj3__quux__": "QUUX-OBJ"}
- assert (state["eval_code"]
- == "_patsy_stobj0__foo__.transform(x)"
- " + _patsy_stobj1__bar__.transform("
- "_patsy_stobj2__foo__.transform(y))"
- " + _patsy_stobj3__quux__.transform(z, w)")
-
- assert (state["memorize_code"]
- == {"_patsy_stobj0__foo__":
- "_patsy_stobj0__foo__.memorize_chunk(x)",
- "_patsy_stobj1__bar__":
- "_patsy_stobj1__bar__.memorize_chunk(_patsy_stobj2__foo__.transform(y))",
- "_patsy_stobj2__foo__":
- "_patsy_stobj2__foo__.memorize_chunk(y)",
- "_patsy_stobj3__quux__":
- "_patsy_stobj3__quux__.memorize_chunk(z, w)",
- })
- assert state["pass_bins"] == [set(["_patsy_stobj0__foo__",
- "_patsy_stobj2__foo__",
- "_patsy_stobj3__quux__"]),
- set(["_patsy_stobj1__bar__"])]
+ assert state["transforms"] == {
+ "_patsy_stobj0__foo__": "FOO-OBJ",
+ "_patsy_stobj1__bar__": "BAR-OBJ",
+ "_patsy_stobj2__foo__": "FOO-OBJ",
+ "_patsy_stobj3__quux__": "QUUX-OBJ",
+ }
+ assert (
+ state["eval_code"] == "_patsy_stobj0__foo__.transform(x)"
+ " + _patsy_stobj1__bar__.transform("
+ "_patsy_stobj2__foo__.transform(y))"
+ " + _patsy_stobj3__quux__.transform(z, w)"
+ )
+
+ assert state["memorize_code"] == {
+ "_patsy_stobj0__foo__": "_patsy_stobj0__foo__.memorize_chunk(x)",
+ "_patsy_stobj1__bar__": "_patsy_stobj1__bar__.memorize_chunk(_patsy_stobj2__foo__.transform(y))",
+ "_patsy_stobj2__foo__": "_patsy_stobj2__foo__.memorize_chunk(y)",
+ "_patsy_stobj3__quux__": "_patsy_stobj3__quux__.memorize_chunk(z, w)",
+ }
+ assert state["pass_bins"] == [
+ set(["_patsy_stobj0__foo__", "_patsy_stobj2__foo__", "_patsy_stobj3__quux__"]),
+ set(["_patsy_stobj1__bar__"]),
+ ]
+
class _MockTransform(object):
# Adds up all memorized data, then subtracts that sum from each datum
@@ -616,6 +659,7 @@ def __init__(self):
def memorize_chunk(self, data):
self._memorize_chunk_called += 1
import numpy as np
+
self._sum += np.sum(data)
def memorize_finish(self):
@@ -624,8 +668,10 @@ def memorize_finish(self):
def transform(self, data):
return data - self._sum
+
def test_EvalFactor_end_to_end():
from patsy.state import stateful_transform
+
foo = stateful_transform(_MockTransform)
e = EvalFactor("foo(x) + foo(foo(y))")
state = {}
@@ -638,13 +684,11 @@ def test_EvalFactor_end_to_end():
for name in ["x", "y", "e", "state"]:
assert name not in state["eval_env"].namespace
import numpy as np
- e.memorize_chunk(state, 0,
- {"x": np.array([1, 2]),
- "y": np.array([10, 11])})
+
+ e.memorize_chunk(state, 0, {"x": np.array([1, 2]), "y": np.array([10, 11])})
assert state["transforms"]["_patsy_stobj0__foo__"]._memorize_chunk_called == 1
assert state["transforms"]["_patsy_stobj2__foo__"]._memorize_chunk_called == 1
- e.memorize_chunk(state, 0, {"x": np.array([12, -10]),
- "y": np.array([100, 3])})
+ e.memorize_chunk(state, 0, {"x": np.array([12, -10]), "y": np.array([100, 3])})
assert state["transforms"]["_patsy_stobj0__foo__"]._memorize_chunk_called == 2
assert state["transforms"]["_patsy_stobj2__foo__"]._memorize_chunk_called == 2
assert state["transforms"]["_patsy_stobj0__foo__"]._memorize_finish_called == 0
@@ -654,10 +698,8 @@ def test_EvalFactor_end_to_end():
assert state["transforms"]["_patsy_stobj2__foo__"]._memorize_finish_called == 1
assert state["transforms"]["_patsy_stobj1__foo__"]._memorize_chunk_called == 0
assert state["transforms"]["_patsy_stobj1__foo__"]._memorize_finish_called == 0
- e.memorize_chunk(state, 1, {"x": np.array([1, 2]),
- "y": np.array([10, 11])})
- e.memorize_chunk(state, 1, {"x": np.array([12, -10]),
- "y": np.array([100, 3])})
+ e.memorize_chunk(state, 1, {"x": np.array([1, 2]), "y": np.array([10, 11])})
+ e.memorize_chunk(state, 1, {"x": np.array([12, -10]), "y": np.array([100, 3])})
e.memorize_finish(state, 1)
for transform in state["transforms"].values():
assert transform._memorize_chunk_called == 2
@@ -671,70 +713,78 @@ def test_EvalFactor_end_to_end():
# 2: -114, -113, -24, -121
# 1: 258, 259, 348, 251
# 0 + 1: 254, 256, 355, 236
- assert np.all(e.eval(state,
- {"x": np.array([1, 2, 12, -10]),
- "y": np.array([10, 11, 100, 3])})
- == [254, 256, 355, 236])
+ assert np.all(
+ e.eval(state, {"x": np.array([1, 2, 12, -10]), "y": np.array([10, 11, 100, 3])})
+ == [254, 256, 355, 236]
+ )
+
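+ # Tokenize `code`, tagging each token with two booleans: "bare_ref" (a NAME
+ # not preceded by "."), and "bare_funcall" (a bare_ref immediately followed
+ # by "(", i.e. a plain call rather than a method/attribute access).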
def annotated_tokens(code):
prev_was_dot = False
it = PushbackAdapter(python_tokenize(code))
- for (token_type, token, origin) in it:
+ for token_type, token, origin in it:
props = {}
- props["bare_ref"] = (not prev_was_dot and token_type == tokenize.NAME)
- props["bare_funcall"] = (props["bare_ref"]
- and it.has_more() and it.peek()[1] == "(")
+ props["bare_ref"] = not prev_was_dot and token_type == tokenize.NAME
+ props["bare_funcall"] = (
+ props["bare_ref"] and it.has_more() and it.peek()[1] == "("
+ )
yield (token_type, token, origin, props)
- prev_was_dot = (token == ".")
+ prev_was_dot = token == "."
+
def test_annotated_tokens():
- tokens_without_origins = [(token_type, token, props)
- for (token_type, token, origin, props)
- in (annotated_tokens("a(b) + c.d"))]
- assert (tokens_without_origins
- == [(tokenize.NAME, "a", {"bare_ref": True, "bare_funcall": True}),
- (tokenize.OP, "(", {"bare_ref": False, "bare_funcall": False}),
- (tokenize.NAME, "b", {"bare_ref": True, "bare_funcall": False}),
- (tokenize.OP, ")", {"bare_ref": False, "bare_funcall": False}),
- (tokenize.OP, "+", {"bare_ref": False, "bare_funcall": False}),
- (tokenize.NAME, "c", {"bare_ref": True, "bare_funcall": False}),
- (tokenize.OP, ".", {"bare_ref": False, "bare_funcall": False}),
- (tokenize.NAME, "d",
- {"bare_ref": False, "bare_funcall": False}),
- ])
+ tokens_without_origins = [
+ (token_type, token, props)
+ for (token_type, token, origin, props) in (annotated_tokens("a(b) + c.d"))
+ ]
+ assert tokens_without_origins == [
+ (tokenize.NAME, "a", {"bare_ref": True, "bare_funcall": True}),
+ (tokenize.OP, "(", {"bare_ref": False, "bare_funcall": False}),
+ (tokenize.NAME, "b", {"bare_ref": True, "bare_funcall": False}),
+ (tokenize.OP, ")", {"bare_ref": False, "bare_funcall": False}),
+ (tokenize.OP, "+", {"bare_ref": False, "bare_funcall": False}),
+ (tokenize.NAME, "c", {"bare_ref": True, "bare_funcall": False}),
+ (tokenize.OP, ".", {"bare_ref": False, "bare_funcall": False}),
+ (tokenize.NAME, "d", {"bare_ref": False, "bare_funcall": False}),
+ ]
# This was a bug:
assert len(list(annotated_tokens("x"))) == 1
+
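+ # True iff `code` contains a bare (non-attribute) reference to any of the
+ # given names.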
def has_bare_variable_reference(names, code):
- for (_, token, _, props) in annotated_tokens(code):
+ for _, token, _, props in annotated_tokens(code):
if props["bare_ref"] and token in names:
return True
return False
+
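+ # Pass the name in each bare call f(...) through `replacer`; attribute
+ # calls like obj.f(...) are left untouched.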
def replace_bare_funcalls(code, replacer):
tokens = []
- for (token_type, token, origin, props) in annotated_tokens(code):
+ for token_type, token, origin, props in annotated_tokens(code):
if props["bare_ref"] and props["bare_funcall"]:
token = replacer(token)
tokens.append((token_type, token))
return pretty_untokenize(tokens)
+
def test_replace_bare_funcalls():
def replacer1(token):
return {"a": "b", "foo": "_internal.foo.process"}.get(token, token)
+
def t1(code, expected):
replaced = replace_bare_funcalls(code, replacer1)
print("%r -> %r" % (code, replaced))
print("(wanted %r)" % (expected,))
assert replaced == expected
+
t1("foobar()", "foobar()")
t1("a()", "b()")
t1("foobar.a()", "foobar.a()")
t1("foo()", "_internal.foo.process()")
t1("a + 1", "a + 1")
- t1("b() + a() * x[foo(2 ** 3)]",
- "b() + b() * x[_internal.foo.process(2 ** 3)]")
+ t1("b() + a() * x[foo(2 ** 3)]", "b() + b() * x[_internal.foo.process(2 ** 3)]")
+
class _FuncallCapturer(object):
# captures the next funcall
@@ -763,25 +813,33 @@ def add_token(self, token_type, token):
if self.started and self.paren_depth == 0:
self.done = True
+
# This is not a very general function -- it assumes that all references to the
# given object are of the form '.something(method call)'.
def capture_obj_method_calls(obj_name, code):
capturers = []
- for (token_type, token, origin, props) in annotated_tokens(code):
+ for token_type, token, origin, props in annotated_tokens(code):
for capturer in capturers:
capturer.add_token(token_type, token)
if props["bare_ref"] and token == obj_name:
capturers.append(_FuncallCapturer(token_type, token))
- return [("".join(capturer.func), pretty_untokenize(capturer.tokens))
- for capturer in capturers]
+ return [
+ ("".join(capturer.func), pretty_untokenize(capturer.tokens))
+ for capturer in capturers
+ ]
+
def test_capture_obj_method_calls():
- assert (capture_obj_method_calls("foo", "a + foo.baz(bar) + b.c(d)")
- == [("foo.baz", "foo.baz(bar)")])
- assert (capture_obj_method_calls("b", "a + foo.baz(bar) + b.c(d)")
- == [("b.c", "b.c(d)")])
- assert (capture_obj_method_calls("foo", "foo.bar(foo.baz(quux))")
- == [("foo.bar", "foo.bar(foo.baz(quux))"),
- ("foo.baz", "foo.baz(quux)")])
- assert (capture_obj_method_calls("bar", "foo[bar.baz(x(z[asdf])) ** 2]")
- == [("bar.baz", "bar.baz(x(z[asdf]))")])
+ assert capture_obj_method_calls("foo", "a + foo.baz(bar) + b.c(d)") == [
+ ("foo.baz", "foo.baz(bar)")
+ ]
+ assert capture_obj_method_calls("b", "a + foo.baz(bar) + b.c(d)") == [
+ ("b.c", "b.c(d)")
+ ]
+ assert capture_obj_method_calls("foo", "foo.bar(foo.baz(quux))") == [
+ ("foo.bar", "foo.bar(foo.baz(quux))"),
+ ("foo.baz", "foo.baz(quux)"),
+ ]
+ assert capture_obj_method_calls("bar", "foo[bar.baz(x(z[asdf])) ** 2]") == [
+ ("bar.baz", "bar.baz(x(z[asdf]))")
+ ]
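Aside (illustrative, not part of the patch): a quick sketch of the token
utilities reformatted above, assuming patsy's internal patsy.eval module is
importable as-is. replace_bare_funcalls rewrites bare calls and leaves
attribute access alone, exactly as the t1 cases check (the _ns mapping here is
made up):

    from patsy.eval import replace_bare_funcalls

    def replacer(name):
        return {"center": "_ns.center"}.get(name, name)

    print(replace_bare_funcalls("center(x) + x.center()", replacer))
    # Expected output: _ns.center(x) + x.center()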
diff --git a/patsy/highlevel.py b/patsy/highlevel.py
index 2138367..43a000e 100644
--- a/patsy/highlevel.py
+++ b/patsy/highlevel.py
@@ -3,8 +3,7 @@
# See file LICENSE.txt for license information.
# These are made available in the patsy.* namespace:
-__all__ = ["dmatrix", "dmatrices",
- "incr_dbuilder", "incr_dbuilders"]
+__all__ = ["dmatrix", "dmatrices", "incr_dbuilder", "incr_dbuilders"]
# problems:
# statsmodels reluctant to pass around separate eval environment, suggesting
@@ -19,46 +18,51 @@
from patsy.design_info import DesignMatrix, DesignInfo
from patsy.eval import EvalEnvironment
from patsy.desc import ModelDesc
-from patsy.build import (design_matrix_builders,
- build_design_matrices)
-from patsy.util import (have_pandas, asarray_or_pandas,
- atleast_2d_column_default)
+from patsy.build import design_matrix_builders, build_design_matrices
+from patsy.util import have_pandas, asarray_or_pandas, atleast_2d_column_default
if have_pandas:
import pandas
+
# Tries to build a (lhs, rhs) design given a formula_like and an incremental
# data source. If formula_like is not capable of doing this, then returns
# None.
-def _try_incr_builders(formula_like, data_iter_maker, eval_env,
- NA_action):
+def _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action):
if isinstance(formula_like, DesignInfo):
- return (design_matrix_builders([[]], data_iter_maker, eval_env, NA_action)[0],
- formula_like)
- if (isinstance(formula_like, tuple)
+ return (
+ design_matrix_builders([[]], data_iter_maker, eval_env, NA_action)[0],
+ formula_like,
+ )
+ if (
+ isinstance(formula_like, tuple)
and len(formula_like) == 2
and isinstance(formula_like[0], DesignInfo)
- and isinstance(formula_like[1], DesignInfo)):
+ and isinstance(formula_like[1], DesignInfo)
+ ):
return formula_like
if hasattr(formula_like, "__patsy_get_model_desc__"):
formula_like = formula_like.__patsy_get_model_desc__(eval_env)
if not isinstance(formula_like, ModelDesc):
- raise PatsyError("bad value from %r.__patsy_get_model_desc__"
- % (formula_like,))
+ raise PatsyError(
+ "bad value from %r.__patsy_get_model_desc__" % (formula_like,)
+ )
# fallthrough
if isinstance(formula_like, str):
formula_like = ModelDesc.from_formula(formula_like)
# fallthrough
if isinstance(formula_like, ModelDesc):
assert isinstance(eval_env, EvalEnvironment)
- return design_matrix_builders([formula_like.lhs_termlist,
- formula_like.rhs_termlist],
- data_iter_maker,
- eval_env,
- NA_action)
+ return design_matrix_builders(
+ [formula_like.lhs_termlist, formula_like.rhs_termlist],
+ data_iter_maker,
+ eval_env,
+ NA_action,
+ )
else:
return None
+
def incr_dbuilder(formula_like, data_iter_maker, eval_env=0, NA_action="drop"):
"""Construct a design matrix builder incrementally from a large data set.
@@ -96,17 +100,19 @@ def iter_maker():
The ``NA_action`` argument.
"""
eval_env = EvalEnvironment.capture(eval_env, reference=1)
- design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
- NA_action)
+ design_infos = _try_incr_builders(
+ formula_like, data_iter_maker, eval_env, NA_action
+ )
if design_infos is None:
raise PatsyError("bad formula-like object")
if len(design_infos[0].column_names) > 0:
- raise PatsyError("encountered outcome variables for a model "
- "that does not expect them")
+ raise PatsyError(
+ "encountered outcome variables for a model " "that does not expect them"
+ )
return design_infos[1]
-def incr_dbuilders(formula_like, data_iter_maker, eval_env=0,
- NA_action="drop"):
+
+def incr_dbuilders(formula_like, data_iter_maker, eval_env=0, NA_action="drop"):
"""Construct two design matrix builders incrementally from a large data
set.
@@ -114,14 +120,16 @@ def incr_dbuilders(formula_like, data_iter_maker, eval_env=0,
to :func:`dmatrix`. See :func:`incr_dbuilder` for details.
"""
eval_env = EvalEnvironment.capture(eval_env, reference=1)
- design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
- NA_action)
+ design_infos = _try_incr_builders(
+ formula_like, data_iter_maker, eval_env, NA_action
+ )
if design_infos is None:
raise PatsyError("bad formula-like object")
if len(design_infos[0].column_names) == 0:
raise PatsyError("model is missing required outcome variables")
return design_infos
+
# This always returns a length-two tuple,
# response, predictors
# where
@@ -139,34 +147,41 @@ def incr_dbuilders(formula_like, data_iter_maker, eval_env=0,
# DesignInfo
# (DesignInfo, DesignInfo)
# any object with a special method __patsy_get_model_desc__
-def _do_highlevel_design(formula_like, data, eval_env,
- NA_action, return_type):
+def _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type):
if return_type == "dataframe" and not have_pandas:
- raise PatsyError("pandas.DataFrame was requested, but pandas "
- "is not installed")
+ raise PatsyError(
+ "pandas.DataFrame was requested, but pandas " "is not installed"
+ )
if return_type not in ("matrix", "dataframe"):
- raise PatsyError("unrecognized output type %r, should be "
- "'matrix' or 'dataframe'" % (return_type,))
+ raise PatsyError(
+ "unrecognized output type %r, should be "
+ "'matrix' or 'dataframe'" % (return_type,)
+ )
+
def data_iter_maker():
return iter([data])
- design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
- NA_action)
+
+ design_infos = _try_incr_builders(
+ formula_like, data_iter_maker, eval_env, NA_action
+ )
if design_infos is not None:
- return build_design_matrices(design_infos, data,
- NA_action=NA_action,
- return_type=return_type)
+ return build_design_matrices(
+ design_infos, data, NA_action=NA_action, return_type=return_type
+ )
else:
# No builders, but maybe we can still get matrices
if isinstance(formula_like, tuple):
if len(formula_like) != 2:
- raise PatsyError("don't know what to do with a length %s "
- "matrices tuple"
- % (len(formula_like),))
+ raise PatsyError(
+ "don't know what to do with a length %s "
+ "matrices tuple" % (len(formula_like),)
+ )
(lhs, rhs) = formula_like
else:
# subok=True is necessary here to allow DesignMatrixes to pass
# through
(lhs, rhs) = (None, asarray_or_pandas(formula_like, subok=True))
+
# some sort of explicit matrix or matrices were given. Currently we
# have them in one of these forms:
# -- an ndarray or subclass
@@ -188,6 +203,7 @@ def _regularize_matrix(m, default_column_prefix):
return (m, orig_index)
else:
return (DesignMatrix(m, di), orig_index)
+
rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
if lhs is None:
lhs = np.zeros((rhs.shape[0], 0), dtype=float)
@@ -196,13 +212,15 @@ def _regularize_matrix(m, default_column_prefix):
assert isinstance(getattr(lhs, "design_info", None), DesignInfo)
assert isinstance(getattr(rhs, "design_info", None), DesignInfo)
if lhs.shape[0] != rhs.shape[0]:
- raise PatsyError("shape mismatch: outcome matrix has %s rows, "
- "predictor matrix has %s rows"
- % (lhs.shape[0], rhs.shape[0]))
+ raise PatsyError(
+ "shape mismatch: outcome matrix has %s rows, "
+ "predictor matrix has %s rows" % (lhs.shape[0], rhs.shape[0])
+ )
if rhs_orig_index is not None and lhs_orig_index is not None:
if not rhs_orig_index.equals(lhs_orig_index):
- raise PatsyError("index mismatch: outcome and "
- "predictor have incompatible indexes")
+ raise PatsyError(
+ "index mismatch: outcome and " "predictor have incompatible indexes"
+ )
if return_type == "dataframe":
if rhs_orig_index is not None and lhs_orig_index is None:
lhs.index = rhs.index
@@ -210,8 +228,8 @@ def _regularize_matrix(m, default_column_prefix):
rhs.index = lhs.index
return (lhs, rhs)
-def dmatrix(formula_like, data={}, eval_env=0,
- NA_action="drop", return_type="matrix"):
+
+def dmatrix(formula_like, data={}, eval_env=0, NA_action="drop", return_type="matrix"):
"""Construct a single design matrix given a formula_like and data.
:arg formula_like: An object that can be used to construct a design
@@ -275,15 +293,19 @@ def dmatrix(formula_like, data={}, eval_env=0,
The ``NA_action`` argument.
"""
eval_env = EvalEnvironment.capture(eval_env, reference=1)
- (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
- NA_action, return_type)
+ (lhs, rhs) = _do_highlevel_design(
+ formula_like, data, eval_env, NA_action, return_type
+ )
if lhs.shape[1] != 0:
- raise PatsyError("encountered outcome variables for a model "
- "that does not expect them")
+ raise PatsyError(
+ "encountered outcome variables for a model " "that does not expect them"
+ )
return rhs
-def dmatrices(formula_like, data={}, eval_env=0,
- NA_action="drop", return_type="matrix"):
+
+def dmatrices(
+ formula_like, data={}, eval_env=0, NA_action="drop", return_type="matrix"
+):
"""Construct two design matrices given a formula_like and data.
This function is identical to :func:`dmatrix`, except that it requires
@@ -294,8 +316,9 @@ def dmatrices(formula_like, data={}, eval_env=0,
See :func:`dmatrix` for details.
"""
eval_env = EvalEnvironment.capture(eval_env, reference=1)
- (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
- NA_action, return_type)
+ (lhs, rhs) = _do_highlevel_design(
+ formula_like, data, eval_env, NA_action, return_type
+ )
if lhs.shape[1] == 0:
raise PatsyError("model is missing required outcome variables")
return (lhs, rhs)
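Aside (illustrative, not part of the patch): the reformatted highlevel.py keeps
the documented contract that dmatrices returns a (response, predictors) pair
while dmatrix returns predictors only. A minimal usage sketch with made-up data:

    import numpy as np
    from patsy import dmatrices

    data = {"y": np.array([1.0, 2.0, 3.0, 4.0]),
            "x": np.array([0.5, 1.5, 2.5, 3.5])}
    y, X = dmatrices("y ~ x", data)
    assert y.shape == (4, 1)
    assert X.shape == (4, 2)  # Intercept column plus x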
diff --git a/patsy/infix_parser.py b/patsy/infix_parser.py
index fb0ddff..6c127b5 100644
--- a/patsy/infix_parser.py
+++ b/patsy/infix_parser.py
@@ -32,8 +32,13 @@
from patsy import PatsyError
from patsy.origin import Origin
-from patsy.util import (repr_pretty_delegate, repr_pretty_impl,
- no_pickling, assert_no_pickling)
+from patsy.util import (
+ repr_pretty_delegate,
+ repr_pretty_impl,
+ no_pickling,
+ assert_no_pickling,
+)
+
class _UniqueValue:
def __init__(self, print_as):
@@ -44,6 +49,7 @@ def __repr__(self):
__getstate__ = no_pickling
+
class Token:
"""A token with possible payload.
@@ -52,6 +58,7 @@ class Token:
An arbitrary object indicating the type of this token. Should be
:term:`hashable`, but otherwise it can be whatever you like.
"""
+
LPAREN = _UniqueValue("LPAREN")
RPAREN = _UniqueValue("RPAREN")
@@ -61,6 +68,7 @@ def __init__(self, type, origin, extra=None):
self.extra = extra
__repr__ = repr_pretty_delegate
+
def _repr_pretty_(self, p, cycle):
assert not cycle
kwargs = []
@@ -70,6 +78,7 @@ def _repr_pretty_(self, p, cycle):
__getstate__ = no_pickling
+
class ParseNode(object):
def __init__(self, type, token, args, origin):
self.type = type
@@ -78,11 +87,13 @@ def __init__(self, type, token, args, origin):
self.origin = origin
__repr__ = repr_pretty_delegate
+
def _repr_pretty_(self, p, cycle):
return repr_pretty_impl(p, self, [self.type, self.token, self.args])
__getstate__ = no_pickling
+
class Operator(object):
def __init__(self, token_type, arity, precedence):
self.token_type = token_type
@@ -90,11 +101,16 @@ def __init__(self, token_type, arity, precedence):
self.precedence = precedence
def __repr__(self):
- return "%s(%r, %r, %r)" % (self.__class__.__name__,
- self.token_type, self.arity, self.precedence)
+ return "%s(%r, %r, %r)" % (
+ self.__class__.__name__,
+ self.token_type,
+ self.arity,
+ self.precedence,
+ )
__getstate__ = no_pickling
+
class _StackOperator(object):
def __init__(self, op, token):
self.op = op
@@ -102,8 +118,10 @@ def __init__(self, op, token):
__getstate__ = no_pickling
+
_open_paren = Operator(Token.LPAREN, -1, -9999999)
+
class _ParseContext(object):
def __init__(self, unary_ops, binary_ops, atomic_types, trace):
self.op_stack = []
@@ -115,6 +133,7 @@ def __init__(self, unary_ops, binary_ops, atomic_types, trace):
__getstate__ = no_pickling
+
def _read_noun_context(token, c):
if token.type == Token.LPAREN:
if c.trace:
@@ -129,13 +148,13 @@ def _read_noun_context(token, c):
elif token.type in c.atomic_types:
if c.trace:
print("Pushing noun %r (%r)" % (token.type, token.extra))
- c.noun_stack.append(ParseNode(token.type, token, [],
- token.origin))
+ c.noun_stack.append(ParseNode(token.type, token, [], token.origin))
return False
else:
- raise PatsyError("expected a noun, not '%s'"
- % (token.origin.relevant_code(),),
- token)
+ raise PatsyError(
+ "expected a noun, not '%s'" % (token.origin.relevant_code(),), token
+ )
+
def _run_op(c):
assert c.op_stack
@@ -146,10 +165,15 @@ def _run_op(c):
args.reverse()
if c.trace:
print("Reducing %r (%r)" % (stackop.op.token_type, args))
- node = ParseNode(stackop.op.token_type, stackop.token, args,
- Origin.combine([stackop.token] + args))
+ node = ParseNode(
+ stackop.op.token_type,
+ stackop.token,
+ args,
+ Origin.combine([stackop.token] + args),
+ )
c.noun_stack.append(node)
+
def _read_op_context(token, c):
if token.type == Token.RPAREN:
if c.trace:
@@ -161,9 +185,7 @@ def _read_op_context(token, c):
assert c.op_stack[-1].op.token_type == Token.LPAREN
# Expand the origin of the item on top of the noun stack to include
# the open and close parens:
- combined = Origin.combine([c.op_stack[-1].token,
- c.noun_stack[-1].token,
- token])
+ combined = Origin.combine([c.op_stack[-1].token, c.noun_stack[-1].token, token])
c.noun_stack[-1].origin = combined
# Pop the open-paren
c.op_stack.pop()
@@ -172,17 +194,17 @@ def _read_op_context(token, c):
if c.trace:
print("Found binary operator %r" % (token.type))
stackop = _StackOperator(c.binary_ops[token.type], token)
- while (c.op_stack
- and stackop.op.precedence <= c.op_stack[-1].op.precedence):
+ while c.op_stack and stackop.op.precedence <= c.op_stack[-1].op.precedence:
_run_op(c)
if c.trace:
print("Pushing binary operator %r" % (token.type))
c.op_stack.append(stackop)
return True
else:
- raise PatsyError("expected an operator, not '%s'"
- % (token.origin.relevant_code(),),
- token)
+ raise PatsyError(
+ "expected an operator, not '%s'" % (token.origin.relevant_code(),), token
+ )
+
def infix_parse(tokens, operators, atomic_types, trace=False):
token_source = iter(tokens)
@@ -216,8 +238,10 @@ def infix_parse(tokens, operators, atomic_types, trace=False):
print("End of token stream")
if want_noun:
- raise PatsyError("expected a noun, but instead the expression ended",
- c.op_stack[-1].token.origin)
+ raise PatsyError(
+ "expected a noun, but instead the expression ended",
+ c.op_stack[-1].token.origin,
+ )
while c.op_stack:
if c.op_stack[-1].op.token_type == Token.LPAREN:
@@ -227,28 +251,31 @@ def infix_parse(tokens, operators, atomic_types, trace=False):
assert len(c.noun_stack) == 1
return c.noun_stack.pop()
+
# Much more thorough tests in parse_formula.py, this is just a smoke test:
def test_infix_parse():
- ops = [Operator("+", 2, 10),
- Operator("*", 2, 20),
- Operator("-", 1, 30)]
+ ops = [Operator("+", 2, 10), Operator("*", 2, 20), Operator("-", 1, 30)]
atomic = ["ATOM1", "ATOM2"]
# a + -b * (c + d)
mock_origin = Origin("asdf", 2, 3)
- tokens = [Token("ATOM1", mock_origin, "a"),
- Token("+", mock_origin, "+"),
- Token("-", mock_origin, "-"),
- Token("ATOM2", mock_origin, "b"),
- Token("*", mock_origin, "*"),
- Token(Token.LPAREN, mock_origin, "("),
- Token("ATOM1", mock_origin, "c"),
- Token("+", mock_origin, "+"),
- Token("ATOM2", mock_origin, "d"),
- Token(Token.RPAREN, mock_origin, ")")]
+ tokens = [
+ Token("ATOM1", mock_origin, "a"),
+ Token("+", mock_origin, "+"),
+ Token("-", mock_origin, "-"),
+ Token("ATOM2", mock_origin, "b"),
+ Token("*", mock_origin, "*"),
+ Token(Token.LPAREN, mock_origin, "("),
+ Token("ATOM1", mock_origin, "c"),
+ Token("+", mock_origin, "+"),
+ Token("ATOM2", mock_origin, "d"),
+ Token(Token.RPAREN, mock_origin, ")"),
+ ]
tree = infix_parse(tokens, ops, atomic)
+
def te(tree, type, extra):
assert tree.type == type
assert tree.token.extra == extra
+
te(tree, "+", "+")
te(tree.args[0], "ATOM1", "a")
assert tree.args[0].args == []
@@ -261,9 +288,9 @@ def te(tree, type, extra):
te(tree.args[1].args[1].args[1], "ATOM2", "d")
import pytest
+
# No ternary ops
- pytest.raises(ValueError,
- infix_parse, [], [Operator("+", 3, 10)], ["ATOMIC"])
+ pytest.raises(ValueError, infix_parse, [], [Operator("+", 3, 10)], ["ATOMIC"])
# smoke test just to make sure there are no egregious bugs in 'trace'
infix_parse(tokens, ops, atomic, trace=True)
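Aside (illustrative, not part of the patch): infix_parse is a standard
operator-precedence (shunting-yard style) parser; _run_op pops the operator
stack while the incoming operator binds no tighter than the stack top. A
stripped-down sketch of that reduction step, with hypothetical names:

    def reduce_ops(op_stack, noun_stack, incoming_prec):
        # Pop and apply stacked operators whose precedence is at least that
        # of the incoming operator; popping on ties gives left associativity.
        while op_stack and incoming_prec <= op_stack[-1][0]:
            prec, fn = op_stack.pop()
            right = noun_stack.pop()
            left = noun_stack.pop()
            noun_stack.append(fn(left, right))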
diff --git a/patsy/mgcv_cubic_splines.py b/patsy/mgcv_cubic_splines.py
index 3aeb5eb..7acaa38 100644
--- a/patsy/mgcv_cubic_splines.py
+++ b/patsy/mgcv_cubic_splines.py
@@ -9,8 +9,13 @@
import numpy as np
-from patsy.util import (have_pandas, atleast_2d_column_default,
- no_pickling, assert_no_pickling, safe_string_eq)
+from patsy.util import (
+ have_pandas,
+ atleast_2d_column_default,
+ no_pickling,
+ assert_no_pickling,
+ safe_string_eq,
+)
from patsy.state import stateful_transform
if have_pandas:
@@ -32,18 +37,18 @@ def _get_natural_f(knots):
"""
try:
from scipy import linalg
- except ImportError: # pragma: no cover
+ except ImportError: # pragma: no cover
raise ImportError("Cubic spline functionality requires scipy.")
h = knots[1:] - knots[:-1]
- diag = (h[:-1] + h[1:]) / 3.
- ul_diag = h[1:-1] / 6.
- banded_b = np.array([np.r_[0., ul_diag], diag, np.r_[ul_diag, 0.]])
+ diag = (h[:-1] + h[1:]) / 3.0
+ ul_diag = h[1:-1] / 6.0
+ banded_b = np.array([np.r_[0.0, ul_diag], diag, np.r_[ul_diag, 0.0]])
d = np.zeros((knots.size - 2, knots.size))
for i in range(knots.size - 2):
- d[i, i] = 1. / h[i]
- d[i, i + 2] = 1. / h[i + 1]
- d[i, i + 1] = - d[i, i] - d[i, i + 2]
+ d[i, i] = 1.0 / h[i]
+ d[i, i + 2] = 1.0 / h[i + 1]
+ d[i, i + 1] = -d[i, i] - d[i, i + 2]
fm = linalg.solve_banded((1, 1), banded_b, d)
@@ -64,9 +69,10 @@ def _map_cyclic(x, lbound, ubound):
:raise ValueError: if lbound >= ubound.
"""
if lbound >= ubound:
- raise ValueError("Invalid argument: lbound (%r) should be "
- "less than ubound (%r)."
- % (lbound, ubound))
+ raise ValueError(
+ "Invalid argument: lbound (%r) should be "
+ "less than ubound (%r)." % (lbound, ubound)
+ )
x = np.copy(x)
x[x > ubound] = lbound + (x[x > ubound] - ubound) % (ubound - lbound)
@@ -86,6 +92,7 @@ def test__map_cyclic():
def test__map_cyclic_errors():
import pytest
+
x = np.linspace(0.2, 5.7, 10)
pytest.raises(ValueError, _map_cyclic, x, 4.5, 3.6)
pytest.raises(ValueError, _map_cyclic, x, 4.5, 4.5)
@@ -106,22 +113,22 @@ def _get_cyclic_f(knots):
b = np.zeros((n, n))
d = np.zeros((n, n))
- b[0, 0] = (h[n - 1] + h[0]) / 3.
- b[0, n - 1] = h[n - 1] / 6.
- b[n - 1, 0] = h[n - 1] / 6.
+ b[0, 0] = (h[n - 1] + h[0]) / 3.0
+ b[0, n - 1] = h[n - 1] / 6.0
+ b[n - 1, 0] = h[n - 1] / 6.0
- d[0, 0] = -1. / h[0] - 1. / h[n - 1]
- d[0, n - 1] = 1. / h[n - 1]
- d[n - 1, 0] = 1. / h[n - 1]
+ d[0, 0] = -1.0 / h[0] - 1.0 / h[n - 1]
+ d[0, n - 1] = 1.0 / h[n - 1]
+ d[n - 1, 0] = 1.0 / h[n - 1]
for i in range(1, n):
- b[i, i] = (h[i - 1] + h[i]) / 3.
- b[i, i - 1] = h[i - 1] / 6.
- b[i - 1, i] = h[i - 1] / 6.
+ b[i, i] = (h[i - 1] + h[i]) / 3.0
+ b[i, i - 1] = h[i - 1] / 6.0
+ b[i - 1, i] = h[i - 1] / 6.0
- d[i, i] = -1. / h[i - 1] - 1. / h[i]
- d[i, i - 1] = 1. / h[i - 1]
- d[i - 1, i] = 1. / h[i - 1]
+ d[i, i] = -1.0 / h[i - 1] - 1.0 / h[i]
+ d[i, i - 1] = 1.0 / h[i - 1]
+ d[i - 1, i] = 1.0 / h[i - 1]
return np.linalg.solve(b, d)
@@ -153,14 +160,15 @@ def _row_tensor_product(dms):
tp_ncols = 1
for dm in dms:
if dm.shape[0] != tp_nrows:
- raise ValueError("Tensor product arguments should have "
- "same number of rows.")
+ raise ValueError(
+ "Tensor product arguments should have " "same number of rows."
+ )
tp_ncols *= dm.shape[1]
tp = np.zeros((tp_nrows, tp_ncols))
- tp[:, -dms[-1].shape[1]:] = dms[-1]
+ tp[:, -dms[-1].shape[1] :] = dms[-1]
filled_tp_ncols = dms[-1].shape[1]
for dm in dms[-2::-1]:
- p = - filled_tp_ncols * dm.shape[1]
+ p = -filled_tp_ncols * dm.shape[1]
for j in range(dm.shape[1]):
xj = dm[:, j]
for t in range(-filled_tp_ncols, 0):
@@ -173,13 +181,15 @@ def _row_tensor_product(dms):
def test__row_tensor_product_errors():
import pytest
+
pytest.raises(ValueError, _row_tensor_product, [])
pytest.raises(ValueError, _row_tensor_product, [np.arange(1, 5)])
- pytest.raises(ValueError, _row_tensor_product,
- [np.arange(1, 5), np.arange(1, 5)])
- pytest.raises(ValueError, _row_tensor_product,
- [np.arange(1, 13).reshape((3, 4)),
- np.arange(1, 13).reshape((4, 3))])
+ pytest.raises(ValueError, _row_tensor_product, [np.arange(1, 5), np.arange(1, 5)])
+ pytest.raises(
+ ValueError,
+ _row_tensor_product,
+ [np.arange(1, 13).reshape((3, 4)), np.arange(1, 13).reshape((4, 3))],
+ )
def test__row_tensor_product():
@@ -202,12 +212,10 @@ def test__row_tensor_product():
# Testing main cases
dm2 = np.array([[1, 2], [1, 2]])
dm3 = np.arange(1, 7).reshape((2, 3))
- expected_tp5 = np.array([[1, 2, 3, 2, 4, 6],
- [4, 5, 6, 8, 10, 12]])
+ expected_tp5 = np.array([[1, 2, 3, 2, 4, 6], [4, 5, 6, 8, 10, 12]])
tp5 = _row_tensor_product([dm2, dm3])
assert np.array_equal(tp5, expected_tp5)
- expected_tp6 = np.array([[1, 2, 2, 4, 3, 6],
- [4, 8, 5, 10, 6, 12]])
+ expected_tp6 = np.array([[1, 2, 2, 4, 3, 6], [4, 8, 5, 10, 6, 12]])
tp6 = _row_tensor_product([dm3, dm2])
assert np.array_equal(tp6, expected_tp6)
@@ -266,14 +274,14 @@ def _compute_base_functions(x, knots):
ajm = xj1_x / hj
ajp = x_xj / hj
- cjm_3 = xj1_x * xj1_x * xj1_x / (6. * hj)
- cjm_3[x > np.max(knots)] = 0.
- cjm_1 = hj * xj1_x / 6.
+ cjm_3 = xj1_x * xj1_x * xj1_x / (6.0 * hj)
+ cjm_3[x > np.max(knots)] = 0.0
+ cjm_1 = hj * xj1_x / 6.0
cjm = cjm_3 - cjm_1
- cjp_3 = x_xj * x_xj * x_xj / (6. * hj)
- cjp_3[x < np.min(knots)] = 0.
- cjp_1 = hj * x_xj / 6.
+ cjp_3 = x_xj * x_xj * x_xj / (6.0 * hj)
+ cjp_3[x < np.min(knots)] = 0.0
+ cjp_1 = hj * x_xj / 6.0
cjp = cjp_3 - cjp_1
return ajm, ajp, cjm, cjp, j
@@ -293,7 +301,7 @@ def _absorb_constraints(design_matrix, constraints):
"""
try:
from scipy import linalg
- except ImportError: # pragma: no cover
+ except ImportError: # pragma: no cover
raise ImportError("Cubic spline functionality requires scipy.")
m = constraints.shape[0]
@@ -338,8 +346,7 @@ def _get_free_crs_dmatrix(x, knots, cyclic=False):
else:
f = _get_natural_f(knots)
- dmt = ajm * i[j, :].T + ajp * i[j1, :].T + \
- cjm * f[j, :].T + cjp * f[j1, :].T
+ dmt = ajm * i[j, :].T + ajp * i[j1, :].T + cjm * f[j, :].T + cjp * f[j1, :].T
return dmt.T
@@ -387,8 +394,9 @@ def _get_te_dmatrix(design_matrices, constraints=None):
# Stateful Transforms
-def _get_all_sorted_knots(x, n_inner_knots=None, inner_knots=None,
- lower_bound=None, upper_bound=None):
+def _get_all_sorted_knots(
+ x, n_inner_knots=None, inner_knots=None, lower_bound=None, upper_bound=None
+):
"""Gets all knots locations with lower and upper exterior knots included.
If needed, inner knots are computed as equally spaced quantiles of the
@@ -407,25 +415,31 @@ def _get_all_sorted_knots(x, n_inner_knots=None, inner_knots=None,
compute ``n_inner_knots + 2`` distinct knots.
"""
if lower_bound is None and x.size == 0:
- raise ValueError("Cannot set lower exterior knot location: empty "
- "input data and lower_bound not specified.")
+ raise ValueError(
+ "Cannot set lower exterior knot location: empty "
+ "input data and lower_bound not specified."
+ )
elif lower_bound is None and x.size != 0:
lower_bound = np.min(x)
if upper_bound is None and x.size == 0:
- raise ValueError("Cannot set upper exterior knot location: empty "
- "input data and upper_bound not specified.")
+ raise ValueError(
+ "Cannot set upper exterior knot location: empty "
+ "input data and upper_bound not specified."
+ )
elif upper_bound is None and x.size != 0:
upper_bound = np.max(x)
if upper_bound < lower_bound:
- raise ValueError("lower_bound > upper_bound (%r > %r)"
- % (lower_bound, upper_bound))
+ raise ValueError(
+ "lower_bound > upper_bound (%r > %r)" % (lower_bound, upper_bound)
+ )
if inner_knots is None and n_inner_knots is not None:
if n_inner_knots < 0:
- raise ValueError("Invalid requested number of inner knots: %r"
- % (n_inner_knots,))
+ raise ValueError(
+ "Invalid requested number of inner knots: %r" % (n_inner_knots,)
+ )
x = x[(lower_bound <= x) & (x <= upper_bound)]
x = np.unique(x)
@@ -437,97 +451,94 @@ def _get_all_sorted_knots(x, n_inner_knots=None, inner_knots=None,
elif n_inner_knots == 0:
inner_knots = np.array([])
else:
- raise ValueError("No data values between lower_bound(=%r) and "
- "upper_bound(=%r): cannot compute requested "
- "%r inner knot(s)."
- % (lower_bound, upper_bound, n_inner_knots))
+ raise ValueError(
+ "No data values between lower_bound(=%r) and "
+ "upper_bound(=%r): cannot compute requested "
+ "%r inner knot(s)." % (lower_bound, upper_bound, n_inner_knots)
+ )
elif inner_knots is not None:
inner_knots = np.unique(inner_knots)
if n_inner_knots is not None and n_inner_knots != inner_knots.size:
- raise ValueError("Needed number of inner knots=%r does not match "
- "provided number of inner knots=%r."
- % (n_inner_knots, inner_knots.size))
+ raise ValueError(
+ "Needed number of inner knots=%r does not match "
+ "provided number of inner knots=%r." % (n_inner_knots, inner_knots.size)
+ )
n_inner_knots = inner_knots.size
if np.any(inner_knots < lower_bound):
- raise ValueError("Some knot values (%s) fall below lower bound "
- "(%r)."
- % (inner_knots[inner_knots < lower_bound],
- lower_bound))
+ raise ValueError(
+ "Some knot values (%s) fall below lower bound "
+ "(%r)." % (inner_knots[inner_knots < lower_bound], lower_bound)
+ )
if np.any(inner_knots > upper_bound):
- raise ValueError("Some knot values (%s) fall above upper bound "
- "(%r)."
- % (inner_knots[inner_knots > upper_bound],
- upper_bound))
+ raise ValueError(
+ "Some knot values (%s) fall above upper bound "
+ "(%r)." % (inner_knots[inner_knots > upper_bound], upper_bound)
+ )
else:
raise ValueError("Must specify either 'n_inner_knots' or 'inner_knots'.")
all_knots = np.concatenate(([lower_bound, upper_bound], inner_knots))
all_knots = np.unique(all_knots)
if all_knots.size != n_inner_knots + 2:
- raise ValueError("Unable to compute n_inner_knots(=%r) + 2 distinct "
- "knots: %r data value(s) found between "
- "lower_bound(=%r) and upper_bound(=%r)."
- % (n_inner_knots, x.size, lower_bound, upper_bound))
+ raise ValueError(
+ "Unable to compute n_inner_knots(=%r) + 2 distinct "
+ "knots: %r data value(s) found between "
+ "lower_bound(=%r) and upper_bound(=%r)."
+ % (n_inner_knots, x.size, lower_bound, upper_bound)
+ )
return all_knots
def test__get_all_sorted_knots():
import pytest
- pytest.raises(ValueError, _get_all_sorted_knots,
- np.array([]), -1)
- pytest.raises(ValueError, _get_all_sorted_knots,
- np.array([]), 0)
- pytest.raises(ValueError, _get_all_sorted_knots,
- np.array([]), 0, lower_bound=1)
- pytest.raises(ValueError, _get_all_sorted_knots,
- np.array([]), 0, upper_bound=5)
- pytest.raises(ValueError, _get_all_sorted_knots,
- np.array([]), 0, lower_bound=3, upper_bound=1)
+
+ pytest.raises(ValueError, _get_all_sorted_knots, np.array([]), -1)
+ pytest.raises(ValueError, _get_all_sorted_knots, np.array([]), 0)
+ pytest.raises(ValueError, _get_all_sorted_knots, np.array([]), 0, lower_bound=1)
+ pytest.raises(ValueError, _get_all_sorted_knots, np.array([]), 0, upper_bound=5)
+ pytest.raises(
+ ValueError, _get_all_sorted_knots, np.array([]), 0, lower_bound=3, upper_bound=1
+ )
assert np.array_equal(
- _get_all_sorted_knots(np.array([]), 0, lower_bound=1, upper_bound=5),
- [1, 5])
- pytest.raises(ValueError, _get_all_sorted_knots,
- np.array([]), 0, lower_bound=1, upper_bound=1)
+ _get_all_sorted_knots(np.array([]), 0, lower_bound=1, upper_bound=5), [1, 5]
+ )
+ pytest.raises(
+ ValueError, _get_all_sorted_knots, np.array([]), 0, lower_bound=1, upper_bound=1
+ )
x = np.arange(6) * 2
- pytest.raises(ValueError, _get_all_sorted_knots,
- x, -2)
+ pytest.raises(ValueError, _get_all_sorted_knots, x, -2)
+ assert np.array_equal(_get_all_sorted_knots(x, 0), [0, 10])
assert np.array_equal(
- _get_all_sorted_knots(x, 0),
- [0, 10])
+ _get_all_sorted_knots(x, 0, lower_bound=3, upper_bound=8), [3, 8]
+ )
assert np.array_equal(
- _get_all_sorted_knots(x, 0, lower_bound=3, upper_bound=8),
- [3, 8])
+ _get_all_sorted_knots(x, 2, lower_bound=1, upper_bound=9), [1, 4, 6, 9]
+ )
+ pytest.raises(ValueError, _get_all_sorted_knots, x, 2, lower_bound=1, upper_bound=3)
+ pytest.raises(
+ ValueError, _get_all_sorted_knots, x, 1, lower_bound=1.3, upper_bound=1.4
+ )
assert np.array_equal(
- _get_all_sorted_knots(x, 2, lower_bound=1, upper_bound=9),
- [1, 4, 6, 9])
- pytest.raises(ValueError, _get_all_sorted_knots,
- x, 2, lower_bound=1, upper_bound=3)
- pytest.raises(ValueError, _get_all_sorted_knots,
- x, 1, lower_bound=1.3, upper_bound=1.4)
+ _get_all_sorted_knots(x, 1, lower_bound=1, upper_bound=3), [1, 2, 3]
+ )
+ pytest.raises(ValueError, _get_all_sorted_knots, x, 1, lower_bound=2, upper_bound=3)
+ pytest.raises(ValueError, _get_all_sorted_knots, x, 1, inner_knots=[2, 3])
+ pytest.raises(ValueError, _get_all_sorted_knots, x, lower_bound=2, upper_bound=3)
+ assert np.array_equal(_get_all_sorted_knots(x, inner_knots=[3, 7]), [0, 3, 7, 10])
assert np.array_equal(
- _get_all_sorted_knots(x, 1, lower_bound=1, upper_bound=3),
- [1, 2, 3])
- pytest.raises(ValueError, _get_all_sorted_knots,
- x, 1, lower_bound=2, upper_bound=3)
- pytest.raises(ValueError, _get_all_sorted_knots,
- x, 1, inner_knots=[2, 3])
- pytest.raises(ValueError, _get_all_sorted_knots,
- x, lower_bound=2, upper_bound=3)
- assert np.array_equal(
- _get_all_sorted_knots(x, inner_knots=[3, 7]),
- [0, 3, 7, 10])
- assert np.array_equal(
- _get_all_sorted_knots(x, inner_knots=[3, 7], lower_bound=2),
- [2, 3, 7, 10])
- pytest.raises(ValueError, _get_all_sorted_knots,
- x, inner_knots=[3, 7], lower_bound=4)
- pytest.raises(ValueError, _get_all_sorted_knots,
- x, inner_knots=[3, 7], upper_bound=6)
+ _get_all_sorted_knots(x, inner_knots=[3, 7], lower_bound=2), [2, 3, 7, 10]
+ )
+ pytest.raises(
+ ValueError, _get_all_sorted_knots, x, inner_knots=[3, 7], lower_bound=4
+ )
+ pytest.raises(
+ ValueError, _get_all_sorted_knots, x, inner_knots=[3, 7], upper_bound=6
+ )
def _get_centering_constraint_from_dmatrix(design_matrix):
- """ Computes the centering constraint from the given design matrix.
+ """Computes the centering constraint from the given design matrix.
We want to ensure that if ``b`` is the array of parameters, our
model is centered, ie ``np.mean(np.dot(design_matrix, b))`` is zero.
@@ -551,6 +562,7 @@ class CubicRegressionSpline(object):
- ``cc(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None)``
for cyclic cubic regression spline
"""
+
common_doc = """
:arg df: The number of degrees of freedom to use for this spline. The
return value will have this many columns. You must specify at least one
@@ -589,24 +601,31 @@ def __init__(self, name, cyclic):
self._all_knots = None
self._constraints = None
- def memorize_chunk(self, x, df=None, knots=None,
- lower_bound=None, upper_bound=None,
- constraints=None):
- args = {"df": df,
- "knots": knots,
- "lower_bound": lower_bound,
- "upper_bound": upper_bound,
- "constraints": constraints,
- }
+ def memorize_chunk(
+ self,
+ x,
+ df=None,
+ knots=None,
+ lower_bound=None,
+ upper_bound=None,
+ constraints=None,
+ ):
+ args = {
+ "df": df,
+ "knots": knots,
+ "lower_bound": lower_bound,
+ "upper_bound": upper_bound,
+ "constraints": constraints,
+ }
self._tmp["args"] = args
x = np.atleast_1d(x)
if x.ndim == 2 and x.shape[1] == 1:
x = x[:, 0]
if x.ndim > 1:
- raise ValueError("Input to %r must be 1-d, "
- "or a 2-d column vector."
- % (self._name,))
+ raise ValueError(
+ "Input to %r must be 1-d, " "or a 2-d column vector." % (self._name,)
+ )
self._tmp.setdefault("xs", []).append(x)
@@ -630,8 +649,7 @@ def memorize_finish(self):
else:
constraints = np.atleast_2d(constraints)
if constraints.ndim != 2:
- raise ValueError("Constraints must be 2-d array or "
- "1-d vector.")
+ raise ValueError("Constraints must be 2-d array or " "1-d vector.")
n_constraints = constraints.shape[0]
n_inner_knots = None
@@ -640,16 +658,20 @@ def memorize_finish(self):
if not self._cyclic and n_constraints == 0:
min_df = 2
if args["df"] < min_df:
- raise ValueError("'df'=%r must be greater than or equal to %r."
- % (args["df"], min_df))
+ raise ValueError(
+ "'df'=%r must be greater than or equal to %r."
+ % (args["df"], min_df)
+ )
n_inner_knots = args["df"] - 2 + n_constraints
if self._cyclic:
n_inner_knots += 1
- self._all_knots = _get_all_sorted_knots(x,
- n_inner_knots=n_inner_knots,
- inner_knots=args["knots"],
- lower_bound=args["lower_bound"],
- upper_bound=args["upper_bound"])
+ self._all_knots = _get_all_sorted_knots(
+ x,
+ n_inner_knots=n_inner_knots,
+ inner_knots=args["knots"],
+ lower_bound=args["lower_bound"],
+ upper_bound=args["upper_bound"],
+ )
if constraints is not None:
if safe_string_eq(constraints, "center"):
# Now we can compute centering constraints
@@ -661,24 +683,32 @@ def memorize_finish(self):
if self._cyclic:
df_before_constraints -= 1
if constraints.shape[1] != df_before_constraints:
- raise ValueError("Constraints array should have %r columns but"
- " %r found."
- % (df_before_constraints, constraints.shape[1]))
+ raise ValueError(
+ "Constraints array should have %r columns but"
+ " %r found." % (df_before_constraints, constraints.shape[1])
+ )
self._constraints = constraints
- def transform(self, x, df=None, knots=None,
- lower_bound=None, upper_bound=None,
- constraints=None):
+ def transform(
+ self,
+ x,
+ df=None,
+ knots=None,
+ lower_bound=None,
+ upper_bound=None,
+ constraints=None,
+ ):
x_orig = x
x = np.atleast_1d(x)
if x.ndim == 2 and x.shape[1] == 1:
x = x[:, 0]
if x.ndim > 1:
- raise ValueError("Input to %r must be 1-d, "
- "or a 2-d column vector."
- % (self._name,))
- dm = _get_crs_dmatrix(x, self._all_knots,
- self._constraints, cyclic=self._cyclic)
+ raise ValueError(
+ "Input to %r must be 1-d, " "or a 2-d column vector." % (self._name,)
+ )
+ dm = _get_crs_dmatrix(
+ x, self._all_knots, self._constraints, cyclic=self._cyclic
+ )
if have_pandas:
if isinstance(x_orig, (pandas.Series, pandas.DataFrame)):
dm = pandas.DataFrame(dm)
@@ -714,7 +744,8 @@ class CR(CubicRegressionSpline):
__doc__ += CubicRegressionSpline.common_doc
def __init__(self):
- CubicRegressionSpline.__init__(self, name='cr', cyclic=False)
+ CubicRegressionSpline.__init__(self, name="cr", cyclic=False)
+
cr = stateful_transform(CR)
@@ -744,26 +775,31 @@ class CC(CubicRegressionSpline):
__doc__ += CubicRegressionSpline.common_doc
def __init__(self):
- CubicRegressionSpline.__init__(self, name='cc', cyclic=True)
+ CubicRegressionSpline.__init__(self, name="cc", cyclic=True)
+
cc = stateful_transform(CC)
def test_crs_errors():
import pytest
+
# Invalid 'x' shape
pytest.raises(ValueError, cr, np.arange(16).reshape((4, 4)), df=4)
- pytest.raises(ValueError, CR().transform,
- np.arange(16).reshape((4, 4)), df=4)
+ pytest.raises(ValueError, CR().transform, np.arange(16).reshape((4, 4)), df=4)
# Should provide at least 'df' or 'knots'
pytest.raises(ValueError, cr, np.arange(50))
# Invalid constraints shape
- pytest.raises(ValueError, cr, np.arange(50), df=4,
- constraints=np.arange(27).reshape((3, 3, 3)))
+ pytest.raises(
+ ValueError,
+ cr,
+ np.arange(50),
+ df=4,
+ constraints=np.arange(27).reshape((3, 3, 3)),
+ )
# Invalid nb of columns in constraints
# (should have df + 1 = 5, but 6 provided)
- pytest.raises(ValueError, cr, np.arange(50), df=4,
- constraints=np.arange(6))
+ pytest.raises(ValueError, cr, np.arange(50), df=4, constraints=np.arange(6))
# Too small 'df' for natural cubic spline
pytest.raises(ValueError, cr, np.arange(50), df=1)
# Too small 'df' for cyclic cubic spline
@@ -772,9 +808,12 @@ def test_crs_errors():
def test_crs_compat():
from patsy.test_state import check_stateful
- from patsy.test_splines_crs_data import (R_crs_test_x,
- R_crs_test_data,
- R_crs_num_tests)
+ from patsy.test_splines_crs_data import (
+ R_crs_test_x,
+ R_crs_test_data,
+ R_crs_num_tests,
+ )
+
lines = R_crs_test_data.split("\n")
tests_ran = 0
start_idx = lines.index("--BEGIN TEST CASE--")
@@ -796,8 +835,9 @@ def test_crs_compat():
spline_type = CC
adjust_df += 1
else:
- raise ValueError("Unrecognized spline type %r"
- % (test_data["spline_type"],))
+ raise ValueError(
+ "Unrecognized spline type %r" % (test_data["spline_type"],)
+ )
kwargs = {}
if test_data["absorb_cons"] == "TRUE":
kwargs["constraints"] = "center"
@@ -818,37 +858,53 @@ def test_crs_compat():
start_idx = stop_idx + 1
assert tests_ran == R_crs_num_tests
+
test_crs_compat.slow = True
+
def test_crs_with_specific_constraint():
from patsy.highlevel import incr_dbuilder, build_design_matrices, dmatrix
- x = (-1.5)**np.arange(20)
+
+ x = (-1.5) ** np.arange(20)
# Hard coded R values for smooth: s(x, bs="cr", k=5)
# R> knots <- smooth$xp
- knots_R = np.array([-2216.837820053100585937,
- -50.456909179687500000,
- -0.250000000000000000,
- 33.637939453125000000,
- 1477.891880035400390625])
+ knots_R = np.array(
+ [
+ -2216.837820053100585937,
+ -50.456909179687500000,
+ -0.250000000000000000,
+ 33.637939453125000000,
+ 1477.891880035400390625,
+ ]
+ )
# R> centering.constraint <- t(qr.X(attr(smooth, "qrc")))
- centering_constraint_R = np.array([[0.064910676323168478574,
- 1.4519875239407085132,
- -2.1947446912471946234,
- 1.6129783104357671153,
- 0.064868180547550072235]])
+ centering_constraint_R = np.array(
+ [
+ [
+ 0.064910676323168478574,
+ 1.4519875239407085132,
+ -2.1947446912471946234,
+ 1.6129783104357671153,
+ 0.064868180547550072235,
+ ]
+ ]
+ )
# values for which we want a prediction
- new_x = np.array([-3000., -200., 300., 2000.])
- result1 = dmatrix("cr(new_x, knots=knots_R[1:-1], "
- "lower_bound=knots_R[0], upper_bound=knots_R[-1], "
- "constraints=centering_constraint_R)")
+ new_x = np.array([-3000.0, -200.0, 300.0, 2000.0])
+ result1 = dmatrix(
+ "cr(new_x, knots=knots_R[1:-1], "
+ "lower_bound=knots_R[0], upper_bound=knots_R[-1], "
+ "constraints=centering_constraint_R)"
+ )
data_chunked = [{"x": x[:10]}, {"x": x[10:]}]
new_data = {"x": new_x}
- builder = incr_dbuilder("cr(x, df=4, constraints='center')",
- lambda: iter(data_chunked))
+ builder = incr_dbuilder(
+ "cr(x, df=4, constraints='center')", lambda: iter(data_chunked)
+ )
result2 = build_design_matrices([builder], new_data)[0]
- assert np.allclose(result1, result2, rtol=1e-12, atol=0.)
+ assert np.allclose(result1, result2, rtol=1e-12, atol=0.0)
class TE(object):
@@ -888,20 +944,22 @@ class TE(object):
.. versionadded:: 0.3.0
"""
+
def __init__(self):
self._tmp = {}
self._constraints = None
def memorize_chunk(self, *args, **kwargs):
- constraints = self._tmp.setdefault("constraints",
- kwargs.get("constraints"))
+ constraints = self._tmp.setdefault("constraints", kwargs.get("constraints"))
if safe_string_eq(constraints, "center"):
args_2d = []
for arg in args:
arg = atleast_2d_column_default(arg)
if arg.ndim != 2:
- raise ValueError("Each tensor product argument must be "
- "a 2-d array or 1-d vector.")
+ raise ValueError(
+ "Each tensor product argument must be "
+ "a 2-d array or 1-d vector."
+ )
args_2d.append(arg)
tp = _row_tensor_product(args_2d)
@@ -924,8 +982,7 @@ def memorize_finish(self):
else:
constraints = np.atleast_2d(constraints)
if constraints.ndim != 2:
- raise ValueError("Constraints must be 2-d array or "
- "1-d vector.")
+ raise ValueError("Constraints must be 2-d array or " "1-d vector.")
self._constraints = constraints
@@ -934,159 +991,263 @@ def transform(self, *args, **kwargs):
for arg in args:
arg = atleast_2d_column_default(arg)
if arg.ndim != 2:
- raise ValueError("Each tensor product argument must be "
- "a 2-d array or 1-d vector.")
+ raise ValueError(
+ "Each tensor product argument must be " "a 2-d array or 1-d vector."
+ )
args_2d.append(arg)
return _get_te_dmatrix(args_2d, self._constraints)
__getstate__ = no_pickling
+
te = stateful_transform(TE)
def test_te_errors():
import pytest
+
x = np.arange(27)
# Invalid input shape
pytest.raises(ValueError, te, x.reshape((3, 3, 3)))
- pytest.raises(ValueError, te, x.reshape((3, 3, 3)), constraints='center')
+ pytest.raises(ValueError, te, x.reshape((3, 3, 3)), constraints="center")
# Invalid constraints shape
- pytest.raises(ValueError, te, x,
- constraints=np.arange(8).reshape((2, 2, 2)))
+ pytest.raises(ValueError, te, x, constraints=np.arange(8).reshape((2, 2, 2)))
def test_te_1smooth():
from patsy.splines import bs
+
# Tensor product of 1 smooth covariate should be the same
# as the smooth alone
- x = (-1.5)**np.arange(20)
+ x = (-1.5) ** np.arange(20)
assert np.allclose(cr(x, df=6), te(cr(x, df=6)))
assert np.allclose(cc(x, df=5), te(cc(x, df=5)))
assert np.allclose(bs(x, df=4), te(bs(x, df=4)))
# Adding centering constraint to tensor product
- assert np.allclose(cr(x, df=3, constraints='center'),
- te(cr(x, df=4), constraints='center'))
+ assert np.allclose(
+ cr(x, df=3, constraints="center"), te(cr(x, df=4), constraints="center")
+ )
# Adding specific constraint
center_constraint = np.arange(1, 5)
- assert np.allclose(cr(x, df=3, constraints=center_constraint),
- te(cr(x, df=4), constraints=center_constraint))
+ assert np.allclose(
+ cr(x, df=3, constraints=center_constraint),
+ te(cr(x, df=4), constraints=center_constraint),
+ )
def test_te_2smooths():
from patsy.highlevel import incr_dbuilder, build_design_matrices
- x1 = (-1.5)**np.arange(20)
- x2 = (1.6)**np.arange(20)
+
+ x1 = (-1.5) ** np.arange(20)
+ x2 = (1.6) ** np.arange(20)
# Hard coded R results for smooth: te(x1, x2, bs=c("cs", "cc"), k=c(5,7))
# Without centering constraint:
- dmatrix_R_nocons = \
- np.array([[-4.4303024184609255207e-06, 7.9884438387230142235e-06,
- 9.7987758194797719025e-06, -7.2894213245475212959e-08,
- 1.5907686862964493897e-09, -3.2565884983072595159e-11,
- 0.0170749607855874667439, -3.0788499835965849050e-02,
- -3.7765754357352458725e-02, 2.8094376299826799787e-04,
- -6.1310290747349201414e-06, 1.2551314933193442915e-07,
- -0.26012671685838206770, 4.6904420337437874311e-01,
- 0.5753384627946153129230, -4.2800085814700449330e-03,
- 9.3402525733484874533e-05, -1.9121170389937518131e-06,
- -0.0904312240489447832781, 1.6305991924427923334e-01,
- 2.0001237112941641638e-01, -1.4879148887003382663e-03,
- 3.2470731316462736135e-05, -6.6473404365914134499e-07,
- 2.0447857920168824846e-05, -3.6870296695050991799e-05,
- -4.5225801045409022233e-05, 3.3643990293641665710e-07,
- -7.3421200200015877329e-09, 1.5030635073660743297e-10],
- [-9.4006130602653794302e-04, 7.8681398069163730347e-04,
- 2.4573006857381437217e-04, -1.4524712230452725106e-04,
- 7.8216741353106329551e-05, -3.1304283003914264551e-04,
- 3.6231183382798337611064, -3.0324832476174168328e+00,
- -9.4707559178211142559e-01, 5.5980126937492580286e-01,
- -3.0145747744342332730e-01, 1.2065077148806895302e+00,
- -35.17561267504181188315, 2.9441339255948005160e+01,
- 9.1948319320782125885216, -5.4349184288245195873e+00,
- 2.9267472035096449012e+00, -1.1713569391233907169e+01,
- 34.0275626863976370373166, -2.8480442582712722555e+01,
- -8.8947340548151565542e+00, 5.2575353623762932642e+00,
- -2.8312249982592527786e+00, 1.1331265795534763541e+01,
- 7.9462158845078978420e-01, -6.6508361863670617531e-01,
- -2.0771242914526857892e-01, 1.2277550230353953542e-01,
- -6.6115593588420035198e-02, 2.6461103043402139923e-01]])
+ dmatrix_R_nocons = np.array(
+ [
+ [
+ -4.4303024184609255207e-06,
+ 7.9884438387230142235e-06,
+ 9.7987758194797719025e-06,
+ -7.2894213245475212959e-08,
+ 1.5907686862964493897e-09,
+ -3.2565884983072595159e-11,
+ 0.0170749607855874667439,
+ -3.0788499835965849050e-02,
+ -3.7765754357352458725e-02,
+ 2.8094376299826799787e-04,
+ -6.1310290747349201414e-06,
+ 1.2551314933193442915e-07,
+ -0.26012671685838206770,
+ 4.6904420337437874311e-01,
+ 0.5753384627946153129230,
+ -4.2800085814700449330e-03,
+ 9.3402525733484874533e-05,
+ -1.9121170389937518131e-06,
+ -0.0904312240489447832781,
+ 1.6305991924427923334e-01,
+ 2.0001237112941641638e-01,
+ -1.4879148887003382663e-03,
+ 3.2470731316462736135e-05,
+ -6.6473404365914134499e-07,
+ 2.0447857920168824846e-05,
+ -3.6870296695050991799e-05,
+ -4.5225801045409022233e-05,
+ 3.3643990293641665710e-07,
+ -7.3421200200015877329e-09,
+ 1.5030635073660743297e-10,
+ ],
+ [
+ -9.4006130602653794302e-04,
+ 7.8681398069163730347e-04,
+ 2.4573006857381437217e-04,
+ -1.4524712230452725106e-04,
+ 7.8216741353106329551e-05,
+ -3.1304283003914264551e-04,
+ 3.6231183382798337611064,
+ -3.0324832476174168328e00,
+ -9.4707559178211142559e-01,
+ 5.5980126937492580286e-01,
+ -3.0145747744342332730e-01,
+ 1.2065077148806895302e00,
+ -35.17561267504181188315,
+ 2.9441339255948005160e01,
+ 9.1948319320782125885216,
+ -5.4349184288245195873e00,
+ 2.9267472035096449012e00,
+ -1.1713569391233907169e01,
+ 34.0275626863976370373166,
+ -2.8480442582712722555e01,
+ -8.8947340548151565542e00,
+ 5.2575353623762932642e00,
+ -2.8312249982592527786e00,
+ 1.1331265795534763541e01,
+ 7.9462158845078978420e-01,
+ -6.6508361863670617531e-01,
+ -2.0771242914526857892e-01,
+ 1.2277550230353953542e-01,
+ -6.6115593588420035198e-02,
+ 2.6461103043402139923e-01,
+ ],
+ ]
+ )
# With centering constraint:
- dmatrix_R_cons = \
- np.array([[0.00329998606323867252343, 1.6537431155796576600e-04,
- -1.2392262709790753433e-04, 6.5405304166706783407e-05,
- -6.6764045799537624095e-05, -0.1386431081763726258504,
- 0.124297283800864313830, -3.5487293655619825405e-02,
- -3.0527115315785902268e-03, 5.2009247643311604277e-04,
- -0.00384203992301702674378, -0.058901915802819435064,
- 0.266422358491648914036, 0.5739281693874087597607,
- -1.3171008503525844392e-03, 8.2573456631878912413e-04,
- 6.6730833453016958831e-03, -0.1467677784718444955470,
- 0.220757650934837484913, 0.1983127687880171796664,
- -1.6269930328365173316e-03, -1.7785892412241208812e-03,
- -3.2702835436351201243e-03, -4.3252183044300757109e-02,
- 4.3403766976235179376e-02, 3.5973406402893762387e-05,
- -5.4035858568225075046e-04, 2.9565209382794241247e-04,
- -2.2769990750264097637e-04],
- [0.41547954838956052681098, 1.9843570584107707994e-02,
- -1.5746590234791378593e-02, 8.3171184312221431434e-03,
- -8.7233014052017516377e-03, -15.9926770785086258541696,
- 16.503663226274017716833, -6.6005803955894726265e-01,
- 1.3986092022708346283e-01, -2.3516913533670955050e-01,
- 0.72251037497207359905360, -9.827337059999853963177,
- 3.917078117294827688255, 9.0171773596973618936090,
- -5.0616811270787671617e+00, 3.0189990249009683865e+00,
- -1.0872720629943064097e+01, 26.9308504460453121964747,
- -21.212262927009287949431, -9.1088328555582247503253,
- 5.2400156972500298025e+00, -3.0593641098325474736e+00,
- 1.0919392118399086300e+01, -4.6564290223265718538e+00,
- 4.8071307441606982991e+00, -1.9748377005689798924e-01,
- 5.4664183716965096538e-02, -2.8871392916916285148e-02,
- 2.3592766838010845176e-01]])
+ dmatrix_R_cons = np.array(
+ [
+ [
+ 0.00329998606323867252343,
+ 1.6537431155796576600e-04,
+ -1.2392262709790753433e-04,
+ 6.5405304166706783407e-05,
+ -6.6764045799537624095e-05,
+ -0.1386431081763726258504,
+ 0.124297283800864313830,
+ -3.5487293655619825405e-02,
+ -3.0527115315785902268e-03,
+ 5.2009247643311604277e-04,
+ -0.00384203992301702674378,
+ -0.058901915802819435064,
+ 0.266422358491648914036,
+ 0.5739281693874087597607,
+ -1.3171008503525844392e-03,
+ 8.2573456631878912413e-04,
+ 6.6730833453016958831e-03,
+ -0.1467677784718444955470,
+ 0.220757650934837484913,
+ 0.1983127687880171796664,
+ -1.6269930328365173316e-03,
+ -1.7785892412241208812e-03,
+ -3.2702835436351201243e-03,
+ -4.3252183044300757109e-02,
+ 4.3403766976235179376e-02,
+ 3.5973406402893762387e-05,
+ -5.4035858568225075046e-04,
+ 2.9565209382794241247e-04,
+ -2.2769990750264097637e-04,
+ ],
+ [
+ 0.41547954838956052681098,
+ 1.9843570584107707994e-02,
+ -1.5746590234791378593e-02,
+ 8.3171184312221431434e-03,
+ -8.7233014052017516377e-03,
+ -15.9926770785086258541696,
+ 16.503663226274017716833,
+ -6.6005803955894726265e-01,
+ 1.3986092022708346283e-01,
+ -2.3516913533670955050e-01,
+ 0.72251037497207359905360,
+ -9.827337059999853963177,
+ 3.917078117294827688255,
+ 9.0171773596973618936090,
+ -5.0616811270787671617e00,
+ 3.0189990249009683865e00,
+ -1.0872720629943064097e01,
+ 26.9308504460453121964747,
+ -21.212262927009287949431,
+ -9.1088328555582247503253,
+ 5.2400156972500298025e00,
+ -3.0593641098325474736e00,
+ 1.0919392118399086300e01,
+ -4.6564290223265718538e00,
+ 4.8071307441606982991e00,
+ -1.9748377005689798924e-01,
+ 5.4664183716965096538e-02,
+ -2.8871392916916285148e-02,
+ 2.3592766838010845176e-01,
+ ],
+ ]
+ )
new_x1 = np.array([11.390625, 656.84083557128906250])
new_x2 = np.array([16.777216000000006346, 1844.6744073709567147])
new_data = {"x1": new_x1, "x2": new_x2}
- data_chunked = [{"x1": x1[:10], "x2": x2[:10]},
- {"x1": x1[10:], "x2": x2[10:]}]
+ data_chunked = [{"x1": x1[:10], "x2": x2[:10]}, {"x1": x1[10:], "x2": x2[10:]}]
- builder = incr_dbuilder("te(cr(x1, df=5), cc(x2, df=6)) - 1",
- lambda: iter(data_chunked))
+ builder = incr_dbuilder(
+ "te(cr(x1, df=5), cc(x2, df=6)) - 1", lambda: iter(data_chunked)
+ )
dmatrix_nocons = build_design_matrices([builder], new_data)[0]
- assert np.allclose(dmatrix_nocons, dmatrix_R_nocons, rtol=1e-12, atol=0.)
+ assert np.allclose(dmatrix_nocons, dmatrix_R_nocons, rtol=1e-12, atol=0.0)
- builder = incr_dbuilder("te(cr(x1, df=5), cc(x2, df=6), "
- "constraints='center') - 1",
- lambda: iter(data_chunked))
+ builder = incr_dbuilder(
+ "te(cr(x1, df=5), cc(x2, df=6), " "constraints='center') - 1",
+ lambda: iter(data_chunked),
+ )
dmatrix_cons = build_design_matrices([builder], new_data)[0]
- assert np.allclose(dmatrix_cons, dmatrix_R_cons, rtol=1e-12, atol=0.)
+ assert np.allclose(dmatrix_cons, dmatrix_R_cons, rtol=1e-12, atol=0.0)
def test_te_3smooths():
from patsy.highlevel import incr_dbuilder, build_design_matrices
- x1 = (-1.5)**np.arange(20)
- x2 = (1.6)**np.arange(20)
- x3 = (-1.2)**np.arange(20)
+
+ x1 = (-1.5) ** np.arange(20)
+ x2 = (1.6) ** np.arange(20)
+ x3 = (-1.2) ** np.arange(20)
# Hard coded R results for smooth: te(x1, x2, x3, bs=c("cr", "cs", "cc"), k=c(3,3,4))
- design_matrix_R = \
- np.array([[7.2077663709837084334e-05, 2.0648333344343273131e-03,
- -4.7934014082310591768e-04, 2.3923430783992746568e-04,
- 6.8534265421922660466e-03, -1.5909867344112936776e-03,
- -6.8057712777151204314e-09, -1.9496724335203412851e-07,
- 4.5260614658693259131e-08, 0.0101479754187435277507,
- 0.290712501531622591333, -0.067487370093906928759,
- 0.03368233306025386619709, 0.9649092451763204847381,
- -0.2239985793289433757547, -9.5819975394704535133e-07,
- -2.7449874082511405643e-05, 6.3723431275833230217e-06,
- -1.5205851762850489204e-04, -0.00435607204539782688624,
- 0.00101123909269346416370, -5.0470024059694933508e-04,
- -1.4458319360584082416e-02, 3.3564223914790921634e-03,
- 1.4357783514933466209e-08, 4.1131230514870551983e-07,
- -9.5483976834512651038e-08]])
- new_data = {"x1": -38.443359375000000000,
- "x2": 68.719476736000032702,
- "x3": -5.1597803519999985156}
- data_chunked = [{"x1": x1[:10], "x2": x2[:10], "x3": x3[:10]},
- {"x1": x1[10:], "x2": x2[10:], "x3": x3[10:]}]
- builder = incr_dbuilder("te(cr(x1, df=3), cr(x2, df=3), cc(x3, df=3)) - 1",
- lambda: iter(data_chunked))
+ design_matrix_R = np.array(
+ [
+ [
+ 7.2077663709837084334e-05,
+ 2.0648333344343273131e-03,
+ -4.7934014082310591768e-04,
+ 2.3923430783992746568e-04,
+ 6.8534265421922660466e-03,
+ -1.5909867344112936776e-03,
+ -6.8057712777151204314e-09,
+ -1.9496724335203412851e-07,
+ 4.5260614658693259131e-08,
+ 0.0101479754187435277507,
+ 0.290712501531622591333,
+ -0.067487370093906928759,
+ 0.03368233306025386619709,
+ 0.9649092451763204847381,
+ -0.2239985793289433757547,
+ -9.5819975394704535133e-07,
+ -2.7449874082511405643e-05,
+ 6.3723431275833230217e-06,
+ -1.5205851762850489204e-04,
+ -0.00435607204539782688624,
+ 0.00101123909269346416370,
+ -5.0470024059694933508e-04,
+ -1.4458319360584082416e-02,
+ 3.3564223914790921634e-03,
+ 1.4357783514933466209e-08,
+ 4.1131230514870551983e-07,
+ -9.5483976834512651038e-08,
+ ]
+ ]
+ )
+ new_data = {
+ "x1": -38.443359375000000000,
+ "x2": 68.719476736000032702,
+ "x3": -5.1597803519999985156,
+ }
+ data_chunked = [
+ {"x1": x1[:10], "x2": x2[:10], "x3": x3[:10]},
+ {"x1": x1[10:], "x2": x2[10:], "x3": x3[10:]},
+ ]
+ builder = incr_dbuilder(
+ "te(cr(x1, df=3), cr(x2, df=3), cc(x3, df=3)) - 1", lambda: iter(data_chunked)
+ )
design_matrix = build_design_matrices([builder], new_data)[0]
- assert np.allclose(design_matrix, design_matrix_R, rtol=1e-12, atol=0.)
+ assert np.allclose(design_matrix, design_matrix_R, rtol=1e-12, atol=0.0)
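Aside (illustrative, not part of the patch): the spline transforms touched
above are exposed as stateful transforms usable directly inside formulas. A
small sketch (requires scipy, per the ImportError guards above):

    import numpy as np
    from patsy import dmatrix

    x = np.linspace(0.0, 1.0, 50)
    # Natural cubic regression spline basis with df=4; dropping the
    # intercept leaves exactly df columns.
    dm = dmatrix("cr(x, df=4) - 1", {"x": x})
    assert dm.shape == (50, 4)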
diff --git a/patsy/missing.py b/patsy/missing.py
index 3235739..b4d8a01 100644
--- a/patsy/missing.py
+++ b/patsy/missing.py
@@ -38,17 +38,19 @@
import numpy as np
from patsy import PatsyError
-from patsy.util import (safe_isnan, safe_scalar_isnan,
- no_pickling, assert_no_pickling)
+from patsy.util import safe_isnan, safe_scalar_isnan, no_pickling, assert_no_pickling
# These are made available in the patsy.* namespace
__all__ = ["NAAction"]
_valid_NA_types = ["None", "NaN"]
_valid_NA_responses = ["raise", "drop"]
+
+
def _desc_options(options):
return ", ".join([repr(opt) for opt in options])
+
class NAAction(object):
"""An :class:`NAAction` object defines a strategy for handling missing
data.
@@ -85,6 +87,7 @@ class NAAction(object):
instance of this class, or your own object that implements the same
interface, and pass that as the ``NA_action=`` argument instead.
"""
+
def __init__(self, on_NA="drop", NA_types=["None", "NaN"]):
"""The :class:`NAAction` constructor takes the following arguments:
@@ -104,17 +107,19 @@ def __init__(self, on_NA="drop", NA_types=["None", "NaN"]):
"""
self.on_NA = on_NA
if self.on_NA not in _valid_NA_responses:
- raise ValueError("invalid on_NA action %r "
- "(should be one of %s)"
- % (on_NA, _desc_options(_valid_NA_responses)))
+ raise ValueError(
+ "invalid on_NA action %r "
+ "(should be one of %s)" % (on_NA, _desc_options(_valid_NA_responses))
+ )
if isinstance(NA_types, str):
raise ValueError("NA_types should be a list of strings")
self.NA_types = tuple(NA_types)
for NA_type in self.NA_types:
if NA_type not in _valid_NA_types:
- raise ValueError("invalid NA_type %r "
- "(should be one of %s)"
- % (NA_type, _desc_options(_valid_NA_types)))
+ raise ValueError(
+ "invalid NA_type %r "
+ "(should be one of %s)" % (NA_type, _desc_options(_valid_NA_types))
+ )
def is_categorical_NA(self, obj):
"""Return True if `obj` is a categorical NA value.
@@ -163,7 +168,7 @@ def handle_NA(self, values, is_NAs, origins):
return self._handle_NA_raise(values, is_NAs, origins)
elif self.on_NA == "drop":
return self._handle_NA_drop(values, is_NAs, origins)
- else: # pragma: no cover
+ else: # pragma: no cover
assert False
def _handle_NA_raise(self, values, is_NAs, origins):
@@ -182,14 +187,17 @@ def _handle_NA_drop(self, values, is_NAs, origins):
__getstate__ = no_pickling
+
def test_NAAction_basic():
import pytest
+
pytest.raises(ValueError, NAAction, on_NA="pord")
pytest.raises(ValueError, NAAction, NA_types=("NaN", "asdf"))
pytest.raises(ValueError, NAAction, NA_types="NaN")
assert_no_pickling(NAAction())
+
def test_NAAction_NA_types_numerical():
for NA_types in [[], ["NaN"], ["None"], ["NaN", "None"]]:
action = NAAction(NA_types=NA_types)
@@ -206,6 +214,7 @@ def test_NAAction_NA_types_numerical():
got_NA_mask = action.is_numerical_NA(arr)
assert np.array_equal(got_NA_mask, exp_NA_mask)
+
def test_NAAction_NA_types_categorical():
for NA_types in [[], ["NaN"], ["None"], ["NaN", "None"]]:
action = NAAction(NA_types=NA_types)
@@ -214,47 +223,45 @@ def test_NAAction_NA_types_categorical():
assert action.is_categorical_NA(None) == ("None" in NA_types)
assert action.is_categorical_NA(np.nan) == ("NaN" in NA_types)
+
def test_NAAction_drop():
action = NAAction("drop")
- in_values = [np.asarray([-1, 2, -1, 4, 5]),
- np.asarray([10.0, 20.0, 30.0, 40.0, 50.0]),
- np.asarray([[1.0, np.nan],
- [3.0, 4.0],
- [10.0, 5.0],
- [6.0, 7.0],
- [8.0, np.nan]]),
- ]
- is_NAs = [np.asarray([True, False, True, False, False]),
- np.zeros(5, dtype=bool),
- np.asarray([True, False, False, False, True]),
- ]
+ in_values = [
+ np.asarray([-1, 2, -1, 4, 5]),
+ np.asarray([10.0, 20.0, 30.0, 40.0, 50.0]),
+ np.asarray([[1.0, np.nan], [3.0, 4.0], [10.0, 5.0], [6.0, 7.0], [8.0, np.nan]]),
+ ]
+ is_NAs = [
+ np.asarray([True, False, True, False, False]),
+ np.zeros(5, dtype=bool),
+ np.asarray([True, False, False, False, True]),
+ ]
out_values = action.handle_NA(in_values, is_NAs, [None] * 3)
assert len(out_values) == 3
assert np.array_equal(out_values[0], [2, 4])
assert np.array_equal(out_values[1], [20.0, 40.0])
assert np.array_equal(out_values[2], [[3.0, 4.0], [6.0, 7.0]])
+
def test_NAAction_raise():
action = NAAction(on_NA="raise")
# no-NA just passes through:
- in_arrs = [np.asarray([1.1, 1.2]),
- np.asarray([1, 2])]
+ in_arrs = [np.asarray([1.1, 1.2]), np.asarray([1, 2])]
is_NAs = [np.asarray([False, False])] * 2
got_arrs = action.handle_NA(in_arrs, is_NAs, [None, None])
assert np.array_equal(got_arrs[0], in_arrs[0])
assert np.array_equal(got_arrs[1], in_arrs[1])
from patsy.origin import Origin
+
o1 = Origin("asdf", 0, 1)
o2 = Origin("asdf", 2, 3)
# NA raises an error with a correct origin
in_idx = np.arange(2)
- in_arrs = [np.asarray([1.1, 1.2]),
- np.asarray([1.0, np.nan])]
- is_NAs = [np.asarray([False, False]),
- np.asarray([False, True])]
+ in_arrs = [np.asarray([1.1, 1.2]), np.asarray([1.0, np.nan])]
+ is_NAs = [np.asarray([False, False]), np.asarray([False, True])]
try:
action.handle_NA(in_arrs, is_NAs, [o1, o2])
assert False
diff --git a/patsy/origin.py b/patsy/origin.py
index 68ed71a..fcabf21 100644
--- a/patsy/origin.py
+++ b/patsy/origin.py
@@ -10,6 +10,7 @@
# These are made available in the patsy.* namespace
__all__ = ["Origin"]
+
class Origin(object):
"""This represents the origin of some object in some string.
@@ -52,7 +53,7 @@ def combine(cls, origin_objs):
* ``None``
* An object that has a ``.origin`` attribute which fulfills the above
criteria.
-
+
Returns either an Origin object, or None.
"""
origins = []
@@ -73,13 +74,15 @@ def combine(cls, origin_objs):
def relevant_code(self):
"""Extracts and returns the span of the original code represented by
this Origin. Example: ``x1``."""
- return self.code[self.start:self.end]
+ return self.code[self.start : self.end]
def __eq__(self, other):
- return (isinstance(other, Origin)
- and self.code == other.code
- and self.start == other.start
- and self.end == other.end)
+ return (
+ isinstance(other, Origin)
+ and self.code == other.code
+ and self.start == other.start
+ and self.end == other.end
+ )
def __ne__(self, other):
return not self == other
@@ -98,24 +101,28 @@ def caretize(self, indent=0):
indented by this much. The returned string does not have a trailing
newline.
"""
- return ("%s%s\n%s%s%s"
- % (" " * indent,
- self.code,
- " " * indent,
- " " * self.start,
- "^" * (self.end - self.start)))
+ return "%s%s\n%s%s%s" % (
+ " " * indent,
+ self.code,
+ " " * indent,
+ " " * self.start,
+ "^" * (self.end - self.start),
+ )
def __repr__(self):
return "%s<-%s (%s-%s)>" % (
- self.code[:self.start],
- self.code[self.start:self.end],
- self.code[self.end:],
- self.start, self.end)
+ self.code[: self.start],
+ self.code[self.start : self.end],
+ self.code[self.end :],
+ self.start,
+ self.end,
+ )
# We reimplement patsy.util.no_pickling, to avoid circular import issues
def __getstate__(self):
raise NotImplementedError
+
def test_Origin():
o1 = Origin("012345", 2, 4)
o2 = Origin("012345", 4, 5)
@@ -131,6 +138,7 @@ def test_Origin():
class ObjWithOrigin(object):
def __init__(self, origin=None):
self.origin = origin
+
o4 = Origin.combine([ObjWithOrigin(o1), ObjWithOrigin(), None])
assert o4 == o1
o5 = Origin.combine([ObjWithOrigin(o1), o2])
@@ -139,4 +147,5 @@ def __init__(self, origin=None):
assert Origin.combine([ObjWithOrigin(), ObjWithOrigin()]) is None
from patsy.util import assert_no_pickling
+
assert_no_pickling(Origin("", 0, 0))
diff --git a/patsy/parse_formula.py b/patsy/parse_formula.py
index afab2d4..8d0c615 100644
--- a/patsy/parse_formula.py
+++ b/patsy/parse_formula.py
@@ -1,4 +1,4 @@
- # This file is part of Patsy
+# This file is part of Patsy
# Copyright (C) 2011 Nathaniel Smith
# See file LICENSE.txt for license information.
@@ -23,6 +23,7 @@
_atomic_token_types = ["PYTHON_EXPR", "ZERO", "ONE", "NUMBER"]
+
def _is_a(f, v):
try:
f(v)
@@ -31,6 +32,7 @@ def _is_a(f, v):
else:
return True
+
# Helper function for _tokenize_formula:
def _read_python_expr(it, end_tokens):
# Read out a full python expression, stopping when we hit an
@@ -66,16 +68,18 @@ def _read_python_expr(it, end_tokens):
token_type = "PYTHON_EXPR"
return Token(token_type, Origin.combine(origins), extra=expr_text)
else:
- raise PatsyError("unclosed bracket in embedded Python "
- "expression",
- Origin.combine(origins))
+ raise PatsyError(
+ "unclosed bracket in embedded Python " "expression", Origin.combine(origins)
+ )
+
def _tokenize_formula(code, operator_strings):
assert "(" not in operator_strings
assert ")" not in operator_strings
- magic_token_types = {"(": Token.LPAREN,
- ")": Token.RPAREN,
- }
+ magic_token_types = {
+ "(": Token.LPAREN,
+ ")": Token.RPAREN,
+ }
for operator_string in operator_strings:
magic_token_types[operator_string] = operator_string
# Once we enter a Python expression, a ( does not end it, but any other
@@ -91,46 +95,48 @@ def _tokenize_formula(code, operator_strings):
it.push_back((pytype, token_string, origin))
yield _read_python_expr(it, end_tokens)
+
def test__tokenize_formula():
code = "y ~ a + (foo(b,c + 2)) + -1 + 0 + 10"
tokens = list(_tokenize_formula(code, ["+", "-", "~"]))
- expecteds = [("PYTHON_EXPR", Origin(code, 0, 1), "y"),
- ("~", Origin(code, 2, 3), None),
- ("PYTHON_EXPR", Origin(code, 4, 5), "a"),
- ("+", Origin(code, 6, 7), None),
- (Token.LPAREN, Origin(code, 8, 9), None),
- ("PYTHON_EXPR", Origin(code, 9, 23), "foo(b, c + 2)"),
- (Token.RPAREN, Origin(code, 23, 24), None),
- ("+", Origin(code, 25, 26), None),
- ("-", Origin(code, 27, 28), None),
- ("ONE", Origin(code, 28, 29), "1"),
- ("+", Origin(code, 30, 31), None),
- ("ZERO", Origin(code, 32, 33), "0"),
- ("+", Origin(code, 34, 35), None),
- ("NUMBER", Origin(code, 36, 38), "10"),
- ]
+ expecteds = [
+ ("PYTHON_EXPR", Origin(code, 0, 1), "y"),
+ ("~", Origin(code, 2, 3), None),
+ ("PYTHON_EXPR", Origin(code, 4, 5), "a"),
+ ("+", Origin(code, 6, 7), None),
+ (Token.LPAREN, Origin(code, 8, 9), None),
+ ("PYTHON_EXPR", Origin(code, 9, 23), "foo(b, c + 2)"),
+ (Token.RPAREN, Origin(code, 23, 24), None),
+ ("+", Origin(code, 25, 26), None),
+ ("-", Origin(code, 27, 28), None),
+ ("ONE", Origin(code, 28, 29), "1"),
+ ("+", Origin(code, 30, 31), None),
+ ("ZERO", Origin(code, 32, 33), "0"),
+ ("+", Origin(code, 34, 35), None),
+ ("NUMBER", Origin(code, 36, 38), "10"),
+ ]
for got, expected in zip(tokens, expecteds):
assert isinstance(got, Token)
assert got.type == expected[0]
assert got.origin == expected[1]
assert got.extra == expected[2]
+
_unary_tilde = Operator("~", 1, -100)
_default_ops = [
_unary_tilde,
Operator("~", 2, -100),
-
Operator("+", 2, 100),
Operator("-", 2, 100),
Operator("*", 2, 200),
Operator("/", 2, 200),
Operator(":", 2, 300),
Operator("**", 2, 500),
-
Operator("+", 1, 100),
Operator("-", 1, 100),
]
+
def parse_formula(code, extra_operators=[]):
if not code.strip():
code = "~ 1"
@@ -141,35 +147,31 @@ def parse_formula(code, extra_operators=[]):
operators = _default_ops + extra_operators
operator_strings = [op.token_type for op in operators]
- tree = infix_parse(_tokenize_formula(code, operator_strings),
- operators,
- _atomic_token_types)
+ tree = infix_parse(
+ _tokenize_formula(code, operator_strings), operators, _atomic_token_types
+ )
if not isinstance(tree, ParseNode) or tree.type != "~":
tree = ParseNode("~", None, [tree], tree.origin)
return tree
+
#############
_parser_tests = {
"": ["~", "1"],
" ": ["~", "1"],
" \n ": ["~", "1"],
-
"1": ["~", "1"],
"a": ["~", "a"],
"a ~ b": ["~", "a", "b"],
-
"(a ~ b)": ["~", "a", "b"],
"a ~ ((((b))))": ["~", "a", "b"],
"a ~ ((((+b))))": ["~", "a", ["+", "b"]],
-
"a + b + c": ["~", ["+", ["+", "a", "b"], "c"]],
"a + (b ~ c) + d": ["~", ["+", ["+", "a", ["~", "b", "c"]], "d"]],
-
"a + np.log(a, base=10)": ["~", ["+", "a", "np.log(a, base=10)"]],
# Note different spacing:
"a + np . log(a , base = 10)": ["~", ["+", "a", "np.log(a, base=10)"]],
-
# Check precedence
"a + b ~ c * d": ["~", ["+", "a", "b"], ["*", "c", "d"]],
"a + b * c": ["~", ["+", "a", ["*", "b", "c"]]],
@@ -178,12 +180,11 @@ def parse_formula(code, extra_operators=[]):
"a + b:c": ["~", ["+", "a", [":", "b", "c"]]],
"(a + b):c": ["~", [":", ["+", "a", "b"], "c"]],
"a*b:c": ["~", ["*", "a", [":", "b", "c"]]],
-
"a+b / c": ["~", ["+", "a", ["/", "b", "c"]]],
"~ a": ["~", "a"],
-
"-1": ["~", ["-", "1"]],
- }
+}
+
def _compare_trees(got, expected):
assert isinstance(got, ParseNode)
@@ -195,6 +196,7 @@ def _compare_trees(got, expected):
assert got.type in _atomic_token_types
assert got.token.extra == expected
+
def _do_parse_test(test_cases, extra_operators):
for code, expected in test_cases.items():
actual = parse_formula(code, extra_operators=extra_operators)
@@ -202,9 +204,11 @@ def _do_parse_test(test_cases, extra_operators):
print(actual)
_compare_trees(actual, expected)
+
def test_parse_formula():
_do_parse_test(_parser_tests, [])
+
def test_parse_origin():
tree = parse_formula("a ~ b + c")
assert tree.origin == Origin("a ~ b + c", 0, 9)
@@ -215,43 +219,36 @@ def test_parse_origin():
assert tree.args[1].args[0].origin == Origin("a ~ b + c", 4, 5)
assert tree.args[1].args[1].origin == Origin("a ~ b + c", 8, 9)
+
# <> mark off where the error should be reported:
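# (e.g. for "a <+>" the parser must report an error whose origin covers
# exactly the "+")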
_parser_error_tests = [
"a <+>",
"a + <(>",
-
"a + b <# asdf>",
-
"<)>",
"a + <)>",
"<*> a",
"a + <*>",
-
"a + ",
"a + ",
"a + ",
-
"a + <[bar>",
"a + <{bar>",
-
"a + <{bar[]>",
-
"a + foo<]>bar",
"a + foo[]<]>bar",
"a + foo{}<}>bar",
"a + foo<)>bar",
-
"a + b<)>",
"(a) <.>",
-
"<(>a + b",
-
- "a +< >'foo", # Not the best placement for the error
+ "a +< >'foo", # Not the best placement for the error
]
+
# Split out so it can also be used by tests of the evaluator (which also
# raises PatsyError's)
-def _parsing_error_test(parse_fn, error_descs): # pragma: no cover
+def _parsing_error_test(parse_fn, error_descs): # pragma: no cover
for error_desc in error_descs:
letters = []
start = None
@@ -277,20 +274,22 @@ def _parsing_error_test(parse_fn, error_descs): # pragma: no cover
else:
assert False, "parser failed to report an error!"
+
def test_parse_errors(extra_operators=[]):
def parse_fn(code):
return parse_formula(code, extra_operators=extra_operators)
+
_parsing_error_test(parse_fn, _parser_error_tests)
+
_extra_op_parser_tests = {
"a | b": ["~", ["|", "a", "b"]],
"a * b|c": ["~", ["*", "a", ["|", "b", "c"]]],
- }
+}
+
def test_parse_extra_op():
extra_operators = [Operator("|", 2, 250)]
- _do_parse_test(_parser_tests,
- extra_operators=extra_operators)
- _do_parse_test(_extra_op_parser_tests,
- extra_operators=extra_operators)
+ _do_parse_test(_parser_tests, extra_operators=extra_operators)
+ _do_parse_test(_extra_op_parser_tests, extra_operators=extra_operators)
test_parse_errors(extra_operators=extra_operators)
diff --git a/patsy/redundancy.py b/patsy/redundancy.py
index c428bdf..c81d439 100644
--- a/patsy/redundancy.py
+++ b/patsy/redundancy.py
@@ -42,6 +42,7 @@
from patsy.util import no_pickling
+
# This should really be a named tuple, but those don't exist until Python
# 2.6...
class _ExpandedFactor(object):
@@ -49,6 +50,7 @@ class _ExpandedFactor(object):
full-rank (includes_intercept=True) or not.
These objects are treated as immutable."""
+
def __init__(self, includes_intercept, factor):
self.includes_intercept = includes_intercept
self.factor = factor
@@ -57,9 +59,11 @@ def __hash__(self):
return hash((_ExpandedFactor, self.includes_intercept, self.factor))
def __eq__(self, other):
- return (isinstance(other, _ExpandedFactor)
- and other.includes_intercept == self.includes_intercept
- and other.factor == self.factor)
+ return (
+ isinstance(other, _ExpandedFactor)
+ and other.includes_intercept == self.includes_intercept
+ and other.factor == self.factor
+ )
def __ne__(self, other):
return not self == other
@@ -73,15 +77,18 @@ def __repr__(self):
__getstate__ = no_pickling
+
class _Subterm(object):
"Also immutable."
+
def __init__(self, efactors):
self.efactors = frozenset(efactors)
def can_absorb(self, other):
# returns True if 'self' is like a-:b-, and 'other' is like a-
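 # (i.e. 'other' has exactly one factor fewer, and every factor of 'other'
 # also appears in 'self')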
- return (len(self.efactors) - len(other.efactors) == 1
- and self.efactors.issuperset(other.efactors))
+ return len(self.efactors) - len(
+ other.efactors
+ ) == 1 and self.efactors.issuperset(other.efactors)
def absorb(self, other):
diff = self.efactors.difference(other.efactors)
@@ -96,8 +103,7 @@ def __hash__(self):
return hash((_Subterm, self.efactors))
def __eq__(self, other):
- return (isinstance(other, _Subterm)
- and self.efactors == self.efactors)
+ return isinstance(other, _Subterm) and self.efactors == other.efactors
def __ne__(self, other):
return not self == other
@@ -107,6 +113,7 @@ def __repr__(self):
__getstate__ = no_pickling
+
# For testing: takes a shorthand description of a list of subterms like
# [(), ("a-",), ("a-", "b+")]
# and expands it into a list of _Subterm and _ExpandedFactor objects.
@@ -116,11 +123,11 @@ def _expand_test_abbrevs(short_subterms):
factors = []
for factor_name in subterm:
assert factor_name[-1] in ("+", "-")
- factors.append(_ExpandedFactor(factor_name[-1] == "+",
- factor_name[:-1]))
+ factors.append(_ExpandedFactor(factor_name[-1] == "+", factor_name[:-1]))
subterms.append(_Subterm(factors))
return subterms
+
def test__Subterm():
s_ab = _expand_test_abbrevs([["a-", "b-"]])[0]
s_abc = _expand_test_abbrevs([["a-", "b-", "c-"]])[0]
@@ -134,6 +141,7 @@ def test__Subterm():
assert s_ab.can_absorb(s_a)
assert s_ab.absorb(s_a) == s_abp
+
# Importantly, this preserves the order of the input. Both the items inside
# each subset are in the order they were in the original tuple, and the tuples
# are emitted so that they're sorted with respect to their elements position
@@ -147,6 +155,7 @@ def helper(seq):
for subset in _subsets_sorted(seq[1:]):
yield subset
yield (obj,) + subset
+
# Transform each obj -> (idx, obj) tuple, so that we can later sort them
# by their position in the original list.
expanded = list(enumerate(tupl))
@@ -159,29 +168,41 @@ def helper(seq):
# And finally, we strip off the idx's:
for subset in expanded_subsets:
yield tuple([obj for (idx, obj) in subset])
-
+
+
def test__subsets_sorted():
assert list(_subsets_sorted((1, 2))) == [(), (1,), (2,), (1, 2)]
- assert (list(_subsets_sorted((1, 2, 3)))
- == [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)])
- assert len(list(_subsets_sorted(range(5)))) == 2 ** 5
+ assert list(_subsets_sorted((1, 2, 3))) == [
+ (),
+ (1,),
+ (2,),
+ (3,),
+ (1, 2),
+ (1, 3),
+ (2, 3),
+ (1, 2, 3),
+ ]
+ assert len(list(_subsets_sorted(range(5)))) == 2**5
+
def _simplify_one_subterm(subterms):
# We simplify greedily from left to right.
# Returns True if succeeded, False otherwise
for short_i, short_subterm in enumerate(subterms):
- for long_i, long_subterm in enumerate(subterms[short_i + 1:]):
+ for long_i, long_subterm in enumerate(subterms[short_i + 1 :]):
if long_subterm.can_absorb(short_subterm):
new_subterm = long_subterm.absorb(short_subterm)
subterms[short_i + 1 + long_i] = new_subterm
subterms.pop(short_i)
return True
return False
-
+
+
def _simplify_subterms(subterms):
while _simplify_one_subterm(subterms):
pass
+
def test__simplify_subterms():
def t(given, expected):
given = _expand_test_abbrevs(given)
@@ -189,12 +210,14 @@ def t(given, expected):
print("testing if:", given, "->", expected)
_simplify_subterms(given)
assert given == expected
+
t([("a-",)], [("a-",)])
t([(), ("a-",)], [("a+",)])
t([(), ("a-",), ("b-",), ("a-", "b-")], [("a+", "b+")])
t([(), ("a-",), ("a-", "b-")], [("a+",), ("a-", "b-")])
t([("a-",), ("b-",), ("a-", "b-")], [("b-",), ("a-", "b+")])
+
# 'term' is a Term
# 'numeric_factors' is any set-like object which lists the
# numeric/non-categorical factors in this term. Such factors are just
@@ -235,8 +258,10 @@ def pick_contrasts_for_term(term, numeric_factors, used_subterms):
factor_codings.append(factor_coding)
return factor_codings
+
def test_pick_contrasts_for_term():
from patsy.desc import Term
+
used = set()
codings = pick_contrasts_for_term(Term([]), set(), used)
assert codings == [{}]
diff --git a/patsy/splines.py b/patsy/splines.py
index 2644900..6504b98 100644
--- a/patsy/splines.py
+++ b/patsy/splines.py
@@ -15,10 +15,11 @@
if have_pandas:
import pandas
+
def _eval_bspline_basis(x, knots, degree):
try:
from scipy.interpolate import splev
- except ImportError: # pragma: no cover
+ except ImportError: # pragma: no cover
raise ImportError("spline functionality requires scipy")
# 'knots' are assumed to be already pre-processed. E.g. usually you
# want to include duplicate copies of boundary knots; you should do
@@ -36,9 +37,11 @@ def _eval_bspline_basis(x, knots, degree):
# this and decide what to do with it, I'm going to play it safe and
# disallow such points.
if np.min(x) < np.min(knots) or np.max(x) > np.max(knots):
- raise NotImplementedError("some data points fall outside the "
- "outermost knots, and I'm not sure how "
- "to handle them. (Patches accepted!)")
+ raise NotImplementedError(
+ "some data points fall outside the "
+ "outermost knots, and I'm not sure how "
+ "to handle them. (Patches accepted!)"
+ )
# Thanks to Charles Harris for explaining splev. It's not well
# documented, but basically it computes an arbitrary b-spline basis
# given knots and degree on some specified points (or derivatives
@@ -59,21 +62,26 @@ def _eval_bspline_basis(x, knots, degree):
basis[:, i] = splev(x, (knots, coefs, degree))
return basis
+
def _R_compat_quantile(x, probs):
- #return np.percentile(x, 100 * np.asarray(probs))
+ # return np.percentile(x, 100 * np.asarray(probs))
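+ # (np.percentile's default linear interpolation matches R's default
+ # quantile type 7, which is why this thin wrapper is enough for
+ # R compatibility.)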
probs = np.asarray(probs)
- quantiles = np.asarray([np.percentile(x, 100 * prob)
- for prob in probs.ravel(order="C")])
+ quantiles = np.asarray(
+ [np.percentile(x, 100 * prob) for prob in probs.ravel(order="C")]
+ )
return quantiles.reshape(probs.shape, order="C")
+
def test__R_compat_quantile():
def t(x, prob, expected):
assert np.allclose(_R_compat_quantile(x, prob), expected)
+
t([10, 20], 0.5, 15)
t([10, 20], 0.3, 13)
t([10, 20], [0.3, 0.7], [13, 17])
t(list(range(10)), [0.3, 0.7], [2.7, 6.3])
+
class BS(object):
"""bs(x, df=None, knots=None, degree=3, include_intercept=False, lower_bound=None, upper_bound=None)
@@ -133,29 +141,37 @@ class BS(object):
.. versionadded:: 0.2.0
"""
+
def __init__(self):
self._tmp = {}
self._degree = None
self._all_knots = None
- def memorize_chunk(self, x, df=None, knots=None, degree=3,
- include_intercept=False,
- lower_bound=None, upper_bound=None):
- args = {"df": df,
- "knots": knots,
- "degree": degree,
- "include_intercept": include_intercept,
- "lower_bound": lower_bound,
- "upper_bound": upper_bound,
- }
+ def memorize_chunk(
+ self,
+ x,
+ df=None,
+ knots=None,
+ degree=3,
+ include_intercept=False,
+ lower_bound=None,
+ upper_bound=None,
+ ):
+ args = {
+ "df": df,
+ "knots": knots,
+ "degree": degree,
+ "include_intercept": include_intercept,
+ "lower_bound": lower_bound,
+ "upper_bound": upper_bound,
+ }
self._tmp["args"] = args
# XX: check whether we need x values before saving them
x = np.atleast_1d(x)
if x.ndim == 2 and x.shape[1] == 1:
x = x[:, 0]
if x.ndim > 1:
- raise ValueError("input to 'bs' must be 1-d, "
- "or a 2-d column vector")
+ raise ValueError("input to 'bs' must be 1-d, " "or a 2-d column vector")
# There's no better way to compute exact quantiles than memorizing
# all data.
self._tmp.setdefault("xs", []).append(x)
@@ -166,11 +182,11 @@ def memorize_finish(self):
del self._tmp
if args["degree"] < 0:
- raise ValueError("degree must be greater than 0 (not %r)"
- % (args["degree"],))
+ raise ValueError(
+ "degree must be greater than 0 (not %r)" % (args["degree"],)
+ )
if int(args["degree"]) != args["degree"]:
- raise ValueError("degree must be an integer (not %r)"
- % (self._degree,))
+ raise ValueError("degree must be an integer (not %r)" % (self._degree,))
# These are guaranteed to all be 1d vectors by the code above
x = np.concatenate(tmp["xs"])
@@ -182,20 +198,31 @@ def memorize_finish(self):
if not args["include_intercept"]:
n_inner_knots += 1
if n_inner_knots < 0:
- raise ValueError("df=%r is too small for degree=%r and "
- "include_intercept=%r; must be >= %s"
- % (args["df"], args["degree"],
- args["include_intercept"],
- # We know that n_inner_knots is negative;
- # if df were that much larger, it would
- # have been zero, and things would work.
- args["df"] - n_inner_knots))
+ raise ValueError(
+ "df=%r is too small for degree=%r and "
+ "include_intercept=%r; must be >= %s"
+ % (
+ args["df"],
+ args["degree"],
+ args["include_intercept"],
+ # We know that n_inner_knots is negative;
+ # if df were that much larger, it would
+ # have been zero, and things would work.
+ args["df"] - n_inner_knots,
+ )
+ )
if args["knots"] is not None:
if len(args["knots"]) != n_inner_knots:
- raise ValueError("df=%s with degree=%r implies %s knots, "
- "but %s knots were provided"
- % (args["df"], args["degree"],
- n_inner_knots, len(args["knots"])))
+ raise ValueError(
+ "df=%s with degree=%r implies %s knots, "
+ "but %s knots were provided"
+ % (
+ args["df"],
+ args["degree"],
+ n_inner_knots,
+ len(args["knots"]),
+ )
+ )
else:
# Need to compute inner knots
knot_quantiles = np.linspace(0, 1, n_inner_knots + 2)[1:-1]
@@ -211,31 +238,38 @@ def memorize_finish(self):
else:
upper_bound = np.max(x)
if lower_bound > upper_bound:
- raise ValueError("lower_bound > upper_bound (%r > %r)"
- % (lower_bound, upper_bound))
+ raise ValueError(
+ "lower_bound > upper_bound (%r > %r)" % (lower_bound, upper_bound)
+ )
inner_knots = np.asarray(inner_knots)
if inner_knots.ndim > 1:
raise ValueError("knots must be 1 dimensional")
if np.any(inner_knots < lower_bound):
- raise ValueError("some knot values (%s) fall below lower bound "
- "(%r)"
- % (inner_knots[inner_knots < lower_bound],
- lower_bound))
+ raise ValueError(
+ "some knot values (%s) fall below lower bound "
+ "(%r)" % (inner_knots[inner_knots < lower_bound], lower_bound)
+ )
if np.any(inner_knots > upper_bound):
- raise ValueError("some knot values (%s) fall above upper bound "
- "(%r)"
- % (inner_knots[inner_knots > upper_bound],
- upper_bound))
- all_knots = np.concatenate(([lower_bound, upper_bound] * order,
- inner_knots))
+ raise ValueError(
+ "some knot values (%s) fall above upper bound "
+ "(%r)" % (inner_knots[inner_knots > upper_bound], upper_bound)
+ )
+ all_knots = np.concatenate(([lower_bound, upper_bound] * order, inner_knots))
all_knots.sort()
self._degree = args["degree"]
self._all_knots = all_knots
- def transform(self, x, df=None, knots=None, degree=3,
- include_intercept=False,
- lower_bound=None, upper_bound=None):
+ def transform(
+ self,
+ x,
+ df=None,
+ knots=None,
+ degree=3,
+ include_intercept=False,
+ lower_bound=None,
+ upper_bound=None,
+ ):
basis = _eval_bspline_basis(x, self._all_knots, self._degree)
if not include_intercept:
basis = basis[:, 1:]
@@ -247,13 +281,14 @@ def transform(self, x, df=None, knots=None, degree=3,
__getstate__ = no_pickling
+
bs = stateful_transform(BS)
+
def test_bs_compat():
from patsy.test_state import check_stateful
- from patsy.test_splines_bs_data import (R_bs_test_x,
- R_bs_test_data,
- R_bs_num_tests)
+ from patsy.test_splines_bs_data import R_bs_test_x, R_bs_test_data, R_bs_num_tests
+
lines = R_bs_test_data.split("\n")
tests_ran = 0
start_idx = lines.index("--BEGIN TEST CASE--")
@@ -274,12 +309,12 @@ def test_bs_compat():
"df": eval(test_data["df"]),
# np.array() call, or None
"knots": eval(test_data["knots"]),
- }
+ }
if test_data["Boundary.knots"] != "None":
lower, upper = eval(test_data["Boundary.knots"])
kwargs["lower_bound"] = lower
kwargs["upper_bound"] = upper
- kwargs["include_intercept"] = (test_data["intercept"] == "TRUE")
+ kwargs["include_intercept"] = test_data["intercept"] == "TRUE"
# Special case: in R, setting intercept=TRUE increases the effective
# dof by 1. Adjust our arguments to match.
# if kwargs["df"] is not None and kwargs["include_intercept"]:
@@ -294,8 +329,10 @@ def test_bs_compat():
start_idx = stop_idx + 1
assert tests_ran == R_bs_num_tests
+
test_bs_compat.slow = 1
+
# This isn't checked by the above, because R doesn't have zero degree
# b-splines.
def test_bs_0degree():
@@ -315,18 +352,19 @@ def test_bs_0degree():
# get included into the larger region, not the smaller. This is consistent
# with Python's half-open interval convention -- each basis function is
# constant on [knot[i], knot[i + 1]).
- assert np.array_equal(bs([0, 1, 2], degree=0, knots=[1],
- include_intercept=True),
- [[1, 0],
- [0, 1],
- [0, 1]])
+ assert np.array_equal(
+ bs([0, 1, 2], degree=0, knots=[1], include_intercept=True),
+ [[1, 0], [0, 1], [0, 1]],
+ )
result_int = bs(x, knots=[1, 4], degree=0, include_intercept=True)
result_no_int = bs(x, knots=[1, 4], degree=0, include_intercept=False)
assert np.array_equal(result_int[:, 1:], result_no_int)
+
def test_bs_errors():
import pytest
+
x = np.linspace(-10, 10, 20)
# error checks:
# out of bounds
@@ -341,59 +379,43 @@ def test_bs_errors():
bs(x, df=10, include_intercept=False, knots=[0] * 9, degree=1)
bs(x, df=10, include_intercept=True, knots=[0] * 8, degree=1)
# too many knots:
- pytest.raises(ValueError,
- bs, x, df=10, include_intercept=False, knots=[0] * 8)
- pytest.raises(ValueError,
- bs, x, df=10, include_intercept=True, knots=[0] * 7)
- pytest.raises(ValueError,
- bs, x, df=10, include_intercept=False, knots=[0] * 10,
- degree=1)
- pytest.raises(ValueError,
- bs, x, df=10, include_intercept=True, knots=[0] * 9,
- degree=1)
+ pytest.raises(ValueError, bs, x, df=10, include_intercept=False, knots=[0] * 8)
+ pytest.raises(ValueError, bs, x, df=10, include_intercept=True, knots=[0] * 7)
+ pytest.raises(
+ ValueError, bs, x, df=10, include_intercept=False, knots=[0] * 10, degree=1
+ )
+ pytest.raises(
+ ValueError, bs, x, df=10, include_intercept=True, knots=[0] * 9, degree=1
+ )
# too few knots:
- pytest.raises(ValueError,
- bs, x, df=10, include_intercept=False, knots=[0] * 6)
- pytest.raises(ValueError,
- bs, x, df=10, include_intercept=True, knots=[0] * 5)
- pytest.raises(ValueError,
- bs, x, df=10, include_intercept=False, knots=[0] * 8,
- degree=1)
- pytest.raises(ValueError,
- bs, x, df=10, include_intercept=True, knots=[0] * 7,
- degree=1)
+ pytest.raises(ValueError, bs, x, df=10, include_intercept=False, knots=[0] * 6)
+ pytest.raises(ValueError, bs, x, df=10, include_intercept=True, knots=[0] * 5)
+ pytest.raises(
+ ValueError, bs, x, df=10, include_intercept=False, knots=[0] * 8, degree=1
+ )
+ pytest.raises(
+ ValueError, bs, x, df=10, include_intercept=True, knots=[0] * 7, degree=1
+ )
# df too small
- pytest.raises(ValueError,
- bs, x, df=1, degree=3)
- pytest.raises(ValueError,
- bs, x, df=3, degree=5)
+ pytest.raises(ValueError, bs, x, df=1, degree=3)
+ pytest.raises(ValueError, bs, x, df=3, degree=5)
# bad degree
- pytest.raises(ValueError,
- bs, x, df=10, degree=-1)
- pytest.raises(ValueError,
- bs, x, df=10, degree=1.5)
+ pytest.raises(ValueError, bs, x, df=10, degree=-1)
+ pytest.raises(ValueError, bs, x, df=10, degree=1.5)
# upper_bound < lower_bound
- pytest.raises(ValueError,
- bs, x, 3, lower_bound=1, upper_bound=-1)
+ pytest.raises(ValueError, bs, x, 3, lower_bound=1, upper_bound=-1)
# multidimensional input
- pytest.raises(ValueError,
- bs, np.column_stack((x, x)), 3)
+ pytest.raises(ValueError, bs, np.column_stack((x, x)), 3)
# unsorted knots are okay, and get sorted
assert np.array_equal(bs(x, knots=[1, 4]), bs(x, knots=[4, 1]))
# 2d knots
- pytest.raises(ValueError,
- bs, x, knots=[[0], [20]])
+ pytest.raises(ValueError, bs, x, knots=[[0], [20]])
# knots > upper_bound
- pytest.raises(ValueError,
- bs, x, knots=[0, 20])
- pytest.raises(ValueError,
- bs, x, knots=[0, 4], upper_bound=3)
+ pytest.raises(ValueError, bs, x, knots=[0, 20])
+ pytest.raises(ValueError, bs, x, knots=[0, 4], upper_bound=3)
# knots < lower_bound
- pytest.raises(ValueError,
- bs, x, knots=[-20, 0])
- pytest.raises(ValueError,
- bs, x, knots=[-4, 0], lower_bound=-3)
-
+ pytest.raises(ValueError, bs, x, knots=[-20, 0])
+ pytest.raises(ValueError, bs, x, knots=[-4, 0], lower_bound=-3)
# differences between bs and ns (since the R code is a pile of copy-paste):
diff --git a/patsy/state.py b/patsy/state.py
index 933c588..8d674ba 100644
--- a/patsy/state.py
+++ b/patsy/state.py
@@ -26,29 +26,41 @@
from functools import wraps
import numpy as np
-from patsy.util import (atleast_2d_column_default,
- asarray_or_pandas, pandas_friendly_reshape,
- wide_dtype_for, safe_issubdtype,
- no_pickling, assert_no_pickling)
+from patsy.util import (
+ atleast_2d_column_default,
+ asarray_or_pandas,
+ pandas_friendly_reshape,
+ wide_dtype_for,
+ safe_issubdtype,
+ no_pickling,
+ assert_no_pickling,
+)
# These are made available in the patsy.* namespace
-__all__ = ["stateful_transform",
- "center", "standardize", "scale",
- ]
+__all__ = [
+ "stateful_transform",
+ "center",
+ "standardize",
+ "scale",
+]
+
def stateful_transform(class_):
"""Create a stateful transform callable object from a class that fulfills
 the :ref:`stateful transform protocol <stateful-transform-protocol>`.
"""
+
@wraps(class_)
def stateful_transform_wrapper(*args, **kwargs):
transform = class_()
transform.memorize_chunk(*args, **kwargs)
transform.memorize_finish()
return transform.transform(*args, **kwargs)
+
stateful_transform_wrapper.__patsy_stateful_transform__ = class_
return stateful_transform_wrapper
+
# class NonIncrementalStatefulTransform(object):
# def __init__(self):
# self._data = []
@@ -76,6 +88,7 @@ def stateful_transform_wrapper(*args, **kwargs):
# class QuantileEstimatingTransform(NonIncrementalStatefulTransform):
# def memorize_all(self, input_data, *args, **kwargs):
+
class Center(object):
"""center(x)
@@ -85,6 +98,7 @@ class Center(object):
Equivalent to ``standardize(x, rescale=False)``
"""
+
def __init__(self):
self._sum = None
self._count = 0
@@ -118,8 +132,10 @@ def transform(self, x):
__getstate__ = no_pickling
+
center = stateful_transform(Center)
+
# See:
# http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
# or page 232 of Knuth vol. 3 (3rd ed.).
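# (Sketch of that online update: for each new value x,
#     n += 1; delta = x - mean; mean += delta / n; M2 += delta * (x - mean)
# and the variance is then M2 / (n - ddof).)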
@@ -141,6 +157,7 @@ class Standardize(object):
memory-efficient online algorithm, making it suitable for use with
large incrementally processed data-sets.
"""
+
def __init__(self):
self.current_n = 0
self.current_mean = None
@@ -176,6 +193,7 @@ def transform(self, x, center=True, rescale=True, ddof=0):
__getstate__ = no_pickling
+
standardize = stateful_transform(Standardize)
# R compatibility:
scale = standardize
diff --git a/patsy/test_build.py b/patsy/test_build.py
index 4b112ef..bad3be6 100644
--- a/patsy/test_build.py
+++ b/patsy/test_build.py
@@ -10,8 +10,7 @@
import numpy as np
import pytest
from patsy import PatsyError
-from patsy.util import (atleast_2d_column_default,
- have_pandas, have_pandas_categorical)
+from patsy.util import atleast_2d_column_default, have_pandas, have_pandas_categorical
from patsy.desc import Term, INTERCEPT
from patsy.build import build_design_matrices, design_matrix_builders
from patsy.categorical import C
@@ -21,6 +20,7 @@
if have_pandas:
import pandas
+
def assert_full_rank(m):
m = atleast_2d_column_default(m)
if m.shape[1] == 0:
@@ -29,18 +29,16 @@ def assert_full_rank(m):
rank = np.sum(s > 1e-10)
assert rank == m.shape[1]
+
def test_assert_full_rank():
assert_full_rank(np.eye(10))
assert_full_rank([[1, 0], [1, 0], [1, 0], [1, 1]])
- pytest.raises(AssertionError,
- assert_full_rank, [[1, 0], [2, 0]])
- pytest.raises(AssertionError,
- assert_full_rank, [[1, 2], [2, 4]])
- pytest.raises(AssertionError,
- assert_full_rank, [[1, 2, 3], [1, 10, 100]])
+ pytest.raises(AssertionError, assert_full_rank, [[1, 0], [2, 0]])
+ pytest.raises(AssertionError, assert_full_rank, [[1, 2], [2, 4]])
+ pytest.raises(AssertionError, assert_full_rank, [[1, 2, 3], [1, 10, 100]])
# col1 + col2 = col3
- pytest.raises(AssertionError,
- assert_full_rank, [[1, 2, 3], [1, 5, 6], [1, 6, 7]])
+ pytest.raises(AssertionError, assert_full_rank, [[1, 2, 3], [1, 5, 6], [1, 6, 7]])
+
def make_termlist(*entries):
terms = []
@@ -48,6 +46,7 @@ def make_termlist(*entries):
terms.append(Term([LookupFactor(name) for name in entry]))
return terms
+
def check_design_matrix(mm, expected_rank, termlist, column_names=None):
assert_full_rank(mm)
assert set(mm.design_info.terms) == set(termlist)
@@ -56,22 +55,23 @@ def check_design_matrix(mm, expected_rank, termlist, column_names=None):
assert mm.ndim == 2
assert mm.shape[1] == expected_rank
+
def make_matrix(data, expected_rank, entries, column_names=None):
termlist = make_termlist(*entries)
+
def iter_maker():
yield data
+
design_infos = design_matrix_builders([termlist], iter_maker, eval_env=0)
matrices = build_design_matrices(design_infos, data)
matrix = matrices[0]
- assert (design_infos[0].term_slices
- == matrix.design_info.term_slices)
- assert (design_infos[0].column_names
- == matrix.design_info.column_names)
+ assert design_infos[0].term_slices == matrix.design_info.term_slices
+ assert design_infos[0].column_names == matrix.design_info.column_names
assert matrix.design_info is design_infos[0]
- check_design_matrix(matrix, expected_rank, termlist,
- column_names=column_names)
+ check_design_matrix(matrix, expected_rank, termlist, column_names=column_names)
return matrix
+
def test_simple():
data = balanced(a=2, b=2)
x1 = data["x1"] = np.linspace(0, 1, len(data["a"]))
@@ -83,41 +83,52 @@ def test_simple():
m = make_matrix(data, 2, [[], ["a"]], column_names=["Intercept", "a[T.a2]"])
assert np.allclose(m, [[1, 0], [1, 0], [1, 1], [1, 1]])
- m = make_matrix(data, 4, [["a", "b"]],
- column_names=["a[a1]:b[b1]", "a[a2]:b[b1]",
- "a[a1]:b[b2]", "a[a2]:b[b2]"])
- assert np.allclose(m, [[1, 0, 0, 0],
- [0, 0, 1, 0],
- [0, 1, 0, 0],
- [0, 0, 0, 1]])
-
- m = make_matrix(data, 4, [[], ["a"], ["b"], ["a", "b"]],
- column_names=["Intercept", "a[T.a2]",
- "b[T.b2]", "a[T.a2]:b[T.b2]"])
- assert np.allclose(m, [[1, 0, 0, 0],
- [1, 0, 1, 0],
- [1, 1, 0, 0],
- [1, 1, 1, 1]])
-
- m = make_matrix(data, 4, [[], ["b"], ["a"], ["b", "a"]],
- column_names=["Intercept", "b[T.b2]",
- "a[T.a2]", "b[T.b2]:a[T.a2]"])
- assert np.allclose(m, [[1, 0, 0, 0],
- [1, 1, 0, 0],
- [1, 0, 1, 0],
- [1, 1, 1, 1]])
-
- m = make_matrix(data, 4, [["a"], ["x1"], ["a", "x1"]],
- column_names=["a[a1]", "a[a2]", "x1", "a[T.a2]:x1"])
- assert np.allclose(m, [[1, 0, x1[0], 0],
- [1, 0, x1[1], 0],
- [0, 1, x1[2], x1[2]],
- [0, 1, x1[3], x1[3]]])
-
- m = make_matrix(data, 3, [["x1"], ["x2"], ["x2", "x1"]],
- column_names=["x1", "x2", "x2:x1"])
+ m = make_matrix(
+ data,
+ 4,
+ [["a", "b"]],
+ column_names=["a[a1]:b[b1]", "a[a2]:b[b1]", "a[a1]:b[b2]", "a[a2]:b[b2]"],
+ )
+ assert np.allclose(m, [[1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]])
+
+ m = make_matrix(
+ data,
+ 4,
+ [[], ["a"], ["b"], ["a", "b"]],
+ column_names=["Intercept", "a[T.a2]", "b[T.b2]", "a[T.a2]:b[T.b2]"],
+ )
+ assert np.allclose(m, [[1, 0, 0, 0], [1, 0, 1, 0], [1, 1, 0, 0], [1, 1, 1, 1]])
+
+ m = make_matrix(
+ data,
+ 4,
+ [[], ["b"], ["a"], ["b", "a"]],
+ column_names=["Intercept", "b[T.b2]", "a[T.a2]", "b[T.b2]:a[T.a2]"],
+ )
+ assert np.allclose(m, [[1, 0, 0, 0], [1, 1, 0, 0], [1, 0, 1, 0], [1, 1, 1, 1]])
+
+ m = make_matrix(
+ data,
+ 4,
+ [["a"], ["x1"], ["a", "x1"]],
+ column_names=["a[a1]", "a[a2]", "x1", "a[T.a2]:x1"],
+ )
+ assert np.allclose(
+ m,
+ [
+ [1, 0, x1[0], 0],
+ [1, 0, x1[1], 0],
+ [0, 1, x1[2], x1[2]],
+ [0, 1, x1[3], x1[3]],
+ ],
+ )
+
+ m = make_matrix(
+ data, 3, [["x1"], ["x2"], ["x2", "x1"]], column_names=["x1", "x2", "x2:x1"]
+ )
assert np.allclose(m, np.column_stack((x1, x2, x1 * x2)))
+
def test_R_bugs():
data = balanced(a=2, b=2, c=2)
data["x"] = np.linspace(0, 1, len(data["a"]))
@@ -136,6 +147,7 @@ def test_R_bugs():
# does get this one right, but we might as well test it.)
make_matrix(data, 6, [["a", "c"], ["a", "b"]])
+
def test_redundancy_thoroughly():
# To make sure there aren't any lurking bugs analogous to the ones that R
# has (see above), we check that we get the correct matrix rank for every
@@ -157,13 +169,16 @@ def all_subsets(l):
all_termlist_templates = list(all_subsets(all_terms))
print(len(all_termlist_templates))
# eliminate some of the symmetric versions to speed things up
- redundant = [[("b",), ("a",)],
- [("x2",), ("x1",)],
- [("b", "x2"), ("a", "x1")],
- [("a", "b", "x2"), ("a", "b", "x1")],
- [("b", "x1", "x2"), ("a", "x1", "x2")]]
+ redundant = [
+ [("b",), ("a",)],
+ [("x2",), ("x1",)],
+ [("b", "x2"), ("a", "x1")],
+ [("a", "b", "x2"), ("a", "b", "x1")],
+ [("b", "x1", "x2"), ("a", "x1", "x2")],
+ ]
count = 0
import time
+
start = time.time()
for termlist_template in all_termlist_templates:
termlist_set = set(termlist_template)
@@ -182,9 +197,9 @@ def all_subsets(l):
expected_rank = len(expanded_terms)
if termlist_template in [(), ((),)]:
# No data dependence, should fail
- pytest.raises(PatsyError,
- make_matrix,
- data, expected_rank, termlist_template)
+ pytest.raises(
+ PatsyError, make_matrix, data, expected_rank, termlist_template
+ )
else:
make_matrix(data, expected_rank, termlist_template)
count += 1
@@ -192,47 +207,56 @@ def all_subsets(l):
print("Completed:", count)
print("Took %0.2f seconds" % (time.time() - start,))
+
test_redundancy_thoroughly.slow = 1
+
def test_data_types():
- basic_dict = {"a": ["a1", "a2", "a1", "a2"],
- "x": [1, 2, 3, 4]}
+ basic_dict = {"a": ["a1", "a2", "a1", "a2"], "x": [1, 2, 3, 4]}
# On Python 2, this is identical to basic_dict:
basic_dict_bytes = dict(basic_dict)
basic_dict_bytes["a"] = [s.encode("ascii") for s in basic_dict_bytes["a"]]
# On Python 3, this is identical to basic_dict:
- basic_dict_unicode = {"a": ["a1", "a2", "a1", "a2"],
- "x": [1, 2, 3, 4]}
+ basic_dict_unicode = {"a": ["a1", "a2", "a1", "a2"], "x": [1, 2, 3, 4]}
basic_dict_unicode = dict(basic_dict)
basic_dict_unicode["a"] = [str(s) for s in basic_dict_unicode["a"]]
- structured_array_bytes = np.array(list(zip(basic_dict["a"],
- basic_dict["x"])),
- dtype=[("a", "S2"), ("x", int)])
- structured_array_unicode = np.array(list(zip(basic_dict["a"],
- basic_dict["x"])),
- dtype=[("a", "U2"), ("x", int)])
+ structured_array_bytes = np.array(
+ list(zip(basic_dict["a"], basic_dict["x"])), dtype=[("a", "S2"), ("x", int)]
+ )
+ structured_array_unicode = np.array(
+ list(zip(basic_dict["a"], basic_dict["x"])), dtype=[("a", "U2"), ("x", int)]
+ )
recarray_bytes = structured_array_bytes.view(np.recarray)
recarray_unicode = structured_array_unicode.view(np.recarray)
- datas = [basic_dict, structured_array_bytes, structured_array_unicode,
- recarray_bytes, recarray_unicode]
+ datas = [
+ basic_dict,
+ structured_array_bytes,
+ structured_array_unicode,
+ recarray_bytes,
+ recarray_unicode,
+ ]
if have_pandas:
df_bytes = pandas.DataFrame(basic_dict_bytes)
datas.append(df_bytes)
df_unicode = pandas.DataFrame(basic_dict_unicode)
datas.append(df_unicode)
for data in datas:
- m = make_matrix(data, 4, [["a"], ["a", "x"]],
- column_names=["a[a1]", "a[a2]", "a[a1]:x", "a[a2]:x"])
- assert np.allclose(m, [[1, 0, 1, 0],
- [0, 1, 0, 2],
- [1, 0, 3, 0],
- [0, 1, 0, 4]])
+ m = make_matrix(
+ data,
+ 4,
+ [["a"], ["a", "x"]],
+ column_names=["a[a1]", "a[a2]", "a[a1]:x", "a[a2]:x"],
+ )
+ assert np.allclose(m, [[1, 0, 1, 0], [0, 1, 0, 2], [1, 0, 3, 0], [0, 1, 0, 4]])
+
def test_build_design_matrices_dtype():
data = {"x": [1, 2, 3]}
+
def iter_maker():
yield data
+
builder = design_matrix_builders([make_termlist("x")], iter_maker, 0)[0]
mat = build_design_matrices([builder], data)[0]
@@ -245,10 +269,13 @@ def iter_maker():
mat = build_design_matrices([builder], data, dtype=np.float128)[0]
assert mat.dtype == np.dtype(np.float128)
+
def test_return_type():
data = {"x": [1, 2, 3]}
+
def iter_maker():
yield data
+
builder = design_matrix_builders([make_termlist("x")], iter_maker, 0)[0]
# Check explicitly passing return_type="matrix" works
@@ -256,101 +283,114 @@ def iter_maker():
assert isinstance(mat, DesignMatrix)
# Check that nonsense is detected
- pytest.raises(PatsyError,
- build_design_matrices, [builder], data,
- return_type="asdfsadf")
+ pytest.raises(
+ PatsyError, build_design_matrices, [builder], data, return_type="asdfsadf"
+ )
+
def test_NA_action():
initial_data = {"x": [1, 2, 3], "c": ["c1", "c2", "c1"]}
+
def iter_maker():
yield initial_data
+
builder = design_matrix_builders([make_termlist("x", "c")], iter_maker, 0)[0]
# By default drops rows containing either NaN or None
- mat = build_design_matrices([builder],
- {"x": [10.0, np.nan, 20.0],
- "c": np.asarray(["c1", "c2", None],
- dtype=object)})[0]
+ mat = build_design_matrices(
+ [builder],
+ {"x": [10.0, np.nan, 20.0], "c": np.asarray(["c1", "c2", None], dtype=object)},
+ )[0]
assert mat.shape == (1, 3)
assert np.array_equal(mat, [[1.0, 0.0, 10.0]])
# NA_action="a string" also accepted:
- mat = build_design_matrices([builder],
- {"x": [10.0, np.nan, 20.0],
- "c": np.asarray(["c1", "c2", None],
- dtype=object)},
- NA_action="drop")[0]
+ mat = build_design_matrices(
+ [builder],
+ {"x": [10.0, np.nan, 20.0], "c": np.asarray(["c1", "c2", None], dtype=object)},
+ NA_action="drop",
+ )[0]
assert mat.shape == (1, 3)
assert np.array_equal(mat, [[1.0, 0.0, 10.0]])
# And objects
from patsy.missing import NAAction
+
# allows NaN's to pass through
NA_action = NAAction(NA_types=[])
- mat = build_design_matrices([builder],
- {"x": [10.0, np.nan],
- "c": np.asarray(["c1", "c2"],
- dtype=object)},
- NA_action=NA_action)[0]
+ mat = build_design_matrices(
+ [builder],
+ {"x": [10.0, np.nan], "c": np.asarray(["c1", "c2"], dtype=object)},
+ NA_action=NA_action,
+ )[0]
assert mat.shape == (2, 3)
# According to this (and only this) function, NaN == NaN.
np.testing.assert_array_equal(mat, [[1.0, 0.0, 10.0], [0.0, 1.0, np.nan]])
# NA_action="raise"
- pytest.raises(PatsyError,
- build_design_matrices,
- [builder],
- {"x": [10.0, np.nan, 20.0],
- "c": np.asarray(["c1", "c2", None],
- dtype=object)},
- NA_action="raise")
+ pytest.raises(
+ PatsyError,
+ build_design_matrices,
+ [builder],
+ {"x": [10.0, np.nan, 20.0], "c": np.asarray(["c1", "c2", None], dtype=object)},
+ NA_action="raise",
+ )
+
def test_NA_drop_preserves_levels():
# Even if all instances of some level are dropped, we still include it in
# the output matrix (as an all-zeros column)
data = {"x": [1.0, np.nan, 3.0], "c": ["c1", "c2", "c3"]}
+
def iter_maker():
yield data
+
design_info = design_matrix_builders([make_termlist("x", "c")], iter_maker, 0)[0]
assert design_info.column_names == ["c[c1]", "c[c2]", "c[c3]", "x"]
- mat, = build_design_matrices([design_info], data)
+ (mat,) = build_design_matrices([design_info], data)
assert mat.shape == (2, 4)
- assert np.array_equal(mat, [[1.0, 0.0, 0.0, 1.0],
- [0.0, 0.0, 1.0, 3.0]])
+ assert np.array_equal(mat, [[1.0, 0.0, 0.0, 1.0], [0.0, 0.0, 1.0, 3.0]])
+
def test_return_type_pandas():
if not have_pandas:
return
- data = pandas.DataFrame({"x": [1, 2, 3],
- "y": [4, 5, 6],
- "a": ["a1", "a2", "a1"]},
- index=[10, 20, 30])
+ data = pandas.DataFrame(
+ {"x": [1, 2, 3], "y": [4, 5, 6], "a": ["a1", "a2", "a1"]}, index=[10, 20, 30]
+ )
+
def iter_maker():
yield data
- int_builder, = design_matrix_builders([make_termlist([])], iter_maker, 0)
- (y_builder, x_builder) = design_matrix_builders([make_termlist("y"),
- make_termlist("x")],
- iter_maker,
- eval_env=0)
- (x_a_builder,) = design_matrix_builders([make_termlist("x", "a")],
- iter_maker,
- eval_env=0)
- (x_y_builder,) = design_matrix_builders([make_termlist("x", "y")],
- iter_maker,
- eval_env=0)
+
+ (int_builder,) = design_matrix_builders([make_termlist([])], iter_maker, 0)
+ (y_builder, x_builder) = design_matrix_builders(
+ [make_termlist("y"), make_termlist("x")], iter_maker, eval_env=0
+ )
+ (x_a_builder,) = design_matrix_builders(
+ [make_termlist("x", "a")], iter_maker, eval_env=0
+ )
+ (x_y_builder,) = design_matrix_builders(
+ [make_termlist("x", "y")], iter_maker, eval_env=0
+ )
# Index compatibility is always checked for pandas input, regardless of
# whether we're producing pandas output
- pytest.raises(PatsyError,
- build_design_matrices,
- [x_a_builder], {"x": data["x"], "a": data["a"][::-1]})
- pytest.raises(PatsyError,
- build_design_matrices,
- [y_builder, x_builder],
- {"x": data["x"], "y": data["y"][::-1]})
+ pytest.raises(
+ PatsyError,
+ build_design_matrices,
+ [x_a_builder],
+ {"x": data["x"], "a": data["a"][::-1]},
+ )
+ pytest.raises(
+ PatsyError,
+ build_design_matrices,
+ [y_builder, x_builder],
+ {"x": data["x"], "y": data["y"][::-1]},
+ )
+
# And we also check consistency between data.index and value indexes
# Creating a mismatch between these is a bit tricky. We want a data object
# such that isinstance(data, DataFrame), but data["x"].index !=
@@ -361,20 +401,20 @@ def __getitem__(self, key):
return pandas.DataFrame.__getitem__(self, key)[::-1]
else:
return pandas.DataFrame.__getitem__(self, key)
- pytest.raises(PatsyError,
- build_design_matrices,
- [x_builder],
- CheatingDataFrame(data))
+
+ pytest.raises(
+ PatsyError, build_design_matrices, [x_builder], CheatingDataFrame(data)
+ )
# A mix of pandas input and unindexed input is fine
- (mat,) = build_design_matrices([x_y_builder],
- {"x": data["x"], "y": [40, 50, 60]})
+ (mat,) = build_design_matrices([x_y_builder], {"x": data["x"], "y": [40, 50, 60]})
assert np.allclose(mat, [[1, 40], [2, 50], [3, 60]])
# with return_type="dataframe", we get out DataFrames with nice indices
# and nice column names and design_info
- y_df, x_df = build_design_matrices([y_builder, x_builder], data,
- return_type="dataframe")
+ y_df, x_df = build_design_matrices(
+ [y_builder, x_builder], data, return_type="dataframe"
+ )
assert isinstance(y_df, pandas.DataFrame)
assert isinstance(x_df, pandas.DataFrame)
assert np.array_equal(y_df, [[4], [5], [6]])
@@ -389,9 +429,11 @@ def __getitem__(self, key):
assert x_df.design_info.term_names == ["x"]
# Same with mix of pandas and unindexed info, even if in different
# matrices
- y_df, x_df = build_design_matrices([y_builder, x_builder],
- {"y": [7, 8, 9], "x": data["x"]},
- return_type="dataframe")
+ y_df, x_df = build_design_matrices(
+ [y_builder, x_builder],
+ {"y": [7, 8, 9], "x": data["x"]},
+ return_type="dataframe",
+ )
assert isinstance(y_df, pandas.DataFrame)
assert isinstance(x_df, pandas.DataFrame)
assert np.array_equal(y_df, [[7], [8], [9]])
@@ -405,75 +447,84 @@ def __getitem__(self, key):
assert y_df.design_info.term_names == ["y"]
assert x_df.design_info.term_names == ["x"]
# Check categorical works for carrying index too
- (x_a_df,) = build_design_matrices([x_a_builder],
- {"x": [-1, -2, -3], "a": data["a"]},
- return_type="dataframe")
+ (x_a_df,) = build_design_matrices(
+ [x_a_builder], {"x": [-1, -2, -3], "a": data["a"]}, return_type="dataframe"
+ )
assert isinstance(x_a_df, pandas.DataFrame)
assert np.array_equal(x_a_df, [[1, 0, -1], [0, 1, -2], [1, 0, -3]])
assert np.array_equal(x_a_df.index, [10, 20, 30])
# And if we have no indexed input, then we let pandas make up an index as
# per its usual rules:
- (x_y_df,) = build_design_matrices([x_y_builder],
- {"y": [7, 8, 9], "x": [10, 11, 12]},
- return_type="dataframe")
+ (x_y_df,) = build_design_matrices(
+ [x_y_builder], {"y": [7, 8, 9], "x": [10, 11, 12]}, return_type="dataframe"
+ )
assert isinstance(x_y_df, pandas.DataFrame)
assert np.array_equal(x_y_df, [[10, 7], [11, 8], [12, 9]])
assert np.array_equal(x_y_df.index, [0, 1, 2])
# If 'data' is a DataFrame, then that suffices, even if no factors are
# available.
- (int_df,) = build_design_matrices([int_builder], data,
- return_type="dataframe")
+ (int_df,) = build_design_matrices([int_builder], data, return_type="dataframe")
assert isinstance(int_df, pandas.DataFrame)
assert np.array_equal(int_df, [[1], [1], [1]])
assert int_df.index.equals(pandas.Index([10, 20, 30]))
import patsy.build
+
had_pandas = patsy.build.have_pandas
try:
patsy.build.have_pandas = False
# return_type="dataframe" gives a nice error if pandas is not available
- pytest.raises(PatsyError,
- build_design_matrices,
- [x_builder], {"x": [1, 2, 3]}, return_type="dataframe")
+ pytest.raises(
+ PatsyError,
+ build_design_matrices,
+ [x_builder],
+ {"x": [1, 2, 3]},
+ return_type="dataframe",
+ )
finally:
patsy.build.have_pandas = had_pandas
- x_df, = build_design_matrices([x_a_builder],
- {"x": [1.0, np.nan, 3.0],
- "a": np.asarray([None, "a2", "a1"],
- dtype=object)},
- NA_action="drop",
- return_type="dataframe")
+ (x_df,) = build_design_matrices(
+ [x_a_builder],
+ {"x": [1.0, np.nan, 3.0], "a": np.asarray([None, "a2", "a1"], dtype=object)},
+ NA_action="drop",
+ return_type="dataframe",
+ )
assert x_df.index.equals(pandas.Index([2]))
+
def test_data_mismatch():
test_cases_twoway = [
# Data type mismatch
([1, 2, 3], [True, False, True]),
- (C(["a", "b", "c"], levels=["c", "b", "a"]),
- C(["a", "b", "c"], levels=["a", "b", "c"])),
+ (
+ C(["a", "b", "c"], levels=["c", "b", "a"]),
+ C(["a", "b", "c"], levels=["a", "b", "c"]),
+ ),
# column number mismatches
([[1], [2], [3]], [[1, 1], [2, 2], [3, 3]]),
([[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[1, 1], [2, 2], [3, 3]]),
- ]
+ ]
test_cases_oneway = [
([1, 2, 3], ["a", "b", "c"]),
([1, 2, 3], C(["a", "b", "c"])),
([True, False, True], C(["a", "b", "c"])),
([True, False, True], ["a", "b", "c"]),
- ]
+ ]
setup_predict_only = [
# This is not an error if both are fed in during make_builders, but it
# is an error to pass one to make_builders and the other to
# make_matrices.
(["a", "b", "c"], ["a", "b", "d"]),
- ]
+ ]
termlist = make_termlist(["x"])
+
def t_incremental(data1, data2):
def iter_maker():
yield {"x": data1}
yield {"x": data2}
+
try:
builders = design_matrix_builders([termlist], iter_maker, 0)
build_design_matrices(builders, {"x": data1})
@@ -482,30 +533,34 @@ def iter_maker():
pass
else:
raise AssertionError
+
def t_setup_predict(data1, data2):
def iter_maker():
yield {"x": data1}
+
builders = design_matrix_builders([termlist], iter_maker, 0)
- pytest.raises(PatsyError,
- build_design_matrices, builders, {"x": data2})
- for (a, b) in test_cases_twoway:
+ pytest.raises(PatsyError, build_design_matrices, builders, {"x": data2})
+
+ for a, b in test_cases_twoway:
t_incremental(a, b)
t_incremental(b, a)
t_setup_predict(a, b)
t_setup_predict(b, a)
- for (a, b) in test_cases_oneway:
+ for a, b in test_cases_oneway:
t_incremental(a, b)
t_setup_predict(a, b)
- for (a, b) in setup_predict_only:
+ for a, b in setup_predict_only:
t_setup_predict(a, b)
t_setup_predict(b, a)
- pytest.raises(PatsyError,
- make_matrix, {"x": [1, 2, 3], "y": [1, 2, 3, 4]},
- 2, [["x"], ["y"]])
+ pytest.raises(
+ PatsyError, make_matrix, {"x": [1, 2, 3], "y": [1, 2, 3, 4]}, 2, [["x"], ["y"]]
+ )
+
def test_data_independent_builder():
data = {"x": [1, 2, 3]}
+
def iter_maker():
yield data
@@ -517,20 +572,20 @@ def iter_maker():
null_builder = design_matrix_builders([make_termlist()], iter_maker, 0)[0]
pytest.raises(PatsyError, build_design_matrices, [null_builder], data)
- intercept_builder = design_matrix_builders([make_termlist([])],
- iter_maker,
- eval_env=0)[0]
+ intercept_builder = design_matrix_builders(
+ [make_termlist([])], iter_maker, eval_env=0
+ )[0]
pytest.raises(PatsyError, build_design_matrices, [intercept_builder], data)
- pytest.raises(PatsyError,
- build_design_matrices,
- [null_builder, intercept_builder], data)
+ pytest.raises(
+ PatsyError, build_design_matrices, [null_builder, intercept_builder], data
+ )
# If data is a DataFrame, it sets the number of rows.
if have_pandas:
- int_m, null_m = build_design_matrices([intercept_builder,
- null_builder],
- pandas.DataFrame(data))
+ int_m, null_m = build_design_matrices(
+ [intercept_builder, null_builder], pandas.DataFrame(data)
+ )
assert np.allclose(int_m, [[1], [1], [1]])
assert null_m.shape == (3, 0)
@@ -538,25 +593,28 @@ def iter_maker():
# data-independent matrices have the same number of rows.
x_termlist = make_termlist(["x"])
- builders = design_matrix_builders([x_termlist, make_termlist()],
- iter_maker,
- eval_env=0)
+ builders = design_matrix_builders(
+ [x_termlist, make_termlist()], iter_maker, eval_env=0
+ )
x_m, null_m = build_design_matrices(builders, data)
assert np.allclose(x_m, [[1], [2], [3]])
assert null_m.shape == (3, 0)
- builders = design_matrix_builders([x_termlist, make_termlist([])],
- iter_maker,
- eval_env=0)
+ builders = design_matrix_builders(
+ [x_termlist, make_termlist([])], iter_maker, eval_env=0
+ )
x_m, null_m = build_design_matrices(builders, data)
x_m, intercept_m = build_design_matrices(builders, data)
assert np.allclose(x_m, [[1], [2], [3]])
assert np.allclose(intercept_m, [[1], [1], [1]])
+
def test_same_factor_in_two_matrices():
data = {"x": [1, 2, 3], "a": ["a1", "a2", "a1"]}
+
def iter_maker():
yield data
+
t1 = make_termlist(["x"])
t2 = make_termlist(["x", "a"])
builders = design_matrix_builders([t1, t2], iter_maker, eval_env=0)
@@ -566,11 +624,17 @@ def iter_maker():
check_design_matrix(m2, 2, t2, column_names=["x:a[a1]", "x:a[a2]"])
assert np.allclose(m2, [[1, 0], [0, 2], [3, 0]])
+
def test_eval_env_type_builder():
data = {"x": [1, 2, 3]}
+
def iter_maker():
yield data
- pytest.raises(TypeError, design_matrix_builders, [make_termlist("x")], iter_maker, "foo")
+
+ pytest.raises(
+ TypeError, design_matrix_builders, [make_termlist("x")], iter_maker, "foo"
+ )
+
def test_categorical():
data_strings = {"a": ["a1", "a2", "a1"]}
@@ -579,107 +643,114 @@ def test_categorical():
if have_pandas_categorical:
data_pandas = {"a": pandas.Categorical(["a1", "a2", "a2"])}
datas.append(data_pandas)
+
def t(data1, data2):
def iter_maker():
yield data1
- builders = design_matrix_builders([make_termlist(["a"])],
- iter_maker,
- eval_env=0)
+
+ builders = design_matrix_builders(
+ [make_termlist(["a"])], iter_maker, eval_env=0
+ )
build_design_matrices(builders, data2)
+
for data1 in datas:
for data2 in datas:
t(data1, data2)
+
def test_contrast():
from patsy.contrasts import ContrastMatrix, Sum
+
values = ["a1", "a3", "a1", "a2"]
# No intercept in model, full-rank coding of 'a'
- m = make_matrix({"a": C(values)}, 3, [["a"]],
- column_names=["a[a1]", "a[a2]", "a[a3]"])
+ m = make_matrix(
+ {"a": C(values)}, 3, [["a"]], column_names=["a[a1]", "a[a2]", "a[a3]"]
+ )
- assert np.allclose(m, [[1, 0, 0],
- [0, 0, 1],
- [1, 0, 0],
- [0, 1, 0]])
+ assert np.allclose(m, [[1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0]])
for s in (Sum, Sum()):
- m = make_matrix({"a": C(values, s)}, 3, [["a"]],
- column_names=["a[mean]", "a[S.a1]", "a[S.a2]"])
+ m = make_matrix(
+ {"a": C(values, s)},
+ 3,
+ [["a"]],
+ column_names=["a[mean]", "a[S.a1]", "a[S.a2]"],
+ )
# Output from R
- assert np.allclose(m, [[1, 1, 0],
- [1,-1, -1],
- [1, 1, 0],
- [1, 0, 1]])
-
- m = make_matrix({"a": C(values, Sum(omit=0))}, 3, [["a"]],
- column_names=["a[mean]", "a[S.a2]", "a[S.a3]"])
+ assert np.allclose(m, [[1, 1, 0], [1, -1, -1], [1, 1, 0], [1, 0, 1]])
+
+ m = make_matrix(
+ {"a": C(values, Sum(omit=0))},
+ 3,
+ [["a"]],
+ column_names=["a[mean]", "a[S.a2]", "a[S.a3]"],
+ )
# Output from R
- assert np.allclose(m, [[1, -1, -1],
- [1, 0, 1],
- [1, -1, -1],
- [1, 1, 0]])
+ assert np.allclose(m, [[1, -1, -1], [1, 0, 1], [1, -1, -1], [1, 1, 0]])
# Intercept in model, non-full-rank coding of 'a'
- m = make_matrix({"a": C(values)}, 3, [[], ["a"]],
- column_names=["Intercept", "a[T.a2]", "a[T.a3]"])
+ m = make_matrix(
+ {"a": C(values)},
+ 3,
+ [[], ["a"]],
+ column_names=["Intercept", "a[T.a2]", "a[T.a3]"],
+ )
- assert np.allclose(m, [[1, 0, 0],
- [1, 0, 1],
- [1, 0, 0],
- [1, 1, 0]])
+ assert np.allclose(m, [[1, 0, 0], [1, 0, 1], [1, 0, 0], [1, 1, 0]])
for s in (Sum, Sum()):
- m = make_matrix({"a": C(values, s)}, 3, [[], ["a"]],
- column_names=["Intercept", "a[S.a1]", "a[S.a2]"])
+ m = make_matrix(
+ {"a": C(values, s)},
+ 3,
+ [[], ["a"]],
+ column_names=["Intercept", "a[S.a1]", "a[S.a2]"],
+ )
# Output from R
- assert np.allclose(m, [[1, 1, 0],
- [1,-1, -1],
- [1, 1, 0],
- [1, 0, 1]])
-
- m = make_matrix({"a": C(values, Sum(omit=0))}, 3, [[], ["a"]],
- column_names=["Intercept", "a[S.a2]", "a[S.a3]"])
+ assert np.allclose(m, [[1, 1, 0], [1, -1, -1], [1, 1, 0], [1, 0, 1]])
+
+ m = make_matrix(
+ {"a": C(values, Sum(omit=0))},
+ 3,
+ [[], ["a"]],
+ column_names=["Intercept", "a[S.a2]", "a[S.a3]"],
+ )
# Output from R
- assert np.allclose(m, [[1, -1, -1],
- [1, 0, 1],
- [1, -1, -1],
- [1, 1, 0]])
+ assert np.allclose(m, [[1, -1, -1], [1, 0, 1], [1, -1, -1], [1, 1, 0]])
# Weird ad hoc less-than-full-rank coding of 'a'
- m = make_matrix({"a": C(values, [[7, 12],
- [2, 13],
- [8, -1]])},
- 2, [["a"]],
- column_names=["a[custom0]", "a[custom1]"])
- assert np.allclose(m, [[7, 12],
- [8, -1],
- [7, 12],
- [2, 13]])
-
- m = make_matrix({"a": C(values, ContrastMatrix([[7, 12],
- [2, 13],
- [8, -1]],
- ["[foo]", "[bar]"]))},
- 2, [["a"]],
- column_names=["a[foo]", "a[bar]"])
- assert np.allclose(m, [[7, 12],
- [8, -1],
- [7, 12],
- [2, 13]])
+ m = make_matrix(
+ {"a": C(values, [[7, 12], [2, 13], [8, -1]])},
+ 2,
+ [["a"]],
+ column_names=["a[custom0]", "a[custom1]"],
+ )
+ assert np.allclose(m, [[7, 12], [8, -1], [7, 12], [2, 13]])
+
+ m = make_matrix(
+ {
+ "a": C(
+ values, ContrastMatrix([[7, 12], [2, 13], [8, -1]], ["[foo]", "[bar]"])
+ )
+ },
+ 2,
+ [["a"]],
+ column_names=["a[foo]", "a[bar]"],
+ )
+ assert np.allclose(m, [[7, 12], [8, -1], [7, 12], [2, 13]])
+
def test_DesignInfo_subset():
# For each combination of:
# formula, term names, term objects, mixed term name and term objects
# check that results match subset of full build
# and that removed variables don't hurt
- all_data = {"x": [1, 2],
- "y": [[3.1, 3.2],
- [4.1, 4.2]],
- "z": [5, 6]}
+ all_data = {"x": [1, 2], "y": [[3.1, 3.2], [4.1, 4.2]], "z": [5, 6]}
all_terms = make_termlist("x", "y", "z")
+
def iter_maker():
yield all_data
+
all_builder = design_matrix_builders([all_terms], iter_maker, 0)[0]
full_matrix = build_design_matrices([all_builder], all_data)[0]
@@ -718,8 +789,7 @@ def t(which_terms, variables, columns):
# Term must exist
pytest.raises(KeyError, all_builder.subset, "~ asdf")
pytest.raises(KeyError, all_builder.subset, ["asdf"])
- pytest.raises(KeyError,
- all_builder.subset, [Term(["asdf"])])
+ pytest.raises(KeyError, all_builder.subset, [Term(["asdf"])])
# Also check for a minimal DesignInfo (column names only)
min_di = DesignInfo(["a", "b", "c"])
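The subset() checks above accept three spellings of "which terms": a right-hand-side formula string, a list of term names, and a list of Term objects, with KeyError for anything unknown. A small sketch of the happy path (illustrative data; note that variables dropped by the subset need not be supplied when building):

    from patsy import dmatrix, build_design_matrices

    mat = dmatrix("x + z", {"x": [1, 2], "z": [5, 6]})
    # Keep just the intercept and x; "~ x" parses with an implicit intercept.
    sub_info = mat.design_info.subset("~ x")
    # z is no longer referenced, so it can be absent from the new data:
    (sub,) = build_design_matrices([sub_info], {"x": [3, 4]})
    assert sub.design_info.column_names == ["Intercept", "x"]
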
diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py
index 66c293a..35c86a1 100644
--- a/patsy/test_highlevel.py
+++ b/patsy/test_highlevel.py
@@ -14,22 +14,30 @@
from patsy.categorical import C
from patsy.contrasts import Helmert
from patsy.user_util import balanced, LookupFactor
-from patsy.build import (design_matrix_builders,
- build_design_matrices)
-from patsy.highlevel import (dmatrix, dmatrices,
- incr_dbuilder, incr_dbuilders)
-from patsy.util import (have_pandas,
- have_pandas_categorical,
- have_pandas_categorical_dtype,
- pandas_Categorical_from_codes)
+from patsy.build import design_matrix_builders, build_design_matrices
+from patsy.highlevel import dmatrix, dmatrices, incr_dbuilder, incr_dbuilders
+from patsy.util import (
+ have_pandas,
+ have_pandas_categorical,
+ have_pandas_categorical_dtype,
+ pandas_Categorical_from_codes,
+)
from patsy.origin import Origin
if have_pandas:
import pandas
-def check_result(expect_full_designs, lhs, rhs, data,
- expected_rhs_values, expected_rhs_names,
- expected_lhs_values, expected_lhs_names): # pragma: no cover
+
+def check_result(
+ expect_full_designs,
+ lhs,
+ rhs,
+ data,
+ expected_rhs_values,
+ expected_rhs_names,
+ expected_lhs_values,
+ expected_lhs_names,
+): # pragma: no cover
assert np.allclose(rhs, expected_rhs_values)
assert rhs.design_info.column_names == expected_rhs_names
if lhs is not None:
@@ -41,11 +49,11 @@ def check_result(expect_full_designs, lhs, rhs, data,
if expect_full_designs:
if lhs is None:
- new_rhs, = build_design_matrices([rhs.design_info], data)
+ (new_rhs,) = build_design_matrices([rhs.design_info], data)
else:
- new_lhs, new_rhs = build_design_matrices([lhs.design_info,
- rhs.design_info],
- data)
+ new_lhs, new_rhs = build_design_matrices(
+ [lhs.design_info, rhs.design_info], data
+ )
assert np.allclose(new_lhs, lhs)
assert new_lhs.design_info.column_names == expected_lhs_names
assert np.allclose(new_rhs, rhs)
@@ -54,30 +62,42 @@ def check_result(expect_full_designs, lhs, rhs, data,
assert rhs.design_info.terms is None
assert lhs is None or lhs.design_info.terms is None
+
def dmatrix_pandas(formula_like, data={}, depth=0, return_type="matrix"):
return_type = "dataframe"
if isinstance(depth, int):
depth += 1
return dmatrix(formula_like, data, depth, return_type=return_type)
+
def dmatrices_pandas(formula_like, data={}, depth=0, return_type="matrix"):
return_type = "dataframe"
if isinstance(depth, int):
depth += 1
return dmatrices(formula_like, data, depth, return_type=return_type)
-def t(formula_like, data, depth,
- expect_full_designs,
- expected_rhs_values, expected_rhs_names,
- expected_lhs_values=None, expected_lhs_names=None): # pragma: no cover
+
+def t(
+ formula_like,
+ data,
+ depth,
+ expect_full_designs,
+ expected_rhs_values,
+ expected_rhs_names,
+ expected_lhs_values=None,
+ expected_lhs_names=None,
+): # pragma: no cover
if isinstance(depth, int):
depth += 1
+
def data_iter_maker():
return iter([data])
- if (isinstance(formula_like, (str, ModelDesc, DesignInfo))
- or (isinstance(formula_like, tuple)
- and isinstance(formula_like[0], DesignInfo))
- or hasattr(formula_like, "__patsy_get_model_desc__")):
+
+ if (
+ isinstance(formula_like, (str, ModelDesc, DesignInfo))
+ or (isinstance(formula_like, tuple) and isinstance(formula_like[0], DesignInfo))
+ or hasattr(formula_like, "__patsy_get_model_desc__")
+ ):
if expected_lhs_values is None:
builder = incr_dbuilder(formula_like, data_iter_maker, depth)
lhs = None
@@ -85,14 +105,19 @@ def data_iter_maker():
else:
builders = incr_dbuilders(formula_like, data_iter_maker, depth)
lhs, rhs = build_design_matrices(builders, data)
- check_result(expect_full_designs, lhs, rhs, data,
- expected_rhs_values, expected_rhs_names,
- expected_lhs_values, expected_lhs_names)
+ check_result(
+ expect_full_designs,
+ lhs,
+ rhs,
+ data,
+ expected_rhs_values,
+ expected_rhs_names,
+ expected_lhs_values,
+ expected_lhs_names,
+ )
else:
- pytest.raises(PatsyError, incr_dbuilders,
- formula_like, data_iter_maker)
- pytest.raises(PatsyError, incr_dbuilder,
- formula_like, data_iter_maker)
+ pytest.raises(PatsyError, incr_dbuilders, formula_like, data_iter_maker)
+ pytest.raises(PatsyError, incr_dbuilder, formula_like, data_iter_maker)
one_mat_fs = [dmatrix]
two_mat_fs = [dmatrices]
if have_pandas:
@@ -101,9 +126,16 @@ def data_iter_maker():
if expected_lhs_values is None:
for f in one_mat_fs:
rhs = f(formula_like, data, depth)
- check_result(expect_full_designs, None, rhs, data,
- expected_rhs_values, expected_rhs_names,
- expected_lhs_values, expected_lhs_names)
+ check_result(
+ expect_full_designs,
+ None,
+ rhs,
+ data,
+ expected_rhs_values,
+ expected_rhs_names,
+ expected_lhs_values,
+ expected_lhs_names,
+ )
# We inline assert_raises here to avoid complications with the
# depth argument.
@@ -125,11 +157,19 @@ def data_iter_maker():
for f in two_mat_fs:
(lhs, rhs) = f(formula_like, data, depth)
- check_result(expect_full_designs, lhs, rhs, data,
- expected_rhs_values, expected_rhs_names,
- expected_lhs_values, expected_lhs_names)
-
-def t_invalid(formula_like, data, depth, exc=PatsyError): # pragma: no cover
+ check_result(
+ expect_full_designs,
+ lhs,
+ rhs,
+ data,
+ expected_rhs_values,
+ expected_rhs_names,
+ expected_lhs_values,
+ expected_lhs_names,
+ )
+
+
+def t_invalid(formula_like, data, depth, exc=PatsyError): # pragma: no cover
if isinstance(depth, int):
depth += 1
fs = [dmatrix, dmatrices]
@@ -143,52 +183,92 @@ def t_invalid(formula_like, data, depth, exc=PatsyError): # pragma: no cover
else:
raise AssertionError
+
# Exercise all the different calling conventions for the high-level API
def test_formula_likes():
# Plain array-like, rhs only
- t([[1, 2, 3], [4, 5, 6]], {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
- t((None, [[1, 2, 3], [4, 5, 6]]), {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
- t(np.asarray([[1, 2, 3], [4, 5, 6]]), {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
- t((None, np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
+ t([[1, 2, 3], [4, 5, 6]], {}, 0, False, [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
+ t(
+ (None, [[1, 2, 3], [4, 5, 6]]),
+ {},
+ 0,
+ False,
+ [[1, 2, 3], [4, 5, 6]],
+ ["x0", "x1", "x2"],
+ )
+ t(
+ np.asarray([[1, 2, 3], [4, 5, 6]]),
+ {},
+ 0,
+ False,
+ [[1, 2, 3], [4, 5, 6]],
+ ["x0", "x1", "x2"],
+ )
+ t(
+ (None, np.asarray([[1, 2, 3], [4, 5, 6]])),
+ {},
+ 0,
+ False,
+ [[1, 2, 3], [4, 5, 6]],
+ ["x0", "x1", "x2"],
+ )
dm = DesignMatrix([[1, 2, 3], [4, 5, 6]], default_column_prefix="foo")
- t(dm, {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])
- t((None, dm), {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])
+ t(dm, {}, 0, False, [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])
+ t((None, dm), {}, 0, False, [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])
# Plain array-likes, lhs and rhs
- t(([1, 2], [[1, 2, 3], [4, 5, 6]]), {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
- [[1], [2]], ["y0"])
- t(([[1], [2]], [[1, 2, 3], [4, 5, 6]]), {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
- [[1], [2]], ["y0"])
- t((np.asarray([1, 2]), np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
- [[1], [2]], ["y0"])
- t((np.asarray([[1], [2]]), np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
- [[1], [2]], ["y0"])
+ t(
+ ([1, 2], [[1, 2, 3], [4, 5, 6]]),
+ {},
+ 0,
+ False,
+ [[1, 2, 3], [4, 5, 6]],
+ ["x0", "x1", "x2"],
+ [[1], [2]],
+ ["y0"],
+ )
+ t(
+ ([[1], [2]], [[1, 2, 3], [4, 5, 6]]),
+ {},
+ 0,
+ False,
+ [[1, 2, 3], [4, 5, 6]],
+ ["x0", "x1", "x2"],
+ [[1], [2]],
+ ["y0"],
+ )
+ t(
+ (np.asarray([1, 2]), np.asarray([[1, 2, 3], [4, 5, 6]])),
+ {},
+ 0,
+ False,
+ [[1, 2, 3], [4, 5, 6]],
+ ["x0", "x1", "x2"],
+ [[1], [2]],
+ ["y0"],
+ )
+ t(
+ (np.asarray([[1], [2]]), np.asarray([[1, 2, 3], [4, 5, 6]])),
+ {},
+ 0,
+ False,
+ [[1, 2, 3], [4, 5, 6]],
+ ["x0", "x1", "x2"],
+ [[1], [2]],
+ ["y0"],
+ )
x_dm = DesignMatrix([[1, 2, 3], [4, 5, 6]], default_column_prefix="foo")
y_dm = DesignMatrix([1, 2], default_column_prefix="bar")
- t((y_dm, x_dm), {}, 0,
- False,
- [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"],
- [[1], [2]], ["bar0"])
+ t(
+ (y_dm, x_dm),
+ {},
+ 0,
+ False,
+ [[1, 2, 3], [4, 5, 6]],
+ ["foo0", "foo1", "foo2"],
+ [[1], [2]],
+ ["bar0"],
+ )
# number of rows must match
t_invalid(([1, 2, 3], [[1, 2, 3], [4, 5, 6]]), {}, 0)
@@ -199,132 +279,210 @@ def test_formula_likes():
# plain Series and DataFrames
if have_pandas:
# Names are extracted
- t(pandas.DataFrame({"x": [1, 2, 3]}), {}, 0,
- False,
- [[1], [2], [3]], ["x"])
- t(pandas.Series([1, 2, 3], name="asdf"), {}, 0,
- False,
- [[1], [2], [3]], ["asdf"])
- t((pandas.DataFrame({"y": [4, 5, 6]}),
- pandas.DataFrame({"x": [1, 2, 3]})), {}, 0,
- False,
- [[1], [2], [3]], ["x"],
- [[4], [5], [6]], ["y"])
- t((pandas.Series([4, 5, 6], name="y"),
- pandas.Series([1, 2, 3], name="x")), {}, 0,
- False,
- [[1], [2], [3]], ["x"],
- [[4], [5], [6]], ["y"])
+ t(pandas.DataFrame({"x": [1, 2, 3]}), {}, 0, False, [[1], [2], [3]], ["x"])
+ t(
+ pandas.Series([1, 2, 3], name="asdf"),
+ {},
+ 0,
+ False,
+ [[1], [2], [3]],
+ ["asdf"],
+ )
+ t(
+ (pandas.DataFrame({"y": [4, 5, 6]}), pandas.DataFrame({"x": [1, 2, 3]})),
+ {},
+ 0,
+ False,
+ [[1], [2], [3]],
+ ["x"],
+ [[4], [5], [6]],
+ ["y"],
+ )
+ t(
+ (pandas.Series([4, 5, 6], name="y"), pandas.Series([1, 2, 3], name="x")),
+ {},
+ 0,
+ False,
+ [[1], [2], [3]],
+ ["x"],
+ [[4], [5], [6]],
+ ["y"],
+ )
# Or invented
- t((pandas.DataFrame([[4, 5, 6]]),
- pandas.DataFrame([[1, 2, 3]], columns=[7, 8, 9])), {}, 0,
- False,
- [[1, 2, 3]], ["x7", "x8", "x9"],
- [[4, 5, 6]], ["y0", "y1", "y2"])
- t(pandas.Series([1, 2, 3]), {}, 0,
- False,
- [[1], [2], [3]], ["x0"])
+ t(
+ (
+ pandas.DataFrame([[4, 5, 6]]),
+ pandas.DataFrame([[1, 2, 3]], columns=[7, 8, 9]),
+ ),
+ {},
+ 0,
+ False,
+ [[1, 2, 3]],
+ ["x7", "x8", "x9"],
+ [[4, 5, 6]],
+ ["y0", "y1", "y2"],
+ )
+ t(pandas.Series([1, 2, 3]), {}, 0, False, [[1], [2], [3]], ["x0"])
# indices must match
- t_invalid((pandas.DataFrame([[1]], index=[1]),
- pandas.DataFrame([[1]], index=[2])),
- {}, 0)
+ t_invalid(
+ (pandas.DataFrame([[1]], index=[1]), pandas.DataFrame([[1]], index=[2])),
+ {},
+ 0,
+ )
# Foreign ModelDesc factories
class ForeignModelSource(object):
def __patsy_get_model_desc__(self, data):
- return ModelDesc([Term([LookupFactor("Y")])],
- [Term([LookupFactor("X")])])
+ return ModelDesc([Term([LookupFactor("Y")])], [Term([LookupFactor("X")])])
+
foreign_model = ForeignModelSource()
- t(foreign_model,
- {"Y": [1, 2],
- "X": [[1, 2], [3, 4]]},
- 0,
- True,
- [[1, 2], [3, 4]], ["X[0]", "X[1]"],
- [[1], [2]], ["Y"])
+ t(
+ foreign_model,
+ {"Y": [1, 2], "X": [[1, 2], [3, 4]]},
+ 0,
+ True,
+ [[1, 2], [3, 4]],
+ ["X[0]", "X[1]"],
+ [[1], [2]],
+ ["Y"],
+ )
+
class BadForeignModelSource(object):
def __patsy_get_model_desc__(self, data):
return data
+
t_invalid(BadForeignModelSource(), {}, 0)
# string formulas
- t("y ~ x", {"y": [1, 2], "x": [3, 4]}, 0,
- True,
- [[1, 3], [1, 4]], ["Intercept", "x"],
- [[1], [2]], ["y"])
- t("~ x", {"y": [1, 2], "x": [3, 4]}, 0,
- True,
- [[1, 3], [1, 4]], ["Intercept", "x"])
- t("x + y", {"y": [1, 2], "x": [3, 4]}, 0,
- True,
- [[1, 3, 1], [1, 4, 2]], ["Intercept", "x", "y"])
+ t(
+ "y ~ x",
+ {"y": [1, 2], "x": [3, 4]},
+ 0,
+ True,
+ [[1, 3], [1, 4]],
+ ["Intercept", "x"],
+ [[1], [2]],
+ ["y"],
+ )
+ t("~ x", {"y": [1, 2], "x": [3, 4]}, 0, True, [[1, 3], [1, 4]], ["Intercept", "x"])
+ t(
+ "x + y",
+ {"y": [1, 2], "x": [3, 4]},
+ 0,
+ True,
+ [[1, 3, 1], [1, 4, 2]],
+ ["Intercept", "x", "y"],
+ )
# ModelDesc
desc = ModelDesc([], [Term([LookupFactor("x")])])
- t(desc, {"x": [1.5, 2.5, 3.5]}, 0,
- True,
- [[1.5], [2.5], [3.5]], ["x"])
+ t(desc, {"x": [1.5, 2.5, 3.5]}, 0, True, [[1.5], [2.5], [3.5]], ["x"])
desc = ModelDesc([], [Term([]), Term([LookupFactor("x")])])
- t(desc, {"x": [1.5, 2.5, 3.5]}, 0,
- True,
- [[1, 1.5], [1, 2.5], [1, 3.5]], ["Intercept", "x"])
- desc = ModelDesc([Term([LookupFactor("y")])],
- [Term([]), Term([LookupFactor("x")])])
- t(desc, {"x": [1.5, 2.5, 3.5], "y": [10, 20, 30]}, 0,
- True,
- [[1, 1.5], [1, 2.5], [1, 3.5]], ["Intercept", "x"],
- [[10], [20], [30]], ["y"])
+ t(
+ desc,
+ {"x": [1.5, 2.5, 3.5]},
+ 0,
+ True,
+ [[1, 1.5], [1, 2.5], [1, 3.5]],
+ ["Intercept", "x"],
+ )
+ desc = ModelDesc([Term([LookupFactor("y")])], [Term([]), Term([LookupFactor("x")])])
+ t(
+ desc,
+ {"x": [1.5, 2.5, 3.5], "y": [10, 20, 30]},
+ 0,
+ True,
+ [[1, 1.5], [1, 2.5], [1, 3.5]],
+ ["Intercept", "x"],
+ [[10], [20], [30]],
+ ["y"],
+ )
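The ModelDesc cases above make the equivalence explicit: a formula string is just a convenient spelling of a ModelDesc, a pair of term lists whose factors can be LookupFactors (plain dict-style lookups) or eval factors. For instance, this hand-built description behaves like "y ~ 1 + x":

    from patsy import ModelDesc, Term, LookupFactor, INTERCEPT, dmatrices

    desc = ModelDesc([Term([LookupFactor("y")])], [INTERCEPT, Term([LookupFactor("x")])])
    lhs, rhs = dmatrices(desc, {"x": [1.5, 2.5], "y": [10, 20]})
    assert lhs.design_info.column_names == ["y"]
    assert rhs.design_info.column_names == ["Intercept", "x"]
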
# builders
- termlists = ([],
- [Term([LookupFactor("x")])],
- [Term([]), Term([LookupFactor("x")])],
- )
- builders = design_matrix_builders(termlists,
- lambda: iter([{"x": [1, 2, 3]}]),
- eval_env=0)
+ termlists = (
+ [],
+ [Term([LookupFactor("x")])],
+ [Term([]), Term([LookupFactor("x")])],
+ )
+ builders = design_matrix_builders(
+ termlists, lambda: iter([{"x": [1, 2, 3]}]), eval_env=0
+ )
# twople but with no LHS
- t((builders[0], builders[2]), {"x": [10, 20, 30]}, 0,
- True,
- [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"])
+ t(
+ (builders[0], builders[2]),
+ {"x": [10, 20, 30]},
+ 0,
+ True,
+ [[1, 10], [1, 20], [1, 30]],
+ ["Intercept", "x"],
+ )
# single DesignInfo
- t(builders[2], {"x": [10, 20, 30]}, 0,
- True,
- [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"])
+ t(
+ builders[2],
+ {"x": [10, 20, 30]},
+ 0,
+ True,
+ [[1, 10], [1, 20], [1, 30]],
+ ["Intercept", "x"],
+ )
# twople with LHS
- t((builders[1], builders[2]), {"x": [10, 20, 30]}, 0,
- True,
- [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"],
- [[10], [20], [30]], ["x"])
+ t(
+ (builders[1], builders[2]),
+ {"x": [10, 20, 30]},
+ 0,
+ True,
+ [[1, 10], [1, 20], [1, 30]],
+ ["Intercept", "x"],
+ [[10], [20], [30]],
+ ["x"],
+ )
# check depth arguments
x_in_env = [1, 2, 3]
- t("~ x_in_env", {}, 0,
- True,
- [[1, 1], [1, 2], [1, 3]], ["Intercept", "x_in_env"])
- t("~ x_in_env", {"x_in_env": [10, 20, 30]}, 0,
- True,
- [[1, 10], [1, 20], [1, 30]], ["Intercept", "x_in_env"])
+ t("~ x_in_env", {}, 0, True, [[1, 1], [1, 2], [1, 3]], ["Intercept", "x_in_env"])
+ t(
+ "~ x_in_env",
+ {"x_in_env": [10, 20, 30]},
+ 0,
+ True,
+ [[1, 10], [1, 20], [1, 30]],
+ ["Intercept", "x_in_env"],
+ )
# Trying to pull x_in_env out of our *caller* shouldn't work.
t_invalid("~ x_in_env", {}, 1, exc=(NameError, PatsyError))
+
# But then again it should, if called from one down on the stack:
def check_nested_call():
x_in_env = "asdf"
- t("~ x_in_env", {}, 1,
- True,
- [[1, 1], [1, 2], [1, 3]], ["Intercept", "x_in_env"])
+ t(
+ "~ x_in_env",
+ {},
+ 1,
+ True,
+ [[1, 1], [1, 2], [1, 3]],
+ ["Intercept", "x_in_env"],
+ )
+
check_nested_call()
# passing in an explicit EvalEnvironment also works:
e = EvalEnvironment.capture(1)
t_invalid("~ x_in_env", {}, e, exc=(NameError, PatsyError))
e = EvalEnvironment.capture(0)
+
def check_nested_call_2():
x_in_env = "asdf"
- t("~ x_in_env", {}, e,
- True,
- [[1, 1], [1, 2], [1, 3]], ["Intercept", "x_in_env"])
+ t(
+ "~ x_in_env",
+ {},
+ e,
+ True,
+ [[1, 1], [1, 2], [1, 3]],
+ ["Intercept", "x_in_env"],
+ )
+
check_nested_call_2()
+
def test_return_pandas():
if not have_pandas:
return
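The depth checks above pin down how name lookup works: an integer eval_env counts stack frames up from the function that called into patsy, and an explicit EvalEnvironment freezes a namespace so it can be passed around. A sketch with illustrative names:

    from patsy import dmatrix, EvalEnvironment

    def fit(formula):
        # eval_env=1: skip fit()'s own frame and resolve names in our caller.
        return dmatrix(formula, {}, eval_env=1)

    def caller():
        weights = [10, 20, 30]  # local to caller(), found via eval_env=1
        return fit("weights")

    assert caller().design_info.column_names == ["Intercept", "weights"]

    def capture_env():
        weights = [1, 2, 3]
        return EvalEnvironment.capture()  # freeze this frame's namespace

    mat = dmatrix("weights", {}, eval_env=capture_env())
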
@@ -369,166 +527,220 @@ def test_return_pandas():
assert np.array_equal(df10.index, s1.index)
# pandas must be available
import patsy.highlevel
+
had_pandas = patsy.highlevel.have_pandas
try:
patsy.highlevel.have_pandas = False
- pytest.raises(PatsyError,
- dmatrix, "x", {"x": [1]}, 0, return_type="dataframe")
- pytest.raises(PatsyError,
- dmatrices, "y ~ x", {"x": [1], "y": [2]}, 0,
- return_type="dataframe")
+ pytest.raises(PatsyError, dmatrix, "x", {"x": [1]}, 0, return_type="dataframe")
+ pytest.raises(
+ PatsyError,
+ dmatrices,
+ "y ~ x",
+ {"x": [1], "y": [2]},
+ 0,
+ return_type="dataframe",
+ )
finally:
patsy.highlevel.have_pandas = had_pandas
+
def test_term_info():
data = balanced(a=2, b=2)
rhs = dmatrix("a:b", data)
- assert rhs.design_info.column_names == ["Intercept", "b[T.b2]",
- "a[T.a2]:b[b1]", "a[T.a2]:b[b2]"]
+ assert rhs.design_info.column_names == [
+ "Intercept",
+ "b[T.b2]",
+ "a[T.a2]:b[b1]",
+ "a[T.a2]:b[b2]",
+ ]
assert rhs.design_info.term_names == ["Intercept", "a:b"]
assert len(rhs.design_info.terms) == 2
assert rhs.design_info.terms[0] == INTERCEPT
+
def test_data_types():
- data = {"a": [1, 2, 3],
- "b": [1.0, 2.0, 3.0],
- "c": np.asarray([1, 2, 3], dtype=np.float32),
- "d": [True, False, True],
- "e": ["foo", "bar", "baz"],
- "f": C([1, 2, 3]),
- "g": C(["foo", "bar", "baz"]),
- "h": np.array(["foo", 1, (1, "hi")], dtype=object),
- }
- t("~ 0 + a", data, 0, True,
- [[1], [2], [3]], ["a"])
- t("~ 0 + b", data, 0, True,
- [[1], [2], [3]], ["b"])
- t("~ 0 + c", data, 0, True,
- [[1], [2], [3]], ["c"])
- t("~ 0 + d", data, 0, True,
- [[0, 1], [1, 0], [0, 1]], ["d[False]", "d[True]"])
- t("~ 0 + e", data, 0, True,
- [[0, 0, 1], [1, 0, 0], [0, 1, 0]], ["e[bar]", "e[baz]", "e[foo]"])
- t("~ 0 + f", data, 0, True,
- [[1, 0, 0], [0, 1, 0], [0, 0, 1]], ["f[1]", "f[2]", "f[3]"])
- t("~ 0 + g", data, 0, True,
- [[0, 0, 1], [1, 0, 0], [0, 1, 0]], ["g[bar]", "g[baz]", "g[foo]"])
+ data = {
+ "a": [1, 2, 3],
+ "b": [1.0, 2.0, 3.0],
+ "c": np.asarray([1, 2, 3], dtype=np.float32),
+ "d": [True, False, True],
+ "e": ["foo", "bar", "baz"],
+ "f": C([1, 2, 3]),
+ "g": C(["foo", "bar", "baz"]),
+ "h": np.array(["foo", 1, (1, "hi")], dtype=object),
+ }
+ t("~ 0 + a", data, 0, True, [[1], [2], [3]], ["a"])
+ t("~ 0 + b", data, 0, True, [[1], [2], [3]], ["b"])
+ t("~ 0 + c", data, 0, True, [[1], [2], [3]], ["c"])
+ t("~ 0 + d", data, 0, True, [[0, 1], [1, 0], [0, 1]], ["d[False]", "d[True]"])
+ t(
+ "~ 0 + e",
+ data,
+ 0,
+ True,
+ [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+ ["e[bar]", "e[baz]", "e[foo]"],
+ )
+ t(
+ "~ 0 + f",
+ data,
+ 0,
+ True,
+ [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+ ["f[1]", "f[2]", "f[3]"],
+ )
+ t(
+ "~ 0 + g",
+ data,
+ 0,
+ True,
+ [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+ ["g[bar]", "g[baz]", "g[foo]"],
+ )
# This depends on Python's sorting behavior:
- t("~ 0 + h", data, 0, True,
- [[0, 1, 0], [1, 0, 0], [0, 0, 1]],
- ["h[1]", "h[foo]", "h[(1, 'hi')]"])
+ t(
+ "~ 0 + h",
+ data,
+ 0,
+ True,
+ [[0, 1, 0], [1, 0, 0], [0, 0, 1]],
+ ["h[1]", "h[foo]", "h[(1, 'hi')]"],
+ )
+
def test_categorical():
data = balanced(a=2, b=2)
# There are more exhaustive tests for all the different coding options in
# test_build; let's just make sure that C() and stuff works.
- t("~ C(a)", data, 0,
- True,
- [[1, 0], [1, 0], [1, 1], [1, 1]], ["Intercept", "C(a)[T.a2]"])
- t("~ C(a, levels=['a2', 'a1'])", data, 0,
- True,
- [[1, 1], [1, 1], [1, 0], [1, 0]],
- ["Intercept", "C(a, levels=['a2', 'a1'])[T.a1]"])
- t("~ C(a, Treatment(reference=-1))", data, 0,
- True,
- [[1, 1], [1, 1], [1, 0], [1, 0]],
- ["Intercept", "C(a, Treatment(reference=-1))[T.a1]"])
+ t(
+ "~ C(a)",
+ data,
+ 0,
+ True,
+ [[1, 0], [1, 0], [1, 1], [1, 1]],
+ ["Intercept", "C(a)[T.a2]"],
+ )
+ t(
+ "~ C(a, levels=['a2', 'a1'])",
+ data,
+ 0,
+ True,
+ [[1, 1], [1, 1], [1, 0], [1, 0]],
+ ["Intercept", "C(a, levels=['a2', 'a1'])[T.a1]"],
+ )
+ t(
+ "~ C(a, Treatment(reference=-1))",
+ data,
+ 0,
+ True,
+ [[1, 1], [1, 1], [1, 0], [1, 0]],
+ ["Intercept", "C(a, Treatment(reference=-1))[T.a1]"],
+ )
# Different interactions
- t("a*b", data, 0,
- True,
- [[1, 0, 0, 0],
- [1, 0, 1, 0],
- [1, 1, 0, 0],
- [1, 1, 1, 1]],
- ["Intercept", "a[T.a2]", "b[T.b2]", "a[T.a2]:b[T.b2]"])
- t("0 + a:b", data, 0,
- True,
- [[1, 0, 0, 0],
- [0, 0, 1, 0],
- [0, 1, 0, 0],
- [0, 0, 0, 1]],
- ["a[a1]:b[b1]", "a[a2]:b[b1]", "a[a1]:b[b2]", "a[a2]:b[b2]"])
- t("1 + a + a:b", data, 0,
- True,
- [[1, 0, 0, 0],
- [1, 0, 1, 0],
- [1, 1, 0, 0],
- [1, 1, 0, 1]],
- ["Intercept", "a[T.a2]", "a[a1]:b[T.b2]", "a[a2]:b[T.b2]"])
+ t(
+ "a*b",
+ data,
+ 0,
+ True,
+ [[1, 0, 0, 0], [1, 0, 1, 0], [1, 1, 0, 0], [1, 1, 1, 1]],
+ ["Intercept", "a[T.a2]", "b[T.b2]", "a[T.a2]:b[T.b2]"],
+ )
+ t(
+ "0 + a:b",
+ data,
+ 0,
+ True,
+ [[1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]],
+ ["a[a1]:b[b1]", "a[a2]:b[b1]", "a[a1]:b[b2]", "a[a2]:b[b2]"],
+ )
+ t(
+ "1 + a + a:b",
+ data,
+ 0,
+ True,
+ [[1, 0, 0, 0], [1, 0, 1, 0], [1, 1, 0, 0], [1, 1, 0, 1]],
+ ["Intercept", "a[T.a2]", "a[a1]:b[T.b2]", "a[a2]:b[T.b2]"],
+ )
# Changing contrast with C()
data["a"] = C(data["a"], Helmert)
- t("a", data, 0,
- True,
- [[1, -1], [1, -1], [1, 1], [1, 1]], ["Intercept", "a[H.a2]"])
- t("C(a, Treatment)", data, 0,
- True,
- [[1, 0], [1, 0], [1, 1], [1, 1]], ["Intercept", "C(a, Treatment)[T.a2]"])
+ t("a", data, 0, True, [[1, -1], [1, -1], [1, 1], [1, 1]], ["Intercept", "a[H.a2]"])
+ t(
+ "C(a, Treatment)",
+ data,
+ 0,
+ True,
+ [[1, 0], [1, 0], [1, 1], [1, 1]],
+ ["Intercept", "C(a, Treatment)[T.a2]"],
+ )
# That didn't affect the original object
- t("a", data, 0,
- True,
- [[1, -1], [1, -1], [1, 1], [1, 1]], ["Intercept", "a[H.a2]"])
+ t("a", data, 0, True, [[1, -1], [1, -1], [1, 1], [1, 1]], ["Intercept", "a[H.a2]"])
+
def test_builtins():
- data = {"x": [1, 2, 3],
- "y": [4, 5, 6],
- "a b c": [10, 20, 30]}
- t("0 + I(x + y)", data, 0,
- True,
- [[1], [2], [3], [4], [5], [6]], ["I(x + y)"])
- t("Q('a b c')", data, 0,
- True,
- [[1, 10], [1, 20], [1, 30]], ["Intercept", "Q('a b c')"])
- t("center(x)", data, 0,
- True,
- [[1, -1], [1, 0], [1, 1]], ["Intercept", "center(x)"])
+ data = {"x": [1, 2, 3], "y": [4, 5, 6], "a b c": [10, 20, 30]}
+ t("0 + I(x + y)", data, 0, True, [[1], [2], [3], [4], [5], [6]], ["I(x + y)"])
+ t(
+ "Q('a b c')",
+ data,
+ 0,
+ True,
+ [[1, 10], [1, 20], [1, 30]],
+ ["Intercept", "Q('a b c')"],
+ )
+ t("center(x)", data, 0, True, [[1, -1], [1, 0], [1, 1]], ["Intercept", "center(x)"])
+
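The six-row expectation for "0 + I(x + y)" above is not a typo: inside I(), the expression is evaluated as ordinary Python, and + on two lists concatenates instead of adding elementwise; Q() similarly just quotes a name that is not a valid identifier. To make the contrast concrete:

    import numpy as np
    from patsy import dmatrix

    data = {"x": [1, 2, 3], "y": [4, 5, 6]}
    # list + list concatenates, so the matrix has six rows:
    assert np.allclose(dmatrix("0 + I(x + y)", data), [[1], [2], [3], [4], [5], [6]])
    # with arrays the same formula adds elementwise:
    arrays = {"x": np.array([1, 2, 3]), "y": np.array([4, 5, 6])}
    assert np.allclose(dmatrix("0 + I(x + y)", arrays), [[5], [7], [9]])
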
def test_incremental():
# incr_dbuilder(s)
# stateful transformations
datas = [
- {"a": ["a2", "a2", "a2"],
- "x": [1, 2, 3]},
- {"a": ["a2", "a2", "a1"],
- "x": [4, 5, 6]},
- ]
+ {"a": ["a2", "a2", "a2"], "x": [1, 2, 3]},
+ {"a": ["a2", "a2", "a1"], "x": [4, 5, 6]},
+ ]
x = np.asarray([1, 2, 3, 4, 5, 6])
sin_center_x = np.sin(x - np.mean(x))
x_col = sin_center_x - np.mean(sin_center_x)
+
def data_iter_maker():
return iter(datas)
- builders = incr_dbuilders("1 ~ a + center(np.sin(center(x)))",
- data_iter_maker)
+
+ builders = incr_dbuilders("1 ~ a + center(np.sin(center(x)))", data_iter_maker)
lhs, rhs = build_design_matrices(builders, datas[1])
assert lhs.design_info.column_names == ["Intercept"]
- assert rhs.design_info.column_names == ["Intercept",
- "a[T.a2]",
- "center(np.sin(center(x)))"]
+ assert rhs.design_info.column_names == [
+ "Intercept",
+ "a[T.a2]",
+ "center(np.sin(center(x)))",
+ ]
assert np.allclose(lhs, [[1], [1], [1]])
- assert np.allclose(rhs, np.column_stack(([1, 1, 1],
- [1, 1, 0],
- x_col[3:])))
+ assert np.allclose(rhs, np.column_stack(([1, 1, 1], [1, 1, 0], x_col[3:])))
- builder = incr_dbuilder("~ a + center(np.sin(center(x)))",
- data_iter_maker)
+ builder = incr_dbuilder("~ a + center(np.sin(center(x)))", data_iter_maker)
(rhs,) = build_design_matrices([builder], datas[1])
- assert rhs.design_info.column_names == ["Intercept",
- "a[T.a2]",
- "center(np.sin(center(x)))"]
+ assert rhs.design_info.column_names == [
+ "Intercept",
+ "a[T.a2]",
+ "center(np.sin(center(x)))",
+ ]
assert np.allclose(lhs, [[1], [1], [1]])
- assert np.allclose(rhs, np.column_stack(([1, 1, 1],
- [1, 1, 0],
- x_col[3:])))
+ assert np.allclose(rhs, np.column_stack(([1, 1, 1], [1, 1, 0], x_col[3:])))
pytest.raises(PatsyError, incr_dbuilder, "x ~ x", data_iter_maker)
pytest.raises(PatsyError, incr_dbuilders, "x", data_iter_maker)
+
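test_incremental above is worth a second look: center(np.sin(center(x))) needs one full pass to memorize the inner center()'s mean before the outer one can memorize anything, so incr_dbuilder(s) re-invokes the data_iter_maker once per memorization pass and only then returns a DesignInfo. A sketch of the same pattern on made-up chunks:

    import numpy as np
    from patsy import incr_dbuilder, build_design_matrices

    chunks = [{"x": [1.0, 2.0, 3.0]}, {"x": [4.0, 5.0, 6.0]}]
    # The factory may be called several times, once per memorization pass:
    design_info = incr_dbuilder("~ center(np.sin(center(x)))", lambda: iter(chunks))
    # Afterwards, data (old or new) can be coded chunk by chunk:
    (mat,) = build_design_matrices([design_info], chunks[0])
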
def test_env_transform():
- t("~ np.sin(x)", {"x": [1, 2, 3]}, 0,
- True,
- [[1, np.sin(1)], [1, np.sin(2)], [1, np.sin(3)]],
- ["Intercept", "np.sin(x)"])
+ t(
+ "~ np.sin(x)",
+ {"x": [1, 2, 3]},
+ 0,
+ True,
+ [[1, np.sin(1)], [1, np.sin(2)], [1, np.sin(3)]],
+ ["Intercept", "np.sin(x)"],
+ )
+
# Term ordering:
# 1) all 0-order no-numeric
@@ -553,16 +765,14 @@ def t_terms(formula, order):
t_terms("b + a + x2 + x1", ["Intercept", "b", "a", "x2", "x1"])
t_terms("0 + x1 + a + x2 + b + 1", ["Intercept", "a", "b", "x1", "x2"])
t_terms("0 + a:b + a + b + 1", ["Intercept", "a", "b", "a:b"])
- t_terms("a + a:x1 + x2 + x1 + b",
- ["Intercept", "a", "b", "x1", "a:x1", "x2"])
- t_terms("0 + a:x1:x2 + a + x2:x1:b + x2 + x1 + a:x1 + x1:x2 + x1:a:x2:a:b",
- ["a",
- "x1:x2", "a:x1:x2", "x2:x1:b", "x1:a:x2:b",
- "x2",
- "x1",
- "a:x1"])
-
-def _check_division(expect_true_division): # pragma: no cover
+ t_terms("a + a:x1 + x2 + x1 + b", ["Intercept", "a", "b", "x1", "a:x1", "x2"])
+ t_terms(
+ "0 + a:x1:x2 + a + x2:x1:b + x2 + x1 + a:x1 + x1:x2 + x1:a:x2:a:b",
+ ["a", "x1:x2", "a:x1:x2", "x2:x1:b", "x1:a:x2:b", "x2", "x1", "a:x1"],
+ )
+
+
+def _check_division(expect_true_division): # pragma: no cover
# We evaluate the formula "I(x / y)" in our *caller's* scope, so the
# result depends on whether our caller has done 'from __future__ import
# division'.
@@ -573,25 +783,51 @@ def _check_division(expect_true_division): # pragma: no cover
else:
assert np.allclose(m, [[2]])
+
def test_multicolumn():
data = {
"a": ["a1", "a2"],
"X": [[1, 2], [3, 4]],
"Y": [[1, 3], [2, 4]],
- }
- t("X*Y", data, 0,
- True,
- [[1, 1, 2, 1, 3, 1 * 1, 2 * 1, 1 * 3, 2 * 3],
- [1, 3, 4, 2, 4, 3 * 2, 4 * 2, 3 * 4, 4 * 4]],
- ["Intercept", "X[0]", "X[1]", "Y[0]", "Y[1]",
- "X[0]:Y[0]", "X[1]:Y[0]", "X[0]:Y[1]", "X[1]:Y[1]"])
- t("a:X + Y", data, 0,
- True,
- [[1, 1, 0, 2, 0, 1, 3],
- [1, 0, 3, 0, 4, 2, 4]],
- ["Intercept",
- "a[a1]:X[0]", "a[a2]:X[0]", "a[a1]:X[1]", "a[a2]:X[1]",
- "Y[0]", "Y[1]"])
+ }
+ t(
+ "X*Y",
+ data,
+ 0,
+ True,
+ [
+ [1, 1, 2, 1, 3, 1 * 1, 2 * 1, 1 * 3, 2 * 3],
+ [1, 3, 4, 2, 4, 3 * 2, 4 * 2, 3 * 4, 4 * 4],
+ ],
+ [
+ "Intercept",
+ "X[0]",
+ "X[1]",
+ "Y[0]",
+ "Y[1]",
+ "X[0]:Y[0]",
+ "X[1]:Y[0]",
+ "X[0]:Y[1]",
+ "X[1]:Y[1]",
+ ],
+ )
+ t(
+ "a:X + Y",
+ data,
+ 0,
+ True,
+ [[1, 1, 0, 2, 0, 1, 3], [1, 0, 3, 0, 4, 2, 4]],
+ [
+ "Intercept",
+ "a[a1]:X[0]",
+ "a[a2]:X[0]",
+ "a[a1]:X[1]",
+ "a[a2]:X[1]",
+ "Y[0]",
+ "Y[1]",
+ ],
+ )
+
def test_dmatrix_dmatrices_no_data():
x = [1, 2, 3]
@@ -601,19 +837,22 @@ def test_dmatrix_dmatrices_no_data():
assert np.allclose(lhs, [[4], [5], [6]])
assert np.allclose(rhs, [[1, 1], [1, 2], [1, 3]])
+
def test_designinfo_describe():
- lhs, rhs = dmatrices("y ~ x + a", {"y": [1, 2, 3],
- "x": [4, 5, 6],
- "a": ["a1", "a2", "a3"]})
+ lhs, rhs = dmatrices(
+ "y ~ x + a", {"y": [1, 2, 3], "x": [4, 5, 6], "a": ["a1", "a2", "a3"]}
+ )
assert lhs.design_info.describe() == "y"
assert rhs.design_info.describe() == "1 + a + x"
+
def test_evalfactor_reraise():
# This will produce a PatsyError, but buried inside the factor evaluation,
# so the original code has no way to give it an appropriate origin=
# attribute. EvalFactor should notice this, and add a useful origin:
def raise_patsy_error(x):
raise PatsyError("WHEEEEEE")
+
formula = "raise_patsy_error(X) + Y"
try:
dmatrix(formula, {"X": [1, 2, 3], "Y": [4, 5, 6]})
@@ -632,6 +871,7 @@ def raise_patsy_error(x):
else:
assert False
+
def test_dmatrix_NA_action():
data = {"x": [1, 2, 3, np.nan], "y": [np.nan, 20, 30, 40]}
@@ -641,13 +881,17 @@ def test_dmatrix_NA_action():
for return_type in return_types:
mat = dmatrix("x + y", data=data, return_type=return_type)
- assert np.array_equal(mat, [[1, 2, 20],
- [1, 3, 30]])
+ assert np.array_equal(mat, [[1, 2, 20], [1, 3, 30]])
if return_type == "dataframe":
assert mat.index.equals(pandas.Index([1, 2]))
- pytest.raises(PatsyError, dmatrix, "x + y", data=data,
- return_type=return_type,
- NA_action="raise")
+ pytest.raises(
+ PatsyError,
+ dmatrix,
+ "x + y",
+ data=data,
+ return_type=return_type,
+ NA_action="raise",
+ )
lmat, rmat = dmatrices("y ~ x", data=data, return_type=return_type)
assert np.array_equal(lmat, [[20], [30]])
@@ -655,9 +899,14 @@ def test_dmatrix_NA_action():
if return_type == "dataframe":
assert lmat.index.equals(pandas.Index([1, 2]))
assert rmat.index.equals(pandas.Index([1, 2]))
- pytest.raises(PatsyError,
- dmatrices, "y ~ x", data=data, return_type=return_type,
- NA_action="raise")
+ pytest.raises(
+ PatsyError,
+ dmatrices,
+ "y ~ x",
+ data=data,
+ return_type=return_type,
+ NA_action="raise",
+ )
# Initial release for the NA handling code had problems with
# non-data-dependent matrices like "~ 1".
@@ -667,31 +916,38 @@ def test_dmatrix_NA_action():
if return_type == "dataframe":
assert lmat.index.equals(pandas.Index([1, 2, 3]))
assert rmat.index.equals(pandas.Index([1, 2, 3]))
- pytest.raises(PatsyError,
- dmatrices, "y ~ 1", data=data, return_type=return_type,
- NA_action="raise")
+ pytest.raises(
+ PatsyError,
+ dmatrices,
+ "y ~ 1",
+ data=data,
+ return_type=return_type,
+ NA_action="raise",
+ )
+
def test_0d_data():
# Use case from statsmodels/statsmodels#1881
data_0d = {"x1": 1.1, "x2": 1.2, "a": "a1"}
for formula, expected in [
- ("x1 + x2", [[1, 1.1, 1.2]]),
- ("C(a, levels=('a1', 'a2')) + x1", [[1, 0, 1.1]]),
- ]:
+ ("x1 + x2", [[1, 1.1, 1.2]]),
+ ("C(a, levels=('a1', 'a2')) + x1", [[1, 0, 1.1]]),
+ ]:
mat = dmatrix(formula, data_0d)
assert np.allclose(mat, expected)
- assert np.allclose(build_design_matrices([mat.design_info],
- data_0d)[0],
- expected)
+ assert np.allclose(
+ build_design_matrices([mat.design_info], data_0d)[0], expected
+ )
if have_pandas:
data_series = pandas.Series(data_0d)
assert np.allclose(dmatrix(formula, data_series), expected)
- assert np.allclose(build_design_matrices([mat.design_info],
- data_series)[0],
- expected)
+ assert np.allclose(
+ build_design_matrices([mat.design_info], data_series)[0], expected
+ )
+
def test_env_not_saved_in_builder():
x_in_env = [1, 2, 3]
@@ -702,6 +958,7 @@ def test_env_not_saved_in_builder():
assert np.allclose(design_matrix, design_matrix2)
+
def test_C_and_pandas_categorical():
if not have_pandas_categorical:
return
@@ -711,22 +968,14 @@ def test_C_and_pandas_categorical():
objs.append(pandas.Series(objs[0]))
for obj in objs:
d = {"obj": obj}
- assert np.allclose(dmatrix("obj", d),
- [[1, 1],
- [1, 0],
- [1, 1]])
-
- assert np.allclose(dmatrix("C(obj)", d),
- [[1, 1],
- [1, 0],
- [1, 1]])
-
- assert np.allclose(dmatrix("C(obj, levels=['b', 'a'])", d),
- [[1, 1],
- [1, 0],
- [1, 1]])
-
- assert np.allclose(dmatrix("C(obj, levels=['a', 'b'])", d),
- [[1, 0],
- [1, 1],
- [1, 0]])
+ assert np.allclose(dmatrix("obj", d), [[1, 1], [1, 0], [1, 1]])
+
+ assert np.allclose(dmatrix("C(obj)", d), [[1, 1], [1, 0], [1, 1]])
+
+ assert np.allclose(
+ dmatrix("C(obj, levels=['b', 'a'])", d), [[1, 1], [1, 0], [1, 1]]
+ )
+
+ assert np.allclose(
+ dmatrix("C(obj, levels=['a', 'b'])", d), [[1, 0], [1, 1], [1, 0]]
+ )
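The block above checks that C(..., levels=...) overrides whatever level ordering would otherwise be inferred, including the ordering stored on a pandas Categorical; under the default treatment coding the first listed level becomes the reference. With a plain list, for comparison:

    import numpy as np
    from patsy import dmatrix

    d = {"obj": ["a", "b", "a"]}
    # Default: sorted levels, "a" is the reference level.
    assert dmatrix("C(obj)", d).design_info.column_names == ["Intercept", "C(obj)[T.b]"]
    # levels= reorders: "b" becomes the reference and the dummy marks "a".
    m = dmatrix("C(obj, levels=['b', 'a'])", d)
    assert np.allclose(m, [[1, 1], [1, 0], [1, 1]])
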
diff --git a/patsy/test_regressions.py b/patsy/test_regressions.py
index 8ab2d6d..2760846 100644
--- a/patsy/test_regressions.py
+++ b/patsy/test_regressions.py
@@ -5,16 +5,16 @@
# Regression tests for fixed bugs (when not otherwise better covered somewhere
# else)
-from patsy import (EvalEnvironment, dmatrix, build_design_matrices,
- PatsyError, Origin)
+from patsy import EvalEnvironment, dmatrix, build_design_matrices, PatsyError, Origin
+
def test_issue_11():
# Give a sensible error message for level mismatches
# (At some points we've failed to put an origin= on these errors)
env = EvalEnvironment.capture()
- data = {"X" : [0,1,2,3], "Y" : [1,2,3,4]}
+ data = {"X": [0, 1, 2, 3], "Y": [1, 2, 3, 4]}
formula = "C(X) + Y"
- new_data = {"X" : [0,0,1,2,3,3,4], "Y" : [1,2,3,4,5,6,7]}
+ new_data = {"X": [0, 0, 1, 2, 3, 3, 4], "Y": [1, 2, 3, 4, 5, 6, 7]}
info = dmatrix(formula, data)
try:
build_design_matrices([info.design_info], new_data)
diff --git a/patsy/test_splines_bs_data.py b/patsy/test_splines_bs_data.py
index ba02495..6b233fc 100644
--- a/patsy/test_splines_bs_data.py
+++ b/patsy/test_splines_bs_data.py
@@ -1,7 +1,31 @@
# This file auto-generated by tools/get-R-bs-test-vectors.R
# Using: R version 2.15.1 (2012-06-22)
import numpy as np
-R_bs_test_x = np.array([1, 1.5, 2.25, 3.375, 5.0625, 7.59375, 11.390625, 17.0859375, 25.62890625, 38.443359375, 57.6650390625, 86.49755859375, 129.746337890625, 194.6195068359375, 291.92926025390625, 437.893890380859375, 656.8408355712890625, 985.26125335693359375, 1477.8918800354003906, 2216.8378200531005859, ])
+
+R_bs_test_x = np.array(
+ [
+ 1,
+ 1.5,
+ 2.25,
+ 3.375,
+ 5.0625,
+ 7.59375,
+ 11.390625,
+ 17.0859375,
+ 25.62890625,
+ 38.443359375,
+ 57.6650390625,
+ 86.49755859375,
+ 129.746337890625,
+ 194.6195068359375,
+ 291.92926025390625,
+ 437.893890380859375,
+ 656.8408355712890625,
+ 985.26125335693359375,
+ 1477.8918800354003906,
+ 2216.8378200531005859,
+ ]
+)
R_bs_test_data = """
--BEGIN TEST CASE--
degree=1
diff --git a/patsy/test_splines_crs_data.py b/patsy/test_splines_crs_data.py
index a0dcaa2..3b85f71 100644
--- a/patsy/test_splines_crs_data.py
+++ b/patsy/test_splines_crs_data.py
@@ -1,7 +1,31 @@
# This file auto-generated by tools/get-R-crs-test-vectors.R
# Using: R version 3.0.3 (2014-03-06) and package 'mgcv' version 1.7.28
import numpy as np
-R_crs_test_x = np.array([1, -1.5, 2.25, -3.375, 5.0625, -7.59375, 11.390625, -17.0859375, 25.628906250000000000, -38.443359375000000000, 57.665039062500000000, -86.497558593750000000, 129.74633789062500000, -194.6195068359375, 291.92926025390625000, -437.89389038085937500, 656.84083557128906250, -985.26125335693359375, 1477.8918800354003906, -2216.8378200531005859, ])
+
+R_crs_test_x = np.array(
+ [
+ 1,
+ -1.5,
+ 2.25,
+ -3.375,
+ 5.0625,
+ -7.59375,
+ 11.390625,
+ -17.0859375,
+ 25.628906250000000000,
+ -38.443359375000000000,
+ 57.665039062500000000,
+ -86.497558593750000000,
+ 129.74633789062500000,
+ -194.6195068359375,
+ 291.92926025390625000,
+ -437.89389038085937500,
+ 656.84083557128906250,
+ -985.26125335693359375,
+ 1477.8918800354003906,
+ -2216.8378200531005859,
+ ]
+)
R_crs_test_data = """
--BEGIN TEST CASE--
spline_type=cr
diff --git a/patsy/test_state.py b/patsy/test_state.py
index 3c04611..2c5a8e8 100644
--- a/patsy/test_state.py
+++ b/patsy/test_state.py
@@ -7,6 +7,7 @@
from patsy.state import Center, Standardize, center
from patsy.util import atleast_2d_column_default
+
def check_stateful(cls, accepts_multicolumn, input, output, *args, **kwargs):
input = np.asarray(input)
output = np.asarray(output)
@@ -27,19 +28,25 @@ def check_stateful(cls, accepts_multicolumn, input, output, *args, **kwargs):
([np.array(input)[:, None]], atleast_2d_column_default(output)),
# 2-d but 1 column input, many chunks:
([np.array([[n]]) for n in input], atleast_2d_column_default(output)),
- ]
+ ]
if accepts_multicolumn:
# 2-d array input, one chunk:
test_cases += [
- ([np.column_stack((input, input[::-1]))],
- np.column_stack((output, output[::-1]))),
+ (
+ [np.column_stack((input, input[::-1]))],
+ np.column_stack((output, output[::-1])),
+ ),
# 2-d array input, many chunks:
- ([np.array([[input[i], input[-i-1]]]) for i in range(len(input))],
- np.column_stack((output, output[::-1]))),
- ]
+ (
+ [np.array([[input[i], input[-i - 1]]]) for i in range(len(input))],
+ np.column_stack((output, output[::-1])),
+ ),
+ ]
from patsy.util import have_pandas
+
if have_pandas:
import pandas
+
pandas_type = (pandas.Series, pandas.DataFrame)
pandas_index = np.linspace(0, 1, num=len(input))
# 1d and 2d here refer to the dimensionality of the input
@@ -51,24 +58,32 @@ def check_stateful(cls, accepts_multicolumn, input, output, *args, **kwargs):
# Series input, one chunk
([pandas.Series(input, index=pandas_index)], output_1d),
# Series input, many chunks
- ([pandas.Series([x], index=[idx])
- for (x, idx) in zip(input, pandas_index)],
- output_1d),
- ]
+ (
+ [
+ pandas.Series([x], index=[idx])
+ for (x, idx) in zip(input, pandas_index)
+ ],
+ output_1d,
+ ),
+ ]
if accepts_multicolumn:
input_2d_2col = np.column_stack((input, input[::-1]))
output_2d_2col = np.column_stack((output, output[::-1]))
- output_2col_dataframe = pandas.DataFrame(output_2d_2col,
- index=pandas_index)
+ output_2col_dataframe = pandas.DataFrame(output_2d_2col, index=pandas_index)
test_cases += [
# DataFrame input, one chunk
- ([pandas.DataFrame(input_2d_2col, index=pandas_index)],
- output_2col_dataframe),
+ (
+ [pandas.DataFrame(input_2d_2col, index=pandas_index)],
+ output_2col_dataframe,
+ ),
# DataFrame input, many chunks
- ([pandas.DataFrame([input_2d_2col[i, :]],
- index=[pandas_index[i]])
- for i in range(len(input))],
- output_2col_dataframe),
+ (
+ [
+ pandas.DataFrame([input_2d_2col[i, :]], index=[pandas_index[i]])
+ for i in range(len(input))
+ ],
+ output_2col_dataframe,
+ ),
]
for input_obj, output_obj in test_cases:
print(input_obj)
@@ -113,28 +128,31 @@ def check_stateful(cls, accepts_multicolumn, input, output, *args, **kwargs):
assert all_output2.ndim == all_input.ndim
assert np.allclose(all_output2, output_obj)
+
def test_Center():
check_stateful(Center, True, [1, 2, 3], [-1, 0, 1])
check_stateful(Center, True, [1, 2, 1, 2], [-0.5, 0.5, -0.5, 0.5])
- check_stateful(Center, True,
- [1.3, -10.1, 7.0, 12.0],
- [-1.25, -12.65, 4.45, 9.45])
+ check_stateful(Center, True, [1.3, -10.1, 7.0, 12.0], [-1.25, -12.65, 4.45, 9.45])
+
def test_stateful_transform_wrapper():
assert np.allclose(center([1, 2, 3]), [-1, 0, 1])
assert np.allclose(center([1, 2, 1, 2]), [-0.5, 0.5, -0.5, 0.5])
assert center([1.0, 2.0, 3.0]).dtype == np.dtype(float)
- assert (center(np.array([1.0, 2.0, 3.0], dtype=np.float32)).dtype
- == np.dtype(np.float32))
+ assert center(np.array([1.0, 2.0, 3.0], dtype=np.float32)).dtype == np.dtype(
+ np.float32
+ )
assert center([1, 2, 3]).dtype == np.dtype(float)
from patsy.util import have_pandas
+
if have_pandas:
import pandas
+
s = pandas.Series([1, 2, 3], index=["a", "b", "c"])
- df = pandas.DataFrame([[1, 2], [2, 4], [3, 6]],
- columns=["x1", "x2"],
- index=[10, 20, 30])
+ df = pandas.DataFrame(
+ [[1, 2], [2, 4], [3, 6]], columns=["x1", "x2"], index=[10, 20, 30]
+ )
s_c = center(s)
assert isinstance(s_c, pandas.Series)
assert np.array_equal(s_c.index, ["a", "b", "c"])
@@ -145,16 +163,17 @@ def test_stateful_transform_wrapper():
assert np.array_equal(df_c.columns, ["x1", "x2"])
assert np.allclose(df_c, [[-1, -2], [0, 0], [1, 2]])
+
def test_Standardize():
check_stateful(Standardize, True, [1, -1], [1, -1])
check_stateful(Standardize, True, [12, 10], [1, -1])
- check_stateful(Standardize, True,
- [12, 11, 10],
- [np.sqrt(3./2), 0, -np.sqrt(3./2)])
+ check_stateful(
+ Standardize, True, [12, 11, 10], [np.sqrt(3.0 / 2), 0, -np.sqrt(3.0 / 2)]
+ )
- check_stateful(Standardize, True,
- [12.0, 11.0, 10.0],
- [np.sqrt(3./2), 0, -np.sqrt(3./2)])
+ check_stateful(
+ Standardize, True, [12.0, 11.0, 10.0], [np.sqrt(3.0 / 2), 0, -np.sqrt(3.0 / 2)]
+ )
# XX: see the comment in Standardize.transform about why this doesn't
# work:
@@ -164,26 +183,25 @@ def test_Standardize():
r20 = list(range(20))
- check_stateful(Standardize, True, [1, -1], [np.sqrt(2)/2, -np.sqrt(2)/2],
- ddof=1)
-
- check_stateful(Standardize, True,
- r20,
- list((np.arange(20) - 9.5) / 5.7662812973353983),
- ddof=0)
- check_stateful(Standardize, True,
- r20,
- list((np.arange(20) - 9.5) / 5.9160797830996161),
- ddof=1)
- check_stateful(Standardize, True,
- r20,
- list((np.arange(20) - 9.5)),
- rescale=False, ddof=1)
- check_stateful(Standardize, True,
- r20,
- list(np.arange(20) / 5.9160797830996161),
- center=False, ddof=1)
- check_stateful(Standardize, True,
- r20,
- r20,
- center=False, rescale=False, ddof=1)
+ check_stateful(
+ Standardize, True, [1, -1], [np.sqrt(2) / 2, -np.sqrt(2) / 2], ddof=1
+ )
+
+ check_stateful(
+ Standardize, True, r20, list((np.arange(20) - 9.5) / 5.7662812973353983), ddof=0
+ )
+ check_stateful(
+ Standardize, True, r20, list((np.arange(20) - 9.5) / 5.9160797830996161), ddof=1
+ )
+ check_stateful(
+ Standardize, True, r20, list((np.arange(20) - 9.5)), rescale=False, ddof=1
+ )
+ check_stateful(
+ Standardize,
+ True,
+ r20,
+ list(np.arange(20) / 5.9160797830996161),
+ center=False,
+ ddof=1,
+ )
+ check_stateful(Standardize, True, r20, r20, center=False, rescale=False, ddof=1)
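check_stateful() hammers one invariant: a stateful transform must produce identical output whether the training data arrives as one chunk or many, as plain arrays or pandas objects. The protocol behind Center and Standardize is memorize_chunk() once per chunk, then memorize_finish(), then transform(). A minimal custom transform in the same mold (MaxAbsScale is a made-up example, not part of patsy):

    import numpy as np
    from patsy import stateful_transform, dmatrix

    class MaxAbsScale(object):
        """Scale by the largest absolute value seen while memorizing."""

        def __init__(self):
            self._max = 0.0

        def memorize_chunk(self, x):
            self._max = max(self._max, np.max(np.abs(x)))  # one call per chunk

        def memorize_finish(self):
            pass  # called once, after the last chunk

        def transform(self, x):
            return np.asarray(x) / self._max

    max_abs_scale = stateful_transform(MaxAbsScale)
    mat = dmatrix("max_abs_scale(x)", {"x": [1.0, -4.0, 2.0]})
    assert np.allclose(mat, [[1, 0.25], [1, -1.0], [1, 0.5]])
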
diff --git a/patsy/tokens.py b/patsy/tokens.py
index 542d464..9cc500c 100644
--- a/patsy/tokens.py
+++ b/patsy/tokens.py
@@ -17,8 +17,8 @@
from patsy import PatsyError
from patsy.origin import Origin
-__all__ = ["python_tokenize", "pretty_untokenize",
- "normalize_token_spacing"]
+__all__ = ["python_tokenize", "pretty_untokenize", "normalize_token_spacing"]
+
# A convenience wrapper around tokenize.generate_tokens. yields tuples
# (tokenize type, token string, origin object)
@@ -29,7 +29,7 @@ def python_tokenize(code):
code = code.replace("\n", " ").strip()
it = tokenize.generate_tokens(StringIO(code).readline)
try:
- for (pytype, string, (_, start), (_, end), code) in it:
+ for pytype, string, (_, start), (_, end), code in it:
if pytype == tokenize.ENDMARKER:
break
if pytype in (tokenize.NL, tokenize.NEWLINE):
@@ -37,13 +37,13 @@ def python_tokenize(code):
continue
origin = Origin(code, start, end)
if pytype == tokenize.ERRORTOKEN:
- raise PatsyError("error tokenizing input "
- "(maybe an unclosed string?)",
- origin)
+ raise PatsyError(
+ "error tokenizing input " "(maybe an unclosed string?)", origin
+ )
if pytype == tokenize.COMMENT:
raise PatsyError("comments are not allowed", origin)
yield (pytype, string, origin)
- else: # pragma: no cover
+ else: # pragma: no cover
raise ValueError("stream ended without ENDMARKER?!?")
except tokenize.TokenError as e:
# TokenError is raised iff the tokenizer thinks that there is
@@ -63,40 +63,55 @@ def python_tokenize(code):
assert "EOF in multi-line" in e.args[0]
return
+
def test_python_tokenize():
code = "a + (foo * -1)"
tokens = list(python_tokenize(code))
- expected = [(tokenize.NAME, "a", Origin(code, 0, 1)),
- (tokenize.OP, "+", Origin(code, 2, 3)),
- (tokenize.OP, "(", Origin(code, 4, 5)),
- (tokenize.NAME, "foo", Origin(code, 5, 8)),
- (tokenize.OP, "*", Origin(code, 9, 10)),
- (tokenize.OP, "-", Origin(code, 11, 12)),
- (tokenize.NUMBER, "1", Origin(code, 12, 13)),
- (tokenize.OP, ")", Origin(code, 13, 14))]
+ expected = [
+ (tokenize.NAME, "a", Origin(code, 0, 1)),
+ (tokenize.OP, "+", Origin(code, 2, 3)),
+ (tokenize.OP, "(", Origin(code, 4, 5)),
+ (tokenize.NAME, "foo", Origin(code, 5, 8)),
+ (tokenize.OP, "*", Origin(code, 9, 10)),
+ (tokenize.OP, "-", Origin(code, 11, 12)),
+ (tokenize.NUMBER, "1", Origin(code, 12, 13)),
+ (tokenize.OP, ")", Origin(code, 13, 14)),
+ ]
assert tokens == expected
code2 = "a + (b"
tokens2 = list(python_tokenize(code2))
- expected2 = [(tokenize.NAME, "a", Origin(code2, 0, 1)),
- (tokenize.OP, "+", Origin(code2, 2, 3)),
- (tokenize.OP, "(", Origin(code2, 4, 5)),
- (tokenize.NAME, "b", Origin(code2, 5, 6))]
+ expected2 = [
+ (tokenize.NAME, "a", Origin(code2, 0, 1)),
+ (tokenize.OP, "+", Origin(code2, 2, 3)),
+ (tokenize.OP, "(", Origin(code2, 4, 5)),
+ (tokenize.NAME, "b", Origin(code2, 5, 6)),
+ ]
assert tokens2 == expected2
import pytest
+
pytest.raises(PatsyError, list, python_tokenize("a b # c"))
import pytest
- pytest.raises(PatsyError, list, python_tokenize("a b \"c"))
-_python_space_both = (list("+-*/%&^|<>")
- + ["==", "<>", "!=", "<=", ">=",
- "<<", ">>", "**", "//"])
-_python_space_before = (_python_space_both
- + ["!", "~"])
-_python_space_after = (_python_space_both
- + [",", ":"])
+ pytest.raises(PatsyError, list, python_tokenize('a b "c'))
+
+
+_python_space_both = list("+-*/%&^|<>") + [
+ "==",
+ "<>",
+ "!=",
+ "<=",
+ ">=",
+ "<<",
+ ">>",
+ "**",
+ "//",
+]
+_python_space_before = _python_space_both + ["!", "~"]
+_python_space_after = _python_space_both + [",", ":"]
+
def pretty_untokenize(typed_tokens):
text = []
@@ -106,8 +121,7 @@ def pretty_untokenize(typed_tokens):
prev_was_object_like = False
brackets = []
for token_type, token in typed_tokens:
- assert token_type not in (tokenize.INDENT, tokenize.DEDENT,
- tokenize.NL)
+ assert token_type not in (tokenize.INDENT, tokenize.DEDENT, tokenize.NL)
if token_type == tokenize.NEWLINE:
continue
if token_type == tokenize.ENDMARKER:
@@ -123,8 +137,8 @@ def pretty_untokenize(typed_tokens):
brackets.append(token)
elif brackets and token in (")", "]", "}"):
brackets.pop()
- this_wants_space_before = (token in _python_space_before)
- this_wants_space_after = (token in _python_space_after)
+ this_wants_space_before = token in _python_space_before
+ this_wants_space_after = token in _python_space_after
# Special case for slice syntax: foo[:10]
# Otherwise ":" is spaced after, like: "{1: ...}", "if a: ..."
if token == ":" and brackets and brackets[-1] == "[":
@@ -149,19 +163,22 @@ def pretty_untokenize(typed_tokens):
text.append(token)
prev_wants_space = this_wants_space_after
prev_was_space_delim = False
- if (token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING)
- or token == ")"):
+ if (
+ token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING)
+ or token == ")"
+ ):
prev_was_object_like = True
else:
prev_was_object_like = False
prev_was_open_paren_or_comma = token in ("(", ",")
return "".join(text)
+
def normalize_token_spacing(code):
- tokens = [(t[0], t[1])
- for t in tokenize.generate_tokens(StringIO(code).readline)]
+ tokens = [(t[0], t[1]) for t in tokenize.generate_tokens(StringIO(code).readline)]
return pretty_untokenize(tokens)
+
def test_pretty_untokenize_and_normalize_token_spacing():
assert normalize_token_spacing("1 + 1") == "1 + 1"
assert normalize_token_spacing("1+1") == "1 + 1"
diff --git a/patsy/user_util.py b/patsy/user_util.py
index c40c946..080af84 100644
--- a/patsy/user_util.py
+++ b/patsy/user_util.py
@@ -14,6 +14,7 @@
from patsy.categorical import C
from patsy.util import no_pickling, assert_no_pickling
+
def balanced(**kwargs):
"""balanced(factor_name=num_levels, [factor_name=num_levels, ..., repeat=1])
@@ -54,15 +55,41 @@ def balanced(**kwargs):
data[name] = list(value) * repeat
return data
+
def test_balanced():
data = balanced(a=2, b=3)
assert data["a"] == ["a1", "a1", "a1", "a2", "a2", "a2"]
assert data["b"] == ["b1", "b2", "b3", "b1", "b2", "b3"]
data = balanced(a=2, b=3, repeat=2)
- assert data["a"] == ["a1", "a1", "a1", "a2", "a2", "a2",
- "a1", "a1", "a1", "a2", "a2", "a2"]
- assert data["b"] == ["b1", "b2", "b3", "b1", "b2", "b3",
- "b1", "b2", "b3", "b1", "b2", "b3"]
+ assert data["a"] == [
+ "a1",
+ "a1",
+ "a1",
+ "a2",
+ "a2",
+ "a2",
+ "a1",
+ "a1",
+ "a1",
+ "a2",
+ "a2",
+ "a2",
+ ]
+ assert data["b"] == [
+ "b1",
+ "b2",
+ "b3",
+ "b1",
+ "b2",
+ "b3",
+ "b1",
+ "b2",
+ "b3",
+ "b1",
+ "b2",
+ "b3",
+ ]
+
def demo_data(*names, **kwargs):
"""demo_data(*names, nlevels=2, min_rows=5)
@@ -119,6 +146,7 @@ def demo_data(*names, **kwargs):
data[name] = r.normal(size=num_rows)
return data
+
def test_demo_data():
d1 = demo_data("a", "b", "x")
assert sorted(d1.keys()) == ["a", "b", "x"]
@@ -136,9 +164,11 @@ def test_demo_data():
assert len(demo_data("a", "b", "x", min_rows=10, nlevels=3)["x"]) == 18
import pytest
+
pytest.raises(PatsyError, demo_data, "a", "b", "__123")
pytest.raises(TypeError, demo_data, "a", "b", asdfasdf=123)
+
class LookupFactor(object):
"""A simple factor class that simply looks up a named entry in the given
data.
@@ -166,9 +196,10 @@ class LookupFactor(object):
.. versionadded:: 0.2.0
The ``force_categorical`` and related arguments.
"""
- def __init__(self, varname,
- force_categorical=False, contrast=None, levels=None,
- origin=None):
+
+ def __init__(
+ self, varname, force_categorical=False, contrast=None, levels=None, origin=None
+ ):
self._varname = varname
self._force_categorical = force_categorical
self._contrast = contrast
@@ -187,26 +218,35 @@ def __repr__(self):
return "%s(%r)" % (self.__class__.__name__, self._varname)
def __eq__(self, other):
- return (isinstance(other, LookupFactor)
- and self._varname == other._varname
- and self._force_categorical == other._force_categorical
- and self._contrast == other._contrast
- and self._levels == other._levels)
+ return (
+ isinstance(other, LookupFactor)
+ and self._varname == other._varname
+ and self._force_categorical == other._force_categorical
+ and self._contrast == other._contrast
+ and self._levels == other._levels
+ )
def __ne__(self, other):
return not self == other
def __hash__(self):
- return hash((LookupFactor, self._varname,
- self._force_categorical, self._contrast, self._levels))
+ return hash(
+ (
+ LookupFactor,
+ self._varname,
+ self._force_categorical,
+ self._contrast,
+ self._levels,
+ )
+ )
def memorize_passes_needed(self, state, eval_env):
return 0
- def memorize_chunk(self, state, which_pass, data): # pragma: no cover
+ def memorize_chunk(self, state, which_pass, data): # pragma: no cover
assert False
- def memorize_finish(self, state, which_pass): # pragma: no cover
+ def memorize_finish(self, state, which_pass): # pragma: no cover
assert False
def eval(self, memorize_state, data):
@@ -217,6 +257,7 @@ def eval(self, memorize_state, data):
__getstate__ = no_pickling
+
def test_LookupFactor():
l_a = LookupFactor("a")
assert l_a.name() == "a"
@@ -231,14 +272,14 @@ def test_LookupFactor():
l_with_origin = LookupFactor("b", origin="asdf")
assert l_with_origin.origin == "asdf"
- l_c = LookupFactor("c", force_categorical=True,
- contrast="CONTRAST", levels=(1, 2))
+ l_c = LookupFactor("c", force_categorical=True, contrast="CONTRAST", levels=(1, 2))
box = l_c.eval({}, {"c": [1, 1, 2]})
assert box.data == [1, 1, 2]
assert box.contrast == "CONTRAST"
assert box.levels == (1, 2)
import pytest
+
pytest.raises(ValueError, LookupFactor, "nc", contrast="CONTRAST")
pytest.raises(ValueError, LookupFactor, "nc", levels=(1, 2))
diff --git a/patsy/util.py b/patsy/util.py
index 8a855ef..2c1c19d 100644
--- a/patsy/util.py
+++ b/patsy/util.py
@@ -4,24 +4,32 @@
# Some generic utilities.
-__all__ = ["atleast_2d_column_default", "uniqueify_list",
- "widest_float", "widest_complex", "wide_dtype_for", "widen",
- "repr_pretty_delegate", "repr_pretty_impl",
- "SortAnythingKey", "safe_scalar_isnan", "safe_isnan",
- "iterable",
- "have_pandas",
- "have_pandas_categorical",
- "have_pandas_categorical_dtype",
- "pandas_Categorical_from_codes",
- "pandas_Categorical_categories",
- "pandas_Categorical_codes",
- "safe_is_pandas_categorical_dtype",
- "safe_is_pandas_categorical",
- "safe_issubdtype",
- "no_pickling",
- "assert_no_pickling",
- "safe_string_eq",
- ]
+__all__ = [
+ "atleast_2d_column_default",
+ "uniqueify_list",
+ "widest_float",
+ "widest_complex",
+ "wide_dtype_for",
+ "widen",
+ "repr_pretty_delegate",
+ "repr_pretty_impl",
+ "SortAnythingKey",
+ "safe_scalar_isnan",
+ "safe_isnan",
+ "iterable",
+ "have_pandas",
+ "have_pandas_categorical",
+ "have_pandas_categorical_dtype",
+ "pandas_Categorical_from_codes",
+ "pandas_Categorical_categories",
+ "pandas_Categorical_codes",
+ "safe_is_pandas_categorical_dtype",
+ "safe_is_pandas_categorical",
+ "safe_issubdtype",
+ "no_pickling",
+ "assert_no_pickling",
+ "safe_string_eq",
+]
import sys
from io import StringIO
@@ -39,17 +47,22 @@
# Pandas versions < 0.9.0 don't have Categorical
# Can drop this guard whenever we drop support for such older versions of
# pandas.
-have_pandas_categorical = (have_pandas and hasattr(pandas, "Categorical"))
+have_pandas_categorical = have_pandas and hasattr(pandas, "Categorical")
if not have_pandas:
_pandas_is_categorical_dtype = None
else:
if hasattr(pandas, "CategoricalDtype"): # pandas >= 0.25
- _pandas_is_categorical_dtype = lambda x: isinstance(getattr(x, "dtype", x), pandas.CategoricalDtype)
+ _pandas_is_categorical_dtype = lambda x: isinstance(
+ getattr(x, "dtype", x), pandas.CategoricalDtype
+ )
elif hasattr(pandas, "api"): # pandas >= 0.19
- _pandas_is_categorical_dtype = getattr(pandas.api.types, "is_categorical_dtype", None)
+ _pandas_is_categorical_dtype = getattr(
+ pandas.api.types, "is_categorical_dtype", None
+ )
else: # pandas <=0.18
- _pandas_is_categorical_dtype = getattr(pandas.core.common,
- "is_categorical_dtype", None)
+ _pandas_is_categorical_dtype = getattr(
+ pandas.core.common, "is_categorical_dtype", None
+ )
have_pandas_categorical_dtype = _pandas_is_categorical_dtype is not None
# The handling of the `copy` keyword has been changed since numpy>=2.
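copy_if_needed, referenced in the signature below, exists because NumPy 2.0 changed np.array(..., copy=False) from "copy only if necessary" to "raise if a copy is necessary"; the old behavior is now spelled copy=None. One way such a shim can look (an illustrative sketch, not necessarily patsy's exact code):

    import numpy as np

    if np.lib.NumpyVersion(np.__version__) >= "2.0.0":
        copy_if_needed = None  # numpy>=2: None means "copy only if needed"
    else:
        copy_if_needed = False  # numpy<2: False already meant exactly that

    a = np.asarray([1.0, 2.0])
    b = np.array(a, copy=copy_if_needed)  # no copy made when none is needed
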
@@ -78,13 +91,14 @@ def asarray_or_pandas(a, copy=copy_if_needed, dtype=None, subok=False):
def test_asarray_or_pandas():
import warnings
+
assert type(asarray_or_pandas([1, 2, 3])) is np.ndarray
with warnings.catch_warnings() as w:
- warnings.filterwarnings('ignore', 'the matrix subclass',
- PendingDeprecationWarning)
+ warnings.filterwarnings(
+ "ignore", "the matrix subclass", PendingDeprecationWarning
+ )
assert type(asarray_or_pandas(np.matrix([[1, 2, 3]]))) is np.ndarray
- assert type(asarray_or_pandas(
- np.matrix([[1, 2, 3]]), subok=True)) is np.matrix
+ assert type(asarray_or_pandas(np.matrix([[1, 2, 3]]), subok=True)) is np.matrix
assert w is None
a = np.array([1, 2, 3])
assert asarray_or_pandas(a) is a
@@ -92,8 +106,7 @@ def test_asarray_or_pandas():
assert np.array_equal(a, a_copy)
a_copy[0] = 100
assert not np.array_equal(a, a_copy)
- assert np.allclose(asarray_or_pandas([1, 2, 3], dtype=float),
- [1.0, 2.0, 3.0])
+ assert np.allclose(asarray_or_pandas([1, 2, 3], dtype=float), [1.0, 2.0, 3.0])
assert asarray_or_pandas([1, 2, 3], dtype=float).dtype == np.dtype(float)
a_view = asarray_or_pandas(a, dtype=a.dtype)
a_view[0] = 99
@@ -119,9 +132,7 @@ def test_asarray_or_pandas():
s_view2[10] = 99
assert s[10] == 99
- df = pandas.DataFrame([[1, 2, 3]],
- columns=["A", "B", "C"],
- index=[10])
+ df = pandas.DataFrame([[1, 2, 3]], columns=["A", "B", "C"], index=[10])
df_view1 = asarray_or_pandas(df)
df_view1.loc[10, "A"] = 101
assert np.array_equal(df_view1.columns, ["A", "B", "C"])
@@ -150,13 +161,12 @@ def test_asarray_or_pandas():
had_pandas = have_pandas
try:
have_pandas = False
- assert (type(asarray_or_pandas(pandas.Series([1, 2, 3])))
- is np.ndarray)
- assert (type(asarray_or_pandas(pandas.DataFrame([[1, 2, 3]])))
- is np.ndarray)
+ assert type(asarray_or_pandas(pandas.Series([1, 2, 3]))) is np.ndarray
+ assert type(asarray_or_pandas(pandas.DataFrame([[1, 2, 3]]))) is np.ndarray
finally:
have_pandas = had_pandas
+
# Like np.atleast_2d, but this converts lower-dimensional arrays into columns,
# instead of rows. It also converts ndarray subclasses into basic ndarrays,
# which makes it easier to guarantee correctness. However, there are many
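# [Editorial sketch -- not part of this diff: the row-vs-column contrast in
#  doctest form, grounded in the tests below.
#    >>> np.atleast_2d([1, 2, 3]).shape
#    (1, 3)
#    >>> atleast_2d_column_default([1, 2, 3]).shape
#    (3, 1)
# ]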
@@ -179,6 +189,7 @@ def atleast_2d_column_default(a, preserve_pandas=False):
def test_atleast_2d_column_default():
import warnings
+
assert np.all(atleast_2d_column_default([1, 2, 3]) == [[1], [2], [3]])
assert atleast_2d_column_default(1).shape == (1, 1)
@@ -190,51 +201,72 @@ def test_atleast_2d_column_default():
assert atleast_2d_column_default([[1], [2], [3]]).shape == (3, 1)
with warnings.catch_warnings() as w:
- warnings.filterwarnings('ignore', 'the matrix subclass',
- PendingDeprecationWarning)
+ warnings.filterwarnings(
+ "ignore", "the matrix subclass", PendingDeprecationWarning
+ )
assert type(atleast_2d_column_default(np.matrix(1))) == np.ndarray
assert w is None
global have_pandas
if have_pandas:
- assert (type(atleast_2d_column_default(pandas.Series([1, 2])))
- == np.ndarray)
- assert (type(atleast_2d_column_default(pandas.DataFrame([[1], [2]])))
- == np.ndarray)
- assert (type(atleast_2d_column_default(pandas.Series([1, 2]),
- preserve_pandas=True))
- == pandas.DataFrame)
- assert (type(atleast_2d_column_default(pandas.DataFrame([[1], [2]]),
- preserve_pandas=True))
- == pandas.DataFrame)
+ assert type(atleast_2d_column_default(pandas.Series([1, 2]))) == np.ndarray
+ assert (
+ type(atleast_2d_column_default(pandas.DataFrame([[1], [2]]))) == np.ndarray
+ )
+ assert (
+ type(atleast_2d_column_default(pandas.Series([1, 2]), preserve_pandas=True))
+ == pandas.DataFrame
+ )
+ assert (
+ type(
+ atleast_2d_column_default(
+ pandas.DataFrame([[1], [2]]), preserve_pandas=True
+ )
+ )
+ == pandas.DataFrame
+ )
s = pandas.Series([10, 11, 12], name="hi", index=["a", "b", "c"])
df = atleast_2d_column_default(s, preserve_pandas=True)
assert isinstance(df, pandas.DataFrame)
assert np.all(df.columns == ["hi"])
assert np.all(df.index == ["a", "b", "c"])
with warnings.catch_warnings() as w:
- warnings.filterwarnings('ignore', 'the matrix subclass',
- PendingDeprecationWarning)
- assert (type(atleast_2d_column_default(np.matrix(1),
- preserve_pandas=True))
- == np.ndarray)
+ warnings.filterwarnings(
+ "ignore", "the matrix subclass", PendingDeprecationWarning
+ )
+ assert (
+ type(atleast_2d_column_default(np.matrix(1), preserve_pandas=True))
+ == np.ndarray
+ )
assert w is None
- assert (type(atleast_2d_column_default([1, 2, 3], preserve_pandas=True))
- == np.ndarray)
+ assert (
+ type(atleast_2d_column_default([1, 2, 3], preserve_pandas=True)) == np.ndarray
+ )
if have_pandas:
had_pandas = have_pandas
try:
have_pandas = False
- assert (type(atleast_2d_column_default(pandas.Series([1, 2]),
- preserve_pandas=True))
- == np.ndarray)
- assert (type(atleast_2d_column_default(pandas.DataFrame([[1], [2]]),
- preserve_pandas=True))
- == np.ndarray)
+ assert (
+ type(
+ atleast_2d_column_default(
+ pandas.Series([1, 2]), preserve_pandas=True
+ )
+ )
+ == np.ndarray
+ )
+ assert (
+ type(
+ atleast_2d_column_default(
+ pandas.DataFrame([[1], [2]]), preserve_pandas=True
+ )
+ )
+ == np.ndarray
+ )
finally:
have_pandas = had_pandas
+
# A version of .reshape() that knows how to down-convert a 1-column
# pandas.DataFrame into a pandas.Series. Useful for code that wants to be
# agnostic between 1d and 2d data, with the pattern:
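# [Editorial sketch -- not part of this diff: what the DataFrame-to-Series
#  down-conversion looks like, assuming pandas is available.
#    >>> df = pandas.DataFrame({"x": [1, 2, 3]}, index=["a", "b", "c"])
#    >>> s = pandas_friendly_reshape(df, (3,))
#    >>> isinstance(s, pandas.Series) and list(s.index) == ["a", "b", "c"]
#    True
#  Unlike a plain np.reshape, the pandas index survives the round trip.]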
@@ -254,15 +286,19 @@ def pandas_friendly_reshape(a, new_shape):
if new_shape[0] != a.shape[0]:
raise ValueError("arrays have incompatible sizes")
return a[a.columns[0]]
- raise ValueError("cannot reshape a DataFrame with shape %s to shape %s"
- % (a.shape, new_shape))
+ raise ValueError(
+ "cannot reshape a DataFrame with shape %s to shape %s" % (a.shape, new_shape)
+ )
+
def test_pandas_friendly_reshape():
import pytest
+
global have_pandas
- assert np.allclose(pandas_friendly_reshape(np.arange(10).reshape(5, 2),
- (2, 5)),
- np.arange(10).reshape(2, 5))
+ assert np.allclose(
+ pandas_friendly_reshape(np.arange(10).reshape(5, 2), (2, 5)),
+ np.arange(10).reshape(2, 5),
+ )
if have_pandas:
df = pandas.DataFrame({"x": [1, 2, 3]}, index=["a", "b", "c"])
noop = pandas_friendly_reshape(df, (3, 1))
@@ -287,6 +323,7 @@ def test_pandas_friendly_reshape():
finally:
have_pandas = had_pandas
+
def uniqueify_list(seq):
seq_new = []
seen = set()
@@ -296,46 +333,54 @@ def uniqueify_list(seq):
seen.add(obj)
return seq_new
+
def test_to_uniqueify_list():
assert uniqueify_list([1, 2, 3]) == [1, 2, 3]
assert uniqueify_list([1, 3, 3, 2, 3, 1]) == [1, 3, 2]
assert uniqueify_list([3, 2, 1, 4, 1, 2, 3]) == [3, 2, 1, 4]
+
for float_type in ("float128", "float96", "float64"):
if hasattr(np, float_type):
widest_float = getattr(np, float_type)
break
-else: # pragma: no cover
+else: # pragma: no cover
assert False
for complex_type in ("complex256", "complex196", "complex128"):
if hasattr(np, complex_type):
widest_complex = getattr(np, complex_type)
break
-else: # pragma: no cover
+else: # pragma: no cover
assert False
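# [Editorial note -- not part of this diff: both loops above rely on Python's
#  for/else. The else clause runs only if the loop finished without a break,
#  i.e. if numpy offered none of the candidate types; since float64 and
#  complex128 always exist, that branch is unreachable -- hence the
#  "pragma: no cover" markers.]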
+
def wide_dtype_for(arr):
arr = np.asarray(arr)
- if (safe_issubdtype(arr.dtype, np.integer)
- or safe_issubdtype(arr.dtype, np.floating)):
+ if safe_issubdtype(arr.dtype, np.integer) or safe_issubdtype(
+ arr.dtype, np.floating
+ ):
return widest_float
elif safe_issubdtype(arr.dtype, np.complexfloating):
return widest_complex
raise ValueError("cannot widen a non-numeric type %r" % (arr.dtype,))
+
def widen(arr):
return np.asarray(arr, dtype=wide_dtype_for(arr))
+
def test_wide_dtype_for_and_widen():
assert np.allclose(widen([1, 2, 3]), [1, 2, 3])
assert widen([1, 2, 3]).dtype == widest_float
assert np.allclose(widen([1.0, 2.0, 3.0]), [1, 2, 3])
assert widen([1.0, 2.0, 3.0]).dtype == widest_float
- assert np.allclose(widen([1+0j, 2, 3]), [1, 2, 3])
- assert widen([1+0j, 2, 3]).dtype == widest_complex
+ assert np.allclose(widen([1 + 0j, 2, 3]), [1, 2, 3])
+ assert widen([1 + 0j, 2, 3]).dtype == widest_complex
import pytest
+
pytest.raises(ValueError, widen, ["hi"])
+
class PushbackAdapter(object):
def __init__(self, it):
self._it = it
@@ -353,6 +398,7 @@ def next(self):
else:
# May raise StopIteration
return next(self._it)
+
__next__ = next
def peek(self):
@@ -371,6 +417,7 @@ def has_more(self):
else:
return True
+
def test_PushbackAdapter():
it = PushbackAdapter(iter([1, 2, 3, 4]))
assert it.has_more()
@@ -387,6 +434,7 @@ def test_PushbackAdapter():
assert list(it) == [20, 10, 3, 4]
assert not it.has_more()
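# [Editorial sketch -- not part of this diff: the look-ahead pattern this
#  adapter exists for, assuming peek/push_back behave as the test suggests.
#    >>> it = PushbackAdapter(iter([1, 2, 3]))
#    >>> it.peek()   # inspect without consuming
#    1
#    >>> next(it)    # the 1 is still there
#    1
#    >>> it.push_back(0); next(it)  # pushed-back values come out first
#    0
# ]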
+
# The IPython pretty-printer gives very nice output that is difficult to get
# otherwise, e.g., look how much more readable this is than if it were all
# smooshed onto one line:
@@ -407,6 +455,7 @@ def test_PushbackAdapter():
# Pretty printer docs:
# http://ipython.org/ipython-doc/dev/api/generated/IPython.lib.pretty.html
+
class _MiniPPrinter(object):
def __init__(self):
self._out = StringIO()
@@ -433,10 +482,12 @@ def pretty(self, obj):
def getvalue(self):
return self._out.getvalue()
+
def _mini_pretty(obj):
- printer = _MiniPPrinter()
- printer.pretty(obj)
- return printer.getvalue()
+ printer = _MiniPPrinter()
+ printer.pretty(obj)
+ return printer.getvalue()
+
def repr_pretty_delegate(obj):
# If IPython is already loaded, then might as well use it. (Most commonly
@@ -453,19 +504,23 @@ def repr_pretty_delegate(obj):
# in their test suite (see patsy bug #12).
if optional_dep_ok and "IPython" in sys.modules:
from IPython.lib.pretty import pretty
+
return pretty(obj)
else:
return _mini_pretty(obj)
+
def repr_pretty_impl(p, obj, args, kwargs=[]):
name = obj.__class__.__name__
p.begin_group(len(name) + 1, "%s(" % (name,))
started = [False]
+
def new_item():
if started[0]:
p.text(",")
p.breakable()
started[0] = True
+
for arg in args:
new_item()
p.pretty(arg)
@@ -476,15 +531,18 @@ def new_item():
p.end_group(len(label) + 1, "")
p.end_group(len(name) + 1, ")")
+
def test_repr_pretty():
assert repr_pretty_delegate("asdf") == "'asdf'"
printer = _MiniPPrinter()
+
class MyClass(object):
pass
- repr_pretty_impl(printer, MyClass(),
- ["a", 1], [("foo", "bar"), ("asdf", "asdf")])
+
+ repr_pretty_impl(printer, MyClass(), ["a", 1], [("foo", "bar"), ("asdf", "asdf")])
assert printer.getvalue() == "MyClass('a', 1, foo='bar', asdf='asdf')"
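# [Editorial sketch -- not part of this diff: the way a class typically plugs
#  into this machinery -- delegate __repr__ and implement the
#  _repr_pretty_(p, cycle) hook that IPython.lib.pretty looks for. The class
#  name here is hypothetical:
#
#    class Example(object):
#        def __init__(self, a, b):
#            self.a, self.b = a, b
#        __repr__ = repr_pretty_delegate
#        def _repr_pretty_(self, p, cycle):
#            assert not cycle
#            repr_pretty_impl(p, self, [self.a], [("b", self.b)])
#
#  repr(Example(1, 2)) then yields "Example(1, b=2)".]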
+
# In Python 3, objects of different types are not generally comparable, so a
# list of heterogeneous types cannot be sorted. This implements a Python 2
# style comparison for arbitrary types. (It works on Python 2 too, but just
@@ -537,25 +595,38 @@ def __lt__(self, other):
if self.obj == other.obj:
return False
# Otherwise, we break ties based on class name and memory position
- return ((self.obj.__class__.__name__, id(self.obj))
- < (other.obj.__class__.__name__, id(other.obj)))
+ return (self.obj.__class__.__name__, id(self.obj)) < (
+ other.obj.__class__.__name__,
+ id(other.obj),
+ )
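# [Editorial note -- not part of this diff: the (class name, id) tie-break
#  makes the ordering deterministic within a single process but arbitrary
#  across runs; that is fine here, since the goal is only to obtain *some*
#  total order over heterogeneous objects.]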
+
def test_SortAnythingKey():
assert sorted([20, 10, 0, 15], key=SortAnythingKey) == [0, 10, 15, 20]
assert sorted([10, -1.5], key=SortAnythingKey) == [-1.5, 10]
assert sorted([10, "a", 20.5, "b"], key=SortAnythingKey) == [10, 20.5, "a", "b"]
+
class a(object):
pass
+
class b(object):
pass
+
class z(object):
pass
+
a_obj = a()
b_obj = b()
z_obj = z()
o_obj = object()
- assert (sorted([z_obj, a_obj, 1, b_obj, o_obj], key=SortAnythingKey)
- == [1, a_obj, b_obj, o_obj, z_obj])
+ assert sorted([z_obj, a_obj, 1, b_obj, o_obj], key=SortAnythingKey) == [
+ 1,
+ a_obj,
+ b_obj,
+ o_obj,
+ z_obj,
+ ]
+
# NaN checking functions that work on arbitrary objects, on old Python
# versions (math.isnan is only in 2.6+), etc.
@@ -564,8 +635,11 @@ def safe_scalar_isnan(x):
return np.isnan(float(x))
except (TypeError, ValueError, NotImplementedError):
return False
+
+
safe_isnan = np.vectorize(safe_scalar_isnan, otypes=[bool])
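# [Editorial note -- not part of this diff: otypes=[bool] matters. Without it,
#  np.vectorize infers the output dtype by calling the function on the first
#  element, which fails outright on empty input; pinning the dtype keeps
#  safe_isnan(np.array([])) well-defined.]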
+
def test_safe_scalar_isnan():
assert not safe_scalar_isnan(True)
assert not safe_scalar_isnan(None)
@@ -577,15 +651,18 @@ def test_safe_scalar_isnan():
assert safe_scalar_isnan(np.float32(np.nan))
assert safe_scalar_isnan(float(np.nan))
+
def test_safe_isnan():
- assert np.array_equal(safe_isnan([1, True, None, np.nan, "asdf"]),
- [False, False, False, True, False])
+ assert np.array_equal(
+ safe_isnan([1, True, None, np.nan, "asdf"]), [False, False, False, True, False]
+ )
assert safe_isnan(np.nan).ndim == 0
assert safe_isnan(np.nan)
assert not safe_isnan(None)
# raw isnan raises a *different* error for strings than for objects:
assert not safe_isnan("asdf")
+
def iterable(obj):
try:
iter(obj)
@@ -593,6 +670,7 @@ def iterable(obj):
return False
return True
+
def test_iterable():
assert iterable("asdf")
assert iterable([])
@@ -600,6 +678,7 @@ def test_iterable():
assert not iterable(1)
assert not iterable(iterable)
+
##### Handling Pandas's categorical stuff is horrible and hateful
# Basically they decided that they didn't like how numpy does things, so their
@@ -616,6 +695,7 @@ def test_iterable():
# Also there are hoops to jump through to handle both the old style
# (Categorical objects) and new-style (Series with dtype="category").
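# [Editorial sketch -- not part of this diff: the two spellings the shims
#  below have to treat uniformly, assuming a pandas recent enough to have
#  Categorical.from_codes.
#    >>> old_style = pandas.Categorical.from_codes([1, 0], ["a", "b"])
#    >>> new_style = pandas.Series(["b", "a"], dtype="category")
#    >>> safe_is_pandas_categorical(old_style)
#    True
#    >>> safe_is_pandas_categorical(new_style)
#    True
# ]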
+
# Needed to support pandas < 0.15
def pandas_Categorical_from_codes(codes, categories):
assert have_pandas_categorical
@@ -628,6 +708,7 @@ def pandas_Categorical_from_codes(codes, categories):
else:
return pandas.Categorical(codes, categories)
+
def test_pandas_Categorical_from_codes():
if not have_pandas_categorical:
return
@@ -635,6 +716,7 @@ def test_pandas_Categorical_from_codes():
assert np.all(np.asarray(c)[:-1] == ["b", "b", "a"])
assert np.isnan(np.asarray(c)[-1])
+
# Needed to support pandas < 0.15
def pandas_Categorical_categories(cat):
# In 0.15+, a categorical Series has a .cat attribute which is similar to
@@ -647,6 +729,7 @@ def pandas_Categorical_categories(cat):
else:
return cat.levels
+
# Needed to support pandas < 0.15
def pandas_Categorical_codes(cat):
# In 0.15+, a categorical Series has a .cat attribute which is a
@@ -659,6 +742,7 @@ def pandas_Categorical_codes(cat):
else:
return cat.labels
+
def test_pandas_Categorical_accessors():
if not have_pandas_categorical:
return
@@ -671,12 +755,14 @@ def test_pandas_Categorical_accessors():
assert np.all(pandas_Categorical_categories(s) == ["a", "b"])
assert np.all(pandas_Categorical_codes(s) == [1, 1, 0, -1])
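# [Editorial note -- not part of this diff: in the codes asserted above, -1 is
#  pandas's sentinel for a missing value, so the final entry of the test
#  Series comes back as code -1 rather than as an index into the categories.]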
+
# Needed to support pandas >= 0.15 (!)
def safe_is_pandas_categorical_dtype(dt):
if not have_pandas_categorical_dtype:
return False
return _pandas_is_categorical_dtype(dt)
+
# Needed to support pandas >= 0.15 (!)
def safe_is_pandas_categorical(data):
if not have_pandas_categorical:
@@ -687,6 +773,7 @@ def safe_is_pandas_categorical(data):
return safe_is_pandas_categorical_dtype(data.dtype)
return False
+
def test_safe_is_pandas_categorical():
assert not safe_is_pandas_categorical(np.arange(10))
@@ -698,6 +785,7 @@ def test_safe_is_pandas_categorical():
s_obj = pandas.Series(["a", "b"], dtype="category")
assert safe_is_pandas_categorical(s_obj)
+
# Needed to support pandas >= 0.15 (!)
# Calling np.issubdtype on a pandas categorical will blow up -- the officially
# recommended solution is to replace every piece of code like
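# [Editorial sketch -- not part of this diff: the mechanical rewrite this
#  helper enables. Instead of
#      np.issubdtype(X.dtype, np.number)      # can blow up on categoricals
#  patsy code writes
#      safe_issubdtype(X.dtype, np.number)    # simply False for categoricals
#  where X might be, e.g., a pandas.Series with dtype="category".]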
@@ -715,6 +803,7 @@ def safe_issubdtype(dt1, dt2):
return False
return np.issubdtype(dt1, dt2)
+
def test_safe_issubdtype():
assert safe_issubdtype(int, np.integer)
assert safe_issubdtype(np.dtype(float), np.floating)
@@ -725,17 +814,22 @@ def test_safe_issubdtype():
bad_dtype = pandas.Series(["a", "b"], dtype="category")
assert not safe_issubdtype(bad_dtype, np.integer)
+
def no_pickling(*args, **kwargs):
raise NotImplementedError(
"Sorry, pickling not yet supported. "
"See https://github.com/pydata/patsy/issues/26 if you want to "
- "help.")
+ "help."
+ )
+
def assert_no_pickling(obj):
import pickle
import pytest
+
pytest.raises(NotImplementedError, pickle.dumps, obj)
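# [Editorial sketch -- not part of this diff: how the pair is used together.
#  A class opts out of pickling by assigning the stub as its __getstate__,
#  and a test asserts the behaviour; the class name here is hypothetical.
#
#    class NotPicklable(object):
#        __getstate__ = no_pickling
#
#    def test_not_picklable():
#        assert_no_pickling(NotPicklable())
# ]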
+
# Use like:
# if safe_string_eq(constraints, "center"):
# ...
@@ -747,6 +841,7 @@ def safe_string_eq(obj, value):
else:
return False
+
def test_safe_string_eq():
assert safe_string_eq("foo", "foo")
assert not safe_string_eq("foo", "bar")
diff --git a/setup.py b/setup.py
index 89eb46d..e1b63c7 100644
--- a/setup.py
+++ b/setup.py
@@ -2,8 +2,10 @@
from setuptools import setup
-DESC = ("A Python package for describing statistical models and for "
- "building design matrices.")
+DESC = (
+ "A Python package for describing statistical models and for "
+ "building design matrices."
+)
LONG_DESC = open("README.md").read()
@@ -12,7 +14,7 @@
setup(
name="patsy",
- version=__version__,
+ version=__version__, # noqa: F821
description=DESC,
long_description=LONG_DESC,
long_description_content_type="text/markdown",
@@ -27,24 +29,24 @@
"numpy >= 1.4",
],
extras_require={
- "test": ["pytest", "pytest-cov", "scipy"],
+ "test": ["pytest", "pytest-cov", "scipy"],
},
- python_requires='>=3.6',
+ python_requires=">=3.6",
classifiers=[
- "Development Status :: 4 - Beta",
- "Intended Audience :: Developers",
- "Intended Audience :: Science/Research",
- "Intended Audience :: Financial and Insurance Industry",
- "License :: OSI Approved :: BSD License",
- "Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.6",
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3.8",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
- "Programming Language :: Python :: 3.11",
- "Programming Language :: Python :: 3.12",
- "Programming Language :: Python :: 3.13",
- "Topic :: Scientific/Engineering",
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Science/Research",
+ "Intended Audience :: Financial and Insurance Industry",
+ "License :: OSI Approved :: BSD License",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
+ "Topic :: Scientific/Engineering",
],
)