Spline regression¶
Patsy offers a set of specific stateful transforms (for more details about stateful transforms see Stateful transforms) that you can use in formulas to generate splines bases and express non-linear fits.
General B-splines¶
B-spline bases can be generated with the bs()
stateful
transform. The spline bases returned by bs()
are designed to be
compatible with those produced by the R bs
function.
The following code illustrates a typical basis and the resulting spline:
In [1]: import matplotlib.pyplot as plt
In [2]: plt.title("B-spline basis example (degree=3)");
In [3]: x = np.linspace(0., 1., 100)
In [4]: y = dmatrix("bs(x, df=6, degree=3, include_intercept=True) - 1", {"x": x})
ImportErrorTraceback (most recent call last)
<ipython-input-4-6f9fbf454421> in <module>()
----> 1 y = dmatrix("bs(x, df=6, degree=3, include_intercept=True) - 1", {"x": x})
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in dmatrix(formula_like, data, eval_env, NA_action, return_type)
289 eval_env = EvalEnvironment.capture(eval_env, reference=1)
290 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 291 NA_action, return_type)
292 if lhs.shape[1] != 0:
293 raise PatsyError("encountered outcome variables for a model "
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
68 data_iter_maker,
69 eval_env,
---> 70 NA_action)
71 else:
72 return None
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
694 factor_states,
695 data_iter_maker,
--> 696 NA_action)
697 # Now we need the factor infos, which encapsulate the knowledge of
698 # how to turn any given factor into a chunk of data:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in _examine_factor_types(factors, factor_states, data_iter_maker, NA_action)
441 for data in data_iter_maker():
442 for factor in list(examine_needed):
--> 443 value = factor.eval(factor_states[factor], data)
444 if factor in cat_sniffers or guess_categorical(value):
445 if factor not in cat_sniffers:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in eval(self, memorize_state, data)
564 return self._eval(memorize_state["eval_code"],
565 memorize_state,
--> 566 data)
567
568 __getstate__ = no_pickling
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
34 def call_and_wrap_exc(msg, origin, f, *args, **kwargs):
35 try:
---> 36 return f(*args, **kwargs)
37 except Exception as e:
38 if sys.version_info[0] >= 3:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in eval(self, expr, source_name, inner_namespace)
164 code = compile(expr, source_name, "eval", self.flags, False)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
168 @classmethod
<string> in <module>()
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/splines.py in transform(self, x, df, knots, degree, include_intercept, lower_bound, upper_bound)
237 include_intercept=False,
238 lower_bound=None, upper_bound=None):
--> 239 basis = _eval_bspline_basis(x, self._all_knots, self._degree)
240 if not include_intercept:
241 basis = basis[:, 1:]
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/splines.py in _eval_bspline_basis(x, knots, degree)
20 from scipy.interpolate import splev
21 except ImportError: # pragma: no cover
---> 22 raise ImportError("spline functionality requires scipy")
23 # 'knots' are assumed to be already pre-processed. E.g. usually you
24 # want to include duplicate copies of boundary knots; you should do
ImportError: spline functionality requires scipy
# Define some coefficients
In [5]: b = np.array([1.3, 0.6, 0.9, 0.4, 1.6, 0.7])
# Plot B-spline basis functions (colored curves) each multiplied by its coeff
In [6]: plt.plot(x, y*b);
# Plot the spline itself (sum of the basis functions, thick black curve)
In [7]: plt.plot(x, np.dot(y, b), color='k', linewidth=3);

In the following example we first set up our B-spline basis using some data and then make predictions on a new set of data:
In [8]: data = {"x": np.linspace(0., 1., 100)}
In [9]: design_matrix = dmatrix("bs(x, df=4)", data)
ImportErrorTraceback (most recent call last)
<ipython-input-9-43f0fa39c04c> in <module>()
----> 1 design_matrix = dmatrix("bs(x, df=4)", data)
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in dmatrix(formula_like, data, eval_env, NA_action, return_type)
289 eval_env = EvalEnvironment.capture(eval_env, reference=1)
290 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 291 NA_action, return_type)
292 if lhs.shape[1] != 0:
293 raise PatsyError("encountered outcome variables for a model "
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
68 data_iter_maker,
69 eval_env,
---> 70 NA_action)
71 else:
72 return None
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
694 factor_states,
695 data_iter_maker,
--> 696 NA_action)
697 # Now we need the factor infos, which encapsulate the knowledge of
698 # how to turn any given factor into a chunk of data:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in _examine_factor_types(factors, factor_states, data_iter_maker, NA_action)
441 for data in data_iter_maker():
442 for factor in list(examine_needed):
--> 443 value = factor.eval(factor_states[factor], data)
444 if factor in cat_sniffers or guess_categorical(value):
445 if factor not in cat_sniffers:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in eval(self, memorize_state, data)
564 return self._eval(memorize_state["eval_code"],
565 memorize_state,
--> 566 data)
567
568 __getstate__ = no_pickling
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
34 def call_and_wrap_exc(msg, origin, f, *args, **kwargs):
35 try:
---> 36 return f(*args, **kwargs)
37 except Exception as e:
38 if sys.version_info[0] >= 3:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in eval(self, expr, source_name, inner_namespace)
164 code = compile(expr, source_name, "eval", self.flags, False)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
168 @classmethod
<string> in <module>()
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/splines.py in transform(self, x, df, knots, degree, include_intercept, lower_bound, upper_bound)
237 include_intercept=False,
238 lower_bound=None, upper_bound=None):
--> 239 basis = _eval_bspline_basis(x, self._all_knots, self._degree)
240 if not include_intercept:
241 basis = basis[:, 1:]
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/splines.py in _eval_bspline_basis(x, knots, degree)
20 from scipy.interpolate import splev
21 except ImportError: # pragma: no cover
---> 22 raise ImportError("spline functionality requires scipy")
23 # 'knots' are assumed to be already pre-processed. E.g. usually you
24 # want to include duplicate copies of boundary knots; you should do
ImportError: spline functionality requires scipy
In [10]: new_data = {"x": [0.1, 0.25, 0.9]}
In [11]: build_design_matrices([design_matrix.design_info], new_data)[0]
NameErrorTraceback (most recent call last)
<ipython-input-11-f9afc2e7fad3> in <module>()
----> 1 build_design_matrices([design_matrix.design_info], new_data)[0]
NameError: name 'design_matrix' is not defined
bs()
can produce B-spline bases of arbitrary degrees – e.g.,
degree=0
will give produce piecewise-constant functions,
degree=1
will produce piecewise-linear functions, and the default
degree=3
produces cubic splines. The next section describes more
specialized functions for producing different types of cubic splines.
Natural and cyclic cubic regression splines¶
Natural and cyclic cubic regression splines are provided through the stateful
transforms cr()
and cc()
respectively. Here the spline is
parameterized directly using its values at the knots. These splines were designed
to be compatible with those found in the R package
mgcv
(these are called cr, cs and cc in the context of mgcv), but
can be used with any model.
Warning
Note that the compatibility with mgcv applies only to the generation of spline bases: we do not implement any kind of mgcv-compatible penalized fitting process. Thus these spline bases can be used to precisely reproduce predictions from a model previously fitted with mgcv, or to serve as building blocks for other regression models (like OLS).
Here are some illustrations of typical natural and cyclic spline bases:
In [12]: plt.title("Natural cubic regression spline basis example");
In [13]: y = dmatrix("cr(x, df=6) - 1", {"x": x})
ImportErrorTraceback (most recent call last)
<ipython-input-13-69b1a79a20ea> in <module>()
----> 1 y = dmatrix("cr(x, df=6) - 1", {"x": x})
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in dmatrix(formula_like, data, eval_env, NA_action, return_type)
289 eval_env = EvalEnvironment.capture(eval_env, reference=1)
290 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 291 NA_action, return_type)
292 if lhs.shape[1] != 0:
293 raise PatsyError("encountered outcome variables for a model "
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
68 data_iter_maker,
69 eval_env,
---> 70 NA_action)
71 else:
72 return None
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
694 factor_states,
695 data_iter_maker,
--> 696 NA_action)
697 # Now we need the factor infos, which encapsulate the knowledge of
698 # how to turn any given factor into a chunk of data:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in _examine_factor_types(factors, factor_states, data_iter_maker, NA_action)
441 for data in data_iter_maker():
442 for factor in list(examine_needed):
--> 443 value = factor.eval(factor_states[factor], data)
444 if factor in cat_sniffers or guess_categorical(value):
445 if factor not in cat_sniffers:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in eval(self, memorize_state, data)
564 return self._eval(memorize_state["eval_code"],
565 memorize_state,
--> 566 data)
567
568 __getstate__ = no_pickling
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
34 def call_and_wrap_exc(msg, origin, f, *args, **kwargs):
35 try:
---> 36 return f(*args, **kwargs)
37 except Exception as e:
38 if sys.version_info[0] >= 3:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in eval(self, expr, source_name, inner_namespace)
164 code = compile(expr, source_name, "eval", self.flags, False)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
168 @classmethod
<string> in <module>()
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in transform(self, x, df, knots, lower_bound, upper_bound, constraints)
679 % (self._name,))
680 dm = _get_crs_dmatrix(x, self._all_knots,
--> 681 self._constraints, cyclic=self._cyclic)
682 if have_pandas:
683 if isinstance(x_orig, (pandas.Series, pandas.DataFrame)):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_crs_dmatrix(x, knots, constraints, cyclic)
363 :return: The (2-d array) design matrix.
364 """
--> 365 dm = _get_free_crs_dmatrix(x, knots, cyclic)
366 if constraints is not None:
367 dm = _absorb_constraints(dm, constraints)
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_free_crs_dmatrix(x, knots, cyclic)
337 f = _get_cyclic_f(knots)
338 else:
--> 339 f = _get_natural_f(knots)
340
341 dmt = ajm * i[j, :].T + ajp * i[j1, :].T + \
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_natural_f(knots)
34 from scipy import linalg
35 except ImportError: # pragma: no cover
---> 36 raise ImportError("Cubic spline functionality requires scipy.")
37
38 h = knots[1:] - knots[:-1]
ImportError: Cubic spline functionality requires scipy.
# Plot natural cubic regression spline basis functions (colored curves) each multiplied by its coeff
In [14]: plt.plot(x, y*b);
# Plot the spline itself (sum of the basis functions, thick black curve)
In [15]: plt.plot(x, np.dot(y, b), color='k', linewidth=3);

In [16]: plt.title("Cyclic cubic regression spline basis example");
In [17]: y = dmatrix("cc(x, df=6) - 1", {"x": x})
# Plot cyclic cubic regression spline basis functions (colored curves) each multiplied by its coeff
In [18]: plt.plot(x, y*b);
# Plot the spline itself (sum of the basis functions, thick black curve)
In [19]: plt.plot(x, np.dot(y, b), color='k', linewidth=3);

In the following example we first set up our spline basis using same data as for the B-spline example above and then make predictions on a new set of data:
In [20]: design_matrix = dmatrix("cr(x, df=4, constraints='center')", data)
ImportErrorTraceback (most recent call last)
<ipython-input-20-2a180aba7b3c> in <module>()
----> 1 design_matrix = dmatrix("cr(x, df=4, constraints='center')", data)
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in dmatrix(formula_like, data, eval_env, NA_action, return_type)
289 eval_env = EvalEnvironment.capture(eval_env, reference=1)
290 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 291 NA_action, return_type)
292 if lhs.shape[1] != 0:
293 raise PatsyError("encountered outcome variables for a model "
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
68 data_iter_maker,
69 eval_env,
---> 70 NA_action)
71 else:
72 return None
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
687 for term in termlist:
688 all_factors.update(term.factors)
--> 689 factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)
690 # Now all the factors have working eval methods, so we can evaluate them
691 # on some data to find out what type of data they return.
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in _factors_memorize(factors, data_iter_maker, eval_env)
368 factor.memorize_chunk(state, which_pass, data)
369 for factor in list(memorize_needed):
--> 370 factor.memorize_finish(factor_states[factor], which_pass)
371 if which_pass == passes_needed[factor] - 1:
372 memorize_needed.remove(factor)
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in memorize_finish(self, state, which_pass)
559 def memorize_finish(self, state, which_pass):
560 for obj_name in state["pass_bins"][which_pass]:
--> 561 state["transforms"][obj_name].memorize_finish()
562
563 def eval(self, memorize_state, data):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in memorize_finish(self)
655 # Now we can compute centering constraints
656 constraints = _get_centering_constraint_from_dmatrix(
--> 657 _get_free_crs_dmatrix(x, self._all_knots, cyclic=self._cyclic)
658 )
659
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_free_crs_dmatrix(x, knots, cyclic)
337 f = _get_cyclic_f(knots)
338 else:
--> 339 f = _get_natural_f(knots)
340
341 dmt = ajm * i[j, :].T + ajp * i[j1, :].T + \
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_natural_f(knots)
34 from scipy import linalg
35 except ImportError: # pragma: no cover
---> 36 raise ImportError("Cubic spline functionality requires scipy.")
37
38 h = knots[1:] - knots[:-1]
ImportError: Cubic spline functionality requires scipy.
In [21]: new_design_matrix = build_design_matrices([design_matrix.design_info], new_data)[0]
NameErrorTraceback (most recent call last)
<ipython-input-21-451026a46abe> in <module>()
----> 1 new_design_matrix = build_design_matrices([design_matrix.design_info], new_data)[0]
NameError: name 'design_matrix' is not defined
In [22]: new_design_matrix
NameErrorTraceback (most recent call last)
<ipython-input-22-15149ebd340a> in <module>()
----> 1 new_design_matrix
NameError: name 'new_design_matrix' is not defined
In [23]: np.asarray(new_design_matrix)
NameErrorTraceback (most recent call last)
<ipython-input-23-935a6596c64d> in <module>()
----> 1 np.asarray(new_design_matrix)
NameError: name 'new_design_matrix' is not defined
Note that in the above example 5 knots are actually used to achieve 4 degrees of freedom since a centering constraint is requested.
Note that the API is different from mgcv:
- In patsy one can specify the number of degrees of freedom directly (actual number of columns of the resulting design matrix) whereas in mgcv one has to specify the number of knots to use. For instance, in the case of cyclic regression splines (with no additional constraints) the actual degrees of freedom is the number of knots minus one.
- In patsy one can specify inner knots as well as lower and upper exterior knots which can be useful for cyclic spline for instance.
- In mgcv a centering/identifiability constraint is automatically computed and
absorbed in the resulting design matrix.
The purpose of this is to ensure that if
b
is the array of initial parameters (corresponding to the initial unconstrained design matrixdm
), our model is centered, ienp.mean(np.dot(dm, b))
is zero. We can rewrite this asnp.dot(c, b)
being zero withc
a 1-row constraint matrix containing the mean of each column ofdm
. Absorbing this constraint in the final design matrix means that we rewrite the model in terms of unconstrained parameters (this is done through a QR-decomposition of the constraint matrix). Those unconstrained parameters have the property that when projected back into the initial parameters space (let’s callb_back
the result of this projection), the constraintnp.dot(c, b_back)
being zero is automatically verified. In patsy one can choose between no constraint, a centering constraint like mgcv ('center'
) or a user provided constraint matrix.
Tensor product smooths¶
Smooths of several covariates can be generated through a tensor product of
the bases of marginal univariate smooths. For these marginal smooths one can
use the above defined splines as well as user defined smooths provided they
actually transform input univariate data into some kind of smooth functions
basis producing a 2-d array output with the (i, j)
element corresponding
to the value of the j
th basis function at the i
th data point.
The tensor product stateful transform is called te()
.
Note
The implementation of this tensor product is compatible with mgcv when considering only cubic regression spline marginal smooths, which means that generated bases will match those produced by mgcv. Recall that we do not implement any kind of mgcv-compatible penalized fitting process.
In the following code we show an example of tensor product basis functions
used to represent a smooth of two variables x1
and x2
. Note how
marginal spline bases patterns can be observed on the x and y contour projections:
In [24]: from matplotlib import cm
In [25]: from mpl_toolkits.mplot3d.axes3d import Axes3D
In [26]: x1 = np.linspace(0., 1., 100)
In [27]: x2 = np.linspace(0., 1., 100)
In [28]: x1, x2 = np.meshgrid(x1, x2)
In [29]: df = 3
In [30]: y = dmatrix("te(cr(x1, df), cc(x2, df)) - 1",
....: {"x1": x1.ravel(), "x2": x2.ravel(), "df": df})
....:
ImportErrorTraceback (most recent call last)
<ipython-input-30-7750d95e45b5> in <module>()
1 y = dmatrix("te(cr(x1, df), cc(x2, df)) - 1",
----> 2 {"x1": x1.ravel(), "x2": x2.ravel(), "df": df})
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in dmatrix(formula_like, data, eval_env, NA_action, return_type)
289 eval_env = EvalEnvironment.capture(eval_env, reference=1)
290 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 291 NA_action, return_type)
292 if lhs.shape[1] != 0:
293 raise PatsyError("encountered outcome variables for a model "
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
68 data_iter_maker,
69 eval_env,
---> 70 NA_action)
71 else:
72 return None
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
687 for term in termlist:
688 all_factors.update(term.factors)
--> 689 factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)
690 # Now all the factors have working eval methods, so we can evaluate them
691 # on some data to find out what type of data they return.
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in _factors_memorize(factors, data_iter_maker, eval_env)
366 for factor in memorize_needed:
367 state = factor_states[factor]
--> 368 factor.memorize_chunk(state, which_pass, data)
369 for factor in list(memorize_needed):
370 factor.memorize_finish(factor_states[factor], which_pass)
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in memorize_chunk(self, state, which_pass, data)
555 self._eval(state["memorize_code"][obj_name],
556 state,
--> 557 data)
558
559 def memorize_finish(self, state, which_pass):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
34 def call_and_wrap_exc(msg, origin, f, *args, **kwargs):
35 try:
---> 36 return f(*args, **kwargs)
37 except Exception as e:
38 if sys.version_info[0] >= 3:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in eval(self, expr, source_name, inner_namespace)
164 code = compile(expr, source_name, "eval", self.flags, False)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
168 @classmethod
<string> in <module>()
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in transform(self, x, df, knots, lower_bound, upper_bound, constraints)
679 % (self._name,))
680 dm = _get_crs_dmatrix(x, self._all_knots,
--> 681 self._constraints, cyclic=self._cyclic)
682 if have_pandas:
683 if isinstance(x_orig, (pandas.Series, pandas.DataFrame)):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_crs_dmatrix(x, knots, constraints, cyclic)
363 :return: The (2-d array) design matrix.
364 """
--> 365 dm = _get_free_crs_dmatrix(x, knots, cyclic)
366 if constraints is not None:
367 dm = _absorb_constraints(dm, constraints)
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_free_crs_dmatrix(x, knots, cyclic)
337 f = _get_cyclic_f(knots)
338 else:
--> 339 f = _get_natural_f(knots)
340
341 dmt = ajm * i[j, :].T + ajp * i[j1, :].T + \
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_natural_f(knots)
34 from scipy import linalg
35 except ImportError: # pragma: no cover
---> 36 raise ImportError("Cubic spline functionality requires scipy.")
37
38 h = knots[1:] - knots[:-1]
ImportError: Cubic spline functionality requires scipy.
In [31]: print y.shape
(100, 6)
In [32]: fig = plt.figure()
In [33]: fig.suptitle("Tensor product basis example (2 covariates)");
In [34]: for i in range(df * df):
....: ax = fig.add_subplot(df, df, i + 1, projection='3d')
....: yi = y[:, i].reshape(x1.shape)
....: ax.plot_surface(x1, x2, yi, rstride=4, cstride=4, alpha=0.15)
....: ax.contour(x1, x2, yi, zdir='z', cmap=cm.coolwarm, offset=-0.5)
....: ax.contour(x1, x2, yi, zdir='y', cmap=cm.coolwarm, offset=1.2)
....: ax.contour(x1, x2, yi, zdir='x', cmap=cm.coolwarm, offset=-0.2)
....: ax.set_xlim3d(-0.2, 1.0)
....: ax.set_ylim3d(0, 1.2)
....: ax.set_zlim3d(-0.5, 1)
....: ax.set_xticks([0, 1])
....: ax.set_yticks([0, 1])
....: ax.set_zticks([-0.5, 0, 1])
....:
ValueErrorTraceback (most recent call last)
<ipython-input-34-df35e7e13af8> in <module>()
1 for i in range(df * df):
2 ax = fig.add_subplot(df, df, i + 1, projection='3d')
----> 3 yi = y[:, i].reshape(x1.shape)
4 ax.plot_surface(x1, x2, yi, rstride=4, cstride=4, alpha=0.15)
5 ax.contour(x1, x2, yi, zdir='z', cmap=cm.coolwarm, offset=-0.5)
ValueError: cannot reshape array of size 100 into shape (100,100)
In [35]: fig.tight_layout()

Following what we did for univariate splines in the preceding sections, we will now set up a 3-d smooth basis using some data and then make predictions on a new set of data:
In [36]: data = {"x1": np.linspace(0., 1., 100),
....: "x2": np.linspace(0., 1., 100),
....: "x3": np.linspace(0., 1., 100)}
....:
In [37]: design_matrix = dmatrix("te(cr(x1, df=3), cr(x2, df=3), cc(x3, df=3), constraints='center')",
....: data)
....:
ImportErrorTraceback (most recent call last)
<ipython-input-37-d5aa2ed102a4> in <module>()
1 design_matrix = dmatrix("te(cr(x1, df=3), cr(x2, df=3), cc(x3, df=3), constraints='center')",
----> 2 data)
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in dmatrix(formula_like, data, eval_env, NA_action, return_type)
289 eval_env = EvalEnvironment.capture(eval_env, reference=1)
290 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 291 NA_action, return_type)
292 if lhs.shape[1] != 0:
293 raise PatsyError("encountered outcome variables for a model "
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/highlevel.py in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
68 data_iter_maker,
69 eval_env,
---> 70 NA_action)
71 else:
72 return None
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
687 for term in termlist:
688 all_factors.update(term.factors)
--> 689 factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)
690 # Now all the factors have working eval methods, so we can evaluate them
691 # on some data to find out what type of data they return.
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/build.py in _factors_memorize(factors, data_iter_maker, eval_env)
366 for factor in memorize_needed:
367 state = factor_states[factor]
--> 368 factor.memorize_chunk(state, which_pass, data)
369 for factor in list(memorize_needed):
370 factor.memorize_finish(factor_states[factor], which_pass)
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in memorize_chunk(self, state, which_pass, data)
555 self._eval(state["memorize_code"][obj_name],
556 state,
--> 557 data)
558
559 def memorize_finish(self, state, which_pass):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
34 def call_and_wrap_exc(msg, origin, f, *args, **kwargs):
35 try:
---> 36 return f(*args, **kwargs)
37 except Exception as e:
38 if sys.version_info[0] >= 3:
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/eval.py in eval(self, expr, source_name, inner_namespace)
164 code = compile(expr, source_name, "eval", self.flags, False)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
168 @classmethod
<string> in <module>()
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in transform(self, x, df, knots, lower_bound, upper_bound, constraints)
679 % (self._name,))
680 dm = _get_crs_dmatrix(x, self._all_knots,
--> 681 self._constraints, cyclic=self._cyclic)
682 if have_pandas:
683 if isinstance(x_orig, (pandas.Series, pandas.DataFrame)):
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_crs_dmatrix(x, knots, constraints, cyclic)
363 :return: The (2-d array) design matrix.
364 """
--> 365 dm = _get_free_crs_dmatrix(x, knots, cyclic)
366 if constraints is not None:
367 dm = _absorb_constraints(dm, constraints)
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_free_crs_dmatrix(x, knots, cyclic)
337 f = _get_cyclic_f(knots)
338 else:
--> 339 f = _get_natural_f(knots)
340
341 dmt = ajm * i[j, :].T + ajp * i[j1, :].T + \
/build/patsy-0.5.0+git13-g54dcf7b/doc/../patsy/mgcv_cubic_splines.py in _get_natural_f(knots)
34 from scipy import linalg
35 except ImportError: # pragma: no cover
---> 36 raise ImportError("Cubic spline functionality requires scipy.")
37
38 h = knots[1:] - knots[:-1]
ImportError: Cubic spline functionality requires scipy.
In [38]: new_data = {"x1": [0.1, 0.2],
....: "x2": [0.2, 0.3],
....: "x3": [0.3, 0.4]}
....:
In [39]: new_design_matrix = build_design_matrices([design_matrix.design_info], new_data)[0]
NameErrorTraceback (most recent call last)
<ipython-input-39-451026a46abe> in <module>()
----> 1 new_design_matrix = build_design_matrices([design_matrix.design_info], new_data)[0]
NameError: name 'design_matrix' is not defined
In [40]: new_design_matrix
NameErrorTraceback (most recent call last)
<ipython-input-40-15149ebd340a> in <module>()
----> 1 new_design_matrix
NameError: name 'new_design_matrix' is not defined
In [41]: np.asarray(new_design_matrix)
NameErrorTraceback (most recent call last)
<ipython-input-41-935a6596c64d> in <module>()
----> 1 np.asarray(new_design_matrix)
NameError: name 'new_design_matrix' is not defined