\name{Predict.matrix}
\alias{Predict.matrix}
\alias{Predict.matrix.tprs.smooth}
\alias{Predict.matrix.cr.smooth}
\alias{Predict.matrix.cyclic.smooth}
\alias{Predict.matrix.tensor.smooth}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Prediction methods for smooth terms in a GAM}
\description{ Takes \code{smooth} objects produced by \code{smooth.construct} methods and obtains the matrix mapping 
the parameters associated with such a smooth to the predicted values of the smooth at a set of new covariate values.
}

\usage{
Predict.matrix(object,data)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{object}{ is a smooth object produced by a \code{smooth.construct} method function. The object 
contains all the information required to specify the basis for a term of its class, and this information is
used by the appropriate \code{Predict.matrix} function to produce a prediction matrix for new covariate values.
 Further details are given in \code{\link{smooth.construct}}.}
\item{data}{A data frame containing the values of the (named) covariates at which the smooth term is to be 
evaluated.}
}

\value{ A matrix which will map the parameters associated with the smooth to the vector of values of the smooth 
evaluated at the covariate values given in \code{data}.}

\details{ Smooth terms in a GAM formula are turned into smooth specification objects of 
class \code{xx.smooth.spec} during processing of the formula. Each of these objects is
converted to a smooth object using an appropriate \code{smooth.construct} function. The \code{Predict.matrix} 
functions are used to obtain the matrix that will map the parameters associated with a smooth term to
the predicted values for the term at new covariate values.

Note that new smooth classes can be added by writing a new \code{smooth.construct} method function and a 
corresponding \code{\link{Predict.matrix}} method function: see the example code provided for 
\code{\link{smooth.construct}} for details.}


\references{ 

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

Wood, S.N. (in press) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass.


\url{http://www.stats.gla.ac.uk/~simon/}
}

\author{Simon N. Wood \email{simon@stats.gla.ac.uk}}

\seealso{ \code{\link{gam}},\code{\link{gamm}}, \code{\link{smooth.construct}} }

\examples{# See smooth.construct examples
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ...


\eof
\name{exclude.too.far}
\alias{exclude.too.far}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Exclude prediction grid points too far from data}
\description{ Takes two arrays defining the nodes of a grid over a 2D covariate space and two arrays 
defining the location of data in that space, and returns a logical vector with elements \code{TRUE} if 
the corresponding node is too far from data and \code{FALSE} otherwise. Basically a service routine for 
\code{vis.gam} and \code{plot.gam}.
}
\usage{
exclude.too.far(g1,g2,d1,d2,dist)
}
%- maybe also `usage' for other objects documented here.
\arguments{ 
\item{g1}{co-ordinates of grid relative to first axis.}
\item{g2}{co-ordinates of grid relative to second axis.}
\item{d1}{co-ordinates of data relative to first axis.}
\item{d2}{co-ordinates of data relative to second axis.}
\item{dist}{how far away counts as too far. Grid and data are first scaled so that the grid lies exactly 
in the unit square, and \code{dist} is a distance within this unit square.} 
}
\details{ Linear scalings of the axes are first determined so that the grid defined by the nodes in 
\code{g1} and \code{g2} lies exactly in the unit square (i.e. on [0,1] by [0,1]). These scalings are 
applied to \code{g1}, \code{g2}, \code{d1} and \code{d2}. The minimum Euclidean 
distance from each node to a datum is then determined and if it is greater than \code{dist} the 
corresponding entry in the returned array is set to \code{TRUE} (otherwise to \code{FALSE}). The 
distance calculations are performed in compiled code for speed without storage overheads.
}

\value{A logical array with \code{TRUE} indicating a node in the grid defined by \code{g1}, \code{g2} that 
is `too far' from any datum. 
}

\references{
\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}


\seealso{ \code{\link{vis.gam}} }

\examples{
library(mgcv)
x<-rnorm(100);y<-rnorm(100) # some "data"
n<-40 # generate a grid....
mx<-seq(min(x),max(x),length=n)
my<-seq(min(y),max(y),length=n)
gx<-rep(mx,n);gy<-rep(my,rep(n,n))
tf<-exclude.too.far(gx,gy,x,y,0.1)
plot(gx[!tf],gy[!tf],pch=".");points(x,y,col=2)
}
\keyword{hplot}%-- one or more ...







\eof
\name{extract.lme.cov}
\alias{extract.lme.cov}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ Extract the data covariance matrix from an lme object}
\description{ This is a service routine for \code{\link{gamm}}. It extracts 
the estimated covariance matrix of the data from an \code{lme} object, allowing the 
user control over which levels of random effects to include in this 
calculation. 
}
\usage{
extract.lme.cov(b,data,start.level=1)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{b}{ A fitted model object returned by a call to \code{\link{lme}}}

\item{data}{ The data frame/ model frame that was supplied to
 \code{\link{lme}}.}

\item{start.level}{The level of nesting at which to start including random 
effects in the calculation. This is used to allow smooth terms to be estimated
as random effects, but treated like fixed effects for variance calculations.}

}

\details{ The random effects, correlation structure and variance structure used
for a linear mixed model combine to imply a covariance matrix for the 
response data being modelled. This routine extracts that covariance matrix.
The process is slightly complicated, because different components of the 
fitted model object are stored in different orders (see function code for 
details!).  

The calculation is not optimally efficient, since it forms the full matrix,
which may in fact be sparse. In applications in which the main objective is
to allow non-independent `errors' in GAMs this is unlikely to cause great 
computational losses.
}



\value{ An estimated covariance matrix.}

\references{

For \code{lme} see:

Pinheiro, J.C. and Bates, D.M. (2000) Mixed-Effects Models in S and S-PLUS. Springer

For details of how GAMMs are set up here for estimation using \code{lme} see:
 
Wood, S.N. (manuscript) Tensor product smooth interaction terms in 
Generalized Additive Mixed Models.


\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}


\seealso{  
\code{\link{gamm}}
}

\examples{
library(nlme)
data(Rail)
b <- lme(travel~1,Rail,~1|Rail)
extract.lme.cov(b,Rail)
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..






\eof
\name{full.score}
\alias{full.score}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{GCV/UBRE score for use within nlm}
\description{ \code{gam} allows the option to `polish' smoothing parameter 
estimates by minimising the GCV/UBRE score calculated at convergence of the IRLS 
algorithm, given a set of smoothing parameters, rather than using the `performance 
iteration' method which estimates smoothing parameters within the IRLS loop. The 
estimates are often slightly different, since the performance iteration effectively 
neglects the dependence of the iterative weights on the smoothing parameters.

The `polishing' optimisation is fairly crude and numerically costly. 
\code{\link{nlm}} is used to minimise the scores with respect to the smoothing 
parameters, and this routine is designed to be supplied to it as an argument. 

This is basically a service routine for \code{\link{gam}}, and is not usually 
called directly by users.
}
\usage{
full.score(sp,G,family,control,gamma)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{sp}{The logs of the smoothing parameters}

\item{G}{a list returned by \code{\link{gam.setup}}}

\item{family}{The family object for the GAM.}

\item{control}{a list returned by \code{\link{gam.control}}}

\item{gamma}{the degrees of freedom inflation factor (usually 1).}
}
\value{ The value of the GCV/UBRE score, with attribute \code{"full.gam.object"}
which is the full object returned by \code{\link{gam.fit}}.
}

\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}


\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..










\eof
\name{gam}
\alias{gam}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Generalized Additive Models using penalized regression splines and 
GCV}
\description{ Fits the specified  generalized additive model (GAM) to
  data. \code{gam()} is not a clone of what S-PLUS provides.
Smooth terms are represented using penalized regression splines
with smoothing parameters selected by GCV/UBRE or by regression splines with
fixed degrees of freedom (mixtures of the two are
permitted). Multi-dimensional smooths are available using penalized thin plate
regression splines (isotropic) or tensor product splines (when an isotropic smooth is inappropriate). 
For more on specifying models see \code{\link{gam.models}}. For more on model 
selection see \code{\link{gam.selection}}.
}
\usage{

gam(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,
    na.action,control=gam.control(),scale=0,knots=NULL,sp=NULL,
    min.sp=NULL,H=NULL,gamma=1,fit=TRUE,G=NULL,...)
}
%- maybe also `usage' for other objects documented here.
\details{ 
A generalized additive model (GAM) is a generalized linear model (GLM) in which the linear 
predictor is given by a user specified sum of smooth functions of the covariates plus a 
conventional parametric component of the linear predictor. A simple example is:
\deqn{\log(E(y_i)) = f_1(x_{1i})+f_2(x_{2i})}{log(E(y_i))=f_1(x_1i)+f_2(x_2i)}
where the (independent) response variables \eqn{y_i \sim {\rm Poi }}{y_i~Poi}, and
\eqn{f_1}{f_1} and \eqn{f_2}{f_2} are smooth functions of covariates \eqn{x_1}{x_1} and 
\eqn{x_2}{x_2}. The log is an example of a link function. 

If absolutely any smooth functions were allowed in model fitting then maximum likelihood 
estimation of such models would invariably result in complex overfitting estimates of 
\eqn{f_1}{f_1}  and \eqn{f_2}{f_2}. For this reason the models are usually fit by 
penalized likelihood 
maximization, in which the model (negative log) likelihood is modified by the addition of 
a penalty for each smooth function, penalizing its `wiggliness'. To control the tradeoff 
between penalizing wiggliness and penalizing badness of fit each penalty is multiplied by 
an associated smoothing parameter: how to estimate these parameters, and 
how to practically represent the smooth functions are the main statistical questions 
introduced by moving from GLMs to GAMs. 

The \code{mgcv} implementation of \code{gam} represents the smooth functions using 
penalized regression splines, and by default uses basis functions for these splines that 
are designed to be optimal, given the number of basis functions used. The smooth terms can be 
functions of any number of covariates and the user has some control over how smoothness of 
the functions is measured. 

\code{gam} in \code{mgcv} solves the smoothing parameter estimation problem by using the 
Generalized Cross Validation (GCV) criterion or an Un-Biased Risk Estimator criterion 
(UBRE) which is in practice an approximation to AIC. Smoothing parameters are chosen to 
minimize the GCV or UBRE score for the model, and the main computational challenge solved 
by the \code{mgcv} package is to do this efficiently and reliably. Two alternative 
numerical methods are provided, see \code{\link{mgcv}}, \code{\link{magic}} and 
\code{\link{gam.control}}. 

Broadly \code{gam} works by first constructing basis functions and one or more quadratic penalty 
coefficient matrices for each smooth term in the model formula, obtaining a model matrix for 
the strictly parametric part of the model formula, and combining these to obtain a 
complete model matrix (/design matrix) and a set of penalty matrices for the smooth terms. 
Some linear identifiability constraints are also obtained at this point. The model is 
fit using \code{\link{gam.fit}}, a modification of \code{\link{glm.fit}}. The GAM 
penalized likelihood maximization problem is solved by penalized Iteratively 
Reweighted  Least Squares (IRLS) (see e.g. Wood 2000). At each iteration a penalized 
weighted least squares problem is solved, and the smoothing parameters of that problem are 
estimated by GCV or UBRE. Eventually both model parameter estimates and smoothing 
parameter estimates converge. 

The fitting approach just described, in which the smoothing parameters are estimated for 
each approximating linear model of the IRLS process was suggested by Chong Gu (see, e.g. 
Gu 2002), and is very computationally efficient. However, because the approach neglects 
the dependence of the iterative weights on the smoothing parameters, it is usually 
possible to find smoothing parameters which actually yield slightly lower GCV/UBRE score 
estimates than those resulting from this `performance iteration'. \code{gam} therefore also 
allows the user to `improve' the smoothing parameter estimates, by using O'Sullivan's 
(1986) suggested method, in which for each trial set of smoothing parameters the IRLS is 
iterated to convergence before the UBRE/GCV score is evaluated. This requires a much less 
efficient minimisation than the performance iteration, based on \code{\link{nlm}}, and is 
therefore quite slow. 





Three alternative bases are built in for representing model
smooths, but alternatives can easily be added (see \code{\link{smooth.construct}} which
uses p-splines to illustrate how to add new smooths). 
The built in alternatives for univariate smooth terms are: a conventional penalized
cubic regression spline basis, parameterized in terms of the function values at the knots; 
a cyclic cubic spline with a similar parameterization and thin plate regression splines. 
The cubic spline bases are computationally very efficient, but require `knot' locations to be 
chosen (automatically by default). The thin plate regression splines are optimal low rank 
smooths which do not have knots, but are more computationally costly to set up. Multivariate terms
can be represented using thin plate regression splines, or tensor products of any available basis 
including user defined bases (tensor product penalties are obtained automatically from 
the marginal basis penalties). The t.p.r.s. basis is isotropic, so if this is not appropriate tensor 
product terms should be used. Tensor product smooths have one penalty and smoothing parameter per marginal 
basis, which means that the relative scaling of covariates is essentially determined automatically by GCV/UBRE. 

For any  basis the user specifies the dimension of the basis for each smooth
term. The dimension of the basis is one more than the maximum degrees of freedom that the 
term can have, but usually the term will be fitted by penalized
maximum likelihood estimation and the actual degrees of freedom will be
chosen by GCV. However, the user can choose to fix the degrees of
freedom of a term, in which case the actual degrees of freedom will be
one less than the basis dimension.

  Thin plate regression splines are constructed by starting with the
  basis for a full thin plate spline and then truncating this basis in
  an optimal manner, to obtain a low rank smoother. Details are given in
  Wood (2003). One key advantage of the approach is that it avoids
  the knot placement problems of conventional regression spline
  modelling, but it also has the advantage that smooths of lower rank
  are nested within smooths of higher rank, so that it is legitimate to
  use conventional hypothesis testing methods to compare models based on
  pure regression splines. The t.p.r.s. basis can become expensive to
  calculate for large datasets. In this case the user can supply a reduced 
  set of knots to use in basis construction (see knots, in the argument list), or 
use tensor products of cheaper bases.
  
  In the case of the cubic regression spline basis, knots  of the spline are placed evenly
  throughout the covariate values to which the term refers:  For
  example, if fitting 101 data with an 11 knot spline of \code{x} then
  there would be a knot at every 10th (ordered)  \code{x} value. The
  parameterization used represents the spline in terms of its
   values at the knots. The values at neighbouring knots
     are connected by sections of  cubic polynomial constrained to be 
     continuous up to and including second derivative at the knots. The resulting curve
     is a natural cubic  spline through the values at the knots (given two extra conditions specifying 
     that the second derivative of the curve should be zero at the two end 
     knots). This parameterization gives the parameters a nice interpretability. 

     
     Details of the \code{"mgcv"} GCV/UBRE minimization method are given in Wood (2000): 
the basis of the approach 
     is to alternate efficient global optimization with respect to one overall smoothing 
     parameter with Newton updates of a set of relative smoothing parameters for each smooth term. The Newton updates are
backed up by steepest descent, since the GCV/UBRE score functions are not positive definite everywhere.

 }

\arguments{ 
\item{formula}{ A GAM formula (see also \code{\link{gam.models}}). This is exactly like the formula for a
GLM except that smooth terms can be added to the right hand side of the
formula (and a formula of the form \code{y ~ .} is not allowed).
Smooth terms are specified by expressions of the form: \cr
\code{s(var1,var2,...,k=12,fx=FALSE,bs="tp",by=a.var)} where \code{var1},
\code{var2}, etc. are the covariates which the smooth
is a function of and \code{k} is the dimension of the basis used to
represent the smooth term. If \code{k} is not
 specified then \code{k=10*3^(d-1)} is used where \code{d} is the number
 of covariates for this term. \code{fx} is used to indicate whether or
 not this term has a fixed number of degrees of freedom (\code{fx=FALSE}
 to select d.f. by GCV/UBRE). \code{bs} indicates the basis to use, with
 \code{"cr"} indicating cubic regression spline, \code{"cc"} a cyclic cubic regression spline, and \code{"tp"}
 indicating thin plate regression spline: \code{"cr"} and \code{"cc"} can only be used
 with 1-d smooths. \code{by} can be used to specify a variable by which
 the smooth should be multiplied. For example \code{gam(y~z+s(x,by=z))}
   would specify a model \eqn{ E(y) = f(x)z}{E(y)=f(x)z} where
   \eqn{f(\cdot)}{f(.)} is a smooth function (the formula is
   \code{y~z+s(x,by=z)} rather than \code{y~s(x,by=z)} because
   the smooths are always set up to sum to zero over the covariate
   values). The \code{by} option is particularly useful for models in
   which different functions of the same variable are required for
   each level of a factor: see \code{\link{s}}. 

An alternative for specifying smooths of more than one covariate is e.g.: \cr
\code{te(x,z,bs=c("tp","tp"),m=c(2,3),k=c(5,10))} which would specify a tensor product 
smooth of the two covariates \code{x} and \code{z} constructed from marginal t.p.r.s. bases 
of dimension 5 and 10 with marginal penalties of order 2 and 3. Any combination of basis types is 
possible, as is any number of covariates.


Formulae can involve nested or ``overlapping'' terms such as \cr
\code{y~s(x)+s(z)+s(x,z)} or \code{y~s(x,z)+s(z,v)}: see
\code{\link{gam.side.conditions}} for further details and examples. Note that 
nesting with \code{\link{te}} terms is not supported. } 
 \item{family}{
This is a family object specifying the distribution and link to use in
fitting etc. See \code{\link{glm}} and \code{\link{family}} for more
details. The negative binomial families provided by the MASS library 
can be used, with or without known \eqn{\theta}{theta} parameter: see
\code{\link{gam.neg.bin}} for details.
} 

\item{data}{ A data frame containing the model response variable and 
covariates required by the formula. By default the variables are taken 
from \code{environment(formula)}: typically the environment from 
which \code{gam} is called.} 

\item{weights}{ prior weights on the data.}

\item{subset}{ an optional vector specifying a subset of observations to be
          used in the fitting process.}

\item{na.action}{ a function which indicates what should happen when the data
          contain `NA's.  The default is set by the `na.action' setting
          of `options', and is `na.fail' if that is unset.  The
          ``factory-fresh'' default is `na.omit'.}

\item{control}{A list of fit control parameters returned by 
\code{\link{gam.control}}.}

\item{scale}{ If this is zero then GCV is used for all distributions
except Poisson and binomial where UBRE is used with scale parameter
assumed to be 1. If this is greater than 0 it is assumed to be the scale
parameter/variance and UBRE is used: to use the negative binomial in this case
\eqn{\theta}{theta} must be known. If \code{scale} is negative  GCV 
is always used, which means that the scale parameter will be estimated by GCV and the Pearson 
estimator, or in the case of the negative binomial \eqn{\theta}{theta} will be estimated 
in order to force the GCV/Pearson scale estimate to unity (if this is possible). For binomial models in 
particular, it is probably worth  comparing UBRE and GCV results; for ``over-dispersed Poisson'' GCV is
probably more appropriate than UBRE.} 

\item{knots}{this is an optional list containing user specified knot values to be used for basis construction. 
For the \code{cr} and \code{cc} bases the user simply supplies the knots to be used, and there must be the same number as the basis
dimension, \code{k}, for the smooth concerned. For the \code{tp} basis \code{knots} has two uses. Firstly, for large datasets 
the calculation of the \code{tp} basis can be time-consuming. The user can retain most of the advantages of the t.p.r.s. 
approach by supplying  a reduced set of covariate values from which to obtain the basis - 
typically the number of covariate values used will be substantially 
smaller than the number of data, and substantially larger than the basis dimension, \code{k}. The second possibility 
is to avoid the eigen-decomposition used to find the t.p.r.s. basis altogether and simply use 
the basis implied by the chosen knots: this will happen if the number of knots supplied matches the 
basis dimension, \code{k}. For a given basis dimension the second option is 
faster, but gives poorer results (and the user must be quite careful in choosing knot locations). 
Different terms can use different 
numbers of knots, unless they share a covariate.
}

\item{sp}{A vector of smoothing parameters for each term can be provided here.
 Smoothing parameters must 
be supplied in the order that the smooth terms appear in the model 
formula. With fit method \code{"magic"} (see \code{\link{gam.control}} 
and \code{\link{magic}}) then negative elements indicate that the 
parameter should be estimated, and hence a mixture of fixed and estimated 
parameters is possible. With fit method \code{"mgcv"}, if \code{sp} is 
supplied then all its elements must be positive. Note that \code{fx=TRUE} 
in a smooth term over-rides what is supplied here effectively setting the 
smoothing parameter to zero.}

\item{min.sp}{for fit method \code{"magic"} only, lower bounds can be 
supplied for the smoothing parameters. Note that if this option is used then
the smoothing parameters \code{sp}, in the returned object, will need to be added to
what is supplied here to get the actual smoothing parameters. Lower bounds on the smoothing 
parameters can sometimes help stabilize otherwise divergent P-IRLS iterations.}

\item{H}{With fit method \code{"magic"} a user supplied fixed quadratic 
penalty on the parameters of the 
GAM can be supplied, with this as its coefficient matrix. A common use of this term is 
to add a ridge penalty to the parameters of the GAM in circumstances in which the model
is close to un-identifiable on the scale of the linear predictor, but perfectly well
defined on the response scale.}

\item{gamma}{It is sometimes useful to inflate the model degrees of 
freedom in the GCV or UBRE score by a constant multiplier. This allows 
such a multiplier to be supplied if fit method is \code{"magic"}.} 

\item{fit}{If this argument is \code{TRUE} then \code{gam} sets up the model and fits it, but if it is
\code{FALSE} then the model is set up and an object \code{G} is returned which is the output from 
\code{\link{gam.setup}} plus some extra items required to complete the GAM fitting process.}
\item{G}{Usually \code{NULL}, but may contain the object returned by a previous call to \code{gam} with 
\code{fit=FALSE}, in which case all other arguments are ignored except for \code{gamma}, \code{family}, \code{control} and \code{fit}.}
\item{...}{further arguments for 
passing on e.g. to \code{gam.fit}} }

\value{ 
If \code{fit = FALSE} the function returns a list \code{G} of items needed to fit a GAM, but doesn't actually fit it. 

Otherwise the function returns an object of class \code{"gam"} which has the following elements: 

\item{boundary}{did parameters end up at boundary of parameter space?} 

\item{call}{the matched call (allows \code{update} to be used with \code{gam} objects, for example). }

\item{coefficients}{the coefficients of the fitted model. Parametric
          coefficients are  first, followed  by coefficients for each
          spline term in turn.}

\item{converged}{indicates whether or not the iterative fitting method converged.} 

\item{deviance}{(unpenalized)}

\item{df.null}{null degrees of freedom.} 

\item{edf}{estimated degrees of freedom for each coefficient.}

\item{family}{family object specifying distribution and link used.}

\item{fit.method}{ The underlying multiple GCV/UBRE method used: \code{"magic"} 
for the new more stable method, \code{"mgcv"} for the Wood (2000) method.}

\item{fitted.values}{fitted model predictions of expected value for each
          datum.}

\item{formula}{the model formula.}

\item{full.formula}{the model formula with each smooth term fully
  expanded and with option arguments given explicitly (i.e. not with reference
  to other variables) - useful for later prediction from the model.}

\item{gcv.ubre}{The minimized GCV or UBRE score.}


\item{hat}{array of elements from the leading diagonal of the `hat' (or `influence') matrix. 
Same length as response data vector.}

\item{iter}{number of iterations of P-IRLS taken to get convergence.}

\item{linear.predictor}{fitted model prediction of link function of
expected value for  each datum.}

\item{method}{Either \code{"GCV"} or \code{"UBRE"}, depending on smoothing parameter selection method used 
(or appropriate, if none used).}

\item{mgcv.conv}{ A list of convergence diagnostics relating to smoothing
parameter estimation. Differs for method \code{"magic"} and \code{"mgcv"}. Here is 
the \code{"mgcv"} version:

\item{score}{corresponding to edf, an array of GCV or UBRE scores for the model given the final 
     estimated relative smoothing parameters.}

\item{g}{the gradient of the GCV/UBRE score w.r.t. the relative smoothing parameters at termination.}

\item{h}{the second derivatives corresponding to \code{g} above - i.e. the leading diagonal of the Hessian.}

\item{e}{the eigen-values of the Hessian. All positive indicates a positive definite Hessian.}

\item{iter}{the number of iterations taken.}

\item{in.ok}{\code{TRUE} if the second smoothing parameter guess improved the GCV/UBRE score.}

\item{step.fail}{\code{TRUE} if the algorithm terminated by failing to improve the GCV/UBRE score rather than by `converging'. 
Not necessarily a problem, but check the above derivative information quite carefully.}

In the case of \code{"magic"} the items are:

\item{full.rank}{The apparent rank of the problem given the model matrix and 
constraints.}

\item{rank}{The numerical rank of the problem.}

\item{fully.converged}{\code{TRUE} if multiple GCV/UBRE converged by meeting 
convergence criteria. \code{FALSE} if method stopped with a steepest descent step 
failure.}

\item{hess.pos.def}{Was the hessian of the GCV/UBRE score positive definite at 
smoothing parameter estimation convergence?}

\item{iter}{How many iterations were required to find the smoothing parameters?}

\item{score.calls}{and how many times did the GCV/UBRE score have to be evaluated?}

\item{rms.grad}{root mean square of the gradient of the GCV/UBRE score at 
convergence.}

} % end of mgcv.conv listing

\item{min.edf}{Minimum possible degrees of freedom for whole model.}

\item{model}{model frame containing all variables needed in original model fit.}

\item{nsdf}{number of parametric, non-smooth, model terms including the
          intercept.}

\item{null.deviance}{deviance for single parameter model.} 

\item{prior.weights}{prior weights on observations.} 

\item{residuals}{the deviance residuals for the fitted model.}

\item{sig2}{estimated or supplied variance/scale parameter.}

\item{smooth}{list of smooth objects, containing the basis information for each term in the 
model formula in the order in which they appear. These smooth objects are what gets returned by
the \code{\link{smooth.construct}} method functions.}

\item{sp}{smoothing parameter for each smooth.}

\item{Vp}{estimated covariance matrix for the parameters. This is a Bayesian
posterior covariance matrix that results from adopting a particular Bayesian
model of the smoothing process.}

\item{weights}{final weights used in IRLS iteration.}



\item{y}{response data.}

}

\references{

Key References:

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

Wood, S.N. (in press) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass.

Background References:

Green and Silverman (1994) Nonparametric Regression and Generalized  Linear Models. Chapman and Hall.
  
Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398

Gu (2002) Smoothing Spline ANOVA Models, Springer.

Hastie and Tibshirani (1990) Generalized Additive Models. Chapman and Hall.

O'Sullivan, Yandell and Raynor (1986) Automatic smoothing of regression functions in generalized linear models.
J. Am. Statist.Ass. 81:96-103 

Wahba (1990) Spline Models of Observational Data. SIAM 

Wood (2001) mgcv:GAMs and Generalized Ridge Regression for R. R News 1(2):20-25
   
Wood and Augustin (2002) GAMs with integrated model selection using penalized regression splines and applications 
to environmental modelling. Ecological Modelling 157:157-177

\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\section{WARNINGS }{

The \code{"mgcv"} code does not check for rank deficiency of the
model matrix that may result from lack of identifiability between the
parametric and smooth components of the model. 

You must have more unique combinations of covariates than the model has total
parameters. (Total parameters is sum of basis dimensions plus sum of non-spline 
terms less the number of spline terms). 

Automatic smoothing parameter selection is not likely to work well when 
fitting models to very few response data.

With large datasets (more than a few thousand data) the \code{"tp"}
basis gets very slow to use: use the \code{knots} argument as discussed above and 
shown in the examples. Alternatively, for 1-d smooths  you can use the \code{"cr"} basis and 
for multi-dimensional smooths use \code{te} smooths.

For data with many  zeroes clustered together in the covariate space it is quite easy to set up 
GAMs which suffer from identifiability problems, particularly when using Poisson or binomial
families. The problem is that with e.g. log or logit links, mean value zero corresponds to
an infinite range on the linear predictor scale. Some regularization is possible in such cases: see 
\code{\link{gam.control}} for details.
} 

\seealso{\code{\link{gam.models}}, \code{\link{s}}, \code{\link{predict.gam}},
  \code{\link{plot.gam}}, \code{\link{summary.gam}}, \code{\link{gam.side.conditions}},
\code{\link{gam.selection}},\code{\link{mgcv}}, \code{\link{gam.control}},
\code{\link{gam.check}}, \code{\link{gam.neg.bin}}, \code{\link{magic}},\code{\link{vis.gam}}
 }

\examples{
# Simulated-data examples for gam; the RNG seed is fixed before any draws are made.
library(mgcv)
set.seed(0) 
# sample size and noise variance for the Gaussian examples
n<-400
sig2<-4
# four uniform(0,1) covariates
x0 <- runif(n, 0, 1)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
x3 <- runif(n, 0, 1)
# local definition of pi (masks the built-in constant of the same name)
pi <- asin(1) * 2
# f accumulates functions of x0, x1 and x2 only; x3 does not enter f
f <- 2 * sin(pi * x0)
f <- f + exp(2 * x1) - 3.75887
f <- f+0.2*x2^11*(10*(1-x2))^6+10*(10*x2)^3*(1-x2)^10-1.396
# zero-mean normal noise with standard deviation sqrt(abs(sig2))
e <- rnorm(n, 0, sqrt(abs(sig2)))
y <- f + e
# one smooth per covariate, then summarise and plot all terms on one page
b<-gam(y~s(x0)+s(x1)+s(x2)+s(x3))
summary(b)
plot(b,pages=1)
# same fit in two parts .....
G<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),fit=FALSE)
b<-gam(G=G)
# an extra ridge penalty (useful with convergence problems) ....
bp<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),H=diag(0.5,41)) 
print(b);print(bp);rm(bp)
# set the smoothing parameter for the first term, estimate rest ...
bp<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),sp=c(0.01,-1,-1,-1))
plot(bp,pages=1);rm(bp)
# set lower bounds on smoothing parameters ....
bp<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),min.sp=c(0.001,0.01,0,10)) 
print(b);print(bp);rm(bp)

# now a GAM with 3df regression spline term & 2 penalized terms
b0<-gam(y~s(x0,k=4,fx=TRUE,bs="tp")+s(x1,k=12)+s(x2,k=15))
plot(b0,pages=1)
# now fit a 2-d term to x0,x1
b1<-gam(y~s(x0,x1)+s(x2)+s(x3))
par(mfrow=c(2,2))
plot(b1)
par(mfrow=c(1,1))
# now simulate poisson data
# the Poisson mean is exp(f/5); the first argument to rpois is a length-n vector,
# so one value is drawn per element of g
g<-exp(f/5)
y<-rpois(rep(1,n),g)
b2<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=poisson)
plot(b2,pages=1)
# and a pretty 2-d smoothing example....
# test1 is the true surface: a weighted sum of two exponential bumps centred at
# (0.2,0.3) and (0.7,0.8), multiplied by the factor pi**sx*sz
test1<-function(x,z,sx=0.3,sz=0.4)  
{ (pi**sx*sz)*(1.2*exp(-(x-0.2)^2/sx^2-(z-0.3)^2/sz^2)+
  0.8*exp(-(x-0.7)^2/sx^2-(z-0.8)^2/sz^2))
}
n<-500
# save the current graphics settings so they can be restored after the 2 by 2 display
old.par<-par(mfrow=c(2,2))
x<-runif(n);z<-runif(n);
# 30 by 30 prediction grid: x cycles fastest, each z value is repeated 30 times
xs<-seq(0,1,length=30);zs<-seq(0,1,length=30)
pr<-data.frame(x=rep(xs,30),z=rep(zs,rep(30,30)))
truth<-matrix(test1(pr$x,pr$z),30,30)
contour(xs,zs,truth)
# noisy observations of the surface at the random (x,z) locations
y<-test1(x,z)+rnorm(n)*0.1
b4<-gam(y~s(x,z))
# predictions on the grid, reshaped to match truth for a side by side contour plot
fit1<-matrix(predict.gam(b4,pr,se=FALSE),30,30)
contour(xs,zs,fit1)
persp(xs,zs,truth)
vis.gam(b4)
par(old.par)
# very large dataset example using knots
n<-10000
x<-runif(n);z<-runif(n);
y<-test1(x,z)+rnorm(n)
# a random subsample of 1000 of the data locations is passed in through knots
ind<-sample(1:n,1000,replace=FALSE)
b5<-gam(y~s(x,z,k=50),knots=list(x=x[ind],z=z[ind]))
vis.gam(b5)
# and a pure "knot based" spline of the same data
# here the 100 knots lie on a regular 10 by 10 grid at 0.05, 0.15, ..., 0.95 in each direction
b6<-gam(y~s(x,z,k=100),knots=list(x= rep((1:10-0.5)/10,10),
        z=rep((1:10-0.5)/10,rep(10,10))))
vis.gam(b6,color="heat")
}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..








\eof
\name{gam.check}
\alias{gam.check}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Some diagnostics for a fitted gam model.}
\description{ Takes a fitted \code{gam} object produced by \code{gam()} and produces some diagnostic information
about the fitting procedure and results. 
}
\usage{
gam.check(b)
}
%- maybe also `usage' for other objects documented here.
\arguments{ 
\item{b}{ a fitted \code{gam} object as produced by \code{gam()}.}
}
\details{ This function plots 4 standard diagnostic plots, and some other convergence diagnostics. Output differs depending on whether the 
underlying fitting method was \code{"mgcv"} or \code{"magic"} (see \code{\link{gam.control}}).

For fit method \code{"mgcv"}, the first plot shows the GCV or UBRE score against model 
degrees of freedom given the final estimates of the relative smoothing parameters for the model. This is a slice through the 
GCV/UBRE score function that passes through the minimum found during fitting. Although not conclusive (except in the single 
smoothing parameter case), a lack of multiple local minima on this plot is suggestive of a lack of multiple local minima in the 
GCV/UBRE function and is therefore a good thing. Multiple local minima on this plot indicates that the GCV/UBRE function 
may have multiple local minima, but in a multiple smoothing parameter case this is not conclusive - multiple local minima on one slice 
through a function do not necessarily imply that the function has multiple local minima. A `good' plot here is a smooth curve with 
only one local minimum (which is therefore its global minimum).

The location of the minimum used for the fitted model is also marked on the first plot. Sometimes this location may be a local minimum
that is not the global minimum on the plot. There is a legitimate reason for this to happen, and it does not always indicate problems.
Smoothing parameter selection is based on applying GCV/UBRE to the approximating linear model produced by the GLM IRLS fitting method
employed in \code{gam.fit()}. It is sometimes possible for these approximating models to develop `phantom' minima in their GCV/UBRE scores. These 
minima usually imply a big change in model parameters, and have the characteristic that the minima will not be present in the GCV/UBRE score 
of the approximating model that would result from actually applying this parameter change. In other words, these are spurious minima in regions 
of parameter space well beyond those for which the weighted least squares problem can be expected to represent the real underlying likelihood well.
Such minima can lead to convergence problems. To help ensure convergence even in the presence of phantom minima, 
\code{gam.fit} switches to a cautious optimization mode after a user controlled number of iterations of the IRLS algorithm (see \code{\link{gam.control}}).
In the presence of local minima in the GCV/UBRE score, this method selects the minimum that leads to the smallest change in 
model estimated degrees of freedom. This approach is usually sufficient to deal with phantom minima. Setting \code{trace} to \code{TRUE} in 
\code{gam.control} will allow you to check exactly what is happening. 

If the location of the point indicating the minimum is not on the curve showing the GCV/UBRE function then there are numerical problems with the 
estimation of the effective degrees of freedom: this usually reflects problems with  the relative scaling of covariates that are arguments of a single smooth. 
In this circumstance reported estimated degrees of freedom can not be trusted, although the fitted model and term estimates are likely to be quite 
acceptable. 

If the fit method is \code{"magic"} then there is no global search and the problems with phantom local minima are much reduced. The first plot in this case will simply be a normal QQ plot of the standardized residuals.

The other 3 plots are two residual plots and a plot of fitted values against original data.

The function also prints out information about the convergence of the GCV minimization algorithm, indicating how 
many iterations  were required to minimise the GCV/UBRE score. A message is printed if the minimization terminated by 
failing to improve the score with a steepest descent step: otherwise minimization terminated by meeting convergence criteria.
The mean absolute gradient or RMS gradient of the GCV/UBRE function at the minimum is given. An indication of whether or not the Hessian of the GCV/UBRE function is positive definite is given. If some smoothing parameters 
are not well defined (effectively zero, or infinite) then it may not be, although this is not usually a problem. If the fit method is 
\code{"mgcv"}, a message is printed 
if the second guess smoothing parameters did not improve on the first guess - this is primarily there for the developer. 
For fit method \code{"magic"} the estimated rank of the model is printed.


The ideal results from this function have a smooth, single-minimum GCV/UBRE plot, good residual plots, and convergence to small 
gradients with a positive definite Hessian. However, failure to meet some of these criteria 
is often  acceptable,  and the information provided is primarily of use in diagnosing suspected problems. 
High gradients at convergence  are a clear indication of problems, however.

Fuller data can be extracted from the \code{mgcv.conv} part of the \code{gam} object.  

}

\references{

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}

}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}


\seealso{  \code{\link{gam}}, \code{\link{mgcv}}, \code{\link{magic}}}

\examples{
# Fit a four-term GAM to simulated Gaussian data and run the diagnostics on it.
library(mgcv)
set.seed(0)
# sample size and noise variance
n<-200
sig2<-4
# four uniform(0,1) covariates
x0 <- runif(n, 0, 1)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
x3 <- runif(n, 0, 1)
# local definition of pi (masks the built-in constant of the same name)
pi <- asin(1) * 2
# the noise-free response is built from functions of x0, x1 and x2 only
y <- 2 * sin(pi * x0)
y <- y + exp(2 * x1) - 3.75887
y <- y+0.2*x2^11*(10*(1-x2))^6+10*(10*x2)^3*(1-x2)^10-1.396
# add zero-mean normal noise with standard deviation sqrt(abs(sig2))
e <- rnorm(n, 0, sqrt(abs(sig2)))
y <- y + e
# fit, plot the estimated smooths on one page, then produce the diagnostics
b<-gam(y~s(x0)+s(x1)+s(x2)+s(x3))
plot(b,pages=1)
gam.check(b)
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ...







\eof
\name{gam.control}
\alias{gam.control}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Setting GAM fitting defaults}
\description{ This is an internal function of package \code{mgcv} which allows control of the numerical options for fitting a GAM. 
Typically users will want to modify the defaults if model fitting fails to converge, or if warnings are generated which suggest a 
loss of numerical stability during fitting.  
}
\usage{
gam.control(irls.reg=0.0,epsilon = 1e-04, maxit = 20,globit = 20,
            mgcv.tol=1e-6,mgcv.half=15,nb.theta.mult=10000, trace = FALSE,
            fit.method="magic",perf.iter=TRUE,rank.tol=.Machine$double.eps^0.5) 
}
\arguments{ 
\item{irls.reg}{For most models this should be 0. The iteratively re-weighted least squares method
by which GAMs are fitted  can fail to converge in some circumstances. For example data with many zeroes can cause 
problems in a model with a log link, because a mean of zero corresponds to an infinite range of linear predictor 
values. Such convergence problems are caused by a fundamental lack of identifiability, but do not show up as 
lack of identifiability in the penalized linear model problems that have to be solved at each stage of iteration.
In such circumstances it is possible to apply a ridge regression penalty to the model to impose identifiability, and 
\code{irls.reg} is the size of the penalty. The penalty can only be used if \code{fit.method=="magic"}.
}

\item{epsilon}{This is used for judging convergence of the GLM IRLS loop in \code{gam.fit}.}

\item{maxit}{Maximum number of IRLS iterations to perform using cautious GCV/UBRE optimization, after \code{globit} 
IRLS iterations with normal GCV optimization have been performed. Note that fit method \code{"magic"} makes no distinction 
between cautious and global optimization.}

\item{globit}{Maximum number of IRLS iterations to perform with normal GCV/UBRE optimization. If convergence is not achieved after these 
iterations then a further \code{maxit} iterations will be performed using cautious GCV/UBRE optimization. } 

\item{mgcv.tol}{The convergence tolerance parameter to use in GCV/UBRE optimization.}

\item{mgcv.half}{If a step of  the GCV/UBRE optimization method leads to a worse GCV/UBRE score, then the step length is halved. This is
the number of halvings to try before giving up.}

\item{nb.theta.mult}{Controls the limits on theta when negative binomial parameter is to be estimated. Maximum theta is set to 
the initial value multiplied by \code{nb.theta.mult}, while the minimum value is set to the initial value divided by 
\code{nb.theta.mult}. }

\item{trace}{Set this to \code{TRUE} to turn on diagnostic output.}

\item{fit.method}{set to \code{"mgcv"} to use the method described in Wood (2000). Set to \code{"magic"} to use a 
newer numerically more stable method, which allows regularization and mixtures of fixed and estimated smoothing parameters. 
Set to \code{"fastest"} to use \code{"mgcv"} for single penalty models and \code{"magic"} otherwise.}

\item{perf.iter}{set to \code{TRUE} to use Gu's approach to finding smoothing 
parameters in which GCV or UBRE is
applied to the penalized linear modelling problem produced at each IRLS iteration. This method is very fast, but means that it 
is often possible to find smoothing parameters yielding a slightly lower GCV/UBRE score by perturbing some of the estimated 
smoothing parameters a little. Technically this occurs because the performance iteration effectively neglects the dependence of the 
iterative weights on the smoothing parameters. Setting \code{perf.iter} to \code{FALSE} uses O'Sullivan's approach in which
the IRLS is run to convergence for each trial set of smoothing parameters. This is much slower, since it uses \code{nlm} with 
finite differences and requires many more IRLS steps. } 

\item{rank.tol}{The tolerance used to estimate rank when using \code{fit.method="magic"}.}
}

\details{ 
With fit method \code{"mgcv"}, 
\code{maxit} and \code{globit} control the maximum iterations of the IRLS algorithm, as follows: 
the algorithm will first execute up to
\code{globit} steps in which the GCV/UBRE algorithm performs a global search for the best overall 
smoothing parameter at every iteration. If convergence is not achieved within \code{globit} iterations, then a further 
\code{maxit} steps are taken, in which the overall smoothing parameter estimate is taken as the 
one locally minimising the GCV/UBRE score and resulting in the lowest EDF change. The difference 
between the two phases is only significant if the GCV/UBRE function develops more than one minimum. 
The reason for this approach is that the GCV/UBRE score for the IRLS problem can develop `phantom'
minima for some models: these are minima which are not present in the GCV/UBRE score of the IRLS
problem resulting from moving the parameters to the minimum! Such minima can lead to convergence 
failures, which are usually fixed by the second phase. 
}


\references{

Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 
65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}


}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}


\seealso{   \code{\link{gam}}, \code{\link{gam.fit}}, \code{\link{glm.control}} }

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..





\eof
\name{gam.fit}
\alias{gam.fit}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Generalized Additive Models fitting using penalized regression splines and 
GCV}
\description{ This is an internal function of package \code{mgcv}. It is a modification
  of the function \code{glm.fit}, designed to be called from \code{gam}. The major
  modification is that rather than solving a weighted least squares problem at each IRLS step, 
  a weighted, penalized least squares problem
  is solved at each IRLS step with smoothing parameters associated with each penalty chosen by GCV or UBRE,
  using routine \code{mgcv}. For further information on usage see code for \code{gam}. Some regularization of the 
IRLS weights is also permitted as a way of addressing identifiability related problems (see 
\code{\link{gam.control}}). Negative binomial parameter estimation is supported.
}



\references{

Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

Wood, S.N. (in press) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass.

\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}


\seealso{   \code{\link{gam}}, \code{\link{mgcv}}, \code{\link{magic}}}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..

\eof
\name{gam.models}
\alias{gam.models}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Specifying Generalized Additive Models.}
\description{ This page is intended to provide some more information on
  how to specify GAMs. Assume that in general we are interested in
  modelling some response variable \eqn{y_i}{y} with an exponential
  family distribution (e.g. Normal, Poisson, binomial, etc.) using predictor variables
  \eqn{x_{1i}, x_{2i}, \ldots}{x1,x2,...}, and let \eqn{\mu_i \equiv
    E(y_i)}{m=E(y)}. A typical GAM might be:
  \deqn{g(\mu_i) = \beta_0 + \beta_1 x_{1i} + \beta_2 x_{2i} +
    f_1(x_{3i})+f_2(x_{4i},x_{5i})}{g(m)=b0 + b1x1 + b2x2 + f1(x3) +
    f2(x4,x5) }
  where \eqn{g} is a smooth monotonic `link' function, and \eqn{f_1}{f1}
  and \eqn{f_2}{f2} are smooth functions. The key idea here is that the
  dependence of the response on the predictors can be represented as a
  parametric sub-model plus the sum of some smooth functions of one or
  more of the predictor variables. Thus the model is quite flexible
  relative to strictly parametric linear or generalized linear models,
  but still has much more structure than the completely general model
  that says that the response is just some smooth function of all the
  covariates.

  Note one important point. In order for the model to be identifiable
  the smooth functions have to be constrained to have zero mean (usually
  taken over the set of covariate values). Such constraints are always
  applied by \code{gam}.

  Specification of the distribution and link function is done using the \code{\link{family}}
  argument to \code{\link{gam}} and works in the same way as for
  \code{\link{glm}}. This page hence assumes the default identity link
  normal error family, since the generalizations are easy.

  Starting with the model given above then, the \code{gam} formula would
  be \cr
  \code{y~x1+x2+s(x3)+s(x4,x5)}. \cr
  This would use the default basis for the smooths (a thin plate
  regression spline basis for each), with automatic selection of the
  effective degrees of freedom for both smooths. The dimension of the
  smoothing basis is given a default value as well (the dimension of the
  basis sets an upper limit on the maximum possible degrees of
  freedom for the basis - the limit is typically one less than basis
  dimension). Full details of how to control smooths are given in
  \code{\link{s}} and \code{\link{te}}. For the moment suppose that we would like to change
  the basis of the first smooth to a cubic regression spline basis with
  a dimension of 20, while fixing the second term at 25 degrees of
  freedom. The appropriate formula would be:\cr
  \code{y~x1+x2+s(x3,bs="cr",k=20)+s(x4,x5,k=26,fx=TRUE)}.


  Now consider some more unusual models. Consider a model in which
  \eqn{y}{y} is a smooth function of \eqn{x}{x} except at a point
  \eqn{x^*}{x*} where the function jumps discontinuously. This model can
  be written as:
  \deqn{E(y_i)= \beta_0+\beta_1h(x^*,x_i)+f_1(x_i)}{E(y) = b0 + b1
    h(x*,x) + f1(x)}
  where \eqn{h} is a step function jumping from 0 to 1 at
  \eqn{x^*}{x*}. The way to fit this model is to create a variable
  \code{h} which is zero for all \code{x} less than \eqn{x^*}{x*} and
  one otherwise. Then the model formula is:\cr
  \code{y~h+s(x)}. \cr

  Another situation that occurs quite often is the one in which we
  would like to find out if the model:
  \deqn{E(y_i) = f(x_i,z_i)}{E(y)=f(x,z)}
  is really necessary or whether:
  \deqn{E(y_i)=f_1(x_i)+f_2(z_i)}{E(y)=f1(x)+f2(z)}
  wouldn't do just as well. One way to do this is to look at the results
  of fitting:\cr
  \code{y~s(x)+s(z)+s(x,z)}.\cr
  \code{gam} automatically generates side conditions to make this model
  identifiable. You can also estimate `overlapping' models like:\cr
  \code{y~s(x,z)+s(z,v)}.
  
  Sometimes models of the form:
\deqn{E(y_i) = \beta_0+ f(x_i)z_i}{E(y)=b0+f(x)z}
  need to be estimated (where \eqn{f} is a smooth function, as usual.)
  The appropriate formula is:\cr
  \code{y~z+s(x,by=z)}\cr
  - the \code{by} argument ensures that the smooth function gets multiplied by
  covariate \code{z}, but GAM smooths are centred (average value zero),
  so the \code{z+} term is needed as well (\eqn{f} is being
  represented by a constant plus a centred smooth). If we'd wanted:
  \deqn{E(y_i) = f(x_i)z_i}{E(y)=f(x)z}
  then the appropriate formula would be:\cr
   \code{y~z+s(x,by=z)-1}.\cr
  
  The \code{by} mechanism also allows models to be estimated in which
  the form of a smooth depends on the level of a factor, but to do this
  the user must generate the dummy variables for each level of the
  factor. Suppose for example that \code{fac} is a factor with 3 levels
  \code{1}, \code{2}, \code{3},
  and at each level of this factor the response depends smoothly on a
  variable \code{x} in a manner that is level dependent. Three dummy
  variables \code{fac.1}, \code{fac.2}, \code{fac.3}, can be generated for the factor   
  (e.g. \code{fac.1<-as.numeric(fac==1)}). Then the model formula would
  be:\cr
  \code{y~fac+s(x,by=fac.1)+s(x,by=fac.2)+s(x,by=fac.3)}.\cr

In the above examples the smooths of more than one covariate have all employed 
single penalty thin plate regression splines. These isotropic smooths are not 
always appropriate: if variables are not naturally `well scaled' relative to each 
other then it is often preferable to use tensor product smooths, with a wiggliness 
penalty for each covariate of the term. See \code{\link{te}} for examples.

}

%- maybe also `usage' for other objects documented here.
\section{WARNING}{
There are no identifiability checks made between the smooth and
parametric parts of a gam formula, although fit method \code{\link{magic}}
can handle such problems numerically.
}

\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\examples{
# Examples of the by mechanism on simulated data; the RNG seed is fixed first.
set.seed(10)
# sample size and noise variance
n<-400
sig2<-4
# three uniform(0,1) covariates
x0 <- runif(n, 0, 1)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
# local definition of pi (masks the built-in constant of the same name)
pi <- asin(1) * 2
# three different true functions, all of the same covariate x2
f1 <- 2 * sin(pi * x2)
f2 <-  exp(2 * x2) - 3.75887
f3 <-  0.2 * x2^11 * (10 * (1 - x2))^6 + 
       10 * (10 * x2)^3 * (1 - x2)^10 - 1.396
# zero-mean normal noise with standard deviation sqrt(abs(sig2))
e <- rnorm(n, 0, sqrt(abs(sig2)))
# A continuous `by' variable example.... 
# the truth is f3(x2) multiplied by x1; the intercept is dropped with -1
y <- f3*x1 + e
b<-gam(y~x1-1+s(x2,by=x1))
plot(b,pages=1)
summary(b)
# A dummy `by' variable example (with a spurious covariate x0)
# 0/1 indicator variables are built by hand, one per level of fac
fac<-as.factor(c(rep(1,100),rep(2,100),rep(3,200)))
fac.1<-as.numeric(fac==1);fac.2<-as.numeric(fac==2);
fac.3<-as.numeric(fac==3)
# each level of fac gets its own true function of x2; x0 plays no part in generating y
y<-f1*fac.1+f2*fac.2+f3*fac.3+ e
b<-gam(y~fac+s(x2,by=fac.1)+s(x2,by=fac.2)+s(x2,by=fac.3)+s(x0))
plot(b,pages=1)
summary(b)
}
\keyword{models} \keyword{regression}%-- one or more ..



\eof
\name{gam.neg.bin}
\alias{gam.neg.bin}
\alias{mgcv.find.theta}
\alias{mgcv.get.scale}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{GAMs with the negative binomial distribution}
\description{The \code{gam} modelling function is designed to be able to use 
the \code{negative.binomial} and \code{neg.bin} families from the MASS library, 
with or without a known \eqn{\theta}{theta} parameter. A value for \code{theta} 
must always be passed to these families, but if \eqn{\theta}{theta} is to be
estimated then the passed value is treated as a starting value for estimation. 


If the \code{scale} argument passed to \code{gam} is positive, then it is used 
as the scale parameter, \code{theta} is treated as a fixed known parameter and 
any smoothing parameters are chosen by UBRE. If \code{scale} is not positive then
\eqn{\theta}{theta} is estimated. The method of estimation is to choose \eqn{\hat \theta}{theta} 
so that the GCV (Pearson) estimate of the scale parameter is one (since the scale parameter 
is one for the negative binomial). 

\eqn{\theta}{theta} estimation is nested within the IRLS loop used for GAM fitting. After
each call to fit an iteratively weighted additive model to the IRLS pseudodata, the \eqn{\theta}{theta}
estimate is updated. This is done by conditioning on all components of the current GCV/Pearson 
estimator of the scale parameter except \eqn{\theta}{theta} and then searching for the 
\eqn{\hat \theta}{theta} which equates this conditional  estimator to one. The search is 
a simple bisection search after an initial crude line search to bracket one. The search will 
terminate at the upper boundary of the search region if a Poisson fit would have yielded an estimated 
scale parameter <1. Search limits can be set in \code{gam.control}.

Note that 
\code{neg.bin} only allows a log link, while \code{negative.binomial} also allows \code{"sqrt"} and
\code{"identity"}. In addition the \code{negative.binomial} family results in a more 
informative \code{gam} summary.  


}

%- maybe also `usage' for other objects documented here.

\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\examples{
# Negative binomial GAM examples on simulated count data.
library(MASS) # required for negative binomial families
set.seed(3)
n<-400
# four uniform(0,1) covariates
x0 <- runif(n, 0, 1)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
x3 <- runif(n, 0, 1)
# local definition of pi (masks the built-in constant of the same name)
pi <- asin(1) * 2
# f accumulates functions of x0, x1 and x2 only; x3 does not enter f
f <- 2 * sin(pi * x0)
f <- f + exp(2 * x1) - 3.75887
f <- f + 0.2 * x2^11 * (10 * (1 - x2))^6 + 10 * (10 * x2)^3 * (1 - x2)^10 - 1.396
# the mean g is exp(f/5)
g<-exp(f/5)
# negative binomial data  
# the first argument to rnbinom is the length-n vector g, so one count is drawn per mean
y<-rnbinom(g,size=3,mu=g)
# unknown theta ...
b<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=negative.binomial(1))
plot(b,pages=1)
print(b)
b<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=neg.bin(1)) # unknown theta
plot(b,pages=1)
print(b)
# known theta example ...
# theta is passed as 3, the size used to simulate y, and scale=1 is supplied
b<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=negative.binomial(3),scale=1)
plot(b,pages=1)
print(b)
# Now use "sqrt" link available in negative.binomial (but not neg.bin)
set.seed(1)
# shift f so that it is non-negative, then square it: sqrt of the mean g equals f
f<-f-min(f);g<-f^2
y<-rnbinom(g,size=3,mu=g)
b<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=negative.binomial(1,link="sqrt")) 
plot(b,pages=1)
print(b)
}
\keyword{models} \keyword{regression}%-- one or more ..



\eof
\name{gam.selection}
\alias{gam.selection}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Generalized Additive Model Selection}
\description{ This page is intended to provide some more information on
  how to select GAMs. Given a model structure specified by a gam model formula,
\code{gam()} attempts to find the appropriate smoothness for each applicable model 
term using Generalized Cross Validation (GCV) or an Un-Biased Risk Estimator (UBRE),
the latter being used in cases in which the scale parameter is assumed known. GCV and 
UBRE are covered in Craven and Wahba (1979) and Wahba (1990). Fit method \code{"magic"}
uses Newton or failing that steepest descent updates of the smoothing parameters and is particularly numerically robust.
Fit method \code{"mgcv"} alternates 
grid searches for the correct overall level of smoothness for the whole model, given the 
relative smoothness of terms, with Newton/Steepest descent updates of the relative smoothness 
of terms, given the overall amount of smoothness. 

Automatic smoothness selection is unlikely to be successful with few data, particularly with
multiple terms to be selected. The \code{\link{mgcv}} method can also fail to find the real minimum of the GCV/UBRE
score if the model contains many smooth terms that should really be completely smooth, or close to it (e.g. a straight line 
for a default 1-d smooth). The problem is that in this circumstance the optimal overall smoothness given the relative
smoothness of terms may make all terms completely smooth - but this will tend to move the smoothing parameters 
to a location where the GCV/UBRE score is nearly completely flat with respect to the smoothing parameters so that Newton and steepest  descent are both ineffective. These problems can usually be overcome by replacing some completely smooth terms with purely
parametric model terms. 

A good example of where smoothing parameter selection can ``fail'', but in an unimportant manner is provided by the 
\code{rock.gam} example in Venables and Ripley. In this case 3 smoothing parameters are to be estimated from 48 data, which is 
probably over-ambitious. \code{gam} will estimate either 1.4 or 1 degrees of freedom for the smooth of \code{shape}, depending on 
the exact details of model specification (e.g. k value for each \code{s()} term). The lower GCV score is really at 1.4 (and if the other
2 terms are replaced by straight lines this estimate is always returned), but the \code{shape} term is in no way significant and the 
lowest GCV score is obtained by removing it altogether. The problem here is that the GCV score contains very little information on the optimal 
degrees of freedom to associate with a term that GCV would suggest should really be dropped. 


In general the most logically consistent method to use for deciding which terms to include in the model is to compare GCV/UBRE scores
for models with and without the term. More generally the score for the model with a smooth term can be compared to the score for the model with 
the smooth term replaced by appropriate parametric terms. Candidates for removal can be identified by reference to the approximate p-values provided by \code{summary.gam}. Candidates for replacement by parametric terms are smooth terms with estimated degrees of freedom close to their 
minimum possible.

}

\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\references{
Craven and Wahba (1979) Smoothing Noisy Data with Spline Functions. Numer. Math. 31:377-403

Venables and Ripley (1999) Modern Applied Statistics with S-PLUS

Wahba (1990) Spline Models of Observational Data. SIAM.

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}
}

\keyword{models} \keyword{regression}%-- one or more ..



\eof
\name{gam.setup}
\alias{gam.setup}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Generalized Additive Model set up.}
\description{ This is an internal function of package \code{mgcv}. It is called 
by \code{gam} to obtain the design matrix and penalty matrices for a GAM
set up using penalized regression splines. This is done by calling a mixture of
R routines and compiled C code.  For further information on usage see code for \code{gam}.
}



\references{

Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}

}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}



\seealso{   \code{\link{gam}}, \code{\link{gamm}}, \code{\link{magic}}, \code{\link{mgcv}}}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..









\eof
\name{gam.side.conditions}
\alias{gam.side.conditions}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Identifiability side conditions for a GAM.}
\description{ GAM formulae with repeated variables only correspond to
  identifiable models given some side conditions on the null space of
  the model penalties. This routine works out appropriate side conditions
  and returns the coefficient matrix that imposes them. It is called
  from \code{\link{gam}} and is not intended to be called by users. 

  The routine will not deal with lack of identifiability between tensor product 
  smooth terms and other smooths. i.e. only the \code{s} terms are processed here,
  while \code{te} terms are ignored.
}
\usage{
gam.side.conditions(G)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{G}{ the object obtained by calling \code{\link{gam.setup}}. }
}
\details{ Part of the basis of a thin plate regression spline (of the
  sort used to define GAMs in package \code{mgcv}) is a polynomial basis
  for the space of functions of zero wiggliness according to the spline
  wiggliness penalty. A GAM with repeated dependence on one or more
  variables will have multiple copies of these basis terms, and will
  hence not be identifiable without extra constraints. For example, the
  design matrix for the model 
  \code{y~s(x)+s(z)+s(x,z)} will feature two copies of the columns for
  \code{x} and \code{z}, but by imposing the constraint that the second
  parameter for each repeat variable is zero, the model becomes
  identifiable again.

  This routine automates the process of producing these constraints in
  general. The method used is to create labels for each null basis
  column that uniquely identify the term while allowing multiple copies
  of the column to be identified. The algorithm works upwards through the
  dimensions - i.e. starts with 1-d terms, then does 2-d terms, then
  3-d and so on. A stack of null basis columns is maintained, and each
  new basis term tested against it - if the new term is a repeat then
  its parameter is constrained to be zero - otherwise it is added to
  the stack. The method is symbolic and not numerical - you
  can fool it by making two copies of the same covariate with different names.

  If the model can not be made identifiable then an error is generated.

  The code critically depends on equivalence between the R code
  null.space.basis.powers and the equivalent C code in tprs.c.

  Note that the routine does nothing to the constant terms in the basis
  - these are already dealt with by the constraints that centre all
  smooths. 

}
\value{
A matrix \eqn{ \bf C}{C} defining the constraints on the model. The constraints
are that \eqn{ { \bf Cp}={ \bf 0}}{Cp=0} where \eqn{ \bf p}{p} is the
parameter vector. \code{C} includes the original sum to zero constraints from
  \code{G$C} and is hence suitable for directly replacing that matrix. 
}

\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\references{
Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}


}


\examples{
# Simulate n observations: a truth built from x0, x1 and x2 plus
# Gaussian noise with variance sig2.
set.seed(0)
n<-400
sig2<-4
x0 <- runif(n, 0, 1)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
# workspace copy of pi; it masks the built-in constant until the rm call below
pi <- asin(1) * 2
f <- 2 * sin(pi * x0)
f <- f + exp(2 * x1) - 3.75887
f <- f + 0.2 * x2^11 * (10 * (1 - x2))^6 + 10 * (10 * x2)^3 * (1 - x2)^10 - 1.396
e <- rnorm(n, 0, sqrt(abs(sig2)))
y <- f + e
# x0 and x1 each appear in two smooths, so the formula has repeated variables
b<-gam(y~s(x0)+s(x1)+s(x0,x1)+s(x2))
plot(b,pages=1)
# Second illustration: a two-dimensional test surface made of two
# Gaussian-shaped bumps, fitted with main effects plus an interaction smooth.
# NOTE(review): pi**sx*sz parses as (pi^sx)*sz - confirm this scaling is intended.
test1<-function(x,z,sx=0.3,sz=0.4)  
{ (pi**sx*sz)*(1.2*exp(-(x-0.2)^2/sx^2-(z-0.3)^2/sz^2)+
  0.8*exp(-(x-0.7)^2/sx^2-(z-0.8)^2/sz^2))
}
n<-500
# 2 by 2 plotting layout; the previous settings are saved so they can be restored
old.par<-par(mfrow=c(2,2))
x<-runif(n);z<-runif(n);
y<-test1(x,z)+rnorm(n)*0.1
# x and z each appear in a univariate smooth and in the bivariate smooth
b<-gam(y~s(x)+s(z)+s(x,z))
plot(b)
par(old.par)
# remove the listed example objects, including the workspace copy of pi
rm(list=c("f","x0","x1","x2","x","z","y","b","test1","n","sig2","pi","e"))
}
\keyword{models} \keyword{regression}%-- one or more ..



\eof
\name{gamm}
\alias{gamm}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Generalized Additive Mixed Models}
\description{ Fits the specified  generalized additive mixed model (GAMM) to
data, by a call to \code{lme} in the normal errors identity link case, or by 
a call to \code{glmmPQL} from the \code{MASS} library otherwise. 
In the latter case estimates are only approximately MLEs. The routine is typically 
much slower than \code{gam}, and not quite as numerically robust.

Smooths are specified as in a call to \code{\link{gam}} as part of the fixed 
effects model formula, but the wiggly components of the smooth are treated as 
random effects. The random effects structures and correlation structures 
available for \code{lme} are used to specify other random effects and 
correlations. 

It is assumed that the random effects and correlation structures are employed 
primarily to model residual correlation in the data and that the prime interest
is in inference about the terms in the fixed effects model formula including 
the smooths. For this reason the routine calculates a posterior covariance 
matrix for the coefficients of all the terms in the fixed effects formula, 
including the smooths.

}
\usage{
gamm(formula,random=NULL,correlation=NULL,family=gaussian(),
data=list(),weights=NULL,subset=NULL,na.action,knots=NULL,
control=lmeControl(niterEM=3),niterPQL=20,verbosePQL=TRUE,...)
}

\arguments{ 
\item{formula}{ A GAM formula (see also \code{\link{gam.models}}). 
This is exactly like the formula for a
glm except that smooth terms can be added to the right hand side of the
formula (and a formula of the form \code{y ~ .} is not allowed).
Smooth terms are specified by expressions of the form: \cr
\code{s(var1,var2,...,k=12,fx=FALSE,bs="tp",by=a.var)} where \code{var1},
\code{var2}, etc. are the covariates which the smooth
is a function of and \code{k} is the dimension of the basis used to
represent the smooth term. If \code{k} is not
 specified then \code{k=10*3^(d-1)} is used where \code{d} is the number
 of covariates for this term. \code{fx} is used to indicate whether or
 not this term has a fixed number of degrees of freedom (\code{fx=FALSE}
 to select d.f. by GCV/UBRE). \code{bs} indicates the basis to use, with
 \code{"cr"} indicating cubic regression spline, \code{"cc"} a periodic 
cubic regression spline, and \code{"tp"}
 indicating thin plate regression spline: \code{"cr"} and \code{"cc"} can only be used
 with 1-d smooths. Tensor product smooths are specified using \code{\link{te}} terms.
} 
\item{random}{The (optional) random effects structure as specified in a call to 
\code{\link{lme}}: only the \code{list} form is allowed, to facilitate 
manipulation of the random effects structure within \code{gamm} in order to deal
with smooth terms.}
\item{correlation}{An optional \code{corStruct} object (see \code{\link{corClasses}}) as used to define correlation 
structures in \code{\link{lme}}.}
\item{family}{A \code{family} as used in a call to \code{\link{glm}} or \code{\link{gam}}.
The default \code{gaussian} with identity link causes \code{gamm} to fit by a direct call to
\code{\link{lme}}, otherwise \code{glmmPQL} from the \code{MASS} library is used.}
\item{data}{ A data frame containing the model response variable and 
covariates required by the formula.  By default the variables are taken 
from \code{environment(formula)}, typically the environment from 
which \code{gamm} is called.} 
\item{weights}{ prior weights on the data. Read the documentation for \code{lme} and \code{glmmPQL} very 
carefully before even thinking about using this argument.}
\item{subset}{ an optional vector specifying a subset of observations to be
          used in the fitting process.}
\item{na.action}{ a function which indicates what should happen when the data
          contain `NA's.  The default is set by the `na.action' setting
          of `options', and is `na.fail' if that is unset.  The
          ``factory-fresh'' default is `na.omit'.}



\item{knots}{this is an optional list containing user specified knot values to be used for basis construction. 
For the \code{cr} basis the user simply supplies the knots to be used, and there must be the same number as the basis
dimension, \code{k}, for the smooth concerned. For the \code{tp} basis \code{knots} has two uses. 
Firstly, for large datasets 
the calculation of the \code{tp} basis can be time-consuming. The user can retain most of the advantages of the t.p.r.s. 
approach by supplying  a reduced set of covariate values from which to obtain the basis - 
typically the number of covariate values used will be substantially 
smaller than the number of data, and substantially larger than the basis dimension, \code{k}. The second possibility 
is to avoid the eigen-decomposition used to find the t.p.r.s. basis altogether and simply use 
the basis implied by the chosen knots: this will happen if the number of knots supplied matches the 
basis dimension, \code{k}. For a given basis dimension the second option is 
faster, but gives poorer results (and the user must be quite careful in choosing knot locations). 
Different terms can use different 
numbers of knots, unless they share a covariate.
}
\item{control}{A list of fit control parameters for \code{\link{lme}} returned by 
\code{\link{lmeControl}}. Note the default setting for the number of EM iterations 
used by \code{lme}: high settings tend to lead to numerical problems because variance components
for smooth terms can legitimately be non-finite.}

\item{niterPQL}{Maximum number of PQL iterations (if any).}

\item{verbosePQL}{Should PQL report its progress as it goes along?}

\item{...}{further arguments for passing on e.g. to \code{lme}} 
}
%- maybe also `usage' for other objects documented here.
\details{ The Bayesian model of spline smoothing introduced by Wahba (1983) and Silverman (1985) opens 
up the possibility of estimating the degree of smoothness of terms in a generalized additive model
as variances of the wiggly components of the smooth terms treated as random effects. Several authors 
have recognised this (see Wang 1998; Ruppert, Wand and Carroll, 2003) and in the normal errors, identity link case estimation can 
be performed using general linear mixed effects modelling software such as \code{lme}. In the generalized case only 
approximate inference is so far available, for example using the Penalized Quasi-Likelihood approach of Breslow 
and Clayton (1993) as implemented in \code{glmmPQL} by Venables and Ripley (2002). 
One advantage of this approach is that it allows correlated errors to be dealt with via random effects 
or the correlation structures available in the \code{nlme} library. 


Some brief details of how GAMs are represented as mixed models and estimated using \code{lme} or \code{glmmPQL} in \code{gamm} can be found in Wood (manuscript). In addition \code{gamm} obtains a posterior covariance matrix for the parameters of all the fixed effects and the smooth terms. The approach is similar to that described in (Lin & Zhang, 1999) - the covariance matrix of the data (or pseudodata in the generalized case) implied by the weights, correlation and random effects structure is obtained, based on the estimates of the parameters of these terms and this is used to obtain the posterior covariance matrix of the fixed and smooth effects. 

The bases used to represent smooth terms are the same as those used in \code{\link{gam}}.

 }


\value{ Returns a list with two items:
\item{gam}{an object of class \code{gam}, less information relating to GCV/UBRE model selection.}
\item{lme}{the fitted model object returned by \code{lme} or \code{glmmPQL}. Note that the model formulae and grouping 
structures may appear to be rather bizarre, because of the manner in which the GAMM is split up and the calls to 
\code{lme} and \code{glmmPQL} are constructed.}

}

\references{

Breslow, N. E. and Clayton, D. G. (1993) Approximate inference in generalized linear 
mixed models. Journal of the American Statistical Association 88, 9-25.

Lin, X. and Zhang, D. (1999) Inference in generalized additive mixed models by using smoothing 
splines. JRSSB. 61(2):381-400

Pinheiro J.C. and Bates, D.M. (2000) Mixed effects Models in S and S-PLUS. Springer

Ruppert, D., Wand, M.P. and Carroll, R.J. (2003) Semiparametric Regression. 
Cambridge

Silverman, B.W. (1985) Some aspects of the spline smoothing approach to nonparametric regression.
JRSSB 47:1-52 

Venables, W. N. and Ripley, B. D. (2002) Modern Applied Statistics
with S. Fourth edition.  Springer.

Wahba, G. (1983) Bayesian confidence intervals for the cross validated smoothing spline. 
JRSSB 45:133-150

Wood, S.N. (in press) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. Journal of the American Statistical Association.

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

Wood, S.N. (manuscript) Tensor product smooth interactions in Generalized Additive Mixed Models.

Wang, Y. (1998) Mixed effects smoothing spline analysis of variance. J.R. Statist. Soc. B 60, 159-174

\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\section{WARNINGS }{
\code{lme} and \code{glmmPQL} will not deal with offsets, so neither can \code{gamm}.

Models like \code{s(z)+s(x)+s(x,z)} are not currently supported.

\code{gamm} is not as numerically stable as \code{gam}: an \code{lme} call will occasionally fail. Experimenting with 
\code{niterEM} in the \code{control} argument can sometimes help.

\code{gamm} is usually much slower than \code{gam}.

Note that the weights returned in the fitted GAM object are dummy, and not
those used by the PQL iteration: this makes partial residual plots look odd.

} 

\seealso{\code{\link{te}}, \code{\link{s}}, \code{\link{predict.gam}},
  \code{\link{plot.gam}}, \code{\link{summary.gam}}, \code{\link{gam.neg.bin}}, 
\code{\link{vis.gam}},\code{\link{pdTens}},\code{\link{gamm.setup}}
 }

\examples{
# Example 1: simulate data whose truth depends on x0, x1 and x2 (x3 has no
# effect), add Gaussian noise with standard deviation sig, and fit by gamm.
library(mgcv)
set.seed(0) 
n <- 400
sig <- 2
x0 <- runif(n, 0, 1)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
x3 <- runif(n, 0, 1)
f <- 2 * sin(pi * x0)
f <- f + exp(2 * x1) - 3.75887
f <- f+0.2*x2^11*(10*(1-x2))^6+10*(10*x2)^3*(1-x2)^10-1.396
e <- rnorm(n, 0, sig)
y <- f + e
# family is left at its default here; the gam part of the result is plotted
b <- gamm(y~s(x0)+s(x1)+s(x2)+s(x3))
plot(b$gam,pages=1)

# same data, with x0 and x1 entering through a tensor product smooth
b <- gamm(y~te(x0,x1)+s(x2)+s(x3)) 
op <- par(mfrow=c(2,2))
plot(b$gam)
par(op)

# Poisson response with mean g; rpois is given a length-n first argument,
# so n values are drawn
g<-exp(f/5)
y<-rpois(rep(1,n),g)
b2<-gamm(y~s(x0)+s(x1)+s(x2)+s(x3),family=poisson)
plot(b2$gam,pages=1)

# now an example with autocorrelated errors....
x <- 0:(n-1)/(n-1)
f <- 0.2*x^11*(10*(1-x))^6+10*(10*x)^3*(1-x)^10-1.396
e <- rnorm(n,0,sig)
# turn the independent draws into a first order autoregressive sequence
for (i in 2:n) e[i] <- 0.6*e[i-1] + e[i]
y <- f + e
op <- par(mfrow=c(2,2))
# three fits for comparison: gamm with an AR1 correlation structure, gamm
# without one, and gam; the centred true function is overlaid in colour 2
b <- gamm(y~s(x,k=20),correlation=corAR1())
plot(b$gam);lines(x,f-mean(f),col=2)
b <- gamm(y~s(x,k=20))
plot(b$gam);lines(x,f-mean(f),col=2)
b <- gam(y~s(x,k=20))
plot(b);lines(x,f-mean(f),col=2)
par(op)

# and a "spatial" example
library(nlme);set.seed(1)
# NOTE(review): pi**sx*sz parses as (pi^sx)*sz - confirm this scaling is intended.
test1<-function(x,z,sx=0.3,sz=0.4)
{ (pi**sx*sz)*(1.2*exp(-(x-0.2)^2/sx^2-(z-0.3)^2/sz^2)+
  0.8*exp(-(x-0.7)^2/sx^2-(z-0.8)^2/sz^2))
}
n<-200
old.par<-par(mfrow=c(2,2))
x<-runif(n);z<-runif(n);
# 30 by 30 grid over the unit square on which the true surface is evaluated
xs<-seq(0,1,length=30);zs<-seq(0,1,length=30)
pr<-data.frame(x=rep(xs,30),z=rep(zs,rep(30,30)))
truth <- matrix(test1(pr$x,pr$z),30,30)
contour(xs,zs,truth)  # true function
f <- test1(x,z)  # true expectation of response

# build a Gaussian correlation structure over the (x,z) locations and use the
# Cholesky factor of its correlation matrix to generate the errors
cstr <- corGaus(.1,form = ~x+z)  
cstr <- Initialize(cstr,data.frame(x=x,z=z))
V <- corMatrix(cstr) # correlation matrix for data
Cv <- chol(V)
e <- t(Cv) \%*\% rnorm(n)*0.05 # correlated errors
y <- f + e 
b<- gamm(y~s(x,z,k=50),correlation=corGaus(.1,form=~x+z))
plot(b$gam) # gamm fit accounting for correlation
# overfits when correlation ignored.....  
b1 <- gamm(y~s(x,z,k=50));plot(b1$gam) 
b2 <- gam(y~s(x,z,k=50));plot(b2)
par(old.par)

}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..








\eof
\name{gamm.setup}
\alias{gamm.setup}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Generalized Additive Mixed Model set up.}
\description{ This is an internal function of package \code{mgcv}. It is called 
by \code{gamm} to set up a generalized additive mixed model in a form suitable for fitting by calls to 
\code{glmmPQL} from the \code{MASS} library or \code{lme} from the \code{nlme} library. The main task is the representation of the smooth terms as random effects. 
}

\references{

Wood, S.N. (manuscript) Tensor product smooth interactions in Generalized Additive Mixed Models.



\url{http://www.stats.gla.ac.uk/~simon/}

}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}



\seealso{   \code{\link{gamm}}}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..









\eof
\name{get.var}
\alias{get.var}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Get named variable or evaluate expression from list or data.frame}
\description{ This routine takes a text string and a data frame or list. It first sees if the 
string is the name of a variable in the data frame/ list. If it is then the value of this variable is returned. 
Otherwise the routine tries to evaluate the expression within the data.frame/list (but nowhere else) and if 
successful returns the result. If neither step works then \code{NULL} is returned. The routine is useful for
processing gam formulae.}

\usage{ get.var(txt,data)
}
%- maybe also `usage' for other objects documented here.
\arguments{
 \item{txt}{a text string which is either the name of a variable in \code{data} or when 
parsed is an expression that can be evaluated in \code{data}. It can also be neither in which case the
function returns \code{NULL}.}
\item{data}{A data frame or list.} 
}

\value{The evaluated variable, or \code{NULL}}

\references{
\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk} } 

\seealso{  \code{\link{gam} } }

\examples{
# y is created in the workspace only; dat has the single column x
y <- 1:4;dat<-data.frame(x=5:10)
# x names a variable in dat, so per the description its value is returned
get.var("x",dat)
# y is not in dat; per the description nothing outside dat is searched,
# so the documented result is NULL
get.var("y",dat)
# not a variable name, so it is evaluated as an expression within dat
get.var("x==6",dat)
}
\keyword{models} \keyword{smooth} \keyword{regression} %-- one or more ..







\eof
\name{interpret.gam}
\alias{interpret.gam}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Interpret a GAM formula}
\description{ This is an internal function of package \code{mgcv}. It is a service routine for
 \code{gam} which splits off the strictly parametric part of the model formula, returning 
it as a formula, and interprets the smooth parts of the model formula. 

Not normally called directly.
}

\usage{interpret.gam(gf)}

\arguments{\item{gf}{A GAM formula as supplied to \code{\link{gam}} or \code{\link{gamm}}.}

}

\value{An object of class \code{split.gam.formula} with the following items:

\item{pf}{A model formula for the strictly parametric part of the model.}

\item{pfok}{TRUE if there is a \code{pf} formula.}

\item{smooth.spec}{A list of class \code{xx.smooth.spec} objects where \code{xx} depends on the basis 
specified for the term. (These can be passed to smooth constructor method functions to actually set up
penalties and bases.)}

\item{full.formula}{An expanded version of the model formula in which the options are fully expanded, and 
the options do not depend on variables which might not be available later.}

\item{fake.formula}{A formula suitable for use in evaluating a model frame.}

\item{response}{Name of the response variable.}
}

\references{

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}


\seealso{   \code{\link{gam}}, \code{\link{gamm}}}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..




\eof
\name{magic}
\alias{magic}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ Stable Multiple Smoothing Parameter Estimation by GCV or UBRE, 
with optional fixed penalty}
\description{
Function to efficiently estimate smoothing parameters in generalized
ridge regression problems with multiple (quadratic) penalties, by GCV 
or UBRE. The function uses Newton's method in multi-dimensions, backed up by 
steepest descent to iteratively adjust the smoothing parameters for each penalty 
(one penalty may have a smoothing parameter fixed at unity ). 

For maximal numerical stability the method is based on orthogonal decomposition methods, 
and attempts to deal with numerical rank deficiency gracefully using a truncated singular 
value decomposition approach.
} %- end description

\usage{
magic(y,X,sp,S,off,rank=NULL,H=NULL,C=NULL,w=NULL,
      gamma=1,scale=1,gcv=TRUE,ridge.parameter=NULL,
      control=list(maxit=50,tol=1e-6,step.half=25,
              rank.tol=.Machine$double.eps^0.5))
}
%- maybe also `usage' for other objects documented here.
\arguments{

\item{y}{is the response data vector.}

\item{X}{is the model matrix.}

\item{sp}{is the array of smoothing parameters multiplying the penalty matrices stored in 
\code{S}. Any that are negative are autoinitialized, otherwise they are taken as supplying 
starting values. A supplied starting value will be reset to a default starting value if the
gradient of the GCV/UBRE score is too small at the supplied value.  }
 
\item{S}{ is a list of penalty matrices. \code{S[[i]]} is the ith penalty matrix, but note
that it is not stored as a full matrix, but rather as the smallest square matrix including all 
the non-zero elements of the penalty matrix. Element 1,1 of \code{S[[i]]}  occupies 
element \code{off[i]}, \code{off[i]} of the ith penalty matrix. Each \code{S[[i]]} must be 
positive semi-definite.  }

\item{off}{is an array indicating the first parameter in the parameter vector that is 
penalized by the penalty involving \code{S[[i]]}.}

\item{rank}{ is an array specifying the ranks of the penalties. This is useful, but not 
essential, for forming square roots of the penalty matrices.}

\item{H}{ is the optional offset penalty - i.e. a penalty with a smoothing parameter fixed at 
1. This is useful for allowing regularization of the estimation process, fixed smoothing 
penalties etc.}

 \item{C}{ is the optional matrix specifying any linear equality constraints on the fitting 
problem. If \eqn{\bf b}{b} is the parameter vector then the parameters are forced to satisfy 
\eqn{ {\bf Cb} = {\bf 0} }{Cb=0}. }

\item{w}{ the regression weights. If this is a matrix then it is taken as being the 
square root of the inverse of the covariance matrix of \code{y}, specifically 
\eqn{ {\bf V}_y^{-1} = {\bf w}^\prime{\bf w}}{V_y^{-1}=w'w}. If \code{w} is an array then 
it is taken as the diagonal of this matrix, or simply the weight for each element of 
\code{y}.}

\item{gamma}{is an inflation factor for the model degrees of freedom in the GCV or UBRE 
score.}

\item{scale}{ is the scale parameter for use with UBRE.}

\item{gcv}{ should be set to \code{TRUE} if GCV is to be used, \code{FALSE} for UBRE.}

\item{ridge.parameter}{It is sometimes useful to apply a ridge penalty to the fitting problem, 
penalizing the parameters in the constrained space directly. Setting this parameter to a value 
greater than zero will cause such a penalty to be used, with the magnitude given by the 
parameter value.}

\item{control}{ is a list of iteration control constants with the following elements:

\item{maxit}{The maximum number of iterations of the magic algorithm to allow.}

\item{tol}{The tolerance to use in judging convergence.}

\item{step.half}{If a trial step fails then the method tries halving it up to a maximum of 
\code{step.half} times.}

\item{rank.tol}{is a constant used to test for numerical rank deficiency of the problem. 
Basically any singular value less than \code{rank.tol} multiplied by the largest singular value of 
the  problem is set to zero.}

} %- end of control
}

\details{ 

The method is a computationally efficient means of applying GCV or UBRE (often approximately 
AIC) to the 
problem of smoothing parameter selection in generalized ridge regression problems 
of the form:
\deqn{ minimise~ \| { \bf W} ({ \bf Xb - y} ) \|^2 + {\bf b}^\prime {\bf Hb} + \sum_{i=1}^m
\theta_i {\bf b^\prime S}_i{\bf b} }{ minimise || W (Xb-y) ||^2 + b'Hb +
theta_1 b'S_1 b + theta_2 b'S_2 b + . . .}
possibly subject to constraints \eqn{ {\bf Cb}={\bf 0}}{Cb=0}. 
\eqn{ {\bf X}}{X} is a design matrix, \eqn{\bf b}{b} a parameter vector, 
\eqn{\bf y}{y} a data vector, \eqn{\bf W}{W} a weight matrix,
\eqn{ {\bf S}_i}{S_i} a positive semi-definite matrix  of coefficients
defining the ith penalty with associated smoothing parameter \eqn{\theta_i}{theta_i}, 
\eqn{\bf H}{H} is the positive semi-definite offset penalty matrix  and \eqn{\bf C}{C} a 
matrix of coefficients defining any linear equality constraints on the problem. 
\eqn{ {\bf X}}{X} need not be of full column rank.

The \eqn{\theta_i}{theta_i} are chosen to minimize either the GCV score:

\deqn{V_g = \frac{n\|{\bf W}({\bf y} - {\bf Ay})\|^2}{[tr({\bf I} - \gamma {\bf A})]^2}}{V_g = n ||W(y-Ay)||^2/[tr(I - g A)]^2}

or the UBRE score:

\deqn{V_u=\|{\bf W}({\bf y}-{\bf Ay})\|^2/n-2 \phi tr({\bf I}-\gamma {\bf A})/n + \phi}{
V_u =||W(y-Ay)||^2/n - 2 s tr(I - g A)/n + s }

where \eqn{\gamma}{g} is \code{gamma} the inflation factor for degrees of freedom (usually set to 1) and \eqn{\phi}{s} 
is \code{scale}, the scale parameter. \eqn{\bf A}{A} is the hat matrix (influence matrix) for the fitting problem (i.e.
the matrix mapping data to fitted values). Dependence of the scores on the smoothing parameters is through \eqn{\bf A}{A}. 

The method operates by  Newton or steepest descent updates of the logs of the 
\eqn{\theta_i}{theta_i}. A key aspect of the method is stable and economical calculation of the 
first and second derivatives of the scores w.r.t. the log smoothing parameters. 
Because the GCV/UBRE scores are flat w.r.t. very large or very small \eqn{\theta_i}{theta_i}, 
it's important to get good starting parameters, and to be careful not to step into a flat region
of the smoothing parameter space. For this reason the algorithm rescales any Newton step that 
would result in a \eqn{log(\theta_i)}{log(theta_i)} change of more than 5. Newton steps are 
only used if the Hessian of the GCV/UBRE is positive definite, otherwise steepest descent is 
used. Similarly steepest descent is used if the Newton step has to be contracted too far 
(indicating that the quadratic model underlying Newton is poor). All initial steepest descent 
steps are scaled so that their largest component is 1. However a step is calculated, 
it is never expanded if it is successful (to avoid flat portions of the objective), 
but steps are successively halved if they do not decrease the GCV/UBRE score, until 
they do, or the direction is deemed to have failed. (Given the smoothing parameters the optimal 
\eqn{\bf b}{b} parameters are easily found.)


The method is coded in \code{C} with matrix factorizations performed using LINPACK and LAPACK routines.
}
\value{The function returns a list with the following items:

\item{b}{The best fit parameters given the estimated smoothing parameters.}

\item{scale}{the estimated (GCV) or supplied (UBRE) scale parameter.}

\item{score}{the minimized GCV or UBRE score.}

\item{sp}{an array of the estimated smoothing parameters.}

\item{rV}{a factored form of the parameter covariance matrix. The (Bayesian)  covariance
matrix of the parameters \code{b} is given by \code{rV\%*\%t(rV)*scale}. }

\item{gcv.info}{is a list of information about the performance of the method with the following elements:
\item{full.rank}{The apparent rank of the problem: number of parameters less number of equality constraints.}
\item{rank}{The estimated actual rank of the problem (at the final iteration of the method).}
\item{fully.converged}{is \code{TRUE} if the method converged by satisfying the convergence criteria, and \code{FALSE} if it converged 
by failing to decrease the score along the search direction.}
\item{hess.pos.def}{is \code{TRUE} if the hessian of the UBRE or GCV score was positive definite at convergence.}
\item{iter}{is the number of Newton/Steepest descent iterations taken.}
\item{score.calls}{is the number of times that the GCV/UBRE score had to be evaluated.}
\item{rms.grad}{is the root mean square of the gradient of the UBRE/GCV score w.r.t. the smoothing parameters.}
}
Note that some further useful quantities can be obtained using \code{\link{magic.post.proc}}.

}
\references{

Wood, S.N. (in press) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass.

\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}


\seealso{  
\code{\link{magic.post.proc}},
\code{\link{mgcv}},
\code{\link{gam}}
}
\examples{
# Simulate n observations: the truth depends on x0, x1 and x2 (x3 has no
# effect) and the Gaussian noise has variance sig2.
library(mgcv)
set.seed(1);n<-400;sig2<-4
x0 <- runif(n, 0, 1);x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1);x3 <- runif(n, 0, 1)
f <- 2 * sin(pi * x0)
f <- f + exp(2 * x1) - 3.75887
f <- f+0.2*x2^11*(10*(1-x2))^6+10*(10*x2)^3*(1-x2)^10-1.396
e <- rnorm(n, 0, sqrt(sig2))
y <- f + e
# set up additive model
G<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),fit=FALSE)
# fit using magic
mgfit<-magic(G$y,G$X,G$sp,G$S,G$off,G$rank,C=G$C)
# and fit using gam as consistency check
b<-gam(G=G)
mgfit$sp;b$sp  # compare smoothing parameter estimates
edf<-magic.post.proc(G$X,mgfit,G$w)$edf  # extract e.d.f. per parameter
# get termwise e.d.f.s
# NOTE(review): the indexing assumes each of the four smooths owns 10
# consecutive entries of edf, starting at entry 1 - confirm against G
twedf<-0;for (i in 1:4) twedf[i]<-sum(edf[((i-1)*10+1):(i*10)])
twedf;b$edf  # compare
}


\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..






\eof
\name{magic.post.proc}
\alias{magic.post.proc}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Auxiliary information from magic fit}
\description{Obtains parameter covariance matrix, estimated degrees of 
freedom for each parameter and leading diagonal of influence/hat matrix, 
for a penalized regression estimated by \code{magic}.
}
\usage{
magic.post.proc(X,object,w)
}
%- maybe also `usage' for other objects documented here.
\arguments{
 \item{X}{ is the model matrix.}
\item{object}{is the list returned by \code{magic} after fitting the 
model with model matrix \code{X}.}
\item{w}{is the weight vector used in fitting, or the weight matrix used 
in fitting (i.e. supplied to \code{magic}, if one was.) \code{t(w)\%*\%w} should typically give
the inverse of the covariance matrix of the response data supplied to \code{magic}.}
}
\details{ \code{object} contains \code{rV} (\eqn{ {\bf V}}{V}, say), and 
\code{scale} (\eqn{ \phi}{s}, say) which can be 
used to obtain the required quantities as follows. The covariance matrix of 
the parameters is \eqn{ {\bf VV}^\prime \phi}{VV's}. The vector of 
estimated degrees of freedom for each parameter is the leading diagonal of 
\eqn{ {\bf VV}^\prime {\bf X}^\prime {\bf W}^\prime {\bf W}{\bf X}}{ VV'X'W'WX} 
where \eqn{\bf{W}}{W} is either the 
weight matrix \code{w} or the matrix \code{diag(w)}. The 
hat/influence  matrix is given by 
\eqn{ {\bf WX}{\bf VV}^\prime {\bf X}^\prime {\bf W}^\prime }{ WXVV'X'W'} 
.
}
\value{ A list with three items:
\item{Vb}{the covariance matrix of the model parameters.}
\item{hat}{the leading diagonal of the hat (influence) matrix.}
\item{edf}{the array giving the estimated degrees of freedom associated 
with each parameter.}
}
 
\seealso{\code{\link{magic}}}

\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..










\eof
\name{mgcv}
\alias{mgcv}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ Multiple Smoothing Parameter Estimation by GCV or UBRE}
\description{
Function to efficiently estimate smoothing parameters in Generalized
Ridge Regression Problems with multiple (quadratic) penalties, by GCV 
or UBRE. The function uses Newton's method in multi-dimensions, backed up by steepest descent to iteratively 
adjust a set of relative smoothing parameters for each penalty. To ensure that the overall level of smoothing
is optimal, and to guard against trapping by local minima, a highly efficient global minimisation with respect to 
one overall smoothing parameter is also made at each iteration.

For a listing of all routines in the \code{mgcv} package type:\cr
\code{library(help="mgcv")}
}
\usage{
mgcv(y,X,sp,S,off,C=NULL,w=rep(1,length(y)),H=NULL,
     scale=1,gcv=TRUE,control=mgcv.control())
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{y}{The response data vector.}

\item{X}{The design matrix for the problem, note that \code{ncol(X)}
            must give the number of model parameters, while \code{nrow(X)} 
            should give the number of data.}

\item{sp}{ An array of smoothing parameters. If \code{control$fixed==TRUE} then these are taken as being the 
smoothing parameters. Otherwise any positive values are assumed to be initial estimates and negative values to
signal auto-initialization.}

\item{S}{A list of penalty matrices. Only the smallest square block containing all non-zero matrix
elements is actually stored, and \code{off[i]} indicates the element of the parameter vector that 
\code{S[[i]][1,1]} relates to.}

\item{off}{ Offset values indicating where in the overall parameter vector a particular stored penalty starts operating. 
For example if \code{p} is the model parameter vector and \code{k=nrow(S[[i]])-1}, then the ith penalty is given by \cr
\code{t(p[off[i]:(off[i]+k)])\%*\%S[[i]]\%*\%p[off[i]:(off[i]+k)]}.}


\item{C}{Matrix containing any linear equality constraints 
            on the problem (i.e. \eqn{\bf C}{C} in \eqn{ {\bf Cp}={\bf 0} }{Cp=0}).}

\item{w}{A vector of weights for the data (often proportional to the 
           reciprocal of the standard deviation of \code{y}). }

\item{H}{ A single fixed penalty matrix to be used in place of the multiple 
penalty matrices in \code{S}. \code{mgcv} cannot mix fixed and estimated penalties.}

\item{scale}{ This is the known scale parameter/error variance to use with UBRE. 
Note that it is assumed that the variance of \eqn{y_i}{y_i} is 
given by \eqn{\sigma^2/w_i}{scale/w_i}.}

\item{gcv}{ If \code{gcv} is TRUE then smoothing parameters are estimated by GCV,
otherwise UBRE is used.}

\item{control}{A list of control options returned by \code{\link{mgcv.control}}.}
}

\details{ 

This is documentation for the code implementing the method described in section 
4 of 
Wood (2000). The method is a computationally efficient means of applying GCV to 
the 
problem of smoothing parameter selection in generalized ridge regression problems 
of 
the form:
\deqn{ minimise~ \| { \bf W} ({ \bf Xp - y} ) \|^2 \rho +  \sum_{i=1}^m
\lambda_i {\bf p^\prime S}_i{\bf p} }{ minimise || W (Xp-y) ||^2 rho + 
lambda_1 p'S_1 p + lambda_2 p'S_2 p + . . .}
possibly subject to constraints \eqn{ {\bf Cp}={\bf 0}}{Cp=0}. 
\eqn{ {\bf X}}{X} is a design matrix, \eqn{\bf p}{p} a parameter vector, 
\eqn{\bf y}{y} a data vector, \eqn{\bf W}{W} a diagonal weight matrix,
\eqn{ {\bf S}_i}{S_i} a positive semi-definite matrix  of coefficients
defining the ith penalty and \eqn{\bf C}{C} a matrix of coefficients 
defining any linear equality constraints on the problem. The smoothing
parameters are the \eqn{\lambda_i}{lambda_i} but there is an overall
smoothing parameter \eqn{\rho}{rho} as well. Note that \eqn{ {\bf X}}{X}
must be of full column rank, at least when projected  into the null space
of any equality constraints.  

The method operates by alternating very efficient direct searches for 
\eqn{\rho}{rho}
with Newton or steepest descent updates of the logs of the \eqn{\lambda_i}{lambda_i}. 
Because the GCV/UBRE scores are flat w.r.t. very large or very small \eqn{\lambda_i}{lambda_i}, 
it's important to get good starting parameters, and to be careful not to step into a flat region
of the smoothing parameter space. For this reason the algorithm rescales any Newton step that 
would result in a \eqn{log(\lambda_i)}{log(lambda_i)} change of more than 5. Newton steps are only used
if the Hessian of the GCV/UBRE is positive definite, otherwise steepest descent is used. Similarly steepest 
descent is used if the Newton step has to be contracted too far (indicating that the quadratic model 
underlying Newton is poor). All initial steepest descent steps are scaled so that their largest component is
1. However a step is calculated, it is never expanded if it is successful (to avoid flat portions of the objective), 
but steps are successively halved if they do not decrease the GCV/UBRE score, until they do, or the direction is deemed to have 
failed. The \code{info} element of the returned object provides some convergence diagnostics.

The method is coded in \code{C} and is intended to be portable. It should be 
noted that seriously ill conditioned problems (i.e. with close to column rank 
deficiency in the design matrix) may cause problems, especially if weights vary 
wildly between observations.  
}
\value{ An object is returned with the following elements:
  
\item{b}{The best fit parameters given the estimated smoothing parameters.}

\item{scale}{The estimated or supplied scale parameter/error variance.}

\item{score}{The UBRE or GCV score.}

\item{sp}{The estimated (or supplied) smoothing parameters (\eqn{\lambda_i/\rho}{lambda_i/rho})}

\item{Vb}{Estimated covariance matrix of model parameters.}

\item{hat}{diagonal of the hat/influence matrix.}

\item{edf}{array of estimated degrees of freedom for each parameter.}

\item{info}{A list of convergence diagnostics, with the following elements:
\describe{
\item{edf}{Array of whole model estimated degrees of freedom.}
\item{score}{Array of ubre/gcv scores at the edfs for the final set of relative smoothing parameters.}
\item{g}{the gradient of the GCV/UBRE score w.r.t. the smoothing parameters at termination.}
\item{h}{the second derivatives corresponding to \code{g} above - i.e. the leading diagonal of the Hessian.}
\item{e}{the eigenvalues of the Hessian. These should all be non-negative!}
\item{iter}{the number of iterations taken.}
\item{in.ok}{\code{TRUE} if the second smoothing parameter guess improved the GCV/UBRE score. (Please report examples 
where this is \code{FALSE})}
\item{step.fail}{\code{TRUE} if the algorithm terminated by failing to improve the GCV/UBRE score rather than by "converging". 
Not necessarily a problem, but check the above derivative information quite carefully.}
} %info
}
}
\references{

Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428


\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\section{WARNING }{ The method may not behave well with near column rank deficient \eqn{ {\bf
X}}{X}
especially in contexts where the weights vary wildly. } 

\seealso{  
\code{\link{gam}},
\code{\link{magic}}
}

\examples{
library(help="mgcv") # listing of all routines

set.seed(1);n<-400;sig2<-4
x0 <- runif(n, 0, 1);x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1);x3 <- runif(n, 0, 1)
f <- 2 * sin(pi * x0)
f <- f + exp(2 * x1) - 3.75887
f <- f+0.2*x2^11*(10*(1-x2))^6+10*(10*x2)^3*(1-x2)^10-1.396
e <- rnorm(n, 0, sqrt(sig2))
y <- f + e
# set up additive model
G<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),fit=FALSE)
# fit using mgcv
mgfit<-mgcv(G$y,G$X,G$sp,G$S,G$off,C=G$C)
 
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..






\eof
\name{mgcv.control}
\alias{mgcv.control}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Setting mgcv defaults}
\description{ This is an internal function of package \code{mgcv} which allows control of the numerical 
options for fitting a generalized ridge regression problem using routine \code{mgcv}. 
}
\usage{
mgcv.control(conv.tol=1e-7,max.half=20,target.edf=NULL,min.edf=-1)
}
\arguments{ 
\item{conv.tol}{ The convergence tolerance.}

\item{max.half}{successive step halvings are employed if the Newton method and then the steepest descent backup
fail to improve the UBRE/GCV score. This is how many to use before giving up.}

\item{target.edf}{If this is non-null it indicates that cautious optimization should be used, which 
opts for the local minimum closest to the target model edf if there are multiple local minima in the 
GCV/UBRE score.}

\item{min.edf}{Lower bound on the model edf. Useful for avoiding numerical problems at high smoothing parameter
values. Negative for none.}
}


\references{

Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

\url{http://www.stats.gla.ac.uk/~simon/}


}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}


\seealso{   \code{\link{mgcv}} }

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..





\eof
\name{mono.con}
\alias{mono.con}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Monotonicity constraints for a cubic regression spline.}
\description{ Finds linear constraints sufficient for monotonicity (and
  optionally upper and/or lower boundedness) of a cubic regression
  spline. The basis representation assumed is that given by the
  \code{gam}, \code{"cr"} basis: that is the spline has a set of knots,
  which have fixed x values, but the y values of which constitute the
  parameters of the spline. }
\usage{
mono.con(x,up=TRUE,lower=NA,upper=NA)
}
%- maybe also `usage' for other objects documented here.
\arguments{
 \item{x}{The array of knot locations.}
 \item{up}{If \code{TRUE} then the constraints imply increase, if
   \code{FALSE} then decrease. }
 \item{lower}{This specifies the lower bound on the spline unless it is
   \code{NA} in which case no lower bound is imposed.}
 \item{upper}{This specifies the upper bound on the spline unless it is
   \code{NA} in which case no upper bound is imposed.}
}
\details{ Consider the natural cubic spline passing through the points:
  \eqn{ \{x_i,p_i:i=1 \ldots n \} }{ (x_i,p_i), i=1..n}. Then it is possible
  to find a relatively small set of linear constraints on \eqn{ \bf p}{p}
  sufficient to ensure monotonicity (and bounds if required):
  \eqn{ {\bf Ap}\ge{\bf b} }{Ap>=b}. Details are given in Wood (1994).
  This function returns a list containing \code{A} and \code{b}. 
}
\value{ The function returns a list containing constraint matrix
  \code{A} and constraint vector \code{b}.   
}
\references{

Gill, P.E., Murray, W. and Wright, M.H. (1981) Practical Optimization. Academic
Press, London. 

Wood, S.N. (1994) Monotonic smoothing splines fitted by cross validation SIAM
Journal on Scientific Computing 15(5):1126-1133


\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk} } 

\seealso{  \code{\link{mgcv}}, \code{\link{pcls}}  }

\examples{
# Fit a monotonic penalized regression spline .....

# Generate data from a monotonic truth.
set.seed(10);x<-runif(100)*4-1;x<-sort(x);
f<-exp(4*x)/(1+exp(4*x));y<-f+rnorm(100)*0.1;plot(x,y)
dat<-data.frame(x=x,y=y)
# Show regular spline fit (and save fitted object)
f.ug<-gam(y~s(x,k=10,bs="cr"));lines(x,fitted(f.ug))
# Create Design matrix, constraints etc. for monotonic spline....
gam.setup(y~s(x,k=10,bs="cr")-1,dat,fit.method="mgcv")->G;
F<-mono.con(G$smooth[[1]]$xp);
G$Ain<-F$A;G$bin<-F$b;G$C<-matrix(0,0,0);G$sp<-f.ug$sp;
G$p<-G$smooth[[1]]$xp;G$y<-y;G$w<-y*0+1

p<-pcls(G);  # fit spline (using s.p. from unconstrained fit)

# now modify the gam object from unconstrained fit a little, to use it
# for predicting and plotting constrained fit. 
p<-c(0,p);f.ug$coefficients<-p; 
x<-seq(min(x),max(x),length=200)
lines(x,predict.gam(f.ug,newdata=data.frame(x=x)),col=2)
}
\keyword{models} \keyword{smooth} \keyword{regression} %-- one or more ..







\eof
\name{mroot}
\alias{mroot}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Smallest square root of matrix}
\description{ Find a square root of a positive semi-definite matrix, 
having as few columns as possible. Uses either pivoted Choleski 
decomposition or singular value decomposition to do this.  
}
\usage{
mroot(A,rank=NULL,method="chol")
}
%- maybe also `usage' for other objects documented here.
\arguments{
 \item{A}{ The positive semi-definite matrix, a square root of which is 
to be found.}
\item{rank}{if the rank of the matrix \code{A} is known then it should 
be supplied.}
\item{method}{ \code{"chol"} to use pivoted Choleski decomposition, 
which is fast but tends to over-estimate rank. \code{"svd"} to use 
singular value decomposition, which is slow, but is the most accurate way 
to estimate rank.}
}

\details{ The routine uses an LAPACK SVD routine, or the LINPACK pivoted 
Choleski routine. It is primarily of use for turning penalized regression 
problems into ordinary regression problems.} 
\value{ A matrix, \eqn{ {\bf B}}{B} with as many columns as the rank of 
\eqn{ {\bf A}}{A}, and such that \eqn{ {\bf A} = {\bf BB}^\prime}{A=BB'}.}
 

\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..










\eof
\name{new.name}
\alias{new.name}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Obtain a name for a new variable that is not already in use}
\description{ \code{\link{gamm}} works by transforming a GAMM into something 
that can be estimated by \code{\link{lme}}, but this involves creating new 
variables, the names of which should not clash with the names of other 
variables on which the model depends. This simple service routine checks a 
suggested name against a list of those in use, and if necessary modifies it 
so that there is no clash.}

\usage{ new.name(proposed,old.names)
}
%- maybe also `usage' for other objects documented here.
\arguments{
 \item{proposed}{a suggested name}

 \item{old.names}{ An array of names that must not be duplicated} 
}

\value{A name that is not in \code{old.names}.}

\references{
\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk} } 

\seealso{  \code{\link{gamm} } }

\examples{
old <- c("a","tuba","is","tubby")
new.name("tubby",old)
}
\keyword{models} \keyword{smooth} \keyword{regression} %-- one or more ..







\eof
\name{notExp}
\alias{notExp}
\alias{notLog}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ Functions for better-than-log positive parameterization}
\description{ It is common practice in statistical optimization to use log-parameterizations when a 
parameter ought to be positive. i.e. if an optimization parameter \code{a} should be non-negative then 
we use \code{a=exp(b)} and optimize with respect to the unconstrained parameter \code{b}. This often works 
well, but it does imply a rather limited working range for \code{b}: using 8 byte doubles, for example, 
 if \code{b}'s magnitude gets much above 700 then \code{a} overflows or underflows. This can cause 
problems for numerical optimization methods. 

\code{notExp} is a monotonic function for mapping the real line into the positive real line with much less
extreme underflow and overflow behaviour than \code{exp}. It is a piece-wise function, but is continuous 
to second derivative: see the source code for the exact definition, and the example below to see what it 
looks like.

\code{notLog} is the inverse function of \code{notExp}.

The major use of these functions is to provide more robust \code{pdMat} classes for \code{lme} for use by
\code{\link{gamm}}.
}

\usage{
notExp(x)

notLog(x)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{x}{Argument array of real numbers (\code{notExp}) or positive real numbers (\code{notLog}).}
}
\value{ An array of function values evaluated at the supplied argument values.}


\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\references{

\url{http://www.stats.gla.ac.uk/~simon/}

}

\seealso{ \code{\link{pdTens}}, \code{\link{pdIdnot}},  \code{\link{gamm}}}

\examples{
## Illustrate the notExp function: 
## less steep than exp, but still monotonic.
x <- -100:100/10
op <- par(mfrow=c(2,2))
plot(x,notExp(x),type="l")
lines(x,exp(x),col=2)
plot(x,log(notExp(x)),type="l")
lines(x,log(exp(x)),col=2) # redundancy intended
x <- x/4
plot(x,notExp(x),type="l")
lines(x,exp(x),col=2)
plot(x,log(notExp(x)),type="l")
lines(x,log(exp(x)),col=2) # redundancy intended
par(op)
range(notLog(notExp(x))-x) # show that inverse works!
}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..










\eof
\name{null.space.dimension}
\alias{null.space.dimension}
\alias{null.space.basis.powers}
\alias{null.space.basis.labels}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{The basis of the space of un-penalized functions for a t.p.r.s.}
\description{ The thin plate spline penalties give zero penalty to some
  functions. The space of these functions is spanned by a set of
  polynomial terms. \code{null.space.dimension} finds the dimension of this space, \eqn{M}{M}, given
  the number of covariates that the smoother is a function of, \eqn{d}{d},
  and the order of the smoothing penalty, \eqn{m}{m}. If \eqn{m}{m} does not
  satisfy \eqn{2m>d}{2m>d} then the smallest possible dimension
  for the null space is found given \eqn{d}{d} and the requirement that
  the smooth should be visually smooth. \code{null.space.basis.powers()}
  produces an \code{M} by \code{d} array, the ith row of which gives the
  powers of each of the d covariates featuring in the ith basis
  term. \code{null.space.basis.labels()} gives the equivalent information
  as a list of basis labels - the key feature of which is that the same
  term will always have the same label, irrespective of the order in
  which variables are supplied to the routine. For some models a smooth
  may be multiplied by a \code{by} variable: in such cases the basis
  labels can include this name as well, but note that the constant label
  will be returned as if the \code{by} variable didn't exist (to meet
  requirements of function \code{gam.side.conditions}).
 
}
\usage{
null.space.dimension(d,m)
null.space.basis.powers(m,d)
null.space.basis.labels(names,m,by="NA")
}
%- maybe also `usage' for other objects documented here.
\arguments{
 \item{d}{ is a positive integer - the number of variables of which the
   t.p.s. is a function. }
 \item{m}{ a non-negative integer giving the order of the penalty
   functional, or signalling that the default order should be used.}
 \item{names}{is an array of \code{d} variable names}
 \item{by}{the name of any \code{by} variable, which multiplies all the
   terms in the penalty null space basis.}
}
\details{ Thin plate splines are only visually smooth if the order of the
  wiggliness penalty, \eqn{m}{m}, satisfies \eqn{2m > d+1}{2m >
    d+1}. If \eqn{2m<d+1}{2m<d+1} then this routine finds the smallest
  \eqn{m}{m} giving visual smoothness
  for the given \eqn{d}{d}, otherwise the supplied \eqn{m}{m} is used. The null space dimension is given by:

  \eqn{M=(m+d-1)!/(d!(m-1)!)}{M=(m+d-1)!/(d!(m-1)!)}

  which is the value returned.

  
}
\value{
\code{null.space.dimension} returns an integer (array), the null space dimension
\eqn{M}{M}: this is the only one of these functions that accepts array arguments. \cr  \code{null.space.basis.powers} produces a 2-d array,
\code{p}, say: each row corresponds to one of the \code{M} basis vectors, while each column
corresponds to one covariate. So if \code{x1}, \code{x2}, \code{x3}, etc
are the covariates, the ith basis vector is given by
\code{x1^p[i,1]*x2^p[i,2]*x3^p[i,3]...}. \cr
\code{null.space.basis.labels()} just produces unique labels for the
basis terms.    
 
}

\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\references{
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}
}

\section{WARNING}{ This routine operates symbolically, not numerically: your model may
  still be numerically un-identifiable, and the routine can't tell if
  two variables with different names are really the same.
}  

\seealso{\code{\link{gam.side.conditions}}, \code{\link{gam}}}

\examples{
null.space.dimension(2,0)
null.space.basis.powers(2,2)
null.space.basis.labels(c("x","z"),m=2)
null.space.basis.labels(c("z","x"),m=2) # labels are invariant
}
\keyword{models} \keyword{regression}%-- one or more ..



\eof
\name{pcls}
\alias{pcls}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ Penalized Constrained Least Squares Fitting}
\description{
Solves least squares problems with quadratic penalties subject to linear
equality and inequality constraints using quadratic programming.
}
\usage{
pcls(M)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{M}{is the single list argument to \code{pcls}. It should have  the 
  following elements:
\describe{
 \item{y}{The response data vector.}
 \item{w}{A vector of weights for the data (often proportional to the 
           reciprocal of the variance). }
 \item{X}{The design matrix for the problem, note that \code{ncol(M$X)}
            must give the number of model parameters, while \code{nrow(M$X)} 
            should give the number of data.}
\item{C}{Matrix containing any linear equality constraints 
            on the problem (e.g. \eqn{ \bf C}{C} in \eqn{ {\bf Cp}={\bf
		c} }{Cp=c}). If you have no equality constraints
	    initialize this to a zero by zero matrix. Note that there is no need 
            to supply the vector \eqn{ \bf c}{c}, it is defined implicitly by the 
            initial parameter estimates \eqn{ \bf p}{p}.}
 \item{S}{ A list of penalty matrices. \code{S[[i]]} is the smallest contiguous matrix including 
          all the non-zero elements of the ith penalty matrix. The first parameter it
          penalizes is given by \code{off[i]+1} (starting counting at 1). }
 \item{off}{ Offset values locating the elements of \code{M$S} in
   the correct location within each penalty coefficient matrix. (Zero
   offset implies starting in first location)}
\item{sp}{ An array of  smoothing parameter estimates.}
\item{p}{An array of feasible initial parameter estimates - these must
satisfy the constraints, but should avoid satisfying the inequality
constraints as equality constraints.}
\item{Ain}{Matrix for the inequality constraints \eqn{ {\bf A}_{in}
    {\bf p} > {\bf b}_{in}}{A_in p > b_in}. }
\item{bin}{vector in the inequality constraints. }
} % end itemize
} % end M
}
\details{ 

  This solves the problem:
 
\deqn{ minimise~ \| { \bf W}^{1/2} ({ \bf Xp - y} ) \|^2  +  \sum_{i=1}^m
\lambda_i {\bf p^\prime S}_i{\bf p} }{ minimise || W^0.5 (Xp-y) ||^2 + 
lambda_1 p'S_1 p + lambda_2 p'S_2 p + . . .}
subject to constraints \eqn{ {\bf Cp}={\bf c}}{Cp=c} and \eqn{ {\bf
    A}_{in}{\bf p}>{\bf b}_{in}}{A_in p > b_in}, w.r.t. \eqn{\bf p}{p} given the
smoothing parameters \eqn{\lambda_i}{lambda_i}.
\eqn{ {\bf X}}{X} is a design matrix, \eqn{\bf p}{p} a parameter vector, 
\eqn{\bf y}{y} a data vector, \eqn{\bf W}{W} a diagonal weight matrix,
\eqn{ {\bf S}_i}{S_i} a positive semi-definite matrix  of coefficients
defining the ith penalty and \eqn{\bf C}{C} a matrix of coefficients 
defining the linear equality constraints on the problem. The smoothing
parameters are the \eqn{\lambda_i}{lambda_i}. Note that \eqn{ {\bf X}}{X}
must be of full column rank, at least when projected  into the null space
of any equality constraints. \eqn{ {\bf A}_{in}}{A_in} is a matrix of
coefficients defining the inequality constraints, while \eqn{ {\bf
    b}_{in}}{b_in} is a vector involved in defining the inequality constraints.  

Quadratic programming is used to perform the solution. The method used
is designed for maximum stability with least squares problems:
i.e. \eqn{ {\bf X}^\prime {\bf X}}{X'X} is not formed explicitly. See
Gill et al. 1981.

}
\value{ The function returns an array containing the estimated parameter
  vector. 
   
}
\references{

Gill, P.E., Murray, W. and Wright, M.H. (1981) Practical Optimization. Academic
Press, London. 

Wood, S.N. (1994) Monotonic smoothing splines fitted by cross validation SIAM
Journal on Scientific Computing 15(5):1126-1133

\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}} 

\seealso{  \code{\link{mgcv}}, \code{\link{mono.con}}  }

\examples{
# first an un-penalized example - fit E(y)=a+bx subject to a>0
set.seed(0)
n<-100
x<-runif(n);y<-x-0.2+rnorm(n)*0.1
M<-list(X=matrix(0,n,2),p=c(0.1,0.5),off=array(0,0),S=list(),
Ain=matrix(0,1,2),bin=0,C=matrix(0,0,0),sp=0,y=y,w=y*0+1)
M$X[,1]<-1;M$X[,2]<-x;M$Ain[1,]<-c(1,0)
pcls(M)->M$p
plot(x,y);abline(M$p,col=2);abline(coef(lm(y~x)),col=3)

# Penalized example: monotonic penalized regression spline .....

# Generate data from a monotonic truth.
x<-runif(100)*4-1;x<-sort(x);
f<-exp(4*x)/(1+exp(4*x));y<-f+rnorm(100)*0.1;plot(x,y)
dat<-data.frame(x=x,y=y)
# Show regular spline fit (and save fitted object)
f.ug<-gam(y~s(x,k=10,bs="cr"));lines(x,fitted(f.ug))
# Create Design matrix, constraints etc. for monotonic spline....
sm<-smooth.construct(s(x,k=10,bs="cr"),dat,knots=NULL)
F<-mono.con(sm$xp);   # get constraints
G<-list(X=sm$X,C=matrix(0,0,0),sp=f.ug$sp,p=sm$xp,y=y,w=y*0+1)
G$Ain<-F$A;G$bin<-F$b;G$S<-sm$S;G$off<-0

p<-pcls(G);  # fit spline (using s.p. from unconstrained fit)

fv<-Predict.matrix(sm,data.frame(x=x))\%*\%p
lines(x,fv,col=2)

# now a tprs example of the same thing....

f.ug<-gam(y~s(x,k=10));lines(x,fitted(f.ug))
# Create Design matrix, constraints etc. for monotonic spline....
sm<-smooth.construct(s(x,k=10,bs="tp"),dat,knots=NULL)
nc<-40         # number of constraints
xc<-0:nc/nc # points on [0,1]  
xc<-xc*4-1  # points at which to impose constraints
A0<-Predict.matrix(sm,data.frame(x=xc)) 
# ... A0\%*\%p evaluates spline at xc points
A1<-Predict.matrix(sm,data.frame(x=xc+1e-6)) 
A<-(A1-A0)/1e-6    
# ... approx. constraint matrix (A\%*\%p is -ve spline gradient at points xc)
G<-list(X=sm$X,C=matrix(0,0,0),sp=f.ug$sp,y=y,w=y*0+1,S=sm$S,off=0)
G$Ain<-A;    # constraint matrix
G$bin<-rep(0,nc);  # constraint vector
G$p<-rep(0,10);G$p[10]<-0.1  
# ... monotonic start params, got by setting coefs of polynomial part
p<-pcls(G);  # fit spline (using s.p. from unconstrained fit)

fv2<-Predict.matrix(sm,data.frame(x=x))\%*\%p
lines(x,fv2,col=3)
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..





\eof
\name{pdIdnot}
\alias{pdIdnot}
\alias{pdConstruct.pdIdnot}
\alias{pdFactor.pdIdnot}
\alias{pdMatrix.pdIdnot}
\alias{coef.pdIdnot}
\alias{corMatrix.pdIdnot}
\alias{Dim.pdIdnot}
\alias{logDet.pdIdnot}
\alias{solve.pdIdnot}
\alias{summary.pdIdnot}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Overflow proof pdMat class for multiples of the identity matrix}
\description{ This set of functions is a modification of the  \code{pdMat} class \code{pdIdent}
from library \code{nlme}. The modification is to replace the log parameterization used in \code{pdMat}
with a \code{\link{notLog}} parameterization, since the latter is much less susceptible to overflow 
and underflow of the parameters on the original scale. The functions are particularly useful for
working with Generalized Additive Mixed Models where variance parameters/smoothing parameters can
be very large or very small, so that overflow or underflow can be a problem.

These functions would not normally be called directly, although unlike the \code{\link{pdTens}} class it is easy to do so.
}

\usage{
pdIdnot(value = numeric(0), form = NULL, 
       nam = NULL, data = sys.frame(sys.parent()))
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{value}{Initialization values for parameters. Not normally used.}

\item{form}{A one sided formula specifying the random effects structure. }   

\item{nam}{a names argument, not normally used with this class.}

\item{data}{data frame in which to evaluate formula.}
}
\details{ 
Note that while the \code{pdFactor} and \code{pdMatrix} functions return the inverse of the scaled random 
effect covariance matrix or its factor, the \code{pdConstruct} function is initialised with estimates of the 
scaled covariance matrix itself.

}
\value{ A class \code{pdIdnot} object, or related quantities. See the \code{nlme} documentation for further details.}


\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\references{
Pinheiro J.C. and Bates, D.M. (2000) Mixed effects Models in S and S-PLUS. Springer

The \code{nlme} source code.

\url{http://www.stats.gla.ac.uk/~simon/}

}

\seealso{ \code{\link{te}}, \code{\link{pdTens}}, \code{\link{notLog}}, \code{\link{gamm}}}

\examples{
# see gamm
}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..










\eof
\name{pdTens}
\alias{pdTens}
\alias{pdConstruct.pdTens}
\alias{pdFactor.pdTens}
\alias{pdMatrix.pdTens}
\alias{coef.pdTens}
\alias{summary.pdTens}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Functions implementing a pdMat class for tensor product smooths}
\description{This set of functions implements an \code{nlme} library \code{pdMat} class to allow
tensor product smooths to be estimated by \code{lme} as called by \code{gamm}. Tensor product smooths
have a penalty matrix made up of a weighted sum of penalty matrices, where the weights are the smoothing 
parameters. In the mixed model formulation the penalty matrix is the inverse of the covariance matrix for 
the random effects of a term, and the smoothing parameters (times a half) are variance parameters to be estimated. 
It's not 
possible to transform the problem to make the required random effects covariance matrix look like one of the standard 
\code{pdMat} classes: hence the need for the \code{pdTens} class. A \code{\link{notLog}} parameterization ensures that 
the parameters are positive. 

These functions would not normally be called directly.
}

\usage{
pdTens(value = numeric(0), form = NULL, 
       nam = NULL, data = sys.frame(sys.parent()))
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{value}{Initialization values for parameters. Not normally used.}

\item{form}{A one sided formula specifying the random effects structure. The formula should have
an attribute \code{S} which is a list of the penalty matrices the weighted sum of which gives the inverse of the 
covariance matrix for these random effects.}   

\item{nam}{a names argument, not normally used with this class.}

\item{data}{data frame in which to evaluate formula.}
}
\details{ This appears to be the minimum set of functions required to implement a new \code{pdMat} class. 

Note that while the \code{pdFactor} and \code{pdMatrix} functions return the inverse of the scaled random 
effect covariance matrix or its factor, the \code{pdConstruct} function is initialised with estimates of the 
scaled covariance matrix itself.

}
\value{ A class \code{pdTens} object, or its coefficients or the matrix it represents or the factor of that matrix.}


\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\references{
Pinheiro J.C. and Bates, D.M. (2000) Mixed effects Models in S and S-PLUS. Springer

The \code{nlme} source code.

\url{http://www.stats.gla.ac.uk/~simon/}

}

\seealso{ \code{\link{te}}, \code{\link{gamm}}}

\examples{
# see gamm
}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..










\eof
\name{place.knots}
\alias{place.knots}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ Automatically place a set of knots evenly through covariate values}
\description{
Given a univariate array of covariate values, places a set of knots for a regression spline evenly through the 
covariate values.
}
\usage{ place.knots(x,nk)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{x}{array of covariate values (need not be sorted).} 
 \item{nk}{integer indicating the required number of knots.}
}
\details{ 
Places knots evenly throughout a set of covariates. For example, if you had 11 covariate values and wanted 6 knots 
then a knot would be placed at the first (sorted) covariate value and every second (sorted) value thereafter. With 
less convenient numbers of data and knots the knots are placed within intervals between data in order to achieve 
even coverage, where even means having approximately the same number of data between each pair of knots.

}
\value{ An array of knot locations.
}
\references{
\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}} 

\seealso{ \code{\link{smooth.construct.cc.smooth.spec}}  }

\examples{
# simulate 30 (unsorted) covariate values on the unit interval
x <- runif(n = 30)
# place 7 knots evenly through the covariate values
place.knots(x, nk = 7)
# remove the simulated data again
rm(x)
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..





\eof
\name{plot.gam}
\alias{plot.gam}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Default GAM plotting}
\description{ Takes a fitted \code{gam} object produced by \code{gam()} and plots the 
   component smooth functions that make it up, on the scale of the linear predictor.
}
\usage{
plot.gam(x,residuals=FALSE,rug=TRUE,se=TRUE,pages=0,select=NULL,
         scale=-1,n=100,n2=40,pers=FALSE,theta=30,phi=30,jit=FALSE,
         xlab=NULL,ylab=NULL,main=NULL,ylim=NULL,xlim=NULL,too.far=0.1,...)
}
%- maybe also `usage' for other objects documented here.
\arguments{ 
  
\item{x}{ a fitted \code{gam} object as produced by \code{gam()}.}

\item{residuals}{If \code{TRUE} then partial residuals are added to plots of 1-D smooths. If \code{FALSE} 
then no residuals are added. If this is an array of the correct length then it is used as the array of 
residuals to be used for producing partial residuals. If \code{TRUE} then the
residuals are the working residuals from the IRLS iteration weighted by the
IRLS weights. Partial residuals for a smooth term are the
residuals that would be obtained by dropping the term concerned from the model, while leaving all other 
estimates fixed (i.e. the estimates for the term plus the residuals).}

\item{rug}{ when TRUE (default) then the covariate to which the plot applies is displayed as a rug plot
 at the foot of each plot of a 1-d smooth, and the locations of the
 covariates are plotted as points on the contour plot representing a 2-d
 smooth.} 
  
\item{se}{ when TRUE (default) upper and lower lines are added to the
  1-d plots at 2 standard errors
        above and below the estimate of the smooth being plotted while for
	2-d plots, surfaces at +1 and -1 standard errors are contoured
	and overlaid on the contour plot for the estimate. If a
	positive number is supplied then this number is multiplied by
	the standard errors when calculating standard error curves or surfaces.}

\item{pages}{ (default 0) the number of pages over which to spread the output. For example, 
if \code{pages=1} then all terms will be plotted on one page with the layout performed automatically. 
Set to 0 to have the routine leave all graphics settings as they are. }

\item{select}{Allows the  plot for a single model term to be selected for printing. e.g. if you just want the plot for the second smooth term set \code{select=2}. }

\item{scale}{ set to -1 (default) to have the same y-axis scale for each plot, and to 0 for a 
          different y axis for each plot. Ignored if \code{ylim} supplied.}

\item{n}{ number of points used for each 1-d plot - for a nice smooth plot this needs to be several times the estimated 
  degrees of freedom for the smooth. Default value 100.}

\item{n2}{Square root of number of points used to grid estimates of 2-d
  functions for contouring.}

\item{pers}{Set to \code{TRUE} if you want perspective plots for 2-d
  terms.}

\item{theta}{One of the perspective plot angles.}

\item{phi}{The other perspective plot angle.}

\item{jit}{Set to TRUE if you want rug plots for 1-d terms to be jittered.}

\item{xlab}{If supplied then this will be used as the x label for all plots.}

\item{ylab}{If supplied then this will be used as the y label for all plots.}

\item{main}{Used as title (or z axis label) for plots if supplied.}

\item{ylim}{If supplied then this pair of numbers are used as the y limits for each plot.}

\item{xlim}{If supplied then this pair of numbers are used as the x limits for each plot.}

\item{too.far}{If greater than 0 then this is used to determine when a location is too
far from data to be plotted when plotting 2-D smooths. This is useful since smooths tend to go wild away from data.
The data are scaled into the unit square before deciding what to exclude, and \code{too.far} is a distance 
within the unit square.}

\item{...}{ other graphics parameters to pass on to plotting commands.}

}
\details{ Produces default plot showing the smooth components of a
  fitted GAM.

  For plots of 1-d smooths, the x axis of each plot is labelled 
   with the covariate name, while the y axis is labelled \code{s(cov,edf) } where \code{cov}
   is the covariate name, and \code{edf} the estimated (or user defined for regression splines) degrees of freedom of the smooth.

Contour plots are produced for 2-d smooths with the x-axes labelled with the first covariate
name and the y axis with the second covariate name. The main title of
the plot is something like \code{s(var1,var2,edf)}, indicating the
variables of which the term is a function, and the estimated degrees of
freedom for the term. When \code{se=TRUE}, estimator variability is shown by overlaying
contour plots at plus and minus 1 s.e. relative to the main
estimate. If \code{se} is a positive number then contour plots are at plus or minus \code{se} multiplied
by the s.e. Contour levels are chosen to try and ensure reasonable
separation of the contours of the different plots, but this is not
always easy to achieve. Note that these plots can not be modified to the same extent as the other plot. 
   
   Within the function, the data for the plots is obtained by direct
   calls to the compiled C code that \code{predict.gam} uses.   

   Smooths of more than 2 variables are not currently dealt with, but
   simply generate a warning, but see \code{\link{vis.gam}}.
   }

\value{ The function simply generates plots.
}

\references{

Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398


Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\section{WARNING }{ Note that the behaviour of this function is not identical to 
\code{plot.gam()} in S-PLUS.

Plots of 2-D smooths with standard error contours shown can not easily be customized.

The function can not deal with smooths of more than 2 variables!
} 

\seealso{  \code{\link{gam}}, \code{\link{predict.gam}}, \code{\link{vis.gam}}}

\examples{
library(mgcv)
set.seed(0)
n<-200
sig2<-4   # variance of the simulated noise
# four independent uniform covariates (x3 does not enter the simulated response)
x0 <- runif(n, 0, 1)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
x3 <- runif(n, 0, 1)
# build the true response from functions of x0, x1 and x2, using
# R's built-in constant pi rather than masking it in the workspace
y <- 2 * sin(pi * x0)
y <- y + exp(2 * x1) - 3.75887
y <- y + 0.2 * x2^11 * (10 * (1 - x2))^6 + 10 * (10 * x2)^3 * (1 - x2)^10 - 1.396
e <- rnorm(n, 0, sqrt(abs(sig2)))
y <- y + e
# all 1-d smooths on a single page, with partial residuals added
b<-gam(y~s(x0)+s(x1)+s(x2)+s(x3))
plot(b,pages=1,residuals=TRUE)
# example with 2-d plots
b1<-gam(y~s(x0,x1)+s(x2)+s(x3))
op<-par(mfrow=c(2,2))   # save graphics settings while setting up a 2 by 2 layout
plot(b1)
par(op)                 # restore the saved graphics settings
}
\keyword{models} \keyword{smooth} \keyword{regression} \keyword{hplot}%-- one or more ...







\eof
\name{predict.gam}
\alias{predict.gam}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Prediction from fitted GAM model}
\description{ Takes a fitted \code{gam} object produced by \code{gam()} 
and produces predictions given a new set of values for the model covariates 
or the original values used for the model fit.
}
\usage{
predict.gam(object,newdata,type="link",se.fit=FALSE,...)
}
%- maybe also `usage' for other objects documented here.
\arguments{ 
  
 \item{object}{ a fitted \code{gam} object as produced by \code{gam()}.
                }
 \item{newdata}{ A data frame containing the values of the model covariates at which predictions
  are required. If this is not provided then predictions corresponding to the
                original data are returned. If \code{newdata} is provided then
                it should contain all the variables needed for prediction: a
                warning is generated if not.

 } 
 
 
\item{type}{ When this has the value \code{"link"} (default) the linear predictor (possibly with
associated standard errors) is returned. When \code{type="terms"} each component of the 
linear predictor is returned separately (possibly with standard errors): this includes 
parametric model components, followed by each smooth component, but excludes any offset. When \code{type="response"} predictions 
on the scale of the response are returned (possibly with approximate
standard errors). When \code{type="lpmatrix"} then a matrix is returned
which yields the values of the linear predictor (minus any offset) when applied to the
parameter vector (in this case \code{se.fit} is ignored). The latter
option is most useful for getting variance estimates for integrated quantities. }

 \item{se.fit}{ when this is TRUE (not default) standard error estimates are returned for each prediction.}

 \item{...}{ other arguments.}

}

\value{ If \code{type=="lpmatrix"} then a matrix is returned which will
  give a vector of linear predictor values (minus any offset) at the supplied covariate
  values, when applied to the model coefficient vector. 
Otherwise, if \code{se.fit} is \code{TRUE} then a 2 item list is returned with items (both arrays) \code{fit}
and \code{se.fit} containing predictions and associated standard error estimates, otherwise an 
array of predictions is returned. The dimensions of the returned arrays depends on whether 
\code{type} is \code{"terms"} or not: if it is then the array is 2 dimensional with each 
term in the linear predictor separate, otherwise the array is 1 dimensional and contains the 
linear predictor/predicted values (or corresponding s.e.s). The linear predictor returned termwise will 
not include the offset.

\code{newdata} can be a data frame, list or model.frame: if it's a model frame
then all variables must be supplied.

}

\details{The standard errors produced by \code{predict.gam} are based on the
Bayesian posterior covariance matrix of the parameters \code{Vp} in the fitted
gam object. }

\references{

Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\section{WARNING }{ Note that the behaviour of this function is not identical to 
\code{predict.gam()} in S-PLUS.
} 

\seealso{  \code{\link{gam}}, \code{\link{gamm}}, \code{\link{plot.gam}}}

\examples{
library(mgcv)
set.seed(0)   # make the simulated data, and hence the example output, reproducible
n<-200
sig2<-4       # variance of the simulated noise
# four independent uniform covariates (x3 does not enter the simulated response)
x0 <- runif(n, 0, 1)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
x3 <- runif(n, 0, 1)
# build the true response from functions of x0, x1 and x2, using
# R's built-in constant pi rather than masking it in the workspace
y <- 2 * sin(pi * x0)
y <- y + exp(2 * x1) - 3.75887
y <- y + 0.2 * x2^11 * (10 * (1 - x2))^6 + 10 * (10 * x2)^3 * (1 - x2)^10 - 1.396
e <- rnorm(n, 0, sqrt(abs(sig2)))
y <- y + e
b<-gam(y~s(x0)+s(x1)+s(x2)+s(x3))
# new data must supply every covariate used in the model formula
newd<-data.frame(x0=(0:30)/30,x1=(0:30)/30,x2=(0:30)/30,x3=(0:30)/30)
# predictions on the linear predictor scale (the default, type="link")
pred<-predict.gam(b,newd)
 
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..


\eof
\name{print.gam}
\alias{print.gam}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Generalized Additive Model default print statement}
\description{ This is the default print statement for a GAM object. If you need a list of everything
that is part of a gam object see \code{\link{gam}}, or use \code{names()}. The family (including link), 
model formula, and estimated degrees of freedom for each model term (plus total) are printed, as 
well as the minimized GCV or UBRE score, depending on which was used.
}

\references{

Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}



\seealso{   \code{\link{gam}},\code{\link{mgcv}}}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..










\eof
\name{residuals.gam}
\alias{residuals.gam}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Generalized Additive Model residuals}
\description{Returns residuals for a fitted \code{gam} model
  object. Pearson, deviance, working and response residuals are
  available. 
}

\usage{
residuals.gam(object, type = c("deviance", "pearson","scaled.pearson", "working", "response"),...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{object}{ a \code{gam} fitted model object. }
  \item{type}{the type of residuals wanted. }
   \item{...}{other arguments.}
}
\details{Response residuals are the raw residuals (data minus fitted
  values). Scaled Pearson residuals are raw residuals divided by the standard
  deviation of the data according to the model mean variance
  relationship and estimated scale parameter. Pearson residuals are the same, but multiplied by the square root 
  of the scale parameter (so they are independent of the scale parameter):
  (\eqn{(y-\mu)/\sqrt{V(\mu)}}{(y-m)/V(m)^0.5}, where  \eqn{y}{y} is data \eqn{\mu}{m} 
  is model fitted value and \eqn{V}{V} is model mean-variance relationship.). Both are provided since not all texts 
  agree on the definition of Pearson residuals. Deviance residuals simply
  return the deviance residuals defined by the model family. Working
  residuals are the residuals returned from model fitting at convergence.

  There is  a special function for \code{gam} objects because of a bug
  in the calculation of Pearson residuals in some earlier versions of
  \code{residuals.glm}.
 
}
\value{ An array of residuals.
}


\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}



\seealso{   \code{\link{gam}}}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..













\eof
\name{s}
\alias{s}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Defining smooths in GAM formulae}
\description{ Function used in definition of smooth terms within
  \code{gam} model formulae. The function does not evaluate a (spline)
  smooth - it exists purely to help set up a model using spline based smooths.
}

\usage{s(..., k=-1,fx=FALSE,bs="tp",m=0,by=NA)}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{...}{ a list of variables that are the covariates that this
    smooth is a function of.}
  \item{k}{ the dimension of the basis used to represent the smooth term.
    The default depends on the number of variables that the smooth is a
    function of. \code{k} should not be less than the dimension of the
    null space of the penalty for the term (see
    \code{\link{null.space.dimension}}), but will be reset if
  it is.}
  \item{fx}{indicates whether the term is a fixed d.f. regression
  spline (\code{TRUE}) or a penalized regression spline (\code{FALSE}).}
  \item{bs}{this can be \code{"cr"} for a cubic regression spline, \code{"cc"} for
    a cyclic (periodic) spline,
    \code{"tp"} for a thin plate regression spline, or a user defined 
    character string for other user defined smooth classes. Of the built in alternatives, only thin plate
    regression splines can be used for multidimensional smooths, so this
    argument only has an effect for univariate smooths. Note that the
    \code{"cr"} and \code{"cc"} bases are faster to set up than the \code{"tp"} basis, particularly
  on large data sets.}
  \item{m}{The order of the penalty for this t.p.r.s. term (e.g. 2 for
    normal cubic spline penalty with 2nd derivatives). 0 signals
    autoinitialization, which sets the order to the lowest value
    satisfying 2m>d+1, where d is the number of covariates: this choice
    ensures visual smoothness. In addition, m must satisfy the technical
    restriction 2m>d, otherwise it will be autoinitialized.}
  \item{by}{specifies a covariate by which the whole smooth term is to
    be multiplied. This is particularly useful for creating models in
    which a smooth interacts with a factor: in this case the \code{by}
    variable would usually be the dummy variable coding one level of the
  factor. See the examples below.}
}
\details{The function does not evaluate the variable arguments. To use this function to specify use of
your own smooths, note the relationships between the inputs and the output object and see the example
in \code{\link{smooth.construct}}.
}
\value{ A class \code{xx.smooth.spec} object, where \code{xx} is a basis identifying code given by
the \code{bs} argument of \code{s}. These \code{smooth.spec} objects define smooths and are turned into
bases and penalties by \code{smooth.construct} method functions. 

The returned object contains the following items:

  \item{term}{An array of text strings giving the names of the covariates that 
the term is a function of.}
  \item{bs.dim}{The dimension of the basis used to represent the smooth.}
  \item{fixed}{TRUE if the term is to be treated as a pure regression
    spline (with fixed degrees of freedom); FALSE if it is to be treated
  as a penalized regression spline}
  \item{dim}{The dimension of the smoother - i.e. the number of
    covariates that it is a function of.}
  \item{p.order}{The order of the t.p.r.s. penalty, or 0 for
    auto-selection of the penalty order.}
  \item{by}{is the name of any \code{by} variable as text (\code{"NA"} for none).}
  \item{full.call}{Text for pasting into a string to be converted to a
    gam formula, which has the values of function options given explicitly -
  this is useful for constructing a fully expanded gam formula which can
be used without needing access to any variables that may have been used
to define \code{k}, \code{fx}, \code{bs} or \code{m} in the original call. i.e. this is text which
when parsed and evaluated generates a call to \code{s()} with all the
options spelled out explicitly.}
\item{label}{A suitable text label for this smooth term.}
}


\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\references{
Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}

}

\seealso{ \code{\link{te}}, \code{\link{gam}}, \code{\link{gamm}}}

\examples{
# using `by' variables to let the smooth of x1 differ between two factor levels
library(mgcv)
set.seed(0)
n <- 200
sig2 <- 4   # noise variance
# simulate the covariates (drawn in this order; x3 is not used in the model)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
x3 <- runif(n, 0, 1)
# grouping variable: first half of the data is level 1, second half is level 2
fac <- rep(c(1, 2), each = n/2)
# 0/1 dummy variables, one for each level
fac.1 <- as.numeric(fac == 1)
fac.2 <- 1 - fac.1
fac <- as.factor(fac)
# a different function of x1 within each level, plus a linear effect of x2
f1 <- exp(2 * x1) - 3.75887
f2 <- 0.2 * x1^11 * (10 * (1 - x1))^6 + 10 * (10 * x1)^3 * (1 - x1)^10
f <- f1 * fac.1 + f2 * fac.2 + x2
# add Gaussian noise to obtain the response
e <- rnorm(n, 0, sqrt(abs(sig2)))
y <- f + e
# NOTE: smooths will be centered, so need to include fac in model....
b <- gam(y ~ fac + s(x1, by = fac.1) + s(x1, by = fac.2) + x2)
plot(b, pages = 1)
}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..










\eof
\name{smooth.construct}
\alias{smooth.construct}
\alias{smooth.construct.tp.smooth.spec}
\alias{smooth.construct.cr.smooth.spec}
\alias{smooth.construct.cc.smooth.spec}
\alias{smooth.construct.tensor.smooth.spec}
\alias{p.spline}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Constructor functions for smooth terms in a GAM}
\description{Smooth terms in a GAM formula are turned into smooth specification objects of 
class \code{xx.smooth.spec} during processing of the formula. Each of these objects is
converted to a smooth object using an appropriate \code{smooth.construct} function. New smooth classes 
can be added by writing a new \code{smooth.construct} method function and a corresponding 
\code{\link{Predict.matrix}} method function (see example code below).
}
\usage{
smooth.construct(object,data,knots)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{object}{ is a smooth specification object, generated by an \code{\link{s}} or \code{\link{te}} term in a GAM 
formula. Objects generated by \code{s} terms have class \code{xx.smooth.spec} where \code{xx} is given by the 
\code{bs} argument of \code{s} (this convention allows the user to add their own smoothers). 
If \code{object} is not class \code{tensor.smooth.spec} it will have the following elements:
\describe{
\item{term}{The names of the covariates for this smooth, in an array.}
\item{bs.dim}{ Argument \code{k} of the \code{s} term generating the object. This is the dimension of the basis 
used to represent the term (or, arguably, 1 greater than the basis dimension for \code{cc} terms).}
\item{fixed}{\code{TRUE} if the term is to be unpenalized, otherwise \code{FALSE}.}
\item{dim}{the number of covariates of which this smooth is a function.}
\item{p.order}{the order of the smoothness penalty or 0 for autoselection of this. This is argument 
\code{m} of the \code{s} term that generated \code{object}.}
\item{by}{the name of any \code{by} variable to multiply this term as supplied as an argument to \code{s}. 
\code{"NA"} if there is no such term.}
\item{full.call}{The full version of the \code{s} term, with all defaults expanded explicitly.}
\item{label}{A suitable label for use with this term.}
\item{null.space.dim}{The dimension of the null space of the wiggliness penalty.}
}
If \code{object} is of class \code{tensor.smooth.spec} then it was generated by a \code{te} term in the GAM formula, 
and specifies a smooth of several variables with a basis generated as a tensor product of lower dimensional bases. 
In this case the object will be different and will have the following elements:
\describe{
\item{margin}{is a list of smooth specification objects of the type listed above, defining the bases which have 
their tensor product formed in order to construct this term.}
\item{term}{is the array of names of the covariates that are arguments of the smooth.}
\item{by}{is the name of any \code{by} variable, or \code{"NA"}.}
\item{fx}{is an array, the elements of which indicate whether (\code{TRUE}) any of the margins in the 
tensor product should be unpenalized.}
\item{full.call}{The full version of the \code{te} term, with all defaults expanded explicitly.}
\item{label}{A suitable label for use with this term.}
\item{dim}{is the number of covariates of which this smooth is a function.}
\item{null.space.dim}{The dimension of the null space of the wiggliness penalty.}
}}
\item{data}{a data frame in which the covariates and any \code{by} variable can be found.}
\item{knots}{an optional data frame specifying knot locations for each covariate. If it is null then the knot 
locations are generated automatically.}
}

\value{
The input argument \code{object}, assigned a new class to indicate what type of smooth it is and with at least the 
following items added:
\item{X}{The model matrix from this term.}
\item{C}{The matrix defining any constraints on the term - usually a one row matrix giving the column sums of the  
model matrix, which defines the constraint that each term should sum to zero over the covariate values.}
\item{S}{A list of positive semi-definite penalty matrices that apply to this term. The list will be empty 
if the term is to be left un-penalized.}
\item{rank}{an array giving the ranks of the penalties.}
\item{df}{the degrees of freedom associated with this term (at least when unpenalized).}

Usually the returned object will also include extra information required to define the basis, and used by 
\code{\link{Predict.matrix}} methods to make predictions using the basis. See the \code{Details} section for the information included for the built in smooth classes. 

\code{tensor.smooth} returned objects will additionally have each element of the \code{margin} list updated in the same way. 
}

\details{ The returned objects for the built in smooth classes have the following extra elements.
\code{cr.smooth} objects (generated using \code{bs="cr"}) have an additional array \code{xp} giving the knot locations used to generate the basis.

\code{cyclic.smooth} objects (generated using \code{bs="cc"}) have an array \code{xp} of knot locations and a matrix 
\code{BD} used to define the basis (BD transforms function values at the knots to second derivatives at the knots).

\code{tprs.smooth} objects require several items to be stored in order to define the basis. These are:
\describe{ 
\item{shift}{A record of the shift applied to each covariate in order to center it around zero and 
avoid any co-linearity problems that might otherwise occur in the penalty null space basis of the term. }
\item{Xu}{A matrix of the unique covariate combinations for this smooth (the basis is constructed by first stripping 
out duplicate locations).}
\item{UZ}{The matrix mapping the t.p.r.s. parameters back to the parameters of a full thin plate spline.}
\item{null.space.dimension}{The dimension of the space of functions that have zero wiggliness according to the 
wiggliness penalty for this term.}
}

Again, these extra elements would be found in the elements of the \code{margin} list of \code{tensor.smooth} 
class objects.
}


\references{ 

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

Wood, S.N. (in press) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass.

The p-spline code given in the example is based on:

Eilers, P.H.C. and B.D. Marx (1996) Flexible Smoothing with B-splines and Penalties. 
Statistical Science, 11(2):89-121

\url{http://www.stats.gla.ac.uk/~simon/}
}

\author{Simon N. Wood \email{simon@stats.gla.ac.uk}}

\seealso{ \code{get.var}, \code{\link{gamm}}, \code{\link{gam}}, \code{\link{Predict.matrix}} }

\examples{# adding "p-spline" classes and methods

smooth.construct.ps.smooth.spec<-function(object,data,knots)
# a p-spline constructor method function
# Arguments:
#   object - smooth specification; the fields read here are p.order,
#            bs.dim, term, fixed and by
#   data   - source of the covariate (and of any by variable), read with get.var
#   knots  - NULL, or an object from which get.var can extract the knot
#            locations for object$term
# Value: object with X, rank, null.space.dim, knots, m, C and df added
# (plus S when object$fixed is FALSE) and class set to "pspline.smooth"
{ require(splines)
  # a single p.order value is used for both the basis and the penalty order
  if (length(object$p.order)==1) m<-rep(object$p.order,2) 
  else m<-object$p.order  # m[1] - basis order, m[2] - penalty order
  nk<-object$bs.dim-m[1]  # number of interior knots
  if (nk<=0) stop("basis dimension too small for b-spline order")
  x <- get.var(object$term,data)  # find the data
  xl<-min(x);xu<-max(x);xr<-xu-xl # data limits and range
  # widen the limits by 0.001 of the range at each end; dx is the spacing
  # of nk evenly spaced points over the widened interval
  # NOTE(review): nk==1 passes the check above but makes dx infinite
  # (division by nk-1) - confirm whether bs.dim must exceed m[1]+1
  xl<-xl-xr*0.001;xu<-xu+xr*0.001;dx<-(xu-xl)/(nk-1) 
  if (!is.null(knots)) k <- get.var(object$term,knots) 
  else k<-NULL
  # default knots: evenly spaced, extending dx*(m[1]+1) beyond the
  # unwidened data range at each end (xl and xu only enter through dx)
  if (is.null(k)) 
  k<-seq(min(x)-dx*(m[1]+1),max(x)+dx*(m[1]+1),length=nk+2*m[1]+2)   
  if (length(k)!=nk+2*m[1]+2) 
  stop(paste("there should be ",nk+2*m[1]+2," supplied knots"))
  # spline.des arguments: knots, evaluation points, spline order m[1]+2 and
  # derivative orders (x*0 is all zeros, so basis function values are returned)
  object$X<-spline.des(k,x,m[1]+2,x*0)$design # get model matrix
  if (!object$fixed)       
  # start from the identity and difference it m[2] times; the penalty is the
  # crossproduct of the resulting difference matrix
  { S<-diag(object$bs.dim);if (m[2]) for (i in 1:m[2]) S<-diff(S)
    object$S<-list(t(S)\%*\%S)  # get penalty
    object$S[[1]] <- (object$S[[1]]+t(object$S[[1]]))/2 # exact symmetry
  }
  object$rank<-object$bs.dim-m[2]  # penalty rank 
  object$null.space.dim <- m[2]  # dimension of unpenalized space  
  object$knots<-k;object$m<-m      # store p-spline specific info.
  # single-row constraint matrix holding the column sums of X
  # (computed before any by variable is applied below)
  object$C<-matrix(colSums(object$X),1,object$bs.dim) #constraint
  object$df<-ncol(object$X)-1      # maximum DoF
  if (object$by!="NA")  # deal with "by" variable 
  { by <- get.var(object$by,data) # find by variable  
    if (is.null(by)) stop("Can't find by variable")
    object$X<-by*object$X # form diag(by)\%*\%X
  }
  class(object)<-"pspline.smooth"  # Give object a class
  object
}

Predict.matrix.pspline.smooth<-function(object,data)
# Prediction method for the p-spline smooth class: evaluates the stored
# B-spline basis at the values of the covariate object$term found in data.
# Uses the knots and orders (object$knots, object$m) saved by the constructor
# and returns the design matrix produced by spline.des.
{ require(splines)
  covariate <- get.var(object$term,data)
  spline.order <- object$m[1]+2
  deriv.order <- covariate*0  # all zero: basis values, not derivatives
  basis <- spline.des(object$knots,covariate,spline.order,deriv.order)
  basis$design
}

# an example, using the new class....
# simulate n observations on four uniform covariates
set.seed(0);n<-400;
x0 <- runif(n, 0, 1);x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1);x3 <- runif(n, 0, 1)
# the true function f is built from x0, x1 and x2 only, so x3 has no effect
f <- 2 * sin(pi * x0)
f <- f + exp(2 * x1) - 3.75887
f <- f+0.2*x2^11*(10*(1-x2))^6+10*(10*x2)^3*(1-x2)^10-1.396
e <- rnorm(n)*2  # normal errors with standard deviation 2
y <- f + e
# fit with bs="ps" for every term; presumably m reaches the constructor
# above as p.order (basis order, penalty order) - confirm against s()
b<-gam(y~s(x0,bs="ps",m=2)+s(x1,bs="ps",m=c(1,3))+
         s(x2,bs="ps",m=2)+s(x3,bs="ps",m=2))
plot(b,pages=1)
# another example using tensor products of the new class

test1<-function(x,z,sx=0.3,sz=0.4)
# Two-bump test surface over (x,z): a weighted sum of two exponential bumps
# centred at (0.2,0.3) and (0.7,0.8), with widths controlled by sx and sz,
# multiplied by the constant pi^sx*sz.
{ scale <- pi^sx*sz
  bump.a <- exp(-(x-0.2)^2/sx^2-(z-0.3)^2/sz^2)
  bump.b <- exp(-(x-0.7)^2/sx^2-(z-0.8)^2/sz^2)
  scale*(1.2*bump.a+0.8*bump.b)
}
# simulate noisy observations of the test1 surface at random (x,z) locations
n<-400
x<-runif(n);z<-runif(n);
f <- test1(x,z)
y <- f + rnorm(n)*0.1
# tensor product smooth of x and z with a "ps" marginal basis for each
b <- gam(y~te(x,z,bs=c("ps","ps"),m=c(2,2)))
vis.gam(b)
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ...


\eof
\name{summary.gam}
\alias{summary.gam}
\alias{print.summary.gam}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Summary for a GAM fit}
\description{ Takes a fitted \code{gam} object produced by \code{gam()} and produces various useful
summaries from it. 
}
\usage{
summary.gam(object,...)
print.summary.gam(x,...)
}
%- maybe also `usage' for other objects documented here.
\arguments{ 
\item{object}{ a fitted \code{gam} object as produced by \code{gam()}.}
\item{x}{a \code{summary.gam} object produced by \code{summary.gam()}.} 
\item{...}{ other arguments.}
}
\details{ Model degrees of freedom are taken as the trace of the influence (or hat) matrix \eqn{ {\bf A}}{A} for the model fit.
Residual degrees of freedom are taken as number of data minus model degrees of freedom. Let \eqn{ {\bf P}_i}{P_i} be the matrix 
giving the parameters of the ith smooth when applied to the data (or pseudodata in the generalized case) and let \eqn{ {\bf X}}{X} 
be the design matrix of the model. Then \eqn{ tr({\bf XP}_i )}{tr(XP_i)} is the edf for the ith term. Clearly this definition causes 
the edf's to add up properly! 

\code{print.summary.gam} tries to print various bits of summary information useful for term selection in a pretty way.
}

\value{\code{summary.gam} produces a list of summary information for a fitted \code{gam} object. 
\item{p.coeff}{is an array of estimates of the strictly parametric model coefficients.}
\item{p.t}{is an array of the \code{p.coeff}'s divided by their standard errors.}
\item{p.pv}{is an array of p-values for the null hypothesis that the corresponding parameter is zero. 
Calculated with reference to the t distribution with the estimated residual degrees of freedom for the model fit.}
\item{m}{The number of smooth terms in the model.}
\item{chi.sq}{An array of test statistics for assessing the significance of model smooth terms. If \eqn{ {\bf p}_i}{p_i} 
is the parameter vector for the ith smooth term, and this term has estimated covariance matrix \eqn{ {\bf V}_i}{V_i} then the 
statistic is \eqn{ {\bf p}_i^\prime {\bf V}_i^{k-} {\bf p}_i}{p_i'V_i^{k-}p_i}, where \eqn{ {\bf V}^{k-}_i}{V_i^{k-}} is the rank k-1 
pseudo-inverse of \eqn{ {\bf V_i}}{V_i}, and k is the basis dimension.}
\item{s.pv}{An array of approximate p-values for the null hypotheses that each smooth term is zero. Be warned, these are only 
approximate. In the case in which UBRE has been used, they are obtained by comparing the chi.sq statistic given above to the 
chi-squared distribution with degrees of  freedom given by the estimated degrees of freedom for the term. In the GCV case (in 
which the scale parameter will have been estimated) the statistic is compared to an F distribution with upper d.f. given by
the estimated degrees of freedom for the term, and lower d.f. given by the residual degrees of freedom for the model. 
Use at your own risk! Typically the p-values will be somewhat inaccurate
because they are conditional on the smoothing parameters and the distributional assumption doesn't have a firm theoretical basis. 
A pragmatic approach to the latter issue is to check p-values by refitting the model using regression splines
with each basis dimension set to one more than the rounded edf for
the term (see example, below). In this latter case the distributional
assumption would be fine if the smoothing parameters were
known, but of course they are not, and conditioning on the smoothing
parameters will always be problematic. The difficulty is that GCV has
used the data to select the most plausible smoothing parameters for each
term - not surprisingly this tends to mean that the terms appear
a little more `significant' than they should. Of course this problem
is no different to the standard difficulty in interpreting p-values for
model terms when the model has been selected by hypothesis testing
methods such as backwards elimination.   
}
\item{se}{array of standard error estimates for all parameter estimates.}
\item{r.sq}{The adjusted r-squared for the model. Defined as the proportion of variance explained, where original variance and 
residual variance are both estimated using unbiased estimators. This quantity can be negative if your model is worse than a one 
parameter constant model, and can be higher for the smaller of two nested models! Note that proportion null deviance 
explained is probably more appropriate for non-normal errors.}
\item{dev.expl}{The proportion of the null deviance explained by the model.}
\item{edf}{array of estimated degrees of freedom for the model terms.}
\item{residual.df}{estimated residual degrees of freedom.}
\item{n}{number of data.}
\item{gcv}{minimized GCV score for the model, if GCV used.}
\item{ubre}{minimized UBRE score for the model, if UBRE used.}
\item{scale}{estimated (or given) scale parameter.}
\item{family}{the family used.}
\item{formula}{the original GAM formula.}
}

\references{

Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398

Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\section{WARNING }{ The supplied p-values are only approximate and should be treated with scepticism.
} 

\seealso{  \code{\link{gam}}, \code{\link{predict.gam}}, \code{\link{gam.check}} }

\examples{
library(mgcv)
set.seed(0)
n<-200
sig2<-4
x0 <- runif(n, 0, 1)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
x3 <- runif(n, 0, 1)
# the true linear predictor depends on x0, x1 and x2 only; x3 has no effect
# (the built-in constant pi is used directly, there is no need to redefine it)
y <- 2 * sin(pi * x0)
y <- y + exp(2 * x1) - 3.75887
y <- y + 0.2 * x2^11 * (10 * (1 - x2))^6 + 10 * (10 * x2)^3 * (1 - x2)^10 - 1.396
e <- rnorm(n, 0, sqrt(abs(sig2)))  # normal errors with variance sig2
y <- y + e
b<-gam(y~s(x0)+s(x1)+s(x2)+s(x3))
plot(b,pages=1)
summary(b)
# now check the p-values by using a pure regression spline.....
# basis dimension for each term: one more than its rounded edf
b.d<-round(b$edf)+1 
b.d<-pmax(b.d,3) # can't have basis dimension less than this!
# fx=TRUE gives unpenalized (fixed d.f.) regression splines
bc<-gam(y~s(x0,k=b.d[1],fx=TRUE)+s(x1,k=b.d[2],fx=TRUE)+
        s(x2,k=b.d[3],fx=TRUE)+s(x3,k=b.d[4],fx=TRUE))
plot(bc,pages=1)
summary(bc)
# p-value check - increase k to make this useful!
# fit k models to pure noise (response unrelated to x and z) and keep the
# p-value of the smooth from each fit
n<-200;p<-numeric(k<-20)
for (i in 1:k)
{ b<-gam(y~s(x,z),data=data.frame(y=rnorm(n),x=runif(n),z=runif(n)))
  # s.pv is the documented element name; s.p only worked by partial matching
  p[i]<-summary(b)$s.pv[1]
}
# sorted p-values against uniform plotting positions
plot(((1:k)-0.5)/k,sort(p))
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ...







\eof
\name{te}
\alias{te}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Define tensor product smooths in GAM formulae}
\description{ Function used in definition of tensor product smooth terms within
  \code{gam} model formulae. The function does not evaluate a
  smooth - it exists purely to help set up a model using tensor product 
  based smooths.
}

\usage{te(..., k=NA,bs="cr",m=0,d=NA,by=NA,fx=FALSE,mp=TRUE)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{...}{ a list of variables that are the covariates that this
    smooth is a function of.}

\item{k}{ the dimension(s) of the bases used to represent the smooth term.
    If not supplied then set to \code{5^d}. If supplied as a single number then this 
    basis dimension is used for each basis. If supplied as an array then the elements are
    the dimensions of the component (marginal) bases of the tensor product.}

\item{bs}{array (or single character string) specifying the type for each 
marginal basis. \code{"cr"} for cubic regression spline; \code{"cc"} for periodic/cyclic 
cubic regression spline; \code{"tp"} for thin plate regression spline. User defined bases can 
also be used here (see \code{\link{smooth.construct}} for an example). If only one 
identifier is given then this is used for all bases.}

\item{m}{The order of the penalty for each t.p.r.s. term (e.g. 2 for
    normal cubic spline penalty with 2nd derivatives). If a single number is given 
    then it is used for all terms. \code{0} autoinitializes. \code{m} is ignored for the 
\code{"cr"} and \code{"cc"} bases.}

\item{d}{array of marginal basis dimensions. For example if you want a smooth for 3 covariates 
made up of a tensor product of a 2 dimensional t.p.r.s. basis and a 1-dimensional basis, then 
set \code{d=c(2,1)}.}

\item{by}{specifies a covariate by which the whole smooth term is to
    be multiplied. This is particularly useful for creating models in
    which a smooth interacts with a factor: in this case the \code{by}
    variable would usually be the dummy variable coding one level of the
  factor. See the examples below.}

\item{fx}{indicates whether the term is a fixed d.f. regression
  spline (\code{TRUE}) or a penalized regression spline (\code{FALSE}).}

\item{mp}{\code{TRUE} to use multiple penalties for the smooth. \code{FALSE} to use only 
a single penalty: single penalties are not recommended - they tend to allow only rather 
wiggly models.}
}

\details{ Smooths of several covariates can be constructed from tensor products of the bases
used to represent smooths of one (or sometimes more) of the covariates. To do this `marginal' bases
are produced with associated model matrices and penalty matrices, and these are then combined in the
manner described in \code{\link{tensor.prod.model.matrix}} and \code{\link{tensor.prod.penalties}}, to produce 
a single model matrix for the smooth, but multiple penalties (one for each marginal basis). The basis dimension 
of the whole smooth is the product of the basis dimensions of the marginal smooths.
 
An option for operating with a single penalty (The Kronecker product of the marginal penalties) is provided, but 
it is rarely of practical use: the penalty is typically so rank deficient that even the smoothest resulting model 
will have rather high estimated degrees of freedom. 

Tensor product smooths are especially useful for representing functions of covariates measured in different units, 
although they are typically not quite as nicely behaved as t.p.r.s. smooths for well scaled covariates.

The function does not evaluate the variable arguments.
}
\value{ A class \code{tensor.smooth.spec} object defining a tensor product smooth
 to be turned into a basis and penalties by the \code{smooth.construct.tensor.smooth.spec} function. 

The returned object contains the following items:

\item{margin}{A list of \code{smooth.spec} objects of the type returned by \code{\link{s}}, 
defining the basis from which the tensor product smooth is constructed.}
  \item{term}{An array of text strings giving the names of the covariates that 
the term is a function of.}
\item{by}{is the name of any \code{by} variable as text (\code{"NA"} for none).}
  \item{fx}{ logical array with element for each penalty of the term
(tensor product smooths have multiple penalties). \code{TRUE} if the penalty is to 
be ignored, \code{FALSE}, otherwise. }
  
  \item{p.order}{The order of the t.p.r.s. penalty, or 0 for
    auto-selection of the penalty order.}
  
  \item{full.call}{Text for pasting into a string to be converted to a
    gam formula, which has the values of function options given explicitly -
  this is useful for constructing a fully expanded gam formula which can
be used without needing access to any variables that may have been used
to define k, fx, bs or m in the original call. i.e. this is text which
when parsed and evaluated generates a call to \code{te()} with all the
options spelled out explicitly.}
\item{label}{A suitable text label for this smooth term.}
\item{dim}{The dimension of the smoother - i.e. the number of
    covariates that it is a function of.}
}


\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}

\references{
Wood, S.N. (2000)  Modelling and Smoothing Parameter Estimation
with Multiple  Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428

Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114

\url{http://www.stats.gla.ac.uk/~simon/}

}

\seealso{ \code{\link{s}},\code{\link{gam}},\code{\link{gamm}}}

\examples{

# following shows how tensor product deals nicely with 
# badly scaled covariates (range of x 5\% of range of z )
test1<-function(x,z,sx=0.3,sz=0.4)  
# Two-bump test surface for a badly scaled x: x is multiplied by 20 before
# the surface is evaluated, and the result is a weighted sum of two
# exponential bumps centred at (0.2,0.3) and (0.7,0.8) on the rescaled axes,
# multiplied by the constant pi^sx*sz.
{ x20 <- x*20
  scale <- pi^sx*sz
  bump.a <- exp(-(x20-0.2)^2/sx^2-(z-0.3)^2/sz^2)
  bump.b <- exp(-(x20-0.7)^2/sx^2-(z-0.8)^2/sz^2)
  scale*(1.2*bump.a+0.8*bump.b)
}
n<-500
old.par<-par(mfrow=c(2,2))  # 2 by 2 layout: truth plus three fitted surfaces
# x covers a range one twentieth of that of z
x<-runif(n)/20;z<-runif(n);
# 30 by 30 evaluation grid for the true surface
xs<-seq(0,1,length=30)/20;zs<-seq(0,1,length=30)
pr<-data.frame(x=rep(xs,30),z=rep(zs,rep(30,30)))
truth<-matrix(test1(pr$x,pr$z),30,30)
f <- test1(x,z)
y <- f + rnorm(n)*0.2
b1<-gam(y~s(x,z))
persp(xs,zs,truth);title("truth")
vis.gam(b1);title("t.p.r.s")
# the two tensor product fits get distinct titles so that the panels can be
# told apart: b2 uses the default "cr" marginal bases, b3 uses "tp" marginals
b2<-gam(y~te(x,z))
vis.gam(b2);title("tensor product (cr)")
b3<-gam(y~te(x,z,bs=c("tp","tp")))
vis.gam(b3);title("tensor product (tp)")
par(old.par)  # restore the graphics settings saved above

test2<-function(u,v,w,sv=0.3,sw=0.4)  
# Three covariate test function: the two-bump surface in (v,w), with bumps
# centred at (0.2,0.3) and (0.7,0.8) and widths set by sv and sw, multiplied
# by 20 times the squared distance of u from 0.5.
{ scale <- pi^sv*sw
  bump.a <- exp(-(v-0.2)^2/sv^2-(w-0.3)^2/sw^2)
  bump.b <- exp(-(v-0.7)^2/sv^2-(w-0.8)^2/sw^2)
  surface <- scale*(1.2*bump.a+0.8*bump.b)
  surface*(u-0.5)^2*20
}
# simulate noisy observations of test2 at random (u,v,w) locations
n <- 500
v <- runif(n);w<-runif(n);u<-runif(n)
f <- test2(u,v,w)
y <- f + rnorm(n)*0.2
# tensor product of a 2-d thin plate regression spline and 1-d cr spline
# d=c(2,1): the first margin takes two covariates (v,w), the second one (u);
# k gives the basis dimension of each margin
b <- gam(y~te(v,w,u,k=c(30,5),d=c(2,1),bs=c("tp","cr")))
op <- par(mfrow=c(2,2))
# one panel per fixed value of u, all on a common z axis scale
vis.gam(b,cond=list(u=0),color="heat",zlim=c(-0.2,3.5))
vis.gam(b,cond=list(u=.33),color="heat",zlim=c(-0.2,3.5))
vis.gam(b,cond=list(u=.67),color="heat",zlim=c(-0.2,3.5))
vis.gam(b,cond=list(u=1),color="heat",zlim=c(-0.2,3.5))
par(op)  # restore the graphics settings saved above


}

\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..










\eof
\name{tensor.prod.model.matrix}
\alias{tensor.prod.model.matrix}
\alias{tensor.prod.penalties}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ Utility functions for constructing tensor product smooths}
\description{
Produce model matrices or penalty matrices for a tensor product smooth from the model matrices or
penalty matrices for the marginal bases of the smooth.
}
\usage{
tensor.prod.model.matrix(X)
tensor.prod.penalties(S)
}
%- maybe also `usage' for other objects documented here.
\arguments{
  \item{X}{a list of model matrices for the marginal bases of a smooth} 
 \item{S}{a list of penalties for the marginal bases of a smooth.}
}
\details{ If \code{X[[1]]}, \code{X[[2]]} ... \code{X[[m]]} are the model matrices of the marginal bases of 
a tensor product smooth then the ith row of the model matrix for the whole tensor product smooth is given by
\code{X[[1]][i,]\%x\%X[[2]][i,]\%x\% ... X[[m]][i,]}, where \code{\%x\%} is the Kronecker product. Of course 
the routine operates column-wise, not row-wise!

If \code{S[[1]]}, \code{S[[2]]} ... \code{S[[m]]} are  the penalty matrices for the marginal bases, and 
\code{I[[1]]}, \code{I[[2]]} ... \code{I[[m]]} are corresponding identity matrices, each of the same 
dimension as its corresponding penalty, then the tensor product smooth has m associated penalties of the form:

\code{S[[1]]\%x\%I[[2]]\%x\% ... I[[m]]}, 

\code{I[[1]]\%x\%S[[2]]\%x\% ... I[[m]]} 

... 

\code{I[[1]]\%x\%I[[2]]\%x\% ... S[[m]]}. 

Of course it's important that the model matrices and penalty matrices are presented in the same order when 
constructing tensor product smooths.

}
\value{ Either a single model matrix for a tensor product smooth, or a list of penalty terms for a tensor
product smooth.   
}
\references{
\url{http://www.stats.gla.ac.uk/~simon/}
}
\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}} 

\seealso{  \code{\link{te}}, \code{\link{smooth.construct.tensor.smooth.spec}}  }

\examples{
# model matrices of two marginal bases: two rows each, 2 and 3 columns
margin.X <- list(matrix(1:4,nrow=2,ncol=2),matrix(5:10,nrow=2,ncol=3))
tensor.prod.model.matrix(margin.X)

# penalty matrices for the same two margins (2 by 2 and 3 by 3)
margin.S <- list(matrix(c(2,1,1,2),nrow=2,ncol=2),
                 matrix(c(2,1,0,1,2,1,0,1,2),nrow=3,ncol=3))
tensor.prod.penalties(margin.S)

}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..





\eof
\name{uniquecombs}
\alias{uniquecombs}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{find the unique rows in a matrix }
\description{
This routine returns a matrix containing all the unique rows of the
matrix supplied as its argument. That is, all the duplicate rows are
stripped out. Note that the ordering of the rows on exit is not the same
as on entry.
}
\usage{
uniquecombs(x)
}
%- maybe also `usage' for other objects documented here.
\arguments{
 \item{x}{ is an \R matrix }
}
\details{ Models with more parameters than unique combinations of
  covariates are not identifiable. This routine provides a means of
  evaluating the number of unique combinations of covariates in a
  model. The routine calls compiled C code.
   
}
\value{
A matrix consisting of the unique rows of \code{x} (in arbitrary order).
 
}

\author{ Simon N. Wood \email{simon@stats.gla.ac.uk}}


\examples{
# six rows written out one per vector: rows 1 and 2 are identical,
# as are rows 3 and 5
X<-rbind(c(1,2,3),
         c(1,2,3),
         c(4,5,6),
         c(1,3,2),
         c(4,5,6),
         c(1,1,1))
print(X)
uniquecombs(X)
}
\keyword{models} \keyword{regression}%-- one or more ..



\eof
\name{vis.gam}
\alias{vis.gam}
\alias{persp.gam}
\title{Visualization of GAM objects}
\usage{
vis.gam(x,view=NULL,cond=list(),n.grid=30,too.far=0,col=NA,color="heat",
      contour.col=NULL,se=-1,type="link",plot.type="persp",zlim=NULL,...)
}

\arguments{
  \item{x}{a \code{gam} object, produced by \code{gam()}}

  \item{view}{an array containing the names of the two predictor variables to be displayed on the 
   x and y dimensions of the plot. If omitted the first two suitable variables
   will be used. Names must be names from \code{names(x$model)}.
}

  \item{cond}{a named list of the values to use for the other predictor variables (not in \code{view}). Variables omitted from 
this list will have their values set to their mean for continuous variables,
or first level for factors. Names must correspond to \code{names(x$model)}.
}
  \item{n.grid}{The number of grid nodes in each direction used for calculating the 
   plotted surface.} 

 \item{too.far}{ plot grid nodes that are too far from the points defined by the variables given in \code{view} 
can be excluded from the plot. \code{too.far} determines what is too far. The grid is scaled into the unit 
square along with the \code{view} variables and then grid nodes more than \code{too.far} from the predictor variables 
are excluded.}
\item{col}{The colours for the facets of the plot. If this is \code{NA} then if \code{se}>0 the facets are transparent, 
otherwise the colour scheme specified in \code{color} is used. If \code{col} is not \code{NA} then it is used as the facet 
colour.}
 \item{color}{ the colour scheme to use for plots when \code{se}<=0. One of \code{"topo"}, \code{"heat"}, \code{"cm"} or 
\code{"terrain"}.}
\item{contour.col}{sets the colour of contours when using \code{plot.type="contour"}. Default scheme used if \code{NULL}.}
\item{se}{if less than or equal to zero then only the predicted surface is plotted, but if greater than zero, then 3 
surfaces are plotted, one at the predicted values minus \code{se} standard errors, one at the predicted values and one at
the predicted values plus \code{se} standard errors.}
\item{type}{\code{"link"} to plot on linear predictor scale and \code{"response"} to plot on the response scale.}
\item{plot.type}{one of \code{"contour"} or \code{"persp"}.}
\item{zlim}{a two item array giving the lower and upper limits for the z-axis scale. \code{NULL} to choose automatically.}
\item{...}{other options to pass on to \code{\link{persp}}. In particular \code{ticktype="detailed"} will add proper axes 
labelling to the plots.}
}
\value{Simply produces a plot.}

\description{ Produces perspective or contour plot views of \code{gam} model predictions, fixing all but the values in \code{view} to the 
values supplied in \code{cond}. 
}
\details{ The x and y limits are determined by the ranges of the variables supplied in \code{view}. If \code{se}<=0 then 
a single (height colour coded, by default) surface is produced, otherwise three (by default see-through) meshes are produced at 
mean and +/- \code{se} standard errors. Parts of the x-y plane too far from data can be excluded by setting \code{too.far}.}

\author{Simon Wood \email{simon@stats.gla.ac.uk}

 Based on an original idea and design by Mike Lonergan.}

\seealso{
\code{\link{persp}} and \code{\link{gam}}.
}
\examples{
library(mgcv)
set.seed(0)
# NOTE(review): sig2 is assigned here but not used anywhere in this example
n<-200;sig2<-4
x0 <- runif(n, 0, 1);x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
# response: smooth function of the three covariates plus uniform noise
y<-x0^2+x1*x2 +runif(n,-0.3,0.3)
g<-gam(y~s(x0,x1,x2))
old.par<-par(mfrow=c(2,2))
# display the prediction surface in x0, x1 ....
vis.gam(g,ticktype="detailed",color="heat",theta=-35)  
vis.gam(g,se=2,theta=-35) # with twice standard error surfaces
vis.gam(g, view=c("x1","x2"),cond=list(x0=0.75)) # different view 
# same view again, with grid nodes further than too.far from the data excluded
vis.gam(g, view=c("x1","x2"),cond=list(x0=0.75),theta=210,phi=40,too.far=0.07)
# contour examples....
# one plot for each of the four available colour schemes
vis.gam(g, view=c("x1","x2"),plot.type="contour",color="heat")
vis.gam(g, view=c("x1","x2"),plot.type="contour",color="terrain")
vis.gam(g, view=c("x1","x2"),plot.type="contour",color="topo")
vis.gam(g, view=c("x1","x2"),plot.type="contour",color="cm")

# ..... in the too.far plot above, areas where there is no data are not plotted
par(old.par)

# Examples with factor and "by" variables

# four level factor, 20 observations per level
fac<-rep(1:4,20)
x<-runif(80)
y<-fac+2*x^2+rnorm(80)*0.1
fac<-factor(fac)
b<-gam(y~fac+s(x))

vis.gam(b,theta=-35,color="heat") # factor example

# z multiplies the smooth of x through the by argument of s()
z<-rnorm(80)*0.4   
y<-as.numeric(fac)+3*x^2*z+rnorm(80)*0.1
b<-gam(y~fac+s(x,by=z))

vis.gam(b,theta=-35,color="heat",cond=list(z=1)) # by variable example

vis.gam(b,view=c("z","x"),theta= 35) # plot against by variable

}
\keyword{hplot} \keyword{models} \keyword{smooth} \keyword{regression}

\eof
