% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/choose_b.R
\name{choose_b}
\alias{choose_b}
\title{Block length diagnostic for the semiparametric maxima estimator}
\usage{
choose_b(
  data,
  b,
  bias_adjust = c("BB3", "BB1", "N", "none"),
  constrain = TRUE,
  varN = TRUE,
  level = 0.95,
  interval_type = c("norm", "lik"),
  conf_scale = c("theta", "log"),
  type = c("vertical", "cholesky", "spectral", "none")
)
}
\arguments{
\item{data}{A numeric vector of raw data.  No missing values are allowed.}

\item{b}{A numeric vector.  The block sizes to be considered.}

\item{bias_adjust}{A character scalar.  Is bias-adjustment of the
raw estimate of \eqn{\theta} performed using the bias-reduced
estimator (\code{bias_adjust = "BB3"}), derived in Section 5 of
Berghaus and Bucher (2018); or a simpler version
(\code{bias_adjust = "BB1"}), in which the raw estimate is multiplied by
\eqn{(k-1) / k}, where \eqn{k} is the number of blocks; or the
bias-adjustment of the empirical distribution function used to calculate
the estimate (\code{bias_adjust = "N"}), as detailed in Section 2 of
Northrop (2015).  When disjoint
maxima are used \code{bias_adjust = "BB1"} and \code{bias_adjust = "N"}
give identical estimates of the Berghaus and Bucher (2018) variant,
as explained at the end of Section 5 of Berghaus and Bucher (2018).
If \code{bias_adjust = "none"} then no bias-adjustment is performed.}

\item{constrain}{A logical scalar.  If \code{constrain = TRUE} then
any estimates that are greater than 1 are set to 1,
that is, they are constrained to lie in (0, 1].  This is carried out
\emph{after} any bias-adjustment.  Otherwise,
estimates that are greater than 1 may be obtained.}

\item{varN}{A logical scalar.  If \code{varN = TRUE} then the estimation
of the sampling variance of the Northrop (2015) estimator is tailored
to that estimator.  Otherwise, the sampling variance derived in
Berghaus and Bucher (2018) is used.
See \strong{Details} for further information.}

\item{level}{A numeric scalar in (0, 1). The confidence level required.}

\item{interval_type}{A character scalar: \code{"norm"} for intervals of
type (a), based on approximate large-sample normality of the estimators;
\code{"lik"} for intervals of type (b), which are likelihood-based.}

\item{conf_scale}{A character scalar.  If \code{interval_type = "norm"} then
  \code{conf_scale} determines the scale on which we use approximate
  large-sample normality of the estimators to estimate confidence intervals
  of type (a).

  If \code{conf_scale = "theta"}
  then confidence intervals are estimated for \eqn{\theta} directly.
  If \code{conf_scale = "log"} then confidence intervals are first
  estimated for \eqn{\log\theta}{log\theta} and then transformed back
  to the \eqn{\theta}-scale.

  Any bias-adjustment requested in the original call to \code{\link{spm}},
  using its \code{bias_adjust} argument, is automatically applied here.}

\item{type}{A character scalar.  The argument \code{type} to be passed to
\code{\link[chandwich]{conf_intervals}} in the
\code{\link[chandwich]{chandwich}} package in order to estimate the
likelihood-based intervals.
Using \code{type = "none"} is \emph{not} advised because then the
intervals are based on naive estimated standard errors.  In particular,
if (the default) \code{sliding = TRUE} was used in the call to
\code{\link{spm}} then the unadjusted likelihood-based confidence
intervals provide \emph{vast} underestimates of uncertainty.}
}
\value{
An object of class \code{c("choose_b", "exdex")} containing
  \item{theta_sl,theta_dj }{numeric \code{length(b)} by 3 matrices of
  estimates of \eqn{\theta} using sliding and disjoint blocks.  Each row
  relates to a block size in \code{b}.  Columns 1-3 relate to the
   estimators \code{N2015}, \code{BB2018} and \code{BB2018b}.}
  \item{lower_sl,lower_dj }{Similarly for the lower limits of the confidence
    intervals.}
  \item{upper_sl,upper_dj }{Similarly for the upper limits of the confidence
    intervals.}
  \item{b }{the input \code{b}.}
  \item{call }{the call to \code{choose_b}.}
}
\description{
Creates data for a plot to aid the choice of the block length \code{b} to
supply to \code{\link{spm}}.  The general idea is to select the smallest
value of \code{b} above which estimates of the extremal index \eqn{\theta}
appear to be constant with respect to \code{b}, taking into account sampling
variability.  \code{\link{plot.choose_b}} creates the plot.
}
\details{
For each block size in \code{b} the extremal index \eqn{\theta}
  is estimated using \code{\link{spm}}.  The estimates of \eqn{\theta}
  and approximate 100\code{level}\% confidence intervals for \eqn{\theta} are
  stored for plotting (by \code{\link{plot.choose_b}})
  to produce a simple graphical diagnostic to inform the choice of
  block size.  This plot is used to choose a block size above which the
  underlying value of \eqn{\theta} may be approximately constant.
  This is akin to a threshold stability plot: see Chapter 4 of Coles (2001),
  for example.

  The nature of the calculation of the sampling variances of the estimates
  of \eqn{\theta} (see \code{\link{spm}} for details) means that
  \code{choose_b} may be a little slow to run if \code{b} contains many
  values, particularly if some of them are small.

  For very small block sizes it may not be possible to estimate the
  confidence intervals.  See \strong{Details} in \code{\link{spm}}.
  For any such block sizes the intervals will be missing from the plot.
}
\examples{
\donttest{
# Newlyn sea surges
# Plot like the top left of Northrop (2015)
# Remove the last 14 values because 2880 has lots of factors
b_vals <- c(2,3,4,5,6,8,9,10,12,15,16,18,20,24,30,32,36,40,45,48,54,60)
res <- choose_b(newlyn[1:2880], b_vals)
# Some b are too small for the sampling variance of the sliding blocks
# estimator to be estimated
plot(res)
plot(res, estimator = "BB2018")
plot(res, maxima = "disjoint")

# S&P 500 index: similar to Berghaus and Bucher (2018), Fig 4 top left
b_vals <- c(10, seq(from = 25, to = 350, by = 25), 357)
res500 <- choose_b(sp500, b_vals)
plot(res500, ylim = c(0, 1))
plot(res500, estimator = "BB2018", ylim = c(0, 1))
}
}
\references{
Coles, S. G. (2001) \emph{An Introduction to Statistical
  Modeling of Extreme Values}, Springer-Verlag, London.
  \doi{10.1007/978-1-4471-3675-0}

Northrop, P. J. (2015) An efficient semiparametric maxima
  estimator of the extremal index. \emph{Extremes} \strong{18}(4), 585-603.
  \doi{10.1007/s10687-015-0221-5}

Berghaus, B., Bucher, A. (2018) Weak convergence of a pseudo
  maximum likelihood estimator for the extremal index. \emph{Ann. Statist.}
  \strong{46}(5), 2307-2335. \doi{10.1214/17-AOS1621}
}
\seealso{
\code{\link{plot.choose_b}} to produce the block length diagnostic
  plot.
}
