I am using the rcppglm to modify MASS:polr to enhance the speeds over a normal GLM, but i encountered an error like above in my attempts, here is the full code:
polr <- function (formula, data, weights, start, ..., subset, na.action,
contrasts = NULL, Hess = FALSE, model = TRUE, method = c("logistic",
"probit", "loglog", "cloglog", "cauchit"))
{
m <- match.call(expand.dots = FALSE)
method <- match.arg(method)
if (is.matrix(eval.parent(m$data)))
m$data <- as.data.frame(data)
m$start <- m$Hess <- m$method <- m$model <- m$... <- NULL
m[[1L]] <- quote(stats::model.frame)
m <- eval.parent(m)
Terms <- attr(m, "terms")
x <- model.matrix(Terms, m, contrasts)
xint <- match("(Intercept)", colnames(x), nomatch = 0L)
n <- nrow(x)
pc <- ncol(x)
cons <- attr(x, "contrasts")
if (xint > 0L) {
x <- x[, -xint, drop = FALSE]
pc <- pc - 1L
}
else warning("an intercept is needed and assumed")
wt <- model.weights(m)
if (!length(wt))
wt <- rep(1, n)
offset <- model.offset(m)
if (length(offset) <= 1L)
offset <- rep(0, n)
y <- model.response(m)
if (!is.factor(y))
stop("response must be a factor")
lev <- levels(y)
llev <- length(lev)
if (llev <= 2L)
stop("response must have 3 or more levels")
y <- unclass(y)
q <- llev - 1L
if (missing(start)) {
q1 <- llev%/%2L
y1 <- (y > q1)
X <- cbind(Intercept = rep(1, n), x)
fit <- switch(method, logistic = glm.fit(X, y1, wt, family = binomial(),
offset = offset), probit = glm.fit(X, y1, wt, family = binomial("probit"),
offset = offset), loglog = glm.fit(X, y1, wt, family = binomial("probit"),
offset = offset), cloglog = glm.fit(X, y1, wt, family = binomial("probit"),
offset = offset), cauchit = glm.fit(X, y1, wt, family = binomial("cauchit"),
offset = offset))
if (!fit$converged)
stop("attempt to find suitable starting values failed")
coefs <- fit$coefficients
if (any(is.na(coefs))) {
warning("design appears to be rank-deficient, so dropping some coefs")
keep <- names(coefs)[!is.na(coefs)]
coefs <- coefs[keep]
x <- x[, keep[-1L], drop = FALSE]
pc <- ncol(x)
}
logit <- function(p) log(p/(1 - p))
spacing <- logit((1L:q)/(q + 1L))
if (method != "logistic")
spacing <- spacing/1.7
gammas <- -coefs[1L] + spacing - spacing[q1]
start <- c(coefs[-1L], gammas)
}
else if (length(start) != pc + q)
stop("'start' is not of the correct length")
ans <- polr.fit(x, y, wt, start, offset, method, hessian = Hess,
...)
beta <- ans$coefficients
zeta <- ans$zeta
deviance <- ans$deviance
res <- ans$res
niter <- c(f.evals = res$counts[1L], g.evals = res$counts[2L])
eta <- if (pc)
offset + drop(x %*% beta)
else offset + rep(0, n)
pfun <- switch(method, logistic = plogis, probit = pnorm,
loglog = pgumbel, cloglog = pGumbel, cauchit = pcauchy)
cumpr <- matrix(pfun(matrix(zeta, n, q, byrow = TRUE) - eta),
, q)
fitted <- t(apply(cumpr, 1L, function(x) diff(c(0, x, 1))))
dimnames(fitted) <- list(row.names(m), lev)
fit <- list(coefficients = beta, zeta = zeta, deviance = deviance,
fitted.values = fitted, lev = lev, terms = Terms, df.residual = sum(wt) -
pc - q, edf = pc + q, n = sum(wt), nobs = sum(wt),
call = match.call(), method = method, convergence = res$convergence,
niter = niter, lp = eta)
if (Hess) {
dn <- c(names(beta), names(zeta))
H <- res$hessian
dimnames(H) <- list(dn, dn)
fit$Hessian <- H
}
if (model)
fit$model <- m
fit$na.action <- attr(m, "na.action")
fit$contrasts <- cons
fit$xlevels <- .getXlevels(Terms, m)
class(fit) <- "polr"
fit
}
<bytecode: 0x000000002263b7f8>
<environment: namespace:MASS>
polr_cpp <- function(formula, data, weights, start, ..., subset, na.action,
contrasts = NULL, Hess = FALSE, model = TRUE,
method = c("logistic", "probit", "loglog", "cloglog", "cauchit")){
library(Rcpp)
library(RcppGLM)
library(RcppArmadillo)
cppFunction('NumericMatrix mmult(const NumericMatrix& m1, const NumericMatrix& m2){
if (m1.ncol() != m2.nrow()) stop ("Incompatible matrix dimensions");
NumericMatrix out(m1.nrow(),m2.ncol());
NumericVector rm1, cm2;
for (size_t i = 0; i < m1.nrow(); ++i) {
rm1 = m1(i,_);
for (size_t j = 0; j < m2.ncol(); ++j) {
cm2 = m2(_,j);
out(i,j) = std::inner_product(rm1.begin(), rm1.end(), cm2.begin(), 0.);
}
}
return out;
}')
polr.fit.enhanced <- function(x, y, wt, start, offset, method, ...){
fmin <- function(beta){
theta <- beta[pc + ind_q]
gamm <- c(-Inf , cumsum(c(theta[1L], exp(theta[-1L]))), Inf)
eta <- offset
if (pc) eta <- eta + drop(x %*% beta[ind_pc])
pr <- pfun(pmin(100, gamm[y + 1] - eta)) -
pfun(pmax(-100, gamm[y] - eta))
if (all(pr > 0)) -sum(wt * log(pr)) else Inf
}
gmin <- function(beta){
jacobian <- function(theta) { ## dgamma by dtheta matrix
k <- length(theta)
etheta <- exp(theta)
mat <- matrix(0 , k, k)
mat[, 1L] <- rep(1, k)
for (i in 2L:k) mat[i:k, i] <- etheta[i]
mat
}
theta <- beta[pc + ind_q]
gamm <- c(-Inf, cumsum(c(theta[1L], exp(theta[-1L]))), Inf)
eta <- offset
if(pc) eta <- eta + drop(x %*% beta[ind_pc])
z1 <- pmin(100, gamm[y+1L] - eta)
z2 <- pmax(-100, gamm[y] - eta)
pr <- pfun(z1) - pfun(z2)
p1 <- dfun(z1); p2 <- dfun(z2)
g1 <- if(pc) t(x) %*% (wt*(p1 - p2)/pr) else numeric()
xx <- .polrY1*p1 - .polrY2*p2
g2 <- - t(xx) %*% (wt/pr)
g2 <- t(g2) %*% jacobian(theta)
if(all(pr > 0)) c(g1, g2) else rep(NA_real_, pc+q)
}
pfun <- switch(method, logistic = plogis, probit = pnorm,
loglog = pgumbel, cloglog = pGumbel, cauchit = pcauchy)
dfun <- switch(method, logistic = dlogis, probit = dnorm,
loglog = dgumbel, cloglog = dGumbel, cauchit = dcauchy)
n <- nrow(x)
pc <- ncol(x)
ind_pc <- seq_len(pc)
lev <- levels(y)
if(length(lev) <= 2L) stop("response must have 3 or more levels")
y <- unclass(y)
q <- length(lev) - 1L
ind_q <- seq_len(q)
Y <- matrix(0, n, q)
.polrY1 <- col(Y) == y; .polrY2 <- col(Y) == (y - 1L)
# pc could be 0.
s0 <- if(pc) c(start[seq_len(pc+1L)], log(diff(start[-seq_len(pc)])))
else c(start[1L], log(diff(start)))
library(optimParallel)
res <- optimParallel(s0, fmin, gmin, method="BFGS", ...)
beta <- res$par[seq_len(pc)]
theta <- res$par[pc + ind_q]
zeta <- cumsum(c(theta[1L], exp(theta[-1L])))
deviance <- 2 * res$value
names(zeta) <- paste(lev[-length(lev)], lev[-1L], sep="|")
if(pc) names(beta) <- colnames(x)
list(coefficients = beta, zeta = zeta, deviance = deviance, res = res)
}
m <- match.call(expand.dots = FALSE)
method <- match.arg(method)
if (is.matrix(eval.parent(m$data)))
m$data <- as.data.frame(data)
m$start <- m$Hess <- m$method <- m$model <- m$... <- NULL
m[[1L]] <- quote(stats::model.frame)
m <- eval.parent(m)
Terms <- attr(m, "terms")
x <- model.matrix(Terms, m, contrasts)
xint <- match("(Intercept)", colnames(x), nomatch = 0L)
n <- nrow(x)
pc <- ncol(x)
cons <- attr(x, "contrasts")
if (xint > 0L) {
x <- x[, -xint, drop = FALSE]
pc <- pc - 1L
}
else warning("an intercept is needed and assumed")
wt <- model.weights(m)
if (!length(wt))
wt <- rep(1, n)
offset <- model.offset(m)
if (length(offset) <= 1L)
offset <- rep(0, n)
y <- model.response(m)
if (!is.factor(y))
stop("response must be a factor")
lev <- levels(y)
llev <- length(lev)
if (llev <= 2L)
stop("response must have 3 or more levels")
y <- unclass(y)
q <- llev - 1L
if (missing(start)) {
q1 <- llev%/%2L
y1 <- (y > q1)
X <- cbind(Intercept = rep(1, n), x)
fit <- switch(method, logistic = glm_fit(X, y1, family = binomial()),
probit = glm_fit(X, y1, family = binomial("probit")),
loglog = glm_fit(X, y1, family = binomial("probit")),
cloglog = glm_fit(X, y1, family = binomial("probit")),
cauchit = glm_fit(X, y1, family = binomial("cauchit")))
if (!fit$converged)
stop("attempt to find suitable starting values failed")
coefs <- fit$coefficients
if (any(is.na(coefs))) {
warning("design appears to be rank-deficient, so dropping some coefs")
keep <- names(coefs)[!is.na(coefs)]
coefs <- coefs[keep]
x <- x[, keep[-1L], drop = FALSE]
pc <- ncol(x)
}
logit <- function(p) log(p/(1 - p))
spacing <- logit((1L:q)/(q + 1L))
if (method != "logistic")
spacing <- spacing/1.7
gammas <- -coefs[1L] + spacing - spacing[q1]
start <- c(coefs[-1L], gammas)
}
else if (length(start) != pc + q)
stop("'start' is not of the correct length")
ans <- polr.fit.enhanced(x, y, wt, start, offset, method, hessian = Hess, ...)
beta <- ans$coefficients
zeta <- ans$zeta
deviance <- ans$deviance
res <- ans$res
niter <- c(f.evals = res$counts[1L], g.evals = res$counts[2L])
eta <- if (pc){
beta <- matrix(beta, nrow = length(beta), ncol = 1)
offset + mmult(x, beta)
offset <- as.numeric(offset)
names(offset) <- 1:length(offset)
}
else offset + rep(0, n)
return(eta)
pfun <- switch(method, logistic = plogis, probit = pnorm,
loglog = pgumbel, cloglog = pGumbel, cauchit = pcauchy)
cumpr <- matrix(pfun(matrix(zeta, n, q, byrow = TRUE) - eta), , q)
fitted <- t(apply(cumpr, 1L, function(x) diff(c(0, x, 1))))
dimnames(fitted) <- list(row.names(m), lev)
fit <- list(coefficients = beta, zeta = zeta, deviance = deviance,
fitted.values = fitted, lev = lev, terms = Terms, df.residual = sum(wt) -
pc - q, edf = pc + q, n = sum(wt), nobs = sum(wt),
call = match.call(), method = method, convergence = res$convergence,
niter = niter, lp = eta)
if (Hess) {
dn <- c(names(beta), names(zeta))
H <- res$hessian
dimnames(H) <- list(dn, dn)
fit$Hessian <- H
}
if (model)
fit$model <- m
fit$na.action <- attr(m, "na.action")
fit$contrasts <- cons
fit$xlevels <- .getXlevels(Terms, m)
class(fit) <- "polr"
fit
}