I've come up with a temporary and admittedly sloppy solution that works for my own data. Note that, as I point out in the comments below, there is one issue (a hard-coded task) that I haven't been able to resolve. My code should hopefully give you some ideas.
# Define four measures:
# 1. prc_micro: Precision-Recall Area Under the Curve with aggregation method set to 'micro'.
# 2. prc_macro: Precision-Recall Area Under the Curve with aggregation method set to 'macro'.
# 3. auc_micro: ROC Area Under the Curve with aggregation method set to 'micro'.
# 4. auc_macro: ROC Area Under the Curve with aggregation method set to 'macro'.
# 1. prc_micro: PR AUC, micro-averaged (predictions pooled before scoring).
# Start from the ROC-AUC measure and repurpose its fields.
prc_micro <- msr("classif.auc")$clone(deep = TRUE)
print(prc_micro)  # Inspect: a few fields (id etc.) still need changing
prc_micro$id <- "prc_micro"
prc_micro$average <- "micro"    # Aggregation method
prc_micro$packages <- "PRROC"   # PR AUC will be computed via PRROC
prc_micro$man <- NA_character_  # No man page for this custom measure
print(prc_micro)  # Inspect the updated measure
# 2. prc_macro: PR AUC, macro-averaged (score per iteration, then average).
# Use a deep clone, consistent with how prc_micro was cloned above, so no
# R6 internals are shared between the two measures.
prc_macro <- prc_micro$clone(deep = TRUE)
prc_macro$id <- 'prc_macro'
prc_macro$average <- 'macro'
# NOTE: prc_micro$fun is only replaced further down in this script, so the
# clone taken here still carries the original ROC-AUC scoring function.
# prc_macro$fun must be assigned as well after the PR-AUC fun is defined,
# otherwise prc_macro silently keeps reporting ROC AUC (which is why
# prc_macro and auc_macro produce identical numbers in the output below).
# 3. auc_micro: standard ROC AUC, micro-averaged
# (pool all predictions across iterations, then score once).
auc_micro <- msr("classif.auc")$clone()
auc_micro$id <- "auc_micro"
auc_micro$average <- "micro"  # Aggregation method
# 4. auc_macro: standard ROC AUC, macro-averaged
auc_macro <- msr('classif.auc')$clone()
auc_macro$id <- 'auc_macro'
# Set the aggregation explicitly rather than relying on the default.
# ('macro' is the mlr3 default, but being explicit matches the other three
# measure definitions above and the comment header at the top.)
auc_macro$average <- 'macro'
# Create a binary-classification dataset: first 100 iris rows, i.e. only
# setosa and versicolor; drop the now-unused 'virginica' factor level.
iris1 <- iris %>%
  slice(1:100) %>%
  mutate(Species = droplevels(Species)) %>%
  as.data.table()
task_iris <- TaskClassif$new(
  id = "iris1",
  backend = iris1,
  target = "Species",
  positive = "setosa"
)
# Replace the measure's scoring function so it computes PR AUC via PRROC.
# Instead of hard-coding the task (the issue noted in the original post),
# derive the positive class from the truth factor itself: for a binary
# mlr3 task the positive class is stored as the FIRST level of the target.
# If a task object is available it takes precedence via task$positive.
prc_micro$fun <- function(task = NULL, prob, truth, na_value = NaN, ...) {
  positive <- if (!is.null(task)) task$positive else levels(truth)[1]
  # PRROC assumes class '1' is the positive class, so recode the truth
  # vector to 1 (positive) / 0 (negative).
  truth01 <- ifelse(truth == positive, 1, 0)
  # [[2]]: area under the curve computed by integration of the piecewise
  # function (see ?PRROC::pr.curve).
  PRROC::pr.curve(prob, weights.class0 = truth01)[[2]]
}
# prc_macro was cloned BEFORE this fun existed, so it still carries the
# ROC-AUC scoring function; copy the PR-AUC fun over, otherwise prc_macro
# silently reports ROC AUC.
prc_macro$fun <- prc_micro$fun
# Define learner, inner resampling, measures, tuner and terminator
learner <- lrn("classif.xgboost", predict_type = "prob")
resampling_inner <- rsmp("cv", folds = 3)
measures <- list(prc_micro, prc_macro, auc_micro, auc_macro)
# Use '<-' for assignment throughout (was 'tuner = ...', inconsistent with
# the rest of the script).
tuner <- tnr("grid_search", resolution = 4)
terminator <- term("evals", n_evals = 5)  # Stop after 5 evaluations
# Search space for xgboost. Answering the inline question below: 'gamma'
# (minimum split loss) is a CONTINUOUS parameter in xgboost with range
# [0, Inf), so it is declared as ParamDbl rather than ParamInt.
# ('min_child_weight' is also real-valued in xgboost, but its integer grid
# is kept here for backward compatibility with the original search space.)
param_set <- ParamSet$new(list(
  ParamFct$new("booster", levels = "gbtree"),
  ParamInt$new("nrounds", lower = 1, upper = 10),
  ParamInt$new("max_depth", lower = 3, upper = 10),
  ParamInt$new("min_child_weight", lower = 0, upper = 10),
  ParamDbl$new("subsample", lower = 0, upper = 1),
  ParamDbl$new("eta", lower = 0.1, upper = 0.6),
  ParamDbl$new("colsample_bytree", lower = 0.5, upper = 1),
  ParamDbl$new("gamma", lower = 0, upper = 5)  # Continuous in xgboost
))
# Wrap the learner in an AutoTuner: hyperparameters are tuned on the inner
# resampling before the final fit on each outer training set.
# ('<-' for assignment, consistent with the rest of the script.)
at <- AutoTuner$new(
  learner,
  resampling_inner,
  measures,
  param_set,
  terminator,
  tuner
)
# Nested resampling: 2-fold outer CV around the AutoTuner.
# ('<-' for assignment, consistent with the rest of the script.)
resampling_outer <- rsmp("cv", folds = 2)
rr <- resample(task = task_iris, learner = at,
               resampling = resampling_outer, store_models = TRUE)
rr$aggregate(measures)  # One aggregated score per measure
# prc_micro prc_macro auc_micro auc_macro
# 0.8835709 0.7500000 0.8758000 0.7500000
# Sanity check: the micro-aggregated PR AUC should match a direct call to
# PRROC::pr.curve on the pooled outer-resampling predictions.
pred <- as.data.table(rr$prediction())
# Package PRROC assumes class '1' is the positive class; 'setosa' was set
# as the positive class above, so recode it to '1' now.
pred$truth <- ifelse(pred$truth == 'setosa', 1, 0)
# Namespace the call explicitly (PRROC is never attached via library()).
PRROC::pr.curve(pred$prob.setosa, weights.class0 = pred$truth, curve = TRUE)[[2]]
# [1] 0.8835709
I hope this helps.