bcw_analysis.knit

library(here)
library(tibble)
library(ranger)
library(ggplot2)

source(here("v2","src","evidencegeom.R"))
source(here("v2","src","bcw_train_test_split.R"))

set.seed(42)

Dataset 1: BCW

TL;DR

This notebook demonstrates the Evidence Geometry framework on the Breast Cancer Wisconsin dataset.

Instead of relying only on classifier probability, the framework analyzes each case by collecting log likelihood ratios between classes by selecting marginal likelihood distribution families for each feature based on feature type and magnitude of log likelihood.

This log-likelihood ratio space (evidence space) is then analyzed using two geometric risk signals:

Distance Contrast (d_dist): measures which class manifold better explains the case
Drift Projection (proj): measures directional drift toward positive-class structure

Validation Set Risk Map

Random Forest Probability scores vs sum of LLRs closely follow the Logistic regression curve between true class and sum of LLRs, suggesting that sum of LLRs has discriminatory power comparable to Random Forest

Validation-set risk map.

Validation-set risk map in the low-RF probability region (p < 0.45).

Main Observation

Within the subset of cases assigned low probability by the Random Forest (RF) classifier, the two geometric signals reveal additional structure:

Sum of LLRs produces decision boundary similar to Random Forest
Boundary of ambiguity, d_dist = 0, occurs much earlier in the feature space than p = 0.5 classifier ambiguous region.
Dominant benign cluster is present in the region d_dist < 0, proj < 0.
Pathological cases are distinctly present among benign points that cross the benign central region of d_dist < 0 and proj < 0.

This suggests that geometric evidence can detect hidden risk structure earlier than classifier probability alone.

Test Set Result

With a conservative triage policy on the test set, the risk signals help achieve:

Identification of true ambiguity in data
Abstention of classification of ambiguous cases
99.5% false negative capture rate through abstention
Maintain good automated classification rate at 95.6%

Takeaway

d_dist and proj provide a compact and interpretable view of how risk accumulates geometrically, especially in cases where standard discriminative classifiers assign deceptively low probability.

Full Analysis

Comment Block : Data prep from BCW raw data

# bcw_import <- read.delim(file=here("src","data","bcw","wdbc.data"), sep=",", header=FALSE)
# names(bcw_import) <- c("id","diagnosis",
#                          "mean_radius","mean_texture","mean_perimeter","mean_area","mean_smoothness","mean_compactness","mean_concavity",
#                          "mean_concave_points","mean_symmetry", "mean_fractal_dimension",
#                          "se_radius","se_texture","se_perimeter","se_area","se_smoothness","se_compactness","se_concavity","se_concave_points",
#                          "se_symmetry", "se_fractal_dimension",
#                          "worst_radius","worst_texture","worst_perimeter","worst_area","worst_smoothness","worst_compactness","worst_concavity",
#                          "worst_concave_points","worst_symmetry", "worst_fractal_dimension")
# bcw_import$id <- as.factor(bcw_import$id)
# bcw_import$diagnosis <- as.factor(bcw_import$diagnosis)

# bcw_split_1 <- enriched_split(
#    df = bcw_import,
#    rare_filter = quote(worst_perimeter >= 50 & worst_perimeter <= 105),
#    dev_frac = 0.65,
#    seed = 7
# )
# 
# bcw_split_2 <- enriched_split(
#    df = bcw_split_1$dev,
#    rare_filter = quote(worst_perimeter >= 50 & worst_perimeter <= 105),
#    dev_frac = 0.65,
#    seed = 7
# )
# 
# bcw_train <- bcw_split_2$dev
# bcw_val  <- bcw_split_2$test
# bcw_test <- bcw_split_1$test

# write.csv(bcw_train, here("v2","src","data","bcw","bcw_train.csv"))
# write.csv(bcw_val, here("v2","src","data","bcw","bcw_val.csv"))
# write.csv(bcw_test, here("v2","src","data","bcw","bcw_test.csv"))

Load Train-Val-Test splits

Splits performed ensuring a proportion of known failure modes are present in all three splits

bcw_train <- read.csv(here("v2","src","data","bcw","bcw_train.csv"))
bcw_train <- bcw_train[,-1]
bcw_train$id <- as.factor(bcw_train$id)
bcw_train$diagnosis <- as.factor(bcw_train$diagnosis)

bcw_val <- read.csv(here("v2","src","data","bcw","bcw_val.csv"))
bcw_val <- bcw_val[,-1]
bcw_val$id <- as.factor(bcw_val$id)
bcw_val$diagnosis <- as.factor(bcw_val$diagnosis)

bcw_test <- read.csv(here("v2","src","data","bcw","bcw_test.csv"))
bcw_test <- bcw_test[,-1]
bcw_test$id <- as.factor(bcw_test$id)
bcw_test$diagnosis <- as.factor(bcw_test$diagnosis)

Class overlap and hidden failures

ggplot(bcw_train%>%filter(worst_perimeter >= 50 & worst_perimeter <= 150)) +
  geom_point(aes(x=worst_perimeter, y=worst_area, color=diagnosis), alpha=0.8) +
  labs(title="Class Overlap in the BCW dataset") +
  theme_minimal()

nrow(bcw_train%>%filter(worst_perimeter >= 50 & worst_perimeter <= 105))

## [1] 141

Class Ratio to input for generating Evidence Space

alpha <- (nrow(bcw_train[bcw_train$diagnosis=="M", ])/nrow(bcw_train))

Evidence Space Transformation

risk_spec : Object to store function argument values for evidence space generation

fit : Learn feature-wise marginal likelihoods, and geometries and eigenmodes of class manifolds in evidence space

loglik_matrices : Compute marginal positive-class and negative-class evidences, and marginal relative evidence for input data using fitted evidence generator

score_risk : Compute total feature-wise evidence, distance constrast, drift projection, and eigenmode energies

Generate Evidence Space

bcw_spec <- risk_spec(
   y_col="diagnosis", positive="M",
   features = setdiff(names(bcw_train), c("diagnosis", "id")),
   alpha = alpha,
   laplace = 1, ridge = 1e-6, winsor_p=0.01,
   weights=FALSE,
   weight_method = "mutual_info",
   numeric_candidates = c("gaussian", "lognormal", "gamma"),
   numeric_val_frac = 0.2,
   numeric_min_n = 25,
   llr_cap_quantile = 0.01,
   mi_nbins = 10
 )

bcw_obj <- fit(bcw_train%>%select(-id), bcw_spec, k_eigen=2, k_energy=2, energy_ref="both")

head(print_feature_likelihoods(bcw_obj), 5)

##          feature    class feature_type likelihood_family
## 1    mean_radius positive      numeric             gamma
## 2    mean_radius negative      numeric             gamma
## 3   mean_texture positive      numeric         lognormal
## 4   mean_texture negative      numeric         lognormal
## 5 mean_perimeter positive      numeric         lognormal
##                     parameters
## 1   shape=31.2318, rate=1.7828
## 2   shape=61.2763, rate=5.0253
## 3 meanlog=3.0542, sdlog=0.1419
## 4 meanlog=2.8619, sdlog=0.2097
## 5 meanlog=4.7341, sdlog=0.1847

bcw_Lval <- loglik_matrices(bcw_val%>%select(-id), bcw_obj$fit, alpha=bcw_spec$alpha)
# val/test scoring
bcw_scores_val <- bind_cols(id=bcw_val$id, 
                            diagnosis=bcw_val$diagnosis, 
                            score_risk(bcw_Lval$l_pos, bcw_Lval$l_neg,  bcw_obj$fit$weights, bcw_obj$geom, 
                                       bcw_spec$alpha, bcw_spec$eps))

Weighted evidence matrix using optional weights. Default weights (when selected) : KL separation between positive and negative classes per evidence

bcw_Lval_w <- apply_llr_weights(bcw_Lval$L, bcw_obj$fit$weights)

Learn baseline Random Forest and predict

bcw_rf_train_1 <- ranger(formula = diagnosis ~ ., data=bcw_train%>%select(-id),
                         mtry=8, min.node.size=5, num.trees=500, probability = TRUE,
                         keep.inbag = TRUE)

bcw_scores_val <- bind_cols(bcw_scores_val,
                            pM = predict(bcw_rf_train_1, bcw_val%>%select(-id, -diagnosis))$predictions[,"M"])

Feature Importance using ratio of feature-wise evidences

feature_importance(df=bcw_train%>%select(-id), y_col="diagnosis", fit = bcw_obj$fit, method = "mutual_info", top_n = 15)

##                 feature     weight abs_weight
## 1       worst_perimeter 0.06802118 0.06802118
## 2            worst_area 0.06720117 0.06720117
## 3          worst_radius 0.06703131 0.06703131
## 4   mean_concave_points 0.06541001 0.06541001
## 5  worst_concave_points 0.06488235 0.06488235
## 6             mean_area 0.06011327 0.06011327
## 7           mean_radius 0.05935002 0.05935002
## 8        mean_perimeter 0.05923074 0.05923074
## 9        mean_concavity 0.05640648 0.05640648
## 10      worst_concavity 0.05002912 0.05002912
## 11              se_area 0.04956444 0.04956444
## 12            se_radius 0.03496687 0.03496687
## 13    worst_compactness 0.03488927 0.03488927
## 14     mean_compactness 0.03328922 0.03328922
## 15         se_perimeter 0.03231747 0.03231747

Sample from evidence output

l_pos : Sum of marginal positive evidence
l_neg : Sum of marginal negative evidence
l : Sum of marginal evidence (difference of positive and negative evidence)
proj : Projection of benign-relative z scores of evidences on mean deviation class separation direction learned from training
d_dist : Difference of Mahalanobis Distances of evidence vector from negative-class evidence manifold and positive-class evidence manifold
E_pos : Total energy (sum of squares of projections over k principal components of positive-class) of evidence vector
pM : Random Forest probability score for positive-class

head(bcw_scores_val, 5)

##       id diagnosis      l_pos      l_neg          l          t         proj
## 1 913512         B  -7.181215  25.174531 -32.355746  17.993315 -0.014474594
## 2 863031         B  -1.687254  19.441711 -21.128966  17.754457  0.002029465
## 3 855138         M   7.536457  -2.010215   9.546673   5.526242  0.241179508
## 4 883270         B  -8.495305   2.749076 -11.244380  -5.746229  0.166289857
## 5 921362         B -62.346930 -31.269338 -31.077592 -93.616268 -0.068812180
##      d_dist    l_norm      E_pos       E_neg          dE     eig_1     eig_2
## 1 -4.640003  8.647529  0.3514493   0.9006716  -0.5492223  7.842260 0.5472624
## 2 -2.529992  8.003247  5.5361752   6.5353216  -0.9991463  6.762143 2.8967998
## 3  6.814686 10.884689 78.3435176 152.1444580 -73.8009404 -1.541281 5.3848165
## 4 -2.850673  5.250365 26.9890565  35.6939991  -8.7049426  1.924869 0.2263808
## 5 -8.426509 13.472078 24.8759126  17.9959150   6.8799976  9.493974 5.1267741
##           pM
## 1 0.00740000
## 2 0.03666667
## 3 0.68913333
## 4 0.29660000
## 5 0.10830000

ggplot(bcw_scores_val) +
  geom_histogram(aes(x=l, fill=diagnosis), alpha=0.6) +
  xlab("Sum of relative marginal evidence from all features") +
  theme_minimal()

## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

ggplot(bcw_scores_val) +
  geom_point(aes(x=l, y=pM, color=diagnosis), alpha=0.7) +
  stat_smooth(aes(x=l, y=as.numeric(diagnosis)-1), method = "glm", # Specify Generalized Linear Model
              method.args = list(family = "binomial"), # Specify logistic regression
              se = TRUE) +
  ylab("Random forest malignancy probability (pM)") +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

ggplot(bcw_scores_val) +
  geom_point(aes(x=qlogis(pM), y=l, color=diagnosis), alpha=0.7) +
  xlab("log-odds of random forest malignancy probability (pM)") +
  theme_minimal()

ggplot(bcw_scores_val) +
  geom_point(aes(x=qlogis(pM), y=d_dist, color=diagnosis), alpha=0.7) +
  xlab("log-odds of random forest malignancy probability (pM)") +
  theme_minimal()

ggplot(bcw_scores_val) +
  geom_point(aes(x=qlogis(pM), y=proj, color=diagnosis), alpha=0.7) +
  xlab("log-odds of random forest malignancy probability (pM)") +
  theme_minimal()

ggplot(bcw_scores_val) +
  geom_point(aes(x=d_dist, y=proj, color=diagnosis), alpha=0.7) +
  theme_minimal()

ggplot(bcw_scores_val%>%filter(d_dist < 20)) +
  geom_point(aes(x=d_dist, y=proj, color=diagnosis), alpha=0.7) +
  theme_minimal()

patchwork::wrap_plots(
ggplot(bcw_scores_val) +
  geom_point(aes(x=d_dist, y=E_pos, color=diagnosis), alpha=0.7) +
  theme_minimal(),

ggplot(bcw_scores_val) +
  geom_point(aes(x=proj, y=E_pos, color=diagnosis), alpha=0.7) +
  theme_minimal()
) + patchwork::plot_layout(guides="collect")

patchwork::wrap_plots(

ggplot(bcw_scores_val%>%filter(d_dist < 10, E_pos < 400)) +
  geom_point(aes(x=d_dist, y=E_pos, color=diagnosis), alpha=0.7) +
  theme_minimal(),

ggplot(bcw_scores_val%>%filter(d_dist < 10, E_pos < 400)) +
  geom_point(aes(x=proj, y=E_pos, color=diagnosis), alpha=0.7) +
  theme_minimal()
) + patchwork::plot_layout(guides="collect")

bcw_val_L <- bind_cols(diagnosis=bcw_val$diagnosis,
           bcw_Lval$L)

Case-wise Feature Importance on validation set using unweighted relative evidence

Malignant cases

#apply(bcw_val_L, 1, function (x) {max(abs(x)) / sum(abs(x))})
bcw_val_M_dom_l <- apply(bcw_val_L%>%filter(diagnosis=="M")%>%select(-diagnosis), 1,
               function (x) {list(dom_l = sum(head(sort(abs(x), decreasing=TRUE),3)) / sum(abs(x)),
                                  dom_l_names = names(head(sort(abs(x), decreasing=TRUE),3)))})

bcw_val_M_dom_l <- bcw_val_M_dom_l[order(sapply(bcw_val_M_dom_l, function(x) x$dom_l), decreasing=TRUE)]

head(bcw_val_M_dom_l, 3)

## [[1]]
## [[1]]$dom_l
## [1] 0.4986962
## 
## [[1]]$dom_l_names
## [1] "worst_radius"    "worst_perimeter" "worst_area"     
## 
## 
## [[2]]
## [[2]]$dom_l
## [1] 0.4476173
## 
## [[2]]$dom_l_names
## [1] "worst_concavity"         "mean_concavity"         
## [3] "worst_fractal_dimension"
## 
## 
## [[3]]
## [[3]]$dom_l
## [1] 0.4400803
## 
## [[3]]$dom_l_names
## [1] "worst_radius"    "worst_perimeter" "worst_area"

Benign cases

bcw_val_B_dom_l <- sort(apply(bcw_val_L%>%filter(diagnosis=="B")%>%select(-diagnosis), 1,
               function (x) {sum(head(sort(abs(x), decreasing=TRUE),3)) / sum(abs(x))}), decreasing=TRUE)

head(bcw_val_B_dom_l, 3)

## [1] 0.5925254 0.4787052 0.4625822

tail(bcw_val_B_dom_l, 3)

## [1] 0.2135967 0.2131890 0.2097654

Eigenmode Analysis (PCA) on validation set

Eigenmodes of malignant subset

bcw_Lval_w_M <- bcw_Lval_w[bcw_val$diagnosis == "M", , drop = FALSE]
bcw_Lval_w_Sigma_M <- cov(bcw_Lval_w_M)
bcw_Lval_w_Sigma_M <- bcw_Lval_w_Sigma_M + diag(1e-6, ncol(bcw_Lval_w_Sigma_M))
bcw_Lval_w_eig_M <- eigen(bcw_Lval_w_Sigma_M, symmetric = TRUE)

bcw_Lval_w_eigvals_M  <- bcw_Lval_w_eig_M$values
bcw_Lval_w_eigvecs_M  <- bcw_Lval_w_eig_M$vectors

bcw_Lval_w_coords_M <- bcw_Lval_w_M %*% bcw_Lval_w_eigvecs_M[, 1:2]

Variance Explained

bcw_Lval_w_eigvals_M[1:2] / sum(bcw_Lval_w_eigvals_M[1:2])

## [1] 0.8062571 0.1937429

Top 5 feature loadings

decompose_eigenmode(bcw_Lval_w_eigvecs_M, k=1, feature_names = bcw_obj$fit$features, top_n=5)

## # A tibble: 5 × 3
##   feature             loading abs_loading
##   <chr>                 <dbl>       <dbl>
## 1 mean_concave_points  -0.482       0.482
## 2 worst_radius         -0.454       0.454
## 3 mean_concavity       -0.382       0.382
## 4 worst_perimeter      -0.314       0.314
## 5 worst_area           -0.227       0.227

decompose_eigenmode(bcw_Lval_w_eigvecs_M, k=2, feature_names = bcw_obj$fit$features, top_n=5)

## # A tibble: 5 × 3
##   feature              loading abs_loading
##   <chr>                  <dbl>       <dbl>
## 1 mean_concavity         0.408       0.408
## 2 worst_concavity        0.371       0.371
## 3 mean_compactness       0.362       0.362
## 4 worst_radius          -0.356       0.356
## 5 worst_concave_points   0.230       0.230

Eigenmode Plot

plot(bcw_Lval_w_coords_M[,1], bcw_Lval_w_coords_M[,2])

Eigenmodes of Benign subset

bcw_Lval_w_B <- bcw_Lval_w[bcw_val$diagnosis == "B", , drop = FALSE]
bcw_Lval_w_Sigma_B <- cov(bcw_Lval_w_B)
bcw_Lval_w_Sigma_B <- bcw_Lval_w_Sigma_B + diag(1e-6, ncol(bcw_Lval_w_Sigma_B))
bcw_Lval_w_eig_B <- eigen(bcw_Lval_w_Sigma_B, symmetric = TRUE)

bcw_Lval_w_eigvals_B  <- bcw_Lval_w_eig_B$values
bcw_Lval_w_eigvecs_B  <- bcw_Lval_w_eig_B$vectors

bcw_Lval_w_coords_B <- bcw_Lval_w_B %*% bcw_Lval_w_eigvecs_B[, 1:2]

Variance Explained

bcw_Lval_w_eigvals_B[1:2] / sum(bcw_Lval_w_eigvals_B[1:2])

## [1] 0.7269915 0.2730085

Top 5 feature loadings

decompose_eigenmode(bcw_Lval_w_eigvecs_B, k=1, feature_names = bcw_obj$fit$features, top_n=5)

## # A tibble: 5 × 3
##   feature              loading abs_loading
##   <chr>                  <dbl>       <dbl>
## 1 mean_concavity         0.686       0.686
## 2 mean_compactness       0.355       0.355
## 3 worst_concavity        0.319       0.319
## 4 worst_concave_points   0.264       0.264
## 5 mean_concave_points    0.255       0.255

decompose_eigenmode(bcw_Lval_w_eigvecs_B, k=2, feature_names = bcw_obj$fit$features, top_n=5)

## # A tibble: 5 × 3
##   feature         loading abs_loading
##   <chr>             <dbl>       <dbl>
## 1 worst_area        0.420       0.420
## 2 mean_perimeter    0.400       0.400
## 3 mean_area         0.391       0.391
## 4 worst_perimeter   0.327       0.327
## 5 mean_radius       0.315       0.315

Eigenmode Plot

plot(bcw_Lval_w_coords_B[,1], bcw_Lval_w_coords_B[,2])

Cosine Similarity of Eigenmode 1 Malignant and Eigenmode 1 Benign

acos((t(bcw_Lval_w_eigvecs_B[,1]) %*% bcw_Lval_w_eigvecs_M[,1]) / (sqrt(t(bcw_Lval_w_eigvecs_B[,1]) %*% bcw_Lval_w_eigvecs_B[,1]) * sqrt(t(bcw_Lval_w_eigvecs_M[,1]) %*% bcw_Lval_w_eigvecs_M[,1]))) * 180 / pi

##          [,1]
## [1,] 131.5815

Fitting Evidence Model and Random Forest on train+val set

bcw_obj_2 <- fit(bind_rows(bcw_train%>%select(-id),
                           bcw_val%>%select(-id)), bcw_spec, k_eigen=2, k_energy=2, energy_ref="both")

bcw_Lval_test <- loglik_matrices(bcw_test%>%select(-id), bcw_obj_2$fit, alpha=bcw_spec$alpha)
# val/test scoring
bcw_scores_test <- bind_cols(id=bcw_test$id, 
                            diagnosis=bcw_test$diagnosis, 
                            
                            score_risk(bcw_Lval_test$l_pos, bcw_Lval_test$l_neg,  bcw_obj_2$fit$weights, bcw_obj_2$geom, 
                                       bcw_spec$alpha, bcw_spec$eps))

head(print_feature_likelihoods(bcw_obj_2), 5)

##          feature    class feature_type likelihood_family
## 1    mean_radius positive      numeric             gamma
## 2    mean_radius negative      numeric             gamma
## 3   mean_texture positive      numeric         lognormal
## 4   mean_texture negative      numeric         lognormal
## 5 mean_perimeter positive      numeric          gaussian
##                     parameters
## 1   shape=31.6265, rate=1.8164
## 2   shape=54.4581, rate=4.4767
## 3 meanlog=3.0583, sdlog=0.1598
## 4 meanlog=2.8716, sdlog=0.2073
## 5    mean=114.8236, sd=21.1460

feature_importance(df=bind_rows(bcw_train%>%select(-id),
                           bcw_val%>%select(-id)), y_col="diagnosis", fit = bcw_obj_2$fit, method = "mutual_info", top_n = 15)

##                 feature     weight abs_weight
## 1          worst_radius 0.07318007 0.07318007
## 2            worst_area 0.07300859 0.07300859
## 3       worst_perimeter 0.07184495 0.07184495
## 4  worst_concave_points 0.06896854 0.06896854
## 5   mean_concave_points 0.06589216 0.06589216
## 6        mean_perimeter 0.06134988 0.06134988
## 7           mean_radius 0.05846728 0.05846728
## 8             mean_area 0.05846728 0.05846728
## 9        mean_concavity 0.05498769 0.05498769
## 10              se_area 0.05403280 0.05403280
## 11      worst_concavity 0.04891966 0.04891966
## 12            se_radius 0.03747952 0.03747952
## 13    worst_compactness 0.03286711 0.03286711
## 14         se_perimeter 0.03233489 0.03233489
## 15     mean_compactness 0.02980909 0.02980909

bcw_rf_train_2 <- ranger(formula = diagnosis ~ ., data=bind_rows(bcw_train%>%select(-id), bcw_val%>%select(-id)),
                         mtry=8, min.node.size=5, num.trees=500, probability = TRUE,
                         keep.inbag = TRUE)

Test Set Performace

bcw_scores_test <- bind_cols(bcw_scores_test,
                            pM = predict(bcw_rf_train_2, bcw_test%>%select(-id))$predictions[,"M"])

Triage Rule

If \(p \geq 0.65\) \(\rightarrow\) Malignant
Else If \(p > 0.45\) \(\rightarrow\) Review
Else
If \(proj > 0\) & \(d\_dist \geq 0\) \(\rightarrow\) Review
Else \(\rightarrow\) Benign

apply_bcw_triage <- function(df, p_hi = 0.65, p_review = 0.45, tau_d = 0, tau_p = 0) {
  df %>%
    mutate(
      action = case_when(
        pM >= p_hi ~ "Malignant",
        pM > p_review ~ "Review",
        (pM <= p_review) & (proj > tau_p) & (d_dist >= tau_d) ~ "Review",
        # (pM <= p_review) & (d_dist >= tau_d) ~ "Review",
        TRUE ~ "Benign"
      )
    )
}

triage_table <- function(df, truth_col = "diagnosis", action_col = "action") {
  tab <- table(df[[action_col]], df[[truth_col]])
  as.data.frame.matrix(tab) %>%
    tibble::rownames_to_column(action_col)
}

Triage Results

bcw_test_triaged <- bcw_scores_test %>%
  apply_bcw_triage(p_hi = 0.65, p_review = 0.45, tau_d = 0, tau_p = 0)

triage_table(bcw_test_triaged, truth_col = "diagnosis", action_col = "action")

##      action   B  M
## 1    Benign 113  1
## 2 Malignant   4 70
## 3    Review   9  4

bcw_metrics <- function(df, truth_col = "diagnosis") {
  truth <- df[[truth_col]]
  stopifnot(all(levels(truth) %in% c("B","M")))

  benign_path <- df %>% filter(action == "Benign")
  review_path <- df %>% filter(action == "Review")
  mal_path <- df %>% filter(action == "Malignant")

  tibble(
    auto_benign_fn_rate = mean(benign_path[[truth_col]] == "M"),
    fn_capture_rate = 1 - mean(df[[truth_col]] == "M" & df$action == "Benign"),
    overall_review_rate = mean(df$action == "Review"),
    benign_region_review_rate = mean(df$pM <= 0.45 & df$action == "Review") / mean(df$pM <= 0.45),
    malignant_region_fp_override_rate = mean(df$pM >= 0.65 & df[[truth_col]] == "B") / mean(df$pM >= 0.65) # optional
  )
}

bcw_metrics(bcw_test_triaged)

## # A tibble: 1 × 5
##   auto_benign_fn_rate fn_capture_rate overall_review_rate benign_region_review…¹
##                 <dbl>           <dbl>               <dbl>                  <dbl>
## 1             0.00877           0.995              0.0647                 0.0732
## # ℹ abbreviated name: ¹benign_region_review_rate
## # ℹ 1 more variable: malignant_region_fp_override_rate <dbl>