Replicate the multiclass logloss function in R from the one given in the Kaggle Plankton tutorial by Aaron Sander.
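
For reference, the metric being replicated is the competition's multiclass logloss: with N images, M classes, y_ij equal to 1 when image i belongs to class j (0 otherwise), and p_ij the predicted probability, the loss is -(1/N) * sum_i sum_j y_ij * log(p_ij), with the p_ij clipped away from 0 and 1 before taking logs.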

y_true <- read.csv("y_true.csv")[,1]
y_true <- y_true + 1  # convert 0-based Python class indices to 1-based R indices
length(y_true)
## [1] 30336
y_true[1]
## [1] 1
y_pred <- as.matrix(read.csv("y_pred.csv"))
dim(y_pred)
## [1] 30336   121
colnames(y_pred) <- NULL
y_pred[1,]
##   [1] 0.27 0.00 0.00 0.00 0.00 0.01 0.01 0.01 0.00 0.00 0.00 0.00 0.00 0.00
##  [15] 0.07 0.01 0.02 0.04 0.00 0.02 0.00 0.01 0.02 0.00 0.03 0.15 0.00 0.01
##  [29] 0.00 0.00 0.00 0.00 0.00 0.00 0.02 0.00 0.04 0.00 0.00 0.00 0.01 0.00
##  [43] 0.00 0.01 0.00 0.00 0.00 0.01 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.00
##  [57] 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.02 0.00 0.00 0.03 0.00
##  [71] 0.00 0.00 0.07 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
##  [85] 0.00 0.01 0.00 0.00 0.00 0.00 0.02 0.00 0.00 0.00 0.00 0.00 0.00 0.00
##  [99] 0.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01
## [113] 0.00 0.00 0.01 0.00 0.00 0.00 0.00 0.01 0.00
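
A quick sanity check (not in the original tutorial) that the two files line up and that each row of y_pred is a probability vector:

stopifnot(length(y_true) == nrow(y_pred))
summary(rowSums(y_pred))  # each row should sum to roughly 1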

There is no clip() function in base R (that I know of), so brute-force the clipping:

epsilon <- 1E-15

predictions <- y_pred
predictions[y_pred <   epsilon] <-   epsilon
predictions[y_pred > 1-epsilon] <- 1-epsilon
predictions[1,]
##   [1] 2.7e-01 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-02 1.0e-02 1.0e-02
##   [9] 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15 7.0e-02 1.0e-02
##  [17] 2.0e-02 4.0e-02 1.0e-15 2.0e-02 1.0e-15 1.0e-02 2.0e-02 1.0e-15
##  [25] 3.0e-02 1.5e-01 1.0e-15 1.0e-02 1.0e-15 1.0e-15 1.0e-15 1.0e-15
##  [33] 1.0e-15 1.0e-15 2.0e-02 1.0e-15 4.0e-02 1.0e-15 1.0e-15 1.0e-15
##  [41] 1.0e-02 1.0e-15 1.0e-15 1.0e-02 1.0e-15 1.0e-15 1.0e-15 1.0e-02
##  [49] 1.0e-15 1.0e-02 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15
##  [57] 1.0e-15 1.0e-02 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15
##  [65] 1.0e-15 2.0e-02 1.0e-15 1.0e-15 3.0e-02 1.0e-15 1.0e-15 1.0e-15
##  [73] 7.0e-02 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15
##  [81] 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-02 1.0e-15 1.0e-15
##  [89] 1.0e-15 1.0e-15 2.0e-02 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15
##  [97] 1.0e-15 1.0e-15 3.0e-02 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15
## [105] 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-02
## [113] 1.0e-15 1.0e-15 1.0e-02 1.0e-15 1.0e-15 1.0e-15 1.0e-15 1.0e-02
## [121] 1.0e-15
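
The same clipping can also be done in one line with base R's pmin/pmax (not what the tutorial does, but it produces an identical matrix):

predictions <- pmax(pmin(y_pred, 1 - epsilon), epsilon)
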
actual <- matrix(0, nrow(y_pred), ncol(y_pred))

n_samples <- nrow(actual)

The “easy” Python-style paired indexing does not work in R — subscripting with two index vectors selects the whole row/column cross product, not element pairs:

actual[1:121, y_true] <- 1

Brute force it with a loop instead (a vectorized alternative using cbind indexing is sketched after the check below):

for (i in 1:n_samples)
{
  actual[i, y_true[i]] <- 1
}

actual[1,]
##   [1] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [36] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [71] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [106] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
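
The vectorized alternative mentioned above: R does support paired indexing, but the (row, column) pairs must be supplied as a two-column matrix, e.g. via cbind. This builds the same one-hot matrix without the loop:

actual <- matrix(0, nrow(y_pred), ncol(y_pred))
actual[cbind(1:n_samples, y_true)] <- 1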

Element-wise product

prod <- actual * log(predictions)
prod[1,]
##   [1] -1.309333  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##   [8]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [15]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [22]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [29]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [36]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [43]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [50]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [57]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [64]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [71]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [78]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [85]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [92]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##  [99]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## [106]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## [113]  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## [120]  0.000000  0.000000
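
Because actual is one-hot, each row of prod has exactly one non-zero entry: the log of the probability assigned to the true class. For the first image that is log(0.27) ≈ -1.3093, matching the output above.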

Logloss by image

byImage <- apply(prod, 1, sum)
hist(byImage, main="Image logloss distribution")

loss <- -1.0 * sum(byImage) / n_samples
loss
## [1] 3.711261
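
For context, a model that assigned a uniform 1/121 probability to every class would score -log(1/121), about 4.80, so 3.71 is only a modest improvement over uniform guessing.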

Counts by class of problem images (those whose per-image logloss contribution is worse than five times the median):

table(y_true[byImage < 5*median(byImage)])
## 
##   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
##  13   5  15   8   4  22   5   2   2   7   4   5   1   3  15  15  11  12 
##  20  21  22  23  24  25  27  28  29  30  31  32  33  34  35  36  37  38 
##   3   8  15   6  11   1  15  10  22  17  31   5  13  18  34  15   1   8 
##  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56 
##  10   9  12   8   4   1  16  12   2  13  11   6   7  20   9   6  14  13 
##  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74 
##   9  15  14   7   1   4  11  30  14  13  11   5   1   6  20  29   3  11 
##  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92 
##  24  10  10   5  12  14   8  40  23   8   3   5   3  14  12  15  23  22 
##  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 
##  25   4  33  17  16  16  14  10  36  25  25   4  46  16  12   3   8   6 
## 111 112 113 114 115 117 118 119 120 121 
##   4   8  20   8  21  12  14  36  18  51

R Function

multiclassLogLoss <- function(y_true, y_pred, epsilon=1E-15)
{
  # clip predicted probabilities away from 0 and 1
  predictions <- y_pred
  predictions[y_pred <   epsilon] <-   epsilon
  predictions[y_pred > 1-epsilon] <- 1-epsilon

  # one-hot matrix of true classes (y_true is 1-based)
  actual <- matrix(0, nrow(y_pred), ncol(y_pred))
  n_samples <- nrow(actual)
  for (i in 1:n_samples)
  {
    actual[i, y_true[i]] <- 1
  }

  prod <- actual * log(predictions)  # element-wise matrix multiplication
  byImage <- apply(prod, 1, sum)     # per-image logloss contribution
  loss <- -1.0 * sum(byImage) / n_samples

  invisible(list(loss=loss, byImage=byImage))
}
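
A more compact version (a sketch, not from the tutorial) using the pmin/pmax clip and the cbind indexing shown earlier; it returns the same loss and the same byImage values:

multiclassLogLossVec <- function(y_true, y_pred, epsilon=1E-15)
{
  predictions <- pmax(pmin(y_pred, 1 - epsilon), epsilon)
  byImage <- log(predictions[cbind(seq_along(y_true), y_true)])  # log p(true class), one per image
  invisible(list(loss = -mean(byImage), byImage = byImage))
}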

Example use of R function

result <- multiclassLogLoss(y_true, y_pred)
result$loss
## [1] 3.711261
heuristicCut <- -5
hist(result$byImage, main="Image logloss distribution")
abline(v=heuristicCut, col="skyblue", lwd=3)

Problem counts by class

counts <- table(y_true[result$byImage < heuristicCut])
counts
## 
##   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
##  13   5  15   8   4  22   5   2   2   7   4   5   1   3  15  15  11  12 
##  20  21  22  23  24  25  27  28  29  30  31  32  33  34  35  36  37  38 
##   3   8  15   6  11   1  15  10  22  17  31   5  13  18  34  15   1   8 
##  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56 
##  10   9  12   8   4   1  16  12   2  13  11   6   7  20   9   6  14  13 
##  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74 
##   9  15  14   7   1   4  11  30  14  13  11   5   1   6  20  29   3  11 
##  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92 
##  24  10  10   5  12  14   8  40  23   8   3   5   3  14  12  15  23  22 
##  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 
##  25   4  33  17  16  16  14  10  36  25  25   4  46  16  12   3   8   6 
## 111 112 113 114 115 117 118 119 120 121 
##   4   8  20   8  21  12  14  36  18  51 
sum(counts)
## [1] 1518
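
That is 1518 of the 30336 images, about 5% of the set, each contributing a per-image logloss worse than 5.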