SETUP <- "Center-Scale"
library(caret)
Loading required package: lattice
Loading required package: ggplot2
set.seed(19937)
options(width=150)
time.1 <- Sys.time()
format(time.1, "%Y-%m-%d-%H%M%S")
[1] "2015-03-02-231511"
load("../../Features/plankton-train-wndchrm-skimage-features.Rdata", verbose=TRUE)
Loading objects:
train.features
train.class
length(train.class)
[1] 30336
dim(train.features)
[1] 30336 2923
skimage features have many missing values, especially for a number of smaller images. Until this gets resolved, let’s remove the skimage features.
colnames(train.features)[2894] # last wndchrm feature
[1] "Gini Coefficient (Wavelet (Edge ())) [0]"
colnames(train.features)[2895:2923] # skimage features
[1] "area" "area_convex" "area_filled" "box_max_col" "box_max_row"
[6] "box_min_col" "box_min_row" "centroid_col" "centroid_row" "countCoords"
[11] "diameter_equivalent" "euler_number" "inertia_tensor_eigenvalue1" "inertia_tensor_eigenvalue2" "label"
[16] "length_major_axis" "length_minor_axis" "moments_hu1" "moments_hu2" "moments_hu3"
[21] "moments_hu4" "moments_hu5" "moments_hu6" "moments_hu7" "orientation"
[26] "perimeter" "ratio_eccentricity" "ratio_extent" "ratio_solidity"
# Remove the 29 skimage columns (positions 2895 through 2923); wndchrm columns 1..2894 stay.
train.features <- train.features[, -(2895:2923)]
dim(train.features)
[1] 30336 2894
Verify there are no NAs
sum(is.na(train.features))
[1] 0
library(doParallel)
Loading required package: foreach
Loading required package: iterators
Loading required package: parallel
# Start a 6-worker PSOCK cluster and register it as the foreach parallel backend.
# NOTE(review): no later call in the visible part of this script explicitly
# requests parallel execution -- confirm the cluster is actually used.
rCluster <- makePSOCKcluster(6) # Use 6 cores
registerDoParallel(rCluster)
See
# Compute near-zero-variance metrics for every feature, then count the flagged ones.
nzv <- nearZeroVar(train.features, saveMetrics=TRUE)
NZ <- nzv[["nzv"]]  # logical flag per feature: TRUE = near-zero variance
countNzv <- sum(NZ)
countNzv
[1] 189
nzv[NZ,]
freqRatio percentUnique zeroVar nzv
Edge Features () [15] 9998.33333 1.104298523 FALSE TRUE
Otsu Object Features () [1] 81.77049 0.013185654 FALSE TRUE
Otsu Object Features () [2] 119.80080 0.009889241 FALSE TRUE
Otsu Object Features () [3] 195.95455 0.013185654 FALSE TRUE
Otsu Object Features () [4] 311.71134 0.009889241 FALSE TRUE
Otsu Object Features () [5] 582.36538 0.009889241 FALSE TRUE
Otsu Object Features () [6] 704.44186 0.009889241 FALSE TRUE
Otsu Object Features () [7] 776.84615 0.006592827 FALSE TRUE
Otsu Object Features () [8] 1684.27778 0.009889241 FALSE TRUE
Otsu Object Features () [18] 26.64968 0.065928270 FALSE TRUE
Otsu Object Features () [19] 20.53412 0.098892405 FALSE TRUE
Inverse-Otsu Object Features () [3] 28.09584 0.039556962 FALSE TRUE
Inverse-Otsu Object Features () [4] 37.67606 0.023074895 FALSE TRUE
Inverse-Otsu Object Features () [5] 44.99391 0.026371308 FALSE TRUE
Inverse-Otsu Object Features () [6] 53.06071 0.016482068 FALSE TRUE
Inverse-Otsu Object Features () [7] 65.30635 0.019778481 FALSE TRUE
Inverse-Otsu Object Features () [8] 69.31090 0.016482068 FALSE TRUE
Chebyshev-Fourier Coefficients () [8] 37.50000 0.023074895 FALSE TRUE
Chebyshev-Fourier Coefficients () [9] 71.92701 0.019778481 FALSE TRUE
Chebyshev-Fourier Coefficients () [10] 83.91268 0.019778481 FALSE TRUE
Chebyshev-Fourier Coefficients () [11] 52.59043 0.019778481 FALSE TRUE
Chebyshev-Fourier Coefficients () [12] 19.48070 0.016482068 FALSE TRUE
Chebyshev-Fourier Coefficients () [26] 20.03745 0.006592827 FALSE TRUE
Chebyshev-Fourier Coefficients () [27] 37.69388 0.006592827 FALSE TRUE
Chebyshev-Fourier Coefficients () [28] 50.07071 0.006592827 FALSE TRUE
Chebyshev-Fourier Coefficients () [29] 49.98487 0.006592827 FALSE TRUE
Chebyshev-Fourier Coefficients () [30] 60.28485 0.006592827 FALSE TRUE
Chebyshev-Fourier Coefficients () [31] 80.98919 0.006592827 FALSE TRUE
Chebyshev Coefficients () [1] 19.18254 0.013185654 FALSE TRUE
Chebyshev Coefficients () [2] 72.25971 0.013185654 FALSE TRUE
Chebyshev Coefficients () [3] 59.64848 0.013185654 FALSE TRUE
Chebyshev Coefficients () [4] 43.58209 0.016482068 FALSE TRUE
Chebyshev Coefficients () [5] 28.84600 0.019778481 FALSE TRUE
Chebyshev Coefficients () [22] 130.32468 0.006592827 FALSE TRUE
Chebyshev Coefficients () [23] 241.68800 0.006592827 FALSE TRUE
Chebyshev Coefficients () [24] 432.37143 0.006592827 FALSE TRUE
Chebyshev Coefficients () [25] 918.27273 0.006592827 FALSE TRUE
Chebyshev Coefficients () [26] 2332.53846 0.006592827 FALSE TRUE
Chebyshev Coefficients () [27] 6066.20000 0.006592827 FALSE TRUE
Comb Moments () [3] 6065.80000 0.009889241 FALSE TRUE
Comb Moments () [4] 19.61249 0.026371308 FALSE TRUE
Comb Moments () [5] 19.68234 0.026371308 FALSE TRUE
Comb Moments () [16] 21.12437 0.059335443 FALSE TRUE
Multiscale Histograms () [2] 30322.00000 0.049446203 FALSE TRUE
Radon Coefficients () [1] 91.48505 0.253823840 FALSE TRUE
Radon Coefficients () [10] 65.15085 0.243934599 FALSE TRUE
Fractal Features () [16] 13713.50000 9.589266878 FALSE TRUE
Fractal Features () [17] 27787.00000 8.405854430 FALSE TRUE
Fractal Features () [18] 28083.00000 7.430116034 FALSE TRUE
Fractal Features () [19] 28324.00000 6.635680380 FALSE TRUE
Pixel Intensity Statistics () [1] 122.11203 0.445015823 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier ()) [24] 22.61216 0.026371308 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier ()) [25] 32.48420 0.023074895 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier ()) [26] 27.52703 0.023074895 FALSE TRUE
Multiscale Histograms (Fourier ()) [3] 30314.00000 0.075817511 FALSE TRUE
Multiscale Histograms (Fourier ()) [4] 30314.00000 0.075817511 FALSE TRUE
Multiscale Histograms (Fourier ()) [8] 6000.00000 0.903217300 FALSE TRUE
Multiscale Histograms (Fourier ()) [9] 6000.00000 0.903217300 FALSE TRUE
Multiscale Histograms (Fourier ()) [15] 2220.84615 2.768987342 FALSE TRUE
Multiscale Histograms (Fourier ()) [16] 2220.92308 2.768987342 FALSE TRUE
Multiscale Histograms (Fourier ()) [17] 30331.00000 0.019778481 FALSE TRUE
Radon Coefficients (Fourier ()) [11] 6066.20000 0.006592827 FALSE TRUE
Fractal Features (Fourier ()) [16] 13713.50000 9.589266878 FALSE TRUE
Fractal Features (Fourier ()) [17] 27787.00000 8.405854430 FALSE TRUE
Fractal Features (Fourier ()) [18] 28083.00000 7.430116034 FALSE TRUE
Fractal Features (Fourier ()) [19] 28324.00000 6.635680380 FALSE TRUE
Chebyshev-Fourier Coefficients (Wavelet ()) [26] 147.62745 0.009889241 FALSE TRUE
Chebyshev-Fourier Coefficients (Wavelet ()) [27] 432.34286 0.009889241 FALSE TRUE
Chebyshev-Fourier Coefficients (Wavelet ()) [28] 918.27273 0.006592827 FALSE TRUE
Chebyshev-Fourier Coefficients (Wavelet ()) [29] 1263.00000 0.006592827 FALSE TRUE
Chebyshev-Fourier Coefficients (Wavelet ()) [30] 2021.40000 0.006592827 FALSE TRUE
Chebyshev-Fourier Coefficients (Wavelet ()) [31] 5055.00000 0.006592827 FALSE TRUE
Chebyshev Coefficients (Wavelet ()) [3] 69.23598 0.013185654 FALSE TRUE
Chebyshev Coefficients (Wavelet ()) [4] 66.50673 0.023074895 FALSE TRUE
Chebyshev Coefficients (Wavelet ()) [30] 37.26162 0.016482068 FALSE TRUE
Chebyshev Coefficients (Wavelet ()) [31] 108.04029 0.016482068 FALSE TRUE
Comb Moments (Wavelet ()) [3] 7581.75000 0.013185654 FALSE TRUE
Comb Moments (Wavelet ()) [4] 36.16564 0.016482068 FALSE TRUE
Comb Moments (Wavelet ()) [5] 36.25707 0.013185654 FALSE TRUE
Comb Moments (Wavelet ()) [9] 604.70000 0.019778481 FALSE TRUE
Comb Moments (Wavelet ()) [10] 249.12397 0.016482068 FALSE TRUE
Comb Moments (Wavelet ()) [11] 201.20667 0.009889241 FALSE TRUE
Comb Moments (Wavelet ()) [22] 413.89041 0.019778481 FALSE TRUE
Multiscale Histograms (Wavelet ()) [0] 30254.00000 0.273602321 FALSE TRUE
Radon Coefficients (Wavelet ()) [1] 42.54036 0.210970464 FALSE TRUE
Radon Coefficients (Wavelet ()) [10] 28.29291 0.224156118 FALSE TRUE
Fractal Features (Wavelet ()) [18] 13754.00000 9.322257384 FALSE TRUE
Fractal Features (Wavelet ()) [19] 27902.00000 8.026766878 FALSE TRUE
Haralick Textures (Chebyshev ()) [16] 94.47703 7.489451477 FALSE TRUE
Haralick Textures (Chebyshev ()) [17] 72.83651 9.088212025 FALSE TRUE
Multiscale Histograms (Chebyshev ()) [6] 25.98496 1.325158228 FALSE TRUE
Multiscale Histograms (Chebyshev ()) [9] 65.52406 7.832278481 FALSE TRUE
Multiscale Histograms (Chebyshev ()) [13] 1503.55000 0.237341772 FALSE TRUE
Multiscale Histograms (Chebyshev ()) [16] 303.70370 2.254746835 FALSE TRUE
Multiscale Histograms (Chebyshev ()) [21] 818.94444 0.616429325 FALSE TRUE
Multiscale Histograms (Chebyshev ()) [22] 6055.80000 0.089003165 FALSE TRUE
Tamura Textures (Chebyshev ()) [2] 7277.75000 3.454641350 FALSE TRUE
Radon Coefficients (Chebyshev ()) [2] 917.90909 0.036260549 FALSE TRUE
Radon Coefficients (Chebyshev ()) [11] 1082.35714 0.009889241 FALSE TRUE
Fractal Features (Chebyshev ()) [16] 13713.50000 9.539820675 FALSE TRUE
Fractal Features (Chebyshev ()) [17] 9262.33333 8.363001055 FALSE TRUE
Fractal Features (Chebyshev ()) [18] 14041.50000 7.403744726 FALSE TRUE
Fractal Features (Chebyshev ()) [19] 14162.00000 6.622494726 FALSE TRUE
Comb Moments (Chebyshev (Fourier ())) [0] 20.53076 0.032964135 FALSE TRUE
Comb Moments (Chebyshev (Fourier ())) [12] 26.12141 0.032964135 FALSE TRUE
Multiscale Histograms (Chebyshev (Fourier ())) [1] 14051.00000 7.337816456 FALSE TRUE
Tamura Textures (Chebyshev (Fourier ())) [0] 30333.00000 0.013185654 FALSE TRUE
Fractal Features (Chebyshev (Fourier ())) [16] 13713.50000 9.589266878 FALSE TRUE
Fractal Features (Chebyshev (Fourier ())) [17] 27787.00000 8.405854430 FALSE TRUE
Fractal Features (Chebyshev (Fourier ())) [18] 28083.00000 7.430116034 FALSE TRUE
Fractal Features (Chebyshev (Fourier ())) [19] 28324.00000 6.635680380 FALSE TRUE
Multiscale Histograms (Wavelet (Fourier ())) [6] 806.30556 1.437236287 FALSE TRUE
Multiscale Histograms (Wavelet (Fourier ())) [11] 3248.77778 1.793248945 FALSE TRUE
Multiscale Histograms (Wavelet (Fourier ())) [12] 10077.66667 0.296677215 FALSE TRUE
Multiscale Histograms (Wavelet (Fourier ())) [13] 277.03659 2.215189873 FALSE TRUE
Multiscale Histograms (Wavelet (Fourier ())) [19] 10088.33333 0.217563291 FALSE TRUE
Multiscale Histograms (Wavelet (Fourier ())) [20] 15158.50000 0.062631857 FALSE TRUE
Multiscale Histograms (Wavelet (Fourier ())) [21] 1643.05556 1.104298523 FALSE TRUE
Multiscale Histograms (Wavelet (Fourier ())) [22] 61.50661 2.412974684 FALSE TRUE
Tamura Textures (Wavelet (Fourier ())) [2] 14937.50000 1.516350211 FALSE TRUE
Fractal Features (Wavelet (Fourier ())) [18] 13754.00000 9.322257384 FALSE TRUE
Fractal Features (Wavelet (Fourier ())) [19] 27902.00000 8.026766878 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [14] 19.50665 0.029667722 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [15] 41.62014 0.029667722 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [16] 69.44471 0.023074895 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [17] 103.78472 0.023074895 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [18] 139.80930 0.016482068 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [19] 235.73438 0.026371308 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [20] 298.72277 0.016482068 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [21] 397.36842 0.009889241 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [22] 530.42105 0.009889241 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [23] 343.45455 0.013185654 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [24] 151.35678 0.009889241 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [25] 66.23503 0.013185654 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Wavelet ())) [26] 22.75098 0.013185654 FALSE TRUE
Chebyshev Coefficients (Fourier (Wavelet ())) [27] 24.19618 0.013185654 FALSE TRUE
Chebyshev Coefficients (Fourier (Wavelet ())) [28] 44.40569 0.009889241 FALSE TRUE
Chebyshev Coefficients (Fourier (Wavelet ())) [29] 103.60000 0.009889241 FALSE TRUE
Chebyshev Coefficients (Fourier (Wavelet ())) [30] 173.34483 0.006592827 FALSE TRUE
Chebyshev Coefficients (Fourier (Wavelet ())) [31] 392.97403 0.006592827 FALSE TRUE
Multiscale Histograms (Fourier (Wavelet ())) [10] 357.16667 3.457937764 FALSE TRUE
Multiscale Histograms (Fourier (Wavelet ())) [13] 67.45118 1.865770042 FALSE TRUE
Multiscale Histograms (Fourier (Wavelet ())) [17] 56.97802 6.256592827 FALSE TRUE
Multiscale Histograms (Fourier (Wavelet ())) [18] 591.35714 2.775580169 FALSE TRUE
Multiscale Histograms (Fourier (Wavelet ())) [20] 251.29730 2.614055907 FALSE TRUE
Multiscale Histograms (Fourier (Wavelet ())) [22] 10103.00000 0.079113924 FALSE TRUE
Fractal Features (Fourier (Wavelet ())) [18] 13754.00000 9.322257384 FALSE TRUE
Fractal Features (Fourier (Wavelet ())) [19] 27902.00000 8.026766878 FALSE TRUE
Comb Moments (Fourier (Chebyshev ())) [0] 3030.70000 0.029667722 FALSE TRUE
Comb Moments (Fourier (Chebyshev ())) [12] 2525.41667 0.029667722 FALSE TRUE
Fractal Features (Fourier (Chebyshev ())) [16] 13713.50000 9.589266878 FALSE TRUE
Fractal Features (Fourier (Chebyshev ())) [17] 27787.00000 8.405854430 FALSE TRUE
Fractal Features (Fourier (Chebyshev ())) [18] 28083.00000 7.430116034 FALSE TRUE
Fractal Features (Fourier (Chebyshev ())) [19] 28324.00000 6.635680380 FALSE TRUE
Tamura Textures (Chebyshev (Wavelet ())) [2] 9846.66667 2.538238397 FALSE TRUE
Fractal Features (Chebyshev (Wavelet ())) [18] 13754.00000 9.276107595 FALSE TRUE
Fractal Features (Chebyshev (Wavelet ())) [19] 13951.00000 7.980617089 FALSE TRUE
Chebyshev Coefficients (Edge ()) [30] 22.43090 0.016482068 FALSE TRUE
Chebyshev Coefficients (Edge ()) [31] 28.06635 0.016482068 FALSE TRUE
Multiscale Histograms (Edge ()) [0] 30230.00000 0.352716245 FALSE TRUE
Tamura Textures (Edge ()) [0] 30076.00000 0.860363924 FALSE TRUE
Fractal Features (Edge ()) [16] 13713.50000 9.589266878 FALSE TRUE
Fractal Features (Edge ()) [17] 27787.00000 8.405854430 FALSE TRUE
Fractal Features (Edge ()) [18] 28083.00000 7.430116034 FALSE TRUE
Fractal Features (Edge ()) [19] 28324.00000 6.635680380 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Edge ())) [20] 19.36595 0.029667722 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Edge ())) [21] 27.14867 0.032964135 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Edge ())) [22] 39.24189 0.026371308 FALSE TRUE
Chebyshev-Fourier Coefficients (Fourier (Edge ())) [23] 34.32147 0.023074895 FALSE TRUE
Chebyshev Coefficients (Fourier (Edge ())) [27] 33.34014 0.013185654 FALSE TRUE
Chebyshev Coefficients (Fourier (Edge ())) [28] 59.48303 0.013185654 FALSE TRUE
Chebyshev Coefficients (Fourier (Edge ())) [29] 106.88612 0.009889241 FALSE TRUE
Chebyshev Coefficients (Fourier (Edge ())) [30] 183.87805 0.013185654 FALSE TRUE
Chebyshev Coefficients (Fourier (Edge ())) [31] 258.19658 0.013185654 FALSE TRUE
Multiscale Histograms (Fourier (Edge ())) [13] 665.51724 8.076213080 FALSE TRUE
Multiscale Histograms (Fourier (Edge ())) [20] 389.87097 9.533227848 FALSE TRUE
Multiscale Histograms (Fourier (Edge ())) [21] 628.85714 8.517932489 FALSE TRUE
Multiscale Histograms (Fourier (Edge ())) [22] 1351.44444 5.607199367 FALSE TRUE
Tamura Textures (Fourier (Edge ())) [0] 30045.00000 0.962552743 FALSE TRUE
Fractal Features (Fourier (Edge ())) [16] 13713.50000 9.589266878 FALSE TRUE
Fractal Features (Fourier (Edge ())) [17] 27787.00000 8.405854430 FALSE TRUE
Fractal Features (Fourier (Edge ())) [18] 28083.00000 7.430116034 FALSE TRUE
Fractal Features (Fourier (Edge ())) [19] 28324.00000 6.635680380 FALSE TRUE
Chebyshev Coefficients (Wavelet (Edge ())) [29] 19.65862 0.019778481 FALSE TRUE
Chebyshev Coefficients (Wavelet (Edge ())) [30] 29.54462 0.016482068 FALSE TRUE
Chebyshev Coefficients (Wavelet (Edge ())) [31] 38.74605 0.019778481 FALSE TRUE
Fractal Features (Wavelet (Edge ())) [18] 13754.00000 9.322257384 FALSE TRUE
Fractal Features (Wavelet (Edge ())) [19] 27902.00000 8.026766878 FALSE TRUE
Pixel Intensity Statistics (Wavelet (Edge ())) [1] 3196.77778 5.010548523 FALSE TRUE
train.features <- train.features[,!NZ]
dim(train.features)
[1] 30336 2705
# Pairwise correlations between the remaining features.
cor.matrix <- cor(train.features)
# Count feature pairs (upper triangle only) whose absolute correlation exceeds 0.99.
cor.high.count <- sum(abs(cor.matrix)[upper.tri(cor.matrix)] > 0.99)
cor.high.count
[1] 418
# Range check
summary(cor.matrix[upper.tri(cor.matrix)])
Min. 1st Qu. Median Mean 3rd Qu. Max.
-1.0000000 -0.0879200 0.0008799 0.0059710 0.0928200 1.0000000
COR.HIGH.CUTOFF <- 0.75 # try higher values later
cor.high <- findCorrelation(cor.matrix, cutoff=COR.HIGH.CUTOFF)
length(cor.high)
[1] 1458
# Drop the columns flagged by findCorrelation().
# Guard the empty case: indexing with a zero-length negative index selects
# zero columns instead of keeping them all (e.g. with a higher cutoff).
# drop=FALSE keeps the 2-D shape even if only one column survives.
if (length(cor.high) > 0) {
  train.features <- train.features[, -cor.high, drop=FALSE]
}
dim(train.features)
[1] 30336 1247
# Repeat range check
cor.matrix <- cor(train.features)
summary(cor.matrix[upper.tri(cor.matrix)])
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.747700 -0.047860 0.003115 0.006503 0.058700 0.748600
linearCombos <- findLinearCombos(train.features)
LINEAR <- linearCombos$remove
length(LINEAR)
[1] 53
# Drop the columns that findLinearCombos() marked as removable.
# Guard the empty case: when nothing is flagged, LINEAR has length zero and
# a zero-length negative index would select zero columns instead of all.
# drop=FALSE keeps the 2-D shape even if only one column survives.
if (length(LINEAR) > 0) {
  train.features <- train.features[, -LINEAR, drop=FALSE]
}
dim(train.features)
[1] 30336 1194
Because of some of the small Plankton classes, this technique cannot be used without receiving this error:
centroids <- classDist(as.factor(train.class), train.features)
"there must be more rows than columns for this class"
YeoJohnson is like BoxCox but can be used with zero and negative values.
Caret execution order: Box-Cox/Yeo-Johnson/expoTrans, center, scale, range, imputation, PCA/ICA, spatial sign
Variations to try:
1a. pca, thresh=0.95 or pcaComp=75
or
1b. ica, n.comp=3
# Estimate the centering and scaling parameters from the reduced feature set.
PREPROC.METHOD <- c("center", "scale")
trainPreProcessed <- preProcess(
  train.features,
  method=PREPROC.METHOD,
  # thresh=0.75,     # disabled; kept for the pca variation
  na.remove=FALSE,   # train.features was already checked to contain no NAs
  verbose=TRUE
)
Calculating means for centering
Calculating standard deviations for scaling
trainTransformed <- predict(trainPreProcessed, train.features)
dim(trainTransformed)
[1] 30336 1194
# Write the fitted preprocessing object, the transformed features, the class
# labels and the column-removal selections to TRAIN-SETUP-<SETUP>.RData.
save(
  trainPreProcessed,
  trainTransformed,
  train.class,
  NZ,
  cor.high,
  LINEAR,
  PREPROC.METHOD,
  file=paste0("TRAIN-SETUP-", SETUP, ".RData")
)
stopCluster(rCluster)
# Report the elapsed wall-clock time since time.1, in seconds.
time.2 <- Sys.time()
# Format the whole message in sprintf(): passing " secs\n" as a second
# argument makes cat() insert its default separator, printing two spaces.
cat(sprintf("%.1f secs\n", as.numeric(difftime(time.2, time.1, units="secs"))))
534.8 secs
efg @EarlGlynn
2015-03-02 2324