# Record the start time; it is compared with a second timestamp at the end of
# the run to report the elapsed seconds.
time.1 <- Sys.time()
# Display the start timestamp as year-month-day-HHMMSS.
format(time.1, "%Y-%m-%d-%H%M%S")
## [1] "2015-02-26-004309"

Training set

Load Training Set files

Load wndchrm features

feature.matrix is a matrix

load(file="../Wnd-Charm/plankton-train-wndchrm-features.Rdata", verbose=TRUE)   
Loading objects:
  feature.matrix
str(feature.matrix)
 num [1:30336, 1:2895] 1 1 1 1 1 1 1 1 1 1 ...
 - attr(*, "dimnames")=List of 2
  ..$ : chr [1:30336] "100224-l.sig" "100723-l.sig" "101165-l.sig" "101232-l.sig" ...
  ..$ : chr [1:2895] "class" "Edge Features () [0]" "Edge Features () [1]" "Edge Features () [2]" ...
trainFeatures <- colnames(feature.matrix)

Column 1 of the feature.matrix defines the plankton class.

counts1 <- table(feature.matrix[,1])
counts1

   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
 889   13   71   49   16  696  532  242  393  170  815 1934  694   77  681 
  16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
 173   96  178   63  286  106   49   87   30  899 1189   24  201  113   42 
  31   32   33   34   35   36   37   38   39   40   41   42   43   44   45 
  53   38   55  363  394  914  519  500   36   92   80   88  385  536   96 
  46   47   48   49   50   51   52   53   54   55   56   57   58   59   60 
  27   14  136   38  511   10   31   85  114   64   16   10  127   75   35 
  61   62   63   64   65   66   67   68   69   70   71   72   73   74   75 
 229    9   19   23  336  132   12  190  412  274  150   76  703  123   43 
  76   77   78   79   80   81   82   83   84   85   86   87   88   89   90 
  56   14   61   14   24  141  131  108  372  625 1172  113  108   13   65 
  91   92   93   94   95   96   97   98   99  100  101  102  103  104  105 
 287  158   52   49  153  174  212  135  483  179   57  247   29   30  128 
 106  107  108  109  110  111  112  113  114  115  116  117  118  119  120 
  21   24   38  708   54 1979  678   29  439  417  352  236   73  317  175 
 121 
 425 

The feature.matrix row names define the image filename. For example, the original image 100224.jpg was converted to 100224.tif. The file of wndchrm properties was named 100224-l.sig, which is the rowname of feature.matrix.

Load skimage features

skimage is a data.frame

skimage <- read.csv("../skimage/train-properties-MASTER.csv", as.is=TRUE)
str(skimage)
'data.frame':   30336 obs. of  31 variables:
 $ area                      : num  273 451 292 319 444 161 336 176 165 397 ...
 $ area_convex               : num  1520 4579 2022 1307 2877 ...
 $ area_filled               : num  273 451 292 320 446 162 337 176 166 397 ...
 $ box_max_col               : num  48 71 64 40 58 32 49 32 49 96 ...
 $ box_max_row               : num  56 80 66 64 88 40 68 71 63 79 ...
 $ box_min_col               : num  0 0 6 8 0 8 11 0 11 10 ...
 $ box_min_row               : num  8 8 10 8 11 8 11 0 2 10 ...
 $ centroid_col              : num  24.7 33.1 35.5 24.2 28.3 ...
 $ centroid_row              : num  29.8 40.6 35.1 33.7 50.6 ...
 $ countCoords               : num  273 451 292 319 444 161 336 176 165 397 ...
 $ diameter_equivalent       : num  18.6 24 19.3 20.2 23.8 ...
 $ euler_number              : num  0 0 0 -1 -1 -1 -1 0 -1 0 ...
 $ filename                  : chr  "100224.jpg" "100723.jpg" "101165.jpg" "101232.jpg" ...
 $ inertia_tensor_eigenvalue1: num  126 331 161 199 342 ...
 $ inertia_tensor_eigenvalue2: num  83.9 254.8 80.7 36.3 97.7 ...
 $ label                     : num  1 2 1 1 1 1 1 1 2 2 ...
 $ length_major_axis         : num  44.9 72.8 50.7 56.4 74 ...
 $ length_minor_axis         : num  36.6 63.9 35.9 24.1 39.5 ...
 $ moments_hu1               : num  0.768 1.298 0.827 0.737 0.99 ...
 $ moments_hu2               : num  0.0236 0.0284 0.075 0.2597 0.3024 ...
 $ moments_hu3               : num  0.10632 0.18037 0.00876 0.01296 0.16193 ...
 $ moments_hu4               : num  0.00261 0.01568 0.00525 0.00066 0.08363 ...
 $ moments_hu5               : num  -8.95e-06 -3.43e-04 1.33e-05 1.81e-06 9.71e-03 ...
 $ moments_hu6               : num  0.000209 0.002065 -0.000807 -0.000237 0.042954 ...
 $ moments_hu7               : num  -4.27e-05 -7.60e-04 3.30e-05 6.76e-07 6.33e-04 ...
 $ orientation               : num  -0.568 0.381 0.31 1.223 1.125 ...
 $ perimeter                 : num  175 340 217 218 315 ...
 $ ratio_eccentricity        : num  0.577 0.479 0.706 0.904 0.845 ...
 $ ratio_extent              : num  0.1185 0.0882 0.0899 0.178 0.0994 ...
 $ ratio_solidity            : num  0.1796 0.0985 0.1444 0.2441 0.1543 ...
 $ train                     : int  0 0 0 0 0 0 0 0 0 0 ...
head(skimage,2)
  area area_convex area_filled box_max_col box_max_row box_min_col
1  273        1520         273          48          56           0
2  451        4579         451          71          80           0
  box_min_row centroid_col centroid_row countCoords diameter_equivalent
1           8     24.67399     29.82784         273            18.64388
2           8     33.11973     40.62084         451            23.96312
  euler_number   filename inertia_tensor_eigenvalue1
1            0 100224.jpg                   125.8685
2            0 100723.jpg                   330.8140
  inertia_tensor_eigenvalue2 label length_major_axis length_minor_axis
1                   83.91497     1          44.87646          36.64205
2                  254.80400     2          72.75317          63.85033
  moments_hu1 moments_hu2 moments_hu3 moments_hu4   moments_hu5
1   0.7684377  0.02361631   0.1063240 0.002614545 -8.948032e-06
2   1.2984877  0.02840454   0.1803685 0.015682721 -3.427971e-04
   moments_hu6   moments_hu7 orientation perimeter ratio_eccentricity
1 0.0002093655 -4.266406e-05   -0.567969  175.4325          0.5773322
2 0.0020647507 -7.603912e-04    0.380896  340.2792          0.4793396
  ratio_extent ratio_solidity train
1   0.11848958     0.17960526     0
2   0.08822379     0.09849312     0

skimage$train defines the plankton class.

skimage$filename defines the image filename (e.g., 100224.jpg).

Filename comparisons between skimage and the feature.matrix must drop the extension and any suffix (e.g., both 100224.jpg and 100224-l.sig reduce to 100224).

Load index conversion chart

The cause was not investigated, but for some reason traversing the directory tree of plankton .tif files using CentOS/IPython resulted in a slightly different directory order than traversing the corresponding directory tree of the original .jpg files.

Instead of resolving the original problem, a conversion table was created that gives the corresponding class order for the two sources.

Here skimage rows are ordered to match the feature.matrix rows using filenames.

xlate <- read.csv("wndchrm-skimage-index-translate.csv", as.is=TRUE)

skiorder <- skimage$train

Convert skimage class to wndchrm class

Use brute force here

# Map each skimage class code to its wndchrm class code.
# The comparison uses skiorder (the untouched copy of the original codes)
# rather than skimage$train, so a row that has already been translated is
# never matched again by a later table entry.
# The loop runs over the rows of xlate instead of a hard-coded 1:121 so it
# always covers exactly the entries present in the translation table.
for (i in seq_len(nrow(xlate))) {
  skimage$train[skiorder == xlate$skimage[i]] <- xlate$wndchrm[i]
}

standardize row order sort: plankton class, filename

# Put both feature sources into the same row order: plankton class first,
# then filename within class.
# The sorted wndchrm rows are stored back into feature.matrix because every
# later step (the alignment checks and the cbind with skimage) reads
# feature.matrix. Previously the sorted copy went only into `wndchrm`, so
# feature.matrix kept its load order while skimage was re-sorted.
feature.matrix <- feature.matrix[order(feature.matrix[, 1],
                                       row.names(feature.matrix)), ]
wndchrm <- feature.matrix  # keep the original variable name available
skimage <- skimage[order(skimage$train, skimage$filename), ]

Do classes and files line up?

Verify match by plankton class

all((skimage$train) == feature.matrix[,1])
## [1] TRUE
sum((skimage$train) == feature.matrix[,1])
## [1] 30336

Verify match by filename

wndchrm

# wndchrm row names look like "100224-l.sig"; keep only the piece in front of
# the first "-" as the image id.
splits <- strsplit(row.names(feature.matrix), split = "-")
wndchrmFileIndex <- unlist(lapply(splits, function(pieces) pieces[1]))
head(wndchrmFileIndex)
## [1] "100224" "100723" "101165" "101232" "101260" "101358"

skimage

# skimage filenames look like "100224.jpg"; keep only the piece in front of
# the first "." as the image id.
splits <- strsplit(skimage$filename, split = "\\.")
skimageFileIndex <- unlist(lapply(splits, function(pieces) pieces[1]))

all(wndchrmFileIndex == skimageFileIndex)
## [1] TRUE
sum(wndchrmFileIndex == skimageFileIndex)
## [1] 30336

With files aligned now, combine feature sources

Drop columns skimage$train and skimage$filename since they align with corresponding info in the feature.matrix.

str(skimage)
'data.frame':   30336 obs. of  31 variables:
 $ area                      : num  273 451 292 319 444 161 336 176 165 397 ...
 $ area_convex               : num  1520 4579 2022 1307 2877 ...
 $ area_filled               : num  273 451 292 320 446 162 337 176 166 397 ...
 $ box_max_col               : num  48 71 64 40 58 32 49 32 49 96 ...
 $ box_max_row               : num  56 80 66 64 88 40 68 71 63 79 ...
 $ box_min_col               : num  0 0 6 8 0 8 11 0 11 10 ...
 $ box_min_row               : num  8 8 10 8 11 8 11 0 2 10 ...
 $ centroid_col              : num  24.7 33.1 35.5 24.2 28.3 ...
 $ centroid_row              : num  29.8 40.6 35.1 33.7 50.6 ...
 $ countCoords               : num  273 451 292 319 444 161 336 176 165 397 ...
 $ diameter_equivalent       : num  18.6 24 19.3 20.2 23.8 ...
 $ euler_number              : num  0 0 0 -1 -1 -1 -1 0 -1 0 ...
 $ filename                  : chr  "100224.jpg" "100723.jpg" "101165.jpg" "101232.jpg" ...
 $ inertia_tensor_eigenvalue1: num  126 331 161 199 342 ...
 $ inertia_tensor_eigenvalue2: num  83.9 254.8 80.7 36.3 97.7 ...
 $ label                     : num  1 2 1 1 1 1 1 1 2 2 ...
 $ length_major_axis         : num  44.9 72.8 50.7 56.4 74 ...
 $ length_minor_axis         : num  36.6 63.9 35.9 24.1 39.5 ...
 $ moments_hu1               : num  0.768 1.298 0.827 0.737 0.99 ...
 $ moments_hu2               : num  0.0236 0.0284 0.075 0.2597 0.3024 ...
 $ moments_hu3               : num  0.10632 0.18037 0.00876 0.01296 0.16193 ...
 $ moments_hu4               : num  0.00261 0.01568 0.00525 0.00066 0.08363 ...
 $ moments_hu5               : num  -8.95e-06 -3.43e-04 1.33e-05 1.81e-06 9.71e-03 ...
 $ moments_hu6               : num  0.000209 0.002065 -0.000807 -0.000237 0.042954 ...
 $ moments_hu7               : num  -4.27e-05 -7.60e-04 3.30e-05 6.76e-07 6.33e-04 ...
 $ orientation               : num  -0.568 0.381 0.31 1.223 1.125 ...
 $ perimeter                 : num  175 340 217 218 315 ...
 $ ratio_eccentricity        : num  0.577 0.479 0.706 0.904 0.845 ...
 $ ratio_extent              : num  0.1185 0.0882 0.0899 0.178 0.0994 ...
 $ ratio_solidity            : num  0.1796 0.0985 0.1444 0.2441 0.1543 ...
 $ train                     : int  1 1 1 1 1 1 1 1 1 1 ...
# Remove the two non-feature columns; assigning NULL deletes a data.frame
# column. The str() output that follows shows 29 of the 31 variables remain.
skimage$filename <- NULL
skimage$train    <- NULL

str(skimage)
'data.frame':   30336 obs. of  29 variables:
 $ area                      : num  273 451 292 319 444 161 336 176 165 397 ...
 $ area_convex               : num  1520 4579 2022 1307 2877 ...
 $ area_filled               : num  273 451 292 320 446 162 337 176 166 397 ...
 $ box_max_col               : num  48 71 64 40 58 32 49 32 49 96 ...
 $ box_max_row               : num  56 80 66 64 88 40 68 71 63 79 ...
 $ box_min_col               : num  0 0 6 8 0 8 11 0 11 10 ...
 $ box_min_row               : num  8 8 10 8 11 8 11 0 2 10 ...
 $ centroid_col              : num  24.7 33.1 35.5 24.2 28.3 ...
 $ centroid_row              : num  29.8 40.6 35.1 33.7 50.6 ...
 $ countCoords               : num  273 451 292 319 444 161 336 176 165 397 ...
 $ diameter_equivalent       : num  18.6 24 19.3 20.2 23.8 ...
 $ euler_number              : num  0 0 0 -1 -1 -1 -1 0 -1 0 ...
 $ inertia_tensor_eigenvalue1: num  126 331 161 199 342 ...
 $ inertia_tensor_eigenvalue2: num  83.9 254.8 80.7 36.3 97.7 ...
 $ label                     : num  1 2 1 1 1 1 1 1 2 2 ...
 $ length_major_axis         : num  44.9 72.8 50.7 56.4 74 ...
 $ length_minor_axis         : num  36.6 63.9 35.9 24.1 39.5 ...
 $ moments_hu1               : num  0.768 1.298 0.827 0.737 0.99 ...
 $ moments_hu2               : num  0.0236 0.0284 0.075 0.2597 0.3024 ...
 $ moments_hu3               : num  0.10632 0.18037 0.00876 0.01296 0.16193 ...
 $ moments_hu4               : num  0.00261 0.01568 0.00525 0.00066 0.08363 ...
 $ moments_hu5               : num  -8.95e-06 -3.43e-04 1.33e-05 1.81e-06 9.71e-03 ...
 $ moments_hu6               : num  0.000209 0.002065 -0.000807 -0.000237 0.042954 ...
 $ moments_hu7               : num  -4.27e-05 -7.60e-04 3.30e-05 6.76e-07 6.33e-04 ...
 $ orientation               : num  -0.568 0.381 0.31 1.223 1.125 ...
 $ perimeter                 : num  175 340 217 218 315 ...
 $ ratio_eccentricity        : num  0.577 0.479 0.706 0.904 0.845 ...
 $ ratio_extent              : num  0.1185 0.0882 0.0899 0.178 0.0994 ...
 $ ratio_solidity            : num  0.1796 0.0985 0.1444 0.2441 0.1543 ...

Convert to matrix and cbind to feature.matrix

skimage <- as.matrix(skimage)

dim(feature.matrix)
[1] 30336  2895
dim(skimage)
[1] 30336    29
feature.matrix <- cbind(feature.matrix, skimage)
dim(feature.matrix)
[1] 30336  2924

Let’s treat the plankton class as a separate vector and remove class from the matrix

train.class    <- feature.matrix[, 1]
train.features <- feature.matrix[,-1]

Final look at number of images by class

table(train.class)
train.class
   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
 889   13   71   49   16  696  532  242  393  170  815 1934  694   77  681 
  16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
 173   96  178   63  286  106   49   87   30  899 1189   24  201  113   42 
  31   32   33   34   35   36   37   38   39   40   41   42   43   44   45 
  53   38   55  363  394  914  519  500   36   92   80   88  385  536   96 
  46   47   48   49   50   51   52   53   54   55   56   57   58   59   60 
  27   14  136   38  511   10   31   85  114   64   16   10  127   75   35 
  61   62   63   64   65   66   67   68   69   70   71   72   73   74   75 
 229    9   19   23  336  132   12  190  412  274  150   76  703  123   43 
  76   77   78   79   80   81   82   83   84   85   86   87   88   89   90 
  56   14   61   14   24  141  131  108  372  625 1172  113  108   13   65 
  91   92   93   94   95   96   97   98   99  100  101  102  103  104  105 
 287  158   52   49  153  174  212  135  483  179   57  247   29   30  128 
 106  107  108  109  110  111  112  113  114  115  116  117  118  119  120 
  21   24   38  708   54 1979  678   29  439  417  352  236   73  317  175 
 121 
 425 

Save train data to .Rdata file for fast loads

dim(train.features)
[1] 30336  2923
object.size(train.features)
711597520 bytes
object.size(train.class)
2184368 bytes
save(train.features, train.class, file="plankton-train-wndchrm-skimage-features.Rdata")

Save training and testing data into separate .Rdata files so the testing data is only brought into memory after the model has been built.

Save some memory to process testing set now

# Free the large training objects before loading the test set. Besides the
# two objects just saved, the training feature.matrix, the sorted wndchrm
# copy and the training skimage matrix are no longer needed; feature.matrix
# and skimage are re-created from the test files below.
# trainFeatures is kept because the train/test feature comparison uses it.
rm(train.features, train.class, feature.matrix, wndchrm, skimage)

Testing set

Load Testing Set files

Load wndchrm features

load(file="../Wnd-Charm/plankton-test-wndchrm-features.Rdata", verbose=TRUE)   
Loading objects:
  feature.matrix
str(feature.matrix)
 num [1:130400, 1:2898] 285 387 1999 332 967 ...
 - attr(*, "dimnames")=List of 2
  ..$ : chr [1:130400] "1-l.sig" "10-l.sig" "100-l.sig" "1000-l.sig" ...
  ..$ : chr [1:2898] "Edge Features () [0]" "Edge Features () [1]" "Edge Features () [2]" "Edge Features () [3]" ...
testFeatures <- colnames(feature.matrix)

Load skimage features

skimage <- read.csv("../skimage/test-properties-MASTER.csv", as.is=TRUE)
str(skimage)
'data.frame':   130400 obs. of  30 variables:
 $ area                      : num  166 216 1413 138 945 ...
 $ area_convex               : num  217 413 3673 241 1028 ...
 $ area_filled               : num  166 216 3478 138 945 ...
 $ box_max_col               : num  23 44 86 27 51 36 76 30 45 24 ...
 $ box_max_row               : num  44 32 72 35 51 35 48 32 43 39 ...
 $ box_min_col               : num  10 12 11 10 13 0 10 10 11 1 ...
 $ box_min_row               : num  12 11 10 10 11 12 8 10 11 3 ...
 $ centroid_col              : num  15.1 27.8 46.6 15.1 31.4 ...
 $ centroid_row              : num  28.3 25.1 40.6 22.6 30.9 ...
 $ countCoords               : num  166 216 1413 138 945 ...
 $ diameter_equivalent       : num  14.5 16.6 42.4 13.3 34.7 ...
 $ euler_number              : num  0 0 -17 0 0 -7 -8 -2 -1 0 ...
 $ filename                  : chr  "1.jpg" "10.jpg" "100.jpg" "1000.jpg" ...
 $ inertia_tensor_eigenvalue1: num  73.9 52.2 445.2 34.4 122.1 ...
 $ inertia_tensor_eigenvalue2: num  2.67 20.58 303.03 10.53 47.33 ...
 $ label                     : num  3 1 1 1 1 1 1 1 2 1 ...
 $ length_major_axis         : num  34.4 28.9 84.4 23.5 44.2 ...
 $ length_minor_axis         : num  6.53 18.15 69.63 12.98 27.52 ...
 $ moments_hu1               : num  0.461 0.337 0.53 0.326 0.179 ...
 $ moments_hu2               : num  0.18418 0.02142 0.01012 0.02995 0.00625 ...
 $ moments_hu3               : num  8.48e-04 2.84e-02 1.62e-04 1.37e-02 3.40e-06 ...
 $ moments_hu4               : num  5.42e-04 1.55e-03 5.10e-05 3.58e-04 3.80e-07 ...
 $ moments_hu5               : num  3.65e-07 -1.02e-05 4.50e-09 -4.20e-07 3.07e-13 ...
 $ moments_hu6               : num  2.10e-04 -1.45e-04 1.71e-06 2.49e-05 1.42e-08 ...
 $ moments_hu7               : num  -4.92e-08 2.73e-07 1.12e-09 -6.70e-07 -3.05e-13 ...
 $ orientation               : num  -1.365 0.327 0.397 -1.112 0.845 ...
 $ perimeter                 : num  72.4 92.9 801.3 73.7 136.5 ...
 $ ratio_eccentricity        : num  0.982 0.778 0.565 0.833 0.782 ...
 $ ratio_extent              : num  0.399 0.321 0.304 0.325 0.622 ...
 $ ratio_solidity            : num  0.765 0.523 0.385 0.573 0.919 ...
head(skimage,2)
  area area_convex area_filled box_max_col box_max_row box_min_col
1  166         217         166          23          44          10
2  216         413         216          44          32          12
  box_min_row centroid_col centroid_row countCoords diameter_equivalent
1          12     15.08434     28.33735         166            14.53815
2          11     27.76389     25.09722         216            16.58372
  euler_number filename inertia_tensor_eigenvalue1
1            0    1.jpg                   73.90947
2            0   10.jpg                   52.19420
  inertia_tensor_eigenvalue2 label length_major_axis length_minor_axis
1                   2.668406     3          34.38825          6.534103
2                  20.583193     1          28.89822         18.147481
  moments_hu1 moments_hu2  moments_hu3  moments_hu4   moments_hu5
1   0.4613125  0.18418092 0.0008481068 0.0005423557  3.645311e-07
2   0.3369324  0.02141752 0.0284133342 0.0015455760 -1.023864e-05
    moments_hu6   moments_hu7 orientation perimeter ratio_eccentricity
1  0.0002102579 -4.918098e-08  -1.3645697  72.42031          0.9817822
2 -0.0001452330  2.732447e-07   0.3266472  92.94113          0.7782301
  ratio_extent ratio_solidity
1    0.3990385      0.7649770
2    0.3214286      0.5230024

Make sure train and test features match.

# Drop "class" from the train feature names; it is not in the test set.
# Removing it by name rather than by position makes the step safe to re-run:
# a second run of trainFeatures[-1] would silently drop a real feature.
trainFeatures <- trainFeatures[trainFeatures != "class"]

length(trainFeatures)
[1] 2894
length(testFeatures)
[1] 2898
length(intersect(testFeatures, trainFeatures))  # count of features in common
[1] 2894
setdiff(testFeatures, trainFeatures)
[1] "Chebyshev Coefficients () [28]"                  
[2] "Pixel Intensity Statistics () [4]"               
[3] "Radon Coefficients (Fourier ()) [2]"             
[4] "Multiscale Histograms (Wavelet (Fourier ())) [0]"
setdiff(trainFeatures, testFeatures)
character(0)

Remove features in test set not in train set

Four features must be removed from the test set. These features were constant in the training set and were removed, but they were not constant in the test set.

featuresToRemove <- setdiff(testFeatures, trainFeatures)
featuresToRemoveIndices <- which(colnames(feature.matrix) %in% featuresToRemove)

colnames(feature.matrix)[featuresToRemoveIndices]
[1] "Chebyshev Coefficients () [28]"                  
[2] "Pixel Intensity Statistics () [4]"               
[3] "Radon Coefficients (Fourier ()) [2]"             
[4] "Multiscale Histograms (Wavelet (Fourier ())) [0]"
dim(feature.matrix)
[1] 130400   2898
# Drop the test-only columns by name. A logical keep-mask is used instead of
# negative indices because x[, -integer(0)] selects zero columns: if there
# were nothing to remove, the negative-index form would discard every feature.
# drop = FALSE keeps the result a matrix even if only one column remains.
keepColumn <- !(colnames(feature.matrix) %in% featuresToRemove)
feature.matrix <- feature.matrix[, keepColumn, drop = FALSE]
dim(feature.matrix)
[1] 130400   2894

Final check that wndchrm features in train and test match exactly

testFeatures <- colnames(feature.matrix)
length(trainFeatures)
[1] 2894
length(testFeatures)
[1] 2894
all(testFeatures == trainFeatures)
[1] TRUE

Make sure wndchrm and skimage files processed in the same order.

Verify match by filename

wndchrm

# wndchrm row names look like "1-l.sig"; keep only the piece in front of the
# first "-" as the image id.
splits <- strsplit(row.names(feature.matrix), split = "-")
wndchrmFileIndex <- unlist(lapply(splits, function(pieces) pieces[1]))
head(wndchrmFileIndex)
## [1] "1"      "10"     "100"    "1000"   "10000"  "100000"

skimage

# skimage filenames look like "1.jpg"; keep only the piece in front of the
# first "." as the image id.
splits <- strsplit(skimage$filename, split = "\\.")
skimageFileIndex <- unlist(lapply(splits, function(pieces) pieces[1]))

all(wndchrmFileIndex == skimageFileIndex)
## [1] TRUE
sum(wndchrmFileIndex == skimageFileIndex)
## [1] 130400

Cleanup on skimage before combining with wndchrm features

Drop column skimage$filename since it aligns with the row names of the feature.matrix. (Unlike the training file, the test skimage file has no train column.)

str(skimage)
'data.frame':   130400 obs. of  30 variables:
 $ area                      : num  166 216 1413 138 945 ...
 $ area_convex               : num  217 413 3673 241 1028 ...
 $ area_filled               : num  166 216 3478 138 945 ...
 $ box_max_col               : num  23 44 86 27 51 36 76 30 45 24 ...
 $ box_max_row               : num  44 32 72 35 51 35 48 32 43 39 ...
 $ box_min_col               : num  10 12 11 10 13 0 10 10 11 1 ...
 $ box_min_row               : num  12 11 10 10 11 12 8 10 11 3 ...
 $ centroid_col              : num  15.1 27.8 46.6 15.1 31.4 ...
 $ centroid_row              : num  28.3 25.1 40.6 22.6 30.9 ...
 $ countCoords               : num  166 216 1413 138 945 ...
 $ diameter_equivalent       : num  14.5 16.6 42.4 13.3 34.7 ...
 $ euler_number              : num  0 0 -17 0 0 -7 -8 -2 -1 0 ...
 $ filename                  : chr  "1.jpg" "10.jpg" "100.jpg" "1000.jpg" ...
 $ inertia_tensor_eigenvalue1: num  73.9 52.2 445.2 34.4 122.1 ...
 $ inertia_tensor_eigenvalue2: num  2.67 20.58 303.03 10.53 47.33 ...
 $ label                     : num  3 1 1 1 1 1 1 1 2 1 ...
 $ length_major_axis         : num  34.4 28.9 84.4 23.5 44.2 ...
 $ length_minor_axis         : num  6.53 18.15 69.63 12.98 27.52 ...
 $ moments_hu1               : num  0.461 0.337 0.53 0.326 0.179 ...
 $ moments_hu2               : num  0.18418 0.02142 0.01012 0.02995 0.00625 ...
 $ moments_hu3               : num  8.48e-04 2.84e-02 1.62e-04 1.37e-02 3.40e-06 ...
 $ moments_hu4               : num  5.42e-04 1.55e-03 5.10e-05 3.58e-04 3.80e-07 ...
 $ moments_hu5               : num  3.65e-07 -1.02e-05 4.50e-09 -4.20e-07 3.07e-13 ...
 $ moments_hu6               : num  2.10e-04 -1.45e-04 1.71e-06 2.49e-05 1.42e-08 ...
 $ moments_hu7               : num  -4.92e-08 2.73e-07 1.12e-09 -6.70e-07 -3.05e-13 ...
 $ orientation               : num  -1.365 0.327 0.397 -1.112 0.845 ...
 $ perimeter                 : num  72.4 92.9 801.3 73.7 136.5 ...
 $ ratio_eccentricity        : num  0.982 0.778 0.565 0.833 0.782 ...
 $ ratio_extent              : num  0.399 0.321 0.304 0.325 0.622 ...
 $ ratio_solidity            : num  0.765 0.523 0.385 0.573 0.919 ...
skimage$filename <- NULL  # no longer needed since aligned
str(skimage)
'data.frame':   130400 obs. of  29 variables:
 $ area                      : num  166 216 1413 138 945 ...
 $ area_convex               : num  217 413 3673 241 1028 ...
 $ area_filled               : num  166 216 3478 138 945 ...
 $ box_max_col               : num  23 44 86 27 51 36 76 30 45 24 ...
 $ box_max_row               : num  44 32 72 35 51 35 48 32 43 39 ...
 $ box_min_col               : num  10 12 11 10 13 0 10 10 11 1 ...
 $ box_min_row               : num  12 11 10 10 11 12 8 10 11 3 ...
 $ centroid_col              : num  15.1 27.8 46.6 15.1 31.4 ...
 $ centroid_row              : num  28.3 25.1 40.6 22.6 30.9 ...
 $ countCoords               : num  166 216 1413 138 945 ...
 $ diameter_equivalent       : num  14.5 16.6 42.4 13.3 34.7 ...
 $ euler_number              : num  0 0 -17 0 0 -7 -8 -2 -1 0 ...
 $ inertia_tensor_eigenvalue1: num  73.9 52.2 445.2 34.4 122.1 ...
 $ inertia_tensor_eigenvalue2: num  2.67 20.58 303.03 10.53 47.33 ...
 $ label                     : num  3 1 1 1 1 1 1 1 2 1 ...
 $ length_major_axis         : num  34.4 28.9 84.4 23.5 44.2 ...
 $ length_minor_axis         : num  6.53 18.15 69.63 12.98 27.52 ...
 $ moments_hu1               : num  0.461 0.337 0.53 0.326 0.179 ...
 $ moments_hu2               : num  0.18418 0.02142 0.01012 0.02995 0.00625 ...
 $ moments_hu3               : num  8.48e-04 2.84e-02 1.62e-04 1.37e-02 3.40e-06 ...
 $ moments_hu4               : num  5.42e-04 1.55e-03 5.10e-05 3.58e-04 3.80e-07 ...
 $ moments_hu5               : num  3.65e-07 -1.02e-05 4.50e-09 -4.20e-07 3.07e-13 ...
 $ moments_hu6               : num  2.10e-04 -1.45e-04 1.71e-06 2.49e-05 1.42e-08 ...
 $ moments_hu7               : num  -4.92e-08 2.73e-07 1.12e-09 -6.70e-07 -3.05e-13 ...
 $ orientation               : num  -1.365 0.327 0.397 -1.112 0.845 ...
 $ perimeter                 : num  72.4 92.9 801.3 73.7 136.5 ...
 $ ratio_eccentricity        : num  0.982 0.778 0.565 0.833 0.782 ...
 $ ratio_extent              : num  0.399 0.321 0.304 0.325 0.622 ...
 $ ratio_solidity            : num  0.765 0.523 0.385 0.573 0.919 ...
skimage <- as.matrix(skimage)

cbind feature.matrix (wndchrm) and skimage features

dim(feature.matrix)
[1] 130400   2894
dim(skimage)
[1] 130400     29
test.features <- cbind(feature.matrix, skimage)
rm(feature.matrix)

Save test data to .Rdata file for fast loads

dim(test.features)
[1] 130400   2923
object.size(test.features)
3057898152 bytes
save(test.features, file="plankton-test-wndchrm-skimage-features.Rdata")

# Report the wall-clock seconds elapsed since time.1 was recorded.
time.2 <- Sys.time()
elapsedSecs <- as.numeric(difftime(time.2, time.1, units="secs"))
cat(sprintf("%.1f", elapsedSecs), " secs\n")
## 224.9  secs

efg @EarlGlynn

2015-02-26 0046