First, run the cells at the end of this notebook: the Miscellaneous section contains the imports and helper functions required by the rest of the notebook. Below we load the features previously extracted with MATLAB. Briefly, the loaded feature sets are:
- divcurlhistogram: histogram of divergence and curl over the whole annotation.
- pyramidalvector: pyramidal layout over the histogram of divergence and curl.
- maxpyramidalvector / meanpyramidalvector: max and mean pooling over the pyramidal layout.
- hoofhistogram, maxhoofhistogram, pyrhoofhistogram: Histogram of Oriented Optical Flow (HOOF), its max-pooled variant and its pyramidal variant.
divcurlhistogram = scipy.io.loadmat('../data/histv6/DivCurlHistogramNorm.mat')
meanpyramidalvector = scipy.io.loadmat('../data/histv6/meanPyramidVectorHistogramPool.mat')
maxpyramidalvector = scipy.io.loadmat('../data/histv6/maxPyramidVectorHistogramPool.mat')
pyramidalvector = scipy.io.loadmat('../data/histv6/PyramidalVectorHistogram.mat')
listfeatures = {'pvector': pyramidalvector, 'maxpvector': maxpyramidalvector,\
'meanpvector': meanpyramidalvector, 'divcurl': divcurlhistogram}
#bof = scipy.io.loadmat('../data/histv6/normBOF.mat')
hoofhistogram = scipy.io.loadmat('../data/histv6/histHOOF.mat')
maxhoofhistogram = scipy.io.loadmat('../data/histv6/maxHOOF.mat')
pyrhoofhistogram = scipy.io.loadmat('../data/histv6/pyrHOOF.mat')
hoof = {'hoof': hoofhistogram, 'maxhoof': maxhoofhistogram,\
'pyrhoof': pyrhoofhistogram}
We extract the class labels and split them into training and test sets:
features = listfeatures['maxpvector']['maxfeatures']
y = np.asarray([x[1][0][0] for x in features])
testlabels = y[214:413]
trainlabels=np.append(y[:214], y[413:])
trainlabels
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], dtype=uint8)
Next we specify the parameter grid explored for each feature set. For the RBF kernel we explore a total of 576 parameter combinations (24 values of gamma and 24 values of C), for the linear SVM we explore 24 values of C, and for Random Forest we explore 5 values of n_estimators.
Cparameters = []
for a in range(-15,9):
Cparameters.append(2 ** a)
Gammaparameters = []
for a in range(-15,9):
Gammaparameters.append(2 ** a)
tuned_parameters = [{'kernel': ['rbf'], 'gamma': Gammaparameters,
'C': Cparameters},
{'kernel': ['linear'], 'C': Cparameters}]
nparameters = []
for a in range(1,6):
nparameters.append(10 ** a)
rf_tuned_parameters = [{'n_estimators': nparameters}]
This parameter exploration was done with standalone Python scripts; below we load the results for visualization.
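For reference, this is a minimal sketch of what one of those exploration scripts could look like, assuming scikit-learn's GridSearchCV and the pickle layout [scores, best_param, best_score] that is read back below; the file name, the cross-validation setup and the default accuracy scoring shown here are assumptions, not the exact scripts that were used.

import pickle
import numpy as np
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

def explore_rbf(features, out_name):
    # Hypothetical exploration script: grid-search C and gamma on the training
    # split and pickle (parameters, mean CV score) pairs plus the best result.
    X = np.asarray([x[0][0] for x in features])
    y = np.asarray([x[1][0][0] for x in features])
    Xtrain = StandardScaler().fit_transform(np.append(X[:214], X[413:], axis=0))
    ytrain = np.append(y[:214], y[413:])
    grid = GridSearchCV(svm.SVC(),
                        {'kernel': ['rbf'], 'C': Cparameters, 'gamma': Gammaparameters},
                        cv=5)
    grid.fit(Xtrain, ytrain)
    scores = zip(grid.cv_results_['params'], grid.cv_results_['mean_test_score'])
    with open('exploration/%s.dat' % out_name, 'wb') as fp:
        pickle.dump([list(scores), grid.best_params_, grid.best_score_], fp)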
ls exploration/
rf_divcurl.dat svm_lineal_divcurlv3.dat svm_rbf_divcurlv3.dat rf_hoof.dat svm_lineal_hoofv3.dat svm_rbf_hoofv3.dat rf_maxhoof.dat svm_lineal_maxhoofv3.dat svm_rbf_maxhoofv3.dat rf_maxpvector.dat svm_lineal_maxpvectorv3.dat svm_rbf_maxpvectorv3.dat rf_meanpvector.dat svm_lineal_meanpvectorv3.dat svm_rbf_meanpvectorv3.dat rf_pvector.dat svm_lineal_pvectorv3.dat svm_rbf_pvectorv3.dat rf_pyrhoof.dat svm_lineal_pyrhoofv3.dat svm_rbf_pyrhoofv3.dat
parameter_files = listdir('exploration/')
exploration = dict()
for filename in parameter_files:
with open('exploration/' + filename, 'rb') as fp:
[scores, best_param, best_score] = pickle.load(fp)
exploration[filename[:-4]] = [scores, best_param, best_score]
As explained in the previous notebook, the metric used to measure performance is the average of the diagonal of the confusion matrix normalized per row. Below we find the best combination of parameters for maxpyramidalvector, pyramidalvector, meanpvector, divcurlhistogram and the three HOOF strategies (in that order).
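As a reminder, here is a minimal standalone sketch of that metric (the mean of the diagonal of the row-normalized confusion matrix, i.e. the mean per-class recall); the get_average_precision helper from misc.utils used throughout this notebook presumably computes something equivalent, but this version is only for illustration.

import numpy as np
from sklearn.metrics import confusion_matrix

def row_normalized_diag_mean(y_true, y_pred):
    # Confusion matrix with each row normalized by the number of true samples
    # of that class; the score is the average of its diagonal.
    cm = confusion_matrix(y_true, y_pred).astype(float)
    cm = cm / cm.sum(axis=1, keepdims=True)
    return np.mean(np.diag(cm))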
print_exploration_rbf('svm_rbf_maxpvectorv3')
The best parameters are {'kernel': 'rbf', 'C': 2, 'gamma': 0.0009765625} with a score of 0.83
print_exploration_rbf('svm_rbf_pvectorv3')
The best parameters are {'kernel': 'rbf', 'C': 4, 'gamma': 3.0517578125e-05} with a score of 0.83
print_exploration_rbf('svm_rbf_meanpvectorv3')
The best parameters are {'kernel': 'rbf', 'C': 2, 'gamma': 0.0009765625} with a score of 0.83
print_exploration_rbf('svm_rbf_divcurlv3')
The best parameters are {'kernel': 'rbf', 'C': 2, 'gamma': 0.0009765625} with a score of 0.83
print_exploration_rbf('svm_rbf_hoofv3')
The best parameters are {'kernel': 'rbf', 'C': 8, 'gamma': 0.0078125} with a score of 0.39
print_exploration_rbf('svm_rbf_maxhoofv3')
The best parameters are {'kernel': 'rbf', 'C': 4, 'gamma': 0.0078125} with a score of 0.39
print_exploration_rbf('svm_rbf_pyrhoofv3')
The best parameters are {'kernel': 'rbf', 'C': 2, 'gamma': 0.000244140625} with a score of 0.69
This is a comparison of the strategies using the linear SVM.
print_exploration_lineal_complete(['svm_lineal_pvectorv3', 'svm_lineal_maxpvectorv3', 'svm_lineal_divcurlv3', 'svm_lineal_pyrhoofv3'])
print_exploration_lineal('svm_lineal_maxpvectorv3')
The best parameters are {'kernel': 'linear', 'C': 0.00390625} with a score of 0.80
print_exploration_lineal('svm_lineal_pvectorv3')
The best parameters are {'kernel': 'linear', 'C': 0.000244140625} with a score of 0.85
print_exploration_lineal('svm_lineal_divcurlv3')
The best parameters are {'kernel': 'linear', 'C': 0.00390625} with a score of 0.80
print_exploration_rf_complete(['rf_pvector', 'rf_maxpvector', 'rf_divcurl', 'rf_pyrhoof'])
print_exploration_rf('rf_pvector')
The best parameters are {'n_estimators': 10000} with a score of 0.81
print_exploration_rf('rf_maxpvector')
The best parameters are {'n_estimators': 100000} with a score of 0.80
print_exploration_rf('rf_divcurl')
The best parameters are {'n_estimators': 100000} with a score of 0.80
print_exploration_rf('rf_pyrhoof')
The best parameters are {'n_estimators': 100000} with a score of 0.67
SVM RBF:
- Histogram of Divergence and Curl = 'C': 2, 'gamma': 0.0009765625
- Pyramid over Histogram of Divergence and Curl = 'C': 4, 'gamma': 3.0517578125e-05
- Max over Pyramid over Histogram of Divergence and Curl = 'C': 2, 'gamma': 0.0009765625
- Pyramid over HOOF = 'C': 2, 'gamma': 0.000244140625

Linear SVM:
- Histogram of Divergence and Curl = 'C': 0.00390625
- Pyramid over Histogram of Divergence and Curl = 'C': 0.000244140625
- Max over Pyramid over Histogram of Divergence and Curl = 'C': 0.00390625
- Pyramid over HOOF = None

Random Forest:
- Histogram of Divergence and Curl = n: 10000
- Pyramid over Histogram of Divergence and Curl = n: 10000
- Max over Pyramid over Histogram of Divergence and Curl = n: 10000
- Pyramid over HOOF = n: 10000
features = listfeatures['divcurl']['listfeatures']
estimator = svm.SVC(C=2, gamma= 0.0009765625)
test_features(features, estimator)
[[ 0.69230769 0.02564103 0.15384615 0.12820513] [ 0. 0.87234043 0.0212766 0.10638298] [ 0. 0.02564103 0.8974359 0.07692308] [ 0.08571429 0.11428571 0.2 0.6 ]] 0.765521003819
features = listfeatures['pvector']['listfeatures']
estimator = svm.SVC(C=2, gamma= 3.0517578125e-05)
test_features(features, estimator)
[[ 0.71794872 0.05128205 0.16666667 0.06410256] [ 0. 0.82978723 0.0212766 0.14893617] [ 0.02564103 0. 0.92307692 0.05128205] [ 0.05714286 0. 0.17142857 0.77142857]] 0.810560361624
features = listfeatures['maxpvector']['maxfeatures']
estimator = svm.SVC(C=2, gamma= 0.0009765625)
test_features(features, estimator)
[[ 0.69230769 0.02564103 0.15384615 0.12820513] [ 0. 0.87234043 0.0212766 0.10638298] [ 0. 0.02564103 0.8974359 0.07692308] [ 0.08571429 0.11428571 0.2 0.6 ]] 0.765521003819
features = hoof['pyrhoof']['pyrfeatures']
estimator = svm.SVC(C=2, gamma= 0.000244140625)
test_features(features, estimator)
[[ 0.76923077 0. 0.1025641 0.12820513] [ 0.0212766 0.87234043 0.0212766 0.08510638] [ 0.1025641 0. 0.56410256 0.33333333] [ 0.08571429 0.02857143 0.4 0.48571429]] 0.672847011145
With the linear SVM
features = listfeatures['divcurl']['listfeatures']
estimator = svm.SVC(kernel= 'linear', C=0.00390625)
test_features(features, estimator)
[[ 0.75641026 0.02564103 0.1025641 0.11538462] [ 0.0212766 0.78723404 0.0212766 0.17021277] [ 0. 0.05128205 0.87179487 0.07692308] [ 0.08571429 0.14285714 0.17142857 0.6 ]] 0.75385979269
features = listfeatures['pvector']['listfeatures']
estimator = svm.SVC(kernel= 'linear', C=0.000244140625)
test_features(features, estimator)
[[ 0.74358974 0.01282051 0.15384615 0.08974359] [ 0. 0.80851064 0.04255319 0.14893617] [ 0. 0. 0.92307692 0.07692308] [ 0.05714286 0. 0.22857143 0.71428571]] 0.797365754813
features = listfeatures['maxpvector']['maxfeatures']
estimator = svm.SVC(kernel= 'linear', C=0.00390625)
test_features(features, estimator)
[[ 0.75641026 0.02564103 0.1025641 0.11538462] [ 0.0212766 0.78723404 0.0212766 0.17021277] [ 0. 0.05128205 0.87179487 0.07692308] [ 0.08571429 0.14285714 0.17142857 0.6 ]] 0.75385979269
Using Random Forest
features = listfeatures['divcurl']['listfeatures']
estimator = RandomForestClassifier(n_estimators=10000, n_jobs=-1)
test_features(features, estimator)
[[ 0.69230769 0.05128205 0.15384615 0.1025641 ] [ 0. 0.85106383 0.06382979 0.08510638] [ 0. 0.02564103 0.92307692 0.05128205] [ 0.11428571 0.14285714 0.28571429 0.45714286]] 0.730897825579
features = listfeatures['pvector']['listfeatures']
estimator = RandomForestClassifier(n_estimators=10000, n_jobs=-1)
test_features(features, estimator)
[[ 0.73076923 0.05128205 0.20512821 0.01282051] [ 0. 0.80851064 0.08510638 0.10638298] [ 0. 0. 0.94871795 0.05128205] [ 0.08571429 0.02857143 0.28571429 0.6 ]] 0.771999454446
features = listfeatures['maxpvector']['maxfeatures']
estimator = RandomForestClassifier(n_estimators=10000, n_jobs=-1)
test_features(features, estimator)
[[ 0.69230769 0.05128205 0.16666667 0.08974359] [ 0. 0.85106383 0.06382979 0.08510638] [ 0. 0.02564103 0.92307692 0.05128205] [ 0.11428571 0.11428571 0.25714286 0.51428571]] 0.745183539864
features = hoof['pyrhoof']['pyrfeatures']
estimator = RandomForestClassifier(n_estimators=10000, n_jobs=-1)
test_features(features, estimator)
[[ 0.62820513 0.02564103 0.1025641 0.24358974] [ 0. 0.9787234 0.0212766 0. ] [ 0. 0.05128205 0.58974359 0.35897436] [ 0.22857143 0.05714286 0.37142857 0.34285714]] 0.634882316265
In the Miscellaneous section we defined a helper function that receives a scikit-learn estimator, such as Random Forest or SVM, and a set of features, and prints the row-normalized confusion matrix together with the average accuracy on the test set.
trainXpvector.shape
(594, 10752)
For instance, the following are the mean values of the best feature reported on test, i.e. the pyramidal layout over the histogram of divergence, curl and negative divergence. Its dimensionality is 10752, corresponding to 768 bins × 14 layout cells (1x1 + 2x2 + 3x3).
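To make that layout explicit, here is a small sketch of how a single channel of a single pyramid cell could be indexed inside the 10752-dimensional vector, assuming 768 consecutive bins per cell (256 for divergence, 256 for curl, 256 for negative divergence); this is the ordering that the slices [:256] and [256:512] used further below rely on, and trainXpvector is assumed to have been built from listfeatures['pvector'] the same way as inside test_features.

BINS_PER_CHANNEL = 256      # divergence, curl, negative divergence
CHANNELS_PER_CELL = 3       # 3 * 256 = 768 bins per pyramid cell
N_CELLS = 14                # 1x1 + 2x2 + 3x3 layouts -> 14 * 768 = 10752

def channel_slice(cell, channel):
    # Slice of the feature vector holding one channel of one pyramid cell.
    start = (cell * CHANNELS_PER_CELL + channel) * BINS_PER_CHANNEL
    return slice(start, start + BINS_PER_CHANNEL)

# e.g. trainXpvector[30][channel_slice(0, 0)] is the 1x1-layout divergence
# histogram of sample 30, i.e. the same values as trainXpvector[30][:256].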
label = np.array([[x] for x in trainlabels])
histdf = np.concatenate((trainXpvector,label), axis=1)
histdf = pd.DataFrame(histdf)
tmp = histdf[histdf.ix[:,len(histdf.columns)-1] == 0]
class1 = tmp.ix[:,:len(tmp.columns)-2]
tmp = histdf[histdf.ix[:,len(histdf.columns)-1] == 1]
class2 = tmp.ix[:,:len(tmp.columns)-2]
tmp = histdf[histdf.ix[:,len(histdf.columns)-1] == 2]
class3 = tmp.ix[:,:len(tmp.columns)-2]
tmp = histdf[histdf.ix[:,len(histdf.columns)-1] == 3]
class4 = tmp.ix[:,:len(tmp.columns)-2]
fig = plt.figure(figsize=(17,10))
plt.plot(range(10752), np.mean(class1, axis=0), 'r-', label='Vortices')
plt.plot(range(10752), np.mean(class2, axis=0), 'b-', label='Divergences')
plt.plot(range(10752), np.mean(class3, axis=0), 'g-', label='Confluences')
plt.plot(range(10752), np.mean(class4, axis=0), 'y-', label='Saddle Points')
#plt.axis([0, 6, 0, 20])
plt.legend()
plt.show()
These are randomly chosen examples of each class.
label = np.array([[x] for x in trainlabels])
histdf = np.concatenate((trainXpvector,label), axis=1)
fig = plt.figure(figsize=(17,10))
plt.plot(range(256), trainXpvector[30][:256], 'r-', label='Vortices')
plt.plot(range(256), trainXpvector[40][:256], 'b-', label='Divergences')
plt.plot(range(256), trainXpvector[100][:256], 'g-', label='Confluences')
plt.plot(range(256), trainXpvector[200][:256], 'y-', label='Saddle Points')
#plt.axis([0, 6, 0, 20])
plt.legend()
plt.show()
The following is a comparison of the first layout of the pyramid (1x1) in terms of the divergence descriptor.
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, sharex=True, sharey=True)
f.set_size_inches(15,10)
ax1.plot(range(256), trainXpvector[30][:256], 'r-', label='Vortex')
ax1.plot(range(256), trainXpvector[40][:256], 'b-', label='Difluence')
ax1.set_title('1x1 layout Histogram of divergence')
ax1.legend()
ax2.plot(range(256), trainXpvector[40][:256], 'b-', label='Difluence')
ax2.plot(range(256), trainXpvector[100][:256], 'g-', label='Confluence')
ax2.legend()
ax3.plot(range(256), trainXpvector[100][:256], 'g-', label='Confluence')
ax3.plot(range(256), trainXpvector[30][:256], 'r-', label='Vortex')
ax3.legend()
ax4.plot(range(256), trainXpvector[30][:256], 'r-', label='Vortex')
ax4.plot(range(256), trainXpvector[200][:256], 'y-', label='Saddle Point')
ax4.legend()
# Fine-tune figure; make subplots close to each other and hide x ticks for
# all but bottom plot.
f.subplots_adjust(hspace=0.2)
#plt.legend()
plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
plt.savefig('foo.png', bbox_inches='tight')
The following is a comparison of the histogram of curl at the first layout of the pyramidal representation.
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, sharex=True, sharey=True)
f.set_size_inches(15,10)
ax1.plot(range(256), trainXpvector[30][256:512], 'r-', label='Vortex')
ax1.plot(range(256), trainXpvector[40][256:512], 'b-', label='Difluence')
ax1.set_title('1x1 layout Histogram of curl')
ax1.legend()
ax2.plot(range(256), trainXpvector[40][256:512], 'b-', label='Difluence')
ax2.plot(range(256), trainXpvector[100][256:512], 'g-', label='Confluence')
ax2.legend()
ax3.plot(range(256), trainXpvector[100][256:512], 'g-', label='Confluence')
ax3.plot(range(256), trainXpvector[30][256:512], 'r-', label='Vortex')
ax3.legend()
ax4.plot(range(256), trainXpvector[30][256:512], 'r-', label='Vortex')
ax4.plot(range(256), trainXpvector[200][256:512], 'y-', label='Saddle Point')
ax4.legend()
# Fine-tune figure; make subplots close to each other and hide x ticks for
# all but bottom plot.
f.subplots_adjust(hspace=0.2)
#plt.legend()
plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
plt.savefig('foo.png', bbox_inches='tight')
Before the Histogram of Oriented Optical Flow, several baselines were proposed. Let's see how well they behave, i.e. how well they separate the classes. This strategy consists of a joint histogram of magnitude and direction: on the magnitude axis the minimum value is 0 and the maximum is 15; on the direction axis the minimum is -180° and the maximum is 180°. These values are discretized into a joint histogram with 16 bins for magnitude and 16 bins for direction.
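As an illustration only, a minimal sketch of how such a joint magnitude/direction histogram could be computed from a dense optical-flow field with NumPy; the actual features were extracted in MATLAB, so the bin edges, the normalization and the helper name joint_flow_histogram are assumptions.

import numpy as np

def joint_flow_histogram(u, v, mag_max=15.0, bins=16):
    # Joint 2D histogram of flow direction (-180..180 degrees, rows) and
    # magnitude (0..mag_max, columns), flattened to a 256-dim vector.
    magnitude = np.sqrt(u ** 2 + v ** 2).ravel()
    direction = np.degrees(np.arctan2(v, u)).ravel()
    hist, _, _ = np.histogram2d(direction, magnitude, bins=bins,
                                range=[[-180.0, 180.0], [0.0, mag_max]])
    return hist.ravel() / max(hist.sum(), 1.0)   # L1-normalized; guards empty input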
jointhistogram = scipy.io.loadmat('../data/histv6/JointHistogram.mat')
pyramidalhistogram = scipy.io.loadmat('../data/histv6/PyramidalJointHistogram.mat')
baselinefeatures = {'joint': jointhistogram, 'pyramidal': pyramidalhistogram}
features = baselinefeatures['joint']['listfeatures']
Xjoint = np.asarray([x[0][0] for x in features])
labelmag = np.asarray([x[1][0][0] for x in features])
label = np.array([[x] for x in labelmag])
histdf = np.concatenate((Xjoint,label), axis=1)
histdf = pd.DataFrame(histdf)
tmp = histdf[histdf.ix[:,len(histdf.columns)-1] == 0]
class1 = tmp.ix[:,:len(tmp.columns)-2]
tmp = histdf[histdf.ix[:,len(histdf.columns)-1] == 1]
class2 = tmp.ix[:,:len(tmp.columns)-2]
tmp = histdf[histdf.ix[:,len(histdf.columns)-1] == 2]
class3 = tmp.ix[:,:len(tmp.columns)-2]
tmp = histdf[histdf.ix[:,len(histdf.columns)-1] == 3]
class4 = tmp.ix[:,:len(tmp.columns)-2]
m1=np.reshape(np.mean(class1),(16,16))
m2=np.reshape(np.mean(class2),(16,16))
m3=np.reshape(np.mean(class3),(16,16))
m4=np.reshape(np.mean(class4),(16,16))
fig = plt.figure(figsize=(18,10))
ax = plt.subplot(1,4,1)
ax.matshow(m1)
ax.set_title('Vorticity')
#ax.set_xticks(tick_marks, target_names)
#ax.set_yticks(tick_marks, target_names)
ax.set_ylabel('Direction')
ax.set_xlabel('Magnitude')
ax = plt.subplot(1,4,2)
ax.matshow(m2)
ax.set_title('Divergence')
#ax.set_xticks(tick_marks, target_names)
#ax.set_yticks(tick_marks, target_names)
ax.set_ylabel('Direction')
ax.set_xlabel('Magnitude')
ax = plt.subplot(1,4,3)
im = ax.matshow(m3)
ax.set_title('Confluence')
#ax.set_xticks(tick_marks, target_names)
#ax.set_yticks(tick_marks, target_names)
ax.set_ylabel('Direction')
ax.set_xlabel('Magnitude')
ax = plt.subplot(1,4,4)
im = ax.matshow(m4)
ax.set_title('Saddle Points')
#ax.set_xticks(tick_marks, target_names)
#ax.set_yticks(tick_marks, target_names)
ax.set_ylabel('Direction')
ax.set_xlabel('Magnitude')
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(im, cax=cbar_ax)
plt.show()
It is worth using them as classifiers to establish a stronger baseline than the Histogram of Oriented Optical Flow. Below we find the parameter exploration done with these baseline features.
parameter_files = listdir('exploration_joint/')
exploration = dict()
for filename in parameter_files:
with open('exploration_joint/' + filename, 'rb') as fp:
[scores, best_param, best_score] = pickle.load(fp)
exploration[filename[:-4]] = [scores, best_param, best_score]
Two basic strategies were explored: a pyramid (1x1, 2x2, 3x3) of the joint (2D) histogram visualized above, computed over the whole annotation; and a single joint histogram over the whole annotation. Both were explored using the RBF SVM, the linear SVM and Random Forest. Below you can find the parameter exploration for the simple joint histogram, followed by the one for the pyramid of the 2D histogram.
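For illustration, a short sketch of how the pyramidal variant could be assembled on top of the hypothetical joint_flow_histogram helper sketched earlier; again, the real features come from MATLAB, and the way cells are split when the image size is not divisible by the grid is an assumption.

import numpy as np

def pyramid_joint_histogram(u, v, grids=(1, 2, 3)):
    # Concatenate joint histograms over a 1x1, 2x2 and 3x3 grid of cells:
    # (1 + 4 + 9) cells * 256 bins = 3584 dimensions.
    parts = []
    for g in grids:
        row_blocks = np.array_split(np.arange(u.shape[0]), g)
        col_blocks = np.array_split(np.arange(u.shape[1]), g)
        for rows in row_blocks:
            for cols in col_blocks:
                parts.append(joint_flow_histogram(u[np.ix_(rows, cols)],
                                                  v[np.ix_(rows, cols)]))
    return np.concatenate(parts)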
print_exploration_rbf('rbf_join')
The best parameters are {'kernel': 'rbf', 'C': 32, 'gamma': 0.0001220703125} with a score of 0.48
print_exploration_rbf('rbf_pyrjoint')
The best parameters are {'kernel': 'rbf', 'C': 4, 'gamma': 0.000244140625} with a score of 0.81
Below is a comparison of the strategies using Random Forest and the linear SVM.
print_exploration_rf_complete(['rf_join', 'rf_pyrjoint'])
print_exploration_lineal_complete(['lineal_join', 'lineal_pyrjoint'])
features = baselinefeatures['pyramidal']['listfeatures']
estimator = svm.SVC(kernel='linear', C=0.000488281)
test_features(features, estimator)
[[ 0.66666667 0. 0.1025641 0.23076923] [ 0.0212766 0.93617021 0. 0.04255319] [ 0.07692308 0. 0.82051282 0.1025641 ] [ 0.31428571 0.02857143 0.08571429 0.57142857]] 0.748694567844
features = baselinefeatures['pyramidal']['listfeatures']
estimator = svm.SVC(C=4, gamma=0.000244140625)
test_features(features, estimator)
[[ 0.52564103 0.30769231 0.06410256 0.1025641 ] [ 0. 0.9787234 0. 0.0212766 ] [ 0.07692308 0.15384615 0.69230769 0.07692308] [ 0.31428571 0.02857143 0.08571429 0.57142857]] 0.692025173408
jointfeatures = baselinefeatures['pyramidal']['listfeatures']
estimator = RandomForestClassifier(n_estimators=1000)
test_features(jointfeatures, estimator)
[[ 0.75641026 0. 0.1025641 0.14102564] [ 0. 0.78723404 0. 0.21276596] [ 0. 0. 0.92307692 0.07692308] [ 0. 0.05714286 0.17142857 0.77142857]] 0.809537448367
pyrdivcurlfeatures = listfeatures['pvector']['listfeatures']
estimator = svm.SVC(C=2, gamma= 3.0517578125e-05)
test_features(pyrdivcurlfeatures, estimator)
[[ 0.71794872 0.05128205 0.16666667 0.06410256] [ 0. 0.82978723 0.0212766 0.14893617] [ 0.02564103 0. 0.92307692 0.05128205] [ 0.05714286 0. 0.17142857 0.77142857]] 0.810560361624
jointfeatures = baselinefeatures['pyramidal']['listfeatures']
pyrdivcurlfeatures = listfeatures['pvector']['listfeatures']
estimator = RandomForestClassifier(n_estimators=1000)
test_combined_features(jointfeatures, pyrdivcurlfeatures, estimator)
[[ 0.71794872 0.03846154 0.21794872 0.02564103] [ 0. 0.82978723 0.04255319 0.12765957] [ 0. 0. 0.94871795 0.05128205] [ 0.08571429 0. 0.28571429 0.62857143]] 0.78125633232
jointfeatures = baselinefeatures['pyramidal']['listfeatures']
pyrdivcurlfeatures = listfeatures['pvector']['listfeatures']
estimator = svm.SVC(C=2, gamma= 3.0517578125e-05)
test_combined_features(jointfeatures, pyrdivcurlfeatures, estimator)
[[ 0.73076923 0.15384615 0.07692308 0.03846154] [ 0. 0.91489362 0. 0.08510638] [ 0.02564103 0.02564103 0.8974359 0.05128205] [ 0.08571429 0. 0.17142857 0.74285714]] 0.821488972021
Below we can see the comparison of the best classification strategies, as reported on the test set.
comp_results = pd.DataFrame(columns=['Test','Vorticity','Divergence', 'Convergence', 'Saddle Point'])
comp_results.loc[len(comp_results),:] = [0.82, 0.73, 0.91, 0.89, 0.74]
comp_results.loc[len(comp_results),:] = [0.81, 0.71, 0.82, 0.92, 0.77]
comp_results.loc[len(comp_results),:] = [0.80, 0.75, 0.78, 0.92, 0.77]
comp_results.loc[len(comp_results),:] = [0.74, 0.66, 0.93, 0.82, 0.57]
comp_results.index = ['Pyramid over Joint + Pyramid over DivCurl', 'Pyramidal over DivCurl', \
'Pyramid over Joint(Using RF)', 'Pyramid over Joint(Using SVM linear)']
comp_results
| | Test | Vorticity | Divergence | Convergence | Saddle Point |
|---|---|---|---|---|---|
| Pyramid over Joint + Pyramid over DivCurl | 0.82 | 0.73 | 0.91 | 0.89 | 0.74 |
| Pyramidal over DivCurl | 0.81 | 0.71 | 0.82 | 0.92 | 0.77 |
| Pyramid over Joint (Using RF) | 0.80 | 0.75 | 0.78 | 0.92 | 0.77 |
| Pyramid over Joint (Using SVM linear) | 0.74 | 0.66 | 0.93 | 0.82 | 0.57 |
from misc.oversampling import *
from misc.utils import *
%matplotlib inline
def test_combined_features(f_features, s_features, estimator):
    # Concatenate two feature sets, standardize them, train the estimator on
    # the training split and print the row-normalized confusion matrix and
    # average accuracy on the test split.
    Xfirst = np.asarray([x[0][0] for x in f_features])
    Xsecond = np.asarray([x[0][0] for x in s_features])
    Xpvector = np.concatenate((Xfirst, Xsecond), axis=1)
    trainXpvector = np.append(Xpvector[:214], Xpvector[413:], axis=0)
    testXpvector = Xpvector[214:413]
    clftest = estimator
    X = trainXpvector
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    clftest.fit(X, trainlabels)
    Xtest = scaler.transform(testXpvector)
    result = clftest.predict(Xtest)
    print get_average_precision(testlabels, result, printable=True)
def print_exploration_rbf(name):
scores = [x[1] for x in exploration[name][0]]
scores = np.array(scores).reshape(len(Cparameters), len(Gammaparameters))
plt.figure(figsize=(8, 6))
plt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot,
norm=MidpointNormalize(vmin=0.21, midpoint=0.80))
plt.xlabel('Gamma')
plt.ylabel('C')
plt.colorbar()
plt.yticks(np.arange(len(Cparameters)), Cparameters)
plt.xticks(np.arange(len(Gammaparameters)), Gammaparameters, rotation=45)
plt.title('Validation accuracy')
plt.show()
print("The best parameters are %s with a score of %0.2f"
% (exploration[name][1], exploration[name][2]))
def print_exploration_rf(name):
scores = [x[1] for x in exploration[name][0]]
plt.figure(figsize=(8, 6))
plt.plot(scores, 'r-')
    plt.xlabel('n_estimators')
plt.ylabel('Accuracy')
plt.xticks(np.arange(len(nparameters)), nparameters, rotation=45)
plt.title('Validation accuracy')
plt.show()
print("The best parameters are %s with a score of %0.2f"
% (exploration[name][1], exploration[name][2]))
def print_exploration_rf_complete(rf_exploration):
plt.figure(figsize=(8, 6))
for class_name in rf_exploration:
scores = [x[1] for x in exploration[class_name][0]]
line1, = plt.plot(scores, label=class_name)
plt.xlabel('N - estimators')
plt.ylabel('Accuracy')
plt.xticks(np.arange(len(nparameters)), nparameters, rotation=45)
plt.title('Validation accuracy')
plt.legend(bbox_to_anchor=(1.2, 0.9), bbox_transform=plt.gcf().transFigure)
plt.show()
def print_exploration_lineal(name):
scores = [x[1] for x in exploration[name][0]]
plt.figure(figsize=(8, 6))
plt.plot(scores, '-')
plt.xlabel('C')
plt.ylabel('Accuracy')
plt.xticks(np.arange(len(Cparameters)), Cparameters, rotation=45)
plt.title('Validation accuracy')
plt.show()
print("The best parameters are %s with a score of %0.2f"
% (exploration[name][1], exploration[name][2]))
def print_exploration_lineal_complete(lineal_exploration):
plt.figure(figsize=(8, 6))
for class_name in lineal_exploration:
scores = [x[1] for x in exploration[class_name][0]]
line1, = plt.plot(scores, label=class_name)
plt.xlabel('C')
plt.ylabel('Accuracy')
plt.xticks(np.arange(len(Cparameters)), Cparameters, rotation=45)
plt.title('Validation accuracy')
plt.legend(bbox_to_anchor=(1.3, 0.9), bbox_transform=plt.gcf().transFigure)
plt.show()
def test_features(features, estimator):
    # Standardize the features, train the estimator on the training split and
    # print the row-normalized confusion matrix and average accuracy on the
    # test split.
    Xpvector = np.asarray([x[0][0] for x in features])
    trainXpvector = np.append(Xpvector[:214], Xpvector[413:], axis=0)
    testXpvector = Xpvector[214:413]
    clftest = estimator
    X = trainXpvector
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    clftest.fit(X, trainlabels)
    Xtest = scaler.transform(testXpvector)
    result = clftest.predict(Xtest)
    print get_average_precision(testlabels, result, printable=True)
from os import listdir