2

我正在使用本教程使用 HoG 功能构建用于对象识别的 Adaboost.SAMME 分类器。这是我下面的代码,大部分只是根据我的问题定制了最上面的部分,其他大部分都和教程中的一样。这是我正在做的一个非常小的测试,总共只有 17 个图像,10 个用于训练,7 个用于测试。一旦我启动并运行它,我将添加更多图像以进行适当的训练。

import sys from scipy 
import misc, ndimage from skimage 
import data, io, filter, color, exposure 
from skimage.viewer import ImageViewer 
from skimage.feature import hog from skimage.transform 
import resize import matplotlib.pyplot as plt 
from sklearn.datasets import make_gaussian_quantiles 
from sklearn.ensemble import AdaBoostClassifier 
from sklearn.externals.six.moves import xrange 
from sklearn.metrics import accuracy_score 
from sklearn.tree import DecisionTreeClassifier 
import pylab as pl from sklearn.externals.six.moves 
import zip

f = open("PATH_TO_LIST_OF_SAMPLES\\samples.txt",'r') 
out = f.read().splitlines() import numpy as np

imgs = [] tmp_hogs = []
#tmp_hogs = np.zeros((17,1728)) labels = [1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]

i=0 for file in out:
        filepath = "PATH_TO_IMAGES\\imgs\\"
        readfile = filepath + file
        curr_img = color.rgb2gray(io.imread(readfile))
        imgs.append(curr_img)
        fd, hog_image = hog(curr_img, orientations=8, pixels_per_cell=(16, 16),
                 cells_per_block=(1, 1), visualise=True, normalise=True)
        tmp_hogs.append(fd) 
        i+=1
        img_hogs = np.array(tmp_hogs, dtype =float) 

n_split = 10 
X_train, X_test = np.array(img_hogs[:n_split]), np.array(img_hogs[n_split:]) 
y_train, y_test = np.array(labels[:n_split]), np.array(labels[n_split:])


#now all the code below is straight off the example on scikit-learn's website

bdt_real = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1)

bdt_discrete = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1.5,
    algorithm="SAMME")

bdt_real.fit(X_train, y_train)
bdt_discrete.fit(X_train, y_train)

real_test_errors = []
discrete_test_errors = []

for real_test_predict, discrete_train_predict in zip(
        bdt_real.staged_predict(X_test), bdt_discrete.staged_predict(X_test)):
    real_test_errors.append(
        1. - accuracy_score(real_test_predict, y_test))
    discrete_test_errors.append(
        1. - accuracy_score(discrete_train_predict, y_test))

n_trees = xrange(1, len(bdt_discrete) + 1)

pl.figure(figsize=(15, 5))

pl.subplot(131)
pl.plot(n_trees, discrete_test_errors, c='black', label='SAMME')
pl.plot(n_trees, real_test_errors, c='black',
        linestyle='dashed', label='SAMME.R')
pl.legend()
pl.ylim(0.18, 0.62)
pl.ylabel('Test Error')
pl.xlabel('Number of Trees')

pl.subplot(132)
pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
pl.plot(n_trees, bdt_real.estimator_errors_, "r", label='SAMME.R', alpha=.5)
pl.legend()
pl.ylabel('Error')
pl.xlabel('Number of Trees')
pl.ylim((.2,
        max(bdt_real.estimator_errors_.max(),
            bdt_discrete.estimator_errors_.max()) * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))

pl.subplot(133)
pl.plot(n_trees, bdt_discrete.estimator_weights_, "b", label='SAMME')
pl.legend()
pl.ylabel('Weight')
pl.xlabel('Number of Trees')
pl.ylim((0, bdt_discrete.estimator_weights_.max() * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))

# prevent overlapping y-axis labels
pl.subplots_adjust(wspace=0.25)
pl.show()

但我收到以下错误:

Traceback (most recent call last):
  File "C:\Users\app\Documents\Python Scripts\carclassify.py", line 101, in <module>
    pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\pyplot.py", line 2987, in plot
    ret = ax.plot(*args, **kwargs)
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 4137, in plot
    for line in self._get_lines(*args, **kwargs):
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 317, in _grab_next_args
    for seg in self._plot_args(remaining, kwargs):
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 295, in _plot_args
    x, y = self._xy_from_xy(x, y)
  File "C:\Users\app\Anaconda\lib\site-packages\matplotlib\axes.py", line 237, in _xy_from_xy
    raise ValueError("x and y must have same first dimension")
ValueError: x and y must have same first dimension

所以我在代码教程部分之前添加了这些行,以便查看 X 和 Y 数组的维度:

print X_train.shape 
print y_train.shape
print X_test.shape 
print y_test.shape

输出是:

(10L, 48L)
(10L,)
(7L, 48L)
(7L,)

但我不确定错误中的 x 和 y 是否指的是我的 X 和 y……因为训练和测试数据集具有不同大小肯定是正常的。我究竟做错了什么?

4

0 回答 0