
This is the code I wrote for computing an SVM.

from matplotlib import style
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
style.use('ggplot')

class SVM:
    def __init__(self, visualization=True):
        self.visualization = visualization
        self.colors = {1: 'r', -1: 'b'}
        if self.visualization:
            self.fig = plt.figure()
            self.ax = self.fig.add_subplot(1, 1, 1)

    #train

    def fit(self, data):
        self.data = data
        opt_dict = {}
        transforms = [ [1,1],
                     [-1, 1],
                     [-1, -1],
                     [1,-1]]

        all_data=[]
        for yi in self.data:
            for featureset in self.data[yi]:
                for feature in featureset:
                    all_data.append(feature)

        self.max_feature_value= max(all_data)
        self.min_feature_value= min(all_data)
        all_data = None

        step_sizes = [self.max_feature_value*0.1,
                    self.max_feature_value*0.01,
                    self.max_feature_value*0.001,]

        b_range_multiple = 5
        b_multiple = 5
        latest_optimum = self.max_feature_value*10

        for step in step_sizes:
            w= np.array([latest_optimum, latest_optimum])
            #we can do this because it is convex
            optimized = False
            while not optimized:
                for b in np.arange(-1*(self.max_feature_value*b_range_multiple),
                                    self.max_feature_value*b_range_multiple,
                                    step*b_multiple):
                    for transformation in transforms:
                        w_t = w*transformation
                        #weak link in the SVM fundamentally
                        # SMO tries to fix this a bit
                        # yi(xi.w+b)
                        for i in self.data:
                            for xi in self.data[i]:
                                yi=i
                                if not yi*(np.dot(w_t, xi) + b) >= 1:
                                    found_option = False

                        if found_option:
                            opt_dict[np.linalg.norm(w_t)] = [w_t, b]

                if w[0] < 0:
                    optimized = True
                    print('Optimized a step')
                else:
                    w = w - step

            # after this step size converges, take the candidate with the smallest ||w||
            norms = sorted([n for n in opt_dict])
            opt_choice = opt_dict[norms[0]]
            self.w = opt_choice[0]
            self.b = opt_choice[1]
            latest_optimum = opt_choice[0][0] + step*2

    def predict(self, features):
        # classification is the sign of (x.w + b)
        classification = np.sign(np.dot(np.array(features), self.w) + self.b)
        if classification != 0 and self.visualization:
            self.ax.scatter(features[0], features[1], s=200, marker='*', c=self.colors[classification])
        return classification

    def visualize(self):
        [[self.ax.scatter(x[0], x[1], s=100, color=self.colors[i]) for x in data_dict[i]] for i in data_dict]

        # hyperplane: v = x.w + b
        # psv = 1, nsv = -1, decision boundary = 0
        def hyperplane(x, w, b, v):
            return (-w[0]*x - b + v) / w[1]

        datarange = (self.min_feature_value*0.9, self.max_feature_value*1.1)
        hyp_x_min = datarange[0]
        hyp_x_max = datarange[1]

        # (w.x + b) = 1
        # positive support vector hyperplane
        psv1 = hyperplane(hyp_x_min, self.w, self.b, 1)
        psv2 = hyperplane(hyp_x_max, self.w, self.b, 1)
        self.ax.plot([hyp_x_min, hyp_x_max], [psv1, psv2])

        # (w.x + b) = -1
        # negative support vector hyperplane
        nsv1 = hyperplane(hyp_x_min, self.w, self.b, -1)
        nsv2 = hyperplane(hyp_x_max, self.w, self.b, -1)
        self.ax.plot([hyp_x_min, hyp_x_max], [nsv1, nsv2])

        # (w.x + b) = 0
        # decision boundary
        db1 = hyperplane(hyp_x_min, self.w, self.b, 0)
        db2 = hyperplane(hyp_x_max, self.w, self.b, 0)
        self.ax.plot([hyp_x_min, hyp_x_max], [db1, db2])

        plt.show()

data_dict = { -1: np.array([[1,7], [2,8], [5,8]]), 1: np.array([[5,1], [6, -1], [7,3]])}

svm = SVM()
svm.fit(data=data_dict)
svm.visualize()

It raises this error, pointing at line 72:

opt_choice = opt_dict[norms[0]]
IndexError: list index out of range

The relevant call is near line 120: svm.fit(data=data_dict)

I wrote this code following a video tutorial; it runs fine in the video, but for me it throws this error.


1 Answer


OK, I think I've got it. The problem is that you only add a new element to opt_dict when found_option is True. However, you never set found_option to True, so I believe opt_dict is always empty :)

found_option is a local variable inside the for loop.
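
For reference, here is a minimal sketch of that fix, assuming the loop structure in the question: start each candidate with found_option = True and only flip it to False when some sample violates yi*(xi.w + b) >= 1. This is not necessarily the tutorial's exact code, just one way to make opt_dict non-empty.

# inside fit(), in the loop over b values
for transformation in transforms:
    w_t = w * transformation
    found_option = True  # assume this (w_t, b) satisfies all constraints until a point says otherwise
    for i in self.data:
        for xi in self.data[i]:
            yi = i
            # constraint: yi * (xi . w + b) >= 1 must hold for every training point
            if not yi * (np.dot(w_t, xi) + b) >= 1:
                found_option = False
                break  # this candidate already failed; stop checking it
        if not found_option:
            break
    if found_option:
        # keep the feasible candidate, keyed by ||w_t||
        opt_dict[np.linalg.norm(w_t)] = [w_t, b]

With found_option initialized to True, opt_dict gets an entry for every (w_t, b) pair that satisfies the constraints, norms is no longer empty, and the IndexError at opt_choice = opt_dict[norms[0]] goes away.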

Answered 2017-07-12T13:17:10.627