1

我正在尝试在 PyCharm 中运行 Jupyter 脚本。该脚本需要 +- 2 小时才能运行。当我第一次运行它时,我的内存用完了,我退出了 pyCharm。我从我的 PC 中删除了一些文件并再次启动了 PyCharm。当我打开我的脚本并再次运行代码时,它给了我一个错误:突然它不再识别我的类“Progressbar”,而它在我第一次运行脚本时识别它。我以前没有这个错误。有谁知道这里发生了什么?

这是脚本:

import sys
import collections
import itertools
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mode
from scipy.spatial.distance import squareform
import pandas as p

plt.style.use('bmh')
%matplotlib inline

try:
    from IPython.display import clear_output
    have_ipython = True
except ImportError:
    have_ipython = False

class KnnDtw(object):
    """K-nearest neighbor classifier using dynamic time warping
    as the distance measure between pairs of time series arrays
    
    Arguments
    ---------
    n_neighbors : int, optional (default = 5)
        Number of neighbors to use by default for KNN
        
    max_warping_window : int, optional (default = infinity)
        Maximum warping window allowed by the DTW dynamic
        programming function
            
    subsample_step : int, optional (default = 1)
        Step size for the timeseries array. By setting subsample_step = 2,
        the timeseries length will be reduced by 50% because every second
        item is skipped. Implemented by x[:, ::subsample_step]
    """
    
    def __init__(self, n_neighbors=5, max_warping_window=10000, subsample_step=1):
        self.n_neighbors = n_neighbors
        self.max_warping_window = max_warping_window
        self.subsample_step = subsample_step
    
    def fit(self, x, l):
        """Fit the model using x as training data and l as class labels
        
        Arguments
        ---------
        x : array of shape [n_samples, n_timepoints]
            Training data set for input into KNN classifer
            
        l : array of shape [n_samples]
            Training labels for input into KNN classifier
        """
        
        self.x = x
        self.l = l
        
    def _dtw_distance(self, ts_a, ts_b, d = lambda x,y: abs(x-y)):
        """Returns the DTW similarity distance between two 2-D
        timeseries numpy arrays.

        Arguments
        ---------
        ts_a, ts_b : array of shape [n_samples, n_timepoints]
            Two arrays containing n_samples of timeseries data
            whose DTW distance between each sample of A and B
            will be compared
        
        d : DistanceMetric object (default = abs(x-y))
            the distance measure used for A_i - B_j in the
            DTW dynamic programming function
        
        Returns
        -------
        DTW distance between A and B
        """

        # Create cost matrix via broadcasting with large int
        ts_a, ts_b = np.array(ts_a), np.array(ts_b)
        M, N = len(ts_a), len(ts_b)
        cost = sys.maxsize * np.ones((M, N))

        # Initialize the first row and column
        cost[0, 0] = d(ts_a[0], ts_b[0])
        for i in range(1, M):
            cost[i, 0] = cost[i-1, 0] + d(ts_a[i], ts_b[0])

        for j in range(1, N):
            cost[0, j] = cost[0, j-1] + d(ts_a[0], ts_b[j])

        # Populate rest of cost matrix within window
        for i in range(1, M):
            for j in range(max(1, i - self.max_warping_window),
                            min(N, i + self.max_warping_window)):
                choices = cost[i - 1, j - 1], cost[i, j-1], cost[i-1, j]
                cost[i, j] = min(choices) + d(ts_a[i], ts_b[j])

        # Return DTW distance given window 
        return cost[-1, -1]
    
    def _dist_matrix(self, x, y):
        """Computes the M x N distance matrix between the training
        dataset and testing dataset (y) using the DTW distance measure
        
        Arguments
        ---------
        x : array of shape [n_samples, n_timepoints]
        
        y : array of shape [n_samples, n_timepoints]
        
        Returns
        -------
        Distance matrix between each item of x and y with
            shape [training_n_samples, testing_n_samples]
        """
        
        # Compute the distance matrix        
        dm_count = 0
        
        # Compute condensed distance matrix (upper triangle) of pairwise dtw distances
        # when x and y are the same array
        if(np.array_equal(x, y)):
            x_s = np.shape(x)
            dm = np.zeros((x_s[0] * (x_s[0] - 1)) // 2, dtype=np.double)
            
            p = ProgressBar(np.shape(dm)[0])
            
            for i in range(0, x_s[0] - 1):
                for j in range(i + 1, x_s[0]):
                    dm[dm_count] = self._dtw_distance(x[i, ::self.subsample_step],
                                                      y[j, ::self.subsample_step])
                    
                    dm_count += 1
                    p.animate(dm_count)
            
            # Convert to squareform
            dm = squareform(dm)
            return dm
        
        # Compute full distance matrix of dtw distnces between x and y
        else:
            x_s = np.shape(x)
            y_s = np.shape(y)
            dm = np.zeros((x_s[0], y_s[0])) 
            dm_size = x_s[0]*y_s[0]
            
            p = ProgressBar(dm_size)
        
            for i in range(0, x_s[0]):
                for j in range(0, y_s[0]):
                    dm[i, j] = self._dtw_distance(x[i, ::self.subsample_step],
                                                  y[j, ::self.subsample_step])
                    # Update progress bar
                    dm_count += 1
                    p.animate(dm_count)
        
            return dm
        
    def predict(self, x):
        """Predict the class labels or probability estimates for 
        the provided data

        Arguments
        ---------
          x : array of shape [n_samples, n_timepoints]
              Array containing the testing data set to be classified
          
        Returns
        -------
          2 arrays representing:
              (1) the predicted class labels 
              (2) the knn label count probability
        """
        
        dm = self._dist_matrix(x, self.x)

        # Identify the k nearest neighbors
        knn_idx = dm.argsort()[:, :self.n_neighbors]

        # Identify k nearest labels
        knn_labels = self.l[knn_idx]
        
        # Model Label
        mode_data = mode(knn_labels, axis=1)
        mode_label = mode_data[0]
        mode_proba = mode_data[1]/self.n_neighbors

        return mode_label.ravel(), mode_proba.ravel()

class ProgressBar:
    """This progress bar was taken from PYMC
    """
    def __init__(self, iterations):
        self.iterations = iterations
        self.prog_bar = '[]'
        self.fill_char = '*'
        self.width = 40
        self.__update_amount(0)
        if have_ipython:
            self.animate = self.animate_ipython
        else:
            self.animate = self.animate_noipython

    def animate_ipython(self, iter):
        print ('\r', self,
        sys.stdout.flush())
        self.update_iteration(iter + 1)

    def update_iteration(self, elapsed_iter):
        self.__update_amount((elapsed_iter / float(self.iterations)) * 100.0)
        self.prog_bar += '  %d of %s complete' % (elapsed_iter, self.iterations)

    def __update_amount(self, new_amount):
        percent_done = int(round((new_amount / 100.0) * 100.0))
        all_full = self.width - 2
        num_hashes = int(round((percent_done / 100.0) * all_full))
        self.prog_bar = '[' + self.fill_char * num_hashes + ' ' * (all_full - num_hashes) + ']'
        pct_place = (len(self.prog_bar) // 2) - len(str(percent_done))
        pct_string = '%d%%' % percent_done
        self.prog_bar = self.prog_bar[0:pct_place] + \
            (pct_string + self.prog_bar[pct_place + len(pct_string):])

    def __str__(self):
        return str(self.prog_bar)
    
    time = np.linspace(0,20,1000)
    amplitude_a = 5*np.sin(time)
    amplitude_b = 3*np.sin(time + 1)

    m = KnnDtw()
    distance = m._dtw_distance(amplitude_a, amplitude_b)

    fig = plt.figure(figsize=(12,4))
    _ = plt.plot(time, amplitude_a, label='A')
    _ = plt.plot(time, amplitude_b, label='B')
    _ = plt.title('DTW distance between A and B is %.2f' % distance)
    _ = plt.ylabel('Amplitude')
    _ = plt.xlabel('Time')
    _ = plt.legend()

    #m._dist_matrix(np.random.random((4,50)), np.random.random((4,50)))

    # Import the HAR dataset
    x_train_file = open('UCI HAR Dataset/train/X_train.txt', 'r')
    y_train_file = open('UCI HAR Dataset/train/y_train.txt', 'r')

    x_test_file = open('UCI HAR Dataset/test/X_test.txt', 'r')
    y_test_file = open('UCI HAR Dataset/test/y_test.txt', 'r')

    # Create empty lists
    x_train = []
    y_train = []
    x_test = []
    y_test = []

    # Mapping table for classes
    labels = {1:'WALKING', 2:'WALKING UPSTAIRS', 3:'WALKING DOWNSTAIRS',
              4:'SITTING', 5:'STANDING', 6:'LAYING'}

    # Loop through datasets
    for x in x_train_file:
        x_train.append([float(ts) for ts in x.split()])

    for y in y_train_file:
        y_train.append(int(y.rstrip('\n')))

    for x in x_test_file:
        x_test.append([float(ts) for ts in x.split()])

    for y in y_test_file:
        y_test.append(int(y.rstrip('\n')))

    # Convert to numpy for efficiency
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    x_test = np.array(x_test)
    y_test = np.array(y_test)

    m = KnnDtw(n_neighbors=1, max_warping_window=10)
    m.fit(x_train[::10], y_train[::10])
    label, proba = m.predict(x_test[::10])

    from sklearn.metrics import classification_report, confusion_matrix
    # print(classification_report(label, y_test[::10], target_names=[l for l in labels.values()]))

    conf_mat = confusion_matrix(label, y_test[::10])

    fig = plt.figure(figsize=(6,6))
    width = np.shape(conf_mat)[1]
    height = np.shape(conf_mat)[0]

    res = plt.imshow(np.array(conf_mat), cmap=plt.cm.summer, interpolation='nearest')
    for i, row in enumerate(conf_mat):
        for j, c in enumerate(row):
            if c>0:
                plt.text(j-.2, i+.1, c, fontsize=16)

    cb = fig.colorbar(res)
    plt.title('Confusion Matrix')
    _ = plt.xticks(range(6), [l for l in labels.values()], rotation=90)
    _ = plt.yticks(range(6), [l for l in labels.values()])

    import time

    time_taken = []
    windows = [1,2,5,10,50,100,500,1000,5000]

    for w in windows:
        begin = time.time()

        t = KnnDtw(n_neighbors=1, max_warping_window=w)
        t.fit(x_train[:20], y_train[:20])
        label, proba = t.predict(x_test[:20])

        end = time.time()
        time_taken.append(end - begin)

    fig = plt.figure(figsize=(12,5))
    _ = plt.plot(windows, [t/400. for t in time_taken], lw=4)
    plt.title('DTW Execution Time with \nvarying Max Warping Window')
    plt.ylabel('Execution Time (seconds)')
    plt.xlabel('Max Warping Window')
    plt.xscale('log')

错误代码:

---------------------------------------------------------------------------

NameError                                 Traceback (most recent call last)

<ipython-input-3-4c142a5156b6> in <module>
    188         return mode_label.ravel(), mode_proba.ravel()
    189 
--> 190 class ProgressBar:
    191     """This progress bar was taken from PYMC
    192     """

<ipython-input-3-4c142a5156b6> in ProgressBar()
    279     m = KnnDtw(n_neighbors=1, max_warping_window=10)
    280     m.fit(x_train[::10], y_train[::10])
--> 281     label, proba = m.predict(x_test[::10])
    282 
    283     from sklearn.metrics import classification_report, confusion_matrix

<ipython-input-3-4c142a5156b6> in predict(self, x)
    173         """
    174 
--> 175         dm = self._dist_matrix(x, self.x)
    176 
    177         # Identify the k nearest neighbors

<ipython-input-3-4c142a5156b6> in _dist_matrix(self, x, y)
    145             dm_size = x_s[0]*y_s[0]
    146 
--> 147             p = ProgressBar(dm_size)
    148 
    149             for i in range(0, x_s[0]):

NameError: name 'ProgressBar' is not defined
4

0 回答 0