4

我正在尝试运行下面这个脚本，据说它在其他人那里可以正常运行：

"""Preprocessing script.

This script walks over the directories and dump the frames into a csv file
"""
import os
import csv
import sys
import random
import scipy
from scipy import misc as m
import numpy as np
import dicom
from skimage import io, transform
from joblib import Parallel, delayed
import dill

# NOTE(review): hard-coded Windows working directory for the competition data;
# all relative paths below resolve against it.
os.chdir("P:\\hackr\\dsb")

def mkdir(fname):
   """Create directory *fname*; silently ignore failure (e.g. it exists).

   Narrowed from a bare ``except:`` so that KeyboardInterrupt/SystemExit
   are no longer swallowed — only OS-level errors are ignored.
   """
   try:
       os.mkdir(fname)
   except OSError:
       # Best-effort: already-existing directory is the expected case.
       pass

def get_frames(root_path):
   """Collect SAX-view DICOM series under *root_path* with all 30 frames.

   A directory qualifies when its path contains "sax", its files are
   ``.dcm`` frames, and frames ``<prefix>-0001.dcm`` .. ``<prefix>-0030.dcm``
   of the same series prefix are all present.  Returns a list of
   30-element path lists, sorted by first path for reproducibility.
   """
   series = []
   for folder, _, names in os.walk(root_path):
       if not names:
           continue
       if "sax" not in folder or not names[0].endswith(".dcm"):
           continue
       stem = names[0].rsplit('-', 1)[0]
       wanted = ["%s-%04d.dcm" % (stem, k) for k in range(1, 31)]
       # Keep the series only when every expected frame exists.
       if set(wanted) <= set(names):
           series.append(["%s/%s" % (folder, name) for name in wanted])
   series.sort(key=lambda paths: paths[0])
   return series


def get_label_map(fname):
   """Read the label CSV *fname* and map case id (first column) -> raw line.

   The header line is skipped.  The full original line (newline included)
   is kept as the value so it can later be written back verbatim.

   Fixed: the file handle was never closed; it is now managed by ``with``.
   """
   labelmap = {}
   with open(fname) as fi:
       fi.readline()  # skip the CSV header
       for line in fi:
           arr = line.split(',')
           labelmap[int(arr[0])] = line
   return labelmap


def write_label_csv(fname, frames, label_map):
   """Write one label line per series in *frames* to *fname*.

   The case id is taken from the second backslash-separated component of
   the series' first frame path (Windows-style paths — TODO confirm path
   layout against get_frames output on this machine).  When *label_map*
   is given its stored line is copied verbatim; otherwise a dummy
   "<id>,0,0" line is emitted.

   Fixed: output handle now closed via ``with``; ``!= None`` -> ``is not None``.
   """
   with open(fname, "w") as fo:
       for lst in frames:
           # e.g. ".\\train_tiny\\123\\..." -> case id 123
           index = int(lst[0].split("\\")[1])  # changed from split("/")[3]
           if label_map is not None:
               fo.write(label_map[index])
           else:
               fo.write("%d,0,0\n" % index)


def get_data(lst,preproc):
    """Load, preprocess and flatten one DICOM frame series.

    Parameters
    ----------
    lst : list of str
        Paths to the series' DICOM frames.
    preproc : callable
        Preprocessor applied to each normalized pixel array
        (e.g. ``lambda x: crop_resize(x, 64)``).

    Returns
    -------
    [data, result] : list
        ``data`` is a flat numpy array whose pixel values have been
        converted to strings (``np.str_``), ready to be joined into one
        CSV row; ``result`` is the list of JPEG preview paths written.

    Side effect: saves a ``<frame>.64x64.jpg`` preview beside every
    input frame via ``scipy.misc.imsave``.
    """
    data = []
    result = []
    for path in lst:
        f = dicom.read_file(path)
        # Normalize pixel values into [0, 1] before handing to preproc.
        img = preproc(f.pixel_array.astype(float) / np.max(f.pixel_array))
        dst_path = path.rsplit(".", 1)[0] + ".64x64.jpg"
        m.imsave(dst_path, img)
        result.append(dst_path)
        data.append(img)
    # Flatten all frames into one vector, then re-encode every pixel as a
    # string so the caller can ','.join(...) it straight into a CSV line.
    data = np.array(data, dtype=np.uint8)
    data = data.reshape(data.size)
    data = np.array(data,dtype=np.str_)
    data = data.reshape(data.size)
    return [data,result]


def write_data_csv(fname, frames, preproc):
   """Write the pixel data of every series in *frames* to CSV *fname*.

   Each series is loaded and preprocessed by ``get_data`` (fanned out via
   joblib's ``Parallel``) and dumped as one comma-separated row.  Returns
   the flat array of JPEG preview paths produced as a side effect.

   Fixed: ``fdata.close()``, the summary ``print`` and the ``return``
   were indented inside the ``for`` loop, so the function wrote only the
   FIRST row and returned immediately; they now run after the loop.  The
   output handle is managed by ``with`` and dead commented-out code was
   removed.
   """
   dr = Parallel()(delayed(get_data)(lst, preproc) for lst in frames)
   data, result = zip(*dr)
   with open(fname, "w") as fdata:
       for entry in data:
           fdata.write(','.join(entry) + '\r\n')
   print("All finished, %d slices in total" % len(data))
   return np.ravel(result)


def crop_resize(img, size):
   """Center-crop *img* to a square, resize to (size, size), return uint8."""
   # Put the longer axis first so the centered-crop math below is uniform.
   if img.shape[0] < img.shape[1]:
       img = img.T
   # Largest centered square that fits inside the frame.
   side = min(img.shape[:2])
   top = int((img.shape[0] - side) / 2)
   left = int((img.shape[1] - side) / 2)
   square = img[top : top + side, left : left + side]
   # skimage's resize yields floats in [0, 1]; scale back to byte range.
   scaled = transform.resize(square, (size, size))
   scaled *= 255
   return scaled.astype("uint8")


def local_split(train_index):
   """Deterministically split the unique ids in *train_index* roughly 2:1.

   Seeds the RNG with 0, shuffles the sorted unique ids, and takes the
   first third as the local test set, the remainder as the local train
   set.  Returns ``(train_set, test_set)``.
   """
   random.seed(0)
   unique_ids = sorted(set(train_index))
   random.shuffle(unique_ids)
   cut = int(len(unique_ids) / 3)
   return set(unique_ids[cut:]), set(unique_ids[:cut])


def split_csv(src_csv, split_to_train, train_csv, test_csv):
   """Route each line of *src_csv* to *train_csv* or *test_csv*.

   Line ``i`` goes to the train file when ``split_to_train[i]`` is
   truthy, otherwise to the test file.

   Fixed: the source handle was never closed and the two output handles
   were closed manually; all three are now managed by ``with``.
   """
   with open(src_csv) as src, \
        open(train_csv, "w") as ftrain, \
        open(test_csv, "w") as ftest:
       for cnt, line in enumerate(src):
           if split_to_train[cnt]:
               ftrain.write(line)
           else:
               ftest.write(line)

# Load the list of all the training frames and shuffle them with a fixed
# seed so the ordering is reproducible across runs.
random.seed(10)
train_frames = get_frames("./train_tiny")
random.shuffle(train_frames)
validate_frames = get_frames("./validate_tiny")

# Write the corresponding label information of each frame into file.
write_label_csv("train-label.csv", train_frames, get_label_map("./train.csv"))
write_label_csv("validate-label.csv", validate_frames, None)

# Dump the data of each frame into a CSV file, apply crop to 64 preprocessor.
# NOTE(review): these run at import time.  joblib's Parallel pickles
# get_data for its workers, which fails when this code is entered
# interactively (functions defined in an interactive __main__ cannot be
# looked up by name) — run the file as a script, ideally behind an
# ``if __name__ == "__main__":`` guard.
train_lst = write_data_csv("train-64x64-data.csv", train_frames, lambda x: crop_resize(x, 64))
valid_lst = write_data_csv("./validate-64x64-data.csv", validate_frames, lambda x: crop_resize(x, 64))

# Generate local train/test split, which you could use to tune your model locally.
train_index = np.loadtxt("./train-label.csv", delimiter=",")[:,0].astype("int")
train_set, test_set = local_split(train_index)
split_to_train = [x in train_set for x in train_index]
split_csv("./train-label.csv", split_to_train, "./local_train-label.csv", "./local_test-label.csv")
split_csv("./train-64x64-data.csv", split_to_train, "./local_train-64x64-data.csv", "./local_test-64x64-data.csv")

但它引发了错误。当我以交互方式逐步运行这段代码时，我发现是对 write_data_csv 的调用破坏了脚本。

例如，在调用 write_data_csv 之前脚本运行没有任何错误，但 `valid_lst = write_data_csv("./validate-64x64-data.csv", validate_frames, lambda x: crop_resize(x, 64))` 这一行抛出了以下错误：

Traceback (most recent call last):
  File "<input>", line 1, in <module>
  File "<input>", line 5, in write_data_csv
  File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 804, in __call__
    while self.dispatch_one_batch(iterator):
  File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 657, in dispatch_one_batch
    tasks = BatchedCalls(itertools.islice(iterator, batch_size))
  File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 68, in __init__
    self.items = list(iterator_slice)
  File "<input>", line 5, in <genexpr>
  File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 161, in delayed
    pickle.dumps(function)
_pickle.PicklingError: Can't pickle <function get_data at 0x0000000007B432F0>: attribute lookup get_data on __main__ failed

我不确定我的下一步是如何解决这个问题。我发现了其他有关属性查找失败的 StackOverflow 问题,但它们要么本质上不同,要么没有答案。

4

0 回答 0