0

使用 numpy.reshape 帮助很大,而使用 map 帮助不大。是否有可能进一步加快速度?

import pydicom
import numpy as np
import cProfile
import pstats


def parse_coords(contour):
    """Convert one ROI contour item into per-loop coordinate arrays.

    ``contour`` is an item of a DICOM ROIContourSequence.  If it has no
    ``ContourSequence`` attribute, an empty list is returned.  Otherwise the
    result holds one 3 x N numpy array per loop, laid out as
    [loop][[x0, x1, x2, ...][y0, y1, y2, ...][z0, z1, z2, ...]].
    """
    if not hasattr(contour, "ContourSequence"):
        return []  # structure without any contour loops

    arrays = []
    for loop in contour.ContourSequence:
        data = loop.ContourData
        # Column-major ('F') fill puts each consecutive triplet of the flat
        # data into one column, so the three rows come out as x, y and z.
        arrays.append(np.array(data).reshape((3, len(data) // 3), order='F'))
    return arrays


def profile_load_contours(path='RS.gyn1.dcm'):
    """Read an RT structure set and parse every ROI contour in it.

    Args:
        path: File handed to ``pydicom.dcmread``.  Defaults to the file
            name this script was originally hard-wired to.

    Returns:
        A list with one ``parse_coords`` result per item of the dataset's
        ROIContourSequence.
    """
    rs = pydicom.dcmread(path)
    return [parse_coords(contour) for contour in rs.ROIContourSequence]


# Profile one complete load and save the raw statistics to 'prof.stats'.
cProfile.run('profile_load_contours()', filename='prof.stats')

# Reload the saved statistics, order them by cumulative time and print the
# first 30 entries.
p = pstats.Stats('prof.stats')
p.sort_stats('cumulative')
p.print_stats(30)

使用从 Varian Eclipse 导出的真实结构集。

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   12.165   12.165 {built-in method builtins.exec}
        1    0.151    0.151   12.165   12.165 <string>:1(<module>)
        1    0.000    0.000   12.014   12.014 load_contour_time.py:19(profile_load_contours)
        1    0.000    0.000   11.983   11.983 load_contour_time.py:21(<listcomp>)
       56    0.009    0.000   11.983    0.214 load_contour_time.py:7(parse_coords)
50745/33837    0.129    0.000   11.422    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataset.py:455(__getattr__)
50741/33825    0.152    0.000   10.938    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataset.py:496(__getitem__)
    16864    0.069    0.000    9.839    0.001 load_contour_time.py:12(_reshape_contour_data)
    16915    0.101    0.000    9.780    0.001 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataelem.py:439(DataElement_from_raw)
    16915    0.052    0.000    9.300    0.001 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/values.py:320(convert_value)
    16864    0.038    0.000    7.099    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/values.py:89(convert_DS_string)
    16870    0.042    0.000    7.010    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/valuerep.py:495(MultiString)
    16908    1.013    0.000    6.826    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/multival.py:29(__init__)
  3004437    3.013    0.000    5.577    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/multival.py:42(number_string_type_constructor)
3038317/3038231    1.037    0.000    3.171    0.000 {built-in method builtins.hasattr}

大部分时间都花在 convert_DS_string 上。有没有可能让它更快?我想部分原因是坐标在 DICOM 文件中的存储方式效率不高。

编辑:为了避开 MultiVal.__init__ 末尾的循环,我在考虑获取每个 ContourData 的原始 double 字符串,然后对其使用 numpy.fromstring。但是,我还没能拿到这个原始字符串。

4

1 回答 1

0

去掉 MultiVal.__init__ 中的循环并改用 numpy.fromstring,带来了 4 倍以上的加速。我会在 pydicom 的 GitHub 上发帖,看看是否有兴趣将其纳入库代码。这个实现有点难看,欢迎提出进一步改进的建议。

import pydicom
import numpy as np
import cProfile
import pstats


def parse_coords(contour):
    """Given a contour from a DICOM ROIContourSequence, returns coordinates
    [loop][[x0, x1, x2, ...][y0, y1, y2, ...][z0, z1, z2, ...]]

    Each loop becomes one 3 x N numpy array.  ContourData that is still held
    as raw bytes is parsed directly with numpy rather than through pydicom's
    per-value conversion.  A contour without a ``ContourSequence`` attribute
    yields an empty list.

    Raises:
        KeyError: if a loop has no ContourData element.
        ValueError: if the number of parsed values is not a multiple of 3
            (raised by the reshape).
    """
    if not hasattr(contour, "ContourSequence"):
        return []  # empty structure
    cd_tag = pydicom.tag.Tag(0x3006, 0x0050)  # ContourData tag

    def _reshape_contour_data(loop):
        # get_item() is the public way to fetch the element as stored,
        # without forcing conversion of a raw value.  It replaces the former
        # super(loop.__class__, loop).__getitem__ trick, which relied on
        # Dataset being a dict subclass.
        elem = loop.get_item(cd_tag)
        if elem is None:
            # NOTE(review): some pydicom versions appear to return None
            # here instead of raising; keep the KeyError either way.
            raise KeyError(cd_tag)
        val = elem.value
        if isinstance(val, (bytes, bytearray)):
            # Still raw: decimal strings separated by backslashes.  Drop the
            # trailing pad character (space, sometimes NUL) first, because
            # it is not a separator and would be left unparsed.
            double_string = val.decode(encoding='utf-8').rstrip(' \x00')
            double_vec = np.fromstring(double_string, dtype=float, sep='\\')
        else:
            # It's already been converted to doubles and cached
            double_vec = loop.ContourData
        return np.reshape(np.array(double_vec),
                          (3, len(double_vec) // 3),
                          order='F')

    return [_reshape_contour_data(loop) for loop in contour.ContourSequence]


def profile_load_contours(path='RS.gyn1.dcm'):
    """Read an RT structure set and parse every ROI contour in it.

    Args:
        path: File handed to ``pydicom.dcmread``.  Defaults to the file
            name this script was originally hard-wired to.

    Returns:
        A list with one ``parse_coords`` result per item of the dataset's
        ROIContourSequence.
    """
    rs = pydicom.dcmread(path)
    return [parse_coords(contour) for contour in rs.ROIContourSequence]


# Run the load once outside the profiler (presumably a warm-up so one-time
# costs stay out of the measurement -- confirm intent).
profile_load_contours()

# Profile a second complete load and save the raw statistics to 'prof.stats'.
cProfile.run('profile_load_contours()', filename='prof.stats')

# Reload the saved statistics, order them by cumulative time and print the
# first 15 entries.
p = pstats.Stats('prof.stats')
p.sort_stats('cumulative')
p.print_stats(15)

结果

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    2.800    2.800 {built-in method builtins.exec}
        1    0.017    0.017    2.800    2.800 <string>:1(<module>)
        1    0.000    0.000    2.783    2.783 load_contour_time3.py:29(profile_load_contours)
        1    0.000    0.000    2.761    2.761 load_contour_time3.py:31(<listcomp>)
       56    0.006    0.000    2.760    0.049 load_contour_time3.py:9(parse_coords)
  153/109    0.001    0.000    2.184    0.020 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataset.py:455(__getattr__)
   149/97    0.001    0.000    2.182    0.022 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataset.py:496(__getitem__)
       51    0.000    0.000    2.178    0.043 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataelem.py:439(DataElement_from_raw)
       51    0.000    0.000    2.177    0.043 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/values.py:320(convert_value)
       44    0.000    0.000    2.176    0.049 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/values.py:255(convert_SQ)
       44    0.035    0.001    2.176    0.049 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/filereader.py:427(read_sequence)
   152/66    0.000    0.000    2.171    0.033 {built-in method builtins.hasattr}
    16920    0.147    0.000    1.993    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/filereader.py:452(read_sequence_item)
    16923    0.116    0.000    1.267    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/filereader.py:365(read_dataset)
    84616    0.113    0.000    0.699    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataset.py:960(__setattr__)
于 2018-04-18T21:17:28.287 回答