这是第二篇文章,是我之前问题的一部分。
我在 Python 2.7(在 Window OS 64 位上)中编写了一个函数,以便从参考多边形(Ref)和一个或多个 ESRI shapefile 格式的分段(Seg)多边形计算相交区域的平均值。代码非常慢,因为我有超过 2000 个参考多边形,并且对于每个 Ref_polygon,该函数每次都针对所有 Seg 多边形(超过 7000 个)运行。很抱歉,该函数是原型。
遵循大卫罗宾逊的建议
from multiprocessing import Pool
p = Pool() # run multiple processes
for l in p.imap_unordered(process_reference_object, range(ref_layer.GetFeatureCount())):
file_out.write(l)
和TimothyAWiseman,我希望以优化方式使用多处理以提高我的功能的速度。
我有以下问题:
- 定位 p = Pool().... 的最佳位置在哪里?在第二个函数内部(例如:segmentation_accuracy)还是在最后?
我曾尝试在此处插入(在 segmentation_accuracy 末尾)
p = Pool()
for l in p.imap_unordered(object_accuracy, range(ref_layer.GetFeatureCount())):
file_out.write(l)
file_out.close()
但我的电脑结冰了
- 我可以改进(我认为可以)我的代码吗?如何改进?
from shapely.geometry import Polygon
import math
import numpy as np
import osgeo.gdal
import ogr
import numpy
import os
from multiprocessing import Pool
def shapefile_NameFilter(inFile):
if inFile.endswith(".shp"):
return inFile
else:
raise ValueError('"%s" is not an ESRI shapefile' % inFile)
def object_accuracy(ref,seg, index,threshold=10.0,noData=-9999):
"""
segmetation accuracy metrics
"""
ref_layer = ref
seg_layer = seg
# convert in a shapely polygon
ref_feature = ref_layer.GetFeature(index)
# get FID (=Feature ID)
FID = str(ref_feature.GetFID())
ref_geometry = ref_feature.GetGeometryRef()
# exterior boundaries
pts = ref_geometry.GetGeometryRef(0)
points = []
for p in xrange(pts.GetPointCount()):
points.append((pts.GetX(p), pts.GetY(p)))
# convert in a shapely polygon
ref_polygon_exterior = Polygon(points)
nHole = ref_geometry.GetGeometryCount()
if nHole != 1:
for h in range(1, nHole):
# interior boundaries or "holes" of the feature
pts = ref_geometry.GetGeometryRef(h)
points = []
for p in range(pts.GetPointCount()):
points.append((pts.GetX(p), pts.GetY(p)))
ref_polygon_interior = Polygon(points)
ref_polygon_exterior = ref_polygon_exterior.difference(ref_polygon_interior)
# Net Reference Polygon
ref_polygon = ref_polygon_exterior
# get the area
ref_Area = ref_polygon.area
# get centroid of the reference object-i
geom, xy = ref_polygon.centroid.wkt.split(None, 1)
xy = xy.strip('()').split()
xcr, ycr = (float(i) for i in xy)
# create empty lists
nObject = 0
Area_seg, Area_intersect = ([] for _ in range(2))
RAor, RAos = ([] for _ in range(2))
OverSeg, UnderSeg, OverMerg, UnderMerg = ([] for _ in range(4))
qr, SimSize, SegError, Dsr = ([] for _ in range(4))
# For each segmented objects-j
for segment in xrange(seg_layer.GetFeatureCount()):
seg_feature = seg_layer.GetFeature(segment)
seg_geometry = seg_feature.GetGeometryRef()
pts = seg_geometry.GetGeometryRef(0)
points = []
for p in xrange(pts.GetPointCount()):
points.append((pts.GetX(p), pts.GetY(p)))
seg_polygon_exterior = Polygon(points)
nHole = seg_geometry.GetGeometryCount()
if nHole != 1:
for h in range(1, nHole):
# interior boundaries or "holes" of the feature
pts = seg_geometry.GetGeometryRef(h)
points = []
for p in range(pts.GetPointCount()):
points.append((pts.GetX(p), pts.GetY(p)))
seg_polygon_interior = Polygon(points)
seg_polygon_exterior = seg_polygon_exterior.difference(seg_polygon_interior)
# Net Segemted Polygon
seg_polygon = seg_polygon_exterior
seg_Area = seg_polygon.area
# get centroid of the segemented object-j
geom, xy = seg_polygon.centroid.wkt.split(None, 1)
xy = xy.strip('()').split()
xcs, ycs = (float(i) for i in xy)
# intersection (overlap) of reference object with the segmented object
intersect_polygon = ref_polygon.intersection(seg_polygon)
# area of intersection (= 0, No intersection)
intersect_Area = intersect_polygon.area
# Refinement in order to eliminate spurious effects
if intersect_Area > (ref_Area*(float(threshold)/100)):
# Union
union_polygon = ref_polygon.union(seg_polygon)
# area of union
union_Area = union_polygon.area
# Number of segmented objects
nObject += 1
# segmented object area
Area_seg.append(seg_Area)
# intersected (=overlapped) region area
Area_intersect.append(intersect_Area)
# Area-Based Measures
# Relative Area of a reference object (RAor)
RAor.append(intersect_Area/ref_Area)
# Relative Area of a segmented object (RAos)
RAos.append(intersect_Area/seg_Area)
# OverSegmentation (OverSeg)
OverSeg.append(1-(intersect_Area/ref_Area))
# UnderSegmentation (UnderSeg)
UnderSeg.append(1-(intersect_Area/seg_Area))
# OverMerging (OverMerg)
OverMerg.append((ref_Area - intersect_Area)/ref_Area)
# UnderMerging (UnderMerg)
UnderMerg.append((seg_Area -intersect_Area)/ref_Area)
# Quality rate (qr)
qr.append(1-(intersect_Area/(union_Area)))
# SimSize
SimSize.append(min(ref_Area,seg_Area)/max(ref_Area,seg_Area))
# Mean Absolute Segmentation Error (SegError)
SegError.append(abs(ref_Area - seg_Area)/(ref_Area + seg_Area))
# Location-based Measures
# Position discrepancy of segmented object to a reference object
# Euclidean distance in the xy plane
Eucdist_sr = math.sqrt(math.pow((xcs-xcr),2)+math.pow((ycs-ycr),2))
Dsr.append(Eucdist_sr)
# No segmented objects of intrest
if nObject == 0:
AREAs_average, SDs, seg_AreaMax = (noData for _ in range(3))
AREAo_average, SDo, intersect_AreaMax = (noData for _ in range(3))
ORrs,RAor_average,RAos_average = (noData for _ in range(3))
OverSeg_average,UnderSeg_average = (noData for _ in range(2))
OverMerg_average,UnderMerg_average = (noData for _ in range(2))
qr_average,SimSize_average,SegError_average, AFI = (noData for _ in range(4))
Dsr_avarage,RPsr_average,dmax,D = (noData for _ in range(4))
else:
ORrs = (1.0/nObject)*100
AREAs_average = numpy.average(Area_seg)
SDs = numpy.std(Area_seg)
seg_AreaMax = numpy.max(Area_seg)
AREAo_average = numpy.average(Area_intersect)
SDo = numpy.std(Area_intersect)
intersect_AreaMax = numpy.max(Area_intersect)
# Avarage for all segmented objects
RAor_average = numpy.average(RAor)*100
RAos_average = numpy.average(RAos)*100
OverSeg_average = numpy.average(OverSeg)
UnderSeg_average = numpy.average(UnderSeg)
OverMerg_average = numpy.average(OverMerg)
UnderMerg_average = numpy.average(UnderMerg)
qr_average = numpy.average(qr)
SimSize_average = numpy.average(SimSize)
SegError_average = numpy.average(SegError)
# Area Fit Index
AFI = (ref_Area-seg_AreaMax)/ref_Area
Dsr_avarage = numpy.average(Dsr)
# Maximum Distance
dmax = numpy.max(Dsr)
# Avarage Realative Position (RPsr)
RPsr_average = numpy.average(numpy.array(Dsr)/numpy.max(Dsr))
# D index
D = math.sqrt((math.pow(OverSeg_average,2)+math.pow(UnderSeg_average,2))/2)
return(" ".join(["%s" %i for i in [FID, ref_Area, nObject, ORrs,\
AREAs_average, SDs, seg_AreaMax, AREAo_average, SDo, intersect_AreaMax,\
RAor_average, RAos_average, OverSeg_average, UnderSeg_average,\
OverMerg_average, UnderMerg_average,qr_average, SimSize_average,\
SegError_average, AFI, Dsr_avarage, RPsr_average, dmax, D]])+ "\n")
def segmentation_accuracy(reference,segmented,outFile,threshold=10.0,noData=-9999):
"""
Segmentation accuracy
"""
# check if reference and segmented are ESRI shapefile format
reference = shapefile_NameFilter(reference)
segmented = shapefile_NameFilter(segmented)
# open shapefile
ref = osgeo.ogr.Open(reference)
if ref is None:
raise SystemExit('Unable to open %s' % reference)
seg = osgeo.ogr.Open(segmented)
if seg is None:
raise SystemExit('Unable to open %s' % segmented)
ref_layer = ref.GetLayer()
seg_layer = seg.GetLayer()
# create outfile
if not os.path.split(outFile)[0]:
file_path, file_name_ext = os.path.split(os.path.abspath(reference))
outFile_filename = os.path.splitext(os.path.basename(outFile))[0]
file_out = open(os.path.abspath("{0}\\{1}.txt".format(file_path, outFile_filename)), "w")
else:
file_path_name, file_ext = os.path.splitext(outFile)
file_out = open(os.path.abspath("{0}.txt".format(file_path_name)), "w")
# Header
file_out.write(" ".join(["%s" %i for i in ["ReferenceFID","AREAr",\
"nObject","ORrs","AREAs","SDs","AREAsMAX","AREAo","SDo","AREAoMAX",\
"RAor","RAos","OverSeg","UnderSeg","OverMerg","UnderMerg","qr","SimSize",\
"SegError","AFI","Dsr","RPro","dmax","D"]])+ "\n")
for index in xrange(ref_layer.GetFeatureCount()):
file_out.write(object_accuracy(index))
file_out.close()