1

我是新尝试实现并行 Python (PP) 或异步到多进程 arcgis shapefile 剪辑。我在 pool_async 和 PP 上都取得了成功;但是,它在大文件上永远停滞不前(是的,我尝试让 python 访问大地址)。这是我使用 PP 的代码,请提供任何解决方案,如果有明显的错误,请原谅:-)

def ClipDo(F,M,O,OW = ""):

#for F in F:
    print "\n"+"PID:%s"%(os.getpid())

    arcpy.env.overwriteOutput = False

    if OW == "":
        pass
    else:
        arcpy.env.overwriteOutput = True

    FPath = os.path.dirname(F)
    F = os.path.basename(F)
    ClipList = []
    pattern = '*.shp'

    for filename in M:
            ClipList.append(filename)
            clipN = str(os.path.splitext(os.path.basename(filename))[0])
            if not os.path.isdir(O+"/"+clipN+"/"):
                os.makedirs(O+"/"+clipN+"/")

    #Counts files in clip directory
    count = len(ClipList)
    for num in range(0,count):

        clip = ClipList[num]

        clipN = str(os.path.splitext(os.path.basename(clip))[0])

        OutShp = clipN +"_"+ F

        try:
            print "Clipping, Base File: %s Clip File: %s Output: %s" % (F,clip,O+"\\"+OutShp)
            arcpy.Clip_analysis(os.path.join(FPath,F),os.path.join(M,clip), os.path.join(os.path.join(O+"\\",clipN),OutShp))
            print "Clipping SUCCESS "

        except:
            print "Clipping FAILED "  +F


def PP(F,M,O,OW):
    print F
    #~ # tuple of all parallel python servers to connect with
    ncpus = 6
    ncpus = ncpus
    ppservers = ("localhost",)
    #~ #ppservers = ("10.0.0.1",)

    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        # Creates jobserver with ncpus workers
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        #~ # Creates jobserver with automatically detected number of workers
        job_server = pp.Server(ncpus,ppservers=ppservers)

    print "Starting pp with", job_server.get_ncpus(), "workers"

    jobs = []
    start_time = time.time()

    for f in F:

        job = job_server.submit(ClipDo, (f,M,O,OW),(),  ("arcpy","NullGeomFilter"))
        jobs.append(job)

    for job in jobs:
         result = job()
         print result
         if result:
            break

    job_server.destroy()    

    print "\n"+"PID:%s"%(os.getpid())

    print "Time elapsed: ", time.time() - start_time, "s"
4

1 回答 1

0

会不会是你的数据块对于 arcpy 来说太大了，而问题并不出在并行化上？

作为测试,最好通过您的函数以交互方式/本地方式运行大数据中的一个 arg 列表,看看它是否有效。如果是这样,那么您可以继续记录和调试并行版本。

于 2013-09-06T17:26:36.013 回答