尝试根据大小将 Shapefile 分成多个部分。如果 Shapefile 的 zip 文件的文件大小超过 10 MB,则会将其分成块。
早些时候,我们使用 Geopandas 来完成这项任务,它运行良好,但消耗了更多 RAM 来完成这项任务。所以尝试一下 PyShp 库。
主要问题是它正在生成分割文件,但没有记录插入到 Shapefile 中,并且其中也缺少 DBF 文件。
我是否在以下代码中遗漏了什么,请告诉我
import os
import math
import csv
import zipfile
import shutil
from shutil import copyfile
import shapefile
path = '<shapefile_data_path>'
storage_path = '<path_to_extract_zip_file>'
current_dir = '<path_for_divided_shapefiles>'
ALLOWED_SIZE = 10
procs = []
# Here filepath means Shapefile's zip file path
def function_name(filepath):
file_name = file_path.split('/')[-1]
name = file_name.split('.zip')[0]
storage_file = os.path.join(storage_path, file_name)
storage_file = storage_file.replace('\\', '/')
src = path +'/'+file_path
shutil.copy(src,storage_file)
statinfo = os.stat(storage_file)
if (statinfo.st_size >> 20) > ALLOWED_SIZE:
storage_path_1 = storage_path + '/' + name
zip_ref = zipfile.ZipFile(storage_file)
zip_ref.extractall(storage_path_1)
zip_ref.close()
prj_file_path = ''
for _file1 in os.listdir(storage_path_1):
print _file1
if _file1.endswith('.prj'):
prj_file_path = os.path.join(storage_path_1, _file1)
for _file1 in os.listdir(storage_path_1):
if _file1.endswith('.shp'):
file_size = statinfo.st_size >> 20
parts = int(math.ceil(float(file_size) / float(ALLOWED_SIZE)))
# data = gpd.read_file(storage_path_1 + '/' + _file1)
data = shapefile.Reader(storage_path_1 + '/' + _file1)
records = data.records()
num_lines = len(data)
increment = int(num_lines / parts)
start_index = 0
part = 1
file_name_new = file_name.split('.zip')[0]
while start_index < num_lines:
part_name = '{1}_part{0}'.format(part, file_name_new)
outpath = os.path.join(current_dir, part_name)
os.mkdir(outpath)
outfile = os.path.join(outpath, part_name)
end_index = start_index + increment
if end_index > num_lines:
end_index = num_lines
chunk = records[start_index:end_index]
with open(outfile,'w') as f:
f.write(str(chunk))
copyfile(prj_file_path, os.path.join(outpath, file_name_new+'.prj'))
shutil.make_archive(outpath, 'zip', outpath)
shutil.rmtree(outpath)
start_index = end_index
part += 1
在写入新的 shapefile 时,在生成的 zip 文件中遇到困难。DBF 文件丢失并且 SHP 文件没有正确的记录
任何帮助深表感谢。