import re
import sys, getopt
import mmap
# Patterns for keyword blocks: a "*KEYWORD" header line followed by the data
# lines that belong to it.  Raw strings keep the regex escapes (\*, \.) out of
# Python's own string-escape processing; \r, \n and \t are then interpreted by
# the re module, which yields the same character classes as before.
#
# The node pattern is compiled case-insensitively because the sample input
# spells the keyword "*Node" while the pattern is written as "*NODE".
node = re.compile(r'\*NODE[a-zA-Z, \r\n\t0-9\.-]+', re.IGNORECASE)
element3 = r'\*ELEMENT, TYPE=S3RS[a-zA-Z, \r\n\t0-9\.=;_-]+'
element4 = r'\*ELEMENT, TYPE=S4RS[a-zA-Z, \r\n\t0-9\.=;_-]+'


def read_text(path):
    """Return the full contents of *path* decoded as UTF-8.

    The file is opened in binary mode and memory-mapped read-only, so line
    endings are returned exactly as stored on disk.  An empty file yields an
    empty string, because ``mmap`` cannot map a zero-length file.

    Raises:
        OSError: if the file cannot be opened or mapped.
        UnicodeDecodeError: if the contents are not valid UTF-8.
    """
    with open(path, 'rb') as handle:
        # seek() returns the new absolute position, i.e. the file size here.
        if handle.seek(0, 2) == 0:
            return ''
        with mmap.mmap(handle.fileno(), 0, access=mmap.ACCESS_READ) as mapped:
            return mapped.read().decode('utf-8')


def find_node_blocks(text):
    """Return every substring of *text* matched by the ``node`` pattern.

    Each match starts at a ``*NODE`` keyword (any letter case) and extends
    over the following run of letters, digits, commas, dots, hyphens and
    whitespace; it stops at the first other character, such as the ``*`` of
    the next keyword line.
    """
    return node.findall(text)


def main(argv):
    """Print the node blocks found in the file named by ``argv[1]``.

    ``argv[2]`` names an output file.  As in the original script it is only
    created/truncated; writing the matches to it is not enabled.
    """
    if len(argv) < 3:
        program = argv[0] if argv else 'script'
        sys.exit('usage: {} INPUT OUTPUT'.format(program))

    text = read_text(argv[1])

    # Create/truncate the output file and close it again straight away so the
    # handle is not leaked.
    open(argv[2], 'w').close()

    print(find_node_blocks(text))


if __name__ == '__main__':
    main(sys.argv)
以下是我尝试对整个文件应用正则表达式的代码。在小于 1 MB 的小文件上测试时,这段代码可以正常工作;但在大文件上却无法正常工作,返回的是空列表。下面是我要解析的数据样本,实际文件通常包含约 300 万行这样的数据。
*Assembly, name=Assembly
**
*Instance, name=vessel-1, part=vessel_bot
*Node
1, 24.8572464, 213.8125, 53.1415176
2, 41.4983292, 213.8125, 41.4983292
3, 44.4593391, 213.8125, 44.4593391
4, 28.0079861, 213.8125, 56.2922592
5, 24.8572464, 233.8125, 53.1415176
6, 28.0079861, 233.8125, 56.2922592
7, 48.2778168, 233.8125, 61.0057411
8, 46.156498, 233.8125, 61.0057411
9, 53.5811195, 223.3125, 53.5811195
10, 54.641777, 224.8125, 54.641777
11, 49.6920319, 233.8125, 62.4199524
12, 56.0559921, 224.8125, 56.0559921
13, 50.7526894, 233.8125, 61.3592911
14, 56.0559921, 226.3125, 56.0559921
15, 41.4983292, 226.3125, 41.4983292
16, 35.8528366, 233.8125, 46.4594383
17, 37.5893517, 233.8125, 52.4385948
18, 45.8735542, 223.3125, 45.8735542
19, 44.4593391, 221.3125, 44.4593391
20, 35.0599136, 233.8125, 52.1926079
21, 34.0794373, 233.8125, 44.686039
22, 31.5089321, 233.8125, 44.4683838
23, 38.5373192, 243.3125, 38.5373192