0
import re
import sys, getopt
import mmap

# Patterns for the keyword blocks this script extracts from the input file.
# Raw strings keep the regex escapes (\*, \.) free of invalid-escape warnings.
# The node pattern ignores case: the sample input spells the keyword "*Node",
# which a case-sensitive "\*NODE" never matches, so the result list was empty.
node = re.compile(r'\*NODE[a-zA-Z, \r\n\t0-9\.-]+', re.IGNORECASE)
element3 = r'\*ELEMENT, TYPE=S3RS[a-zA-Z, \r\n\t0-9\.=;_-]+'
element4 = r'\*ELEMENT, TYPE=S4RS[a-zA-Z, \r\n\t0-9\.=;_-]+'


def find_node_blocks(text):
    """Return every node block found in *text*.

    A block starts at a ``*NODE`` keyword (any letter case) and extends over
    the following letters, digits, commas, dots, hyphens and whitespace, so it
    ends right before the next character outside that set (for example the
    ``*`` of the next keyword).

    Args:
        text: The decoded contents of the input file.

    Returns:
        A list of matched block strings, in file order; empty if none match.
    """
    return node.findall(text)


def read_text(path):
    """Return the contents of the file at *path*, decoded as UTF-8.

    The file is read through a read-only memory map. Both the file and the
    map are closed before returning.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded text, or an empty string for an empty file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, 'rb') as shakes:
        # mmap cannot map a zero-length file; seeking to the end gives the size.
        if shakes.seek(0, 2) == 0:
            return ''
        with mmap.mmap(shakes.fileno(), 0, access=mmap.ACCESS_READ) as m:
            return m.read().decode('utf-8')


def main(argv):
    """Print the node blocks of the input file named in *argv*.

    Args:
        argv: Command-line arguments without the program name:
            ``[input_path, output_path]``.

    Returns:
        Process exit status: 0 on success, 2 when arguments are missing.
    """
    if len(argv) < 2:
        print('usage: INPUT_FILE OUTPUT_FILE', file=sys.stderr)
        return 2

    line = find_node_blocks(read_text(argv[0]))

    # The output file is only created/truncated, exactly as before: the
    # matches are printed, not written to it.
    open(argv[1], 'w').close()

    print(line)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))

以上是我尝试在完整文件上应用正则表达式的代码。每当我在小于 1MB 的较小文件上测试时，代码都能正常工作；但在大文件上则无法正常工作，返回的是空列表。下面是我试图解析的数据样本，实际文件通常包含约 300 万行这样的数据。

*Assembly, name=Assembly
**  
*Instance, name=vessel-1, part=vessel_bot
*Node
      1,   24.8572464,     213.8125,   53.1415176
      2,   41.4983292,     213.8125,   41.4983292
      3,   44.4593391,     213.8125,   44.4593391
      4,   28.0079861,     213.8125,   56.2922592
      5,   24.8572464,     233.8125,   53.1415176
      6,   28.0079861,     233.8125,   56.2922592
      7,   48.2778168,     233.8125,   61.0057411
      8,    46.156498,     233.8125,   61.0057411
      9,   53.5811195,     223.3125,   53.5811195
     10,    54.641777,     224.8125,    54.641777
     11,   49.6920319,     233.8125,   62.4199524
     12,   56.0559921,     224.8125,   56.0559921
     13,   50.7526894,     233.8125,   61.3592911
     14,   56.0559921,     226.3125,   56.0559921
     15,   41.4983292,     226.3125,   41.4983292
     16,   35.8528366,     233.8125,   46.4594383
     17,   37.5893517,     233.8125,   52.4385948
     18,   45.8735542,     223.3125,   45.8735542
     19,   44.4593391,     221.3125,   44.4593391
     20,   35.0599136,     233.8125,   52.1926079
     21,   34.0794373,     233.8125,    44.686039
     22,   31.5089321,     233.8125,   44.4683838
     23,   38.5373192,     243.3125,   38.5373192
4

0 回答 0