0

我的原始 .csv 文件有数千行,为清楚起见,这里只展示其中一行。我想对这一行依次做 4 处修改。Python 的每一处修改本身都是正确的,但结果是逐条追加输出的:我得到的不是一行包含全部 4 处修改的结果,而是 4 行,每行只包含 4 处修改中的一处。

我尝试过 r、r+、w、a 这几种打开模式,结果基本相同。我也尝试过只使用一条 print 语句:这样只会输出一行,但其中只包含 1 处修改。

import re

# Question's original script: four independent passes over orig.csv.
# Every pass re-reads the untouched input and applies exactly ONE substitution
# to each original line, so no pass ever sees the result of a previous one.
# That is why the printed output shows one line per change instead of a
# single line carrying all four changes.

# Pass 1: strip everything that looks like a <...> tag.
with open("orig.csv","r") as fi:
    contents=fi.readlines()

# "r+" opens output.csv for reading and writing and requires the file to exist.
with open("output.csv","r+") as fi:
    for line in contents:
        # Rebinds the name `fi` (the file handle) to the substituted string;
        # nothing is ever written to output.csv, the result is only printed.
        fi = re.sub(r"<.*?>","",line)
        print(fi)

# Pass 2: collapse ",<field>,Skipped," to "," -- applied to the ORIGINAL line.
with open("orig.csv","r") as fi:
    contents=fi.readlines()
with open("output.csv","r+") as fi:
    for line in contents:
        fi=re.sub(r",[^,]+,Skipped,",",",line)
        print(fi)

# Pass 3: collapse ",<field>,Done," to "," -- again on the ORIGINAL line.
with open("orig.csv","r") as fi:
    contents=fi.readlines()
with open("output.csv","r+") as fi:
    for line in contents:
        fi=re.sub(r",[^,]+,Done,",",",line)
        print(fi)

# Pass 4: replace each ",," with "," -- again on the ORIGINAL line.
with open("orig.csv","r") as fi:
    contents=fi.readlines()

with open("output.csv","r+") as fi:
    for line in contents:
        fi=re.sub(r",,",",",line)
        print(fi)

我的原始数据:

<UUT><H s='12' v='2.8'/>    <V t='s' s='2'/>Profile Debug   <V t='s' s='2'/>Cycle   Normal  <V t='s' s='2'/>PMVer   14.0.1.103  <V t='s' s='2'/>SeqFileVer  1.6.0.0 <V t='s' s='2'/>User    administrator   <V t='s' s='2'/>Station TS-0357A    <V t='s' s='2'/>Socket  0   <V t='s' s='2'/>Date    9/10/2018   <V t='s' s='2'/>Time    17:23:51    <V t='n' s='2'/>CycleTime   0   <V t='s' s='2'/>Status  Failed  <V t='s' s='2'/>MAC_Address f8dc7a128189    <R s='42'/> <S t='a' s='3'/>CycleTimes  Done<S t='a' s='3'/>DEBUG_PRODUCTION_FALSE  Skipped<S t='a' s='3'/>DEBUG_TROUBLESHOOTING_TRUE

预期结果:

,Profile,Debug,Cycle,Normal,PMVer,14.0.1.103,SeqFileVer,1.6.0.0,User,administrator,Station,TS-0357A,Socket,0,Date,9/10/2018,Time,17:23:51,CycleTime,0,Status,Failed,MAC_Address,f8dc7a128189,CycleTimes,DEBUG_TROUBLESHOOTING_TRUE
4

1 回答 1

0

您应该只读取文件一次、只使用一个 for 循环,并在把每一行写入文件之前,对该行依次应用所有的 re.sub。

import re

# Clean orig.csv row by row and write one cleaned row per line to output.csv.
# All substitutions are chained on the same `line`, so each one sees the
# result of the previous one.
#
# NOTE: the two comma-based removals and the ",," collapse run BEFORE
# whitespace is converted to commas, so they only match commas that are
# already present in the input row.
with open("orig.csv", "r") as file_in, open("output.csv", "w") as file_out:
    for raw_line in file_in:  # stream the file instead of loading every row
        # Drop the line terminator first; otherwise the r"\s+" substitution
        # below turns it into a trailing "," and all rows end up glued
        # together on a single output line.
        line = raw_line.rstrip("\r\n")

        line = re.sub(r"<.*?>", "", line)  # strip <...> tags
        line = re.sub(r",[^,]+,Skipped,", ",", line)  # drop "<field>,Skipped"
        line = re.sub(r",[^,]+,Done,", ",", line)  # drop "<field>,Done"
        line = re.sub(r",,", ",", line)  # replace each ",," with ","
        line = re.sub(r"\s+", ",", line)  # each whitespace run -> one comma

        file_out.write(line + "\n")  # write in file, one row per line

        print(line)  # display on screen

每个人都可以轻松复制和测试的示例

import re

# Self-contained sample: one row of the original data, inlined so the example
# runs without orig.csv.
data = "<UUT><H s='12' v='2.8'/>    <V t='s' s='2'/>Profile Debug   <V t='s' s='2'/>Cycle   Normal  <V t='s' s='2'/>PMVer   14.0.1.103  <V t='s' s='2'/>SeqFileVer  1.6.0.0 <V t='s' s='2'/>User    administrator   <V t='s' s='2'/>Station TS-0357A    <V t='s' s='2'/>Socket  0   <V t='s' s='2'/>Date    9/10/2018   <V t='s' s='2'/>Time    17:23:51    <V t='n' s='2'/>CycleTime   0   <V t='s' s='2'/>Status  Failed  <V t='s' s='2'/>MAC_Address f8dc7a128189    <R s='42'/> <S t='a' s='3'/>CycleTimes  Done<S t='a' s='3'/>DEBUG_PRODUCTION_FALSE  Skipped<S t='a' s='3'/>DEBUG_TROUBLESHOOTING_TRUE"
# splitlines() rather than split('\n'): a trailing newline in `data` must not
# yield an extra empty row now that every row is written with its own "\n".
contents = data.splitlines()

with open("output.csv", "w") as file_out:
    for line in contents:
        line = re.sub(r"<.*?>", "", line)  # strip <...> tags
        # NOTE: this sample contains no commas, so the three comma-based
        # patterns below do not match it; they only act on commas already
        # present in the input.
        line = re.sub(r",[^,]+,Skipped,", ",", line)
        line = re.sub(r",[^,]+,Done,", ",", line)
        line = re.sub(r",,", ",", line)
        line = re.sub(r"\s+", ",", line)  # each whitespace run -> one comma
        file_out.write(line + "\n")  # terminate the row so rows stay separate
        print(line)
于 2019-09-08T19:02:41.300 回答