此代码应该运行得非常快(在 0.2 秒内拆分 1M 个字符):
% Split a text file into a cell array of words.
%
% Result: Output is a NumOfLines-by-MaxWords cell array where Output{i,j}
% holds word j of line i as a char row vector. Cells beyond the last word
% of a shorter line stay empty ([]).
%
% Rules applied to the text before splitting:
%   * tabs are treated as word delimiters (converted to spaces),
%   * carriage returns are dropped, so CRLF and LF files behave the same,
%   * runs of consecutive spaces collapse to a single delimiter,
%   * a final line delimiter is appended when the file does not end in one.
% Every delimiter terminates a word, so a leading space, a trailing space or
% an empty line each yield an empty (1x0 char) word.
%
% To generate a random test file, run once:
% w=[10,13,32*ones(1,10),97:122,97:122];
% FILE_LENGTH=10*1000*1000;mytext=char(w(randi(length(w),1,FILE_LENGTH)));
% fileID = fopen('z:\test.asc','w');fprintf(fileID,'%s',mytext);fclose(fileID);
clear
tic

% ---- settings ----
Filename='z:\test.asc';
LineDelimiter=newline;   % char(10)
WordDelimiter=' ';

% ---- read file ----
[fid,errmsg]=fopen(Filename,'r');
if fid==-1
    % Fail here with the OS reason instead of a confusing error from fread.
    error('Cannot open "%s": %s',Filename,errmsg);
end
% Force a 1-by-N row vector, including the empty-file case (1-by-0).
text=reshape(fread(fid,'*char'),1,[]);
fclose(fid);

% ---- normalise text ----
text(text==char(9))=WordDelimiter;   % tab -> space
text(text==char(13))=[];             % drop '\r'
% Guard against empty text: text(end) is an invalid index when there is
% nothing to index, and an empty file should simply produce zero lines.
if ~isempty(text) && text(end)~=LineDelimiter
    text(end+1)=LineDelimiter;       % make sure the last line is terminated
end
% Collapse runs of spaces: delete every space that is immediately followed
% by another space, which leaves exactly one per run.
IdxWords=find(text==WordDelimiter);
text(IdxWords(diff(IdxWords)==1))=[];

% ---- count words per line ----
IdxNewline=find(text==LineDelimiter);
NumOfLines=length(IdxNewline);       % two consecutive EOLs count as two lines
% Positions of every delimiter (space or newline); each one ends a word.
IdxWords=find(text==WordDelimiter|text==LineDelimiter);
% Rank of each newline within the delimiter list; the gap between successive
% ranks is the number of words ending on that line.
WordsPerLine=diff([0,find(text(IdxWords)==LineDelimiter)]);
% Append 0 so the result is a valid scalar size even when there are no lines.
MaxWords=max([WordsPerLine,0]);
% Longest word length in characters: distance between successive delimiters
% minus the delimiter itself; the leading 0 accounts for the first word.
LongestWord=max(diff([0,IdxWords]))-1;

% ---- split ----
Output=cell(NumOfLines,MaxWords);
pos=1;      % index of the first character of the next word
iword=0;    % running index into IdxWords
for i=1:NumOfLines
    for j=1:WordsPerLine(i)
        iword=iword+1;
        Output{i,j}=text(pos:IdxWords(iword)-1);
        pos=IdxWords(iword)+1;
    end
end
toc
% disp(Output)