1


我需要向一个 txt 文件写入很多行,每行包含若干项信息。结果文件类似如下:

result.txt:
RED;12;7;0;2;1;4;7;0.0140
RED;12;7;0;2;2;9;7;0.1484
RED;12;7;0;2;3;7;4;0.1787
RED;12;7;0;2;4;2;6;0.7891
RED;12;7;0;2;5;9;6;0.1160
RED;12;7;0;2;6;9;1;0.9893
...

该文件由以下代码生成(为便于说明,这里缩小了各维度的大小):

% The variables 'str1', 'num1', 'day', 'vect1', 'vect2' and 'MD' are inputs of this function:
%   str1  - string (1x1)
%   num1  - integer (1x1)
%   day   - vector (10x1)
%   vect1 - vector (7x1)
%   vect2 - vector (180x1)
%   MD    - 4-D matrix (7x180x10x15), indexed below as MD(i3,i4,i2,i1)

fid = fopen(path_result, 'Wt');
for i1 = 1:15
    for i2 = 1:10
        for i3 = 1:7
            for i4 = 1:180
                % Print all the values as one ';'-separated record per line.
                fprintf(fid,'%s%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%.4f \n',...
                    str1,';',num1,';',i1,';',0,';',2,';',...
                    day(i2,1),';',vect1(i3),';',...
                    vect2(i4),';',MD(i3,i4,i2,i1));
            end
        end
    end
end
% The file is opened in 'W' mode (no automatic flushing), so it must be
% closed explicitly to flush the buffered records and release the handle.
fclose(fid);

我在 Stack Overflow 的其他帖子中看到过一些向量化的做法,但我认为它们无法应用于这里。有什么办法可以加快这段代码的运行速度吗?
提前致谢

4

3 回答 3

3

第一个在任何情况下都适用的优化是:预先生成格式字符串(下面称之为 fmt),把不随循环变化的部分直接写入其中:

% Build the record format once. The loop-invariant fields (str1, num1, 0, 2)
% are substituted here, while the escapes '%%' and '\\n' leave '%' and '\n'
% in fmt for the later per-record sprintf call. The statement has no
% trailing semicolon so that the resulting fmt is echoed.
fmt       = sprintf('%s;%d;%%d;%d;%d;%%d;%%d;%%d;%%.4f \\n',str1,num1,0,2)
fmt =
RED;4;%d;0;2;%d;%d;%d;%.4f \n

之后循环内的代码变为:

% Per-record call inside the innermost loop: fmt already carries the
% constant fields, so only the five varying values are passed.
x = sprintf(fmt, i1, day(i2,1), vect1(i3), vect2(i4), MD(i3,i4,i2,i1));

下面是一个完全向量化的方案:它以占用更多内存为代价,换来了一个数量级的加速——在我的机器上从 9.61 秒降到 0.89 秒,约快 10.8 倍。

tic
% ndgrid varies its first input fastest, so the inputs are listed from the
% innermost loop variable (vect2) to the outermost (1:15).
[gVect2, gVect1, gDay, gOuter] = ndgrid(vect2, vect1, day, 1:15);
% MD is indexed as MD(i3,i4,i2,i1); swapping its first two dimensions makes
% its linear order match the grids above.
mdColumn  = reshape(permute(MD, [2,1,3,4]), [], 1);
% One row per record, then transposed: sprintf consumes its data in
% column-major order and reuses fmt for every group of five values.
records   = [gOuter(:), gDay(:), gVect1(:), gVect2(:), mdColumn]';
out       = sprintf(fmt, records);
toc
于 2013-08-27T22:50:58.590 回答
2

One of the things you can do to optimize your code is to look for "repeated" bits of code. In your case, you format ALL of the result string in the innermost loop - although much of the string doesn't change. You also "format" the separator string ';' several times - you can have that directly in your formatting string (you can intersperse text and formatting commands in the format string). I combined these ideas in a few different ways, and timed them:

% Test inputs with the shapes described in the question:
% day 10x1, vect1 7x1, vect2 180x1, MD 7x180x10x15.
str1 = 'hello';
num1 = 123;
day = (1:10)';
vect1 = (1:7)';
vect2 = (1:180)';
MD = rand(7,180,10,15);
path_result = './mixedOutput1.txt';
fid = fopen(path_result, 'Wt');    
tic
% Baseline: the question's loop, unchanged. Every ';' separator and the
% constants 0 and 2 are passed as separate arguments on each call.
for i1 = 1:15   
    for i2 = 1:10    
        for i3 = 1:7           
           for i4= 1:180
                % Print all the values of one record.
                fprintf(fid,'%s%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%.4f \n',...
                str1,';',num1,';',i1,';',0,';',2,';',...
                day(i2,1),';',vect1(i3),';',...
                vect2(i4),';',MD(i3,i4,i2,i1));
            end
        end
    end
end
% The elapsed time is read before the file is closed.
fprintf(1, 'time for original loop: %.2f sec\n',toc)
fclose(fid);
%%
% Variant 2: same loop nest, but the ';' separators and the constant
% fields 0 and 2 are written directly into the format string, so only the
% seven varying values are passed to fprintf.
path_result = './mixedOutput2.txt';
fid = fopen(path_result, 'Wt');    
tic
for i1 = 1:15   
    for i2 = 1:10    
        for i3 = 1:7           
           for i4= 1:180
                % Print all the values of one record.
                fprintf(fid,'%s;%d;%d;0;2;%d;%d;%d;%.4f \n',...
                str1, num1, i1, day(i2,1), vect1(i3), vect2(i4),MD(i3,i4,i2,i1));
            end
        end
    end
end
% The elapsed time is read before the file is closed.
fprintf(1, 'time for faster loop: %.2f sec\n',toc)
fclose(fid);
%%
% Variant 3: format every record into a preallocated cell array with
% sprintf, then write all of them with a single fprintf call.
path_result = './mixedOutput3.txt';
fid = fopen(path_result, 'Wt');    
tic
y = cell(1,15*10*7*180);   % one cell per output record
cc = 0;                    % running record index into y
for i1 = 1:15   
    for i2 = 1:10    
        for i3 = 1:7           
           for i4= 1:180
                % Format one record and store it.
                cc = cc + 1;
                y{1,cc} = sprintf('%s;%d;%d;0;2;%d;%d;%d;%.4f \n',...
                str1, num1, i1, day(i2,1), vect1(i3), vect2(i4), MD(i3,i4,i2,i1));
            end
        end
    end
end
% First reading: formatting only, nothing has been written yet.
fprintf(1, 'time for loop with sprintf intermediate step: %.2f sec\n', toc)
% '%s' is reused for every element of y, so all records go out in one call.
fprintf(fid, '%s', y{:});
% Second reading: tic is not restarted, so this is formatting plus writing.
fprintf(1, 'time including file write: %.2f sec\n', toc);
fclose(fid);

%% optimize loop more:

% Variant 4 ("preformat"): the part of each record that does not depend on
% the innermost index i4 is formatted once per (i1,i2,i3) and reused for
% all 180 inner iterations.
path_result = './mixedOutput4.txt';
fid = fopen(path_result, 'Wt');
tic
for i1 = 1:15
    for i2 = 1:10
        for i3 = 1:7
            % Record prefix shared by every i4 in this inner loop.
            x = sprintf('%s;%d;%d;0;2;%d;%d;', ...
                str1, num1, i1, day(i2,1), vect1(i3));
            for i4= 1:180
                % Append the two fields that vary with i4 and write the line.
                fprintf(fid, '%s%d;%.4f \n', ...
                     x, vect2(i4), MD(i3,i4,i2,i1));
            end
        end
    end
end
% The elapsed time is read before the file is closed.
fprintf(1, 'time for fastest loop: %.2f sec\n', toc);
fclose(fid);

On my machine, this resulted in the following benchmarks:

Original loop: 15.9 sec
Faster format:  9.2 sec
With sprintf:   8.2 sec
preformat:      6.2 sec

The "preformat" was not done as efficiently as possible - it was just there for illustration. The intermediate string x is computed much less frequently, then re-used.

Finally - I did create a "vectorized" version of the code - meaning that the entire sprintf happens in a single line. This requires creating a big cell array (Kahuna, below) with the right elements - it turns out that's actually marginally less efficient than the last code above (with the "preformatting"), but here it is just in case:

%% truly vectorized:
tic
N  = 15 * 10 * 7 * 180;    % total number of records
N1 = ones(1, N);           % row partition for mat2cell: one row per cell
Kahuna = cell(7, N);       % one column per record, one row per format field

% Split an N-by-1 column into an N-by-1 cell array of single elements.
toCells = @(col) mat2cell(col, N1, 1);
% Lay v out along one dimension (dims), replicate it over the other loop
% dimensions (reps) and flatten the result to a column.
spread  = @(v, dims, reps) reshape(repmat(reshape(v, dims), reps), [], 1);

% final order needs to be [180 7 10 15] - inner loop first
Kahuna(1,:) = cellstr(repmat(str1, [N 1]))';
Kahuna(2,:) = toCells(repmat(num1, [N 1]));
Kahuna(3,:) = toCells(spread(1:15,  [  1 1  1 15], [180 7 10  1]));
Kahuna(4,:) = toCells(spread(day,   [  1 1 10  1], [180 7  1 15]));
Kahuna(5,:) = toCells(spread(vect1, [  1 7  1  1], [180 1 10 15]));
Kahuna(6,:) = toCells(spread(vect2, [180 1  1  1], [  1 7 10 15]));
% MD is indexed as MD(i3,i4,i2,i1); swap its first two dimensions so that
% its linear order matches the [180 7 10 15] layout used above.
Kahuna(7,:) = toCells(reshape(permute(MD, [2 1 3 4]), [], 1));

% Kahuna{:} expands column by column, i.e. the seven fields of record 1,
% then those of record 2, and so on; sprintf reuses the format for each.
x = sprintf('%s;%d;%d;0;2;%d;%d;%d;%.4f \n', Kahuna{:});
toc
于 2013-08-27T19:15:13.323 回答
0

实际上,先把所有内容生成好、再一次性写入文件(至少按我尝试的方式)似乎并不能提高速度。我最初的想法是用 save(file,data,'-ascii') 保存数据,但结果不符合预期。

如果您只有数值数据,也许可以使用 dlmwrite,但我想在这里这并不可行。

下面是我的尝试与您原始代码的计时对比,其中使用了一些假设的输入:

% Hypothetical inputs used for both timing runs.
str1 = 'RED';
num1 = 4;
% NOTE(review): rand(10) is a 10x10 matrix of non-integer values. Only
% day(:,1) is used below, and '%d' applied to a non-integer value is
% printed in exponential notation. The question describes day as a 10x1
% vector - confirm this input is intended.
day = rand(10);
vect1 = 1:7;
vect2 = 1:180;
MD = rand(7,180,10,15);
y = {};   % collected records, one string per cell

tic
% Step 1: format every record and collect it in y.
% NOTE(review): y grows by one cell per iteration (no preallocation). This
% is kept as-is because the timing quoted for this run describes this code,
% but the repeated growth may account for much of that time.
for i1 = 1:15
    for i2 = 1:10
        for i3 = 1:7
           for i4= 1:180

                x=sprintf('%s%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%.4f \n',...
                str1,';',num1,';',i1,';',0,';',2,';',...
                day(i2,1),';',vect1(i3),';',...
                vect2(i4),';',MD(i3,i4,i2,i1));
                y{end+1} = x;
            end
        end
    end
end

% Step 2: write the collected records.
fid = fopen('test.txt','w');
for i=1:length(y)
         % Write each record as data through '%s'. Passing it as the format
         % string would reinterpret any '%' or '\' it contains.
         fprintf(fid,'%s',y{i});
end
fclose(fid);
t1=toc;

% Reference run: the question's original loop, writing each record
% directly to the file.
tic
fid = fopen('test.txt', 'Wt');
for i1 = 1:15
    for i2 = 1:10
        for i3 = 1:7
           for i4= 1:180
                fprintf(fid,'%s%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%.4f \n',...
                str1,';',num1,';',i1,';',0,';',2,';',...
                day(i2,1),';',vect1(i3),';',...
                vect2(i4),';',MD(i3,i4,i2,i1));
            end
        end
    end
end
% Close the file so the buffered output ('W' mode does not flush
% automatically) is written and the handle is released. It is closed before
% toc so that t2 covers the same work as t1, which also includes fclose.
fclose(fid);
t2=toc;

myTime = t1 % 56 secs
originalTime = t2 % 12 secs
于 2013-08-27T14:02:31.283 回答