我在一个正方形中有很多点。我想将正方形划分为许多小矩形并检查每个矩形中有多少点,即我想计算点的联合概率分布。我正在报告几种常识方法,使用循环并且效率不高:
% Data
N = 1e5; % number of points
xy = rand(N, 2); % coordinates of points
xy(randi(2*N, 100, 1)) = 0; % add some points on one side
xy(randi(2*N, 100, 1)) = 1; % add some points on the other side
xy(randi(N, 100, 1), :) = 0; % add some points on one corner
xy(randi(N, 100, 1), :) = 1; % add some points on one corner
inds= unique(randi(N, 100, 1)); xy(inds, :) = repmat([0 1], numel(inds), 1); % add some points on one corner
inds= unique(randi(N, 100, 1)); xy(inds, :) = repmat([1 0], numel(inds), 1); % add some points on one corner
% Intervals for rectangles
K1 = ceil(sqrt(N/5)); % number of intervals along x
K2 = K1; % number of intervals along y
int_x = [0:(1 / K1):1, 1+eps]; % intervals along x
int_y = [0:(1 / K2):1, 1+eps]; % intervals along y
% First approach
tic
count_cells = zeros(K1 + 1, K2 + 1);
for k1 = 1:K1+1
inds1 = (xy(:, 1) >= int_x(k1)) & (xy(:, 1) < int_x(k1 + 1));
for k2 = 1:K2+1
inds2 = (xy(:, 2) >= int_y(k2)) & (xy(:, 2) < int_y(k2 + 1));
count_cells(k1, k2) = sum(inds1 .* inds2);
end
end
toc
% Elapsed time is 46.090677 seconds.
% Second approach
tic
count_again = zeros(K1 + 2, K2 + 2);
for k1 = 1:K1+1
inds1 = (xy(:, 1) >= int_x(k1));
for k2 = 1:K2+1
inds2 = (xy(:, 2) >= int_y(k2));
count_again(k1, k2) = sum(inds1 .* inds2);
end
end
count_again_fix = diff(diff(count_again')');
toc
% Elapsed time is 22.903767 seconds.
% Check: the two solutions are equivalent
all(count_cells(:) == count_again_fix(:))
如何在时间、内存和可能避免循环方面更有效地做到这一点?
编辑 -->我也刚刚找到了这个,这是迄今为止找到的最好的解决方案:
tic
count_cells_hist = hist3(xy, 'Edges', {int_x int_y});
count_cells_hist(end, :) = []; count_cells_hist(:, end) = [];
toc
all(count_cells(:) == count_cells_hist(:))
% Elapsed time is 0.245298 seconds.
但它需要统计工具箱。
编辑 --> chappjc 建议的测试解决方案
tic
xcomps = single(bsxfun(@ge,xy(:,1),int_x));
ycomps = single(bsxfun(@ge,xy(:,2),int_y));
count_again = xcomps.' * ycomps; %' 143x143 = 143x1e5 * 1e5x143
count_again_fix = diff(diff(count_again')');
toc
% Elapsed time is 0.737546 seconds.
all(count_cells(:) == count_again_fix(:))