1

我在 erlang 中丢失的消息有一些问题。

我正在使用的代码在我手动使用时 100% 正确,仅当我的代码用于“负载测试”时,我与大量请求并行测试代码时,接收器部分从未收到某些消息. 在记录所有步骤和参数值时,我发现我发送消息的地址是正确的。消息本身也没有问题。

我的问题如下:在erlang中是否知道这种“丢失消息”,这可能是erlang本身的一些错误吗?

如果需要,我可以发布一些我正在使用的代码,但我认为它不会特别为这个问题增加很多价值。

更新:我的应用程序的主要部分。这是很多代码来说明我的问题,但我无法在简化版本中重现我的问题。该应用程序是一个分配系统,即它将并行保留网格中的单元集合。重要的部分是: globalManager,一个将控制整个分配系统的参与者。rowManager 将管理整个网格的一行,并在进行预订时锁定该行。当必须保留一个单元区域时,调用函数 request_specific_cells。此函数将向必须修改行的所有行管理器发送预订请求。当行管理器在其行中保留了区域时,它将向 globalManager 发送确认。当所有的rowmanagers都发送确认信息后,

    globalManager(Grid) ->
    receive
        {Pid, request_specific_cells, ReservationId, Coordinates, Ctr, XX} ->
            NewGrid = request_specific_cells(Grid, Pid, ReservationId, Coordinates, Ctr, XX);

        {Pid, confirm_region, ResId, Rid, Sid, Region, Section, Ctr, XX} ->
            NewGrid = confirm_region(Grid, Pid, ResId, Rid, Sid, Region, Section, Ctr, XX);

        {Pid, failed_region, Rid, Region, Ctr, XX} ->
            NewGrid = failed_region(Grid, Pid, Rid, Region, Ctr, XX);

        Else ->
            erlang:display({unexpectedMessage, actor, Else}),
            NewGrid = Grid
    end,
    globalManager(NewGrid).


request_specific_cells(Grid, Pid, ReservationId, Coordinates, Ctr, XX) ->
    {{Width, Height}, GridRows, MaxAllocationSize, FreeCells, {UnspecificRequests, NextId}, PendingRequests, BlockedRows} = Grid,
    {X, Y, W, H} = Coordinates,
    Rows         = lists:seq(Y,Y+H-1),
    % Is one of the blocks that have to be reserved currently blocked?
    BlockedRow   = lists:foldl(fun(B, Acc) -> Acc xor search_list(B,BlockedRows) end, false, Rows),
    Request      = lists:keyfind(ReservationId, 1, UnspecificRequests),
    {ReservationId, _} = Request,
    % Now we need the addresses of the sections in which the regions has to be reserved.
    SubSectionIds = [ SPid || {_,SPid} <- [ lists:keyfind(Row, 1, GridRows) || Row <- Rows]],
    % Storing request enables us to rollback if one of the registrations fails.
    NewPendingRequests = PendingRequests ++ [{length(PendingRequests), 0, lists:map(fun(S) -> {S,null} end, SubSectionIds)}],
    % Send a registration command with the needed section to each corresponding section manager.
    [SPid ! {self(), request, Pid, ReservationId, length(PendingRequests), Coordinates, Ctr, XX} || SPid<- SubSectionIds],
    NewBlockedRows = Rows ++ BlockedRows,
    {{Width, Height}, GridRows, MaxAllocationSize, FreeCells, {UnspecificRequests, NextId}, NewPendingRequests, NewBlockedRows}
    end.


confirm_region(Grid, Pid, URid, Rid, Sid, Region, Section, Cttr, XX) ->
    {Dimensions, GridRows, MaxAllocationSize, FreeCells, {UnspecificRequests, NextId}, PendingRequests, BlockedRows} = Grid,
    {_,RY,_,_} = Region,
    if
        % All blocks have confirmed the reservation so the entire request is successful
        (Ctr+1) == length(Spids) -> 
                NewUnspecificRequests = lists:keydelete(URid, 1, UnspecificRequests),
                NewPendingRequests = lists:keydelete(Rid, 1, PendingRequests),
                NewSpids = lists:keyreplace(Sid, 1, Spids, {Sid, Section}),
                [Spid ! {self(), confirm_region, Sec} || {Spid, Sec} <- NewSpids],
                Pid ! {self(), request_specific_cells, Rid, success};
        true -> 
                NewUnspecificRequests = UnspecificRequests,
                % Safe the region that has to be marked/rolled back in the row
                NewSpids = lists:keyreplace(Sid, 1, Spids, {Sid, Section}),
                % Increase counter of confirmations
                NewPendingRequests = lists:keyreplace(Rid, 1, PendingRequests, {Rid, Ctr+1, NewSpids})
    end,
    NewBlockedRows = delete_list(RY, BlockedRows)
    {Dimensions, GridRows, MaxAllocationSize, FreeCells, {NewUnspecificRequests, NextId}, NewPendingRequests, NewBlockedRows}.



rowManager(Row) ->
    receive
        {Mid, request, Pid, URid, Rid, Region, Ctr, XX} ->
            NewRow = request_region(Row, Mid, Pid, URid, Rid, Region, Ctr, XX);
        Else ->
            erlang:display({unexpectedMessage, rowManager, Else}),
            NewRow = Row
    end,

    rowManager(NewRow).

request_region(Row, Mid, Pid, URid, Rid, Coordinates, Ctr, XX) ->
    {RY, Content, Modified} = Row,
    {X,_,W,_}    = Coordinates,
    if
        Modified == false -> 
                Free = region_is_empty({X,1,W,1}, Content),
                if
                    Free -> NewModified = true,
                            NewContent = mark_region({X,1,W,1}, Content, reserved),
                            Mid ! {Pid, confirm_region, URid, Rid, self(), Coordinates, {X,1,W,1}, Ctr, XX};
                    true -> NewModified = false,
                            NewContent = Content,
                            Mid ! {Pid, failed_region, Rid, Coordinates, Ctr, XX}
                end;
        true -> NewModified = false,
                NewContent = Content,
                Mid ! {Pid, failed_region, Rid, Coordinates, Ctr, XX}
    end,
    {RY, NewContent, NewModified}. 

保留者将使用此代码:

request_specific_cells(FollowUpPid, ReservationId, {X, Y, Width, Height}, Ctr, XX) ->
   FollowUpPid ! {self(), request_specific_cells, ReservationId, {X, Y, Width, Height}, Ctr, XX},
   receive
      {FollowUpPid, request_specific_cells, ReservationId, SuccessOrFailure} ->
        SuccessOrFailure
end.

我认为这个接收器在收到答案之前就死了,因为我知道

Pid ! {self(), request_specific_cells, Rid, success}

从 confirm/9 函数总是以正确的值执行,但并不总是在函数中接收。

4

1 回答 1

3

如果接收者还活着,Erlang 在同一节点内有很强的消息传递保证。

您的代码中似乎有一些竞争条件。尝试编写具有相同问题的应用程序的较小示例并将其发布在此处。

于 2013-04-18T13:24:51.770 回答