11

我正在尝试将 Maia 包中的一些 MATLAB 代码转换为可以在 Octave 中运行的代码。我目前卡住了,因为其中一个文件多次调用 containers.Map,而 Octave 显然尚未实现它。有没有人知道如何在 Octave 中轻松实现类似的功能,而不需要做大量额外的工作?谢谢大家抽出时间。

function [adj_direct contig_direct overlap names longest_path_direct ...
          weigth_direct deltafiles deltafiles_ref ReferenceAlignment ...
          contig_ref overlap_ref name_hash_ref] = ...
          assembly_driver(assemblies,ref_genome,target_chromosome, ...
                          deltafiles_ref,contig_ref, overlap_ref, ...
                          name_hash_ref, varargin)

% ASSEMBLY_DRIVER Combines contig sets into one assembled chromosome
%
% INPUT
%   assemblies         cell array with one row per assembly; column 1 is
%                      used as the assembly name, column 2 is handed to the
%                      aligner as the assembly location and column 3 is
%                      stored as the assembly quality
%   ref_genome         reference genome, passed on to align_contigs_sets
%   target_chromosome  chromosome to assemble; reference alignments whose
%                      R field differs from it are discarded
%   deltafiles_ref, contig_ref, overlap_ref, name_hash_ref
%                      results of an earlier alignment against the
%                      reference. They are only used when the option
%                      'ReferenceAlignment' is given; otherwise they are
%                      recomputed and the recomputed values are returned.
%
%   OPTIONAL (name/value pairs)       DEFAULT
%       'Startnode_name'              '' (determined by get_reference_nodes)
%       'Endnode_name'                '' (determined by get_reference_nodes)
%       'z_weigths' / 'z_weights'     [.25 .25 .25 .25]
%       'clipping_thrs'               10
%       'ref_distance'                -10
%       'ref_quality'                 1E-5
%       'max_chromosome_dist'         100
%       'quit_treshold'               15
%       'tabu_time'                   3
%       'minimum_improvement'         -inf
%       'ref_node_assemblies'         all assemblies (slow); the value is a
%                                     row index into ASSEMBLIES
%       'extend_ends' / 'endextend'   true
%       'AllowReverse'                true
%       'ReferenceAlignment'          'NotYetDoneByMaia'
%       'containment_edge'            true
%       'ref_first'                   true
%       'mapfilter'                   '-rq'
%       'alignlen'                    75
%       'ident'                       85
%
% OUTPUT
%   adj_direct, contig_direct         directed graph returned by direct_graph
%   overlap, names                    overlaps and node names after the
%                                     reference nodes have been added
%   longest_path_direct, weigth_direct
%                                     result of longest_path_tabu on the
%                                     directed graph
%   deltafiles                        merged pairwise alignment deltas
%                                     ([] when there is a single assembly)
%   deltafiles_ref, contig_ref, overlap_ref, name_hash_ref
%                                     reference alignment data (recomputed
%                                     or passed through unchanged)
%   ReferenceAlignment                the option value, or 'out2.delta'
%                                     when the alignment was computed here
%

    % SET DEFAULTS
    % General parameters
    z_weights           = [.25 .25 .25 .25];
    clipping_thrs       = 10;
    mapfilter           = '-rq';
    alignlen            = 75;
    ident               = 85;

    % Reference node parameters
    ref_distance        = -10;
    ref_quality         = 1E-5;
    max_chromosome_dist = 100;
    % TABU parameters
    quit_treshold       = 15;
    tabu_time           = 3;
    minimum_improvement = -inf;
    ref_node_assemblies = assemblies;
    % Extending the assembly outwards from the start and end node
    % NOTE(review): endextend is parsed but not used anywhere in this function.
    endextend           = true;
    AllowReverse        = true;
    % If no start and end node are given, they will be determined from tiling
    Startnode_name      = '';
    Endnode_name        = '';
    containment_edge    = true;
    ref_first           = true;

    % If contigs have already been aligned to the reference, give the
    % deltafile
    ReferenceAlignment = 'NotYetDoneByMaia';

    % Get VARARGIN user input (name/value pairs)
    if mod(numel(varargin), 2) ~= 0
        error('Optional inputs must be given as name/value pairs');
    end
    for k = 1:2:numel(varargin)
        option = varargin{k};
        value  = varargin{k+1};
        if ~ischar(option)
            error('Option names must be strings');
        end
        switch option
            case 'Startnode_name'
                Startnode_name = value;
            case 'Endnode_name'
                Endnode_name = value;
            case {'z_weigths', 'z_weights'}
                % 'z_weigths' is the historical (misspelled) option name
                z_weights = value;
            case 'clipping_thrs'
                clipping_thrs = value;
            case 'ref_distance'
                ref_distance = value;
            case 'ref_quality'
                ref_quality = value;
            case 'max_chromosome_dist'
                max_chromosome_dist = value;
            case 'quit_treshold'
                quit_treshold = value;
            case 'tabu_time'
                tabu_time = value;
            case 'minimum_improvement'
                minimum_improvement = value;
            case 'ref_node_assemblies'
                % value selects the rows of ASSEMBLIES to use
                ref_node_assemblies = assemblies(value,:);
            case {'extend_ends', 'endextend'}
                % Previously this indexed ASSEMBLIES with the value
                % (copy-paste from the case above) instead of storing it.
                endextend = value;
            case 'AllowReverse'
                AllowReverse = value;
            case 'ReferenceAlignment'
                ReferenceAlignment = value;
            case 'containment_edge'
                containment_edge = value;
            case 'ref_first'
                ref_first = value;
            case 'mapfilter'
                mapfilter = value;
            case 'alignlen'
                alignlen = value;
            case 'ident'
                ident = value;
            otherwise
                % Report the unknown option NAME (not its value)
                error('Input %s is not known', option);
        end
    end

    % Read input assemblies
    assembly_names   = assemblies(:,1);
    assembly_locs    = assemblies(:,2);
    assembly_quality = containers.Map(assemblies(:,1),assemblies(:,3));
    assembly_quality('reference') = ref_quality;

    % Read input assemblies for creation of reference nodes
    ref_node_assembly_names   = ref_node_assemblies(:,1);
    ref_node_assembly_locs    = ref_node_assemblies(:,2);
    ref_node_assembly_quality = containers.Map(ref_node_assemblies(:,1),ref_node_assemblies(:,3));
    ref_node_assembly_quality('reference') = ref_quality;


    % If there is only one assembly there is nothing to align
    if size(assemblies,1) >= 2

        % Align every unordered pair of assemblies against each other.
        % Only the delta output is kept: the adjacency matrix below is
        % built from the delta files, the coords output is not used.
        assembly_pairs = {};
        deltafiles = {};
        for i = 1:length(assembly_locs)-1
            for j = i+1:length(assembly_locs)
                [coordsfile,deltafile] = align_assemblies({assembly_locs{i},assembly_locs{j}},{assembly_names{i}, assembly_names{j}}, ...
                                                           mapfilter, alignlen, ident);
                deltafiles = [deltafiles; {deltafile}];
                assembly_pairs = [assembly_pairs;[assembly_names(i) assembly_names(j)]];
            end
        end

        % Put the nucmer alignments in an adjacency matrix
        [adj, names, name_hash, contig, overlap] = get_adj_matrix(deltafiles, assembly_pairs, assembly_quality, z_weights, 'clipping_thrs', clipping_thrs, 'dove_tail', 'double','edge_weight','z-scores', 'containment_edge', containment_edge);

        % Merge the per-pair deltas into one horizontally concatenated array
        deltafiles = [deltafiles{:}];

    else
        assembly_pairs = {};
        deltafiles = [];
        adj = [];
        names = {};
        name_hash = containers.Map;
        contig  = struct('name',{},'size',[],'chromosome',[],'number',[], 'assembly', [], 'assembly_quality', []);
        overlap = struct('Q',{},'R',[],'S1',[],'E1', [], 'S2', [], 'E2', [], 'LEN1', [], 'LEN2', [], 'IDY', [], 'COVR', [], 'COVQ', [],'LENR',[], 'LENQ',[]);
    end


    % Add the pseudo nodes to the graph. If the contigs have already been
    % aligned to the reference genome, just select the alignments that
    % correspond to the target chromosome
    if isequal(ReferenceAlignment,'NotYetDoneByMaia')
        % Align all contigs of the reference-node assemblies to the reference
        [contig_ref, overlap_ref, name_hash_ref, deltafiles_ref] = align_contigs_sets(...
            ref_genome, ref_node_assembly_locs, ref_node_assembly_names, ...
            ref_node_assembly_quality, clipping_thrs, z_weights, ...
            ref_distance,max_chromosome_dist);

        ReferenceAlignment = 'out2.delta';
    end
    % Select only the entries in the deltafile for the current target chromosome
    [contig_target_ref, overlap_target_ref, name_hash_target_ref, delta_target_ref] = ...
              GetVariablesForTargetChromosome(...
              contig_ref, overlap_ref, deltafiles_ref);


    % Disabled: ref clipping should be high in case of tiling
    %if isequal(max_chromosome_dist,'tiling')
    %    clipping_thrs = 10000
    %end

    % Add reference nodes to the adjacency matrix
    [adj, names, name_hash, contig, overlap, delta_target_ref, Startnode_name, Endnode_name] = get_reference_nodes( ...
                     adj, names, name_hash, contig, overlap, target_chromosome, ...
                     contig_target_ref, overlap_target_ref, name_hash_target_ref, delta_target_ref, ...
                     max_chromosome_dist, ref_distance, clipping_thrs, ref_first,...
                     Startnode_name, Endnode_name, AllowReverse);


    % Disabled: give reference edges some small extra value to distinguish
    % between assemblies to which a reference node leads
    % adj = rank_reference_edges(adj,contig,assembly_quality);

    % Specify a start and an end node for the assembly
    Startnode = name_hash(Startnode_name);
    Endnode = name_hash(Endnode_name);


    % Find the best scoring path
    fprintf('Directing the final graph\n');
    % Calculate path on undirected graph to get an idea on how to direct the graph
    [longest_path weigth] = longest_path_tabu(adj, Startnode, Endnode, quit_treshold, tabu_time, minimum_improvement);
    % Make the graph directed (greedy)
    [adj_direct contig_direct] = direct_graph(adj,overlap, contig, names, name_hash,clipping_thrs, Startnode, longest_path, true, ref_first);
    % Calculate final layout-path
    fprintf('Find highest scoring path\n');
    [longest_path_direct weigth_direct] = longest_path_tabu(adj_direct, Startnode, Endnode, quit_treshold, tabu_time, minimum_improvement);


    function [contig_target_ref, overlap_target_ref, name_hash_target_ref, delta_target_ref] = ...
              GetVariablesForTargetChromosome(...
              contig_ref, overlap_ref, deltafiles_ref)
        % Restrict the reference alignment data to target_chromosome
        % (target_chromosome is read from the enclosing function's workspace).
        %
        % All three loops run backwards so that deleting element k does not
        % shift the elements that still have to be visited, and use numel so
        % that row and column struct vectors are filtered alike.

        % Drop delta entries whose reference is not the target chromosome
        delta_target_ref = deltafiles_ref;
        for di = numel(delta_target_ref):-1:1
            if ~isequal(delta_target_ref(di).R,target_chromosome)
                delta_target_ref(di) = [];
            end
        end
        % Drop overlaps whose reference is not the target chromosome
        overlap_target_ref = overlap_ref;
        for oi = numel(overlap_target_ref):-1:1
            if ~isequal(overlap_target_ref(oi).R,target_chromosome)
                overlap_target_ref(oi) = [];
            end
        end
        % Drop reference contigs of other chromosomes; contigs that belong
        % to a non-reference assembly are always kept
        contig_target_ref = contig_ref;
        for ci = numel(contig_target_ref):-1:1
            if isequal(contig_target_ref(ci).assembly, 'reference') && ~isequal(contig_target_ref(ci).name,target_chromosome)
                contig_target_ref(ci) = [];
            end
        end
        % Rebuild the name lookup for the remaining contigs
        name_hash_target_ref = make_hash({contig_target_ref.name}');
    end


end
4

1 回答 1

13

据我所知,Octave 中没有与 containers.Map 完全等价的实现……

一种选择是使用 java 包来创建 java.util.Hashtable,例如:

% Use a Java Hashtable as a key/value map.
% Octave 3.8 and later ship Java support in core, so there is no separate
% "java" package to load; only load it when javaObject is not yet available.
if ~exist('javaObject')
    pkg load java
end
d = javaObject("java.util.Hashtable");
% put() returns the previous value for the key; suppress that output
d.put('a',1);
d.put('b',2);
d.put('c',3);
% Look up the value stored under key 'b'
d.get('b')

如果您愿意进行一些重写,则可以将内置struct函数用作基本哈希表,其中字符串(有效的变量名)作为键,几乎所有内容都存储在值中。

例如,给定以下内容:

% Parallel cell arrays: keys{i} is the key for values{i}
keys = {'Mon','Tue','Wed'}
values = {10, 20, 30}

你可以替换这个:

% Original MATLAB form: build the map from the key/value cell arrays,
% then look up the value stored under 'Mon'
map = containers.Map(keys,values);
map('Mon')

替换为:

% Emulate the map with a struct: every key becomes a field name
s = struct();
for k = 1:numel(keys)
    s = setfield(s, keys{k}, values{k});
end
% Look up the value stored under 'Mon'
getfield(s, 'Mon')

键必须是合法的变量名,因此您可能需要使用 genvarname 来生成有效的键名,或者使用合适的散列函数来生成有效的键字符串。

还要查看与结构相关的函数:getfield、setfield、isfield、fieldnames、rmfield 等。

于 2012-07-24T02:04:25.417 回答