1

我使用 Debian GNU/Linux Squeeze。

当我尝试运行此脚本时,脚本中途停止运行,并显示错误消息“Out of memory!”(内存不足)。

脚本:

use strict;
use warnings;

use XML::Parser;

# Converts an OpenStreetMap XML file (first command-line argument) into
# ways.txt, osm.txt, points.txt and stats.txt in the current directory.
my $Filename = shift();
die "Usage: $0 <file.osm>\n" unless defined $Filename;

# Temporary data: state of the element currently being parsed
my (%MainAttr,$Type,%Tags, @WaySegments);
# Stats: number of times each tag key was seen
my %AllTags;
# Stored data, filled in by the parser callbacks and indexed by element id
my (@Nodes, @Segments, @Ways, %Stats);

# Returns the value, or the given default when the value is undefined, so
# that printf never receives undef (undef already formats as 0 for %f and
# as the empty string for %s, so the output is unchanged).
my $OrDefault = sub {
  my ($Value, $Default) = @_;
  return defined $Value ? $Value : $Default;
};

# Processing stage
#----------------------------------------------
my $P = XML::Parser->new(Handlers => {Start => \&DoStart, End => \&DoEnd, Char => \&DoChar});
$P->parsefile($Filename);
print STDERR "Creating output files\n";


# Combine way data into segments
#----------------------------------------------
if(open(my $WaysFh, '>', 'ways.txt')){
  foreach my $Way (@Ways){
    # The array is indexed by way id, so unused ids are holes, not ways
    next unless defined $Way;

    my @SubSegments = split(/,/, $OrDefault->($Way->{"segments"}, ""));
    $Stats{"empty ways"}++ if(scalar(@SubSegments) < 1);

    printf {$WaysFh} "Copying keys: %s to segments %s\n",
      join(",",keys(%$Way)),
      join(",",@SubSegments);

    # Each segment in a way inherits the way's attributes
    foreach my $Segment(@SubSegments){
      foreach my $Key(keys(%$Way)){
        $Segments[$Segment]{$Key} = $Way->{$Key}
      }
    }
  }
  close($WaysFh) or warn "Error closing ways.txt: $!\n";
}
else{
  warn "Cannot write ways.txt: $!\n";
}

# Main output (segments)
#----------------------------------------------
if(open(my $OsmFh, '>', 'osm.txt')){
  foreach my $Segment(@Segments){
    # Skip unused ids instead of printing an all-zero record for each
    next unless defined $Segment;

    my $From = $Segment->{"from"} || 0;
    my $To = $Segment->{"to"} || 0;
    $Stats{"segments without endpoints"}++ if($From == 0 or $To == 0);

    # Plain element reads: a missing endpoint must not be autovivified
    # into @Nodes, where the points pass would later count it as a node
    my $FromNode = $Nodes[$From] || {};
    my $ToNode = $Nodes[$To] || {};

    printf {$OsmFh} "%f,%f,%f,%f,%s,%s,%s\n",
      $OrDefault->($FromNode->{"lat"}, 0),
      $OrDefault->($FromNode->{"lon"}, 0),
      $OrDefault->($ToNode->{"lat"}, 0),
      $OrDefault->($ToNode->{"lon"}, 0),
      $OrDefault->($Segment->{"class"}, ""),
      $OrDefault->($Segment->{"name"}, ""),
      $OrDefault->($Segment->{"highway"}, "");
  }
  close($OsmFh) or warn "Error closing osm.txt: $!\n";
}
else{
  warn "Cannot write osm.txt: $!\n";
}

# Secondary output (named points)
#----------------------------------------------
if(open(my $PointsFh, '>', 'points.txt')){
  foreach my $Node(@Nodes){
    # Unused ids are holes in the array, not nodes at 0,0
    next unless defined $Node;

    my $Lat = $OrDefault->($Node->{"lat"}, 0);
    my $Lon = $OrDefault->($Node->{"lon"}, 0);
    $Stats{"Nodes with zero lat/long"}++ if($Lat == 0 and $Lon == 0);

    if($Node->{"name"} || $Node->{"amenity"} || $Node->{"class"}){
      printf {$PointsFh} "%f,%f,%s,%s,%s\n",
        $Lat,
        $Lon,
        $OrDefault->($Node->{"name"}, ""),
        $OrDefault->($Node->{"amenity"}, ""),
        $OrDefault->($Node->{"class"}, "");
    }
  }
  close($PointsFh) or warn "Error closing points.txt: $!\n";
}
else{
  warn "Cannot write points.txt: $!\n";
}

# Statistics output: tag keys by descending frequency, then the counters
#----------------------------------------------
if(open(my $StatsFh, '>', 'stats.txt')){
  foreach my $Tag (sort {$AllTags{$b} <=> $AllTags{$a}} keys(%AllTags)){
    printf {$StatsFh} "* %d %s\n", $AllTags{$Tag}, $Tag;
  }
  print {$StatsFh} "\n\nStats:\n";
  foreach my $Stat (keys(%Stats)){
    printf {$StatsFh} "* %d %s\n", $Stats{$Stat}, $Stat;
  }
  close($StatsFh) or warn "Error closing stats.txt: $!\n";
}
else{
  warn "Cannot write stats.txt: $!\n";
}
print STDERR "Done\n";
exit;

# Start handler: called whenever an XML tag is started.
#
# Receives the Expat object, the element name and the element's attributes
# as a flat name/value list.
#
#   node / segment / way  reset the per-element state and keep the attributes
#   tag                   record the key/value pair on the current element
#   seg                   append the referenced segment id to the current way
#
# Declared without a prototype: the former empty "()" prototype claimed the
# sub takes no arguments, although it unpacks three.
#----------------------------------------------
sub DoStart
{
  my ($Expat, $Name, %Attr) = @_;

  if($Name eq "node"){
    undef %Tags;
    %MainAttr = %Attr;
    $Type = "n";
  }
  elsif($Name eq "segment"){
    undef %Tags;
    %MainAttr = %Attr;
    $Type = "s";
  }
  elsif($Name eq "way"){
    undef %Tags;
    undef @WaySegments;
    %MainAttr = %Attr;
    $Type = "w";
  }
  elsif($Name eq "tag"){
    # TODO: protect against id,from,to,lat,long,etc. being used as tags
    $Tags{$Attr{"k"}} = $Attr{"v"};
    $AllTags{$Attr{"k"}}++;
    $Stats{"tags"}++;
  }
  elsif($Name eq "seg"){
    push(@WaySegments, $Attr{"id"});
  }
  return;
}

# End handler: called whenever an XML tag is ended.
#
# Receives the Expat object and the element name. When a node, segment or
# way closes, the attributes and tags gathered since its start tag are
# stored under the element's id and the matching counters are updated.
#
# Declared without a prototype: the former empty "()" prototype claimed the
# sub takes no arguments, although it unpacks two.
#
# NOTE(review): the stores are arrays indexed by the raw element id. Perl
# arrays are dense, so storing at a very large id allocates every slot below
# it; hashes keyed by id would avoid that, but the stores are declared at
# file level and have to be changed there as well.
#----------------------------------------------
sub DoEnd{
  my ($Expat, $Element) = @_;
  if($Element eq "node"){
    my $ID = $MainAttr{"id"};
    $Nodes[$ID]{"lat"} = $MainAttr{"lat"};
    $Nodes[$ID]{"lon"} = $MainAttr{"lon"};
    # Tags are copied last, so a tag named lat/lon overrides the attribute
    foreach my $Key (keys(%Tags)){
      $Nodes[$ID]{$Key} = $Tags{$Key};
    }
    $Stats{"named nodes"}++ if($Nodes[$ID]{"name"});
    $Stats{"tagged nodes"}++ if($MainAttr{"tags"});
    $Stats{"nodes"}++;
    #print "Node:".join(",",keys(%Tags))."\n" if(scalar(keys(%Tags))>0);
  }
  elsif($Element eq "segment"){
    my $ID = $MainAttr{"id"};
    $Segments[$ID]{"from"} = $MainAttr{"from"};
    $Segments[$ID]{"to"} = $MainAttr{"to"};
    foreach my $Key (keys(%Tags)){
      $Segments[$ID]{$Key} = $Tags{$Key};
    }
    $Stats{"tagged segments"}++ if($MainAttr{"tags"});
    $Stats{"segments"}++;
  }
  elsif($Element eq "way"){
    my $ID = $MainAttr{"id"};
    # Segment ids are kept as one comma-separated string
    $Ways[$ID]{"segments"} = join(",",@WaySegments);
    foreach my $Key (keys(%Tags)){
      $Ways[$ID]{$Key} = $Tags{$Key};
    }
    $Stats{"Ways"}++;
  }
  return;
}

# Char handler: called whenever text is encountered in the XML file.
#
# Receives the Expat object and the text; the text is deliberately ignored.
# Declared without a prototype: the former empty "()" prototype claimed the
# sub takes no arguments, although it unpacks two.
#----------------------------------------------
sub DoChar{
  my ($Expat, $String) = @_;
}

终端:

root@Delta:~/Perl/Map# perl convert.pl map.osm
Out of memory!

root@Delta:~/Perl/Map# ulimit -a
core file size          (blocks, -c) 0
data seg size           (kbytes, -d) unlimited
scheduling priority             (-e) 0
file size               (blocks, -f) unlimited
pending signals                 (-i) 16382
max locked memory       (kbytes, -l) 64
max memory size         (kbytes, -m) unlimited
open files                      (-n) 1024
pipe size            (512 bytes, -p) 8
POSIX message queues     (bytes, -q) 819200
real-time priority              (-r) 0
stack size              (kbytes, -s) 8192
cpu time               (seconds, -t) unlimited
max user processes              (-u) unlimited
virtual memory          (kbytes, -v) unlimited
file locks                      (-x) unlimited

我需要使用脚本将 OpenStreetMap XML 文档转换为文本文件。

4

1 回答 1

2

我发现您使用数字元素 ID 作为 @Nodes 和 @Ways 的索引。在我的测试中,@Nodes 的有效索引从 1010888852 开始,因此 Perl 试图创建超过十亿个值为 undef 的标量,来填充元素 0 到 1010888851。

将这些变量(以及 @Segments——它在我的测试中为空,但使用方式相同)改为哈希,似乎可以解决问题。

请看一下下面这个程序:它修正了我发现的错误,并且是按 Perl 的惯用风格编写的(您看起来像是一位 C 程序员)。

use strict;
use warnings;

use autodie;

# Provides the autoflush method on STDOUT/STDERR; older perls do not load
# it on demand.
use IO::Handle;
use XML::Parser;

STDOUT->autoflush;
STDERR->autoflush;

# Converts an OpenStreetMap XML file (first command-line argument) into
# ways.txt, osm.txt, points.txt and stats.txt in the current directory.
my ($filename) = @ARGV;
die "Usage: $0 <file.osm>\n" unless defined $filename;

# All state shared with the parser callbacks is declared before parsing
# starts, so the callbacks never touch a variable whose declaration has not
# run yet.

# Temporary data: state of the element currently being parsed
my ($main_attr, %tags, @way_segments);

# Stats: number of times each tag key was seen
my %all_tags;

# Stored data, keyed by element id
my (%nodes, %segments, %ways, %stats);

# Processing stage
#----------------------------------------------
my $parser = XML::Parser->new(
  Handlers => { Start => \&do_start, End => \&do_end, Char => \&do_char });
$parser->parsefile($filename);

print STDERR "Creating output files\n";

# Combine way data into segments
#----------------------------------------------
open my $ways_fh, '>', 'ways.txt';

foreach my $way (values %ways) {

  my @subsegments = split /,/, $way->{segments} // '';
  $stats{'empty ways'}++ if @subsegments < 1;

  printf {$ways_fh} "Copying keys: %s to segments %s\n",
      join(',', keys %$way),
      join(',', @subsegments);

  # Each segment in a way inherits the way's attributes
  foreach my $segment (@subsegments) {
    foreach my $key (keys %$way) {
      $segments{$segment}{$key} = $way->{$key};
    }
  }
}

close $ways_fh;

# Main output (segments)
#----------------------------------------------
open my $osm_fh, '>', 'osm.txt';

foreach my $segment (values %segments) {

  # Segments created only by way inheritance have no endpoints
  my $from = $segment->{from} // 0;
  my $to = $segment->{to} // 0;
  $stats{'segments without endpoints'}++ if $from == 0 or $to == 0;

  # Plain lookups: a missing endpoint must not be autovivified into %nodes,
  # where the points pass would count it as a node at 0,0
  my $from_node = $nodes{$from} // {};
  my $to_node = $nodes{$to} // {};

  printf {$osm_fh} "%f,%f,%f,%f,%s,%s,%s\n",
      $from_node->{lat} // 0,
      $from_node->{lon} // 0,
      $to_node->{lat} // 0,
      $to_node->{lon} // 0,
      $segment->{class} // '',
      $segment->{name} // '',
      $segment->{highway} // '';
}

close $osm_fh;

# Secondary output (named points)
#----------------------------------------------
open my $points_fh, '>', 'points.txt';

foreach my $node (values %nodes) {
  my $lat = $node->{lat} // 0;
  my $lon = $node->{lon} // 0;

  $stats{'nodes with zero lat/long'}++ if $lat == 0 and $lon == 0;

  if ($node->{name} or $node->{amenity} or $node->{class}) {
    # The numeric fields default to 0 because %f cannot format 'none'
    printf {$points_fh} "%f,%f,%s,%s,%s\n",
        $lat,
        $lon,
        $node->{name} // 'none',
        $node->{amenity} // 'none',
        $node->{class} // 'none';
  }
}

close $points_fh;

# Statistics output: tag keys by descending frequency, then the counters
#----------------------------------------------
open my $stats_fh, '>', 'stats.txt';

foreach my $tag (sort { $all_tags{$b} <=> $all_tags{$a} } keys %all_tags) {
  printf {$stats_fh} "* %d %s\n", $all_tags{$tag}, $tag;
}

print {$stats_fh} "\n\nStats:\n";

foreach my $stat (keys %stats) {
  printf {$stats_fh} "* %d %s\n", $stats{$stat}, $stat;
}

close $stats_fh;

print STDERR "Done\n";

exit;

# Start handler: runs for every opening XML tag.
#
# Receives the Expat object, the element name and the element's attributes
# as a flat name/value list. Opening a node, segment or way starts a fresh
# element (tags cleared, attributes remembered; a way also clears its
# segment list). A tag adds its key/value pair to the current element and a
# seg adds its id to the current way.
#----------------------------------------------
sub do_start {
  my ($expat, $name, %attr) = @_;

  if ($name eq 'tag') {

    # TODO: protect against id,from,to,lat,long,etc. being used as tags
    my ($key, $value) = @attr{qw/ k v /};
    $tags{$key} = $value;
    $all_tags{$key}++;
    $stats{tags}++;
  }
  elsif ($name eq 'seg') {
    push @way_segments, $attr{id};
  }
  elsif ($name eq 'node' or $name eq 'segment' or $name eq 'way') {
    %tags = ();
    @way_segments = () if $name eq 'way';
    $main_attr = \%attr;
  }

  return;
}

# End handler: runs for every closing XML tag.
#
# Receives the Expat object and the element name. When a node, segment or
# way closes, the attributes and tags gathered since its start tag are
# stored under the element's id and the matching counters are incremented.
# Tags are copied after the attributes, so a tag with the same name wins.
#----------------------------------------------
sub do_end {
  my ($expat, $element) = @_;

  if ($element eq 'node') {
    my $node = $nodes{ $main_attr->{id} } //= {};

    $node->{lat} = $main_attr->{lat};
    $node->{lon} = $main_attr->{lon};
    @{$node}{ keys %tags } = values %tags;

    $stats{'named nodes'}++  if $node->{name};
    $stats{'tagged nodes'}++ if $main_attr->{tags};
    $stats{nodes}++;
  }
  elsif ($element eq 'segment') {
    my $segment = $segments{ $main_attr->{id} } //= {};

    $segment->{from} = $main_attr->{from};
    $segment->{to} = $main_attr->{to};
    @{$segment}{ keys %tags } = values %tags;

    $stats{'tagged segments'}++ if $main_attr->{tags};
    $stats{segments}++;
  }
  elsif ($element eq 'way') {
    my $way = $ways{ $main_attr->{id} } //= {};

    # Segment ids are kept as one comma-separated string
    $way->{segments} = join ',', @way_segments;
    @{$way}{ keys %tags } = values %tags;

    $stats{ways}++;
  }

  return;
}

# Char handler: runs for every piece of text found in the XML file.
#
# Receives the Expat object and the text. The text is deliberately ignored;
# the arguments are unpacked only to document the handler's signature.
#----------------------------------------------
sub do_char {
  my ($parser, $text) = @_;
}
于 2013-01-27T23:26:41.503 回答