perl - 剥离附件后的 perl 电子邮件 mimetype

Question

这是 perl 专家的问题。我在这里找到了一个脚本：http: //www.developertoolshed.com/how-to/141/

此脚本用于接收电子邮件和删除附件。除了一个问题之外，这一切都很好 - 它需要所有 unicode 数据并将其转换为 ascii，我真的找不到原因。

我在 perl 方面的知识很差，但我试图阅读 cpan 文档，这似乎是找到这个问题的答案的地方，但没有任何效果。

我的大部分工作都是在 deatch_message 函数上完成的，该函数从 STDIN 获取输入并输出电子邮件消息。

似乎以下行：

$实体->打印（）；

使一些如何从属性编码消息切换到乱码文本。

sub detach_message {
    my $self = shift;
    my $parser = new MIME::Parser;

    $parser->output_under("/mnt/must/mustinbox/big_files/tmp");
    $parser->extract_uuencode(1);
    my $envelope = <STDIN>;
    my $entity = $parser->parse(\*STDIN);
    #$entity->dump_skeleton($fh);          # for debugging

    $self->detach_all($entity);
    ### if we're in aggressive mode, we need to
    ### add the blurb to all text/* parts
    $self->append_blurbs($entity)  if $self->{aggressive};

    print $envelope;
    $entity->print();
    system("/bin/rm", "-rf", $parser->output_dir());
    if (@{$self->{detached}}) {
        $self->print_index($entity->head()->get('From'),
                           $entity->head()->get('Subject'));
    }
}

这是我完整的 perl 脚本：

#!/usr/bin/perl -w

###########################################################################
### Strips attachments out of email messages (of certain types)
### and replaces them with HTML links
###
### For documentation and latest versions see:
###   http://detach.optimism.cc/
###
### This program by and copyright Ryan Hamilton <ryan@optimism.cc>,
### and Jason Fesler <jfesler@gigo.com>
###  all rights reserved
###
### Edited by Jack Zielke <detach@linuxcoffee.com> and
### Bobby Burden <bobby@codebutcher.com>
### http://linuxcoffee.com/detach
###########################################################################
###
### THIS SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND.
###
###########################################################################
### $Id: detach.pl 213 2011-09-14 15:14:41Z jzielke $
###########################################################################

package Detach;

use strict;
use Log::Log4perl qw(:easy);
use Data::Dumper;
use File::Basename;
use MIME::Parser;
use POSIX qw(strftime);
use Number::Bytes::Human qw(format_bytes);
use Digest::MD5 qw(md5_base64);

sub new {
    my $class = shift;
    my $self  = {};
    my $args = shift;
    my %ARGS = map { $_ => 1 } qw(aggressive web_root dir_root hash shorten msize);
    DEBUG(Dumper(\%ARGS));
    DEBUG(Dumper($args));
    for my $key (keys %$args) {
        if (!$ARGS{$key}) {
            die "ERROR: invalid option '$key'\n";
        }
        $self->{$key} = $args->{$key};
    }
    for my $required (qw(web_root dir_root) ) {
        if (! $self->{$required} ) {
            die "ERROR: required option '$required' not specified\n";
        }
    }
    bless $self, $class;

    my $stamp;
    if (! $self->{hash}) {
        $stamp = strftime("%Y/%m/%d/%H:%M:%S-$$",localtime);
    } else {
        # use jfesler's hash based stamp
        my $hash = '';
        do {
            $hash .= md5_base64(join("",time,$$,$<,$>,$self));
            $hash =~ s/[^a-zA-Z0-9]//g;    # Really, I want base 62
            $hash =~ tr/vVO0Il12Z5S/vV/sd; # And, avoid pain with visual cut/paste,
                                           # now base 53
        } while (length($hash) < 10);      # Just in case
        $stamp= substr($hash,0,2) .        # 168287943181908783 combos 53^10 - 2(53^9)
            "/" . substr($hash,2,8);
    }
    $self->{dir_root} .= "/$stamp/";
    $self->{web_root} .= "/$stamp/";
    $self->{dir_root} =~ s|//|/|g;
    $self->{web_root} =~ s|([^:])//|$1/|g;
    $self->{detached} = [];
    $self->{urls}     = [];
    $self->{firsts}   = {};
    $self->{cids}     = {};

    DEBUG(Dumper($self));

    return $self;
}

sub detach_message {
    my $self = shift;
    my $parser = new MIME::Parser;

    $parser->output_under("/tmp");
    $parser->extract_uuencode(1);
    $parser->decode_bodies(1);
    my $envelope = <STDIN>;
    my $entity = $parser->parse(\*STDIN);
    #$entity->dump_skeleton(\*STDERR);          # for debugging

    $self->detach_all($entity);
    ### if we're in aggressive mode, we need to
    ### add the blurb to all text/* parts
    $self->append_blurbs($entity)  if $self->{aggressive};

    print $envelope;
    $entity->print();

    system("/bin/rm", "-rf", $parser->output_dir());
    if (@{$self->{detached}}) {
        $self->print_index($entity->head()->get('From'),
                           $entity->head()->get('Subject'));
    }
}

### If we're in aggressive mode, we need to append
### the detachment blurb to the first "part"
### for each text/* mime type
sub append_blurbs {
    my $self = shift;
    my($entity) = @_;

    DEBUG("appending blurbs ".Dumper([ keys %{$self->{firsts}}]));
    if (@{$self->{urls}}) {
        DEBUG("got urls");
        if (!scalar keys %{$self->{firsts}}) {
            DEBUG("There are detachments and nothing to append the blurb to.  Creating empty text/plain.");
            my $part = build MIME::Entity (
                Data     => '',
                Encoding => 'quoted-printable',
            );
            $entity->add_part($part, 0);
            $self->{firsts}{'text/plain'} = $part;
        }
        foreach my $m (keys %{$self->{firsts}}) {
            DEBUG($m);
            my $e = $self->{firsts}{$m};
            my $body =  $e->bodyhandle;
            my @lines = $body->as_lines;
            next unless ($body);
            DEBUG(" ready to append $m");

            my $lines = $self->append_blurb($m,\@lines);
            if (!$lines) {
                print STDERR "got no lines when appending blurb $m\n";
            }
            if ($lines) {
                my $b = new MIME::Body::InCore $lines;
                if ($b) {
                    $e->bodyhandle($b) ;
                } else {
                    DEBUG("Failed to update body part with index while allocating new  MIME::Body::InCore");
                }
            }
        }
    }
}

sub append_blurb {
    my $self = shift;
    my($type,$lines) = @_;

    DEBUG("appending blurb of type $type");

    my $header  = " -  The message contains attachments.<br />webservices delivers your attachments as links for saving disk space and backup purposes.";
    my $footer  = "Only click these links if you trust the sender, as well as this message.";
    my $footer2 = "";

    if ($type =~ m#text/plain#) {
        return $self->append_blurb_plain($header, $footer, $footer2, $lines);
    } elsif ($type =~ m#text/html#) {
        return $self->append_blurb_html($header, $footer, $footer2, $lines);
    } elsif ($type =~ "text/(rich|enriched)#") {
        return $self->append_blurb_rtf($header, $footer, $footer2, $lines);
    }
}

sub append_blurb_plain {
    my $self = shift;
    my($header, $footer, $footer2, $lines) = @_;

    my @blurb = ("\n\n\n",
                 " --- 8< --- detachments --- 8< ---\n",
                 " $header\n",
                 map("  $_\n", (@{$self->{urls}})),
                 " $footer\n",
                 " $footer2\n",
                 " --- 8< --- detachments --- 8< ---\n",
                 "\n");
    push(@$lines, @blurb);
    return $lines;
}

sub append_blurb_html {
    my $self = shift;
    my($header, $footer, $footer2, $lines) = @_;

    my @blurb = ("<hr /><p><b>Attachments</b>  $header\n",
                 "<ul>",);
    foreach(@{$self->{urls}}) {
        my $pretty_url = $_;
        $pretty_url =~ s/\%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg;
        push (@blurb, "<li><a href=\"$_\">$pretty_url</a>\n");
        my $cid = $self->{cids}{$_};
        if ($cid) {
            $cid =~ /<(.+)>/;
            $cid = $1;
            my $url = $_;
            DEBUG("Replacing cid:$cid with $url");
            foreach (@$lines) {
                $_ =~ s#cid:$cid#$url#g;
            }
        }
    }
    push(@blurb, ("</ul>$footer<br />\n",
                 "$footer2\n",
                 "<p>",));

    DEBUG("Adding html blurb: ". join("\n", @blurb));

    my $found=0;
    foreach my $line (@$lines) {
        if ($line =~ m#</body>#i) {
            # Sneak it in before </body>
            my $blurb = join("",@blurb);
            $line =~ s#</body>#$blurb </body>#;
            $found++; last;
        }
    }
    unless ($found) {
        push(@$lines,@blurb);
    }
    return $lines;
}

sub append_blurb_rtf {
    my $self = shift;
    my($header, $footer, $footer2, $lines) = @_;

    my @blurb = (" \\par\n--- 8< --- detachments --- 8< ---\\par\n",
                 "$header\\par\n",
                 map("  $_\\par\n", (@{$self->{urls}})),
                 "$footer\\par\n",
                 "$footer2\\par\n",
                 "\\par\n");

    $lines->[@$lines-1] =~ #}$##;  Remove trailing container bracket
    push(@$lines, @blurb);
    $lines->[@$lines-1] .= "}"; # Replace trailing container bracket

    return $lines;
}

sub detach_all {
    my $self = shift;
    my($entity) = @_;

    for my $part ($entity->parts()) {
        if ($part->head()->recommended_filename() || $part->head()->get('Content-ID',0)) {
            my($h,$b) = $self->detach_part($part);
        } elsif ($part->parts()) {
            $self->detach_all($part);
        } else {
            # keep track of the first part for each mime type
            # so that later we can come back and
            # add a blurb to each of these parts
            # (when using opt_aggressive)
            my $m = $part->head->mime_type;
            $self->{firsts}{$m} ||= $part;
        }
    }
    if ($self->{aggressive}) {
        my @keep = grep (! $_->{detached} ,  $entity->parts);
        $entity->parts(\@keep);
    }
}

sub detach_part {
    my $self = shift;
    my($entity) = @_;

    my $src = $entity->bodyhandle()->path();
    my $base = basename($src);

    if ($self->{msize}) {
        my $filesize = -s $src;
        DEBUG("File name: ".$src);
        DEBUG("File size: ".$filesize);
        if ($self->{msize} > $filesize) {
            return;
        }
    }

    system("mkdir", "-p", $self->{dir_root}) == 0
        or die "ERROR: unable to create $self->{dir_root} : $!\n";
    chmod(0777, $self->{dir_root});

    my $name = $self->uniq_name($self->{dir_root},$base);
    my $dest = $self->{dir_root} . $name;
    $name =~ s/([^A-Za-z0-9.])/sprintf("%%%02X", ord($1))/seg;
    my $url  = $self->{web_root} . $name;

    my $cid = $entity->head()->get('Content-ID',0);
    if ($cid) {
        $self->{cids}{$url} = $cid;
    }

    system("/bin/mv",$src,$dest);

    push(@{$self->{detached}},$dest);
    push(@{$self->{urls}},$url);

    DEBUG("Detach path: ".$self->{dir_root});
    DEBUG("Detach  url: ".$self->{web_root});
    DEBUG("Detach  url: ".$url);

    $entity->{detached}=1;

    my $h = MIME::Head->new();
    #$h->replace('Content-type','text/plain; charset=US-ASCII');
    $h->replace('Content-type','text/plain; charset=8BIT');

    my $b = new MIME::Body::InCore ["\n", $self->{web_root}."\n", $url."\n"];
    $entity->head($h);
    $entity->bodyhandle($b);
}

sub uniq_name {
    my $self = shift;
    my($dir,$name) = @_;


    $name =~ s#^\.#/_#sg;  # No leading dot
    if ($self->{shorten}) {
        $name =~ s/^[^a-zA-Z0-9_.-]/_/g;
        $name =~ s#^([^/]{21,})\.([^/.]+)$#substr($1,0,20) . ".$2" #ge;
    }

    if (-f "$dir$name") {
        DEBUG("$name exists");
        my($base,$ext);
        if ($name =~ /^(.+)\.(.+)$/) {
            $base = $1;
            $ext  = $2;
        } else {
            $base = "";
            $ext  = $name;
        }
        my $i=1;
        $i++ while (-f "$dir$base.$i.$ext");
        $name = "$base.$i.$ext";
        DEBUG("USING $name");
    }
    return $name;
}

sub print_index {
    my $self = shift;
    my($from,$subj) = @_;
    chomp($from);
    chomp($subj);

    my $f = "$self->{dir_root}/index.html";
    open(F,">$f") or die "ERROR: unable to open $f : $!\n";
    print F "<HTML><BODY><PRE>From: $from\nSubj: $subj\n</PRE><UL>\n";
    for (@{$self->{detached}}) {
        my $u = substr($_,length($self->{dir_root}));
        $u =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;
        my $b = basename($_);

        my $size = format_bytes((stat($_))[7]);
        print F "<LI><A href='$self->{web_root}$u'>$b</A>";
        print F " - $size</LI>\n";
    }
    print F "</UL>\n";
    close(F);
}

###########################################################################
package main;

use strict;
use Getopt::Long;
use Log::Log4perl qw(:easy);

$|=1;

umask(0000);

my($opt_web, $opt_dir, $opt_verbose, $opt_help, $opt_aggressive, $opt_hash, $opt_shorten, $opt_size);
if (!GetOptions("d|dir-root=s" => \$opt_dir,
                "w|web-root=s" => \$opt_web,
                "a|aggressive" => \$opt_aggressive,
                "s|shorten"    => \$opt_shorten,
                "hash"         => \$opt_hash,
                "size=s"       => \$opt_size,
                "v|verbose"    => \$opt_verbose,
                "h|help"       => \$opt_help) || $opt_help) {
    print STDERR <<EOF;
Usage: detach [options] [file]

Options:
  -d, --dir-root    root of directory tree for detachemnts
  -w, --web-root    URL to dir-root
  -s, --shorten     shorten attachment file names
      --hash        use hash instead of date in dir names
      --size        minimum file size to detach (in bytes)
  -a, --aggressive  remove detached attachments, and embed
                    the blurb in text parts instead
  -v, --verbose     debugging output
EOF
    exit(1);

}


if (!$opt_web || !$opt_dir) {
    my @info = getpwuid( $< );
    my $user = $info[0];
    my $home = $info[7];
    $opt_web = "http://localhost/~$user/detach" if !$opt_web;
    $opt_dir = "$home/html/detach" if !$opt_dir;
}
Log::Log4perl->easy_init($opt_verbose ? $DEBUG : $INFO);

DEBUG("Web root : $opt_web");
DEBUG("Dir root : $opt_dir");

my $detach = new Detach( { dir_root   => $opt_dir,
                           web_root   => $opt_web,
                           aggressive => $opt_aggressive,
                           shorten    => $opt_shorten,
                           msize      => $opt_size,
                           hash       => $opt_hash });
$detach->detach_message();

任何帮助将不胜感激

perl - 剥离附件后的 perl 电子邮件 mimetype

0 回答 0

Related

Reference