#!/usr/bin/env perl -w
#
# parp -- Perl Anti-spam Replacement for Procmail
#
# Copyright (c) 1999--2001 Adam Spiers <adam@spiers.net>. All rights
# reserved. This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: parp,v 1.114 2001/10/21 19:39:19 adam Exp $
#

use strict;

use vars qw($VERSION);
$VERSION = '0.55';

use Data::Dumper;
use Mail::Internet;
use Mail::Filter;
use Mail::Folder;
use Mail::Folder::Mbox;

use lib "$ENV{HOME}/.parp";
use MyFilter qw(%CONFIG %RE);

use Mail::Filterable;
use Parp::IdCache;
use Parp::Friends;
use Parp::Options qw(opt opts usage);
use Parp::Utils qw(init_log vprint log_to_file log_rule fatal
                   global_lock global_unlock check_file_dir);

local $SIG{__DIE__} = \&die_handler;

# Process options
Parp::Options::process();

# Prepare the filter
my $filter = new Mail::Filter(\&filter);

# Prepare for output
$| = 1;
init_log() unless opt('test_run');

# Get a lock as soon as we mean business
global_lock();

Parp::Friends::init();
Parp::IdCache::init();

opt('filter_files') ? filter_argv() : do_filter_mode();

exit 0;

END {
  global_unlock();
}

sub do_filter_mode {
  # Behave like a filter; take one e-mail from STDIN

  usage() if -t;
  usage() if @ARGV;

  my $mail = new Mail::Internet( [<>] );
  $filter->filter($mail);
  log_rule('-');
}

sub filter_argv {
  # Filter all folders given in @ARGV

  usage() unless @ARGV;

  log_to_file "###\n### Run started at ", scalar(localtime()), "\n###\n\n";

  my %counts = (
    parsed  => 0,
    total   => 0,
    main    => 0,
    aux     => 0,
    spam    => 0,
    dups    => 0,
    special => 0,
  );
  my %inodes_seen = ();

  foreach my $file (@ARGV) {
    unless (-f $file) {
      # TODO: allow symlinks
      vprint "Skipping non-file $file.\n";
      next;
    }

    my ($inode, $size) = (stat $file)[1, 7];

    if ($size == 0) {
      vprint "Skipping empty file $file.\n";
      next;
    }

    if ($inodes_seen{$inode}) {
      vprint "Skipping $file\n" .
             "  (already seen file $inodes_seen{$inode} with inode $inode\n";
      next;
    }
    $inodes_seen{$inode} = $file;

    filter_folder($file, \%counts);
  }

  log_to_file <<EOF;
Parsed $counts{parsed} of $counts{total} messages:
  delivered $counts{main} to $CONFIG{main_folder}
  delivered $counts{aux} to auxiliary folders
  tagged $counts{spam} as spam
  discarded $counts{dups} as duplicate
  tagged $counts{special} as special
EOF
  log_to_file "Run ended at ", scalar(localtime()), "\n";
  log_rule('=');
}

sub filter_folder {
  my ($file, $counts) = @_;

  vprint "Reading $file ... ";
  my $folder = new Mail::Folder('AUTODETECT', $file);
  vprint "done.\nSetting $file read-only ... ";
  $folder->set_readonly();
  vprint "done.\n";

  my $msg_num = $folder->first_message();
  my ($file_total, $friends) = (0, 0);

  my ($sample, $extract_friends) = opts(qw/sample extracted_friend/);
  do {
    $counts->{total}++;
    my $mail = $folder->get_message($msg_num);
    $mail->{parp_foldername} = $file;
    if (! $mail) {
      fatal('Mail::Folder::get_message failed', "\$mail:\n", Dumper $mail);
      next;
    }

    my $rv = $filter->filter($mail);
    $counts->{parsed}++  if $rv;
    $counts->{dups}++    if $rv =~ /IS_DUPLICATE/;
    $counts->{spam}++    if $rv =~ /IS_SPAM/;
    $counts->{main}++    if $rv =~ /TO_MAIN/;
    $counts->{aux}++     if $rv =~ /TO_AUX/;
    $counts->{special}++ if $rv =~ /IS_SPECIAL/;
    $counts->{friends}++ if $rv eq 'EXTRACTED_FRIEND';
    $counts->{file_total}++;
    log_rule('-');
  }
  while ($msg_num = $folder->next_message($msg_num))
        &&
        (
         ((! $sample)          || ($file_total < $sample)) ||
         ((! $extract_friends) || ($friends == 0))
        );
}

##############################################################################
#
# The main filtering logic.
#

sub filter {
  my ($filter, $mail) = @_;

  my $folder;
  $folder = $mail->{parp_foldername} if $mail->{parp_foldername};

  if (! $mail) {
    fatal('message parsing failed',
          "\$folder:\n", Dumper($folder),
          "\n",
          "\$mail:\n", Dumper($mail),          
         );
    return 0;
  }

  my $m = Mail::Filterable->new($mail);

  log_header($m);

  return $m->extract_friends($folder) if opt('extract_friends');

  my $wrong_class = opt('wrong_class');
  if (opt('no_dups') && ! $wrong_class && $m->is_duplicate()) {
    $m->reject_mail('was duplicate by message id');
#   $m->deliver_to_inbox('duplicates');
    $m->{backup} = 0;
    return 'IS_DUPLICATE';
  }

  $m->check_for_old_addresses();

  # FIXME: There could be more than one X-Loop header.
  if (($m->{header}->get('X-Loop') || '') eq $CONFIG{loop_value} and
     ! $wrong_class) {
    $m->accept_mail('looped');
    return 'LOOPED';
  }

  $m->{filter_category} = $m->categorize();

  if (! $wrong_class) {
    $m->parse_received_headers();

    if ($m->{recvd_parses_failed}) {
      if ($m->{filter_category} eq 'IS_SPAM') {
        $m->deliver_to('spam_recvds');
      }
      else {
        vprint $m->{recvd_parses_out};
        $m->deliver_to('bad_recvds');
      }
    }

    if ($m->{filter_category} eq 'TO_MAIN') {
      $m->deliver_mail();
    }
    elsif ($m->{filter_category} eq 'IS_SPAM') {
      if ($m->{complain}) {
        # TODO: write and send a rude letter to relevant abuse@foo.com address.
        log_to_file "Would complain\n";
      }
    }
    elsif ($m->{filter_category} eq 'TO_AUX') {
      # list mail; already delivered to primary target
      #  - maybe back up though
      $m->maybe_backup();
    }
    elsif ($m->{filter_category} eq 'IS_SPECIAL') {
      # special case mail; already delivered to primary target
      #  - maybe back up though
      $m->maybe_backup();
    }
    else {
      die "Oh dear.";
    }
  }
  else {
    # The user's telling us that the filter_category we've just
    # calculated is wrong.
    if ($m->{filter_category} eq 'IS_SPAM') {
      vprint "Reclassification: was incorrectly identified as spam\n";
      $m->{filter_category} = 'UNKNOWN_NOT_SPAM';
    }
    elsif ($m->{filter_category} ne 'IS_SPAM') {
      vprint "Reclassification: was incorrectly identified as bona-fide\n";
      $m->{filter_category} = 'IS_SPAM';
    }
  }

  return $m->{filter_category};
}

sub log_header {
  my ($m) = @_;
  
  vprint "Parp-ID: $m->{parp_id}\n";

  log_to_file <<EOF;
From: $m->{from}
To: $m->{to}
EOF

  log_to_file "Cc: $m->{cc}\n" if $m->{cc};
  log_to_file "Subject: $m->{subject}\n";

  if ($m->{parp_id}) {
  }
  elsif (1) {
    fatal("Parp-ID not defined",
          "\$m:\n", Dumper($m),
         );
  }
  # the following cases should never happen
  elsif ($m->{id} && $m->{id} ne '<>') {
    vprint "Message-ID: $m->{id}\n";
  }
  elsif ($m->{date}) {
    vprint "Date: $m->{date}\n";
  }
  elsif ($m->{subject}) {
    vprint "Subject: $m->{subject}\n";
  }
  elsif ($m->{from}) {
    vprint "From: $m->{from}\n";
  }
  else {
    vprint "From $m->{env_from}\n";
  }
  log_to_file "\n";
}

sub die_handler {
  my ($error) = @_;

  fatal($error, "Called via DIE handler\n");
  exit 255;
}


