Dre4m Shell
Server IP : 85.214.239.14  /  Your IP : 3.144.6.9
Web Server : Apache/2.4.62 (Debian)
System : Linux h2886529.stratoserver.net 4.9.0 #1 SMP Tue Jan 9 19:45:01 MSK 2024 x86_64
User : www-data ( 33)
PHP Version : 7.4.18
Disable Function : pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare,
MySQL : OFF  |  cURL : OFF  |  WGET : ON  |  Perl : ON  |  Python : ON  |  Sudo : ON  |  Pkexec : OFF
Directory :  /proc/3/task/3/cwd/proc/2/root/usr/share/perl5/Mail/SpamAssassin/Plugin/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ HOME SHELL ]     

Current File : /proc/3/task/3/cwd/proc/2/root/usr/share/perl5/Mail/SpamAssassin/Plugin/HashBL.pm
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

=head1 NAME

HashBL - query hashed (and unhashed) DNS blocklists

=head1 SYNOPSIS

  loadplugin Mail::SpamAssassin::Plugin::HashBL

  # NON-WORKING usage examples below, replace xxx.example.invalid with real list
  # See documentation below for detailed usage

  header   HASHBL_EMAIL eval:check_hashbl_emails('ebl.example.invalid')
  describe HASHBL_EMAIL Message contains email address found on EBL
  tflags   HASHBL_EMAIL net

  # rewrite googlemail.com -> gmail.com, applied before acl/welcomelist
  hashbl_email_domain_alias gmail.com googlemail.com
  # only query gmail.com addresses
  hashbl_acl_freemail gmail.com
  header   HASHBL_OSENDR eval:check_hashbl_emails('rbl.example.invalid/A', 'md5/max=10/shuffle', 'X-Original-Sender', '^127\.', 'freemail')
  describe HASHBL_OSENDR Message contains email address found on HASHBL
  tflags   HASHBL_OSENDR net

  body     HASHBL_BTC eval:check_hashbl_bodyre('btcbl.example.invalid', 'sha1/max=10/shuffle', '\b([13][a-km-zA-HJ-NP-Z1-9]{25,34})\b')
  describe HASHBL_BTC Message contains BTC address found on BTCBL
  tflags   HASHBL_BTC net

  header   HASHBL_URI eval:check_hashbl_uris('rbl.example.invalid', 'sha1', '^127\.0\.0\.32$')
  describe HASHBL_URI Message contains uri found on rbl
  tflags   HASHBL_URI net

  body     HASHBL_ATTACHMENT eval:check_hashbl_attachments('attbl.example.invalid', 'sha256')
  describe HASHBL_ATTACHMENT Message contains attachment found on attbl
  tflags   HASHBL_ATTACHMENT net

  # Capture tag using SA 4.0 regex named capture feature
  header   __X_SOME_ID X-Some-ID =~ /^(?<XSOMEID>\d{10,20})$/
  # Query the tag value as is from a DNSBL
  header   HASHBL_TAG eval:check_hashbl_tag('idbl.example.invalid/A', 'raw', 'XSOMEID', '^127\.')

=head1 DESCRIPTION

This plugin supports multiple types of hashed or unhashed DNS blocklist queries.

=over 4

=item Common OPTS that apply to all functions:

  raw      no hashing, query as is (can break if value is not valid DNS label)
  md5      hash query with MD5
  sha1     hash query with SHA1
  sha256   hash query with Base32 encoded SHA256
  case     keep case before hashing, default is to lowercase
  max=x	   maximum number of queries (defaults to 10 if not specified)
  shuffle  if max exceeded, random shuffle queries before truncating to limit

Multiple options can be separated with slash.

When rule OPTS is empty ('') or missing, default is used as documented by
each query type.  If any options are defined, then all needed options must
be explicitly defined.

=back 

=over 4

=item header RULE check_hashbl_emails('bl.example.invalid/A', 'OPTS', 'HEADERS', '^127\.')

Check email addresses from DNS list.  Note that "body" can be specified
along with headers to search message body for emails.  Rule type must always
be "header".

Optional DNS query type can be appended to list with /A (default) or /TXT.

Default OPTS: sha1/notag/noquote/max=10/shuffle

Additional supported OPTS:

  nodot    strip username dots from email
  notag    strip username tags from email
  nouri    ignore emails inside uris
  noquote  ignore emails inside < > or possible quotings
  user     query userpart of email only
  host     query hostpart of email only
  domain   query domain of email only (hostpart+trim_domain)

Default HEADERS: ALLFROM/Reply-To/body

HEADERS refers to slash separated list of Headers to process:

  ALL           all headers
  ALLFROM       all From headers as returned by $pms->all_from_addrs()
  EnvelopeFrom  message envelope from (Return-Path etc)
  <HeaderName>  any header as used with header rules or $pms->get()
  body          all emails found in message body

If HEADERS is empty ('') or missing, default is used.

Optional subtest regexp to match DNS answer (default: '^127\.').

For existing public email blocklist, see: http://msbl.org/ebl.html

  # Working example, see https://msbl.org/ebl.html before usage
  header   HASHBL_EMAIL eval:check_hashbl_emails('ebl.msbl.org')
  describe HASHBL_EMAIL Message contains email address found on EBL
  tflags   HASHBL_EMAIL net

Default regex for matching and capturing emails can be overridden with
C<hashbl_email_regex>.  Likewise, the default welcomelist can be changed with
C<hashbl_email_welcomelist>.  Only change if you know what you are doing, see
plugin source code for the defaults.  Example: hashbl_email_regex \S+@\S+.com

=back

=over 4

=item header RULE check_hashbl_uris('bl.example.invalid/A', 'OPTS', '^127\.')

Check all URIs parsed from message from DNS list.

Optional DNS query type can be appended to list with /A (default) or /TXT.

Default OPTS: sha1/max=10/shuffle

Optional subtest regexp to match DNS answer (default: '^127\.').

=back

=over 4

=item [raw]body RULE check_hashbl_bodyre('bl.example.invalid/A', 'OPTS', '\b(match)\b', '^127\.')

Search body for matching regexp and query the string captured.  Regexp must
have a single capture ( ) for the string ($1).  Rule type must be "body" or
"rawbody".

Optional DNS query type can be appended to list with /A (default) or /TXT.

Default OPTS: sha1/max=10/shuffle

Additional supported OPTS:

  num      remove the chars from the match that are not numbers

Optional subtest regexp to match DNS answer (default: '^127\.').

=back

=over 4

=item header RULE check_hashbl_tag('bl.example.invalid/A', 'OPTS', 'TAGNAME', '^127\.')

Query value of SpamAssassin tag _TAGNAME_ from DNS list.

Optional DNS query type can be appended to list with /A (default) or /TXT.

Default OPTS: sha1/max=10/shuffle

Additional supported OPTS:

  ip        only query if value is valid IPv4/IPv6 address
  ipv4      only query if value is valid IPv4 address
  ipv6      only query if value is valid IPv6 address
  revip     reverse IP before query
  fqdn      only query if value is valid FQDN (is_fqdn_valid)
  tld       only query if value has valid TLD (is_domain_valid)
  trim      trim name from hostname to domain (trim_domain)

  If both ip/ipv4/ipv6 and fqdn/tld are enabled, only either of them is
  required to match.  Both fqdn and tld are needed for complete FQDN+TLD
  check.

Optional subtest regexp to match DNS answer (default: '^127\.').

=back

=over 4

=item header RULE check_hashbl_attachments('bl.example.invalid/A', 'OPTS', '^127\.')

Check all all message attachments (mimeparts) from DNS list.

Optional DNS query type can be appended to list with /A (default) or /TXT.

Default OPTS: sha1/max=10/shuffle

Additional supported OPTS:

  minsize=x  skip any parts smaller than x bytes
  maxsize=x  skip any parts larger than x bytes

Optional subtest regexp to match DNS answer (default: '^127\.').

Specific attachment filenames can be skipped with C<hashbl_ignore>.  For
example "hashbl_ignore safe.pdf".

Specific mime types can be skipped with C<hashbl_ignore>.  For example
"hashbl_ignore text/plain".

=back

=over 4

=item hashbl_ignore value [value...]

Skip any type of query, if either the hash or original value (email for
example) matches.  Multiple values can be defined, separated by whitespace. 
Matching is case-insensitive.

Any host or its domain part matching uridnsbl_skip_domains is also ignored
by default.

=back

=cut

package Mail::SpamAssassin::Plugin::HashBL;
use strict;
use warnings;
use re 'taint';

my $VERSION = 0.101;

use Digest::MD5 qw(md5_hex);
use Digest::SHA qw(sha1_hex sha256);

use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Constants qw(:ip);
use Mail::SpamAssassin::Util qw(compile_regexp is_fqdn_valid reverse_ip_address
                                base32_encode);

our @ISA = qw(Mail::SpamAssassin::Plugin);

sub dbg { my $msg = shift; Mail::SpamAssassin::Plugin::dbg("HashBL: $msg", @_); }

sub new {
  my ($class, $mailsa) = @_;

  $class = ref($class) || $class;
  my $self = $class->SUPER::new($mailsa);
  bless ($self, $class);

  # are network tests enabled?
  if ($mailsa->{local_tests_only}) {
    $self->{hashbl_available} = 0;
    dbg("local tests only, disabling HashBL");
  } else {
    $self->{hashbl_available} = 1;
  }

  $self->{evalfuncs} = {
    'check_hashbl_emails' => $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS,
    'check_hashbl_uris' => $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS,
    'check_hashbl_bodyre' => $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS,
    'check_hashbl_tag' => $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS,
    'check_hashbl_attachments' => $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS,
  };
  while (my ($func, $type) = each %{$self->{evalfuncs}}) {
    $self->register_eval_rule($func, $type);
  }
  $self->set_config($mailsa->{conf});

  return $self;
}

sub set_config {
  my($self, $conf) = @_;
  my @cmds;

  push (@cmds, {
    setting => 'hashbl_ignore',
    is_admin => 1,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
    default => {},
    code => sub {
      my ($self, $key, $value, $line) = @_;
      if (!defined $value || $value eq '') {
        return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
      }
      foreach my $str (split (/\s+/, $value)) {
        $self->{hashbl_ignore}->{lc $str} = 1;
      }
    }
  });

  push (@cmds, {
    setting => 'hashbl_email_domain_alias',
    is_admin => 1,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
    default => {},
    code => sub {
      my ($self, $key, $value, $line) = @_;
      if (!defined $value || $value eq '') {
        return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
      }
      my @vals = split(/\s+/, lc $value);
      if (@vals < 2 || index($value, '@') >= 0) {
        return $Mail::SpamAssassin::Conf::INVALID_VALUE;
      }
      my $domain = shift @vals;
      foreach my $alias (@vals) {
        $self->{hashbl_email_domain_alias}->{$alias} = $domain;
      }
    }
  });

  push (@cmds, {
    setting => 'hashbl_email_regex',
    is_admin => 1,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
    # full email regex v0.02
    default => qr/(?i)
      (?=.{0,64}\@)				# limit userpart to 64 chars (and speed up searching?)
      (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-])	# start boundary
      (						# capture email
      [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+		# no dot in beginning
      (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)*	# no consecutive dots, no ending dot
      \@
      (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
      _TLDS_ # ends with valid tld, _TLDS_ is template which will be replaced in finish_parsing_end()
      )
    /x,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      if (!defined $value || $value eq '') {
        return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
      }
      my ($rec, $err) = compile_regexp($value, 0);
      if (!$rec) {
        dbg("config: invalid hashbl_email_regex '$value': $err");
        return $Mail::SpamAssassin::Conf::INVALID_VALUE;
      }
      $self->{hashbl_email_regex} = $rec;
    }
  });

  push (@cmds, {
    setting => 'hashbl_email_welcomelist',
    aliases => ['hashbl_email_whitelist'], # removed in 4.1
    is_admin => 1,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    default => qr/(?i)
      ^(?:
          abuse|support|sales|info|helpdesk|contact|kontakt
        | (?:post|host|domain)master
        | undisclosed.*                     # yahoo.com etc(?)
        | request-[a-f0-9]{16}              # live.com
        | bounced?-                         # yahoo.com etc
        | [a-f0-9]{8}(?:\.[a-f0-9]{8}|-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) # gmail msgids?
        | .+=.+=.+                          # gmail forward
      )\@
    /x,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      if (!defined $value || $value eq '') {
      }
      my ($rec, $err) = compile_regexp($value, 0);
      if (!$rec) {
        dbg("config: invalid hashbl_email_welcomelist '$value': $err");
        return $Mail::SpamAssassin::Conf::INVALID_VALUE;
      }
      $self->{hashbl_email_welcomelist} = $rec;
    }
  });

  $conf->{parser}->register_commands(\@cmds);
}

sub parse_config {
  my ($self, $opt) = @_;

  if ($opt->{key} =~ /^hashbl_acl_([a-z0-9]{1,32})$/i) {
    $self->inhibit_further_callbacks();
    return 1 unless $self->{hashbl_available};

    my $acl = lc($1);
    my @opts = split(/\s+/, $opt->{value});
    foreach my $tmp (@opts) {
      if ($tmp =~ /^(\!)?(\S+)$/i) {
        my $neg = $1;
        my $value = lc($2);
        if (defined $neg) {
          $self->{hashbl_acl}{$acl}{$value} = 0;
        } else {
          next if $acl eq 'all';
          # exclusions overrides
          if (!defined $self->{hashbl_acl}{$acl}{$value}) {
            $self->{hashbl_acl}{$acl}{$value} = 1
          }
        }
      } else {
        warn("invalid acl: $tmp");
      }
    }
    return 1;
  }

  return 0;
}

sub finish_parsing_end {
  my ($self, $opts) = @_;

  return 0 if !$self->{hashbl_available};

  # valid_tlds_re will be available at finish_parsing_end, compile it now,
  # we only need to do it once and before possible forking
  # replace _TLDS_ with valid list of TLDs
  $opts->{conf}->{hashbl_email_regex} =~ s/_TLDS_/$self->{main}->{registryboundaries}->{valid_tlds_re}/g;
  #dbg("hashbl_email_regex: $opts->{conf}->{hashbl_email_regex}");
  $opts->{conf}->{hashbl_email_welcomelist} =~ s/_TLDS_/$self->{main}->{registryboundaries}->{valid_tlds_re}/g;
  #dbg("hashbl_email_welcomelist: $opts->{conf}->{hashbl_email_regex}");

  return 0;
}

sub _parse_opts {
  my %opts;
  foreach my $o (split(/\s*\/\s*/, lc $_[0])) {
    my ($k, $v) = split(/=/, $o);
    $opts{$k} = defined $v ? $v : 1;
  }
  return \%opts;
}

sub _get_emails {
  my ($self, $pms, $opts, $from, $acl) = @_;
  my $conf = $pms->{conf};

  my @emails; # keep find order
  my %seen;

  foreach my $hdr (split(/\s*\/\s*/, $from)) {
    my $parsed_emails = $self->_parse_emails($pms, $opts, $hdr);
    foreach my $email (@$parsed_emails) {
      my ($username, $domain) = ($email =~ /(.*)\@(.+)/);
      next unless defined $domain;
      if (exists $conf->{hashbl_email_domain_alias}->{lc $domain}) {
        $domain = $conf->{hashbl_email_domain_alias}->{lc $domain};
        $email = $username.'@'.$domain;
      }
      next if $seen{$email}++;
      next if defined $acl && $acl ne 'all' && !$self->{hashbl_acl}{$acl}{$domain};
      push @emails, $email;
    }
  }

  return \@emails;
}

sub _parse_emails {
  my ($self, $pms, $opts, $hdr) = @_;

  if (exists $pms->{hashbl_email_cache}{$hdr}) {
    return $pms->{hashbl_email_cache}{$hdr};
  }

  if ($hdr eq 'ALLFROM') {
    my @emails = $pms->all_from_addrs();
    return $pms->{hashbl_email_cache}{$hdr} = \@emails;
  }

  if (!exists $pms->{hashbl_welcomelist}) {
    %{$pms->{hashbl_welcomelist}} = map { lc($_) => 1 }
        ( $pms->get("X-Original-To:addr"),
          $pms->get("Apparently-To:addr"),
          $pms->get("Delivered-To:addr"),
          $pms->get("Envelope-To:addr"),
        );
    delete $pms->{hashbl_welcomelist}{''};
  }

  my $str = '';
  if ($hdr eq 'ALL') {
    $str = join("\n", $pms->get('ALL'));
  } elsif ($hdr eq 'body') {
    # get all <a href="mailto:", since they don't show up on stripped_body
    my $uris = $pms->get_uri_detail_list();
    while (my($uri, $info) = each %{$uris}) {
      if (defined $info->{types}->{a} && !defined $info->{types}->{parsed}) {
        if ($uri =~ /^mailto:(.+)/i) {
          $str .= "$1\n";
        }
      }
    }
    my $body = join('', @{$pms->get_decoded_stripped_body_text_array()});
    if ($opts->{nouri}) {
      # strip urls with possible emails inside
      $body =~ s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
    }
    if ($opts->{noquote}) {
      # strip emails contained in <>, not mailto:
      # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
      $body =~ s#<?(?<!mailto:)$pms->{conf}->{hashbl_email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
    }
    $str .= $body;
  } else {
    $str .= join("\n", $pms->get($hdr));
  }

  my @emails; # keep find order
  my %seen;

  while ($str =~ /($pms->{conf}->{hashbl_email_regex})/g) {
    next if $seen{$1}++;
    push @emails, $1;
  }

  return $pms->{hashbl_email_cache}{$hdr} = \@emails;
}

sub check_hashbl_emails {
  my ($self, $pms, $list, $opts, $from, $subtest, $acl) = @_;

  return 0 if !$self->{hashbl_available};
  return 0 if !$pms->is_dns_available();

  my $conf = $pms->{conf};
  my $rulename = $pms->get_current_eval_rule_name();

  if (!defined $list) {
    warn "HashBL: $rulename blocklist argument missing\n";
    return 0;
  }

  if (defined $acl && $acl ne 'all' && !exists $self->{hashbl_acl}{$acl}) {
    warn "HashBL: $rulename acl '$acl' not defined\n";
    return 0;
  }

  if ($subtest) {
    my ($rec, $err) = compile_regexp($subtest, 0);
    if (!$rec) {
      warn "HashBL: $rulename invalid subtest regex: $@\n";
      return 0;
    }
    $subtest = $rec;
  }

  # Parse opts, defaults
  $opts = _parse_opts($opts || 'sha1/notag/noquote/max=10/shuffle');
  $from = 'ALLFROM/Reply-To/body' if !$from;

  # Find all emails
  my $emails = $self->_get_emails($pms, $opts, $from, $acl);
  if (!@$emails) {
    if (defined $acl) {
      dbg("$rulename: no emails found ($from) on acl $acl");
    } else {
      dbg("$rulename: no emails found ($from)");
    }
    return 0;
  } else {
    dbg("$rulename: raw emails found: ".join(', ', @$emails));
  }

  # Filter list
  my @filtered_emails; # keep order
  my %seen;
  foreach my $email (@$emails) {
    next if $seen{$email}++;
    if (exists $pms->{hashbl_welcomelist}{$email} ||
        $email =~ $conf->{hashbl_email_welcomelist})
    {
      dbg("query skipped, address welcomelisted: $email");
      next;
    }
    my ($username, $domain) = ($email =~ /(.*)\@(.*)/);
    # Don't check uridnsbl_skip_domains when explicit acl is used
    if (!defined $acl) {
      if (exists $conf->{uridnsbl_skip_domains}->{lc $domain}) {
        dbg("query skipped, uridnsbl_skip_domains: $email");
        next;
      }
      my $dom = $pms->{main}->{registryboundaries}->trim_domain($domain);
      if (exists $conf->{uridnsbl_skip_domains}->{lc $dom}) {
        dbg("query skipped, uridnsbl_skip_domains: $email");
        next;
      }
    }
    $username =~ tr/.//d if $opts->{nodot};
    $username =~ s/\+.*// if $opts->{notag};
    # Final query assembly
    my $qmail;
    if ($opts->{host} || $opts->{domain}) {
      if ($opts->{domain}) {
        $domain = $pms->{main}->{registryboundaries}->trim_domain($domain);
      }
      $qmail = $domain;
    } elsif ($opts->{user}) {
      $qmail = $username;
    } else {
      $qmail = $username.'@'.$domain;
    }
    $qmail = lc $qmail  if !$opts->{case};
    push @filtered_emails, $qmail;
  }

  return 0 unless @filtered_emails;

  # Unique
  @filtered_emails = do { my %seen; grep { !$seen{$_}++ } @filtered_emails; };

  # Randomize order
  if ($opts->{shuffle}) {
    Mail::SpamAssassin::Util::fisher_yates_shuffle(\@filtered_emails);
  }

  # Truncate list
  my $max = $opts->{max} || 10;
  $#filtered_emails = $max-1 if scalar @filtered_emails > $max;

  my $queries;
  foreach my $email (@filtered_emails) {
    my $ret = $self->_submit_query($pms, $rulename, $email, $list, $opts, $subtest);
    $queries++ if defined $ret;
  }

  return 0 if !$queries; # no query started
  return; # return undef for async status
}

sub check_hashbl_uris {
  my ($self, $pms, $list, $opts, $subtest) = @_;

  return 0 if !$self->{hashbl_available};
  return 0 if !$pms->is_dns_available();

  my $conf = $pms->{conf};
  my $rulename = $pms->get_current_eval_rule_name();

  if (!defined $list) {
    warn "HashBL: $rulename blocklist argument missing\n";
    return 0;
  }

  if ($subtest) {
    my ($rec, $err) = compile_regexp($subtest, 0);
    if (!$rec) {
      warn "HashBL: $rulename invalid subtest regex: $@\n";
      return 0;
    }
    $subtest = $rec;
  }

  # Parse opts, defaults
  $opts = _parse_opts($opts || 'sha1/max=10/shuffle');

  if ($opts->{raw}) {
    warn "HashBL: $rulename raw option invalid\n";
    return 0;
  }

  my $uris = $pms->get_uri_detail_list();
  my %seen;
  my @filtered_uris;

URI:
  while (my($uri, $info) = each %{$uris}) {
    # we want to skip mailto: uris
    next if ($uri =~ /^mailto:/i);
    next if $seen{$uri}++;

    # no hosts/domains were found via this uri, so skip
    next unless $info->{hosts};
    next unless $info->{cleaned};
    next unless $info->{types}->{a} || $info->{types}->{parsed};
    foreach my $host (keys %{$info->{hosts}}) {
      if (exists $conf->{uridnsbl_skip_domains}->{$host} ||
          exists $conf->{uridnsbl_skip_domains}->{$info->{hosts}->{$host}})
      {
        dbg("query skipped, uridnsbl_skip_domains: $uri");
        next URI;
      }
    }
    foreach my $uri (@{$info->{cleaned}}) {
      # check url
      push @filtered_uris, $opts->{case} ? $uri : lc($uri);
    }
  }

  return 0 unless @filtered_uris;

  # Unique
  @filtered_uris = do { my %seen; grep { !$seen{$_}++ } @filtered_uris; };

  # Randomize order
  if ($opts->{shuffle}) {
    Mail::SpamAssassin::Util::fisher_yates_shuffle(\@filtered_uris);
  }

  # Truncate list
  my $max = $opts->{max} || 10;
  $#filtered_uris = $max-1 if scalar @filtered_uris > $max;

  my $queries;
  foreach my $furi (@filtered_uris) {
    my $ret = $self->_submit_query($pms, $rulename, $furi, $list, $opts, $subtest);
    $queries++ if defined $ret;
  }

  return 0 if !$queries; # no query started
  return; # return undef for async status
}

sub check_hashbl_bodyre {
  my ($self, $pms, $bodyref, $list, $opts, $re, $subtest) = @_;

  return 0 if !$self->{hashbl_available};
  return 0 if !$pms->is_dns_available();

  my $rulename = $pms->get_current_eval_rule_name();

  if (!defined $list) {
    warn "HashBL: $rulename blocklist argument missing\n";
    return 0;
  }

  if (!$re) {
    warn "HashBL: $rulename missing body regex\n";
    return 0;
  }
  my ($rec, $err) = compile_regexp($re, 0);
  if (!$rec) {
    warn "HashBL: $rulename invalid body regex: $@\n";
    return 0;
  }
  $re = $rec;

  if ($subtest) {
    my ($rec, $err) = compile_regexp($subtest, 0);
    if (!$rec) {
      warn "HashBL: $rulename invalid subtest regex: $@\n";
      return 0;
    }
    $subtest = $rec;
  }

  # Parse opts, defaults
  $opts = _parse_opts($opts || 'sha1/max=10/shuffle');

  # Search body
  my @matches;
  my %seen;

  if (ref($bodyref) eq 'ARRAY') {
    # body, rawbody
    foreach my $body (@$bodyref) {
      while ($body =~ /$re/gs) {
        next if !defined $1;
        my $match = $opts->{case} ? $1 : lc($1);
        if($opts->{num}) {
          $match =~ tr/0-9//cd;
        }
        next if $seen{$match}++;
        push @matches, $match if $match ne '';
      }
    }
  } else {
    # full
    while ($$bodyref =~ /$re/gs) {
      next if !defined $1;
      my $match = $opts->{case} ? $1 : lc($1);
      if($opts->{num}) {
        $match =~ tr/0-9//cd;
      }
      next if $seen{$match}++;
      push @matches, $match if $match ne '';
    }
  }

  if (!@matches) {
    dbg("$rulename: no matches found");
    return 0;
  } else {
    dbg("$rulename: matches found: '".join("', '", @matches)."'");
  }

  # Unique
  @matches = do { my %seen; grep { !$seen{$_}++ } @matches; };

  # Randomize order
  if ($opts->{shuffle}) {
    Mail::SpamAssassin::Util::fisher_yates_shuffle(\@matches);
  }

  # Truncate list
  my $max = $opts->{max} || 10;
  $#matches = $max-1 if scalar @matches > $max;

  my $queries;
  foreach my $match (@matches) {
    my $ret = $self->_submit_query($pms, $rulename, $match, $list, $opts, $subtest);
    $queries++ if defined $ret;
  }

  return 0 if !$queries; # no query started
  return; # return undef for async status
}

sub check_hashbl_tag {
  my ($self, $pms, $list, $opts, $tag, $subtest) = @_;

  return 0 if !$self->{hashbl_available};
  return 0 if !$pms->is_dns_available();

  my $rulename = $pms->get_current_eval_rule_name();

  if (!defined $list) {
    warn "HashBL: $rulename blocklist argument missing\n";
    return 0;
  }

  if (!defined $tag || $tag eq '') {
    warn "HashBL: $rulename tag argument missing\n";
    return 0;
  }

  if ($subtest) {
    my ($rec, $err) = compile_regexp($subtest, 0);
    if (!$rec) {
      warn "HashBL: $rulename invalid subtest regex: $@\n";
      return 0;
    }
    $subtest = $rec;
  }

  # Parse opts, defaults
  $opts = _parse_opts($opts || 'sha1/max=10/shuffle');
  $opts->{fqdn} = $opts->{tld} = 1  if $opts->{trim};

  # Strip possible _ delimiters
  $tag =~ s/^_(.+)_$/$1/;

  # Force uppercase
  $tag = uc($tag);

  $pms->action_depends_on_tags($tag, sub {
    $self->_check_hashbl_tag($pms, $list, $opts, $tag, $subtest, $rulename);
  });

  return; # return undef for async status
}

sub _check_hashbl_tag {
  my ($self, $pms, $list, $opts, $tag, $subtest, $rulename) = @_;
  my $conf = $pms->{conf};

  # Get raw array of tag values, get_tag() returns joined string
  my $valref = $pms->get_tag_raw($tag);
  my @vals = ref $valref ? @$valref : $valref;

  # Lowercase
  @vals = map { lc } @vals  if !$opts->{case};

  # Options
  foreach my $value (@vals) {
    my $is_ip = $value =~ IS_IP_ADDRESS;
    if ($opts->{ip}) {
      if (!$is_ip) {
        $value = undef;
        next;
      }
    }
    if ($opts->{ipv4}) {
      if ($value =~ IS_IPV4_ADDRESS) {
        $is_ip = 1;
      } else {
        $value = undef;
        next;
      }
    }
    if ($opts->{ipv6}) {
      if (!$is_ip || $value =~ IS_IPV4_ADDRESS) {
        $value = undef;
        next;
      }
    }
    if ($is_ip && $opts->{revip}) {
      $value = reverse_ip_address($value);
    }
    if (!$is_ip) {
      my $fqdn_valid = is_fqdn_valid($value);
      if ($opts->{fqdn} && !$fqdn_valid) {
        $value = undef;
        next;
      }
      my $domain;
      if ($fqdn_valid) {
        $domain = $pms->{main}->{registryboundaries}->trim_domain($value);
        if (exists $conf->{uridnsbl_skip_domains}->{lc $value} ||
            exists $conf->{uridnsbl_skip_domains}->{lc $domain})
        {
          dbg("query skipped, uridnsbl_skip_domains: $value");
          $value = undef;
          next;
        }
      }
      if ($opts->{tld} && !$pms->{main}->{registryboundaries}->is_domain_valid($value)) {
        $value = undef;
        next;
      }
      if ($opts->{trim} && $domain) {
        $value = $domain;
      }
    }
  }

  # Unique (and remove empty)
  @vals = do { my %seen; grep { defined $_ && !$seen{$_}++ } @vals; };

  if (!@vals) {
    $pms->rule_ready($rulename); # mark rule ready for metas
    return;
  }

  # Randomize order
  if ($opts->{shuffle}) {
    Mail::SpamAssassin::Util::fisher_yates_shuffle(\@vals);
  }

  # Truncate list
  my $max = $opts->{max} || 10;
  $#vals = $max-1 if scalar @vals > $max;

  foreach my $value (@vals) {
    $self->_submit_query($pms, $rulename, $value, $list, $opts, $subtest);
  }

  return;
}

sub check_hashbl_attachments {
  my ($self, $pms, undef, $list, $opts, $subtest) = @_;

  return 0 if !$self->{hashbl_available};
  return 0 if !$pms->is_dns_available();

  my $rulename = $pms->get_current_eval_rule_name();

  if (!defined $list) {
    warn "HashBL: $rulename blocklist argument missing\n";
    return 0;
  }

  if ($subtest) {
    my ($rec, $err) = compile_regexp($subtest, 0);
    if (!$rec) {
      warn "HashBL: $rulename invalid subtest regex: $@\n";
      return 0;
    }
    $subtest = $rec;
  }

  # Parse opts, defaults
  $opts = _parse_opts($opts || 'sha1/max=10/shuffle');

  if ($opts->{raw}) {
    warn "HashBL: $rulename raw option invalid\n";
    return 0;
  }

  my %seen;
  my @hashes;
  foreach my $part ($pms->{msg}->find_parts(qr/./, 1, 1)) {
    my $body = $part->decode();
    next if !defined $body || $body eq '';
    my $type = lc $part->{'type'} || '';
    my $name = $part->{'name'} || '';
    my $len = length($body);
    dbg("found attachment, type: $type, length: $len, name: $name");
    if (exists $pms->{conf}->{hashbl_ignore}->{$type}) {
      dbg("query skipped, ignored type: $type");
      next;
    }
    if (exists $pms->{conf}->{hashbl_ignore}->{lc $name}) {
      dbg("query skipped, ignored filename: $name");
      next;
    }
    if ($opts->{minsize} && $len < $opts->{minsize}) {
      dbg("query skipped, size smaller than $opts->{minsize}");
      next;
    }
    if ($opts->{maxsize} && $len > $opts->{minsize}) {
      dbg("query skipped, size larger than $opts->{maxsize}");
      next;
    }
    my $hash = $self->_hash($opts, $body);
    next if $seen{$hash}++;
    push @hashes, $hash;
  }

  return 0 unless @hashes;

  # Randomize order
  if ($opts->{shuffle}) {
    Mail::SpamAssassin::Util::fisher_yates_shuffle(\@hashes);
  }

  # Truncate list
  my $max = $opts->{max} || 10;
  $#hashes = $max-1 if scalar @hashes > $max;

  my $queries;
  foreach my $hash (@hashes) {
    my $ret = $self->_submit_query($pms, $rulename, $hash, $list, $opts, $subtest, 1);
    $queries++ if defined $ret;
  }

  return 0 if !$queries; # no query started
  return; # return undef for async status
}

sub _hash {
  my ($self, $opts, $value) = @_;

  if ($opts->{sha256}) {
    utf8::encode($value) if utf8::is_utf8($value); # sha256 expects bytes
    return lc base32_encode(sha256($value));
  } elsif ($opts->{sha1}) {
    utf8::encode($value) if utf8::is_utf8($value); # sha1_hex expects bytes
    return sha1_hex($value);
  } elsif ($opts->{md5}) {
    utf8::encode($value) if utf8::is_utf8($value); # md5_hex expects bytes
    return md5_hex($value);
  } else {
    return $value;
  }
}

sub _submit_query {
  my ($self, $pms, $rulename, $value, $list, $opts, $subtest, $already_hashed) = @_;
  my $conf = $pms->{conf};

  if (!$already_hashed && exists $conf->{hashbl_ignore}->{lc $value}) {
    dbg("query skipped, ignored string: $value");
    return 0;
  }

  my $hash = $already_hashed ? $value : $self->_hash($opts, $value);
  if (exists $conf->{hashbl_ignore}->{lc $hash}) {
    dbg("query skipped, ignored hash: $value");
    return 0;
  }

  dbg("querying $value ($hash) from $list");

  my $type = $list =~ s,/(A|TXT)$,,i ? uc($1) : 'A';
  my $lookup = "$hash.$list";

  my $ent = {
    rulename => $rulename,
    type => "HASHBL",
    hash => $hash,
    value => $value,
    subtest => $subtest,
  };
  return $pms->{async}->bgsend_and_start_lookup($lookup, $type, undef, $ent,
    sub { my ($ent, $pkt) = @_; $self->_finish_query($pms, $ent, $pkt); },
    master_deadline => $pms->{master_deadline}
  );
}

sub _finish_query {
  my ($self, $pms, $ent, $pkt) = @_;

  my $rulename = $ent->{rulename};

  if (!$pkt) {
    # $pkt will be undef if the DNS query was aborted (e.g. timed out)
    dbg("lookup was aborted: $rulename $ent->{key}");
    return;
  }

  $pms->rule_ready($rulename); # mark rule ready for metas

  my $dnsmatch = $ent->{subtest} ? $ent->{subtest} : qr/^127\./;
  my @answer = $pkt->answer;
  foreach my $rr (@answer) {
    if ($rr->address =~ $dnsmatch) {
      dbg("$rulename: $ent->{zone} hit '$ent->{value}'");
      $ent->{value} =~ s/\@/[at]/g;
      $pms->test_log($ent->{value}, $rulename);
      $pms->got_hit($rulename, '', ruletype => 'eval');
      return;
    }
  }
}

# Version features
sub has_hashbl_bodyre { 1 }
sub has_hashbl_bodyre_num { 1 }
sub has_hashbl_emails { 1 }
sub has_hashbl_uris { 1 }
sub has_hashbl_ignore { 1 }
sub has_hashbl_email_regex { 1 }
sub has_hashbl_email_welcomelist { 1 }
sub has_hashbl_email_whitelist { 1 }
sub has_hashbl_tag { 1 }
sub has_hashbl_sha256 { 1 }
sub has_hashbl_attachments { 1 }
sub has_hashbl_email_domain { 1 } # user/host/domain option for emails
sub has_hashbl_email_domain_alias { 1 } # hashbl_email_domain_alias

1;

Anon7 - 2022
AnonSec Team