perl / FromCPAN / Mail / Address.pmon commit userdiff: two simplifications of patterns for rust (33be7b3)
   1# Copyrights 1995-2018 by [Mark Overmeer].
   2#  For other contributors see ChangeLog.
   3# See the manual pages for details on the licensing terms.
   4# Pod stripped from pm file by OODoc 2.02.
   5# This code is part of the bundle MailTools.  Meta-POD processed with
   6# OODoc into POD and HTML manual-pages.  See README.md for Copyright.
   7# Licensed under the same terms as Perl itself.
   8
   9package Mail::Address;
  10use vars '$VERSION';
  11$VERSION = '2.20';
  12
  13use strict;
  14
  15use Carp;
  16
  17# use locale;   removed in version 1.78, because it causes taint problems
  18
  19sub Version { our $VERSION }
  20
  21
  22
  23# given a comment, attempt to extract a person's name
  24sub _extract_name
  25{   # This function can be called as method as well
  26    my $self = @_ && ref $_[0] ? shift : undef;
  27
  28    local $_ = shift
  29        or return '';
  30
  31    # Using encodings, too hard. See Mail::Message::Field::Full.
  32    return '' if m/\=\?.*?\?\=/;
  33
  34    # trim whitespace
  35    s/^\s+//;
  36    s/\s+$//;
  37    s/\s+/ /;
  38
  39    # Disregard numeric names (e.g. 123456.1234@compuserve.com)
  40    return "" if /^[\d ]+$/;
  41
  42    s/^\((.*)\)$/$1/; # remove outermost parenthesis
  43    s/^"(.*)"$/$1/;   # remove outer quotation marks
  44    s/\(.*?\)//g;     # remove minimal embedded comments
  45    s/\\//g;          # remove all escapes
  46    s/^"(.*)"$/$1/;   # remove internal quotation marks
  47    s/^([^\s]+) ?, ?(.*)$/$2 $1/; # reverse "Last, First M." if applicable
  48    s/,.*//;
  49
  50    # Change casing only when the name contains only upper or only
  51    # lower cased characters.
  52    unless( m/[A-Z]/ && m/[a-z]/ )
  53    {   # Set the case of the name to first char upper rest lower
  54        s/\b(\w+)/\L\u$1/igo;  # Upcase first letter on name
  55        s/\bMc(\w)/Mc\u$1/igo; # Scottish names such as 'McLeod'
  56        s/\bo'(\w)/O'\u$1/igo; # Irish names such as 'O'Malley, O'Reilly'
  57        s/\b(x*(ix)?v*(iv)?i*)\b/\U$1/igo; # Roman numerals, eg 'Level III Support'
  58    }
  59
  60    # some cleanup
  61    s/\[[^\]]*\]//g;
  62    s/(^[\s'"]+|[\s'"]+$)//g;
  63    s/\s{2,}/ /g;
  64
  65    $_;
  66}
  67
  68sub _tokenise
  69{   local $_ = join ',', @_;
  70    my (@words,$snippet,$field);
  71
  72    s/\A\s+//;
  73    s/[\r\n]+/ /g;
  74
  75    while ($_ ne '')
  76    {   $field = '';
  77        if(s/^\s*\(/(/ )    # (...)
  78        {   my $depth = 0;
  79
  80     PAREN: while(s/^(\(([^\(\)\\]|\\.)*)//)
  81            {   $field .= $1;
  82                $depth++;
  83                while(s/^(([^\(\)\\]|\\.)*\)\s*)//)
  84                {   $field .= $1;
  85                    last PAREN unless --$depth;
  86                    $field .= $1 if s/^(([^\(\)\\]|\\.)+)//;
  87                }
  88            }
  89
  90            carp "Unmatched () '$field' '$_'"
  91                if $depth;
  92
  93            $field =~ s/\s+\Z//;
  94            push @words, $field;
  95
  96            next;
  97        }
  98
  99        if( s/^("(?:[^"\\]+|\\.)*")\s*//       # "..."
 100         || s/^(\[(?:[^\]\\]+|\\.)*\])\s*//    # [...]
 101         || s/^([^\s()<>\@,;:\\".[\]]+)\s*//
 102         || s/^([()<>\@,;:\\".[\]])\s*//
 103          )
 104        {   push @words, $1;
 105            next;
 106        }
 107
 108        croak "Unrecognised line: $_";
 109    }
 110
 111    push @words, ",";
 112    \@words;
 113}
 114
 115sub _find_next
 116{   my ($idx, $tokens, $len) = @_;
 117
 118    while($idx < $len)
 119    {   my $c = $tokens->[$idx];
 120        return $c if $c eq ',' || $c eq ';' || $c eq '<';
 121        $idx++;
 122    }
 123
 124    "";
 125}
 126
 127sub _complete
 128{   my ($class, $phrase, $address, $comment) = @_;
 129
 130    @$phrase || @$comment || @$address
 131       or return undef;
 132
 133    my $o = $class->new(join(" ",@$phrase), join("",@$address), join(" ",@$comment));
 134    @$phrase = @$address = @$comment = ();
 135    $o;
 136}
 137
 138#------------
 139
 140sub new(@)
 141{   my $class = shift;
 142    bless [@_], $class;
 143}
 144
 145
 146sub parse(@)
 147{   my $class = shift;
 148    my @line  = grep {defined} @_;
 149    my $line  = join '', @line;
 150
 151    my (@phrase, @comment, @address, @objs);
 152    my ($depth, $idx) = (0, 0);
 153
 154    my $tokens  = _tokenise @line;
 155    my $len     = @$tokens;
 156    my $next    = _find_next $idx, $tokens, $len;
 157
 158    local $_;
 159    for(my $idx = 0; $idx < $len; $idx++)
 160    {   $_ = $tokens->[$idx];
 161
 162        if(substr($_,0,1) eq '(') { push @comment, $_ }
 163        elsif($_ eq '<')    { $depth++ }
 164        elsif($_ eq '>')    { $depth-- if $depth }
 165        elsif($_ eq ',' || $_ eq ';')
 166        {   warn "Unmatched '<>' in $line" if $depth;
 167            my $o = $class->_complete(\@phrase, \@address, \@comment);
 168            push @objs, $o if defined $o;
 169            $depth = 0;
 170            $next = _find_next $idx+1, $tokens, $len;
 171        }
 172        elsif($depth)       { push @address, $_ }
 173        elsif($next eq '<') { push @phrase,  $_ }
 174        elsif( /^[.\@:;]$/ || !@address || $address[-1] =~ /^[.\@:;]$/ )
 175        {   push @address, $_ }
 176        else
 177        {   warn "Unmatched '<>' in $line" if $depth;
 178            my $o = $class->_complete(\@phrase, \@address, \@comment);
 179            push @objs, $o if defined $o;
 180            $depth = 0;
 181            push @address, $_;
 182        }
 183    }
 184    @objs;
 185}
 186
 187#------------
 188
 189sub phrase  { shift->set_or_get(0, @_) }
 190sub address { shift->set_or_get(1, @_) }
 191sub comment { shift->set_or_get(2, @_) }
 192
 193sub set_or_get($)
 194{   my ($self, $i) = (shift, shift);
 195    @_ or return $self->[$i];
 196
 197    my $val = $self->[$i];
 198    $self->[$i] = shift if @_;
 199    $val;
 200}
 201
 202
 203my $atext = '[\-\w !#$%&\'*+/=?^`{|}~]';
 204sub format
 205{   my @addrs;
 206
 207    foreach (@_)
 208    {   my ($phrase, $email, $comment) = @$_;
 209        my @addr;
 210
 211        if(defined $phrase && length $phrase)
 212        {   push @addr
 213              , $phrase =~ /^(?:\s*$atext\s*)+$/o ? $phrase
 214              : $phrase =~ /(?<!\\)"/             ? $phrase
 215              :                                    qq("$phrase");
 216
 217            push @addr, "<$email>"
 218                if defined $email && length $email;
 219        }
 220        elsif(defined $email && length $email)
 221        {   push @addr, $email;
 222        }
 223
 224        if(defined $comment && $comment =~ /\S/)
 225        {   $comment =~ s/^\s*\(?/(/;
 226            $comment =~ s/\)?\s*$/)/;
 227        }
 228
 229        push @addr, $comment
 230            if defined $comment && length $comment;
 231
 232        push @addrs, join(" ", @addr)
 233            if @addr;
 234    }
 235
 236    join ", ", @addrs;
 237}
 238
 239#------------
 240
 241sub name
 242{   my $self   = shift;
 243    my $phrase = $self->phrase;
 244    my $addr   = $self->address;
 245
 246    $phrase    = $self->comment
 247        unless defined $phrase && length $phrase;
 248
 249    my $name   = $self->_extract_name($phrase);
 250
 251    # first.last@domain address
 252    if($name eq '' && $addr =~ /([^\%\.\@_]+([\._][^\%\.\@_]+)+)[\@\%]/)
 253    {   ($name  = $1) =~ s/[\._]+/ /g;
 254        $name   = _extract_name $name;
 255    }
 256
 257    if($name eq '' && $addr =~ m#/g=#i)    # X400 style address
 258    {   my ($f) = $addr =~ m#g=([^/]*)#i;
 259        my ($l) = $addr =~ m#s=([^/]*)#i;
 260        $name   = _extract_name "$f $l";
 261    }
 262
 263    length $name ? $name : undef;
 264}
 265
 266
 267sub host
 268{   my $addr = shift->address || '';
 269    my $i    = rindex $addr, '@';
 270    $i >= 0 ? substr($addr, $i+1) : undef;
 271}
 272
 273
 274sub user
 275{   my $addr = shift->address || '';
 276    my $i    = rindex $addr, '@';
 277    $i >= 0 ? substr($addr,0,$i) : $addr;
 278}
 279
 2801;