[Zope] Re: Non-caching version of POPMail ?

Chris McDonough chrism@digicool.com
Tue, 3 Apr 2001 19:32:52 -0400


Whoops, wrong URL...

Here it actually is:  ;-)

[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\
015()]*(?:\\[^\x80-\xff][^\\\x80-\
xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:(?:[^(\040)<>@,;:
".\\\[\]\000-\037\x80-\xff]+(?![^(
\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|"[^\\\x80-\xff\n\015"]*(?:\\[^\x80-\
xff][^\\\x80-\xff\n\015"]*)*")[\04
0\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015(
)]*(?:\\[^\x80-\xff][^\\\x80-\xff\
n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\
x80-\xff\n\015()]*(?:(?:\\[^\x80-\
xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[
^\\\x80-\xff\n\015()]*)*\)[\040\t]
*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\00
0-\037\x80-\xff])|"[^\\\x80-\xff\n
\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[\040\t]*(?:\([^\\\x80-\xf
f\n\015()]*(?:(?:\\[^\x80-\xff]|\(
[^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80
-\xff\n\015()]*)*\)[\040\t]*)*)*@[
\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\0
15()]*(?:\\[^\x80-\xff][^\\\x80-\x
ff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\
\[\]\000-\037\x80-\xff]+(?![^(\040
)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\
xff])*\])[\040\t]*(?:\([^\\\x80-\x
ff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][
^\\\x80-\xff\n\015()]*)*\))[^\\\x8
0-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:
(?:\\[^\x80-\xff]|\([^\\\x80-\xff\
n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]
*)*\)[\040\t]*)*(?:[^(\040)<>@,;:"
.\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(
?:[^\\\x80-\xff\n\015\[\]]|\\[^\x8
0-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\
\x80-\xff\n\015()]*(?:\\[^\x80-\xf
f][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*)*|(?:[^
(\040)<>@,;:".\\\[\]\000-\037\x80-
\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|"[^\\\x80-\xff\n\015"]*(
?:\\[^\x80-\xff][^\\\x80-\xff\n\01
5"]*)*")[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]*(?:(?:\([^\\\x80-\xff
\n\015()]*(?:(?:\\[^\x80-\xff]|\([
^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-
\xff\n\015()]*)*\)|"[^\\\x80-\xff\
n\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[^()<>@,;:".\\\[\]\x80-\x
ff\000-\010\012-\037]*)*<[\040\t]*
(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?
:\\[^\x80-\xff][^\\\x80-\xff\n\015
()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:@[\040\t]*(?:\([^\\\x80-\x
ff\n\015()]*(?:(?:\\[^\x80-\xff]|\
([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x8
0-\xff\n\015()]*)*\)[\040\t]*)*(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037
\x80-\xff])|\[(?:[^\\\x80-\xff\n\0
15\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x
80-\xff]|\([^\\\x80-\xff\n\015()]*
(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\04
0\t]*)*(?:\.[\040\t]*(?:\([^\\\x80
-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xf
f][^\\\x80-\xff\n\015()]*)*\))[^\\
\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xf
f]+(?![^(\040)<>@,;:".\\\[\]\000-\
037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:
\([^\\\x80-\xff\n\015()]*(?:(?:\\[
^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]
*)*\))[^\\\x80-\xff\n\015()]*)*\)[
\040\t]*)*)*(?:,[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\(
[^\\\x80-\xff\n\015()]*(?:\\[^\x80
-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*@[\0
40\t]*(?:\([^\\\x80-\xff\n\015()]*
(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff
\n\015()]*)*\))[^\\\x80-\xff\n\015
()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<
>@,;:".\\\[\]\000-\037\x80-\xff])|
\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff
\n\015()]*(?:(?:\\[^\x80-\xff]|\([
^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-
\xff\n\015()]*)*\)[\040\t]*)*(?:\.
[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\
015()]*(?:\\[^\x80-\xff][^\\\x80-\
xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\
\\[\]\000-\037\x80-\xff]+(?![^(\04
0)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-
\xff])*\])[\040\t]*(?:\([^\\\x80-\
xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff]
[^\\\x80-\xff\n\015()]*)*\))[^\\\x
80-\xff\n\015()]*)*\)[\040\t]*)*)*)*:[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?
:(?:\\[^\x80-\xff]|\([^\\\x80-\xff
\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()
]*)*\)[\040\t]*)*)?(?:[^(\040)<>@,
;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|
"[^\\\x80-\xff\n\015"]*(?:\\[^\x80
-\xff][^\\\x80-\xff\n\015"]*)*")[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\
\[^\x80-\xff]|\([^\\\x80-\xff\n\01
5()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\
)[\040\t]*)*(?:\.[\040\t]*(?:\([^\
\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x8
0-\xff][^\\\x80-\xff\n\015()]*)*\)
)[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x8
0-\xff]+(?![^(\040)<>@,;:".\\\[\]\
000-\037\x80-\xff])|"[^\\\x80-\xff\n\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\
015"]*)*")[\040\t]*(?:\([^\\\x80-\
xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff]
[^\\\x80-\xff\n\015()]*)*\))[^\\\x
80-\xff\n\015()]*)*\)[\040\t]*)*)*@[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(
?:\\[^\x80-\xff]|\([^\\\x80-\xff\n
\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*
)*\)[\040\t]*)*(?:[^(\040)<>@,;:".
\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?
:[^\\\x80-\xff\n\015\[\]]|\\[^\x80
-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\
x80-\xff\n\015()]*(?:\\[^\x80-\xff
][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\04
0\t]*(?:\([^\\\x80-\xff\n\015()]*(
?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\
n\015()]*)*\))[^\\\x80-\xff\n\015(
)]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>
@,;:".\\\[\]\000-\037\x80-\xff])|\
[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\
n\015()]*(?:(?:\\[^\x80-\xff]|\([^
\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\
xff\n\015()]*)*\)[\040\t]*)*)*>)

----- Original Message -----
From: "Chris McDonough" <chrism@digicool.com>
To: "David Shaw" <david.shaw@zapmedia.com>; "Loren Stafford"
<lstafford@morphics.com>
Cc: <zope@zope.org>
Sent: Tuesday, April 03, 2001 7:25 PM
Subject: Re: [Zope] Re: Non-caching version of POPMail ?


> Here's a fun regex for matching email addresses from that book:
>
> #
> # Program to build a regex to match an internet email address,
> # from Chapter 7 of _Mastering Regular Expressions_ (Friedl / O'Reilly)
> # (http://www.ora.com/catalog/regexp/)
> #
> # Optimized version.
> #
> # Copyright 1997 O'Reilly & Associates, Inc.
> #
>
>
>
> # Some things for avoiding backslashitis later on.
> $esc        = '\\\\';               $Period      = '\.';
> $space      = '\040';               $tab         = '\t';
> $OpenBR     = '\[';                 $CloseBR     = '\]';
> $OpenParen  = '\(';                 $CloseParen  = '\)';
> $NonASCII   = '\x80-\xff';          $ctrl        = '\000-\037';
> $CRlist     = '\n\015';  # note: this should really be only \015.
>
> # Items 19, 20, 21
> $qtext = qq/[^$esc$NonASCII$CRlist\"]/;               # for within "..."
> $dtext = qq/[^$esc$NonASCII$CRlist$OpenBR$CloseBR]/;  # for within [...]
> $quoted_pair = qq< $esc [^$NonASCII] >; # an escaped character
>
>
############################################################################
> ##
> # Items 22 and 23, comment.
> # Impossible to do properly with a regex, so I make do by allowing at most
> # one level of nesting.
> $ctext   = qq< [^$esc$NonASCII$CRlist()] >;
>
> # $Cnested matches one non-nested comment.
> # It is unrolled, with normal of $ctext, special of $quoted_pair.
> $Cnested = qq<
>    $OpenParen                            #  (
>       $ctext*                            #     normal*
>       (?: $quoted_pair $ctext* )*        #     (special normal*)*
>    $CloseParen                           #                       )
> >;
>
> # $comment allows one level of nested parentheses
> # It is unrolled, with normal of $ctext, special of
> # ($quoted_pair|$Cnested)
> $comment = qq<
>    $OpenParen                              #  (
>        $ctext*                             #     normal*
>        (?:                                 #       (
>           (?: $quoted_pair | $Cnested )    #         special
>            $ctext*                         #         normal*
>        )*                                  #            )*
>    $CloseParen                             #                )
> >;
>
>
############################################################################
> ##
>
> # $X is optional whitespace/comments.
> $X = qq<
>    [$space$tab]*                    # Nab whitespace.
>    (?: $comment [$space$tab]* )*    # If comment found, allow more spaces.
> >;
>
>
>
> # Item 10: atom
> $atom_char   = qq/[^($space)<>\@,;:\".$esc$OpenBR$CloseBR$ctrl$NonASCII]/;
> $atom = qq<
>   $atom_char+    # some number of atom characters...
>   (?!$atom_char) # ..not followed by something that could be part of an atom
> >;
>
> # Item 11: doublequoted string, unrolled.
> $quoted_str = qq<
>     \"                                     # "
>        $qtext *                            #   normal
>        (?: $quoted_pair $qtext * )*        #   ( special normal* )*
>     \"                                     #        "
> >;
>
> # Item 7: word is an atom or quoted string
> $word = qq<
>     (?:
>        $atom                 # Atom
>        |                       #  or
>        $quoted_str           # Quoted string
>      )
> >;
>
> # Item 12: domain-ref is just an atom
> $domain_ref  = $atom;
>
> # Item 13: domain-literal is like a quoted string, but [...] instead of
> # "..."
> $domain_lit  = qq<
>     $OpenBR                            # [
>     (?: $dtext | $quoted_pair )*     #    stuff
>     $CloseBR                           #           ]
> >;
>
> # Item 9: sub-domain is a domain-ref or domain-literal
> $sub_domain  = qq<
>   (?:
>     $domain_ref
>     |
>     $domain_lit
>    )
>    $X # optional trailing comments
> >;
>
> # Item 6: domain is a list of subdomains separated by dots.
> $domain = qq<
>      $sub_domain
>      (?:
>         $Period $X $sub_domain
>      )*
> >;
>
> # Item 8: a route. A bunch of "@ $domain" separated by commas, followed by
> # a colon.
> $route = qq<
>     \@ $X $domain
>     (?: , $X \@ $X $domain )*  # additional domains
>     :
>     $X # optional trailing comments
> >;
>
> # Item 5: local-part is a bunch of $word separated by periods
> $local_part = qq<
>     $word $X
>     (?:
>         $Period $X $word $X # additional words
>     )*
> >;
>
> # Item 2: addr-spec is local@domain
> $addr_spec  = qq<
>   $local_part \@ $X $domain
> >;
>
> # Item 4: route-addr is <route? addr-spec>
> $route_addr = qq[
>     < $X                 # <
>        (?: $route )?     #       optional route
>        $addr_spec        #       address spec
>     >                    #                 >
> ];
>
>
> # Item 3: phrase........
> $phrase_ctrl = '\000-\010\012-\037'; # like ctrl, but without tab
>
> # Like atom-char, but without listing space, and uses phrase_ctrl.
> # Since the class is negated, this matches the same as atom-char plus
> # space and tab.
> $phrase_char =
>    qq/[^()<>\@,;:\".$esc$OpenBR$CloseBR$NonASCII$phrase_ctrl]/;
>
> # We've worked it so that $word, $comment, and $quoted_str do not consume
> # trailing $X, because we take care of it manually.
> $phrase = qq<
>    $word                        # leading word
>    $phrase_char *               # "normal" atoms and/or spaces
>    (?:
>       (?: $comment | $quoted_str ) # "special" comment or quoted string
>       $phrase_char *            #  more "normal"
>    )*
> >;
>
> ## Item #1: mailbox is an addr_spec or a phrase/route_addr
> $mailbox = qq<
>     $X                                  # optional leading comment
>     (?:
>             $addr_spec                  # address
>             |                             #  or
>             $phrase  $route_addr      # name and address
>      )
> >;
>
>
>
>
###########################################################################
> # Here's a little snippet to test it.
> # Addresses given on the commandline are described.
> #
>
> my $error = 0;
> my $valid;
> foreach $address (@ARGV) {
>     $valid = $address =~ m/^$mailbox$/xo;
>     printf "`$address' is syntactically %s.\n", $valid ? "valid" :
> "invalid";
>     $error = 1 if not $valid;
> }
> exit $error;
>
>
>
> ----- Original Message -----
> From: "David Shaw" <david.shaw@zapmedia.com>
> To: "Loren Stafford" <lstafford@morphics.com>
> Cc: <zope@zope.org>
> Sent: Tuesday, April 03, 2001 6:37 PM
> Subject: [Zope] Re: Non-caching version of POPMail ?
>
>
> > I actually solved this on my working version in a different way.  I
> > simply call UIDL on a refresh and if any of the UIDs I have are not in
> > the UID list from the server, I delete the message from the
> > MessageDict.  It accomplishes the same thing without the large
> > performance hit of not caching.
> >
> > I've started working on this product again.  I'd be happy to send you my
> > current working revision if you want to take a look.  It's not anything
> > significant enough yet to warrant a new release, but I am making it
> > better.  My next adventure is to do better message parsing to make URLs
> > and email addresses clickable.  I just got the O'Reilly Regular
> > Expression book and plan on delving into it when I get a chance.
> >
> >
> > Loren Stafford said:
> >
> > > David,
> > >
> > > I'm planning to modify POPMail (or more likely, make a derived product
> > > POPMailNc) so that there is no persistent message cache. I just wanted
> > > to pass the idea by you, so you could tell me if I'm doing something
> > > stupid or if you've already solved my problem for me in a newer
> > > version of POPMail.
> > >
> > > I'm using POPMail to couple my Zope server to another product
> > > (PerfectTracker). Zope initiates Tracker incidents via sendmail and
> > > receives responses from the Tracker at a dedicated POP3 mailbox, which
> > > it polls (using Xron) every 5-10 minutes. I've discovered that if I
> > > ever delete messages from the mailbox manually (i.e. using a POP
> > > client other than POPMail) POPMail's persistent message cache gets
> > > hopelessly out of sync with the mailbox, and it begins to deliver
> > > messages from its cache when there are new messages with the same UID
> > > in the mailbox. That's bad.
> > >
> > > While I don't need to delete messages behind POPMail's back, I can't
> > > really prevent someone from doing so, due to the nature of our mail
> > > system. So I propose to make POPMail's cache non-persistent (I guess
> > > it really wouldn't be a cache then, would it?). I don't expect a
> > > performance problem, because, if I delete old messages regularly I
> > > will never have more than a few hundred messages in the mailbox.
> > > (Messages correspond to new employees.)
> > >
> > > I think I can make uidDict and MessageDict nonpersistent simply by
> > > changing their names to _v_uidDict and _v_MessageDict. Is that correct?
> > >
> > > -- Thanks for your input
> > > -- Loren
> > >
> >
> > --
> > David Shaw -- Senior Software Developer -- ZapMedia -- 678.420.2715
> >
> >
> >
> > _______________________________________________
> > Zope maillist  -  Zope@zope.org
> > http://lists.zope.org/mailman/listinfo/zope
> > **   No cross posts or HTML encoding!  **
> > (Related lists -
> >  http://lists.zope.org/mailman/listinfo/zope-announce
> >  http://lists.zope.org/mailman/listinfo/zope-dev )
> >
>
>
> _______________________________________________
> Zope maillist  -  Zope@zope.org
> http://lists.zope.org/mailman/listinfo/zope
> **   No cross posts or HTML encoding!  **
> (Related lists -
>  http://lists.zope.org/mailman/listinfo/zope-announce
>  http://lists.zope.org/mailman/listinfo/zope-dev )
>