[Zope] TextIndexNG Queryparser bug?

Andreas Jung Andreas Jung <andreas@andreas-jung.com>
Tue, 18 Jun 2002 16:27:33 -0400


--==========02868970==========
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

Hallo Juergen,

anbei ein gefixter PyQueryParser.py

Andreas

--On Tuesday, June 18, 2002 16:20 +0200 "Juergen R. Plasser / Hexagon" 
<plasser@hexagon.at> wrote:

> When I search for 'andorf' (a small town in Upper Austria) I always get
> a Queryparser error: Syntax error at 'and'. Shouldn't a blank be required
> after 'and' (and some expression before 'and') in a search phrase? When
> I capitalize the a ('Andorf') the queryparser does its job as intended.
>
> I have tried the strings 'Andorf andAndorf' and 'Wels andPichl' and that
> search works pretty fine (resulting in 'Andorf' and 'Pichl bei Wels').
>
> Juergen
>
>
> _______________________________________________
> Zope maillist  -  Zope@zope.org
> http://lists.zope.org/mailman/listinfo/zope
> **   No cross posts or HTML encoding!  **
> (Related lists -  http://lists.zope.org/mailman/listinfo/zope-announce
>  http://lists.zope.org/mailman/listinfo/zope-dev )





    ---------------------------------------------------------------------
   -    Andreas Jung                     http://www.andreas-jung.com   -
  -   EMail: andreas at andreas-jung.com                              -
   -            "Life is too short to (re)write parsers"               -
    ---------------------------------------------------------------------

--==========02868970==========
Content-Type: text/plain; charset=iso-8859-1; name="PyQueryParser.py"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment; filename="PyQueryParser.py"; size=3947

###########################################################################
#
# TextIndexNG                The next generation TextIndex for Zope
#
# This software is governed by a license. See
# LICENSE.txt for the terms of this license.
#
###########################################################################

#######################################################
# a new native Python QueryParser for TextIndexNG
# based on the QueryParser by Sidnei da Silva
# $Id: PyQueryParser.py,v 1.11.2.4 2002/06/18 20:29:25 ajung Exp $
#######################################################

import sys
import re

from Products.TextIndexNG.BaseParser import BaseParser, QueryParserError

# ---------------------------------------------------------------------------
# Lexer
#
# NOTE: in the t_* functions below the "docstring" is the token's regular
# expression (that is how lex picks it up), so documentation lives in
# comments only.
# ---------------------------------------------------------------------------

# Token names shared by the lexer and the grammar.
tokens = (
    'STRING',
    'OR',
    'AND',
    'ANDNOT',
    'NEAR',
    'QUOTE',
    'OPENP',
    'CLOSEP',
)

t_QUOTE = r'\"'
t_OPENP = r'\('
t_CLOSEP = r'\)'

# Only tabs are ignored here.  Blanks are NOT listed on purpose: the operator
# rules below need to see the whitespace around the keyword, and stray
# blanks are skipped in t_error() instead.
t_ignore = '\t'


# Operators are only recognised when surrounded by whitespace, so a word
# that merely starts with a keyword (e.g. 'andorf') is lexed as a STRING.
# The token value keeps the surrounding whitespace; p_expr_op() strips it.
def t_ANDNOT(t):
    r'\s+ANDNOT\s+|\s+andnot\s+'
    return t


def t_AND(t):
    r'\s+AND\s+|\s+and\s+'
    return t


def t_OR(t):
    r'\s+OR\s+|\s+or\s+'
    return t


def t_NEAR(t):
    r'\s+NEAR\s+|\s+near\s+'
    return t


# A search word, optionally containing the wildcard characters %, ? and *.
def t_STRING(t):
    r'[\w%?*]+'
    return t


# Newlines only advance the line counter; no token is returned for them.
def t_newline(t):
    r'\n+'
    t.lineno += t.value.count("\n")


# Called for any character no rule matched.  A single blank is skipped
# silently; anything else is reported as an illegal character.
def t_error(t):
    if t.value[0] == ' ':
        t.skip(1)
    else:
        raise QueryParserError("Illegal character '%s'" % t.value[0])


# Build the lexer
import lex
lex.lex(debug=0)

# ---------------------------------------------------------------------------
# Grammar
#
# The parser does not evaluate anything: every rule builds a string, and the
# result of a parse is a nested expression string such as
# 'txI(LL("a"),LL("b"))'.
# NOTE(review): the txI/txU/txN/txAN/txQ and LL/PL/RTL/PML names are
# presumably callables provided by whoever evaluates that string -- they are
# not defined in this module.
# ---------------------------------------------------------------------------

# Maps the (stripped, upper-cased) operator keyword to the function name
# emitted into the result string.
op_dict = {
    'AND': 'txI',
    'OR': 'txU',
    'NEAR': 'txN',
    'ANDNOT': 'txAN',
}


def p_expr_parens(t):
    """expr :    OPENP expr CLOSEP """
    t[0] = '(%s)' % t[2]


def p_expr_op(t):
    """expr :    expr AND expr
               | expr OR expr
               | expr NEAR expr
               | expr ANDNOT expr
    """
    # The operator token still carries its surrounding whitespace and may be
    # lower case, hence strip() + upper() before the lookup.
    t[0] = '%s(%s,%s)' % (op_dict[t[2].strip().upper()], t[1], t[3])


def p_expr_noop(t):
    """expr :    expr expr"""
    # Two expressions without an explicit operator are combined with the
    # same function that AND maps to.
    t[0] = 'txI(%s,%s)' % (t[1], t[2])


def p_expr_expr_factor(t):
    """expr :  factor """
    t[0] = t[1]


def p_factor_string(t):
    """factor : string"""
    t[0] = t[1]


def p_factor_quote(t):
    """factor :  QUOTE term QUOTE"""
    t[0] = "txQ(%s)" % t[2]


def p_term_1(t):
    """ term : string term"""
    t[0] = "%s,%s" % (t[1], t[2])


def p_term_2(t):
    """ term : string"""
    t[0] = t[1]


# Patterns used by p_string() to classify a STRING token.
str_regex = re.compile(r'[\w]+$', re.LOCALE | re.UNICODE)      # plain word
sim_regex = re.compile(r'[%][\w]+$', re.LOCALE | re.UNICODE)   # %word
rt_regex = re.compile(r'[\w]+[*]$', re.LOCALE | re.UNICODE)    # word*


def p_string(t):
    """string :  STRING"""
    if str_regex.match(t[1]):
        t[0] = 'LL("%s")' % t[1]
    elif sim_regex.match(t[1]):
        # drop the leading '%'
        t[0] = 'PL("%s")' % t[1][1:]
    elif rt_regex.match(t[1]):
        # drop the trailing '*'
        t[0] = 'RTL("%s")' % t[1][:-1]
    else:
        # any other mix of word and wildcard characters
        t[0] = 'PML("%s")' % t[1]


# Syntax error hook.  yacc passes None instead of a token when the error is
# detected at the end of the input (e.g. an unterminated quote), so t.value
# must not be touched in that case.
def p_error(t):
    if t is None:
        raise QueryParserError("Syntax error at end of query")
    raise QueryParserError("Syntax error at '%s'" % t.value)


import yacc
yacc.yacc(debug=0)


# No docstrings are added to the class or its method on purpose: this is
# Zope product code, and comments keep the change free of side effects.
class Parser(BaseParser):

    id = 'NewQueryParser'
    parser_description = 'A TextIndex compatible parser (native Python version)'

    # Parse `query` and return the expression string built by the grammar.
    # `operator` is accepted but not used by this implementation.
    # Raises QueryParserError for every failure: lexer/grammar errors are
    # passed through unchanged, any other exception is logged with a
    # traceback and converted.
    def parse(self, query, operator):

        try:
            return yacc.parse(query)
        except QueryParserError:
            raise
        except:
            # Deliberately broad: callers are only expected to deal with
            # QueryParserError.
            import traceback
            traceback.print_exc()
            raise QueryParserError('parser failed for query: %s' % query)


# Interactive read-parse-print loop for manual testing from a terminal.
def test():

    import os, sys, re, traceback, atexit

    histfile = os.path.expanduser('~/.pyhist')
    try:
        import readline
        readline.read_history_file(histfile)
        atexit.register(readline.write_history_file, histfile)
    except:
        # Best effort only: readline or the history file may be missing.
        pass

    print("entering interactive query mode:")
    while 1:
        s = raw_input('> ')
        print(s)

        try:
            P = Parser()
            # NOTE(review): relies on BaseParser making instances callable;
            # confirm against BaseParser.
            res = P(s)
            print('res: %s' % (res,))
        except:
            traceback.print_exc()


if __name__ == '__main__':
    test()
--==========02868970==========--