[Zope] Zope 2.3.2 version

JL BERLIET jl.berliet@wanadoo.fr
Fri, 18 May 2001 18:54:26 +0200


C'est un message de format MIME en plusieurs parties.

------=_NextPart_000_0007_01C0DFCB.F5F36CA0
Content-Type: text/plain;
	charset="iso-8859-1"
Content-Transfer-Encoding: 7bit

> The search has to bring related
> results to the keyword, not only results that has it. A
> basic feature to do that is to ignore accentuation

Here is a patched "Splitter.c" that I am using with French words to ignore
accentuation! (See the function "sans_accent" and add your Portuguese characters.)
Jean-Louis BERLIET

------=_NextPart_000_0007_01C0DFCB.F5F36CA0
Content-Type: application/octet-stream;
	name="Splitter.c"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="Splitter.c"

/************************************************************************=
*****=0A=
  =0A=
  Zope Public License (ZPL) Version 1.0=0A=
  -------------------------------------=0A=
  =0A=
  Copyright (c) Digital Creations.  All rights reserved.=0A=
  =0A=
  This license has been certified as Open Source(tm).=0A=
  =0A=
  Redistribution and use in source and binary forms, with or without=0A=
  modification, are permitted provided that the following conditions are=0A=
  met:=0A=
  =0A=
  1. Redistributions in source code must retain the above copyright=0A=
     notice, this list of conditions, and the following disclaimer.=0A=
  =0A=
  2. Redistributions in binary form must reproduce the above copyright=0A=
     notice, this list of conditions, and the following disclaimer in=0A=
     the documentation and/or other materials provided with the=0A=
     distribution.=0A=
  =0A=
  3. Digital Creations requests that attribution be given to Zope=0A=
     in any manner possible. Zope includes a "Powered by Zope"=0A=
     button that is installed by default. While it is not a license=0A=
     violation to remove this button, it is requested that the=0A=
     attribution remain. A significant investment has been put=0A=
     into Zope, and this effort will continue if the Zope community=0A=
     continues to grow. This is one way to assure that growth.=0A=
  =0A=
  4. All advertising materials and documentation mentioning=0A=
     features derived from or use of this software must display=0A=
     the following acknowledgement:=0A=
  =0A=
       "This product includes software developed by Digital Creations=0A=
       for use in the Z Object Publishing Environment=0A=
       (http://www.zope.org/)."=0A=
  =0A=
     In the event that the product being advertised includes an=0A=
     intact Zope distribution (with copyright and license included)=0A=
     then this clause is waived.=0A=
  =0A=
  5. Names associated with Zope or Digital Creations must not be used to=0A=
     endorse or promote products derived from this software without=0A=
     prior written permission from Digital Creations.=0A=
  =0A=
  6. Modified redistributions of any form whatsoever must retain=0A=
     the following acknowledgment:=0A=
  =0A=
       "This product includes software developed by Digital Creations=0A=
       for use in the Z Object Publishing Environment=0A=
       (http://www.zope.org/)."=0A=
  =0A=
     Intact (re-)distributions of any official Zope release do not=0A=
     require an external acknowledgement.=0A=
  =0A=
  7. Modifications are encouraged but must be packaged separately as=0A=
     patches to official Zope releases.  Distributions that do not=0A=
     clearly separate the patches from the original work must be clearly=0A=
     labeled as unofficial distributions.  Modifications which do not=0A=
     carry the name Zope may be packaged in any form, as long as they=0A=
     conform to all of the clauses above.=0A=
  =0A=
  =0A=
  Disclaimer=0A=
  =0A=
    THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY=0A=
    EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE=0A=
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR=0A=
    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS=0A=
    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,=0A=
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT=0A=
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF=0A=
    USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND=0A=
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,=0A=
    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT=0A=
    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF=0A=
    SUCH DAMAGE.=0A=
  =0A=
  =0A=
  This software consists of contributions made by Digital Creations and=0A=
  many individuals on behalf of Digital Creations.  Specific=0A=
  attributions are listed in the accompanying credits file.=0A=
  =0A=
 =
*************************************************************************=
***/=0A=
#include "Python.h"=0A=
#include <ctype.h>=0A=
=0A=
/*=0A=
 * Reference-handling shorthands used throughout this file.=0A=
 *=0A=
 * ASSIGN(V,E)        evaluate E, Py_XDECREF the old value of V,=0A=
 *                    then store the result in V.=0A=
 * UNLESS(E)          "if not E"; guards the statement that follows.=0A=
 * UNLESS_ASSIGN(V,E) ASSIGN(V,E), then guard the following statement=0A=
 *                    on the new V being NULL.=0A=
 */=0A=
#define ASSIGN(V,E) {PyObject *__e; __e=3D(E); Py_XDECREF(V); (V)=3D__e;}=0A=
#define UNLESS(E) if(!(E))=0A=
#define UNLESS_ASSIGN(V,E) ASSIGN(V,E) UNLESS(V)=0A=
=0A=
/* Forward declaration (no parameter list); defined later in this file. */=0A=
static PyObject *next_word();=0A=
=0A=
typedef struct /* state of one Splitter object */=0A=
{=0A=
    PyObject_HEAD=0A=
    PyObject *text, *synstop; /* str(doc); synonym/stop map or NULL */=0A=
    char *here, *end;         /* scan cursor; one past the last byte */=0A=
    int index;                /* last word returned; -1 after a reset */=0A=
} Splitter;=0A=
 =0A=
/* Rewind the scanner so the next call to next_word() starts again at=0A=
   the first character of self->text, with no word counted yet. */=0A=
static void=0A=
Splitter_reset(Splitter *self)=0A=
{=0A=
    self->index =3D -1;=0A=
    self->here =3D PyString_AsString(self->text);=0A=
}=0A=
=0A=
static void=0A=
Splitter_dealloc(Splitter *self) =0A=
{=0A=
    Py_XDECREF(self->text);=0A=
    Py_XDECREF(self->synstop);=0A=
    PyMem_DEL(self);=0A=
}=0A=
=0A=
static int=0A=
Splitter_length(Splitter *self)=0A=
{=0A=
    PyObject *res=3D0;=0A=
=0A=
    Splitter_reset(self);=0A=
    while(1)=0A=
      {=0A=
	UNLESS_ASSIGN(res,next_word(self,NULL,NULL)) return -1;=0A=
	UNLESS(PyString_Check(res))=0A=
	  {=0A=
	    Py_DECREF(res);=0A=
	    break;=0A=
	  }=0A=
      }=0A=
    return self->index+1;=0A=
}=0A=
=0A=
static PyObject *=0A=
Splitter_concat(Splitter *self, PyObject *other)=0A=
{=0A=
    PyErr_SetString(PyExc_TypeError, "Cannot concatenate Splitters.");=0A=
    return NULL;=0A=
}=0A=
=0A=
/* sq_repeat slot: repetition is not supported; always raises=0A=
   TypeError and returns NULL.  The count is an int so that the=0A=
   definition matches the intargfunc type it is installed as in=0A=
   Splitter_as_sequence (the same type Splitter_item is cast to). */=0A=
static PyObject *=0A=
Splitter_repeat(Splitter *self, int n)=0A=
{=0A=
    PyErr_SetString(PyExc_TypeError, "Cannot repeat Splitters.");=0A=
    return NULL;=0A=
}=0A=
=0A=
/*=0A=
  Map an input word to an output word by applying standard=0A=
  filtering/mapping words, including synonyms/stop words.=0A=
=0A=
  Input is a word.=0A=
  =0A=
  Output is:=0A=
=0A=
     None -- The word is a stop word=0A=
=0A=
     sometext -- A replacement for the word=0A=
 */=0A=
static PyObject *=0A=
check_synstop(Splitter *self, PyObject *word)=0A=
{=0A=
    PyObject *value;=0A=
    char *cword;=0A=
    int len;=0A=
    =0A=
    cword =3D PyString_AsString(word);=0A=
    len =3D PyString_Size(word);=0A=
    if(len < 2)	/* Single-letter words are stop words! */=0A=
    {=0A=
      Py_INCREF(Py_None);=0A=
      return Py_None;=0A=
    }=0A=
=0A=
    /*************************************************************=0A=
      Test whether a word has any letters.                       *=0A=
                                                                 */    =0A=
    for (; --len >=3D 0 && ! isalpha((unsigned char)cword[len]); );=0A=
    if (len < 0)=0A=
    {=0A=
        Py_INCREF(Py_None);=0A=
        return Py_None;=0A=
    }=0A=
    /*=0A=
     * If no letters, treat it as a stop word.=0A=
     *************************************************************/=0A=
=0A=
    Py_INCREF(word);=0A=
=0A=
    if (self->synstop =3D=3D NULL) return word;=0A=
=0A=
    while ((value =3D PyObject_GetItem(self->synstop, word)) &&=0A=
	   PyString_Check(value))=0A=
    {=0A=
        ASSIGN(word,value);=0A=
	if(len++ > 100) break;	/* Avoid infinite recurssion */=0A=
    }=0A=
=0A=
    if (value =3D=3D NULL)=0A=
    {=0A=
        PyErr_Clear();=0A=
        return word;=0A=
    }=0A=
=0A=
    return value;		/* Which must be None! */=0A=
}=0A=
=0A=
/*=0A=
  Fold an accented Latin letter to its unaccented lower-case base=0A=
  letter; any other character is returned unchanged.=0A=
=0A=
  The accented letters are written as hexadecimal escapes, so the=0A=
  table does not depend on the encoding of this source file.  The=0A=
  codes are presumably ISO-8859-1 (the charset of the original=0A=
  8-bit literals) -- confirm against the indexed text.  Upper-case=0A=
  accented letters also fold to the lower-case base letter.=0A=
 */=0A=
static char =0A=
sans_accent(char c) =0A=
{ =0A=
  switch (c) { =0A=
    case '\xE0': case '\xC0':	/* a grave */=0A=
    case '\xE2': case '\xC2':	/* a circumflex */=0A=
    case '\xE4': case '\xC4':	/* a diaeresis */=0A=
      return 'a';=0A=
    case '\xE9': case '\xC9':	/* e acute */=0A=
    case '\xE8': case '\xC8':	/* e grave */=0A=
    case '\xEA': case '\xCA':	/* e circumflex */=0A=
    case '\xEB': case '\xCB':	/* e diaeresis */=0A=
      return 'e';=0A=
    case '\xE7': case '\xC7':	/* c cedilla */=0A=
      return 'c';=0A=
    case '\xF4': case '\xD4':	/* o circumflex */=0A=
    case '\xF6': case '\xD6':	/* o diaeresis */=0A=
      return 'o';=0A=
    case '\xEE': case '\xCE':	/* i circumflex */=0A=
    case '\xEF': case '\xCF':	/* i diaeresis */=0A=
      return 'i';=0A=
    case '\xFB': case '\xDB':	/* u circumflex */=0A=
    case '\xF9': case '\xD9':	/* u grave */=0A=
    case '\xFC': case '\xDC':	/* u diaeresis */=0A=
      return 'u';=0A=
    default:=0A=
      return c;=0A=
  }=0A=
}=0A=
=0A=
 =0A=
#define MAX_WORD 64		/* Words longer than MAX_WORD are stemmed */=0A=
=0A=
static PyObject *=0A=
next_word(Splitter *self, char **startpos, char **endpos)=0A=
{=0A=
  char wbuf[MAX_WORD];=0A=
  char *end, *here, *b;=0A=
  int i =3D 0, c;=0A=
  PyObject *pyword, *res;=0A=
=0A=
  here=3Dself->here;=0A=
  end=3Dself->end;=0A=
  b=3Dwbuf;=0A=
  while (here < end)=0A=
    {=0A=
      /* skip hyphens */ =0A=
      if ((i > 0) && (*here =3D=3D '-'))=0A=
        {=0A=
	  here++;=0A=
	  while (isspace((unsigned char) *here) && (here < end)) here++;=0A=
	  continue;=0A=
	}=0A=
=0A=
      c=3Dtolower((unsigned char) sans_accent(*here));=0A=
      =0A=
      /* Check to see if this character is part of a word */=0A=
      if(isalnum((unsigned char)c) || c=3D=3D'/' || c=3D=3D'_')=0A=
        { /* Found a word character */=0A=
	  if(startpos && i=3D=3D0) *startpos=3Dhere;=0A=
	  if(i++ < MAX_WORD) *b++ =3D c;=0A=
        }=0A=
      else if (i !=3D 0)=0A=
        { /* We've found the end of a word */=0A=
	  if(i >=3D MAX_WORD) i=3DMAX_WORD; /* "stem" the long word */=0A=
=0A=
	  UNLESS(pyword =3D PyString_FromStringAndSize(wbuf, i))=0A=
            {=0A=
	      self->here=3Dhere;=0A=
	      return NULL;=0A=
	    }=0A=
	  =0A=
	  UNLESS(res =3D check_synstop(self, pyword))=0A=
            {=0A=
	      self->here=3Dhere;=0A=
	      Py_DECREF(pyword);=0A=
	      return NULL;=0A=
	    }=0A=
	  =0A=
	  if (res !=3D Py_None)=0A=
            {=0A=
	      if(endpos) *endpos=3Dhere;=0A=
	      self->here=3Dhere;=0A=
	      Py_DECREF(pyword);=0A=
	      self->index++;=0A=
	      return res;=0A=
	    }=0A=
=0A=
	  /* The word is a stopword, so ignore it */ =0A=
=0A=
	  Py_DECREF(res);          =0A=
	  Py_DECREF(pyword);=0A=
	  i =3D 0;=0A=
	  b=3Dwbuf;=0A=
        }=0A=
      =0A=
      here++;=0A=
    }=0A=
=0A=
  self->here=3Dhere;=0A=
=0A=
  /* We've reached the end of the string */=0A=
=0A=
  if(i >=3D MAX_WORD) i=3DMAX_WORD; /* "stem" the long word */=0A=
  if (i =3D=3D 0)=0A=
    { =0A=
      /* No words */=0A=
      self->here=3Dhere;=0A=
      Py_INCREF(Py_None);=0A=
      return Py_None;=0A=
    }=0A=
  =0A=
  UNLESS(pyword =3D PyString_FromStringAndSize(wbuf, i)) return NULL;=0A=
  =0A=
  if(endpos) *endpos=3Dhere;=0A=
  res =3D check_synstop(self, pyword);=0A=
  Py_DECREF(pyword);=0A=
  if(PyString_Check(res)) self->index++;=0A=
  return res;=0A=
}=0A=
=0A=
/*=0A=
  sq_item slot: return a new reference to the i-th word.=0A=
=0A=
  The text is scanned forward from the current position, or from the=0A=
  start when i is not beyond the last word returned.  Raises=0A=
  IndexError when i is negative or past the last word; returns NULL=0A=
  with the exception set in every failure case.=0A=
 */=0A=
static PyObject *=0A=
Splitter_item(Splitter *self, int i)=0A=
{=0A=
    PyObject *word =3D NULL;=0A=
=0A=
    /* Without this check a negative i skips the loop below and NULL=0A=
       would be returned with no exception set. */=0A=
    if (i < 0)=0A=
    {=0A=
        PyErr_SetString(PyExc_IndexError,=0A=
                        "Splitter index out of range");=0A=
        return NULL;=0A=
    }=0A=
=0A=
    if (i <=3D self->index) Splitter_reset(self);=0A=
=0A=
    while(self->index < i)=0A=
    {=0A=
        Py_XDECREF(word);	/* drop the word we skipped over */=0A=
=0A=
        UNLESS(word =3D next_word(self,NULL,NULL)) return NULL; =0A=
        if (word =3D=3D Py_None)=0A=
        {=0A=
            /* End of text before reaching word i. */=0A=
            Py_DECREF(word);=0A=
            PyErr_SetString(PyExc_IndexError,=0A=
                            "Splitter index out of range");=0A=
            return NULL;=0A=
        }=0A=
    }=0A=
=0A=
    return word;=0A=
}=0A=
=0A=
static PyObject *=0A=
Splitter_slice(Splitter *self, int i, int j)=0A=
{=0A=
    PyErr_SetString(PyExc_TypeError, "Cannot slice Splitters.");=0A=
    return NULL;=0A=
}=0A=
=0A=
static PySequenceMethods Splitter_as_sequence =3D {=0A=
    (inquiry)Splitter_length,        /* sq_length: word count */=0A=
    (binaryfunc)Splitter_concat,     /* sq_concat: raises TypeError */=0A=
    (intargfunc)Splitter_repeat,     /* sq_repeat: raises TypeError */=0A=
    (intargfunc)Splitter_item,       /* sq_item: the i-th word */=0A=
    (intintargfunc)Splitter_slice,   /* sq_slice: raises TypeError */=0A=
    (intobjargproc)0,                    /* sq_ass_item: not set */=0A=
    (intintobjargproc)0,                 /* sq_ass_slice: not set */=0A=
};=0A=
=0A=
/*=0A=
  pos(index) -- return a (start, end) tuple with the offsets, within=0A=
  the text, of the word at the given index: start is the offset of=0A=
  its first character and end the offset just past it.=0A=
=0A=
  Raises IndexError when index is negative or past the last word.=0A=
 */=0A=
static PyObject *=0A=
Splitter_pos(Splitter *self, PyObject *args)=0A=
{=0A=
    char *start =3D NULL, *end =3D NULL, *ctext;=0A=
    PyObject *res;=0A=
    int i;=0A=
=0A=
    UNLESS(PyArg_Parse(args, "i", &i)) return NULL;=0A=
=0A=
    /* A negative index would skip the loop below and leave start=0A=
       and end unset. */=0A=
    if (i < 0)=0A=
    {=0A=
        PyErr_SetString(PyExc_IndexError,=0A=
                        "Splitter index out of range");=0A=
        return NULL;=0A=
    }=0A=
=0A=
    if (i <=3D self->index) Splitter_reset(self);=0A=
=0A=
    while(self->index < i)=0A=
    {=0A=
        UNLESS(res=3Dnext_word(self, &start, &end)) return NULL;=0A=
        if(PyString_Check(res))=0A=
          {=0A=
            /* next_word() has already advanced self->index; counting=0A=
               the word again here would skip every other word. */=0A=
            Py_DECREF(res);=0A=
            continue;=0A=
          }=0A=
        Py_DECREF(res);=0A=
        PyErr_SetString(PyExc_IndexError,=0A=
                        "Splitter index out of range");=0A=
        return NULL;=0A=
    }=0A=
=0A=
    ctext=3DPyString_AsString(self->text);=0A=
    /* The "i" format takes an int, not a pointer difference. */=0A=
    return Py_BuildValue("(ii)", (int)(start - ctext),=0A=
                         (int)(end - ctext));=0A=
}=0A=
=0A=
/*=0A=
  indexes(word) -- return a list with the position of every=0A=
  occurrence of word in the word sequence.  The argument is first=0A=
  passed through check_synstop(), as the scanned words are.=0A=
=0A=
  Returns a new list, or NULL with an exception set.=0A=
 */=0A=
static PyObject *=0A=
Splitter_indexes(Splitter *self, PyObject *args)=0A=
{=0A=
  PyObject *word, *r, *w=3D0, *index=3D0;=0A=
  int i=3D0;=0A=
=0A=
  UNLESS(PyArg_ParseTuple(args,"O",&word)) return NULL;=0A=
  UNLESS(r=3DPyList_New(0)) return NULL;=0A=
  /* check_synstop() returns a new reference: from here on word is=0A=
     owned by this function and released on every exit. */=0A=
  UNLESS(word=3Dcheck_synstop(self, word)) goto err;=0A=
=0A=
  Splitter_reset(self);=0A=
  while(1)=0A=
    {=0A=
      UNLESS_ASSIGN(w,next_word(self, NULL, NULL)) goto err;=0A=
      UNLESS(PyString_Check(w)) break;	/* end of the text */=0A=
      if(PyObject_Compare(word,w)=3D=3D0)=0A=
        {=0A=
          UNLESS_ASSIGN(index,PyInt_FromLong(i)) goto err;=0A=
          if(PyList_Append(r,index) < 0) goto err;=0A=
        }=0A=
      i++;=0A=
    }=0A=
  Py_DECREF(word);=0A=
  Py_XDECREF(w);=0A=
  Py_XDECREF(index);=0A=
  return r;=0A=
=0A=
err:=0A=
  Py_XDECREF(word);	/* NULL when check_synstop() failed */=0A=
  Py_XDECREF(w);=0A=
  Py_DECREF(r);=0A=
  Py_XDECREF(index);=0A=
  return NULL;=0A=
}=0A=
=0A=
static struct PyMethodDef Splitter_methods[] =3D { /* object methods */=0A=
    { "pos", (PyCFunction)Splitter_pos, 0, /* parsed by PyArg_Parse */=0A=
      "pos(index) -- Return the starting and ending position of a token" =
},=0A=
    { "indexes", (PyCFunction)Splitter_indexes, METH_VARARGS,=0A=
      "indexes(word) -- Return al list of the indexes of word in the =
sequence",=0A=
    },=0A=
    { NULL, NULL }		/* sentinel */=0A=
};=0A=
=0A=
static PyObject *=0A=
Splitter_getattr(Splitter *self, char *name) =0A=
{=0A=
    return Py_FindMethod(Splitter_methods, (PyObject *)self, name);=0A=
}=0A=
=0A=
static char SplitterType__doc__[] =3D ""; /* empty type docstring */=0A=
=0A=
static PyTypeObject SplitterType =3D {=0A=
    PyObject_HEAD_INIT(NULL)=0A=
    0,                                 /*ob_size*/=0A=
    "Splitter",                    /*tp_name*/=0A=
    sizeof(Splitter),              /*tp_basicsize*/=0A=
    0,                                 /*tp_itemsize*/=0A=
    /* methods */=0A=
    (destructor)Splitter_dealloc,  /*tp_dealloc: frees text, synstop*/=0A=
    (printfunc)0,                      /*tp_print*/=0A=
    (getattrfunc)Splitter_getattr, /*tp_getattr: pos() and indexes()*/=0A=
    (setattrfunc)0,                    /*tp_setattr*/=0A=
    (cmpfunc)0,                        /*tp_compare*/=0A=
    (reprfunc)0,                       /*tp_repr*/=0A=
    0,                                 /*tp_as_number*/=0A=
    &Splitter_as_sequence,         /*tp_as_sequence: length and item*/=0A=
    0,                                 /*tp_as_mapping*/=0A=
    (hashfunc)0,                       /*tp_hash*/=0A=
    (ternaryfunc)0,                    /*tp_call*/=0A=
    (reprfunc)0,                       /*tp_str*/=0A=
=0A=
    /* Space for future expansion */=0A=
    0L,0L,0L,0L,=0A=
    SplitterType__doc__ /* Documentation string */=0A=
};=0A=
=0A=
/*=0A=
  Splitter(doc[,synstop]) -- module-level constructor.=0A=
=0A=
  Creates a Splitter over str(doc), positioned before the first word.=0A=
  synstop, when given, is kept as the synonym/stop-word mapping=0A=
  consulted by check_synstop().  Returns the new object, or NULL=0A=
  with an exception set.=0A=
 */=0A=
static PyObject *=0A=
get_Splitter(PyObject *modinfo, PyObject *args)=0A=
{=0A=
    Splitter *self;=0A=
    PyObject *doc, *synstop =3D NULL;=0A=
=0A=
    if (!PyArg_ParseTuple(args,"O|O",&doc,&synstop)) return NULL;=0A=
=0A=
    self =3D PyObject_NEW(Splitter, &SplitterType);=0A=
    if (self =3D=3D NULL) return NULL;=0A=
=0A=
    /* Keep our own reference to the optional mapping. */=0A=
    Py_XINCREF(synstop);=0A=
    self->synstop =3D synstop;=0A=
=0A=
    self->text =3D PyObject_Str(doc);=0A=
    if (self->text !=3D NULL)=0A=
    {=0A=
        self->here =3D PyString_AsString(self->text);=0A=
        if (self->here !=3D NULL)=0A=
        {=0A=
            self->end =3D self->here + PyString_Size(self->text);=0A=
            self->index =3D -1;=0A=
            return (PyObject*)self;=0A=
        }=0A=
    }=0A=
=0A=
    /* Could not get the text: the deallocator releases what is set. */=0A=
    Py_DECREF(self);=0A=
    return NULL;=0A=
}=0A=
=0A=
static struct PyMethodDef Splitter_module_methods[] =3D {=0A=
    { "Splitter", (PyCFunction)get_Splitter, METH_VARARGS,=0A=
      "Splitter(doc[,synstop]) -- Return a word splitter" },=0A=
    { NULL, NULL } /* sentinel */=0A=
};=0A=
=0A=
static char Splitter_module_documentation[] =3D =0A=
"Parse source strings into sequences of words\n"=0A=
"\n"=0A=
"for use in an inverted index\n"=0A=
"\n"=0A=
"$Id: Splitter.c,v 1.14.28.2 2001/03/21 16:37:53 jim Exp $\n"=0A=
; /* module docstring, handed to Py_InitModule4 in initSplitter */=0A=
=0A=
=0A=
/*=0A=
  Module initialisation: register the module functions and publish=0A=
  the revision number as __version__.  Any error left pending at the=0A=
  end is fatal.=0A=
 */=0A=
void=0A=
initSplitter() =0A=
{=0A=
  PyObject *m, *d, *version;=0A=
  char *rev=3D"$Revision: 1.14.28.2 $";=0A=
  =0A=
  /* Create the module and add the functions */=0A=
  m =3D Py_InitModule4("Splitter", Splitter_module_methods,=0A=
                     Splitter_module_documentation,=0A=
                     (PyObject*)NULL,PYTHON_API_VERSION);=0A=
  =0A=
  /* Add some symbolic constants to the module */=0A=
  if (m !=3D NULL)=0A=
    {=0A=
      d =3D PyModule_GetDict(m);=0A=
      /* Skip the "$Revision: " prefix and the trailing " $". */=0A=
      version =3D PyString_FromStringAndSize(rev+11,strlen(rev+11)-2);=0A=
      if (version !=3D NULL)=0A=
        {=0A=
          PyDict_SetItemString(d, "__version__", version);=0A=
          /* The dictionary holds its own reference. */=0A=
          Py_DECREF(version);=0A=
        }=0A=
    }=0A=
=0A=
  if (PyErr_Occurred())=0A=
    Py_FatalError("can't initialize module Splitter");=0A=
}=0A=

------=_NextPart_000_0007_01C0DFCB.F5F36CA0--