[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/danish - danishstem.c:1.1.2.1 output.txt:1.1.2.1 stem.h:1.1.2.1 stem.sbl:1.1.2.1 stemmer.html:1.1.2.1 voc.txt:1.1.2.1

Andreas Jung andreas@digicool.com
Wed, 13 Feb 2002 11:26:17 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/danish
In directory cvs.zope.org:/tmp/cvs-serv30556/PyStemmer/danish

Added Files:
      Tag: ajung-textindexng-branch
	danishstem.c output.txt stem.h stem.sbl stemmer.html voc.txt 
Log Message:
added PyStemmer


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/danish/danishstem.c ===

#include "header.h"

extern int danish_stem(struct SN_env * z);
static int r_undouble(struct SN_env * z);
static int r_other_suffix(struct SN_env * z);
static int r_consonant_pair(struct SN_env * z);
static int r_main_suffix(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);

/*
 * a_0: suffix table searched by r_main_suffix() via find_among_b(z, a_0, 32).
 *
 * Each initializer is { length, bytes, link, result, 0 }:
 *   - length : byte length of the suffix string that follows.
 *   - link   : -1, or the index of an earlier entry in this table that is a
 *              proper suffix of this entry (e.g. "ethed" -> 0 "hed").
 *   - result : the value r_main_suffix() switches on after the search
 *              (1 = plain delete, 2 = the "s" rule guarded by g_s_ending).
 *              NOTE(review): presumably find_among_b() returns this field for
 *              the matched entry -- confirm against the runtime in header.h.
 *   - last   : always 0 here; its meaning is not visible in this file.
 *
 * The table is generated from the among(...) list of main_suffix in stem.sbl;
 * entry order and the link indexes must not be edited by hand independently.
 */
static struct among a_0[32] =
{
/*  0 */ { 3, (byte *)"hed", -1, 1, 0},
/*  1 */ { 5, (byte *)"ethed", 0, 1, 0},
/*  2 */ { 4, (byte *)"ered", -1, 1, 0},
/*  3 */ { 1, (byte *)"e", -1, 1, 0},
/*  4 */ { 5, (byte *)"erede", 3, 1, 0},
/*  5 */ { 4, (byte *)"ende", 3, 1, 0},
/*  6 */ { 6, (byte *)"erende", 5, 1, 0},
/*  7 */ { 3, (byte *)"ene", 3, 1, 0},
/*  8 */ { 4, (byte *)"erne", 3, 1, 0},
/*  9 */ { 3, (byte *)"ere", 3, 1, 0},
/* 10 */ { 2, (byte *)"en", -1, 1, 0},
/* 11 */ { 5, (byte *)"heden", 10, 1, 0},
/* 12 */ { 4, (byte *)"eren", 10, 1, 0},
/* 13 */ { 2, (byte *)"er", -1, 1, 0},
/* 14 */ { 5, (byte *)"heder", 13, 1, 0},
/* 15 */ { 4, (byte *)"erer", 13, 1, 0},
/* 16 */ { 1, (byte *)"s", -1, 2, 0},
/* 17 */ { 4, (byte *)"heds", 16, 1, 0},
/* 18 */ { 2, (byte *)"es", 16, 1, 0},
/* 19 */ { 5, (byte *)"endes", 18, 1, 0},
/* 20 */ { 7, (byte *)"erendes", 19, 1, 0},
/* 21 */ { 4, (byte *)"enes", 18, 1, 0},
/* 22 */ { 5, (byte *)"ernes", 18, 1, 0},
/* 23 */ { 4, (byte *)"eres", 18, 1, 0},
/* 24 */ { 3, (byte *)"ens", 16, 1, 0},
/* 25 */ { 6, (byte *)"hedens", 24, 1, 0},
/* 26 */ { 5, (byte *)"erens", 24, 1, 0},
/* 27 */ { 3, (byte *)"ers", 16, 1, 0},
/* 28 */ { 3, (byte *)"ets", 16, 1, 0},
/* 29 */ { 5, (byte *)"erets", 28, 1, 0},
/* 30 */ { 2, (byte *)"et", -1, 1, 0},
/* 31 */ { 4, (byte *)"eret", 30, 1, 0}
};

/*
 * a_1: the consonant pairs tested by r_consonant_pair() via
 * find_among_b(z, a_1, 4).
 *
 * Layout per entry is { length, bytes, link, result, 0 }. No entry is a
 * suffix of another, so every link is -1. The result field is -1 for all
 * entries; r_consonant_pair() only checks the search result for non-zero,
 * it never switches on the value.
 */
static struct among a_1[4] =
{
/*  0 */ { 2, (byte *)"gd", -1, -1, 0},
/*  1 */ { 2, (byte *)"dt", -1, -1, 0},
/*  2 */ { 2, (byte *)"gt", -1, -1, 0},
/*  3 */ { 2, (byte *)"kt", -1, -1, 0}
};

/*
 * a_2: suffix table searched by r_other_suffix() via find_among_b(z, a_2, 5).
 *
 * Layout per entry is { length, bytes, link, result, 0 }:
 *   - link   : -1, or the index of an earlier entry that is a proper suffix
 *              of this one ("lig" -> "ig", "elig" -> "lig").
 *   - result : the value r_other_suffix() switches on
 *              (1 = delete then retry consonant_pair, 2 = replace by the
 *              3-byte string "l\x9B" "s").
 *
 * Byte 0x9B is the character named o/ in stem.sbl (o-slash in the input
 * encoding this stemmer expects); the literal is split as "l\x9B" "st" so
 * that the hex escape does not swallow the following characters.
 */
static struct among a_2[5] =
{
/*  0 */ { 2, (byte *)"ig", -1, 1, 0},
/*  1 */ { 3, (byte *)"lig", 0, 1, 0},
/*  2 */ { 4, (byte *)"elig", 1, 1, 0},
/*  3 */ { 3, (byte *)"els", -1, 1, 0},
/*  4 */ { 4, (byte *)"l\x9B" "st", -1, 2, 0}
};


/*
 * g_v: the vowel grouping `v` from stem.sbl, passed to in_grouping() /
 * out_grouping() together with the character-code bounds 97..155.
 * The bytes read as a bitmap with one bit per character code starting at
 * 97 ('a'), least-significant bit first: the set bits are a e i o u y and
 * the codes 0x86, 0x91 and 0x9B (ao, ae and o/ in stem.sbl).
 * NOTE(review): bit layout inferred from the values; confirm against the
 * grouping helpers in the runtime.
 */
static byte g_v[] = { 17, 65, 16, 1, 32, 0, 1, 4 };

/*
 * g_s_ending: the grouping `s_ending` from stem.sbl (letters that may
 * precede a removable final "s"), used with the bounds 97..134. Same bitmap
 * layout as g_v: set bits are a b c d f g h j k l m n o p r t v y z and
 * code 0x86 (ao).
 */
static byte g_s_ending[] = { 239, 254, 42, 3, 32 };

/*
 * mark_regions: compute p1 (stored in z->I[0]), the start of region R1.
 *
 * p1 starts out as the limit z->l. The cursor is then moved forward to the
 * first position where in_grouping() accepts a vowel, and on past the first
 * position where out_grouping() accepts a non-vowel; that cursor position
 * becomes p1, raised to 3 if it is smaller.
 *
 * Returns 1 when p1 was marked, 0 when the limit was reached first (p1 is
 * then left equal to z->l). The cursor is left wherever the scan stopped;
 * the caller restores it.
 */
static int r_mark_regions(struct SN_env * z) {
    z->I[0] = z->l;

    /* goto v: stop in front of the first vowel, cursor not consumed */
    for (;;) {
        const int here = z->c;
        if (in_grouping(z, g_v, 97, 155)) {
            z->c = here;
            break;
        }
        z->c = here;
        if (z->c >= z->l) {
            return 0;
        }
        z->c++;
    }

    /* gopast non-v: advance until a non-vowel has been accepted */
    while (!out_grouping(z, g_v, 97, 155)) {
        if (z->c >= z->l) {
            return 0;
        }
        z->c++;
    }

    /* setmark p1, then try ( $p1 < 3  $p1 = 3 ) */
    z->I[0] = z->c;
    if (z->I[0] < 3) {
        z->I[0] = 3;
    }
    return 1;
}

/*
 * main_suffix (step 1): look up the suffix before the cursor in a_0, with
 * the backward limit temporarily raised to p1 (z->I[0]) so that only text
 * inside R1 can match.
 *
 * Result 1 from the table deletes the slice [bra, ket). Result 2 (the bare
 * "s") deletes it only if in_grouping_b() accepts the preceding character
 * as a member of g_s_ending.
 *
 * Returns 0 if the cursor is before p1, nothing in a_0 matched, or the
 * s-ending check failed; 1 otherwise.
 */
static int r_main_suffix(struct SN_env * z) {
    int among_var;
    int saved_lb;

    if (z->c < z->I[0]) {
        return 0;
    }

    /* setlimit tomark p1: only the backward limit moves, the cursor stays */
    saved_lb = z->lb;
    z->lb = z->I[0];

    z->ket = z->c;
    among_var = find_among_b(z, a_0, 32);
    z->lb = saved_lb;
    if (!among_var) {
        return 0;
    }
    z->bra = z->c;

    if (among_var == 1) {
        slice_del(z);
    } else if (among_var == 2) {
        if (!in_grouping_b(z, g_s_ending, 97, 134)) {
            return 0;
        }
        slice_del(z);
    }
    return 1;
}

/*
 * consonant_pair (step 2): if one of the pairs in a_1 (gd, dt, gt, kt) ends
 * at the cursor and lies inside R1, delete its last character.
 *
 * The table lookup is a test: it sets ket (and bra) but the cursor is put
 * back afterwards, so the following `next] delete` removes exactly one
 * character, from cursor-1 up to ket.
 *
 * Returns 0 if the cursor is before p1, no pair matched, or there is no
 * character left above the backward limit; 1 after deleting.
 */
static int r_consonant_pair(struct SN_env * z) {
    const int from_end = z->l - z->c;   /* cursor position for the test */
    int saved_lb;
    int matched;

    if (z->c < z->I[0]) {
        return 0;
    }

    /* setlimit tomark p1: only the backward limit moves, the cursor stays */
    saved_lb = z->lb;
    z->lb = z->I[0];

    z->ket = z->c;
    matched = find_among_b(z, a_1, 4);
    z->lb = saved_lb;
    if (!matched) {
        return 0;
    }
    z->bra = z->c;

    /* end of test: rewind the cursor to where it was on entry */
    z->c = z->l - from_end;

    /* next] delete */
    if (z->c <= z->lb) {
        return 0;
    }
    z->c--;
    z->bra = z->c;
    slice_del(z);
    return 1;
}

/*
 * other_suffix (step 3).
 *
 * First, unconditionally of R1: if the text before the cursor ends in
 * "igst", delete the "st". The cursor is then put back at the same distance
 * from the (possibly shortened) end.
 *
 * Then look up the suffix in a_2 with the backward limit raised to p1:
 *   result 1 (ig, lig, elig, els): delete it, then try r_consonant_pair()
 *                                  once more, ignoring its outcome;
 *   result 2 (l<0x9B>st)         : replace it by the 3 bytes l<0x9B>s.
 *
 * Returns 0 if the cursor is before p1 or nothing in a_2 matched, else 1.
 */
static int r_other_suffix(struct SN_env * z) {
    int among_var;
    int from_end;
    int saved_lb;

    /* do ( ['st'] 'ig' delete ) */
    from_end = z->l - z->c;
    z->ket = z->c;
    if (eq_s_b(z, 2, "st")) {
        z->bra = z->c;
        if (eq_s_b(z, 2, "ig")) {
            slice_del(z);
        }
    }
    z->c = z->l - from_end;

    /* setlimit tomark p1 for ([substring]) */
    if (z->c < z->I[0]) {
        return 0;
    }
    saved_lb = z->lb;
    z->lb = z->I[0];
    z->ket = z->c;
    among_var = find_among_b(z, a_2, 5);
    z->lb = saved_lb;
    if (!among_var) {
        return 0;
    }
    z->bra = z->c;

    if (among_var == 1) {
        slice_del(z);
        /* do consonant_pair: measured after the delete, restored after the call */
        from_end = z->l - z->c;
        (void) r_consonant_pair(z);
        z->c = z->l - from_end;
    } else if (among_var == 2) {
        slice_from_s(z, 3, "l\x9B" "s");
    }
    return 1;
}

/*
 * undouble (step 4): if a non-vowel inside R1 ends at the cursor and the
 * text before it ends with that same character, delete the final copy.
 *
 * The single character is captured into the string variable ch (z->S[0])
 * with slice_to(), then compared backwards with eq_v_b().
 *
 * Returns 0 if the cursor is before p1, the last character is not accepted
 * by out_grouping_b() as a non-vowel, or it is not doubled; 1 after
 * deleting.
 */
static int r_undouble(struct SN_env * z) {
    int saved_lb;

    if (z->c < z->I[0]) {
        return 0;
    }

    /* setlimit tomark p1: only the backward limit moves, the cursor stays */
    saved_lb = z->lb;
    z->lb = z->I[0];

    /* [non-v] -> ch */
    z->ket = z->c;
    if (!out_grouping_b(z, g_v, 97, 155)) {
        z->lb = saved_lb;
        return 0;
    }
    z->bra = z->c;
    z->S[0] = slice_to(z, z->S[0]);
    z->lb = saved_lb;

    /* ch delete */
    if (!eq_v_b(z, z->S[0])) {
        return 0;
    }
    slice_del(z);
    return 1;
}

/*
 * Entry point of the Danish stemmer.
 *
 * Runs mark_regions forwards, then switches to backward mode (backward
 * limit = current cursor, cursor = limit z->l) and applies main_suffix,
 * consonant_pair, other_suffix and undouble in that order. Every step is a
 * Snowball `do`: its result is ignored and the cursor is put back to the
 * same distance from the end before the next step runs.
 *
 * On exit the cursor is reset to the backward limit. Always returns 1.
 */
extern int danish_stem(struct SN_env * z) {
    int mark;

    /* do mark_regions */
    mark = z->c;
    (void) r_mark_regions(z);
    z->c = mark;

    /* backwards ( ... ) */
    z->lb = z->c;
    z->c = z->l;

    /* do main_suffix */
    mark = z->l - z->c;
    (void) r_main_suffix(z);
    z->c = z->l - mark;

    /* do consonant_pair */
    mark = z->l - z->c;
    (void) r_consonant_pair(z);
    z->c = z->l - mark;

    /* do other_suffix */
    mark = z->l - z->c;
    (void) r_other_suffix(z);
    z->c = z->l - mark;

    /* do undouble */
    mark = z->l - z->c;
    (void) r_undouble(z);
    z->c = z->l - mark;

    z->c = z->lb;
    return 1;
}

/*
 * Create an environment for danish_stem(). The arguments 1, 1, 0 match the
 * declarations in stem.sbl: one string (ch), one integer (p1), no booleans.
 * NOTE(review): argument order (strings, integers, booleans) is assumed from
 * those counts -- confirm against SN_create_env() in the runtime.
 */
extern struct SN_env * danish_create_env(void)
{
    return SN_create_env(1, 1, 0);
}

/* Dispose of an environment obtained from danish_create_env(). */
extern void danish_close_env(struct SN_env * z)
{
    SN_close_env(z);
}



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/danish/output.txt === (23729/23829 lines abridged)
a
aab
aabenbaring
ab
abaddon
abagta
aban
abana
abarim
abarimbjerg
abarimbjerg
abba
abda
abda
abdeel
abdi
abdiel
abdis
abdon
abed
abel
abel
aber
abi
abia
abia
abiasaf
abiathar
abib
abiba
abibajil
abibu
abida
abidan
abidan
abiel
abiez
abiez
abiezrit
abiezrit
abiezrit
abigajil
abih
abihajil
abihajil
abihu
abija
abija
abil
abimael

[-=- -=- -=- 23729 lines omitted -=- -=- -=-]

›ged
›ger
›ges
›get
›jeblik
›jeblik
›jet
›jn
›jned
›jn
›mm
›mmed
›nn
›nsk
›nsked
›nsked
›nsk
›nsk
›re
›rer
›ret
›rkesl›s
›rkesl›s
›rn
›se
›ser
›ses
›st
›st
›st
›sterfra
›sterp†
›st
›stport
›stp†
›str
›v
›ve
›ved
›ved
›ved
›ver
›verst
›verst
›ves
›vet
›vr
›vr
›vr
›vr


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/danish/stem.h ===

/*
 * Public interface of the Danish Snowball stemmer (implemented in
 * danishstem.c). struct SN_env is declared by the Snowball runtime header,
 * which must be included before this file.
 */

/* Allocate a stemmer environment; implemented as SN_create_env(1, 1, 0). */
extern struct SN_env * danish_create_env(void);
/* Release an environment; implemented as SN_close_env(z). */
extern void danish_close_env(struct SN_env * z);

/*
 * Stem the word currently held in z, in place. The implementation shown in
 * danishstem.c always returns 1.
 * NOTE(review): how the word is loaded into / read back from z is defined by
 * the runtime, not by this file -- see the SN_env API.
 */
extern int danish_stem(struct SN_env * z);



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/danish/stem.sbl ===
routines (
           mark_regions
           main_suffix
           consonant_pair
           other_suffix
           undouble
)

externals ( stem )

strings ( ch )

integers ( p1 )

groupings ( v s_ending )

stringescapes {}

/* special characters: ae = a-e ligature, ao = a-ring, o/ = o-slash.
   The hex values below are the MS-DOS code page codings of these letters
   (as in code pages 850/865), NOT ISO Latin 1, where the same letters
   are E6, E5 and F8. Input must use the same coding as these values. */

stringdef ae   hex '91'
stringdef ao   hex '86'
stringdef o/   hex '9B'

// the vowels
define v 'aeiouy{ae}{ao}{o/}'

// letters after which a final 's' may be removed (see main_suffix)
define s_ending  'abcdfghjklmnoprtvyz{ao}'

// Set p1, the start of region R1: the position after the first non-vowel
// that follows a vowel. p1 stays at the end of the word if there is no such
// position, and is never left below 3.
define mark_regions as (

    $p1 = limit

    goto v gopast non-v  setmark p1
    try ( $p1 < 3  $p1 = 3 )
)

// The routines below work from the end of the word towards its start.
// 'setlimit tomark p1 for (...)' restricts each suffix search to R1.
backwardmode (

    // Step 1: delete the longest matching suffix; a bare 's' is deleted
    // only when the letter before it is in s_ending.
    define main_suffix as (
        setlimit tomark p1 for ([substring])
        among(

            'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
            'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
            'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
            'erets' 'et' 'eret'
                (delete)
            's'
                (s_ending delete)
        )
    )

    // Step 2: if the word ends in one of these pairs (within R1), delete
    // the last letter. The pair is only tested, so 'next]' then spans
    // exactly one character.
    define consonant_pair as (
        test (
            setlimit tomark p1 for ([substring])
            among(
                'gd' // significant in the call from other_suffix
                'dt' 'gt' 'kt'
            )
        )
        next] delete
    )

    // Step 3: 'igst' loses its 'st' (not limited to R1); then the listed
    // suffixes are deleted and step 2 is retried, or 'l{o/}st' becomes
    // 'l{o/}s'.
    define other_suffix as (
        do ( ['st'] 'ig' delete )
        setlimit tomark p1 for ([substring])
        among(
            'ig' 'lig' 'elig' 'els'
                (delete do consonant_pair)
            'l{o/}st'
                (<-'l{o/}s')
        )
    )
    // Step 4: if the final non-vowel (within R1) is preceded by the same
    // letter, delete the final one.
    define undouble as (
        setlimit tomark p1 for ([non-v] ->ch)
        ch
        delete
    )
)

// Entry point: mark R1 going forwards, then run the four steps from the end
// of the word. Each step is wrapped in 'do', so a step that fails does not
// stop the ones after it.
define stem as (

    do mark_regions
    backwards (
        do main_suffix
        do consonant_pair
        do other_suffix
        do undouble
    )
)


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/danish/stemmer.html ===

<HTML>
<HEAD>
<TITLE>Danish stemming algorithm</TITLE></HEAD>
<BODY BGCOLOR=WHITE>
<TABLE WIDTH=75% ALIGN=CENTER COLS=1>
<H1 ALIGN=CENTER>Danish stemming algorithm</H1>

<TR><TD>
<BR>&nbsp;<H2>Links to resources</H2>

<DL><DD><TABLE CELLPADDING=0>
<TR><TD><A HREF="stem.sbl">    The stemmer in Snowball</A>
<TR><TD><A HREF="stem.c">      The ANSI C stemmer</A>
<TR><TD><A HREF="stem.h">      - and its header</A>
<TR><TD><A HREF="voc.txt">     Sample Danish vocabulary (ISO Latin codings)</A>
<TR><TD><A HREF="output.txt">  Its stemmed equivalent</A>
<TR><TD><A HREF="diffs.txt">   Vocabulary + stemmed equivalent in pure ASCII</A>
<TR><TD><A HREF="tarball.tgz"> Tar-gzipped file of all of the above</A>
</TABLE></DL>

<DL><DD><TABLE CELLPADDING=0>
<TR><TD><A HREF="../texts/scandinavian.html">
                  Scandinavian language stemmers</A>
</TABLE></DL>

</TR>

<TR><TD BGCOLOR="lightpink">

<BR><BR>

Here is a sample of Danish vocabulary, with the stemmed forms that will
be generated with this algorithm.

<BR><BR>



<DL><DD><TABLE CELLPADDING=0>
<TR><TD>  <B>word</B> </TD>
 <TD></TD><TD> </TD>
 <TD></TD><TD> <B>stem</B> </TD>
 <TD></TD><TD>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</TD>
 <TD></TD><TD> <B>word</B> </TD>
 <TD></TD><TD> </TD>
 <TD></TD><TD> <B>stem</B> </TD>
</TR>

<TR><TD>
indtage<BR>
indtagelse<BR>
indtager<BR>
indtages<BR>
indtaget<BR>
indtil<BR>
indtog<BR>
indtraf<BR>
indtryk<BR>
indtr&aelig;de<BR>
indtr&aelig;der<BR>
indtr&aelig;ffe<BR>
indtr&aelig;ffer<BR>
indtr&aelig;ngende<BR>
indt&aelig;gt<BR>
indt&aelig;gter<BR>
indvandrede<BR>
indvandret<BR>
indvender<BR>
indvendig<BR>
indvendige<BR>
indvendigt<BR>
indvending<BR>
indvendingerne<BR>
indvie<BR>
indviede<BR>
indvielse<BR>
indvielsen<BR>
indvielsesl&oslash;fte<BR>
indvielsestid<BR>
indvier<BR>
indvies<BR>
indviet<BR>
indvikle<BR>
indvikler<BR>
indvolde<BR>
indvoldene<BR>
indvortes<BR>
ind&aring;nde<BR>
ind&aring;ndede<BR>
</TD>
<TD></TD><TD> &nbsp;<TT><B> => </B></TT>&nbsp; </TD>
<TD></TD><TD>
indtag<BR>
indtag<BR>
indtag<BR>
indtag<BR>
indtag<BR>
indtil<BR>
indtog<BR>
indtraf<BR>
indtryk<BR>
indtr&aelig;d<BR>
indtr&aelig;d<BR>
indtr&aelig;f<BR>
indtr&aelig;f<BR>
indtr&aelig;ng<BR>
indt&aelig;g<BR>
indt&aelig;g<BR>
indvandred<BR>
indvandr<BR>
indvend<BR>
indvend<BR>
indvend<BR>
indvend<BR>
indvending<BR>
indvending<BR>
indvi<BR>
indvied<BR>
indvi<BR>
indvi<BR>
indvielsesl&oslash;ft<BR>
indvielsestid<BR>
indvi<BR>
indvi<BR>
indvi<BR>
indvikl<BR>
indvikl<BR>
indvold<BR>
indvold<BR>
indvort<BR>
ind&aring;nd<BR>
ind&aring;nded<BR>
</TD>
<TD></TD><TD> </TD>
<TD></TD><TD>
underste<BR>
unders&aring;tter<BR>
unders&aring;tters<BR>
unders&oslash;g<BR>
unders&oslash;ge<BR>
unders&oslash;gelse<BR>
unders&oslash;gelsen<BR>
unders&oslash;ger<BR>
unders&oslash;gt<BR>
unders&oslash;gte<BR>
undertryk<BR>
undertrykke<BR>
undertrykkelse<BR>
undertrykker<BR>
undertrykkere<BR>
undertrykkeren<BR>
undertrykkerens<BR>
undertrykkeres<BR>
undertrykkes<BR>
undertrykt<BR>
undertrykte<BR>
undertryktes<BR>
undertvang<BR>
undertvunget<BR>
undertvungne<BR>
undervejs<BR>
underverdenen<BR>
undervise<BR>
underviser<BR>
undervises<BR>
undervisning<BR>
undervisningen<BR>
undervist<BR>
underviste<BR>
underv&aelig;rk<BR>
underv&aelig;rker<BR>
undevise<BR>
undeviste<BR>
undfange<BR>
undfanged<BR>
</TD>
<TD></TD><TD> &nbsp;<TT><B> => </B></TT>&nbsp; </TD>
<TD></TD><TD>
underst<BR>
unders&aring;t<BR>
unders&aring;t<BR>
unders&oslash;g<BR>
unders&oslash;g<BR>
unders&oslash;g<BR>
unders&oslash;g<BR>
unders&oslash;g<BR>
unders&oslash;g<BR>
unders&oslash;g<BR>
undertryk<BR>
undertryk<BR>
undertryk<BR>
undertryk<BR>
undertryk<BR>
undertryk<BR>
undertryk<BR>
undertryk<BR>
undertryk<BR>
undertryk<BR>
undertryk<BR>
undertryk<BR>
undertvang<BR>
undertvung<BR>
undertvungn<BR>
undervej<BR>
underverden<BR>
undervis<BR>
undervis<BR>
undervis<BR>
undervisning<BR>
undervisning<BR>
undervist<BR>
undervist<BR>
underv&aelig;rk<BR>
underv&aelig;rk<BR>
undevis<BR>
undevist<BR>
undfang<BR>
undfanged<BR>
</TD>
</TR>
</TABLE></DL>


</TR>

<TR><TD>

<BR><BR>
<BR>&nbsp;<H2>The stemming algorithm</H2>


The Danish alphabet includes the following additional letters,
<DL><DD>
    <B><I>&aelig;  &nbsp;  &aring;  &nbsp;  &oslash;</I></B>
</DL>


The following letters are vowels:
<DL><DD>
    <B><I>a  &nbsp;  e  &nbsp;  i  &nbsp;  o  &nbsp;  u  &nbsp;  y  &nbsp;  &aelig;  &nbsp;  &aring;  &nbsp;  &oslash;</I></B>
</DL>
A consonant is defined as a non-vowel.
<BR><BR>
<I>R</I>2 is not used: <I>R</I>1 is defined in the same way as in the
<A HREF="../german/stemmer.html">German stemmer</A>.
(See the <A HREF="../texts/r1r2.html"> note</A> on <I>R</I>1 and <I>R</I>2.)
<BR><BR>
Define a valid <B><I>s</I></B>-ending as one of
<DL><DD>
<B><I>a  &nbsp;  b  &nbsp;  c  &nbsp;  d  &nbsp;  f  &nbsp;  g  &nbsp;  h  &nbsp;  j  &nbsp;  k  &nbsp;  l  &nbsp;  m  &nbsp;  n  &nbsp;  o  &nbsp;  p  &nbsp;  r
 &nbsp;  t  &nbsp;  v  &nbsp;  y  &nbsp;  z  &nbsp;  &aring;</I></B>
</DL>

Do each of steps 1, 2, 3 and 4.

<BR><BR>
Step 1:
<DL><DD>
    Search for the longest among the following suffixes in <I>R</I>1, and
    perform the action indicated.
<BR><BR><DL>
        <DT>(<I>a</I>)
            <B><I>hed  &nbsp;  ethed  &nbsp;  ered  &nbsp;  e  &nbsp;  erede  &nbsp;  ende  &nbsp;  erende  &nbsp;  ene
             &nbsp;  erne  &nbsp;  ere  &nbsp;  en  &nbsp;  heden  &nbsp;  eren  &nbsp;  er  &nbsp;  heder  &nbsp;  erer
             &nbsp;  heds  &nbsp;  es  &nbsp;  endes  &nbsp;  erendes  &nbsp;  enes  &nbsp;  ernes  &nbsp;  eres  &nbsp;
            ens  &nbsp;  hedens  &nbsp;  erens  &nbsp;  ers  &nbsp;  ets  &nbsp;  erets  &nbsp;  et  &nbsp;  eret</I></B>
                <DD>delete
<BR><BR>
        <DT>(<I>b</I>)
            <B><I>s</I></B>
                <DD>delete if preceded by a valid <B><I>s</I></B>-ending
</DL>
<BR>
    (Of course the letter of the valid <B><I>s</I></B>-ending is
    not necessarily in <I>R</I>1)
</DL>

Step 2:
<DL><DD>
    Search for one of the following suffixes in <I>R</I>1, and if found
    delete the last letter.
<BR><BR><DL><DD>
        <B><I>gd  &nbsp;  dt  &nbsp;  gt  &nbsp;  kt</I></B>
</DL>
<BR>
    (For example, <I>friskt</I> <TT>-&gt;</TT> <I>frisk</I>)
</DL>

Step 3:
<DL><DD>
    If the word ends <B><I>igst</I></B>, remove the final <B><I>st</I></B>.
<BR><BR>

    Search for the longest among the following suffixes in <I>R</I>1, and
    perform the action indicated.
<BR><BR>
<DL>
    <DT>(<I>a</I>)
    <B><I>ig  &nbsp;  lig  &nbsp;  elig  &nbsp;  els</I></B>
        <DD>delete, and then repeat step 2
<BR><BR>
    <DT>(<I>b</I>)
    <B><I>l&oslash;st</I></B>
        <DD>replace with <B><I>l&oslash;s</I></B>
</DL>
</DL>

Step 4: undouble
<DL><DD>
    If the word ends with a double consonant in <I>R</I>1, remove one of the
    consonants.
<BR><BR>
    (For example, <I>bestemmelse</I> <TT>-&gt;</TT> <I>bestemmels</I> (step 1)
    <TT>-&gt;</TT> <I>bestemm</I> (step 3<I>a</I>)
    <TT>-&gt;</TT> <I>bestem</I> in this step.)
</DL>


</TR>

<TR><TD BGCOLOR="lightblue">

<BR>&nbsp;<H2>The same algorithm in Snowball</H2>

<FONT SIZE=-1><PRE>
<DL><DD>
routines (
           mark_regions
           main_suffix
           consonant_pair
           other_suffix
           undouble
)

externals ( stem )

strings ( ch )

integers ( p1 )

groupings ( v s_ending )

stringescapes {}

/* special characters (MS-DOS code page codings, not ISO Latin 1) */

stringdef ae   hex '91'
stringdef ao   hex '86'
stringdef o/   hex '9B'

define v 'aeiouy{ae}{ao}{o/}'

define s_ending  'abcdfghjklmnoprtvyz{ao}'

define mark_regions as (

    $p1 = limit

    goto v gopast non-v  setmark p1
    try ( $p1 < 3  $p1 = 3 )
)

backwardmode (

    define main_suffix as (
        setlimit tomark p1 for ([substring])
        among(

            'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
            'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
            'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
            'erets' 'et' 'eret'
                (delete)
            's'
                (s_ending delete)
        )
    )

    define consonant_pair as (
        test (
            setlimit tomark p1 for ([substring])
            among(
                'gd' // significant in the call from other_suffix
                'dt' 'gt' 'kt'
            )
        )
        next] delete
    )

    define other_suffix as (
        do ( ['st'] 'ig' delete )
        setlimit tomark p1 for ([substring])
        among(
            'ig' 'lig' 'elig' 'els'
                (delete do consonant_pair)
            'l{o/}st'
                (<-'l{o/}s')
        )
    )
    define undouble as (
        setlimit tomark p1 for ([non-v] ->ch)
        ch
        delete
    )
)

define stem as (

    do mark_regions
    backwards (
        do main_suffix
        do consonant_pair
        do other_suffix
        do undouble
    )
)
</DL>
</PRE></FONT>
</TABLE>
</BODY>
</HTML>


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/danish/voc.txt === (23729/23829 lines abridged)
a
aab
aabenbaringen
ab
abaddon
abagta
aban
abana
abarim
abarimbjergene
abarimbjerget
abba
abda
abdas
abdeels
abdi
abdiel
abdis
abdon
abed
abel
abels
aber
abi
abia
abias
abiasaf
abiathar
abib
abiba
abibajils
abibu
abida
abidan
abidans
abiel
abiezer
abiezers
abiezriten
abiezriterne
abiezriternes
abigajil
abih
abihajil
abihajils
abihu
abija
abijas
abilene
abimael

[-=- -=- -=- 23729 lines omitted -=- -=- -=-]

›gede
›ger
›ges
›get
›jeblik
›jeblikkelig
›jet
›jne
›jnede
›jner
›mme
›mmede
›nner
›nske
›nsked
›nskede
›nsker
›nsket
›re
›rer
›ret
›rkesl›se
›rkesl›st
›rn
›se
›ser
›ses
›st
›ste
›sten
›sterfra
›sterp†
›stlig
›stport
›stp†
›stre
›v
›ve
›ved
›vede
›vedes
›ver
›verst
›verste
›ves
›vet
›vre
›vrig
›vrige
›vrigt