Index: build.xml
===================================================================
--- build.xml	(revision 918599)
+++ build.xml	(working copy)
@@ -319,6 +319,7 @@
 
           <packageset dir="contrib/analyzers/common/src/java"/>
           <packageset dir="contrib/analyzers/smartcn/src/java"/>
+          <packageset dir="contrib/analyzers/stempel/src/java"/>
           <packageset dir="contrib/ant/src/java"/>
           <packageset dir="contrib/benchmark/src/java"/>
           <packageset dir="contrib/icu/src/java"/>
@@ -351,7 +352,7 @@
   
           <group title="Demo" packages="org.apache.lucene.demo*"/>
   
-          <group title="contrib: Analysis" packages="org.apache.lucene.analysis.*:org.tartarus.snowball*"/>
+          <group title="contrib: Analysis" packages="org.apache.lucene.analysis.*:org.tartarus.snowball*:org.egothor.stemmer*"/>
           <group title="contrib: Ant" packages="org.apache.lucene.ant*"/>
           <group title="contrib: Benchmark" packages="org.apache.lucene.benchmark*"/>
           <group title="contrib: ICU" packages="org.apache.lucene.collation*"/>
Index: contrib/analyzers/build.xml
===================================================================
--- contrib/analyzers/build.xml	(revision 918599)
+++ contrib/analyzers/build.xml	(working copy)
@@ -23,6 +23,7 @@
     Additional Analyzers
       - common:	Additional Analyzers
       - smartcn:	Smart Analyzer for Simplified Chinese Text
+      - stempel:	Algorithmic Stemmer for Polish
   </description>
 
   <target name="common">
@@ -33,23 +34,31 @@
     <ant dir="smartcn" />
   </target>
 
-  <target name="default" depends="common,smartcn" />
+  <target name="stempel">
+    <ant dir="stempel" />
+  </target>
 
+  <target name="default" depends="common,smartcn,stempel" />
+
   <target name="clean">
     <ant dir="common" target="clean" />
     <ant dir="smartcn" target="clean" />
+    <ant dir="stempel" target="clean" />
   </target>
   <target name="compile-core">
     <ant dir="common" target="compile-core" />
     <ant dir="smartcn" target="compile-core" />
+    <ant dir="stempel" target="compile-core" />
   </target>
   <target name="compile-test">
     <ant dir="common" target="compile-test" />
     <ant dir="smartcn" target="compile-test" />
+    <ant dir="stempel" target="compile-test" />
   </target>
   <target name="test">
     <ant dir="common" target="test" />
     <ant dir="smartcn" target="test" />
+    <ant dir="stempel" target="test" />
   </target>
 
   <target name="build-artifacts-and-tests" depends="default,compile-test" />
@@ -57,16 +66,19 @@
   <target name="dist-maven" depends="default">
     <ant dir="common" target="dist-maven" />
     <ant dir="smartcn" target="dist-maven" />
+    <ant dir="stempel" target="dist-maven" />
   </target>  	
 
   <target name="javadocs">
     <ant dir="common" target="javadocs" />
     <ant dir="smartcn" target="javadocs" />
+    <ant dir="stempel" target="javadocs" />
   </target>  	
 
   <target name="javadocs-index.html">
     <ant dir="common" target="javadocs-index.html" />
     <ant dir="smartcn" target="javadocs-index.html" />
+    <ant dir="stempel" target="javadocs-index.html" />
   </target>
 	
 </project>
Index: contrib/analyzers/stempel/build.xml
===================================================================
--- contrib/analyzers/stempel/build.xml	(revision 0)
+++ contrib/analyzers/stempel/build.xml	(revision 0)
@@ -0,0 +1,38 @@
+<?xml version="1.0"?>
+
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+ 
+        http://www.apache.org/licenses/LICENSE-2.0
+ 
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+<project name="stempel" default="default">
+  <!-- Ant build for the Stempel analyzer module. It defines no targets of its own. -->
+  <description>
+    Stempel Analyzer
+  </description>
+  <!-- Output directories. They are set before the import below; Ant properties cannot be overridden once set. -->
+  <property name="build.dir" location="../../../build/contrib/analyzers/stempel" />
+  <property name="dist.dir" location="../../../dist/contrib/analyzers/stempel" />
+  <property name="maven.dist.dir" location="../../../dist/maven" />
+  <!-- NOTE(review): the "default" target and the classpath/junit-path references are presumably defined by this import. -->
+  <import file="../../contrib-build.xml"/>
+  <!-- Test classpath: "classpath", the top-level build's test classes, JUnit, and this module's compiled classes. -->
+  <path id="test.classpath">
+    <path refid="classpath"/>
+    <pathelement location="../../../build/classes/test/"/>
+    <path refid="junit-path"/>
+    <pathelement location="${build.dir}/classes/java"/>
+  </path>	
+</project>
\ No newline at end of file

Property changes on: contrib\analyzers\stempel\build.xml
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/pom.xml.template
===================================================================
--- contrib/analyzers/stempel/pom.xml.template	(revision 0)
+++ contrib/analyzers/stempel/pom.xml.template	(revision 0)
@@ -0,0 +1,35 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <!-- Maven POM template for lucene-stempel. NOTE(review): @version@ is presumably replaced at build time; confirm. -->
+  <!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+    
+    http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+  -->
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.lucene</groupId>
+    <artifactId>lucene-contrib</artifactId>
+    <version>@version@</version>
+  </parent>
+  <groupId>org.apache.lucene</groupId>
+  <artifactId>lucene-stempel</artifactId>
+  <name>Lucene Stempel Analyzer</name>
+  <version>@version@</version>
+  <description>Stempel Analyzer</description>
+  <packaging>jar</packaging>
+</project>
Index: contrib/analyzers/stempel/src/java/overview.html
===================================================================
--- contrib/analyzers/stempel/src/java/overview.html	(revision 0)
+++ contrib/analyzers/stempel/src/java/overview.html	(revision 0)
@@ -0,0 +1,458 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+  <meta content="text/html; charset=UTF-8" http-equiv="content-type">
+  <title>Stempel - Algorithmic Stemmer for Polish Language</title>
+  <meta content="Andrzej Bialecki" name="author">
+  <meta name="keywords"
+ content="stemming, stemmer, algorithmic stemmer, Polish stemmer">
+  <meta
+ content="This page describes a software package consisting of high-quality stemming tables for Polish, and a universal algorithmic stemmer, which operates using these tables."
+ name="description">
+</head>
+<body style="font-family: Arial,SansSerif;">
+<h1><i>Stempel</i> - Algorithmic Stemmer for Polish Language</h1>
+<h2>Introduction</h2>
+<p>A method for conflation of different inflected word forms is an
+important component of many Information Retrieval systems. It helps to
+improve the system's recall and can significantly reduce the index
+size. This is especially true for highly-inflectional languages like
+those from the Slavic language family (Czech, Slovak, Polish, Russian,
+Bulgarian, etc).</p>
+<p>This page describes a software package consisting of high-quality
+stemming tables for Polish, and a universal algorithmic stemmer, which
+operates using these tables. The stemmer code is taken virtually
+unchanged from the <a href="http://www.egothor.org">Egothor project</a>.</p>
+<p>The software distribution includes stemmer
+tables prepared using an extensive corpus of Polish language (see
+details below).</p>
+<p>This work is available under Apache-style Open Source licenses - the
+stemmer code is covered by the Egothor License, the tables and other
+additions are covered by the Apache License 2.0. Both licenses allow the
+code to be used in Open Source as well as commercial (closed source) projects.</p>
+<h3>Terminology</h3>
+<p>A short explanation is in order about the terminology used in this
+text.</p>
+<p>In the following sections I make a distinction between <b>stem</b>
+and <b>lemma</b>.</p>
+<p>Lemma is a base grammatical form (dictionary form, headword) of a
+word. Lemma is an existing, grammatically correct word in some human
+language.</p>
+<p>Stem on the other hand is just a unique token, not necessarily
+making any sense in any human language, but which can serve as a unique
+label instead of lemma for the same set of inflected forms. Quite often
+stem is referred to as a "root" of the word - which is incorrect and
+misleading (stems sometimes have very little to do with the linguistic
+root of a word, i.e. a pattern found in a word which is common to all
+inflected forms or within a family of languages).</p>
+<p>For an IR system stems are usually sufficient, for a morphological
+analysis system obviously lemmas are a must. In practice, various
+stemmers produce a mix of stems and lemmas, as is the case with the
+stemmer described here. Additionally, for some languages which use
+suffix-based inflection rules, many stemmers based on suffix-stripping
+will produce a large percentage of stems equivalent to lemmas. This is
+however not the case for languages with complex, irregular inflection
+rules (such as Slavic languages) - here simplistic suffix-stripping
+stemmers produce very poor results.</p>
+<h3>Background</h3>
+<p>Lemmatization is a process of finding the base, non-inflected form
+of a word. The result of lemmatization is a correct existing word,
+often in nominative case for nouns and infinitive form for verbs. A
+given inflected form may correspond to several lemmas (e.g. "found"
+-&gt; find, found) - the correct choice depends on the context.<br>
+<br>
+Stemming is concerned mostly with finding a unique "root" of a word,
+which does not necessarily result in any existing word or lemma. The
+quality of stemming is measured by the rate of collisions (overstemming
+- which causes words with different lemmas to be incorrectly conflated
+into one "root"), and the rate of superfluous word "roots"
+(understemming - which assigns several "roots" to words with the same
+lemma). <br>
+<br>
+Both stemmer and lemmatizer can be implemented in various ways. The two
+most common approaches are:<br>
+</p>
+<ul>
+  <li>dictionary-based: where the stemmer uses an extensive dictionary
+of morphological forms in order to find the corresponding stem or lemma</li>
+  <li>algorithmic: where the stemmer uses an algorithm, based on
+general morphological properties of a given language plus a set of
+heuristic rules<br>
+  </li>
+</ul>
+There are many existing and well-known implementations of stemmers for
+English (Porter, Lovins, Krovetz) and other European languages
+(<a href="http://snowball.tartarus.org">Snowball</a>). There are also
+good quality commercial lemmatizers for Polish. However, there is only
+one
+freely available Polish stemmer, implemented by
+<a
+ href="http://www.cs.put.poznan.pl/dweiss/xml/projects/lametyzator/index.xml?lang=en">Dawid
+Weiss</a>, based on the "ispell" dictionary and Jan Daciuk's <a
+ href="http://www.eti.pg.gda.pl/%7Ejandac/">FSA package</a>. That
+stemmer is dictionary-based. This means that even
+though it can achieve
+perfect accuracy for previously known word forms found in its
+dictionary, it
+completely fails in case of all other word forms. This deficiency is
+somewhat mitigated by the comprehensive dictionary distributed with
+this stemmer (so there is a high probability that most of the words in
+the input text will be found in the dictionary), however the problem
+still remains (please see the page above for more detailed description).<br>
+<br>
+The implementation described here uses an algorithmic method. This
+method
+and particular algorithm implementation are described in detail in
+[1][2].
+The main advantage of algorithmic stemmers is their ability to process
+previously
+unseen word forms with high accuracy. This particular algorithm uses a
+set
+of
+transformation rules (patch commands), which describe how a word with a
+given pattern should be transformed to its stem. These rules are first
+learned from a training corpus. They don't
+cover
+all possible cases, so there is always some loss of precision/recall
+(which
+means that even the words from the training corpus are sometimes
+incorrectly stemmed).<br>
+<h2>Algorithm and implementation</h2>
+The algorithm and its Java implementation are described in detail in the
+publications cited below. Here's just a short excerpt from [2]:<br>
+<br>
+<center>
+<div style="width: 80%;" align="justify">"The aim is separation of the
+stemmer execution code from the data
+structures [...]. In other words, a static algorithm configurable by
+data must be developed. The word transformations that happen in the
+stemmer must be then encoded to the data tables.<br>
+<br>
+The tacit input of our method is a sample set (a so-called dictionary)
+of words (as keys) and their stems. Each record can be equivalently
+stored as a key and the record of key's transformation to its
+respective stem. The transformation record is termed a patch command
+(P-command). It must be ensured that P-commands are universal, and that
+P-commands can transform any word to its stem. Our solution[6,8] is
+based on the Levenstein metric [10], which produces P-command as the
+minimum cost path in a directed graph.<br>
+<br>
+One can imagine the P-command as an algorithm for an operator (editor)
+that rewrites a string to another string. The operator can use these
+instructions (PP-command's): <span style="font-weight: bold;">removal </span>-
+deletes a sequence of characters starting at the current cursor
+position and moves the cursor to the next character. The length of this
+sequence is the parameter; <span style="font-weight: bold;">insertion </span>-
+inserts a character ch, without moving the cursor. The character ch is
+a parameter; <span style="font-weight: bold;">substitution&nbsp;</span>
+- rewrites a character at the current cursor position to the character
+ch and moves the cursor to the next character. The character ch is a
+parameter; <span style="font-weight: bold;">no operation</span> (NOOP)
+- skip a sequence of characters starting at the current cursor
+position. The length of this sequence is the parameter.<br>
+<br>
+The P-commands are applied from the end of a word (right to left). This
+assumption can reduce the set of P-command's, because the last NOOP,
+moving the cursor to the end of a string without any changes, need not
+be stored."</div>
+</center>
+<br>
+The data structure used to keep the dictionary (words and their P-commands)
+is a trie. Several optimization steps are applied in turn to reduce and
+optimize the initial trie, by eliminating useless information and
+shortening the paths in the trie.<br>
+<br>
+Finally, in order to obtain a stem from the input word, the word is
+passed once through a matching path in the trie (applying at each node
+the P-commands stored there). The result is a word stem.<br>
+<h2>Corpus</h2>
+<p><i>(to be completed...)</i></p>
+<p>The following Polish corpora have been used:</p>
+<ul>
+  <li><a
+ href="http://sourceforge.net/project/showfiles.php?group_id=49316&amp;package_id=65354">Polish
+dictionary
+from ispell distribution</a></li>
+  <li><a href="http://www.mimuw.edu.pl/polszczyzna/">Wzbogacony korpus
+słownika frekwencyjnego</a></li>
+<!--<li><a href="http://www.korpus.pl">Korpus IPI PAN</a></li>-->
+<!--<li>The Bible (so called "Warsaw Bible" or "Brytyjka")</li>--><li>The
+Bible (so called "Tysiąclecia") - unauthorized electronic version</li>
+  <li><a
+ href="http://www.mimuw.edu.pl/polszczyzna/Debian/sam34_3.4a.02-1_i386.deb">Analizator
+morfologiczny SAM v. 3.4</a> - this was used to recover lemmas
+missing from other texts</li>
+</ul>
+<p>This step was the most time-consuming - and it would probably be
+even more tedious and difficult if not for the
+help of
+<a href="http://www.python.org/">Python</a>. The source texts had to be
+brought to a common encoding (UTF-8) - some of them used quite ancient
+encodings like Mazovia or DHN - and then scripts were written to
+collect all lemmas and
+inflected forms from the source texts. In cases when the source text
+was not
+tagged,
+I used the SAM analyzer to produce lemmas. In cases of ambiguous
+lemmatization I decided to put references to inflected forms from all
+base forms.<br>
+</p>
+<p>All grammatical categories were allowed to appear in the corpus,
+i.e. nouns, verbs, adjectives, numerals, and pronouns. The resulting
+corpus consisted of roughly 87,000+ inflection sets, i.e. each set
+consisted of one base form (lemma) and many inflected forms. However,
+because of the nature of the training method I restricted these sets to
+include only those where there were at least 4 inflected forms. Sets
+with 3 or fewer inflected forms were removed, so that the final corpus
+consisted of ~69,000 unique sets, which in turn contained ~1.5 mln
+inflected forms. <br>
+</p>
+<h2>Testing</h2>
+<p>I tested the stemmer tables produced using the implementation
+described above. The following sections give some details about
+the testing setup.
+</p>
+<h3>Testing procedure</h3>
+<p>The testing procedure was as follows:
+</p>
+<ul>
+  <li>the whole corpus of ~69,000 unique sets was shuffled, so that the
+input sets were in random order.</li>
+  <li>the corpus was split into two parts - one with 30,000 sets (Part
+1), the other with ~39,000 sets (Part 2).</li>
+  <li>Training samples were drawn in sequential order from the Part 1.
+Since the sets were already randomized, the training samples were also
+randomized, but this procedure ensured that each larger training sample
+contained all smaller samples.</li>
+  <li>Part 2 was used for testing. Note: this means that the testing
+run used <em>only</em> words previously unseen during the training
+phase. This is the worst-case scenario, because it means that the stemmer must
+extrapolate the learned rules to unknown cases. This also means that in
+a real-life case (where the input is a mix of known and unknown
+words) the F-measure of the stemmer will be even higher than in the
+table below.</li>
+</ul>
+<h3>Test results</h3>
+<p>The following table summarizes test results for varying sizes
+of training samples. The meaning of the table columns is
+described below:
+</p>
+<ul>
+  <li><b>training sets:</b> the number of training sets. One set
+consists of one lemma and at least 4 and up to ~80 inflected forms
+(including pre- and suffixed forms).</li>
+  <li><b>testing forms:</b> the number of testing forms. Only inflected
+forms were used in testing.</li>
+  <li><b>stem OK:</b> the number of cases when the produced output was a
+correct (unique) stem. Note: quite often correct stems were also
+correct lemmas.</li>
+  <li><b>lemma OK:</b> the number of cases when the produced output was a
+correct lemma.</li>
+  <li><b>missing:</b> the number of cases when the stemmer was unable to
+provide any output.</li>
+  <li><b>stem bad:</b> the number of cases when the produced output was a
+stem, but one already in use identifying a different set.</li>
+  <li><b>lemma bad:</b> the number of cases when the produced output was an
+incorrect lemma. Note: quite often in such cases the output was a
+correct stem.</li>
+  <li><b>table size:</b> the size in bytes of the stemmer table.</li>
+</ul>
+<div align="center">
+<table border="1" cellpadding="2" cellspacing="0">
+  <tbody>
+    <tr bgcolor="#a0b0c0">
+      <th>Training sets</th>
+      <th>Testing forms</th>
+      <th>Stem OK</th>
+      <th>Lemma OK</th>
+      <th>Missing</th>
+      <th>Stem Bad</th>
+      <th>Lemma Bad</th>
+      <th>Table size [B]</th>
+    </tr>
+    <tr align="right">
+      <td>100</td>
+      <td>1022985</td>
+      <td>842209</td>
+      <td>593632</td>
+      <td>172711</td>
+      <td>22331</td>
+      <td>256642</td>
+      <td>28438</td>
+    </tr>
+    <tr align="right">
+      <td>200</td>
+      <td>1022985</td>
+      <td>862789</td>
+      <td>646488</td>
+      <td>153288</td>
+      <td>16306</td>
+      <td>223209</td>
+      <td>48660</td>
+    </tr>
+    <tr align="right">
+      <td>500</td>
+      <td>1022985</td>
+      <td>885786</td>
+      <td>685009</td>
+      <td>130772</td>
+      <td>14856</td>
+      <td>207204</td>
+      <td>108798</td>
+    </tr>
+    <tr align="right">
+      <td>700</td>
+      <td>1022985</td>
+      <td>909031</td>
+      <td>704609</td>
+      <td>107084</td>
+      <td>15442</td>
+      <td>211292</td>
+      <td>139291</td>
+    </tr>
+    <tr align="right">
+      <td>1000</td>
+      <td>1022985</td>
+      <td>926079</td>
+      <td>725720</td>
+      <td>90117</td>
+      <td>14941</td>
+      <td>207148</td>
+      <td>183677</td>
+    </tr>
+    <tr align="right">
+      <td>2000</td>
+      <td>1022985</td>
+      <td>942886</td>
+      <td>746641</td>
+      <td>73429</td>
+      <td>14903</td>
+      <td>202915</td>
+      <td>313516</td>
+    </tr>
+    <tr align="right">
+      <td>5000</td>
+      <td>1022985</td>
+      <td>954721</td>
+      <td>759930</td>
+      <td>61476</td>
+      <td>14817</td>
+      <td>201579</td>
+      <td>640969</td>
+    </tr>
+    <tr align="right">
+      <td>7000</td>
+      <td>1022985</td>
+      <td>956165</td>
+      <td>764033</td>
+      <td>60364</td>
+      <td>14620</td>
+      <td>198588</td>
+      <td>839347</td>
+    </tr>
+    <tr align="right">
+      <td>10000</td>
+      <td>1022985</td>
+      <td>965427</td>
+      <td>775507</td>
+      <td>50797</td>
+      <td>14662</td>
+      <td>196681</td>
+      <td>1144537</td>
+    </tr>
+    <tr align="right">
+      <td>12000</td>
+      <td>1022985</td>
+      <td>967664</td>
+      <td>782143</td>
+      <td>48722</td>
+      <td>14284</td>
+      <td>192120</td>
+      <td>1313508</td>
+    </tr>
+    <tr align="right">
+      <td>15000</td>
+      <td>1022985</td>
+      <td>973188</td>
+      <td>788867</td>
+      <td>43247</td>
+      <td>14349</td>
+      <td>190871</td>
+      <td>1567902</td>
+    </tr>
+    <tr align="right">
+      <td>17000</td>
+      <td>1022985</td>
+      <td>974203</td>
+      <td>791804</td>
+      <td>42319</td>
+      <td>14333</td>
+      <td>188862</td>
+      <td>1733957</td>
+    </tr>
+    <tr align="right">
+      <td>20000</td>
+      <td>1022985</td>
+      <td>976234</td>
+      <td>791554</td>
+      <td>40058</td>
+      <td>14601</td>
+      <td>191373</td>
+      <td>1977615</td>
+    </tr>
+  </tbody>
+</table>
+</div>
+<p>I also measured the time to produce a stem (which involves
+traversing a trie,
+retrieving a patch command and applying the patch command to the input
+string).
+On a machine running Windows XP (Pentium 4, 1.7 GHz, JDK 1.4.2_03
+HotSpot),
+for tables ranging in size from 1,000 to 20,000 cells, the time to
+produce a
+single stem varies between 5-10 microseconds.<br>
+</p>
+<p>This means that the stemmer can process up to <span
+ style="font-weight: bold;">200,000 words per second</span>, an
+outstanding result when compared to other stemmers (Morfeusz - ~2,000
+w/s, FormAN (MS Word analyzer) - ~1,000 w/s).<br>
+</p>
+<p>The package contains a class <code>org.getopt.stempel.Benchmark</code>,
+which you can use to produce reports
+like the one below:<br>
+</p>
+<pre>--------- Stemmer benchmark report: -----------<br>Stemmer table:  /res/tables/stemmer_2000.out<br>Input file:     ../test3.txt<br>Number of runs: 3<br><br> RUN NUMBER:            1       2       3<br> Total input words      1378176 1378176 1378176<br> Missed output words    112     112     112<br> Time elapsed [ms]      6989    6940    6640<br> Hit rate percent       99.99%  99.99%  99.99%<br> Miss rate percent      00.01%  00.01%  00.01%<br> Words per second       197192  198584  207557<br> Time per word [us]     5.07    5.04    4.82<br></pre>
+<h2>Summary</h2>
+<p>The results of these tests are very encouraging. It seems that using
+the
+training corpus and the stemming algorithm described above results in a
+high-quality stemmer useful for most applications. Moreover, it can
+also
+be used as a better than average lemmatizer.</p>
+<p>Both the author of the implementation
+(Leo Galambos, &lt;leo.galambos AT egothor DOT org&gt;) and the author
+of this
+compilation (Andrzej Bialecki &lt;ab AT getopt DOT org&gt;) would
+appreciate any
+feedback and suggestions for further improvements.</p>
+<h2>Bibliography</h2>
+<ol>
+  <li>Galambos, L.: Multilingual Stemmer in Web Environment, PhD
+Thesis,
+Faculty of Mathematics and Physics, Charles University in Prague, in
+press.</li>
+  <li>Galambos, L.: Semi-automatic Stemmer Evaluation. International
+Intelligent Information Processing and Web Mining Conference, 2004,
+Zakopane, Poland.</li>
+  <li>Galambos, L.: Lemmatizer for Document Information Retrieval
+Systems in JAVA.<span style="text-decoration: underline;"> </span><a
+ class="moz-txt-link-rfc2396E"
+ href="http://www.informatik.uni-trier.de/%7Eley/db/conf/sofsem/sofsem2001.html#Galambos01">&lt;http://www.informatik.uni-trier.de/%7Eley/db/conf/sofsem/sofsem2001.html#Galambos01&gt;</a>
+SOFSEM 2001, Piestany, Slovakia. <br>
+  </li>
+</ol>
+<br>
+<br>
+</body>
+</html>

Property changes on: contrib\analyzers\stempel\src\java\overview.html
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java	(revision 0)
@@ -0,0 +1,154 @@
+package org.apache.lucene.analysis.pl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.stempel.StempelStemmer;
+import org.apache.lucene.analysis.stempel.StempelFilter;
+import org.apache.lucene.util.Version;
+import org.egothor.stemmer.Trie;
+
+/**
+ * {@link Analyzer} for Polish.
+ */
+public final class PolishAnalyzer extends StopwordAnalyzerBase {
+  /** Terms that are excluded from stemming. */
+  private final Set<?> stemExclusionSet;
+  /** Stemming table passed to every {@link StempelStemmer} this analyzer creates. */
+  private final Trie stemTable;
+
+  /** File containing default Polish stopwords. */
+  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+  /**
+   * Returns an unmodifiable instance of the default stop words set.
+   * @return default stop words set.
+   */
+  public static Set<?> getDefaultStopSet(){
+    return DefaultsHolder.DEFAULT_STOP_SET;
+  }
+
+  /**
+   * Loads the default stop word set and the default stemming table lazily and
+   * atomically, the first time the outer class accesses this holder.
+   */
+  private static class DefaultsHolder {
+    static final Set<?> DEFAULT_STOP_SET;
+    static final Trie DEFAULT_TABLE;
+
+    static {
+      try {
+        DEFAULT_STOP_SET = WordlistLoader.getWordSet(PolishAnalyzer.class, DEFAULT_STOPWORD_FILE);
+      } catch (IOException ex) {
+        // the default set is part of the distribution (JAR) and should always be present
+        throw new RuntimeException("Unable to load default stopword set", ex);
+      }
+
+      InputStream stream = PolishAnalyzer.class.getResourceAsStream("stemmer_20000.tbl");
+      DataInputStream in = new DataInputStream(new BufferedInputStream(stream));
+      try {
+        // the table starts with a method string; an 'M' in it selects MultiTrie2 over a plain Trie
+        String method = in.readUTF().toUpperCase();
+        if (method.indexOf('M') < 0) {
+          DEFAULT_TABLE = new org.egothor.stemmer.Trie(in);
+        } else {
+          DEFAULT_TABLE = new org.egothor.stemmer.MultiTrie2(in);
+        }
+      } catch (IOException ex) {
+        // the default table is part of the distribution (JAR) and should always be present
+        throw new RuntimeException("Unable to load default stemming tables", ex);
+      } finally { // release the resource stream even when reading the table fails
+        try { in.close(); } catch (IOException ignored) {}
+      }
+    }
+  }
+
+  /**
+   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+   *
+   * @param matchVersion lucene compatibility version
+   */
+  public PolishAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultsHolder.DEFAULT_STOP_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   *
+   * @param matchVersion lucene compatibility version
+   * @param stopwords a stopword set
+   */
+  public PolishAnalyzer(Version matchVersion, Set<?> stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+   * stemming.
+   *
+   * @param matchVersion lucene compatibility version
+   * @param stopwords a stopword set
+   * @param stemExclusionSet a set of terms not to be stemmed
+   */
+  public PolishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemTable = DefaultsHolder.DEFAULT_TABLE;
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
+  }
+
+  /**
+   * Creates a {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * which tokenizes all the text in the provided {@link Reader}.
+   *
+   * @return A {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         built from an {@link StandardTokenizer} filtered with
+   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter},
+   *         {@link KeywordMarkerTokenFilter} if a stem exclusion set is
+   *         provided and {@link StempelFilter}.
+   */
+  @Override
+  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new StandardFilter(source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
+    if(!stemExclusionSet.isEmpty())
+      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+    result = new StempelFilter(result, new StempelStemmer(stemTable));
+    return new TokenStreamComponents(source, result);
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\apache\lucene\analysis\pl\PolishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/package.html
===================================================================
--- contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/package.html	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/package.html	(revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Polish.
+</body>
+</html>

Property changes on: contrib\analyzers\stempel\src\java\org\apache\lucene\analysis\pl\package.html
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java	(revision 0)
@@ -0,0 +1,88 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.analysis.stempel;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+/**
+ * Transforms the token stream as per the stemming algorithm.
+ * <p>
+ * Note: the input to the stemming filter must already be in lower case, so you
+ * will need to use LowerCaseFilter or LowerCaseTokenizer earlier in the
+ * analysis chain in order for this to work properly!
+ */
+public final class StempelFilter extends TokenFilter {
+  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
+  private final StempelStemmer stemmer;
+  private final int minLength;
+
+  /**
+   * Minimum length of input words to be processed. Shorter words are returned
+   * unchanged.
+   */
+  public static final int DEFAULT_MIN_LENGTH = 3;
+
+  /**
+   * Create filter using the supplied stemmer and {@link #DEFAULT_MIN_LENGTH}.
+   * 
+   * @param in input token stream
+   * @param stemmer stemmer
+   */
+  public StempelFilter(TokenStream in, StempelStemmer stemmer) {
+    this(in, stemmer, DEFAULT_MIN_LENGTH);
+  }
+
+  /**
+   * Create filter using the supplied stemmer.
+   * 
+   * @param in input token stream
+   * @param stemmer stemmer
+   * @param minLength For performance reasons words shorter than minLength
+   * characters are not processed, but simply returned.
+   */
+  public StempelFilter(TokenStream in, StempelStemmer stemmer, int minLength) {
+    super(in);
+    this.stemmer = stemmer;
+    this.minLength = minLength;
+  }
+
+  /** Returns the next input Token, stemmed unless it is a keyword or shorter than minLength. */
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+    if (!keywordAtt.isKeyword() && termAtt.termLength() >= minLength) {
+      char term[] = termAtt.termBuffer();
+      StringBuilder sb = stemmer.stem(term, termAtt.termLength());
+      if (sb != null) { // if we can't stem it, return unchanged
+        // the stem may not fit (patches can insert characters): grow the buffer first
+        if (sb.length() > term.length)
+          term = termAtt.resizeTermBuffer(sb.length());
+        sb.getChars(0, sb.length(), term, 0);
+        termAtt.setTermLength(sb.length());
+      }
+    }
+    return true;
+  }
+}
\ No newline at end of file

Property changes on: contrib\analyzers\stempel\src\java\org\apache\lucene\analysis\stempel\StempelFilter.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java	(revision 0)
@@ -0,0 +1,93 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.lucene.analysis.stempel;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.egothor.stemmer.Diff;
+import org.egothor.stemmer.Trie;
+
+/**
+ * <p>
+ * Stemmer class is a convenient facade for other stemmer-related classes. The
+ * core stemming algorithm and its implementation is taken verbatim from the
+ * Egothor project ( <a href="http://www.egothor.org">www.egothor.org </a>).
+ * </p>
+ * <p>
+ * Even though the stemmer tables supplied in the distribution package are built
+ * for Polish language, there is nothing language-specific here.
+ * </p>
+ * <p>
+ * Instances reuse one internal buffer between calls and do no synchronization.
+ * </p>
+ */
+public class StempelStemmer {
+  private Trie stemmer = null;
+  private StringBuilder buffer = new StringBuilder();
+
+  /**
+   * Create a Stemmer using selected stemmer table. The stream is always closed,
+   * even when reading the table fails.
+   * 
+   * @param stemmerTable stemmer table; if null no table is loaded and
+   *        {@link #stem(char[], int)} always returns null.
+   * @throws IOException if the table cannot be read.
+   */
+  public StempelStemmer(InputStream stemmerTable) throws IOException {
+    if (stemmerTable == null) return;
+    DataInputStream in = new DataInputStream(new BufferedInputStream(stemmerTable));
+    try {
+      // a header containing 'M' marks a table that must be loaded as a MultiTrie2
+      String method = in.readUTF().toUpperCase(java.util.Locale.ENGLISH);
+      stemmer = (method.indexOf('M') < 0) ? new Trie(in) : new org.egothor.stemmer.MultiTrie2(in);
+    } finally {
+      in.close();
+    }
+  }
+
+  /**
+   * Create a Stemmer using pre-loaded stemmer table
+   * 
+   * @param stemmer pre-loaded stemmer table
+   */
+  public StempelStemmer(Trie stemmer) {
+    this.stemmer = stemmer;
+  }
+
+  /**
+   * Stem a word. The returned builder is reused by the next call, so copy its
+   * contents before stemming another word.
+   * 
+   * @param word input word to be stemmed.
+   * @param length valid length of input word.
+   * @return stemmed word, or null if the stem could not be generated.
+   */
+  public StringBuilder stem(char word[], int length) {
+    if (stemmer == null) return null; // no table loaded: nothing can be stemmed
+
+    buffer.setLength(0);
+    buffer.append(word, 0, length);
+    CharSequence cmd = stemmer.getLastOnPath(buffer);
+    if (cmd == null) return null;
+
+    Diff.apply(buffer, cmd);
+    return buffer.length() > 0 ? buffer : null;
+  }
+}
\ No newline at end of file

Property changes on: contrib\analyzers\stempel\src\java\org\apache\lucene\analysis\stempel\StempelStemmer.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/package.html
===================================================================
--- contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/package.html	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/package.html	(revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+	<body>
+		<p>Stempel: Algorithmic Stemmer</p>
+	</body>
+</html>

Property changes on: contrib\analyzers\stempel\src\java\org\apache\lucene\analysis\stempel\package.html
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/Cell.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/Cell.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/Cell.java	(revision 0)
@@ -0,0 +1,94 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+/**
+ * A Cell is a single portion of a trie, described by the four fields below.
+ */
+class Cell {
+  /** id of the next row in this way (initially -1) */
+  int ref = -1;
+  /** command of the cell (initially -1) */
+  int cmd = -1;
+  /** number of cmd-s that were in the subtrie before pack() */
+  int cnt = 0;
+  /** number of chars of the input key that would be discarded in this way */
+  int skip = 0;
+
+  /** Creates a Cell holding the initial field values. */
+  Cell() {}
+
+  /**
+   * Copy constructor.
+   * 
+   * @param a the Cell whose four fields are copied
+   */
+  Cell(Cell a) {
+    this.ref = a.ref;
+    this.cmd = a.cmd;
+    this.cnt = a.cnt;
+    this.skip = a.skip;
+  }
+
+  /** @return the four fields rendered as <code>ref(..)cmd(..)cnt(..)skp(..)</code> */
+  @Override
+  public String toString() {
+    StringBuilder text = new StringBuilder("ref(");
+    text.append(ref).append(")cmd(").append(cmd);
+    text.append(")cnt(").append(cnt);
+    text.append(")skp(").append(skip).append(")");
+    return text.toString();
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\Cell.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java	(revision 0)
@@ -0,0 +1,205 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.util.StringTokenizer;
+
+/**
+ * The Compile class is used to compile a stemmer table.
+ */
+public class Compile {
+
+  static boolean backward;
+  static boolean multi;
+  static Trie trie;
+
+  /**
+   * Entry point to the Compile application.
+   * <p>
+   * This program takes any number of arguments: the first is the name of the
+   * desired stemming algorithm to use (a list is available in the package
+   * description), all of the rest should be the path or paths to a file or
+   * files containing a stemmer table to compile. The algorithm name is matched
+   * case-insensitively. Each file is read using the charset named by the
+   * <code>egothor.stemmer.charset</code> system property (UTF-8 by default) and
+   * its compiled table is written to the same path with <code>.out</code> added.
+   * 
+   * @param args the command line arguments
+   */
+  public static void main(java.lang.String[] args) {
+    if (args.length < 1 || args[0].length() == 0) {
+      return;
+    }
+
+    // toUpperCase() returns a new String; keep it so lower-case flags are honoured
+    final String method = args[0].toUpperCase(java.util.Locale.ENGLISH);
+
+    backward = method.charAt(0) == '-';
+    int qq = (backward) ? 1 : 0;
+    boolean storeorig = false;
+
+    if (qq < method.length() && method.charAt(qq) == '0') {
+      storeorig = true;
+      qq++;
+    }
+
+    multi = qq < method.length() && method.charAt(qq) == 'M';
+    if (multi) {
+      qq++;
+    }
+
+    String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");
+
+    // whatever follows the flags is the list of optimizer passes to run, in order
+    char optimizer[] = method.substring(qq).toCharArray();
+
+    for (int i = 1; i < args.length; i++) {
+      Diff diff = new Diff();
+      try {
+        allocTrie();
+        System.out.println(args[i]);
+        LineNumberReader in = new LineNumberReader(new BufferedReader(
+            new InputStreamReader(new FileInputStream(args[i]), charset)));
+        try {
+          for (String line = in.readLine(); line != null; line = in.readLine()) {
+            try {
+              line = line.toLowerCase();
+              StringTokenizer st = new StringTokenizer(line);
+              String stem = st.nextToken();
+              if (storeorig) {
+                trie.add(stem, "-a");
+              }
+              while (st.hasMoreTokens()) {
+                String token = st.nextToken();
+                if (!token.equals(stem)) {
+                  trie.add(token, diff.exec(token, stem));
+                }
+              }
+            } catch (java.util.NoSuchElementException x) {
+              // no base token (stem) on a line
+            }
+          }
+        } finally {
+          in.close(); // release the input file even when reading fails
+        }
+
+        Optimizer o = new Optimizer();
+        Optimizer2 o2 = new Optimizer2();
+        Lift l = new Lift(true);
+        Lift e = new Lift(false);
+        Gener g = new Gener();
+        for (int j = 0; j < optimizer.length; j++) {
+          String prefix;
+          switch (optimizer[j]) {
+            case 'G':
+              trie = trie.reduce(g);
+              prefix = "G: ";
+              break;
+            case 'L':
+              trie = trie.reduce(l);
+              prefix = "L: ";
+              break;
+            case 'E':
+              trie = trie.reduce(e);
+              prefix = "E: ";
+              break;
+            case '2':
+              trie = trie.reduce(o2);
+              prefix = "2: ";
+              break;
+            case '1':
+              trie = trie.reduce(o);
+              prefix = "1: ";
+              break;
+            default:
+              continue;
+          }
+          trie.printInfo(prefix + " ");
+        }
+
+        DataOutputStream os = new DataOutputStream(new BufferedOutputStream(
+            new FileOutputStream(args[i] + ".out")));
+        try {
+          os.writeUTF(method);
+          trie.store(os);
+        } finally {
+          os.close();
+        }
+      } catch (FileNotFoundException x) {
+        x.printStackTrace();
+      } catch (IOException x) {
+        x.printStackTrace();
+      }
+    }
+  }
+
+  /** Replaces {@link #trie} with a new trie of the kind selected by the parsed flags. */
+  static void allocTrie() {
+    if (multi) {
+      trie = new MultiTrie2(!backward);
+    } else {
+      trie = new Trie(!backward);
+    }
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\Compile.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/Diff.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/Diff.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/Diff.java	(revision 0)
@@ -0,0 +1,295 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+/**
+ * The Diff object generates a patch string.
+ * <p>
+ * A patch string is actually a command to a stemmer telling it how to reduce a
+ * word to its root. For example, to reduce the word teacher to its root teach
+ * the patch string Db would be generated. This command tells the stemmer to
+ * delete the last 2 characters from the word teacher to reach the stem (patch
+ * commands are applied starting from the last character of the word).
+ */
+public class Diff {
+  int sizex = 0;
+  int sizey = 0;
+  int net[][];
+  int way[][];
+  
+  int INSERT;
+  int DELETE;
+  int REPLACE;
+  int NOOP;
+  
+  /**
+   * Constructor for the Diff object, using the costs 1, 1, 1 and 0.
+   */
+  public Diff() {
+    this(1, 1, 1, 0);
+  }
+  
+  /**
+   * Constructor for the Diff object
+   * 
+   * @param ins cost added for inserting a character
+   * @param del cost added for deleting a character
+   * @param rep cost added for replacing a character
+   * @param noop cost added for keeping a character that is equal in both strings
+   */
+  public Diff(int ins, int del, int rep, int noop) {
+    INSERT = ins;
+    DELETE = del;
+    REPLACE = rep;
+    NOOP = noop;
+  }
+  
+  /**
+   * Apply the given patch string <tt>diff</tt> to the given string <tt>
+   * dest</tt>. A null patch or an empty <tt>dest</tt> leaves it untouched.
+   * 
+   * @param dest Destination string, modified in place
+   * @param diff Patch string, read as pairs of a command and a parameter char
+   */
+  public static void apply(StringBuilder dest, CharSequence diff) {
+    try {
+      
+      if (diff == null) {
+        return;
+      }
+
+      int pos = dest.length() - 1;
+      if (pos < 0) {
+        return;
+      }
+      // reaching this point means dest is not empty; pos is its last index
+      for (int i = 0; i < diff.length() / 2; i++) {
+        char cmd = diff.charAt(2 * i);
+        char param = diff.charAt(2 * i + 1);
+        int par_num = (param - 'a' + 1); // 'a' is 1, 'b' is 2, ...
+        switch (cmd) {
+          case '-':
+            pos = pos - par_num + 1;
+            break;
+          case 'R':
+            dest.setCharAt(pos, param);
+            break;
+          case 'D':
+            int o = pos;
+            pos -= par_num - 1;
+            /*
+             * delete par_num chars from index pos
+             */
+            // String s = orig.toString();
+            // s = s.substring( 0, pos ) + s.substring( o + 1 );
+            // orig = new StringBuffer( s );
+            dest.delete(pos, o + 1);        
+            break;
+          case 'I':
+            dest.insert(pos += 1, param);
+            break;
+        }
+        pos--;
+      }
+    } catch (StringIndexOutOfBoundsException x) {
+      // ignored on purpose: dest keeps whatever was applied before the failure
+    } catch (ArrayIndexOutOfBoundsException x) {
+      // ignored on purpose: dest keeps whatever was applied before the failure
+    }
+  }
+  
+  /**
+   * Construct a patch string that transforms a to b.
+   * 
+   * @param a String 1st string
+   * @param b String 2nd string
+   * @return the patch string, or null if either argument is null
+   */
+  public synchronized String exec(String a, String b) {
+    if (a == null || b == null) {
+      return null;
+    }
+    
+    int x;
+    int y;
+    int maxx;
+    int maxy;
+    int go[] = new int[4];
+    final int X = 1;
+    final int Y = 2;
+    final int R = 3;
+    final int D = 0;
+    
+    /*
+     * setup memory if needed => processing speed up
+     */
+    maxx = a.length() + 1;
+    maxy = b.length() + 1;
+    if ((maxx >= sizex) || (maxy >= sizey)) {
+      sizex = maxx + 8;
+      sizey = maxy + 8;
+      net = new int[sizex][sizey];
+      way = new int[sizex][sizey];
+    }
+    
+    /*
+     * clear the network
+     */
+    for (x = 0; x < maxx; x++) {
+      for (y = 0; y < maxy; y++) {
+        net[x][y] = 0;
+      }
+    }
+    
+    /*
+     * set known persistent values
+     */
+    for (x = 1; x < maxx; x++) {
+      net[x][0] = x;
+      way[x][0] = X;
+    }
+    for (y = 1; y < maxy; y++) {
+      net[0][y] = y;
+      way[0][y] = Y;
+    }
+    
+    for (x = 1; x < maxx; x++) {
+      for (y = 1; y < maxy; y++) {
+        go[X] = net[x - 1][y] + DELETE;
+        // a step along x costs DELETE (1 with the default costs)
+        go[Y] = net[x][y - 1] + INSERT;
+        // a step along y costs INSERT (1 with the default costs)
+        go[R] = net[x - 1][y - 1] + REPLACE;
+        go[D] = net[x - 1][y - 1]
+            + ((a.charAt(x - 1) == b.charAt(y - 1)) ? NOOP : 100);
+        // the diagonal costs NOOP when the chars are equal, otherwise 100
+        short min = D;
+        if (go[min] >= go[X]) {
+          min = X;
+        }
+        if (go[min] > go[Y]) {
+          min = Y;
+        }
+        if (go[min] > go[R]) {
+          min = R;
+        }
+        way[x][y] = min;
+        net[x][y] = (short) go[min];
+      }
+    }
+    
+    // read the patch string
+    StringBuffer result = new StringBuffer();
+    final char base = 'a' - 1;
+    char deletes = base;
+    char equals = base;
+    for (x = maxx - 1, y = maxy - 1; x + y != 0;) {
+      switch (way[x][y]) {
+        case X:
+          if (equals != base) {
+            result.append("-" + (equals));
+            equals = base;
+          }
+          deletes++;
+          x--;
+          break;
+        // delete
+        case Y:
+          if (deletes != base) {
+            result.append("D" + (deletes));
+            deletes = base;
+          }
+          if (equals != base) {
+            result.append("-" + (equals));
+            equals = base;
+          }
+          result.append('I');
+          result.append(b.charAt(--y));
+          break;
+        // insert
+        case R:
+          if (deletes != base) {
+            result.append("D" + (deletes));
+            deletes = base;
+          }
+          if (equals != base) {
+            result.append("-" + (equals));
+            equals = base;
+          }
+          result.append('R');
+          result.append(b.charAt(--y));
+          x--;
+          break;
+        // replace
+        case D:
+          if (deletes != base) {
+            result.append("D" + (deletes));
+            deletes = base;
+          }
+          equals++;
+          x--;
+          y--;
+          break;
+        // no change
+      }
+    }
+    if (deletes != base) {
+      result.append("D" + (deletes));
+      deletes = base;
+    }
+    
+    return result.toString();
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\Diff.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/DiffIt.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/DiffIt.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/DiffIt.java	(revision 0)
@@ -0,0 +1,121 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.util.StringTokenizer;
+
+/**
+ * The DiffIt class is a means to generate patch commands from an already
+ * prepared stemmer table.
+ */
+public class DiffIt {
+  /** Return the digit at index i of s, or 1 if it is missing or not a number. */
+  static int get(int i, String s) {
+    try {
+      return Integer.parseInt(s.substring(i, i + 1));
+    } catch (RuntimeException x) {
+      return 1; // index past the end of s, or not a digit: use the default
+    }
+  }
+  
+  /**
+   * Entry point to the DiffIt application.
+   * <p>
+   * The first argument holds up to four digits that are handed to Diff as its
+   * ins, del, rep and nop values (a missing or invalid digit counts as 1). Each
+   * further argument is the path to a stemmer table whose patches are printed.
+   * 
+   * @param args the digit string, followed by the stemmer table file paths
+   */
+  public static void main(java.lang.String[] args) {
+    int ins = get(0, args[0]);
+    int del = get(1, args[0]);
+    int rep = get(2, args[0]);
+    int nop = get(3, args[0]);
+    
+    for (int i = 1; i < args.length; i++) {
+      Diff diff = new Diff(ins, del, rep, nop);
+      try {
+        LineNumberReader in = new LineNumberReader(new BufferedReader(new FileReader(args[i])));
+        try {
+          for (String line = in.readLine(); line != null; line = in.readLine()) {
+            try {
+              line = line.toLowerCase();
+              StringTokenizer st = new StringTokenizer(line);
+              String stem = st.nextToken();
+              System.out.println(stem + " -a");
+              while (st.hasMoreTokens()) {
+                String token = st.nextToken();
+                if (token.equals(stem) == false) {
+                  System.out.println(stem + " " + diff.exec(token, stem));
+                }
+              }
+            } catch (java.util.NoSuchElementException x) {
+              // no base token (stem) on a line
+            }
+          }
+        } finally {
+          in.close(); // release the file handle even if reading fails
+        }
+      } catch (IOException x) {
+        x.printStackTrace();
+      }
+    }
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\DiffIt.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/Gener.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/Gener.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/Gener.java	(revision 0)
@@ -0,0 +1,132 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * The Gener object helps in the discarding of nodes which break the reduction
+ * effort and defend the structure against large reductions.
+ */
+public class Gener extends Reduce {
+  /**
+   * Constructor for the Gener object.
+   */
+  public Gener() {}
+  
+  /**
+   * Return a Trie with infrequent values occurring in the given Trie removed.
+   * 
+   * @param orig the Trie to optimize
+   * @return a new optimized Trie
+   */
+  @Override
+  public Trie optimize(Trie orig) {
+    List<CharSequence> cmds = orig.cmds;
+    List<Row> orows = orig.rows;
+    int remap[] = new int[orows.size()];
+    
+    // remap[j] == 0 marks a Row for which eat() returned true. Rows are
+    // visited last-to-first; eat() consults the marks of referenced Rows.
+    Arrays.fill(remap, 1);
+    for (int j = orows.size() - 1; j >= 0; j--) {
+      if (eat(orows.get(j), remap)) {
+        remap[j] = 0;
+      }
+    }
+    
+    Arrays.fill(remap, -1);
+    List<Row> rows = removeGaps(orig.root, orows, new ArrayList<Row>(), remap);
+    return new Trie(orig.forward, remap[orig.root], cmds, rows);
+  }
+  
+  /**
+   * Prune the infrequent commands of the given Row and report whether the Row
+   * is left without content, i.e. can be dropped from the optimized Trie.
+   * 
+   * @param in the Row to test
+   * @param remap per-row marks; a reference to a Row marked 0 is cleared
+   * @return <tt>true</tt> if no Cell of the Row holds a command or a
+   *         reference any more, <tt>false</tt> otherwise
+   */
+  public boolean eat(Row in, int remap[]) {
+    int sum = 0;
+    for (Cell c : in.cells.values()) {
+      sum += c.cnt;
+      // forget references to Rows that are marked 0 in remap
+      if (c.ref >= 0 && remap[c.ref] == 0) {
+        c.ref = -1;
+      }
+    }
+    
+    // commands counted less often than a tenth of the Row's total are dropped
+    int frame = sum / 10;
+    boolean live = false;
+    for (Cell c : in.cells.values()) {
+      if (c.cnt < frame && c.cmd >= 0) {
+        c.cnt = 0;
+        c.cmd = -1;
+      }
+      // a Cell that still carries a command or a reference keeps the Row alive
+      if (c.cmd >= 0 || c.ref >= 0) {
+        live = true;
+      }
+    }
+    return !live;
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\Gener.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/Lift.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/Lift.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/Lift.java	(revision 0)
@@ -0,0 +1,147 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * The Lift class is a data structure that is a variation of a Patricia trie.
+ * <p>
+ * Lift's <i>raison d'etre</i> is to implement reduction of the trie via the
+ * Lift-Up method, which makes the data structure less liable to overstemming.
+ */
+public class Lift extends Reduce {
+  boolean changeSkip;
+  
+  /**
+   * Constructor for the Lift object.
+   * 
+   * @param changeSkip when set to <tt>true</tt>, comparison of two Cells takes
+   *          a skip command into account
+   */
+  public Lift(boolean changeSkip) {
+    this.changeSkip = changeSkip;
+  }
+  
+  /**
+   * Apply the Lift-Up reduction to every Row of the given Trie, then build a
+   * new Trie from the result with removeGaps.
+   * 
+   * @param orig the Trie to optimize
+   * @return the reduced Trie
+   */
+  @Override
+  public Trie optimize(Trie orig) {
+    List<Row> orows = orig.rows;
+    
+    // Lift from the last Row towards the first one.
+    int j = orows.size();
+    while (--j >= 0) {
+      liftUp(orows.get(j), orows);
+    }
+    
+    // remap is handed to removeGaps; its entry for the old root is the new root
+    int remap[] = new int[orows.size()];
+    Arrays.fill(remap, -1);
+    List<Row> rows = removeGaps(orig.root, orows, new ArrayList<Row>(), remap);
+    return new Trie(orig.forward, remap[orig.root], orig.cmds, rows);
+  }
+  
+  /**
+   * Reduce the trie using Lift-Up reduction.
+   * <p>
+   * The Lift-Up reduction propagates all leaf-values (patch commands), where
+   * possible, to higher levels which are closer to the root of the trie.
+   * 
+   * @param in the Row to consider when optimizing
+   * @param nodes the Rows of the trie, addressed by the Cells' references
+   */
+  public void liftUp(Row in, List<Row> nodes) {
+    for (Cell c : in.cells.values()) {
+      if (c.ref < 0) {
+        continue; // the Cell points to no other Row
+      }
+      // nodes is indexed by row number, which is what Cell.ref stores
+      Row to = nodes.get(c.ref);
+      int sum = to.uniformCmd(changeSkip);
+      if (sum < 0) {
+        continue; // negative result: the referenced Row offers nothing to lift
+      }
+      
+      if (sum == c.cmd) {
+        // The Cell already carries the referenced Row's command: merge it in,
+        // unless skips matter and the Cell's skip does not line up with it.
+        if (changeSkip && c.skip != to.uniformSkip + 1) {
+          continue;
+        }
+        c.skip = changeSkip ? to.uniformSkip + 1 : 0;
+        c.cnt += to.uniformCnt;
+        c.ref = -1;
+      } else if (c.cmd < 0) {
+        // The Cell has no command of its own: take over the command, count
+        // and skip of the referenced Row and cut the reference.
+        c.cmd = sum;
+        c.cnt = to.uniformCnt;
+        c.skip = changeSkip ? to.uniformSkip + 1 : 0;
+        c.ref = -1;
+      }
+      // otherwise the Cell holds a different command of its own and is left
+      // untouched, reference included
+    }
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\Lift.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie.java	(revision 0)
@@ -0,0 +1,208 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * The MultiTrie is a Trie of Tries. It stores words and their associated patch
+ * commands. The MultiTrie handles patch commands individually (each command by
+ * itself).
+ */
+public class MultiTrie extends Trie {
+  final char EOM = '*'; // end-of-command marker
+  final String EOM_NODE = "" + EOM; // the marker in the form add() stores it
+  
+  List<Trie> tries = new ArrayList<Trie>();
+  int BY = 1; // number of command characters stored per Trie, see add()
+  
+  /**
+   * Constructor for the MultiTrie object.
+   * 
+   * @param is the input stream
+   * @exception IOException if an I/O error occurs
+   */
+  public MultiTrie(DataInput is) throws IOException {
+    super(false);
+    forward = is.readBoolean();
+    BY = is.readInt();
+    for (int i = is.readInt(); i > 0; i--) {
+      tries.add(new Trie(is));
+    }
+  }
+  
+  /**
+   * Constructor for the MultiTrie object
+   * 
+   * @param forward set to <tt>true</tt> if the elements should be read left to
+   *          right
+   */
+  public MultiTrie(boolean forward) {
+    super(forward);
+  }
+  
+  /**
+   * Return the results of getFully on each contained Trie for the given key,
+   * concatenated up to the first Trie that yields <tt>null</tt> or EOM.
+   * 
+   * @param key the key to the cell holding the desired element
+   * @return the element
+   */
+  @Override
+  public CharSequence getFully(CharSequence key) {
+    StringBuilder result = new StringBuilder(tries.size() * 2);
+    for (int i = 0; i < tries.size(); i++) {
+      CharSequence r = tries.get(i).getFully(key);
+      if (r == null || (r.length() == 1 && r.charAt(0) == EOM)) {
+        return result;
+      }
+      result.append(r);
+    }
+    return result;
+  }
+  
+  /**
+   * Return the results of getLastOnPath on each contained Trie for the given
+   * key, concatenated up to the first Trie that yields <tt>null</tt> or EOM.
+   * 
+   * @param key the key associated with the desired element
+   * @return the element that is stored as last on a path
+   */
+  @Override
+  public CharSequence getLastOnPath(CharSequence key) {
+    StringBuilder result = new StringBuilder(tries.size() * 2);
+    for (int i = 0; i < tries.size(); i++) {
+      CharSequence r = tries.get(i).getLastOnPath(key);
+      if (r == null || (r.length() == 1 && r.charAt(0) == EOM)) {
+        return result;
+      }
+      result.append(r);
+    }
+    return result;
+  }
+  
+  /**
+   * Write this data structure to the given output stream.
+   * 
+   * @param os the output stream
+   * @exception IOException if an I/O error occurs
+   */
+  @Override
+  public void store(DataOutput os) throws IOException {
+    os.writeBoolean(forward);
+    os.writeInt(BY);
+    os.writeInt(tries.size());
+    for (Trie trie : tries)
+      trie.store(os);
+  }
+  
+  /**
+   * Add the given key and patch command. The command is cut into chunks of BY
+   * characters: chunk i goes into the i-th Trie, an EOM marker into the next.
+   * <p>
+   * This method will return without executing if the <tt>cmd</tt>
+   * parameter's length is 0.
+   * 
+   * @param key the key
+   * @param cmd the patch command
+   */
+  @Override
+  public void add(CharSequence key, CharSequence cmd) {
+    if (cmd.length() == 0) {
+      return;
+    }
+    int levels = cmd.length() / BY;
+    while (levels >= tries.size()) {
+      tries.add(new Trie(forward));
+    }
+    for (int i = 0; i < levels; i++) {
+      tries.get(i).add(key, cmd.subSequence(BY * i, BY * i + BY));
+    }
+    tries.get(levels).add(key, EOM_NODE);
+  }
+  
+  /**
+   * Reduce each contained Trie and return them wrapped in a new MultiTrie.
+   * 
+   * @param by the Reduce instance applied to every contained Trie
+   * @return the newly reduced Trie
+   */
+  @Override
+  public Trie reduce(Reduce by) {
+    List<Trie> h = new ArrayList<Trie>();
+    for (Trie trie : tries)
+      h.add(trie.reduce(by));
+    MultiTrie m = new MultiTrie(forward);
+    m.tries = h;
+    m.BY = BY; // keep the chunk size, or store() would write the default 1
+    return m;
+  }
+  
+  /**
+   * Call printInfo on each contained Trie, extending the prefix with "[n] ".
+   * 
+   * @param prefix the desired prefix
+   */
+  @Override
+  public void printInfo(CharSequence prefix) {
+    int c = 0;
+    for (Trie trie : tries)
+      trie.printInfo(prefix + "[" + (++c) + "] ");
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\MultiTrie.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie2.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie2.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie2.java	(revision 0)
@@ -0,0 +1,333 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * The MultiTrie2 is a Trie of Tries.
+ * <p>
+ * It stores words and their associated patch commands. The MultiTrie2 handles
+ * patch commands broken into their constituent parts, as a MultiTrie does, but
+ * the commands are delimited by the skip command.
+ */
+public class MultiTrie2 extends MultiTrie {
+  /**
+   * Constructor for the MultiTrie2 object; reading is left entirely to the
+   * MultiTrie(DataInput) constructor.
+   * @param is the input stream
+   * @exception IOException if an I/O error occurs
+   */
+  public MultiTrie2(DataInput is) throws IOException {
+    super(is);
+  }
+  
+  /**
+   * Constructor for the MultiTrie2 object; delegates to MultiTrie(boolean).
+   * 
+   * @param forward set to <tt>true</tt> if the elements should be read left to
+   *          right
+   */
+  public MultiTrie2(boolean forward) {
+    super(forward);
+  }
+  
+  /**
+   * Query the nested tries in order (Trie.getFully) and concatenate the patch
+   * commands they return; the result may be partial or empty.
+   * @param key the key to the cell holding the desired element
+   * @return the element
+   */
+  @Override
+  public CharSequence getFully(CharSequence key) {
+    StringBuilder result = new StringBuilder(tries.size() * 2);
+    try {
+      CharSequence lastkey = key;
+      CharSequence p[] = new CharSequence[tries.size()];
+      char lastch = ' ';
+      for (int i = 0; i < tries.size(); i++) {
+        CharSequence r = tries.get(i).getFully(lastkey);
+        if (r == null || (r.length() == 1 && r.charAt(0) == EOM)) {
+          return result;
+        }
+        if (cannotFollow(lastch, r.charAt(0))) {
+          return result;
+        } else {
+          lastch = r.charAt(r.length() - 2);
+        }
+        // a command starting with '-' shortens the key used for later tries
+        p[i] = r;
+        if (p[i].charAt(0) == '-') {
+          if (i > 0) {
+            key = skip(key, lengthPP(p[i - 1]));
+          }
+          key = skip(key, lengthPP(p[i]));
+        }
+        // keep this level's command; an emptied key leaves lastkey as it was
+        result.append(r);
+        if (key.length() != 0) {
+          lastkey = key;
+        }
+      }
+    } catch (IndexOutOfBoundsException x) {}
+    return result;
+  }
+  
+  /**
+   * Query the nested tries in order (Trie.getLastOnPath) and concatenate the
+   * patch commands they return; the result may be partial or empty.
+   * 
+   * @param key the key associated with the desired element
+   * @return the element that is stored as last on a path
+   */
+  @Override
+  public CharSequence getLastOnPath(CharSequence key) {
+    StringBuilder result = new StringBuilder(tries.size() * 2);
+    try {
+      CharSequence lastkey = key;
+      CharSequence p[] = new CharSequence[tries.size()];
+      char lastch = ' ';
+      for (int i = 0; i < tries.size(); i++) {
+        CharSequence r = tries.get(i).getLastOnPath(lastkey);
+        if (r == null || (r.length() == 1 && r.charAt(0) == EOM)) {
+          return result;
+        }
+        // stop when this command is not allowed right after the previous one
+        if (cannotFollow(lastch, r.charAt(0))) {
+          return result;
+        } else {
+          lastch = r.charAt(r.length() - 2);
+        }
+        // a command starting with '-' shortens the key used for later tries
+        p[i] = r;
+        if (p[i].charAt(0) == '-') {
+          if (i > 0) {
+            key = skip(key, lengthPP(p[i - 1]));
+          }
+          key = skip(key, lengthPP(p[i]));
+        }
+        // keep this level's command; an emptied key leaves lastkey as it was
+        result.append(r);
+        if (key.length() != 0) {
+          lastkey = key;
+        }
+      }
+    } catch (IndexOutOfBoundsException x) {}
+    return result;
+  }
+  
+  /**
+   * Write this data structure to the given output stream; the work is done
+   * entirely by MultiTrie.store, nothing is added here.
+   * @param os the output stream
+   * @exception IOException if an I/O error occurs
+   */
+  @Override
+  public void store(DataOutput os) throws IOException {
+    super.store(os);
+  }
+  
+  /**
+   * Add an element to this structure consisting of the given key and patch
+   * command. The command is split with decompose() and piece i is stored in
+   * trie i; EOM_NODE is stored in the trie after the last piece.
+   * <p>
+   * This method will return without executing if the <tt>cmd</tt>
+   * parameter's length is 0.
+   * @param key the key
+   * @param cmd the patch command
+   */
+  @Override
+  public void add(CharSequence key, CharSequence cmd) {
+    if (cmd.length() == 0) {
+      return;
+    }
+    // one piece per trie level
+    CharSequence p[] = decompose(cmd);
+    int levels = p.length;
+    // make sure tries 0..levels exist (the extra one receives EOM_NODE)
+    while (levels >= tries.size()) {
+      tries.add(new Trie(forward));
+    }
+    CharSequence lastkey = key;
+    for (int i = 0; i < levels; i++) {
+      if (key.length() > 0) {
+        tries.get(i).add(key, p[i]);
+        lastkey = key;
+      } else {
+        tries.get(i).add(lastkey, p[i]);
+      }
+      // A piece starting with '-' shortens the key for the following levels:
+      // skip() removes lengthPP(previous piece) and then lengthPP(this
+      // piece) characters. Once the key is used up, the last non-empty key
+      // (lastkey) is stored instead, as in the else branch above.
+      if (p[i].length() > 0 && p[i].charAt(0) == '-') {
+        if (i > 0) {
+          key = skip(key, lengthPP(p[i - 1]));
+        }
+        key = skip(key, lengthPP(p[i]));
+      }
+      // key now holds what is left for level i + 1
+    }
+    if (key.length() > 0) {
+      tries.get(levels).add(key, EOM_NODE);
+    } else {
+      tries.get(levels).add(lastkey, EOM_NODE);
+    }
+  }
+  
+  /**
+   * Break the given patch command into its constituent pieces.
+   * <p>
+   * Scanning starts at offset 0. If the current character is '-', the piece
+   * is that character plus the one after it. Otherwise the piece runs up to
+   * the next '-' that dashEven() finds (it probes every second character), or
+   * to the end of the command when there is none.
+   * <p>
+   * Example: <tt>"Ra-bIc"</tt> yields <tt>"Ra"</tt>, <tt>"-b"</tt>,
+   * <tt>"Ic"</tt>.
+   *
+   * @param cmd the patch command
+   * @return an array containing the pieces of the command
+   */
+  public CharSequence[] decompose(CharSequence cmd) {
+    // pieces are collected in one pass; the array is sized from the list
+    final List<CharSequence> pieces = new ArrayList<CharSequence>();
+    int pos = 0;
+    while (pos >= 0 && pos < cmd.length()) {
+      final int dash = dashEven(cmd, pos);
+      if (dash == pos) {
+        // two-character piece starting with '-'
+        pieces.add(cmd.subSequence(pos, pos + 2));
+        pos += 2;
+      } else if (dash < 0) {
+        // no further '-': the rest of the command is the last piece
+        pieces.add(cmd.subSequence(pos, cmd.length()));
+        pos = -1;
+      } else {
+        // everything in front of the next '-'
+        pieces.add(cmd.subSequence(pos, dash));
+        pos = dash;
+      }
+    }
+    return pieces.toArray(new CharSequence[pieces.size()]);
+  }
+  
+  /**
+   * Build a new MultiTrie2 whose nested tries are the reduced forms of this
+   * one's tries, in the same order and with the same direction flag.
+   * @param by the Reduce handed to every nested Trie's reduce()
+   * @return a new MultiTrie2 holding the reduced tries
+   */
+  @Override
+  public Trie reduce(Reduce by) {
+    final List<Trie> reduced = new ArrayList<Trie>();
+    for (final Trie nested : tries) {
+      reduced.add(nested.reduce(by));
+    }
+    final MultiTrie2 result = new MultiTrie2(forward);
+    result.tries = reduced;
+    return result;
+  }
+  
+  /**
+   * Tell whether a command char <tt>goes</tt> is refused directly after
+   * <tt>after</tt>: only '-' after '-' and 'D' after 'D' are refused.
+   */
+  private boolean cannotFollow(char after, char goes) {
+    final boolean restricted = after == '-' || after == 'D';
+    return restricted && after == goes;
+  }
+  
+  /** Drop count chars from the front of in when forward, else from its end. */
+  private CharSequence skip(CharSequence in, int count) {
+    final int len = in.length();
+    final int start = forward ? count : 0;
+    final int end = forward ? len : len - count;
+    return in.subSequence(start, end);
+  }
+  
+  /** Index of the first '-' among the positions from, from + 2, from + 4, ...
+   *  of in, or -1 when the end is reached without finding one. */
+  private int dashEven(CharSequence in, int from) {
+    for (int at = from; at < in.length(); at += 2) {
+      if (in.charAt(at) == '-') {
+        return at;
+      }
+    }
+    return -1;
+  }
+  
+  /** Add up, over the two-char (opcode, argument) pairs of cmd, the lengths
+   *  the callers pass to skip(): '-' and 'D' count (argument - 'a' + 1), 'R'
+   *  counts 1, 'I' and any other opcode count 0. */
+  private int lengthPP(CharSequence cmd) {
+    int total = 0;
+    for (int op = 0; op < cmd.length(); op += 2) {
+      final char code = cmd.charAt(op);
+      if (code == '-' || code == 'D') {
+        // argument 'a' means 1, 'b' means 2, ...
+        total += cmd.charAt(op + 1) - 'a' + 1;
+      } else if (code == 'R') {
+        total++;
+      }
+    }
+    return total;
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\MultiTrie2.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/Optimizer.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/Optimizer.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/Optimizer.java	(revision 0)
@@ -0,0 +1,198 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * The Optimizer class is a Reduce that shrinks a Trie by joining its rows.
+ * <p>
+ * The reduction will be made by joining two rows where the first is a subset of
+ * the second.
+ */
+public class Optimizer extends Reduce {
+  /**
+   * Constructor for the Optimizer object; there is nothing to initialize.
+   */
+  public Optimizer() {}
+  
+  /**
+   * Build a smaller Trie: each row is folded into the first already kept row
+   * that merge() accepts (or kept as a new row), then the rows reachable from
+   * the root are renumbered by removeGaps().
+   *
+   * @param orig the Trie to consolidate
+   * @return the newly consolidated Trie
+   */
+  @Override
+  public Trie optimize(Trie orig) {
+    final List<Row> source = orig.rows;
+    final List<Row> kept = new ArrayList<Row>();
+    // remap[j] = index in kept that original row j ended up in
+    final int remap[] = new int[source.size()];
+    // Walk from the last row down; each row's refs are translated through
+    // the remap entries filled in so far.
+    // NOTE(review): presumably refs only point at higher-numbered rows, so
+    // their entries are already final -- confirm against Trie.add.
+    for (int j = source.size() - 1; j >= 0; j--) {
+      final Row candidate = new Remap(source.get(j), remap);
+      int slot = -1;
+      for (int i = 0; slot < 0 && i < kept.size(); i++) {
+        final Row joined = merge(candidate, kept.get(i));
+        if (joined != null) {
+          kept.set(i, joined);
+          slot = i;
+        }
+      }
+      if (slot < 0) {
+        slot = kept.size();
+        kept.add(candidate);
+      }
+      remap[j] = slot;
+    }
+    // remap is reused for the second renumbering done by removeGaps
+    final int root = remap[orig.root];
+    Arrays.fill(remap, -1);
+    final List<Row> packed = removeGaps(root, kept, new ArrayList<Row>(), remap);
+    return new Trie(orig.forward, remap[root], orig.cmds, packed);
+  }
+  
+  /**
+   * Merge the given rows and return the resulting Row.
+   * <p>
+   * For a key present in both rows the two cells are combined with
+   * merge(Cell, Cell); if that fails the whole row merge fails. A cell found
+   * only in <tt>master</tt> is copied, a cell found only in
+   * <tt>existing</tt> is put into the result as the same instance.
+   *
+   * @param master the master Row
+   * @param existing the existing Row
+   * @return the resulting Row, or <tt>null</tt> if the operation cannot be
+   *         realized
+   */
+  public Row merge(Row master, Row existing) {
+    final Row merged = new Row();
+    for (Character ch : master.cells.keySet()) {
+      // XXX also must handle Cnt and Skip !!
+      final Cell mine = master.cells.get(ch);
+      final Cell theirs = existing.cells.get(ch);
+      final Cell cell = (theirs != null) ? merge(mine, theirs) : new Cell(mine);
+      if (cell == null) {
+        return null;
+      }
+      merged.cells.put(ch, cell);
+    }
+    // keys for which the master row has no cell
+    for (Character ch : existing.cells.keySet()) {
+      if (master.at(ch) == null) {
+        merged.cells.put(ch, existing.at(ch));
+      }
+    }
+    return merged;
+  }
+  
+  /**
+   * Merge the given Cells and return the resulting Cell.
+   * <p>
+   * Rules, in the order they are applied:
+   * <ul>
+   * <li>different <tt>skip</tt> values: no merge;</li>
+   * <li><tt>cmd</tt> set (&gt;= 0) in both cells but different: no merge;</li>
+   * <li><tt>ref</tt> set (&gt;= 0) in both cells but different: no merge;</li>
+   * <li>otherwise <tt>cmd</tt> and <tt>ref</tt> come from <tt>m</tt> when set
+   * there, else from <tt>e</tt>; <tt>cnt</tt> is the sum of both counts and
+   * <tt>skip</tt> is the common value.</li>
+   * </ul>
+   *
+   * @param m the master Cell
+   * @param e the existing Cell
+   * @return the resulting Cell, or <tt>null</tt> if the operation cannot be
+   *         realized
+   */
+  public Cell merge(Cell m, Cell e) {
+    final Cell merged = new Cell();
+    if (m.skip != e.skip) {
+      return null;
+    }
+
+    // a value >= 0 counts as "set"; two set values have to be equal
+    final boolean cmdClash = m.cmd >= 0 && e.cmd >= 0 && m.cmd != e.cmd;
+    if (cmdClash) {
+      return null;
+    }
+    final boolean refClash = m.ref >= 0 && e.ref >= 0 && m.ref != e.ref;
+    if (refClash) {
+      return null;
+    }
+
+    // the master's value wins when it is set
+    if (m.cmd >= 0) {
+      merged.cmd = m.cmd;
+    } else {
+      merged.cmd = e.cmd;
+    }
+    if (m.ref >= 0) {
+      merged.ref = m.ref;
+    } else {
+      merged.ref = e.ref;
+    }
+    merged.cnt = m.cnt + e.cnt;
+    merged.skip = m.skip;
+    return merged;
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\Optimizer.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/Optimizer2.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/Optimizer2.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/Optimizer2.java	(revision 0)
@@ -0,0 +1,90 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+/**
+ * The Optimizer2 class is an Optimizer with a stricter rule for joining cells.
+ * <p>
+ * This is the result of allowing a joining of rows when there is no collision
+ * between non-<tt>null</tt> values in the rows. Information loss, resulting in
+ * the stemmer not being able to recognize words (as in Optimizer), is
+ * curtailed, allowing the stemmer to recognize words for which the original
+ * trie was built. Use of this class allows the stemmer to be self-teaching.
+ */
+public class Optimizer2 extends Optimizer {
+  /**
+   * Constructor for the Optimizer2 object.
+   */
+  public Optimizer2() {}
+
+  /**
+   * Merge the given Cells and return the resulting Cell. Two cells are only
+   * joined when their <tt>cmd</tt>, <tt>ref</tt> and <tt>skip</tt> all match.
+   * @param m the master Cell
+   * @param e the existing Cell
+   * @return a copy of <tt>m</tt> whose <tt>cnt</tt> also includes
+   *         <tt>e.cnt</tt>, or <tt>null</tt> if the cells differ
+   */
+  @Override
+  public Cell merge(Cell m, Cell e) {
+    final boolean same = m.cmd == e.cmd && m.ref == e.ref && m.skip == e.skip;
+    if (!same) {
+      return null;
+    }
+    final Cell joined = new Cell(m);
+    joined.cnt += e.cnt;
+    return joined;
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\Optimizer2.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/Reduce.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/Reduce.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/Reduce.java	(revision 0)
@@ -0,0 +1,134 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * The Reduce object is used to remove gaps in a Trie which stores a dictionary.
+ */
+public class Reduce {
+  
+  /**
+   * Constructor for the Reduce object; the class keeps no state.
+   */
+  public Reduce() {}
+  
+  /**
+   * Optimize (remove holes in the rows) the given Trie and return the
+   * restructured Trie. Only rows reachable from the root are kept.
+   * 
+   * @param orig the Trie to optimize
+   * @return the restructured Trie
+   */
+  public Trie optimize(Trie orig) {
+    List<CharSequence> cmds = orig.cmds;
+    List<Row> orows = orig.rows;
+    // remap[i] = new index of original row i, or -1 while it is unvisited
+    int remap[] = new int[orows.size()];
+    Arrays.fill(remap, -1);
+    // Walk the ORIGINAL rows; an empty list here would make removeGaps fail
+    // on its very first old.get(ind).
+    List<Row> rows = removeGaps(orig.root, orows, new ArrayList<Row>(), remap);
+    return new Trie(orig.forward, remap[orig.root], cmds, rows);
+  }
+  
+  /** Depth-first copy of row ind and the rows it references into to. */
+  List<Row> removeGaps(int ind, List<Row> old, List<Row> to, int remap[]) {
+    final int slot = to.size();
+    remap[ind] = slot;
+    final Row current = old.get(ind);
+    to.add(current); // placeholder; replaced once all child slots are known
+    for (Cell cell : current.cells.values()) {
+      final boolean unvisited = cell.ref >= 0 && remap[cell.ref] < 0;
+      if (unvisited) {
+        removeGaps(cell.ref, old, to, remap);
+      }
+    }
+    to.set(slot, new Remap(current, remap));
+    return to;
+  }
+  
+  /**
+   * A Row built from another Row with every cell reference translated through
+   * a remap table.
+   */
+  class Remap extends Row {
+    /**
+     * Copy every cell of <tt>old</tt> into this row. A cell whose
+     * <tt>ref</tt> is &gt;= 0 gets <tt>remap[ref]</tt> as its new ref; all
+     * other cells are plain copies.
+     *
+     * @param old the Row to copy from
+     * @param remap new row index for each old row index
+     */
+    public Remap(Row old, int remap[]) {
+      super();
+      for (Character key : old.cells.keySet()) {
+        // cells are copied, so the source row itself is left untouched
+        final Cell source = old.at(key);
+        final Cell copy = new Cell(source);
+        if (source.ref >= 0) {
+          // translate the old row index into the new numbering
+          copy.ref = remap[copy.ref];
+        }
+        cells.put(key, copy);
+      }
+    }
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\Reduce.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/Row.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/Row.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/Row.java	(revision 0)
@@ -0,0 +1,309 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.TreeMap;
+
+/**
+ * The Row class represents a row in a matrix representation of a trie.
+ */
+public class Row {
+  TreeMap<Character,Cell> cells = new TreeMap<Character,Cell>();
+  int uniformCnt = 0;
+  int uniformSkip = 0;
+  
+  /**
+   * Read a Row from the given input: a cell count, then for every cell its
+   * key character followed by the cmd, cnt, ref and skip integers.
+   * @param is the input to read from
+   * @exception IOException if an I/O error occurs
+   */
+  public Row(DataInput is) throws IOException {
+    for (int left = is.readInt(); left > 0; left--) {
+      final char key = is.readChar();
+      final Cell cell = new Cell();
+      cell.cmd = is.readInt();
+      cell.cnt = is.readInt();
+      cell.ref = is.readInt();
+      cell.skip = is.readInt();
+      cells.put(key, cell);
+    }
+  }
+  
+  /**
+   * Create an empty Row with no cells.
+   */
+  public Row() {}
+  
+  /**
+   * Create a Row backed by the very same cell map as the given Row: the map
+   * is shared, not copied, so changes made through either Row affect both.
+   * @param old the Row whose cells are reused
+   */
+  public Row(Row old) {
+    this.cells = old.cells;
+  }
+  
+  /**
+   * Store a patch command in the Cell of the given Character, creating the
+   * Cell if needed, and reset its count to 1 (command >= 0) or 0 (otherwise).
+   * @param way the Character defining the Cell
+   * @param cmd the new command
+   */
+  public void setCmd(Character way, int cmd) {
+    Cell target = at(way);
+    if (target == null) {
+      // nothing stored for this character yet: create its Cell
+      target = new Cell();
+      cells.put(way, target);
+    }
+    target.cmd = cmd;
+    // the count restarts on every call, whatever it was before
+    target.cnt = (cmd >= 0) ? 1 : 0;
+  }
+  
+  /**
+   * Store the reference to the next Row in the Cell of the given Character,
+   * creating the Cell if it does not exist yet. The other fields of an
+   * existing Cell are left untouched.
+   * @param way the Character defining the Cell
+   * @param ref the new reference (index of the next Row)
+   */
+  public void setRef(Character way, int ref) {
+    Cell target = at(way);
+    if (target == null) {
+      // nothing stored for this character yet: create its Cell
+      target = new Cell();
+      cells.put(way, target);
+    }
+    // only the reference changes; cmd, cnt and skip keep their values
+    target.ref = ref;
+  }
+  
+  /**
+   * Return the number of cells in use, i.e. holding a command or a reference.
+   * 
+   * @return the number of cells in use
+   */
+  public int getCells() {
+    int used = 0;
+    for (Character key : cells.keySet()) {
+      Cell cell = at(key);
+      // skip cells that hold neither a command nor a reference
+      if (cell.cmd < 0 && cell.ref < 0) {
+        continue;
+      }
+      used++;
+    }
+    return used;
+  }
+  
+  /**
+   * Return the number of cells that reference (lead to) another row.
+   * 
+   * @return the number of references
+   */
+  public int getCellsPnt() {
+    int pointers = 0;
+    for (Character key : cells.keySet()) {
+      Cell cell = at(key);
+      // a negative ref means the cell does not lead to another row
+      if (cell.ref < 0) {
+        continue;
+      }
+      pointers++;
+    }
+    return pointers;
+  }
+  
+  /**
+   * Return the number of cells of this Row that hold a patch command.
+   * 
+   * @return the number of patch commands
+   */
+  public int getCellsVal() {
+    int commands = 0;
+    for (Character key : cells.keySet()) {
+      Cell cell = at(key);
+      // a negative cmd means the cell holds no patch command
+      if (cell.cmd < 0) {
+        continue;
+      }
+      commands++;
+    }
+    return commands;
+  }
+  
+  /**
+   * Look up the patch command stored for the given Character.
+   * 
+   * @param way the Character associated with the Cell holding the desired
+   *          command
+   * @return the command, or -1 if this Row has no Cell for the Character
+   */
+  public int getCmd(Character way) {
+    final Cell cell = at(way);
+    return (cell != null) ? cell.cmd : -1;
+  }
+  
+  /**
+   * Return the number of patch commands that were in the Cell associated with
+   * the given Character before the Trie containing this Row was reduced.
+   * 
+   * @param way the Character associated with the desired Cell
+   * @return the count, or -1 if this Row has no Cell for the Character
+   */
+  public int getCnt(Character way) {
+    final Cell cell = at(way);
+    return (cell != null) ? cell.cnt : -1;
+  }
+  
+  /**
+   * Return the reference to the next Row stored in the Cell associated with
+   * the given Character.
+   * 
+   * @param way the Character associated with the desired Cell
+   * @return the reference, or -1 if the Cell is <tt>null</tt>
+   */
+  public int getRef(Character way) {
+    final Cell cell = at(way);
+    return (cell != null) ? cell.ref : -1;
+  }
+  
+  /**
+   * Write this Row to the given output: the number of cells in use, then each
+   * such cell as its character followed by cmd, cnt, ref and skip.
+   * @param os the output stream
+   * @exception IOException if an I/O error occurs
+   */
+  public void store(DataOutput os) throws IOException {
+    // The count must match the number of cells actually written below. Empty
+    // cells (no command, no reference) are skipped, so cells.size() would
+    // overstate it and the Row(DataInput) constructor would read past the row.
+    os.writeInt(getCells());
+    for (Character key : cells.keySet()) {
+      Cell cell = at(key);
+      if (cell.cmd < 0 && cell.ref < 0) {
+        continue;
+      }
+      os.writeChar(key.charValue());
+      os.writeInt(cell.cmd);
+      os.writeInt(cell.cnt);
+      os.writeInt(cell.ref);
+      os.writeInt(cell.skip);
+    }
+  }
+  
+  /**
+   * Return the patch command shared by every command-carrying Cell of this
+   * Row. As a side effect <tt>uniformCnt</tt> and <tt>uniformSkip</tt> are
+   * reset and then updated with the number of agreeing Cells and their skip.
+   * 
+   * @param eqSkip when <tt>true</tt>, Cells must also agree on their skip
+   * @return the shared command, or -1 if there is none, if two Cells differ,
+   *         or if any Cell references another Row
+   */
+  public int uniformCmd(boolean eqSkip) {
+    int shared = -1;
+    // uniformCnt starts at 1 so that the first command found counts itself
+    uniformCnt = 1;
+    uniformSkip = 0;
+    for (Cell cell : cells.values()) {
+      if (cell.ref >= 0) {
+        // a reference to another Row rules out a uniform Row
+        return -1;
+      }
+      if (cell.cmd < 0) {
+        // no command here: this Cell neither agrees nor disagrees
+        continue;
+      }
+      if (shared < 0) {
+        // the first command seen becomes the candidate
+        shared = cell.cmd;
+        uniformSkip = cell.skip;
+        continue;
+      }
+      if (shared != cell.cmd) {
+        return -1;
+      }
+      // same command; with eqSkip the skip has to match as well
+      if (eqSkip && uniformSkip != cell.skip) {
+        return -1;
+      }
+      uniformCnt++;
+    }
+    return shared;
+  }
+  
+  /**
+   * Print every cell of this Row to stdout as "[character:cell]", on one line.
+   */
+  public void print() {
+    for (Character key : cells.keySet()) {
+      // the Cell is rendered through its own string conversion
+      Cell cell = at(key);
+      System.out.print("[" + key + ":" + cell + "]");
+    }
+    System.out.println();
+  }
+  
+  Cell at(Character index) {
+    return cells.get(index); // null when no Cell is stored for this character
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\Row.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/java/org/egothor/stemmer/Trie.java
===================================================================
--- contrib/analyzers/stempel/src/java/org/egothor/stemmer/Trie.java	(revision 0)
+++ contrib/analyzers/stempel/src/java/org/egothor/stemmer/Trie.java	(revision 0)
@@ -0,0 +1,419 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A Trie is used to store a dictionary of words and their stems.
+ * <p>
+ * Actually, what is stored are words with their respective patch commands. A
+ * trie can be termed forward (keys read from left to right) or backward (keys
+ * read from right to left). This property will vary depending on the language
+ * for which a Trie is constructed.
+ */
+public class Trie {
+  List<Row> rows = new ArrayList<Row>();
+  List<CharSequence> cmds = new ArrayList<CharSequence>();
+  int root;
+  
+  boolean forward = false;
+  
+  /**
+   * Read a Trie from the given input: the forward flag, the root row index,
+   * the counted list of patch commands and the counted list of rows.
+   * @param is the input stream
+   * @exception IOException if an I/O error occurs
+   */
+  public Trie(DataInput is) throws IOException {
+    this.forward = is.readBoolean();
+    this.root = is.readInt();
+    for (int left = is.readInt(); left > 0; left--) {
+      cmds.add(is.readUTF());
+    }
+    for (int left = is.readInt(); left > 0; left--) {
+      rows.add(new Row(is));
+    }
+  }
+  
+  /**
+   * Create an empty Trie whose only row, at index 0, is the root.
+   * 
+   * @param forward <tt>true</tt> if keys are read left to right
+   */
+  public Trie(boolean forward) {
+    this.forward = forward;
+    this.root = 0;
+    rows.add(new Row());
+  }
+  
+  /**
+   * Create a Trie over the given lists, which are used as is (not copied).
+   * 
+   * @param forward <tt>true</tt> if read left to right, <tt>false</tt> if read
+   *          right to left
+   * @param root index of the row that is the root node
+   * @param cmds the patch commands to store
+   * @param rows the rows of this Trie; each Row is one node
+   */
+  public Trie(boolean forward, int root, List<CharSequence> cmds, List<Row> rows) {
+    this.forward = forward;
+    this.root = root;
+    this.cmds = cmds;
+    this.rows = rows;
+  }
+  
+  /**
+   * Collect the distinct patch commands met along the path of the given key,
+   * in the order in which they are first met. The walk stops at the last
+   * character of the key or at the first character that has no next row.
+   * 
+   * @param key the key to follow; an empty key yields <tt>null</tt>
+   * @return the commands found, or <tt>null</tt> if there are none
+   */
+  public CharSequence[] getAll(CharSequence key) {
+    final int len = key.length();
+    if (len == 0) {
+      // Without this guard the lookup of the last character read from the
+      // empty key and failed with an IndexOutOfBoundsException.
+      return null;
+    }
+    // every character contributes at most one command, so len slots suffice
+    int res[] = new int[len];
+    int resc = 0;
+    Row now = getRow(root);
+    StrEnum e = new StrEnum(key, forward);
+    
+    for (int i = 0; i < len; i++) {
+      Character ch = Character.valueOf(e.next());
+      int cmd = now.getCmd(ch);
+      if (cmd >= 0 && !containsCmd(res, resc, cmd)) {
+        res[resc++] = cmd;
+      }
+      if (i == len - 1) {
+        // last character: only its command matters, no row is followed
+        break;
+      }
+      int next = now.getRef(ch);
+      if (next < 0) {
+        // the path ends before the key does
+        break;
+      }
+      now = getRow(next);
+    }
+    
+    if (resc < 1) {
+      return null;
+    }
+    // translate the collected command indexes into the commands themselves
+    CharSequence R[] = new CharSequence[resc];
+    for (int j = 0; j < resc; j++) {
+      R[j] = cmds.get(res[j]);
+    }
+    return R;
+  }
+  
+  /**
+   * Tell whether <tt>cmd</tt> is among the first <tt>count</tt> entries of
+   * <tt>found</tt>.
+   */
+  private static boolean containsCmd(int found[], int count, int cmd) {
+    for (int j = 0; j < count; j++) {
+      if (found[j] == cmd) {
+        return true;
+      }
+    }
+    return false;
+  }
+  
+  /**
+   * Return the number of cells in use, summed over all rows of this Trie.
+   * @return the number of cells
+   */
+  public int getCells() {
+    int total = 0;
+    for (Row row : rows) {
+      total += row.getCells();
+    }
+    return total;
+  }
+  
+  /**
+   * Return the number of references to other rows, summed over all rows.
+   * @return the number of references
+   */
+  public int getCellsPnt() {
+    int total = 0;
+    for (Row row : rows) {
+      total += row.getCellsPnt();
+    }
+    return total;
+  }
+  
+  /**
+   * Return the number of cells holding a patch command, summed over all rows.
+   * @return the number of patch commands
+   */
+  public int getCellsVal() {
+    int total = 0;
+    for (Row row : rows) {
+      total += row.getCellsVal();
+    }
+    return total;
+  }
+  
+  /**
+   * Return the patch command of the Cell in which the path of the given key
+   * ends, skipping as many key characters after each Cell as its skip says.
+   * @param key the key
+   * @return the command, or <tt>null</tt> if the path breaks off or holds none
+   */
+  public CharSequence getFully(CharSequence key) {
+    final int len = key.length();
+    final StrEnum chars = new StrEnum(key, forward);
+    Row now = getRow(root);
+    int cmd = -1;
+    int pos = 0;
+    
+    while (pos < len) {
+      final Character ch = new Character(chars.next());
+      pos++;
+      
+      final Cell cell = now.at(ch);
+      if (cell == null) {
+        return null;
+      }
+      // the command of the most recently visited Cell wins
+      cmd = cell.cmd;
+      
+      // jump over as many key characters as the Cell asks for
+      for (int skip = cell.skip; skip > 0; skip--) {
+        if (pos >= len) {
+          return null;
+        }
+        chars.next();
+        pos++;
+      }
+      
+      // continue in the row referenced for this character, if there is one
+      final int next = now.getRef(ch);
+      if (next >= 0) {
+        now = getRow(next);
+      } else if (pos < len) {
+        // characters remain but there is no row left to match them
+        return null;
+      }
+    }
+    return (cmd == -1) ? null : cmds.get(cmd);
+  }
+  
+  /**
+   * Return the element that is stored as last on a path associated with the
+   * given key.
+   * 
+   * @param key the key associated with the desired element
+   * @return the last element on the path, or <tt>null</tt> if there is none
+   */
+  public CharSequence getLastOnPath(CharSequence key) {
+    if (key.length() == 0) {
+      return null; // no path; the final e.next() below used to throw here
+    }
+    Row now = getRow(root);
+    CharSequence last = null;
+    StrEnum e = new StrEnum(key, forward);
+    for (int i = 0; i < key.length() - 1; i++) {
+      Character ch = Character.valueOf(e.next());
+      int w = now.getCmd(ch);
+      if (w >= 0) {
+        last = cmds.get(w);
+      }
+      w = now.getRef(ch);
+      if (w < 0) {
+        return last;
+      }
+      now = getRow(w);
+    }
+    int w = now.getCmd(Character.valueOf(e.next()));
+    return (w >= 0) ? cmds.get(w) : last;
+  }
+  
+  /**
+   * Return the Row at the given index.
+   * 
+   * @param index the index containing the desired Row
+   * @return the Row, or <tt>null</tt> if the index is out of range
+   */
+  private Row getRow(int index) {
+    if (index >= 0 && index < rows.size()) {
+      return rows.get(index);
+    }
+    return null;
+  }
+  
+  /**
+   * Write this Trie (forward flag, root, commands, rows) to the given output.
+   * @param os the output stream
+   * @exception IOException if an I/O error occurs
+   */
+  public void store(DataOutput os) throws IOException {
+    os.writeBoolean(forward);
+    os.writeInt(root);
+    os.writeInt(cmds.size());
+    for (CharSequence cmd : cmds) {
+      os.writeUTF(cmd.toString());
+    }
+    os.writeInt(rows.size());
+    for (Row row : rows) {
+      row.store(os);
+    }
+  }
+  
+  /**
+   * Add the given key associated with the given patch command. If either
+   * parameter is null or empty this method will return without executing.
+   * 
+   * @param key the key
+   * @param cmd the patch command
+   */
+  public void add(CharSequence key, CharSequence cmd) {
+    if (key == null || cmd == null) {
+      return;
+    }
+    // An empty key has no last character to attach the command to: the final
+    // e.next() below used to throw for it. Treat it like the other no-ops.
+    if (key.length() == 0 || cmd.length() == 0) {
+      return;
+    }
+    int id_cmd = cmds.indexOf(cmd);
+    if (id_cmd == -1) {
+      id_cmd = cmds.size();
+      cmds.add(cmd);
+    }
+    
+    Row r = getRow(root);
+    StrEnum e = new StrEnum(key, forward);
+    // all characters but the last select (or create) the row to descend into
+    for (int i = 0; i < e.length() - 1; i++) {
+      Character ch = Character.valueOf(e.next());
+      int node = r.getRef(ch);
+      if (node >= 0) {
+        r = getRow(node);
+      } else {
+        Row n = new Row();
+        r.setRef(ch, rows.size());
+        rows.add(n);
+        r = n;
+      }
+    }
+    // the last character carries the command itself
+    r.setCmd(Character.valueOf(e.next()), id_cmd);
+  }
+  
+  /**
+   * Reduce this Trie with the given Reduce and return the resulting Trie.
+   * 
+   * @param by the Reduce whose <tt>optimize</tt> method is applied to this Trie
+   * @return the Trie returned by <tt>by.optimize(this)</tt>
+   */
+  public Trie reduce(Reduce by) {
+    return by.optimize(this);
+  }
+  
+  public void printInfo(CharSequence prefix) {
+    String sizes = "nds " + rows.size() + " cmds " + cmds.size() + " cells "
+        + getCells() + " valcells " + getCellsVal() + " pntcells " + getCellsPnt();
+    System.out.println(prefix + sizes); // a single line on stdout
+  }
+  
+  /**
+   * Cursor over the characters of a key, moving left to right or right to
+   * left.
+   */
+  class StrEnum {
+    CharSequence s;
+    int from;
+    int by;
+    
+    /**
+     * Position the cursor on the first character to deliver.
+     * 
+     * @param s the characters to walk over
+     * @param up <tt>true</tt> to start at index 0 and move up, <tt>false</tt>
+     *          to start at the last index and move down
+     */
+    StrEnum(CharSequence s, boolean up) {
+      this.s = s;
+      // by is the signed step added to from after every next()
+      this.from = up ? 0 : s.length() - 1;
+      this.by = up ? 1 : -1;
+    }
+    
+    /** Return the length of the whole sequence, not of what is left of it. */
+    int length() {
+      return s.length();
+    }
+    
+    /** Return the character under the cursor, then move one step on. */
+    char next() {
+      final char current = s.charAt(from);
+      from += by;
+      return current;
+    }
+  }
+}

Property changes on: contrib\analyzers\stempel\src\java\org\egothor\stemmer\Trie.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/resources/org/apache/lucene/analysis/pl/stemmer_20000.tbl
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream

Property changes on: contrib\analyzers\stempel\src\resources\org\apache\lucene\analysis\pl\stemmer_20000.tbl
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Index: contrib/analyzers/stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt
===================================================================
--- contrib/analyzers/stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt	(revision 0)
+++ contrib/analyzers/stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt	(revision 0)
@@ -0,0 +1,183 @@
+# from carrot2 project: trunk/core/carrot2-util-text/src-resources/stopwords.pl
+vol
+o.o.
+mgr
+godz
+zł
+www
+pl
+ul
+tel
+hab
+prof
+inż
+dr
+i
+u
+aby
+albo
+ale
+ani
+aż
+bardzo
+bez
+bo
+bowiem
+by
+byli
+bym
+był
+była
+było
+były
+być
+będzie
+będą
+chce
+choć
+co
+coraz
+coś
+czy
+czyli
+często
+dla
+do
+gdy
+gdyby
+gdyż
+gdzie
+go
+ich
+im
+inne
+iż
+ja
+jak
+jakie
+jako
+je
+jednak
+jednym
+jedynie
+jego
+jej
+jest
+jeszcze
+jeśli
+jeżeli
+już
+ją
+kiedy
+kilku
+kto
+która
+które
+którego
+której
+który
+których
+którym
+którzy
+lat
+lecz
+lub
+ma
+mają
+mamy
+mi
+miał
+mimo
+mnie
+mogą
+może
+można
+mu
+musi
+na
+nad
+nam
+nas
+nawet
+nic
+nich
+nie
+niej
+nim
+niż
+no
+nowe
+np
+nr
+o
+od
+ok
+on
+one
+oraz
+pan
+po
+pod
+ponad
+ponieważ
+poza
+przed
+przede
+przez
+przy
+raz
+razie
+roku
+również
+się
+sobie
+sposób
+swoje
+są
+ta
+tak
+takich
+takie
+także
+tam
+te
+tego
+tej
+temu
+ten
+teraz
+też
+to
+trzeba
+tu
+tych
+tylko
+tym
+tys
+tzw
+tę
+w
+we
+wie
+więc
+wszystko
+wśród
+właśnie
+z
+za
+zaś
+ze
+że
+żeby
+ii
+iii
+iv
+vi
+vii
+viii
+ix
+xi
+xii
+xiii
+xiv
+xv

Property changes on: contrib\analyzers\stempel\src\resources\org\apache\lucene\analysis\pl\stopwords.txt
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
===================================================================
--- contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java	(revision 0)
+++ contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java	(revision 0)
@@ -0,0 +1,53 @@
+package org.apache.lucene.analysis.pl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
+  /** Only constructs the analyzer; this test fails with NPE when the
+   * stopwords file is missing in classpath */
+  public void testResourcesAvailable() {
+    new PolishAnalyzer(TEST_VERSION_CURRENT);
+  }
+  
+  /** inflected forms share one stem, and a stopword yields no token */
+  public void testBasics() throws IOException {
+    final Analyzer analyzer = new PolishAnalyzer(TEST_VERSION_CURRENT);
+    final String[] noTokens = new String[0];
+    checkOneTermReuse(analyzer, "studenta", "student");
+    checkOneTermReuse(analyzer, "studenci", "student");
+    // "coraz" appears in the bundled stopwords.txt
+    assertAnalyzesTo(analyzer, "coraz", noTokens);
+  }
+  
+  /** a word in the exclusion set comes out unstemmed */
+  public void testExclude() throws IOException {
+    final Set<String> unstemmed = new HashSet<String>();
+    unstemmed.add("studenta");
+    final Analyzer analyzer = new PolishAnalyzer(TEST_VERSION_CURRENT,
+        PolishAnalyzer.getDefaultStopSet(), unstemmed);
+    checkOneTermReuse(analyzer, "studenta", "studenta");
+    checkOneTermReuse(analyzer, "studenci", "student");
+  }
+}

Property changes on: contrib\analyzers\stempel\src\test\org\apache\lucene\analysis\pl\TestPolishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/test/org/egothor/stemmer/TestCompile.java
===================================================================
--- contrib/analyzers/stempel/src/test/org/egothor/stemmer/TestCompile.java	(revision 0)
+++ contrib/analyzers/stempel/src/test/org/egothor/stemmer/TestCompile.java	(revision 0)
@@ -0,0 +1,153 @@
+package org.egothor.stemmer;
+
+/*
+ Egothor Software License version 1.00
+ Copyright (C) 1997-2004 Leo Galambos.
+ Copyright (C) 2002-2004 "Egothor developers"
+ on behalf of the Egothor Project.
+ All rights reserved.
+
+ This  software  is  copyrighted  by  the "Egothor developers". If this
+ license applies to a single file or document, the "Egothor developers"
+ are the people or entities mentioned as copyright holders in that file
+ or  document.  If  this  license  applies  to the Egothor project as a
+ whole,  the  copyright holders are the people or entities mentioned in
+ the  file CREDITS. This file can be found in the same location as this
+ license in the distribution.
+
+ Redistribution  and  use  in  source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ 1. Redistributions  of  source  code  must retain the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ following disclaimer.
+ 2. Redistributions  in binary form must reproduce the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ disclaimer  that  follows  these  conditions  in the documentation
+ and/or other materials provided with the distribution.
+ 3. The name "Egothor" must not be used to endorse or promote products
+ derived  from  this software without prior written permission. For
+ written permission, please contact Leo.G@seznam.cz
+ 4. Products  derived  from this software may not be called "Egothor",
+ nor  may  "Egothor"  appear  in  their name, without prior written
+ permission from Leo.G@seznam.cz.
+
+ In addition, we request that you include in the end-user documentation
+ provided  with  the  redistribution  and/or  in the software itself an
+ acknowledgement equivalent to the following:
+ "This product includes software developed by the Egothor Project.
+ http://egothor.sf.net/"
+
+ THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+ FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+ CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+ BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ This  software  consists  of  voluntary  contributions  made  by  many
+ individuals  on  behalf  of  the  Egothor  Project  and was originally
+ created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.net.URI;
+import java.util.StringTokenizer;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestCompile extends LuceneTestCase {
+  
+  public void testCompile() throws Exception {
+    assertCompiles("test");
+  }
+  
+  public void testCompileBackwards() throws Exception {
+    assertCompiles("-test");
+  }
+  
+  public void testCompileMulti() throws Exception {
+    assertCompiles("Mtest");
+  }
+  
+  /** Compile testRules.txt with the given first argument, load the ".out"
+   * file expected next to it and verify it with both lookup methods. The
+   * ".out" file is deleted even when a step fails. */
+  private void assertCompiles(String flags) throws Exception {
+    URI uri = getClass().getResource("testRules.txt").toURI();
+    String path = uri.getPath();
+    String compiled = path + ".out";
+    try {
+      Compile.main(new String[] {flags, path});
+      Trie trie = loadTrie(compiled);
+      assertTrie(trie, path, true, true);
+      assertTrie(trie, path, false, true);
+    } finally {
+      new File(compiled).delete();
+    }
+  }
+  
+  /** Load a compiled table. An 'M' (in either case) in its leading UTF string
+   * selects a MultiTrie, anything else a plain Trie. The stream is closed
+   * even when reading fails. */
+  static Trie loadTrie(String path) throws IOException {
+    DataInputStream is = new DataInputStream(new BufferedInputStream(
+        new FileInputStream(path)));
+    try {
+      String method = is.readUTF().toUpperCase();
+      return (method.indexOf('M') < 0) ? new Trie(is) : new MultiTrie(is);
+    } finally {
+      is.close(); // previously skipped when reading the table failed
+    }
+  }
+  
+  /** For each line of the rules file (a stem, then its variants) check that
+   * every variant (and the stem too if storeorig) yields the stem. */
+  private static void assertTrie(Trie trie, String file, boolean usefull,
+      boolean storeorig) throws Exception {
+    LineNumberReader in = new LineNumberReader(new BufferedReader(
+        new FileReader(file)));
+    try {
+      for (String line = in.readLine(); line != null; line = in.readLine()) {
+        try {
+          StringTokenizer st = new StringTokenizer(line.toLowerCase());
+          String stem = st.nextToken();
+          if (storeorig) {
+            assertStem(trie, stem, stem, usefull);
+          }
+          while (st.hasMoreTokens()) {
+            String token = st.nextToken();
+            if (!token.equals(stem)) {
+              assertStem(trie, stem, token, usefull);
+            }
+          }
+        } catch (java.util.NoSuchElementException x) {
+          // no base token (stem) on a line
+        }
+      }
+    } finally {
+      in.close(); // the reader used to be left open
+    }
+  }
+  
+  /** Patch word with the trie's command for it; the result must equal stem. */
+  private static void assertStem(Trie trie, String stem, String word,
+      boolean usefull) throws Exception {
+    CharSequence cmd = (usefull) ? trie.getFully(word) : trie.getLastOnPath(word);
+    StringBuilder stm = new StringBuilder(word);
+    Diff.apply(stm, cmd);
+    assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
+  }
+}

Property changes on: contrib\analyzers\stempel\src\test\org\egothor\stemmer\TestCompile.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/test/org/egothor/stemmer/TestStemmer.java
===================================================================
--- contrib/analyzers/stempel/src/test/org/egothor/stemmer/TestStemmer.java	(revision 0)
+++ contrib/analyzers/stempel/src/test/org/egothor/stemmer/TestStemmer.java	(revision 0)
@@ -0,0 +1,168 @@
+package org.egothor.stemmer;
+
+/*
+ Egothor Software License version 1.00
+ Copyright (C) 1997-2004 Leo Galambos.
+ Copyright (C) 2002-2004 "Egothor developers"
+ on behalf of the Egothor Project.
+ All rights reserved.
+
+ This  software  is  copyrighted  by  the "Egothor developers". If this
+ license applies to a single file or document, the "Egothor developers"
+ are the people or entities mentioned as copyright holders in that file
+ or  document.  If  this  license  applies  to the Egothor project as a
+ whole,  the  copyright holders are the people or entities mentioned in
+ the  file CREDITS. This file can be found in the same location as this
+ license in the distribution.
+
+ Redistribution  and  use  in  source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ 1. Redistributions  of  source  code  must retain the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ following disclaimer.
+ 2. Redistributions  in binary form must reproduce the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ disclaimer  that  follows  these  conditions  in the documentation
+ and/or other materials provided with the distribution.
+ 3. The name "Egothor" must not be used to endorse or promote products
+ derived  from  this software without prior written permission. For
+ written permission, please contact Leo.G@seznam.cz
+ 4. Products  derived  from this software may not be called "Egothor",
+ nor  may  "Egothor"  appear  in  their name, without prior written
+ permission from Leo.G@seznam.cz.
+
+ In addition, we request that you include in the end-user documentation
+ provided  with  the  redistribution  and/or  in the software itself an
+ acknowledgement equivalent to the following:
+ "This product includes software developed by the Egothor Project.
+ http://egothor.sf.net/"
+
+ THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+ FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+ CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+ BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ This  software  consists  of  voluntary  contributions  made  by  many
+ individuals  on  behalf  of  the  Egothor  Project  and was originally
+ created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestStemmer extends LuceneTestCase {
+  
+  public void testTrie() {
+    Trie t = new Trie(true);
+    
+    String keys[] = {"a", "ba", "bb", "c"};
+    String vals[] = {"1", "2", "2", "4"};
+    
+    for (int i = 0; i < keys.length; i++) {
+      t.add(keys[i], vals[i]);
+    }
+    
+    assertEquals(0, t.root);
+    assertEquals(2, t.rows.size());
+    assertEquals(3, t.cmds.size());   
+    assertTrieContents(t, keys, vals);
+  }
+  
+  public void testTrieBackwards() {
+    Trie t = new Trie(false);
+    
+    String keys[] = {"a", "ba", "bb", "c"};
+    String vals[] = {"1", "2", "2", "4"};
+    
+    for (int i = 0; i < keys.length; i++) {
+      t.add(keys[i], vals[i]);
+    }
+       
+    assertTrieContents(t, keys, vals);
+  }
+  
+  public void testMultiTrie() {
+    Trie t = new MultiTrie(true);
+    
+    String keys[] = {"a", "ba", "bb", "c"};
+    String vals[] = {"1", "2", "2", "4"};
+    
+    for (int i = 0; i < keys.length; i++) {
+      t.add(keys[i], vals[i]);
+    }
+    
+    assertTrieContents(t, keys, vals);   
+  }
+  
+  public void testMultiTrieBackwards() {
+    Trie t = new MultiTrie(false);
+    
+    String keys[] = {"a", "ba", "bb", "c"};
+    String vals[] = {"1", "2", "2", "4"};
+    
+    for (int i = 0; i < keys.length; i++) {
+      t.add(keys[i], vals[i]);
+    }
+    
+    assertTrieContents(t, keys, vals);   
+  }
+  
+  public void testMultiTrie2() {
+    Trie t = new MultiTrie2(true);
+    
+    String keys[] = {"a", "ba", "bb", "c"};
+    /* 
+     * short vals won't work, see line 155 for example
+     * the IOOBE is caught (wierd), but shouldnt affect patch cmds?
+     */
+    String vals[] = {"1111", "2222", "2223", "4444"};
+    
+    for (int i = 0; i < keys.length; i++) {
+      t.add(keys[i], vals[i]);
+    }
+    
+    assertTrieContents(t, keys, vals);   
+  }
+  
+  public void testMultiTrie2Backwards() {
+    Trie t = new MultiTrie2(false);
+    
+    String keys[] = {"a", "ba", "bb", "c"};
+    /* 
+     * short vals won't work, see line 155 for example
+     * the IOOBE is caught (wierd), but shouldnt affect patch cmds?
+     */
+    String vals[] = {"1111", "2222", "2223", "4444"};
+    
+    for (int i = 0; i < keys.length; i++) {
+      t.add(keys[i], vals[i]);
+    }
+    
+    assertTrieContents(t, keys, vals);   
+  }
+ 
+  private static void assertTrieContents(Trie trie, String keys[], String vals[]) {
+    Trie[] tries = new Trie[] {
+        trie,
+        trie.reduce(new Optimizer()),
+        trie.reduce(new Optimizer2()),
+        trie.reduce(new Gener()),
+        trie.reduce(new Lift(true)),
+        trie.reduce(new Lift(false))
+    };
+    
+    for (Trie t : tries) {
+      for (int i = 0; i < keys.length; i++) {
+        assertEquals(vals[i], t.getFully(keys[i]).toString());
+        assertEquals(vals[i], t.getLastOnPath(keys[i]).toString());
+      }
+    }
+  }
+}

Property changes on: contrib\analyzers\stempel\src\test\org\egothor\stemmer\TestStemmer.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/analyzers/stempel/src/test/org/egothor/stemmer/testRules.txt
===================================================================
--- contrib/analyzers/stempel/src/test/org/egothor/stemmer/testRules.txt	(revision 0)
+++ contrib/analyzers/stempel/src/test/org/egothor/stemmer/testRules.txt	(revision 0)
@@ -0,0 +1,4 @@
+act acted acting actor
+walk walked walking
+wander wandered wanderer
+want wanted wanting

Property changes on: contrib\analyzers\stempel\src\test\org\egothor\stemmer\testRules.txt
___________________________________________________________________
Added: svn:eol-style
   + native

Index: src/site/src/documentation/content/xdocs/site.xml
===================================================================
--- src/site/src/documentation/content/xdocs/site.xml	(revision 918599)
+++ src/site/src/documentation/content/xdocs/site.xml	(working copy)
@@ -54,6 +54,7 @@
 		 <javadoc-contrib label="Contrib">
 		    <javadoc-contrib-analyzers label="Analyzers" href="ext:javadocs-contrib-analyzers"/>
 		    <javadoc-contrib-smartcn label="Smart Chinese Analyzer" href="ext:javadocs-contrib-smartcn"/>
+		    <javadoc-contrib-stempel label="Stempel Polish Analyzer" href="ext:javadocs-contrib-stempel"/>
 		    <javadoc-contrib-ant label="Ant" href="ext:javadocs-contrib-ant"/>
 		    <javadoc-contrib-bdb label="Bdb" href="ext:javadocs-contrib-bdb"/>
 		    <javadoc-contrib-bdb-je label="Bdb-je" href="ext:javadocs-contrib-bdb-je"/>
@@ -108,6 +109,7 @@
 	<javadocs-demo href="api/demo/index.html"/>
 	<javadocs-contrib-analyzers href="api/contrib-analyzers/index.html"/>
 	<javadocs-contrib-smartcn href="api/contrib-smartcn/index.html"/>
+	<javadocs-contrib-stempel href="api/contrib-stempel/index.html"/>
 	<javadocs-contrib-ant href="api/contrib-ant/index.html"/>
 	<javadocs-contrib-bdb href="api/contrib-bdb/index.html"/>
 	<javadocs-contrib-bdb-je href="api/contrib-bdb-je/index.html"/>
