Index: contrib/analyzers/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java	(revision 0)
@@ -0,0 +1,117 @@
+package org.apache.lucene.analysis.fa;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
+import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
+
+/**
+ * Analyzer for Persian.
+ * <p>
+ * Analyzer uses {@link ArabicLetterTokenizer} which implies tokenizing around ZWNJ in addition to space.
+ * Some Persian-specific variant forms (such as farsi yeh and keheh) are standardized.
+ * "Stemming" is accomplished via stopwords.
+ */
+public final class PersianAnalyzer extends Analyzer {
+
+  /**
+   * File containing default Persian stopwords.
+   * <p>
+   * Default stopword list is from http://members.unine.ch/jacques.savoy/clef/index.html
+   * The stopword list is BSD-Licensed.
+   */
+  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+  /** Contains the stopwords used with the StopFilter. */
+  private Set stoptable = new HashSet();
+
+  /** The comment character in the stopwords file.  All lines prefixed with this will be ignored. */
+  public static final String STOPWORDS_COMMENT = "#";
+
+  /**
+   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+   *
+   * @throws RuntimeException if the stopword resource is missing or cannot be read
+   */
+  public PersianAnalyzer() {
+    InputStream stream = PersianAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE);
+    if (stream == null) // report the missing resource by name instead of failing with an NPE
+      throw new RuntimeException("Stopword resource not found: " + DEFAULT_STOPWORD_FILE);
+    try {
+      try {
+        InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
+        stoptable = WordlistLoader.getWordSet(reader, STOPWORDS_COMMENT);
+      } finally {
+        stream.close(); // always release the stream, even when loading fails
+      }
+    } catch (IOException e) { // TODO: throw IOException
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public PersianAnalyzer( String[] stopwords ) {
+    stoptable = StopFilter.makeStopSet( stopwords );
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   *
+   * @param stopwords table whose keys are used as the stop words
+   */
+  public PersianAnalyzer( Hashtable stopwords ) {
+    stoptable = new HashSet(stopwords.keySet());
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.  Lines can be commented out using {@link #STOPWORDS_COMMENT}
+   */
+  public PersianAnalyzer( File stopwords ) throws IOException {
+    stoptable = WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT);
+  }
+
+  /**
+   * Creates a TokenStream which tokenizes all the text in the provided Reader.
+   *
+   * @return  A TokenStream built from an ArabicLetterTokenizer filtered with
+   *          ArabicNormalizationFilter, PersianNormalizationFilter and Persian Stop words
+   */
+  public final TokenStream tokenStream(String fieldName, Reader reader) {
+    TokenStream result = new ArabicLetterTokenizer( reader );
+    result = new ArabicNormalizationFilter( result );
+    result = new PersianNormalizationFilter( result ); // additional Persian-specific normalization
+    result = new StopFilter( result, stoptable );
+    return result;
+  }
+}
+
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java	(revision 0)
@@ -0,0 +1,53 @@
+package org.apache.lucene.analysis.fa;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * A TokenFilter that applies {@link PersianNormalizer} to normalize the orthography.
+ */
+public class PersianNormalizationFilter extends TokenFilter {
+
+  /** Normalizer run over the term buffer of every token. */
+  protected PersianNormalizer normalizer = null;
+
+  /** Creates a filter that normalizes the tokens of the given stream. */
+  public PersianNormalizationFilter(TokenStream input) {
+    super(input);
+    normalizer = new PersianNormalizer();
+  }
+
+  /**
+   * Normalizes the next input token's term in place; returns null when the input returns null.
+   */
+  public Token next(Token reusableToken) throws IOException {
+    final Token token = input.next(reusableToken);
+    if (token == null)
+      return null;
+    final int before = token.termLength();
+    final int after = normalizer.normalize(token.termBuffer(), before);
+    if (after != before)
+      token.setTermLength(after);
+    return token;
+  }
+}
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java	(revision 0)
@@ -0,0 +1,91 @@
+package org.apache.lucene.analysis.fa;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *  Normalizer for Persian.
+ *  <p>
+ *  Normalization is done in-place for efficiency, operating on a termbuffer.
+ *  <p>
+ *  Normalization is defined as:
+ *  <ul>
+ *  <li> Normalization of various heh + hamza forms and heh goal to heh.
+ *  <li> Normalization of farsi yeh and yeh barree to arabic yeh
+ *  <li> Normalization of persian keheh to arabic kaf
+ * </ul>
+ */
+public class PersianNormalizer {
+  public static final char YEH = '\u064A';
+  public static final char FARSI_YEH = '\u06CC';
+  public static final char YEH_BARREE = '\u06D2';
+
+  public static final char KEHEH = '\u06A9';
+  public static final char KAF = '\u0643';
+
+  public static final char HAMZA_ABOVE = '\u0654';
+  public static final char HEH_YEH = '\u06C0';
+  public static final char HEH_GOAL = '\u06C1';
+  public static final char HEH = '\u0647';
+
+  /**
+   * Normalizes the first len characters of a buffer of Persian text in place.
+   *
+   * @param s buffer holding the text; it is modified
+   * @param len number of characters of s to process
+   * @return number of characters remaining after normalization
+   */
+  public int normalize(char s[], int len) {
+    int i = 0;
+    while (i < len) {
+      switch (s[i]) {
+      case FARSI_YEH:
+      case YEH_BARREE:
+        s[i++] = YEH;
+        break;
+      case KEHEH:
+        s[i++] = KAF;
+        break;
+      case HEH_YEH:
+      case HEH_GOAL:
+        s[i++] = HEH;
+        break;
+      case HAMZA_ABOVE: // necessary for HEH + HAMZA
+        len = delete(s, i, len); // stay at i: the following character has moved into this slot
+        break;
+      default:
+        i++;
+      }
+    }
+    return len;
+  }
+
+  /**
+   * Removes one character by shifting the characters after it one position left.
+   *
+   * @param s buffer to modify
+   * @param pos index of the character to remove
+   * @param len number of characters in use in s
+   * @return len - 1
+   */
+  protected int delete(char s[], int pos, int len) {
+    final int shortened = len - 1;
+    if (pos < len) // nothing to shift when pos lies at or beyond len
+      System.arraycopy(s, pos + 1, s, pos, shortened - pos);
+    return shortened;
+  }
+}
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/fa/package.html
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/fa/package.html	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/fa/package.html	(revision 0)
@@ -0,0 +1,5 @@
+<html><head></head>
+<body>
+Analyzer for Persian.
+</body>
+</html>
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/fa/stopwords.txt
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/fa/stopwords.txt	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/fa/stopwords.txt	(revision 0)
@@ -0,0 +1,311 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+انان
+نداشته
+سراسر
+خياه
+ايشان
+وي
+تاكنون
+بيشتري
+دوم
+پس
+ناشي
+وگو
+يا
+داشتند
+سپس
+هنگام
+هرگز
+پنج
+نشان
+امسال
+ديگر
+گروهي
+شدند
+چطور
+ده
+و
+دو
+نخستين
+ولي
+چرا
+چه
+وسط
+ه
+كدام
+قابل
+يك
+رفت
+هفت
+همچنين
+در
+هزار
+بله
+بلي
+شايد
+اما
+شناسي
+گرفته
+دهد
+داشته
+دانست
+داشتن
+خواهيم
+ميليارد
+وقتيكه
+امد
+خواهد
+جز
+اورده
+شده
+بلكه
+خدمات
+شدن
+برخي
+نبود
+بسياري
+جلوگيري
+حق
+كردند
+نوعي
+بعري
+نكرده
+نظير
+نبايد
+بوده
+بودن
+داد
+اورد
+هست
+جايي
+شود
+دنبال
+داده
+بايد
+سابق
+هيچ
+همان
+انجا
+كمتر
+كجاست
+گردد
+كسي
+تر
+مردم
+تان
+دادن
+بودند
+سري
+جدا
+ندارند
+مگر
+يكديگر
+دارد
+دهند
+بنابراين
+هنگامي
+سمت
+جا
+انچه
+خود
+دادند
+زياد
+دارند
+اثر
+بدون
+بهترين
+بيشتر
+البته
+به
+براساس
+بيرون
+كرد
+بعضي
+گرفت
+توي
+اي
+ميليون
+او
+جريان
+تول
+بر
+مانند
+برابر
+باشيم
+مدتي
+گويند
+اكنون
+تا
+تنها
+جديد
+چند
+بي
+نشده
+كردن
+كردم
+گويد
+كرده
+كنيم
+نمي
+نزد
+روي
+قصد
+فقط
+بالاي
+ديگران
+اين
+ديروز
+توسط
+سوم
+ايم
+دانند
+سوي
+استفاده
+شما
+كنار
+داريم
+ساخته
+طور
+امده
+رفته
+نخست
+بيست
+نزديك
+طي
+كنيد
+از
+انها
+تمامي
+داشت
+يكي
+طريق
+اش
+چيست
+روب
+نمايد
+گفت
+چندين
+چيزي
+تواند
+ام
+ايا
+با
+ان
+ايد
+ترين
+اينكه
+ديگري
+راه
+هايي
+بروز
+همچنان
+پاعين
+كس
+حدود
+مختلف
+مقابل
+چيز
+گيرد
+ندارد
+ضد
+همچون
+سازي
+شان
+مورد
+باره
+مرسي
+خويش
+برخوردار
+چون
+خارج
+شش
+هنوز
+تحت
+ضمن
+هستيم
+گفته
+فكر
+بسيار
+پيش
+براي
+روزهاي
+انكه
+نخواهد
+بالا
+كل
+وقتي
+كي
+چنين
+كه
+گيري
+نيست
+است
+كجا
+كند
+نيز
+يابد
+بندي
+حتي
+توانند
+عقب
+خواست
+كنند
+بين
+تمام
+همه
+ما
+باشند
+مثل
+شد
+اري
+باشد
+اره
+طبق
+بعد
+اگر
+صورت
+غير
+جاي
+بيش
+ريزي
+اند
+زيرا
+چگونه
+بار
+لطفا
+مي
+درباره
+من
+ديده
+همين
+گذاري
+برداري
+علت
+گذاشته
+هم
+فوق
+نه
+ها
+شوند
+اباد
+همواره
+هر
+اول
+خواهند
+چهار
+نام
+امروز
+مان
+هاي
+قبل
+كنم
+سعي
+تازه
+را
+هستند
+زير
+جلوي
+عنوان
+بود
Index: contrib/analyzers/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java
===================================================================
--- contrib/analyzers/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java	(revision 0)
+++ contrib/analyzers/src/test/org/apache/lucene/analysis/fa/TestPersianNormalizationFilter.java	(revision 0)
@@ -0,0 +1,75 @@
+package org.apache.lucene.analysis.fa;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.StringReader;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+
+/**
+ * Test the Persian Normalization Filter
+ */
+public class TestPersianNormalizationFilter extends TestCase {
+
+  public void testFarsiYeh() throws IOException {
+    check("های", "هاي");
+  }
+
+  public void testYehBarree() throws IOException {
+    check("هاے", "هاي");
+  }
+
+  public void testKeheh() throws IOException {
+    check("کشاندن", "كشاندن");
+  }
+
+  public void testHehYeh() throws IOException {
+    check("كتابۀ", "كتابه");
+  }
+
+  public void testHehHamzaAbove() throws IOException {
+    check("كتابهٔ", "كتابه");
+  }
+
+  public void testHehGoal() throws IOException {
+    check("زادہ", "زاده");
+  }
+
+  /** Tokenizes input, normalizes it, and compares the first token's term with expected. */
+  private void check(final String input, final String expected) throws IOException {
+    PersianNormalizationFilter filter = new PersianNormalizationFilter(
+        new ArabicLetterTokenizer(new StringReader(input)));
+    try {
+      Token nextToken = filter.next(new Token());
+      assertNotNull("no token produced for input: " + input, nextToken);
+      assertEquals(expected, nextToken.term());
+    } finally {
+      filter.close(); // close the stream even when an assertion fails
+    }
+  }
+}
