- *
- * You must specify the required {@link Version} compatibility when creating
- * {@link ArabicLetterTokenizer}:
- *
- *
- * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
- * detect token characters. See {@link #isTokenChar(int)} and
- * {@link #normalize(int)} for details.
- *
- * @deprecated (3.1) Use {@link StandardTokenizer} instead.
- */
-@Deprecated
-public class ArabicLetterTokenizer extends LetterTokenizer {
- /**
- * Construct a new ArabicLetterTokenizer.
- * @param matchVersion Lucene version
- * to match See {@link above}
- *
- * @param in
- * the input to split up into tokens
- */
- public ArabicLetterTokenizer(Version matchVersion, Reader in) {
- super(matchVersion, in);
- }
-
- /**
- * Construct a new ArabicLetterTokenizer using a given {@link AttributeSource}.
- *
- * @param matchVersion
- * Lucene version to match See {@link above}
- * @param source
- * the attribute source to use for this Tokenizer
- * @param in
- * the input to split up into tokens
- */
- public ArabicLetterTokenizer(Version matchVersion, AttributeSource source, Reader in) {
- super(matchVersion, source, in);
- }
-
- /**
- * Construct a new ArabicLetterTokenizer using a given
- * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}. * @param
- * matchVersion Lucene version to match See
- * {@link above}
- *
- * @param factory
- * the attribute factory to use for this Tokenizer
- * @param in
- * the input to split up into tokens
- */
- public ArabicLetterTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
- super(matchVersion, factory, in);
- }
-
- /**
- * Allows for Letter category or NonspacingMark category
- * @see org.apache.lucene.analysis.core.LetterTokenizer#isTokenChar(int)
- */
- @Override
- protected boolean isTokenChar(int c) {
- return super.isTokenChar(c) || Character.getType(c) == Character.NON_SPACING_MARK;
- }
-
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
index 0a74366..c9f0fef 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ar;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizer.java
index c566176..5912f10 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ar;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
index 636481e..d8c01e2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ar;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java
index c52916e..908147a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java
@@ -1,7 +1,7 @@
package org.apache.lucene.analysis.ar;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
index a6dbf70..538a465 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.bg;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java
index e426420..69108c1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.bg;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemmer.java
index 2aa23cd..e79a8bb 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.bg;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
index 3e2e23b..d8c4256 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.br;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
index c7ef2f6..5dac3b1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.br;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
index 095773b..5cb4c71 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.br;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
index 294f18e..2dc4e69 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ca;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -38,14 +38,6 @@ import org.tartarus.snowball.ext.CatalanStemmer;
/**
* {@link Analyzer} for Catalan.
- *
- *
- *
- * <a name="version"/>You must specify the required {@link Version}
- * compatibility when creating CatalanAnalyzer:
- *
- *
- * <li>As of 3.6, ElisionFilter with a set of Catalan
- * contractions is used by default.
- *
*/
public final class CatalanAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;
@@ -126,8 +118,8 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
* @return A
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
- * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
- * , {@link KeywordMarkerFilter} if a stem exclusion set is
+ * {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
@@ -135,9 +127,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
- if (matchVersion.onOrAfter(Version.LUCENE_36)) {
- result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
- }
+ result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java
index 163e036..3eb4ee7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/CharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/CharFilter.java
index 8687e93..f71e1cb 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/CharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/CharFilter.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
index 2b83aa0..fabd33c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
@@ -2,7 +2,7 @@
package org.apache.lucene.analysis.charfilter;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
index 48439cd..39061e2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java
index a7d93fc..c647061 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
index 87aa5c8..33e08c8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.cjk;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -89,16 +89,11 @@ public final class CJKAnalyzer extends StopwordAnalyzerBase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- if (matchVersion.onOrAfter(Version.LUCENE_36)) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- // run the widthfilter first before bigramming, it sometimes combines characters.
- TokenStream result = new CJKWidthFilter(source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new CJKBigramFilter(result);
- return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
- } else {
- final Tokenizer source = new CJKTokenizer(reader);
- return new TokenStreamComponents(source, new StopFilter(matchVersion, source, stopwords));
- }
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ // run the widthfilter first before bigramming, it sometimes combines characters.
+ TokenStream result = new CJKWidthFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new CJKBigramFilter(result);
+ return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java
index a5979b6..cdd5f73 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.cjk;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -205,7 +205,7 @@ public final class CJKBigramFilter extends TokenFilter {
/**
* refills buffers with new data from the current token.
*/
- private void refill() throws IOException {
+ private void refill() {
// compact buffers to keep them smallish if they become large
// just a safety check, but technically we only need the last codepoint
if (bufferLen > 64) {
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
deleted file mode 100644
index ab091b7..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
+++ /dev/null
@@ -1,317 +0,0 @@
-package org.apache.lucene.analysis.cjk;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.util.AttributeSource;
-
-/**
- * CJKTokenizer is designed for Chinese, Japanese, and Korean languages.
- *
- * The tokens returned are every two adjacent characters with overlap match.
- *
- *
- * Example: "java C1C2C3C4" will be segmented to: "java" "C1C2" "C2C3" "C3C4".
- *
- * Additionally, the following is applied to Latin text (such as English):
- *
- *
- * <li>Text is converted to lowercase.
- *
- * <li>Numeric digits, '+', '#', and '_' are tokenized as letters.
- *
- * <li>Full-width forms are converted to half-width forms.
- *
- * For more info on Asian language (Chinese, Japanese, and Korean) text segmentation:
- * please search google
- *
- * @deprecated Use StandardTokenizer, CJKWidthFilter, CJKBigramFilter, and LowerCaseFilter instead.
- */
-@Deprecated
-public final class CJKTokenizer extends Tokenizer {
- //~ Static fields/initializers ---------------------------------------------
- /** Word token type */
- static final int WORD_TYPE = 0;
-
- /** Single byte token type */
- static final int SINGLE_TOKEN_TYPE = 1;
-
- /** Double byte token type */
- static final int DOUBLE_TOKEN_TYPE = 2;
-
- /** Names for token types */
- static final String[] TOKEN_TYPE_NAMES = { "word", "single", "double" };
-
- /** Max word length */
- private static final int MAX_WORD_LEN = 255;
-
- /** buffer size: */
- private static final int IO_BUFFER_SIZE = 256;
-
- //~ Instance fields --------------------------------------------------------
-
- /** word offset, used to imply which character(in ) is parsed */
- private int offset = 0;
-
- /** the index used only for ioBuffer */
- private int bufferIndex = 0;
-
- /** data length */
- private int dataLen = 0;
-
- /**
- * character buffer, store the characters which are used to compose
- * the returned Token
- */
- private final char[] buffer = new char[MAX_WORD_LEN];
-
- /**
- * I/O buffer, used to store the content of the input(one of the
- * members of Tokenizer)
- */
- private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
-
- /** word type: single=>ASCII double=>non-ASCII word=>default */
- private int tokenType = WORD_TYPE;
-
- /**
- * tag: previous character is a cached double-byte character "C1C2C3C4"
- * ----(set the C1 isTokened) C1C2 "C2C3C4" ----(set the C2 isTokened)
- * C1C2 C2C3 "C3C4" ----(set the C3 isTokened) "C1C2 C2C3 C3C4"
- */
- private boolean preIsTokened = false;
-
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
- private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
-
- //~ Constructors -----------------------------------------------------------
-
- /**
- * Construct a token stream processing the given input.
- *
- * @param in I/O reader
- */
- public CJKTokenizer(Reader in) {
- super(in);
- }
-
- public CJKTokenizer(AttributeSource source, Reader in) {
- super(source, in);
- }
-
- public CJKTokenizer(AttributeFactory factory, Reader in) {
- super(factory, in);
- }
-
- //~ Methods ----------------------------------------------------------------
-
- /**
- * Returns true for the next token in the stream, or false at EOS.
- * See http://java.sun.com/j2se/1.3/docs/api/java/lang/Character.UnicodeBlock.html
- * for detail.
- *
- * @return false for end of stream, true otherwise
- *
- * @throws java.io.IOException - throw IOException when read error
- * happened in the InputStream
- *
- */
- @Override
- public boolean incrementToken() throws IOException {
- clearAttributes();
- /** how many character(s) has been stored in buffer */
-
- while(true) { // loop until we find a non-empty token
-
- int length = 0;
-
- /** the position used to create Token */
- int start = offset;
-
- while (true) { // loop until we've found a full token
- /** current character */
- char c;
-
- /** unicode block of current character for detail */
- Character.UnicodeBlock ub;
-
- offset++;
-
- if (bufferIndex >= dataLen) {
- dataLen = input.read(ioBuffer);
- bufferIndex = 0;
- }
-
- if (dataLen == -1) {
- if (length > 0) {
- if (preIsTokened == true) {
- length = 0;
- preIsTokened = false;
- }
- else{
- offset--;
- }
-
- break;
- } else {
- offset--;
- return false;
- }
- } else {
- //get current character
- c = ioBuffer[bufferIndex++];
-
- //get the UnicodeBlock of the current character
- ub = Character.UnicodeBlock.of(c);
- }
-
- //if the current character is ASCII or Extend ASCII
- if ((ub == Character.UnicodeBlock.BASIC_LATIN)
- || (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS)
- ) {
- if (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
- int i = (int) c;
- if (i >= 65281 && i <= 65374) {
- // convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN
- i = i - 65248;
- c = (char) i;
- }
- }
-
- // if the current character is a letter or "_" "+" "#"
- if (Character.isLetterOrDigit(c)
- || ((c == '_') || (c == '+') || (c == '#'))
- ) {
- if (length == 0) {
- // "javaC1C2C3C4linux"
- // ^--: the current character begin to token the ASCII
- // letter
- start = offset - 1;
- } else if (tokenType == DOUBLE_TOKEN_TYPE) {
- // "javaC1C2C3C4linux"
- // ^--: the previous non-ASCII
- // : the current character
- offset--;
- bufferIndex--;
-
- if (preIsTokened == true) {
- // there is only one non-ASCII has been stored
- length = 0;
- preIsTokened = false;
- break;
- } else {
- break;
- }
- }
-
- // store the LowerCase(c) in the buffer
- buffer[length++] = Character.toLowerCase(c);
- tokenType = SINGLE_TOKEN_TYPE;
-
- // break the procedure if buffer overflowed!
- if (length == MAX_WORD_LEN) {
- break;
- }
- } else if (length > 0) {
- if (preIsTokened == true) {
- length = 0;
- preIsTokened = false;
- } else {
- break;
- }
- }
- } else {
- // non-ASCII letter, e.g."C1C2C3C4"
- if (Character.isLetter(c)) {
- if (length == 0) {
- start = offset - 1;
- buffer[length++] = c;
- tokenType = DOUBLE_TOKEN_TYPE;
- } else {
- if (tokenType == SINGLE_TOKEN_TYPE) {
- offset--;
- bufferIndex--;
-
- //return the previous ASCII characters
- break;
- } else {
- buffer[length++] = c;
- tokenType = DOUBLE_TOKEN_TYPE;
-
- if (length == 2) {
- offset--;
- bufferIndex--;
- preIsTokened = true;
-
- break;
- }
- }
- }
- } else if (length > 0) {
- if (preIsTokened == true) {
- // empty the buffer
- length = 0;
- preIsTokened = false;
- } else {
- break;
- }
- }
- }
- }
-
- if (length > 0) {
- termAtt.copyBuffer(buffer, 0, length);
- offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
- typeAtt.setType(TOKEN_TYPE_NAMES[tokenType]);
- return true;
- } else if (dataLen == -1) {
- offset--;
- return false;
- }
-
- // Cycle back and try for the next token (don't
- // return an empty string)
- }
- }
-
- @Override
- public final void end() {
- // set final offset
- final int finalOffset = correctOffset(offset);
- this.offsetAtt.setOffset(finalOffset, finalOffset);
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- offset = bufferIndex = dataLen = 0;
- preIsTokened = false;
- tokenType = WORD_TYPE;
- }
-
- @Override
- public void reset(Reader reader) throws IOException {
- super.reset(reader);
- reset();
- }
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilter.java
index d0eb3b2..7a47983 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.cjk;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseAnalyzer.java
deleted file mode 100644
index 886f5e7..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseAnalyzer.java
+++ /dev/null
@@ -1,50 +0,0 @@
-package org.apache.lucene.analysis.cn;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Tokenizer;
-
-/**
- * An {@link Analyzer} that tokenizes text with {@link ChineseTokenizer} and
- * filters with {@link ChineseFilter}
- * @deprecated (3.1) Use {@link StandardAnalyzer} instead, which has the same functionality.
- * This analyzer will be removed in Lucene 5.0
- */
-@Deprecated
-public final class ChineseAnalyzer extends Analyzer {
-
- /**
- * Creates
- * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
- * used to tokenize all the text in the provided {@link Reader}.
- *
- * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
- * built from a {@link ChineseTokenizer} filtered with
- * {@link ChineseFilter}
- */
- @Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- final Tokenizer source = new ChineseTokenizer(reader);
- return new TokenStreamComponents(source, new ChineseFilter(source));
- }
-}
\ No newline at end of file
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
deleted file mode 100644
index aa9743b..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
+++ /dev/null
@@ -1,104 +0,0 @@
-package org.apache.lucene.analysis.cn;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
-
-/**
- * A {@link TokenFilter} with a stop word table.
- *
- *
- * <li>Numeric tokens are removed.
- *
- * <li>English tokens must be larger than 1 character.
- *
- * <li>One Chinese character as one Chinese word.
- *
- * TO DO:
- *
- *
- * <li>Add Chinese stop words, such as \ue400
- *
- * <li>Dictionary based Chinese word extraction
- *
- * <li>Intelligent Chinese word extraction
- *
- *
- * @deprecated (3.1) Use {@link StopFilter} instead, which has the same functionality.
- * This filter will be removed in Lucene 5.0
- */
-@Deprecated
-public final class ChineseFilter extends TokenFilter {
-
-
- // Only English now, Chinese to be added later.
- public static final String[] STOP_WORDS = {
- "and", "are", "as", "at", "be", "but", "by",
- "for", "if", "in", "into", "is", "it",
- "no", "not", "of", "on", "or", "such",
- "that", "the", "their", "then", "there", "these",
- "they", "this", "to", "was", "will", "with"
- };
-
-
- private CharArraySet stopTable;
-
- private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-
- public ChineseFilter(TokenStream in) {
- super(in);
-
- stopTable = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS), false);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
-
- while (input.incrementToken()) {
- char text[] = termAtt.buffer();
- int termLength = termAtt.length();
-
- // why not key off token type here assuming ChineseTokenizer comes first?
- if (!stopTable.contains(text, 0, termLength)) {
- switch (Character.getType(text[0])) {
-
- case Character.LOWERCASE_LETTER:
- case Character.UPPERCASE_LETTER:
-
- // English word/token should larger than 1 character.
- if (termLength>1) {
- return true;
- }
- break;
- case Character.OTHER_LETTER:
-
- // One Chinese character as one Chinese word.
- // Chinese word extraction to be added later here.
-
- return true;
- }
-
- }
-
- }
- return false;
- }
-
-}
\ No newline at end of file
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
deleted file mode 100644
index ddbbf18..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
+++ /dev/null
@@ -1,175 +0,0 @@
-package org.apache.lucene.analysis.cn;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.util.AttributeSource;
-
-
-/**
- * Tokenize Chinese text as individual chinese characters.
- *
- *
- * The difference between ChineseTokenizer and
- * CJKTokenizer is that they have different
- * token parsing logic.
- *
- *
- * For example, if the Chinese text
- * "C1C2C3C4" is to be indexed:
- *
- *
The tokens returned from ChineseTokenizer are C1, C2, C3, C4.
- *
The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4.
- *
- *
- *
- * Therefore the index created by CJKTokenizer is much larger.
- *
- *
- * The problem is that when searching for C1, C1C2, C1C3,
- * C4C2, C1C2C3 ... the ChineseTokenizer works, but the
- * CJKTokenizer will not work.
- *
- * @deprecated (3.1) Use {@link StandardTokenizer} instead, which has the same functionality.
- * This filter will be removed in Lucene 5.0
- */
-@Deprecated
-public final class ChineseTokenizer extends Tokenizer {
-
-
- public ChineseTokenizer(Reader in) {
- super(in);
- }
-
- public ChineseTokenizer(AttributeSource source, Reader in) {
- super(source, in);
- }
-
- public ChineseTokenizer(AttributeFactory factory, Reader in) {
- super(factory, in);
- }
-
- private int offset = 0, bufferIndex=0, dataLen=0;
- private final static int MAX_WORD_LEN = 255;
- private final static int IO_BUFFER_SIZE = 1024;
- private final char[] buffer = new char[MAX_WORD_LEN];
- private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
-
-
- private int length;
- private int start;
-
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
- private final void push(char c) {
-
- if (length == 0) start = offset-1; // start of token
- buffer[length++] = Character.toLowerCase(c); // buffer it
-
- }
-
- private final boolean flush() {
-
- if (length>0) {
- //System.out.println(new String(buffer, 0,
- //length));
- termAtt.copyBuffer(buffer, 0, length);
- offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
- return true;
- }
- else
- return false;
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- clearAttributes();
-
- length = 0;
- start = offset;
-
-
- while (true) {
-
- final char c;
- offset++;
-
- if (bufferIndex >= dataLen) {
- dataLen = input.read(ioBuffer);
- bufferIndex = 0;
- }
-
- if (dataLen == -1) {
- offset--;
- return flush();
- } else
- c = ioBuffer[bufferIndex++];
-
-
- switch(Character.getType(c)) {
-
- case Character.DECIMAL_DIGIT_NUMBER:
- case Character.LOWERCASE_LETTER:
- case Character.UPPERCASE_LETTER:
- push(c);
- if (length == MAX_WORD_LEN) return flush();
- break;
-
- case Character.OTHER_LETTER:
- if (length>0) {
- bufferIndex--;
- offset--;
- return flush();
- }
- push(c);
- return flush();
-
- default:
- if (length>0) return flush();
- break;
- }
- }
- }
-
- @Override
- public final void end() {
- // set final offset
- final int finalOffset = correctOffset(offset);
- this.offsetAtt.setOffset(finalOffset, finalOffset);
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- offset = bufferIndex = dataLen = 0;
- }
-
- @Override
- public void reset(Reader input) throws IOException {
- super.reset(input);
- reset();
- }
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/package.html b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/package.html
deleted file mode 100644
index 6d9ea04..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/package.html
+++ /dev/null
@@ -1,41 +0,0 @@
-
-
-
-
-
-
-
-Analyzer for Chinese, which indexes unigrams (individual chinese characters).
-
-Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
-
-
StandardAnalyzer: Index unigrams (individual Chinese characters) as a token.
-
CJKAnalyzer (in the analyzers/cjk package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens.
-
SmartChineseAnalyzer (in the analyzers/smartcn package): Index words (attempt to segment Chinese text into words) as tokens.
-
-
-Example phrase: "我是中国人"
-
-
StandardAnalyzer: 我－是－中－国－人
-
CJKAnalyzer: 我是－是中－中国－国人
-
SmartChineseAnalyzer: 我－是－中国－人
-
-
-
-
-
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
index 7d2e33f..eeaec84 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
index 1d693d7..e73f554 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.compound;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
index 8946cc9..e26a03e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.compound;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
index 9f46bde..518d652 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.compound;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/PatternParser.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/PatternParser.java
index eececf2..5d105d7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/PatternParser.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/PatternParser.java
@@ -64,7 +64,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
static final int ELEM_HYPHEN = 4;
- public PatternParser() throws HyphenationException {
+ public PatternParser() {
token = new StringBuilder();
parser = createParser();
parser.setContentHandler(this);
@@ -74,7 +74,7 @@ public class PatternParser extends DefaultHandler implements PatternConsumer {
}
- public PatternParser(PatternConsumer consumer) throws HyphenationException {
+ public PatternParser(PatternConsumer consumer) {
this();
this.consumer = consumer;
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/TernaryTree.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/TernaryTree.java
index aaa2d56..0aad882 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/TernaryTree.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/TernaryTree.java
@@ -648,7 +648,7 @@ public class TernaryTree implements Cloneable {
}
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) {
TernaryTree tt = new TernaryTree();
tt.insert("Carlos", 'C');
tt.insert("Car", 'r');
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
index 5bf37e3..b32b187 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.core;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
index 44ee084..7d5abff 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.core;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
index 3bf3497..5fcbd48 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.core;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
index 0e1c7e6..16e5820 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.core;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
index ecdf550..ce7b0a8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.core;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
index a458626..a7abddc 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.core;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
index c4f36aa..2eb97b5 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.core;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
index f27fc01..b2808f1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.core;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
-import java.io.IOException;
import java.util.Arrays;
import java.util.List;
@@ -122,7 +121,7 @@ public final class StopFilter extends FilteringTokenFilter {
* Returns the next input Token whose term() is not a stop word.
*/
@Override
- protected boolean accept() throws IOException {
+ protected boolean accept() {
return !stopWords.contains(termAtt.buffer(), 0, termAtt.length());
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
index f564af2..8c23fcf 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
@@ -48,7 +48,7 @@ public final class TypeTokenFilter extends FilteringTokenFilter {
* When the useWhiteList parameter is set to true then accept the token if its type is contained in the stopTypes
*/
@Override
- protected boolean accept() throws IOException {
+ protected boolean accept() {
return useWhiteList == stopTypes.contains(typeAttribute.type());
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
index e22952d..ecc4716 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.core;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
index 01004c6..c3e6de5 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.core;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
index b0d9c80..b630726 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.cz;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -40,17 +40,6 @@ import java.io.*;
* all). A default set of stopwords is used unless an alternative list is
* specified.
*
- *
- *
- *
- * You must specify the required {@link Version} compatibility when creating
- * CzechAnalyzer:
- *
- *
As of 3.1, words are stemmed with {@link CzechStemFilter}
- *
As of 2.9, StopFilter preserves position increments
- *
As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
- * LUCENE-1068)
- *
*/
public final class CzechAnalyzer extends StopwordAnalyzerBase {
/** File containing default Czech stopwords. */
@@ -86,8 +75,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
*
- * @param matchVersion Lucene version to match See
- * {@link above}
+ * @param matchVersion Lucene version to match
*/
public CzechAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_SET);
@@ -96,8 +84,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the given stop words.
*
- * @param matchVersion Lucene version to match See
- * {@link above}
+ * @param matchVersion Lucene version to match
* @param stopwords a stopword set
*/
public CzechAnalyzer(Version matchVersion, CharArraySet stopwords) {
@@ -108,8 +95,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
* Builds an analyzer with the given stop words and a set of work to be
* excluded from the {@link CzechStemFilter}.
*
- * @param matchVersion Lucene version to match See
- * {@link above}
+ * @param matchVersion Lucene version to match
* @param stopwords a stopword set
* @param stemExclusionTable a stemming exclusion set
*/
@@ -127,7 +113,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
- * a version is >= LUCENE_31 and a stem exclusion set is provided via
+ * a stem exclusion set is provided via
* {@link #CzechAnalyzer(Version, CharArraySet, CharArraySet)} a
* {@link KeywordMarkerFilter} is added before
* {@link CzechStemFilter}.
@@ -139,11 +125,9 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter( matchVersion, result, stopwords);
- if (matchVersion.onOrAfter(Version.LUCENE_31)) {
- if(!this.stemExclusionTable.isEmpty())
- result = new KeywordMarkerFilter(result, stemExclusionTable);
- result = new CzechStemFilter(result);
- }
+ if(!this.stemExclusionTable.isEmpty())
+ result = new KeywordMarkerFilter(result, stemExclusionTable);
+ result = new CzechStemFilter(result);
return new TokenStreamComponents(source, result);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java
index 4f0e17f..9cde006 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java
@@ -8,7 +8,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java
index 120b287..5646ccd 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.cz;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
index 0006e58..25f1ee1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.da;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
index d64c93b..4a88c70 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
@@ -1,7 +1,7 @@
package org.apache.lucene.analysis.de;
// This file is encoded in UTF-8
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.de;
import java.io.IOException;
import java.io.Reader;
-import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@@ -37,7 +36,6 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
-import org.tartarus.snowball.ext.German2Stemmer;
/**
* {@link Analyzer} for German language.
@@ -49,39 +47,11 @@ import org.tartarus.snowball.ext.German2Stemmer;
* exclusion list is empty by default.
*
*
- *
- *
You must specify the required {@link Version}
- * compatibility when creating GermanAnalyzer:
- *
- *
As of 3.6, GermanLightStemFilter is used for less aggressive stemming.
- *
As of 3.1, Snowball stemming is done with SnowballFilter, and
- * Snowball stopwords are used by default.
- *
As of 2.9, StopFilter preserves position
- * increments
- *
- *
*
NOTE: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.
*/
public final class GermanAnalyzer extends StopwordAnalyzerBase {
- /** @deprecated in 3.1, remove in Lucene 5.0 (index bw compat) */
- @Deprecated
- private final static String[] GERMAN_STOP_WORDS = {
- "einer", "eine", "eines", "einem", "einen",
- "der", "die", "das", "dass", "daß",
- "du", "er", "sie", "es",
- "was", "wer", "wie", "wir",
- "und", "oder", "ohne", "mit",
- "am", "im", "in", "aus", "auf",
- "ist", "sein", "war", "wird",
- "ihr", "ihre", "ihres",
- "als", "für", "von", "mit",
- "dich", "dir", "mich", "mir",
- "mein", "sein", "kein",
- "durch", "wegen", "wird"
- };
-
/** File containing default German stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "german_stop.txt";
@@ -94,10 +64,6 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
}
private static class DefaultSetHolder {
- /** @deprecated in 3.1, remove in Lucene 5.0 (index bw compat) */
- @Deprecated
- private static final CharArraySet DEFAULT_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(
- Version.LUCENE_CURRENT, Arrays.asList(GERMAN_STOP_WORDS), false));
private static final CharArraySet DEFAULT_SET;
static {
try {
@@ -125,9 +91,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
* {@link #getDefaultStopSet()}.
*/
public GermanAnalyzer(Version matchVersion) {
- this(matchVersion,
- matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_SET
- : DefaultSetHolder.DEFAULT_SET_30);
+ this(matchVersion, DefaultSetHolder.DEFAULT_SET);
}
/**
@@ -176,14 +140,8 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter( matchVersion, result, stopwords);
result = new KeywordMarkerFilter(result, exclusionSet);
- if (matchVersion.onOrAfter(Version.LUCENE_36)) {
- result = new GermanNormalizationFilter(result);
- result = new GermanLightStemFilter(result);
- } else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
- result = new SnowballFilter(result, new German2Stemmer());
- } else {
- result = new GermanStemFilter(result);
- }
+ result = new GermanNormalizationFilter(result);
+ result = new GermanLightStemFilter(result);
return new TokenStreamComponents(source, result);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java
index a949a7d..865b82d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.de;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
index 04d8b58..5b84ab3 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.de;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java
index 9a291c3..fe15b15 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.de;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
index 52fc4a0..fd99966 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.de;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilter.java
index 1ad4f00..9c50b31 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.de;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
index e5461d7..fee5b88 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.de;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
index 86e118f..90d0183 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
@@ -1,7 +1,7 @@
package org.apache.lucene.analysis.de;
// This file is encoded in UTF-8
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
index 3b1b5ac..625c9d1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
@@ -37,15 +37,6 @@ import org.apache.lucene.util.Version;
* that will not be indexed at all).
* A default set of stopwords is used unless an alternative list is specified.
*
- *
- *
- *
You must specify the required {@link Version}
- * compatibility when creating GreekAnalyzer:
- *
- *
As of 3.1, StandardFilter and GreekStemmer are used by default.
- *
As of 2.9, StopFilter preserves position
- * increments
- *
*
*
NOTE: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.
@@ -78,8 +69,7 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
/**
* Builds an analyzer with the default stop words.
- * @param matchVersion Lucene compatibility version,
- * See above
+ * @param matchVersion Lucene compatibility version
*/
public GreekAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_SET);
@@ -91,8 +81,7 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
* NOTE: The stopwords set should be pre-processed with the logic of
* {@link GreekLowerCaseFilter} for best results.
*
- * @param matchVersion Lucene compatibility version,
- * See above
+ * @param matchVersion Lucene compatibility version
* @param stopwords a stopword set
*/
public GreekAnalyzer(Version matchVersion, CharArraySet stopwords) {
@@ -114,11 +103,9 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new GreekLowerCaseFilter(matchVersion, source);
- if (matchVersion.onOrAfter(Version.LUCENE_31))
- result = new StandardFilter(matchVersion, result);
+ result = new StandardFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
- if (matchVersion.onOrAfter(Version.LUCENE_31))
- result = new GreekStemFilter(result);
+ result = new GreekStemFilter(result);
return new TokenStreamComponents(source, result);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java
index 59ccc9a..2ea235c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.el;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
index bd26338..815474f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
@@ -5,7 +5,7 @@ import org.apache.lucene.util.Version;
import java.util.Arrays;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -196,7 +196,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_50,
Arrays.asList("θ", "δ", "ελ", "γαλ", "ν", "Ï€", "ιδ", "παÏ"),
false);
@@ -222,7 +222,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_50,
Arrays.asList("αλ", "αδ", "ενδ", "αμαν", "αμμοχαλ", "ηθ", "ανηθ",
"αντιδ", "φυσ", "βÏωμ", "γεÏ", "εξωδ", "καλπ", "καλλιν", "καταδ",
"μουλ", "μπαν", "μπαγιατ", "μπολ", "μποσ", "νιτ", "ξικ", "συνομηλ",
@@ -247,7 +247,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_50,
Arrays.asList("αναπ", "αποθ", "αποκ", "αποστ", "βουβ", "ξεθ", "ουλ",
"πεθ", "πικÏ", "ποτ", "σιχ", "χ"),
false);
@@ -274,11 +274,11 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc8a = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc8a = new CharArraySet(Version.LUCENE_50,
Arrays.asList("Ï„Ï", "τσ"),
false);
- private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_50,
Arrays.asList("βετεÏ", "βουλκ", "βÏαχμ", "γ", "δÏαδουμ", "θ", "καλπουζ",
"καστελ", "κοÏμοÏ", "λαοπλ", "μωαμεθ", "μ", "μουσουλμ", "ν", "ουλ",
"Ï€", "πελεκ", "πλ", "πολισ", "ποÏτολ", "σαÏακατσ", "σουλτ",
@@ -337,7 +337,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc9 = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc9 = new CharArraySet(Version.LUCENE_50,
Arrays.asList("αβαÏ", "βεν", "εναÏ", "αβÏ", "αδ", "αθ", "αν", "απλ",
"βαÏον", "ντÏ", "σκ", "κοπ", "μποÏ", "νιφ", "παγ", "παÏακαλ", "σεÏÏ€",
"σκελ", "συÏφ", "τοκ", "Ï…", "δ", "εμ", "θαÏÏ", "θ"),
@@ -425,11 +425,11 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_50,
Arrays.asList("π", "απ", "συμπ", "ασυμπ", "ακαταπ", "αμεταμφ"),
false);
- private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_50,
Arrays.asList("αλ", "αÏ", "εκτελ", "ζ", "μ", "ξ", "παÏακαλ", "αÏ", "Ï€Ïο", "νισ"),
false);
@@ -449,7 +449,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_50,
Arrays.asList("διαθ", "θ", "παÏακαταθ", "Ï€Ïοσθ", "συνθ"),
false);
@@ -483,7 +483,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc14 = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc14 = new CharArraySet(Version.LUCENE_50,
Arrays.asList("φαÏμακ", "χαδ", "αγκ", "αναÏÏ", "βÏομ", "εκλιπ", "λαμπιδ",
"λεχ", "μ", "πατ", "Ï", "λ", "μεδ", "μεσαζ", "υποτειν", "αμ", "αιθ",
"ανηκ", "δεσποζ", "ενδιαφεÏ", "δε", "δευτεÏευ", "καθαÏευ", "πλε",
@@ -521,7 +521,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_50,
Arrays.asList("αβαστ", "πολυφ", "αδηφ", "παμφ", "Ï", "ασπ", "αφ", "αμαλ",
"αμαλλι", "ανυστ", "απεÏ", "ασπαÏ", "αχαÏ", "δεÏβεν", "δÏοσοπ",
"ξεφ", "νεοπ", "νομοτ", "ολοπ", "ομοτ", "Ï€Ïοστ", "Ï€Ïοσωποπ", "συμπ",
@@ -530,7 +530,7 @@ public class GreekStemmer {
"ουλαμ", "ουÏ", "Ï€", "Ï„Ï", "μ"),
false);
- private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_50,
Arrays.asList("ψοφ", "ναυλοχ"),
false);
@@ -567,7 +567,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_50,
Arrays.asList("ν", "χεÏσον", "δωδεκαν", "εÏημον", "μεγαλον", "επταν"),
false);
@@ -587,7 +587,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_50,
Arrays.asList("ασβ", "σβ", "αχÏ", "χÏ", "απλ", "αειμν", "δυσχÏ", "ευχÏ", "κοινοχÏ", "παλιμψ"),
false);
@@ -601,7 +601,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_50,
Arrays.asList("ν", "Ï", "σπι", "στÏαβομουτσ", "κακομουτσ", "εξων"),
false);
@@ -625,7 +625,7 @@ public class GreekStemmer {
return len;
}
- private static final CharArraySet exc19 = new CharArraySet(Version.LUCENE_31,
+ private static final CharArraySet exc19 = new CharArraySet(Version.LUCENE_50,
Arrays.asList("παÏασουσ", "φ", "χ", "ωÏιοπλ", "αζ", "αλλοσουσ", "ασουσ"),
false);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
index 1648b9e..4517dd7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.en;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -94,7 +94,8 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
* @return A
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
- * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+ * {@link StandardFilter}, {@link EnglishPossessiveFilter},
+ * {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is
* provided and {@link PorterStemFilter}.
*/
@@ -103,9 +104,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
- // prior to this we get the classic behavior, standardfilter does it for us.
- if (matchVersion.onOrAfter(Version.LUCENE_31))
- result = new EnglishPossessiveFilter(matchVersion, result);
+ result = new EnglishPossessiveFilter(matchVersion, result);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java
index fede035..131f441 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.en;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java
index e7fe007..52d62c0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.en;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
index 8d12099..9f6f218 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.en;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -26,30 +26,13 @@ import org.apache.lucene.util.Version;
/**
* TokenFilter that removes possessives (trailing 's) from words.
- *
- *
You must specify the required {@link Version}
- * compatibility when creating EnglishPossessiveFilter:
- *
- *
As of 3.6, U+2019 RIGHT SINGLE QUOTATION MARK and
- * U+FF07 FULLWIDTH APOSTROPHE are also treated as
- * quotation marks.
- *
*/
public final class EnglishPossessiveFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private Version matchVersion;
-
- /**
- * @deprecated Use {@link #EnglishPossessiveFilter(Version, TokenStream)} instead.
- */
- @Deprecated
- public EnglishPossessiveFilter(TokenStream input) {
- this(Version.LUCENE_35, input);
- }
+ // NOTE: version now unused
public EnglishPossessiveFilter(Version version, TokenStream input) {
super(input);
- this.matchVersion = version;
}
@Override
@@ -63,7 +46,8 @@ public final class EnglishPossessiveFilter extends TokenFilter {
if (bufferLength >= 2 &&
(buffer[bufferLength-2] == '\'' ||
- (matchVersion.onOrAfter(Version.LUCENE_36) && (buffer[bufferLength-2] == '\u2019' || buffer[bufferLength-2] == '\uFF07'))) &&
+ buffer[bufferLength-2] == '\u2019' ||
+ buffer[bufferLength-2] == '\uFF07') &&
(buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
termAtt.setLength(bufferLength - 2); // Strip last 2 characters off
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java
index 6e88ab9..1a04641 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.en;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
index 3cfd813..17dbf30 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -281,9 +281,9 @@ public class KStemmer {
DictEntry entry;
CharArrayMap d = new CharArrayMap(
- Version.LUCENE_31, 1000, false);
+ Version.LUCENE_50, 1000, false);
- d = new CharArrayMap(Version.LUCENE_31, 1000, false);
+ d = new CharArrayMap(Version.LUCENE_50, 1000, false);
for (int i = 0; i < exceptionWords.length; i++) {
if (!d.containsKey(exceptionWords[i])) {
entry = new DictEntry(exceptionWords[i], true);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java
index c5f5181..c52a942 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.en;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemmer.java
index 0b90f0a..123ae84 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.en;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
index b4eb2e0..9e30a16 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.es;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -34,17 +34,9 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
-import org.tartarus.snowball.ext.SpanishStemmer;
/**
* {@link Analyzer} for Spanish.
- *
- *
- *
You must specify the required {@link Version}
- * compatibility when creating SpanishAnalyzer:
- *
- *
As of 3.6, SpanishLightStemFilter is used for less aggressive stemming.
- *
*/
public final class SpanishAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;
@@ -132,11 +124,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new KeywordMarkerFilter(result, stemExclusionSet);
- if (matchVersion.onOrAfter(Version.LUCENE_36)) {
- result = new SpanishLightStemFilter(result);
- } else {
- result = new SnowballFilter(result, new SpanishStemmer());
- }
+ result = new SpanishLightStemFilter(result);
return new TokenStreamComponents(source, result);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java
index 79ade5c..eed28a8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.es;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
index 382faa8..cd9f418 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.es;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
index 4fde234..731d590 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.eu;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index 2b47f68..eff8eac 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.fa;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
@@ -36,7 +35,7 @@ import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Persian.
*
- * This Analyzer uses {@link ArabicLetterTokenizer} which implies tokenizing around
+ * This Analyzer uses {@link PersianCharFilter} which implies tokenizing around
* zero-width non-joiner in addition to whitespace. Some persian-specific variant forms (such as farsi
* yeh and keheh) are standardized. "Stemming" is accomplished via stopwords.
*
@@ -118,12 +117,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source;
- if (matchVersion.onOrAfter(Version.LUCENE_31)) {
- source = new StandardTokenizer(matchVersion, reader);
- } else {
- source = new ArabicLetterTokenizer(matchVersion, reader);
- }
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new LowerCaseFilter(matchVersion, source);
result = new ArabicNormalizationFilter(result);
/* additional persian-specific normalization */
@@ -139,9 +133,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
* Wraps the Reader with {@link PersianCharFilter}
*/
@Override
- protected Reader initReader(Reader reader) {
- return matchVersion.onOrAfter(Version.LUCENE_31) ?
- new PersianCharFilter(CharReader.get(reader)) :
- reader;
+ protected Reader initReader(String fieldName, Reader reader) {
+ return new PersianCharFilter(CharReader.get(reader));
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java
index 962f839..cf3a820 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.fa;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
index 20c1b17..914e179 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.fa;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java
index 68407d2..051e3a8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.fa;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
index 3cd1777..56783fe 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.fi;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java
index 12f58b9..8d8911e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.fi;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
index ae43caa..c866c07 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.fi;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
index 99c53c6..a05a8f9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.fr;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
index 1f8b47b..778e750 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.fr;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -35,7 +35,6 @@ import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
-import java.util.Arrays;
/**
* {@link Analyzer} for French language.
@@ -47,53 +46,11 @@ import java.util.Arrays;
* exclusion list is empty by default.
*
*
- *
- *
You must specify the required {@link Version}
- * compatibility when creating FrenchAnalyzer:
- *
- *
As of 3.6, FrenchLightStemFilter is used for less aggressive stemming.
- *
As of 3.1, Snowball stemming is done with SnowballFilter,
- * LowerCaseFilter is used prior to StopFilter, and ElisionFilter and
- * Snowball stopwords are used by default.
- *
As of 2.9, StopFilter preserves position
- * increments
- *
- *
*
NOTE: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.
- * The used stemmer can be changed at runtime after the
- * filter object is created (as long as it is a {@link FrenchStemmer}).
- *
- *
- * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
- * the {@link KeywordAttribute} before this {@link TokenStream}.
- *
- * @see KeywordMarkerFilter
- * @deprecated (3.1) Use {@link SnowballFilter} with
- * {@link org.tartarus.snowball.ext.FrenchStemmer} instead, which has the
- * same functionality. This filter will be removed in Lucene 5.0
- */
-@Deprecated
-public final class FrenchStemFilter extends TokenFilter {
-
- /**
- * The actual token in the input stream.
- */
- private FrenchStemmer stemmer = new FrenchStemmer();
-
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
-
- public FrenchStemFilter( TokenStream in ) {
- super(in);
- }
-
- /**
- * @return Returns true for the next token in the stream, or false at EOS
- */
- @Override
- public boolean incrementToken() throws IOException {
- if (input.incrementToken()) {
- String term = termAtt.toString();
-
- // Check the exclusion table
- if (!keywordAttr.isKeyword()) {
- String s = stemmer.stem( term );
- // If not stemmed, don't waste the time adjusting the token.
- if ((s != null) && !s.equals( term ) )
- termAtt.setEmpty().append(s);
- }
- return true;
- } else {
- return false;
- }
- }
- /**
- * Set a alternative/custom {@link FrenchStemmer} for this filter.
- */
- public void setStemmer( FrenchStemmer stemmer ) {
- if ( stemmer != null ) {
- this.stemmer = stemmer;
- }
- }
-}
-
-
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java
deleted file mode 100644
index cf741c7..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java
+++ /dev/null
@@ -1,712 +0,0 @@
-package org.apache.lucene.analysis.fr;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A stemmer for French words.
- *
- * The algorithm is based on the work of
- * Dr Martin Porter on his snowball project
- * refer to http://snowball.sourceforge.net/french/stemmer.html
- * (French stemming algorithm) for details
- *
- * <p>You must specify the required {@link Version}
- * compatibility when creating HindiAnalyzer:
- * <p>
- * <ul>
- *   <li> As of 3.6, StandardTokenizer is used for tokenization
- * </ul>
 */
public final class HindiAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;
@@ -126,12 +118,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source;
- if (matchVersion.onOrAfter(Version.LUCENE_36)) {
- source = new StandardTokenizer(matchVersion, reader);
- } else {
- source = new IndicTokenizer(matchVersion, reader);
- }
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new LowerCaseFilter(matchVersion, source);
if (!stemExclusionSet.isEmpty())
result = new KeywordMarkerFilter(result, stemExclusionSet);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java
index b8da072..dfdf56a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hi;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizer.java
index 245afd9..8d9de3b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hi;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java
index 946fe34..a8ac018 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hi;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemmer.java
index 68ef2cc..65d2ce9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hi;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
index da9686e..23111f4 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hu;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java
index f3f06fb..23cbfb8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hu;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
index 53f0dc8..4746a60 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hu;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellAffix.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellAffix.java
index 5325870..97376c0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellAffix.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellAffix.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hunspell;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
index e66389b..135584e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hunspell;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -37,6 +37,7 @@ public class HunspellDictionary {
static final HunspellWord NOFLAGS = new HunspellWord();
+ private static final String ALIAS_KEY = "AF";
private static final String PREFIX_KEY = "PFX";
private static final String SUFFIX_KEY = "SFX";
private static final String FLAG_KEY = "FLAG";
@@ -49,6 +50,7 @@ public class HunspellDictionary {
private static final String SUFFIX_CONDITION_REGEX_PATTERN = ".*%s";
private static final boolean IGNORE_CASE_DEFAULT = false;
+ private static final boolean STRICT_AFFIX_PARSING_DEFAULT = true;
private CharArrayMap> words;
private CharArrayMap> prefixes;
@@ -59,6 +61,9 @@ public class HunspellDictionary {
private final Version version;
+ private String[] aliases;
+ private int aliasCount = 0;
+
/**
* Creates a new HunspellDictionary containing the information read from the provided InputStreams to hunspell affix
* and dictionary files
@@ -100,11 +105,27 @@ public class HunspellDictionary {
* @throws ParseException Can be thrown if the content of the files does not meet expected formats
*/
public HunspellDictionary(InputStream affix, List dictionaries, Version version, boolean ignoreCase) throws IOException, ParseException {
+ this(affix, dictionaries, version, ignoreCase, STRICT_AFFIX_PARSING_DEFAULT);
+ }
+
+ /**
+ * Creates a new HunspellDictionary containing the information read from the provided InputStreams to hunspell affix
+ * and dictionary files
+ *
+ * @param affix InputStream for reading the hunspell affix file
+ * @param dictionaries InputStreams for reading the hunspell dictionary file
+ * @param version Lucene Version
+ * @param ignoreCase If true, dictionary matching will be case insensitive
+ * @param strictAffixParsing Affix strict parsing enabled or not (an error while reading a rule causes exception or is ignored)
+ * @throws IOException Can be thrown while reading from the InputStreams
+ * @throws ParseException Can be thrown if the content of the files does not meet expected formats
+ */
+ public HunspellDictionary(InputStream affix, List dictionaries, Version version, boolean ignoreCase, boolean strictAffixParsing) throws IOException, ParseException {
this.version = version;
this.ignoreCase = ignoreCase;
String encoding = getDictionaryEncoding(affix);
CharsetDecoder decoder = getJavaEncoding(encoding);
- readAffixFile(affix, decoder);
+ readAffixFile(affix, decoder, strictAffixParsing);
words = new CharArrayMap>(version, 65535 /* guess */, this.ignoreCase);
for (InputStream dictionary : dictionaries) {
readDictionaryFile(dictionary, decoder);
@@ -154,17 +175,19 @@ public class HunspellDictionary {
* @param decoder CharsetDecoder to decode the content of the file
* @throws IOException Can be thrown while reading from the InputStream
*/
- private void readAffixFile(InputStream affixStream, CharsetDecoder decoder) throws IOException {
+ private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, boolean strict) throws IOException, ParseException {
prefixes = new CharArrayMap>(version, 8, ignoreCase);
suffixes = new CharArrayMap>(version, 8, ignoreCase);
-
- BufferedReader reader = new BufferedReader(new InputStreamReader(affixStream, decoder));
+
+ LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
String line = null;
while ((line = reader.readLine()) != null) {
- if (line.startsWith(PREFIX_KEY)) {
- parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN);
+ if (line.startsWith(ALIAS_KEY)) {
+ parseAlias(line);
+ } else if (line.startsWith(PREFIX_KEY)) {
+ parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, strict);
} else if (line.startsWith(SUFFIX_KEY)) {
- parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN);
+ parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, strict);
} else if (line.startsWith(FLAG_KEY)) {
// Assume that the FLAG line comes before any prefix or suffixes
// Store the strategy so it can be used when parsing the dic file
@@ -186,8 +209,9 @@ public class HunspellDictionary {
*/
private void parseAffix(CharArrayMap> affixes,
String header,
- BufferedReader reader,
- String conditionPattern) throws IOException {
+ LineNumberReader reader,
+ String conditionPattern,
+ boolean strict) throws IOException, ParseException {
String args[] = header.split("\\s+");
boolean crossProduct = args[2].equals("Y");
@@ -197,6 +221,13 @@ public class HunspellDictionary {
String line = reader.readLine();
String ruleArgs[] = line.split("\\s+");
+ if (ruleArgs.length < 5) {
+ if (strict) {
+ throw new ParseException("The affix file contains a rule with less than five elements", reader.getLineNumber());
+ }
+ continue;
+ }
+
HunspellAffix affix = new HunspellAffix();
affix.setFlag(flagParsingStrategy.parseFlag(ruleArgs[1]));
@@ -206,7 +237,13 @@ public class HunspellDictionary {
int flagSep = affixArg.lastIndexOf('/');
if (flagSep != -1) {
- char appendFlags[] = flagParsingStrategy.parseFlags(affixArg.substring(flagSep + 1));
+ String flagPart = affixArg.substring(flagSep + 1);
+
+ if (aliasCount > 0) {
+ flagPart = getAliasValue(Integer.parseInt(flagPart));
+ }
+
+ char appendFlags[] = flagParsingStrategy.parseFlags(flagPart);
Arrays.sort(appendFlags);
affix.setAppendFlags(appendFlags);
affix.setAppend(affixArg.substring(0, flagSep));
@@ -330,8 +367,12 @@ public class HunspellDictionary {
if (end == -1)
end = line.length();
+ String flagPart = line.substring(flagSep + 1, end);
+ if (aliasCount > 0) {
+ flagPart = getAliasValue(Integer.parseInt(flagPart));
+ }
- wordForm = new HunspellWord(flagParsingStrategy.parseFlags(line.substring(flagSep + 1, end)));
+ wordForm = new HunspellWord(flagParsingStrategy.parseFlags(flagPart));
Arrays.sort(wordForm.getFlags());
entry = line.substring(0, flagSep);
if(ignoreCase) {
@@ -352,6 +393,25 @@ public class HunspellDictionary {
return version;
}
+ private void parseAlias(String line) {
+ String ruleArgs[] = line.split("\\s+");
+ if (aliases == null) {
+ //first line should be the aliases count
+ final int count = Integer.parseInt(ruleArgs[1]);
+ aliases = new String[count];
+ } else {
+ aliases[aliasCount++] = ruleArgs[1];
+ }
+ }
+
+ private String getAliasValue(int id) {
+ try {
+ return aliases[id - 1];
+ } catch (IndexOutOfBoundsException ex) {
+ throw new IllegalArgumentException("Bad flag alias number:" + id, ex);
+ }
+ }
+
/**
* Abstraction of the process of parsing flags taken from the affix and dic files
*/
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
index 57a5f06..9e5e95d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hunspell;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java
index e1d0f58..6dd4aa1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hunspell;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellWord.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellWord.java
index 8a0c206..fe216d3 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellWord.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellWord.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hunspell;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
index 9177122..9e5b306 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.hy;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
index b3861d2..ef38215 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.id;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilter.java
index 517151f..50a4f04 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.id;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemmer.java
index 0b7308c..7dca443 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.id;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java
index de485b0..be4bec5 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.in;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizer.java
index 2f3c374..bfef661 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.in;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
deleted file mode 100644
index e6ae4e7..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package org.apache.lucene.analysis.in;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.util.CharTokenizer;
-import org.apache.lucene.analysis.standard.StandardTokenizer; // javadocs
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
-
-/**
- * Simple Tokenizer for text in Indian Languages.
- * @deprecated (3.6) Use {@link StandardTokenizer} instead.
- */
-@Deprecated
-public final class IndicTokenizer extends CharTokenizer {
-
- public IndicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
- super(matchVersion, factory, input);
- }
-
- public IndicTokenizer(Version matchVersion, AttributeSource source, Reader input) {
- super(matchVersion, source, input);
- }
-
- public IndicTokenizer(Version matchVersion, Reader input) {
- super(matchVersion, input);
- }
-
- @Override
- protected boolean isTokenChar(int c) {
- return Character.isLetter(c)
- || Character.getType(c) == Character.NON_SPACING_MARK
- || Character.getType(c) == Character.FORMAT
- || Character.getType(c) == Character.COMBINING_SPACING_MARK;
- }
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
index f5d2ef9..c6aedf2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.it;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -36,19 +36,9 @@ import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
-import org.tartarus.snowball.ext.ItalianStemmer;
/**
* {@link Analyzer} for Italian.
- *
- * <a name="version"/>
- *
- * <p>You must specify the required {@link Version}
- * compatibility when creating ItalianAnalyzer:
- *
- * <ul>
- *   <li> As of 3.6, ItalianLightStemFilter is used for less aggressive stemming.
- *   <li> As of 3.2, ElisionFilter with a set of Italian
- *        contractions is used by default.
- * </ul>
- *
 */
public final class ItalianAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;
@@ -139,18 +129,12 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
- if (matchVersion.onOrAfter(Version.LUCENE_32)) {
- result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
- }
+ result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new KeywordMarkerFilter(result, stemExclusionSet);
- if (matchVersion.onOrAfter(Version.LUCENE_36)) {
- result = new ItalianLightStemFilter(result);
- } else {
- result = new SnowballFilter(result, new ItalianStemmer());
- }
+ result = new ItalianLightStemFilter(result);
return new TokenStreamComponents(source, result);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilter.java
index af9625c..eae6752 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.it;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
index 50a80bd..f7b5fb9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.it;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
index 1c2ab24..72f02a3 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.lv;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java
index b39b39e..0ca8e29 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.lv;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemmer.java
index 7d59fc0..7b24926 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemmer.java
@@ -2,7 +2,7 @@ package org.apache.lucene.analysis.lv;
import static org.apache.lucene.analysis.util.StemmerUtil.*;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.java
index 74d885e..27b396f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
index a41314e..c3d0a96 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
index ee6c5eb..6b2b2db 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,15 +19,13 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenStream;
-import java.io.IOException;
-
/**
* An always exhausted token stream.
*/
public final class EmptyTokenStream extends TokenStream {
@Override
- public final boolean incrementToken() throws IOException {
+ public final boolean incrementToken() {
return false;
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java
index 2c7e765..598d5ae 100755
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
index 935c96f..f3209a8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,9 +17,6 @@
package org.apache.lucene.analysis.miscellaneous;
-import java.io.IOException;
-
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -43,7 +40,7 @@ public final class KeepWordFilter extends FilteringTokenFilter {
}
@Override
- public boolean accept() throws IOException {
+ public boolean accept() {
return words.contains(termAtt.buffer(), 0, termAtt.length());
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java
index 96be252..da4a95d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
index 3f36f2f..984d9f8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,10 +17,7 @@ package org.apache.lucene.analysis.miscellaneous;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -48,7 +45,7 @@ public final class LengthFilter extends FilteringTokenFilter {
}
@Override
- public boolean accept() throws IOException {
+ public boolean accept() {
final int len = termAtt.length();
return (len >= min && len <= max);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java
index a52ac5f..eb5b83b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
index 9587f38..54b0b44 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
deleted file mode 100644
index a3d2a11..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
+++ /dev/null
@@ -1,518 +0,0 @@
-package org.apache.lucene.analysis.miscellaneous;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.Arrays;
-import java.util.Locale;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
-
-/**
- * Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
- * {@link java.io.Reader}, that can flexibly separate text into terms via a regular expression {@link Pattern}
- * (with behaviour identical to {@link String#split(String)}),
- * and that combines the functionality of
- * {@link org.apache.lucene.analysis.core.LetterTokenizer},
- * {@link org.apache.lucene.analysis.core.LowerCaseTokenizer},
- * {@link org.apache.lucene.analysis.core.WhitespaceTokenizer},
- * {@link org.apache.lucene.analysis.core.StopFilter} into a single efficient
- * multi-purpose class.
- *
- * If you are unsure how exactly a regular expression should look like, consider
- * prototyping by simply trying various expressions on some test texts via
- * {@link String#split(String)}. Once you are satisfied, give that regex to
- * PatternAnalyzer. Also see Java Regular Expression Tutorial.
- *
- * This class can be considerably faster than the "normal" Lucene tokenizers.
- * It can also serve as a building block in a compound Lucene
- * {@link org.apache.lucene.analysis.TokenFilter} chain. For example as in this
- * stemming example:
- *
- * PatternAnalyzer pat = ...
- * TokenStream tokenStream = new SnowballFilter(
- * pat.tokenStream("content", "James is running round in the woods"),
- * "English"));
- *
- * @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
- */
-@Deprecated
-public final class PatternAnalyzer extends Analyzer {
-
- /** "\\W+"; Divides text at non-letters (NOT Character.isLetter(c)) */
- public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
-
- /** "\\s+"; Divides text at whitespaces (Character.isWhitespace(c)) */
- public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
-
- private static final CharArraySet EXTENDED_ENGLISH_STOP_WORDS =
- CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT,
- Arrays.asList(
- "a", "about", "above", "across", "adj", "after", "afterwards",
- "again", "against", "albeit", "all", "almost", "alone", "along",
- "already", "also", "although", "always", "among", "amongst", "an",
- "and", "another", "any", "anyhow", "anyone", "anything",
- "anywhere", "are", "around", "as", "at", "be", "became", "because",
- "become", "becomes", "becoming", "been", "before", "beforehand",
- "behind", "being", "below", "beside", "besides", "between",
- "beyond", "both", "but", "by", "can", "cannot", "co", "could",
- "down", "during", "each", "eg", "either", "else", "elsewhere",
- "enough", "etc", "even", "ever", "every", "everyone", "everything",
- "everywhere", "except", "few", "first", "for", "former",
- "formerly", "from", "further", "had", "has", "have", "he", "hence",
- "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
- "herself", "him", "himself", "his", "how", "however", "i", "ie", "if",
- "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last",
- "latter", "latterly", "least", "less", "ltd", "many", "may", "me",
- "meanwhile", "might", "more", "moreover", "most", "mostly", "much",
- "must", "my", "myself", "namely", "neither", "never",
- "nevertheless", "next", "no", "nobody", "none", "noone", "nor",
- "not", "nothing", "now", "nowhere", "of", "off", "often", "on",
- "once one", "only", "onto", "or", "other", "others", "otherwise",
- "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps",
- "rather", "s", "same", "seem", "seemed", "seeming", "seems",
- "several", "she", "should", "since", "so", "some", "somehow",
- "someone", "something", "sometime", "sometimes", "somewhere",
- "still", "such", "t", "than", "that", "the", "their", "them",
- "themselves", "then", "thence", "there", "thereafter", "thereby",
- "therefor", "therein", "thereupon", "these", "they", "this",
- "those", "though", "through", "throughout", "thru", "thus", "to",
- "together", "too", "toward", "towards", "under", "until", "up",
- "upon", "us", "very", "via", "was", "we", "well", "were", "what",
- "whatever", "whatsoever", "when", "whence", "whenever",
- "whensoever", "where", "whereafter", "whereas", "whereat",
- "whereby", "wherefrom", "wherein", "whereinto", "whereof",
- "whereon", "whereto", "whereunto", "whereupon", "wherever",
- "wherewith", "whether", "which", "whichever", "whichsoever",
- "while", "whilst", "whither", "who", "whoever", "whole", "whom",
- "whomever", "whomsoever", "whose", "whosoever", "why", "will",
- "with", "within", "without", "would", "xsubj", "xcal", "xauthor",
- "xother ", "xnote", "yet", "you", "your", "yours", "yourself",
- "yourselves"
- ), true));
-
- /**
- * A lower-casing word analyzer with English stop words (can be shared
- * freely across threads without harm); global per class loader.
- */
- public static final PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(
- Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
-
- /**
- * A lower-casing word analyzer with extended English stop words
- * (can be shared freely across threads without harm); global per class
- * loader. The stop words are borrowed from
- * http://thomas.loc.gov/home/stopwords.html, see
- * http://thomas.loc.gov/home/all.about.inquery.html
- */
- public static final PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(
- Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
-
- private final Pattern pattern;
- private final boolean toLowerCase;
- private final CharArraySet stopWords;
-
- private final Version matchVersion;
-
- /**
- * Constructs a new instance with the given parameters.
- *
- * @param matchVersion currently does nothing
- * @param pattern
- * a regular expression delimiting tokens
- * @param toLowerCase
- * if true returns tokens after applying
- * String.toLowerCase()
- * @param stopWords
- * if non-null, ignores all tokens that are contained in the
- * given stop set (after previously having applied toLowerCase()
- * if applicable). For example, created via
- * {@link StopFilter#makeStopSet(Version, String[])}and/or
- * {@link org.apache.lucene.analysis.util.WordlistLoader}as in
- * WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")
- * or other stop words
- * lists .
- */
- public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, CharArraySet stopWords) {
- if (pattern == null)
- throw new IllegalArgumentException("pattern must not be null");
-
- if (eqPattern(NON_WORD_PATTERN, pattern)) pattern = NON_WORD_PATTERN;
- else if (eqPattern(WHITESPACE_PATTERN, pattern)) pattern = WHITESPACE_PATTERN;
-
- if (stopWords != null && stopWords.size() == 0) stopWords = null;
-
- this.pattern = pattern;
- this.toLowerCase = toLowerCase;
- this.stopWords = stopWords;
- this.matchVersion = matchVersion;
- }
-
- /**
- * Creates a token stream that tokenizes the given string into token terms
- * (aka words).
- *
- * @param fieldName
- * the name of the field to tokenize (currently ignored).
- * @param reader
- * reader (e.g. charfilter) of the original text. can be null.
- * @param text
- * the string to tokenize
- * @return a new token stream
- */
- public TokenStreamComponents createComponents(String fieldName, Reader reader, String text) {
- // Ideally the Analyzer superclass should have a method with the same signature,
- // with a default impl that simply delegates to the StringReader flavour.
- if (text == null)
- throw new IllegalArgumentException("text must not be null");
-
- if (pattern == NON_WORD_PATTERN) { // fast path
- return new TokenStreamComponents(new FastStringTokenizer(reader, text, true, toLowerCase, stopWords));
- } else if (pattern == WHITESPACE_PATTERN) { // fast path
- return new TokenStreamComponents(new FastStringTokenizer(reader, text, false, toLowerCase, stopWords));
- }
-
- Tokenizer tokenizer = new PatternTokenizer(reader, text, pattern, toLowerCase);
- TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
- return new TokenStreamComponents(tokenizer, result);
- }
-
- /**
- * Creates a token stream that tokenizes all the text in the given Reader;
- * This implementation forwards to tokenStream(String, Reader, String) and is
- * less efficient than tokenStream(String, Reader, String).
- *
- * @param fieldName
- * the name of the field to tokenize (currently ignored).
- * @param reader
- * the reader delivering the text
- * @return a new token stream
- */
- @Override
- public TokenStreamComponents createComponents(String fieldName, Reader reader) {
- try {
- String text = toString(reader);
- return createComponents(fieldName, reader, text);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- /**
- * Indicates whether some other object is "equal to" this one.
- *
- * @param other
- * the reference object with which to compare.
- * @return true if equal, false otherwise
- */
- @Override
- public boolean equals(Object other) {
- if (this == other) return true;
- if (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER) return false;
- if (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER) return false;
-
- if (other instanceof PatternAnalyzer) {
- PatternAnalyzer p2 = (PatternAnalyzer) other;
- return
- toLowerCase == p2.toLowerCase &&
- eqPattern(pattern, p2.pattern) &&
- eq(stopWords, p2.stopWords);
- }
- return false;
- }
-
- /**
- * Returns a hash code value for the object.
- *
- * @return the hash code.
- */
- @Override
- public int hashCode() {
- if (this == DEFAULT_ANALYZER) return -1218418418; // fast path
- if (this == EXTENDED_ANALYZER) return 1303507063; // fast path
-
- int h = 1;
- h = 31*h + pattern.pattern().hashCode();
- h = 31*h + pattern.flags();
- h = 31*h + (toLowerCase ? 1231 : 1237);
- h = 31*h + (stopWords != null ? stopWords.hashCode() : 0);
- return h;
- }
-
- /** equality where o1 and/or o2 can be null */
- private static boolean eq(Object o1, Object o2) {
- return (o1 == o2) || (o1 != null ? o1.equals(o2) : false);
- }
-
- /** assumes p1 and p2 are not null */
- private static boolean eqPattern(Pattern p1, Pattern p2) {
- return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().equals(p2.pattern()));
- }
-
- /**
- * Reads until end-of-stream and returns all read chars, finally closes the stream.
- *
- * @param input the input stream
- * @throws IOException if an I/O error occurs while reading the stream
- */
- private static String toString(Reader input) throws IOException {
- if (input instanceof FastStringReader) { // fast path
- return ((FastStringReader) input).getString();
- }
-
- try {
- int len = 256;
- char[] buffer = new char[len];
- char[] output = new char[len];
-
- len = 0;
- int n;
- while ((n = input.read(buffer)) >= 0) {
- if (len + n > output.length) { // grow capacity
- char[] tmp = new char[Math.max(output.length << 1, len + n)];
- System.arraycopy(output, 0, tmp, 0, len);
- System.arraycopy(buffer, 0, tmp, len, n);
- buffer = output; // use larger buffer for future larger bulk reads
- output = tmp;
- } else {
- System.arraycopy(buffer, 0, output, len, n);
- }
- len += n;
- }
-
- return new String(output, 0, len);
- } finally {
- input.close();
- }
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- /**
- * The work horse; performance isn't fantastic, but it's not nearly as bad
- * as one might think - kudos to the Sun regex developers.
- */
- private static final class PatternTokenizer extends Tokenizer {
-
- private final Pattern pattern;
- private String str;
- private final boolean toLowerCase;
- private Matcher matcher;
- private int pos = 0;
- private static final Locale locale = Locale.getDefault();
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
- public PatternTokenizer(Reader input, String str, Pattern pattern, boolean toLowerCase) {
- super(input);
- this.pattern = pattern;
- this.str = str;
- this.matcher = pattern.matcher(str);
- this.toLowerCase = toLowerCase;
- }
-
- @Override
- public final boolean incrementToken() {
- if (matcher == null) return false;
- clearAttributes();
- while (true) { // loop takes care of leading and trailing boundary cases
- int start = pos;
- int end;
- boolean isMatch = matcher.find();
- if (isMatch) {
- end = matcher.start();
- pos = matcher.end();
- } else {
- end = str.length();
- matcher = null; // we're finished
- }
-
- if (start != end) { // non-empty match (header/trailer)
- String text = str.substring(start, end);
- if (toLowerCase) text = text.toLowerCase(locale);
- termAtt.setEmpty().append(text);
- offsetAtt.setOffset(correctOffset(start), correctOffset(end));
- return true;
- }
- if (!isMatch) return false;
- }
- }
-
- @Override
- public final void end() {
- // set final offset
- final int finalOffset = correctOffset(str.length());
- this.offsetAtt.setOffset(finalOffset, finalOffset);
- }
-
- @Override
- public void reset(Reader input) throws IOException {
- super.reset(input);
- this.str = PatternAnalyzer.toString(input);
- this.matcher = pattern.matcher(this.str);
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- this.pos = 0;
- }
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- /**
- * Special-case class for best performance in common cases; this class is
- * otherwise unnecessary.
- */
- private static final class FastStringTokenizer extends Tokenizer {
-
- private String str;
- private int pos;
- private final boolean isLetter;
- private final boolean toLowerCase;
- private final CharArraySet stopWords;
- private static final Locale locale = Locale.getDefault();
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
- public FastStringTokenizer(Reader input, String str, boolean isLetter, boolean toLowerCase, CharArraySet stopWords) {
- super(input);
- this.str = str;
- this.isLetter = isLetter;
- this.toLowerCase = toLowerCase;
- this.stopWords = stopWords;
- }
-
- @Override
- public boolean incrementToken() {
- clearAttributes();
- // cache loop instance vars (performance)
- String s = str;
- int len = s.length();
- int i = pos;
- boolean letter = isLetter;
-
- int start = 0;
- String text;
- do {
- // find beginning of token
- text = null;
- while (i < len && !isTokenChar(s.charAt(i), letter)) {
- i++;
- }
-
- if (i < len) { // found beginning; now find end of token
- start = i;
- while (i < len && isTokenChar(s.charAt(i), letter)) {
- i++;
- }
-
- text = s.substring(start, i);
- if (toLowerCase) text = text.toLowerCase(locale);
-// if (toLowerCase) {
-//// use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
-//// see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809
-// text = s.substring(start, i).toLowerCase();
-//// char[] chars = new char[i-start];
-//// for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));
-//// text = new String(chars);
-// } else {
-// text = s.substring(start, i);
-// }
- }
- } while (text != null && isStopWord(text));
-
- pos = i;
- if (text == null)
- {
- return false;
- }
- termAtt.setEmpty().append(text);
- offsetAtt.setOffset(correctOffset(start), correctOffset(i));
- return true;
- }
-
- @Override
- public final void end() {
- // set final offset
- final int finalOffset = str.length();
- this.offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
- }
-
- private boolean isTokenChar(char c, boolean isLetter) {
- return isLetter ? Character.isLetter(c) : !Character.isWhitespace(c);
- }
-
- private boolean isStopWord(String text) {
- return stopWords != null && stopWords.contains(text);
- }
-
- @Override
- public void reset(Reader input) throws IOException {
- super.reset(input);
- this.str = PatternAnalyzer.toString(input);
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- this.pos = 0;
- }
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- /**
- * A StringReader that exposes it's contained string for fast direct access.
- * Might make sense to generalize this to CharSequence and make it public?
- */
- static final class FastStringReader extends StringReader {
-
- private final String s;
-
- FastStringReader(String s) {
- super(s);
- this.s = s;
- }
-
- String getString() {
- return s;
- }
- }
-
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java
index 514f211..448acc5 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
index f8004a6..48dd355 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -49,14 +49,14 @@ public class PrefixAndSuffixAwareTokenFilter extends TokenStream {
}
public Token updateInputToken(Token inputToken, Token lastPrefixToken) {
- inputToken.setStartOffset(lastPrefixToken.endOffset() + inputToken.startOffset());
- inputToken.setEndOffset(lastPrefixToken.endOffset() + inputToken.endOffset());
+ inputToken.setOffset(lastPrefixToken.endOffset() + inputToken.startOffset(),
+ lastPrefixToken.endOffset() + inputToken.endOffset());
return inputToken;
}
public Token updateSuffixToken(Token suffixToken, Token lastInputToken) {
- suffixToken.setStartOffset(lastInputToken.endOffset() + suffixToken.startOffset());
- suffixToken.setEndOffset(lastInputToken.endOffset() + suffixToken.endOffset());
+ suffixToken.setOffset(lastInputToken.endOffset() + suffixToken.startOffset(),
+ lastInputToken.endOffset() + suffixToken.endOffset());
return suffixToken;
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
index 603e6e1..7ec6dd2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@@ -93,7 +93,7 @@ public class PrefixAwareTokenFilter extends TokenStream {
} else {
previousPrefixToken.reinit(nextToken);
// Make it a deep copy
- Payload p = previousPrefixToken.getPayload();
+ BytesRef p = previousPrefixToken.getPayload();
if (p != null) {
previousPrefixToken.setPayload(p.clone());
}
@@ -153,8 +153,8 @@ public class PrefixAwareTokenFilter extends TokenStream {
* @return consumer token
*/
public Token updateSuffixToken(Token suffixToken, Token lastPrefixToken) {
- suffixToken.setStartOffset(lastPrefixToken.endOffset() + suffixToken.startOffset());
- suffixToken.setEndOffset(lastPrefixToken.endOffset() + suffixToken.endOffset());
+ suffixToken.setOffset(lastPrefixToken.endOffset() + suffixToken.startOffset(),
+ lastPrefixToken.endOffset() + suffixToken.endOffset());
return suffixToken;
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
index cdaba5b..ac77981 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -35,7 +35,7 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter {
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
// use a fixed version, as we don't care about case sensitivity.
- private final CharArraySet previous = new CharArraySet(Version.LUCENE_31, 8, false);
+ private final CharArraySet previous = new CharArraySet(Version.LUCENE_50, 8, false);
/**
* Creates a new RemoveDuplicatesTokenFilter
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
index 5a0e09a..0bdc357 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,8 +17,6 @@ package org.apache.lucene.analysis.miscellaneous;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
@@ -46,7 +44,7 @@ public final class SingleTokenTokenStream extends TokenStream {
}
@Override
- public final boolean incrementToken() throws IOException {
+ public final boolean incrementToken() {
if (exhausted) {
return false;
} else {
@@ -58,7 +56,7 @@ public final class SingleTokenTokenStream extends TokenStream {
}
@Override
- public void reset() throws IOException {
+ public void reset() {
exhausted = false;
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
index 1125036..b44ac8f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
index d152846..73d20f6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
index 48448c9..41a3f80 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java
index 3bcdceb..b5c242b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
index f43a6d7..94ee613 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ngram;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
index f647a17..b5163ae 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ngram;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
index ae87350..ebb6e12 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ngram;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
index 574eeec..d4b21e6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ngram;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
index 1780c54..a9a5254 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.nl;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -47,20 +47,6 @@ import java.io.Reader;
* A default set of stopwords is used unless an alternative list is specified, but the
* exclusion list is empty by default.
*
- *
- *
- *
You must specify the required {@link Version}
- * compatibility when creating DutchAnalyzer:
- *
- *
As of 3.6, {@link #DutchAnalyzer(Version, CharArraySet)} and
- * {@link #DutchAnalyzer(Version, CharArraySet, CharArraySet)} also populate
- * the default entries for the stem override dictionary
- *
As of 3.1, Snowball stemming is done with SnowballFilter,
- * LowerCaseFilter is used prior to StopFilter, and Snowball
- * stopwords are used by default.
- *
As of 2.9, StopFilter preserves position
- * increments
- *
*
*
NOTE: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.
@@ -119,26 +105,15 @@ public final class DutchAnalyzer extends Analyzer {
*
*/
public DutchAnalyzer(Version matchVersion) {
- // historically, only this ctor populated the stem dict!!!!!
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
}
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords){
- // historically, this ctor never the stem dict!!!!!
- // so we populate it only for >= 3.6
- this(matchVersion, stopwords, CharArraySet.EMPTY_SET,
- matchVersion.onOrAfter(Version.LUCENE_36)
- ? DefaultSetHolder.DEFAULT_STEM_DICT
- : CharArrayMap.emptyMap());
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
}
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable){
- // historically, this ctor never the stem dict!!!!!
- // so we populate it only for >= 3.6
- this(matchVersion, stopwords, stemExclusionTable,
- matchVersion.onOrAfter(Version.LUCENE_36)
- ? DefaultSetHolder.DEFAULT_STEM_DICT
- : CharArrayMap.emptyMap());
+ this(matchVersion, stopwords, stemExclusionTable, DefaultSetHolder.DEFAULT_STEM_DICT);
}
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap stemOverrideDict) {
@@ -160,25 +135,15 @@ public final class DutchAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader aReader) {
- if (matchVersion.onOrAfter(Version.LUCENE_31)) {
- final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stoptable);
- if (!excltable.isEmpty())
- result = new KeywordMarkerFilter(result, excltable);
- if (!stemdict.isEmpty())
- result = new StemmerOverrideFilter(matchVersion, result, stemdict);
- result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
- return new TokenStreamComponents(source, result);
- } else {
- final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new StopFilter(matchVersion, result, stoptable);
- if (!excltable.isEmpty())
- result = new KeywordMarkerFilter(result, excltable);
- result = new DutchStemFilter(result, stemdict);
- return new TokenStreamComponents(source, result);
- }
+ final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
+ TokenStream result = new StandardFilter(matchVersion, source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stoptable);
+ if (!excltable.isEmpty())
+ result = new KeywordMarkerFilter(result, excltable);
+ if (!stemdict.isEmpty())
+ result = new StemmerOverrideFilter(matchVersion, result, stemdict);
+ result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
+ return new TokenStreamComponents(source, result);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
deleted file mode 100644
index 252ce9e..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
+++ /dev/null
@@ -1,108 +0,0 @@
-package org.apache.lucene.analysis.nl;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/**
- * A {@link TokenFilter} that stems Dutch words.
- *
- * It supports a table of words that should
- * not be stemmed at all. The stemmer used can be changed at runtime after the
- * filter object is created (as long as it is a {@link DutchStemmer}).
- *
- *
- * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
- * the {@link KeywordAttribute} before this {@link TokenStream}.
- *
- * @see KeywordMarkerFilter
- * @deprecated (3.1) Use {@link SnowballFilter} with
- * {@link org.tartarus.snowball.ext.DutchStemmer} instead, which has the
- * same functionality. This filter will be removed in Lucene 5.0
- */
-@Deprecated
-public final class DutchStemFilter extends TokenFilter {
- /**
- * The actual token in the input stream.
- */
- private DutchStemmer stemmer = new DutchStemmer();
-
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
-
- public DutchStemFilter(TokenStream _in) {
- super(_in);
- }
-
- /**
- * @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
- */
- public DutchStemFilter(TokenStream _in, Map<?,?> stemdictionary) {
- this(_in);
- stemmer.setStemDictionary(stemdictionary);
- }
-
- /**
- * Returns the next token in the stream, or null at EOS
- */
- @Override
- public boolean incrementToken() throws IOException {
- if (input.incrementToken()) {
- final String term = termAtt.toString();
-
- // Check the exclusion table.
- if (!keywordAttr.isKeyword()) {
- final String s = stemmer.stem(term);
- // If not stemmed, don't waste the time adjusting the token.
- if ((s != null) && !s.equals(term))
- termAtt.setEmpty().append(s);
- }
- return true;
- } else {
- return false;
- }
- }
-
- /**
- * Set a alternative/custom {@link DutchStemmer} for this filter.
- */
- public void setStemmer(DutchStemmer stemmer) {
- if (stemmer != null) {
- this.stemmer = stemmer;
- }
- }
-
- /**
- * Set dictionary for stemming, this dictionary overrules the algorithm,
- * so you can correct for a particular unwanted word-stem pair.
- */
- public void setStemDictionary(HashMap<?,?> dict) {
- if (stemmer != null)
- stemmer.setStemDictionary(dict);
- }
-}
\ No newline at end of file
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java
deleted file mode 100644
index d146fe6..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java
+++ /dev/null
@@ -1,409 +0,0 @@
-package org.apache.lucene.analysis.nl;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Map;
-
-/**
- * A stemmer for Dutch words.
- *
- * The algorithm is an implementation of
- * the dutch stemming
- * algorithm in Martin Porter's snowball project.
- *
- * @deprecated (3.1) Use {@link org.tartarus.snowball.ext.DutchStemmer} instead,
- * which has the same functionality. This filter will be removed in Lucene 5.0
- */
-@Deprecated
-public class DutchStemmer {
- /**
- * Buffer for the terms while stemming them.
- */
- private StringBuilder sb = new StringBuilder();
- private boolean _removedE;
- private Map _stemDict;
-
- private int _R1;
- private int _R2;
-
- //TODO convert to internal
- /*
- * Stems the given term to an unique discriminator.
- *
- * @param term The term that should be stemmed.
- * @return Discriminator for term
- */
- public String stem(String term) {
- term = term.toLowerCase();
- if (!isStemmable(term))
- return term;
- if (_stemDict != null && _stemDict.containsKey(term))
- if (_stemDict.get(term) instanceof String)
- return (String) _stemDict.get(term);
- else
- return null;
-
- // Reset the StringBuilder.
- sb.delete(0, sb.length());
- sb.insert(0, term);
- // Stemming starts here...
- substitute(sb);
- storeYandI(sb);
- _R1 = getRIndex(sb, 0);
- _R1 = Math.max(3, _R1);
- step1(sb);
- step2(sb);
- _R2 = getRIndex(sb, _R1);
- step3a(sb);
- step3b(sb);
- step4(sb);
- reStoreYandI(sb);
- return sb.toString();
- }
-
- private boolean enEnding(StringBuilder sb) {
- String[] enend = new String[]{"ene", "en"};
- for (int i = 0; i < enend.length; i++) {
- String end = enend[i];
- String s = sb.toString();
- int index = s.length() - end.length();
- if (s.endsWith(end) &&
- index >= _R1 &&
- isValidEnEnding(sb, index - 1)
- ) {
- sb.delete(index, index + end.length());
- unDouble(sb, index);
- return true;
- }
- }
- return false;
- }
-
-
- private void step1(StringBuilder sb) {
- if (_R1 >= sb.length())
- return;
-
- String s = sb.toString();
- int lengthR1 = sb.length() - _R1;
- int index;
-
- if (s.endsWith("heden")) {
- sb.replace(_R1, lengthR1 + _R1, sb.substring(_R1, lengthR1 + _R1).replaceAll("heden", "heid"));
- return;
- }
-
- if (enEnding(sb))
- return;
-
- if (s.endsWith("se") &&
- (index = s.length() - 2) >= _R1 &&
- isValidSEnding(sb, index - 1)
- ) {
- sb.delete(index, index + 2);
- return;
- }
- if (s.endsWith("s") &&
- (index = s.length() - 1) >= _R1 &&
- isValidSEnding(sb, index - 1)) {
- sb.delete(index, index + 1);
- }
- }
-
- /**
- * Delete suffix e if in R1 and
- * preceded by a non-vowel, and then undouble the ending
- *
- * @param sb String being stemmed
- */
- private void step2(StringBuilder sb) {
- _removedE = false;
- if (_R1 >= sb.length())
- return;
- String s = sb.toString();
- int index = s.length() - 1;
- if (index >= _R1 &&
- s.endsWith("e") &&
- !isVowel(sb.charAt(index - 1))) {
- sb.delete(index, index + 1);
- unDouble(sb);
- _removedE = true;
- }
- }
-
- /**
- * Delete "heid"
- *
- * @param sb String being stemmed
- */
- private void step3a(StringBuilder sb) {
- if (_R2 >= sb.length())
- return;
- String s = sb.toString();
- int index = s.length() - 4;
- if (s.endsWith("heid") && index >= _R2 && sb.charAt(index - 1) != 'c') {
- sb.delete(index, index + 4); //remove heid
- enEnding(sb);
- }
- }
-
- /**
- *
A d-suffix, or derivational suffix, enables a new word,
- * often with a different grammatical category, or with a different
- * sense, to be built from another word. Whether a d-suffix can be
- * attached is discovered not from the rules of grammar, but by
- * referring to a dictionary. So in English, ness can be added to
- * certain adjectives to form corresponding nouns (littleness,
- * kindness, foolishness ...) but not to all adjectives
- * (not for example, to big, cruel, wise ...) d-suffixes can be
- * used to change meaning, often in rather exotic ways.
You must specify the required {@link Version}
- * compatibility when creating PortugueseAnalyzer:
- *
- *
As of 3.6, PortugueseLightStemFilter is used for less aggressive stemming.
- *
*/
public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;
@@ -132,11 +124,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
result = new KeywordMarkerFilter(result, stemExclusionSet);
- if (matchVersion.onOrAfter(Version.LUCENE_36)) {
- result = new PortugueseLightStemFilter(result);
- } else {
- result = new SnowballFilter(result, new PortugueseStemmer());
- }
+ result = new PortugueseLightStemFilter(result);
return new TokenStreamComponents(source, result);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java
index 81d268a..9e95a3b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.pt;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
index 367cb71..aa60f5d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.pt;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java
index aa5a371..44be026 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.pt;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
index 441d236..b1796a0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.pt;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java
index c761abd..58addf0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.pt;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java
index de0e497..a03e7d1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.pt;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
index 2fb7a1f..629b08e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.pt;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -134,7 +134,7 @@ public abstract class RSLPStemmerBase {
if (!exceptions[i].endsWith(suffix))
System.err.println("warning: useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
}
- this.exceptions = new CharArraySet(Version.LUCENE_31,
+ this.exceptions = new CharArraySet(Version.LUCENE_50,
Arrays.asList(exceptions), false);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
index 8b70d12..d4c7103 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
@@ -1,5 +1,5 @@
package org.apache.lucene.analysis.query;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
index bcf7545..e729786 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -31,14 +31,6 @@ import java.io.IOException;
* that character. For example, with a marker of \u0001, "country" =>
* "\u0001yrtnuoc". This is useful when implementing efficient leading
* wildcards search.
- *
- *
- *
You must specify the required {@link Version}
- * compatibility when creating ReverseStringFilter, or when using any of
- * its static methods:
- *
- *
As of 3.1, supplementary characters are handled correctly
- *
*/
public final class ReverseStringFilter extends TokenFilter {
@@ -74,7 +66,7 @@ public final class ReverseStringFilter extends TokenFilter {
* The reversed tokens will not be marked.
*
*
- * @param matchVersion See above
+ * @param matchVersion Lucene compatibility version
* @param in {@link TokenStream} to filter
*/
public ReverseStringFilter(Version matchVersion, TokenStream in) {
@@ -89,7 +81,7 @@ public final class ReverseStringFilter extends TokenFilter {
* character.
*
*
- * @param matchVersion See above
+ * @param matchVersion compatibility version
* @param in {@link TokenStream} to filter
* @param marker A character used to mark reversed tokens
*/
@@ -119,7 +111,7 @@ public final class ReverseStringFilter extends TokenFilter {
/**
* Reverses the given input string
*
- * @param matchVersion See above
+ * @param matchVersion compatibility version
* @param input the string to reverse
* @return the given input string in reversed order
*/
@@ -131,7 +123,7 @@ public final class ReverseStringFilter extends TokenFilter {
/**
* Reverses the given input buffer in-place
- * @param matchVersion See above
+ * @param matchVersion compatibility version
* @param buffer the input char array to reverse
*/
public static void reverse(Version matchVersion, final char[] buffer) {
@@ -141,7 +133,7 @@ public final class ReverseStringFilter extends TokenFilter {
/**
* Partially reverses the given input buffer in-place from offset 0
* up to the given length.
- * @param matchVersion See above
+ * @param matchVersion compatibility version
* @param buffer the input char array to reverse
* @param len the length in the buffer up to where the
* buffer should be reversed
@@ -152,23 +144,9 @@ public final class ReverseStringFilter extends TokenFilter {
}
/**
- * @deprecated (3.1) Remove this when support for 3.0 indexes is no longer needed.
- */
- @Deprecated
- private static void reverseUnicode3( char[] buffer, int start, int len ){
- if( len <= 1 ) return;
- int num = len>>1;
- for( int i = start; i < ( start + num ); i++ ){
- char c = buffer[i];
- buffer[i] = buffer[start * 2 + len - i - 1];
- buffer[start * 2 + len - i - 1] = c;
- }
- }
-
- /**
* Partially reverses the given input buffer in-place from the given offset
* up to the given length.
- * @param matchVersion See above
+ * @param matchVersion compatibility version
* @param buffer the input char array to reverse
* @param start the offset from where to reverse the buffer
* @param len the length in the buffer up to where the
@@ -176,10 +154,6 @@ public final class ReverseStringFilter extends TokenFilter {
*/
public static void reverse(Version matchVersion, final char[] buffer,
final int start, final int len) {
- if (!matchVersion.onOrAfter(Version.LUCENE_31)) {
- reverseUnicode3(buffer, start, len);
- return;
- }
/* modified version of Apache Harmony AbstractStringBuilder reverse0() */
if (len < 2)
return;
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
index 36c7e3d..c983371 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ro;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
index 0aa1309..0e3b42a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ru;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ru;
import java.io.IOException;
import java.io.Reader;
-import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -42,44 +41,13 @@ import org.apache.lucene.util.Version;
* Supports an external list of stopwords (words that
* will not be indexed at all).
* A default set of stopwords is used unless an alternative list is specified.
- *
- *
- *
You must specify the required {@link Version}
- * compatibility when creating RussianAnalyzer:
- *
- *
As of 3.1, StandardTokenizer is used, Snowball stemming is done with
- * SnowballFilter, and Snowball stopwords are used by default.
- *
*/
-public final class RussianAnalyzer extends StopwordAnalyzerBase
-{
- /**
- * List of typical Russian stopwords. (for backwards compatibility)
- * @deprecated (3.1) Remove this for LUCENE 5.0
- */
- @Deprecated
- private static final String[] RUSSIAN_STOP_WORDS_30 = {
- "а", "без", "более", "бы", "был", "была", "были", "было", "быть", "в",
- "вам", "вас", "весь", "во", "вот", "все", "всего", "всех", "вы", "где",
- "да", "даже", "для", "до", "его", "ее", "ей", "ею", "если", "есть",
- "еще", "же", "за", "здесь", "и", "из", "или", "им", "их", "к", "как",
- "ко", "когда", "кто", "ли", "либо", "мне", "может", "мы", "на", "надо",
- "наш", "не", "него", "нее", "нет", "ни", "них", "но", "ну", "о", "об",
- "однако", "он", "она", "они", "оно", "от", "очень", "по", "под", "при",
- "с", "со", "так", "также", "такой", "там", "те", "тем", "то", "того",
- "тоже", "той", "только", "том", "ты", "у", "уже", "хотя", "чего", "чей",
- "чем", "что", "чтобы", "чье", "чья", "эта", "эти", "это", "я"
- };
+public final class RussianAnalyzer extends StopwordAnalyzerBase {
/** File containing default Russian stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "russian_stop.txt";
private static class DefaultSetHolder {
- /** @deprecated (3.1) remove this for Lucene 5.0 */
- @Deprecated
- static final CharArraySet DEFAULT_STOP_SET_30 = CharArraySet
- .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT,
- Arrays.asList(RUSSIAN_STOP_WORDS_30), false));
static final CharArraySet DEFAULT_STOP_SET;
static {
@@ -106,9 +74,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
}
public RussianAnalyzer(Version matchVersion) {
- this(matchVersion,
- matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET
- : DefaultSetHolder.DEFAULT_STOP_SET_30);
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
@@ -151,23 +117,13 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- if (matchVersion.onOrAfter(Version.LUCENE_31)) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, source);
- result = new LowerCaseFilter(matchVersion, result);
- result = new StopFilter(matchVersion, result, stopwords);
- if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerFilter(
- result, stemExclusionSet);
- result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
- return new TokenStreamComponents(source, result);
- } else {
- final Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
- TokenStream result = new LowerCaseFilter(matchVersion, source);
- result = new StopFilter(matchVersion, result, stopwords);
- if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerFilter(
- result, stemExclusionSet);
- result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
- return new TokenStreamComponents(source, result);
- }
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(matchVersion, source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if (!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
+ return new TokenStreamComponents(source, result);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
deleted file mode 100644
index 088b802..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
+++ /dev/null
@@ -1,97 +0,0 @@
-package org.apache.lucene.analysis.ru;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-import org.apache.lucene.analysis.Tokenizer; // for javadocs
-import org.apache.lucene.analysis.util.CharTokenizer;
-import org.apache.lucene.analysis.core.LetterTokenizer;
-import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Version;
-
-/**
- * A RussianLetterTokenizer is a {@link Tokenizer} that extends {@link LetterTokenizer}
- * by also allowing the basic Latin digits 0-9.
- *
- *
- * You must specify the required {@link Version} compatibility when creating
- * {@link RussianLetterTokenizer}:
- *
- * <p>
- * As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
- * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
- * {@link CharTokenizer#normalize(int)} for details.
- *
- * @deprecated (3.1) Use {@link StandardTokenizer} instead, which has the same functionality.
- * This filter will be removed in Lucene 5.0
- */
-@Deprecated
-public class RussianLetterTokenizer extends CharTokenizer
-{
- private static final int DIGIT_0 = '0';
- private static final int DIGIT_9 = '9';
-
- /**
- * Construct a new RussianLetterTokenizer. * @param matchVersion Lucene version
- * to match See {@link above}
- *
- * @param in
- * the input to split up into tokens
- */
- public RussianLetterTokenizer(Version matchVersion, Reader in) {
- super(matchVersion, in);
- }
-
- /**
- * Construct a new RussianLetterTokenizer using a given {@link AttributeSource}.
- *
- * @param matchVersion
- * Lucene version to match See {@link above}
- * @param source
- * the attribute source to use for this {@link Tokenizer}
- * @param in
- * the input to split up into tokens
- */
- public RussianLetterTokenizer(Version matchVersion, AttributeSource source, Reader in) {
- super(matchVersion, source, in);
- }
-
- /**
- * Construct a new RussianLetterTokenizer using a given
- * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}. * @param
- * matchVersion Lucene version to match See
- * {@link above}
- *
- * @param factory
- * the attribute factory to use for this {@link Tokenizer}
- * @param in
- * the input to split up into tokens
- */
- public RussianLetterTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
- super(matchVersion, factory, in);
- }
-
- /**
- * Collects only characters which satisfy
- * {@link Character#isLetter(int)}.
- */
- @Override
- protected boolean isTokenChar(int c) {
- return Character.isLetter(c) || (c >= DIGIT_0 && c <= DIGIT_9);
- }
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java
index 826b22d..36c8e0d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ru;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
index e58bf38..b255473 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.ru;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
index 2e41a14..8c75832 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.shingle;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
index d0b8e05..b14338a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.shingle;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
index 948f9b9..efa4ec4 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.sinks;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
index c5aff60..418fe7a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.sinks;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -212,7 +212,7 @@ public final class TeeSinkTokenFilter extends TokenFilter {
}
@Override
- public final boolean incrementToken() throws IOException {
+ public final boolean incrementToken() {
// lazy init the iterator
if (it == null) {
it = cachedStates.iterator();
@@ -228,7 +228,7 @@ public final class TeeSinkTokenFilter extends TokenFilter {
}
@Override
- public final void end() throws IOException {
+ public final void end() {
if (finalState != null) {
restoreState(finalState);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkFilter.java
index b5488a8..b10bd78 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.sinks;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
deleted file mode 100644
index d9c624d..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
+++ /dev/null
@@ -1,88 +0,0 @@
-package org.apache.lucene.analysis.snowball;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
-import org.apache.lucene.analysis.standard.*;
-import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
-
-import java.io.Reader;
-
-/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
- * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
- *
- * Available stemmers are listed in org.tartarus.snowball.ext. The name of a
- * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
- * {@link org.tartarus.snowball.ext.EnglishStemmer} is named "English".
- *
- * <p>
- * NOTE: This class uses the same {@link Version}
- * dependent settings as {@link StandardAnalyzer}, with the following addition:
- *
- * <p>
- * As of 3.1, uses {@link TurkishLowerCaseFilter} for Turkish language.
- *
- *
- * @deprecated (3.1) Use the language-specific analyzer in modules/analysis instead.
- * This analyzer will be removed in Lucene 5.0
- */
-@Deprecated
-public final class SnowballAnalyzer extends Analyzer {
- private String name;
- private CharArraySet stopSet;
- private final Version matchVersion;
-
- /** Builds the named analyzer with no stop words. */
- public SnowballAnalyzer(Version matchVersion, String name) {
- this.name = name;
- this.matchVersion = matchVersion;
- }
-
- /** Builds the named analyzer with the given stop words. */
- public SnowballAnalyzer(Version matchVersion, String name, CharArraySet stopWords) {
- this(matchVersion, name);
- stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion,
- stopWords));
- }
-
- /** Constructs a {@link StandardTokenizer} filtered by a {@link
- StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},
- and a {@link SnowballFilter} */
- @Override
- public TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(matchVersion, tokenizer);
- // remove the possessive 's for english stemmers
- if (matchVersion.onOrAfter(Version.LUCENE_31) &&
- (name.equals("English") || name.equals("Porter") || name.equals("Lovins")))
- result = new EnglishPossessiveFilter(result);
- // Use a special lowercase filter for turkish, the stemmer expects it.
- if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish"))
- result = new TurkishLowerCaseFilter(result);
- else
- result = new LowerCaseFilter(matchVersion, result);
- if (stopSet != null)
- result = new StopFilter(matchVersion,
- result, stopSet);
- result = new SnowballFilter(result, name);
- return new TokenStreamComponents(tokenizer, result);
- }
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
index 7a2639e..4231f68 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.snowball;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
index 5115a5e..9901aef 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.standard;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilter.java
index 48872ac..054b9c4 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.standard;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
index 9750e27..3576c83 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
index 32e990d..f7bc82a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
@@ -2,7 +2,7 @@
package org.apache.lucene.analysis.standard;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
index 13d15f1..fdec88a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.standard;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
index 8771466..809f965 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.standard;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -21,61 +21,19 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
/**
* Normalizes tokens extracted with {@link StandardTokenizer}.
*/
public class StandardFilter extends TokenFilter {
- private final Version matchVersion;
public StandardFilter(Version matchVersion, TokenStream in) {
super(in);
- this.matchVersion = matchVersion;
}
- private static final String APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
- private static final String ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
-
- // this filters uses attribute type
- private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-
@Override
public final boolean incrementToken() throws IOException {
- if (matchVersion.onOrAfter(Version.LUCENE_31))
- return input.incrementToken(); // TODO: add some niceties for the new grammar
- else
- return incrementTokenClassic();
- }
-
- public final boolean incrementTokenClassic() throws IOException {
- if (!input.incrementToken()) {
- return false;
- }
-
- final char[] buffer = termAtt.buffer();
- final int bufferLength = termAtt.length();
- final String type = typeAtt.type();
-
- if (type == APOSTROPHE_TYPE && // remove 's
- bufferLength >= 2 &&
- buffer[bufferLength-2] == '\'' &&
- (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
- // Strip last 2 characters off
- termAtt.setLength(bufferLength - 2);
- } else if (type == ACRONYM_TYPE) { // remove dots
- int upto = 0;
- for(int i=0;i
- * As of Lucene version 3.1, this class implements the Word Break rules from the
+ * This class implements the Word Break rules from the
* Unicode Text Segmentation algorithm, as specified in
* Unicode Standard Annex #29.
*
*
Many applications have specific tokenizer needs. If this tokenizer does
* not suit your application, please consider copying this source code
* directory to your project and maintaining your own grammar-based tokenizer.
- *
- *
- *
You must specify the required {@link Version}
- * compatibility when creating StandardTokenizer:
- *
- *
As of 3.4, Hiragana and Han characters are no longer wrongly split
- * from their combining characters. If you use a previous version number,
- * you get the exact broken behavior for backwards compatibility.
- *
As of 3.1, StandardTokenizer implements Unicode text segmentation.
- * If you use a previous version number, you get the exact behavior of
- * {@link ClassicTokenizer} for backwards compatibility.
- *
*/
public final class StandardTokenizer extends Tokenizer {
/** A private instance of the JFlex-constructed scanner */
private StandardTokenizerInterface scanner;
+ // TODO: how can we remove these old types?!
public static final int ALPHANUM = 0;
/** @deprecated (3.1) */
@Deprecated
@@ -146,13 +134,7 @@ public final class StandardTokenizer extends Tokenizer {
}
private final void init(Version matchVersion) {
- if (matchVersion.onOrAfter(Version.LUCENE_34)) {
- this.scanner = new StandardTokenizerImpl(input);
- } else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
- this.scanner = new StandardTokenizerImpl31(input);
- } else {
- this.scanner = new ClassicTokenizerImpl(input);
- }
+ this.scanner = new StandardTokenizerImpl(input);
}
// this tokenizer generates three attributes:
@@ -184,15 +166,7 @@ public final class StandardTokenizer extends Tokenizer {
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
- // This 'if' should be removed in the next release. For now, it converts
- // invalid acronyms to HOST. When removed, only the 'else' part should
- // remain.
- if (tokenType == StandardTokenizer.ACRONYM_DEP) {
- typeAtt.setType(StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST]);
- termAtt.setLength(termAtt.length() - 1); // remove extra '.'
- } else {
- typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
- }
+ typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
return true;
} else
// When we skip a too-long term, we still increment the
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
index d44467b..ba0737e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
@@ -2,7 +2,7 @@
package org.apache.lucene.analysis.standard;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
index a3a17c3..26ef499 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.standard;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
index e7e7951..8b8f494 100755
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.standard;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
index 15432c8..66b4f8a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.standard;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -23,8 +23,6 @@ import java.io.InputStreamReader;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.standard.std31.UAX29URLEmailTokenizerImpl31;
-import org.apache.lucene.analysis.standard.std34.UAX29URLEmailTokenizerImpl34;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -50,14 +48,6 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
*
<IDEOGRAPHIC>: A single CJKV ideographic character
*
<HIRAGANA>: A single hiragana character
*
- *
- *
You must specify the required {@link Version}
- * compatibility when creating UAX29URLEmailTokenizer:
- *
- *
As of 3.4, Hiragana and Han characters are no longer wrongly split
- * from their combining characters. If you use a previous version number,
- * you get the exact broken behavior for backwards compatibility.
- *
*/
public final class UAX29URLEmailTokenizer extends Tokenizer {
@@ -128,13 +118,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
}
private static StandardTokenizerInterface getScannerFor(Version matchVersion, Reader input) {
- if (matchVersion.onOrAfter(Version.LUCENE_36)) {
- return new UAX29URLEmailTokenizerImpl(input);
- } else if (matchVersion.onOrAfter(Version.LUCENE_34)) {
- return new UAX29URLEmailTokenizerImpl34(input);
- } else {
- return new UAX29URLEmailTokenizerImpl31(input);
- }
+ return new UAX29URLEmailTokenizerImpl(input);
}
// this tokenizer generates three attributes:
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
index 340b84c..030b9fa 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
@@ -2,7 +2,7 @@
package org.apache.lucene.analysis.standard;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/ASCIITLD.jflex-macro b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/ASCIITLD.jflex-macro
deleted file mode 100644
index ed8a0ab..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/ASCIITLD.jflex-macro
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * Copyright 2001-2005 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Generated from IANA Root Zone Database
-// file version from Wednesday, February 9, 2011 12:34:10 PM UTC
-// generated on Wednesday, February 9, 2011 4:45:18 PM UTC
-// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
-
-ASCIITLD = "." (
- [aA][cC]
- | [aA][dD]
- | [aA][eE]
- | [aA][eE][rR][oO]
- | [aA][fF]
- | [aA][gG]
- | [aA][iI]
- | [aA][lL]
- | [aA][mM]
- | [aA][nN]
- | [aA][oO]
- | [aA][qQ]
- | [aA][rR]
- | [aA][rR][pP][aA]
- | [aA][sS]
- | [aA][sS][iI][aA]
- | [aA][tT]
- | [aA][uU]
- | [aA][wW]
- | [aA][xX]
- | [aA][zZ]
- | [bB][aA]
- | [bB][bB]
- | [bB][dD]
- | [bB][eE]
- | [bB][fF]
- | [bB][gG]
- | [bB][hH]
- | [bB][iI]
- | [bB][iI][zZ]
- | [bB][jJ]
- | [bB][mM]
- | [bB][nN]
- | [bB][oO]
- | [bB][rR]
- | [bB][sS]
- | [bB][tT]
- | [bB][vV]
- | [bB][wW]
- | [bB][yY]
- | [bB][zZ]
- | [cC][aA]
- | [cC][aA][tT]
- | [cC][cC]
- | [cC][dD]
- | [cC][fF]
- | [cC][gG]
- | [cC][hH]
- | [cC][iI]
- | [cC][kK]
- | [cC][lL]
- | [cC][mM]
- | [cC][nN]
- | [cC][oO]
- | [cC][oO][mM]
- | [cC][oO][oO][pP]
- | [cC][rR]
- | [cC][uU]
- | [cC][vV]
- | [cC][xX]
- | [cC][yY]
- | [cC][zZ]
- | [dD][eE]
- | [dD][jJ]
- | [dD][kK]
- | [dD][mM]
- | [dD][oO]
- | [dD][zZ]
- | [eE][cC]
- | [eE][dD][uU]
- | [eE][eE]
- | [eE][gG]
- | [eE][rR]
- | [eE][sS]
- | [eE][tT]
- | [eE][uU]
- | [fF][iI]
- | [fF][jJ]
- | [fF][kK]
- | [fF][mM]
- | [fF][oO]
- | [fF][rR]
- | [gG][aA]
- | [gG][bB]
- | [gG][dD]
- | [gG][eE]
- | [gG][fF]
- | [gG][gG]
- | [gG][hH]
- | [gG][iI]
- | [gG][lL]
- | [gG][mM]
- | [gG][nN]
- | [gG][oO][vV]
- | [gG][pP]
- | [gG][qQ]
- | [gG][rR]
- | [gG][sS]
- | [gG][tT]
- | [gG][uU]
- | [gG][wW]
- | [gG][yY]
- | [hH][kK]
- | [hH][mM]
- | [hH][nN]
- | [hH][rR]
- | [hH][tT]
- | [hH][uU]
- | [iI][dD]
- | [iI][eE]
- | [iI][lL]
- | [iI][mM]
- | [iI][nN]
- | [iI][nN][fF][oO]
- | [iI][nN][tT]
- | [iI][oO]
- | [iI][qQ]
- | [iI][rR]
- | [iI][sS]
- | [iI][tT]
- | [jJ][eE]
- | [jJ][mM]
- | [jJ][oO]
- | [jJ][oO][bB][sS]
- | [jJ][pP]
- | [kK][eE]
- | [kK][gG]
- | [kK][hH]
- | [kK][iI]
- | [kK][mM]
- | [kK][nN]
- | [kK][pP]
- | [kK][rR]
- | [kK][wW]
- | [kK][yY]
- | [kK][zZ]
- | [lL][aA]
- | [lL][bB]
- | [lL][cC]
- | [lL][iI]
- | [lL][kK]
- | [lL][rR]
- | [lL][sS]
- | [lL][tT]
- | [lL][uU]
- | [lL][vV]
- | [lL][yY]
- | [mM][aA]
- | [mM][cC]
- | [mM][dD]
- | [mM][eE]
- | [mM][gG]
- | [mM][hH]
- | [mM][iI][lL]
- | [mM][kK]
- | [mM][lL]
- | [mM][mM]
- | [mM][nN]
- | [mM][oO]
- | [mM][oO][bB][iI]
- | [mM][pP]
- | [mM][qQ]
- | [mM][rR]
- | [mM][sS]
- | [mM][tT]
- | [mM][uU]
- | [mM][uU][sS][eE][uU][mM]
- | [mM][vV]
- | [mM][wW]
- | [mM][xX]
- | [mM][yY]
- | [mM][zZ]
- | [nN][aA]
- | [nN][aA][mM][eE]
- | [nN][cC]
- | [nN][eE]
- | [nN][eE][tT]
- | [nN][fF]
- | [nN][gG]
- | [nN][iI]
- | [nN][lL]
- | [nN][oO]
- | [nN][pP]
- | [nN][rR]
- | [nN][uU]
- | [nN][zZ]
- | [oO][mM]
- | [oO][rR][gG]
- | [pP][aA]
- | [pP][eE]
- | [pP][fF]
- | [pP][gG]
- | [pP][hH]
- | [pP][kK]
- | [pP][lL]
- | [pP][mM]
- | [pP][nN]
- | [pP][rR]
- | [pP][rR][oO]
- | [pP][sS]
- | [pP][tT]
- | [pP][wW]
- | [pP][yY]
- | [qQ][aA]
- | [rR][eE]
- | [rR][oO]
- | [rR][sS]
- | [rR][uU]
- | [rR][wW]
- | [sS][aA]
- | [sS][bB]
- | [sS][cC]
- | [sS][dD]
- | [sS][eE]
- | [sS][gG]
- | [sS][hH]
- | [sS][iI]
- | [sS][jJ]
- | [sS][kK]
- | [sS][lL]
- | [sS][mM]
- | [sS][nN]
- | [sS][oO]
- | [sS][rR]
- | [sS][tT]
- | [sS][uU]
- | [sS][vV]
- | [sS][yY]
- | [sS][zZ]
- | [tT][cC]
- | [tT][dD]
- | [tT][eE][lL]
- | [tT][fF]
- | [tT][gG]
- | [tT][hH]
- | [tT][jJ]
- | [tT][kK]
- | [tT][lL]
- | [tT][mM]
- | [tT][nN]
- | [tT][oO]
- | [tT][pP]
- | [tT][rR]
- | [tT][rR][aA][vV][eE][lL]
- | [tT][tT]
- | [tT][vV]
- | [tT][wW]
- | [tT][zZ]
- | [uU][aA]
- | [uU][gG]
- | [uU][kK]
- | [uU][sS]
- | [uU][yY]
- | [uU][zZ]
- | [vV][aA]
- | [vV][cC]
- | [vV][eE]
- | [vV][gG]
- | [vV][iI]
- | [vV][nN]
- | [vV][uU]
- | [wW][fF]
- | [wW][sS]
- | [xX][nN]--0[zZ][wW][mM]56[dD]
- | [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
- | [xX][nN]--3[eE]0[bB]707[eE]
- | [xX][nN]--45[bB][rR][jJ]9[cC]
- | [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
- | [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
- | [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
- | [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
- | [xX][nN]--[fF][iI][qQ][sS]8[sS]
- | [xX][nN]--[fF][iI][qQ][zZ]9[sS]
- | [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD]
- | [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
- | [xX][nN]--[gG]6[wW]251[dD]
- | [xX][nN]--[gG][eE][cC][rR][jJ]9[cC]
- | [xX][nN]--[hH]2[bB][rR][jJ]9[cC]
- | [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]
- | [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
- | [xX][nN]--[jJ]6[wW]193[gG]
- | [xX][nN]--[jJ][xX][aA][lL][pP][dD][lL][pP]
- | [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]
- | [xX][nN]--[kK][pP][rR][wW]13[dD]
- | [xX][nN]--[kK][pP][rR][yY]57[dD]
- | [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
- | [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
- | [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
- | [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
- | [xX][nN]--[oO]3[cC][wW]4[hH]
- | [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
- | [xX][nN]--[pP]1[aA][iI]
- | [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
- | [xX][nN]--[sS]9[bB][rR][jJ]9[cC]
- | [xX][nN]--[wW][gG][bB][hH]1[cC]
- | [xX][nN]--[wW][gG][bB][lL]6[aA]
- | [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
- | [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH]
- | [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
- | [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
- | [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
- | [yY][eE]
- | [yY][tT]
- | [zZ][aA]
- | [zZ][mM]
- | [zZ][wW]
- ) "."? // Accept trailing root (empty) domain
-
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/SUPPLEMENTARY.jflex-macro b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/SUPPLEMENTARY.jflex-macro
deleted file mode 100644
index c505bf4..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/SUPPLEMENTARY.jflex-macro
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright 2010 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Generated using ICU4J 4.6.0.0 on Wednesday, February 9, 2011 4:45:11 PM UTC
-// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
-
-
-ALetterSupp = (
- ([\ud80d][\uDC00-\uDC2E])
- | ([\ud80c][\uDC00-\uDFFF])
- | ([\ud809][\uDC00-\uDC62])
- | ([\ud808][\uDC00-\uDF6E])
- | ([\ud81a][\uDC00-\uDE38])
- | ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF])
- | ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB])
- | ([\ud801][\uDC00-\uDC9D])
- | ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5])
- | ([\ud803][\uDC00-\uDC48])
- | ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72])
-)
-FormatSupp = (
- ([\ud804][\uDCBD])
- | ([\ud834][\uDD73-\uDD7A])
- | ([\udb40][\uDC01\uDC20-\uDC7F])
-)
-ExtendSupp = (
- ([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA])
- | ([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44])
- | ([\ud800][\uDDFD])
- | ([\udb40][\uDD00-\uDDEF])
- | ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
-)
-NumericSupp = (
- ([\ud804][\uDC66-\uDC6F])
- | ([\ud835][\uDFCE-\uDFFF])
- | ([\ud801][\uDCA0-\uDCA9])
-)
-KatakanaSupp = (
- ([\ud82c][\uDC00])
-)
-MidLetterSupp = (
- []
-)
-MidNumSupp = (
- []
-)
-MidNumLetSupp = (
- []
-)
-ExtendNumLetSupp = (
- []
-)
-ExtendNumLetSupp = (
- []
-)
-ComplexContextSupp = (
- []
-)
-HanSupp = (
- ([\ud87e][\uDC00-\uDE1D])
- | ([\ud86b][\uDC00-\uDFFF])
- | ([\ud86a][\uDC00-\uDFFF])
- | ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF])
- | ([\ud868][\uDC00-\uDFFF])
- | ([\ud86e][\uDC00-\uDC1D])
- | ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF])
- | ([\ud86c][\uDC00-\uDFFF])
- | ([\ud863][\uDC00-\uDFFF])
- | ([\ud862][\uDC00-\uDFFF])
- | ([\ud861][\uDC00-\uDFFF])
- | ([\ud860][\uDC00-\uDFFF])
- | ([\ud867][\uDC00-\uDFFF])
- | ([\ud866][\uDC00-\uDFFF])
- | ([\ud865][\uDC00-\uDFFF])
- | ([\ud864][\uDC00-\uDFFF])
- | ([\ud858][\uDC00-\uDFFF])
- | ([\ud859][\uDC00-\uDFFF])
- | ([\ud85a][\uDC00-\uDFFF])
- | ([\ud85b][\uDC00-\uDFFF])
- | ([\ud85c][\uDC00-\uDFFF])
- | ([\ud85d][\uDC00-\uDFFF])
- | ([\ud85e][\uDC00-\uDFFF])
- | ([\ud85f][\uDC00-\uDFFF])
- | ([\ud850][\uDC00-\uDFFF])
- | ([\ud851][\uDC00-\uDFFF])
- | ([\ud852][\uDC00-\uDFFF])
- | ([\ud853][\uDC00-\uDFFF])
- | ([\ud854][\uDC00-\uDFFF])
- | ([\ud855][\uDC00-\uDFFF])
- | ([\ud856][\uDC00-\uDFFF])
- | ([\ud857][\uDC00-\uDFFF])
- | ([\ud849][\uDC00-\uDFFF])
- | ([\ud848][\uDC00-\uDFFF])
- | ([\ud84b][\uDC00-\uDFFF])
- | ([\ud84a][\uDC00-\uDFFF])
- | ([\ud84d][\uDC00-\uDFFF])
- | ([\ud84c][\uDC00-\uDFFF])
- | ([\ud84f][\uDC00-\uDFFF])
- | ([\ud84e][\uDC00-\uDFFF])
- | ([\ud841][\uDC00-\uDFFF])
- | ([\ud840][\uDC00-\uDFFF])
- | ([\ud843][\uDC00-\uDFFF])
- | ([\ud842][\uDC00-\uDFFF])
- | ([\ud845][\uDC00-\uDFFF])
- | ([\ud844][\uDC00-\uDFFF])
- | ([\ud847][\uDC00-\uDFFF])
- | ([\ud846][\uDC00-\uDFFF])
-)
-HiraganaSupp = (
- ([\ud83c][\uDE00])
- | ([\ud82c][\uDC01])
-)
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java
deleted file mode 100644
index ab2b9c2..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java
+++ /dev/null
@@ -1,1089 +0,0 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */
-
-package org.apache.lucene.analysis.standard.std31;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.standard.StandardTokenizerInterface;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/**
- * This class implements StandardTokenizer, except with a bug
- * (https://issues.apache.org/jira/browse/LUCENE-3358) where Han and Hiragana
- * characters would be split from combining characters:
- * @deprecated This class is only for exact backwards compatibility
- */
-@Deprecated
-
-/**
- * This class is a scanner generated by
- * JFlex 1.5.0-SNAPSHOT
- * on 9/30/11 12:10 PM from the specification file
- * /lucene/jflex/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex
- */
-public final class StandardTokenizerImpl31 implements StandardTokenizerInterface {
-
- /** This character denotes the end of file */
- public static final int YYEOF = -1;
-
- /** initial size of the lookahead buffer */
- private static final int ZZ_BUFFERSIZE = 16384;
-
- /** lexical states */
- public static final int YYINITIAL = 0;
-
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
- private static final int ZZ_LEXSTATE[] = {
- 0, 0
- };
-
- /**
- * Translates characters to character classes
- */
- private static final String ZZ_CMAP_PACKED =
- "\47\0\1\140\4\0\1\137\1\0\1\140\1\0\12\134\1\136\1\137"+
- "\5\0\32\132\4\0\1\141\1\0\32\132\57\0\1\132\2\0\1\133"+
- "\7\0\1\132\1\0\1\136\2\0\1\132\5\0\27\132\1\0\37\132"+
- "\1\0\u01ca\132\4\0\14\132\16\0\5\132\7\0\1\132\1\0\1\132"+
- "\21\0\160\133\5\132\1\0\2\132\2\0\4\132\1\137\7\0\1\132"+
- "\1\136\3\132\1\0\1\132\1\0\24\132\1\0\123\132\1\0\213\132"+
- "\1\0\7\133\236\132\11\0\46\132\2\0\1\132\7\0\47\132\1\0"+
- "\1\137\7\0\55\133\1\0\1\133\1\0\2\133\1\0\2\133\1\0"+
- "\1\133\10\0\33\132\5\0\4\132\1\136\13\0\4\133\10\0\2\137"+
- "\2\0\13\133\5\0\53\132\25\133\12\134\1\0\1\134\1\137\1\0"+
- "\2\132\1\133\143\132\1\0\1\132\7\133\1\133\1\0\6\133\2\132"+
- "\2\133\1\0\4\133\2\132\12\134\3\132\2\0\1\132\17\0\1\133"+
- "\1\132\1\133\36\132\33\133\2\0\131\132\13\133\1\132\16\0\12\134"+
- "\41\132\11\133\2\132\2\0\1\137\1\0\1\132\5\0\26\132\4\133"+
- "\1\132\11\133\1\132\3\133\1\132\5\133\22\0\31\132\3\133\244\0"+
- "\4\133\66\132\3\133\1\132\22\133\1\132\7\133\12\132\2\133\2\0"+
- "\12\134\1\0\7\132\1\0\7\132\1\0\3\133\1\0\10\132\2\0"+
- "\2\132\2\0\26\132\1\0\7\132\1\0\1\132\3\0\4\132\2\0"+
- "\1\133\1\132\7\133\2\0\2\133\2\0\3\133\1\132\10\0\1\133"+
- "\4\0\2\132\1\0\3\132\2\133\2\0\12\134\2\132\17\0\3\133"+
- "\1\0\6\132\4\0\2\132\2\0\26\132\1\0\7\132\1\0\2\132"+
- "\1\0\2\132\1\0\2\132\2\0\1\133\1\0\5\133\4\0\2\133"+
- "\2\0\3\133\3\0\1\133\7\0\4\132\1\0\1\132\7\0\12\134"+
- "\2\133\3\132\1\133\13\0\3\133\1\0\11\132\1\0\3\132\1\0"+
- "\26\132\1\0\7\132\1\0\2\132\1\0\5\132\2\0\1\133\1\132"+
- "\10\133\1\0\3\133\1\0\3\133\2\0\1\132\17\0\2\132\2\133"+
- "\2\0\12\134\21\0\3\133\1\0\10\132\2\0\2\132\2\0\26\132"+
- "\1\0\7\132\1\0\2\132\1\0\5\132\2\0\1\133\1\132\7\133"+
- "\2\0\2\133\2\0\3\133\10\0\2\133\4\0\2\132\1\0\3\132"+
- "\2\133\2\0\12\134\1\0\1\132\20\0\1\133\1\132\1\0\6\132"+
- "\3\0\3\132\1\0\4\132\3\0\2\132\1\0\1\132\1\0\2\132"+
- "\3\0\2\132\3\0\3\132\3\0\14\132\4\0\5\133\3\0\3\133"+
- "\1\0\4\133\2\0\1\132\6\0\1\133\16\0\12\134\21\0\3\133"+
- "\1\0\10\132\1\0\3\132\1\0\27\132\1\0\12\132\1\0\5\132"+
- "\3\0\1\132\7\133\1\0\3\133\1\0\4\133\7\0\2\133\1\0"+
- "\2\132\6\0\2\132\2\133\2\0\12\134\22\0\2\133\1\0\10\132"+
- "\1\0\3\132\1\0\27\132\1\0\12\132\1\0\5\132\2\0\1\133"+
- "\1\132\7\133\1\0\3\133\1\0\4\133\7\0\2\133\7\0\1\132"+
- "\1\0\2\132\2\133\2\0\12\134\1\0\2\132\17\0\2\133\1\0"+
- "\10\132\1\0\3\132\1\0\51\132\2\0\1\132\7\133\1\0\3\133"+
- "\1\0\4\133\1\132\10\0\1\133\10\0\2\132\2\133\2\0\12\134"+
- "\12\0\6\132\2\0\2\133\1\0\22\132\3\0\30\132\1\0\11\132"+
- "\1\0\1\132\2\0\7\132\3\0\1\133\4\0\6\133\1\0\1\133"+
- "\1\0\10\133\22\0\2\133\15\0\60\142\1\143\2\142\7\143\5\0"+
- "\7\142\10\143\1\0\12\134\47\0\2\142\1\0\1\142\2\0\2\142"+
- "\1\0\1\142\2\0\1\142\6\0\4\142\1\0\7\142\1\0\3\142"+
- "\1\0\1\142\1\0\1\142\2\0\2\142\1\0\4\142\1\143\2\142"+
- "\6\143\1\0\2\143\1\142\2\0\5\142\1\0\1\142\1\0\6\143"+
- "\2\0\12\134\2\0\2\142\42\0\1\132\27\0\2\133\6\0\12\134"+
- "\13\0\1\133\1\0\1\133\1\0\1\133\4\0\2\133\10\132\1\0"+
- "\44\132\4\0\24\133\1\0\2\133\5\132\13\133\1\0\44\133\11\0"+
- "\1\133\71\0\53\142\24\143\1\142\12\134\6\0\6\142\4\143\4\142"+
- "\3\143\1\142\3\143\2\142\7\143\3\142\4\143\15\142\14\143\1\142"+
- "\1\143\12\134\4\143\2\142\46\132\12\0\53\132\1\0\1\132\3\0"+
- "\u0100\146\111\132\1\0\4\132\2\0\7\132\1\0\1\132\1\0\4\132"+
- "\2\0\51\132\1\0\4\132\2\0\41\132\1\0\4\132\2\0\7\132"+
- "\1\0\1\132\1\0\4\132\2\0\17\132\1\0\71\132\1\0\4\132"+
- "\2\0\103\132\2\0\3\133\40\0\20\132\20\0\125\132\14\0\u026c\132"+
- "\2\0\21\132\1\0\32\132\5\0\113\132\3\0\3\132\17\0\15\132"+
- "\1\0\4\132\3\133\13\0\22\132\3\133\13\0\22\132\2\133\14\0"+
- "\15\132\1\0\3\132\1\0\2\133\14\0\64\142\2\143\36\143\3\0"+
- "\1\142\4\0\1\142\1\143\2\0\12\134\41\0\3\133\2\0\12\134"+
- "\6\0\130\132\10\0\51\132\1\133\1\132\5\0\106\132\12\0\35\132"+
- "\3\0\14\133\4\0\14\133\12\0\12\134\36\142\2\0\5\142\13\0"+
- "\54\142\4\0\21\143\7\142\2\143\6\0\12\134\1\142\3\0\2\142"+
- "\40\0\27\132\5\133\4\0\65\142\12\143\1\0\35\143\2\0\1\133"+
- "\12\134\6\0\12\134\6\0\16\142\122\0\5\133\57\132\21\133\7\132"+
- "\4\0\12\134\21\0\11\133\14\0\3\133\36\132\12\133\3\0\2\132"+
- "\12\134\6\0\46\132\16\133\14\0\44\132\24\133\10\0\12\134\3\0"+
- "\3\132\12\134\44\132\122\0\3\133\1\0\25\133\4\132\1\133\4\132"+
- "\1\133\15\0\300\132\47\133\25\0\4\133\u0116\132\2\0\6\132\2\0"+
- "\46\132\2\0\6\132\2\0\10\132\1\0\1\132\1\0\1\132\1\0"+
- "\1\132\1\0\37\132\2\0\65\132\1\0\7\132\1\0\1\132\3\0"+
- "\3\132\1\0\7\132\3\0\4\132\2\0\6\132\4\0\15\132\5\0"+
- "\3\132\1\0\7\132\17\0\2\133\2\133\10\0\2\140\12\0\1\140"+
- "\2\0\1\136\2\0\5\133\20\0\2\141\3\0\1\137\17\0\1\141"+
- "\13\0\5\133\5\0\6\133\1\0\1\132\15\0\1\132\20\0\15\132"+
- "\63\0\41\133\21\0\1\132\4\0\1\132\2\0\12\132\1\0\1\132"+
- "\3\0\5\132\6\0\1\132\1\0\1\132\1\0\1\132\1\0\4\132"+
- "\1\0\13\132\2\0\4\132\5\0\5\132\4\0\1\132\21\0\51\132"+
- "\u032d\0\64\132\u0716\0\57\132\1\0\57\132\1\0\205\132\6\0\4\132"+
- "\3\133\16\0\46\132\12\0\66\132\11\0\1\132\17\0\1\133\27\132"+
- "\11\0\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0\7\132"+
- "\1\0\7\132\1\0\7\132\1\0\7\132\1\0\40\133\57\0\1\132"+
- "\120\0\32\144\1\0\131\144\14\0\326\144\57\0\1\132\1\0\1\144"+
- "\31\0\11\144\4\133\2\133\1\0\5\135\2\0\3\144\1\132\1\132"+
- "\4\0\126\145\2\0\2\133\2\135\3\145\133\135\1\0\4\135\5\0"+
- "\51\132\3\0\136\146\21\0\33\132\65\0\20\135\37\0\101\0\37\0"+
- "\121\0\57\135\1\0\130\135\250\0\u19b6\144\112\0\u51cc\144\64\0\u048d\132"+
- "\103\0\56\132\2\0\u010d\132\3\0\20\132\12\134\2\132\24\0\57\132"+
- "\4\133\11\0\2\133\1\0\31\132\10\0\120\132\2\133\45\0\11\132"+
- "\2\0\147\132\2\0\4\132\1\0\2\132\16\0\12\132\120\0\10\132"+
- "\1\133\3\132\1\133\4\132\1\133\27\132\5\133\30\0\64\132\14\0"+
- "\2\133\62\132\21\133\13\0\12\134\6\0\22\133\6\132\3\0\1\132"+
- "\4\0\12\134\34\132\10\133\2\0\27\132\15\133\14\0\35\146\3\0"+
- "\4\133\57\132\16\133\16\0\1\132\12\134\46\0\51\132\16\133\11\0"+
- "\3\132\1\133\10\132\2\133\2\0\12\134\6\0\33\142\1\143\4\0"+
- "\60\142\1\143\1\142\3\143\2\142\2\143\5\142\2\143\1\142\1\143"+
- "\1\142\30\0\5\142\41\0\6\132\2\0\6\132\2\0\6\132\11\0"+
- "\7\132\1\0\7\132\221\0\43\132\10\133\1\0\2\133\2\0\12\134"+
- "\6\0\u2ba4\146\14\0\27\146\4\0\61\146\4\0\1\31\1\25\1\46"+
- "\1\43\1\13\3\0\1\7\1\5\2\0\1\3\1\1\14\0\1\11"+
- "\21\0\1\112\7\0\1\65\1\17\6\0\1\130\3\0\1\120\1\120"+
- "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
- "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
- "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
- "\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\121"+
- "\1\120\1\120\1\120\1\125\1\123\17\0\1\114\u02c1\0\1\70\277\0"+
- "\1\113\1\71\1\2\3\124\2\35\1\124\1\35\2\124\1\14\21\124"+
- "\2\60\7\73\1\72\7\73\7\52\1\15\1\52\1\75\2\45\1\44"+
- "\1\75\1\45\1\44\10\75\2\63\5\61\2\54\5\61\1\6\10\37"+
- "\5\21\3\27\12\106\20\27\3\42\32\30\1\26\2\24\2\110\1\111"+
- "\2\110\2\111\2\110\1\111\3\24\1\16\2\24\12\64\1\74\1\41"+
- "\1\34\1\64\6\41\1\34\66\41\5\115\6\103\1\51\4\103\2\51"+
- "\10\103\1\51\7\100\1\12\2\100\32\103\1\12\4\100\1\12\5\102"+
- "\1\101\1\102\3\101\7\102\1\101\23\102\5\67\3\102\6\67\2\67"+
- "\6\66\10\66\2\100\7\66\36\100\4\66\102\100\15\115\1\77\2\115"+
- "\1\131\3\117\1\115\2\117\5\115\4\117\4\116\1\115\3\116\1\115"+
- "\5\116\26\56\4\23\1\105\2\104\4\122\1\104\2\122\3\76\33\122"+
- "\35\55\3\122\35\126\3\122\6\126\2\33\31\126\1\33\17\126\6\122"+
- "\4\22\1\10\37\22\1\10\4\22\25\62\1\127\11\62\21\55\5\62"+
- "\1\57\12\40\13\62\4\55\1\50\6\55\12\122\17\55\1\47\3\53"+
- "\15\20\11\36\1\32\24\36\2\20\11\36\1\32\31\36\1\32\4\20"+
- "\4\36\2\32\2\107\1\4\5\107\52\4\u1900\0\u012e\144\2\0\76\144"+
- "\2\0\152\144\46\0\7\132\14\0\5\132\5\0\1\132\1\133\12\132"+
- "\1\0\15\132\1\0\5\132\1\0\1\132\1\0\2\132\1\0\2\132"+
- "\1\0\154\132\41\0\u016b\132\22\0\100\132\2\0\66\132\50\0\14\132"+
- "\4\0\20\133\1\137\2\0\1\136\1\137\13\0\7\133\14\0\2\141"+
- "\30\0\3\141\1\137\1\0\1\140\1\0\1\137\1\136\32\0\5\132"+
- "\1\0\207\132\2\0\1\133\7\0\1\140\4\0\1\137\1\0\1\140"+
- "\1\0\12\134\1\136\1\137\5\0\32\132\4\0\1\141\1\0\32\132"+
- "\13\0\70\135\2\133\37\146\3\0\6\146\2\0\6\146\2\0\6\146"+
- "\2\0\3\146\34\0\3\133\4\0";
-
- /**
- * Translates characters to character classes
- */
- private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
-
- /**
- * Translates DFA states to action switch labels.
- */
- private static final int [] ZZ_ACTION = zzUnpackAction();
-
- private static final String ZZ_ACTION_PACKED_0 =
- "\1\0\23\1\1\2\1\3\1\4\1\1\1\5\1\6"+
- "\1\7\1\10\15\0\1\2\1\0\1\2\10\0\1\3"+
- "\15\0\1\2\57\0";
-
- private static int [] zzUnpackAction() {
- int [] result = new int[114];
- int offset = 0;
- offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackAction(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int count = packed.charAt(i++);
- int value = packed.charAt(i++);
- do result[j++] = value; while (--count > 0);
- }
- return j;
- }
-
-
- /**
- * Translates a state to a row index in the transition table
- */
- private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
-
- private static final String ZZ_ROWMAP_PACKED_0 =
- "\0\0\0\147\0\316\0\u0135\0\u019c\0\u0203\0\u026a\0\u02d1"+
- "\0\u0338\0\u039f\0\u0406\0\u046d\0\u04d4\0\u053b\0\u05a2\0\u0609"+
- "\0\u0670\0\u06d7\0\u073e\0\u07a5\0\u080c\0\u0873\0\u08da\0\u0941"+
- "\0\u09a8\0\147\0\147\0\u0a0f\0\316\0\u0135\0\u019c\0\u0203"+
- "\0\u026a\0\u0a76\0\u0add\0\u0b44\0\u0bab\0\u046d\0\u0c12\0\u0c79"+
- "\0\u0ce0\0\u0d47\0\u0dae\0\u0e15\0\u0e7c\0\u0338\0\u039f\0\u0ee3"+
- "\0\u0f4a\0\u0fb1\0\u1018\0\u107f\0\u10e6\0\u114d\0\u11b4\0\u121b"+
- "\0\u1282\0\u12e9\0\u1350\0\u13b7\0\u141e\0\u1485\0\u14ec\0\u1553"+
- "\0\u15ba\0\u0941\0\u1621\0\u1688\0\u16ef\0\u1756\0\u17bd\0\u1824"+
- "\0\u188b\0\u18f2\0\u1959\0\u19c0\0\u1a27\0\u1a8e\0\u1af5\0\u1b5c"+
- "\0\u1bc3\0\u1c2a\0\u1c91\0\u1cf8\0\u1d5f\0\u1dc6\0\u1e2d\0\u1e94"+
- "\0\u1efb\0\u1f62\0\u1fc9\0\u2030\0\u2097\0\u20fe\0\u2165\0\u21cc"+
- "\0\u2233\0\u229a\0\u2301\0\u2368\0\u23cf\0\u2436\0\u249d\0\u2504"+
- "\0\u256b\0\u25d2\0\u2639\0\u26a0\0\u2707\0\u276e\0\u27d5\0\u283c"+
- "\0\u28a3\0\u290a";
-
- private static int [] zzUnpackRowMap() {
- int [] result = new int[114];
- int offset = 0;
- offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackRowMap(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int high = packed.charAt(i++) << 16;
- result[j++] = high | packed.charAt(i++);
- }
- return j;
- }
-
- /**
- * The transition table of the DFA
- */
- private static final int [] ZZ_TRANS = zzUnpackTrans();
-
- private static final String ZZ_TRANS_PACKED_0 =
- "\1\2\1\3\1\2\1\4\1\2\1\5\1\2\1\6"+
- "\1\2\1\7\1\2\1\10\3\2\1\11\5\2\1\12"+
- "\3\2\1\13\11\2\1\14\2\2\1\15\43\2\1\16"+
- "\1\2\1\17\3\2\1\20\1\21\1\2\1\22\1\2"+
- "\1\23\2\2\1\24\1\2\1\25\1\2\1\26\1\27"+
- "\3\2\1\30\2\31\1\32\1\33\1\34\151\0\1\25"+
- "\11\0\1\25\20\0\1\25\22\0\1\25\10\0\3\25"+
- "\17\0\1\25\10\0\1\25\24\0\1\25\1\0\1\25"+
- "\1\0\1\25\1\0\1\25\1\0\1\25\1\0\3\25"+
- "\1\0\5\25\1\0\3\25\1\0\11\25\1\0\2\25"+
- "\1\0\16\25\1\0\2\25\1\0\21\25\1\0\1\25"+
- "\1\0\3\25\2\0\1\25\1\0\1\25\1\0\2\25"+
- "\1\0\1\25\17\0\1\25\3\0\1\25\5\0\2\25"+
- "\3\0\1\25\13\0\1\25\1\0\1\25\4\0\2\25"+
- "\4\0\1\25\1\0\1\25\3\0\2\25\1\0\1\25"+
- "\5\0\3\25\1\0\1\25\15\0\1\25\10\0\1\25"+
- "\24\0\1\25\3\0\1\25\1\0\1\25\1\0\1\25"+
- "\1\0\3\25\2\0\4\25\1\0\3\25\2\0\3\25"+
- "\1\0\4\25\1\0\2\25\2\0\3\25\1\0\11\25"+
- "\1\0\2\25\1\0\16\25\1\0\2\25\1\0\1\25"+
- "\1\0\3\25\2\0\1\25\1\0\1\25\1\0\2\25"+
- "\1\0\1\25\17\0\1\25\3\0\1\25\3\0\1\25"+
- "\1\0\3\25\2\0\1\25\1\0\2\25\1\0\3\25"+
- "\3\0\2\25\1\0\1\25\1\0\2\25\1\0\2\25"+
- "\3\0\2\25\1\0\1\25\1\0\1\25\1\0\2\25"+
- "\1\0\2\25\1\0\2\25\1\0\5\25\1\0\5\25"+
- "\1\0\2\25\1\0\2\25\1\0\1\25\1\0\3\25"+
- "\4\0\1\25\4\0\1\25\31\0\3\25\5\0\1\25"+
- "\1\0\1\25\1\0\1\25\4\0\1\25\14\0\1\25"+
- "\5\0\1\25\11\0\2\25\12\0\1\26\1\0\2\25"+
- "\12\0\1\25\24\0\1\25\1\0\1\26\7\0\2\25"+
- "\2\0\5\25\2\0\2\25\4\0\6\25\1\0\2\25"+
- "\4\0\5\25\1\0\5\25\1\0\2\25\1\0\3\25"+
- "\1\0\4\25\1\0\5\25\1\26\1\0\1\25\1\0"+
- "\1\25\1\0\3\25\2\0\1\25\1\0\1\25\1\0"+
- "\1\25\2\0\1\25\17\0\1\25\3\0\1\25\5\0"+
- "\2\25\3\0\1\25\4\0\3\25\4\0\1\25\1\0"+
- "\1\25\2\0\1\25\1\0\2\25\4\0\1\25\1\0"+
- "\1\25\3\0\2\25\1\0\1\25\5\0\3\25\1\0"+
- "\1\25\10\0\1\25\1\0\2\26\1\0\1\25\10\0"+
- "\1\25\24\0\1\25\3\0\1\25\6\0\2\25\5\0"+
- "\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+
- "\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+
- "\1\25\1\0\3\25\3\0\2\25\4\0\3\25\1\0"+
- "\1\25\10\0\1\25\1\0\2\25\21\0\1\25\11\0"+
- "\2\25\17\0\1\25\6\0\2\25\4\0\1\25\5\0"+
- "\1\25\2\0\1\25\5\0\3\25\1\0\1\25\15\0"+
- "\1\25\10\0\1\25\24\0\1\25\3\0\1\25\5\0"+
- "\1\25\32\0\15\25\5\0\3\25\1\0\1\25\5\0"+
- "\1\25\7\0\1\25\2\0\1\25\5\0\1\25\2\0"+
- "\1\25\1\0\1\25\106\0\1\33\21\0\1\27\35\0"+
- "\1\32\3\0\1\32\3\0\1\32\1\0\3\32\2\0"+
- "\1\32\2\0\1\32\1\0\3\32\3\0\2\32\1\0"+
- "\1\32\1\0\2\32\1\0\2\32\3\0\2\32\1\0"+
- "\1\32\3\0\2\32\1\0\2\32\1\0\2\32\1\0"+
- "\5\32\1\0\5\32\2\0\1\32\1\0\2\32\1\0"+
- "\1\32\1\0\3\32\4\0\1\32\4\0\1\32\17\0"+
- "\1\32\1\0\1\32\1\0\1\32\1\0\1\32\1\0"+
- "\1\32\1\0\3\32\1\0\5\32\1\0\3\32\1\0"+
- "\11\32\1\0\2\32\1\0\16\32\1\0\2\32\1\0"+
- "\21\32\1\0\1\32\1\0\3\32\2\0\1\32\1\0"+
- "\1\32\1\0\2\32\1\0\1\32\17\0\1\32\1\0"+
- "\1\32\1\0\1\32\3\0\1\32\1\0\3\32\1\0"+
- "\2\32\1\0\2\32\1\0\3\32\1\0\11\32\1\0"+
- "\2\32\1\0\16\32\1\0\2\32\1\0\21\32\1\0"+
- "\1\32\1\0\3\32\2\0\1\32\1\0\1\32\1\0"+
- "\2\32\1\0\1\32\17\0\1\32\11\0\1\32\20\0"+
- "\1\32\33\0\1\32\21\0\1\32\10\0\1\32\24\0"+
- "\1\32\1\0\1\32\1\0\1\32\1\0\1\32\1\0"+
- "\1\32\1\0\3\32\1\0\5\32\1\0\3\32\1\0"+
- "\6\32\1\0\2\32\1\0\2\32\1\0\10\32\1\0"+
- "\5\32\1\0\2\32\1\0\21\32\1\0\1\32\1\0"+
- "\3\32\2\0\1\32\1\0\1\32\1\0\2\32\1\0"+
- "\1\32\146\0\1\33\16\0\1\35\1\0\1\36\1\0"+
- "\1\37\1\0\1\40\1\0\1\41\1\0\1\42\3\0"+
- "\1\43\5\0\1\44\3\0\1\45\11\0\1\46\2\0"+
- "\1\47\16\0\1\50\2\0\1\51\41\0\2\25\1\52"+
- "\1\0\1\53\1\0\1\53\1\54\1\0\1\25\2\0"+
- "\1\25\1\0\1\35\1\0\1\36\1\0\1\37\1\0"+
- "\1\40\1\0\1\41\1\0\1\55\3\0\1\56\5\0"+
- "\1\57\3\0\1\60\11\0\1\46\2\0\1\61\16\0"+
- "\1\62\2\0\1\63\41\0\1\25\2\26\2\0\2\64"+
- "\1\65\1\0\1\26\2\0\1\25\13\0\1\66\15\0"+
- "\1\67\14\0\1\70\16\0\1\71\2\0\1\72\21\0"+
- "\1\73\20\0\1\27\1\0\1\27\3\0\1\54\1\0"+
- "\1\27\4\0\1\35\1\0\1\36\1\0\1\37\1\0"+
- "\1\40\1\0\1\41\1\0\1\74\3\0\1\56\5\0"+
- "\1\57\3\0\1\75\11\0\1\46\2\0\1\76\16\0"+
- "\1\77\2\0\1\100\21\0\1\101\17\0\1\25\1\102"+
- "\1\26\1\103\3\0\1\102\1\0\1\102\2\0\1\25"+
- "\142\0\2\31\4\0\1\35\1\0\1\36\1\0\1\37"+
- "\1\0\1\40\1\0\1\41\1\0\1\104\3\0\1\43"+
- "\5\0\1\44\3\0\1\105\11\0\1\46\2\0\1\106"+
- "\16\0\1\107\2\0\1\110\41\0\1\25\1\34\1\52"+
- "\1\0\1\53\1\0\1\53\1\54\1\0\1\34\2\0"+
- "\1\34\2\0\1\25\11\0\3\25\5\0\1\25\1\0"+
- "\1\25\1\0\1\25\4\0\1\25\4\0\1\25\1\0"+
- "\2\25\4\0\1\25\5\0\1\25\3\0\1\25\4\0"+
- "\5\25\10\0\1\52\1\0\2\25\1\0\1\25\10\0"+
- "\1\25\24\0\1\25\1\0\1\52\7\0\2\25\2\0"+
- "\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+
- "\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+
- "\4\25\1\0\5\25\1\52\1\0\1\25\1\0\1\25"+
- "\1\0\3\25\2\0\1\25\1\0\1\25\1\0\1\25"+
- "\2\0\1\25\17\0\1\25\3\0\1\25\5\0\2\25"+
- "\3\0\1\25\4\0\3\25\4\0\1\25\1\0\1\25"+
- "\2\0\1\25\1\0\2\25\4\0\1\25\1\0\1\25"+
- "\3\0\2\25\1\0\1\25\5\0\3\25\1\0\1\25"+
- "\10\0\1\25\1\0\2\52\1\0\1\25\10\0\1\25"+
- "\24\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
- "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
- "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
- "\1\0\3\25\1\0\1\25\1\0\2\25\4\0\3\25"+
- "\1\0\1\25\10\0\1\25\1\0\2\25\21\0\1\25"+
- "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+
- "\1\0\1\25\5\0\3\25\5\0\1\25\2\0\2\25"+
- "\4\0\1\25\2\0\1\25\1\0\1\25\103\0\2\25"+
- "\6\0\1\25\56\0\1\25\3\0\1\25\2\0\1\25"+
- "\3\0\1\25\5\0\1\25\7\0\1\25\4\0\2\25"+
- "\3\0\2\25\1\0\1\25\4\0\1\25\1\0\1\25"+
- "\2\0\2\25\1\0\3\25\1\0\1\25\2\0\4\25"+
- "\2\0\1\25\41\0\1\35\1\0\1\36\1\0\1\37"+
- "\1\0\1\40\1\0\1\41\1\0\1\111\3\0\1\43"+
- "\5\0\1\44\3\0\1\112\11\0\1\46\2\0\1\113"+
- "\16\0\1\114\2\0\1\115\41\0\1\25\2\52\2\0"+
- "\2\116\1\54\1\0\1\52\2\0\1\25\1\0\1\35"+
- "\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\41"+
- "\1\0\1\117\3\0\1\120\5\0\1\121\3\0\1\122"+
- "\11\0\1\46\2\0\1\123\16\0\1\124\2\0\1\125"+
- "\41\0\1\25\1\53\7\0\1\53\2\0\1\25\1\0"+
- "\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+
- "\1\41\1\0\1\126\3\0\1\43\5\0\1\44\3\0"+
- "\1\127\11\0\1\46\2\0\1\130\16\0\1\131\2\0"+
- "\1\132\21\0\1\101\17\0\1\25\1\54\1\52\1\103"+
- "\3\0\1\54\1\0\1\54\2\0\1\25\2\0\1\26"+
- "\11\0\3\25\5\0\1\25\1\0\1\25\1\0\1\25"+
- "\4\0\1\25\4\0\1\26\1\0\2\26\4\0\1\25"+
- "\5\0\1\25\3\0\1\26\4\0\1\26\2\25\2\26"+
- "\10\0\1\26\1\0\2\25\1\0\1\26\10\0\1\25"+
- "\24\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
- "\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
- "\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
- "\1\0\3\25\1\0\1\26\1\0\2\25\4\0\3\25"+
- "\1\0\1\25\10\0\1\25\1\0\2\25\21\0\1\25"+
- "\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+
- "\1\0\1\25\5\0\1\25\2\26\5\0\1\25\2\0"+
- "\1\25\1\26\4\0\1\25\2\0\1\25\1\0\1\25"+
- "\103\0\2\26\6\0\1\26\56\0\1\26\3\0\1\26"+
- "\2\0\1\26\3\0\1\26\5\0\1\26\7\0\1\26"+
- "\4\0\2\26\3\0\2\26\1\0\1\26\4\0\1\26"+
- "\1\0\1\26\2\0\2\26\1\0\3\26\1\0\1\26"+
- "\2\0\4\26\2\0\1\26\53\0\1\133\3\0\1\134"+
- "\5\0\1\135\3\0\1\136\14\0\1\137\16\0\1\140"+
- "\2\0\1\141\42\0\1\64\1\26\6\0\1\64\4\0"+
- "\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+
- "\1\41\1\0\1\142\3\0\1\56\5\0\1\57\3\0"+
- "\1\143\11\0\1\46\2\0\1\144\16\0\1\145\2\0"+
- "\1\146\21\0\1\101\17\0\1\25\1\65\1\26\1\103"+
- "\3\0\1\65\1\0\1\65\2\0\1\25\2\0\1\27"+
- "\37\0\1\27\1\0\2\27\16\0\1\27\4\0\1\27"+
- "\2\0\2\27\15\0\1\27\132\0\1\27\153\0\2\27"+
- "\11\0\1\27\115\0\2\27\6\0\1\27\56\0\1\27"+
- "\3\0\1\27\2\0\1\27\3\0\1\27\5\0\1\27"+
- "\7\0\1\27\4\0\2\27\3\0\2\27\1\0\1\27"+
- "\4\0\1\27\1\0\1\27\2\0\2\27\1\0\3\27"+
- "\1\0\1\27\2\0\4\27\2\0\1\27\153\0\1\27"+
- "\35\0\1\102\11\0\3\25\5\0\1\25\1\0\1\25"+
- "\1\0\1\25\4\0\1\25\4\0\1\102\1\0\2\102"+
- "\4\0\1\25\5\0\1\25\3\0\1\102\4\0\1\102"+
- "\2\25\2\102\10\0\1\26\1\0\2\25\1\0\1\102"+
- "\10\0\1\25\24\0\1\25\3\0\1\25\6\0\2\25"+
- "\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+
- "\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+
- "\1\0\1\25\1\0\3\25\1\0\1\102\1\0\2\25"+
- "\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+
- "\21\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+
- "\5\0\3\25\1\0\1\25\5\0\1\25\2\102\5\0"+
- "\1\25\2\0\1\25\1\102\4\0\1\25\2\0\1\25"+
- "\1\0\1\25\103\0\2\102\6\0\1\102\56\0\1\102"+
- "\3\0\1\102\2\0\1\102\3\0\1\102\5\0\1\102"+
- "\7\0\1\102\4\0\2\102\3\0\2\102\1\0\1\102"+
- "\4\0\1\102\1\0\1\102\2\0\2\102\1\0\3\102"+
- "\1\0\1\102\2\0\4\102\2\0\1\102\153\0\1\103"+
- "\46\0\1\147\15\0\1\150\14\0\1\151\16\0\1\152"+
- "\2\0\1\153\21\0\1\101\20\0\1\103\1\0\1\103"+
- "\3\0\1\54\1\0\1\103\5\0\1\34\11\0\3\25"+
- "\5\0\1\25\1\0\1\25\1\0\1\25\4\0\1\25"+
- "\4\0\1\34\1\0\2\34\4\0\1\25\5\0\1\25"+
- "\3\0\1\34\4\0\1\34\2\25\2\34\10\0\1\52"+
- "\1\0\2\25\1\0\1\34\10\0\1\25\24\0\1\25"+
- "\3\0\1\25\6\0\2\25\5\0\1\25\1\0\1\25"+
- "\1\0\1\25\1\0\11\25\2\0\1\25\4\0\1\25"+
- "\4\0\6\25\2\0\1\25\1\0\1\25\1\0\3\25"+
- "\1\0\1\34\1\0\2\25\4\0\3\25\1\0\1\25"+
- "\10\0\1\25\1\0\2\25\21\0\1\25\3\0\1\25"+
- "\5\0\1\25\32\0\15\25\5\0\3\25\1\0\1\25"+
- "\5\0\1\25\2\34\5\0\1\25\2\0\1\25\1\34"+
- "\4\0\1\25\2\0\1\25\1\0\1\25\103\0\2\34"+
- "\6\0\1\34\56\0\1\34\3\0\1\34\2\0\1\34"+
- "\3\0\1\34\5\0\1\34\7\0\1\34\4\0\2\34"+
- "\3\0\2\34\1\0\1\34\4\0\1\34\1\0\1\34"+
- "\2\0\2\34\1\0\3\34\1\0\1\34\2\0\4\34"+
- "\2\0\1\34\42\0\1\52\11\0\3\25\5\0\1\25"+
- "\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\52"+
- "\1\0\2\52\4\0\1\25\5\0\1\25\3\0\1\52"+
- "\4\0\1\52\2\25\2\52\10\0\1\52\1\0\2\25"+
- "\1\0\1\52\10\0\1\25\24\0\1\25\3\0\1\25"+
- "\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
- "\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
- "\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\52"+
- "\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
- "\1\0\2\25\21\0\1\25\3\0\1\25\5\0\1\25"+
- "\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
- "\2\52\5\0\1\25\2\0\1\25\1\52\4\0\1\25"+
- "\2\0\1\25\1\0\1\25\103\0\2\52\6\0\1\52"+
- "\56\0\1\52\3\0\1\52\2\0\1\52\3\0\1\52"+
- "\5\0\1\52\7\0\1\52\4\0\2\52\3\0\2\52"+
- "\1\0\1\52\4\0\1\52\1\0\1\52\2\0\2\52"+
- "\1\0\3\52\1\0\1\52\2\0\4\52\2\0\1\52"+
- "\53\0\1\154\3\0\1\155\5\0\1\156\3\0\1\157"+
- "\14\0\1\160\16\0\1\161\2\0\1\162\42\0\1\116"+
- "\1\52\6\0\1\116\5\0\1\53\11\0\3\25\5\0"+
- "\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+
- "\1\53\1\0\2\53\4\0\1\25\5\0\1\25\3\0"+
- "\1\53\4\0\1\53\2\25\2\53\12\0\2\25\1\0"+
- "\1\53\10\0\1\25\24\0\1\25\11\0\2\25\2\0"+
- "\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+
- "\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+
- "\4\25\1\0\5\25\2\0\1\25\1\0\1\25\1\0"+
- "\3\25\2\0\1\25\1\0\1\25\1\0\1\25\2\0"+
- "\1\25\17\0\1\25\3\0\1\25\5\0\2\25\3\0"+
- "\1\25\4\0\3\25\4\0\1\25\1\0\1\25\2\0"+
- "\1\25\1\0\2\25\4\0\1\25\1\0\1\25\3\0"+
- "\2\25\1\0\1\25\5\0\3\25\1\0\1\25\10\0"+
- "\1\25\4\0\1\25\10\0\1\25\24\0\1\25\3\0"+
- "\1\25\6\0\2\25\5\0\1\25\1\0\1\25\1\0"+
- "\1\25\1\0\11\25\2\0\1\25\4\0\1\25\4\0"+
- "\6\25\2\0\1\25\1\0\1\25\1\0\3\25\1\0"+
- "\1\53\1\0\2\25\4\0\3\25\1\0\1\25\10\0"+
- "\1\25\1\0\2\25\21\0\1\25\3\0\1\25\5\0"+
- "\1\25\32\0\15\25\5\0\3\25\1\0\1\25\5\0"+
- "\1\25\2\53\5\0\1\25\2\0\1\25\1\53\4\0"+
- "\1\25\2\0\1\25\1\0\1\25\103\0\2\53\6\0"+
- "\1\53\56\0\1\53\3\0\1\53\2\0\1\53\3\0"+
- "\1\53\5\0\1\53\7\0\1\53\4\0\2\53\3\0"+
- "\2\53\1\0\1\53\4\0\1\53\1\0\1\53\2\0"+
- "\2\53\1\0\3\53\1\0\1\53\2\0\4\53\2\0"+
- "\1\53\42\0\1\54\11\0\3\25\5\0\1\25\1\0"+
- "\1\25\1\0\1\25\4\0\1\25\4\0\1\54\1\0"+
- "\2\54\4\0\1\25\5\0\1\25\3\0\1\54\4\0"+
- "\1\54\2\25\2\54\10\0\1\52\1\0\2\25\1\0"+
- "\1\54\10\0\1\25\24\0\1\25\3\0\1\25\6\0"+
- "\2\25\5\0\1\25\1\0\1\25\1\0\1\25\1\0"+
- "\11\25\2\0\1\25\4\0\1\25\4\0\6\25\2\0"+
- "\1\25\1\0\1\25\1\0\3\25\1\0\1\54\1\0"+
- "\2\25\4\0\3\25\1\0\1\25\10\0\1\25\1\0"+
- "\2\25\21\0\1\25\3\0\1\25\5\0\1\25\32\0"+
- "\15\25\5\0\3\25\1\0\1\25\5\0\1\25\2\54"+
- "\5\0\1\25\2\0\1\25\1\54\4\0\1\25\2\0"+
- "\1\25\1\0\1\25\103\0\2\54\6\0\1\54\56\0"+
- "\1\54\3\0\1\54\2\0\1\54\3\0\1\54\5\0"+
- "\1\54\7\0\1\54\4\0\2\54\3\0\2\54\1\0"+
- "\1\54\4\0\1\54\1\0\1\54\2\0\2\54\1\0"+
- "\3\54\1\0\1\54\2\0\4\54\2\0\1\54\42\0"+
- "\1\64\37\0\1\64\1\0\2\64\16\0\1\64\4\0"+
- "\1\64\2\0\2\64\10\0\1\26\4\0\1\64\37\0"+
- "\1\26\102\0\1\26\147\0\2\26\134\0\1\64\153\0"+
- "\2\64\11\0\1\64\115\0\2\64\6\0\1\64\56\0"+
- "\1\64\3\0\1\64\2\0\1\64\3\0\1\64\5\0"+
- "\1\64\7\0\1\64\4\0\2\64\3\0\2\64\1\0"+
- "\1\64\4\0\1\64\1\0\1\64\2\0\2\64\1\0"+
- "\3\64\1\0\1\64\2\0\4\64\2\0\1\64\42\0"+
- "\1\65\11\0\3\25\5\0\1\25\1\0\1\25\1\0"+
- "\1\25\4\0\1\25\4\0\1\65\1\0\2\65\4\0"+
- "\1\25\5\0\1\25\3\0\1\65\4\0\1\65\2\25"+
- "\2\65\10\0\1\26\1\0\2\25\1\0\1\65\10\0"+
- "\1\25\24\0\1\25\3\0\1\25\6\0\2\25\5\0"+
- "\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+
- "\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+
- "\1\25\1\0\3\25\1\0\1\65\1\0\2\25\4\0"+
- "\3\25\1\0\1\25\10\0\1\25\1\0\2\25\21\0"+
- "\1\25\3\0\1\25\5\0\1\25\32\0\15\25\5\0"+
- "\3\25\1\0\1\25\5\0\1\25\2\65\5\0\1\25"+
- "\2\0\1\25\1\65\4\0\1\25\2\0\1\25\1\0"+
- "\1\25\103\0\2\65\6\0\1\65\56\0\1\65\3\0"+
- "\1\65\2\0\1\65\3\0\1\65\5\0\1\65\7\0"+
- "\1\65\4\0\2\65\3\0\2\65\1\0\1\65\4\0"+
- "\1\65\1\0\1\65\2\0\2\65\1\0\3\65\1\0"+
- "\1\65\2\0\4\65\2\0\1\65\42\0\1\103\37\0"+
- "\1\103\1\0\2\103\16\0\1\103\4\0\1\103\2\0"+
- "\2\103\15\0\1\103\132\0\1\103\153\0\2\103\11\0"+
- "\1\103\115\0\2\103\6\0\1\103\56\0\1\103\3\0"+
- "\1\103\2\0\1\103\3\0\1\103\5\0\1\103\7\0"+
- "\1\103\4\0\2\103\3\0\2\103\1\0\1\103\4\0"+
- "\1\103\1\0\1\103\2\0\2\103\1\0\3\103\1\0"+
- "\1\103\2\0\4\103\2\0\1\103\42\0\1\116\37\0"+
- "\1\116\1\0\2\116\16\0\1\116\4\0\1\116\2\0"+
- "\2\116\10\0\1\52\4\0\1\116\37\0\1\52\102\0"+
- "\1\52\147\0\2\52\134\0\1\116\153\0\2\116\11\0"+
- "\1\116\115\0\2\116\6\0\1\116\56\0\1\116\3\0"+
- "\1\116\2\0\1\116\3\0\1\116\5\0\1\116\7\0"+
- "\1\116\4\0\2\116\3\0\2\116\1\0\1\116\4\0"+
- "\1\116\1\0\1\116\2\0\2\116\1\0\3\116\1\0"+
- "\1\116\2\0\4\116\2\0\1\116\40\0";
-
- private static int [] zzUnpackTrans() {
- int [] result = new int[10609];
- int offset = 0;
- offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackTrans(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int count = packed.charAt(i++);
- int value = packed.charAt(i++);
- value--;
- do result[j++] = value; while (--count > 0);
- }
- return j;
- }
-
-
- /* error codes */
- private static final int ZZ_UNKNOWN_ERROR = 0;
- private static final int ZZ_NO_MATCH = 1;
- private static final int ZZ_PUSHBACK_2BIG = 2;
-
- /* error messages for the codes above */
- private static final String ZZ_ERROR_MSG[] = {
- "Unkown internal scanner error",
- "Error: could not match input",
- "Error: pushback value was too large"
- };
-
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state aState
- */
- private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
-
- private static final String ZZ_ATTRIBUTE_PACKED_0 =
- "\1\0\1\11\27\1\2\11\1\1\15\0\1\1\1\0"+
- "\1\1\10\0\1\1\15\0\1\1\57\0";
-
- private static int [] zzUnpackAttribute() {
- int [] result = new int[114];
- int offset = 0;
- offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackAttribute(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int count = packed.charAt(i++);
- int value = packed.charAt(i++);
- do result[j++] = value; while (--count > 0);
- }
- return j;
- }
-
- /** the input device */
- private java.io.Reader zzReader;
-
- /** the current state of the DFA */
- private int zzState;
-
- /** the current lexical state */
- private int zzLexicalState = YYINITIAL;
-
- /** this buffer contains the current text to be matched and is
- the source of the yytext() string */
- private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
-
- /** the textposition at the last accepting state */
- private int zzMarkedPos;
-
- /** the current text position in the buffer */
- private int zzCurrentPos;
-
- /** startRead marks the beginning of the yytext() string in the buffer */
- private int zzStartRead;
-
- /** endRead marks the last character in the buffer, that has been read
- from input */
- private int zzEndRead;
-
- /** number of newlines encountered up to the start of the matched text */
- private int yyline;
-
- /** the number of characters up to the start of the matched text */
- private int yychar;
-
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
- private int yycolumn;
-
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
- private boolean zzAtBOL = true;
-
- /** zzAtEOF == true <=> the scanner is at the EOF */
- private boolean zzAtEOF;
-
- /** denotes if the user-EOF-code has already been executed */
- private boolean zzEOFDone;
-
- /* user code: */
- /** Alphanumeric sequences */
- public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;
-
- /** Numbers */
- public static final int NUMERIC_TYPE = StandardTokenizer.NUM;
-
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- *
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
- public static final int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
-
- public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
-
- public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
-
- public static final int KATAKANA_TYPE = StandardTokenizer.KATAKANA;
-
- public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
-
- public final int yychar()
- {
- return yychar;
- }
-
- /**
- * Fills CharTermAttribute with the current token text.
- */
- public final void getText(CharTermAttribute t) {
- t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
- }
-
-
- /**
- * Creates a new scanner
- * There is also a java.io.InputStream version of this constructor.
- *
- * @param in the java.io.Reader to read input from.
- */
- public StandardTokenizerImpl31(java.io.Reader in) {
- this.zzReader = in;
- }
-
- /**
- * Creates a new scanner.
- * There is also java.io.Reader version of this constructor.
- *
- * @param in the java.io.Inputstream to read input from.
- */
- public StandardTokenizerImpl31(java.io.InputStream in) {
- this(new java.io.InputStreamReader(in));
- }
-
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
- private static char [] zzUnpackCMap(String packed) {
- char [] map = new char[0x10000];
- int i = 0; /* index in packed string */
- int j = 0; /* index in unpacked array */
- while (i < 2650) {
- int count = packed.charAt(i++);
- char value = packed.charAt(i++);
- do map[j++] = value; while (--count > 0);
- }
- return map;
- }
-
-
- /**
- * Refills the input buffer.
- *
- * @return false, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
- private boolean zzRefill() throws java.io.IOException {
-
- /* first: make room (if you can) */
- if (zzStartRead > 0) {
- System.arraycopy(zzBuffer, zzStartRead,
- zzBuffer, 0,
- zzEndRead-zzStartRead);
-
- /* translate stored positions */
- zzEndRead-= zzStartRead;
- zzCurrentPos-= zzStartRead;
- zzMarkedPos-= zzStartRead;
- zzStartRead = 0;
- }
-
- /* is the buffer big enough? */
- if (zzCurrentPos >= zzBuffer.length) {
- /* if not: blow it up */
- char newBuffer[] = new char[zzCurrentPos*2];
- System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
- zzBuffer = newBuffer;
- }
-
- /* finally: fill the buffer with new input */
- int numRead = zzReader.read(zzBuffer, zzEndRead,
- zzBuffer.length-zzEndRead);
-
- if (numRead > 0) {
- zzEndRead+= numRead;
- return false;
- }
- // unlikely but not impossible: read 0 characters, but not at end of stream
- if (numRead == 0) {
- int c = zzReader.read();
- if (c == -1) {
- return true;
- } else {
- zzBuffer[zzEndRead++] = (char) c;
- return false;
- }
- }
-
- // numRead < 0
- return true;
- }
-
-
- /**
- * Closes the input stream.
- */
- public final void yyclose() throws java.io.IOException {
- zzAtEOF = true; /* indicate end of file */
- zzEndRead = zzStartRead; /* invalidate buffer */
-
- if (zzReader != null)
- zzReader.close();
- }
-
-
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * cannot be reused (internal buffer is discarded and lost).
- * Lexical state is set to ZZ_INITIAL.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
- public final void yyreset(java.io.Reader reader) {
- zzReader = reader;
- zzAtBOL = true;
- zzAtEOF = false;
- zzEOFDone = false;
- zzEndRead = zzStartRead = 0;
- zzCurrentPos = zzMarkedPos = 0;
- yyline = yychar = yycolumn = 0;
- zzLexicalState = YYINITIAL;
- if (zzBuffer.length > ZZ_BUFFERSIZE)
- zzBuffer = new char[ZZ_BUFFERSIZE];
- }
-
-
- /**
- * Returns the current lexical state.
- */
- public final int yystate() {
- return zzLexicalState;
- }
-
-
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
- public final void yybegin(int newState) {
- zzLexicalState = newState;
- }
-
-
- /**
- * Returns the text matched by the current regular expression.
- */
- public final String yytext() {
- return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
- }
-
-
- /**
- * Returns the character at position pos from the
- * matched text.
- *
- * It is equivalent to yytext().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to yylength()-1.
- *
- * @return the character at position pos
- */
- public final char yycharat(int pos) {
- return zzBuffer[zzStartRead+pos];
- }
-
-
- /**
- * Returns the length of the matched text region.
- */
- public final int yylength() {
- return zzMarkedPos-zzStartRead;
- }
-
-
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * yypushback(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
- private void zzScanError(int errorCode) {
- String message;
- try {
- message = ZZ_ERROR_MSG[errorCode];
- }
- catch (ArrayIndexOutOfBoundsException e) {
- message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
- }
-
- throw new Error(message);
- }
-
-
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than yylength()!
- */
- public void yypushback(int number) {
- if ( number > yylength() )
- zzScanError(ZZ_PUSHBACK_2BIG);
-
- zzMarkedPos -= number;
- }
-
-
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
- public int getNextToken() throws java.io.IOException {
- int zzInput;
- int zzAction;
-
- // cached fields:
- int zzCurrentPosL;
- int zzMarkedPosL;
- int zzEndReadL = zzEndRead;
- char [] zzBufferL = zzBuffer;
- char [] zzCMapL = ZZ_CMAP;
-
- int [] zzTransL = ZZ_TRANS;
- int [] zzRowMapL = ZZ_ROWMAP;
- int [] zzAttrL = ZZ_ATTRIBUTE;
-
- while (true) {
- zzMarkedPosL = zzMarkedPos;
-
- yychar+= zzMarkedPosL-zzStartRead;
-
- zzAction = -1;
-
- zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
-
- zzState = ZZ_LEXSTATE[zzLexicalState];
-
- // set up zzAction for empty match case:
- int zzAttributes = zzAttrL[zzState];
- if ( (zzAttributes & 1) == 1 ) {
- zzAction = zzState;
- }
-
-
- zzForAction: {
- while (true) {
-
- if (zzCurrentPosL < zzEndReadL)
- zzInput = zzBufferL[zzCurrentPosL++];
- else if (zzAtEOF) {
- zzInput = YYEOF;
- break zzForAction;
- }
- else {
- // store back cached positions
- zzCurrentPos = zzCurrentPosL;
- zzMarkedPos = zzMarkedPosL;
- boolean eof = zzRefill();
- // get translated positions and possibly new buffer
- zzCurrentPosL = zzCurrentPos;
- zzMarkedPosL = zzMarkedPos;
- zzBufferL = zzBuffer;
- zzEndReadL = zzEndRead;
- if (eof) {
- zzInput = YYEOF;
- break zzForAction;
- }
- else {
- zzInput = zzBufferL[zzCurrentPosL++];
- }
- }
- int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
- if (zzNext == -1) break zzForAction;
- zzState = zzNext;
-
- zzAttributes = zzAttrL[zzState];
- if ( (zzAttributes & 1) == 1 ) {
- zzAction = zzState;
- zzMarkedPosL = zzCurrentPosL;
- if ( (zzAttributes & 8) == 8 ) break zzForAction;
- }
-
- }
- }
-
- // store back cached position
- zzMarkedPos = zzMarkedPosL;
-
- switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
- case 2:
- { return WORD_TYPE;
- }
- case 9: break;
- case 5:
- { return SOUTH_EAST_ASIAN_TYPE;
- }
- case 10: break;
- case 4:
- { return KATAKANA_TYPE;
- }
- case 11: break;
- case 6:
- { return IDEOGRAPHIC_TYPE;
- }
- case 12: break;
- case 8:
- { return HANGUL_TYPE;
- }
- case 13: break;
- case 3:
- { return NUMERIC_TYPE;
- }
- case 14: break;
- case 7:
- { return HIRAGANA_TYPE;
- }
- case 15: break;
- case 1:
- { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
- }
- case 16: break;
- default:
- if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
- zzAtEOF = true;
- {
- return StandardTokenizerInterface.YYEOF;
- }
- }
- else {
- zzScanError(ZZ_NO_MATCH);
- }
- }
- }
- }
-
-
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex
deleted file mode 100644
index c4b5dc9..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex
+++ /dev/null
@@ -1,184 +0,0 @@
-package org.apache.lucene.analysis.standard.std31;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.standard.StandardTokenizerInterface;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/**
- * This class implements StandardTokenizer, except with a bug
- * (https://issues.apache.org/jira/browse/LUCENE-3358) where Han and Hiragana
- * characters would be split from combining characters:
- * @deprecated This class is only for exact backwards compatibility
- */
-@Deprecated
-%%
-
-%unicode 6.0
-%integer
-%final
-%public
-%class StandardTokenizerImpl31
-%implements StandardTokenizerInterface
-%function getNextToken
-%char
-
-%include src/java/org/apache/lucene/analysis/standard/std31/SUPPLEMENTARY.jflex-macro
-ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
-Format = ([\p{WB:Format}] | {FormatSupp})
-Numeric = ([\p{WB:Numeric}] | {NumericSupp})
-Extend = ([\p{WB:Extend}] | {ExtendSupp})
-Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
-MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
-MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
-MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
-ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
-ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
-Han = ([\p{Script:Han}] | {HanSupp})
-Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})
-
-// Script=Hangul & Aletter
-HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
-// UAX#29 WB4. X (Extend | Format)* --> X
-//
-ALetterEx = {ALetter} ({Format} | {Extend})*
-// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
-NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
-KatakanaEx = {Katakana} ({Format} | {Extend})*
-MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})*
-MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})*
-ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
-
-
-%{
- /** Alphanumeric sequences */
- public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;
-
- /** Numbers */
- public static final int NUMERIC_TYPE = StandardTokenizer.NUM;
-
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- *
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
- public static final int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
-
- public static final int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
-
- public static final int HIRAGANA_TYPE = UAX29URLEmailTokenizer.HIRAGANA;
-
- public static final int KATAKANA_TYPE = UAX29URLEmailTokenizer.KATAKANA;
-
- public static final int HANGUL_TYPE = UAX29URLEmailTokenizer.HANGUL;
-
- public static final int EMAIL_TYPE = UAX29URLEmailTokenizer.EMAIL;
-
- public static final int URL_TYPE = UAX29URLEmailTokenizer.URL;
-
- public final int yychar()
- {
- return yychar;
- }
-
- /**
- * Fills CharTermAttribute with the current token text.
- */
- public final void getText(CharTermAttribute t) {
- t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
- }
-
-
- /**
- * Creates a new scanner
- * There is also a java.io.InputStream version of this constructor.
- *
- * @param in the java.io.Reader to read input from.
- */
- public UAX29URLEmailTokenizerImpl34(java.io.Reader in) {
- this.zzReader = in;
- }
-
- /**
- * Creates a new scanner.
- * There is also java.io.Reader version of this constructor.
- *
- * @param in the java.io.Inputstream to read input from.
- */
- public UAX29URLEmailTokenizerImpl34(java.io.InputStream in) {
- this(new java.io.InputStreamReader(in));
- }
-
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
- private static char [] zzUnpackCMap(String packed) {
- char [] map = new char[0x10000];
- int i = 0; /* index in packed string */
- int j = 0; /* index in unpacked array */
- while (i < 2812) {
- int count = packed.charAt(i++);
- char value = packed.charAt(i++);
- do map[j++] = value; while (--count > 0);
- }
- return map;
- }
-
-
- /**
- * Refills the input buffer.
- *
- * @return false, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
- private boolean zzRefill() throws java.io.IOException {
-
- /* first: make room (if you can) */
- if (zzStartRead > 0) {
- System.arraycopy(zzBuffer, zzStartRead,
- zzBuffer, 0,
- zzEndRead-zzStartRead);
-
- /* translate stored positions */
- zzEndRead-= zzStartRead;
- zzCurrentPos-= zzStartRead;
- zzMarkedPos-= zzStartRead;
- zzStartRead = 0;
- }
-
- /* is the buffer big enough? */
- if (zzCurrentPos >= zzBuffer.length) {
- /* if not: blow it up */
- char newBuffer[] = new char[zzCurrentPos*2];
- System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
- zzBuffer = newBuffer;
- }
-
- /* finally: fill the buffer with new input */
- int numRead = zzReader.read(zzBuffer, zzEndRead,
- zzBuffer.length-zzEndRead);
-
- if (numRead > 0) {
- zzEndRead+= numRead;
- return false;
- }
- // unlikely but not impossible: read 0 characters, but not at end of stream
- if (numRead == 0) {
- int c = zzReader.read();
- if (c == -1) {
- return true;
- } else {
- zzBuffer[zzEndRead++] = (char) c;
- return false;
- }
- }
-
- // numRead < 0
- return true;
- }
-
-
- /**
- * Closes the input stream.
- */
- public final void yyclose() throws java.io.IOException {
- zzAtEOF = true; /* indicate end of file */
- zzEndRead = zzStartRead; /* invalidate buffer */
-
- if (zzReader != null)
- zzReader.close();
- }
-
-
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * cannot be reused (internal buffer is discarded and lost).
- * Lexical state is set to ZZ_INITIAL.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
- public final void yyreset(java.io.Reader reader) {
- zzReader = reader;
- zzAtBOL = true;
- zzAtEOF = false;
- zzEOFDone = false;
- zzEndRead = zzStartRead = 0;
- zzCurrentPos = zzMarkedPos = 0;
- yyline = yychar = yycolumn = 0;
- zzLexicalState = YYINITIAL;
- if (zzBuffer.length > ZZ_BUFFERSIZE)
- zzBuffer = new char[ZZ_BUFFERSIZE];
- }
-
-
- /**
- * Returns the current lexical state.
- */
- public final int yystate() {
- return zzLexicalState;
- }
-
-
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
- public final void yybegin(int newState) {
- zzLexicalState = newState;
- }
-
-
- /**
- * Returns the text matched by the current regular expression.
- */
- public final String yytext() {
- return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
- }
-
-
- /**
- * Returns the character at position pos from the
- * matched text.
- *
- * It is equivalent to yytext().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to yylength()-1.
- *
- * @return the character at position pos
- */
- public final char yycharat(int pos) {
- return zzBuffer[zzStartRead+pos];
- }
-
-
- /**
- * Returns the length of the matched text region.
- */
- public final int yylength() {
- return zzMarkedPos-zzStartRead;
- }
-
-
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * yypushback(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
- private void zzScanError(int errorCode) {
- String message;
- try {
- message = ZZ_ERROR_MSG[errorCode];
- }
- catch (ArrayIndexOutOfBoundsException e) {
- message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
- }
-
- throw new Error(message);
- }
-
-
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than yylength()!
- */
- public void yypushback(int number) {
- if ( number > yylength() )
- zzScanError(ZZ_PUSHBACK_2BIG);
-
- zzMarkedPos -= number;
- }
-
-
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
- public int getNextToken() throws java.io.IOException {
- int zzInput;
- int zzAction;
-
- // cached fields:
- int zzCurrentPosL;
- int zzMarkedPosL;
- int zzEndReadL = zzEndRead;
- char [] zzBufferL = zzBuffer;
- char [] zzCMapL = ZZ_CMAP;
-
- int [] zzTransL = ZZ_TRANS;
- int [] zzRowMapL = ZZ_ROWMAP;
- int [] zzAttrL = ZZ_ATTRIBUTE;
-
- while (true) {
- zzMarkedPosL = zzMarkedPos;
-
- yychar+= zzMarkedPosL-zzStartRead;
-
- zzAction = -1;
-
- zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
-
- zzState = ZZ_LEXSTATE[zzLexicalState];
-
- // set up zzAction for empty match case:
- int zzAttributes = zzAttrL[zzState];
- if ( (zzAttributes & 1) == 1 ) {
- zzAction = zzState;
- }
-
-
- zzForAction: {
- while (true) {
-
- if (zzCurrentPosL < zzEndReadL)
- zzInput = zzBufferL[zzCurrentPosL++];
- else if (zzAtEOF) {
- zzInput = YYEOF;
- break zzForAction;
- }
- else {
- // store back cached positions
- zzCurrentPos = zzCurrentPosL;
- zzMarkedPos = zzMarkedPosL;
- boolean eof = zzRefill();
- // get translated positions and possibly new buffer
- zzCurrentPosL = zzCurrentPos;
- zzMarkedPosL = zzMarkedPos;
- zzBufferL = zzBuffer;
- zzEndReadL = zzEndRead;
- if (eof) {
- zzInput = YYEOF;
- break zzForAction;
- }
- else {
- zzInput = zzBufferL[zzCurrentPosL++];
- }
- }
- int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
- if (zzNext == -1) break zzForAction;
- zzState = zzNext;
-
- zzAttributes = zzAttrL[zzState];
- if ( (zzAttributes & 1) == 1 ) {
- zzAction = zzState;
- zzMarkedPosL = zzCurrentPosL;
- if ( (zzAttributes & 8) == 8 ) break zzForAction;
- }
-
- }
- }
-
- // store back cached position
- zzMarkedPos = zzMarkedPosL;
-
- switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
- case 2:
- { return WORD_TYPE;
- }
- case 11: break;
- case 5:
- { return SOUTH_EAST_ASIAN_TYPE;
- }
- case 12: break;
- case 1:
- { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
- }
- case 13: break;
- case 10:
- { return URL_TYPE;
- }
- case 14: break;
- case 9:
- { return EMAIL_TYPE;
- }
- case 15: break;
- case 4:
- { return KATAKANA_TYPE;
- }
- case 16: break;
- case 6:
- { return IDEOGRAPHIC_TYPE;
- }
- case 17: break;
- case 8:
- { return HANGUL_TYPE;
- }
- case 18: break;
- case 3:
- { return NUMERIC_TYPE;
- }
- case 19: break;
- case 7:
- { return HIRAGANA_TYPE;
- }
- case 20: break;
- default:
- if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
- zzAtEOF = true;
- {
- return StandardTokenizerInterface.YYEOF;
- }
- }
- else {
- zzScanError(ZZ_NO_MATCH);
- }
- }
- }
- }
-
-
-}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex
deleted file mode 100644
index dc78fa3..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex
+++ /dev/null
@@ -1,272 +0,0 @@
-package org.apache.lucene.analysis.standard.std34;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.standard.StandardTokenizerInterface;
-import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/**
- * This class implements UAX29URLEmailTokenizer, except with a bug
- * (https://issues.apache.org/jira/browse/LUCENE-3880) where "mailto:"
- * URI scheme prepended to an email address will disrupt recognition
- * of the email address.
- * @deprecated This class is only for exact backwards compatibility
- */
- @Deprecated
-%%
-
-%unicode 6.0
-%integer
-%final
-%public
-%class UAX29URLEmailTokenizerImpl34
-%implements StandardTokenizerInterface
-%function getNextToken
-%char
-
-%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
-ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
-Format = ([\p{WB:Format}] | {FormatSupp})
-Numeric = ([\p{WB:Numeric}] | {NumericSupp})
-Extend = ([\p{WB:Extend}] | {ExtendSupp})
-Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
-MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
-MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
-MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
-ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
-ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
-Han = ([\p{Script:Han}] | {HanSupp})
-Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})
-
-// Script=Hangul & Aletter
-HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
-// UAX#29 WB4. X (Extend | Format)* --> X
-//
-ALetterEx = {ALetter} ({Format} | {Extend})*
-// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
-NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
-KatakanaEx = {Katakana} ({Format} | {Extend})*
-MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})*
-MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})*
-ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
-
-HanEx = {Han} ({Format} | {Extend})*
-HiraganaEx = {Hiragana} ({Format} | {Extend})*
-
-// URL and E-mail syntax specifications:
-//
-// RFC-952: DOD INTERNET HOST TABLE SPECIFICATION
-// RFC-1035: DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION
-// RFC-1123: Requirements for Internet Hosts - Application and Support
-// RFC-1738: Uniform Resource Locators (URL)
-// RFC-3986: Uniform Resource Identifier (URI): Generic Syntax
-// RFC-5234: Augmented BNF for Syntax Specifications: ABNF
-// RFC-5321: Simple Mail Transfer Protocol
-// RFC-5322: Internet Message Format
-
-%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
-
-DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
-DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
-DomainNameLoose = {DomainLabel} ("." {DomainLabel})*
-
-IPv4DecimalOctet = "0"{0,2} [0-9] | "0"? [1-9][0-9] | "1" [0-9][0-9] | "2" ([0-4][0-9] | "5" [0-5])
-IPv4Address = {IPv4DecimalOctet} ("." {IPv4DecimalOctet}){3}
-IPv6Hex16Bit = [0-9A-Fa-f]{1,4}
-IPv6LeastSignificant32Bits = {IPv4Address} | ({IPv6Hex16Bit} ":" {IPv6Hex16Bit})
-IPv6Address = ({IPv6Hex16Bit} ":"){6} {IPv6LeastSignificant32Bits}
- | "::" ({IPv6Hex16Bit} ":"){5} {IPv6LeastSignificant32Bits}
- | {IPv6Hex16Bit}? "::" ({IPv6Hex16Bit} ":"){4} {IPv6LeastSignificant32Bits}
- | (({IPv6Hex16Bit} ":"){0,1} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){3} {IPv6LeastSignificant32Bits}
- | (({IPv6Hex16Bit} ":"){0,2} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){2} {IPv6LeastSignificant32Bits}
- | (({IPv6Hex16Bit} ":"){0,3} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit} ":" {IPv6LeastSignificant32Bits}
- | (({IPv6Hex16Bit} ":"){0,4} {IPv6Hex16Bit})? "::" {IPv6LeastSignificant32Bits}
- | (({IPv6Hex16Bit} ":"){0,5} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit}
- | (({IPv6Hex16Bit} ":"){0,6} {IPv6Hex16Bit})? "::"
-
-URIunreserved = [-._~A-Za-z0-9]
-URIpercentEncoded = "%" [0-9A-Fa-f]{2}
-URIsubDelims = [!$&'()*+,;=]
-URIloginSegment = ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims})*
-URIlogin = {URIloginSegment} (":" {URIloginSegment})? "@"
-URIquery = "?" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
-URIfragment = "#" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
-URIport = ":" [0-9]{1,5}
-URIhostStrict = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameStrict}
-URIhostLoose = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameLoose}
-
-URIauthorityStrict = {URIhostStrict} {URIport}?
-URIauthorityLoose = {URIlogin}? {URIhostLoose} {URIport}?
-
-HTTPsegment = ({URIunreserved} | {URIpercentEncoded} | [;:@&=])*
-HTTPpath = ("/" {HTTPsegment})*
-HTTPscheme = [hH][tT][tT][pP][sS]? "://"
-HTTPurlFull = {HTTPscheme} {URIauthorityLoose} {HTTPpath}? {URIquery}? {URIfragment}?
-// {HTTPurlNoScheme} excludes {URIlogin}, because it could otherwise accept e-mail addresses
-HTTPurlNoScheme = {URIauthorityStrict} {HTTPpath}? {URIquery}? {URIfragment}?
-HTTPurl = {HTTPurlFull} | {HTTPurlNoScheme}
-
-FTPorFILEsegment = ({URIunreserved} | {URIpercentEncoded} | [?:@&=])*
-FTPorFILEpath = "/" {FTPorFILEsegment} ("/" {FTPorFILEsegment})*
-FTPtype = ";" [tT][yY][pP][eE] "=" [aAiIdD]
-FTPscheme = [fF][tT][pP] "://"
-FTPurl = {FTPscheme} {URIauthorityLoose} {FTPorFILEpath} {FTPtype}? {URIfragment}?
-
-FILEscheme = [fF][iI][lL][eE] "://"
-FILEurl = {FILEscheme} {URIhostLoose}? {FTPorFILEpath} {URIfragment}?
-
-URL = {HTTPurl} | {FTPurl} | {FILEurl}
-
-EMAILquotedString = [\"] ([\u0001-\u0008\u000B\u000C\u000E-\u0021\u0023-\u005B\u005D-\u007E] | [\\] [\u0000-\u007F])* [\"]
-EMAILatomText = [A-Za-z0-9!#$%&'*+-/=?\^_`{|}~]
-EMAILlabel = {EMAILatomText}+ | {EMAILquotedString}
-EMAILlocalPart = {EMAILlabel} ("." {EMAILlabel})*
-EMAILdomainLiteralText = [\u0001-\u0008\u000B\u000C\u000E-\u005A\u005E-\u007F] | [\\] [\u0000-\u007F]
-// DFA minimization allows {IPv6Address} and {IPv4Address} to be included
-// in the {EMAILbracketedHost} definition without incurring any size penalties,
-// since {EMAILdomainLiteralText} recognizes all valid IP addresses.
-// The IP address regexes are included in {EMAILbracketedHost} simply as a
-// reminder that they are acceptable bracketed host forms.
-EMAILbracketedHost = "[" ({EMAILdomainLiteralText}* | {IPv4Address} | [iI][pP][vV] "6:" {IPv6Address}) "]"
-EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
-
-
-%{
- /** Alphanumeric sequences */
- public static final int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;
-
- /** Numbers */
- public static final int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;
-
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- *
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
- public static final int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
-
- public static final int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
-
- public static final int HIRAGANA_TYPE = UAX29URLEmailTokenizer.HIRAGANA;
-
- public static final int KATAKANA_TYPE = UAX29URLEmailTokenizer.KATAKANA;
-
- public static final int HANGUL_TYPE = UAX29URLEmailTokenizer.HANGUL;
-
- public static final int EMAIL_TYPE = UAX29URLEmailTokenizer.EMAIL;
-
- public static final int URL_TYPE = UAX29URLEmailTokenizer.URL;
-
- public final int yychar()
- {
- return yychar;
- }
-
- /**
- * Fills CharTermAttribute with the current token text.
- */
- public final void getText(CharTermAttribute t) {
- t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
- }
-%}
-
-%%
-
-// UAX#29 WB1. sot ÷
-// WB2. ÷ eot
-//
-<> { return StandardTokenizerInterface.YYEOF; }
-
-{URL} { return URL_TYPE; }
-{EMAIL} { return EMAIL_TYPE; }
-
-// UAX#29 WB8. Numeric × Numeric
-// WB11. Numeric (MidNum | MidNumLet) × Numeric
-// WB12. Numeric × (MidNum | MidNumLet) Numeric
-// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
-//
-{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
- | {MidNumericEx} {NumericEx}
- | {NumericEx})*
-{ExtendNumLetEx}*
- { return NUMERIC_TYPE; }
-
-// subset of the below for typing purposes only!
-{HangulEx}+
- { return HANGUL_TYPE; }
-
-{KatakanaEx}+
- { return KATAKANA_TYPE; }
-
-// UAX#29 WB5. ALetter × ALetter
-// WB6. ALetter × (MidLetter | MidNumLet) ALetter
-// WB7. ALetter (MidLetter | MidNumLet) × ALetter
-// WB9. ALetter × Numeric
-// WB10. Numeric × ALetter
-// WB13. Katakana × Katakana
-// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
-//
-{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
- | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
- | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
-({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
- | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
- | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
-{ExtendNumLetEx}*
- { return WORD_TYPE; }
-
-
-// From UAX #29:
-//
-// [C]haracters with the Line_Break property values of Contingent_Break (CB),
-// Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
-// boundary property values based on criteria outside of the scope of this
-// annex. That means that satisfactory treatment of languages like Chinese
-// or Thai requires special handling.
-//
-// In Unicode 6.0, only one character has the \p{Line_Break = Contingent_Break}
-// property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
-//
-// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
-// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
-// Lao, etc.) are kept together. This grammar does the same below.
-//
-// See also the Unicode Line Breaking Algorithm:
-//
-// http://www.unicode.org/reports/tr14/#SA
-//
-{ComplexContext}+ { return SOUTH_EAST_ASIAN_TYPE; }
-
-// UAX#29 WB14. Any ÷ Any
-//
-{HanEx} { return IDEOGRAPHIC_TYPE; }
-{HiraganaEx} { return HIRAGANA_TYPE; }
-
-
-// UAX#29 WB3. CR × LF
-// WB3a. (Newline | CR | LF) ÷
-// WB3b. ÷ (Newline | CR | LF)
-// WB14. Any ÷ Any
-//
-[^] { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/package.html b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/package.html
deleted file mode 100644
index f34c995..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
-Backwards-compatible implementation to match {@link org.apache.lucene.util.Version#LUCENE_34}
-
-
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
index c1a6305..a77b856 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.sv;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java
index 226c974..2ed8da6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.sv;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
index 036ba5f..64c5e1d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.sv;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java
index 7750114..6b61d7f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.synonym;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
index fe6bafd..77e8862 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.synonym;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
index d871529..5de0c6c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.synonym;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java
index 20aeea0..db7d354 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.synonym;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -92,7 +92,7 @@ public class WordnetSynonymParser extends SynonymMap.Builder {
return analyze(analyzer, text, reuse);
}
- private void addInternal(CharsRef synset[], int size) throws IOException {
+ private void addInternal(CharsRef synset[], int size) {
if (size <= 1) {
return; // nothing to do
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
index 7dd505c..a28263b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
@@ -33,13 +33,6 @@ import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words.
- *
- *
- *
You must specify the required {@link Version}
- * compatibility when creating ThaiAnalyzer:
- *
- *
As of 3.6, a set of Thai stopwords is used by default
- *
*/
public final class ThaiAnalyzer extends StopwordAnalyzerBase {
@@ -84,7 +77,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
* @param matchVersion lucene compatibility version
*/
public ThaiAnalyzer(Version matchVersion) {
- this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_36) ? DefaultSetHolder.DEFAULT_STOP_SET : StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
@@ -112,8 +105,7 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
- if (matchVersion.onOrAfter(Version.LUCENE_31))
- result = new LowerCaseFilter(matchVersion, result);
+ result = new LowerCaseFilter(matchVersion, result);
result = new ThaiWordFilter(matchVersion, result);
return new TokenStreamComponents(source, new StopFilter(matchVersion,
result, stopwords));
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
index b2bc64f..1705b23 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
@@ -23,7 +23,6 @@ import java.util.Locale;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -34,10 +33,6 @@ import org.apache.lucene.util.Version;
/**
* {@link TokenFilter} that use {@link java.text.BreakIterator} to break each
* Token that is Thai into separate Token(s) for each Thai word.
- *
Please note: Since matchVersion 3.1 on, this filter no longer lowercases non-thai text.
- * {@link ThaiAnalyzer} will insert a {@link LowerCaseFilter} before this filter
- * so the behaviour of the Analyzer does not change. With version 3.1, the filter handles
- * position increments correctly.
*
WARNING: this filter may not be supported by all JREs.
* It is known to work with Sun/Oracle and Harmony JREs.
* If your application needs to be fully portable, consider using ICUTokenizer instead,
@@ -58,8 +53,6 @@ public final class ThaiWordFilter extends TokenFilter {
private final BreakIterator breaker = (BreakIterator) proto.clone();
private final CharArrayIterator charIterator = CharArrayIterator.newWordInstance();
- private final boolean handlePosIncr;
-
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
@@ -72,11 +65,9 @@ public final class ThaiWordFilter extends TokenFilter {
/** Creates a new ThaiWordFilter with the specified match version. */
public ThaiWordFilter(Version matchVersion, TokenStream input) {
- super(matchVersion.onOrAfter(Version.LUCENE_31) ?
- input : new LowerCaseFilter(matchVersion, input));
+ super(input);
if (!DBBI_AVAILABLE)
throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
- handlePosIncr = matchVersion.onOrAfter(Version.LUCENE_31);
}
@Override
@@ -92,7 +83,7 @@ public final class ThaiWordFilter extends TokenFilter {
} else {
offsetAtt.setOffset(clonedOffsetAtt.startOffset() + start, clonedOffsetAtt.startOffset() + end);
}
- if (handlePosIncr) posAtt.setPositionIncrement(1);
+ posAtt.setPositionIncrement(1);
return true;
}
hasMoreTokensInClone = false;
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
index 4523897..31fc3fb 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.tr;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java
index 923c4fc..e26b363 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.tr;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayIterator.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayIterator.java
index 91a30cf..667dbf4 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayIterator.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayIterator.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.util;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
index aa5e9bc..9ebfd62 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
@@ -1,6 +1,6 @@
package org.apache.lucene.analysis.util;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -650,7 +650,7 @@ public class CharArrayMap extends AbstractMap
PayloadData is metadata associated with the current term position. If
* PayloadLength is stored at the current position, then it indicates the length
- * of this Payload. If PayloadLength is not stored, then this Payload has the same
- * length as the Payload at the previous position.
+ * of this payload. If PayloadLength is not stored, then this payload has the same
+ * length as the payload at the previous position.
*
OffsetDelta/2 is the difference between this position's startOffset from the
* previous occurrence (or zero, if this is the first occurrence in this document).
* If OffsetDelta is odd, then the length (endOffset-startOffset) differs from the
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
index a7624dc..5898d30 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -20,6 +20,7 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
@@ -36,7 +37,7 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
/**
* Concrete class that reads the 4.0 frq/prox
@@ -58,29 +59,35 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
// private String segment;
public Lucene40PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException {
- freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION),
+ boolean success = false;
+ IndexInput freqIn = null;
+ IndexInput proxIn = null;
+ try {
+ freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION),
ioContext);
- // TODO: hasProx should (somehow!) become codec private,
- // but it's tricky because 1) FIS.hasProx is global (it
- // could be all fields that have prox are written by a
- // different codec), 2) the field may have had prox in
- // the past but all docs w/ that field were deleted.
- // Really we'd need to init prxOut lazily on write, and
- // then somewhere record that we actually wrote it so we
- // know whether to open on read:
- if (fieldInfos.hasProx()) {
- boolean success = false;
- try {
+ CodecUtil.checkHeader(freqIn, Lucene40PostingsWriter.FRQ_CODEC, Lucene40PostingsWriter.VERSION_START,Lucene40PostingsWriter.VERSION_START);
+ // TODO: hasProx should (somehow!) become codec private,
+ // but it's tricky because 1) FIS.hasProx is global (it
+ // could be all fields that have prox are written by a
+ // different codec), 2) the field may have had prox in
+ // the past but all docs w/ that field were deleted.
+ // Really we'd need to init prxOut lazily on write, and
+ // then somewhere record that we actually wrote it so we
+ // know whether to open on read:
+ if (fieldInfos.hasProx()) {
proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION),
- ioContext);
- success = true;
- } finally {
- if (!success) {
- freqIn.close();
- }
+ ioContext);
+ CodecUtil.checkHeader(proxIn, Lucene40PostingsWriter.PRX_CODEC, Lucene40PostingsWriter.VERSION_START,Lucene40PostingsWriter.VERSION_START);
+ } else {
+ proxIn = null;
+ }
+ this.freqIn = freqIn;
+ this.proxIn = proxIn;
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(freqIn, proxIn);
}
- } else {
- proxIn = null;
}
}
@@ -88,7 +95,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
public void init(IndexInput termsIn) throws IOException {
// Make sure we are talking to the matching past writer
- CodecUtil.checkHeader(termsIn, Lucene40PostingsWriter.CODEC,
+ CodecUtil.checkHeader(termsIn, Lucene40PostingsWriter.TERMS_CODEC,
Lucene40PostingsWriter.VERSION_START, Lucene40PostingsWriter.VERSION_START);
skipInterval = termsIn.readInt();
@@ -212,9 +219,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
- if (needsFreqs && fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
- return null;
- } else if (canReuse(reuse, liveDocs)) {
+ if (canReuse(reuse, liveDocs)) {
// if (DEBUG) System.out.println("SPR.docs ts=" + termState);
return ((SegmentDocsEnumBase) reuse).reset(fieldInfo, (StandardTermState)termState);
}
@@ -249,9 +254,6 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
throws IOException {
boolean hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
- if (needsOffsets && !hasOffsets) {
- return null; // not available
- }
// TODO: refactor
if (fieldInfo.hasPayloads() || hasOffsets) {
@@ -317,7 +319,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
protected boolean skipped;
protected final Bits liveDocs;
- SegmentDocsEnumBase(IndexInput startFreqIn, Bits liveDocs) throws IOException {
+ SegmentDocsEnumBase(IndexInput startFreqIn, Bits liveDocs) {
this.startFreqIn = startFreqIn;
this.freqIn = (IndexInput)startFreqIn.clone();
this.liveDocs = liveDocs;
@@ -351,7 +353,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
@Override
- public final int freq() throws IOException {
+ public final int freq() {
assert !indexOmitsTF;
return freq;
}
@@ -497,7 +499,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
private final class AllDocsSegmentDocsEnum extends SegmentDocsEnumBase {
- AllDocsSegmentDocsEnum(IndexInput startFreqIn) throws IOException {
+ AllDocsSegmentDocsEnum(IndexInput startFreqIn) {
super(startFreqIn, null);
assert liveDocs == null;
}
@@ -574,7 +576,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
private final class LiveDocsSegmentDocsEnum extends SegmentDocsEnumBase {
- LiveDocsSegmentDocsEnum(IndexInput startFreqIn, Bits liveDocs) throws IOException {
+ LiveDocsSegmentDocsEnum(IndexInput startFreqIn, Bits liveDocs) {
super(startFreqIn, liveDocs);
assert liveDocs != null;
}
@@ -694,7 +696,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
Lucene40SkipListReader skipper;
private long lazyProxPointer;
- public SegmentDocsAndPositionsEnum(IndexInput freqIn, IndexInput proxIn) throws IOException {
+ public SegmentDocsAndPositionsEnum(IndexInput freqIn, IndexInput proxIn) {
startFreqIn = freqIn;
this.freqIn = (IndexInput) freqIn.clone();
this.proxIn = (IndexInput) proxIn.clone();
@@ -770,7 +772,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
@Override
- public int freq() throws IOException {
+ public int freq() {
return freq;
}
@@ -851,12 +853,12 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
@Override
- public int startOffset() throws IOException {
+ public int startOffset() {
return -1;
}
@Override
- public int endOffset() throws IOException {
+ public int endOffset() {
return -1;
}
@@ -907,7 +909,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
int offsetLength;
int startOffset;
- public SegmentFullPositionsEnum(IndexInput freqIn, IndexInput proxIn) throws IOException {
+ public SegmentFullPositionsEnum(IndexInput freqIn, IndexInput proxIn) {
startFreqIn = freqIn;
this.freqIn = (IndexInput) freqIn.clone();
this.proxIn = (IndexInput) proxIn.clone();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
index f103897..91c135c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -24,6 +24,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.CorruptIndexException;
@@ -35,7 +36,6 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
/**
@@ -45,7 +45,9 @@ import org.apache.lucene.util.IOUtils;
* @lucene.experimental
*/
public final class Lucene40PostingsWriter extends PostingsWriterBase {
- final static String CODEC = "Lucene40PostingsWriter";
+ final static String TERMS_CODEC = "Lucene40PostingsWriterTerms";
+ final static String FRQ_CODEC = "Lucene40PostingsWriterFrq";
+ final static String PRX_CODEC = "Lucene40PostingsWriterPrx";
//private static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
@@ -102,7 +104,9 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION);
freqOut = state.directory.createOutput(fileName, state.context);
boolean success = false;
+ IndexOutput proxOut = null;
try {
+ CodecUtil.writeHeader(freqOut, FRQ_CODEC, VERSION_CURRENT);
// TODO: this is a best effort, if one of these fields has no postings
// then we make an empty prx file, same as if we are wrapped in
// per-field postingsformat. maybe... we shouldn't
@@ -112,14 +116,16 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
// prox file
fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION);
proxOut = state.directory.createOutput(fileName, state.context);
+ CodecUtil.writeHeader(proxOut, PRX_CODEC, VERSION_CURRENT);
} else {
// Every field omits TF so we will write no prox file
proxOut = null;
}
+ this.proxOut = proxOut;
success = true;
} finally {
if (!success) {
- IOUtils.closeWhileHandlingException(freqOut);
+ IOUtils.closeWhileHandlingException(freqOut, proxOut);
}
}
@@ -135,7 +141,7 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
@Override
public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
- CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+ CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
termsOut.writeInt(skipMinimum); // write skipMinimum
@@ -246,6 +252,7 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
// and the numbers aren't that much smaller anyways.
int offsetDelta = startOffset - lastOffset;
int offsetLength = endOffset - startOffset;
+ assert offsetDelta >= 0 && offsetLength >= 0 : "startOffset=" + startOffset + ",lastOffset=" + lastOffset + ",endOffset=" + endOffset;
if (offsetLength != lastOffsetLength) {
proxOut.writeVInt(offsetDelta << 1 | 1);
proxOut.writeVInt(offsetLength);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java
index 572cb03..d70b05d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,13 +17,13 @@ package org.apache.lucene.codecs.lucene40;
* limitations under the License.
*/
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.index.IndexWriter; // javadocs
import org.apache.lucene.index.SegmentInfos; // javadocs
import org.apache.lucene.store.DataOutput; // javadocs
-import org.apache.lucene.util.CodecUtil; // javadocs
/**
* Lucene 4.0 Segment info format.
@@ -72,12 +72,12 @@ public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
private final SegmentInfoWriter writer = new Lucene40SegmentInfoWriter();
@Override
- public SegmentInfoReader getSegmentInfosReader() {
+ public SegmentInfoReader getSegmentInfoReader() {
return reader;
}
@Override
- public SegmentInfoWriter getSegmentInfosWriter() {
+ public SegmentInfoWriter getSegmentInfoWriter() {
return writer;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java
index 0d501c3..0aeb8bf 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -22,13 +22,13 @@ import java.util.Collections;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
/**
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java
index dae3f21..18def28 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -26,7 +27,6 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
/**
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java
index c082ec3..8f087ca 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
index 5eb2d88..e2af183 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java
index 8f79e2b..2ebc99f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
@@ -27,7 +28,6 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.util.CodecUtil;
/**
* Lucene 4.0 Stored Fields Format.
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
index cc3691c..3cb24de 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
@@ -30,7 +31,6 @@ import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import java.io.Closeable;
@@ -136,7 +136,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
indexStream.seek(HEADER_LENGTH_IDX + docID * 8L);
}
- public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
+ public final void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
seekIndex(n);
fieldsStream.seek(indexStream.readLong());
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
index 73be17e..ed699cd 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
@@ -18,15 +18,15 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.document.Document;
-import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.Directory;
@@ -35,7 +35,6 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
/**
@@ -209,7 +208,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
}
@Override
- public void finish(FieldInfos fis, int numDocs) throws IOException {
+ public void finish(FieldInfos fis, int numDocs) {
if (HEADER_LENGTH_IDX+((long) numDocs)*8 != indexStream.getFilePointer())
// This is most likely a bug in Sun JRE 1.6.0_04/_05;
// we detect that the bug has struck, here, and
@@ -226,7 +225,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
int rawDocLengths[] = new int[MAX_RAW_MERGE_DOCS];
int idx = 0;
- for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
+ for (AtomicReader reader : mergeState.readers) {
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
Lucene40StoredFieldsReader matchingFieldsReader = null;
if (matchingSegmentReader != null) {
@@ -237,7 +236,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
}
}
- if (reader.liveDocs != null) {
+ if (reader.getLiveDocs() != null) {
docCount += copyFieldsWithDeletions(mergeState,
reader, matchingFieldsReader, rawDocLengths);
} else {
@@ -253,12 +252,12 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
when merging stored fields */
private final static int MAX_RAW_MERGE_DOCS = 4192;
- private int copyFieldsWithDeletions(MergeState mergeState, final MergeState.IndexReaderAndLiveDocs reader,
+ private int copyFieldsWithDeletions(MergeState mergeState, final AtomicReader reader,
final Lucene40StoredFieldsReader matchingFieldsReader, int rawDocLengths[])
- throws IOException, MergeAbortedException, CorruptIndexException {
+ throws IOException {
int docCount = 0;
- final int maxDoc = reader.reader.maxDoc();
- final Bits liveDocs = reader.liveDocs;
+ final int maxDoc = reader.maxDoc();
+ final Bits liveDocs = reader.getLiveDocs();
assert liveDocs != null;
if (matchingFieldsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
@@ -298,7 +297,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
// on the fly?
// NOTE: it's very important to first assign to doc then pass it to
// fieldsWriter.addDocument; see LUCENE-1282
- Document doc = reader.reader.document(j);
+ Document doc = reader.document(j);
addDocument(doc, mergeState.fieldInfos);
docCount++;
mergeState.checkAbort.work(300);
@@ -307,10 +306,10 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
return docCount;
}
- private int copyFieldsNoDeletions(MergeState mergeState, final MergeState.IndexReaderAndLiveDocs reader,
+ private int copyFieldsNoDeletions(MergeState mergeState, final AtomicReader reader,
final Lucene40StoredFieldsReader matchingFieldsReader, int rawDocLengths[])
- throws IOException, MergeAbortedException, CorruptIndexException {
- final int maxDoc = reader.reader.maxDoc();
+ throws IOException {
+ final int maxDoc = reader.maxDoc();
int docCount = 0;
if (matchingFieldsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
@@ -325,7 +324,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
for (; docCount < maxDoc; docCount++) {
// NOTE: it's very important to first assign to doc then pass it to
// fieldsWriter.addDocument; see LUCENE-1282
- Document doc = reader.reader.document(docCount);
+ Document doc = reader.document(docCount);
addDocument(doc, mergeState.fieldInfos);
mergeState.checkAbort.work(300);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java
index 69b49fa..89c46e4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
@@ -27,7 +28,6 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.util.CodecUtil;
/**
* Lucene 4.0 Term Vectors format.
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
index 087acbc..0bd9b4d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -23,8 +23,8 @@ import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
-import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
@@ -40,7 +40,6 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
/**
@@ -96,7 +95,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
}
public Lucene40TermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
- throws CorruptIndexException, IOException {
+ throws IOException {
final String segment = si.name;
final int size = si.getDocCount();
@@ -252,7 +251,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
private int fieldUpto;
@Override
- public String next() throws IOException {
+ public String next() {
if (fieldNumbers != null && fieldUpto < fieldNumbers.length) {
return fieldInfos.fieldInfo(fieldNumbers[fieldUpto++]).name;
} else {
@@ -365,7 +364,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
private int[] endOffsets;
// NOTE: tvf is pre-positioned by caller
- public TVTermsEnum() throws IOException {
+ public TVTermsEnum() {
this.origTVF = Lucene40TermVectorsReader.this.tvf;
tvf = (IndexInput) origTVF.clone();
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
index 5cc2d1b..67b32c2 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -20,13 +20,14 @@ package org.apache.lucene.codecs.lucene40;
import java.io.IOException;
import java.util.Comparator;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.DataInput;
@@ -36,7 +37,6 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
@@ -255,7 +255,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
int idx = 0;
int numDocs = 0;
- for (final MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
+ for (final AtomicReader reader : mergeState.readers) {
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
Lucene40TermVectorsReader matchingVectorsReader = null;
if (matchingSegmentReader != null) {
@@ -265,7 +265,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
matchingVectorsReader = (Lucene40TermVectorsReader) vectorsReader;
}
}
- if (reader.liveDocs != null) {
+ if (reader.getLiveDocs() != null) {
numDocs += copyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
} else {
numDocs += copyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
@@ -281,12 +281,12 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
private int copyVectorsWithDeletions(MergeState mergeState,
final Lucene40TermVectorsReader matchingVectorsReader,
- final MergeState.IndexReaderAndLiveDocs reader,
+ final AtomicReader reader,
int rawDocLengths[],
int rawDocLengths2[])
- throws IOException, MergeAbortedException {
- final int maxDoc = reader.reader.maxDoc();
- final Bits liveDocs = reader.liveDocs;
+ throws IOException {
+ final int maxDoc = reader.maxDoc();
+ final Bits liveDocs = reader.getLiveDocs();
int totalNumDocs = 0;
if (matchingVectorsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
@@ -323,7 +323,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
- Fields vectors = reader.reader.getTermVectors(docNum);
+ Fields vectors = reader.getTermVectors(docNum);
addAllDocVectors(vectors, mergeState.fieldInfos);
totalNumDocs++;
mergeState.checkAbort.work(300);
@@ -334,11 +334,11 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
private int copyVectorsNoDeletions(MergeState mergeState,
final Lucene40TermVectorsReader matchingVectorsReader,
- final MergeState.IndexReaderAndLiveDocs reader,
+ final AtomicReader reader,
int rawDocLengths[],
int rawDocLengths2[])
- throws IOException, MergeAbortedException {
- final int maxDoc = reader.reader.maxDoc();
+ throws IOException {
+ final int maxDoc = reader.maxDoc();
if (matchingVectorsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
int docCount = 0;
@@ -353,7 +353,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
for (int docNum = 0; docNum < maxDoc; docNum++) {
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
- Fields vectors = reader.reader.getTermVectors(docNum);
+ Fields vectors = reader.getTermVectors(docNum);
addAllDocVectors(vectors, mergeState.fieldInfos);
mergeState.checkAbort.work(300);
}
@@ -362,7 +362,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
}
@Override
- public void finish(FieldInfos fis, int numDocs) throws IOException {
+ public void finish(FieldInfos fis, int numDocs) {
if (HEADER_LENGTH_INDEX+((long) numDocs)*16 != tvx.getFilePointer())
// This is most likely a bug in Sun JRE 1.6.0_04/_05;
// we detect that the bug has struck, here, and
@@ -382,7 +382,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
}
@Override
- public Comparator getComparator() throws IOException {
+ public Comparator getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
index 1032690..cbec979 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
@@ -41,7 +42,6 @@ import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
-import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
@@ -121,14 +121,11 @@ public final class Bytes {
* {@link Type#BYTES_VAR_SORTED}.
* @param context I/O Context
* @return a new {@link Writer} instance
- * @throws IOException
- * if the files for the writer can not be created.
* @see PackedInts#getReader(org.apache.lucene.store.DataInput)
*/
public static DocValuesConsumer getWriter(Directory dir, String id, Mode mode,
boolean fixedSize, Comparator sortComparator,
- Counter bytesUsed, IOContext context, float acceptableOverheadRatio)
- throws IOException {
+ Counter bytesUsed, IOContext context, float acceptableOverheadRatio) {
// TODO -- i shouldn't have to specify fixed? can
// track itself & do the write thing at write time?
if (sortComparator == null) {
@@ -244,7 +241,7 @@ public final class Bytes {
private final IOContext context;
protected BytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
- int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
+ int version, Counter bytesUsed, IOContext context, Type type) {
super(bytesUsed, type);
this.id = id;
this.dir = dir;
@@ -388,21 +385,19 @@ public final class Bytes {
protected long maxBytes = 0;
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
- int codecVersion, Counter bytesUsed, IOContext context, Type type)
- throws IOException {
+ int codecVersion, Counter bytesUsed, IOContext context, Type type) {
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, PackedInts.DEFAULT, type);
}
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
- int codecVersion, Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type)
- throws IOException {
+ int codecVersion, Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type) {
this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, acceptableOverheadRatio, type);
}
protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, int codecVersion, Allocator allocator,
- Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type) throws IOException {
+ Counter bytesUsed, IOContext context, float acceptableOverheadRatio, Type type) {
super(dir, id, codecNameIdx, codecNameDat, codecVersion, bytesUsed, context, type);
hash = new BytesRefHash(new ByteBlockPool(allocator),
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
@@ -473,6 +468,10 @@ public final class Bytes {
}
}
+ public int getValueSize() {
+ return size;
+ }
+
// Important that we get docCount, in case there were
// some last docs that we didn't see
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java
index 5f38e3c..2acbb56 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -43,6 +43,11 @@ abstract class DirectSource extends Source {
toNumeric = new ShortToLong();
break;
case FLOAT_32:
+ toNumeric = new BytesToFloat();
+ break;
+ case FLOAT_64:
+ toNumeric = new BytesToDouble();
+ break;
case FIXED_INTS_32:
toNumeric = new IntToLong();
break;
@@ -58,10 +63,10 @@ abstract class DirectSource extends Source {
public BytesRef getBytes(int docID, BytesRef ref) {
try {
final int sizeToRead = position(docID);
+ ref.offset = 0;
ref.grow(sizeToRead);
data.readBytes(ref.bytes, 0, sizeToRead);
ref.length = sizeToRead;
- ref.offset = 0;
return ref;
} catch (IOException ex) {
throw new IllegalStateException("failed to get value for docID: " + docID, ex);
@@ -103,7 +108,6 @@ abstract class DirectSource extends Source {
long toLong(IndexInput input) throws IOException {
return input.readByte();
}
-
}
private static final class ShortToLong extends ToNumeric {
@@ -118,11 +122,30 @@ abstract class DirectSource extends Source {
long toLong(IndexInput input) throws IOException {
return input.readInt();
}
+ }
+
+ private static final class BytesToFloat extends ToNumeric {
+ @Override
+ long toLong(IndexInput input) {
+ throw new UnsupportedOperationException("ints are not supported");
+ }
double toDouble(IndexInput input) throws IOException {
return Float.intBitsToFloat(input.readInt());
}
}
+
+ private static final class BytesToDouble extends ToNumeric {
+ @Override
+ long toLong(IndexInput input) {
+ throw new UnsupportedOperationException("ints are not supported");
+ }
+
+ double toDouble(IndexInput input) throws IOException {
+ return Double.longBitsToDouble(input.readLong());
+ }
+ }
+
private static final class LongToLong extends ToNumeric {
@Override
@@ -130,8 +153,8 @@ abstract class DirectSource extends Source {
return input.readLong();
}
- double toDouble(IndexInput input) throws IOException {
- return Double.longBitsToDouble(input.readLong());
+ double toDouble(IndexInput input) {
+ throw new UnsupportedOperationException("doubles are not supported");
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java
index 22c3655..b5ec3e1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
index c7e8740..5f73318 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -46,8 +46,7 @@ class FixedDerefBytesImpl {
static final int VERSION_CURRENT = VERSION_START;
public static class Writer extends DerefBytesWriterBase {
- public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
- throws IOException {
+ public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_DEREF);
}
@@ -67,6 +66,7 @@ class FixedDerefBytesImpl {
idxOut.writeInt(numValues);
writeIndex(idxOut, docCount, numValues, docToEntry);
}
+
}
public static class FixedDerefReader extends BytesReaderBase {
@@ -109,8 +109,7 @@ class FixedDerefBytesImpl {
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
- final int id = (int) addresses.get(docID);
- return data.fillSlice(bytesRef, (id * size), size);
+ return data.fillSlice(bytesRef, addresses.get(docID) * size, size);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
index e5f0a7d..3e18fbc 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -58,7 +58,7 @@ class FixedSortedBytesImpl {
private final Comparator comp;
public Writer(Directory dir, String id, Comparator comp,
- Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
+ Counter bytesUsed, IOContext context, float acceptableOverheadRatio) {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_FIXED_SORTED);
this.comp = comp;
}
@@ -213,10 +213,10 @@ class FixedSortedBytesImpl {
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
try {
datIn.seek(basePointer + size * ord);
+ bytesRef.offset = 0;
bytesRef.grow(size);
datIn.readBytes(bytesRef.bytes, 0, size);
bytesRef.length = size;
- bytesRef.offset = 0;
return bytesRef;
} catch (IOException ex) {
throw new IllegalStateException("failed to getByOrd", ex);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
index ced34f3..bc072ad 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -62,12 +62,12 @@ class FixedStraightBytesImpl {
private final ByteBlockPool pool;
protected FixedBytesWriterBase(Directory dir, String id, String codecNameDat,
- int version, Counter bytesUsed, IOContext context) throws IOException {
+ int version, Counter bytesUsed, IOContext context) {
this(dir, id, codecNameDat, version, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
}
protected FixedBytesWriterBase(Directory dir, String id, String codecNameDat,
- int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
+ int version, Counter bytesUsed, IOContext context, Type type) {
super(dir, id, null, codecNameDat, version, bytesUsed, context, type);
pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
pool.nextBuffer();
@@ -129,17 +129,22 @@ class FixedStraightBytesImpl {
out.writeBytes(zeros, zeros.length);
}
}
+
+ @Override
+ public int getValueSize() {
+ return size;
+ }
}
static class Writer extends FixedBytesWriterBase {
private boolean hasMerged;
private IndexOutput datOut;
- public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) throws IOException {
+ public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
}
- public Writer(Directory dir, String id, String codecNameDat, int version, Counter bytesUsed, IOContext context) throws IOException {
+ public Writer(Directory dir, String id, String codecNameDat, int version, Counter bytesUsed, IOContext context) {
super(dir, id, codecNameDat, version, bytesUsed, context);
}
@@ -342,7 +347,7 @@ class FixedStraightBytesImpl {
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
- return data.fillSlice(bytesRef, docID * size, size);
+ return data.fillSlice(bytesRef, size * ((long) docID), size);
}
}
@@ -356,7 +361,7 @@ class FixedStraightBytesImpl {
@Override
protected int position(int docID) throws IOException {
- data.seek(baseOffset + size * docID);
+ data.seek(baseOffset + size * ((long) docID));
return size;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
index 4d3e054..c8a96fd 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -47,7 +47,7 @@ public class Floats {
protected static final int VERSION_CURRENT = VERSION_START;
public static DocValuesConsumer getWriter(Directory dir, String id, Counter bytesUsed,
- IOContext context, Type type) throws IOException {
+ IOContext context, Type type) {
return new FloatsWriter(dir, id, bytesUsed, context, type);
}
@@ -72,7 +72,7 @@ public class Floats {
private final int size;
private final DocValuesArraySource template;
public FloatsWriter(Directory dir, String id, Counter bytesUsed,
- IOContext context, Type type) throws IOException {
+ IOContext context, Type type) {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
size = typeToSize(type);
this.bytesRef = new BytesRef(size);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
index 22875ad..2d937f6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -46,7 +46,7 @@ public final class Ints {
}
public static DocValuesConsumer getWriter(Directory dir, String id, Counter bytesUsed,
- Type type, IOContext context) throws IOException {
+ Type type, IOContext context) {
return type == Type.VAR_INTS ? new PackedIntValues.PackedIntsWriter(dir, id,
bytesUsed, context) : new IntsWriter(dir, id, bytesUsed, context, type);
}
@@ -92,12 +92,12 @@ public final class Ints {
private final DocValuesArraySource template;
public IntsWriter(Directory dir, String id, Counter bytesUsed,
- IOContext context, Type valueType) throws IOException {
+ IOContext context, Type valueType) {
this(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, valueType);
}
protected IntsWriter(Directory dir, String id, String codecName,
- int version, Counter bytesUsed, IOContext context, Type valueType) throws IOException {
+ int version, Counter bytesUsed, IOContext context, Type valueType) {
super(dir, id, codecName, version, bytesUsed, context);
size = typeToSize(valueType);
this.bytesRef = new BytesRef(size);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
index f3c5ece..c40534f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene40.values;
*/
import java.io.IOException;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.lucene40.values.FixedStraightBytesImpl.FixedBytesWriterBase;
import org.apache.lucene.index.DocValues.Source;
@@ -30,7 +31,6 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
@@ -57,7 +57,7 @@ class PackedIntValues {
private int lastDocId = -1;
protected PackedIntsWriter(Directory dir, String id, Counter bytesUsed,
- IOContext context) throws IOException {
+ IOContext context) {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.VAR_INTS);
bytesRef = new BytesRef(8);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
index fa46bf6..c766a36 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -57,8 +57,7 @@ class VarDerefBytesImpl {
* order and merge them in a streamed fashion.
*/
static class Writer extends DerefBytesWriterBase {
- public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
- throws IOException {
+ public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_DEREF);
size = 0;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
index 1d822e0..0cbf6b9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -60,7 +60,7 @@ final class VarSortedBytesImpl {
private final Comparator comp;
public Writer(Directory dir, String id, Comparator comp,
- Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
+ Counter bytesUsed, IOContext context, float acceptableOverheadRatio) {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_VAR_SORTED);
this.comp = comp;
size = 0;
@@ -239,10 +239,10 @@ final class VarSortedBytesImpl {
final long nextOffset = ordToOffsetIndex.get(1+ord);
datIn.seek(basePointer + offset);
final int length = (int) (nextOffset - offset);
+ bytesRef.offset = 0;
bytesRef.grow(length);
datIn.readBytes(bytesRef.bytes, 0, length);
bytesRef.length = length;
- bytesRef.offset = 0;
return bytesRef;
} catch (IOException ex) {
throw new IllegalStateException("failed", ex);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
index f3b8444..126c11d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -64,8 +64,7 @@ class VarStraightBytesImpl {
private final ByteBlockPool pool;
private IndexOutput datOut;
private boolean merge = false;
- public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
- throws IOException {
+ public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_STRAIGHT);
pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
docToAddress = new long[1];
@@ -122,7 +121,7 @@ class VarStraightBytesImpl {
final IndexInput cloneIdx = reader.cloneIndex();
try {
numDataBytes = cloneIdx.readVLong();
- final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx);
+ final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx, PackedInts.DEFAULT_BUFFER_SIZE);
for (int i = 0; i < maxDocs; i++) {
long offset = iter.next();
++lastDocID;
@@ -232,6 +231,11 @@ class VarStraightBytesImpl {
public long ramBytesUsed() {
return bytesUsed.get();
}
+
+ @Override
+ public int getValueSize() {
+ return -1;
+ }
}
public static class VarStraightReader extends BytesReaderBase {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java
index e384457..1b40e49 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.lucene40.values;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -83,11 +83,10 @@ abstract class Writer extends DocValuesConsumer {
* docvalues of type {@link Type#BYTES_FIXED_SORTED} and
* {@link Type#BYTES_VAR_SORTED}.
* @return a new {@link Writer} instance for the given {@link Type}
- * @throws IOException
- * @see PackedInts#getReader(org.apache.lucene.store.DataInput, float)
+ * @see PackedInts#getReader(org.apache.lucene.store.DataInput)
*/
public static DocValuesConsumer create(Type type, String id, Directory directory,
- Comparator comp, Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
+ Comparator comp, Counter bytesUsed, IOContext context, float acceptableOverheadRatio) {
if (comp == null) {
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
index 63a1ad7..fb25738 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@@ -1,6 +1,6 @@
package org.apache.lucene.codecs.memory;
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -54,6 +54,7 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.packed.PackedInts;
// TODO: would be nice to somehow allow this to act like
// InstantiatedIndex, by never writing to disk; ie you write
@@ -81,14 +82,16 @@ import org.apache.lucene.util.fst.Util;
public class MemoryPostingsFormat extends PostingsFormat {
private final boolean doPackFST;
+ private final float acceptableOverheadRatio;
public MemoryPostingsFormat() {
- this(false);
+ this(false, PackedInts.DEFAULT);
}
- public MemoryPostingsFormat(boolean doPackFST) {
+ public MemoryPostingsFormat(boolean doPackFST, float acceptableOverheadRatio) {
super("Memory");
this.doPackFST = doPackFST;
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
}
@Override
@@ -102,13 +105,15 @@ public class MemoryPostingsFormat extends PostingsFormat {
private final Builder builder;
private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
private final boolean doPackFST;
+ private final float acceptableOverheadRatio;
private int termCount;
- public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST) {
+ public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST, float acceptableOverheadRatio) {
this.out = out;
this.field = field;
this.doPackFST = doPackFST;
- builder = new Builder(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST);
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
+ builder = new Builder(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio);
}
private class PostingsWriter extends PostingsConsumer {
@@ -265,7 +270,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
out.writeVInt(docCount);
FST fst = builder.finish();
if (doPackFST) {
- fst = fst.pack(3, Math.max(10, fst.getNodeCount()/4));
+ fst = fst.pack(3, Math.max(10, fst.getNodeCount()/4), acceptableOverheadRatio);
}
fst.save(out);
//System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
@@ -290,7 +295,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public TermsConsumer addField(FieldInfo field) {
//System.out.println("\naddField field=" + field.name);
- return new TermsWriter(out, field, doPackFST);
+ return new TermsWriter(out, field, doPackFST, acceptableOverheadRatio);
}
@Override
@@ -422,7 +427,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
@Override
- public int freq() throws IOException {
+ public int freq() {
assert indexOptions != IndexOptions.DOCS_ONLY;
return freq;
}
@@ -622,7 +627,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
@Override
- public int freq() throws IOException {
+ public int freq() {
return freq;
}
}
@@ -642,7 +647,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
fstEnum = new BytesRefFSTEnum(fst);
}
- private void decodeMetaData() throws IOException {
+ private void decodeMetaData() {
if (!didDecode) {
buffer.reset(current.output.bytes, 0, current.output.length);
docFreq = buffer.readVInt();
@@ -691,7 +696,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) {
decodeMetaData();
FSTDocsEnum docsEnum;
@@ -709,7 +714,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) {
boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (needsOffsets && !hasOffsets) {
@@ -752,13 +757,13 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
@Override
- public int docFreq() throws IOException {
+ public int docFreq() {
decodeMetaData();
return docFreq;
}
@Override
- public long totalTermFreq() throws IOException {
+ public long totalTermFreq() {
decodeMetaData();
return totalTermFreq;
}
@@ -812,17 +817,17 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
@Override
- public long getSumDocFreq() throws IOException {
+ public long getSumDocFreq() {
return sumDocFreq;
}
@Override
- public int getDocCount() throws IOException {
+ public int getDocCount() {
return docCount;
}
@Override
- public long size() throws IOException {
+ public long size() {
return termCount;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/package.html b/lucene/core/src/java/org/apache/lucene/codecs/package.html
index 78dcb95..ceccedb 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/package.html
+++ b/lucene/core/src/java/org/apache/lucene/codecs/package.html
@@ -21,5 +21,46 @@
Codecs API: API for customization of the encoding and structure of the index.
+
+
+ The Codec API allows you to customise the way the following pieces of index information are stored:
+
+
Postings lists - see {@link org.apache.lucene.codecs.PostingsFormat}
+
DocValues - see {@link org.apache.lucene.codecs.DocValuesFormat}
+
Stored fields - see {@link org.apache.lucene.codecs.StoredFieldsFormat}
+
Term vectors - see {@link org.apache.lucene.codecs.TermVectorsFormat}
+
FieldInfos - see {@link org.apache.lucene.codecs.FieldInfosFormat}
+
SegmentInfo - see {@link org.apache.lucene.codecs.SegmentInfoFormat}
+
Norms - see {@link org.apache.lucene.codecs.NormsFormat}
+
Live documents - see {@link org.apache.lucene.codecs.LiveDocsFormat}
+
+
+
+
+ Codecs are identified by name through the Java Service Provider Interface. To create your own codec, extend
+ {@link org.apache.lucene.codecs.Codec} and pass the new codec's name to the super() constructor:
+
+public class MyCodec extends Codec {
+
+ public MyCodec() {
+ super("MyCodecName");
+ }
+
+ ...
+}
+
+You will need to register the Codec class so that the {@link java.util.ServiceLoader ServiceLoader} can find it, by including a
+META-INF/services/org.apache.lucene.codecs.Codec file on your classpath that contains the package-qualified
+name of your codec.
+
+
+
+ If you just want to customise the {@link org.apache.lucene.codecs.PostingsFormat}, or use different postings
+ formats for different fields, then you can register your custom postings format in the same way (in
+ META-INF/services/org.apache.lucene.codecs.PostingsFormat), and then extend the default
+ {@link org.apache.lucene.codecs.lucene40.Lucene40Codec} and override
+ {@link org.apache.lucene.codecs.lucene40.Lucene40Codec#getPostingsFormatForField(String)} to return your custom
+ postings format.
+