### Eclipse Workspace Patch 1.0 #P Lucene Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (revision 1134546) +++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (working copy) @@ -40,6 +40,7 @@ import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.document2.FieldType; import org.apache.lucene.index.*; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; @@ -1072,7 +1073,24 @@ public static Field newField(String name, String value, Store store, Index index) { return newField(random, name, value, store, index); } + + public static org.apache.lucene.document2.Field newField(String name, String value, FieldType type) { + return newField(random, name, value, type); + } + public static org.apache.lucene.document2.Field newField(Random random, String name, String value, FieldType type) { + if (usually(random)) { + // most of the time, don't modify the params + return new org.apache.lucene.document2.Field(name, type, value); + } + + if (!type.stored() && random.nextBoolean()) { + type.setStored(true); // randomly store it -- NOTE(review): this mutates the caller's FieldType in place; if a shared constant such as TextField.DEFAULT_TYPE is passed in, every field built from that constant afterwards is stored too + } + + return new org.apache.lucene.document2.Field(name, type, value); + } + /** * Returns a new Field instance. 
Use this when the test does not * care about some specific field settings (most tests) Index: lucene/src/test/org/apache/lucene/document2/TestDateTools.java =================================================================== --- lucene/src/test/org/apache/lucene/document2/TestDateTools.java (revision 0) +++ lucene/src/test/org/apache/lucene/document2/TestDateTools.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.document; +package org.apache.lucene.document2; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -8,6 +8,8 @@ import java.util.TimeZone; import java.util.Locale; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.util.LuceneTestCase; /** Index: lucene/src/test/org/apache/lucene/document2/TestDocument.java =================================================================== --- lucene/src/test/org/apache/lucene/document2/TestDocument.java (revision 0) +++ lucene/src/test/org/apache/lucene/document2/TestDocument.java (working copy) @@ -1,6 +1,9 @@ -package org.apache.lucene.document; +package org.apache.lucene.document2; +import org.apache.lucene.document2.Document; +import org.apache.lucene.document2.Field; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; @@ -37,20 +40,20 @@ public void testBinaryField() throws Exception { Document doc = new Document(); - Fieldable stringFld = new Field("string", binaryVal, Field.Store.YES, - Field.Index.NO); - Fieldable binaryFld = new Field("binary", binaryVal.getBytes()); - Fieldable binaryFld2 = new Field("binary", binaryVal2.getBytes()); + FieldType stringType = new FieldType().setStored(true); + IndexableField stringFld = new Field("string", stringType, binaryVal); + IndexableField binaryFld = new BinaryField("binary", 
binaryVal.getBytes()); + IndexableField binaryFld2 = new BinaryField("binary", binaryVal2.getBytes()); doc.add(stringFld); doc.add(binaryFld); assertEquals(2, doc.fields.size()); - assertTrue(binaryFld.isBinary()); - assertTrue(binaryFld.isStored()); - assertFalse(binaryFld.isIndexed()); - assertFalse(binaryFld.isTokenized()); + assertTrue(((Field) binaryFld).isBinary()); + assertTrue(binaryFld.stored()); + assertFalse(binaryFld.indexed()); + assertFalse(binaryFld.tokenized()); String binaryTest = new String(doc.getBinaryValue("binary")); assertTrue(binaryTest.equals(binaryVal)); @@ -115,10 +118,11 @@ } public void testConstructorExceptions() { - new Field("name", "value", Field.Store.YES, Field.Index.NO); // okay - new Field("name", "value", Field.Store.NO, Field.Index.NOT_ANALYZED); // okay + new Field("name", new FieldType().setStored(true), "value"); // okay + new Field("name", new FieldType().setIndexed(true), "value"); // okay + /* try { - new Field("name", "value", Field.Store.NO, Field.Index.NO); + new Field("name", new FieldType(), "value"); fail(); } catch (IllegalArgumentException e) { // expected exception @@ -132,6 +136,7 @@ } catch (IllegalArgumentException e) { // expected exception } + */ } /** @@ -174,20 +179,16 @@ private Document makeDocumentWithFields() { Document doc = new Document(); - doc.add(new Field("keyword", "test1", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - doc.add(new Field("keyword", "test2", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - doc.add(new Field("text", "test1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("text", "test2", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("unindexed", "test1", Field.Store.YES, Field.Index.NO)); - doc.add(new Field("unindexed", "test2", Field.Store.YES, Field.Index.NO)); + doc.add(new StringField("keyword", "test1")); + doc.add(new StringField("keyword", "test2")); + doc.add(new Field("text", new FieldType().setIndexed(true).setTokenized(true).setStored(true), "test1")); // fresh type: setStored changes the instance it is called on, so it must not be called on the shared TextField.DEFAULT_TYPE + doc.add(new 
Field("text", new FieldType().setIndexed(true).setTokenized(true).setStored(true), "test2")); + doc.add(new Field("unindexed", new FieldType().setStored(true), "test1")); + doc.add(new Field("unindexed", new FieldType().setStored(true), "test2")); doc - .add(new Field("unstored", "test1", Field.Store.NO, - Field.Index.ANALYZED)); + .add(new TextField("unstored", "test1")); doc - .add(new Field("unstored", "test2", Field.Store.NO, - Field.Index.ANALYZED)); + .add(new TextField("unstored", "test2")); return doc; } @@ -222,12 +223,10 @@ public void testFieldSetValue() throws Exception { - Field field = new Field("id", "id1", Field.Store.YES, - Field.Index.NOT_ANALYZED); + Field field = new Field("id", new FieldType().setIndexed(true).setStored(true), "id1"); Document doc = new Document(); doc.add(field); - doc.add(new Field("keyword", "test", Field.Store.YES, - Field.Index.NOT_ANALYZED)); + doc.add(new Field("keyword", new FieldType().setIndexed(true).setStored(true), "test")); Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir); @@ -262,9 +261,8 @@ } public void testFieldSetValueChangeBinary() { - Field field1 = new Field("field1", new byte[0]); - Field field2 = new Field("field2", "", Field.Store.YES, - Field.Index.ANALYZED); + Field field1 = new BinaryField("field1", new byte[0]); + Field field2 = new Field("field2", new FieldType().setIndexed(true).setTokenized(true).setStored(true), ""); try { field1.setValue("abc"); fail("did not hit expected exception"); Index: lucene/src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 1134546) +++ lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -17,7 +17,7 @@ * limitations under the License. 
*/ -import org.apache.lucene.document.Document; +import org.apache.lucene.document2.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.search.FieldCache; // javadocs import org.apache.lucene.search.Similarity; Index: lucene/src/java/org/apache/lucene/document2/NumericField.java =================================================================== --- lucene/src/java/org/apache/lucene/document2/NumericField.java (revision 0) +++ lucene/src/java/org/apache/lucene/document2/NumericField.java (revision 0) @@ -0,0 +1,369 @@ +package org.apache.lucene.document2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; + +import org.apache.lucene.document.NumericField.DataType; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.NumericTokenStream; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.search.NumericRangeQuery; // javadocs +import org.apache.lucene.search.NumericRangeFilter; // javadocs +import org.apache.lucene.search.SortField; // javadocs +import org.apache.lucene.search.FieldCache; // javadocs + +/** + *
+ * This class provides a {@link Field} that enables indexing of numeric values + * for efficient range filtering and sorting. Here's an example usage, adding an + * int value: + * + *
+ * document.add(new NumericField(name).setIntValue(value)); + *+ * + * For optimal performance, re-use the
NumericField and
+ * {@link Document} instance for more than one document:
+ *
+ *
+ * NumericField field = new NumericField(name);
+ * Document document = new Document();
+ * document.add(field);
+ *
+ * for(all documents) {
+ * ...
+ * field.setIntValue(value)
+ * writer.addDocument(document);
+ * ...
+ * }
+ *
+ *
+ *
+ * The java native types int, long, float
+ * and double are directly supported. However, any value that can
+ * be converted into these native types can also be indexed. For example,
+ * date/time values represented by a {@link java.util.Date} can be translated
+ * into a long value using the {@link java.util.Date#getTime} method. If you
+ * don't need millisecond precision, you can quantize the value, either by
+ * dividing the result of {@link java.util.Date#getTime} or using the separate
+ * getters (for year, month, etc.) to construct an int or
+ * long value.
+ *
+ * To perform range querying or filtering against a NumericField,
+ * use {@link NumericRangeQuery} or {@link NumericRangeFilter}. To sort
+ * according to a NumericField, use the normal numeric sort types,
+ * eg {@link SortField#INT}. NumericField values can also be loaded
+ * directly from {@link FieldCache}.
+ *
+ * By default, a NumericField's value is not stored but is indexed
+ * for range filtering and sorting. You can use the
+ * {@link #NumericField(String,FieldType)} constructor if you need to
+ * change these defaults.
+ *
+ * You may add the same field name as a NumericField to the same
+ * document more than once. Range querying and filtering will be the logical OR
+ * of all values; so a range query will hit all documents that have at least one
+ * value in the range. However sort behavior is not defined. If you need to
+ * sort, you should separately index a single-valued NumericField.
+ *
+ * A NumericField will consume somewhat more disk space in the
+ * index than an ordinary single-valued field. However, for a typical index that
+ * includes substantial textual content per document, this increase will likely
+ * be in the noise.
+ *
+ * Within Lucene, each numeric value is indexed as a trie structure,
+ * where each term is logically assigned to larger and larger pre-defined
+ * brackets (which are simply lower-precision representations of the value). The
+ * step size between each successive bracket is called the
+ * precisionStep, measured in bits. Smaller
+ * precisionStep values result in larger number of brackets, which
+ * consumes more disk space in the index but may result in faster range search
+ * performance. The default value, 4, was selected for a reasonable tradeoff of
+ * disk space consumption versus performance. You can use the expert constructor
+ * {@link #NumericField(String,int,FieldType)} if you'd like to change
+ * the value. Note that you must also specify a congruent value when creating
+ * {@link NumericRangeQuery} or {@link NumericRangeFilter}. For low cardinality
+ * fields larger precision steps are good. If the cardinality is < 100, it is
+ * fair to use {@link Integer#MAX_VALUE}, which produces one term per value.
+ *
+ *
+ * For more information on the internals of numeric trie indexing, including the
+ *
+ * precisionStep configuration, see {@link NumericRangeQuery}.
+ * The format of indexed values is described in {@link NumericUtils}.
+ *
+ *
+ * If you only need to sort by numeric value, and never run range
+ * querying/filtering, you can index using a precisionStep of
+ * {@link Integer#MAX_VALUE}. This will minimize disk space consumed.
+ *
+ * More advanced users can instead use {@link NumericTokenStream} directly, when + * indexing numbers. This class is a wrapper around this token stream type for + * easier, more intuitive usage. + *
+ * + * @since 2.9 + */ +public final class NumericField extends Field { + + /** + * Data type of the value in {@link NumericField}. + * + * @since 3.2 + */ + + /* + public static enum DataType { + INT, LONG, FLOAT, DOUBLE + } + */ + + public static final FieldType DEFAULT_TYPE = new FieldType() + .setIndexed(true) + .setOmitNorms(true) + .setOmitTermFreqAndPositions(true); + + private org.apache.lucene.document.NumericField.DataType dataType; + private transient NumericTokenStream numericTS; + private final int precisionStep; + + /** + * Creates a field for numeric values using the default + *precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * The instance is not yet initialized with a numeric value, before indexing a
+ * document containing this field, set a value using the various set
+ * ???Value() methods. This constructor creates an indexed, but not
+ * stored field.
+ *
+ * @param name
+ * the field name
+ */
+ public NumericField(String name) {
+ this(name, NumericUtils.PRECISION_STEP_DEFAULT, NumericField.DEFAULT_TYPE); // default step; NOTE(review): passes the single shared static DEFAULT_TYPE instance, no copy is made here
+ }
+
+ /**
+ * Creates a field for numeric values using the default
+ * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * The instance is not yet initialized with a numeric value, before indexing a
+ * document containing this field, set a value using the various set
+ * ???Value() methods.
+ *
+ * @param name
+ * the field name
+ * @param type
+ * the {@link FieldType} to use for this field; it is passed on
+ * unchanged to the {@link Field} superclass constructor.
+ * {@link #tokenStreamValue()} returns {@code null} whenever
+ * {@code indexed()} is false.
+ */
+ public NumericField(String name, FieldType type) {
+ this(name, NumericUtils.PRECISION_STEP_DEFAULT, type); // default precision step, caller-supplied type
+ }
+
+ /**
+ * Creates a field for numeric values with the specified
+ * precisionStep. The instance is not yet initialized with a
+ * numeric value, before indexing a document containing this field, set a
+ * value using the various set???Value() methods. This constructor
+ * creates an indexed, but not stored field.
+ *
+ * @param name
+ * the field name
+ * @param precisionStep
+ * the used precision step
+ */
+ public NumericField(String name, int precisionStep) {
+ this(name, precisionStep, NumericField.DEFAULT_TYPE); // caller-supplied step; NOTE(review): DEFAULT_TYPE is one shared static instance and is passed by reference
+ }
+
+ /**
+ * Creates a field for numeric values with the specified
+ * precisionStep. The instance is not yet initialized with a
+ * numeric value, before indexing a document containing this field, set a
+ * value using the various set???Value() methods.
+ *
+ * @param name
+ * the field name
+ * @param precisionStep
+ * the used precision step
+ * @param type
+ * the {@link FieldType} to use for this field; it is passed on
+ * unchanged to the {@link Field} superclass constructor.
+ * {@link #tokenStreamValue()} returns {@code null} whenever
+ * {@code indexed()} is false.
+ */
+ public NumericField(String name, int precisionStep, FieldType type) {
+ super(name, type); // name and type are handed to the Field superclass constructor
+ this.precisionStep = precisionStep; // stored as given; no range check is performed in this constructor
+ }
+
+ /** Returns a {@link NumericTokenStream} for indexing the numeric value. */
+ public TokenStream tokenStreamValue() {
+ if (!indexed()) return null; // a field that is not indexed has no token stream
+ if (numericTS == null) {
+ // Create the NumericTokenStream lazily: it is heavy to instantiate
+ // (attributes, ...) and may never be needed at all,
+ // for example when a stored field is merely being loaded.
+ numericTS = new NumericTokenStream(precisionStep);
+ // Seed the new stream with a value that was set before the stream existed.
+ if (fieldsData != null) {
+ assert dataType != null; // every set*Value method in this class assigns fieldsData and dataType together
+ final Number val = (Number) fieldsData;
+ switch (dataType) {
+ case INT:
+ numericTS.setIntValue(val.intValue());
+ break;
+ case LONG:
+ numericTS.setLongValue(val.longValue());
+ break;
+ case FLOAT:
+ numericTS.setFloatValue(val.floatValue());
+ break;
+ case DOUBLE:
+ numericTS.setDoubleValue(val.doubleValue());
+ break;
+ default:
+ assert false : "Should never get here";
+ }
+ }
+ }
+ return numericTS; // cached instance; later set*Value calls update it directly
+ }
+
+ /** Always returns null for numeric fields. */
+ public Reader readerValue() {
+ return null; // the value of a numeric field is never exposed as a Reader
+ }
+
+ /**
+ * Returns the numeric value as a string. This format is also returned if you
+ * call {@link Document#get(String)} on search results. It is recommended to
+ * use {@link Document#getFieldable} instead that returns {@code NumericField}
+ * instances. You can then use {@link #getNumericValue} to return the stored
+ * value.
+ */
+ public String stringValue() {
+ return (fieldsData == null) ? null : fieldsData.toString(); // null while fieldsData is unset, otherwise the value's toString() form
+ }
+
+ /**
+ * Returns the current numeric value as a subclass of {@link Number},
+ * null if not yet initialized.
+ */
+ public Number getNumericValue() {
+ return (Number) fieldsData; // the set*Value methods store a boxed Integer, Long, Float or Double here
+ }
+
+ /** Returns the precision step. */
+ public int getPrecisionStep() {
+ return precisionStep; // assigned once in the constructor (the field is final)
+ }
+
+ /**
+ * Returns the data type of the current value, {@code null} if not yet set.
+ *
+ * @since 3.2
+ */
+ public DataType getNumericDataType() {
+ return dataType; // each set*Value call overwrites this to match the value it stored
+ }
+
+ public boolean isNumeric() {
+ return true; // constant answer for this class
+ }
+
+ /**
+ * Initializes the field with the supplied long value.
+ *
+ * @param value
+ * the numeric value
+ * @return this instance, because of this you can use it the following way:
+ * document.add(new NumericField(name, precisionStep).setLongValue(value))
+ */
+ public NumericField setLongValue(final long value) {
+ if (numericTS != null) { numericTS.setLongValue(value); } // a stream that already exists must see the new value too
+ this.fieldsData = Long.valueOf(value);
+ this.dataType = DataType.LONG;
+ return this;
+ }
+
+ /**
+ * Initializes the field with the supplied int value.
+ *
+ * @param value
+ * the numeric value
+ * @return this instance, because of this you can use it the following way:
+ * document.add(new NumericField(name, precisionStep).setIntValue(value))
+ */
+ public NumericField setIntValue(final int value) {
+ if (numericTS != null) { numericTS.setIntValue(value); } // a stream that already exists must see the new value too
+ this.fieldsData = Integer.valueOf(value);
+ this.dataType = DataType.INT;
+ return this;
+ }
+
+ /**
+ * Initializes the field with the supplied double value.
+ *
+ * @param value
+ * the numeric value
+ * @return this instance, because of this you can use it the following way:
+ * document.add(new NumericField(name, precisionStep).setDoubleValue(value))
+ */
+ public NumericField setDoubleValue(final double value) {
+ if (numericTS != null) { numericTS.setDoubleValue(value); } // a stream that already exists must see the new value too
+ this.fieldsData = Double.valueOf(value);
+ this.dataType = DataType.DOUBLE;
+ return this;
+ }
+
+ /**
+ * Initializes the field with the supplied float value.
+ *
+ * @param value
+ * the numeric value
+ * @return this instance, because of this you can use it the following way:
+ * document.add(new NumericField(name, precisionStep).setFloatValue(value))
+ */
+ public NumericField setFloatValue(final float value) {
+ if (numericTS != null) { numericTS.setFloatValue(value); } // a stream that already exists must see the new value too
+ this.fieldsData = Float.valueOf(value);
+ this.dataType = DataType.FLOAT;
+ return this;
+ }
+
+}
Index: lucene/src/java/org/apache/lucene/document2/package.html
===================================================================
--- lucene/src/java/org/apache/lucene/document2/package.html (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/package.html (revision 0)
@@ -0,0 +1,56 @@
+
+
+
+
+
+
+
+The logical representation of a {@link org.apache.lucene.document.Document} for indexing and searching.
+The document package provides the user level logical representation of content to be indexed and searched. The +package also provides utilities for working with {@link org.apache.lucene.document.Document}s and {@link org.apache.lucene.document.Fieldable}s.
+A {@link org.apache.lucene.document.Document} is a collection of {@link org.apache.lucene.document.Fieldable}s. A + {@link org.apache.lucene.document.Fieldable} is a logical representation of a user's content that needs to be indexed or stored. + {@link org.apache.lucene.document.Fieldable}s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized, + stored, etc.) See the {@link org.apache.lucene.document.Field} implementation of {@link org.apache.lucene.document.Fieldable} + for specifics on these properties. +
+Note: it is common to refer to {@link org.apache.lucene.document.Document}s having {@link org.apache.lucene.document.Field}s, even though technically they have +{@link org.apache.lucene.document.Fieldable}s.
+First and foremost, a {@link org.apache.lucene.document.Document} is something created by the user application. It is your job + to create Documents based on the content of the files you are working with in your application (Word, txt, PDF, Excel or any other format.) + How this is done is completely up to you. That being said, there are many tools available in other projects that can make + the process of taking a file and converting it into a Lucene {@link org.apache.lucene.document.Document} easier. To see an example of this, + take a look at the Lucene demo and the associated source code + for extracting content from HTML. +
+The {@link org.apache.lucene.document.DateTools} is a utility class to make dates and times searchable +(remember, Lucene only searches text). {@link org.apache.lucene.document.NumericField} is a special helper class +to simplify indexing of numeric values (and also dates) for fast range queries with {@link org.apache.lucene.search.NumericRangeQuery} +(using a special sortable string representation of numeric values).
+The {@link org.apache.lucene.document.FieldSelector} class provides a mechanism to tell Lucene how to load Documents from +storage. If no FieldSelector is used, all Fieldables on a Document will be loaded. As an example of the FieldSelector usage, consider + the common use case of +displaying search results on a web page and then having users click through to see the full document. In this scenario, it is often + the case that there are many small fields and one or two large fields (containing the contents of the original file). Before the FieldSelector, +the full Document had to be loaded, including the large fields, in order to display the results. Now, using the FieldSelector, one +can {@link org.apache.lucene.document.FieldSelectorResult#LAZY_LOAD} the large fields, thus only loading the large fields +when a user clicks on the actual link to view the original content.
+ + Index: lucene/src/test/org/apache/lucene/document2/TestBinaryDocument.java =================================================================== --- lucene/src/test/org/apache/lucene/document2/TestBinaryDocument.java (revision 0) +++ lucene/src/test/org/apache/lucene/document2/TestBinaryDocument.java (working copy) @@ -1,7 +1,14 @@ -package org.apache.lucene.document; +package org.apache.lucene.document2; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.document.CompressionTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.store.Directory; Index: lucene/src/java/org/apache/lucene/document2/TextField.java =================================================================== --- lucene/src/java/org/apache/lucene/document2/TextField.java (revision 0) +++ lucene/src/java/org/apache/lucene/document2/TextField.java (revision 0) @@ -0,0 +1,39 @@ +package org.apache.lucene.document2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; + +public final class TextField extends Field { + + public static final FieldType DEFAULT_TYPE = new FieldType() + .setIndexed(true) + .setTokenized(true); /* NOTE(review): one shared static instance; both constructors below pass this same object to Field, so a setter called on it changes the type seen by every TextField */ + + public TextField(String name, Reader reader) { + super(name, TextField.DEFAULT_TYPE, reader); /* value supplied as a Reader */ + } + + public TextField(String name, String value) { + super(name, TextField.DEFAULT_TYPE, value); /* value supplied as a String */ + } + + public boolean isNumeric() { + return false; /* constant answer for this class */ + } +} Index: lucene/src/java/org/apache/lucene/document2/MapFieldSelector.java =================================================================== --- lucene/src/java/org/apache/lucene/document2/MapFieldSelector.java (revision 0) +++ lucene/src/java/org/apache/lucene/document2/MapFieldSelector.java (revision 0) @@ -0,0 +1,67 @@ +package org.apache.lucene.document2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A {@link FieldSelector} based on a Map of field names to {@link FieldSelectorResult}s + * + */ +public class MapFieldSelector implements FieldSelector { + + MapfieldsToLoad and lazyFieldsToLoad, lazy has precedence.
+ *
+ * @param fieldName The {@link Field} name to check
+ * @return The {@link FieldSelectorResult}
+ */
+ public FieldSelectorResult accept(String fieldName) {
+ // Both membership checks always run, eager collection first, then lazy.
+ final boolean eager = fieldsToLoad.contains(fieldName);
+ final boolean lazy = lazyFieldsToLoad.contains(fieldName);
+ if (lazy) {
+ // lazy loading wins when the name is registered in both collections
+ return FieldSelectorResult.LAZY_LOAD;
+ }
+ return eager ? FieldSelectorResult.LOAD : FieldSelectorResult.NO_LOAD;
+ }
+}
\ No newline at end of file
Index: lucene/src/java/org/apache/lucene/index/IndexableField.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/IndexableField.java (revision 1134546)
+++ lucene/src/java/org/apache/lucene/index/IndexableField.java (working copy)
@@ -55,6 +55,8 @@
public float boost();
public boolean stored();
+
+ public boolean lazy();
// nocommit -- isBinary?
public BytesRef binaryValue(BytesRef reuse);
Index: lucene/src/java/org/apache/lucene/document2/Field.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/Field.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/Field.java (revision 0)
@@ -0,0 +1,353 @@
+package org.apache.lucene.document2;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
+
+/**
+ * A field is a section of a Document. Each field has two parts, a name and a
+ * value. Values may be free text, provided as a String or as a Reader, or they
+ * may be atomic keywords, which are not further processed. Such keywords may be
+ * used to represent dates, urls, etc. Fields are optionally stored in the
+ * index, so that they may be returned with hits on the document.
+ */
+
+public class Field implements IndexableField {
+
+ protected FieldType type; // indexing/storage options; stored(), indexed() etc. delegate to it
+ protected String name = "body"; // placeholder until a constructor assigns the real field name
+ // the value of this field: a String, a Reader or a byte[] (the constructors assign one of these, or null)
+ protected Object fieldsData = null;
+ // pre-analyzed tokenStream for indexed fields
+ protected TokenStream tokenStream;
+ protected boolean isBinary = false; // set to true by the byte[] constructor
+ // length and offset of the value inside fieldsData when it is a byte[]
+ protected int binaryLength;
+ protected int binaryOffset;
+
+ protected float boost = 1.0f; // returned by boost(); changed through setBoost(float)
+
+ /**
+  * Creates a field that has a name and a type but no value yet.
+  *
+  * @param name the field name; must not be null
+  * @param type the indexing/storage options of this field
+  * @throws NullPointerException if name is null
+  */
+ public Field(String name, FieldType type) {
+   if (name == null)
+     throw new NullPointerException("name cannot be null");
+
+   // Both arguments used to be dropped: name kept its "body" placeholder and
+   // type stayed null, so stored()/indexed()/... threw NullPointerException.
+   this.name = StringHelper.intern(name); // field names are interned
+   this.type = type;
+ }
+
+ /**
+  * Creates a field whose value is supplied by a Reader.
+  *
+  * @param name the field name; must not be null
+  * @param type the indexing/storage options of this field
+  * @param reader the source of the field value; must not be null
+  * @throws NullPointerException if name or reader is null
+  */
+ public Field(String name, FieldType type, Reader reader) {
+   if (name == null)
+     throw new NullPointerException("name cannot be null");
+   if (reader == null)
+     throw new NullPointerException("reader cannot be null");
+
+   this.name = StringHelper.intern(name); // field names are interned
+   // type was never assigned here, so every accessor that delegates to it
+   // (stored(), indexed(), tokenized(), ...) failed with a NullPointerException.
+   this.type = type;
+   this.fieldsData = reader;
+ }
+
+ /**
+  * Creates a field whose indexed tokens come from a pre-analyzed TokenStream.
+  * The field has no String, Reader or binary value.
+  *
+  * @param name the field name; must not be null
+  * @param type the indexing/storage options of this field
+  * @param tokenStream the tokens to index; must not be null
+  * @throws NullPointerException if name or tokenStream is null
+  */
+ public Field(String name, FieldType type, TokenStream tokenStream) {
+   if (name == null)
+     throw new NullPointerException("name cannot be null");
+   if (tokenStream == null)
+     throw new NullPointerException("tokenStream cannot be null");
+
+   this.name = StringHelper.intern(name); // field names are interned
+   // type was never assigned here, so every accessor that delegates to it
+   // (stored(), indexed(), tokenized(), ...) failed with a NullPointerException.
+   this.type = type;
+   this.fieldsData = null;
+   this.tokenStream = tokenStream;
+ }
+
+ public Field(String name, FieldType type, byte[] value) { // binary field covering the whole array
+ this(name, type, value, 0, value.length); // value.length throws NullPointerException when value is null
+ }
+
+ /**
+  * Creates a binary field over a slice of a byte array.
+  *
+  * @param name the field name; must not be null
+  * @param type the indexing/storage options of this field
+  * @param value the array holding the value (kept by reference, not copied)
+  * @param offset index of the first byte of the value inside value
+  * @param length number of bytes that make up the value
+  * @throws NullPointerException if name is null
+  */
+ public Field(String name, FieldType type, byte[] value, int offset, int length) {
+   // Same explicit check and message as the Reader/TokenStream constructors,
+   // instead of relying on whatever StringHelper.intern does with null.
+   if (name == null)
+     throw new NullPointerException("name cannot be null");
+
+   this.isBinary = true;
+   this.fieldsData = value;
+   this.type = type;
+   this.binaryOffset = offset;
+   this.binaryLength = length;
+   this.name = StringHelper.intern(name); // field names are interned
+ }
+
+ public Field(String name, FieldType type, String value) { // String-valued field
+ this(name, true, type, value); // true = ask for the field name to be interned
+ }
+
+ /**
+  * Creates a String-valued field, optionally interning its name.
+  *
+  * @param name the field name; must not be null
+  * @param internName whether the name is passed through StringHelper.intern
+  * @param type the indexing/storage options of this field
+  * @param value the field value; must not be null
+  * @throws IllegalArgumentException if name or value is null
+  */
+ public Field(String name, boolean internName, FieldType type, String value) {
+   if (name == null)
+     throw new IllegalArgumentException("name cannot be null");
+   if (value == null)
+     throw new IllegalArgumentException("value cannot be null");
+
+   this.type = type;
+   // The interned instance used to be assigned to the parameter after
+   // this.name had already been set, so the field never held it.
+   this.name = internName ? StringHelper.intern(name) : name;
+   this.fieldsData = value;
+ }
+
+ /**
+  * Returns the value of this field as a String.
+  * Exactly one of stringValue(), readerValue() and the binary value is
+  * expected to be set.
+  *
+  * @return the value if it is held as a String, otherwise null (the Reader
+  *         value or binary value is used in that case)
+  */
+ public String stringValue() {
+   if (fieldsData instanceof String) {
+     return (String) fieldsData;
+   }
+   return null;
+ }
+
+ /**
+  * Returns the value of this field as a Reader.
+  * Exactly one of stringValue(), readerValue() and the binary value is
+  * expected to be set.
+  *
+  * @return the value if it is held as a Reader, otherwise null (the String
+  *         value or binary value is used in that case)
+  */
+ public Reader readerValue() {
+   if (fieldsData instanceof Reader) {
+     return (Reader) fieldsData;
+   }
+   return null;
+ }
+
+ /**
+ * The TokenStream for this field to be used when indexing, or null. If null,
+ * the Reader value or String value is analyzed to produce the indexed tokens.
+ */
+ public TokenStream tokenStreamValue() {
+ return tokenStream;
+ }
+
+ public Number getNumericValue() { // this class holds no numeric value
+ return null; // NOTE(review): numericValue() further down also returns null -- confirm both are needed
+ }
+
+ /**
+ * + * Expert: change the value of this field. This can be used during indexing to + * re-use a single Field instance to improve indexing speed by avoiding GC + * cost of new'ing and reclaiming Field instances. Typically a single + * {@link Document} instance is re-used as well. This helps most on small + * documents. + *
+ * + *+ * Each Field instance should only be used once within a single + * {@link Document} instance. See ImproveIndexingSpeed for details. + *
+ */ + public void setValue(String value) { + if (isBinary) { + throw new IllegalArgumentException( + "cannot set a String value on a binary field"); + } + fieldsData = value; + } + + /** + * Expert: change the value of this field. See setValue(String). + */ + public void setValue(Reader value) { + if (isBinary) { + throw new IllegalArgumentException( + "cannot set a Reader value on a binary field"); + } + if (stored()) { + throw new IllegalArgumentException( + "cannot set a Reader value on a stored field"); + } + fieldsData = value; + } + + /** + * Expert: change the value of this field. See setValue(String). + */ + public void setValue(byte[] value) { + if (!isBinary) { + throw new IllegalArgumentException( + "cannot set a byte[] value on a non-binary field"); + } + fieldsData = value; + binaryLength = value.length; + binaryOffset = 0; + } + + /** + * Expert: change the value of this field. See setValue(String). + */ + public void setValue(byte[] value, int offset, int length) { + if (!isBinary) { + throw new IllegalArgumentException( + "cannot set a byte[] value on a non-binary field"); + } + fieldsData = value; + binaryLength = length; + binaryOffset = offset; + } + + /** + * Expert: sets the token stream to be used for indexing and causes + * isIndexed() and isTokenized() to return true. May be combined with stored + * values from stringValue() or getBinaryValue() + */ + public void setTokenStream(TokenStream tokenStream) { + if (!indexed() || !tokenized()) { + throw new IllegalArgumentException( + "cannot set token stream on non indexed and tokenized field"); + } + this.tokenStream = tokenStream; + } + + public String name() { + return name; + } + + public float boost() { + return boost; + } + + /** + * Sets the boost factor hits on this field. This value will be multiplied + * into the score of all hits on this this field of this document. 
+ * + *+ * The boost is multiplied by + * {@link org.apache.lucene.document.Document#getBoost()} of the document + * containing this field. If a document has multiple fields with the same + * name, all such values are multiplied together. This product is then used to + * compute the norm factor for the field. By default, in the + * {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} + * method, the boost value is multiplied by the length normalization factor + * and then rounded by + * {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before + * it is stored in the index. One should attempt to ensure that this product + * does not overflow the range of that encoding. + * + * @see org.apache.lucene.document.Document#setBoost(float) + * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState) + * @see org.apache.lucene.search.Similarity#encodeNormValue(float) + */ + public void setBoost(float boost) { + this.boost = boost; + } + + private byte[] getBinaryValue(byte[] result /* unused */) { + if (isBinary || fieldsData instanceof byte[]) return (byte[]) fieldsData; + else return null; + } + + public boolean numeric() { + return false; + } + + public Number numericValue() { + return null; + } + + public NumericField.DataType numericDataType() { + return null; + } + + private byte[] getBinaryValue() { + return getBinaryValue(null); + } + + public BytesRef binaryValue(BytesRef reuse) { + final byte[] bytes = getBinaryValue(); + if (bytes != null) { + if (reuse == null) { + return new BytesRef(bytes, getBinaryOffset(), getBinaryLength()); + } else { + reuse.bytes = bytes; + reuse.offset = getBinaryOffset(); + reuse.length = getBinaryLength(); + return reuse; + } + } else { + return null; + } + } + + /** + * Returns length of byte[] segment that is used as value, if Field is not + * binary returned value is undefined + * + * @return length of byte[] segment that represents this Field value + */ + private int 
getBinaryLength() { + if (isBinary) { + return binaryLength; + } else if (fieldsData instanceof byte[]) return ((byte[]) fieldsData).length; + else return 0; + } + + /** + * Returns offset into byte[] segment that is used as value, if Field is not + * binary returned value is undefined + * + * @return index of the first character in byte[] segment that represents this + * Field value + */ + public int getBinaryOffset() { + return binaryOffset; + } + + public boolean isBinary() { + return isBinary; + } + + /** methods from inner FieldType */ + + public boolean stored() { + return type.stored(); + } + + public boolean indexed() { + return type.indexed(); + } + + public boolean tokenized() { + return type.tokenized(); + } + + public boolean omitNorms() { + return type.omitNorms(); + } + + public boolean omitTermFreqAndPositions() { + return type.omitTermFreqAndPositions(); + } + + public boolean storeTermVectors() { + return type.storeTermVectors(); + } + + public boolean storeTermVectorOffsets() { + return type.storeTermVectorOffsets(); + } + + public boolean storeTermVectorPositions() { + return type.storeTermVectorPositions(); + } + + public boolean lazy() { + return type.lazy(); + } +} Index: lucene/src/java/org/apache/lucene/document2/FieldSelectorResult.java =================================================================== --- lucene/src/java/org/apache/lucene/document2/FieldSelectorResult.java (revision 0) +++ lucene/src/java/org/apache/lucene/document2/FieldSelectorResult.java (revision 0) @@ -0,0 +1,76 @@ +package org.apache.lucene.document2; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Provides information about what should be done with this Field + * + **/ +public enum FieldSelectorResult { + + /** + * Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encountered. + * {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null. + *
+ * {@link Document#add(Fieldable)} should be called by the Reader. + */ + LOAD, + + /** + * Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually contain its data until + * invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should + * return a valid instance of a {@link Fieldable}. + * + * {@link Document#add(Fieldable)} should be called by the Reader. + */ + LAZY_LOAD, + + /** + * Do not load the {@link Field}. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null. + * {@link Document#add(Fieldable)} is not called. + * + * {@link Document#add(Fieldable)} should not be called by the Reader. + */ + NO_LOAD, + + /** + * Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}. Thus, the + * Document may not have its complete set of Fields. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should + * both be valid for this {@link Field} + * + * {@link Document#add(Fieldable)} should be called by the Reader. + */ + LOAD_AND_BREAK, + + /** Expert: Load the size of this {@link Field} rather than its value. + * Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value. + * The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0] + */ + SIZE, + + /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded */ + SIZE_AND_BREAK, + + /** + * Lazily load this {@link Field}, but do not cache the result. This means the {@link Field} is valid, but it may not actually contain its data until + * invoked. {@link Document#getField(String)} SHOULD NOT BE USED. 
{@link Document#getFieldable(String)} is safe to use and should + * return a valid instance of a {@link Fieldable}. + * + * {@link Document#add(Fieldable)} should be called by the Reader. + */ + LATENT +} Index: lucene/src/java/org/apache/lucene/document2/Document.java =================================================================== --- lucene/src/java/org/apache/lucene/document2/Document.java (revision 0) +++ lucene/src/java/org/apache/lucene/document2/Document.java (revision 0) @@ -0,0 +1,346 @@ +package org.apache.lucene.document2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.*; + +import org.apache.lucene.index.IndexReader; // for javadoc +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.IndexSearcher; // for javadoc +import org.apache.lucene.search.ScoreDoc; // for javadoc + +/** Documents are the unit of indexing and search. + * + * A Document is a set of fields. Each field has a name and a textual value. + * A field may be {@link Fieldable#isStored() stored} with the document, in which + * case it is returned with search hits on the document. Thus each document + * should typically contain one or more stored fields which uniquely identify + * it. 
+ * + *Note that fields which are not {@link Fieldable#isStored() stored} are
+ * not available in documents retrieved from the index, e.g. with {@link
+ * ScoreDoc#doc} or {@link IndexReader#document(int)}.
+ */
+
+public final class Document implements Iterable The default value is 1.0.
+ *
+ * Values are multiplied into the value of {@link IndexableField#getBoost()} of
+ * each field in this document. Thus, this method in effect sets a default
+ * boost for the fields of this document.
+ *
+ * @see IndexableField#setBoost(float)
+ */
+ public void setBoost(float boost) {
+ this.boost = boost; // plain assignment; no range check is applied
+ }
+
+ /** Returns, at indexing time, the boost factor as set by {@link #setBoost(float)}.
+ *
+ * Note that once a document is indexed this value is no longer available
+ * from the index. At search time, for retrieved documents, this method always
+ * returns 1. This however does not mean that the boost value set at indexing
+ * time was ignored - it was just combined with other indexing time factors and
+ * stored elsewhere, for better indexing and search performance. (For more
+ * information see the "norm(t,d)" part of the scoring formula in
+ * {@link org.apache.lucene.search.Similarity Similarity}.)
+ * @return the boost currently held by this Document instance
+ * @see #setBoost(float)
+ */
+ // @Override not until Java 1.6
+ public float getBoost() {
+ return boost;
+ }
+
+ /**
+ * Adds a field to a document. Several fields may be added with
+ * the same name. In this case, if the fields are indexed, their text is
+ * treated as though appended for the purposes of search. Note that add like the removeField(s) methods only makes sense
+ * prior to adding a document to an index. These methods cannot
+ * be used to change the content of an existing index! In order to achieve this,
+ * a document has to be deleted from an index and a new changed version of that
+ * document has to be added. Removes field with the specified name from the document.
+ * If multiple fields exist with this name, this method removes the first field that has been added.
+ * If there is no field with the specified name, the document remains unchanged. Note that the removeField(s) methods like the add method only make sense
+ * prior to adding a document to an index. These methods cannot
+ * be used to change the content of an existing index! In order to achieve this,
+ * a document has to be deleted from an index and a new changed version of that
+ * document has to be added. Removes all fields with the given name from the document.
+ * If there is no field with the specified name, the document remains unchanged. Note that the removeField(s) methods like the add method only make sense
+ * prior to adding a document to an index. These methods cannot
+ * be used to change the content of an existing index! In order to achieve this,
+ * a document has to be deleted from an index and a new changed version of that
+ * document has to be added. Note that fields which are not {@link IndexableField#isStored() stored} are
+ * not available in documents retrieved from the
+ * index, e.g. {@link IndexSearcher#doc(int)} or {@link
+ * IndexReader#document(int)}.
+ public final List This class also helps you to limit the resolution of your dates. Do not
+ * save dates with a finer resolution than you really need, as then
+ * RangeQuery and PrefixQuery will require more memory and become slower.
+ *
+ *
+ * Another approach is {@link NumericUtils}, which provides
+ * a sortable binary representation (prefix encoded) of numeric values, which
+ * date/time are.
+ * For indexing a {@link Date} or {@link Calendar}, just get the unix timestamp as
+ * Field[] array
+ * @deprecated use {@link #getIndexableField} instead and cast depending on
+ * data type.
+ * @throws ClassCastException if you try to retrieve a numerical or
+ * lazy loaded field.
+ @Deprecated
+ public final Field[] getFields(String name) {
+ ListIndexableField[] array
+ public IndexableField[] getIndexableFields(String name) {
+ ListString[] of field values
+ public final String[] getValues(String name) {
+ Listbyte[][] of binary field values
+ public final byte[][] getBinaryValues(String name) {
+ Listnull
+ * if no binary fields with the specified name are available.
+ * There may be non-binary fields with the same name.
+ *
+ * @param name the name of the field.
+ * @return a byte[] containing the binary field value or null
+ public final byte[] getBinaryValue(String name) {
+ for (IndexableField field : fields) {
+ if (field.name().equals(name) && ((Field) field).isBinary())
+ return field.binaryValue(null).bytes;
+ }
+ return null;
+ }
+ */
+
+ /**
+  * Prints the fields of a document for human consumption.
+  *
+  * @return {@code Document<...>} wrapping each field's own toString() output,
+  *         with a single space between consecutive fields
+  */
+ @Override
+ public final String toString() {
+   final StringBuilder text = new StringBuilder("Document<");
+   String separator = "";
+   for (int pos = 0; pos < fields.size(); pos++) {
+     // The separator goes in front of every field except the first one.
+     text.append(separator).append(fields.get(pos).toString());
+     separator = " ";
+   }
+   return text.append(">").toString();
+ }
+}
Index: lucene/src/java/org/apache/lucene/document2/StringField.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/StringField.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/StringField.java (revision 0)
@@ -0,0 +1,42 @@
+package org.apache.lucene.document2;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A {@link Field} with a String value that uses {@link #DEFAULT_TYPE}.
+ */
+public final class StringField extends Field {
+
+  /**
+   * Type used by every StringField: indexed, norms omitted, term frequencies
+   * and positions omitted.
+   * NOTE(review): FieldType exposes setters, so this shared instance can be
+   * changed by any caller -- confirm whether it should be made read-only.
+   */
+  public static final FieldType DEFAULT_TYPE = new FieldType()
+      .setIndexed(true)
+      .setOmitNorms(true)
+      .setOmitTermFreqAndPositions(true);
+
+  /**
+   * Creates a StringField, optionally interning its name.
+   *
+   * @param name the field name
+   * @param internName whether the field name should be interned
+   * @param value the field value
+   */
+  public StringField(String name, boolean internName, String value) {
+    // internName used to be ignored: the three-argument super constructor
+    // always requests interning. Forward the flag instead.
+    super(name, internName, StringField.DEFAULT_TYPE, value);
+  }
+
+  /**
+   * Creates a StringField and requests interning of its name.
+   *
+   * @param name the field name
+   * @param value the field value
+   */
+  public StringField(String name, String value) {
+    this(name, true, value);
+  }
+
+  /**
+   * @return the toString() form of the stored value, or null when no value is set
+   */
+  @Override
+  public String stringValue() {
+    return (fieldsData == null) ? null : fieldsData.toString();
+  }
+
+  /**
+   * @return always false
+   */
+  // NOTE(review): Field declares numeric(), not isNumeric(), so this method
+  // overrides nothing -- confirm which name is intended.
+  public boolean isNumeric() {
+    return false;
+  }
+}
Index: lucene/src/java/org/apache/lucene/document2/DateTools.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/DateTools.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/DateTools.java (revision 0)
@@ -0,0 +1,210 @@
+package org.apache.lucene.document2;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.NumericRangeQuery; // for javadocs
+import org.apache.lucene.util.NumericUtils; // for javadocs
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.Locale;
+import java.util.TimeZone;
+
+/**
+ * Provides support for converting dates to strings and vice-versa.
+ * The strings are structured so that lexicographic sorting orders
+ * them by date, which makes them suitable for use as field values
+ * and search terms.
+ *
+ * long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and
+ * index this as a numeric value with {@link NumericField}
+ * and use {@link NumericRangeQuery} to query it.
+ */
+public class DateTools {
+
+ final static TimeZone GMT = TimeZone.getTimeZone("GMT");
+
+ private static final ThreadLocalyyyyMMddHHmmssSSS or shorter,
+ * depending on resolution; using GMT as timezone
+ */
+ public static String dateToString(Date date, Resolution resolution) {
+   // Delegate to the millisecond-based variant.
+   final long millis = date.getTime();
+   return timeToString(millis, resolution);
+ }
+
+ /**
+  * Formats a millisecond timestamp as a string suitable for indexing.
+  *
+  * @param time the instant, in milliseconds since January 1, 1970, 00:00:00 GMT
+  * @param resolution the precision to keep, see
+  *        {@link #round(long, DateTools.Resolution)}
+  * @return a string in format {@code yyyyMMddHHmmssSSS} or shorter,
+  *         depending on {@code resolution}; using GMT as timezone
+  */
+ public static String timeToString(long time, Resolution resolution) {
+   // Round first, then format with the formatter selected by pattern length.
+   final long rounded = round(time, resolution);
+   return TL_FORMATS.get()[resolution.formatLen].format(new Date(rounded));
+ }
+
+ /**
+  * Parses a string produced by {@code timeToString} or {@code dateToString}
+  * back into a millisecond timestamp.
+  *
+  * @param dateString the date string to be converted
+  * @return the number of milliseconds since January 1, 1970, 00:00:00 GMT
+  * @throws ParseException if {@code dateString} is not in the expected format
+  */
+ public static long stringToTime(String dateString) throws ParseException {
+   final Date parsed = stringToDate(dateString);
+   return parsed.getTime();
+ }
+
+ /**
+  * Parses a string produced by {@code timeToString} or {@code dateToString}
+  * back into a {@code Date}.
+  *
+  * @param dateString the date string to be converted
+  * @return the parsed time as a Date object
+  * @throws ParseException if {@code dateString} is not in the expected
+  *         format; the underlying failure is attached as the cause
+  */
+ public static Date stringToDate(String dateString) throws ParseException {
+   try {
+     // The string length selects the formatter.
+     return TL_FORMATS.get()[dateString.length()].parse(dateString);
+   } catch (Exception e) {
+     // Broad on purpose: a null input or a length with no formatter fails
+     // with a runtime exception rather than a ParseException.
+     // ParseException has no cause-taking constructor, hence initCause.
+     ParseException failure =
+         new ParseException("Input is not a valid date string: " + dateString, 0);
+     failure.initCause(e);
+     throw failure;
+   }
+ }
+
+ /**
+  * Limit a date's resolution. For example, the date {@code 2004-09-21 13:50:11}
+  * will be changed to {@code 2004-09-01 00:00:00} when using
+  * {@code Resolution.MONTH}.
+  *
+  * @param date the date to be rounded
+  * @param resolution The desired resolution of the date to be returned
+  * @return the date with all values more precise than {@code resolution}
+  *         set to 0 or 1
+  */
+ public static Date round(Date date, Resolution resolution) {
+   final long roundedMillis = round(date.getTime(), resolution);
+   return new Date(roundedMillis);
+ }
+
+ /**
+ * Limit a date's resolution. For example, the date 1095767411000
+ * (which represents 2004-09-21 13:50:11) will be changed to
+ * 1093989600000 (2004-09-01 00:00:00) when using
+ * Resolution.MONTH.
+ * @param time the instant to round, in milliseconds since January 1, 1970, 00:00:00 GMT
+ * @param resolution The desired resolution of the date to be returned
+ * @return the date with all values more precise than resolution
+ * set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT
+ */
+ @SuppressWarnings("fallthrough")
+ public static long round(long time, Resolution resolution) {
+ final Calendar calInstance = TL_CAL.get(); // NOTE(review): the Javadoc example values match a UTC+2 zone (1095767411000 is 11:50:11 GMT) -- confirm against TL_CAL's zone
+ calInstance.setTimeInMillis(time);
+
+ switch (resolution) {
+ //NOTE: switch statement fall-through is deliberate
+ case YEAR:
+ calInstance.set(Calendar.MONTH, 0);
+ case MONTH:
+ calInstance.set(Calendar.DAY_OF_MONTH, 1);
+ case DAY:
+ calInstance.set(Calendar.HOUR_OF_DAY, 0);
+ case HOUR:
+ calInstance.set(Calendar.MINUTE, 0);
+ case MINUTE:
+ calInstance.set(Calendar.SECOND, 0);
+ case SECOND:
+ calInstance.set(Calendar.MILLISECOND, 0);
+ case MILLISECOND:
+ // don't cut off anything
+ break;
+ default:
+ throw new IllegalArgumentException("unknown resolution " + resolution);
+ }
+ return calInstance.getTimeInMillis(); // the calendar now holds the truncated instant
+ }
+
+ /** Specifies the time granularity. */
+ public static enum Resolution {
+
+   YEAR(4),
+   MONTH(6),
+   DAY(8),
+   HOUR(10),
+   MINUTE(12),
+   SECOND(14),
+   MILLISECOND(17);
+
+   /** Number of leading characters of {@code yyyyMMddHHmmssSSS} this resolution keeps. */
+   final int formatLen;
+   /** Formatter for this resolution; should be cloned before use, since it's not threadsafe. */
+   final SimpleDateFormat format;
+
+   Resolution(int formatLen) {
+     this.formatLen = formatLen;
+     // The pattern is a prefix of the full 17-character pattern:
+     // yyyy=4, +MM=6, +dd=8, +HH=10, +mm=12, +ss=14, +SSS=17.
+     final String pattern = "yyyyMMddHHmmssSSS".substring(0, formatLen);
+     final SimpleDateFormat formatter = new SimpleDateFormat(pattern, Locale.US);
+     formatter.setTimeZone(GMT);
+     this.format = formatter;
+   }
+
+   /**
+    * Returns the name of the resolution in lowercase (for backwards
+    * compatibility).
+    */
+   @Override
+   public String toString() {
+     return name().toLowerCase(Locale.ENGLISH);
+   }
+
+ }
+
+}
Index: lucene/src/java/org/apache/lucene/document2/FieldSelector.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/FieldSelector.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/FieldSelector.java (revision 0)
@@ -0,0 +1,33 @@
+package org.apache.lucene.document2;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Similar to a {@link java.io.FileFilter}, the FieldSelector allows one to make decisions about
+ * what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
+ * NOTE(review): the link above names org.apache.lucene.document.FieldSelector, not this document2 type -- confirm the intended target.
+ **/
+public interface FieldSelector {
+
+ /**
+ * Decides how the field with the given name is to be loaded.
+ * @param fieldName the field to accept or reject
+ * @return the {@link FieldSelectorResult} telling the caller whether and how
+ * the {@link Field} named {@code fieldName} should be loaded.
+ */
+ FieldSelectorResult accept(String fieldName);
+}
Index: lucene/src/java/org/apache/lucene/document2/CompressionTools.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/CompressionTools.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/CompressionTools.java (revision 0)
@@ -0,0 +1,127 @@
+package org.apache.lucene.document2;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.zip.Deflater;
+import java.util.zip.Inflater;
+import java.util.zip.DataFormatException;
+import java.io.ByteArrayOutputStream;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
+
+/**
+ * Simple utility class providing static methods to compress and decompress
+ * binary data for stored fields. Compression is done with
+ * {@link java.util.zip.Deflater} and decompression with
+ * {@link java.util.zip.Inflater}.
+ */
+public class CompressionTools {
+
+  // Export only static methods
+  private CompressionTools() {}
+
+  /**
+   * Compresses the specified byte range using the specified compressionLevel
+   * (constants are defined in java.util.zip.Deflater).
+   *
+   * @param value the array holding the bytes to compress
+   * @param offset index of the first byte to compress
+   * @param length number of bytes to compress
+   * @param compressionLevel one of the java.util.zip.Deflater level constants
+   * @return a new array containing the compressed bytes
+   */
+  public static byte[] compress(byte[] value, int offset, int length, int compressionLevel) {
+    /* The output buffer must be growable: there is no guarantee that the
+     * compressed data will be smaller than the original data. */
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
+    Deflater compressor = new Deflater();
+    try {
+      compressor.setLevel(compressionLevel);
+      compressor.setInput(value, offset, length);
+      compressor.finish();
+      // Drain the compressor in fixed-size chunks until it reports completion
+      final byte[] buf = new byte[1024];
+      while (!compressor.finished()) {
+        int count = compressor.deflate(buf);
+        bos.write(buf, 0, count);
+      }
+    } finally {
+      compressor.end(); // always close the compressor, even on failure
+    }
+    return bos.toByteArray();
+  }
+
+  /** Compresses the specified byte range, with default BEST_COMPRESSION level */
+  public static byte[] compress(byte[] value, int offset, int length) {
+    return compress(value, offset, length, Deflater.BEST_COMPRESSION);
+  }
+
+  /** Compresses all bytes in the array, with default BEST_COMPRESSION level */
+  public static byte[] compress(byte[] value) {
+    return compress(value, 0, value.length, Deflater.BEST_COMPRESSION);
+  }
+
+  /** Compresses the String value, with default BEST_COMPRESSION level */
+  public static byte[] compressString(String value) {
+    return compressString(value, Deflater.BEST_COMPRESSION);
+  }
+
+  /** Converts the String value to UTF-8 and compresses it using the
+   *  specified compressionLevel (constants are defined in
+   *  java.util.zip.Deflater). */
+  public static byte[] compressString(String value, int compressionLevel) {
+    BytesRef result = new BytesRef();
+    UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result);
+    return compress(result.bytes, 0, result.length, compressionLevel);
+  }
+
+  /**
+   * Decompresses a byte array previously returned by compress.
+   * @throws DataFormatException if value is corrupt, truncated or needs a preset dictionary
+   */
+  public static byte[] decompress(byte[] value) throws DataFormatException {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length);
+    Inflater decompressor = new Inflater();
+    try {
+      decompressor.setInput(value);
+      final byte[] buf = new byte[1024];
+      while (!decompressor.finished()) {
+        int count = decompressor.inflate(buf);
+        // Zero output while starved of input or a dictionary would otherwise spin forever
+        if (count == 0 && (decompressor.needsInput() || decompressor.needsDictionary())) {
+          throw new DataFormatException("compressed data is truncated or needs a preset dictionary");
+        }
+        bos.write(buf, 0, count);
+      }
+    } finally {
+      decompressor.end();
+    }
+    return bos.toByteArray();
+  }
+
+  /** Decompress the byte array previously returned by
+   *  compressString back into a String */
+  public static String decompressString(byte[] value) throws DataFormatException {
+    final byte[] bytes = decompress(value);
+    CharsRef result = new CharsRef(bytes.length);
+    UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
+    return new String(result.chars, 0, result.length);
+  }
+}
Index: lucene/src/java/org/apache/lucene/document2/BinaryField.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/BinaryField.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/BinaryField.java (revision 0)
@@ -0,0 +1,33 @@
+package org.apache.lucene.document2;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** A {@link Field} carrying a byte[] value; always created with {@link #DEFAULT_TYPE}. */
+public final class BinaryField extends Field {
+
+  public static final FieldType DEFAULT_TYPE = new FieldType().setStored(true); // stored is switched on
+
+  public BinaryField(String name, byte[] value) {
+    super(name, DEFAULT_TYPE, value);
+    isBinary = true; // flag the value as binary
+  }
+
+  public boolean isNumeric() {
+    return false; // a binary field is never numeric
+  }
+}
Index: lucene/src/java/org/apache/lucene/document2/LoadFirstFieldSelector.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/LoadFirstFieldSelector.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/LoadFirstFieldSelector.java (revision 0)
@@ -0,0 +1,29 @@
+package org.apache.lucene.document2;
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Loads the first field and breaks: {@link #accept(String)} answers
+ * {@link FieldSelectorResult#LOAD_AND_BREAK} for every field name.
+ */
+public class LoadFirstFieldSelector implements FieldSelector {
+
+ /** Returns LOAD_AND_BREAK regardless of the fieldName passed in. */
+ public FieldSelectorResult accept(String fieldName) {
+ return FieldSelectorResult.LOAD_AND_BREAK;
+ }
+}
\ No newline at end of file