Index: LICENSE.txt =================================================================== --- LICENSE.txt (revision 394295) +++ LICENSE.txt (working copy) @@ -1,5 +1,5 @@ /** - * Copyright 2006 The Apache Software Foundation + * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. Index: contrib/swing/src/java/org/apache/lucene/swing/models/TableSearcher.java =================================================================== --- contrib/swing/src/java/org/apache/lucene/swing/models/TableSearcher.java (revision 394295) +++ contrib/swing/src/java/org/apache/lucene/swing/models/TableSearcher.java (working copy) @@ -16,26 +16,23 @@ * limitations under the License. */ -import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.queryParser.MultiFieldQueryParser; +import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Hits; -import org.apache.lucene.queryParser.MultiFieldQueryParser; +import org.apache.lucene.store.RAMDirectory; -import java.awt.*; -import java.awt.event.*; -import java.util.*; -import java.util.List; - -import javax.swing.*; import javax.swing.event.TableModelEvent; import javax.swing.event.TableModelListener; -import javax.swing.table.*; +import javax.swing.table.AbstractTableModel; +import javax.swing.table.TableModel; +import java.util.ArrayList; /** @@ -275,7 +272,7 @@ //tabble model row that we are mapping to for (int t=0; t + public Document document(int n, FieldSelector fieldSelector) throws IOException { + if (DEBUG) System.err.println("MemoryIndexReader.document"); + return new Document(); // there are no stored fields + } + + public boolean isDeleted(int n) { if (DEBUG) System.err.println("MemoryIndexReader.isDeleted"); return false; } Index: src/test/org/apache/lucene/search/TestPhraseQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestPhraseQuery.java (revision 394295) +++ src/test/org/apache/lucene/search/TestPhraseQuery.java (working copy) @@ -16,22 +16,17 @@ * limitations under the License. */ -import java.io.IOException; -import java.io.Reader; - import junit.framework.TestCase; -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.analysis.*; +import org.apache.lucene.document.*; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; +import java.io.IOException; +import java.io.Reader; + /** * Tests {@link PhraseQuery}. 
* @@ -59,7 +54,7 @@ Document doc = new Document(); doc.add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED)); doc.add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.TOKENIZED)); - Field repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED); + Fieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED); doc.add(repeatedField); writer.addDocument(doc); Index: src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java =================================================================== --- src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java (revision 394295) +++ src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java (working copy) @@ -16,19 +16,17 @@ * limitations under the License. */ -import java.io.IOException; - import junit.framework.TestCase; - import org.apache.lucene.analysis.SimpleAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.*; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.English; +import java.io.IOException; + /** * @author Bernhard Messer * @version $rcs = ' $Id$ ' ; @@ -49,7 +47,7 @@ //writer.infoStream = System.out; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); - Field fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES); + Fieldable fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES); doc.add(fld); writer.addDocument(doc); } Index: src/test/org/apache/lucene/search/TestDocBoost.java =================================================================== --- src/test/org/apache/lucene/search/TestDocBoost.java (revision 394295) +++ src/test/org/apache/lucene/search/TestDocBoost.java (working copy) @@ -17,14 +17,11 @@ */ import junit.framework.TestCase; - -import org.apache.lucene.index.Term; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.*; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.index.Term; import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.analysis.SimpleAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; /** Document boost unit test. 
* @@ -35,27 +32,27 @@ public TestDocBoost(String name) { super(name); } - + public void testDocBoost() throws Exception { RAMDirectory store = new RAMDirectory(); IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true); - - Field f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED); - Field f2 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED); + + Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED); + Fieldable f2 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED); f2.setBoost(2.0f); - + Document d1 = new Document(); Document d2 = new Document(); Document d3 = new Document(); Document d4 = new Document(); d3.setBoost(3.0f); d4.setBoost(2.0f); - + d1.add(f1); // boost = 1 d2.add(f2); // boost = 2 d3.add(f1); // boost = 3 d4.add(f2); // boost = 4 - + writer.addDocument(d1); writer.addDocument(d2); writer.addDocument(d3); @@ -72,7 +69,7 @@ scores[doc] = score; } }); - + float lastScore = 0.0f; for (int i = 0; i < 4; i++) { Index: src/test/org/apache/lucene/search/TestSetNorm.java =================================================================== --- src/test/org/apache/lucene/search/TestSetNorm.java (revision 394295) +++ src/test/org/apache/lucene/search/TestSetNorm.java (working copy) @@ -17,15 +17,12 @@ */ import junit.framework.TestCase; - -import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.*; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.analysis.SimpleAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; /** Document boost unit test. 
* @@ -36,13 +33,13 @@ public TestSetNorm(String name) { super(name); } - + public void testSetNorm() throws Exception { RAMDirectory store = new RAMDirectory(); IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true); - + // add the same document four times - Field f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED); + Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED); Document d1 = new Document(); d1.add(f1); writer.addDocument(d1); @@ -69,7 +66,7 @@ scores[doc] = score; } }); - + float lastScore = 0.0f; for (int i = 0; i < 4; i++) { Index: src/test/org/apache/lucene/index/TestSegmentMerger.java =================================================================== --- src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 394295) +++ src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy) @@ -87,7 +87,7 @@ Collection stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR); assertTrue(stored != null); //System.out.println("stored size: " + stored.size()); - assertTrue(stored.size() == 2); + assertTrue("We do not have 4 fields that were indexed with term vector",stored.size() == 4); TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); assertTrue(vector != null); Index: src/test/org/apache/lucene/index/DocHelper.java =================================================================== --- src/test/org/apache/lucene/index/DocHelper.java (revision 394295) +++ src/test/org/apache/lucene/index/DocHelper.java (working copy) @@ -18,12 +18,12 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.*; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.HashMap; import java.util.Map; import java.util.Enumeration; @@ -39,6 +39,13 @@ public static final int [] FIELD_2_FREQS = {3, 1, 1}; public static final String TEXT_FIELD_2_KEY = "textField2"; public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + + public static final String FIELD_2_COMPRESSED_TEXT = "field field field two text"; + //Fields will be lexicographically sorted. 
So, the order is: field, text, two + public static final int [] COMPRESSED_FIELD_2_FREQS = {3, 1, 1}; + public static final String COMPRESSED_TEXT_FIELD_2_KEY = "compressedTextField2"; + public static Field compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms"; public static final String TEXT_FIELD_3_KEY = "textField3"; @@ -71,6 +78,34 @@ public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES); + public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary"; + public static byte [] LAZY_FIELD_BINARY_BYTES; + public static Field lazyFieldBinary; + + public static final String LAZY_FIELD_KEY = "lazyField"; + public static final String LAZY_FIELD_TEXT = "These are some field bytes"; + public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED); + + public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField"; + public static String LARGE_LAZY_FIELD_TEXT; + public static Field largeLazyField; + + //From Issue 509 + public static final String FIELD_UTF1_TEXT = "field one \u4e00text"; + public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8"; + public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, + Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO); + + public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text"; + //Fields will be lexicographically sorted. So, the order is: field, text, two + public static final int [] FIELD_UTF2_FREQS = {3, 1, 1}; + public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8"; + public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, + Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + + + + public static Map nameValues = null; // ordered list of all the fields... @@ -79,14 +114,20 @@ textField1, textField2, textField3, + compressedTextField2, keyField, noNormsField, unIndField, unStoredField1, unStoredField2, + textUtfField1, + textUtfField2, + lazyField, + lazyFieldBinary,//placeholder for binary field, since this is null. It must be second to last. + largeLazyField//placeholder for large field, since this is null. 
It must always be last }; - // Map + // Map public static Map all=new HashMap(); public static Map indexed=new HashMap(); public static Map stored=new HashMap(); @@ -94,11 +135,28 @@ public static Map unindexed=new HashMap(); public static Map termvector=new HashMap(); public static Map notermvector=new HashMap(); + public static Map lazy= new HashMap(); public static Map noNorms=new HashMap(); static { + //Initialize the large Lazy Field + StringBuffer buffer = new StringBuffer(); + for (int i = 0; i < 10000; i++) + { + buffer.append("Lazily loading lengths of language in lieu of laughing "); + } + + try { + LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8"); + } catch (UnsupportedEncodingException e) { + } + lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES); + fields[fields.length - 2] = lazyFieldBinary; + LARGE_LAZY_FIELD_TEXT = buffer.toString(); + largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED); + fields[fields.length - 1] = largeLazyField; for (int i=0; i + * Must test using a File based directory + * + * @throws Exception + */ + public void testLazyPerformance() throws Exception { + String tmpIODir = System.getProperty("java.io.tmpdir"); + String path = tmpIODir + File.separator + "lazyDir"; + File file = new File(path); + FSDirectory tmpDir = FSDirectory.getDirectory(file, true); + assertTrue(tmpDir != null); + DocumentWriter writer = new DocumentWriter(tmpDir, new WhitespaceAnalyzer(), + Similarity.getDefault(), 50); + assertTrue(writer != null); + writer.addDocument("test", testDoc); + assertTrue(fieldInfos != null); + FieldsReader reader; + long lazyTime = 0; + long regularTime = 0; + int length = 50; + Set lazyFieldNames = new HashSet(); + lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY); + SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames); + + for (int i = 0; i < length; i++) { + reader = new FieldsReader(tmpDir, "test", fieldInfos); + assertTrue(reader != null); + assertTrue(reader.size() == 1); + + Document doc; + doc = reader.doc(0, null);//Load all of them + assertTrue("doc is null and it shouldn't be", doc != null); + Fieldable field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY); + assertTrue("field is lazy", field.isLazy() == false); + String value; + long start; + long finish; + start = System.currentTimeMillis(); + //On my machine this was always 0ms. + value = field.stringValue(); + finish = System.currentTimeMillis(); + assertTrue("value is null and it shouldn't be", value != null); + assertTrue("field is null and it shouldn't be", field != null); + regularTime += (finish - start); + reader.close(); + reader = null; + doc = null; + //Hmmm, are we still in cache??? 
+ System.gc(); + reader = new FieldsReader(tmpDir, "test", fieldInfos); + doc = reader.doc(0, fieldSelector); + field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY); + assertTrue("field is not lazy", field.isLazy() == true); + start = System.currentTimeMillis(); + //On my machine this took around 50 - 70ms + value = field.stringValue(); + finish = System.currentTimeMillis(); + assertTrue("value is null and it shouldn't be", value != null); + lazyTime += (finish - start); + reader.close(); + + } + System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads"); + System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads"); + } + + } Index: src/test/org/apache/lucene/document/TestBinaryDocument.java =================================================================== --- src/test/org/apache/lucene/document/TestBinaryDocument.java (revision 394295) +++ src/test/org/apache/lucene/document/TestBinaryDocument.java (working copy) @@ -38,10 +38,10 @@ public void testBinaryFieldInIndex() throws Exception { - Field binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES); - Field binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS); - Field stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); - Field stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO); + Fieldable binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES); + Fieldable binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS); + Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); + Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO); try { // binary fields with store off are not allowed Index: src/test/org/apache/lucene/document/TestDocument.java =================================================================== --- src/test/org/apache/lucene/document/TestDocument.java (revision 394295) +++ src/test/org/apache/lucene/document/TestDocument.java (working copy) @@ -46,9 +46,9 @@ throws Exception { Document doc = new Document(); - Field stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO); - Field binaryFld = new Field("binary", binaryVal.getBytes(), Field.Store.YES); - Field binaryFld2 = new Field("binary", binaryVal2.getBytes(), Field.Store.YES); + Fieldable stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO); + Fieldable binaryFld = new Field("binary", binaryVal.getBytes(), Field.Store.YES); + Fieldable binaryFld2 = new Field("binary", binaryVal2.getBytes(), Field.Store.YES); doc.add(stringFld); doc.add(binaryFld); Index: src/java/org/apache/lucene/analysis/Analyzer.java =================================================================== --- src/java/org/apache/lucene/analysis/Analyzer.java (revision 394295) +++ src/java/org/apache/lucene/analysis/Analyzer.java (working copy) @@ -38,16 +38,16 @@ /** - * Invoked before indexing a Field instance if + * Invoked before indexing a Fieldable instance if * terms have already been added to that field. 
This allows custom * analyzers to place an automatic position increment gap between - * Field instances using the same field name. The default value + * Fieldable instances using the same field name. The default value * position increment gap is 0. With a 0 position increment gap and * the typical default token position increment of 1, all terms in a field, - * including across Field instances, are in successive positions, allowing - * exact PhraseQuery matches, for instance, across Field instance boundaries. + * including across Fieldable instances, are in successive positions, allowing + * exact PhraseQuery matches, for instance, across Fieldable instance boundaries. * - * @param fieldName Field name being indexed. + * @param fieldName Fieldable name being indexed. * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)} */ public int getPositionIncrementGap(String fieldName) Index: src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java =================================================================== --- src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java (revision 394295) +++ src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java (working copy) @@ -45,7 +45,7 @@ /** * Creates a hit queue sorted by the given list of fields. - * @param fields Field names, in priority order (highest priority first). + * @param fields Fieldable names, in priority order (highest priority first). * @param size The number of hits to retain. Must be greater than zero. */ FieldDocSortedHitQueue (SortField[] fields, int size) { Index: src/java/org/apache/lucene/search/Similarity.java =================================================================== --- src/java/org/apache/lucene/search/Similarity.java (revision 394295) +++ src/java/org/apache/lucene/search/Similarity.java (working copy) @@ -16,19 +16,16 @@ * limitations under the License. */ +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.SmallFloat; + import java.io.IOException; import java.io.Serializable; - import java.util.Collection; import java.util.Iterator; -import org.apache.lucene.index.Term; - -import org.apache.lucene.index.IndexReader; // for javadoc -import org.apache.lucene.index.IndexWriter; // for javadoc -import org.apache.lucene.document.Field; // for javadoc -import org.apache.lucene.util.SmallFloat; - /** Expert: Scoring API. *

Subclasses implement search scoring. * @@ -44,7 +41,7 @@ * ( {@link #tf(int) tf}(t in d) * * {@link #idf(Term,Searcher) idf}(t)^2 * * {@link Query#getBoost getBoost}(t in q) * - * {@link Field#getBoost getBoost}(t.field in d) * + * {@link org.apache.lucene.document.Field#getBoost getBoost}(t.field in d) * * {@link #lengthNorm(String,int) lengthNorm}(t.field in d) ) * *  * @@ -152,7 +149,7 @@ * fieldName of doc. * @return a normalization factor for hits on this field of this document * - * @see Field#setBoost(float) + * @see org.apache.lucene.document.Field#setBoost(float) */ public abstract float lengthNorm(String fieldName, int numTokens); @@ -179,7 +176,7 @@ * small to represent are rounded up to the smallest positive representable * value. * - * @see Field#setBoost(float) + * @see org.apache.lucene.document.Field#setBoost(float) * @see SmallFloat */ public static byte encodeNorm(float f) { Index: src/java/org/apache/lucene/search/SortComparatorSource.java =================================================================== --- src/java/org/apache/lucene/search/SortComparatorSource.java (revision 394295) +++ src/java/org/apache/lucene/search/SortComparatorSource.java (working copy) @@ -19,7 +19,7 @@ /** * Creates a comparator for the field in the given index. * @param reader Index to create comparator for. - * @param fieldname Field to create comparator for. + * @param fieldname Fieldable to create comparator for. * @return Comparator of ScoreDoc objects. * @throws IOException If an error occurs reading the index. */ Index: src/java/org/apache/lucene/search/FieldSortedHitQueue.java =================================================================== --- src/java/org/apache/lucene/search/FieldSortedHitQueue.java (revision 394295) +++ src/java/org/apache/lucene/search/FieldSortedHitQueue.java (working copy) @@ -44,7 +44,7 @@ /** * Creates a hit queue sorted by the given list of fields. * @param reader Index to use. - * @param fields Field names, in priority order (highest priority first). Cannot be null or empty. + * @param fields Fieldable names, in priority order (highest priority first). Cannot be null or empty. * @param size The number of hits to retain. Must be greater than zero. * @throws IOException */ @@ -212,7 +212,7 @@ /** * Returns a comparator for sorting hits according to a field containing integers. * @param reader Index to use. - * @param fieldname Field containg integer values. + * @param fieldname Fieldable containg integer values. * @return Comparator for sorting hits. * @throws IOException If an error occurs reading the index. */ @@ -243,7 +243,7 @@ /** * Returns a comparator for sorting hits according to a field containing floats. * @param reader Index to use. - * @param fieldname Field containg float values. + * @param fieldname Fieldable containg float values. * @return Comparator for sorting hits. * @throws IOException If an error occurs reading the index. */ @@ -274,7 +274,7 @@ /** * Returns a comparator for sorting hits according to a field containing strings. * @param reader Index to use. - * @param fieldname Field containg string values. + * @param fieldname Fieldable containg string values. * @return Comparator for sorting hits. * @throws IOException If an error occurs reading the index. */ @@ -305,7 +305,7 @@ /** * Returns a comparator for sorting hits according to a field containing strings. * @param reader Index to use. - * @param fieldname Field containg string values. + * @param fieldname Fieldable containg string values. 
* @return Comparator for sorting hits. * @throws IOException If an error occurs reading the index. */ @@ -336,7 +336,7 @@ * floats or strings. Once the type is determined, one of the other static methods * in this class is called to get the comparator. * @param reader Index to use. - * @param fieldname Field containg values. + * @param fieldname Fieldable containg values. * @return Comparator for sorting hits. * @throws IOException If an error occurs reading the index. */ Index: src/java/org/apache/lucene/search/Sort.java =================================================================== --- src/java/org/apache/lucene/search/Sort.java (revision 394295) +++ src/java/org/apache/lucene/search/Sort.java (working copy) @@ -29,7 +29,7 @@ * and does not need to be stored (unless you happen to want it back with the * rest of your document data). In other words: * - *

document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.UN_TOKENIZED));

+ *

document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.UN_TOKENIZED));

* * *

Valid Types of Values

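For reference, a compilable version of the Sort example above: a minimal sketch against the 2.x-era API used throughout this patch. The concrete Field class is used because Fieldable is an interface and cannot be instantiated; the "body" field, query term, and class name are illustrative assumptions, not part of the patch.

    import org.apache.lucene.analysis.SimpleAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Hits;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.store.RAMDirectory;

    public class SortFieldSketch {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
        for (int i = 3; i >= 1; i--) {
          Document doc = new Document();
          doc.add(new Field("body", "word", Field.Store.YES, Field.Index.TOKENIZED));
          // The sort key is indexed but untokenized; it need not be stored.
          doc.add(new Field("byNumber", Integer.toString(i),
                            Field.Store.NO, Field.Index.UN_TOKENIZED));
          writer.addDocument(doc);
        }
        writer.close();

        IndexSearcher searcher = new IndexSearcher(dir);
        Hits hits = searcher.search(new TermQuery(new Term("body", "word")),
                                    new Sort("byNumber"));
        for (int i = 0; i < hits.length(); i++)
          System.out.println(hits.id(i)); // documents come back in "byNumber" order
        searcher.close();
      }
    }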
Index: src/java/org/apache/lucene/search/FieldCacheImpl.java =================================================================== --- src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 394295) +++ src/java/org/apache/lucene/search/FieldCacheImpl.java (working copy) @@ -20,7 +20,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; -import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ import java.io.IOException; import java.util.Locale; @@ -43,7 +42,7 @@ /** Expert: Every key in the internal cache is of this type. */ static class Entry { - final String field; // which Field + final String field; // which Fieldable final int type; // which SortField type final Object custom; // which custom comparator final Locale locale; // the locale we're sorting (if string) Index: src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- src/java/org/apache/lucene/index/MultiReader.java (revision 394295) +++ src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -17,11 +17,14 @@ */ import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldSelector; import org.apache.lucene.store.Directory; import java.io.IOException; -import java.util.*; +import java.util.Collection; +import java.util.HashSet; +import java.util.Hashtable; +import java.util.Set; /** An IndexReader which reads multiple indexes, appending their content. * @@ -99,9 +102,9 @@ return maxDoc; } - public Document document(int n) throws IOException { + public Document document(int n, FieldSelector fieldSelector) throws IOException { int i = readerIndex(n); // find segment num - return subReaders[i].document(n - starts[i]); // dispatch to segment reader + return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader } public boolean isDeleted(int n) { Index: src/java/org/apache/lucene/index/FieldInfos.java =================================================================== --- src/java/org/apache/lucene/index/FieldInfos.java (revision 394295) +++ src/java/org/apache/lucene/index/FieldInfos.java (working copy) @@ -16,18 +16,17 @@ * limitations under the License. */ -import java.util.*; -import java.io.IOException; - import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; - +import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; -/** Access to the Field Info file that describes document fields and whether or - * not they are indexed. Each segment has a separate Field Info file. Objects +import java.io.IOException; +import java.util.*; + +/** Access to the Fieldable Info file that describes document fields and whether or + * not they are indexed. Each segment has a separate Fieldable Info file. Objects * of this class are thread-safe for multiple readers, but only one thread can * be adding documents at a time, with no other reader or writer threads * accessing this object. 
@@ -65,7 +64,7 @@ public void add(Document doc) { Enumeration fields = doc.fields(); while (fields.hasMoreElements()) { - Field field = (Field) fields.nextElement(); + Fieldable field = (Fieldable) fields.nextElement(); add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), field.getOmitNorms()); } @@ -105,7 +104,7 @@ /** * Calls 5 parameter add with false for all TermVector parameters. * - * @param name The name of the Field + * @param name The name of the Fieldable * @param isIndexed true if the field is indexed * @see #add(String, boolean, boolean, boolean, boolean) */ Index: src/java/org/apache/lucene/index/FieldsReader.java =================================================================== --- src/java/org/apache/lucene/index/FieldsReader.java (revision 394295) +++ src/java/org/apache/lucene/index/FieldsReader.java (working copy) @@ -16,19 +16,19 @@ * limitations under the License. */ +import org.apache.lucene.document.*; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; + import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.Reader; import java.util.zip.DataFormatException; import java.util.zip.Inflater; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexInput; - /** * Class responsible for access to stored document fields. - * + *

* It uses <segment>.fdt and <segment>.fdx; files. * * @version $Id$ @@ -39,25 +39,37 @@ private IndexInput indexStream; private int size; + private static ThreadLocal fieldsStreamTL = new ThreadLocal(); + FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { fieldInfos = fn; fieldsStream = d.openInput(segment + ".fdt"); indexStream = d.openInput(segment + ".fdx"); - - size = (int)(indexStream.length() / 8); + size = (int) (indexStream.length() / 8); } + /** + * Cloeses the underlying {@link org.apache.lucene.store.IndexInput} streams, including any ones associated with a + * lazy implementation of a Field. This means that the Fields values will not be accessible. + * + * @throws IOException + */ final void close() throws IOException { fieldsStream.close(); indexStream.close(); + IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); + if (localFieldsStream != null) { + localFieldsStream.close(); + fieldsStreamTL.set(null); + } } final int size() { return size; } - final Document doc(int n) throws IOException { + final Document doc(int n, FieldSelector fieldSelector) throws IOException { indexStream.seek(n * 8L); long position = indexStream.readLong(); fieldsStream.seek(position); @@ -67,89 +79,277 @@ for (int i = 0; i < numFields; i++) { int fieldNumber = fieldsStream.readVInt(); FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); - + FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name); + boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true; + byte bits = fieldsStream.readByte(); - boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; - - if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0) { - final byte[] b = new byte[fieldsStream.readVInt()]; - fieldsStream.readBytes(b, 0, b.length); - if (compressed) - doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS)); - else - doc.add(new Field(fi.name, b, Field.Store.YES)); + boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; + if (acceptField.equals(FieldSelectorResult.LOAD) == true) { + addField(doc, fi, binary, compressed, tokenize); } + else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){ + addField(doc, fi, binary, compressed, tokenize); + break;//Get out of this loop + } + else if (lazy == true){ + addFieldLazy(doc, fi, binary, compressed, tokenize); + } else { - Field.Index index; - Field.Store store = Field.Store.YES; - - if (fi.isIndexed && tokenize) - index = Field.Index.TOKENIZED; - else if (fi.isIndexed && !tokenize) - index = Field.Index.UN_TOKENIZED; - else - index = Field.Index.NO; - - Field.TermVector termVector = null; - if (fi.storeTermVector) { - if (fi.storeOffsetWithTermVector) { - if (fi.storePositionWithTermVector) { - termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; - } - else { - termVector = Field.TermVector.WITH_OFFSETS; - } - } - else if (fi.storePositionWithTermVector) { - termVector = Field.TermVector.WITH_POSITIONS; - } - else { - termVector = Field.TermVector.YES; - } - } - else { - termVector = Field.TermVector.NO; - } - - if (compressed) { - store = Field.Store.COMPRESS; - final byte[] b = new byte[fieldsStream.readVInt()]; - fieldsStream.readBytes(b, 0, b.length); - Field f = new Field(fi.name, // field name - new String(uncompress(b), "UTF-8"), // uncompress the value and add as string - store, - index, - termVector); - f.setOmitNorms(fi.omitNorms); - doc.add(f); - } - else { 
- Field f = new Field(fi.name, // name + skipField(binary, compressed); + } + } + + return doc; + } + + /** + * Skip the field. We still have to read some of the information about the field, but can skip past the actual content. + * This will have the most payoff on large fields. + */ + private void skipField(boolean binary, boolean compressed) throws IOException { + + int toRead = fieldsStream.readVInt(); + + if (binary || compressed) { + long pointer = fieldsStream.getFilePointer(); + fieldsStream.seek(pointer + toRead); + } else { + //We need to skip chars. This will slow us down, but still better + fieldsStream.skipChars(toRead); + } + } + + private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { + if (binary == true) { + int toRead = fieldsStream.readVInt(); + long pointer = fieldsStream.getFilePointer(); + if (compressed) { + //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS)); + doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer)); + } else { + //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES)); + doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer)); + } + //Need to move the pointer ahead by toRead positions + fieldsStream.seek(pointer + toRead); + } else { + Field.Store store = Field.Store.YES; + Field.Index index = getIndexType(fi, tokenize); + Field.TermVector termVector = getTermVectorType(fi); + + Fieldable f; + if (compressed) { + store = Field.Store.COMPRESS; + int toRead = fieldsStream.readVInt(); + long pointer = fieldsStream.getFilePointer(); + f = new LazyField(fi.name, store, toRead, pointer); + //skip over the part that we aren't loading + fieldsStream.seek(pointer + toRead); + f.setOmitNorms(fi.omitNorms); + } else { + int length = fieldsStream.readVInt(); + long pointer = fieldsStream.getFilePointer(); + //Skip ahead of where we are by the length of what is stored + fieldsStream.skipChars(length); + f = new LazyField(fi.name, store, index, termVector, length, pointer); + f.setOmitNorms(fi.omitNorms); + } + doc.add(f); + } + + } + + private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { + + //we have a binary stored field, and it may be compressed + if (binary) { + int toRead = fieldsStream.readVInt(); + final byte[] b = new byte[toRead]; + fieldsStream.readBytes(b, 0, b.length); + if (compressed) + doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS)); + else + doc.add(new Field(fi.name, b, Field.Store.YES)); + + } else { + Field.Store store = Field.Store.YES; + Field.Index index = getIndexType(fi, tokenize); + Field.TermVector termVector = getTermVectorType(fi); + + Fieldable f; + if (compressed) { + store = Field.Store.COMPRESS; + int toRead = fieldsStream.readVInt(); + + final byte[] b = new byte[toRead]; + fieldsStream.readBytes(b, 0, b.length); + f = new Field(fi.name, // field name + new String(uncompress(b), "UTF-8"), // uncompress the value and add as string + store, + index, + termVector); + f.setOmitNorms(fi.omitNorms); + } else { + f = new Field(fi.name, // name fieldsStream.readString(), // read value store, index, termVector); - f.setOmitNorms(fi.omitNorms); - doc.add(f); + f.setOmitNorms(fi.omitNorms); + } + doc.add(f); + } + } + + private Field.TermVector getTermVectorType(FieldInfo fi) { + Field.TermVector termVector = null; + if (fi.storeTermVector) { + if (fi.storeOffsetWithTermVector) { + if (fi.storePositionWithTermVector) { + 
termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; + } else { + termVector = Field.TermVector.WITH_OFFSETS; } + } else if (fi.storePositionWithTermVector) { + termVector = Field.TermVector.WITH_POSITIONS; + } else { + termVector = Field.TermVector.YES; } + } else { + termVector = Field.TermVector.NO; } + return termVector; + } - return doc; + private Field.Index getIndexType(FieldInfo fi, boolean tokenize) { + Field.Index index; + if (fi.isIndexed && tokenize) + index = Field.Index.TOKENIZED; + else if (fi.isIndexed && !tokenize) + index = Field.Index.UN_TOKENIZED; + else + index = Field.Index.NO; + return index; } - + + /** + * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is + * loaded. + */ + private class LazyField extends AbstractField implements Fieldable { + private int toRead; + private long pointer; + //internal buffer + private char[] chars; + + + public LazyField(String name, Field.Store store, int toRead, long pointer) { + super(name, store, Field.Index.NO, Field.TermVector.NO); + this.toRead = toRead; + this.pointer = pointer; + lazy = true; + } + + public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) { + super(name, store, index, termVector); + this.toRead = toRead; + this.pointer = pointer; + lazy = true; + } + + /** + * The value of the field in Binary, or null. If null, the Reader or + * String value is used. Exactly one of stringValue(), readerValue() and + * binaryValue() must be set. + */ + public byte[] binaryValue() { + if (fieldsData == null) { + final byte[] b = new byte[toRead]; + IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); + if (localFieldsStream == null) { + localFieldsStream = (IndexInput) fieldsStream.clone(); + fieldsStreamTL.set(localFieldsStream); + } + //Throw this IO Exception since IndexREader.document does so anyway, so probably not that big of a change for people + //since they are already handling this exception when getting the document + try { + localFieldsStream.seek(pointer); + localFieldsStream.readBytes(b, 0, b.length); + if (isCompressed == true) { + fieldsData = uncompress(b); + } else { + fieldsData = b; + } + } catch (IOException e) { + throw new FieldReaderException(e); + } + } + return fieldsData instanceof byte[] ? (byte[]) fieldsData : null; + } + + /** + * The value of the field as a Reader, or null. If null, the String value + * or binary value is used. Exactly one of stringValue(), readerValue(), + * and binaryValue() must be set. + */ + public Reader readerValue() { + return fieldsData instanceof Reader ? (Reader) fieldsData : null; + } + + /** + * The value of the field as a String, or null. If null, the Reader value + * or binary value is used. Exactly one of stringValue(), readerValue(), and + * binaryValue() must be set. 
+ */ + public String stringValue() { + if (fieldsData == null) { + IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); + if (localFieldsStream == null) { + localFieldsStream = (IndexInput) fieldsStream.clone(); + fieldsStreamTL.set(localFieldsStream); + } + try { + localFieldsStream.seek(pointer); + //read in chars b/c we already know the length we need to read + if (chars == null || toRead > chars.length) + chars = new char[toRead]; + localFieldsStream.readChars(chars, 0, toRead); + fieldsData = new String(chars, 0, toRead);//fieldsStream.readString(); + } catch (IOException e) { + throw new FieldReaderException(e); + } + } + return fieldsData instanceof String ? (String) fieldsData : null; + } + + public long getPointer() { + return pointer; + } + + public void setPointer(long pointer) { + this.pointer = pointer; + } + + public int getToRead() { + return toRead; + } + + public void setToRead(int toRead) { + this.toRead = toRead; + } + } + private final byte[] uncompress(final byte[] input) - throws IOException - { - + throws IOException { + Inflater decompressor = new Inflater(); decompressor.setInput(input); - + // Create an expandable byte array to hold the decompressed data ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); - + // Decompress the data byte[] buf = new byte[1024]; while (!decompressor.finished()) { @@ -159,12 +359,12 @@ } catch (DataFormatException e) { // this will happen if the field is not compressed - throw new IOException ("field data are in wrong format: " + e.toString()); + throw new IOException("field data are in wrong format: " + e.toString()); } } - + decompressor.end(); - + // Get the decompressed data return bos.toByteArray(); } Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 394295) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -17,7 +17,7 @@ */ import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldSelector; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -29,8 +29,6 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collection; -import java.util.HashSet; -import java.util.Set; /** IndexReader is an abstract class, providing an interface for accessing an index. Search of an index is done entirely through this abstract interface, @@ -44,7 +42,7 @@ document in the index. These document numbers are ephemeral--they may change as documents are added to and deleted from an index. Clients should thus not rely on a given document having the same number between sessions. - +

An IndexReader can be opened on a directory for which an IndexWriter is opened already, but it cannot be used to delete documents from the index then. @@ -52,13 +50,13 @@ @version $Id$ */ public abstract class IndexReader { - + public static final class FieldOption { private String option; private FieldOption() { } private FieldOption(String option) { this.option = option; - } + } public String toString() { return this.option; } @@ -81,7 +79,7 @@ // all fields where termvectors with offset and position values set public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET"); } - + /** * Constructor used if IndexReader is not owner of its directory. * This is used for IndexReaders that are used within other IndexReaders that take care or locking directories. @@ -91,7 +89,7 @@ protected IndexReader(Directory directory) { this.directory = directory; } - + /** * Constructor used if IndexReader is owner of its directory. * If IndexReader is owner of its directory, it locks its directory in case of write operations. @@ -119,8 +117,8 @@ private Lock writeLock; private boolean stale; private boolean hasChanges; - + /** Returns an IndexReader reading the index in an FSDirectory in the named path. */ public static IndexReader open(String path) throws IOException { @@ -132,7 +130,7 @@ public static IndexReader open(File path) throws IOException { return open(FSDirectory.getDirectory(path, false), true); } - + /** Returns an IndexReader reading the index in the given Directory. */ public static IndexReader open(final Directory directory) throws IOException { return open(directory, false); @@ -153,7 +151,7 @@ for (int i = 0; i < infos.size(); i++) readers[i] = SegmentReader.get(infos.info(i)); return new MultiReader(directory, infos, closeDirectory, readers); - + } }.run(); } @@ -162,7 +160,7 @@ /** Returns the directory this index resides in. */ public Directory directory() { return directory; } - /** + /** * Returns the time the index in the named directory was last modified. * Do not use this to check whether the reader is still up-to-date, use * {@link #isCurrent()} instead. @@ -171,7 +169,7 @@ return lastModified(new File(directory)); } - /** + /** * Returns the time the index in the named directory was last modified. * Do not use this to check whether the reader is still up-to-date, use * {@link #isCurrent()} instead. @@ -180,7 +178,7 @@ return FSDirectory.fileModified(directory, IndexFileNames.SEGMENTS); } - /** + /** * Returns the time the index in the named directory was last modified. * Do not use this to check whether the reader is still up-to-date, use * {@link #isCurrent()} instead. @@ -230,12 +228,12 @@ public static long getCurrentVersion(Directory directory) throws IOException { synchronized (directory) { // in- & inter-process sync Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME); - + boolean locked=false; - + try { locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT); - + return SegmentInfos.readCurrentVersion(directory); } finally { if (locked) { @@ -244,7 +242,7 @@ } } } - + /** * Version number when this IndexReader was opened. 
*/ @@ -262,12 +260,12 @@ public boolean isCurrent() throws IOException { synchronized (directory) { // in- & inter-process sync Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME); - + boolean locked=false; - + try { locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT); - + return SegmentInfos.readCurrentVersion(directory) == segmentInfos.getVersion(); } finally { if (locked) { @@ -294,7 +292,7 @@ abstract public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException; - + /** * Return a term frequency vector for the specified document and field. The * returned vector contains terms and frequencies for the terms in @@ -311,7 +309,7 @@ */ abstract public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException; - + /** * Returns true if an index exists at the specified directory. * If the directory does not exist or if there is no index in it. @@ -355,14 +353,40 @@ /** Returns the stored fields of the nth Document in this index. */ - public abstract Document document(int n) throws IOException; + public Document document(int n) throws IOException{ + return document(n, null); + } + /** + * Get the {@link org.apache.lucene.document.Document} at the nth position. The {@link org.apache.lucene.document.FieldSelector} + * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded. + * + * NOTE: If this Reader (more specifically, the underlying {@link FieldsReader} is closed before the lazy {@link org.apache.lucene.document.Field} is + * loaded an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must + * explicitly load it or fetch the Document again with a new loader. + * + * + * @param n Get the document at the nth position + * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded. + * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position + * @throws IOException If there is a problem reading this document + * + * @see org.apache.lucene.document.Fieldable + * @see org.apache.lucene.document.FieldSelector + * @see org.apache.lucene.document.SetBasedFieldSelector + * @see org.apache.lucene.document.LoadFirstFieldSelector + */ + //When we convert to JDK 1.5 make this Set + public abstract Document document(int n, FieldSelector fieldSelector) throws IOException; + + + /** Returns true if document n has been deleted */ public abstract boolean isDeleted(int n); /** Returns true if any documents have been deleted */ public abstract boolean hasDeletions(); - + /** Returns true if there are norms stored for this field. */ public boolean hasNorms(String field) throws IOException { // backward compatible implementation. @@ -373,21 +397,21 @@ /** Returns the byte-encoded normalization factor for the named field of * every document. This is used by the search code to score documents. * - * @see Field#setBoost(float) + * @see org.apache.lucene.document.Field#setBoost(float) */ public abstract byte[] norms(String field) throws IOException; /** Reads the byte-encoded normalization factor for the named field of every * document. This is used by the search code to score documents. 
* - * @see Field#setBoost(float) + * @see org.apache.lucene.document.Field#setBoost(float) */ public abstract void norms(String field, byte[] bytes, int offset) throws IOException; /** Expert: Resets the normalization factor for the named field of the named * document. The norm represents the product of the field's {@link - * Field#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, + * Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, * int) length normalization}. Thus, to preserve the length normalization * values when resetting this, one should base the new value upon the old. * @@ -401,9 +425,9 @@ doSetNorm(doc, field, value); hasChanges = true; } - + /** Implements setNorm in subclass.*/ - protected abstract void doSetNorm(int doc, String field, byte value) + protected abstract void doSetNorm(int doc, String field, byte value) throws IOException; /** Expert: Resets the normalization factor for the named field of the named @@ -520,7 +544,7 @@ /** Implements deletion of the document numbered docNum. - * Applications should call {@link #delete(int)} or {@link #delete(Term)}. + * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. */ protected abstract void doDelete(int docNum) throws IOException; @@ -530,7 +554,7 @@ * the document. Then to delete such a document, one merely constructs a * term with the appropriate field and the unique ID string as its text and * passes it to this method. - * See {@link #delete(int)} for information about when this deletion will + * See {@link #deleteDocument(int)} for information about when this deletion will * become effective. * @return the number of documents deleted */ @@ -556,7 +580,7 @@ doUndeleteAll(); hasChanges = true; } - + /** Implements actual undeleteAll() in subclass. */ protected abstract void doUndeleteAll() throws IOException; @@ -588,10 +612,10 @@ } hasChanges = false; } - + /** Implements commit. */ protected abstract void doCommit() throws IOException; - + /** * Closes files associated with this index. * Also saves any new deletions to disk. @@ -615,7 +639,7 @@ } } - + /** * Get a list of unique field names that exist in this index and have the specified * field option information. @@ -661,7 +685,7 @@ directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release(); directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release(); } - + /** * Prints the filename and size of each file within a given compound file. * Add the -extract flag to extract files to the current working directory. 
@@ -688,7 +712,7 @@ Directory dir = null; CompoundFileReader cfr = null; - + try { File file = new File(filename); String dirname = file.getAbsoluteFile().getParent(); @@ -698,7 +722,7 @@ String [] files = cfr.list(); Arrays.sort(files); // sort the array of filename so that the output is more readable - + for (int i = 0; i < files.length; ++i) { long len = cfr.fileLength(files[i]); @@ -707,7 +731,7 @@ IndexInput ii = cfr.openInput(files[i]); FileOutputStream f = new FileOutputStream(files[i]); - + // read and write with a small buffer, which is more effectiv than reading byte by byte byte[] buffer = new byte[1024]; int chunk = buffer.length; @@ -717,7 +741,7 @@ f.write(buffer, 0, bufLen); len -= bufLen; } - + f.close(); ii.close(); } Index: src/java/org/apache/lucene/index/FilterIndexReader.java =================================================================== --- src/java/org/apache/lucene/index/FilterIndexReader.java (revision 394295) +++ src/java/org/apache/lucene/index/FilterIndexReader.java (working copy) @@ -17,7 +17,9 @@ */ import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; + import java.io.IOException; import java.util.Collection; @@ -100,7 +102,7 @@ public int numDocs() { return in.numDocs(); } public int maxDoc() { return in.maxDoc(); } - public Document document(int n) throws IOException { return in.document(n); } + public Document document(int n, FieldSelector fieldSelector) throws IOException { return in.document(n, fieldSelector); } public boolean isDeleted(int n) { return in.isDeleted(n); } public boolean hasDeletions() { return in.hasDeletions(); } @@ -133,7 +135,7 @@ protected void doCommit() throws IOException { in.commit(); } protected void doClose() throws IOException { in.close(); } - + public Collection getFieldNames(IndexReader.FieldOption fieldNames) { return in.getFieldNames(fieldNames); } Index: src/java/org/apache/lucene/index/TermVectorsReader.java =================================================================== --- src/java/org/apache/lucene/index/TermVectorsReader.java (revision 394295) +++ src/java/org/apache/lucene/index/TermVectorsReader.java (working copy) @@ -127,7 +127,7 @@ result = readTermVector(field, position); } else { - //System.out.println("Field not found"); + //System.out.println("Fieldable not found"); } } else { //System.out.println("No tvx file"); Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (revision 394295) +++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -120,7 +120,7 @@ files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]); } - // Field norm files + // Fieldable norm files for (int i = 0; i < fieldInfos.size(); i++) { FieldInfo fi = fieldInfos.fieldInfo(i); if (fi.isIndexed && !fi.omitNorms) { Index: src/java/org/apache/lucene/index/TermVectorsWriter.java =================================================================== --- src/java/org/apache/lucene/index/TermVectorsWriter.java (revision 394295) +++ src/java/org/apache/lucene/index/TermVectorsWriter.java (working copy) @@ -150,7 +150,7 @@ return currentField != null; } - /** Add term to the field's term vector. Field must already be open. + /** Add term to the field's term vector. Fieldable must already be open. * Terms should be added in * increasing order of terms, one call per unique termNum. ProxPointer * is a pointer into the TermPosition file (prx). 
Freq is the number of @@ -268,7 +268,7 @@ private void writeField() throws IOException { // remember where this field is written currentField.tvfPointer = tvf.getFilePointer(); - //System.out.println("Field Pointer: " + currentField.tvfPointer); + //System.out.println("Fieldable Pointer: " + currentField.tvfPointer); final int size = terms.size(); tvf.writeVInt(size); Index: src/java/org/apache/lucene/index/DocumentWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentWriter.java (revision 394295) +++ src/java/org/apache/lucene/index/DocumentWriter.java (working copy) @@ -16,23 +16,23 @@ * limitations under the License. */ +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; + import java.io.IOException; import java.io.PrintStream; import java.io.Reader; import java.io.StringReader; +import java.util.Arrays; +import java.util.Enumeration; import java.util.Hashtable; -import java.util.Enumeration; -import java.util.Arrays; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.search.Similarity; - final class DocumentWriter { private Analyzer analyzer; private Directory directory; @@ -129,7 +129,7 @@ throws IOException { Enumeration fields = doc.fields(); while (fields.hasMoreElements()) { - Field field = (Field) fields.nextElement(); + Fieldable field = (Fieldable) fields.nextElement(); String fieldName = field.name(); int fieldNumber = fieldInfos.fieldNumber(fieldName); Index: src/java/org/apache/lucene/index/ParallelReader.java =================================================================== --- src/java/org/apache/lucene/index/ParallelReader.java (revision 394295) +++ src/java/org/apache/lucene/index/ParallelReader.java (working copy) @@ -16,21 +16,13 @@ * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Enumeration; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; - import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldSelector; +import java.io.IOException; +import java.util.*; + /** An IndexReader which reads multiple, parallel indexes. Each index added * must have the same number of documents, but typically each contains * different fields. Each document contains the union of the fields of all @@ -51,7 +43,7 @@ public class ParallelReader extends IndexReader { private List readers = new ArrayList(); private SortedMap fieldToReader = new TreeMap(); - private List storedFieldReaders = new ArrayList(); + private List storedFieldReaders = new ArrayList(); private int maxDoc; private int numDocs; @@ -59,7 +51,7 @@ /** Construct a ParallelReader. 
*/ public ParallelReader() throws IOException { super(null); } - + /** Add an IndexReader. */ public void add(IndexReader reader) throws IOException { add(reader, false); @@ -89,7 +81,7 @@ if (reader.numDocs() != numDocs) throw new IllegalArgumentException ("All readers must have same numDocs: "+numDocs+"!="+reader.numDocs()); - + Iterator i = reader.getFieldNames(IndexReader.FieldOption.ALL).iterator(); while (i.hasNext()) { // update fieldToReader map String field = (String)i.next(); @@ -132,13 +124,13 @@ } // append fields from storedFieldReaders - public Document document(int n) throws IOException { + public Document document(int n, FieldSelector fieldSelector) throws IOException { Document result = new Document(); for (int i = 0; i < storedFieldReaders.size(); i++) { IndexReader reader = (IndexReader)storedFieldReaders.get(i); - Enumeration fields = reader.document(n).fields(); + Enumeration fields = reader.document(n, fieldSelector).fields(); while (fields.hasMoreElements()) { - result.add((Field)fields.nextElement()); + result.add((Fieldable)fields.nextElement()); } } return result; @@ -241,12 +233,12 @@ if (field != null) termEnum = ((IndexReader)fieldToReader.get(field)).terms(); } - + public ParallelTermEnum(Term term) throws IOException { field = term.field(); termEnum = ((IndexReader)fieldToReader.get(field)).terms(term); } - + public boolean next() throws IOException { if (field == null) return false; @@ -256,7 +248,7 @@ // still within field? if (next && termEnum.term().field() == field) return true; // yes, keep going - + termEnum.close(); // close old termEnum // find the next field, if any @@ -267,7 +259,7 @@ } return false; // no more fields - + } public Term term() { return termEnum.term(); } Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 394295) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -16,17 +16,17 @@ * limitations under the License. */ -import java.io.IOException; -import java.util.*; - import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.Directory; import org.apache.lucene.util.BitVector; -import org.apache.lucene.search.DefaultSimilarity; +import java.io.IOException; +import java.util.*; + /** * @version $Id$ */ @@ -277,11 +277,11 @@ return tis.terms(t); } - public synchronized Document document(int n) throws IOException { + public synchronized Document document(int n, FieldSelector fieldSelector) throws IOException { if (isDeleted(n)) throw new IllegalArgumentException ("attempt to access a deleted document"); - return fieldsReader.doc(n); + return fieldsReader.doc(n, fieldSelector); } public synchronized boolean isDeleted(int n) { Index: src/java/org/apache/lucene/index/IndexModifier.java =================================================================== --- src/java/org/apache/lucene/index/IndexModifier.java (revision 394295) +++ src/java/org/apache/lucene/index/IndexModifier.java (working copy) @@ -50,8 +50,8 @@     // create an index in /tmp/index, overwriting an existing one:
    IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
    Document doc = new Document();
    doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
    indexModifier.addDocument(doc);
    int deleted = indexModifier.delete(new Term("id", "1"));
    System.out.println("Deleted " + deleted + " document");
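
Fieldable cannot be instantiated: it is an interface, and Field (declared later in this patch as "Field extends AbstractField implements Fieldable") remains the concrete class. Code written against the new API therefore still constructs Field objects and refers to them through the Fieldable type. A minimal sketch mirroring the sample above; the index path and field values are illustrative:

    import org.apache.lucene.analysis.SimpleAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Fieldable;
    import org.apache.lucene.index.IndexModifier;
    import org.apache.lucene.index.Term;

    public class FieldableExample {
      public static void main(String[] args) throws Exception {
        // Create an index in /tmp/index, overwriting an existing one.
        IndexModifier indexModifier = new IndexModifier("/tmp/index", new SimpleAnalyzer(), true);
        Document doc = new Document();
        // Field is constructed as before; Fieldable is the reference type
        // that Document.add(Fieldable) now accepts.
        Fieldable id = new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED);
        Fieldable body = new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED);
        doc.add(id);
        doc.add(body);
        indexModifier.addDocument(doc);
        int deleted = indexModifier.delete(new Term("id", "1"));
        System.out.println("Deleted " + deleted + " document(s)");
        indexModifier.close();
      }
    }
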
@@ -501,8 +501,8 @@ // create an index in /tmp/index, overwriting an existing one: IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true); Document doc = new Document(); doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED)); indexModifier.addDocument(doc); int deleted = indexModifier.delete(new Term("id", "1")); System.out.println("Deleted " + deleted + " document"); Index: src/java/org/apache/lucene/store/IndexInput.java =================================================================== --- src/java/org/apache/lucene/store/IndexInput.java (revision 394295) +++ src/java/org/apache/lucene/store/IndexInput.java (working copy) @@ -116,6 +116,32 @@ } } + /** + * Expert: similar to {@link #readChars(char[], int, int)}, but does no conversion on the bytes it reads in. It still + * invokes {@link #readByte()} just as {@link #readChars(char[], int, int)} does, but it needs no buffer to store anything in + * and performs none of the bitwise operations, since all that matters about each byte is how many more bytes to read. + * @param length the number of chars to skip + */ + public void skipChars(int length) throws IOException{ + for (int i = 0; i < length; i++) { + byte b = readByte(); + if ((b & 0x80) == 0){ + //do nothing, we only need one byte + } + else if ((b & 0xE0) != 0xE0) { + readByte();//read an additional byte + } else{ + //read two additional bytes. + readByte(); + readByte(); + } + } + } + + /** Closes the stream to further operations. */ public abstract void close() throws IOException; Index: src/java/org/apache/lucene/document/Field.java =================================================================== --- src/java/org/apache/lucene/document/Field.java (revision 394295) +++ src/java/org/apache/lucene/document/Field.java (working copy) @@ -16,9 +16,6 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.Hits; -import org.apache.lucene.search.Similarity; import org.apache.lucene.util.Parameter; import java.io.Reader; @@ -32,24 +29,8 @@ index, so that they may be returned with hits on the document. */ -public final class Field implements Serializable { - private String name = "body"; +public final class Field extends AbstractField implements Fieldable, Serializable { - // the one and only data object for all different kind of field values - private Object fieldsData = null; - - private boolean storeTermVector = false; - private boolean storeOffsetWithTermVector = false; - private boolean storePositionWithTermVector = false; - private boolean omitNorms = false; - private boolean isStored = false; - private boolean isIndexed = true; - private boolean isTokenized = true; - private boolean isBinary = false; - private boolean isCompressed = false; - - private float boost = 1.0f; - /** Specifies whether and how a field should be stored. */ public static final class Store extends Parameter implements Serializable { @@ -146,45 +127,7 @@ public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS"); } - /** Sets the boost factor for hits on this field.
This value will be - * multiplied into the score of all hits on this field of this - * document. - * - * <p>The boost is multiplied by {@link Document#getBoost()} of the document - * containing this field. If a document has multiple fields with the same - * name, all such values are multiplied together. This product is then - * multiplied by the value {@link Similarity#lengthNorm(String,int)}, and - * rounded by {@link Similarity#encodeNorm(float)} before it is stored in the - * index. One should attempt to ensure that this product does not overflow - * the range of that encoding. - * - * @see Document#setBoost(float) - * @see Similarity#lengthNorm(String, int) - * @see Similarity#encodeNorm(float) - */ - public void setBoost(float boost) { - this.boost = boost; - } - - /** Returns the boost factor for hits for this field. - * - * <p>The default value is 1.0. - * - * <p>
Note: this value is not stored directly with the document in the index. - * Documents returned from {@link IndexReader#document(int)} and - * {@link Hits#doc(int)} may thus not have the same value present as when - * this field was indexed. - * - * @see #setBoost(float) - */ - public float getBoost() { - return boost; - } - /** Returns the name of the field as an interned string. - * For example "date", "title", "body", ... - */ - public String name() { return name; } - + /** The value of the field as a String, or null. If null, the Reader value * or binary value is used. Exactly one of stringValue(), readerValue(), and * binaryValue() must be set. */ @@ -365,146 +308,6 @@ setStoreTermVector(TermVector.NO); } - - private void setStoreTermVector(TermVector termVector) { - if (termVector == TermVector.NO) { - this.storeTermVector = false; - this.storePositionWithTermVector = false; - this.storeOffsetWithTermVector = false; - } - else if (termVector == TermVector.YES) { - this.storeTermVector = true; - this.storePositionWithTermVector = false; - this.storeOffsetWithTermVector = false; - } - else if (termVector == TermVector.WITH_POSITIONS) { - this.storeTermVector = true; - this.storePositionWithTermVector = true; - this.storeOffsetWithTermVector = false; - } - else if (termVector == TermVector.WITH_OFFSETS) { - this.storeTermVector = true; - this.storePositionWithTermVector = false; - this.storeOffsetWithTermVector = true; - } - else if (termVector == TermVector.WITH_POSITIONS_OFFSETS) { - this.storeTermVector = true; - this.storePositionWithTermVector = true; - this.storeOffsetWithTermVector = true; - } - else { - throw new IllegalArgumentException("unknown termVector parameter " + termVector); - } - } - - /** True iff the value of the field is to be stored in the index for return - with search hits. It is an error for this to be true if a field is - Reader-valued. */ - public final boolean isStored() { return isStored; } - /** True iff the value of the field is to be indexed, so that it may be - searched on. */ - public final boolean isIndexed() { return isIndexed; } - /** True iff the value of the field should be tokenized as text prior to - indexing. Un-tokenized fields are indexed as a single word and may not be - Reader-valued. */ - public final boolean isTokenized() { return isTokenized; } - - /** True if the value of the field is stored and compressed within the index */ - public final boolean isCompressed() { return isCompressed; } - - /** True iff the term or terms used to index this field are stored as a term - * vector, available from {@link IndexReader#getTermFreqVector(int,String)}. - * These methods do not provide access to the original content of the field, - * only to terms used to index it. If the original content must be - * preserved, use the stored attribute instead. - * - * @see IndexReader#getTermFreqVector(int, String) - */ - public final boolean isTermVectorStored() { return storeTermVector; } - - /** - * True iff terms are stored as term vector together with their offsets - * (start and end positon in source text). - */ - public boolean isStoreOffsetWithTermVector(){ - return storeOffsetWithTermVector; - } - - /** - * True iff terms are stored as term vector together with their token positions. 
- */ - public boolean isStorePositionWithTermVector(){ - return storePositionWithTermVector; - } - - /** True iff the value of the filed is stored as binary */ - public final boolean isBinary() { return isBinary; } - - /** True if norms are omitted for this indexed field */ - public boolean getOmitNorms() { return omitNorms; } - - /** Expert: - * - * If set, omit normalization factors associated with this indexed field. - * This effectively disables indexing boosts and length normalization for this field. - */ - public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; } - - /** Prints a Field for human consumption. */ - public final String toString() { - StringBuffer result = new StringBuffer(); - if (isStored) { - result.append("stored"); - if (isCompressed) - result.append("/compressed"); - else - result.append("/uncompressed"); - } - if (isIndexed) { - if (result.length() > 0) - result.append(","); - result.append("indexed"); - } - if (isTokenized) { - if (result.length() > 0) - result.append(","); - result.append("tokenized"); - } - if (storeTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVector"); - } - if (storeOffsetWithTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVectorOffsets"); - } - if (storePositionWithTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVectorPosition"); - } - if (isBinary) { - if (result.length() > 0) - result.append(","); - result.append("binary"); - } - if (omitNorms) { - result.append(",omitNorms"); - } - result.append('<'); - result.append(name); - result.append(':'); - - if (fieldsData != null) { - result.append(fieldsData); - } - - result.append('>'); - return result.toString(); - } - } Index: src/java/org/apache/lucene/document/Document.java =================================================================== --- src/java/org/apache/lucene/document/Document.java (revision 394295) +++ src/java/org/apache/lucene/document/Document.java (working copy) @@ -16,24 +16,21 @@ * limitations under the License. */ -import java.util.Enumeration; -import java.util.Iterator; -import java.util.List; -import java.util.ArrayList; -import java.util.Vector; -import org.apache.lucene.index.IndexReader; // for javadoc -import org.apache.lucene.search.Searcher; // for javadoc -import org.apache.lucene.search.Hits; // for javadoc +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.Searcher; +import java.util.*; // for javadoc + /** Documents are the unit of indexing and search. * * A Document is a set of fields. Each field has a name and a textual value. - * A field may be {@link Field#isStored() stored} with the document, in which + * A field may be {@link Fieldable#isStored() stored} with the document, in which * case it is returned with search hits on the document. Thus each document * should typically contain one or more stored fields which uniquely identify * it. * - *
<p>Note that fields which are not {@link Field#isStored() stored} are + * <p>Note that fields which are not {@link Fieldable#isStored() stored} are * not available in documents retrieved from the index, e.g. with {@link * Hits#doc(int)}, {@link Searcher#doc(int)} or {@link * IndexReader#document(int)}. @@ -50,11 +47,11 @@ /** Sets a boost factor for hits on any field of this document. This value * will be multiplied into the score of all hits on this document. * - * <p>Values are multiplied into the value of {@link Field#getBoost()} of + * <p>Values are multiplied into the value of {@link Fieldable#getBoost()} of * each field in this document. Thus, this method in effect sets a default * boost for the fields of this document. * - * @see Field#setBoost(float) + * @see Fieldable#setBoost(float) */ public void setBoost(float boost) { this.boost = boost; @@ -85,7 +82,7 @@ * a document has to be deleted from an index and a new changed version of that * document has to be added.</p>
*/ - public final void add(Field field) { + public final void add(Fieldable field) { fields.add(field); } @@ -102,7 +99,7 @@ public final void removeField(String name) { Iterator it = fields.iterator(); while (it.hasNext()) { - Field field = (Field)it.next(); + Fieldable field = (Fieldable)it.next(); if (field.name().equals(name)) { it.remove(); return; @@ -122,7 +119,7 @@ public final void removeFields(String name) { Iterator it = fields.iterator(); while (it.hasNext()) { - Field field = (Field)it.next(); + Fieldable field = (Fieldable)it.next(); if (field.name().equals(name)) { it.remove(); } @@ -133,9 +130,9 @@ * null. If multiple fields exists with this name, this method returns the * first value added. */ - public final Field getField(String name) { + public final Fieldable getField(String name) { for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name)) return field; } @@ -149,7 +146,7 @@ */ public final String get(String name) { for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name) && (!field.isBinary())) return field.stringValue(); } @@ -162,16 +159,16 @@ } /** - * Returns an array of {@link Field}s with the given name. + * Returns an array of {@link Fieldable}s with the given name. * This method can return null. * * @param name the name of the field - * @return a Field[] array + * @return a Fieldable[] array */ - public final Field[] getFields(String name) { + public final Fieldable[] getFields(String name) { List result = new ArrayList(); for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name)) { result.add(field); } @@ -180,7 +177,7 @@ if (result.size() == 0) return null; - return (Field[])result.toArray(new Field[result.size()]); + return (Fieldable[])result.toArray(new Fieldable[result.size()]); } /** @@ -193,7 +190,7 @@ public final String[] getValues(String name) { List result = new ArrayList(); for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name) && (!field.isBinary())) result.add(field.stringValue()); } @@ -215,7 +212,7 @@ public final byte[][] getBinaryValues(String name) { List result = new ArrayList(); for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name) && (field.isBinary())) result.add(field.binaryValue()); } @@ -237,7 +234,7 @@ */ public final byte[] getBinaryValue(String name) { for (int i=0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name) && (field.isBinary())) return field.binaryValue(); } @@ -249,7 +246,7 @@ StringBuffer buffer = new StringBuffer(); buffer.append("Document<"); for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); buffer.append(field.toString()); if (i != fields.size()-1) buffer.append(" "); Index: docs/queryparsersyntax.html =================================================================== --- docs/queryparsersyntax.html (revision 394295) +++ docs/queryparsersyntax.html (working copy) @@ -117,15 +117,10 @@
<p>Although Lucene provides the ability to create your own queries through its API, it also provides a rich query
-language through the Query Parser, a lexer which
-interprets a string into a Lucene Query using JavaCC.
-</p>
-
-<p>This page provides the Query Parser syntax in Lucene 1.9.
-If you are using a different
-version of Lucene, please consult the copy of
-docs/queryparsersyntax.html that was distributed
-with the version you are using.
-</p>
+language through the Query Parser.</p>
+
+<p>This page
+provides the syntax of Lucene's Query Parser, a lexer which
+interprets a string into a Lucene Query using JavaCC.</p>

Before choosing to use the provided Query Parser, please consider the following:
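
As a concrete illustration, a minimal sketch of invoking the Query Parser from Java against the 1.9-era API; the default field name, analyzer, and query string are illustrative:

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.queryParser.ParseException;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.Query;

    public class QueryParserExample {
      public static void main(String[] args) throws ParseException {
        // The parser is bound to a default field and an analyzer; the
        // string below uses the syntax this page describes.
        QueryParser parser = new QueryParser("body", new WhitespaceAnalyzer());
        Query query = parser.parse("+lucene +\"query parser\"");
        System.out.println(query.toString("body"));
      }
    }
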
    Index: docs/fileformats.html =================================================================== --- docs/fileformats.html (revision 394295) +++ docs/fileformats.html (working copy) @@ -113,10 +113,7 @@
    This document defines the index file formats used - in Lucene version 1.9. If you are using a different - version of Lucene, please consult the copy of - docs/fileformats.html that was distributed - with the version you are using. + in Lucene version 1.4 and above.
    Apache Lucene is written in Java, but several Index: README.txt =================================================================== --- README.txt (revision 394295) +++ README.txt (working copy) @@ -22,19 +22,11 @@ lucene-demos-XX.jar The compiled simple example code. -luceneweb.war - The compiled simple example Web Application. - -contrib/* - Contributed code which extends and enhances Lucene, but is not - part of the core library. - docs/index.html The contents of the Lucene website. docs/api/index.html - The Javadoc Lucene API documentation. This includes the core - library, the demo, as well as all of the contrib modules. + The Javadoc Lucene API documentation. src/java The Lucene source code.
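
Taken together, the document(int, FieldSelector) overloads added in this patch (SegmentReader, FilterIndexReader, ParallelReader) let a caller decide, field by field, which stored values are loaded, and IndexInput.skipChars supplies the cheap skipping for fields that are passed over. A minimal caller-side sketch, assuming FieldSelector declares a single accept(String) method that returns FieldSelectorResult constants such as LOAD and NO_LOAD; that type is not shown in this patch, so treat those names as assumptions:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.FieldSelector;
    import org.apache.lucene.document.FieldSelectorResult;
    import org.apache.lucene.index.IndexReader;

    public class FieldSelectorExample {
      public static void main(String[] args) throws Exception {
        IndexReader reader = IndexReader.open("/tmp/index");
        // Load only the "id" field and skip the stored bytes of every other
        // field. FieldSelectorResult.LOAD / NO_LOAD are assumed names here.
        FieldSelector idOnly = new FieldSelector() {
          public FieldSelectorResult accept(String fieldName) {
            return "id".equals(fieldName) ? FieldSelectorResult.LOAD
                                          : FieldSelectorResult.NO_LOAD;
          }
        };
        Document doc = reader.document(0, idOnly);
        System.out.println(doc.get("id"));
        reader.close();
      }
    }
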