Index: src/test/org/apache/lucene/store/IndexInputTest.java =================================================================== --- src/test/org/apache/lucene/store/IndexInputTest.java (révision 0) +++ src/test/org/apache/lucene/store/IndexInputTest.java (révision 0) @@ -0,0 +1,104 @@ +package org.apache.lucene.store; + +import junit.framework.TestCase; + +public class IndexInputTest extends TestCase { + + public void testInt() throws Exception { + genericTestInt(0); + genericTestInt(1); + genericTestInt(-1); + genericTestInt(Integer.MAX_VALUE); + genericTestInt(Integer.MIN_VALUE); + } + + public void testVInt() throws Exception { + genericTestVInt(0); + genericTestVInt(1); + genericTestVInt(-1); + genericTestVInt(Integer.MAX_VALUE); + genericTestVInt(Integer.MIN_VALUE); + } + + public void testLong() throws Exception { + genericTestLong(0); + genericTestLong(1); + genericTestLong(-1); + genericTestLong(Long.MAX_VALUE); + genericTestLong(Long.MIN_VALUE); + } + + public void testVLong() throws Exception { + genericTestVLong(0); + genericTestVLong(1); + genericTestVLong(-1); + genericTestVLong(Long.MAX_VALUE); + genericTestVLong(Long.MIN_VALUE); + } + + public void testString() throws Exception { + genericTestString(""); + genericTestString("a"); + genericTestString("GiyNNKHhnivNKKHgcNiCniCH716534912é_è'-(é(_çà-é$*ù!:;,!:;,"); + } + + private void genericTestInt(int i) throws Exception { + RAMFile fileA = new RAMFile(); + RAMFile fileB = new RAMFile(); + RAMOutputStream outA = new RAMOutputStream(fileA); + outA.writeInt(i); + outA.close(); + RAMOutputStream outB = new RAMOutputStream(fileB); + outB.writeInt(new RAMInputStream(fileA)); + outB.close(); + assertEquals(i, new RAMInputStream(fileB).readInt()); + } + + private void genericTestVInt(int i) throws Exception { + RAMFile fileA = new RAMFile(); + RAMFile fileB = new RAMFile(); + RAMOutputStream outA = new RAMOutputStream(fileA); + outA.writeVInt(i); + outA.close(); + RAMOutputStream outB = new 
RAMOutputStream(fileB); + outB.writeVInt(new RAMInputStream(fileA)); + outB.close(); + assertEquals(i, new RAMInputStream(fileB).readVInt()); + } + + private void genericTestLong(long l) throws Exception { + RAMFile fileA = new RAMFile(); + RAMFile fileB = new RAMFile(); + RAMOutputStream outA = new RAMOutputStream(fileA); + outA.writeLong(l); + outA.close(); + RAMOutputStream outB = new RAMOutputStream(fileB); + outB.writeLong(new RAMInputStream(fileA)); + outB.close(); + assertEquals(l, new RAMInputStream(fileB).readLong()); + } + + private void genericTestVLong(long l) throws Exception { + RAMFile fileA = new RAMFile(); + RAMFile fileB = new RAMFile(); + RAMOutputStream outA = new RAMOutputStream(fileA); + outA.writeVLong(l); + outA.close(); + RAMOutputStream outB = new RAMOutputStream(fileB); + outB.writeVLong(new RAMInputStream(fileA)); + outB.close(); + assertEquals(l, new RAMInputStream(fileB).readVLong()); + } + + private void genericTestString(String s) throws Exception { + RAMFile fileA = new RAMFile(); + RAMFile fileB = new RAMFile(); + RAMOutputStream outA = new RAMOutputStream(fileA); + outA.writeString(s); + outA.close(); + RAMOutputStream outB = new RAMOutputStream(fileB); + outB.writeString(new RAMInputStream(fileA)); + outB.close(); + assertEquals(s, new RAMInputStream(fileB).readString()); + } +} Index: src/test/org/apache/lucene/index/TestParallelTermEnum.java =================================================================== --- src/test/org/apache/lucene/index/TestParallelTermEnum.java (révision 449380) +++ src/test/org/apache/lucene/index/TestParallelTermEnum.java (copie de travail) @@ -1,4 +1,19 @@ package org.apache.lucene.index; +/** + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ import java.io.IOException; Index: src/test/org/apache/lucene/index/TestDocumentWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestDocumentWriter.java (révision 449380) +++ src/test/org/apache/lucene/index/TestDocumentWriter.java (copie de travail) @@ -87,7 +87,7 @@ // test that the norm file is not present if omitNorms is true for (int i = 0; i < reader.fieldInfos.size(); i++) { - FieldInfo fi = reader.fieldInfos.fieldInfo(i); + FieldInfo fi = (FieldInfo) reader.fieldInfos.getEntry(i); if (fi.isIndexed) { assertTrue(fi.omitNorms == !dir.fileExists(segName + ".f" + i)); } Index: src/test/org/apache/lucene/index/TestFieldInfos.java =================================================================== --- src/test/org/apache/lucene/index/TestFieldInfos.java (révision 449380) +++ src/test/org/apache/lucene/index/TestFieldInfos.java (copie de travail) @@ -44,22 +44,22 @@ assertTrue(output.length() > 0); FieldInfos readIn = new FieldInfos(dir, name); assertTrue(fieldInfos.size() == readIn.size()); - FieldInfo info = readIn.fieldInfo("textField1"); + FieldInfo info = (FieldInfo) readIn.getEntry("textField1"); assertTrue(info != null); assertTrue(info.storeTermVector == false); assertTrue(info.omitNorms == false); - info = readIn.fieldInfo("textField2"); + info = (FieldInfo) readIn.getEntry("textField2"); assertTrue(info != null); assertTrue(info.storeTermVector == true); assertTrue(info.omitNorms == false); - info = readIn.fieldInfo("textField3"); + info = (FieldInfo) 
readIn.getEntry("textField3"); assertTrue(info != null); assertTrue(info.storeTermVector == false); assertTrue(info.omitNorms == true); - info = readIn.fieldInfo("omitNorms"); + info = (FieldInfo) readIn.getEntry("omitNorms"); assertTrue(info != null); assertTrue(info.storeTermVector == false); assertTrue(info.omitNorms == true); Index: src/test/org/apache/lucene/index/SimpleEntryTableTest.java =================================================================== --- src/test/org/apache/lucene/index/SimpleEntryTableTest.java (révision 0) +++ src/test/org/apache/lucene/index/SimpleEntryTableTest.java (révision 0) @@ -0,0 +1,99 @@ +package org.apache.lucene.index; + +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.RAMDirectory; + +import junit.framework.TestCase; + +public class SimpleEntryTableTest extends TestCase { + + public void test() throws Exception { + SimpleEntryTable table = new SimpleEntryTable(); + assertEquals(0, table.add("id1")); + assertEquals(1, table.add("id2")); + assertEquals(1, table.add("id2")); + assertEquals(2, table.size()); + + Entry entry = table.getEntry(0); + assertNotNull(entry); + assertEquals("id1", entry.getId()); + assertEquals(0, entry.getIndex()); + + entry = table.getEntry(1); + assertNotNull(entry); + assertEquals("id2", entry.getId()); + assertEquals(1, entry.getIndex()); + + entry = table.getEntry(2); + assertNull(entry); + + entry = table.getEntry(-1); + assertNull(entry); + + entry = table.getEntry("id1"); + assertNotNull(entry); + assertEquals("id1", entry.getId()); + assertEquals(0, entry.getIndex()); + + entry = table.getEntry("id2"); + assertNotNull(entry); + assertEquals("id2", entry.getId()); + assertEquals(1, entry.getIndex()); + + entry = table.getEntry(""); + assertNull(entry); + + entry = table.getEntry(null); + assertNull(entry); + + assertEquals("id1", table.getId(0)); + assertEquals("id2", table.getId(1)); + assertNull(table.getId(2)); + assertNull(table.getId(-1)); + + assertEquals(0, table.getIndex("id1")); + assertEquals(1, table.getIndex("id2")); + assertEquals(-1, table.getIndex("")); + assertEquals(-1, table.getIndex(null)); + } + + public void testIO() throws Exception { + SimpleEntryTable table = new SimpleEntryTable(); + table.add("id1"); + table.add("id2"); + RAMDirectory dir = new RAMDirectory(); + table.write(dir, "data"); + + SimpleEntryTable table2 = new SimpleEntryTable(dir, "data"); + + assertEquals(2, table2.size()); + + Entry entry = table2.getEntry(0); + assertNotNull(entry); + assertEquals("id1", entry.getId()); + assertEquals(0, entry.getIndex()); + + entry = table2.getEntry(1); + assertNotNull(entry); + assertEquals("id2", entry.getId()); + assertEquals(1, 
entry.getIndex()); + + entry = table2.getEntry(2); + assertNull(entry); + } +} Index: src/test/org/apache/lucene/index/TestFieldsReader.java =================================================================== --- src/test/org/apache/lucene/index/TestFieldsReader.java (révision 449380) +++ src/test/org/apache/lucene/index/TestFieldsReader.java (copie de travail) @@ -58,7 +58,7 @@ public void test() throws IOException { assertTrue(dir != null); assertTrue(fieldInfos != null); - FieldsReader reader = new FieldsReader(dir, "test", fieldInfos); + FieldsReader reader = new DefaultFieldsReader(dir, "test", fieldInfos); assertTrue(reader != null); assertTrue(reader.size() == 1); Document doc = reader.doc(0, null); @@ -88,7 +88,7 @@ public void testLazyFields() throws Exception { assertTrue(dir != null); assertTrue(fieldInfos != null); - FieldsReader reader = new FieldsReader(dir, "test", fieldInfos); + FieldsReader reader = new DefaultFieldsReader(dir, "test", fieldInfos); assertTrue(reader != null); assertTrue(reader.size() == 1); Set loadFieldNames = new HashSet(); @@ -136,7 +136,7 @@ public void testLoadFirst() throws Exception { assertTrue(dir != null); assertTrue(fieldInfos != null); - FieldsReader reader = new FieldsReader(dir, "test", fieldInfos); + FieldsReader reader = new DefaultFieldsReader(dir, "test", fieldInfos); assertTrue(reader != null); assertTrue(reader.size() == 1); LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector(); @@ -181,7 +181,7 @@ SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames); for (int i = 0; i < length; i++) { - reader = new FieldsReader(tmpDir, "test", fieldInfos); + reader = new DefaultFieldsReader(tmpDir, "test", fieldInfos); assertTrue(reader != null); assertTrue(reader.size() == 1); @@ -205,7 +205,7 @@ doc = null; //Hmmm, are we still in cache??? 
System.gc(); - reader = new FieldsReader(tmpDir, "test", fieldInfos); + reader = new DefaultFieldsReader(tmpDir, "test", fieldInfos); doc = reader.doc(0, fieldSelector); field = doc.getFieldable(DocHelper.LARGE_LAZY_FIELD_KEY); assertTrue("field is not lazy", field.isLazy() == true); Index: src/test/org/apache/lucene/index/rdf/RDFIndexTest.java =================================================================== --- src/test/org/apache/lucene/index/rdf/RDFIndexTest.java (révision 0) +++ src/test/org/apache/lucene/index/rdf/RDFIndexTest.java (révision 0) @@ -0,0 +1,82 @@ +package org.apache.lucene.index.rdf; + +import junit.framework.TestCase; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.RAMDirectory; + +public class RDFIndexTest extends TestCase { + + public void testindex() throws Exception { + + RAMDirectory dir = new RAMDirectory(new RDFIndexFormat()); + + IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true); + + Document doc = new Document(); + + doc.add(new RDFLiteralField("rdfproperty", "literal", null, null, Store.YES, Index.TOKENIZED, TermVector.NO)); + doc.add(new RDFLiteralField("rdfproperty2", "literal2", null, "string", Store.YES, Index.TOKENIZED, TermVector.NO)); + doc.add(new RDFLiteralField("rdfproperty3", "literal3", "fr", null, Store.YES, Index.TOKENIZED, TermVector.NO)); + doc.add(new RDFLiteralField("rdfproperty4", "literal4", "fr", "string", Store.YES, Index.TOKENIZED, TermVector.NO)); + + writer.addDocument(doc); + writer.close(); + + IndexReader reader = IndexReader.open(dir); + + doc = reader.document(0); + + Fieldable[] fields = 
doc.getFieldables("rdfproperty"); + assertEquals(1, fields.length); + assertEquals("rdfproperty", fields[0].name()); + assertNull(fields[0].readerValue()); + assertNull(fields[0].binaryValue()); + assertEquals("literal", fields[0].stringValue()); + assertTrue(fields[0] instanceof RDFLiteralField); + RDFLiteralField rdfField = (RDFLiteralField) fields[0]; + assertNull(rdfField.getLang()); + assertNull(rdfField.getType()); + + fields = doc.getFieldables("rdfproperty2"); + assertEquals(1, fields.length); + assertEquals("rdfproperty2", fields[0].name()); + assertNull(fields[0].readerValue()); + assertNull(fields[0].binaryValue()); + assertEquals("literal2", fields[0].stringValue()); + assertTrue(fields[0] instanceof RDFLiteralField); + rdfField = (RDFLiteralField) fields[0]; + assertNull(rdfField.getLang()); + assertEquals("string", rdfField.getType()); + + fields = doc.getFieldables("rdfproperty3"); + assertEquals(1, fields.length); + assertEquals("rdfproperty3", fields[0].name()); + assertNull(fields[0].readerValue()); + assertNull(fields[0].binaryValue()); + assertEquals("literal3", fields[0].stringValue()); + assertTrue(fields[0] instanceof RDFLiteralField); + rdfField = (RDFLiteralField) fields[0]; + assertEquals("fr", rdfField.getLang()); + assertNull(rdfField.getType()); + + fields = doc.getFieldables("rdfproperty4"); + assertEquals(1, fields.length); + assertEquals("rdfproperty4", fields[0].name()); + assertNull(fields[0].readerValue()); + assertNull(fields[0].binaryValue()); + assertEquals("literal4", fields[0].stringValue()); + assertTrue(fields[0] instanceof RDFLiteralField); + rdfField = (RDFLiteralField) fields[0]; + assertEquals("fr", rdfField.getLang()); + assertEquals("string", rdfField.getType()); + + } +} Index: src/java/org/apache/lucene/index/FieldInfo.java =================================================================== --- src/java/org/apache/lucene/index/FieldInfo.java (révision 449380) +++ src/java/org/apache/lucene/index/FieldInfo.java 
(copie de travail) @@ -1,5 +1,8 @@ package org.apache.lucene.index; +import org.apache.lucene.document.Field; + + /** * Copyright 2004 The Apache Software Foundation * @@ -16,10 +19,8 @@ * limitations under the License. */ -final class FieldInfo { - String name; +public final class FieldInfo extends Entry { boolean isIndexed; - int number; // true if term vector for this field should be stored boolean storeTermVector; @@ -28,14 +29,33 @@ boolean omitNorms; // omit norms associated with indexed fields - FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, - boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) { - name = na; + FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, + boolean omitNorms) { + super(na, nu); isIndexed = tk; - number = nu; this.storeTermVector = storeTermVector; this.storeOffsetWithTermVector = storeOffsetWithTermVector; this.storePositionWithTermVector = storePositionWithTermVector; this.omitNorms = omitNorms; } + + public boolean omitNorms() { + return omitNorms; + } + + public boolean isIndexed() { + return isIndexed; + } + + public boolean storeOffsetWithTermVector() { + return storeOffsetWithTermVector; + } + + public boolean storePositionWithTermVector() { + return storePositionWithTermVector; + } + + public boolean storeTermVector() { + return storeTermVector; + } } Index: src/java/org/apache/lucene/index/CompoundFileReader.java =================================================================== --- src/java/org/apache/lucene/index/CompoundFileReader.java (révision 449380) +++ src/java/org/apache/lucene/index/CompoundFileReader.java (copie de travail) @@ -54,6 +54,7 @@ throws IOException { directory = dir; + indexFormat = dir.getIndexFormat(); fileName = name; boolean success = false; Index: src/java/org/apache/lucene/index/DefaultFieldsWriter.java 
=================================================================== --- src/java/org/apache/lucene/index/DefaultFieldsWriter.java (révision 0) +++ src/java/org/apache/lucene/index/DefaultFieldsWriter.java (révision 0) @@ -0,0 +1,153 @@ +package org.apache.lucene.index; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.zip.Deflater; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; + +/** + * The default implementation of FieldsWriter + * + * $Id$ + */ +public class DefaultFieldsWriter extends FieldsWriter { + + protected DefaultFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { + super(d, segment, fn); + } + + /** + * There no data stored at the document level + */ + protected void writeDocumentData(IndexOutput out, Document doc) throws IOException { + //nothing to write + } + + /** + * If a the field to write has been load lazily, it does a direct copy from the + * source to the output. 
+ */ + protected void writeField(Fieldable field, IndexOutput out) throws IOException { + if (field.isLazy() && isBinaryCompatible(field)) { + field.writeFromLazyLoading(out); + } else { + byte bits = 0; + if (field.isTokenized()) + bits |= Field.FIELD_IS_TOKENIZED; + if (field.isBinary()) + bits |= Field.FIELD_IS_BINARY; + if (field instanceof Field && ((Field) field).isCompressed()) { + bits |= Field.FIELD_IS_COMPRESSED; + } + + out.writeByte(bits); + + if (field instanceof Field && ((Field) field).isCompressed()) { + // compression is enabled for the current field + byte[] bdata = null; + // check if it is a binary field + if (field.isBinary()) { + bdata = compress(field.binaryValue()); + } else { + bdata = compress(field.stringValue().getBytes("UTF-8")); + } + final int len = bdata.length; + out.writeVInt(len); + out.writeBytes(bdata, len); + } else { + // compression is disabled for the current field + if (field.isBinary()) { + byte[] bdata = field.binaryValue(); + final int len = bdata.length; + out.writeVInt(len); + out.writeBytes(bdata, len); + } else { + out.writeString(field.stringValue()); + } + } + } + } + + /** + * Test if the specified field is binary compatible with the current format, so + * it allow us to do a direct copy from the lazy loaded field into an index + * + * @param field the field to test + * @return true if it is compatible + */ + protected boolean isBinaryCompatible(Fieldable field) { + return field instanceof Field; + } + + /** + * To be overriden by subclasses to choose a different level of compression + * + * @return the compression level + */ + protected int getCompressionLevel() { + return Deflater.BEST_COMPRESSION; + } + + /** + * Do the compression of data + * + * To be overiden by subclasses to use a different format of compression. If overriden, you + * probably should also override isBinaryCompatible and and decompress function of + * DefaultFieldsReader. 
+ * + * @param input the data to compress + * @return the compressed data + */ + protected byte[] compress(byte[] input) { + + // Create the compressor with highest level of compression + Deflater compressor = new Deflater(); + compressor.setLevel(getCompressionLevel()); + + // Give the compressor the data to compress + compressor.setInput(input); + compressor.finish(); + + /* + * Create an expandable byte array to hold the compressed data. + * You cannot use an array that's the same size as the orginal because + * there is no guarantee that the compressed data will be smaller than + * the uncompressed data. + */ + ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); + + // Compress the data + byte[] buf = new byte[1024]; + while (!compressor.finished()) { + int count = compressor.deflate(buf); + bos.write(buf, 0, count); + } + + compressor.end(); + + // Get the compressed data + return bos.toByteArray(); + } + +} Index: src/java/org/apache/lucene/index/IndexFormat.java =================================================================== --- src/java/org/apache/lucene/index/IndexFormat.java (révision 0) +++ src/java/org/apache/lucene/index/IndexFormat.java (révision 0) @@ -0,0 +1,81 @@ +package org.apache.lucene.index; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.store.Directory; + +/** + * Specify the format of index. + * + * The implementation of the FieldsReader and FieldsWriter returned by the function + * getFieldsReader and getFieldsWriter will specify how the data of fields are + * serialized, and also the kind of Fieldable used. + * + * $Id$ + */ +public interface IndexFormat { + + /** + * This array contains all filename extensions used by Lucene's index files, with + * one exception, namely the extension made up from .f + a number. + * Also note that two of Lucene's files (deletable and + * segments) don't have any filename extension. + * + * @return a List of String + */ + List getIndexExtensions(); + + /** + * File extensions of old-style index files + * + * @return a List of String + */ + List getCompoundExtensions(); + + /** + * File extensions for term vector support + * + * @return a List of String + */ + List getVectorExtensions(); + + /** + * Return an implemetation of FieldsReader for this format + * + * @param d the directory to use + * @param segment the segment name + * @param fn the infos on fields + * @return the implemetation of FieldsReader + * @throws IOException + */ + FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException; + + /** + * Return an implemetation of FieldsWriter for this format + * + * @param d the directory to use + * @param segment the segment name + * @param fn the infos on fields + * @return the implemetation of FieldsWriter + * @throws IOException + */ + FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException; + +} Index: src/java/org/apache/lucene/index/FieldInfos.java =================================================================== --- src/java/org/apache/lucene/index/FieldInfos.java (révision 449380) +++ src/java/org/apache/lucene/index/FieldInfos.java (copie de travail) @@ -31,7 +31,7 @@ * be adding documents at a 
time, with no other reader or writer threads * accessing this object. */ -final class FieldInfos { +public final class FieldInfos extends EntryTable { static final byte IS_INDEXED = 0x1; static final byte STORE_TERMVECTOR = 0x2; @@ -39,10 +39,9 @@ static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8; static final byte OMIT_NORMS = 0x10; - private ArrayList byNumber = new ArrayList(); - private HashMap byName = new HashMap(); - FieldInfos() { } + public FieldInfos() { + } /** * Construct a FieldInfos object using the directory and the name of the file @@ -52,12 +51,7 @@ * @throws IOException */ FieldInfos(Directory d, String name) throws IOException { - IndexInput input = d.openInput(name); - try { - read(input); - } finally { - input.close(); - } + super(d, name); } /** Adds field info for a Document. */ @@ -155,9 +149,11 @@ */ public void add(String name, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) { - FieldInfo fi = fieldInfo(name); + FieldInfo fi = (FieldInfo) getEntry(name); if (fi == null) { - addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms); + int n = size(); + fi = new FieldInfo(name, isIndexed, n, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms); + add(fi); } else { if (fi.isIndexed != isIndexed) { fi.isIndexed = true; // once indexed, always index @@ -178,72 +174,10 @@ } } - - private void addInternal(String name, boolean isIndexed, - boolean storeTermVector, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean omitNorms) { - FieldInfo fi = - new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms); - byNumber.add(fi); - byName.put(name, fi); - } - - public int fieldNumber(String fieldName) { - try { - FieldInfo fi = fieldInfo(fieldName); - if (fi != null) - return 
fi.number; - } - catch (IndexOutOfBoundsException ioobe) { - return -1; - } - return -1; - } - - public FieldInfo fieldInfo(String fieldName) { - return (FieldInfo) byName.get(fieldName); - } - - /** - * Return the fieldName identified by its number. - * - * @param fieldNumber - * @return the fieldName or an empty string when the field - * with the given number doesn't exist. - */ - public String fieldName(int fieldNumber) { - try { - return fieldInfo(fieldNumber).name; - } - catch (NullPointerException npe) { - return ""; - } - } - - /** - * Return the fieldinfo object referenced by the fieldNumber. - * @param fieldNumber - * @return the FieldInfo object or null when the given fieldNumber - * doesn't exist. - */ - public FieldInfo fieldInfo(int fieldNumber) { - try { - return (FieldInfo) byNumber.get(fieldNumber); - } - catch (IndexOutOfBoundsException ioobe) { - return null; - } - } - - public int size() { - return byNumber.size(); - } - public boolean hasVectors() { boolean hasVectors = false; for (int i = 0; i < size(); i++) { - if (fieldInfo(i).storeTermVector) { + if (((FieldInfo) getEntry(i)).storeTermVector) { hasVectors = true; break; } @@ -251,43 +185,40 @@ return hasVectors; } - public void write(Directory d, String name) throws IOException { - IndexOutput output = d.createOutput(name); - try { - write(output); - } finally { - output.close(); + /** + * Just change the behaviour to never return null but return an empty string + */ + public String getId(int index) { + String id = super.getId(index); + if (id == null) { + return ""; } + return id; } - public void write(IndexOutput output) throws IOException { - output.writeVInt(size()); - for (int i = 0; i < size(); i++) { - FieldInfo fi = fieldInfo(i); - byte bits = 0x0; - if (fi.isIndexed) bits |= IS_INDEXED; - if (fi.storeTermVector) bits |= STORE_TERMVECTOR; - if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR; - if (fi.storeOffsetWithTermVector) bits |= 
STORE_OFFSET_WITH_TERMVECTOR; - if (fi.omitNorms) bits |= OMIT_NORMS; - output.writeString(fi.name); - output.writeByte(bits); - } + protected void writeEntry(Entry info, IndexOutput output) throws IOException { + FieldInfo fi = (FieldInfo) info; + byte bits = 0x0; + if (fi.isIndexed) bits |= IS_INDEXED; + if (fi.storeTermVector) bits |= STORE_TERMVECTOR; + if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR; + if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR; + if (fi.omitNorms) bits |= OMIT_NORMS; + output.writeString(fi.getId()); + output.writeByte(bits); } - private void read(IndexInput input) throws IOException { - int size = input.readVInt();//read in the size - for (int i = 0; i < size; i++) { - String name = input.readString().intern(); - byte bits = input.readByte(); - boolean isIndexed = (bits & IS_INDEXED) != 0; - boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0; - boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; - boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; - boolean omitNorms = (bits & OMIT_NORMS) != 0; + protected Entry readEntry(int number, IndexInput input) throws IOException { + String name = input.readString().intern(); + byte bits = input.readByte(); + boolean isIndexed = (bits & IS_INDEXED) != 0; + boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0; + boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; + boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; + boolean omitNorms = (bits & OMIT_NORMS) != 0; - addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms); - } + return new FieldInfo(name, isIndexed, size(), storeTermVector, storePositionsWithTermVector, + storeOffsetWithTermVector, omitNorms); } } Index: src/java/org/apache/lucene/index/SimpleEntryTable.java 
=================================================================== --- src/java/org/apache/lucene/index/SimpleEntryTable.java (révision 0) +++ src/java/org/apache/lucene/index/SimpleEntryTable.java (révision 0) @@ -0,0 +1,88 @@ +package org.apache.lucene.index; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; + +/** + * A simple implementation of a Lucene-serialized table of entries. It only stores + * the ids of the entries. 
+ * + * $Id$ + */ +public class SimpleEntryTable extends EntryTable { + + /** + * Constructor used to populate a table from scratch + * + */ + public SimpleEntryTable() { + super(); + } + + /** + * Construct a SimpleEntryTable object using the directory and the name of the file + * IndexInput + * + * @param d The directory to open the IndexInput from + * @param name The name of the file to open the IndexInput from in the Directory + * @throws IOException + */ + public SimpleEntryTable(Directory d, String name) throws IOException { + super(d, name); + } + + /** + * Just write the ID + */ + protected void writeEntry(Entry entry, IndexOutput output) throws IOException { + String id = entry.getId(); + output.writeString(id); + } + + /** + * Just read the ID + */ + protected Entry readEntry(int index, IndexInput input) throws IOException { + String id = input.readString().intern(); + return new Entry(id, index); + } + + /** + * Add an entry with the specified Id. If an entry already exist + * in the table, no entry is added, it just returns the index of + * the entry already stored. + * + * @param id the id to insert + * @return the created or found index of the entry + */ + public int add(String id) { + Entry entry = getEntry(id); + if (entry != null) { + return entry.getIndex(); + } + int index = size(); + entry = new Entry(id, index); + add(entry); + return index; + } + +} Index: src/java/org/apache/lucene/index/DefaultIndexFormat.java =================================================================== --- src/java/org/apache/lucene/index/DefaultIndexFormat.java (révision 0) +++ src/java/org/apache/lucene/index/DefaultIndexFormat.java (révision 0) @@ -0,0 +1,66 @@ +package org.apache.lucene.index; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.search.Similarity; +import org.apache.lucene.store.Directory; + +/** + * The default implementation of the index format + * + * $Id$ + */ +public class DefaultIndexFormat implements IndexFormat { + + private static final List INDEX_EXTENSIONS = Arrays.asList(new String[] { "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", + "tvd", "tvf", "tvp" }); + + private static final List COMPOUND_EXTENSIONS = Arrays.asList(new String[] { "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis" }); + + private static final List VECTOR_EXTENSIONS = Arrays.asList(new String[] { "tvx", "tvd", "tvf" }); + + public List getIndexExtensions() { + return INDEX_EXTENSIONS; + } + + public List getCompoundExtensions() { + return COMPOUND_EXTENSIONS; + } + + public List getVectorExtensions() { + return VECTOR_EXTENSIONS; + } + + /** + * Use the default implementation of FieldsReader : DefaultFieldsReader + */ + public FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { + return new DefaultFieldsReader(d, segment, fn); + } + + /** + * Use the default implementation of FieldsWriter : DefaultFieldsWriter + */ + public FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { + return new DefaultFieldsWriter(d, segment, fn); + } + +} Index: src/java/org/apache/lucene/index/FieldsReader.java =================================================================== --- 
src/java/org/apache/lucene/index/FieldsReader.java (révision 449380) +++ src/java/org/apache/lucene/index/FieldsReader.java (copie de travail) @@ -16,15 +16,9 @@ * limitations under the License. */ -import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.Reader; -import java.util.zip.DataFormatException; -import java.util.zip.Inflater; -import org.apache.lucene.document.AbstractField; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.document.FieldSelectorResult; import org.apache.lucene.document.Fieldable; @@ -38,15 +32,16 @@ * * @version $Id$ */ -final class FieldsReader { +public abstract class FieldsReader { private FieldInfos fieldInfos; + private IndexInput fieldsStream; + private IndexInput indexStream; + private int size; - private static ThreadLocal fieldsStreamTL = new ThreadLocal(); - - FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { + protected FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { fieldInfos = fn; fieldsStream = d.openInput(segment + ".fdt"); @@ -60,14 +55,9 @@ * * @throws IOException */ - final void close() throws IOException { + protected void close() throws IOException { fieldsStream.close(); indexStream.close(); - IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); - if (localFieldsStream != null) { - localFieldsStream.close(); - fieldsStreamTL.set(null); - } } final int size() { @@ -79,352 +69,40 @@ long position = indexStream.readLong(); fieldsStream.seek(position); - Document doc = new Document(); + Document doc = createDocument(fieldsStream); + int numFields = fieldsStream.readVInt(); for (int i = 0; i < numFields; i++) { int fieldNumber = fieldsStream.readVInt(); - FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); - FieldSelectorResult acceptField = fieldSelector == null ? 
FieldSelectorResult.LOAD : fieldSelector.accept(fi.name); - boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true; - - byte bits = fieldsStream.readByte(); - boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; - boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; - boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; - if (acceptField.equals(FieldSelectorResult.LOAD) == true) { - addField(doc, fi, binary, compressed, tokenize); - } - else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE) == true) { - addFieldForMerge(doc, fi, binary, compressed, tokenize); - } - else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){ - addField(doc, fi, binary, compressed, tokenize); - break;//Get out of this loop - } - else if (lazy == true){ - addFieldLazy(doc, fi, binary, compressed, tokenize); - } - else { - skipField(binary, compressed); - } - } + FieldInfo fi = (FieldInfo) fieldInfos.getEntry(fieldNumber); + FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.getId()); - return doc; - } + Fieldable field = createField(fi); - /** - * Skip the field. We still have to read some of the information about the field, but can skip past the actual content. - * This will have the most payoff on large fields. - */ - private void skipField(boolean binary, boolean compressed) throws IOException { - - int toRead = fieldsStream.readVInt(); - - if (binary || compressed) { + boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD); + boolean skip = acceptField.equals(FieldSelectorResult.NO_LOAD); + long pointer = fieldsStream.getFilePointer(); - fieldsStream.seek(pointer + toRead); - } else { - //We need to skip chars. 
This will slow us down, but still better - fieldsStream.skipChars(toRead); - } - } - private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { - if (binary == true) { - int toRead = fieldsStream.readVInt(); - long pointer = fieldsStream.getFilePointer(); - if (compressed) { - //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS)); - doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer)); - } else { - //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES)); - doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer)); - } - //Need to move the pointer ahead by toRead positions - fieldsStream.seek(pointer + toRead); - } else { - Field.Store store = Field.Store.YES; - Field.Index index = getIndexType(fi, tokenize); - Field.TermVector termVector = getTermVectorType(fi); + field.readStream(fieldsStream, skip || lazy); - Fieldable f; - if (compressed) { - store = Field.Store.COMPRESS; - int toRead = fieldsStream.readVInt(); - long pointer = fieldsStream.getFilePointer(); - f = new LazyField(fi.name, store, toRead, pointer); - //skip over the part that we aren't loading - fieldsStream.seek(pointer + toRead); - f.setOmitNorms(fi.omitNorms); - } else { - int length = fieldsStream.readVInt(); - long pointer = fieldsStream.getFilePointer(); - //Skip ahead of where we are by the length of what is stored - fieldsStream.skipChars(length); - f = new LazyField(fi.name, store, index, termVector, length, pointer); - f.setOmitNorms(fi.omitNorms); + if (lazy) { + field.setLazyData(fieldsStream, pointer, fieldsStream.getFilePointer() - pointer); } - doc.add(f); - } - } - - // in merge mode we don't uncompress the data of a compressed field - private void addFieldForMerge(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { - Object data; - - if (binary || compressed) { - int toRead = fieldsStream.readVInt(); - 
final byte[] b = new byte[toRead]; - fieldsStream.readBytes(b, 0, b.length); - data = b; - } else { - data = fieldsStream.readString(); - } - - doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize)); - } - - private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { - - //we have a binary stored field, and it may be compressed - if (binary) { - int toRead = fieldsStream.readVInt(); - final byte[] b = new byte[toRead]; - fieldsStream.readBytes(b, 0, b.length); - if (compressed) - doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS)); - else - doc.add(new Field(fi.name, b, Field.Store.YES)); - - } else { - Field.Store store = Field.Store.YES; - Field.Index index = getIndexType(fi, tokenize); - Field.TermVector termVector = getTermVectorType(fi); - - Fieldable f; - if (compressed) { - store = Field.Store.COMPRESS; - int toRead = fieldsStream.readVInt(); - - final byte[] b = new byte[toRead]; - fieldsStream.readBytes(b, 0, b.length); - f = new Field(fi.name, // field name - new String(uncompress(b), "UTF-8"), // uncompress the value and add as string - store, - index, - termVector); - f.setOmitNorms(fi.omitNorms); - } else { - f = new Field(fi.name, // name - fieldsStream.readString(), // read value - store, - index, - termVector); - f.setOmitNorms(fi.omitNorms); + if (!skip) { + doc.add(field); } - doc.add(f); - } - } - - private Field.TermVector getTermVectorType(FieldInfo fi) { - Field.TermVector termVector = null; - if (fi.storeTermVector) { - if (fi.storeOffsetWithTermVector) { - if (fi.storePositionWithTermVector) { - termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; - } else { - termVector = Field.TermVector.WITH_OFFSETS; - } - } else if (fi.storePositionWithTermVector) { - termVector = Field.TermVector.WITH_POSITIONS; - } else { - termVector = Field.TermVector.YES; + if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)) { + break; } - } else { - termVector = 
Field.TermVector.NO; } - return termVector; - } - private Field.Index getIndexType(FieldInfo fi, boolean tokenize) { - Field.Index index; - if (fi.isIndexed && tokenize) - index = Field.Index.TOKENIZED; - else if (fi.isIndexed && !tokenize) - index = Field.Index.UN_TOKENIZED; - else - index = Field.Index.NO; - return index; + return doc; } - /** - * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is - * loaded. - */ - private class LazyField extends AbstractField implements Fieldable { - private int toRead; - private long pointer; - //internal buffer - private char[] chars; + protected abstract Document createDocument(IndexInput in); + protected abstract Fieldable createField(FieldInfo fi); - public LazyField(String name, Field.Store store, int toRead, long pointer) { - super(name, store, Field.Index.NO, Field.TermVector.NO); - this.toRead = toRead; - this.pointer = pointer; - lazy = true; - } - - public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) { - super(name, store, index, termVector); - this.toRead = toRead; - this.pointer = pointer; - lazy = true; - } - - /** - * The value of the field in Binary, or null. If null, the Reader or - * String value is used. Exactly one of stringValue(), readerValue() and - * binaryValue() must be set. 
- */ - public byte[] binaryValue() { - if (fieldsData == null) { - final byte[] b = new byte[toRead]; - IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); - if (localFieldsStream == null) { - localFieldsStream = (IndexInput) fieldsStream.clone(); - fieldsStreamTL.set(localFieldsStream); - } - //Throw this IO Exception since IndexREader.document does so anyway, so probably not that big of a change for people - //since they are already handling this exception when getting the document - try { - localFieldsStream.seek(pointer); - localFieldsStream.readBytes(b, 0, b.length); - if (isCompressed == true) { - fieldsData = uncompress(b); - } else { - fieldsData = b; - } - } catch (IOException e) { - throw new FieldReaderException(e); - } - } - return fieldsData instanceof byte[] ? (byte[]) fieldsData : null; - } - - /** - * The value of the field as a Reader, or null. If null, the String value - * or binary value is used. Exactly one of stringValue(), readerValue(), - * and binaryValue() must be set. - */ - public Reader readerValue() { - return fieldsData instanceof Reader ? (Reader) fieldsData : null; - } - - /** - * The value of the field as a String, or null. If null, the Reader value - * or binary value is used. Exactly one of stringValue(), readerValue(), and - * binaryValue() must be set. 
- */ - public String stringValue() { - if (fieldsData == null) { - IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); - if (localFieldsStream == null) { - localFieldsStream = (IndexInput) fieldsStream.clone(); - fieldsStreamTL.set(localFieldsStream); - } - try { - localFieldsStream.seek(pointer); - //read in chars b/c we already know the length we need to read - if (chars == null || toRead > chars.length) - chars = new char[toRead]; - localFieldsStream.readChars(chars, 0, toRead); - fieldsData = new String(chars, 0, toRead);//fieldsStream.readString(); - } catch (IOException e) { - throw new FieldReaderException(e); - } - } - return fieldsData instanceof String ? (String) fieldsData : null; - } - - public long getPointer() { - return pointer; - } - - public void setPointer(long pointer) { - this.pointer = pointer; - } - - public int getToRead() { - return toRead; - } - - public void setToRead(int toRead) { - this.toRead = toRead; - } - } - - private final byte[] uncompress(final byte[] input) - throws IOException { - - Inflater decompressor = new Inflater(); - decompressor.setInput(input); - - // Create an expandable byte array to hold the decompressed data - ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); - - // Decompress the data - byte[] buf = new byte[1024]; - while (!decompressor.finished()) { - try { - int count = decompressor.inflate(buf); - bos.write(buf, 0, count); - } - catch (DataFormatException e) { - // this will happen if the field is not compressed - IOException newException = new IOException("field data are in wrong format: " + e.toString()); - newException.initCause(e); - throw newException; - } - } - - decompressor.end(); - - // Get the decompressed data - return bos.toByteArray(); - } - - // Instances of this class hold field properties and data - // for merge - final static class FieldForMerge extends AbstractField { - public String stringValue() { - return (String) this.fieldsData; - } - - public Reader 
readerValue() { - // not needed for merge - return null; - } - - public byte[] binaryValue() { - return (byte[]) this.fieldsData; - } - - public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) { - this.isStored = true; - this.fieldsData = value; - this.isCompressed = compressed; - this.isBinary = binary; - this.isTokenized = tokenize; - - this.name = fi.name.intern(); - this.isIndexed = fi.isIndexed; - this.omitNorms = fi.omitNorms; - this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector; - this.storePositionWithTermVector = fi.storePositionWithTermVector; - this.storeTermVector = fi.storeTermVector; - } - - } } Index: src/java/org/apache/lucene/index/IndexFileNames.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNames.java (révision 449380) +++ src/java/org/apache/lucene/index/IndexFileNames.java (copie de travail) @@ -29,25 +29,5 @@ /** Name of the index deletable file */ static final String DELETABLE = "deletable"; - - /** - * This array contains all filename extensions used by Lucene's index files, with - * one exception, namely the extension made up from .f + a number. - * Also note that two of Lucene's files (deletable and - * segments) don't have any filename extension. 
- */ - static final String INDEX_EXTENSIONS[] = new String[] { - "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", - "tvx", "tvd", "tvf", "tvp" }; - - /** File extensions of old-style index files */ - static final String COMPOUND_EXTENSIONS[] = new String[] { - "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis" - }; - - /** File extensions for term vector support */ - static final String VECTOR_EXTENSIONS[] = new String[] { - "tvx", "tvd", "tvf" - }; - + } Index: src/java/org/apache/lucene/index/FilterIndexReader.java =================================================================== --- src/java/org/apache/lucene/index/FilterIndexReader.java (révision 449380) +++ src/java/org/apache/lucene/index/FilterIndexReader.java (copie de travail) @@ -18,6 +18,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.store.IndexOutput; import java.io.IOException; Index: src/java/org/apache/lucene/index/TermVectorsReader.java =================================================================== --- src/java/org/apache/lucene/index/TermVectorsReader.java (révision 449380) +++ src/java/org/apache/lucene/index/TermVectorsReader.java (copie de travail) @@ -88,7 +88,7 @@ */ TermFreqVector get(int docNum, String field) throws IOException { // Check if no term vectors are available for this segment at all - int fieldNumber = fieldInfos.fieldNumber(field); + int fieldNumber = fieldInfos.getIndex(field); TermFreqVector result = null; if (tvx != null) { //We need to account for the FORMAT_SIZE at when seeking in the tvx @@ -164,7 +164,7 @@ else number += tvd.readVInt(); - fields[i] = fieldInfos.fieldName(number); + fields[i] = fieldInfos.getId(number); } // Compute position in the tvf file Index: src/java/org/apache/lucene/index/Entry.java =================================================================== --- src/java/org/apache/lucene/index/Entry.java (révision 0) +++ 
src/java/org/apache/lucene/index/Entry.java (révision 0) @@ -0,0 +1,57 @@ +package org.apache.lucene.index; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An entry is some data in a Lucene-serialized table. This is the simplest + * kind of entry, it has an index and an ID. + * + * $Id$ + */ +public class Entry { + + private int index; + + private String id; + + /** + * Constructor + * + * @param id the id of the entry + * @param index the index of the entry + */ + protected Entry(String id, int index) { + this.index = index; + this.id = id; + } + + /** + * + * @return the id of the entry + */ + public String getId() { + return id; + } + + /** + * + * @return the index of the entry + */ + public int getIndex() { + return index; + } +} Index: src/java/org/apache/lucene/index/TermBuffer.java =================================================================== --- src/java/org/apache/lucene/index/TermBuffer.java (révision 449380) +++ src/java/org/apache/lucene/index/TermBuffer.java (copie de travail) @@ -64,7 +64,7 @@ int totalLength = start + length; setTextLength(totalLength); input.readChars(this.text, start, length); - this.field = fieldInfos.fieldName(input.readVInt()); + this.field = fieldInfos.getId(input.readVInt()); } public final void set(Term term) { Index: src/java/org/apache/lucene/index/FieldsWriter.java =================================================================== --- 
src/java/org/apache/lucene/index/FieldsWriter.java (révision 449380) +++ src/java/org/apache/lucene/index/FieldsWriter.java (copie de travail) @@ -16,36 +16,29 @@ * the License. */ -import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.util.Enumeration; -import java.util.zip.Deflater; +import java.util.Iterator; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexOutput; -final class FieldsWriter -{ - static final byte FIELD_IS_TOKENIZED = 0x1; - static final byte FIELD_IS_BINARY = 0x2; - static final byte FIELD_IS_COMPRESSED = 0x4; - +public abstract class FieldsWriter { + private FieldInfos fieldInfos; private IndexOutput fieldsStream; private IndexOutput indexStream; - FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { + protected FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { fieldInfos = fn; fieldsStream = d.createOutput(segment + ".fdt"); indexStream = d.createOutput(segment + ".fdx"); } - final void close() throws IOException { + protected void close() throws IOException { fieldsStream.close(); indexStream.close(); } @@ -53,100 +46,29 @@ final void addDocument(Document doc) throws IOException { indexStream.writeLong(fieldsStream.getFilePointer()); + writeDocumentData(fieldsStream, doc); + int storedCount = 0; - Enumeration fields = doc.fields(); - while (fields.hasMoreElements()) { - Fieldable field = (Fieldable) fields.nextElement(); + Iterator fields = doc.getFields().iterator(); + while (fields.hasNext()) { + Fieldable field = (Fieldable) fields.next(); if (field.isStored()) storedCount++; } fieldsStream.writeVInt(storedCount); - fields = doc.fields(); - while (fields.hasMoreElements()) { - Fieldable field = (Fieldable) fields.nextElement(); - // if the field as an instanceof FieldsReader.FieldForMerge, we're in merge 
mode - // and field.binaryValue() already returns the compressed value for a field - // with isCompressed()==true, so we disable compression in that case - boolean disableCompression = (field instanceof FieldsReader.FieldForMerge); + fields = doc.getFields().iterator(); + while (fields.hasNext()) { + Fieldable field = (Fieldable) fields.next(); if (field.isStored()) { - fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name())); - - byte bits = 0; - if (field.isTokenized()) - bits |= FieldsWriter.FIELD_IS_TOKENIZED; - if (field.isBinary()) - bits |= FieldsWriter.FIELD_IS_BINARY; - if (field.isCompressed()) - bits |= FieldsWriter.FIELD_IS_COMPRESSED; - - fieldsStream.writeByte(bits); - - if (field.isCompressed()) { - // compression is enabled for the current field - byte[] data = null; - - if (disableCompression) { - // optimized case for merging, the data - // is already compressed - data = field.binaryValue(); - } else { - // check if it is a binary field - if (field.isBinary()) { - data = compress(field.binaryValue()); - } - else { - data = compress(field.stringValue().getBytes("UTF-8")); - } - } - final int len = data.length; - fieldsStream.writeVInt(len); - fieldsStream.writeBytes(data, len); - } - else { - // compression is disabled for the current field - if (field.isBinary()) { - byte[] data = field.binaryValue(); - final int len = data.length; - fieldsStream.writeVInt(len); - fieldsStream.writeBytes(data, len); - } - else { - fieldsStream.writeString(field.stringValue()); - } - } + fieldsStream.writeVInt(fieldInfos.getIndex(field.name())); + writeField(field, fieldsStream); } } } - private final byte[] compress (byte[] input) { + abstract protected void writeDocumentData(IndexOutput out, Document doc) throws IOException; - // Create the compressor with highest level of compression - Deflater compressor = new Deflater(); - compressor.setLevel(Deflater.BEST_COMPRESSION); + abstract protected void writeField(Fieldable field, IndexOutput out) throws 
IOException; - // Give the compressor the data to compress - compressor.setInput(input); - compressor.finish(); - - /* - * Create an expandable byte array to hold the compressed data. - * You cannot use an array that's the same size as the orginal because - * there is no guarantee that the compressed data will be smaller than - * the uncompressed data. - */ - ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); - - // Compress the data - byte[] buf = new byte[1024]; - while (!compressor.finished()) { - int count = compressor.deflate(buf); - bos.write(buf, 0, count); - } - - compressor.end(); - - // Get the compressed data - return bos.toByteArray(); - } } Index: src/java/org/apache/lucene/index/TermInfosWriter.java =================================================================== --- src/java/org/apache/lucene/index/TermInfosWriter.java (révision 449380) +++ src/java/org/apache/lucene/index/TermInfosWriter.java (copie de travail) @@ -131,7 +131,7 @@ output.writeVInt(length); // write delta length output.writeChars(term.text, start, length); // write delta chars - output.writeVInt(fieldInfos.fieldNumber(term.field)); // write field num + output.writeVInt(fieldInfos.getIndex(term.field)); // write field num lastTerm = term; } Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (révision 449380) +++ src/java/org/apache/lucene/index/SegmentMerger.java (copie de travail) @@ -16,6 +16,7 @@ * limitations under the License. */ +import java.util.List; import java.util.Vector; import java.util.Iterator; import java.util.Collection; @@ -46,7 +47,7 @@ private Vector readers = new Vector(); private FieldInfos fieldInfos; - /** This ctor used only by test code. + /** This constructor is used only by test code. 
* * @param dir The Directory to merge the other segments into * @param name The name of the new segment @@ -111,29 +112,31 @@ final Vector createCompoundFile(String fileName) throws IOException { - CompoundFileWriter cfsWriter = - new CompoundFileWriter(directory, fileName); + CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName); - Vector files = - new Vector(IndexFileNames.COMPOUND_EXTENSIONS.length + fieldInfos.size()); - + List compoundExtensions = directory.getIndexFormat().getCompoundExtensions(); + + Vector files = new Vector(compoundExtensions.size() + fieldInfos.size()); + // Basic files - for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) { - files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]); + for (int i = 0; i < compoundExtensions.size(); i++) { + files.add(segment + "." + compoundExtensions.get(i)); } // Fieldable norm files for (int i = 0; i < fieldInfos.size(); i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); + FieldInfo fi = (FieldInfo) fieldInfos.getEntry(i); if (fi.isIndexed && !fi.omitNorms) { files.add(segment + ".f" + i); } } + List vectorExtensions = directory.getIndexFormat().getVectorExtensions(); + // Vector files if (fieldInfos.hasVectors()) { - for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.length; i++) { - files.add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]); + for (int i = 0; i < vectorExtensions.size(); i++) { + files.add(segment + "." 
+ vectorExtensions.get(i)); } } @@ -177,14 +180,13 @@ } fieldInfos.write(directory, segment + ".fnm"); - FieldsWriter fieldsWriter = // merge field values - new FieldsWriter(directory, segment, fieldInfos); + FieldsWriter fieldsWriter = directory.getIndexFormat().getFieldsWriter(directory, segment, fieldInfos); // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new FieldSelector() { public FieldSelectorResult accept(String fieldName) { - return FieldSelectorResult.LOAD_FOR_MERGE; + return FieldSelectorResult.LAZY_LOAD; } }; @@ -407,7 +409,7 @@ private void mergeNorms() throws IOException { for (int i = 0; i < fieldInfos.size(); i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); + FieldInfo fi = (FieldInfo) fieldInfos.getEntry(i); if (fi.isIndexed && !fi.omitNorms) { IndexOutput output = directory.createOutput(segment + ".f" + i); try { @@ -415,7 +417,7 @@ IndexReader reader = (IndexReader) readers.elementAt(j); int maxDoc = reader.maxDoc(); byte[] input = new byte[maxDoc]; - reader.norms(fi.name, input, 0); + reader.norms(fi.getId(), input, 0); for (int k = 0; k < maxDoc; k++) { if (!reader.isDeleted(k)) { output.writeByte(input[k]); Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (révision 449380) +++ src/java/org/apache/lucene/index/IndexWriter.java (copie de travail) @@ -109,7 +109,7 @@ private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory - private final Directory ramDirectory = new RAMDirectory(); // for temp segs + private final Directory ramDirectory; // for temp segs private Lock writeLock; @@ -249,6 +249,7 @@ private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir) 
throws IOException { + ramDirectory = new RAMDirectory(d.getIndexFormat()); this.closeDir = closeDir; directory = d; analyzer = a; Index: src/java/org/apache/lucene/index/EntryTable.java =================================================================== --- src/java/org/apache/lucene/index/EntryTable.java (révision 0) +++ src/java/org/apache/lucene/index/EntryTable.java (révision 0) @@ -0,0 +1,194 @@ +package org.apache.lucene.index; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; + +/** + * Access to a Lucene-serialized table of values + * + * $Id$ + */ +public abstract class EntryTable { + + private ArrayList byIndex = new ArrayList(); + + private HashMap byId = new HashMap(); + + /** + * Constructor used to populate a table from scratch + * + */ + public EntryTable() { + //nothing to initialize + } + + /** + * Construct an EntryTable object using the directory and the name of the file + * IndexInput + * + * @param d The directory to open the IndexInput from + * @param name The name of the file to open the IndexInput from in the Directory + * @throws IOException + */ + public EntryTable(Directory d, String name) throws IOException { + IndexInput input = d.openInput(name); + try { + read(input); + } finally { + input.close(); + } + } + + /** + * Read the table from an input stream + * + * @param input the stream to read + * @throws IOException in case of read error in stream + */ + public void read(IndexInput input) throws IOException { + int size = input.readVInt();//read in the size + for (int i = 0; i < size; i++) { + Entry entry = readEntry(i, input); + add(entry); + } + } + + /** + * Read an entry from an input stream + * + * @param index the index of the entry + * @param input the input stream to read + * @return the read entry + * @throws IOException in case of read error on the stream + */ + abstract protected Entry readEntry(int index, IndexInput input) throws IOException; + + /** + * Write the table in a segment + * + * @param d the directory to write in + * @param name the name of the file + * @throws IOException in case of read/write error in the directory + */ + public void write(Directory d, String name) throws IOException { + IndexOutput output = d.createOutput(name); + try { + write(output); + } finally 
{ + output.close(); + } + } + + /** + * Write the table in a stream + * + * @param output the stream to write into + * @throws IOException in case or read/write error in stream + */ + public void write(IndexOutput output) throws IOException { + output.writeVInt(size()); + for (int i = 0; i < size(); i++) { + writeEntry(getEntry(i), output); + } + } + + /** + * Write an entry in the stream + * + * @param entry the netry to serialize + * @param output the stream to write in + * @throws IOException in case of write error on the stream + */ + abstract protected void writeEntry(Entry entry, IndexOutput output) throws IOException; + + /** + * + * @return the size of the table, aka the number of entries in there + */ + public int size() { + return byIndex.size(); + } + + /** + * Return the entry object referenced by the index. + * + * @param index the request index + * @return the Info object or null when the given number doesn't exist. + */ + public Entry getEntry(int index) { + if (index < 0 || index > byIndex.size() - 1) { + return null; + } + return (Entry) byIndex.get(index); + } + + /** + * Get the number of the info from it's ID + * + * @param id the ID of the info + * @return the number of the info, -1 if not found + */ + public int getIndex(String id) { + Entry entry = getEntry(id); + if (entry == null) { + return -1; + } + return entry.getIndex(); + } + + /** + * Get the entry from it's id + * + * @param id the ID of the info + * @return the entry of the table, null if not found + */ + public Entry getEntry(String id) { + return (Entry) byId.get(id); + } + + /** + * Return the ID of an entry from it's index. + * + * @param index the index of the entry + * @return the ID or null when the index doesn't exist. 
+ */ + public String getId(int index) { + Entry entry = getEntry(index); + if (entry == null) { + return null; + } + return entry.getId(); + } + + /** + * Add an entry to the table + * + * @param entry the ntry to add + */ + public void add(Entry entry) { + byIndex.add(entry); + byId.put(entry.getId(), entry); + } + +} Index: src/java/org/apache/lucene/index/TermVectorsWriter.java =================================================================== --- src/java/org/apache/lucene/index/TermVectorsWriter.java (révision 449380) +++ src/java/org/apache/lucene/index/TermVectorsWriter.java (copie de travail) @@ -115,8 +115,8 @@ * closed automatically. */ public final void openField(String field) throws IOException { - FieldInfo fieldInfo = fieldInfos.fieldInfo(field); - openField(fieldInfo.number, fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector); + FieldInfo fieldInfo = (FieldInfo) fieldInfos.getEntry(field); + openField(fieldInfo.getIndex(), fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector); } private void openField(int fieldNumber, boolean storePositionWithTermVector, @@ -205,8 +205,8 @@ if (tpVector.size() > 0 && tpVector.getOffsets(0) != null) storeOffsetWithTermVector = true; - FieldInfo fieldInfo = fieldInfos.fieldInfo(tpVector.getField()); - openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector); + FieldInfo fieldInfo = (FieldInfo) fieldInfos.getEntry(tpVector.getField()); + openField(fieldInfo.getIndex(), storePositionWithTermVector, storeOffsetWithTermVector); for (int j = 0; j < tpVector.size(); j++) addTermInternal(tpVector.getTerms()[j], tpVector.getTermFrequencies()[j], tpVector.getTermPositions(j), @@ -218,8 +218,8 @@ TermFreqVector tfVector = vectors[i]; - FieldInfo fieldInfo = fieldInfos.fieldInfo(tfVector.getField()); - openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector); + FieldInfo fieldInfo = (FieldInfo) 
fieldInfos.getEntry(tfVector.getField()); + openField(fieldInfo.getIndex(), storePositionWithTermVector, storeOffsetWithTermVector); for (int j = 0; j < tfVector.size(); j++) addTermInternal(tfVector.getTerms()[j], tfVector.getTermFrequencies()[j], null, null); Index: src/java/org/apache/lucene/index/DocumentWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentWriter.java (révision 449380) +++ src/java/org/apache/lucene/index/DocumentWriter.java (copie de travail) @@ -73,8 +73,7 @@ fieldInfos.write(directory, segment + ".fnm"); // write field values - FieldsWriter fieldsWriter = - new FieldsWriter(directory, segment, fieldInfos); + FieldsWriter fieldsWriter = directory.getIndexFormat().getFieldsWriter(directory, segment, fieldInfos); try { fieldsWriter.addDocument(doc); } finally { @@ -131,7 +130,7 @@ while (fields.hasMoreElements()) { Fieldable field = (Fieldable) fields.nextElement(); String fieldName = field.name(); - int fieldNumber = fieldInfos.fieldNumber(fieldName); + int fieldNumber = fieldInfos.getIndex(fieldName); int length = fieldLengths[fieldNumber]; // length of field int position = fieldPositions[fieldNumber]; // position in field @@ -338,7 +337,7 @@ if (currentField != termField) { // changing field - see if there is something to save currentField = termField; - FieldInfo fi = fieldInfos.fieldInfo(currentField); + FieldInfo fi = (FieldInfo) fieldInfos.getEntry(currentField); if (fi.storeTermVector) { if (termVectorWriter == null) { termVectorWriter = @@ -371,9 +370,9 @@ private final void writeNorms(String segment) throws IOException { for(int n = 0; n < fieldInfos.size(); n++){ - FieldInfo fi = fieldInfos.fieldInfo(n); + FieldInfo fi = (FieldInfo) fieldInfos.getEntry(n); if(fi.isIndexed && !fi.omitNorms){ - float norm = fieldBoosts[n] * similarity.lengthNorm(fi.name, fieldLengths[n]); + float norm = fieldBoosts[n] * similarity.lengthNorm(fi.getId(), fieldLengths[n]); 
IndexOutput norms = directory.createOutput(segment + ".f" + n); try { norms.writeByte(Similarity.encodeNorm(norm)); Index: src/java/org/apache/lucene/index/IndexFileNameFilter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNameFilter.java (révision 449380) +++ src/java/org/apache/lucene/index/IndexFileNameFilter.java (copie de travail) @@ -18,6 +18,7 @@ import java.io.File; import java.io.FilenameFilter; +import java.util.List; /** * Filename filter that accept filenames and extensions only created by Lucene. @@ -27,12 +28,24 @@ */ public class IndexFileNameFilter implements FilenameFilter { + private IndexFormat indexFormat; + + /** + * Contructor + * + * @param indexFormat the format of the index + */ + public IndexFileNameFilter(IndexFormat indexFormat) { + this.indexFormat = indexFormat; + } + /* (non-Javadoc) * @see java.io.FilenameFilter#accept(java.io.File, java.lang.String) */ public boolean accept(File dir, String name) { - for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) { - if (name.endsWith("."+IndexFileNames.INDEX_EXTENSIONS[i])) + List IndexExtensions = indexFormat.getIndexExtensions(); + for (int i = 0; i < IndexExtensions.size(); i++) { + if (name.endsWith("." 
+ IndexExtensions.get(i))) return true; } if (name.equals(IndexFileNames.DELETABLE)) return true; Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (révision 449380) +++ src/java/org/apache/lucene/index/SegmentReader.java (copie de travail) @@ -142,7 +142,7 @@ // No compound file exists - use the multi-file format fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); - fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos); + fieldsReader = cfsDir.getIndexFormat().getFieldsReader(cfsDir, segment, fieldInfos); tis = new TermInfosReader(cfsDir, segment, fieldInfos); @@ -248,14 +248,16 @@ Vector files() throws IOException { Vector files = new Vector(16); - for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) { - String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i]; + List indexExtensions = directory().getIndexFormat().getIndexExtensions(); + + for (int i = 0; i < indexExtensions.size(); i++) { + String name = segment + "." 
+ indexExtensions.get(i); if (directory().fileExists(name)) files.addElement(name); } for (int i = 0; i < fieldInfos.size(); i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); + FieldInfo fi = (FieldInfo) fieldInfos.getEntry(i); if (fi.isIndexed && !fi.omitNorms){ String name; if(cfsReader == null) @@ -322,37 +324,37 @@ Set fieldSet = new HashSet(); for (int i = 0; i < fieldInfos.size(); i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); + FieldInfo fi = (FieldInfo) fieldInfos.getEntry(i); if (fieldOption == IndexReader.FieldOption.ALL) { - fieldSet.add(fi.name); + fieldSet.add(fi.getId()); } else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) { - fieldSet.add(fi.name); + fieldSet.add(fi.getId()); } else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) { - fieldSet.add(fi.name); + fieldSet.add(fi.getId()); } else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) { - fieldSet.add(fi.name); + fieldSet.add(fi.getId()); } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR) { - fieldSet.add(fi.name); + fieldSet.add(fi.getId()); } else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) { - fieldSet.add(fi.name); + fieldSet.add(fi.getId()); } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) { - fieldSet.add(fi.name); + fieldSet.add(fi.getId()); } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) { - fieldSet.add(fi.name); + fieldSet.add(fi.getId()); } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) { - 
fieldSet.add(fi.name); + fieldSet.add(fi.getId()); } } return fieldSet; @@ -433,16 +435,16 @@ private void openNorms(Directory cfsDir) throws IOException { for (int i = 0; i < fieldInfos.size(); i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); + FieldInfo fi = (FieldInfo) fieldInfos.getEntry(i); if (fi.isIndexed && !fi.omitNorms) { // look first if there are separate norms in compound format - String fileName = segment + ".s" + fi.number; + String fileName = segment + ".s" + fi.getIndex(); Directory d = directory(); if(!d.fileExists(fileName)){ - fileName = segment + ".f" + fi.number; + fileName = segment + ".f" + fi.getIndex(); d = cfsDir; } - norms.put(fi.name, new Norm(d.openInput(fileName), fi.number)); + norms.put(fi.getId(), new Norm(d.openInput(fileName), fi.getIndex())); } } } @@ -478,7 +480,7 @@ */ public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException { // Check if this field is invalid or has no stored term vector - FieldInfo fi = fieldInfos.fieldInfo(field); + FieldInfo fi = (FieldInfo) fieldInfos.getEntry(field); if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) return null; Index: src/java/org/apache/lucene/index/rdf/RDFFieldsWriter.java =================================================================== --- src/java/org/apache/lucene/index/rdf/RDFFieldsWriter.java (révision 0) +++ src/java/org/apache/lucene/index/rdf/RDFFieldsWriter.java (révision 0) @@ -0,0 +1,136 @@ +package org.apache.lucene.index.rdf; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FieldsWriter; +import org.apache.lucene.index.SimpleEntryTable; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; + +/** + * The RDF implementation of FieldsWriter + * + * $Id$ + */ +public class RDFFieldsWriter extends FieldsWriter { + + private SimpleEntryTable fieldLangInfos; + + private SimpleEntryTable fieldTypeInfos; + + private Directory d; + + private String segment; + + protected void close() throws IOException { + super.close(); + fieldLangInfos.write(d, segment + ".fty"); + fieldTypeInfos.write(d, segment + ".flg"); + } + + RDFFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { + super(d, segment, fn); + this.d = d; + this.segment = segment; + fieldLangInfos = new SimpleEntryTable(); + fieldTypeInfos = new SimpleEntryTable(); + } + + /** + * Nothing is stored at the document level + */ + protected void writeDocumentData(IndexOutput out, Document doc) throws IOException { + // nothing to write + } + + /** + * Dispatch from the field implementation + */ + protected void writeField(Fieldable field, IndexOutput out) throws IOException { + if (field instanceof RDFResourceField) { + writeResourceField((RDFResourceField) field, out); + } else if (field instanceof RDFLiteralField) { + writeLiteralField((RDFLiteralField) field, out); + } else { + writeDefaultField(field, out); + } + 
} + + /** + * Write two integers, the lang and the type "pointers" + * + * Note that lazy field are not handled there because we store in the data some + * pointer to the lang and the type that may change in different segments + */ + private void writeLiteralField(RDFLiteralField field, IndexOutput out) throws IOException { + writeBits(field, out); + out.writeString(field.stringValue()); + int nLang = 0; + String lang = field.getLang(); + if (lang != null) { + nLang = fieldLangInfos.add(lang) + 1; + } + out.writeVInt(nLang); + + int nType = 0; + String type = field.getType(); + if (type != null) { + nType = fieldTypeInfos.add(type) + 1; + } + out.writeVInt(nType); + } + + /** + * Write only the bits and the value + */ + private void writeResourceField(RDFResourceField field, IndexOutput out) throws IOException { + if (field.isLazy()) { + field.writeFromLazyLoading(out); + } else { + writeBits(field, out); + out.writeString(field.stringValue()); + } + } + + /** + * Handle unknown fieldable implementation + */ + private void writeDefaultField(Fieldable field, IndexOutput out) throws IOException { + writeBits(field, out); + if (field.isBinary()) { + out.writeString(""); + } else { + out.writeString(field.stringValue()); + } + } + + /** + * Write the bits + */ + private void writeBits(Fieldable field, IndexOutput out) throws IOException { + byte bits = 0; + if (field.isTokenized()) { + bits |= 0x01; + } + out.writeByte(bits); + } +} Index: src/java/org/apache/lucene/index/rdf/RDFFieldsReader.java =================================================================== --- src/java/org/apache/lucene/index/rdf/RDFFieldsReader.java (révision 0) +++ src/java/org/apache/lucene/index/rdf/RDFFieldsReader.java (révision 0) @@ -0,0 +1,66 @@ +package org.apache.lucene.index.rdf; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FieldsReader; +import org.apache.lucene.index.SimpleEntryTable; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; + +/** + * RDF implementation of FieldsReader + * + * $Id$ + */ +public class RDFFieldsReader extends FieldsReader { + + private SimpleEntryTable fieldLangInfos; + + private SimpleEntryTable fieldTypeInfos; + + protected RDFFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { + super(d, segment, fn); + fieldLangInfos = new SimpleEntryTable(d, segment + ".fty"); + fieldTypeInfos = new SimpleEntryTable(d, segment + ".flg"); + } + + /** + * Nothing is stored at the document level + */ + protected Document createDocument(IndexInput in) { + return new RDFDocument(); + } + + /** + * Dispatch the field to create from the field name + * - fieldResourceName -> RDFResourceField + * - else -> RDFLiteralField + */ + protected Fieldable createField(FieldInfo fi) { + if (RDFIndexFormat.fieldResourceName.equals(fi.getId())) { + return new RDFResourceField(fi); + } + return new RDFLiteralField(fi, fieldLangInfos, fieldTypeInfos); + } + +} Index: src/java/org/apache/lucene/index/rdf/RDFResourceField.java =================================================================== --- src/java/org/apache/lucene/index/rdf/RDFResourceField.java (révision 0) +++ 
src/java/org/apache/lucene/index/rdf/RDFResourceField.java (révision 0) @@ -0,0 +1,58 @@ +package org.apache.lucene.index.rdf; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.store.IndexInput; + +/** + * A field used for the URI of the RDF resource + * + * $Id$ + */ +public class RDFResourceField extends Fieldable { + + public RDFResourceField(FieldInfo fi) { + super(fi); + } + + public RDFResourceField(String uri) { + super(RDFIndexFormat.fieldResourceName, uri, Store.YES, Index.UN_TOKENIZED, TermVector.NO); + } + + /** + * The stream is only composed of a string + */ + public void readStream(IndexInput in, boolean skip) throws IOException { + byte bits = in.readByte(); + setTokenized((bits & 0x01) != 0); + setBinary(false); + if (skip) { + int toRead = in.readVInt(); + in.skipChars(toRead); //skip the value + } else { + setData(in.readString()); + } + } + +} Index: src/java/org/apache/lucene/index/rdf/RDFDocument.java =================================================================== --- src/java/org/apache/lucene/index/rdf/RDFDocument.java (révision 0) +++ 
src/java/org/apache/lucene/index/rdf/RDFDocument.java (révision 0) @@ -0,0 +1,106 @@ +package org.apache.lucene.index.rdf; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
A document handling RDF statements
src/java/org/apache/lucene/index/rdf/RDFIndexFormat.java (révision 0) @@ -0,0 +1,86 @@ +package org.apache.lucene.index.rdf; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.index.DefaultIndexFormat; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FieldsReader; +import org.apache.lucene.index.FieldsWriter; +import org.apache.lucene.index.IndexFormat; +import org.apache.lucene.store.Directory; + +/** + * This is an exemple of a custom implementation of index format. This format + * is dedicated to store RDF literals. A RDF literals have a string value, a + * optionanl language, and an optionnal type. This format stores the types and + * the languages in two custom tables. 
It also assume that the value is always + * text, and never compressed + * + * $Id$ + */ +public class RDFIndexFormat implements IndexFormat { + + /** The field name of the resource URI */ + public final static String fieldResourceName = "__FIELD_RESSOURCE__"; + + private final static List INDEX_EXTENSIONS = Arrays.asList(new String[] { "fty", "flg" }); + + private static final List COMPOUND_EXTENSIONS = Arrays.asList(new String[] { "fty", "flg" }); + + private static final List VECTOR_EXTENSIONS; + + static { + DefaultIndexFormat defaultFrmt = new DefaultIndexFormat(); + + INDEX_EXTENSIONS.addAll(defaultFrmt.getIndexExtensions()); + + COMPOUND_EXTENSIONS.addAll(defaultFrmt.getCompoundExtensions()); + + VECTOR_EXTENSIONS = defaultFrmt.getVectorExtensions(); + } + + public List getIndexExtensions() { + return INDEX_EXTENSIONS; + } + + public List getCompoundExtensions() { + return COMPOUND_EXTENSIONS; + } + + public List getVectorExtensions() { + return VECTOR_EXTENSIONS; + } + + /** + * Return a RDFFieldsReader + */ + public FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { + return new RDFFieldsReader(d, segment, fn); + } + + /** + * Return a RDFFieldsWriter + */ + public FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { + return new RDFFieldsWriter(d, segment, fn); + } + +} Index: src/java/org/apache/lucene/index/rdf/RDFLiteralField.java =================================================================== --- src/java/org/apache/lucene/index/rdf/RDFLiteralField.java (révision 0) +++ src/java/org/apache/lucene/index/rdf/RDFLiteralField.java (révision 0) @@ -0,0 +1,101 @@ +package org.apache.lucene.index.rdf; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.SimpleEntryTable; +import org.apache.lucene.store.IndexInput; + +/** + * A Field used for literals. It handles a string value, an optionnal lang, + * and an optionnal type. + * + * $Id$ + */ +public class RDFLiteralField extends Fieldable { + + private SimpleEntryTable fieldLangInfos; + + private SimpleEntryTable fieldTypeInfos; + + private String lang; + + private String type; + + public RDFLiteralField(FieldInfo fi, SimpleEntryTable fieldLangInfos, SimpleEntryTable fieldTypeInfos) { + super(fi); + this.fieldLangInfos = fieldLangInfos; + this.fieldTypeInfos = fieldTypeInfos; + } + + public RDFLiteralField(String name, String value, String lang, String type, Store store, Index index, TermVector termVector) { + super(name, value, store, index, termVector); + this.lang = lang; + this.type = type; + } + + /** + * + * @return the lang of the literal, null if none + */ + public String getLang() { + return lang; + } + + /** + * + * @return the type of the literal, null if none + */ + public String getType() { + return type; + } + + /** + * Read the stream : + * - a string : the value + * - an integer : the pointer to the lang + * - an integer : the pointer to the type + */ + public void readStream(IndexInput in, boolean skip) throws IOException { + 
byte bits = in.readByte(); + setTokenized((bits & 0x01) != 0); + setBinary(false); + if (skip) { + int toRead = in.readVInt(); + in.skipChars(toRead); //skip the value + in.readVInt(); //skip the lang + in.readVInt(); //skip the type + } else { + setData(in.readString()); + int nLang = in.readVInt(); + if (nLang != 0) { + lang = fieldLangInfos.getId(nLang - 1); + } + int nType = in.readVInt(); + if (nType != 0) { + type = fieldTypeInfos.getId(nType - 1); + } + } + } + +} Index: src/java/org/apache/lucene/index/DefaultFieldsReader.java =================================================================== --- src/java/org/apache/lucene/index/DefaultFieldsReader.java (révision 0) +++ src/java/org/apache/lucene/index/DefaultFieldsReader.java (révision 0) @@ -0,0 +1,51 @@ +package org.apache.lucene.index; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; + +/** + * The default implementation of a FieldsReader + * + * $Id$ + */ +public class DefaultFieldsReader extends FieldsReader { + + protected DefaultFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { + super(d, segment, fn); + } + + /** + * There is no information stored at the document level + */ + protected Document createDocument(IndexInput fieldsStream) { + return new Document(); + } + + /** + * Use the class Field + */ + protected Fieldable createField(FieldInfo fi) { + return new Field(fi); + } +} Index: src/java/org/apache/lucene/store/Directory.java =================================================================== --- src/java/org/apache/lucene/store/Directory.java (révision 449380) +++ src/java/org/apache/lucene/store/Directory.java (copie de travail) @@ -18,6 +18,9 @@ import java.io.IOException; +import org.apache.lucene.index.DefaultIndexFormat; +import org.apache.lucene.index.IndexFormat; + /** A Directory is a flat list of files. Files may be written once, when they * are created. Once a file is created it may only be opened for read, or * deleted. Random access is permitted both when reading and writing. @@ -41,6 +44,12 @@ * this Directory instance). */ protected LockFactory lockFactory; + protected IndexFormat indexFormat = new DefaultIndexFormat(); + + public IndexFormat getIndexFormat() { + return indexFormat; + } + /** Returns an array of strings, one for each file in the directory. 
*/ public abstract String[] list() throws IOException; @@ -123,4 +132,5 @@ public String getLockID() { return this.toString(); } + } Index: src/java/org/apache/lucene/store/RAMDirectory.java =================================================================== --- src/java/org/apache/lucene/store/RAMDirectory.java (révision 449380) +++ src/java/org/apache/lucene/store/RAMDirectory.java (copie de travail) @@ -22,6 +22,8 @@ import java.util.Hashtable; import java.util.Enumeration; +import org.apache.lucene.index.DefaultIndexFormat; +import org.apache.lucene.index.IndexFormat; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -39,8 +41,21 @@ Hashtable files = new Hashtable(); - /** Constructs an empty {@link Directory}. */ + /** + * Constructs an empty {@link Directory}. + * The index format used the the default one + */ public RAMDirectory() { + this(new DefaultIndexFormat()); + } + + /** + * Contructor + * + * @param indexFormat the format of the index + */ + public RAMDirectory(IndexFormat indexFormat) { + this.indexFormat = indexFormat; setLockFactory(new SingleInstanceLockFactory()); } @@ -51,15 +66,32 @@ *

* This should be used only with indices that can fit into memory. * + * The index format used is the default one + * + * @param dir a Directory value + * @exception IOException if an error occurs + */ public RAMDirectory(Directory dir) throws IOException { - this(dir, false); + this(dir, new DefaultIndexFormat()); } - - private RAMDirectory(Directory dir, boolean closeDir) throws IOException { - this(); + + /** + * Creates a new RAMDirectory instance from a different + * Directory implementation. This can be used to load + * a disk-based index into memory. + *

+ * This should be used only with indices that can fit into memory. + * + * @param dir a Directory value + * @param indexFormat the format of the index + * @throws IOException if an error occurs + */ + public RAMDirectory(Directory dir, IndexFormat indexFormat) throws IOException { + this(dir, false, indexFormat); + } + + private RAMDirectory(Directory dir, boolean closeDir, IndexFormat indexFormat) throws IOException { + this(indexFormat); final String[] files = dir.list(); byte[] buf = new byte[BufferedIndexOutput.BUFFER_SIZE]; for (int i = 0; i < files.length; i++) { @@ -87,22 +119,46 @@ /** * Creates a new RAMDirectory instance from the {@link FSDirectory}. - * + * The index format used is the default one + * + * @param dir a File specifying the index directory */ public RAMDirectory(File dir) throws IOException { - this(FSDirectory.getDirectory(dir, false), true); + this(dir, new DefaultIndexFormat()); } /** * Creates a new RAMDirectory instance from the {@link FSDirectory}. + * + * @param dir a File specifying the index directory + * @param indexFormat the format of the index + * @throws IOException + */ + public RAMDirectory(File dir, IndexFormat indexFormat) throws IOException { + this(FSDirectory.getDirectory(dir, false), true, indexFormat); + } + + /** + * Creates a new RAMDirectory instance from the {@link FSDirectory}. + * The index format used is the default one * * @param dir a String specifying the full index directory path */ public RAMDirectory(String dir) throws IOException { - this(FSDirectory.getDirectory(dir, false), true); + this(dir, new DefaultIndexFormat()); } + /** + * Creates a new RAMDirectory instance from the {@link FSDirectory}. 
+ * + * @param dir a String specifying the full index directory path + * @param indexFormat the format of the index + * @throws IOException + */ + public RAMDirectory(String dir, IndexFormat indexFormat) throws IOException { + this(FSDirectory.getDirectory(dir, false), true, indexFormat); + } + /** Returns an array of strings, one for each file in the directory. */ public final String[] list() { String[] result = new String[files.size()]; @@ -183,4 +239,21 @@ public final void close() { files = null; } + + /** + * For debug purpose, list every files name of this directory. + * The code was commented because the lockID is based on the toString() function + */ +// public String toString() { +// String[] f = list(); +// StringBuffer buffer = new StringBuffer(); +// for (int i = 0; i< f.length; i++) { +// buffer.append(f[i]); +// if (i != f.length - 1) { +// buffer.append(", "); +// } +// } +// return buffer.toString(); +// } + } Index: src/java/org/apache/lucene/store/RAMFile.java =================================================================== --- src/java/org/apache/lucene/store/RAMFile.java (révision 449380) +++ src/java/org/apache/lucene/store/RAMFile.java (copie de travail) @@ -26,4 +26,28 @@ Vector buffers = new Vector(); long length; long lastModified = System.currentTimeMillis(); + + /** + * For debug purpose + */ + public String toString() { + StringBuffer buffer = new StringBuffer(); + int i = 0; + int j = 0; + byte[] b = (byte[]) buffers.get(0); + int k = 0; + while (i < 200 && j < buffers.size()) { + buffer.append(b[k]); + k++; + if (k == b.length) { + k = 0; + j++; + if (j < buffers.size()) { + b = (byte[]) buffers.get(j); + } + } + i++; + } + return buffer.toString(); + } } Index: src/java/org/apache/lucene/store/IndexOutput.java =================================================================== --- src/java/org/apache/lucene/store/IndexOutput.java (révision 449380) +++ src/java/org/apache/lucene/store/IndexOutput.java (copie de travail) @@ -30,6 
+30,17 @@ */ public abstract void writeByte(byte b) throws IOException; + /** + * Write a byte directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeByte(byte) + */ + public void writeByte(IndexInput in) throws IOException { + writeByte(in.readByte()); + } + /** Writes an array of bytes. * @param b the bytes to write * @param length the number of bytes to write @@ -37,6 +48,20 @@ */ public abstract void writeBytes(byte[] b, int length) throws IOException; + /** + * Write a batch of bytes directly from an input stream. + * + * @param in the stream to read + * @param length the number of bytes to write + * @throws IOException + * @see #writeBytes(byte[], int) + */ + public void writeBytes(IndexInput in, long length) throws IOException { + while (length-- > 0) { + writeByte(in.readByte()); + } + } + /** Writes an int as four bytes. * @see IndexInput#readInt() */ @@ -47,6 +72,20 @@ writeByte((byte) i); } + /** + * Writes an int as four bytes directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeInt(int) + */ + public void writeInt(IndexInput in) throws IOException { + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + } + /** Writes an int in a variable-length format. Writes between one and * five bytes. Smaller values take fewer bytes. Negative numbers are not * supported. @@ -60,6 +99,22 @@ writeByte((byte)i); } + /** + * Writes an int in a variable-length format directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeVInt(int) + */ + public void writeVInt(IndexInput in) throws IOException { + byte b = in.readByte(); + writeByte(b); + while ((b & 0x80) != 0) { + b = in.readByte(); + writeByte(b); + } + } + /** Writes a long as eight bytes. 
* @see IndexInput#readLong() */ @@ -68,6 +123,24 @@ writeInt((int) i); } + /** + * Writes a long as eight bytes directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeLong(long) + */ + public void writeLong(IndexInput in) throws IOException { + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + } + /** Writes an long in a variable-length format. Writes between one and five * bytes. Smaller values take fewer bytes. Negative numbers are not * supported. @@ -81,6 +154,22 @@ writeByte((byte)i); } + /** + * Writes an long in a variable-length format directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeVLong(long) + */ + public void writeVLong(IndexInput in) throws IOException { + byte b = in.readByte(); + writeByte(b); + while ((b & 0x80) != 0) { + b = in.readByte(); + writeByte(b); + } + } + /** Writes a string. * @see IndexInput#readString() */ @@ -90,6 +179,19 @@ writeChars(s, 0, length); } + /** + * Writes a string directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeString(String) + */ + public void writeString(IndexInput in) throws IOException { + int length = in.readVInt(); + writeVInt(length); + writeChars(in, length); + } + /** Writes a sequence of UTF-8 encoded characters from a string. 
* @param s the source of the characters * @param start the first character in the sequence @@ -102,18 +204,40 @@ for (int i = start; i < end; i++) { final int code = (int)s.charAt(i); if (code >= 0x01 && code <= 0x7F) - writeByte((byte)code); + writeByte((byte)code); else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) { - writeByte((byte)(0xC0 | (code >> 6))); - writeByte((byte)(0x80 | (code & 0x3F))); + writeByte((byte)(0xC0 | (code >> 6))); + writeByte((byte)(0x80 | (code & 0x3F))); } else { - writeByte((byte)(0xE0 | (code >>> 12))); - writeByte((byte)(0x80 | ((code >> 6) & 0x3F))); - writeByte((byte)(0x80 | (code & 0x3F))); + writeByte((byte)(0xE0 | (code >>> 12))); + writeByte((byte)(0x80 | ((code >> 6) & 0x3F))); + writeByte((byte)(0x80 | (code & 0x3F))); } } } + /** + * Writes a sequence of UTF-8 encoded characters directly from an input stream. + * + * @param in the stream to read + * @param length the number of characters in the sequence + * @throws IOException + * @see #writeChars(String,int,int) + */ + public void writeChars(IndexInput in, int length) + throws IOException { + for (int i = 0; i < length; i++) { + byte b = in.readByte(); + writeByte(b); + if ((b & 0x80) != 0) { + writeByte(in.readByte()); + if ((b & 0xE0) == 0xE0) { + writeByte(in.readByte()); + } + } + } + } + /** Forces any buffered output to be written. 
*/ public abstract void flush() throws IOException; Index: src/java/org/apache/lucene/store/FSDirectory.java =================================================================== --- src/java/org/apache/lucene/store/FSDirectory.java (révision 449380) +++ src/java/org/apache/lucene/store/FSDirectory.java (copie de travail) @@ -25,7 +25,9 @@ import java.security.NoSuchAlgorithmException; import java.util.Hashtable; +import org.apache.lucene.index.DefaultIndexFormat; import org.apache.lucene.index.IndexFileNameFilter; +import org.apache.lucene.index.IndexFormat; /** * Straightforward implementation of {@link Directory} as a directory of files. @@ -121,21 +123,41 @@ *

Directories are cached, so that, for a given canonical path, the same * FSDirectory instance will always be returned. This permits * synchronization on directories. - * + * The index format used is the default one. + * * @param path the path to the directory. * @param create if true, create, or erase any existing contents. * @return the FSDirectory for the named file. */ public static FSDirectory getDirectory(String path, boolean create) throws IOException { - return getDirectory(path, create, null); + return getDirectory(path, create, new DefaultIndexFormat()); } + /** + * Returns the directory instance for the named location. + * + *

Directories are cached, so that, for a given canonical path, the same + * FSDirectory instance will always be returned. This permits + * synchronization on directories. + * + * @param path the path to the directory. + * @param create if true, create, or erase any existing contents. + * @param indexFormat the format of index + * @return the FSDirectory for the named file. + * @throws IOException + */ + public static FSDirectory getDirectory(String path, boolean create, IndexFormat indexFormat) + throws IOException { + return getDirectory(path, create, null, indexFormat); + } + /** Returns the directory instance for the named location, using the * provided LockFactory implementation. * *

Directories are cached, so that, for a given canonical path, the same * FSDirectory instance will always be returned. This permits * synchronization on directories. + * The index format used is the default one. * * @param path the path to the directory. * @param create if true, create, or erase any existing contents. @@ -145,23 +167,64 @@ public static FSDirectory getDirectory(String path, boolean create, LockFactory lockFactory) throws IOException { - return getDirectory(new File(path), create, lockFactory); + return getDirectory(path, create, lockFactory, new DefaultIndexFormat()); } + /** + * Returns the directory instance for the named location, using the + * provided LockFactory implementation. + * + *

Directories are cached, so that, for a given canonical path, the same + * FSDirectory instance will always be returned. This permits + * synchronization on directories. + * + * @param path the path to the directory. + * @param create if true, create, or erase any existing contents. + * @param lockFactory instance of {@link LockFactory} providing the + * locking implementation. + * @param indexFormat the format of index + * @return the FSDirectory for the named file. + * @throws IOException + */ + public static FSDirectory getDirectory(String path, boolean create, + LockFactory lockFactory, IndexFormat indexFormat) + throws IOException { + return getDirectory(new File(path), create, lockFactory, indexFormat); + } + /** Returns the directory instance for the named location. * *

Directories are cached, so that, for a given canonical path, the same * FSDirectory instance will always be returned. This permits * synchronization on directories. + * The index format used is the default one. * * @param file the path to the directory. * @param create if true, create, or erase any existing contents. * @return the FSDirectory for the named file. */ public static FSDirectory getDirectory(File file, boolean create) - throws IOException { - return getDirectory(file, create, null); + throws IOException { + return getDirectory(file, create, new DefaultIndexFormat()); } + /** + * Returns the directory instance for the named location. + * + *

Directories are cached, so that, for a given canonical path, the same + * FSDirectory instance will always be returned. This permits + * synchronization on directories. + * + * @param file the path to the directory. + * @param create if true, create, or erase any existing contents. + * @param indexFormat the format of index + * @return the FSDirectory for the named file. + * @throws IOException + */ + public static FSDirectory getDirectory(File file, boolean create, IndexFormat indexFormat) + throws IOException { + return getDirectory(file, create, null, indexFormat); + } + /** Returns the directory instance for the named location, using the * provided LockFactory implementation. * @@ -173,9 +236,12 @@ * @param create if true, create, or erase any existing contents. * @param lockFactory instance of {@link LockFactory} providing the * locking implementation. - * @return the FSDirectory for the named file. */ + * @param indexFormat the format of index + * @return the FSDirectory for the named file. 
+ * @throws IOException + */ public static FSDirectory getDirectory(File file, boolean create, - LockFactory lockFactory) + LockFactory lockFactory, IndexFormat indexFormat) throws IOException { file = new File(file.getCanonicalPath()); FSDirectory dir; @@ -187,7 +253,7 @@ } catch (Exception e) { throw new RuntimeException("cannot load FSDirectory class: " + e.toString(), e); } - dir.init(file, create, lockFactory); + dir.init(file, create, lockFactory, indexFormat); DIRECTORIES.put(file, dir); } else { @@ -224,8 +290,10 @@ throw new IOException(path + " not a directory"); } - private void init(File path, boolean create, LockFactory lockFactory) throws IOException { + private void init(File path, boolean create, LockFactory lockFactory, IndexFormat indexFormat) throws IOException { + this.indexFormat = indexFormat; + // Set up lockFactory with cascaded defaults: if an instance was passed in, // use that; else if locks are disabled, use NoLockFactory; else if the // system property org.apache.lucene.lockClass is set, instantiate that; @@ -290,7 +358,7 @@ if (!directory.isDirectory()) throw new IOException(directory + " not a directory"); - String[] files = directory.list(new IndexFileNameFilter()); // clear old files + String[] files = directory.list(new IndexFileNameFilter(getIndexFormat())); // clear old files if (files == null) throw new IOException("Cannot read directory " + directory.getAbsolutePath()); for (int i = 0; i < files.length; i++) { @@ -304,7 +372,7 @@ /** Returns an array of strings, one for each Lucene index file in the directory. */ public String[] list() { - return directory.list(new IndexFileNameFilter()); + return directory.list(new IndexFileNameFilter(getIndexFormat())); } /** Returns true iff a file with the given name exists. 
*/ Index: src/java/org/apache/lucene/document/Field.java =================================================================== --- src/java/org/apache/lucene/document/Field.java (révision 449380) +++ src/java/org/apache/lucene/document/Field.java (copie de travail) @@ -16,11 +16,17 @@ * limitations under the License. */ -import org.apache.lucene.util.Parameter; - +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.Reader; import java.io.Serializable; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Parameter; + /** A field is a section of a Document. Each field has two parts, a name and a value. Values may be free text, provided as a String or as a Reader, or they @@ -29,8 +35,10 @@ index, so that they may be returned with hits on the document. */ -public final class Field extends AbstractField implements Fieldable, Serializable { - +public final class Field extends Fieldable implements Serializable { + + private boolean isCompressed; + /** Specifies whether and how a field should be stored. */ public static final class Store extends Parameter implements Serializable { @@ -127,22 +135,10 @@ public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS"); } - - /** The value of the field as a String, or null. If null, the Reader value - * or binary value is used. Exactly one of stringValue(), readerValue(), and - * binaryValue() must be set. */ - public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; } - - /** The value of the field as a Reader, or null. If null, the String value - * or binary value is used. Exactly one of stringValue(), readerValue(), - * and binaryValue() must be set. */ - public Reader readerValue() { return fieldsData instanceof Reader ? 
(Reader)fieldsData : null; } - - /** The value of the field in Binary, or null. If null, the Reader or - * String value is used. Exactly one of stringValue(), readerValue() and - * binaryValue() must be set. */ - public byte[] binaryValue() { return fieldsData instanceof byte[] ? (byte[])fieldsData : null; } - + public Field(FieldInfo fi) { + super(fi); + } + /** * Create a field by specifying its name, value and how it will * be saved in the index. Term vectors will not be stored in the index. @@ -177,57 +173,9 @@ * */ public Field(String name, String value, Store store, Index index, TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - if (value == null) - throw new NullPointerException("value cannot be null"); + super(name, value, store, index, termVector); if (name.length() == 0 && value.length() == 0) throw new IllegalArgumentException("name and value cannot both be empty"); - if (index == Index.NO && store == Store.NO) - throw new IllegalArgumentException("it doesn't make sense to have a field that " - + "is neither indexed nor stored"); - if (index == Index.NO && termVector != TermVector.NO) - throw new IllegalArgumentException("cannot store term vector information " - + "for a field that is not indexed"); - - this.name = name.intern(); // field names are interned - this.fieldsData = value; - - if (store == Store.YES){ - this.isStored = true; - this.isCompressed = false; - } - else if (store == Store.COMPRESS) { - this.isStored = true; - this.isCompressed = true; - } - else if (store == Store.NO){ - this.isStored = false; - this.isCompressed = false; - } - else - throw new IllegalArgumentException("unknown store parameter " + store); - - if (index == Index.NO) { - this.isIndexed = false; - this.isTokenized = false; - } else if (index == Index.TOKENIZED) { - this.isIndexed = true; - this.isTokenized = true; - } else if (index == Index.UN_TOKENIZED) { - this.isIndexed = true; - this.isTokenized = false; - } 
else if (index == Index.NO_NORMS) { - this.isIndexed = true; - this.isTokenized = false; - this.omitNorms = true; - } else { - throw new IllegalArgumentException("unknown index parameter " + index); - } - - this.isBinary = false; - - setStoreTermVector(termVector); } /** @@ -252,23 +200,7 @@ * @throws NullPointerException if name or reader is null */ public Field(String name, Reader reader, TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - if (reader == null) - throw new NullPointerException("reader cannot be null"); - - this.name = name.intern(); // field names are interned - this.fieldsData = reader; - - this.isStored = false; - this.isCompressed = false; - - this.isIndexed = true; - this.isTokenized = true; - - this.isBinary = false; - - setStoreTermVector(termVector); + super(name, reader, Store.NO, Index.TOKENIZED, termVector); } /** @@ -280,34 +212,163 @@ * @throws IllegalArgumentException if store is Store.NO */ public Field(String name, byte[] value, Store store) { - if (name == null) - throw new IllegalArgumentException("name cannot be null"); - if (value == null) - throw new IllegalArgumentException("value cannot be null"); - - this.name = name.intern(); - this.fieldsData = value; - - if (store == Store.YES){ - this.isStored = true; - this.isCompressed = false; + super(name, value, store, Index.NO, TermVector.NO); + } + + protected void setStore(Field.Store store) { + if (store == Field.Store.YES) { + setStored(true); + isCompressed = false; + } else if (store == Field.Store.COMPRESS) { + setStored(true); + isCompressed = true; + } else if (store == Field.Store.NO) { + if (isBinary()) { + throw new IllegalArgumentException("binary values can't be unstored"); + } + setStored(false); + isCompressed = false; + } else { + throw new IllegalArgumentException("unknown store parameter " + store); } - else if (store == Store.COMPRESS) { - this.isStored = true; - this.isCompressed = true; + } + + /** Prints a 
Field for human consumption. */ + public String toString() { + StringBuffer result = new StringBuffer(); + if (isStored()) { + result.append("stored"); + if (isCompressed) + result.append("/compressed"); + else + result.append("/uncompressed"); } - else if (store == Store.NO) - throw new IllegalArgumentException("binary values can't be unstored"); - else - throw new IllegalArgumentException("unknown store parameter " + store); - - this.isIndexed = false; - this.isTokenized = false; - - this.isBinary = true; - - setStoreTermVector(TermVector.NO); + if (isIndexed()) { + if (result.length() > 0) + result.append(","); + result.append("indexed"); + } + if (isTokenized()) { + if (result.length() > 0) + result.append(","); + result.append("tokenized"); + } + if (isTermVectorStored()) { + if (result.length() > 0) + result.append(","); + result.append("termVector"); + } + if (isStoreOffsetWithTermVector()) { + if (result.length() > 0) + result.append(","); + result.append("termVectorOffsets"); + } + if (isStorePositionWithTermVector()) { + if (result.length() > 0) + result.append(","); + result.append("termVectorPosition"); + } + if (isBinary()) { + if (result.length() > 0) + result.append(","); + result.append("binary"); + } + if (getOmitNorms()) { + result.append(",omitNorms"); + } + if (isLazy()) { + result.append(",lazy"); + } + result.append('<'); + result.append(name()); + result.append(':'); + + result.append(getData()); + + result.append('>'); + return result.toString(); } + /** True if the value of the field is stored and compressed within the index */ + public final boolean isCompressed() { + return isCompressed; + } + public void setCompressed(boolean isCompressed) { + this.isCompressed = isCompressed; + } + + public static final byte FIELD_IS_TOKENIZED = 0x1; + + public static final byte FIELD_IS_BINARY = 0x2; + + public static final byte FIELD_IS_COMPRESSED = 0x4; + + public void readStream(IndexInput in, boolean skip) throws IOException { + byte bits = 
in.readByte(); + isCompressed = (bits & FIELD_IS_COMPRESSED) != 0; + setTokenized((bits & FIELD_IS_TOKENIZED) != 0); + setBinary((bits & FIELD_IS_BINARY) != 0); + + if (skip) { + int toRead = in.readVInt(); + if (isBinary() || isCompressed()) { + long pointer = in.getFilePointer(); + //Need to move the pointer ahead by toRead positions + in.seek(pointer + toRead); + } else { + //Skip ahead of where we are by the length of what is stored + in.skipChars(toRead); + } + } else { + if (isBinary()) { + int toRead = in.readVInt(); + final byte[] b = new byte[toRead]; + in.readBytes(b, 0, b.length); + if (isCompressed()) { + setData(uncompress(b)); + } else { + setData(b); + } + } else { + if (isCompressed()) { + int toRead = in.readVInt(); + final byte[] b = new byte[toRead]; + in.readBytes(b, 0, b.length); + setData(new String(uncompress(b), "UTF-8")); + } else { + setData(in.readString()); // read value + } + } + } + } + + protected byte[] uncompress(final byte[] input) throws IOException { + + Inflater decompressor = new Inflater(); + decompressor.setInput(input); + + // Create an expandable byte array to hold the decompressed data + ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); + + // Decompress the data + byte[] buf = new byte[1024]; + while (!decompressor.finished()) { + try { + int count = decompressor.inflate(buf); + bos.write(buf, 0, count); + } catch (DataFormatException e) { + // this will happen if the field is not compressed + IOException newException = new IOException("field data are in wrong format: " + e.toString()); + newException.initCause(e); + throw newException; + } + } + + decompressor.end(); + + // Get the decompressed data + return bos.toByteArray(); + } + } Index: src/java/org/apache/lucene/document/AbstractField.java =================================================================== --- src/java/org/apache/lucene/document/AbstractField.java (révision 449380) +++ src/java/org/apache/lucene/document/AbstractField.java (copie 
de travail) @@ -1,274 +0,0 @@ -package org.apache.lucene.document; -/** - * Copyright 2006 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/** - * - * - **/ -public abstract class AbstractField implements Fieldable { - - protected String name = "body"; - protected boolean storeTermVector = false; - protected boolean storeOffsetWithTermVector = false; - protected boolean storePositionWithTermVector = false; - protected boolean omitNorms = false; - protected boolean isStored = false; - protected boolean isIndexed = true; - protected boolean isTokenized = true; - protected boolean isBinary = false; - protected boolean isCompressed = false; - protected boolean lazy = false; - protected float boost = 1.0f; - // the one and only data object for all different kind of field values - protected Object fieldsData = null; - - protected AbstractField() - { - - } - - protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - this.name = name.intern(); // field names are interned - - if (store == Field.Store.YES){ - this.isStored = true; - this.isCompressed = false; - } - else if (store == Field.Store.COMPRESS) { - this.isStored = true; - this.isCompressed = true; - } - else if (store == Field.Store.NO){ - this.isStored = false; - this.isCompressed = false; - } - else - throw new 
IllegalArgumentException("unknown store parameter " + store); - - if (index == Field.Index.NO) { - this.isIndexed = false; - this.isTokenized = false; - } else if (index == Field.Index.TOKENIZED) { - this.isIndexed = true; - this.isTokenized = true; - } else if (index == Field.Index.UN_TOKENIZED) { - this.isIndexed = true; - this.isTokenized = false; - } else if (index == Field.Index.NO_NORMS) { - this.isIndexed = true; - this.isTokenized = false; - this.omitNorms = true; - } else { - throw new IllegalArgumentException("unknown index parameter " + index); - } - - this.isBinary = false; - - setStoreTermVector(termVector); - } - - /** Sets the boost factor hits on this field. This value will be - * multiplied into the score of all hits on this this field of this - * document. - * - *

The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document - * containing this field. If a document has multiple fields with the same - * name, all such values are multiplied together. This product is then - * multipled by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and - * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the - * index. One should attempt to ensure that this product does not overflow - * the range of that encoding. - * - * @see org.apache.lucene.document.Document#setBoost(float) - * @see org.apache.lucene.search.Similarity#lengthNorm(String, int) - * @see org.apache.lucene.search.Similarity#encodeNorm(float) - */ - public void setBoost(float boost) { - this.boost = boost; - } - - /** Returns the boost factor for hits for this field. - * - *

The default value is 1.0. - * - *

Note: this value is not stored directly with the document in the index. - * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and - * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when - * this field was indexed. - * - * @see #setBoost(float) - */ - public float getBoost() { - return boost; - } - - /** Returns the name of the field as an interned string. - * For example "date", "title", "body", ... - */ - public String name() { return name; } - - protected void setStoreTermVector(Field.TermVector termVector) { - if (termVector == Field.TermVector.NO) { - this.storeTermVector = false; - this.storePositionWithTermVector = false; - this.storeOffsetWithTermVector = false; - } - else if (termVector == Field.TermVector.YES) { - this.storeTermVector = true; - this.storePositionWithTermVector = false; - this.storeOffsetWithTermVector = false; - } - else if (termVector == Field.TermVector.WITH_POSITIONS) { - this.storeTermVector = true; - this.storePositionWithTermVector = true; - this.storeOffsetWithTermVector = false; - } - else if (termVector == Field.TermVector.WITH_OFFSETS) { - this.storeTermVector = true; - this.storePositionWithTermVector = false; - this.storeOffsetWithTermVector = true; - } - else if (termVector == Field.TermVector.WITH_POSITIONS_OFFSETS) { - this.storeTermVector = true; - this.storePositionWithTermVector = true; - this.storeOffsetWithTermVector = true; - } - else { - throw new IllegalArgumentException("unknown termVector parameter " + termVector); - } - } - - /** True iff the value of the field is to be stored in the index for return - with search hits. It is an error for this to be true if a field is - Reader-valued. */ - public final boolean isStored() { return isStored; } - - /** True iff the value of the field is to be indexed, so that it may be - searched on. 
*/ - public final boolean isIndexed() { return isIndexed; } - - /** True iff the value of the field should be tokenized as text prior to - indexing. Un-tokenized fields are indexed as a single word and may not be - Reader-valued. */ - public final boolean isTokenized() { return isTokenized; } - - /** True if the value of the field is stored and compressed within the index */ - public final boolean isCompressed() { return isCompressed; } - - /** True iff the term or terms used to index this field are stored as a term - * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. - * These methods do not provide access to the original content of the field, - * only to terms used to index it. If the original content must be - * preserved, use the stored attribute instead. - * - * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String) - */ - public final boolean isTermVectorStored() { return storeTermVector; } - - /** - * True iff terms are stored as term vector together with their offsets - * (start and end positon in source text). - */ - public boolean isStoreOffsetWithTermVector(){ - return storeOffsetWithTermVector; - } - - /** - * True iff terms are stored as term vector together with their token positions. - */ - public boolean isStorePositionWithTermVector(){ - return storePositionWithTermVector; - } - - /** True iff the value of the filed is stored as binary */ - public final boolean isBinary() { return isBinary; } - - /** True if norms are omitted for this indexed field */ - public boolean getOmitNorms() { return omitNorms; } - - /** Expert: - * - * If set, omit normalization factors associated with this indexed field. - * This effectively disables indexing boosts and length normalization for this field. - */ - public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; } - - public boolean isLazy() { - return lazy; - } - - /** Prints a Field for human consumption. 
*/ - public final String toString() { - StringBuffer result = new StringBuffer(); - if (isStored) { - result.append("stored"); - if (isCompressed) - result.append("/compressed"); - else - result.append("/uncompressed"); - } - if (isIndexed) { - if (result.length() > 0) - result.append(","); - result.append("indexed"); - } - if (isTokenized) { - if (result.length() > 0) - result.append(","); - result.append("tokenized"); - } - if (storeTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVector"); - } - if (storeOffsetWithTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVectorOffsets"); - } - if (storePositionWithTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVectorPosition"); - } - if (isBinary) { - if (result.length() > 0) - result.append(","); - result.append("binary"); - } - if (omitNorms) { - result.append(",omitNorms"); - } - if (lazy){ - result.append(",lazy"); - } - result.append('<'); - result.append(name); - result.append(':'); - - if (fieldsData != null && lazy == false) { - result.append(fieldsData); - } - - result.append('>'); - return result.toString(); - } -} Index: src/java/org/apache/lucene/document/Fieldable.java =================================================================== --- src/java/org/apache/lucene/document/Fieldable.java (révision 446873) +++ src/java/org/apache/lucene/document/Fieldable.java (copie de travail) @@ -1,137 +1,450 @@ -package org.apache.lucene.document; - -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.Reader; -import java.io.Serializable; - -/** - * Synonymous with {@link Field}. - * - **/ -public interface Fieldable extends Serializable { - /** Sets the boost factor hits on this field. This value will be - * multiplied into the score of all hits on this this field of this - * document. - * - *

The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document - * containing this field. If a document has multiple fields with the same - * name, all such values are multiplied together. This product is then - * multipled by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and - * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the - * index. One should attempt to ensure that this product does not overflow - * the range of that encoding. - * - * @see org.apache.lucene.document.Document#setBoost(float) - * @see org.apache.lucene.search.Similarity#lengthNorm(String, int) - * @see org.apache.lucene.search.Similarity#encodeNorm(float) - */ - void setBoost(float boost); - - /** Returns the boost factor for hits for this field. - * - *

The default value is 1.0. - * - *

Note: this value is not stored directly with the document in the index. - * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and - * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when - * this field was indexed. - * - * @see #setBoost(float) - */ - float getBoost(); - - /** Returns the name of the field as an interned string. - * For example "date", "title", "body", ... - */ - String name(); - - /** The value of the field as a String, or null. If null, the Reader value - * or binary value is used. Exactly one of stringValue(), readerValue(), and - * binaryValue() must be set. */ - String stringValue(); - - /** The value of the field as a Reader, or null. If null, the String value - * or binary value is used. Exactly one of stringValue(), readerValue(), - * and binaryValue() must be set. */ - Reader readerValue(); - - /** The value of the field in Binary, or null. If null, the Reader or - * String value is used. Exactly one of stringValue(), readerValue() and - * binaryValue() must be set. */ - byte[] binaryValue(); - - /** True iff the value of the field is to be stored in the index for return - with search hits. It is an error for this to be true if a field is - Reader-valued. */ - boolean isStored(); - - /** True iff the value of the field is to be indexed, so that it may be - searched on. */ - boolean isIndexed(); - - /** True iff the value of the field should be tokenized as text prior to - indexing. Un-tokenized fields are indexed as a single word and may not be - Reader-valued. */ - boolean isTokenized(); - - /** True if the value of the field is stored and compressed within the index */ - boolean isCompressed(); - - /** True iff the term or terms used to index this field are stored as a term - * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. 
- * These methods do not provide access to the original content of the field, - * only to terms used to index it. If the original content must be - * preserved, use the stored attribute instead. - * - * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String) - */ - boolean isTermVectorStored(); - - /** - * True iff terms are stored as term vector together with their offsets - * (start and end positon in source text). - */ - boolean isStoreOffsetWithTermVector(); - - /** - * True iff terms are stored as term vector together with their token positions. - */ - boolean isStorePositionWithTermVector(); - - /** True iff the value of the filed is stored as binary */ - boolean isBinary(); - - /** True if norms are omitted for this indexed field */ - boolean getOmitNorms(); - - /** Expert: - * - * If set, omit normalization factors associated with this indexed field. - * This effectively disables indexing boosts and length normalization for this field. - */ - void setOmitNorms(boolean omitNorms); - - /** - * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving - * it's values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that - * retrieved the {@link Document} is still open. - * - * @return true if this field can be loaded lazily - */ - boolean isLazy(); -} +package org.apache.lucene.document; + +/** + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldReaderException; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; + +/** + * + * + */ +public abstract class Fieldable { + + private String name = "body"; + + private boolean storeTermVector = false; + + private boolean storeOffsetWithTermVector = false; + + private boolean storePositionWithTermVector = false; + + private boolean omitNorms = false; + + private boolean isIndexed = true; + + private float boost = 1.0f; + + private boolean isStored = true; + + private boolean isTokenized = true; + + private boolean isBinary = false; + + // the one and only data object for all different kind of field values + private Object fieldData = null; + + private boolean isLazy = false; + + private IndexInput fieldsStream; + + private long pointer; + + private long toRead; + + protected Fieldable(FieldInfo fi) { + this.name = fi.getId(); + storeTermVector = fi.storeTermVector(); + storeOffsetWithTermVector = fi.storeOffsetWithTermVector(); + storePositionWithTermVector = fi.storePositionWithTermVector(); + omitNorms = fi.omitNorms(); + } + + protected Fieldable(String name, String text, Field.Store store, Field.Index index, Field.TermVector termVector) { + this(name, (Object) text, store, index, termVector); + isBinary = false; + } + + protected Fieldable(String name, byte[] data, Field.Store store, Field.Index index, Field.TermVector termVector) { + this(name, (Object) data, store, index, termVector); + isBinary = true; + } + + protected Fieldable(String name, Reader reader, Field.Store store, Field.Index index, Field.TermVector termVector) { + this(name, (Object) reader, store, index, termVector); + isBinary = true; + } + + private Fieldable(String name, Object data, Field.Store store, 
Field.Index index, Field.TermVector termVector) { + if (name == null) + throw new NullPointerException("name cannot be null"); + if (data == null) + throw new NullPointerException("data cannot be null"); + + this.name = name.intern(); // field names are interned + + fieldData = data; + + if (index == Field.Index.NO && store == Field.Store.NO) { + throw new IllegalArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored"); + } + if (index == Field.Index.NO && termVector != Field.TermVector.NO) { + throw new IllegalArgumentException("cannot store term vector information " + "for a field that is not indexed"); + } + + setStore(store); + setIndex(index); + setStoreTermVector(termVector); + } + + /** Sets the boost factor hits on this field. This value will be + * multiplied into the score of all hits on this this field of this + * document. + * + *

The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document + * containing this field. If a document has multiple fields with the same + * name, all such values are multiplied together. This product is then + * multipled by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and + * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the + * index. One should attempt to ensure that this product does not overflow + * the range of that encoding. + * + * @see org.apache.lucene.document.Document#setBoost(float) + * @see org.apache.lucene.search.Similarity#lengthNorm(String, int) + * @see org.apache.lucene.search.Similarity#encodeNorm(float) + */ + public void setBoost(float boost) { + this.boost = boost; + } + + /** Returns the boost factor for hits for this field. + * + *

The default value is 1.0. + * + *

Note: this value is not stored directly with the document in the index. + * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and + * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when + * this field was indexed. + * + * @see #setBoost(float) + */ + public float getBoost() { + return boost; + } + + /** Returns the name of the field as an interned string. + * For example "date", "title", "body", ... + */ + public String name() { + return name; + } + + protected void setStoreTermVector(Field.TermVector termVector) { + if (termVector == Field.TermVector.NO) { + this.storeTermVector = false; + this.storePositionWithTermVector = false; + this.storeOffsetWithTermVector = false; + } else if (termVector == Field.TermVector.YES) { + this.storeTermVector = true; + this.storePositionWithTermVector = false; + this.storeOffsetWithTermVector = false; + } else if (termVector == Field.TermVector.WITH_POSITIONS) { + this.storeTermVector = true; + this.storePositionWithTermVector = true; + this.storeOffsetWithTermVector = false; + } else if (termVector == Field.TermVector.WITH_OFFSETS) { + this.storeTermVector = true; + this.storePositionWithTermVector = false; + this.storeOffsetWithTermVector = true; + } else if (termVector == Field.TermVector.WITH_POSITIONS_OFFSETS) { + this.storeTermVector = true; + this.storePositionWithTermVector = true; + this.storeOffsetWithTermVector = true; + } else { + throw new IllegalArgumentException("unknown termVector parameter " + termVector); + } + } + + protected void setIndex(Field.Index index) { + if (index == Field.Index.NO) { + isIndexed = false; + isTokenized = false; + } else if (index == Field.Index.TOKENIZED) { + isIndexed = true; + isTokenized = true; + } else if (index == Field.Index.UN_TOKENIZED) { + isIndexed = true; + isTokenized = false; + } else if (index == Field.Index.NO_NORMS) { + isIndexed = true; + isTokenized = false; + omitNorms = true; + } else { + 
throw new IllegalArgumentException("unknown index parameter " + index); + } + } + + protected void setStore(Field.Store store) { + if (store == Field.Store.YES) { + isStored = true; + } else if (store == Field.Store.NO) { + if (isBinary()) { + throw new IllegalArgumentException("binary values can't be unstored"); + } + isStored = false; + } else { + throw new IllegalArgumentException("unknown store parameter " + store); + } + } + + /** True iff the value of the field is to be stored in the index for return + with search hits. It is an error for this to be true if a field is + Reader-valued. */ + public final boolean isStored() { + return isStored; + } + + protected final void setStored(boolean isStored) { + this.isStored = isStored; + } + + /** True iff the value of the field is to be indexed, so that it may be + searched on. */ + public final boolean isIndexed() { + return isIndexed; + } + + protected final void setIndexed(boolean isIndexed) { + this.isIndexed = isIndexed; + } + + /** True iff the value of the field should be tokenized as text prior to + indexing. Un-tokenized fields are indexed as a single word and may not be + Reader-valued. */ + public final boolean isTokenized() { + return isTokenized; + } + + protected final void setTokenized(boolean isTokenized) { + this.isTokenized = isTokenized; + } + + /** True iff the term or terms used to index this field are stored as a term + * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. + * These methods do not provide access to the original content of the field, + * only to terms used to index it. If the original content must be + * preserved, use the stored attribute instead. + * + * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String) + */ + public final boolean isTermVectorStored() { + return storeTermVector; + } + + /** + * True iff terms are stored as term vector together with their offsets + * (start and end positon in source text). 
+ */ + public boolean isStoreOffsetWithTermVector() { + return storeOffsetWithTermVector; + } + + /** + * True iff terms are stored as term vector together with their token positions. + */ + public boolean isStorePositionWithTermVector() { + return storePositionWithTermVector; + } + + /** True iff the value of the filed is stored as binary */ + public final boolean isBinary() { + return isBinary; + } + + protected final void setBinary(boolean isBinary) { + this.isBinary = isBinary; + } + + /** True if norms are omitted for this indexed field */ + public boolean getOmitNorms() { + return omitNorms; + } + + /** Expert: + * + * If set, omit normalization factors associated with this indexed field. + * This effectively disables indexing boosts and length normalization for this field. + */ + public void setOmitNorms(boolean omitNorms) { + this.omitNorms = omitNorms; + } + + /** + * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving + * it's values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that + * retrieved the {@link Document} is still open. + * + * @return true if this field can be loaded lazily + */ + public boolean isLazy() { + return isLazy; + } + + /** Prints a Field for human consumption. 
*/ + public String toString() { + StringBuffer result = new StringBuffer(); + if (isStored()) { + result.append("stored"); + } + if (isIndexed) { + if (result.length() > 0) + result.append(","); + result.append("indexed"); + } + if (isTokenized()) { + if (result.length() > 0) + result.append(","); + result.append("tokenized"); + } + if (storeTermVector) { + if (result.length() > 0) + result.append(","); + result.append("termVector"); + } + if (storeOffsetWithTermVector) { + if (result.length() > 0) + result.append(","); + result.append("termVectorOffsets"); + } + if (storePositionWithTermVector) { + if (result.length() > 0) + result.append(","); + result.append("termVectorPosition"); + } + if (isBinary()) { + if (result.length() > 0) + result.append(","); + result.append("binary"); + } + if (omitNorms) { + result.append(",omitNorms"); + } + if (isLazy()) { + result.append(",lazy"); + } + result.append('<'); + result.append(name); + result.append(':'); + + result.append(fieldData); + + result.append('>'); + return result.toString(); + } + + /** The value of the field as a String, or null. If null, the Reader value + * or binary value is used. Exactly one of stringValue(), readerValue(), and + * binaryValue() must be set. */ + public final String stringValue() { + if (isLazy && fieldData == null) { + readLazyData(); + } + return fieldData instanceof String ? (String) fieldData : null; + } + + /** The value of the field as a Reader, or null. If null, the String value + * or binary value is used. Exactly one of stringValue(), readerValue(), + * and binaryValue() must be set. */ + public final Reader readerValue() { + if (isLazy && fieldData == null) { + readLazyData(); + } + return fieldData instanceof Reader ? (Reader) fieldData : null; + } + + /** The value of the field in Binary, or null. If null, the Reader or + * String value is used. Exactly one of stringValue(), readerValue() and + * binaryValue() must be set. 
*/ + public final byte[] binaryValue() { + if (isLazy && fieldData == null) { + readLazyData(); + } + return fieldData instanceof byte[] ? (byte[]) fieldData : null; + } + + /** + * + * @param fieldData the new data of the field + */ + protected void setData(Object fieldData) { + this.fieldData = fieldData; + } + + /** + * + * @return the data of the field + */ + protected Object getData() { + return fieldData; + } + + /** + * Load the field data from the stream + * + * @param in the stream to read + * @param skip if the data have to be stored, or just skipped from the stream + * @throws IOException + */ + public abstract void readStream(IndexInput in, boolean skip) throws IOException; + + private final void readLazyData() { + try { + fieldsStream.seek(pointer); + readStream(fieldsStream, false); + } catch (IOException e) { + throw new FieldReaderException(e); + } + } + + /** + * Set this field as lazy loaded, and save the stream status + * + * FIXME : this function shound't be public, and only be called from FieldsReader. But as + * FieldsReader is not in the same package, and is not extending it, the only possible + * scope is 'public' + * + * @param fieldsStream the field stream + * @param pointer the pointer of the field data + * @param toRead the number of byte of the field data + */ + public void setLazyData(IndexInput fieldsStream, long pointer, long toRead) { + isLazy = true; + this.fieldsStream = fieldsStream; + this.pointer = pointer; + this.toRead = toRead; + } + + /** + * Write the lazy loaded field data directly in the specified output stream. + * If the field has not been loaded lazily, it throws an UnsupportedOperationException. 
+ * + * @param out the stream to write in + * @throws IOException in case of write error + */ + public final void writeFromLazyLoading(IndexOutput out) throws IOException { + if (!isLazy) { + throw new UnsupportedOperationException("The field have to be load lazily to copy it directly"); + } + fieldsStream.seek(pointer); + out.writeBytes(fieldsStream, toRead); + } +} Index: src/java/org/apache/lucene/document/Document.java =================================================================== --- src/java/org/apache/lucene/document/Document.java (révision 449380) +++ src/java/org/apache/lucene/document/Document.java (copie de travail) @@ -36,8 +36,8 @@ * IndexReader#document(int)}. */ -public final class Document implements java.io.Serializable { - List fields = new Vector(); +public class Document implements java.io.Serializable { + protected List fields = new Vector(); private float boost = 1.0f; /** Constructs a new document with no fields. */