Index: src/test/org/apache/lucene/store/IndexInputTest.java
===================================================================
--- src/test/org/apache/lucene/store/IndexInputTest.java (révision 0)
+++ src/test/org/apache/lucene/store/IndexInputTest.java (révision 0)
@@ -0,0 +1,104 @@
+package org.apache.lucene.store;
+
+import junit.framework.TestCase;
+
+public class IndexInputTest extends TestCase {
+
+ public void testInt() throws Exception {
+ genericTestInt(0);
+ genericTestInt(1);
+ genericTestInt(-1);
+ genericTestInt(Integer.MAX_VALUE);
+ genericTestInt(Integer.MIN_VALUE);
+ }
+
+ public void testVInt() throws Exception {
+ genericTestVInt(0);
+ genericTestVInt(1);
+ genericTestVInt(-1);
+ genericTestVInt(Integer.MAX_VALUE);
+ genericTestVInt(Integer.MIN_VALUE);
+ }
+
+ public void testLong() throws Exception {
+ genericTestLong(0);
+ genericTestLong(1);
+ genericTestLong(-1);
+ genericTestLong(Long.MAX_VALUE);
+ genericTestLong(Long.MIN_VALUE);
+ }
+
+ public void testVLong() throws Exception {
+ genericTestVLong(0);
+ genericTestVLong(1);
+ genericTestVLong(-1);
+ genericTestVLong(Long.MAX_VALUE);
+ genericTestVLong(Long.MIN_VALUE);
+ }
+
+ public void testString() throws Exception {
+ genericTestString("");
+ genericTestString("a");
+ genericTestString("GiyNNKHhnivNKKHgcNiCniCH716534912é_è'-(é(_çà-é$*ù!:;,!:;,");
+ }
+
+ private void genericTestInt(int i) throws Exception {
+ RAMFile fileA = new RAMFile();
+ RAMFile fileB = new RAMFile();
+ RAMOutputStream outA = new RAMOutputStream(fileA);
+ outA.writeInt(i);
+ outA.close();
+ RAMOutputStream outB = new RAMOutputStream(fileB);
+ outB.writeInt(new RAMInputStream(fileA));
+ outB.close();
+ assertEquals(i, new RAMInputStream(fileB).readInt());
+ }
+
+ private void genericTestVInt(int i) throws Exception {
+ RAMFile fileA = new RAMFile();
+ RAMFile fileB = new RAMFile();
+ RAMOutputStream outA = new RAMOutputStream(fileA);
+ outA.writeVInt(i);
+ outA.close();
+ RAMOutputStream outB = new RAMOutputStream(fileB);
+ outB.writeVInt(new RAMInputStream(fileA));
+ outB.close();
+ assertEquals(i, new RAMInputStream(fileB).readVInt());
+ }
+
+ private void genericTestLong(long l) throws Exception {
+ RAMFile fileA = new RAMFile();
+ RAMFile fileB = new RAMFile();
+ RAMOutputStream outA = new RAMOutputStream(fileA);
+ outA.writeLong(l);
+ outA.close();
+ RAMOutputStream outB = new RAMOutputStream(fileB);
+ outB.writeLong(new RAMInputStream(fileA));
+ outB.close();
+ assertEquals(l, new RAMInputStream(fileB).readLong());
+ }
+
+ private void genericTestVLong(long l) throws Exception {
+ RAMFile fileA = new RAMFile();
+ RAMFile fileB = new RAMFile();
+ RAMOutputStream outA = new RAMOutputStream(fileA);
+ outA.writeVLong(l);
+ outA.close();
+ RAMOutputStream outB = new RAMOutputStream(fileB);
+ outB.writeVLong(new RAMInputStream(fileA));
+ outB.close();
+ assertEquals(l, new RAMInputStream(fileB).readVLong());
+ }
+
+ private void genericTestString(String s) throws Exception {
+ RAMFile fileA = new RAMFile();
+ RAMFile fileB = new RAMFile();
+ RAMOutputStream outA = new RAMOutputStream(fileA);
+ outA.writeString(s);
+ outA.close();
+ RAMOutputStream outB = new RAMOutputStream(fileB);
+ outB.writeString(new RAMInputStream(fileA));
+ outB.close();
+ assertEquals(s, new RAMInputStream(fileB).readString());
+ }
+}
Index: src/test/org/apache/lucene/index/TestParallelTermEnum.java
===================================================================
--- src/test/org/apache/lucene/index/TestParallelTermEnum.java (révision 449380)
+++ src/test/org/apache/lucene/index/TestParallelTermEnum.java (copie de travail)
@@ -1,4 +1,19 @@
package org.apache.lucene.index;
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
import java.io.IOException;
Index: src/test/org/apache/lucene/index/TestDocumentWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestDocumentWriter.java (révision 449380)
+++ src/test/org/apache/lucene/index/TestDocumentWriter.java (copie de travail)
@@ -87,7 +87,7 @@
// test that the norm file is not present if omitNorms is true
for (int i = 0; i < reader.fieldInfos.size(); i++) {
- FieldInfo fi = reader.fieldInfos.fieldInfo(i);
+ FieldInfo fi = (FieldInfo) reader.fieldInfos.getEntry(i);
if (fi.isIndexed) {
assertTrue(fi.omitNorms == !dir.fileExists(segName + ".f" + i));
}
Index: src/test/org/apache/lucene/index/TestFieldInfos.java
===================================================================
--- src/test/org/apache/lucene/index/TestFieldInfos.java (révision 449380)
+++ src/test/org/apache/lucene/index/TestFieldInfos.java (copie de travail)
@@ -44,22 +44,22 @@
assertTrue(output.length() > 0);
FieldInfos readIn = new FieldInfos(dir, name);
assertTrue(fieldInfos.size() == readIn.size());
- FieldInfo info = readIn.fieldInfo("textField1");
+ FieldInfo info = (FieldInfo) readIn.getEntry("textField1");
assertTrue(info != null);
assertTrue(info.storeTermVector == false);
assertTrue(info.omitNorms == false);
- info = readIn.fieldInfo("textField2");
+ info = (FieldInfo) readIn.getEntry("textField2");
assertTrue(info != null);
assertTrue(info.storeTermVector == true);
assertTrue(info.omitNorms == false);
- info = readIn.fieldInfo("textField3");
+ info = (FieldInfo) readIn.getEntry("textField3");
assertTrue(info != null);
assertTrue(info.storeTermVector == false);
assertTrue(info.omitNorms == true);
- info = readIn.fieldInfo("omitNorms");
+ info = (FieldInfo) readIn.getEntry("omitNorms");
assertTrue(info != null);
assertTrue(info.storeTermVector == false);
assertTrue(info.omitNorms == true);
Index: src/test/org/apache/lucene/index/SimpleEntryTableTest.java
===================================================================
--- src/test/org/apache/lucene/index/SimpleEntryTableTest.java (révision 0)
+++ src/test/org/apache/lucene/index/SimpleEntryTableTest.java (révision 0)
@@ -0,0 +1,99 @@
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.RAMDirectory;
+
+import junit.framework.TestCase;
+
+public class SimpleEntryTableTest extends TestCase {
+
+ public void test() throws Exception {
+ SimpleEntryTable table = new SimpleEntryTable();
+ assertEquals(0, table.add("id1"));
+ assertEquals(1, table.add("id2"));
+ assertEquals(1, table.add("id2"));
+ assertEquals(2, table.size());
+
+ Entry entry = table.getEntry(0);
+ assertNotNull(entry);
+ assertEquals("id1", entry.getId());
+ assertEquals(0, entry.getIndex());
+
+ entry = table.getEntry(1);
+ assertNotNull(entry);
+ assertEquals("id2", entry.getId());
+ assertEquals(1, entry.getIndex());
+
+ entry = table.getEntry(2);
+ assertNull(entry);
+
+ entry = table.getEntry(-1);
+ assertNull(entry);
+
+ entry = table.getEntry("id1");
+ assertNotNull(entry);
+ assertEquals("id1", entry.getId());
+ assertEquals(0, entry.getIndex());
+
+ entry = table.getEntry("id2");
+ assertNotNull(entry);
+ assertEquals("id2", entry.getId());
+ assertEquals(1, entry.getIndex());
+
+ entry = table.getEntry("");
+ assertNull(entry);
+
+ entry = table.getEntry(null);
+ assertNull(entry);
+
+ assertEquals("id1", table.getId(0));
+ assertEquals("id2", table.getId(1));
+ assertNull(table.getId(2));
+ assertNull(table.getId(-1));
+
+ assertEquals(0, table.getIndex("id1"));
+ assertEquals(1, table.getIndex("id2"));
+ assertEquals(-1, table.getIndex(""));
+ assertEquals(-1, table.getIndex(null));
+ }
+
+ public void testIO() throws Exception {
+ SimpleEntryTable table = new SimpleEntryTable();
+ table.add("id1");
+ table.add("id2");
+ RAMDirectory dir = new RAMDirectory();
+ table.write(dir, "data");
+
+ SimpleEntryTable table2 = new SimpleEntryTable(dir, "data");
+
+ assertEquals(2, table2.size());
+
+ Entry entry = table2.getEntry(0);
+ assertNotNull(entry);
+ assertEquals("id1", entry.getId());
+ assertEquals(0, entry.getIndex());
+
+ entry = table2.getEntry(1);
+ assertNotNull(entry);
+ assertEquals("id2", entry.getId());
+ assertEquals(1, entry.getIndex());
+
+ entry = table2.getEntry(2);
+ assertNull(entry);
+ }
+}
Index: src/test/org/apache/lucene/index/TestFieldsReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestFieldsReader.java (révision 449380)
+++ src/test/org/apache/lucene/index/TestFieldsReader.java (copie de travail)
@@ -58,7 +58,7 @@
public void test() throws IOException {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
- FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+ FieldsReader reader = new DefaultFieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Document doc = reader.doc(0, null);
@@ -88,7 +88,7 @@
public void testLazyFields() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
- FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+ FieldsReader reader = new DefaultFieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Set loadFieldNames = new HashSet();
@@ -136,7 +136,7 @@
public void testLoadFirst() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
- FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+ FieldsReader reader = new DefaultFieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
@@ -181,7 +181,7 @@
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames);
for (int i = 0; i < length; i++) {
- reader = new FieldsReader(tmpDir, "test", fieldInfos);
+ reader = new DefaultFieldsReader(tmpDir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
@@ -205,7 +205,7 @@
doc = null;
//Hmmm, are we still in cache???
System.gc();
- reader = new FieldsReader(tmpDir, "test", fieldInfos);
+ reader = new DefaultFieldsReader(tmpDir, "test", fieldInfos);
doc = reader.doc(0, fieldSelector);
field = doc.getFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
assertTrue("field is not lazy", field.isLazy() == true);
Index: src/test/org/apache/lucene/index/rdf/RDFIndexTest.java
===================================================================
--- src/test/org/apache/lucene/index/rdf/RDFIndexTest.java (révision 0)
+++ src/test/org/apache/lucene/index/rdf/RDFIndexTest.java (révision 0)
@@ -0,0 +1,82 @@
+package org.apache.lucene.index.rdf;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.RAMDirectory;
+
+public class RDFIndexTest extends TestCase {
+
+ public void testindex() throws Exception {
+
+ RAMDirectory dir = new RAMDirectory(new RDFIndexFormat());
+
+ IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
+
+ Document doc = new Document();
+
+ doc.add(new RDFLiteralField("rdfproperty", "literal", null, null, Store.YES, Index.TOKENIZED, TermVector.NO));
+ doc.add(new RDFLiteralField("rdfproperty2", "literal2", null, "string", Store.YES, Index.TOKENIZED, TermVector.NO));
+ doc.add(new RDFLiteralField("rdfproperty3", "literal3", "fr", null, Store.YES, Index.TOKENIZED, TermVector.NO));
+ doc.add(new RDFLiteralField("rdfproperty4", "literal4", "fr", "string", Store.YES, Index.TOKENIZED, TermVector.NO));
+
+ writer.addDocument(doc);
+ writer.close();
+
+ IndexReader reader = IndexReader.open(dir);
+
+ doc = reader.document(0);
+
+ Fieldable[] fields = doc.getFieldables("rdfproperty");
+ assertEquals(1, fields.length);
+ assertEquals("rdfproperty", fields[0].name());
+ assertNull(fields[0].readerValue());
+ assertNull(fields[0].binaryValue());
+ assertEquals("literal", fields[0].stringValue());
+ assertTrue(fields[0] instanceof RDFLiteralField);
+ RDFLiteralField rdfField = (RDFLiteralField) fields[0];
+ assertNull(rdfField.getLang());
+ assertNull(rdfField.getType());
+
+ fields = doc.getFieldables("rdfproperty2");
+ assertEquals(1, fields.length);
+ assertEquals("rdfproperty2", fields[0].name());
+ assertNull(fields[0].readerValue());
+ assertNull(fields[0].binaryValue());
+ assertEquals("literal2", fields[0].stringValue());
+ assertTrue(fields[0] instanceof RDFLiteralField);
+ rdfField = (RDFLiteralField) fields[0];
+ assertNull(rdfField.getLang());
+ assertEquals("string", rdfField.getType());
+
+ fields = doc.getFieldables("rdfproperty3");
+ assertEquals(1, fields.length);
+ assertEquals("rdfproperty3", fields[0].name());
+ assertNull(fields[0].readerValue());
+ assertNull(fields[0].binaryValue());
+ assertEquals("literal3", fields[0].stringValue());
+ assertTrue(fields[0] instanceof RDFLiteralField);
+ rdfField = (RDFLiteralField) fields[0];
+ assertEquals("fr", rdfField.getLang());
+ assertNull(rdfField.getType());
+
+ fields = doc.getFieldables("rdfproperty4");
+ assertEquals(1, fields.length);
+ assertEquals("rdfproperty4", fields[0].name());
+ assertNull(fields[0].readerValue());
+ assertNull(fields[0].binaryValue());
+ assertEquals("literal4", fields[0].stringValue());
+ assertTrue(fields[0] instanceof RDFLiteralField);
+ rdfField = (RDFLiteralField) fields[0];
+ assertEquals("fr", rdfField.getLang());
+ assertEquals("string", rdfField.getType());
+
+ }
+}
Index: src/java/org/apache/lucene/index/FieldInfo.java
===================================================================
--- src/java/org/apache/lucene/index/FieldInfo.java (révision 449380)
+++ src/java/org/apache/lucene/index/FieldInfo.java (copie de travail)
@@ -1,5 +1,8 @@
package org.apache.lucene.index;
+import org.apache.lucene.document.Field;
+
+
/**
* Copyright 2004 The Apache Software Foundation
*
@@ -16,10 +19,8 @@
* limitations under the License.
*/
-final class FieldInfo {
- String name;
+public final class FieldInfo extends Entry {
boolean isIndexed;
- int number;
// true if term vector for this field should be stored
boolean storeTermVector;
@@ -28,14 +29,33 @@
boolean omitNorms; // omit norms associated with indexed fields
- FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
- boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
- name = na;
+ FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
+ boolean omitNorms) {
+ super(na, nu);
isIndexed = tk;
- number = nu;
this.storeTermVector = storeTermVector;
this.storeOffsetWithTermVector = storeOffsetWithTermVector;
this.storePositionWithTermVector = storePositionWithTermVector;
this.omitNorms = omitNorms;
}
+
+ public boolean omitNorms() {
+ return omitNorms;
+ }
+
+ public boolean isIndexed() {
+ return isIndexed;
+ }
+
+ public boolean storeOffsetWithTermVector() {
+ return storeOffsetWithTermVector;
+ }
+
+ public boolean storePositionWithTermVector() {
+ return storePositionWithTermVector;
+ }
+
+ public boolean storeTermVector() {
+ return storeTermVector;
+ }
}
Index: src/java/org/apache/lucene/index/CompoundFileReader.java
===================================================================
--- src/java/org/apache/lucene/index/CompoundFileReader.java (révision 449380)
+++ src/java/org/apache/lucene/index/CompoundFileReader.java (copie de travail)
@@ -54,6 +54,7 @@
throws IOException
{
directory = dir;
+ indexFormat = dir.getIndexFormat();
fileName = name;
boolean success = false;
Index: src/java/org/apache/lucene/index/DefaultFieldsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DefaultFieldsWriter.java (révision 0)
+++ src/java/org/apache/lucene/index/DefaultFieldsWriter.java (révision 0)
@@ -0,0 +1,153 @@
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.zip.Deflater;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * The default implementation of FieldsWriter
+ *
+ * $Id$
+ */
+public class DefaultFieldsWriter extends FieldsWriter {
+
+ protected DefaultFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+ super(d, segment, fn);
+ }
+
+ /**
+ * There is no data stored at the document level
+ */
+ protected void writeDocumentData(IndexOutput out, Document doc) throws IOException {
+ //nothing to write
+ }
+
+ /**
+ * If the field to write has been loaded lazily, it does a direct copy from the
+ * source to the output.
+ */
+ protected void writeField(Fieldable field, IndexOutput out) throws IOException {
+ if (field.isLazy() && isBinaryCompatible(field)) {
+ field.writeFromLazyLoading(out);
+ } else {
+ byte bits = 0;
+ if (field.isTokenized())
+ bits |= Field.FIELD_IS_TOKENIZED;
+ if (field.isBinary())
+ bits |= Field.FIELD_IS_BINARY;
+ if (field instanceof Field && ((Field) field).isCompressed()) {
+ bits |= Field.FIELD_IS_COMPRESSED;
+ }
+
+ out.writeByte(bits);
+
+ if (field instanceof Field && ((Field) field).isCompressed()) {
+ // compression is enabled for the current field
+ byte[] bdata = null;
+ // check if it is a binary field
+ if (field.isBinary()) {
+ bdata = compress(field.binaryValue());
+ } else {
+ bdata = compress(field.stringValue().getBytes("UTF-8"));
+ }
+ final int len = bdata.length;
+ out.writeVInt(len);
+ out.writeBytes(bdata, len);
+ } else {
+ // compression is disabled for the current field
+ if (field.isBinary()) {
+ byte[] bdata = field.binaryValue();
+ final int len = bdata.length;
+ out.writeVInt(len);
+ out.writeBytes(bdata, len);
+ } else {
+ out.writeString(field.stringValue());
+ }
+ }
+ }
+ }
+
+ /**
+ * Test if the specified field is binary compatible with the current format, so
+ * it allows us to do a direct copy from the lazily loaded field into an index
+ *
+ * @param field the field to test
+ * @return true if it is compatible
+ */
+ protected boolean isBinaryCompatible(Fieldable field) {
+ return field instanceof Field;
+ }
+
+ /**
+ * To be overridden by subclasses to choose a different level of compression
+ *
+ * @return the compression level
+ */
+ protected int getCompressionLevel() {
+ return Deflater.BEST_COMPRESSION;
+ }
+
+ /**
+ * Do the compression of data
+ *
+ * To be overridden by subclasses to use a different format of compression. If overridden, you
+ * probably should also override isBinaryCompatible and the decompress function of
+ * DefaultFieldsReader.
+ *
+ * @param input the data to compress
+ * @return the compressed data
+ */
+ protected byte[] compress(byte[] input) {
+
+ // Create the compressor and set the level returned by getCompressionLevel()
+ Deflater compressor = new Deflater();
+ compressor.setLevel(getCompressionLevel());
+
+ // Give the compressor the data to compress
+ compressor.setInput(input);
+ compressor.finish();
+
+ /*
+ * Create an expandable byte array to hold the compressed data.
+ * You cannot use an array that's the same size as the original because
+ * there is no guarantee that the compressed data will be smaller than
+ * the uncompressed data.
+ */
+ ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
+
+ // Compress the data
+ byte[] buf = new byte[1024];
+ while (!compressor.finished()) {
+ int count = compressor.deflate(buf);
+ bos.write(buf, 0, count);
+ }
+
+ compressor.end();
+
+ // Get the compressed data
+ return bos.toByteArray();
+ }
+
+}
Index: src/java/org/apache/lucene/index/IndexFormat.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFormat.java (révision 0)
+++ src/java/org/apache/lucene/index/IndexFormat.java (révision 0)
@@ -0,0 +1,81 @@
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.store.Directory;
+
+/**
+ * Specifies the format of an index.
+ *
+ * The implementations of FieldsReader and FieldsWriter returned by the methods
+ * getFieldsReader and getFieldsWriter will specify how the data of fields are
+ * serialized, and also the kind of Fieldable used.
+ *
+ * $Id$
+ */
+public interface IndexFormat {
+
+ /**
+ * This array contains all filename extensions used by Lucene's index files, with
+ * one exception, namely the extension made up from .f + a number.
+ * Also note that two of Lucene's files (deletable and
+ * segments) don't have any filename extension.
+ *
+ * @return a List of String
+ */
+ List getIndexExtensions();
+
+ /**
+ * File extensions of old-style index files
+ *
+ * @return a List of String
+ */
+ List getCompoundExtensions();
+
+ /**
+ * File extensions for term vector support
+ *
+ * @return a List of String
+ */
+ List getVectorExtensions();
+
+ /**
+ * Return an implementation of FieldsReader for this format
+ *
+ * @param d the directory to use
+ * @param segment the segment name
+ * @param fn the infos on fields
+ * @return the implementation of FieldsReader
+ * @throws IOException
+ */
+ FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException;
+
+ /**
+ * Return an implementation of FieldsWriter for this format
+ *
+ * @param d the directory to use
+ * @param segment the segment name
+ * @param fn the infos on fields
+ * @return the implementation of FieldsWriter
+ * @throws IOException
+ */
+ FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException;
+
+}
Index: src/java/org/apache/lucene/index/FieldInfos.java
===================================================================
--- src/java/org/apache/lucene/index/FieldInfos.java (révision 449380)
+++ src/java/org/apache/lucene/index/FieldInfos.java (copie de travail)
@@ -31,7 +31,7 @@
* be adding documents at a time, with no other reader or writer threads
* accessing this object.
*/
-final class FieldInfos {
+public final class FieldInfos extends EntryTable {
static final byte IS_INDEXED = 0x1;
static final byte STORE_TERMVECTOR = 0x2;
@@ -39,10 +39,9 @@
static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
static final byte OMIT_NORMS = 0x10;
- private ArrayList byNumber = new ArrayList();
- private HashMap byName = new HashMap();
- FieldInfos() { }
+ public FieldInfos() {
+ }
/**
* Construct a FieldInfos object using the directory and the name of the file
@@ -52,12 +51,7 @@
* @throws IOException
*/
FieldInfos(Directory d, String name) throws IOException {
- IndexInput input = d.openInput(name);
- try {
- read(input);
- } finally {
- input.close();
- }
+ super(d, name);
}
/** Adds field info for a Document. */
@@ -155,9 +149,11 @@
*/
public void add(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
- FieldInfo fi = fieldInfo(name);
+ FieldInfo fi = (FieldInfo) getEntry(name);
if (fi == null) {
- addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
+ int n = size();
+ fi = new FieldInfo(name, isIndexed, n, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
+ add(fi);
} else {
if (fi.isIndexed != isIndexed) {
fi.isIndexed = true; // once indexed, always index
@@ -178,72 +174,10 @@
}
}
-
- private void addInternal(String name, boolean isIndexed,
- boolean storeTermVector, boolean storePositionWithTermVector,
- boolean storeOffsetWithTermVector, boolean omitNorms) {
- FieldInfo fi =
- new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
- storeOffsetWithTermVector, omitNorms);
- byNumber.add(fi);
- byName.put(name, fi);
- }
-
- public int fieldNumber(String fieldName) {
- try {
- FieldInfo fi = fieldInfo(fieldName);
- if (fi != null)
- return fi.number;
- }
- catch (IndexOutOfBoundsException ioobe) {
- return -1;
- }
- return -1;
- }
-
- public FieldInfo fieldInfo(String fieldName) {
- return (FieldInfo) byName.get(fieldName);
- }
-
- /**
- * Return the fieldName identified by its number.
- *
- * @param fieldNumber
- * @return the fieldName or an empty string when the field
- * with the given number doesn't exist.
- */
- public String fieldName(int fieldNumber) {
- try {
- return fieldInfo(fieldNumber).name;
- }
- catch (NullPointerException npe) {
- return "";
- }
- }
-
- /**
- * Return the fieldinfo object referenced by the fieldNumber.
- * @param fieldNumber
- * @return the FieldInfo object or null when the given fieldNumber
- * doesn't exist.
- */
- public FieldInfo fieldInfo(int fieldNumber) {
- try {
- return (FieldInfo) byNumber.get(fieldNumber);
- }
- catch (IndexOutOfBoundsException ioobe) {
- return null;
- }
- }
-
- public int size() {
- return byNumber.size();
- }
-
public boolean hasVectors() {
boolean hasVectors = false;
for (int i = 0; i < size(); i++) {
- if (fieldInfo(i).storeTermVector) {
+ if (((FieldInfo) getEntry(i)).storeTermVector) {
hasVectors = true;
break;
}
@@ -251,43 +185,40 @@
return hasVectors;
}
- public void write(Directory d, String name) throws IOException {
- IndexOutput output = d.createOutput(name);
- try {
- write(output);
- } finally {
- output.close();
+ /**
+ * Just change the behaviour to never return null but return an empty string
+ */
+ public String getId(int index) {
+ String id = super.getId(index);
+ if (id == null) {
+ return "";
}
+ return id;
}
- public void write(IndexOutput output) throws IOException {
- output.writeVInt(size());
- for (int i = 0; i < size(); i++) {
- FieldInfo fi = fieldInfo(i);
- byte bits = 0x0;
- if (fi.isIndexed) bits |= IS_INDEXED;
- if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
- if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
- if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
- if (fi.omitNorms) bits |= OMIT_NORMS;
- output.writeString(fi.name);
- output.writeByte(bits);
- }
+ protected void writeEntry(Entry info, IndexOutput output) throws IOException {
+ FieldInfo fi = (FieldInfo) info;
+ byte bits = 0x0;
+ if (fi.isIndexed) bits |= IS_INDEXED;
+ if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
+ if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
+ if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
+ if (fi.omitNorms) bits |= OMIT_NORMS;
+ output.writeString(fi.getId());
+ output.writeByte(bits);
}
- private void read(IndexInput input) throws IOException {
- int size = input.readVInt();//read in the size
- for (int i = 0; i < size; i++) {
- String name = input.readString().intern();
- byte bits = input.readByte();
- boolean isIndexed = (bits & IS_INDEXED) != 0;
- boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
- boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
- boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
- boolean omitNorms = (bits & OMIT_NORMS) != 0;
+ protected Entry readEntry(int number, IndexInput input) throws IOException {
+ String name = input.readString().intern();
+ byte bits = input.readByte();
+ boolean isIndexed = (bits & IS_INDEXED) != 0;
+ boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
+ boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+ boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
+ boolean omitNorms = (bits & OMIT_NORMS) != 0;
- addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms);
- }
+ return new FieldInfo(name, isIndexed, size(), storeTermVector, storePositionsWithTermVector,
+ storeOffsetWithTermVector, omitNorms);
}
}
Index: src/java/org/apache/lucene/index/SimpleEntryTable.java
===================================================================
--- src/java/org/apache/lucene/index/SimpleEntryTable.java (révision 0)
+++ src/java/org/apache/lucene/index/SimpleEntryTable.java (révision 0)
@@ -0,0 +1,88 @@
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * A simple implementation of a Lucene-serialized table of entries. It only stores
+ * the ids of the entries.
+ *
+ * $Id$
+ */
+public class SimpleEntryTable extends EntryTable {
+
+ /**
+ * Constructor used to populate a table from scratch
+ *
+ */
+ public SimpleEntryTable() {
+ super();
+ }
+
+ /**
+ * Construct a SimpleEntryTable object using the directory and the name of the file
+ * IndexInput
+ *
+ * @param d The directory to open the IndexInput from
+ * @param name The name of the file to open the IndexInput from in the Directory
+ * @throws IOException
+ */
+ public SimpleEntryTable(Directory d, String name) throws IOException {
+ super(d, name);
+ }
+
+ /**
+ * Just write the ID
+ */
+ protected void writeEntry(Entry entry, IndexOutput output) throws IOException {
+ String id = entry.getId();
+ output.writeString(id);
+ }
+
+ /**
+ * Just read the ID
+ */
+ protected Entry readEntry(int index, IndexInput input) throws IOException {
+ String id = input.readString().intern();
+ return new Entry(id, index);
+ }
+
+ /**
+ * Add an entry with the specified Id. If an entry already exist
+ * in the table, no entry is added, it just returns the index of
+ * the entry already stored.
+ *
+ * @param id the id to insert
+ * @return the created or found index of the entry
+ */
+ public int add(String id) {
+ Entry entry = getEntry(id);
+ if (entry != null) {
+ return entry.getIndex();
+ }
+ int index = size();
+ entry = new Entry(id, index);
+ add(entry);
+ return index;
+ }
+
+}
Index: src/java/org/apache/lucene/index/DefaultIndexFormat.java
===================================================================
--- src/java/org/apache/lucene/index/DefaultIndexFormat.java (révision 0)
+++ src/java/org/apache/lucene/index/DefaultIndexFormat.java (révision 0)
@@ -0,0 +1,66 @@
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+
+/**
+ * The default implementation of the index format
+ *
+ * $Id$
+ */
+public class DefaultIndexFormat implements IndexFormat {
+
+ private static final List INDEX_EXTENSIONS = Arrays.asList(new String[] { "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx",
+ "tvd", "tvf", "tvp" });
+
+ private static final List COMPOUND_EXTENSIONS = Arrays.asList(new String[] { "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis" });
+
+ private static final List VECTOR_EXTENSIONS = Arrays.asList(new String[] { "tvx", "tvd", "tvf" });
+
+ public List getIndexExtensions() {
+ return INDEX_EXTENSIONS;
+ }
+
+ public List getCompoundExtensions() {
+ return COMPOUND_EXTENSIONS;
+ }
+
+ public List getVectorExtensions() {
+ return VECTOR_EXTENSIONS;
+ }
+
+ /**
+ * Use the default implementation of FieldsReader : DefaultFieldsReader
+ */
+ public FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+ return new DefaultFieldsReader(d, segment, fn);
+ }
+
+ /**
+ * Use the default implementation of FieldsWriter : DefaultFieldsWriter
+ */
+ public FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+ return new DefaultFieldsWriter(d, segment, fn);
+ }
+
+}
Index: src/java/org/apache/lucene/index/FieldsReader.java
===================================================================
--- src/java/org/apache/lucene/index/FieldsReader.java (révision 449380)
+++ src/java/org/apache/lucene/index/FieldsReader.java (copie de travail)
@@ -16,15 +16,9 @@
* limitations under the License.
*/
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.io.Reader;
-import java.util.zip.DataFormatException;
-import java.util.zip.Inflater;
-import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
@@ -38,15 +32,16 @@
*
* @version $Id$
*/
-final class FieldsReader {
+public abstract class FieldsReader {
private FieldInfos fieldInfos;
+
private IndexInput fieldsStream;
+
private IndexInput indexStream;
+
private int size;
- private static ThreadLocal fieldsStreamTL = new ThreadLocal();
-
- FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+ protected FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
fieldInfos = fn;
fieldsStream = d.openInput(segment + ".fdt");
@@ -60,14 +55,9 @@
*
* @throws IOException
*/
- final void close() throws IOException {
+ protected void close() throws IOException {
fieldsStream.close();
indexStream.close();
- IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
- if (localFieldsStream != null) {
- localFieldsStream.close();
- fieldsStreamTL.set(null);
- }
}
final int size() {
@@ -79,352 +69,40 @@
long position = indexStream.readLong();
fieldsStream.seek(position);
- Document doc = new Document();
+ Document doc = createDocument(fieldsStream);
+
int numFields = fieldsStream.readVInt();
for (int i = 0; i < numFields; i++) {
int fieldNumber = fieldsStream.readVInt();
- FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
- FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
- boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true;
-
- byte bits = fieldsStream.readByte();
- boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
- boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
- boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
- if (acceptField.equals(FieldSelectorResult.LOAD) == true) {
- addField(doc, fi, binary, compressed, tokenize);
- }
- else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE) == true) {
- addFieldForMerge(doc, fi, binary, compressed, tokenize);
- }
- else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){
- addField(doc, fi, binary, compressed, tokenize);
- break;//Get out of this loop
- }
- else if (lazy == true){
- addFieldLazy(doc, fi, binary, compressed, tokenize);
- }
- else {
- skipField(binary, compressed);
- }
- }
+ FieldInfo fi = (FieldInfo) fieldInfos.getEntry(fieldNumber);
+ FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.getId());
- return doc;
- }
+ Fieldable field = createField(fi);
- /**
- * Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
- * This will have the most payoff on large fields.
- */
- private void skipField(boolean binary, boolean compressed) throws IOException {
-
- int toRead = fieldsStream.readVInt();
-
- if (binary || compressed) {
+ boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD);
+ boolean skip = acceptField.equals(FieldSelectorResult.NO_LOAD);
+
long pointer = fieldsStream.getFilePointer();
- fieldsStream.seek(pointer + toRead);
- } else {
- //We need to skip chars. This will slow us down, but still better
- fieldsStream.skipChars(toRead);
- }
- }
- private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
- if (binary == true) {
- int toRead = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- if (compressed) {
- //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
- doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
- } else {
- //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
- doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
- }
- //Need to move the pointer ahead by toRead positions
- fieldsStream.seek(pointer + toRead);
- } else {
- Field.Store store = Field.Store.YES;
- Field.Index index = getIndexType(fi, tokenize);
- Field.TermVector termVector = getTermVectorType(fi);
+ field.readStream(fieldsStream, skip || lazy);
- Fieldable f;
- if (compressed) {
- store = Field.Store.COMPRESS;
- int toRead = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- f = new LazyField(fi.name, store, toRead, pointer);
- //skip over the part that we aren't loading
- fieldsStream.seek(pointer + toRead);
- f.setOmitNorms(fi.omitNorms);
- } else {
- int length = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- //Skip ahead of where we are by the length of what is stored
- fieldsStream.skipChars(length);
- f = new LazyField(fi.name, store, index, termVector, length, pointer);
- f.setOmitNorms(fi.omitNorms);
+ if (lazy) {
+ field.setLazyData(fieldsStream, pointer, fieldsStream.getFilePointer() - pointer);
}
- doc.add(f);
- }
- }
-
- // in merge mode we don't uncompress the data of a compressed field
- private void addFieldForMerge(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
- Object data;
-
- if (binary || compressed) {
- int toRead = fieldsStream.readVInt();
- final byte[] b = new byte[toRead];
- fieldsStream.readBytes(b, 0, b.length);
- data = b;
- } else {
- data = fieldsStream.readString();
- }
-
- doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
- }
-
- private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
-
- //we have a binary stored field, and it may be compressed
- if (binary) {
- int toRead = fieldsStream.readVInt();
- final byte[] b = new byte[toRead];
- fieldsStream.readBytes(b, 0, b.length);
- if (compressed)
- doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
- else
- doc.add(new Field(fi.name, b, Field.Store.YES));
-
- } else {
- Field.Store store = Field.Store.YES;
- Field.Index index = getIndexType(fi, tokenize);
- Field.TermVector termVector = getTermVectorType(fi);
-
- Fieldable f;
- if (compressed) {
- store = Field.Store.COMPRESS;
- int toRead = fieldsStream.readVInt();
-
- final byte[] b = new byte[toRead];
- fieldsStream.readBytes(b, 0, b.length);
- f = new Field(fi.name, // field name
- new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
- store,
- index,
- termVector);
- f.setOmitNorms(fi.omitNorms);
- } else {
- f = new Field(fi.name, // name
- fieldsStream.readString(), // read value
- store,
- index,
- termVector);
- f.setOmitNorms(fi.omitNorms);
+ if (!skip) {
+ doc.add(field);
}
- doc.add(f);
- }
- }
-
- private Field.TermVector getTermVectorType(FieldInfo fi) {
- Field.TermVector termVector = null;
- if (fi.storeTermVector) {
- if (fi.storeOffsetWithTermVector) {
- if (fi.storePositionWithTermVector) {
- termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
- } else {
- termVector = Field.TermVector.WITH_OFFSETS;
- }
- } else if (fi.storePositionWithTermVector) {
- termVector = Field.TermVector.WITH_POSITIONS;
- } else {
- termVector = Field.TermVector.YES;
+ if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)) {
+ break;
}
- } else {
- termVector = Field.TermVector.NO;
}
- return termVector;
- }
- private Field.Index getIndexType(FieldInfo fi, boolean tokenize) {
- Field.Index index;
- if (fi.isIndexed && tokenize)
- index = Field.Index.TOKENIZED;
- else if (fi.isIndexed && !tokenize)
- index = Field.Index.UN_TOKENIZED;
- else
- index = Field.Index.NO;
- return index;
+ return doc;
}
- /**
- * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is
- * loaded.
- */
- private class LazyField extends AbstractField implements Fieldable {
- private int toRead;
- private long pointer;
- //internal buffer
- private char[] chars;
+ protected abstract Document createDocument(IndexInput in);
+ protected abstract Fieldable createField(FieldInfo fi);
- public LazyField(String name, Field.Store store, int toRead, long pointer) {
- super(name, store, Field.Index.NO, Field.TermVector.NO);
- this.toRead = toRead;
- this.pointer = pointer;
- lazy = true;
- }
-
- public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) {
- super(name, store, index, termVector);
- this.toRead = toRead;
- this.pointer = pointer;
- lazy = true;
- }
-
- /**
- * The value of the field in Binary, or null. If null, the Reader or
- * String value is used. Exactly one of stringValue(), readerValue() and
- * binaryValue() must be set.
- */
- public byte[] binaryValue() {
- if (fieldsData == null) {
- final byte[] b = new byte[toRead];
- IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
- if (localFieldsStream == null) {
- localFieldsStream = (IndexInput) fieldsStream.clone();
- fieldsStreamTL.set(localFieldsStream);
- }
- //Throw this IO Exception since IndexREader.document does so anyway, so probably not that big of a change for people
- //since they are already handling this exception when getting the document
- try {
- localFieldsStream.seek(pointer);
- localFieldsStream.readBytes(b, 0, b.length);
- if (isCompressed == true) {
- fieldsData = uncompress(b);
- } else {
- fieldsData = b;
- }
- } catch (IOException e) {
- throw new FieldReaderException(e);
- }
- }
- return fieldsData instanceof byte[] ? (byte[]) fieldsData : null;
- }
-
- /**
- * The value of the field as a Reader, or null. If null, the String value
- * or binary value is used. Exactly one of stringValue(), readerValue(),
- * and binaryValue() must be set.
- */
- public Reader readerValue() {
- return fieldsData instanceof Reader ? (Reader) fieldsData : null;
- }
-
- /**
- * The value of the field as a String, or null. If null, the Reader value
- * or binary value is used. Exactly one of stringValue(), readerValue(), and
- * binaryValue() must be set.
- */
- public String stringValue() {
- if (fieldsData == null) {
- IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
- if (localFieldsStream == null) {
- localFieldsStream = (IndexInput) fieldsStream.clone();
- fieldsStreamTL.set(localFieldsStream);
- }
- try {
- localFieldsStream.seek(pointer);
- //read in chars b/c we already know the length we need to read
- if (chars == null || toRead > chars.length)
- chars = new char[toRead];
- localFieldsStream.readChars(chars, 0, toRead);
- fieldsData = new String(chars, 0, toRead);//fieldsStream.readString();
- } catch (IOException e) {
- throw new FieldReaderException(e);
- }
- }
- return fieldsData instanceof String ? (String) fieldsData : null;
- }
-
- public long getPointer() {
- return pointer;
- }
-
- public void setPointer(long pointer) {
- this.pointer = pointer;
- }
-
- public int getToRead() {
- return toRead;
- }
-
- public void setToRead(int toRead) {
- this.toRead = toRead;
- }
- }
-
- private final byte[] uncompress(final byte[] input)
- throws IOException {
-
- Inflater decompressor = new Inflater();
- decompressor.setInput(input);
-
- // Create an expandable byte array to hold the decompressed data
- ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
-
- // Decompress the data
- byte[] buf = new byte[1024];
- while (!decompressor.finished()) {
- try {
- int count = decompressor.inflate(buf);
- bos.write(buf, 0, count);
- }
- catch (DataFormatException e) {
- // this will happen if the field is not compressed
- IOException newException = new IOException("field data are in wrong format: " + e.toString());
- newException.initCause(e);
- throw newException;
- }
- }
-
- decompressor.end();
-
- // Get the decompressed data
- return bos.toByteArray();
- }
-
- // Instances of this class hold field properties and data
- // for merge
- final static class FieldForMerge extends AbstractField {
- public String stringValue() {
- return (String) this.fieldsData;
- }
-
- public Reader readerValue() {
- // not needed for merge
- return null;
- }
-
- public byte[] binaryValue() {
- return (byte[]) this.fieldsData;
- }
-
- public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {
- this.isStored = true;
- this.fieldsData = value;
- this.isCompressed = compressed;
- this.isBinary = binary;
- this.isTokenized = tokenize;
-
- this.name = fi.name.intern();
- this.isIndexed = fi.isIndexed;
- this.omitNorms = fi.omitNorms;
- this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
- this.storePositionWithTermVector = fi.storePositionWithTermVector;
- this.storeTermVector = fi.storeTermVector;
- }
-
- }
}
Index: src/java/org/apache/lucene/index/IndexFileNames.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileNames.java (révision 449380)
+++ src/java/org/apache/lucene/index/IndexFileNames.java (copie de travail)
@@ -29,25 +29,5 @@
/** Name of the index deletable file */
static final String DELETABLE = "deletable";
-
- /**
- * This array contains all filename extensions used by Lucene's index files, with
- * one exception, namely the extension made up from .f + a number.
- * Also note that two of Lucene's files (deletable and
- * segments) don't have any filename extension.
- */
- static final String INDEX_EXTENSIONS[] = new String[] {
- "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del",
- "tvx", "tvd", "tvf", "tvp" };
-
- /** File extensions of old-style index files */
- static final String COMPOUND_EXTENSIONS[] = new String[] {
- "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis"
- };
-
- /** File extensions for term vector support */
- static final String VECTOR_EXTENSIONS[] = new String[] {
- "tvx", "tvd", "tvf"
- };
-
+
}
Index: src/java/org/apache/lucene/index/FilterIndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/FilterIndexReader.java (révision 449380)
+++ src/java/org/apache/lucene/index/FilterIndexReader.java (copie de travail)
@@ -18,6 +18,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
Index: src/java/org/apache/lucene/index/TermVectorsReader.java
===================================================================
--- src/java/org/apache/lucene/index/TermVectorsReader.java (révision 449380)
+++ src/java/org/apache/lucene/index/TermVectorsReader.java (copie de travail)
@@ -88,7 +88,7 @@
*/
TermFreqVector get(int docNum, String field) throws IOException {
// Check if no term vectors are available for this segment at all
- int fieldNumber = fieldInfos.fieldNumber(field);
+ int fieldNumber = fieldInfos.getIndex(field);
TermFreqVector result = null;
if (tvx != null) {
//We need to account for the FORMAT_SIZE at when seeking in the tvx
@@ -164,7 +164,7 @@
else
number += tvd.readVInt();
- fields[i] = fieldInfos.fieldName(number);
+ fields[i] = fieldInfos.getId(number);
}
// Compute position in the tvf file
Index: src/java/org/apache/lucene/index/Entry.java
===================================================================
--- src/java/org/apache/lucene/index/Entry.java (révision 0)
+++ src/java/org/apache/lucene/index/Entry.java (révision 0)
@@ -0,0 +1,57 @@
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An entry is some data in a Lucene-serialized table. This is the simplest
+ * kind of entry, it has an index and an ID.
+ *
+ * $Id$
+ */
+public class Entry {
+
+ private int index;
+
+ private String id;
+
+ /**
+ * Constructor
+ *
+ * @param id the id of the entry
+ * @param index the index of the entry
+ */
+ protected Entry(String id, int index) {
+ this.index = index;
+ this.id = id;
+ }
+
+ /**
+ *
+ * @return the id of the entry
+ */
+ public String getId() {
+ return id;
+ }
+
+ /**
+ *
+ * @return the index of the entry
+ */
+ public int getIndex() {
+ return index;
+ }
+}
Index: src/java/org/apache/lucene/index/TermBuffer.java
===================================================================
--- src/java/org/apache/lucene/index/TermBuffer.java (révision 449380)
+++ src/java/org/apache/lucene/index/TermBuffer.java (copie de travail)
@@ -64,7 +64,7 @@
int totalLength = start + length;
setTextLength(totalLength);
input.readChars(this.text, start, length);
- this.field = fieldInfos.fieldName(input.readVInt());
+ this.field = fieldInfos.getId(input.readVInt());
}
public final void set(Term term) {
Index: src/java/org/apache/lucene/index/FieldsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/FieldsWriter.java (révision 449380)
+++ src/java/org/apache/lucene/index/FieldsWriter.java (copie de travail)
@@ -16,36 +16,29 @@
* the License.
*/
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.util.Enumeration;
-import java.util.zip.Deflater;
+import java.util.Iterator;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
-final class FieldsWriter
-{
- static final byte FIELD_IS_TOKENIZED = 0x1;
- static final byte FIELD_IS_BINARY = 0x2;
- static final byte FIELD_IS_COMPRESSED = 0x4;
-
+public abstract class FieldsWriter {
+
private FieldInfos fieldInfos;
private IndexOutput fieldsStream;
private IndexOutput indexStream;
- FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+ protected FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
fieldInfos = fn;
fieldsStream = d.createOutput(segment + ".fdt");
indexStream = d.createOutput(segment + ".fdx");
}
- final void close() throws IOException {
+ protected void close() throws IOException {
fieldsStream.close();
indexStream.close();
}
@@ -53,100 +46,29 @@
final void addDocument(Document doc) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
+ writeDocumentData(fieldsStream, doc);
+
int storedCount = 0;
- Enumeration fields = doc.fields();
- while (fields.hasMoreElements()) {
- Fieldable field = (Fieldable) fields.nextElement();
+ Iterator fields = doc.getFields().iterator();
+ while (fields.hasNext()) {
+ Fieldable field = (Fieldable) fields.next();
if (field.isStored())
storedCount++;
}
fieldsStream.writeVInt(storedCount);
- fields = doc.fields();
- while (fields.hasMoreElements()) {
- Fieldable field = (Fieldable) fields.nextElement();
- // if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode
- // and field.binaryValue() already returns the compressed value for a field
- // with isCompressed()==true, so we disable compression in that case
- boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
+ fields = doc.getFields().iterator();
+ while (fields.hasNext()) {
+ Fieldable field = (Fieldable) fields.next();
if (field.isStored()) {
- fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name()));
-
- byte bits = 0;
- if (field.isTokenized())
- bits |= FieldsWriter.FIELD_IS_TOKENIZED;
- if (field.isBinary())
- bits |= FieldsWriter.FIELD_IS_BINARY;
- if (field.isCompressed())
- bits |= FieldsWriter.FIELD_IS_COMPRESSED;
-
- fieldsStream.writeByte(bits);
-
- if (field.isCompressed()) {
- // compression is enabled for the current field
- byte[] data = null;
-
- if (disableCompression) {
- // optimized case for merging, the data
- // is already compressed
- data = field.binaryValue();
- } else {
- // check if it is a binary field
- if (field.isBinary()) {
- data = compress(field.binaryValue());
- }
- else {
- data = compress(field.stringValue().getBytes("UTF-8"));
- }
- }
- final int len = data.length;
- fieldsStream.writeVInt(len);
- fieldsStream.writeBytes(data, len);
- }
- else {
- // compression is disabled for the current field
- if (field.isBinary()) {
- byte[] data = field.binaryValue();
- final int len = data.length;
- fieldsStream.writeVInt(len);
- fieldsStream.writeBytes(data, len);
- }
- else {
- fieldsStream.writeString(field.stringValue());
- }
- }
+ fieldsStream.writeVInt(fieldInfos.getIndex(field.name()));
+ writeField(field, fieldsStream);
}
}
}
- private final byte[] compress (byte[] input) {
+ abstract protected void writeDocumentData(IndexOutput out, Document doc) throws IOException;
- // Create the compressor with highest level of compression
- Deflater compressor = new Deflater();
- compressor.setLevel(Deflater.BEST_COMPRESSION);
+ abstract protected void writeField(Fieldable field, IndexOutput out) throws IOException;
- // Give the compressor the data to compress
- compressor.setInput(input);
- compressor.finish();
-
- /*
- * Create an expandable byte array to hold the compressed data.
- * You cannot use an array that's the same size as the orginal because
- * there is no guarantee that the compressed data will be smaller than
- * the uncompressed data.
- */
- ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
-
- // Compress the data
- byte[] buf = new byte[1024];
- while (!compressor.finished()) {
- int count = compressor.deflate(buf);
- bos.write(buf, 0, count);
- }
-
- compressor.end();
-
- // Get the compressed data
- return bos.toByteArray();
- }
}
Index: src/java/org/apache/lucene/index/TermInfosWriter.java
===================================================================
--- src/java/org/apache/lucene/index/TermInfosWriter.java (révision 449380)
+++ src/java/org/apache/lucene/index/TermInfosWriter.java (copie de travail)
@@ -131,7 +131,7 @@
output.writeVInt(length); // write delta length
output.writeChars(term.text, start, length); // write delta chars
- output.writeVInt(fieldInfos.fieldNumber(term.field)); // write field num
+ output.writeVInt(fieldInfos.getIndex(term.field)); // write field num
lastTerm = term;
}
Index: src/java/org/apache/lucene/index/SegmentMerger.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentMerger.java (révision 449380)
+++ src/java/org/apache/lucene/index/SegmentMerger.java (copie de travail)
@@ -16,6 +16,7 @@
* limitations under the License.
*/
+import java.util.List;
import java.util.Vector;
import java.util.Iterator;
import java.util.Collection;
@@ -46,7 +47,7 @@
private Vector readers = new Vector();
private FieldInfos fieldInfos;
- /** This ctor used only by test code.
+ /** This constructor is used only by test code.
*
* @param dir The Directory to merge the other segments into
* @param name The name of the new segment
@@ -111,29 +112,31 @@
final Vector createCompoundFile(String fileName)
throws IOException {
- CompoundFileWriter cfsWriter =
- new CompoundFileWriter(directory, fileName);
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName);
- Vector files =
- new Vector(IndexFileNames.COMPOUND_EXTENSIONS.length + fieldInfos.size());
-
+ List compoundExtensions = directory.getIndexFormat().getCompoundExtensions();
+
+ Vector files = new Vector(compoundExtensions.size() + fieldInfos.size());
+
// Basic files
- for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) {
- files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
+ for (int i = 0; i < compoundExtensions.size(); i++) {
+ files.add(segment + "." + compoundExtensions.get(i));
}
// Fieldable norm files
for (int i = 0; i < fieldInfos.size(); i++) {
- FieldInfo fi = fieldInfos.fieldInfo(i);
+ FieldInfo fi = (FieldInfo) fieldInfos.getEntry(i);
if (fi.isIndexed && !fi.omitNorms) {
files.add(segment + ".f" + i);
}
}
+ List vectorExtensions = directory.getIndexFormat().getVectorExtensions();
+
// Vector files
if (fieldInfos.hasVectors()) {
- for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.length; i++) {
- files.add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]);
+ for (int i = 0; i < vectorExtensions.size(); i++) {
+ files.add(segment + "." + vectorExtensions.get(i));
}
}
@@ -177,14 +180,13 @@
}
fieldInfos.write(directory, segment + ".fnm");
- FieldsWriter fieldsWriter = // merge field values
- new FieldsWriter(directory, segment, fieldInfos);
+ FieldsWriter fieldsWriter = directory.getIndexFormat().getFieldsWriter(directory, segment, fieldInfos);
// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
// in merge mode, we use this FieldSelector
FieldSelector fieldSelectorMerge = new FieldSelector() {
public FieldSelectorResult accept(String fieldName) {
- return FieldSelectorResult.LOAD_FOR_MERGE;
+ return FieldSelectorResult.LAZY_LOAD;
}
};
@@ -407,7 +409,7 @@
private void mergeNorms() throws IOException {
for (int i = 0; i < fieldInfos.size(); i++) {
- FieldInfo fi = fieldInfos.fieldInfo(i);
+ FieldInfo fi = (FieldInfo) fieldInfos.getEntry(i);
if (fi.isIndexed && !fi.omitNorms) {
IndexOutput output = directory.createOutput(segment + ".f" + i);
try {
@@ -415,7 +417,7 @@
IndexReader reader = (IndexReader) readers.elementAt(j);
int maxDoc = reader.maxDoc();
byte[] input = new byte[maxDoc];
- reader.norms(fi.name, input, 0);
+ reader.norms(fi.getId(), input, 0);
for (int k = 0; k < maxDoc; k++) {
if (!reader.isDeleted(k)) {
output.writeByte(input[k]);
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (révision 449380)
+++ src/java/org/apache/lucene/index/IndexWriter.java (copie de travail)
@@ -109,7 +109,7 @@
private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory
- private final Directory ramDirectory = new RAMDirectory(); // for temp segs
+ private final Directory ramDirectory; // for temp segs
private Lock writeLock;
@@ -249,6 +249,7 @@
private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir)
throws IOException {
+ ramDirectory = new RAMDirectory(d.getIndexFormat());
this.closeDir = closeDir;
directory = d;
analyzer = a;
Index: src/java/org/apache/lucene/index/EntryTable.java
===================================================================
--- src/java/org/apache/lucene/index/EntryTable.java (révision 0)
+++ src/java/org/apache/lucene/index/EntryTable.java (révision 0)
@@ -0,0 +1,194 @@
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Access to a Lucene-serialized table of entries, addressable both by
+ * position (index) and by string ID.
+ * $Id$
+ */
+public abstract class EntryTable {
+
+ private ArrayList byIndex = new ArrayList(); // entries in insertion order; position == entry index
+
+ private HashMap byId = new HashMap(); // id (String) -> Entry, filled by add()
+
+ /**
+ * Constructor used to populate a table from scratch
+ * (entries are then appended via add())
+ */
+ public EntryTable() {
+ // nothing to initialize
+ }
+
+ /**
+ * Construct an EntryTable by reading its serialized form from a file
+ * in the given directory
+ *
+ * @param d The directory to open the IndexInput from
+ * @param name The name of the file to open the IndexInput from in the Directory
+ * @throws IOException in case of a read error on the file
+ */
+ public EntryTable(Directory d, String name) throws IOException {
+ IndexInput input = d.openInput(name);
+ try {
+ read(input);
+ } finally {
+ input.close();
+ }
+ }
+
+ /**
+ * Read the table from an input stream
+ *
+ * @param input the stream to read
+ * @throws IOException in case of a read error on the stream
+ */
+ public void read(IndexInput input) throws IOException {
+ int size = input.readVInt();//entry count precedes the entries
+ for (int i = 0; i < size; i++) {
+ Entry entry = readEntry(i, input);
+ add(entry);
+ }
+ }
+
+ /**
+ * Read an entry from an input stream
+ *
+ * @param index the index of the entry
+ * @param input the input stream to read
+ * @return the read entry
+ * @throws IOException in case of a read error on the stream
+ */
+ abstract protected Entry readEntry(int index, IndexInput input) throws IOException;
+
+ /**
+ * Write the table in a segment
+ *
+ * @param d the directory to write in
+ * @param name the name of the file
+ * @throws IOException in case of a read/write error in the directory
+ */
+ public void write(Directory d, String name) throws IOException {
+ IndexOutput output = d.createOutput(name);
+ try {
+ write(output);
+ } finally {
+ output.close();
+ }
+ }
+
+ /**
+ * Write the table in a stream
+ *
+ * @param output the stream to write into
+ * @throws IOException in case of a write error on the stream
+ */
+ public void write(IndexOutput output) throws IOException {
+ output.writeVInt(size());
+ for (int i = 0; i < size(); i++) {
+ writeEntry(getEntry(i), output);
+ }
+ }
+
+ /**
+ * Write an entry in the stream
+ *
+ * @param entry the entry to serialize
+ * @param output the stream to write in
+ * @throws IOException in case of a write error on the stream
+ */
+ abstract protected void writeEntry(Entry entry, IndexOutput output) throws IOException;
+
+ /**
+ *
+ * @return the size of the table, i.e. the number of entries it contains
+ */
+ public int size() {
+ return byIndex.size();
+ }
+
+ /**
+ * Return the entry object referenced by the index.
+ *
+ * @param index the requested index
+ * @return the Entry object or null when the given index doesn't exist.
+ */
+ public Entry getEntry(int index) {
+ if (index < 0 || index > byIndex.size() - 1) {
+ return null;
+ }
+ return (Entry) byIndex.get(index);
+ }
+
+ /**
+ * Get the index of an entry from its ID
+ *
+ * @param id the ID of the entry
+ * @return the index of the entry, -1 if not found
+ */
+ public int getIndex(String id) {
+ Entry entry = getEntry(id);
+ if (entry == null) {
+ return -1;
+ }
+ return entry.getIndex();
+ }
+
+ /**
+ * Get an entry from its ID
+ *
+ * @param id the ID of the entry
+ * @return the matching entry, null if not found
+ */
+ public Entry getEntry(String id) {
+ return (Entry) byId.get(id);
+ }
+
+ /**
+ * Return the ID of an entry from its index.
+ *
+ * @param index the index of the entry
+ * @return the ID or null when the index doesn't exist.
+ */
+ public String getId(int index) {
+ Entry entry = getEntry(index);
+ if (entry == null) {
+ return null;
+ }
+ return entry.getId();
+ }
+
+ /**
+ * Add an entry to the table
+ *
+ * @param entry the entry to add
+ */
+ public void add(Entry entry) {
+ byIndex.add(entry);
+ byId.put(entry.getId(), entry);
+ }
+
+}
Index: src/java/org/apache/lucene/index/TermVectorsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/TermVectorsWriter.java (révision 449380)
+++ src/java/org/apache/lucene/index/TermVectorsWriter.java (copie de travail)
@@ -115,8 +115,8 @@
* closed automatically.
*/
public final void openField(String field) throws IOException {
- FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
- openField(fieldInfo.number, fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector);
+ FieldInfo fieldInfo = (FieldInfo) fieldInfos.getEntry(field);
+ openField(fieldInfo.getIndex(), fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector);
}
private void openField(int fieldNumber, boolean storePositionWithTermVector,
@@ -205,8 +205,8 @@
if (tpVector.size() > 0 && tpVector.getOffsets(0) != null)
storeOffsetWithTermVector = true;
- FieldInfo fieldInfo = fieldInfos.fieldInfo(tpVector.getField());
- openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);
+ FieldInfo fieldInfo = (FieldInfo) fieldInfos.getEntry(tpVector.getField());
+ openField(fieldInfo.getIndex(), storePositionWithTermVector, storeOffsetWithTermVector);
for (int j = 0; j < tpVector.size(); j++)
addTermInternal(tpVector.getTerms()[j], tpVector.getTermFrequencies()[j], tpVector.getTermPositions(j),
@@ -218,8 +218,8 @@
TermFreqVector tfVector = vectors[i];
- FieldInfo fieldInfo = fieldInfos.fieldInfo(tfVector.getField());
- openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);
+ FieldInfo fieldInfo = (FieldInfo) fieldInfos.getEntry(tfVector.getField());
+ openField(fieldInfo.getIndex(), storePositionWithTermVector, storeOffsetWithTermVector);
for (int j = 0; j < tfVector.size(); j++)
addTermInternal(tfVector.getTerms()[j], tfVector.getTermFrequencies()[j], null, null);
Index: src/java/org/apache/lucene/index/DocumentWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentWriter.java (révision 449380)
+++ src/java/org/apache/lucene/index/DocumentWriter.java (copie de travail)
@@ -73,8 +73,7 @@
fieldInfos.write(directory, segment + ".fnm");
// write field values
- FieldsWriter fieldsWriter =
- new FieldsWriter(directory, segment, fieldInfos);
+ FieldsWriter fieldsWriter = directory.getIndexFormat().getFieldsWriter(directory, segment, fieldInfos);
try {
fieldsWriter.addDocument(doc);
} finally {
@@ -131,7 +130,7 @@
while (fields.hasMoreElements()) {
Fieldable field = (Fieldable) fields.nextElement();
String fieldName = field.name();
- int fieldNumber = fieldInfos.fieldNumber(fieldName);
+ int fieldNumber = fieldInfos.getIndex(fieldName);
int length = fieldLengths[fieldNumber]; // length of field
int position = fieldPositions[fieldNumber]; // position in field
@@ -338,7 +337,7 @@
if (currentField != termField) {
// changing field - see if there is something to save
currentField = termField;
- FieldInfo fi = fieldInfos.fieldInfo(currentField);
+ FieldInfo fi = (FieldInfo) fieldInfos.getEntry(currentField);
if (fi.storeTermVector) {
if (termVectorWriter == null) {
termVectorWriter =
@@ -371,9 +370,9 @@
private final void writeNorms(String segment) throws IOException {
for(int n = 0; n < fieldInfos.size(); n++){
- FieldInfo fi = fieldInfos.fieldInfo(n);
+ FieldInfo fi = (FieldInfo) fieldInfos.getEntry(n);
if(fi.isIndexed && !fi.omitNorms){
- float norm = fieldBoosts[n] * similarity.lengthNorm(fi.name, fieldLengths[n]);
+ float norm = fieldBoosts[n] * similarity.lengthNorm(fi.getId(), fieldLengths[n]);
IndexOutput norms = directory.createOutput(segment + ".f" + n);
try {
norms.writeByte(Similarity.encodeNorm(norm));
Index: src/java/org/apache/lucene/index/IndexFileNameFilter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileNameFilter.java (révision 449380)
+++ src/java/org/apache/lucene/index/IndexFileNameFilter.java (copie de travail)
@@ -18,6 +18,7 @@
import java.io.File;
import java.io.FilenameFilter;
+import java.util.List;
/**
* Filename filter that accept filenames and extensions only created by Lucene.
@@ -27,12 +28,24 @@
*/
public class IndexFileNameFilter implements FilenameFilter {
+ private IndexFormat indexFormat;
+
+ /**
+ * Constructor
+ *
+ * @param indexFormat the format of the index
+ */
+ public IndexFileNameFilter(IndexFormat indexFormat) {
+ this.indexFormat = indexFormat;
+ }
+
/* (non-Javadoc)
* @see java.io.FilenameFilter#accept(java.io.File, java.lang.String)
*/
public boolean accept(File dir, String name) {
- for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) {
- if (name.endsWith("."+IndexFileNames.INDEX_EXTENSIONS[i]))
+ List IndexExtensions = indexFormat.getIndexExtensions();
+ for (int i = 0; i < IndexExtensions.size(); i++) {
+ if (name.endsWith("." + IndexExtensions.get(i)))
return true;
}
if (name.equals(IndexFileNames.DELETABLE)) return true;
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java (révision 449380)
+++ src/java/org/apache/lucene/index/SegmentReader.java (copie de travail)
@@ -142,7 +142,7 @@
// No compound file exists - use the multi-file format
fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
- fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
+ fieldsReader = cfsDir.getIndexFormat().getFieldsReader(cfsDir, segment, fieldInfos);
tis = new TermInfosReader(cfsDir, segment, fieldInfos);
@@ -248,14 +248,16 @@
Vector files() throws IOException {
Vector files = new Vector(16);
- for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) {
- String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
+ List indexExtensions = directory().getIndexFormat().getIndexExtensions();
+
+ for (int i = 0; i < indexExtensions.size(); i++) {
+ String name = segment + "." + indexExtensions.get(i);
if (directory().fileExists(name))
files.addElement(name);
}
for (int i = 0; i < fieldInfos.size(); i++) {
- FieldInfo fi = fieldInfos.fieldInfo(i);
+ FieldInfo fi = (FieldInfo) fieldInfos.getEntry(i);
if (fi.isIndexed && !fi.omitNorms){
String name;
if(cfsReader == null)
@@ -322,37 +324,37 @@
Set fieldSet = new HashSet();
for (int i = 0; i < fieldInfos.size(); i++) {
- FieldInfo fi = fieldInfos.fieldInfo(i);
+ FieldInfo fi = (FieldInfo) fieldInfos.getEntry(i);
if (fieldOption == IndexReader.FieldOption.ALL) {
- fieldSet.add(fi.name);
+ fieldSet.add(fi.getId());
}
else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
- fieldSet.add(fi.name);
+ fieldSet.add(fi.getId());
}
else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
- fieldSet.add(fi.name);
+ fieldSet.add(fi.getId());
}
else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
- fieldSet.add(fi.name);
+ fieldSet.add(fi.getId());
}
else if (fi.storeTermVector == true &&
fi.storePositionWithTermVector == false &&
fi.storeOffsetWithTermVector == false &&
fieldOption == IndexReader.FieldOption.TERMVECTOR) {
- fieldSet.add(fi.name);
+ fieldSet.add(fi.getId());
}
else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
- fieldSet.add(fi.name);
+ fieldSet.add(fi.getId());
}
else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
- fieldSet.add(fi.name);
+ fieldSet.add(fi.getId());
}
else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
- fieldSet.add(fi.name);
+ fieldSet.add(fi.getId());
}
else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) &&
fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
- fieldSet.add(fi.name);
+ fieldSet.add(fi.getId());
}
}
return fieldSet;
@@ -433,16 +435,16 @@
private void openNorms(Directory cfsDir) throws IOException {
for (int i = 0; i < fieldInfos.size(); i++) {
- FieldInfo fi = fieldInfos.fieldInfo(i);
+ FieldInfo fi = (FieldInfo) fieldInfos.getEntry(i);
if (fi.isIndexed && !fi.omitNorms) {
// look first if there are separate norms in compound format
- String fileName = segment + ".s" + fi.number;
+ String fileName = segment + ".s" + fi.getIndex();
Directory d = directory();
if(!d.fileExists(fileName)){
- fileName = segment + ".f" + fi.number;
+ fileName = segment + ".f" + fi.getIndex();
d = cfsDir;
}
- norms.put(fi.name, new Norm(d.openInput(fileName), fi.number));
+ norms.put(fi.getId(), new Norm(d.openInput(fileName), fi.getIndex()));
}
}
}
@@ -478,7 +480,7 @@
*/
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
// Check if this field is invalid or has no stored term vector
- FieldInfo fi = fieldInfos.fieldInfo(field);
+ FieldInfo fi = (FieldInfo) fieldInfos.getEntry(field);
if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
return null;
Index: src/java/org/apache/lucene/index/rdf/RDFFieldsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/rdf/RDFFieldsWriter.java (révision 0)
+++ src/java/org/apache/lucene/index/rdf/RDFFieldsWriter.java (révision 0)
@@ -0,0 +1,136 @@
+package org.apache.lucene.index.rdf;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FieldsWriter;
+import org.apache.lucene.index.SimpleEntryTable;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * The RDF implementation of FieldsWriter
+ * Persists the literal lang/type string tables alongside the segment on close().
+ * $Id$
+ */
+public class RDFFieldsWriter extends FieldsWriter {
+
+ private SimpleEntryTable fieldLangInfos;
+
+ private SimpleEntryTable fieldTypeInfos;
+
+ private Directory d;
+
+ private String segment;
+
+ protected void close() throws IOException {
+ super.close();
+ fieldLangInfos.write(d, segment + ".fty"); // NOTE(review): lang table in ".fty" and type table in ".flg" look swapped vs their names;
+ fieldTypeInfos.write(d, segment + ".flg"); // matches RDFFieldsReader so round-trips work — confirm the intended extensions
+ }
+
+ RDFFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+ super(d, segment, fn);
+ this.d = d;
+ this.segment = segment;
+ fieldLangInfos = new SimpleEntryTable();
+ fieldTypeInfos = new SimpleEntryTable();
+ }
+
+ /**
+ * Nothing is stored at the document level
+ */
+ protected void writeDocumentData(IndexOutput out, Document doc) throws IOException {
+ // nothing to write
+ }
+
+ /**
+ * Dispatch on the concrete field implementation
+ */
+ protected void writeField(Fieldable field, IndexOutput out) throws IOException {
+ if (field instanceof RDFResourceField) {
+ writeResourceField((RDFResourceField) field, out);
+ } else if (field instanceof RDFLiteralField) {
+ writeLiteralField((RDFLiteralField) field, out);
+ } else {
+ writeDefaultField(field, out);
+ }
+ }
+
+ /**
+ * Write the bits, the value, then two vints: the lang and the type "pointers"
+ *
+ * Note that lazy fields are not handled here because the stored data contains
+ * pointers into the lang/type tables, which may differ between segments
+ */
+ private void writeLiteralField(RDFLiteralField field, IndexOutput out) throws IOException {
+ writeBits(field, out);
+ out.writeString(field.stringValue());
+ int nLang = 0;
+ String lang = field.getLang();
+ if (lang != null) {
+ nLang = fieldLangInfos.add(lang) + 1; // stored as index+1: 0 is reserved for "no lang"
+ }
+ out.writeVInt(nLang);
+
+ int nType = 0;
+ String type = field.getType();
+ if (type != null) {
+ nType = fieldTypeInfos.add(type) + 1; // stored as index+1: 0 is reserved for "no type"
+ }
+ out.writeVInt(nType);
+ }
+
+ /**
+ * Write only the bits and the value
+ */
+ private void writeResourceField(RDFResourceField field, IndexOutput out) throws IOException {
+ if (field.isLazy()) {
+ field.writeFromLazyLoading(out);
+ } else {
+ writeBits(field, out);
+ out.writeString(field.stringValue());
+ }
+ }
+
+ /**
+ * Handle unknown Fieldable implementations
+ */
+ private void writeDefaultField(Fieldable field, IndexOutput out) throws IOException {
+ writeBits(field, out);
+ if (field.isBinary()) {
+ out.writeString(""); // binary content is dropped: only an empty string is stored
+ } else {
+ out.writeString(field.stringValue());
+ }
+ }
+
+ /**
+ * Write the flags byte (bit 0: tokenized)
+ */
+ private void writeBits(Fieldable field, IndexOutput out) throws IOException {
+ byte bits = 0;
+ if (field.isTokenized()) {
+ bits |= 0x01;
+ }
+ out.writeByte(bits);
+ }
+}
Index: src/java/org/apache/lucene/index/rdf/RDFFieldsReader.java
===================================================================
--- src/java/org/apache/lucene/index/rdf/RDFFieldsReader.java (révision 0)
+++ src/java/org/apache/lucene/index/rdf/RDFFieldsReader.java (révision 0)
@@ -0,0 +1,66 @@
+package org.apache.lucene.index.rdf;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FieldsReader;
+import org.apache.lucene.index.SimpleEntryTable;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * RDF implementation of FieldsReader
+ * Loads the per-segment literal lang/type string tables.
+ * $Id$
+ */
+public class RDFFieldsReader extends FieldsReader {
+
+ private SimpleEntryTable fieldLangInfos;
+
+ private SimpleEntryTable fieldTypeInfos;
+
+ protected RDFFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+ super(d, segment, fn);
+ fieldLangInfos = new SimpleEntryTable(d, segment + ".fty"); // must match the extension used by RDFFieldsWriter.close()
+ fieldTypeInfos = new SimpleEntryTable(d, segment + ".flg"); // must match the extension used by RDFFieldsWriter.close()
+ }
+
+ /**
+ * Nothing is stored at the document level
+ */
+ protected Document createDocument(IndexInput in) {
+ return new RDFDocument();
+ }
+
+ /**
+ * Dispatch the field to create from the field name
+ * - fieldResourceName -> RDFResourceField
+ * - else -> RDFLiteralField
+ */
+ protected Fieldable createField(FieldInfo fi) {
+ if (RDFIndexFormat.fieldResourceName.equals(fi.getId())) {
+ return new RDFResourceField(fi);
+ }
+ return new RDFLiteralField(fi, fieldLangInfos, fieldTypeInfos);
+ }
+
+}
Index: src/java/org/apache/lucene/index/rdf/RDFResourceField.java
===================================================================
--- src/java/org/apache/lucene/index/rdf/RDFResourceField.java (révision 0)
+++ src/java/org/apache/lucene/index/rdf/RDFResourceField.java (révision 0)
@@ -0,0 +1,58 @@
+package org.apache.lucene.index.rdf;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * A field used for the URI of the RDF resource
+ *
+ * $Id$
+ */
+public class RDFResourceField extends Fieldable {
+
+ public RDFResourceField(FieldInfo fi) {
+ super(fi); // deserialization constructor: value is filled in later by readStream()
+ }
+
+ public RDFResourceField(String uri) {
+ super(RDFIndexFormat.fieldResourceName, uri, Store.YES, Index.UN_TOKENIZED, TermVector.NO);
+ }
+
+ /**
+ * The serialized form is a flags byte followed by the URI string
+ */
+ public void readStream(IndexInput in, boolean skip) throws IOException {
+ byte bits = in.readByte(); // bit 0: tokenized (written by RDFFieldsWriter.writeBits)
+ setTokenized((bits & 0x01) != 0);
+ setBinary(false); // resource fields are never stored as binary
+ if (skip) {
+ int toRead = in.readVInt();
+ in.skipChars(toRead); //skip the value
+ } else {
+ setData(in.readString());
+ }
+ }
+
+}
Index: src/java/org/apache/lucene/index/rdf/RDFDocument.java
===================================================================
--- src/java/org/apache/lucene/index/rdf/RDFDocument.java (révision 0)
+++ src/java/org/apache/lucene/index/rdf/RDFDocument.java (révision 0)
@@ -0,0 +1,106 @@
+package org.apache.lucene.index.rdf;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A document holding RDF statements
+ *
+ * $Id$
+ */
+public class RDFDocument extends Document {
+
+ /**
+ * Constructor used when read from the index
+ */
+ public RDFDocument() {
+ super();
+ }
+
+ /**
+ * Constructor
+ *
+ * @param uri the URI of the subject of every statement
+ */
+ public RDFDocument(String uri) {
+ add(new RDFResourceField(uri));
+ }
+
+ /**
+ * Add an RDF statement to the document
+ *
+ * @param property the property of the statement
+ * @param literal the literal of the statement
+ * @param lang the lang of the literal
+ * @param type the type of the literal
+ * @param index how to index the literal
+ */
+ public void addStatment(String property, String literal, String lang, String type, Index index) { // (sic) method name kept for compatibility
+ add(new RDFLiteralField(property, literal, lang, type, Store.YES, index, TermVector.NO));
+ }
+
+ /**
+ *
+ * @return the uri of the main resource
+ */
+ public String getUri() {
+ return getField(RDFIndexFormat.fieldResourceName).stringValue(); // NOTE(review): NPEs if the resource field is absent — confirm it is always present
+ }
+
+ /**
+ *
+ * @return a List of RDFLiteralField
+ */
+ public List getAllLiterals() {
+ List props = new ArrayList();
+ Iterator fieldsIt = getFields().iterator();
+ while (fieldsIt.hasNext()) {
+ Object f = fieldsIt.next();
+ if (f instanceof RDFLiteralField) {
+ props.add(f);
+ }
+ }
+ return props;
+ }
+
+ /**
+ * Return the literals attached to the given property
+ *
+ * @param property the URI of the property
+ * @return a List of RDFLiteralField
+ */
+ public List getLiterals(String property) {
+ List props = new ArrayList();
+ Fieldable[] fieldables = getFieldables(property);
+ for (int i = 0; i < fieldables.length; i++) {
+ Fieldable f = fieldables[i];
+ if (f instanceof RDFLiteralField) {
+ props.add(f);
+ }
+ }
+ return props;
+ }
+}
Index: src/java/org/apache/lucene/index/rdf/RDFIndexFormat.java
===================================================================
--- src/java/org/apache/lucene/index/rdf/RDFIndexFormat.java (révision 0)
+++ src/java/org/apache/lucene/index/rdf/RDFIndexFormat.java (révision 0)
@@ -0,0 +1,86 @@
+package org.apache.lucene.index.rdf;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.lucene.index.DefaultIndexFormat;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FieldsReader;
+import org.apache.lucene.index.FieldsWriter;
+import org.apache.lucene.index.IndexFormat;
+import org.apache.lucene.store.Directory;
+
+/**
+ * This is an example of a custom implementation of index format. This format
+ * is dedicated to storing RDF literals. An RDF literal has a string value, an
+ * optional language, and an optional type. This format stores the types and
+ * the languages in two custom tables. It also assumes that the value is always
+ * text, and never compressed
+ *
+ * $Id$
+ */
+public class RDFIndexFormat implements IndexFormat {
+
+ /** The field name of the resource URI */
+ public final static String fieldResourceName = "__FIELD_RESSOURCE__"; // NOTE(review): "RESSOURCE" misspelling kept — changing it would break existing indexes
+
+ // wrapped in a growable list: Arrays.asList() alone is fixed-size and the addAll() in the static block would throw UnsupportedOperationException
+ private final static List INDEX_EXTENSIONS = new java.util.ArrayList(Arrays.asList(new String[] { "fty", "flg" }));
+
+ // idem: must stay growable for the static initializer below
+ private static final List COMPOUND_EXTENSIONS = new java.util.ArrayList(Arrays.asList(new String[] { "fty", "flg" }));
+
+ private static final List VECTOR_EXTENSIONS;
+
+ static {
+ DefaultIndexFormat defaultFrmt = new DefaultIndexFormat();
+
+ INDEX_EXTENSIONS.addAll(defaultFrmt.getIndexExtensions());
+
+ COMPOUND_EXTENSIONS.addAll(defaultFrmt.getCompoundExtensions());
+
+ VECTOR_EXTENSIONS = defaultFrmt.getVectorExtensions();
+ }
+
+ public List getIndexExtensions() {
+ return INDEX_EXTENSIONS;
+ }
+
+ public List getCompoundExtensions() {
+ return COMPOUND_EXTENSIONS;
+ }
+
+ public List getVectorExtensions() {
+ return VECTOR_EXTENSIONS;
+ }
+
+ /**
+ * Return a RDFFieldsReader
+ */
+ public FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+ return new RDFFieldsReader(d, segment, fn);
+ }
+
+ /**
+ * Return a RDFFieldsWriter
+ */
+ public FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+ return new RDFFieldsWriter(d, segment, fn);
+ }
+
+}
Index: src/java/org/apache/lucene/index/rdf/RDFLiteralField.java
===================================================================
--- src/java/org/apache/lucene/index/rdf/RDFLiteralField.java (révision 0)
+++ src/java/org/apache/lucene/index/rdf/RDFLiteralField.java (révision 0)
@@ -0,0 +1,101 @@
+package org.apache.lucene.index.rdf;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SimpleEntryTable;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * A Field used for literals. It handles a string value, an optional lang,
+ * and an optional type.
+ *
+ * $Id$
+ */
+public class RDFLiteralField extends Fieldable {
+
+ private SimpleEntryTable fieldLangInfos;
+
+ private SimpleEntryTable fieldTypeInfos;
+
+ private String lang;
+
+ private String type;
+
+ public RDFLiteralField(FieldInfo fi, SimpleEntryTable fieldLangInfos, SimpleEntryTable fieldTypeInfos) {
+ super(fi); // deserialization constructor: lang/type resolved later in readStream()
+ this.fieldLangInfos = fieldLangInfos;
+ this.fieldTypeInfos = fieldTypeInfos;
+ }
+
+ public RDFLiteralField(String name, String value, String lang, String type, Store store, Index index, TermVector termVector) {
+ super(name, value, store, index, termVector);
+ this.lang = lang;
+ this.type = type;
+ }
+
+ /**
+ *
+ * @return the lang of the literal, null if none
+ */
+ public String getLang() {
+ return lang;
+ }
+
+ /**
+ *
+ * @return the type of the literal, null if none
+ */
+ public String getType() {
+ return type;
+ }
+
+ /**
+ * Read the stream :
+ * - a flags byte, then a string : the value
+ * - a vint : the pointer to the lang (0 = none, otherwise table index + 1)
+ * - a vint : the pointer to the type (0 = none, otherwise table index + 1)
+ */
+ public void readStream(IndexInput in, boolean skip) throws IOException {
+ byte bits = in.readByte(); // bit 0: tokenized (written by RDFFieldsWriter.writeBits)
+ setTokenized((bits & 0x01) != 0);
+ setBinary(false);
+ if (skip) {
+ int toRead = in.readVInt();
+ in.skipChars(toRead); //skip the value
+ in.readVInt(); //skip the lang
+ in.readVInt(); //skip the type
+ } else {
+ setData(in.readString());
+ int nLang = in.readVInt();
+ if (nLang != 0) {
+ lang = fieldLangInfos.getId(nLang - 1); // stored value is table index + 1
+ }
+ int nType = in.readVInt();
+ if (nType != 0) {
+ type = fieldTypeInfos.getId(nType - 1); // stored value is table index + 1
+ }
+ }
+ }
+
+}
Index: src/java/org/apache/lucene/index/DefaultFieldsReader.java
===================================================================
--- src/java/org/apache/lucene/index/DefaultFieldsReader.java (révision 0)
+++ src/java/org/apache/lucene/index/DefaultFieldsReader.java (révision 0)
@@ -0,0 +1,51 @@
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * The default implementation of a FieldsReader
+ *
+ * $Id$
+ */
+public class DefaultFieldsReader extends FieldsReader {
+
+ protected DefaultFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+ super(d, segment, fn);
+ }
+
+ /**
+ * There is no information stored at the document level
+ */
+ protected Document createDocument(IndexInput fieldsStream) {
+ return new Document();
+ }
+
+ /**
+ * Use the class Field
+ */
+ protected Fieldable createField(FieldInfo fi) {
+ return new Field(fi);
+ }
+}
Index: src/java/org/apache/lucene/store/Directory.java
===================================================================
--- src/java/org/apache/lucene/store/Directory.java (révision 449380)
+++ src/java/org/apache/lucene/store/Directory.java (copie de travail)
@@ -18,6 +18,9 @@
import java.io.IOException;
+import org.apache.lucene.index.DefaultIndexFormat;
+import org.apache.lucene.index.IndexFormat;
+
/** A Directory is a flat list of files. Files may be written once, when they
* are created. Once a file is created it may only be opened for read, or
* deleted. Random access is permitted both when reading and writing.
@@ -41,6 +44,12 @@
* this Directory instance). */
protected LockFactory lockFactory;
+ protected IndexFormat indexFormat = new DefaultIndexFormat();
+
+ public IndexFormat getIndexFormat() {
+ return indexFormat;
+ }
+
/** Returns an array of strings, one for each file in the directory. */
public abstract String[] list()
throws IOException;
@@ -123,4 +132,5 @@
public String getLockID() {
return this.toString();
}
+
}
Index: src/java/org/apache/lucene/store/RAMDirectory.java
===================================================================
--- src/java/org/apache/lucene/store/RAMDirectory.java (révision 449380)
+++ src/java/org/apache/lucene/store/RAMDirectory.java (copie de travail)
@@ -22,6 +22,8 @@
import java.util.Hashtable;
import java.util.Enumeration;
+import org.apache.lucene.index.DefaultIndexFormat;
+import org.apache.lucene.index.IndexFormat;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -39,8 +41,21 @@
Hashtable files = new Hashtable();
- /** Constructs an empty {@link Directory}. */
+ /**
+ * Constructs an empty {@link Directory}.
+ * The index format used is the default one
+ */
public RAMDirectory() {
+ this(new DefaultIndexFormat());
+ }
+
+ /**
+ * Constructor
+ *
+ * @param indexFormat the format of the index
+ */
+ public RAMDirectory(IndexFormat indexFormat) {
+ this.indexFormat = indexFormat;
setLockFactory(new SingleInstanceLockFactory());
}
@@ -51,15 +66,32 @@
*
* This should be used only with indices that can fit into memory.
*
+ * The index format used is the default one
+ *
* @param dir a Directory value
* @exception IOException if an error occurs
*/
public RAMDirectory(Directory dir) throws IOException {
- this(dir, false);
+ this(dir, new DefaultIndexFormat());
}
-
- private RAMDirectory(Directory dir, boolean closeDir) throws IOException {
- this();
+
+ /**
+ * Creates a new RAMDirectory instance from a different
+ * Directory implementation. This can be used to load
+ * a disk-based index into memory.
+ *
+ * This should be used only with indices that can fit into memory.
+ *
+ * @param dir a Directory value
+ * @param indexFormat the format of the index
+ * @throws IOException if an error occurs
+ */
+ public RAMDirectory(Directory dir, IndexFormat indexFormat) throws IOException {
+ this(dir, false, indexFormat);
+ }
+
+ private RAMDirectory(Directory dir, boolean closeDir, IndexFormat indexFormat) throws IOException {
+ this(indexFormat);
final String[] files = dir.list();
byte[] buf = new byte[BufferedIndexOutput.BUFFER_SIZE];
for (int i = 0; i < files.length; i++) {
@@ -87,22 +119,46 @@
/**
* Creates a new RAMDirectory instance from the {@link FSDirectory}.
- *
+ * The index format used is the default one
+ *
* @param dir a File specifying the index directory
*/
public RAMDirectory(File dir) throws IOException {
- this(FSDirectory.getDirectory(dir, false), true);
+ this(dir, new DefaultIndexFormat());
}
/**
* Creates a new RAMDirectory instance from the {@link FSDirectory}.
+ *
+ * @param dir a File specifying the index directory
+ * @param indexFormat the format of the index
+ * @throws IOException
+ */
+ public RAMDirectory(File dir, IndexFormat indexFormat) throws IOException {
+ this(FSDirectory.getDirectory(dir, false), true, indexFormat);
+ }
+
+ /**
+ * Creates a new RAMDirectory instance from the {@link FSDirectory}.
+ * The index format used is the default one
*
* @param dir a String specifying the full index directory path
*/
public RAMDirectory(String dir) throws IOException {
- this(FSDirectory.getDirectory(dir, false), true);
+ this(dir, new DefaultIndexFormat());
}
+ /**
+ * Creates a new RAMDirectory instance from the {@link FSDirectory}.
+ *
+ * @param dir a String specifying the full index directory path
+ * @param indexFormat the format of the index
+ * @throws IOException
+ */
+ public RAMDirectory(String dir, IndexFormat indexFormat) throws IOException {
+ this(FSDirectory.getDirectory(dir, false), true, indexFormat);
+ }
+
/** Returns an array of strings, one for each file in the directory. */
public final String[] list() {
String[] result = new String[files.size()];
@@ -183,4 +239,21 @@
public final void close() {
files = null;
}
+
+ /**
+ * For debugging purposes, lists every file name in this directory.
+ * The code is commented out because the lockID is based on the toString() function
+ */
+// public String toString() {
+// String[] f = list();
+// StringBuffer buffer = new StringBuffer();
+// for (int i = 0; i< f.length; i++) {
+// buffer.append(f[i]);
+// if (i != f.length - 1) {
+// buffer.append(", ");
+// }
+// }
+// return buffer.toString();
+// }
+
}
Index: src/java/org/apache/lucene/store/RAMFile.java
===================================================================
--- src/java/org/apache/lucene/store/RAMFile.java (révision 449380)
+++ src/java/org/apache/lucene/store/RAMFile.java (copie de travail)
@@ -26,4 +26,28 @@
Vector buffers = new Vector();
long length;
long lastModified = System.currentTimeMillis();
+
+ /**
+ * For debugging purposes
+ */
+ public String toString() {
+ StringBuffer buffer = new StringBuffer();
+ int i = 0;
+ int j = 0;
+ byte[] b = (byte[]) buffers.get(0);
+ int k = 0;
+ while (i < 200 && j < buffers.size()) {
+ buffer.append(b[k]);
+ k++;
+ if (k == b.length) {
+ k = 0;
+ j++;
+ if (j < buffers.size()) {
+ b = (byte[]) buffers.get(j);
+ }
+ }
+ i++;
+ }
+ return buffer.toString();
+ }
}
Index: src/java/org/apache/lucene/store/IndexOutput.java
===================================================================
--- src/java/org/apache/lucene/store/IndexOutput.java (révision 449380)
+++ src/java/org/apache/lucene/store/IndexOutput.java (copie de travail)
@@ -30,6 +30,17 @@
*/
public abstract void writeByte(byte b) throws IOException;
+ /**
+ * Write a byte directly from an input stream.
+ *
+ * @param in the stream to read
+ * @throws IOException
+ * @see #writeByte(byte)
+ */
+ public void writeByte(IndexInput in) throws IOException {
+ writeByte(in.readByte());
+ }
+
/** Writes an array of bytes.
* @param b the bytes to write
* @param length the number of bytes to write
@@ -37,6 +48,20 @@
*/
public abstract void writeBytes(byte[] b, int length) throws IOException;
+ /**
+ * Write a batch of bytes directly from an input stream.
+ *
+ * @param in the stream to read
+ * @param length the number of bytes to write
+ * @throws IOException
+ * @see #writeBytes(byte[], int)
+ */
+ public void writeBytes(IndexInput in, long length) throws IOException {
+ while (length-- > 0) {
+ writeByte(in.readByte());
+ }
+ }
+
/** Writes an int as four bytes.
* @see IndexInput#readInt()
*/
@@ -47,6 +72,20 @@
writeByte((byte) i);
}
+ /**
+ * Writes an int as four bytes directly from an input stream.
+ *
+ * @param in the stream to read
+ * @throws IOException
+ * @see #writeInt(int)
+ */
+ public void writeInt(IndexInput in) throws IOException {
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ }
+
/** Writes an int in a variable-length format. Writes between one and
* five bytes. Smaller values take fewer bytes. Negative numbers are not
* supported.
@@ -60,6 +99,22 @@
writeByte((byte)i);
}
+ /**
+ * Writes an int in a variable-length format directly from an input stream.
+ *
+ * @param in the stream to read
+ * @throws IOException
+ * @see #writeVInt(int)
+ */
+ public void writeVInt(IndexInput in) throws IOException {
+ byte b = in.readByte();
+ writeByte(b);
+ while ((b & 0x80) != 0) {
+ b = in.readByte();
+ writeByte(b);
+ }
+ }
+
/** Writes a long as eight bytes.
* @see IndexInput#readLong()
*/
@@ -68,6 +123,24 @@
writeInt((int) i);
}
+ /**
+ * Writes a long as eight bytes directly from an input stream.
+ *
+ * @param in the stream to read
+ * @throws IOException
+ * @see #writeLong(long)
+ */
+ public void writeLong(IndexInput in) throws IOException {
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ writeByte(in.readByte());
+ }
+
/** Writes an long in a variable-length format. Writes between one and five
* bytes. Smaller values take fewer bytes. Negative numbers are not
* supported.
@@ -81,6 +154,22 @@
writeByte((byte)i);
}
+ /**
+ * Writes an long in a variable-length format directly from an input stream.
+ *
+ * @param in the stream to read
+ * @throws IOException
+ * @see #writeVLong(long)
+ */
+ public void writeVLong(IndexInput in) throws IOException {
+ byte b = in.readByte();
+ writeByte(b);
+ while ((b & 0x80) != 0) {
+ b = in.readByte();
+ writeByte(b);
+ }
+ }
+
/** Writes a string.
* @see IndexInput#readString()
*/
@@ -90,6 +179,19 @@
writeChars(s, 0, length);
}
+ /**
+ * Writes a string directly from an input stream.
+ *
+ * @param in the stream to read
+ * @throws IOException
+ * @see #writeString(String)
+ */
+ public void writeString(IndexInput in) throws IOException {
+ int length = in.readVInt();
+ writeVInt(length);
+ writeChars(in, length);
+ }
+
/** Writes a sequence of UTF-8 encoded characters from a string.
* @param s the source of the characters
* @param start the first character in the sequence
@@ -102,18 +204,40 @@
for (int i = start; i < end; i++) {
final int code = (int)s.charAt(i);
if (code >= 0x01 && code <= 0x7F)
- writeByte((byte)code);
+ writeByte((byte)code);
else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) {
- writeByte((byte)(0xC0 | (code >> 6)));
- writeByte((byte)(0x80 | (code & 0x3F)));
+ writeByte((byte)(0xC0 | (code >> 6)));
+ writeByte((byte)(0x80 | (code & 0x3F)));
} else {
- writeByte((byte)(0xE0 | (code >>> 12)));
- writeByte((byte)(0x80 | ((code >> 6) & 0x3F)));
- writeByte((byte)(0x80 | (code & 0x3F)));
+ writeByte((byte)(0xE0 | (code >>> 12)));
+ writeByte((byte)(0x80 | ((code >> 6) & 0x3F)));
+ writeByte((byte)(0x80 | (code & 0x3F)));
}
}
}
+ /**
+ * Writes a sequence of UTF-8 encoded characters directly from an input stream.
+ *
+ * @param in the stream to read
+ * @param length the number of characters in the sequence
+ * @throws IOException
+ * @see #writeChars(String,int,int)
+ */
+ public void writeChars(IndexInput in, int length)
+ throws IOException {
+ for (int i = 0; i < length; i++) {
+ byte b = in.readByte();
+ writeByte(b);
+ if ((b & 0x80) != 0) {
+ writeByte(in.readByte());
+ if ((b & 0xE0) == 0xE0) {
+ writeByte(in.readByte());
+ }
+ }
+ }
+ }
+
/** Forces any buffered output to be written. */
public abstract void flush() throws IOException;
Index: src/java/org/apache/lucene/store/FSDirectory.java
===================================================================
--- src/java/org/apache/lucene/store/FSDirectory.java (révision 449380)
+++ src/java/org/apache/lucene/store/FSDirectory.java (copie de travail)
@@ -25,7 +25,9 @@
import java.security.NoSuchAlgorithmException;
import java.util.Hashtable;
+import org.apache.lucene.index.DefaultIndexFormat;
import org.apache.lucene.index.IndexFileNameFilter;
+import org.apache.lucene.index.IndexFormat;
/**
* Straightforward implementation of {@link Directory} as a directory of files.
@@ -121,21 +123,41 @@
*
Directories are cached, so that, for a given canonical path, the same * FSDirectory instance will always be returned. This permits * synchronization on directories. - * + * The index format used is the default one. + * * @param path the path to the directory. * @param create if true, create, or erase any existing contents. * @return the FSDirectory for the named file. */ public static FSDirectory getDirectory(String path, boolean create) throws IOException { - return getDirectory(path, create, null); + return getDirectory(path, create, new DefaultIndexFormat()); } + /** + * Returns the directory instance for the named location. + * + *
Directories are cached, so that, for a given canonical path, the same + * FSDirectory instance will always be returned. This permits + * synchronization on directories. + * + * @param path the path to the directory. + * @param create if true, create, or erase any existing contents. + * @param indexFormat the format of index + * @return the FSDirectory for the named file. + * @throws IOException + */ + public static FSDirectory getDirectory(String path, boolean create, IndexFormat indexFormat) + throws IOException { + return getDirectory(path, create, null, indexFormat); + } + /** Returns the directory instance for the named location, using the * provided LockFactory implementation. * *
Directories are cached, so that, for a given canonical path, the same * FSDirectory instance will always be returned. This permits * synchronization on directories. + * The index format used is the default one. * * @param path the path to the directory. * @param create if true, create, or erase any existing contents. @@ -145,23 +167,64 @@ public static FSDirectory getDirectory(String path, boolean create, LockFactory lockFactory) throws IOException { - return getDirectory(new File(path), create, lockFactory); + return getDirectory(path, create, lockFactory, new DefaultIndexFormat()); } + /** + * Returns the directory instance for the named location, using the + * provided LockFactory implementation. + * + *
Directories are cached, so that, for a given canonical path, the same + * FSDirectory instance will always be returned. This permits + * synchronization on directories. + * + * @param path the path to the directory. + * @param create if true, create, or erase any existing contents. + * @param lockFactory instance of {@link LockFactory} providing the + * locking implementation. + * @param indexFormat the format of index + * @return the FSDirectory for the named file. + * @throws IOException + */ + public static FSDirectory getDirectory(String path, boolean create, + LockFactory lockFactory, IndexFormat indexFormat) + throws IOException { + return getDirectory(new File(path), create, lockFactory, indexFormat); + } + /** Returns the directory instance for the named location. * *
Directories are cached, so that, for a given canonical path, the same * FSDirectory instance will always be returned. This permits * synchronization on directories. + * The index format used is the default one. * * @param file the path to the directory. * @param create if true, create, or erase any existing contents. * @return the FSDirectory for the named file. */ public static FSDirectory getDirectory(File file, boolean create) - throws IOException { - return getDirectory(file, create, null); + throws IOException { + return getDirectory(file, create, new DefaultIndexFormat()); } + /** + * Returns the directory instance for the named location. + * + *
Directories are cached, so that, for a given canonical path, the same
+ * FSDirectory instance will always be returned. This permits
+ * synchronization on directories.
+ *
+ * @param file the path to the directory.
+ * @param create if true, create, or erase any existing contents.
+ * @param indexFormat the format of index
+ * @return the FSDirectory for the named file.
+ * @throws IOException
+ */
+ public static FSDirectory getDirectory(File file, boolean create, IndexFormat indexFormat)
+ throws IOException {
+ return getDirectory(file, create, null, indexFormat);
+ }
+
/** Returns the directory instance for the named location, using the
* provided LockFactory implementation.
*
@@ -173,9 +236,12 @@
* @param create if true, create, or erase any existing contents.
* @param lockFactory instance of {@link LockFactory} providing the
* locking implementation.
- * @return the FSDirectory for the named file. */
+ * @param indexFormat the format of index
+ * @return the FSDirectory for the named file.
+ * @throws IOException
+ */
public static FSDirectory getDirectory(File file, boolean create,
- LockFactory lockFactory)
+ LockFactory lockFactory, IndexFormat indexFormat)
throws IOException {
file = new File(file.getCanonicalPath());
FSDirectory dir;
@@ -187,7 +253,7 @@
} catch (Exception e) {
throw new RuntimeException("cannot load FSDirectory class: " + e.toString(), e);
}
- dir.init(file, create, lockFactory);
+ dir.init(file, create, lockFactory, indexFormat);
DIRECTORIES.put(file, dir);
} else {
@@ -224,8 +290,10 @@
throw new IOException(path + " not a directory");
}
- private void init(File path, boolean create, LockFactory lockFactory) throws IOException {
+ private void init(File path, boolean create, LockFactory lockFactory, IndexFormat indexFormat) throws IOException {
+ this.indexFormat = indexFormat;
+
// Set up lockFactory with cascaded defaults: if an instance was passed in,
// use that; else if locks are disabled, use NoLockFactory; else if the
// system property org.apache.lucene.lockClass is set, instantiate that;
@@ -290,7 +358,7 @@
if (!directory.isDirectory())
throw new IOException(directory + " not a directory");
- String[] files = directory.list(new IndexFileNameFilter()); // clear old files
+ String[] files = directory.list(new IndexFileNameFilter(getIndexFormat())); // clear old files
if (files == null)
throw new IOException("Cannot read directory " + directory.getAbsolutePath());
for (int i = 0; i < files.length; i++) {
@@ -304,7 +372,7 @@
/** Returns an array of strings, one for each Lucene index file in the directory. */
public String[] list() {
- return directory.list(new IndexFileNameFilter());
+ return directory.list(new IndexFileNameFilter(getIndexFormat()));
}
/** Returns true iff a file with the given name exists. */
Index: src/java/org/apache/lucene/document/Field.java
===================================================================
--- src/java/org/apache/lucene/document/Field.java (révision 449380)
+++ src/java/org/apache/lucene/document/Field.java (copie de travail)
@@ -16,11 +16,17 @@
* limitations under the License.
*/
-import org.apache.lucene.util.Parameter;
-
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
+import java.util.zip.DataFormatException;
+import java.util.zip.Inflater;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Parameter;
+
/**
A field is a section of a Document. Each field has two parts, a name and a
value. Values may be free text, provided as a String or as a Reader, or they
@@ -29,8 +35,10 @@
index, so that they may be returned with hits on the document.
*/
-public final class Field extends AbstractField implements Fieldable, Serializable {
-
+public final class Field extends Fieldable implements Serializable {
+
+ private boolean isCompressed;
+
/** Specifies whether and how a field should be stored. */
public static final class Store extends Parameter implements Serializable {
@@ -127,22 +135,10 @@
public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
}
-
- /** The value of the field as a String, or null. If null, the Reader value
- * or binary value is used. Exactly one of stringValue(), readerValue(), and
- * binaryValue() must be set. */
- public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }
-
- /** The value of the field as a Reader, or null. If null, the String value
- * or binary value is used. Exactly one of stringValue(), readerValue(),
- * and binaryValue() must be set. */
- public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
-
- /** The value of the field in Binary, or null. If null, the Reader or
- * String value is used. Exactly one of stringValue(), readerValue() and
- * binaryValue() must be set. */
- public byte[] binaryValue() { return fieldsData instanceof byte[] ? (byte[])fieldsData : null; }
-
+ public Field(FieldInfo fi) {
+ super(fi);
+ }
+
/**
* Create a field by specifying its name, value and how it will
* be saved in the index. Term vectors will not be stored in the index.
@@ -177,57 +173,9 @@
*
*/
public Field(String name, String value, Store store, Index index, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (value == null)
- throw new NullPointerException("value cannot be null");
+ super(name, value, store, index, termVector);
if (name.length() == 0 && value.length() == 0)
throw new IllegalArgumentException("name and value cannot both be empty");
- if (index == Index.NO && store == Store.NO)
- throw new IllegalArgumentException("it doesn't make sense to have a field that "
- + "is neither indexed nor stored");
- if (index == Index.NO && termVector != TermVector.NO)
- throw new IllegalArgumentException("cannot store term vector information "
- + "for a field that is not indexed");
-
- this.name = name.intern(); // field names are interned
- this.fieldsData = value;
-
- if (store == Store.YES){
- this.isStored = true;
- this.isCompressed = false;
- }
- else if (store == Store.COMPRESS) {
- this.isStored = true;
- this.isCompressed = true;
- }
- else if (store == Store.NO){
- this.isStored = false;
- this.isCompressed = false;
- }
- else
- throw new IllegalArgumentException("unknown store parameter " + store);
-
- if (index == Index.NO) {
- this.isIndexed = false;
- this.isTokenized = false;
- } else if (index == Index.TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = true;
- } else if (index == Index.UN_TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = false;
- } else if (index == Index.NO_NORMS) {
- this.isIndexed = true;
- this.isTokenized = false;
- this.omitNorms = true;
- } else {
- throw new IllegalArgumentException("unknown index parameter " + index);
- }
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
}
/**
@@ -252,23 +200,7 @@
* @throws NullPointerException if name or reader is null
*/
public Field(String name, Reader reader, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (reader == null)
- throw new NullPointerException("reader cannot be null");
-
- this.name = name.intern(); // field names are interned
- this.fieldsData = reader;
-
- this.isStored = false;
- this.isCompressed = false;
-
- this.isIndexed = true;
- this.isTokenized = true;
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
+ super(name, reader, Store.NO, Index.TOKENIZED, termVector);
}
/**
@@ -280,34 +212,163 @@
* @throws IllegalArgumentException if store is Store.NO
*/
public Field(String name, byte[] value, Store store) {
- if (name == null)
- throw new IllegalArgumentException("name cannot be null");
- if (value == null)
- throw new IllegalArgumentException("value cannot be null");
-
- this.name = name.intern();
- this.fieldsData = value;
-
- if (store == Store.YES){
- this.isStored = true;
- this.isCompressed = false;
+ super(name, value, store, Index.NO, TermVector.NO);
+ }
+
+ protected void setStore(Field.Store store) {
+ if (store == Field.Store.YES) {
+ setStored(true);
+ isCompressed = false;
+ } else if (store == Field.Store.COMPRESS) {
+ setStored(true);
+ isCompressed = true;
+ } else if (store == Field.Store.NO) {
+ if (isBinary()) {
+ throw new IllegalArgumentException("binary values can't be unstored");
+ }
+ setStored(false);
+ isCompressed = false;
+ } else {
+ throw new IllegalArgumentException("unknown store parameter " + store);
}
- else if (store == Store.COMPRESS) {
- this.isStored = true;
- this.isCompressed = true;
+ }
+
+ /** Prints a Field for human consumption. */
+ public String toString() {
+ StringBuffer result = new StringBuffer();
+ if (isStored()) {
+ result.append("stored");
+ if (isCompressed)
+ result.append("/compressed");
+ else
+ result.append("/uncompressed");
}
- else if (store == Store.NO)
- throw new IllegalArgumentException("binary values can't be unstored");
- else
- throw new IllegalArgumentException("unknown store parameter " + store);
-
- this.isIndexed = false;
- this.isTokenized = false;
-
- this.isBinary = true;
-
- setStoreTermVector(TermVector.NO);
+ if (isIndexed()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("indexed");
+ }
+ if (isTokenized()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("tokenized");
+ }
+ if (isTermVectorStored()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVector");
+ }
+ if (isStoreOffsetWithTermVector()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVectorOffsets");
+ }
+ if (isStorePositionWithTermVector()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVectorPosition");
+ }
+ if (isBinary()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("binary");
+ }
+ if (getOmitNorms()) {
+ result.append(",omitNorms");
+ }
+ if (isLazy()) {
+ result.append(",lazy");
+ }
+ result.append('<');
+ result.append(name());
+ result.append(':');
+
+ result.append(getData());
+
+ result.append('>');
+ return result.toString();
}
+ /** True if the value of the field is stored and compressed within the index */
+ public final boolean isCompressed() {
+ return isCompressed;
+ }
+ public void setCompressed(boolean isCompressed) {
+ this.isCompressed = isCompressed;
+ }
+
+ public static final byte FIELD_IS_TOKENIZED = 0x1;
+
+ public static final byte FIELD_IS_BINARY = 0x2;
+
+ public static final byte FIELD_IS_COMPRESSED = 0x4;
+
+ public void readStream(IndexInput in, boolean skip) throws IOException {
+ byte bits = in.readByte();
+ isCompressed = (bits & FIELD_IS_COMPRESSED) != 0;
+ setTokenized((bits & FIELD_IS_TOKENIZED) != 0);
+ setBinary((bits & FIELD_IS_BINARY) != 0);
+
+ if (skip) {
+ int toRead = in.readVInt();
+ if (isBinary() || isCompressed()) {
+ long pointer = in.getFilePointer();
+ //Need to move the pointer ahead by toRead positions
+ in.seek(pointer + toRead);
+ } else {
+ //Skip ahead of where we are by the length of what is stored
+ in.skipChars(toRead);
+ }
+ } else {
+ if (isBinary()) {
+ int toRead = in.readVInt();
+ final byte[] b = new byte[toRead];
+ in.readBytes(b, 0, b.length);
+ if (isCompressed()) {
+ setData(uncompress(b));
+ } else {
+ setData(b);
+ }
+ } else {
+ if (isCompressed()) {
+ int toRead = in.readVInt();
+ final byte[] b = new byte[toRead];
+ in.readBytes(b, 0, b.length);
+ setData(new String(uncompress(b), "UTF-8"));
+ } else {
+ setData(in.readString()); // read value
+ }
+ }
+ }
+ }
+
+ protected byte[] uncompress(final byte[] input) throws IOException {
+
+ Inflater decompressor = new Inflater();
+ decompressor.setInput(input);
+
+ // Create an expandable byte array to hold the decompressed data
+ ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
+
+ // Decompress the data
+ byte[] buf = new byte[1024];
+ while (!decompressor.finished()) {
+ try {
+ int count = decompressor.inflate(buf);
+ bos.write(buf, 0, count);
+ } catch (DataFormatException e) {
+ // this will happen if the field is not compressed
+ IOException newException = new IOException("field data are in wrong format: " + e.toString());
+ newException.initCause(e);
+ throw newException;
+ }
+ }
+
+ decompressor.end();
+
+ // Get the decompressed data
+ return bos.toByteArray();
+ }
+
}
Index: src/java/org/apache/lucene/document/AbstractField.java
===================================================================
--- src/java/org/apache/lucene/document/AbstractField.java (révision 449380)
+++ src/java/org/apache/lucene/document/AbstractField.java (copie de travail)
@@ -1,274 +0,0 @@
-package org.apache.lucene.document;
-/**
- * Copyright 2006 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-/**
- *
- *
- **/
-public abstract class AbstractField implements Fieldable {
-
- protected String name = "body";
- protected boolean storeTermVector = false;
- protected boolean storeOffsetWithTermVector = false;
- protected boolean storePositionWithTermVector = false;
- protected boolean omitNorms = false;
- protected boolean isStored = false;
- protected boolean isIndexed = true;
- protected boolean isTokenized = true;
- protected boolean isBinary = false;
- protected boolean isCompressed = false;
- protected boolean lazy = false;
- protected float boost = 1.0f;
- // the one and only data object for all different kind of field values
- protected Object fieldsData = null;
-
- protected AbstractField()
- {
-
- }
-
- protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- this.name = name.intern(); // field names are interned
-
- if (store == Field.Store.YES){
- this.isStored = true;
- this.isCompressed = false;
- }
- else if (store == Field.Store.COMPRESS) {
- this.isStored = true;
- this.isCompressed = true;
- }
- else if (store == Field.Store.NO){
- this.isStored = false;
- this.isCompressed = false;
- }
- else
- throw new IllegalArgumentException("unknown store parameter " + store);
-
- if (index == Field.Index.NO) {
- this.isIndexed = false;
- this.isTokenized = false;
- } else if (index == Field.Index.TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = true;
- } else if (index == Field.Index.UN_TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = false;
- } else if (index == Field.Index.NO_NORMS) {
- this.isIndexed = true;
- this.isTokenized = false;
- this.omitNorms = true;
- } else {
- throw new IllegalArgumentException("unknown index parameter " + index);
- }
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
- }
-
- /** Sets the boost factor hits on this field. This value will be
- * multiplied into the score of all hits on this this field of this
- * document.
- *
- *
- * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
- * containing this field. If a document has multiple fields with the same
- * name, all such values are multiplied together. This product is then
- * multipled by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
- * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
- * index. One should attempt to ensure that this product does not overflow
- * the range of that encoding.
- *
- * @see org.apache.lucene.document.Document#setBoost(float)
- * @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
- * @see org.apache.lucene.search.Similarity#encodeNorm(float)
- */
- public void setBoost(float boost) {
- this.boost = boost;
- }
-
- /** Returns the boost factor for hits for this field.
- *
- * <p>The default value is 1.0.
- *
- * <p>Note: this value is not stored directly with the document in the index.
- * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
- * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when
- * this field was indexed.
- *
- * @see #setBoost(float)
- */
- public float getBoost() {
- return boost;
- }
-
- /** Returns the name of the field as an interned string.
- * For example "date", "title", "body", ...
- */
- public String name() { return name; }
-
- protected void setStoreTermVector(Field.TermVector termVector) {
- if (termVector == Field.TermVector.NO) {
- this.storeTermVector = false;
- this.storePositionWithTermVector = false;
- this.storeOffsetWithTermVector = false;
- }
- else if (termVector == Field.TermVector.YES) {
- this.storeTermVector = true;
- this.storePositionWithTermVector = false;
- this.storeOffsetWithTermVector = false;
- }
- else if (termVector == Field.TermVector.WITH_POSITIONS) {
- this.storeTermVector = true;
- this.storePositionWithTermVector = true;
- this.storeOffsetWithTermVector = false;
- }
- else if (termVector == Field.TermVector.WITH_OFFSETS) {
- this.storeTermVector = true;
- this.storePositionWithTermVector = false;
- this.storeOffsetWithTermVector = true;
- }
- else if (termVector == Field.TermVector.WITH_POSITIONS_OFFSETS) {
- this.storeTermVector = true;
- this.storePositionWithTermVector = true;
- this.storeOffsetWithTermVector = true;
- }
- else {
- throw new IllegalArgumentException("unknown termVector parameter " + termVector);
- }
- }
-
- /** True iff the value of the field is to be stored in the index for return
- with search hits. It is an error for this to be true if a field is
- Reader-valued. */
- public final boolean isStored() { return isStored; }
-
- /** True iff the value of the field is to be indexed, so that it may be
- searched on. */
- public final boolean isIndexed() { return isIndexed; }
-
- /** True iff the value of the field should be tokenized as text prior to
- indexing. Un-tokenized fields are indexed as a single word and may not be
- Reader-valued. */
- public final boolean isTokenized() { return isTokenized; }
-
- /** True if the value of the field is stored and compressed within the index */
- public final boolean isCompressed() { return isCompressed; }
-
- /** True iff the term or terms used to index this field are stored as a term
- * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
- * These methods do not provide access to the original content of the field,
- * only to terms used to index it. If the original content must be
- * preserved, use the stored attribute instead.
- *
- * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
- */
- public final boolean isTermVectorStored() { return storeTermVector; }
-
- /**
- * True iff terms are stored as term vector together with their offsets
- * (start and end positon in source text).
- */
- public boolean isStoreOffsetWithTermVector(){
- return storeOffsetWithTermVector;
- }
-
- /**
- * True iff terms are stored as term vector together with their token positions.
- */
- public boolean isStorePositionWithTermVector(){
- return storePositionWithTermVector;
- }
-
- /** True iff the value of the filed is stored as binary */
- public final boolean isBinary() { return isBinary; }
-
- /** True if norms are omitted for this indexed field */
- public boolean getOmitNorms() { return omitNorms; }
-
- /** Expert:
- *
- * If set, omit normalization factors associated with this indexed field.
- * This effectively disables indexing boosts and length normalization for this field.
- */
- public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
-
- public boolean isLazy() {
- return lazy;
- }
-
- /** Prints a Field for human consumption. */
- public final String toString() {
- StringBuffer result = new StringBuffer();
- if (isStored) {
- result.append("stored");
- if (isCompressed)
- result.append("/compressed");
- else
- result.append("/uncompressed");
- }
- if (isIndexed) {
- if (result.length() > 0)
- result.append(",");
- result.append("indexed");
- }
- if (isTokenized) {
- if (result.length() > 0)
- result.append(",");
- result.append("tokenized");
- }
- if (storeTermVector) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVector");
- }
- if (storeOffsetWithTermVector) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVectorOffsets");
- }
- if (storePositionWithTermVector) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVectorPosition");
- }
- if (isBinary) {
- if (result.length() > 0)
- result.append(",");
- result.append("binary");
- }
- if (omitNorms) {
- result.append(",omitNorms");
- }
- if (lazy){
- result.append(",lazy");
- }
- result.append('<');
- result.append(name);
- result.append(':');
-
- if (fieldsData != null && lazy == false) {
- result.append(fieldsData);
- }
-
- result.append('>');
- return result.toString();
- }
-}
Index: src/java/org/apache/lucene/document/Fieldable.java
===================================================================
--- src/java/org/apache/lucene/document/Fieldable.java (révision 446873)
+++ src/java/org/apache/lucene/document/Fieldable.java (copie de travail)
@@ -1,137 +1,450 @@
-package org.apache.lucene.document;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-import java.io.Serializable;
-
-/**
- * Synonymous with {@link Field}.
- *
- **/
-public interface Fieldable extends Serializable {
- /** Sets the boost factor hits on this field. This value will be
- * multiplied into the score of all hits on this this field of this
- * document.
- *
- *
- * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
- * containing this field. If a document has multiple fields with the same
- * name, all such values are multiplied together. This product is then
- * multipled by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
- * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
- * index. One should attempt to ensure that this product does not overflow
- * the range of that encoding.
- *
- * @see org.apache.lucene.document.Document#setBoost(float)
- * @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
- * @see org.apache.lucene.search.Similarity#encodeNorm(float)
- */
- void setBoost(float boost);
-
- /** Returns the boost factor for hits for this field.
- *
- * <p>The default value is 1.0.
- *
- * <p>Note: this value is not stored directly with the document in the index.
- * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
- * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when
- * this field was indexed.
- *
- * @see #setBoost(float)
- */
- float getBoost();
-
- /** Returns the name of the field as an interned string.
- * For example "date", "title", "body", ...
- */
- String name();
-
- /** The value of the field as a String, or null. If null, the Reader value
- * or binary value is used. Exactly one of stringValue(), readerValue(), and
- * binaryValue() must be set. */
- String stringValue();
-
- /** The value of the field as a Reader, or null. If null, the String value
- * or binary value is used. Exactly one of stringValue(), readerValue(),
- * and binaryValue() must be set. */
- Reader readerValue();
-
- /** The value of the field in Binary, or null. If null, the Reader or
- * String value is used. Exactly one of stringValue(), readerValue() and
- * binaryValue() must be set. */
- byte[] binaryValue();
-
- /** True iff the value of the field is to be stored in the index for return
- with search hits. It is an error for this to be true if a field is
- Reader-valued. */
- boolean isStored();
-
- /** True iff the value of the field is to be indexed, so that it may be
- searched on. */
- boolean isIndexed();
-
- /** True iff the value of the field should be tokenized as text prior to
- indexing. Un-tokenized fields are indexed as a single word and may not be
- Reader-valued. */
- boolean isTokenized();
-
- /** True if the value of the field is stored and compressed within the index */
- boolean isCompressed();
-
- /** True iff the term or terms used to index this field are stored as a term
- * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
- * These methods do not provide access to the original content of the field,
- * only to terms used to index it. If the original content must be
- * preserved, use the stored attribute instead.
- *
- * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
- */
- boolean isTermVectorStored();
-
- /**
- * True iff terms are stored as term vector together with their offsets
- * (start and end positon in source text).
- */
- boolean isStoreOffsetWithTermVector();
-
- /**
- * True iff terms are stored as term vector together with their token positions.
- */
- boolean isStorePositionWithTermVector();
-
- /** True iff the value of the filed is stored as binary */
- boolean isBinary();
-
- /** True if norms are omitted for this indexed field */
- boolean getOmitNorms();
-
- /** Expert:
- *
- * If set, omit normalization factors associated with this indexed field.
- * This effectively disables indexing boosts and length normalization for this field.
- */
- void setOmitNorms(boolean omitNorms);
-
- /**
- * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
- * it's values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that
- * retrieved the {@link Document} is still open.
- *
- * @return true if this field can be loaded lazily
- */
- boolean isLazy();
-}
+package org.apache.lucene.document;
+
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldReaderException;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ *
+ *
+ */
+public abstract class Fieldable {
+
+ private String name = "body";
+
+ private boolean storeTermVector = false;
+
+ private boolean storeOffsetWithTermVector = false;
+
+ private boolean storePositionWithTermVector = false;
+
+ private boolean omitNorms = false;
+
+ private boolean isIndexed = true;
+
+ private float boost = 1.0f;
+
+ private boolean isStored = true;
+
+ private boolean isTokenized = true;
+
+ private boolean isBinary = false;
+
+ // the one and only data object for all different kind of field values
+ private Object fieldData = null;
+
+ private boolean isLazy = false;
+
+ private IndexInput fieldsStream;
+
+ private long pointer;
+
+ private long toRead;
+
+ protected Fieldable(FieldInfo fi) {
+ this.name = fi.getId();
+ storeTermVector = fi.storeTermVector();
+ storeOffsetWithTermVector = fi.storeOffsetWithTermVector();
+ storePositionWithTermVector = fi.storePositionWithTermVector();
+ omitNorms = fi.omitNorms();
+ }
+
+ protected Fieldable(String name, String text, Field.Store store, Field.Index index, Field.TermVector termVector) {
+ this(name, (Object) text, store, index, termVector);
+ isBinary = false;
+ }
+
+ protected Fieldable(String name, byte[] data, Field.Store store, Field.Index index, Field.TermVector termVector) {
+ this(name, (Object) data, store, index, termVector);
+ isBinary = true;
+ }
+
+ protected Fieldable(String name, Reader reader, Field.Store store, Field.Index index, Field.TermVector termVector) {
+ this(name, (Object) reader, store, index, termVector);
+ isBinary = true;
+ }
+
+ private Fieldable(String name, Object data, Field.Store store, Field.Index index, Field.TermVector termVector) {
+ if (name == null)
+ throw new NullPointerException("name cannot be null");
+ if (data == null)
+ throw new NullPointerException("data cannot be null");
+
+ this.name = name.intern(); // field names are interned
+
+ fieldData = data;
+
+ if (index == Field.Index.NO && store == Field.Store.NO) {
+ throw new IllegalArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
+ }
+ if (index == Field.Index.NO && termVector != Field.TermVector.NO) {
+ throw new IllegalArgumentException("cannot store term vector information " + "for a field that is not indexed");
+ }
+
+ setStore(store);
+ setIndex(index);
+ setStoreTermVector(termVector);
+ }
+
+ /** Sets the boost factor hits on this field. This value will be
+ * multiplied into the score of all hits on this field of this
+ * document.
+ *
+ *
+ * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
+ * containing this field. If a document has multiple fields with the same
+ * name, all such values are multiplied together. This product is then
+ * multiplied by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
+ * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
+ * index. One should attempt to ensure that this product does not overflow
+ * the range of that encoding.
+ *
+ * @see org.apache.lucene.document.Document#setBoost(float)
+ * @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
+ * @see org.apache.lucene.search.Similarity#encodeNorm(float)
+ */
+ public void setBoost(float boost) {
+ this.boost = boost;
+ }
+
+ /** Returns the boost factor for hits for this field.
+ *
+ * <p>The default value is 1.0.
+ *
+ * <p>Note: this value is not stored directly with the document in the index.
+ * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
+ * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when
+ * this field was indexed.
+ *
+ * @see #setBoost(float)
+ */
+ public float getBoost() {
+ return boost;
+ }
+
+ /** Returns the name of the field as an interned string.
+ * For example "date", "title", "body", ...
+ */
+ public String name() {
+ return name;
+ }
+
+ protected void setStoreTermVector(Field.TermVector termVector) {
+ if (termVector == Field.TermVector.NO) {
+ this.storeTermVector = false;
+ this.storePositionWithTermVector = false;
+ this.storeOffsetWithTermVector = false;
+ } else if (termVector == Field.TermVector.YES) {
+ this.storeTermVector = true;
+ this.storePositionWithTermVector = false;
+ this.storeOffsetWithTermVector = false;
+ } else if (termVector == Field.TermVector.WITH_POSITIONS) {
+ this.storeTermVector = true;
+ this.storePositionWithTermVector = true;
+ this.storeOffsetWithTermVector = false;
+ } else if (termVector == Field.TermVector.WITH_OFFSETS) {
+ this.storeTermVector = true;
+ this.storePositionWithTermVector = false;
+ this.storeOffsetWithTermVector = true;
+ } else if (termVector == Field.TermVector.WITH_POSITIONS_OFFSETS) {
+ this.storeTermVector = true;
+ this.storePositionWithTermVector = true;
+ this.storeOffsetWithTermVector = true;
+ } else {
+ throw new IllegalArgumentException("unknown termVector parameter " + termVector);
+ }
+ }
+
+ protected void setIndex(Field.Index index) {
+ if (index == Field.Index.NO) {
+ isIndexed = false;
+ isTokenized = false;
+ } else if (index == Field.Index.TOKENIZED) {
+ isIndexed = true;
+ isTokenized = true;
+ } else if (index == Field.Index.UN_TOKENIZED) {
+ isIndexed = true;
+ isTokenized = false;
+ } else if (index == Field.Index.NO_NORMS) {
+ isIndexed = true;
+ isTokenized = false;
+ omitNorms = true;
+ } else {
+ throw new IllegalArgumentException("unknown index parameter " + index);
+ }
+ }
+
+ protected void setStore(Field.Store store) {
+ if (store == Field.Store.YES) {
+ isStored = true;
+ } else if (store == Field.Store.NO) {
+ if (isBinary()) {
+ throw new IllegalArgumentException("binary values can't be unstored");
+ }
+ isStored = false;
+ } else {
+ throw new IllegalArgumentException("unknown store parameter " + store);
+ }
+ }
+
+ /** True iff the value of the field is to be stored in the index for return
+ with search hits. It is an error for this to be true if a field is
+ Reader-valued. */
+ public final boolean isStored() {
+ return isStored;
+ }
+
+ protected final void setStored(boolean isStored) {
+ this.isStored = isStored;
+ }
+
+ /** True iff the value of the field is to be indexed, so that it may be
+ searched on. */
+ public final boolean isIndexed() {
+ return isIndexed;
+ }
+
+ protected final void setIndexed(boolean isIndexed) {
+ this.isIndexed = isIndexed;
+ }
+
+ /** True iff the value of the field should be tokenized as text prior to
+ indexing. Un-tokenized fields are indexed as a single word and may not be
+ Reader-valued. */
+ public final boolean isTokenized() {
+ return isTokenized;
+ }
+
+ protected final void setTokenized(boolean isTokenized) {
+ this.isTokenized = isTokenized;
+ }
+
+ /** True iff the term or terms used to index this field are stored as a term
+ * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
+ * These methods do not provide access to the original content of the field,
+ * only to terms used to index it. If the original content must be
+ * preserved, use the stored attribute instead.
+ *
+ * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
+ */
+ public final boolean isTermVectorStored() {
+ return storeTermVector;
+ }
+
+ /**
+ * True iff terms are stored as term vector together with their offsets
+ * (start and end position in source text).
+ */
+ public boolean isStoreOffsetWithTermVector() {
+ return storeOffsetWithTermVector;
+ }
+
+ /**
+ * True iff terms are stored as term vector together with their token positions.
+ */
+ public boolean isStorePositionWithTermVector() {
+ return storePositionWithTermVector;
+ }
+
+ /** True iff the value of the field is stored as binary */
+ public final boolean isBinary() {
+ return isBinary;
+ }
+
+ protected final void setBinary(boolean isBinary) {
+ this.isBinary = isBinary;
+ }
+
+ /** True if norms are omitted for this indexed field */
+ public boolean getOmitNorms() {
+ return omitNorms;
+ }
+
+ /** Expert:
+ *
+ * If set, omit normalization factors associated with this indexed field.
+ * This effectively disables indexing boosts and length normalization for this field.
+ */
+ public void setOmitNorms(boolean omitNorms) {
+ this.omitNorms = omitNorms;
+ }
+
+ /**
+ * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
+ * its values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that
+ * retrieved the {@link Document} is still open.
+ *
+ * @return true if this field can be loaded lazily
+ */
+ public boolean isLazy() {
+ return isLazy;
+ }
+
+ /** Prints a Field for human consumption. */
+ public String toString() {
+ StringBuffer result = new StringBuffer();
+ if (isStored()) {
+ result.append("stored");
+ }
+ if (isIndexed) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("indexed");
+ }
+ if (isTokenized()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("tokenized");
+ }
+ if (storeTermVector) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVector");
+ }
+ if (storeOffsetWithTermVector) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVectorOffsets");
+ }
+ if (storePositionWithTermVector) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVectorPosition");
+ }
+ if (isBinary()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("binary");
+ }
+ if (omitNorms) {
+ result.append(",omitNorms");
+ }
+ if (isLazy()) {
+ result.append(",lazy");
+ }
+ result.append('<');
+ result.append(name);
+ result.append(':');
+
+ result.append(fieldData);
+
+ result.append('>');
+ return result.toString();
+ }
+
+ /** The value of the field as a String, or null. If null, the Reader value
+ * or binary value is used. Exactly one of stringValue(), readerValue(), and
+ * binaryValue() must be set. */
+ public final String stringValue() {
+ if (isLazy && fieldData == null) {
+ readLazyData();
+ }
+ return fieldData instanceof String ? (String) fieldData : null;
+ }
+
+ /** The value of the field as a Reader, or null. If null, the String value
+ * or binary value is used. Exactly one of stringValue(), readerValue(),
+ * and binaryValue() must be set. */
+ public final Reader readerValue() {
+ if (isLazy && fieldData == null) {
+ readLazyData();
+ }
+ return fieldData instanceof Reader ? (Reader) fieldData : null;
+ }
+
+ /** The value of the field in Binary, or null. If null, the Reader or
+ * String value is used. Exactly one of stringValue(), readerValue() and
+ * binaryValue() must be set. */
+ public final byte[] binaryValue() {
+ if (isLazy && fieldData == null) {
+ readLazyData();
+ }
+ return fieldData instanceof byte[] ? (byte[]) fieldData : null;
+ }
+
+ /**
+ *
+ * @param fieldData the new data of the field
+ */
+ protected void setData(Object fieldData) {
+ this.fieldData = fieldData;
+ }
+
+ /**
+ *
+ * @return the data of the field
+ */
+ protected Object getData() {
+ return fieldData;
+ }
+
+ /**
+ * Load the field data from the stream
+ *
+ * @param in the stream to read
+ * @param skip if the data have to be stored, or just skipped from the stream
+ * @throws IOException
+ */
+ public abstract void readStream(IndexInput in, boolean skip) throws IOException;
+
+ private final void readLazyData() {
+ try {
+ fieldsStream.seek(pointer);
+ readStream(fieldsStream, false);
+ } catch (IOException e) {
+ throw new FieldReaderException(e);
+ }
+ }
+
+ /**
+ * Set this field as lazy loaded, and save the stream status
+ *
+ * FIXME : this function shouldn't be public, and only be called from FieldsReader. But as
+ * FieldsReader is not in the same package, and is not extending it, the only possible
+ * scope is 'public'
+ *
+ * @param fieldsStream the field stream
+ * @param pointer the pointer of the field data
+ * @param toRead the number of byte of the field data
+ */
+ public void setLazyData(IndexInput fieldsStream, long pointer, long toRead) {
+ isLazy = true;
+ this.fieldsStream = fieldsStream;
+ this.pointer = pointer;
+ this.toRead = toRead;
+ }
+
+ /**
+ * Write the lazy loaded field data directly in the specified output stream.
+ * If the field has not been loaded lazily, it throws an UnsupportedOperationException.
+ *
+ * @param out the stream to write in
+ * @throws IOException in case of write error
+ */
+ public final void writeFromLazyLoading(IndexOutput out) throws IOException {
+ if (!isLazy) {
+ throw new UnsupportedOperationException("The field have to be load lazily to copy it directly");
+ }
+ fieldsStream.seek(pointer);
+ out.writeBytes(fieldsStream, toRead);
+ }
+}
Index: src/java/org/apache/lucene/document/Document.java
===================================================================
--- src/java/org/apache/lucene/document/Document.java (révision 449380)
+++ src/java/org/apache/lucene/document/Document.java (copie de travail)
@@ -36,8 +36,8 @@
* IndexReader#document(int)}.
*/
-public final class Document implements java.io.Serializable {
- List fields = new Vector();
+public class Document implements java.io.Serializable {
+ protected List fields = new Vector();
private float boost = 1.0f;
/** Constructs a new document with no fields. */