Index: src/test/org/apache/lucene/store/IndexInputTest.java
===================================================================
--- src/test/org/apache/lucene/store/IndexInputTest.java (révision 0)
+++ src/test/org/apache/lucene/store/IndexInputTest.java (révision 0)
@@ -0,0 +1,104 @@
+package org.apache.lucene.store;
+
+import junit.framework.TestCase;
+
+public class IndexInputTest extends TestCase {
+  // Round trip under test: write a value to file A, copy it into file B with the output overload that takes an input stream, read it back from B.
+  public void testInt() throws Exception {
+    genericTestInt(0);
+    genericTestInt(1);
+    genericTestInt(-1);
+    genericTestInt(Integer.MAX_VALUE); // boundary values
+    genericTestInt(Integer.MIN_VALUE);
+  }
+
+  public void testVInt() throws Exception {
+    genericTestVInt(0);
+    genericTestVInt(1);
+    genericTestVInt(-1);
+    genericTestVInt(Integer.MAX_VALUE); // boundary values
+    genericTestVInt(Integer.MIN_VALUE);
+  }
+
+  public void testLong() throws Exception {
+    genericTestLong(0);
+    genericTestLong(1);
+    genericTestLong(-1);
+    genericTestLong(Long.MAX_VALUE); // boundary values
+    genericTestLong(Long.MIN_VALUE);
+  }
+
+  public void testVLong() throws Exception {
+    genericTestVLong(0);
+    genericTestVLong(1);
+    genericTestVLong(-1);
+    genericTestVLong(Long.MAX_VALUE); // boundary values
+    genericTestVLong(Long.MIN_VALUE);
+  }
+
+  public void testString() throws Exception {
+    genericTestString("");
+    genericTestString("a");
+    genericTestString("GiyNNKHhnivNKKHgcNiCniCH716534912\u00e9_\u00e8'-(\u00e9(_\u00e7\u00e0-\u00e9$*\u00f9!:;,!:;,"); // accented chars are escaped so the test does not depend on the source file encoding
+  }
+
+  private void genericTestInt(int i) throws Exception {
+    RAMFile fileA = new RAMFile();
+    RAMFile fileB = new RAMFile();
+    RAMOutputStream outA = new RAMOutputStream(fileA);
+    outA.writeInt(i); // reference encoding of the value
+    outA.close();
+    RAMOutputStream outB = new RAMOutputStream(fileB);
+    outB.writeInt(new RAMInputStream(fileA)); // stream-to-stream copy under test
+    outB.close();
+    assertEquals(i, new RAMInputStream(fileB).readInt()); // the copy must decode to the original value
+  }
+
+  private void genericTestVInt(int i) throws Exception {
+    RAMFile fileA = new RAMFile();
+    RAMFile fileB = new RAMFile();
+    RAMOutputStream outA = new RAMOutputStream(fileA);
+    outA.writeVInt(i); // reference encoding of the value
+    outA.close();
+    RAMOutputStream outB = new RAMOutputStream(fileB);
+    outB.writeVInt(new RAMInputStream(fileA)); // stream-to-stream copy under test
+    outB.close();
+    assertEquals(i, new RAMInputStream(fileB).readVInt()); // the copy must decode to the original value
+  }
+
+  private void genericTestLong(long l) throws Exception {
+    RAMFile fileA = new RAMFile();
+    RAMFile fileB = new RAMFile();
+    RAMOutputStream outA = new RAMOutputStream(fileA);
+    outA.writeLong(l); // reference encoding of the value
+    outA.close();
+    RAMOutputStream outB = new RAMOutputStream(fileB);
+    outB.writeLong(new RAMInputStream(fileA)); // stream-to-stream copy under test
+    outB.close();
+    assertEquals(l, new RAMInputStream(fileB).readLong()); // the copy must decode to the original value
+  }
+
+  private void genericTestVLong(long l) throws Exception {
+    RAMFile fileA = new RAMFile();
+    RAMFile fileB = new RAMFile();
+    RAMOutputStream outA = new RAMOutputStream(fileA);
+    outA.writeVLong(l); // reference encoding of the value
+    outA.close();
+    RAMOutputStream outB = new RAMOutputStream(fileB);
+    outB.writeVLong(new RAMInputStream(fileA)); // stream-to-stream copy under test
+    outB.close();
+    assertEquals(l, new RAMInputStream(fileB).readVLong()); // the copy must decode to the original value
+  }
+
+  private void genericTestString(String s) throws Exception {
+    RAMFile fileA = new RAMFile();
+    RAMFile fileB = new RAMFile();
+    RAMOutputStream outA = new RAMOutputStream(fileA);
+    outA.writeString(s); // reference encoding of the value
+    outA.close();
+    RAMOutputStream outB = new RAMOutputStream(fileB);
+    outB.writeString(new RAMInputStream(fileA)); // stream-to-stream copy under test
+    outB.close();
+    assertEquals(s, new RAMInputStream(fileB).readString()); // the copy must decode to the original value
+  }
+}
Index: src/test/org/apache/lucene/index/TestFieldsReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestFieldsReader.java (révision 493446)
+++ src/test/org/apache/lucene/index/TestFieldsReader.java (copie de travail)
@@ -59,7 +59,7 @@
public void test() throws IOException {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
- FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+ FieldsReader reader = dir.getIndexFormat().getFieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Document doc = reader.doc(0, null);
@@ -89,7 +89,7 @@
public void testLazyFields() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
- FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+ FieldsReader reader = dir.getIndexFormat().getFieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Set loadFieldNames = new HashSet();
@@ -137,7 +137,7 @@
public void testLoadFirst() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
- FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+ FieldsReader reader = dir.getIndexFormat().getFieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
@@ -183,7 +183,7 @@
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames);
for (int i = 0; i < length; i++) {
- reader = new FieldsReader(tmpDir, "test", fieldInfos);
+ reader = tmpDir.getIndexFormat().getFieldsReader(tmpDir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
@@ -207,7 +207,7 @@
doc = null;
//Hmmm, are we still in cache???
System.gc();
- reader = new FieldsReader(tmpDir, "test", fieldInfos);
+ reader = tmpDir.getIndexFormat().getFieldsReader(tmpDir, "test", fieldInfos);
doc = reader.doc(0, fieldSelector);
field = doc.getFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
assertTrue("field is not lazy", field.isLazy() == true);
Index: src/java/org/apache/lucene/index/FieldInfo.java
===================================================================
--- src/java/org/apache/lucene/index/FieldInfo.java (révision 493446)
+++ src/java/org/apache/lucene/index/FieldInfo.java (copie de travail)
@@ -17,7 +17,7 @@
* limitations under the License.
*/
-final class FieldInfo {
+final public class FieldInfo {
String name;
boolean isIndexed;
int number;
@@ -39,4 +39,28 @@
this.storePositionWithTermVector = storePositionWithTermVector;
this.omitNorms = omitNorms;
}
+
+ public String getName() {
+ return name;
+ }
+
+ public boolean storeTermVector() {
+ return storeTermVector;
+ }
+
+ public boolean storeOffsetWithTermVector() {
+ return storeOffsetWithTermVector;
+ }
+
+ public boolean storePositionWithTermVector() {
+ return storePositionWithTermVector;
+ }
+
+ public boolean omitNorms() {
+ return omitNorms;
+ }
+
+ public boolean isIndexed() {
+ return isIndexed;
+ }
}
Index: src/java/org/apache/lucene/index/CompoundFileReader.java
===================================================================
--- src/java/org/apache/lucene/index/CompoundFileReader.java (révision 493446)
+++ src/java/org/apache/lucene/index/CompoundFileReader.java (copie de travail)
@@ -55,6 +55,7 @@
throws IOException
{
directory = dir;
+ indexFormat = dir.getIndexFormat();
fileName = name;
boolean success = false;
Index: src/java/org/apache/lucene/index/DefaultFieldsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DefaultFieldsWriter.java (révision 0)
+++ src/java/org/apache/lucene/index/DefaultFieldsWriter.java (révision 0)
@@ -0,0 +1,152 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.zip.Deflater;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * The default implementation of FieldsWriter
+ *
+ * $Id$
+ */
+public class DefaultFieldsWriter extends FieldsWriter {
+
+ protected DefaultFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+ super(d, segment, fn);
+ }
+
+ /**
+ * There is no data stored at the document level
+ */
+ protected void writeDocumentData(IndexOutput out, Document doc) throws IOException {
+ //nothing to write
+ }
+
+  /**
+   * Write one stored field. A lazy field whose value has not been loaded yet is copied
+   * directly from its source stream when it is binary compatible with this format; otherwise
+   * the flag byte and the (optionally compressed) value are serialized.
+   *
+   * @throws IOException in case of read or write error
+   */
+  protected void writeFieldData(FieldData fieldData, IndexOutput out) throws IOException {
+    // Once a lazy value has been read its source stream is released, so the direct copy is
+    // only possible while the data is still unloaded.
+    if (fieldData.isLazy() && fieldData.getData() == null && isBinaryCompatible(fieldData)) {
+      fieldData.writeFromLazyLoading(out);
+      return;
+    }
+
+    final boolean binary = fieldData.isBinary();
+    final boolean compressed =
+        fieldData instanceof DefaultFieldData && ((DefaultFieldData) fieldData).isCompressed();
+
+    byte bits = 0;
+    if (fieldData.isTokenized()) {
+      bits |= DefaultFieldData.FIELD_IS_TOKENIZED;
+    }
+    if (binary) {
+      bits |= DefaultFieldData.FIELD_IS_BINARY;
+    }
+    if (compressed) {
+      bits |= DefaultFieldData.FIELD_IS_COMPRESSED;
+    }
+    out.writeByte(bits);
+
+    if (!binary && !compressed) {
+      // uncompressed text is written as a string
+      out.writeString(fieldData.stringValue());
+      return;
+    }
+
+    // binary and/or compressed values are written as a length-prefixed byte array
+    byte[] bdata = binary ? fieldData.binaryValue() : fieldData.stringValue().getBytes("UTF-8");
+    if (compressed) {
+      bdata = compress(bdata);
+    }
+    out.writeVInt(bdata.length);
+    out.writeBytes(bdata, bdata.length);
+  }
+
+ /**
+ * Test if the specified field is binary compatible with the current format, which
+ * allows a direct copy from the lazy loaded field into an index
+ *
+ * @param field the field to test
+ * @return true if it is compatible
+ */
+ protected boolean isBinaryCompatible(FieldData field) {
+ return field instanceof DefaultFieldData;
+ }
+
+ /**
+ * To be overridden by subclasses to choose a different level of compression
+ *
+ * @return the compression level
+ */
+ protected int getCompressionLevel() {
+ return Deflater.BEST_COMPRESSION;
+ }
+
+  /**
+   * Do the compression of data, at the level given by getCompressionLevel().
+   *
+   * To be overridden by subclasses to use a different format of compression. If overridden, you
+   * probably should also override isBinaryCompatible and the decompress function of
+   * DefaultFieldsReader.
+   *
+   * @param input the data to compress
+   * @return the compressed data
+   */
+  protected byte[] compress(byte[] input) {
+    // Create the compressor; the level is applied below so that subclasses can change it
+    Deflater compressor = new Deflater();
+    try {
+      compressor.setLevel(getCompressionLevel());
+
+      // Give the compressor the data to compress
+      compressor.setInput(input);
+      compressor.finish();
+
+      /*
+       * Create an expandable byte array to hold the compressed data.
+       * You cannot use an array that's the same size as the original because
+       * there is no guarantee that the compressed data will be smaller than
+       * the uncompressed data.
+       */
+      ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
+
+      // Compress the data
+      byte[] buf = new byte[1024];
+      while (!compressor.finished()) {
+        int count = compressor.deflate(buf);
+        bos.write(buf, 0, count);
+      }
+      // Get the compressed data
+      return bos.toByteArray();
+    } finally {
+      compressor.end(); // release the compressor even when compression fails
+    }
+  }
+
+}
Index: src/java/org/apache/lucene/index/FieldData.java
===================================================================
--- src/java/org/apache/lucene/index/FieldData.java (révision 0)
+++ src/java/org/apache/lucene/index/FieldData.java (révision 0)
@@ -0,0 +1,255 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.Serializable;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * The data of field
+ */
+public abstract class FieldData implements Serializable {
+
+  private boolean isBinary = false;
+
+  private boolean isTokenized = false;
+
+  // the one and only data object for all different kind of field values
+  private Object fieldData = null;
+
+  private boolean isLazy = false;
+  // Stream kept for lazy loading; transient because this class is Serializable and an open index stream is not serializable state.
+  private transient IndexInput fieldsStream;
+  // position in fieldsStream where the data of this field starts (see setLazyData)
+  private long pointer;
+  // number of bytes of the data of this field in fieldsStream (see setLazyData)
+  private long toRead;
+
+ /**
+ * This constructor should only be used when retrieving data from an index
+ *
+ */
+ protected FieldData() {
+ // nothing to initialize yet. The properties should be set via #setLazyData() and then
+ // by #readLazyData()
+ }
+
+ protected FieldData(String text) {
+ this((Object) text);
+ isBinary = false;
+ }
+
+ protected FieldData(byte[] data) {
+ this((Object) data);
+ isBinary = true;
+ }
+
+ protected FieldData(Reader reader) {
+ this((Object) reader);
+ isBinary = false;
+ }
+
+ private FieldData(Object data) {
+ if (data == null)
+ throw new NullPointerException("data cannot be null");
+
+ fieldData = data;
+ }
+
+ /**
+ * @return true iff the value of the field is stored as binary
+ */
+ public final boolean isBinary() {
+ return isBinary;
+ }
+
+ protected final void setBinary(boolean isBinary) {
+ this.isBinary = isBinary;
+ }
+
+ /**
+ * @return true iff the value of the field is tokenized
+ */
+ public final boolean isTokenized() {
+ return isTokenized;
+ }
+
+ /**
+ * Set the tokenization status of the field data
+ *
+ * @param isTokenized
+ */
+ public final void setTokenized(boolean isTokenized) {
+ this.isTokenized = isTokenized;
+ }
+
+ /**
+ * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
+ * it's values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that
+ * retrieved the {@link Document} is still open.
+ *
+ * @return true if this field can be loaded lazily
+ */
+ public boolean isLazy() {
+ return isLazy;
+ }
+
+ /**
+ * The value of the field as a {@link String}, or null. If null,
+ * the {@link Reader} value or binary value is used. Exactly one of {@link #stringValue()},
+ * {@link #readerValue()}, and {@link #binaryValue()} must be set.
+ *
+ * @return the string value of the field
+ */
+ public final String stringValue() {
+ if (isLazy && fieldData == null) {
+ readLazyData();
+ }
+ return fieldData instanceof String ? (String) fieldData : null;
+ }
+
+ /**
+ * The value of the field as a {@link Reader}, or null. If null,
+ * the {@link String} value or binary value is used. Exactly one of {@link #stringValue()},
+ * {@link #readerValue()}, and {@link #binaryValue()} must be set.
+ *
+ * @return the reader value
+ */
+ public final Reader readerValue() {
+ if (isLazy && fieldData == null) {
+ readLazyData();
+ }
+ return fieldData instanceof Reader ? (Reader) fieldData : null;
+ }
+
+ /**
+ * The value of the field in Binary, or null. If null, the
+ * {@link Reader} or {@link String} value is used. Exactly one of {@link #stringValue()},
+ * {@link #readerValue()} and {@link #binaryValue()} must be set.
+ *
+ * @return the binary value
+ */
+ public final byte[] binaryValue() {
+ if (isLazy && fieldData == null) {
+ readLazyData();
+ }
+ return fieldData instanceof byte[] ? (byte[]) fieldData : null;
+ }
+
+ /**
+ *
+ * @param fieldData the new data of the field
+ */
+ protected void setData(Object fieldData) {
+ this.fieldData = fieldData;
+ }
+
+ /**
+ *
+ * @return the data of the field
+ */
+ protected Object getData() {
+ return fieldData;
+ }
+
+ /**
+ * Load the field data from the stream
+ *
+ * @param in the stream to read
+ * @param skip if the data have to be stored, or just skipped from the stream
+ * @throws IOException
+ */
+ public abstract void readStream(IndexInput in, boolean skip) throws IOException;
+
+  /** Reads the lazy value from the saved stream position. The stream is single-use and always closed here; a close failure never hides a read failure. */
+  private final void readLazyData() {
+    final IndexInput in = fieldsStream;
+    if (in == null) throw new IllegalStateException("lazy field stream is no longer available");
+    fieldsStream = null; // single use: the stream is closed below whether or not the read succeeds
+    boolean success = false;
+    try {
+      in.seek(pointer);
+      readStream(in, false);
+      success = true;
+    } catch (IOException e) {
+      throw new FieldReaderException(e);
+    } finally {
+      try { in.close(); } catch (IOException e) { if (success) throw new FieldReaderException(e); }
+    }
+  }
+
+ /**
+ * Set this field as lazy loaded, and save the stream status
+ *
+ * @param fieldsStream the field stream
+ * @param pointer the pointer of the field data
+ * @param toRead the number of byte of the field data
+ */
+ final void setLazyData(IndexInput fieldsStream, long pointer, long toRead) {
+ isLazy = true;
+ this.fieldsStream = fieldsStream;
+ this.pointer = pointer;
+ this.toRead = toRead;
+ }
+
+ /**
+ * Closes the stream kept open for lazy loading, if this field still holds one.
+ * Does nothing when no stream is held (the field is not lazy, or its stream was already released).
+ * @throws IOException if closing the underlying stream fails
+ */
+ public void close() throws IOException {
+ if (fieldsStream != null) {
+ fieldsStream.close(); // NOTE(review): the reference is not cleared, so a second call closes the same stream again
+ }
+ }
+
+  /**
+   * Write the lazy loaded field data directly in the specified output stream. The source stream is closed afterwards.
+   *
+   * @param out the stream to write in
+   * @throws IOException in case of read or write error
+   * @throws UnsupportedOperationException if the field has not been loaded lazily
+   * @throws IllegalStateException if the lazy stream has already been consumed
+   */
+  final void writeFromLazyLoading(IndexOutput out) throws IOException {
+    if (!isLazy) {
+      throw new UnsupportedOperationException("The field have to be load lazily to copy it directly");
+    }
+    final IndexInput in = fieldsStream;
+    if (in == null) throw new IllegalStateException("The lazy stream of the field has already been consumed");
+    fieldsStream = null; // single use: released whether or not the copy succeeds
+    try {
+      in.seek(pointer);
+      out.writeBytes(in, toRead);
+    } finally {
+      in.close();
+    }
+  }
+
+ public String toString() { // string value of a non-lazy field, or null (see notes below)
+ if (isLazy) {
+ return null; // NOTE(review): a lazy field returns null without loading its value; callers must handle null
+ }
+ return stringValue(); // null as well when the data is not a String (binary or Reader value)
+ }
+}
Index: src/java/org/apache/lucene/index/IndexFormat.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFormat.java (révision 0)
+++ src/java/org/apache/lucene/index/IndexFormat.java (révision 0)
@@ -0,0 +1,88 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Specify the format of index.
+ *
+ * The implementation of the {@link FieldsReader} and {@link FieldsWriter} returned by
+ * the function getFieldsReader and getFieldsWriter will specify how the data of fields are
+ * serialized, and also the kind of {@link Fieldable} used.
+ *
+ * $Id$
+ */
+public interface IndexFormat {
+
+ /**
+ * Returns all filename extensions used by Lucene's index files, with
+ * one exception, namely the extension made up from .f + a number.
+ * Also note that two of Lucene's files (deletable and
+ * segments) don't have any filename extension.
+ *
+ * @return a List of String
+ */
+ List getIndexExtensions();
+
+ /**
+ * Returns the file extensions of old-style index files
+ *
+ * @return a List of String
+ */
+ List getCompoundExtensions();
+
+ /**
+ * Returns the file extensions for term vector support
+ *
+ * @return a List of String
+ */
+ List getVectorExtensions();
+
+ /**
+ * Return an implementation of FieldsReader for this format
+ *
+ * @param d the directory to use
+ * @param segment the segment name
+ * @param fn the infos on fields
+ * @return the implementation of FieldsReader
+ * @throws IOException
+ */
+ FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException;
+
+ /**
+ * Return an implementation of FieldsWriter for this format
+ *
+ * @param d the directory to use
+ * @param segment the segment name
+ * @param fn the infos on fields
+ * @return the implementation of FieldsWriter
+ * @throws IOException
+ */
+ FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException;
+
+ /**
+ * Return the filter of index file names for this format
+ * @return the index file name filter associated to this index format
+ */
+ IndexFileNameFilter getIndexFileNameFilter();
+}
Index: src/java/org/apache/lucene/index/DefaultFieldData.java
===================================================================
--- src/java/org/apache/lucene/index/DefaultFieldData.java (révision 0)
+++ src/java/org/apache/lucene/index/DefaultFieldData.java (révision 0)
@@ -0,0 +1,155 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.zip.DataFormatException;
+import java.util.zip.Inflater;
+
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * The default implementation of {@link FieldData}
+ *
+ */
+public class DefaultFieldData extends FieldData {
+
+ DefaultFieldData() {
+ super();
+ }
+
+ /**
+ * Constructor for string data
+ *
+ * @param text the string data
+ */
+ public DefaultFieldData(String text) {
+ super(text);
+ }
+
+ /**
+ * Constructor for blob data
+ *
+ * @param data the blob data
+ */
+ public DefaultFieldData(byte[] data) {
+ super(data);
+ }
+
+ /**
+ * Constructor for data got from a reader
+ *
+ * @param reader the data's reader
+ */
+ public DefaultFieldData(Reader reader) {
+ super(reader);
+ }
+
+ private boolean isCompressed;
+
+ /**
+ * @return true if the value of the field is stored and compressed within the index
+ */
+ public final boolean isCompressed() {
+ return isCompressed;
+ }
+
+ /**
+ * Set the compress status of the data
+ *
+ * @param isCompressed
+ */
+ public void setCompressed(boolean isCompressed) {
+ this.isCompressed = isCompressed;
+ }
+
+ public static final byte FIELD_IS_TOKENIZED = 0x1;
+
+ public static final byte FIELD_IS_BINARY = 0x2;
+
+ public static final byte FIELD_IS_COMPRESSED = 0x4;
+
+  /**
+   * Read one stored field from the stream: a flag byte, then the value. When skip is true
+   * the value is stepped over instead of being kept.
+   */
+  public void readStream(IndexInput in, boolean skip) throws IOException {
+    final byte flags = in.readByte();
+    isCompressed = (flags & FIELD_IS_COMPRESSED) != 0;
+    setTokenized((flags & FIELD_IS_TOKENIZED) != 0);
+    setBinary((flags & FIELD_IS_BINARY) != 0);
+
+    // binary and compressed values are length-prefixed byte arrays, anything else is a string
+    final boolean storedAsBytes = isBinary() || isCompressed();
+    if (skip) {
+      final int length = in.readVInt();
+      if (storedAsBytes) {
+        // the length counts bytes: move the pointer ahead by that many positions
+        in.seek(in.getFilePointer() + length);
+      } else {
+        // the length counts chars: skip ahead by the length of what is stored
+        in.skipChars(length);
+      }
+      return;
+    }
+
+    if (!storedAsBytes) {
+      setData(in.readString());
+      return;
+    }
+
+    final byte[] raw = new byte[in.readVInt()];
+    in.readBytes(raw, 0, raw.length);
+    final byte[] value = isCompressed() ? uncompress(raw) : raw;
+    if (isBinary()) {
+      setData(value);
+    } else {
+      setData(new String(value, "UTF-8"));
+    }
+  }
+
+  /**
+   * Inflate compressed field data; an IOException reports data that are truncated or not valid compressed data.
+   */
+  protected byte[] uncompress(final byte[] input) throws IOException {
+    Inflater decompressor = new Inflater();
+    try {
+      decompressor.setInput(input);
+      ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
+      byte[] buf = new byte[1024];
+      while (!decompressor.finished()) {
+        int count = decompressor.inflate(buf);
+        // zero bytes without reaching the end means the inflater is stuck: looping again would never terminate
+        if (count == 0 && (decompressor.needsInput() || decompressor.needsDictionary())) {
+          throw new IOException("field data are in wrong format: truncated data or preset dictionary required");
+        }
+        bos.write(buf, 0, count);
+      }
+      return bos.toByteArray();
+    } catch (DataFormatException e) {
+      // this will happen if the field is not compressed
+      IOException newException = new IOException("field data are in wrong format: " + e.toString());
+      newException.initCause(e);
+      throw newException;
+    } finally {
+      decompressor.end(); // release the inflater on every path
+    }
+  }
+}
Index: src/java/org/apache/lucene/index/DefaultIndexFormat.java
===================================================================
--- src/java/org/apache/lucene/index/DefaultIndexFormat.java (révision 0)
+++ src/java/org/apache/lucene/index/DefaultIndexFormat.java (révision 0)
@@ -0,0 +1,96 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.store.Directory;
+
+/**
+ * The default implementation of the index format
+ *
+ * $Id$
+ */
+public class DefaultIndexFormat implements IndexFormat {
+
+ protected static final List INDEX_EXTENSIONS = Arrays.asList(new String[] { "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "gen" });
+
+ protected static final List COMPOUND_EXTENSIONS = Arrays.asList(new String[] { "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis" });
+
+ protected static final List VECTOR_EXTENSIONS = Arrays.asList(new String[] { "tvx", "tvd", "tvf" });
+
+ private ArrayList indexExtensions;
+
+ private ArrayList compoundExtensions;
+
+ private IndexFileNameFilter indexFileNameFilter;
+
+ /**
+ * Builds the extension lists: the index list adds the vector and additional extensions, the compound list only the additional ones.
+ * getVectorExtensions() and getAdditionnalExtensions() can be overridden and are called here, before a subclass' own fields are initialized.
+ */
+ public DefaultIndexFormat() {
+ indexExtensions = new ArrayList(INDEX_EXTENSIONS);
+ indexExtensions.addAll(getVectorExtensions());
+ indexExtensions.addAll(getAdditionnalExtensions());
+
+ compoundExtensions = new ArrayList(COMPOUND_EXTENSIONS);
+ compoundExtensions.addAll(getAdditionnalExtensions());
+ }
+
+ protected List getAdditionnalExtensions() {
+ return Collections.EMPTY_LIST;
+ }
+
+ public List getIndexExtensions() {
+ return indexExtensions;
+ }
+
+ public List getCompoundExtensions() {
+ return compoundExtensions;
+ }
+
+ public List getVectorExtensions() {
+ return VECTOR_EXTENSIONS;
+ }
+
+ /**
+ * Use the default implementation of FieldsReader : DefaultFieldsReader
+ */
+ public FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+ return new DefaultFieldsReader(d, segment, fn);
+ }
+
+ /**
+ * Use the default implementation of FieldsWriter : DefaultFieldsWriter
+ */
+ public FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+ return new DefaultFieldsWriter(d, segment, fn);
+ }
+
+ public IndexFileNameFilter getIndexFileNameFilter() {
+ if (indexFileNameFilter == null) {
+ indexFileNameFilter = new IndexFileNameFilter(this);
+ }
+ return indexFileNameFilter;
+ }
+}
Index: src/java/org/apache/lucene/index/FieldsReader.java
===================================================================
--- src/java/org/apache/lucene/index/FieldsReader.java (révision 493446)
+++ src/java/org/apache/lucene/index/FieldsReader.java (copie de travail)
@@ -17,15 +17,9 @@
* limitations under the License.
*/
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.io.Reader;
-import java.util.zip.DataFormatException;
-import java.util.zip.Inflater;
-import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
@@ -39,7 +33,7 @@
*
* @version $Id$
*/
-final class FieldsReader {
+public abstract class FieldsReader {
private final FieldInfos fieldInfos;
// The main fieldStream, used only for cloning.
@@ -51,10 +45,9 @@
private final IndexInput indexStream;
private int size;
-
private ThreadLocal fieldsStreamTL = new ThreadLocal();
- FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+ protected FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
fieldInfos = fn;
cloneableFieldsStream = d.openInput(segment + ".fdt");
@@ -69,7 +62,7 @@
*
* @throws IOException
*/
- final void close() throws IOException {
+ protected void close() throws IOException {
fieldsStream.close();
cloneableFieldsStream.close();
indexStream.close();
@@ -89,349 +82,44 @@
long position = indexStream.readLong();
fieldsStream.seek(position);
- Document doc = new Document();
+ Document doc = createDocument(fieldsStream);
+
int numFields = fieldsStream.readVInt();
for (int i = 0; i < numFields; i++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
- boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true;
-
- byte bits = fieldsStream.readByte();
- boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
- boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
- boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
- if (acceptField.equals(FieldSelectorResult.LOAD) == true) {
- addField(doc, fi, binary, compressed, tokenize);
- }
- else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE) == true) {
- addFieldForMerge(doc, fi, binary, compressed, tokenize);
- }
- else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){
- addField(doc, fi, binary, compressed, tokenize);
- break;//Get out of this loop
- }
- else if (lazy == true){
- addFieldLazy(doc, fi, binary, compressed, tokenize);
- }
- else {
- skipField(binary, compressed);
- }
- }
- return doc;
- }
+ FieldData fieldData = createFieldData(fi);
- /**
- * Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
- * This will have the most payoff on large fields.
- */
- private void skipField(boolean binary, boolean compressed) throws IOException {
-
- int toRead = fieldsStream.readVInt();
-
- if (binary || compressed) {
+ boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD);
+ boolean skip = acceptField.equals(FieldSelectorResult.NO_LOAD);
+
long pointer = fieldsStream.getFilePointer();
- fieldsStream.seek(pointer + toRead);
- } else {
- //We need to skip chars. This will slow us down, but still better
- fieldsStream.skipChars(toRead);
- }
- }
- private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
- if (binary == true) {
- int toRead = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- if (compressed) {
- //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
- doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
- } else {
- //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
- doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
- }
- //Need to move the pointer ahead by toRead positions
- fieldsStream.seek(pointer + toRead);
- } else {
- Field.Store store = Field.Store.YES;
- Field.Index index = getIndexType(fi, tokenize);
- Field.TermVector termVector = getTermVectorType(fi);
+ fieldData.readStream(fieldsStream, skip || lazy);
- Fieldable f;
- if (compressed) {
- store = Field.Store.COMPRESS;
- int toRead = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- f = new LazyField(fi.name, store, toRead, pointer);
- //skip over the part that we aren't loading
- fieldsStream.seek(pointer + toRead);
- f.setOmitNorms(fi.omitNorms);
- } else {
- int length = fieldsStream.readVInt();
- long pointer = fieldsStream.getFilePointer();
- //Skip ahead of where we are by the length of what is stored
- fieldsStream.skipChars(length);
- f = new LazyField(fi.name, store, index, termVector, length, pointer);
- f.setOmitNorms(fi.omitNorms);
+ if (lazy) {
+ fieldData.setLazyData((IndexInput) fieldsStream.clone(), pointer, fieldsStream.getFilePointer() - pointer);
}
- doc.add(f);
- }
- }
+ Fieldable field = createFieldable(fi, fieldData);
- // in merge mode we don't uncompress the data of a compressed field
- private void addFieldForMerge(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
- Object data;
-
- if (binary || compressed) {
- int toRead = fieldsStream.readVInt();
- final byte[] b = new byte[toRead];
- fieldsStream.readBytes(b, 0, b.length);
- data = b;
- } else {
- data = fieldsStream.readString();
- }
-
- doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
- }
-
- private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
-
- //we have a binary stored field, and it may be compressed
- if (binary) {
- int toRead = fieldsStream.readVInt();
- final byte[] b = new byte[toRead];
- fieldsStream.readBytes(b, 0, b.length);
- if (compressed)
- doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
- else
- doc.add(new Field(fi.name, b, Field.Store.YES));
-
- } else {
- Field.Store store = Field.Store.YES;
- Field.Index index = getIndexType(fi, tokenize);
- Field.TermVector termVector = getTermVectorType(fi);
-
- Fieldable f;
- if (compressed) {
- store = Field.Store.COMPRESS;
- int toRead = fieldsStream.readVInt();
-
- final byte[] b = new byte[toRead];
- fieldsStream.readBytes(b, 0, b.length);
- f = new Field(fi.name, // field name
- new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
- store,
- index,
- termVector);
- f.setOmitNorms(fi.omitNorms);
- } else {
- f = new Field(fi.name, // name
- fieldsStream.readString(), // read value
- store,
- index,
- termVector);
- f.setOmitNorms(fi.omitNorms);
+ if (!skip) {
+ doc.add(field);
}
- doc.add(f);
- }
- }
-
- private Field.TermVector getTermVectorType(FieldInfo fi) {
- Field.TermVector termVector = null;
- if (fi.storeTermVector) {
- if (fi.storeOffsetWithTermVector) {
- if (fi.storePositionWithTermVector) {
- termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
- } else {
- termVector = Field.TermVector.WITH_OFFSETS;
- }
- } else if (fi.storePositionWithTermVector) {
- termVector = Field.TermVector.WITH_POSITIONS;
- } else {
- termVector = Field.TermVector.YES;
+ if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)) {
+ break;
}
- } else {
- termVector = Field.TermVector.NO;
}
- return termVector;
- }
- private Field.Index getIndexType(FieldInfo fi, boolean tokenize) {
- Field.Index index;
- if (fi.isIndexed && tokenize)
- index = Field.Index.TOKENIZED;
- else if (fi.isIndexed && !tokenize)
- index = Field.Index.UN_TOKENIZED;
- else
- index = Field.Index.NO;
- return index;
+ return doc;
}
- /**
- * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is
- * loaded.
- */
- private class LazyField extends AbstractField implements Fieldable {
- private int toRead;
- private long pointer;
+ protected abstract Document createDocument(IndexInput in);
- public LazyField(String name, Field.Store store, int toRead, long pointer) {
- super(name, store, Field.Index.NO, Field.TermVector.NO);
- this.toRead = toRead;
- this.pointer = pointer;
- lazy = true;
- }
+ protected abstract FieldData createFieldData(FieldInfo fi);
- public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) {
- super(name, store, index, termVector);
- this.toRead = toRead;
- this.pointer = pointer;
- lazy = true;
- }
+ protected abstract Fieldable createFieldable(FieldInfo fi, FieldData fieldData);
- private IndexInput getFieldStream() {
- IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
- if (localFieldsStream == null) {
- localFieldsStream = (IndexInput) cloneableFieldsStream.clone();
- fieldsStreamTL.set(localFieldsStream);
- }
- return localFieldsStream;
- }
-
- /**
- * The value of the field in Binary, or null. If null, the Reader or
- * String value is used. Exactly one of stringValue(), readerValue() and
- * binaryValue() must be set.
- */
- public byte[] binaryValue() {
- if (fieldsData == null) {
- final byte[] b = new byte[toRead];
- IndexInput localFieldsStream = getFieldStream();
- //Throw this IO Exception since IndexREader.document does so anyway, so probably not that big of a change for people
- //since they are already handling this exception when getting the document
- try {
- localFieldsStream.seek(pointer);
- localFieldsStream.readBytes(b, 0, b.length);
- if (isCompressed == true) {
- fieldsData = uncompress(b);
- } else {
- fieldsData = b;
- }
- } catch (IOException e) {
- throw new FieldReaderException(e);
- }
- }
- return fieldsData instanceof byte[] ? (byte[]) fieldsData : null;
- }
-
- /**
- * The value of the field as a Reader, or null. If null, the String value
- * or binary value is used. Exactly one of stringValue(), readerValue(),
- * and binaryValue() must be set.
- */
- public Reader readerValue() {
- return fieldsData instanceof Reader ? (Reader) fieldsData : null;
- }
-
- /**
- * The value of the field as a String, or null. If null, the Reader value
- * or binary value is used. Exactly one of stringValue(), readerValue(), and
- * binaryValue() must be set.
- */
- public String stringValue() {
- if (fieldsData == null) {
- IndexInput localFieldsStream = getFieldStream();
- try {
- localFieldsStream.seek(pointer);
- //read in chars b/c we already know the length we need to read
- char[] chars = new char[toRead];
- localFieldsStream.readChars(chars, 0, toRead);
- fieldsData = new String(chars);
- } catch (IOException e) {
- throw new FieldReaderException(e);
- }
- }
- return fieldsData instanceof String ? (String) fieldsData : null;
- }
-
- public long getPointer() {
- return pointer;
- }
-
- public void setPointer(long pointer) {
- this.pointer = pointer;
- }
-
- public int getToRead() {
- return toRead;
- }
-
- public void setToRead(int toRead) {
- this.toRead = toRead;
- }
- }
-
- private final byte[] uncompress(final byte[] input)
- throws IOException {
-
- Inflater decompressor = new Inflater();
- decompressor.setInput(input);
-
- // Create an expandable byte array to hold the decompressed data
- ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
-
- // Decompress the data
- byte[] buf = new byte[1024];
- while (!decompressor.finished()) {
- try {
- int count = decompressor.inflate(buf);
- bos.write(buf, 0, count);
- }
- catch (DataFormatException e) {
- // this will happen if the field is not compressed
- IOException newException = new IOException("field data are in wrong format: " + e.toString());
- newException.initCause(e);
- throw newException;
- }
- }
-
- decompressor.end();
-
- // Get the decompressed data
- return bos.toByteArray();
- }
-
- // Instances of this class hold field properties and data
- // for merge
- final static class FieldForMerge extends AbstractField {
- public String stringValue() {
- return (String) this.fieldsData;
- }
-
- public Reader readerValue() {
- // not needed for merge
- return null;
- }
-
- public byte[] binaryValue() {
- return (byte[]) this.fieldsData;
- }
-
- public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {
- this.isStored = true;
- this.fieldsData = value;
- this.isCompressed = compressed;
- this.isBinary = binary;
- this.isTokenized = tokenize;
-
- this.name = fi.name.intern();
- this.isIndexed = fi.isIndexed;
- this.omitNorms = fi.omitNorms;
- this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
- this.storePositionWithTermVector = fi.storePositionWithTermVector;
- this.storeTermVector = fi.storeTermVector;
- }
-
- }
}
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (révision 493446)
+++ src/java/org/apache/lucene/index/IndexReader.java (copie de travail)
@@ -19,6 +19,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -30,6 +31,7 @@
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
+import java.util.List;
/** IndexReader is an abstract class, providing an interface for accessing an
index. Search of an index is done entirely through this abstract interface,
@@ -414,7 +416,7 @@
/** Expert: Resets the normalization factor for the named field of the named
* document. The norm represents the product of the field's {@link
- * Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
+ * FieldData#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
* int) length normalization}. Thus, to preserve the length normalization
* values when resetting this, one should base the new value upon the old.
*
Index: src/java/org/apache/lucene/index/IndexFileNames.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileNames.java (révision 493446)
+++ src/java/org/apache/lucene/index/IndexFileNames.java (copie de travail)
@@ -34,28 +34,6 @@
/** Name of the index deletable file (only used in
* pre-lockless indices) */
static final String DELETABLE = "deletable";
-
- /**
- * This array contains all filename extensions used by
- * Lucene's index files, with two exceptions, namely the
- * extension made up from .f + a number and
- * from .s + a number. Also note that
- * Lucene's segments_N files do not have any
- * filename extension.
- */
- static final String INDEX_EXTENSIONS[] = new String[] {
- "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del",
- "tvx", "tvd", "tvf", "tvp", "gen"};
-
- /** File extensions of old-style index files */
- static final String COMPOUND_EXTENSIONS[] = new String[] {
- "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis"
- };
-
- /** File extensions for term vector support */
- static final String VECTOR_EXTENSIONS[] = new String[] {
- "tvx", "tvd", "tvf"
- };
/**
* Computes the full file name from base, extension and
Index: src/java/org/apache/lucene/index/FilterIndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/FilterIndexReader.java (révision 493446)
+++ src/java/org/apache/lucene/index/FilterIndexReader.java (copie de travail)
@@ -19,6 +19,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
Index: src/java/org/apache/lucene/index/FieldsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/FieldsWriter.java (révision 493446)
+++ src/java/org/apache/lucene/index/FieldsWriter.java (copie de travail)
@@ -16,35 +16,29 @@
* the License.
*/
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
-import java.util.zip.Deflater;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
-final class FieldsWriter
-{
- static final byte FIELD_IS_TOKENIZED = 0x1;
- static final byte FIELD_IS_BINARY = 0x2;
- static final byte FIELD_IS_COMPRESSED = 0x4;
-
+public abstract class FieldsWriter {
+
private FieldInfos fieldInfos;
private IndexOutput fieldsStream;
private IndexOutput indexStream;
- FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+ protected FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
fieldInfos = fn;
fieldsStream = d.createOutput(segment + ".fdt");
indexStream = d.createOutput(segment + ".fdx");
}
- final void close() throws IOException {
+ protected void close() throws IOException {
fieldsStream.close();
indexStream.close();
}
@@ -52,100 +46,29 @@
final void addDocument(Document doc) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
+ writeDocumentData(fieldsStream, doc);
+
int storedCount = 0;
- Iterator fieldIterator = doc.getFields().iterator();
- while (fieldIterator.hasNext()) {
- Fieldable field = (Fieldable) fieldIterator.next();
+ Iterator fields = doc.getFields().iterator();
+ while (fields.hasNext()) {
+ Fieldable field = (Fieldable) fields.next();
if (field.isStored())
storedCount++;
}
fieldsStream.writeVInt(storedCount);
- fieldIterator = doc.getFields().iterator();
- while (fieldIterator.hasNext()) {
- Fieldable field = (Fieldable) fieldIterator.next();
- // if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode
- // and field.binaryValue() already returns the compressed value for a field
- // with isCompressed()==true, so we disable compression in that case
- boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
+ fields = doc.getFields().iterator();
+ while (fields.hasNext()) {
+ Fieldable field = (Fieldable) fields.next();
if (field.isStored()) {
fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name()));
-
- byte bits = 0;
- if (field.isTokenized())
- bits |= FieldsWriter.FIELD_IS_TOKENIZED;
- if (field.isBinary())
- bits |= FieldsWriter.FIELD_IS_BINARY;
- if (field.isCompressed())
- bits |= FieldsWriter.FIELD_IS_COMPRESSED;
-
- fieldsStream.writeByte(bits);
-
- if (field.isCompressed()) {
- // compression is enabled for the current field
- byte[] data = null;
-
- if (disableCompression) {
- // optimized case for merging, the data
- // is already compressed
- data = field.binaryValue();
- } else {
- // check if it is a binary field
- if (field.isBinary()) {
- data = compress(field.binaryValue());
- }
- else {
- data = compress(field.stringValue().getBytes("UTF-8"));
- }
- }
- final int len = data.length;
- fieldsStream.writeVInt(len);
- fieldsStream.writeBytes(data, len);
- }
- else {
- // compression is disabled for the current field
- if (field.isBinary()) {
- byte[] data = field.binaryValue();
- final int len = data.length;
- fieldsStream.writeVInt(len);
- fieldsStream.writeBytes(data, len);
- }
- else {
- fieldsStream.writeString(field.stringValue());
- }
- }
+ writeFieldData(field.getFieldData(), fieldsStream);
}
}
}
- private final byte[] compress (byte[] input) {
+ abstract protected void writeDocumentData(IndexOutput out, Document doc) throws IOException;
- // Create the compressor with highest level of compression
- Deflater compressor = new Deflater();
- compressor.setLevel(Deflater.BEST_COMPRESSION);
+ abstract protected void writeFieldData(FieldData fieldData, IndexOutput out) throws IOException;
- // Give the compressor the data to compress
- compressor.setInput(input);
- compressor.finish();
-
- /*
- * Create an expandable byte array to hold the compressed data.
- * You cannot use an array that's the same size as the orginal because
- * there is no guarantee that the compressed data will be smaller than
- * the uncompressed data.
- */
- ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
-
- // Compress the data
- byte[] buf = new byte[1024];
- while (!compressor.finished()) {
- int count = compressor.deflate(buf);
- bos.write(buf, 0, count);
- }
-
- compressor.end();
-
- // Get the compressed data
- return bos.toByteArray();
- }
}
Index: src/java/org/apache/lucene/index/SegmentMerger.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentMerger.java (révision 493448)
+++ src/java/org/apache/lucene/index/SegmentMerger.java (copie de travail)
@@ -17,6 +17,7 @@
* limitations under the License.
*/
+import java.util.List;
import java.util.Vector;
import java.util.Iterator;
import java.util.Collection;
@@ -47,7 +48,7 @@
private Vector readers = new Vector();
private FieldInfos fieldInfos;
- /** This ctor used only by test code.
+ /** This constructor is used only by test code.
*
* @param dir The Directory to merge the other segments into
* @param name The name of the new segment
@@ -115,12 +116,13 @@
CompoundFileWriter cfsWriter =
new CompoundFileWriter(directory, fileName);
- Vector files =
- new Vector(IndexFileNames.COMPOUND_EXTENSIONS.length + fieldInfos.size());
-
+ List compoundExtensions = directory.getIndexFormat().getCompoundExtensions();
+
+ Vector files = new Vector(compoundExtensions.size() + fieldInfos.size());
+
// Basic files
- for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) {
- files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
+ for (int i = 0; i < compoundExtensions.size(); i++) {
+ files.add(segment + "." + compoundExtensions.get(i));
}
// Fieldable norm files
@@ -131,10 +133,12 @@
}
}
+ List vectorExtensions = directory.getIndexFormat().getVectorExtensions();
+
// Vector files
if (fieldInfos.hasVectors()) {
- for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.length; i++) {
- files.add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]);
+ for (int i = 0; i < vectorExtensions.size(); i++) {
+ files.add(segment + "." + vectorExtensions.get(i));
}
}
@@ -178,14 +182,13 @@
}
fieldInfos.write(directory, segment + ".fnm");
- FieldsWriter fieldsWriter = // merge field values
- new FieldsWriter(directory, segment, fieldInfos);
-
+ FieldsWriter fieldsWriter = directory.getIndexFormat().getFieldsWriter(directory, segment, fieldInfos);
+
// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
// in merge mode, we use this FieldSelector
FieldSelector fieldSelectorMerge = new FieldSelector() {
public FieldSelectorResult accept(String fieldName) {
- return FieldSelectorResult.LOAD_FOR_MERGE;
+ return FieldSelectorResult.LAZY_LOAD;
}
};
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (révision 493448)
+++ src/java/org/apache/lucene/index/IndexWriter.java (copie de travail)
@@ -106,7 +106,7 @@
private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory
- private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs
+ private RAMDirectory ramDirectory; // for temp segs
private IndexFileDeleter deleter;
private Lock writeLock;
@@ -323,6 +323,7 @@
private void init(Directory d, Analyzer a, final boolean create, boolean closeDir)
throws IOException {
+ ramDirectory = new RAMDirectory(d.getIndexFormat());
this.closeDir = closeDir;
directory = d;
analyzer = a;
Index: src/java/org/apache/lucene/index/DocumentWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentWriter.java (révision 493446)
+++ src/java/org/apache/lucene/index/DocumentWriter.java (copie de travail)
@@ -75,8 +75,7 @@
fieldInfos.write(directory, segment + ".fnm");
// write field values
- FieldsWriter fieldsWriter =
- new FieldsWriter(directory, segment, fieldInfos);
+ FieldsWriter fieldsWriter = directory.getIndexFormat().getFieldsWriter(directory, segment, fieldInfos);
try {
fieldsWriter.addDocument(doc);
} finally {
Index: src/java/org/apache/lucene/index/IndexFileDeleter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileDeleter.java (révision 493446)
+++ src/java/org/apache/lucene/index/IndexFileDeleter.java (copie de travail)
@@ -81,7 +81,7 @@
// not referenced by the current segments info:
String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
- IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
+ IndexFileNameFilter filter = directory.getIndexFormat().getIndexFileNameFilter();
String[] files = directory.list();
Index: src/java/org/apache/lucene/index/IndexFileNameFilter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileNameFilter.java (révision 493446)
+++ src/java/org/apache/lucene/index/IndexFileNameFilter.java (copie de travail)
@@ -29,14 +29,17 @@
*/
public class IndexFileNameFilter implements FilenameFilter {
- static IndexFileNameFilter singleton = new IndexFileNameFilter();
private HashSet extensions;
- public IndexFileNameFilter() {
+ /**
+ * Constructor
+ *
+ * @param indexFormat the format of the index
+ */
+ public IndexFileNameFilter(IndexFormat indexFormat) {
extensions = new HashSet();
- for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) {
- extensions.add(IndexFileNames.INDEX_EXTENSIONS[i]);
- }
+ extensions.addAll(indexFormat.getIndexExtensions());
+ extensions.addAll(indexFormat.getVectorExtensions());
}
/* (non-Javadoc)
@@ -85,8 +88,4 @@
}
return false;
}
-
- public static IndexFileNameFilter getFilter() {
- return singleton;
- }
}
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java (révision 493446)
+++ src/java/org/apache/lucene/index/SegmentReader.java (copie de travail)
@@ -155,8 +155,9 @@
// No compound file exists - use the multi-file format
fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
- fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
+ fieldsReader = cfsDir.getIndexFormat().getFieldsReader(cfsDir, segment, fieldInfos);
+
tis = new TermInfosReader(cfsDir, segment, fieldInfos);
// NOTE: the bitvector is stored using the regular directory, not cfs
@@ -290,8 +291,9 @@
files.addElement(name);
}
} else {
- for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) {
- String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
+ List indexExtension = directory().getIndexFormat().getIndexExtensions();
+ for (int i = 0; i < indexExtension.size(); i++) {
+ String name = segment + "." + indexExtension.get(i);
if (directory().fileExists(name))
files.addElement(name);
}
Index: src/java/org/apache/lucene/index/DefaultFieldsReader.java
===================================================================
--- src/java/org/apache/lucene/index/DefaultFieldsReader.java (révision 0)
+++ src/java/org/apache/lucene/index/DefaultFieldsReader.java (révision 0)
@@ -0,0 +1,53 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * The default implementation of a {@link FieldsReader}. It supplies plain
+ * {@link Document}, {@link DefaultFieldData} and {@link Field} instances to
+ * the document-reading logic inherited from the base class.
+ *
+ * $Id$
+ */
+public class DefaultFieldsReader extends FieldsReader {
+
+ /**
+ * Passes all arguments straight through to the {@link FieldsReader} constructor.
+ *
+ * @param d the directory the segment's stored-fields files are opened from
+ * @param segment the name of the segment to read
+ * @param fn the field infos of the segment
+ * @throws IOException if the superclass constructor fails
+ */
+ protected DefaultFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+ super(d, segment, fn);
+ }
+
+ /**
+ * There is no information stored at the document level, so the stream is
+ * not read and an empty document is returned.
+ *
+ * @param fieldsStream the stored-fields stream; unused by this implementation
+ * @return a new, empty {@link Document}
+ */
+ protected Document createDocument(IndexInput fieldsStream) {
+ return new Document();
+ }
+
+ /**
+ * Creates the container that will receive the data of one stored field.
+ *
+ * @param fi the info of the field about to be read; unused by this implementation
+ * @return a new {@link DefaultFieldData}
+ */
+ protected FieldData createFieldData(FieldInfo fi) {
+ return new DefaultFieldData();
+ }
+
+ /**
+ * Wraps field data in a {@link Field}.
+ *
+ * @param fi the info of the field
+ * @param fieldData the data of the field; it is cast to {@link DefaultFieldData}
+ * unconditionally, so any other type causes a ClassCastException
+ * @return a new {@link Field} built from the two arguments
+ */
+ protected Fieldable createFieldable(FieldInfo fi, FieldData fieldData) {
+ return new Field(fi, (DefaultFieldData) fieldData);
+ }
+}
Index: src/java/org/apache/lucene/store/Directory.java
===================================================================
--- src/java/org/apache/lucene/store/Directory.java (révision 493446)
+++ src/java/org/apache/lucene/store/Directory.java (copie de travail)
@@ -19,6 +19,9 @@
import java.io.IOException;
+import org.apache.lucene.index.DefaultIndexFormat;
+import org.apache.lucene.index.IndexFormat;
+
/** A Directory is a flat list of files. Files may be written once, when they
* are created. Once a file is created it may only be opened for read, or
* deleted. Random access is permitted both when reading and writing.
@@ -42,6 +45,12 @@
* this Directory instance). */
protected LockFactory lockFactory;
+ /** The index format attached to this directory. Starts out as a
+ * {@link DefaultIndexFormat}; the field is protected, so subclasses may assign another one. */
+ protected IndexFormat indexFormat = new DefaultIndexFormat();
+
+ /** Returns the index format currently attached to this directory. */
+ public IndexFormat getIndexFormat() {
+ return indexFormat;
+ }
+
/** Returns an array of strings, one for each file in the directory. */
public abstract String[] list()
throws IOException;
@@ -124,4 +133,5 @@
public String getLockID() {
return this.toString();
}
+
}
Index: src/java/org/apache/lucene/store/RAMDirectory.java
===================================================================
--- src/java/org/apache/lucene/store/RAMDirectory.java (révision 493446)
+++ src/java/org/apache/lucene/store/RAMDirectory.java (copie de travail)
@@ -27,6 +27,12 @@
import java.util.Iterator;
import java.util.Set;
+import org.apache.lucene.index.DefaultIndexFormat;
+import org.apache.lucene.index.IndexFormat;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+
/**
* A memory-resident {@link Directory} implementation. Locking
* implementation is by default the {@link SingleInstanceLockFactory}
@@ -47,8 +53,21 @@
// Lock acquisition sequence: RAMDirectory, then RAMFile
// *****
- /** Constructs an empty {@link Directory}. */
+ /**
+ * Constructs an empty {@link Directory}.
+ * The index format used is the default one.
+ */
public RAMDirectory() {
+ this(new DefaultIndexFormat());
+ }
+
+ /**
+ * Constructor
+ *
+ * @param indexFormat the format of the index
+ */
+ public RAMDirectory(IndexFormat indexFormat) {
+ this.indexFormat = indexFormat;
setLockFactory(new SingleInstanceLockFactory());
}
@@ -69,11 +88,11 @@
* @exception IOException if an error occurs
*/
public RAMDirectory(Directory dir) throws IOException {
- this(dir, false);
+ this(dir, false, new DefaultIndexFormat());
}
-
- private RAMDirectory(Directory dir, boolean closeDir) throws IOException {
- this();
+
+ private RAMDirectory(Directory dir, boolean closeDir, IndexFormat indexFormat) throws IOException {
+ this(indexFormat);
final String[] files = dir.list();
byte[] buf = new byte[BufferedIndexOutput.BUFFER_SIZE];
for (int i = 0; i < files.length; i++) {
@@ -107,7 +126,7 @@
* @see #RAMDirectory(Directory)
*/
public RAMDirectory(File dir) throws IOException {
- this(FSDirectory.getDirectory(dir, false), true);
+ this(FSDirectory.getDirectory(dir, false), true, new DefaultIndexFormat());
}
/**
@@ -118,7 +137,7 @@
* @see #RAMDirectory(Directory)
*/
public RAMDirectory(String dir) throws IOException {
- this(FSDirectory.getDirectory(dir, false), true);
+ this(FSDirectory.getDirectory(dir, false), true, new DefaultIndexFormat());
}
/** Returns an array of strings, one for each file in the directory. */
@@ -255,5 +274,21 @@
fileNames = null;
files = null;
}
+
+ /**
+ * For debugging purposes, lists every file name in this directory.
+ * The code is commented out because the lock ID is based on the toString() function.
+ */
+// public String toString() {
+// String[] f = list();
+// StringBuffer buffer = new StringBuffer();
+// for (int i = 0; i< f.length; i++) {
+// buffer.append(f[i]);
+// if (i != f.length - 1) {
+// buffer.append(", ");
+// }
+// }
+// return buffer.toString();
+// }
}
Index: src/java/org/apache/lucene/store/RAMFile.java
===================================================================
--- src/java/org/apache/lucene/store/RAMFile.java (révision 493446)
+++ src/java/org/apache/lucene/store/RAMFile.java (copie de travail)
@@ -79,4 +79,25 @@
}
}
+
+ /**
+  * For debugging purposes: renders at most the first 100 bytes of this file
+  * as decimal values, each one followed by a comma.
+  *
+  * @return the dump of the leading bytes; an empty string when the file has
+  *         no content
+  */
+ public String toString() {
+   StringBuffer dump = new StringBuffer();
+   int bufferNum = -1;
+   int bufferPos = 0;
+   byte[] current = null;
+   for (int i = 0; i < 100 && i < length; i++) {
+     // Fetch a buffer only when a byte is actually needed. An empty file has
+     // no buffer 0, and the last dumped byte may sit at the very end of the
+     // last buffer, so an eager fetch can run past the end of the list.
+     if (current == null || bufferPos == current.length) {
+       bufferNum++;
+       current = (byte[]) buffers.get(bufferNum);
+       bufferPos = 0;
+     }
+     dump.append(current[bufferPos]);
+     dump.append(',');
+     bufferPos++;
+   }
+   return dump.toString();
+ }
}
Index: src/java/org/apache/lucene/store/IndexOutput.java
===================================================================
--- src/java/org/apache/lucene/store/IndexOutput.java (révision 493446)
+++ src/java/org/apache/lucene/store/IndexOutput.java (copie de travail)
@@ -31,6 +31,17 @@
*/
public abstract void writeByte(byte b) throws IOException;
+  /**
+   * Copies a single byte from the given input to this output.
+   * @param in the input to read the byte from
+   * @throws IOException if thrown by the underlying read or write
+   * @see #writeByte(byte)
+   */
+  public void writeByte(IndexInput in) throws IOException {
+    final byte value = in.readByte();
+    writeByte(value);
+  }
+
/** Writes an array of bytes.
* @param b the bytes to write
* @param length the number of bytes to write
@@ -38,6 +49,20 @@
*/
public abstract void writeBytes(byte[] b, int length) throws IOException;
+  /**
+   * Copies length bytes, one at a time, from the given input to this
+   * output. Nothing is copied when length is zero or negative.
+   * @param in the input to read from
+   * @param length the number of bytes to copy
+   * @throws IOException if thrown by the underlying read or write
+   * @see #writeBytes(byte[], int)
+   */
+  public void writeBytes(IndexInput in, long length) throws IOException {
+    for (long copied = 0; copied < length; copied++) {
+      writeByte(in.readByte());
+    }
+  }
+
/** Writes an int as four bytes.
* @see IndexInput#readInt()
*/
@@ -48,6 +73,20 @@
writeByte((byte) i);
}
+  /**
+   * Copies one fixed-width int (four bytes) from the given input to this
+   * output without decoding it.
+   * @param in the input positioned at an int
+   * @throws IOException if thrown by the underlying read or write
+   * @see #writeInt(int)
+   */
+  public void writeInt(IndexInput in) throws IOException {
+    // an int always occupies exactly four bytes, so copy them verbatim
+    for (int copied = 0; copied < 4; copied++) {
+      writeByte(in.readByte());
+    }
+  }
+
/** Writes an int in a variable-length format. Writes between one and
* five bytes. Smaller values take fewer bytes. Negative numbers are not
* supported.
@@ -61,6 +100,22 @@
writeByte((byte)i);
}
+  /**
+   * Copies one variable-length int from the given input to this output
+   * without decoding it.
+   * @param in the input positioned at a variable-length int
+   * @throws IOException if thrown by the underlying read or write
+   * @see #writeVInt(int)
+   */
+  public void writeVInt(IndexInput in) throws IOException {
+    byte current;
+    do {
+      current = in.readByte();
+      writeByte(current);
+      // a set high bit means another byte of the value follows
+    } while ((current & 0x80) != 0);
+  }
+
/** Writes a long as eight bytes.
* @see IndexInput#readLong()
*/
@@ -69,6 +124,24 @@
writeInt((int) i);
}
+  /**
+   * Copies one fixed-width long (eight bytes) from the given input to this
+   * output without decoding it.
+   *
+   * @param in the input positioned at a long
+   * @throws IOException if thrown by the underlying read or write
+   * @see #writeLong(long)
+   */
+  public void writeLong(IndexInput in) throws IOException {
+    // A long always occupies exactly eight bytes; they are copied verbatim,
+    // so the value never has to be decoded and re-encoded.
+    final int byteCount = 8;
+    for (int copied = 0; copied < byteCount; copied++) {
+      final byte value = in.readByte();
+      writeByte(value);
+    }
+  }
+
/** Writes an long in a variable-length format. Writes between one and five
* bytes. Smaller values take fewer bytes. Negative numbers are not
* supported.
@@ -82,6 +155,22 @@
writeByte((byte)i);
}
+  /**
+   * Copies one variable-length long from the given input to this output
+   * without decoding it.
+   * @param in the input positioned at a variable-length long
+   * @throws IOException if thrown by the underlying read or write
+   * @see #writeVLong(long)
+   */
+  public void writeVLong(IndexInput in) throws IOException {
+    boolean more = true;
+    while (more) {
+      final byte current = in.readByte();
+      writeByte(current);
+      more = (current & 0x80) != 0; // high bit set: another byte follows
+    }
+  }
+
/** Writes a string.
* @see IndexInput#readString()
*/
@@ -91,6 +180,19 @@
writeChars(s, 0, length);
}
+  /**
+   * Copies one string from the given input to this output: the character
+   * count as a VInt, followed by that many encoded characters.
+   * @param in the input positioned at a string
+   * @throws IOException if thrown by the underlying read or write
+   * @see #writeString(String)
+   */
+  public void writeString(IndexInput in) throws IOException {
+    final int charCount = in.readVInt();
+    writeVInt(charCount);
+    writeChars(in, charCount);
+  }
+
/** Writes a sequence of UTF-8 encoded characters from a string.
* @param s the source of the characters
* @param start the first character in the sequence
@@ -103,18 +205,40 @@
for (int i = start; i < end; i++) {
final int code = (int)s.charAt(i);
if (code >= 0x01 && code <= 0x7F)
- writeByte((byte)code);
+ writeByte((byte)code);
else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) {
- writeByte((byte)(0xC0 | (code >> 6)));
- writeByte((byte)(0x80 | (code & 0x3F)));
+ writeByte((byte)(0xC0 | (code >> 6)));
+ writeByte((byte)(0x80 | (code & 0x3F)));
} else {
- writeByte((byte)(0xE0 | (code >>> 12)));
- writeByte((byte)(0x80 | ((code >> 6) & 0x3F)));
- writeByte((byte)(0x80 | (code & 0x3F)));
+ writeByte((byte)(0xE0 | (code >>> 12)));
+ writeByte((byte)(0x80 | ((code >> 6) & 0x3F)));
+ writeByte((byte)(0x80 | (code & 0x3F)));
}
}
}
+  /**
+   * Copies a sequence of encoded characters from the given input to this
+   * output; each character occupies one, two or three bytes.
+   * @param in the input positioned at the first character
+   * @param length the number of characters (not bytes) to copy
+   * @throws IOException if thrown by the underlying read or write
+   * @see #writeChars(String,int,int)
+   */
+  public void writeChars(IndexInput in, int length) throws IOException {
+    for (int remaining = length; remaining > 0; remaining--) {
+      final byte lead = in.readByte();
+      writeByte(lead);
+      if ((lead & 0x80) == 0) {
+        continue; // single-byte character
+      }
+      writeByte(in.readByte()); // second byte of a multi-byte character
+      if ((lead & 0xE0) == 0xE0) {
+        writeByte(in.readByte()); // third byte of a three-byte character
+      }
+    }
+  }
+
/** Forces any buffered output to be written. */
public abstract void flush() throws IOException;
Index: src/java/org/apache/lucene/store/FSDirectory.java
===================================================================
--- src/java/org/apache/lucene/store/FSDirectory.java (révision 493446)
+++ src/java/org/apache/lucene/store/FSDirectory.java (copie de travail)
@@ -26,7 +26,9 @@
import java.security.NoSuchAlgorithmException;
import java.util.Hashtable;
+import org.apache.lucene.index.DefaultIndexFormat;
import org.apache.lucene.index.IndexFileNameFilter;
+import org.apache.lucene.index.IndexFormat;
/**
* Straightforward implementation of {@link Directory} as a directory of files.
@@ -128,7 +130,7 @@
* @return the FSDirectory for the named file. */
public static FSDirectory getDirectory(String path, boolean create)
throws IOException {
- return getDirectory(new File(path), create, null, true);
+ return getDirectory(new File(path), create, null, true, new DefaultIndexFormat());
}
/** Returns the directory instance for the named location, using the
@@ -146,13 +148,13 @@
public static FSDirectory getDirectory(String path, boolean create,
LockFactory lockFactory, boolean doRemoveOldFiles)
throws IOException {
- return getDirectory(new File(path), create, lockFactory, doRemoveOldFiles);
+ return getDirectory(new File(path), create, lockFactory, doRemoveOldFiles, new DefaultIndexFormat());
}
public static FSDirectory getDirectory(String path, boolean create,
LockFactory lockFactory)
throws IOException {
- return getDirectory(new File(path), create, lockFactory, true);
+ return getDirectory(new File(path), create, lockFactory, true, new DefaultIndexFormat());
}
/** Returns the directory instance for the named location.
@@ -165,10 +167,11 @@
* @param create if true, create, or erase any existing contents.
* @return the FSDirectory for the named file. */
public static FSDirectory getDirectory(File file, boolean create, boolean doRemoveOldFiles)
- throws IOException {
- return getDirectory(file, create, null, doRemoveOldFiles);
+ throws IOException {
+ return getDirectory(file, create, null, doRemoveOldFiles, new DefaultIndexFormat());
}
+
/** Returns the directory instance for the named location, using the
* provided LockFactory implementation.
*
@@ -180,10 +183,34 @@
* @param create if true, create, or erase any existing contents.
* @param lockFactory instance of {@link LockFactory} providing the
* locking implementation.
- * @return the FSDirectory for the named file. */
+ * @param doRemoveOldFiles whether old index files may be cleared from the directory
+ * @return the FSDirectory for the named file.
+ * @throws IOException
+ */
public static FSDirectory getDirectory(File file, boolean create,
LockFactory lockFactory, boolean doRemoveOldFiles)
throws IOException {
+ return getDirectory(file, create, lockFactory, doRemoveOldFiles, new DefaultIndexFormat());
+ }
+
+ /** Returns the directory instance for the named location, using the
+ * provided LockFactory implementation.
+ *
+ * <p>Directories are cached, so that, for a given canonical path, the same
+ * FSDirectory instance will always be returned. This permits
+ * synchronization on directories.
+ *
+ * @param file the path to the directory.
+ * @param create if true, create, or erase any existing contents.
+ * @param lockFactory instance of {@link LockFactory} providing the
+ * locking implementation.
+ * @param indexFormat the format of index
+ * @return the FSDirectory for the named file.
+ * @throws IOException
+ */
+ public static FSDirectory getDirectory(File file, boolean create,
+ LockFactory lockFactory, boolean doRemoveOldFiles, IndexFormat indexFormat)
+ throws IOException {
file = new File(file.getCanonicalPath());
FSDirectory dir;
synchronized (DIRECTORIES) {
@@ -194,7 +221,7 @@
} catch (Exception e) {
throw new RuntimeException("cannot load FSDirectory class: " + e.toString(), e);
}
- dir.init(file, create, lockFactory, doRemoveOldFiles);
+ dir.init(file, create, lockFactory, doRemoveOldFiles, indexFormat);
DIRECTORIES.put(file, dir);
} else {
@@ -219,7 +246,7 @@
LockFactory lockFactory)
throws IOException
{
- return getDirectory(file, create, lockFactory, true);
+ return getDirectory(file, create, lockFactory, true, new DefaultIndexFormat());
}
public static FSDirectory getDirectory(File file, boolean create)
@@ -243,8 +270,10 @@
throw new IOException(path + " not a directory");
}
- private void init(File path, boolean create, LockFactory lockFactory, boolean doRemoveOldFiles) throws IOException {
+ private void init(File path, boolean create, LockFactory lockFactory, boolean doRemoveOldFiles, IndexFormat indexFormat) throws IOException {
+ this.indexFormat = indexFormat;
+
// Set up lockFactory with cascaded defaults: if an instance was passed in,
// use that; else if locks are disabled, use NoLockFactory; else if the
// system property org.apache.lucene.store.FSDirectoryLockFactoryClass is set,
@@ -310,7 +339,7 @@
throw new IOException(directory + " not a directory");
if (doRemoveOldFiles) {
- String[] files = directory.list(IndexFileNameFilter.getFilter()); // clear old files
+ String[] files = directory.list(getIndexFormat().getIndexFileNameFilter()); // clear old files
if (files == null)
throw new IOException("Cannot read directory " + directory.getAbsolutePath());
for (int i = 0; i < files.length; i++) {
@@ -325,7 +354,7 @@
/** Returns an array of strings, one for each Lucene index file in the directory. */
public String[] list() {
- return directory.list(IndexFileNameFilter.getFilter());
+ return directory.list(getIndexFormat().getIndexFileNameFilter());
}
/** Returns true iff a file with the given name exists. */
Index: src/java/org/apache/lucene/document/Field.java
===================================================================
--- src/java/org/apache/lucene/document/Field.java (révision 493446)
+++ src/java/org/apache/lucene/document/Field.java (copie de travail)
@@ -17,11 +17,14 @@
* limitations under the License.
*/
-import org.apache.lucene.util.Parameter;
-
import java.io.Reader;
import java.io.Serializable;
+import org.apache.lucene.index.DefaultFieldData;
+import org.apache.lucene.index.FieldData;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.util.Parameter;
+
/**
A field is a section of a Document. Each field has two parts, a name and a
value. Values may be free text, provided as a String or as a Reader, or they
@@ -31,7 +34,7 @@
*/
public final class Field extends AbstractField implements Fieldable, Serializable {
-
+
/** Specifies whether and how a field should be stored. */
public static final class Store extends Parameter implements Serializable {
@@ -127,24 +130,18 @@
*/
public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
}
-
-
- /** The value of the field as a String, or null. If null, the Reader value
- * or binary value is used. Exactly one of stringValue(), readerValue(), and
- * binaryValue() must be set. */
- public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }
-
- /** The value of the field as a Reader, or null. If null, the String value
- * or binary value is used. Exactly one of stringValue(), readerValue(),
- * and binaryValue() must be set. */
- public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
-
- /** The value of the field in Binary, or null. If null, the Reader or
- * String value is used. Exactly one of stringValue(), readerValue() and
- * binaryValue() must be set. */
- public byte[] binaryValue() { return fieldsData instanceof byte[] ? (byte[])fieldsData : null; }
-
+
/**
+ * Constructor used when getting data from the index
+ *
+ * @param fi the info of the field
+ * @param fieldData the data of the field
+ */
+ public Field(FieldInfo fi, DefaultFieldData fieldData) {
+ super(fi, fieldData);
+ }
+
+ /**
* Create a field by specifying its name, value and how it will
* be saved in the index. Term vectors will not be stored in the index.
*
@@ -178,57 +175,9 @@
*
*/
public Field(String name, String value, Store store, Index index, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (value == null)
- throw new NullPointerException("value cannot be null");
+ super(name, new DefaultFieldData(value), store, index, termVector);
if (name.length() == 0 && value.length() == 0)
throw new IllegalArgumentException("name and value cannot both be empty");
- if (index == Index.NO && store == Store.NO)
- throw new IllegalArgumentException("it doesn't make sense to have a field that "
- + "is neither indexed nor stored");
- if (index == Index.NO && termVector != TermVector.NO)
- throw new IllegalArgumentException("cannot store term vector information "
- + "for a field that is not indexed");
-
- this.name = name.intern(); // field names are interned
- this.fieldsData = value;
-
- if (store == Store.YES){
- this.isStored = true;
- this.isCompressed = false;
- }
- else if (store == Store.COMPRESS) {
- this.isStored = true;
- this.isCompressed = true;
- }
- else if (store == Store.NO){
- this.isStored = false;
- this.isCompressed = false;
- }
- else
- throw new IllegalArgumentException("unknown store parameter " + store);
-
- if (index == Index.NO) {
- this.isIndexed = false;
- this.isTokenized = false;
- } else if (index == Index.TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = true;
- } else if (index == Index.UN_TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = false;
- } else if (index == Index.NO_NORMS) {
- this.isIndexed = true;
- this.isTokenized = false;
- this.omitNorms = true;
- } else {
- throw new IllegalArgumentException("unknown index parameter " + index);
- }
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
}
/**
@@ -253,23 +202,7 @@
* @throws NullPointerException if name or reader is null
*/
public Field(String name, Reader reader, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (reader == null)
- throw new NullPointerException("reader cannot be null");
-
- this.name = name.intern(); // field names are interned
- this.fieldsData = reader;
-
- this.isStored = false;
- this.isCompressed = false;
-
- this.isIndexed = true;
- this.isTokenized = true;
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
+ super(name, new DefaultFieldData(reader), Store.NO, Index.TOKENIZED, termVector);
}
/**
@@ -281,34 +214,91 @@
* @throws IllegalArgumentException if store is Store.NO
*/
public Field(String name, byte[] value, Store store) {
- if (name == null)
- throw new IllegalArgumentException("name cannot be null");
- if (value == null)
- throw new IllegalArgumentException("value cannot be null");
-
- this.name = name.intern();
- this.fieldsData = value;
-
- if (store == Store.YES){
- this.isStored = true;
- this.isCompressed = false;
+ super(name, new DefaultFieldData(value), store, Index.NO, TermVector.NO);
+ }
+
+ /**
+ * Override the store management to handle compression
+ */
+ protected void setStore(Field.Store store) {
+ if (store == Field.Store.YES) {
+ isStored = true;
+ ((DefaultFieldData) fieldData).setCompressed(false);
+ } else if (store == Field.Store.COMPRESS) {
+ isStored = true;
+ ((DefaultFieldData) fieldData).setCompressed(true);
+ } else if (store == Field.Store.NO) {
+ if (isBinary()) {
+ throw new IllegalArgumentException("binary values can't be unstored");
+ }
+ isStored = false;
+ ((DefaultFieldData) fieldData).setCompressed(false);
+ } else {
+ throw new IllegalArgumentException("unknown store parameter " + store);
}
- else if (store == Store.COMPRESS) {
- this.isStored = true;
- this.isCompressed = true;
+ }
+
+ /** Prints a Field for human consumption. */
+ public String toString() {
+ StringBuffer result = new StringBuffer();
+ if (isStored()) {
+ result.append("stored");
+ if (((DefaultFieldData) fieldData).isCompressed())
+ result.append("/compressed");
+ else
+ result.append("/uncompressed");
}
- else if (store == Store.NO)
- throw new IllegalArgumentException("binary values can't be unstored");
- else
- throw new IllegalArgumentException("unknown store parameter " + store);
-
- this.isIndexed = false;
- this.isTokenized = false;
-
- this.isBinary = true;
-
- setStoreTermVector(TermVector.NO);
+ if (isIndexed()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("indexed");
+ }
+ if (isTokenized()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("tokenized");
+ }
+ if (isTermVectorStored()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVector");
+ }
+ if (isStoreOffsetWithTermVector()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVectorOffsets");
+ }
+ if (isStorePositionWithTermVector()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVectorPosition");
+ }
+ if (isBinary()) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("binary");
+ }
+ if (getOmitNorms()) {
+ result.append(",omitNorms");
+ }
+ if (isLazy()) {
+ result.append(",lazy");
+ }
+ result.append('<');
+ result.append(name());
+ result.append(':');
+
+ result.append(getFieldData());
+
+ result.append('>');
+ return result.toString();
}
+ /**
+ * @return true if the value of the field is stored and compressed within the index
+ */
+ public final boolean isCompressed() {
+ return ((DefaultFieldData) fieldData).isCompressed();
+ }
}
Index: src/java/org/apache/lucene/document/AbstractField.java
===================================================================
--- src/java/org/apache/lucene/document/AbstractField.java (révision 493446)
+++ src/java/org/apache/lucene/document/AbstractField.java (copie de travail)
@@ -15,11 +15,16 @@
* limitations under the License.
*/
+import java.io.Reader;
+import org.apache.lucene.index.FieldData;
+import org.apache.lucene.index.FieldInfo;
+
/**
- *
- *
- **/
+ * Default abstract implementation of a {@link Fieldable}
+ *
+ * $Id$
+ */
public abstract class AbstractField implements Fieldable {
protected String name = "body";
@@ -30,57 +35,41 @@
protected boolean isStored = false;
protected boolean isIndexed = true;
protected boolean isTokenized = true;
- protected boolean isBinary = false;
- protected boolean isCompressed = false;
- protected boolean lazy = false;
protected float boost = 1.0f;
// the one and only data object for all different kind of field values
- protected Object fieldsData = null;
+ protected FieldData fieldData = null;
- protected AbstractField()
- {
-
+ protected AbstractField(FieldInfo fi, FieldData fieldData) {
+ this.name = fi.getName();
+ storeTermVector = fi.storeTermVector();
+ storeOffsetWithTermVector = fi.storeOffsetWithTermVector();
+ storePositionWithTermVector = fi.storePositionWithTermVector();
+ omitNorms = fi.omitNorms();
+ isStored = true;
+ isIndexed = fi.isIndexed();
+ isTokenized = fieldData.isTokenized();
+ this.fieldData = fieldData;
}
- protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
+ protected AbstractField(String name, FieldData data, Field.Store store, Field.Index index, Field.TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
- this.name = name.intern(); // field names are interned
+ if (data == null)
+ throw new NullPointerException("data cannot be null");
- if (store == Field.Store.YES){
- this.isStored = true;
- this.isCompressed = false;
+ this.name = name.intern(); // field names are interned
+
+ fieldData = data;
+
+ if (index == Field.Index.NO && store == Field.Store.NO) {
+ throw new IllegalArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
}
- else if (store == Field.Store.COMPRESS) {
- this.isStored = true;
- this.isCompressed = true;
+ if (index == Field.Index.NO && termVector != Field.TermVector.NO) {
+ throw new IllegalArgumentException("cannot store term vector information " + "for a field that is not indexed");
}
- else if (store == Field.Store.NO){
- this.isStored = false;
- this.isCompressed = false;
- }
- else
- throw new IllegalArgumentException("unknown store parameter " + store);
- if (index == Field.Index.NO) {
- this.isIndexed = false;
- this.isTokenized = false;
- } else if (index == Field.Index.TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = true;
- } else if (index == Field.Index.UN_TOKENIZED) {
- this.isIndexed = true;
- this.isTokenized = false;
- } else if (index == Field.Index.NO_NORMS) {
- this.isIndexed = true;
- this.isTokenized = false;
- this.omitNorms = true;
- } else {
- throw new IllegalArgumentException("unknown index parameter " + index);
- }
-
- this.isBinary = false;
-
+ setStore(store);
+ setIndex(index);
setStoreTermVector(termVector);
}
@@ -155,6 +144,39 @@
}
}
+  /**
+   * Applies the indexing mode to this field and forwards the resulting
+   * tokenization flag to the field data.
+   * @param index how the field is indexed
+   * @throws IllegalArgumentException if index is not a known mode
+   */
+  protected void setIndex(Field.Index index) {
+    final boolean known = index == Field.Index.NO || index == Field.Index.TOKENIZED
+        || index == Field.Index.UN_TOKENIZED || index == Field.Index.NO_NORMS;
+    if (!known) {
+      throw new IllegalArgumentException("unknown index parameter " + index);
+    }
+    isIndexed = index != Field.Index.NO;
+    isTokenized = index == Field.Index.TOKENIZED;
+    if (index == Field.Index.NO_NORMS) {
+      omitNorms = true;
+    }
+    fieldData.setTokenized(isTokenized);
+  }
+
+  /** Records whether the value is stored; rejects any other store mode. */
+  protected void setStore(Field.Store store) {
+    final boolean stored = store == Field.Store.YES;
+    // only YES and NO are understood at this level
+    if (!stored && store != Field.Store.NO) {
+      throw new IllegalArgumentException("unknown store parameter " + store);
+    }
+    if (!stored && isBinary()) {
+      throw new IllegalArgumentException("binary values can't be unstored");
+    }
+    isStored = stored;
+  }
+
/** True iff the value of the field is to be stored in the index for return
with search hits. It is an error for this to be true if a field is
Reader-valued. */
@@ -169,8 +191,6 @@
Reader-valued. */
public final boolean isTokenized() { return isTokenized; }
- /** True if the value of the field is stored and compressed within the index */
- public final boolean isCompressed() { return isCompressed; }
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
@@ -198,7 +218,9 @@
}
/** True iff the value of the filed is stored as binary */
- public final boolean isBinary() { return isBinary; }
+ public final boolean isBinary() {
+ return fieldData.isBinary();
+ }
/** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; }
@@ -211,18 +233,14 @@
public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
public boolean isLazy() {
- return lazy;
+ return fieldData.isLazy();
}
/** Prints a Field for human consumption. */
- public final String toString() {
+ public String toString() {
StringBuffer result = new StringBuffer();
if (isStored) {
result.append("stored");
- if (isCompressed)
- result.append("/compressed");
- else
- result.append("/uncompressed");
}
if (isIndexed) {
if (result.length() > 0)
@@ -249,7 +267,7 @@
result.append(",");
result.append("termVectorPosition");
}
- if (isBinary) {
+ if (isBinary()) {
if (result.length() > 0)
result.append(",");
result.append("binary");
@@ -257,18 +275,48 @@
if (omitNorms) {
result.append(",omitNorms");
}
- if (lazy){
+ if (isLazy()) {
result.append(",lazy");
}
result.append('<');
result.append(name);
result.append(':');
- if (fieldsData != null && lazy == false) {
- result.append(fieldsData);
+ if (fieldData != null && !isLazy()) {
+ result.append(fieldData);
}
result.append('>');
return result.toString();
}
+
+ /** The value of the field as a String, or null. If null, the Reader value
+ * or binary value is used. Exactly one of stringValue(), readerValue(), and
+ * binaryValue() must be set. */
+ public final String stringValue() {
+ return fieldData.stringValue();
+ }
+
+ /** The value of the field as a Reader, or null. If null, the String value
+ * or binary value is used. Exactly one of stringValue(), readerValue(),
+ * and binaryValue() must be set. */
+ public final Reader readerValue() {
+ return fieldData.readerValue();
+ }
+
+ /** The value of the field in Binary, or null. If null, the Reader or
+ * String value is used. Exactly one of stringValue(), readerValue() and
+ * binaryValue() must be set. */
+ public final byte[] binaryValue() {
+ return fieldData.binaryValue();
+ }
+
+ /**
+ * Returns the data object that backs this field.
+ * @return the data of the field
+ */
+ public FieldData getFieldData() {
+ return fieldData;
+ }
+
}
Index: src/java/org/apache/lucene/document/Fieldable.java
===================================================================
--- src/java/org/apache/lucene/document/Fieldable.java (révision 493446)
+++ src/java/org/apache/lucene/document/Fieldable.java (copie de travail)
@@ -19,6 +19,8 @@
import java.io.Reader;
import java.io.Serializable;
+import org.apache.lucene.index.FieldData;
+
/**
* Synonymous with {@link Field}.
*
@@ -90,7 +92,7 @@
boolean isTokenized();
/** True if the value of the field is stored and compressed within the index */
- boolean isCompressed();
+ //boolean isCompressed();
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
@@ -134,4 +136,10 @@
* @return true if this field can be loaded lazily
*/
boolean isLazy();
+
+ /**
+ * Gives access to the data object of this field.
+ * @return the data of the field
+ */
+ FieldData getFieldData();
}
Index: src/java/org/apache/lucene/document/Document.java
===================================================================
--- src/java/org/apache/lucene/document/Document.java (révision 493446)
+++ src/java/org/apache/lucene/document/Document.java (copie de travail)
@@ -37,8 +37,8 @@
* IndexReader#document(int)}.
*/
-public final class Document implements java.io.Serializable {
- List fields = new Vector();
+public class Document implements java.io.Serializable {
+ protected List fields = new Vector();
private float boost = 1.0f;
/** Constructs a new document with no fields. */