Index: src/test/org/apache/lucene/store/IndexInputTest.java =================================================================== --- src/test/org/apache/lucene/store/IndexInputTest.java (révision 0) +++ src/test/org/apache/lucene/store/IndexInputTest.java (révision 0) @@ -0,0 +1,104 @@ +package org.apache.lucene.store; + +import junit.framework.TestCase; + +public class IndexInputTest extends TestCase { + + public void testInt() throws Exception { + genericTestInt(0); + genericTestInt(1); + genericTestInt(-1); + genericTestInt(Integer.MAX_VALUE); + genericTestInt(Integer.MIN_VALUE); + } + + public void testVInt() throws Exception { + genericTestVInt(0); + genericTestVInt(1); + genericTestVInt(-1); + genericTestVInt(Integer.MAX_VALUE); + genericTestVInt(Integer.MIN_VALUE); + } + + public void testLong() throws Exception { + genericTestLong(0); + genericTestLong(1); + genericTestLong(-1); + genericTestLong(Long.MAX_VALUE); + genericTestLong(Long.MIN_VALUE); + } + + public void testVLong() throws Exception { + genericTestVLong(0); + genericTestVLong(1); + genericTestVLong(-1); + genericTestVLong(Long.MAX_VALUE); + genericTestVLong(Long.MIN_VALUE); + } + + public void testString() throws Exception { + genericTestString(""); + genericTestString("a"); + genericTestString("GiyNNKHhnivNKKHgcNiCniCH716534912é_è'-(é(_çà-é$*ù!:;,!:;,"); + } + + private void genericTestInt(int i) throws Exception { + RAMFile fileA = new RAMFile(); + RAMFile fileB = new RAMFile(); + RAMOutputStream outA = new RAMOutputStream(fileA); + outA.writeInt(i); + outA.close(); + RAMOutputStream outB = new RAMOutputStream(fileB); + outB.writeInt(new RAMInputStream(fileA)); + outB.close(); + assertEquals(i, new RAMInputStream(fileB).readInt()); + } + + private void genericTestVInt(int i) throws Exception { + RAMFile fileA = new RAMFile(); + RAMFile fileB = new RAMFile(); + RAMOutputStream outA = new RAMOutputStream(fileA); + outA.writeVInt(i); + outA.close(); + RAMOutputStream outB = new 
RAMOutputStream(fileB); + outB.writeVInt(new RAMInputStream(fileA)); + outB.close(); + assertEquals(i, new RAMInputStream(fileB).readVInt()); + } + + private void genericTestLong(long l) throws Exception { + RAMFile fileA = new RAMFile(); + RAMFile fileB = new RAMFile(); + RAMOutputStream outA = new RAMOutputStream(fileA); + outA.writeLong(l); + outA.close(); + RAMOutputStream outB = new RAMOutputStream(fileB); + outB.writeLong(new RAMInputStream(fileA)); + outB.close(); + assertEquals(l, new RAMInputStream(fileB).readLong()); + } + + private void genericTestVLong(long l) throws Exception { + RAMFile fileA = new RAMFile(); + RAMFile fileB = new RAMFile(); + RAMOutputStream outA = new RAMOutputStream(fileA); + outA.writeVLong(l); + outA.close(); + RAMOutputStream outB = new RAMOutputStream(fileB); + outB.writeVLong(new RAMInputStream(fileA)); + outB.close(); + assertEquals(l, new RAMInputStream(fileB).readVLong()); + } + + private void genericTestString(String s) throws Exception { + RAMFile fileA = new RAMFile(); + RAMFile fileB = new RAMFile(); + RAMOutputStream outA = new RAMOutputStream(fileA); + outA.writeString(s); + outA.close(); + RAMOutputStream outB = new RAMOutputStream(fileB); + outB.writeString(new RAMInputStream(fileA)); + outB.close(); + assertEquals(s, new RAMInputStream(fileB).readString()); + } +} Index: src/test/org/apache/lucene/index/TestFieldsReader.java =================================================================== --- src/test/org/apache/lucene/index/TestFieldsReader.java (révision 493446) +++ src/test/org/apache/lucene/index/TestFieldsReader.java (copie de travail) @@ -59,7 +59,7 @@ public void test() throws IOException { assertTrue(dir != null); assertTrue(fieldInfos != null); - FieldsReader reader = new FieldsReader(dir, "test", fieldInfos); + FieldsReader reader = dir.getIndexFormat().getFieldsReader(dir, "test", fieldInfos); assertTrue(reader != null); assertTrue(reader.size() == 1); Document doc = reader.doc(0, null); @@ -89,7 
+89,7 @@ public void testLazyFields() throws Exception { assertTrue(dir != null); assertTrue(fieldInfos != null); - FieldsReader reader = new FieldsReader(dir, "test", fieldInfos); + FieldsReader reader = dir.getIndexFormat().getFieldsReader(dir, "test", fieldInfos); assertTrue(reader != null); assertTrue(reader.size() == 1); Set loadFieldNames = new HashSet(); @@ -137,7 +137,7 @@ public void testLoadFirst() throws Exception { assertTrue(dir != null); assertTrue(fieldInfos != null); - FieldsReader reader = new FieldsReader(dir, "test", fieldInfos); + FieldsReader reader = dir.getIndexFormat().getFieldsReader(dir, "test", fieldInfos); assertTrue(reader != null); assertTrue(reader.size() == 1); LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector(); @@ -183,7 +183,7 @@ SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames); for (int i = 0; i < length; i++) { - reader = new FieldsReader(tmpDir, "test", fieldInfos); + reader = tmpDir.getIndexFormat().getFieldsReader(tmpDir, "test", fieldInfos); assertTrue(reader != null); assertTrue(reader.size() == 1); @@ -207,7 +207,7 @@ doc = null; //Hmmm, are we still in cache??? System.gc(); - reader = new FieldsReader(tmpDir, "test", fieldInfos); + reader = tmpDir.getIndexFormat().getFieldsReader(tmpDir, "test", fieldInfos); doc = reader.doc(0, fieldSelector); field = doc.getFieldable(DocHelper.LARGE_LAZY_FIELD_KEY); assertTrue("field is not lazy", field.isLazy() == true); Index: src/java/org/apache/lucene/index/FieldInfo.java =================================================================== --- src/java/org/apache/lucene/index/FieldInfo.java (révision 493446) +++ src/java/org/apache/lucene/index/FieldInfo.java (copie de travail) @@ -17,7 +17,7 @@ * limitations under the License. 
*/ -final class FieldInfo { +final public class FieldInfo { String name; boolean isIndexed; int number; @@ -39,4 +39,28 @@ this.storePositionWithTermVector = storePositionWithTermVector; this.omitNorms = omitNorms; } + + public String getName() { + return name; + } + + public boolean storeTermVector() { + return storeTermVector; + } + + public boolean storeOffsetWithTermVector() { + return storeOffsetWithTermVector; + } + + public boolean storePositionWithTermVector() { + return storePositionWithTermVector; + } + + public boolean omitNorms() { + return omitNorms; + } + + public boolean isIndexed() { + return isIndexed; + } } Index: src/java/org/apache/lucene/index/CompoundFileReader.java =================================================================== --- src/java/org/apache/lucene/index/CompoundFileReader.java (révision 493446) +++ src/java/org/apache/lucene/index/CompoundFileReader.java (copie de travail) @@ -55,6 +55,7 @@ throws IOException { directory = dir; + indexFormat = dir.getIndexFormat(); fileName = name; boolean success = false; Index: src/java/org/apache/lucene/index/DefaultFieldsWriter.java =================================================================== --- src/java/org/apache/lucene/index/DefaultFieldsWriter.java (révision 0) +++ src/java/org/apache/lucene/index/DefaultFieldsWriter.java (révision 0) @@ -0,0 +1,152 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.zip.Deflater; + +import org.apache.lucene.document.Document; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; + +/** + * The default implementation of FieldsWriter + * + * $Id$ + */ +public class DefaultFieldsWriter extends FieldsWriter { + + protected DefaultFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { + super(d, segment, fn); + } + + /** + * There no data stored at the document level + */ + protected void writeDocumentData(IndexOutput out, Document doc) throws IOException { + //nothing to write + } + + /** + * If a the field to write has been load lazily, it does a direct copy from the + * source to the output. 
+ */ + protected void writeFieldData(FieldData fieldData, IndexOutput out) throws IOException { + if (fieldData.isLazy() && isBinaryCompatible(fieldData)) { + fieldData.writeFromLazyLoading(out); + } else { + byte bits = 0; + if (fieldData.isTokenized()) + bits |= DefaultFieldData.FIELD_IS_TOKENIZED; + if (fieldData.isBinary()) + bits |= DefaultFieldData.FIELD_IS_BINARY; + if (fieldData instanceof DefaultFieldData && ((DefaultFieldData) fieldData).isCompressed()) { + bits |= DefaultFieldData.FIELD_IS_COMPRESSED; + } + + out.writeByte(bits); + + if (fieldData instanceof DefaultFieldData && ((DefaultFieldData) fieldData).isCompressed()) { + // compression is enabled for the current field + byte[] bdata = null; + // check if it is a binary field + if (fieldData.isBinary()) { + bdata = compress(fieldData.binaryValue()); + } else { + bdata = compress(fieldData.stringValue().getBytes("UTF-8")); + } + final int len = bdata.length; + out.writeVInt(len); + out.writeBytes(bdata, len); + } else { + // compression is disabled for the current field + if (fieldData.isBinary()) { + byte[] bdata = fieldData.binaryValue(); + final int len = bdata.length; + out.writeVInt(len); + out.writeBytes(bdata, len); + } else { + out.writeString(fieldData.stringValue()); + } + } + } + } + + /** + * Test if the specified field is binary compatible with the current format, so + * it allow us to do a direct copy from the lazy loaded field into an index + * + * @param field the field to test + * @return true if it is compatible + */ + protected boolean isBinaryCompatible(FieldData field) { + return field instanceof DefaultFieldData; + } + + /** + * To be overriden by subclasses to choose a different level of compression + * + * @return the compression level + */ + protected int getCompressionLevel() { + return Deflater.BEST_COMPRESSION; + } + + /** + * Do the compression of data + * + * To be overiden by subclasses to use a different format of compression. 
If overriden, you + * probably should also override isBinaryCompatible and and decompress function of + * DefaultFieldsReader. + * + * @param input the data to compress + * @return the compressed data + */ + protected byte[] compress(byte[] input) { + + // Create the compressor with highest level of compression + Deflater compressor = new Deflater(); + compressor.setLevel(getCompressionLevel()); + + // Give the compressor the data to compress + compressor.setInput(input); + compressor.finish(); + + /* + * Create an expandable byte array to hold the compressed data. + * You cannot use an array that's the same size as the orginal because + * there is no guarantee that the compressed data will be smaller than + * the uncompressed data. + */ + ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); + + // Compress the data + byte[] buf = new byte[1024]; + while (!compressor.finished()) { + int count = compressor.deflate(buf); + bos.write(buf, 0, count); + } + + compressor.end(); + + // Get the compressed data + return bos.toByteArray(); + } + +} Index: src/java/org/apache/lucene/index/FieldData.java =================================================================== --- src/java/org/apache/lucene/index/FieldData.java (révision 0) +++ src/java/org/apache/lucene/index/FieldData.java (révision 0) @@ -0,0 +1,255 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; +import java.io.Serializable; + +import org.apache.lucene.document.Document; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; + +/** + * The data of field + */ +public abstract class FieldData implements Serializable { + + private boolean isBinary = false; + + private boolean isTokenized = false; + + // the one and only data object for all different kind of field values + private Object fieldData = null; + + private boolean isLazy = false; + + private IndexInput fieldsStream; + + private long pointer; + + private long toRead; + + /** + * This contructor should only be used when retreiving data form an index + * + */ + protected FieldData() { + // nothing to initialized yet. 
The properties should be set via
If null, + * the {@link String} value or binary value is used. Exactly one of {@link #stringValue()}, + * {@link #readerValue()}, and {@link #binaryValue()} must be set. + * + * @return the reader value + */ + public final Reader readerValue() { + if (isLazy && fieldData == null) { + readLazyData(); + } + return fieldData instanceof Reader ? (Reader) fieldData : null; + } + + /** + * The value of the field in Binary, or null. If null, the + * {@link Reader} or {@link String} value is used. Exactly one of {@link #stringValue()}, + * {@link #readerValue()} and {@link #binaryValue()} must be set. + * + * @return the binary value + */ + public final byte[] binaryValue() { + if (isLazy && fieldData == null) { + readLazyData(); + } + return fieldData instanceof byte[] ? (byte[]) fieldData : null; + } + + /** + * + * @param fieldData the new data of the field + */ + protected void setData(Object fieldData) { + this.fieldData = fieldData; + } + + /** + * + * @return the data of the field + */ + protected Object getData() { + return fieldData; + } + + /** + * Load the field data from the stream + * + * @param in the stream to read + * @param skip if the data have to be stored, or just skipped from the stream + * @throws IOException + */ + public abstract void readStream(IndexInput in, boolean skip) throws IOException; + + private final void readLazyData() { + try { + fieldsStream.seek(pointer); + readStream(fieldsStream, false); + } catch (IOException e) { + throw new FieldReaderException(e); + } finally { + try { + fieldsStream.close(); + } catch (IOException e) { + throw new FieldReaderException(e); + } finally { + fieldsStream = null; + } + } + } + + /** + * Set this field as lazy loaded, and save the stream status + * + * @param fieldsStream the field stream + * @param pointer the pointer of the field data + * @param toRead the number of byte of the field data + */ + final void setLazyData(IndexInput fieldsStream, long pointer, long toRead) { + isLazy = true; + 
this.fieldsStream = fieldsStream; + this.pointer = pointer; + this.toRead = toRead; + } + + /** + * If the data was loaded lazily, close the kept opened stream. This should be used + * + * @throws IOException + */ + public void close() throws IOException { + if (fieldsStream != null) { + fieldsStream.close(); + } + } + + /** + * Write the lazy loaded field data directly in the specified output stream. + * If the field has not been loaded lazily, it throws an UnsupportedOperationException. + * + * @param out the stream to write in + * @throws IOException in case of write error + */ + final void writeFromLazyLoading(IndexOutput out) throws IOException { + if (!isLazy) { + throw new UnsupportedOperationException("The field have to be load lazily to copy it directly"); + } + try { + fieldsStream.seek(pointer); + out.writeBytes(fieldsStream, toRead); + } finally { + try { + fieldsStream.close(); + } finally { + fieldsStream = null; + } + } + } + + public String toString() { + if (isLazy) { + return null; + } + return stringValue(); + } +} Index: src/java/org/apache/lucene/index/IndexFormat.java =================================================================== --- src/java/org/apache/lucene/index/IndexFormat.java (révision 0) +++ src/java/org/apache/lucene/index/IndexFormat.java (révision 0) @@ -0,0 +1,88 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.store.Directory; + +/** + * Specify the format of index. + * + * The implementation of the {@link FieldsReader} and {@link FieldsWriter} returned by + * the function getFieldsReader and getFieldsWriter will specify how the data of fields are + * serialized, and also the kind of {@link Fieldable} used. + * + * $Id$ + */ +public interface IndexFormat { + + /** + * This array contains all filename extensions used by Lucene's index files, with + * one exception, namely the extension made up from .f + a number. + * Also note that two of Lucene's files (deletable and + * segments) don't have any filename extension. 
+ * + * @return a List of String + */ + List getIndexExtensions(); + + /** + * File extensions of old-style index files + * + * @return a List of String + */ + List getCompoundExtensions(); + + /** + * File extensions for term vector support + * + * @return a List of String + */ + List getVectorExtensions(); + + /** + * Return an implemetation of FieldsReader for this format + * + * @param d the directory to use + * @param segment the segment name + * @param fn the infos on fields + * @return the implemetation of FieldsReader + * @throws IOException + */ + FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException; + + /** + * Return an implemetation of FieldsWriter for this format + * + * @param d the directory to use + * @param segment the segment name + * @param fn the infos on fields + * @return the implemetation of FieldsWriter + * @throws IOException + */ + FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException; + + /** + * + * @return the index file name filter associated to this index format + */ + IndexFileNameFilter getIndexFileNameFilter(); +} Index: src/java/org/apache/lucene/index/DefaultFieldData.java =================================================================== --- src/java/org/apache/lucene/index/DefaultFieldData.java (révision 0) +++ src/java/org/apache/lucene/index/DefaultFieldData.java (révision 0) @@ -0,0 +1,155 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.Reader; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; + +import org.apache.lucene.store.IndexInput; + +/** + * The default impelmentation of {@link FieldData} + * + */ +public class DefaultFieldData extends FieldData { + + DefaultFieldData() { + super(); + } + + /** + * Contructor for string data + * + * @param text the string data + */ + public DefaultFieldData(String text) { + super(text); + } + + /** + * Contructor for blob data + * + * @param data the blob data + */ + public DefaultFieldData(byte[] data) { + super(data); + } + + /** + * Contructor for data got from a reader + * + * @param reader the data's reader + */ + public DefaultFieldData(Reader reader) { + super(reader); + } + + private boolean isCompressed; + + /** + * @return true if the value of the field is stored and compressed within the index + */ + public final boolean isCompressed() { + return isCompressed; + } + + /** + * Set the compress status of the data + * + * @param isCompressed + */ + public void setCompressed(boolean isCompressed) { + this.isCompressed = isCompressed; + } + + public static final byte FIELD_IS_TOKENIZED = 0x1; + + public static final byte FIELD_IS_BINARY = 0x2; + + public static final byte FIELD_IS_COMPRESSED = 0x4; + + public void readStream(IndexInput in, boolean skip) throws IOException { + byte bits = in.readByte(); + isCompressed = (bits & FIELD_IS_COMPRESSED) != 0; + setTokenized((bits & FIELD_IS_TOKENIZED) != 0); + setBinary((bits & 
FIELD_IS_BINARY) != 0); + + if (skip) { + int toRead = in.readVInt(); + if (isBinary() || isCompressed()) { + long pointer = in.getFilePointer(); + //Need to move the pointer ahead by toRead positions + in.seek(pointer + toRead); + } else { + //Skip ahead of where we are by the length of what is stored + in.skipChars(toRead); + } + } else { + if (isBinary()) { + int toRead = in.readVInt(); + final byte[] b = new byte[toRead]; + in.readBytes(b, 0, b.length); + if (isCompressed()) { + setData(uncompress(b)); + } else { + setData(b); + } + } else { + if (isCompressed()) { + int toRead = in.readVInt(); + final byte[] b = new byte[toRead]; + in.readBytes(b, 0, b.length); + setData(new String(uncompress(b), "UTF-8")); + } else { + setData(in.readString()); // read value + } + } + } + } + + protected byte[] uncompress(final byte[] input) throws IOException { + + Inflater decompressor = new Inflater(); + decompressor.setInput(input); + + // Create an expandable byte array to hold the decompressed data + ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); + + // Decompress the data + byte[] buf = new byte[1024]; + while (!decompressor.finished()) { + try { + int count = decompressor.inflate(buf); + bos.write(buf, 0, count); + } catch (DataFormatException e) { + // this will happen if the field is not compressed + IOException newException = new IOException("field data are in wrong format: " + e.toString()); + newException.initCause(e); + throw newException; + } + } + + decompressor.end(); + + // Get the decompressed data + return bos.toByteArray(); + } +} Index: src/java/org/apache/lucene/index/DefaultIndexFormat.java =================================================================== --- src/java/org/apache/lucene/index/DefaultIndexFormat.java (révision 0) +++ src/java/org/apache/lucene/index/DefaultIndexFormat.java (révision 0) @@ -0,0 +1,96 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + 
* contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.store.Directory; + +/** + * The default implementation of the index format + * + * $Id$ + */ +public class DefaultIndexFormat implements IndexFormat { + + protected static final List INDEX_EXTENSIONS = Arrays.asList(new String[] { "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "gen" }); + + protected static final List COMPOUND_EXTENSIONS = Arrays.asList(new String[] { "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis" }); + + protected static final List VECTOR_EXTENSIONS = Arrays.asList(new String[] { "tvx", "tvd", "tvf" }); + + private ArrayList indexExtensions; + + private ArrayList compoundExtensions; + + private IndexFileNameFilter indexFileNameFilter; + + /** + * Contructor + * + */ + public DefaultIndexFormat() { + indexExtensions = new ArrayList(INDEX_EXTENSIONS); + indexExtensions.addAll(getVectorExtensions()); + indexExtensions.addAll(getAdditionnalExtensions()); + + compoundExtensions = new ArrayList(COMPOUND_EXTENSIONS); + compoundExtensions.addAll(getAdditionnalExtensions()); + } + + protected List getAdditionnalExtensions() { + return 
Collections.EMPTY_LIST; + } + + public List getIndexExtensions() { + return indexExtensions; + } + + public List getCompoundExtensions() { + return compoundExtensions; + } + + public List getVectorExtensions() { + return VECTOR_EXTENSIONS; + } + + /** + * Use the default implementation of FieldsReader : DefaultFieldsReader + */ + public FieldsReader getFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { + return new DefaultFieldsReader(d, segment, fn); + } + + /** + * Use the default implementation of FieldsWriter : DefaultFieldsWriter + */ + public FieldsWriter getFieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { + return new DefaultFieldsWriter(d, segment, fn); + } + + public IndexFileNameFilter getIndexFileNameFilter() { + if (indexFileNameFilter == null) { + indexFileNameFilter = new IndexFileNameFilter(this); + } + return indexFileNameFilter; + } +} Index: src/java/org/apache/lucene/index/FieldsReader.java =================================================================== --- src/java/org/apache/lucene/index/FieldsReader.java (révision 493446) +++ src/java/org/apache/lucene/index/FieldsReader.java (copie de travail) @@ -17,15 +17,9 @@ * limitations under the License. */ -import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.Reader; -import java.util.zip.DataFormatException; -import java.util.zip.Inflater; -import org.apache.lucene.document.AbstractField; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.document.FieldSelectorResult; import org.apache.lucene.document.Fieldable; @@ -39,7 +33,7 @@ * * @version $Id$ */ -final class FieldsReader { +public abstract class FieldsReader { private final FieldInfos fieldInfos; // The main fieldStream, used only for cloning. 
@@ -51,10 +45,9 @@ private final IndexInput indexStream; private int size; - private ThreadLocal fieldsStreamTL = new ThreadLocal(); - FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { + protected FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { fieldInfos = fn; cloneableFieldsStream = d.openInput(segment + ".fdt"); @@ -69,7 +62,7 @@ * * @throws IOException */ - final void close() throws IOException { + protected void close() throws IOException { fieldsStream.close(); cloneableFieldsStream.close(); indexStream.close(); @@ -89,349 +82,44 @@ long position = indexStream.readLong(); fieldsStream.seek(position); - Document doc = new Document(); + Document doc = createDocument(fieldsStream); + int numFields = fieldsStream.readVInt(); for (int i = 0; i < numFields; i++) { int fieldNumber = fieldsStream.readVInt(); FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name); - boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true; - - byte bits = fieldsStream.readByte(); - boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; - boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; - boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; - if (acceptField.equals(FieldSelectorResult.LOAD) == true) { - addField(doc, fi, binary, compressed, tokenize); - } - else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE) == true) { - addFieldForMerge(doc, fi, binary, compressed, tokenize); - } - else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){ - addField(doc, fi, binary, compressed, tokenize); - break;//Get out of this loop - } - else if (lazy == true){ - addFieldLazy(doc, fi, binary, compressed, tokenize); - } - else { - skipField(binary, compressed); - } - } - return doc; - } + FieldData fieldData = createFieldData(fi); - /** - * Skip the 
field. We still have to read some of the information about the field, but can skip past the actual content. - * This will have the most payoff on large fields. - */ - private void skipField(boolean binary, boolean compressed) throws IOException { - - int toRead = fieldsStream.readVInt(); - - if (binary || compressed) { + boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD); + boolean skip = acceptField.equals(FieldSelectorResult.NO_LOAD); + long pointer = fieldsStream.getFilePointer(); - fieldsStream.seek(pointer + toRead); - } else { - //We need to skip chars. This will slow us down, but still better - fieldsStream.skipChars(toRead); - } - } - private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { - if (binary == true) { - int toRead = fieldsStream.readVInt(); - long pointer = fieldsStream.getFilePointer(); - if (compressed) { - //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS)); - doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer)); - } else { - //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES)); - doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer)); - } - //Need to move the pointer ahead by toRead positions - fieldsStream.seek(pointer + toRead); - } else { - Field.Store store = Field.Store.YES; - Field.Index index = getIndexType(fi, tokenize); - Field.TermVector termVector = getTermVectorType(fi); + fieldData.readStream(fieldsStream, skip || lazy); - Fieldable f; - if (compressed) { - store = Field.Store.COMPRESS; - int toRead = fieldsStream.readVInt(); - long pointer = fieldsStream.getFilePointer(); - f = new LazyField(fi.name, store, toRead, pointer); - //skip over the part that we aren't loading - fieldsStream.seek(pointer + toRead); - f.setOmitNorms(fi.omitNorms); - } else { - int length = fieldsStream.readVInt(); - long pointer = fieldsStream.getFilePointer(); - //Skip ahead of where we are by the 
length of what is stored - fieldsStream.skipChars(length); - f = new LazyField(fi.name, store, index, termVector, length, pointer); - f.setOmitNorms(fi.omitNorms); + if (lazy) { + fieldData.setLazyData((IndexInput) fieldsStream.clone(), pointer, fieldsStream.getFilePointer() - pointer); } - doc.add(f); - } - } + Fieldable field = createFieldable(fi, fieldData); - // in merge mode we don't uncompress the data of a compressed field - private void addFieldForMerge(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { - Object data; - - if (binary || compressed) { - int toRead = fieldsStream.readVInt(); - final byte[] b = new byte[toRead]; - fieldsStream.readBytes(b, 0, b.length); - data = b; - } else { - data = fieldsStream.readString(); - } - - doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize)); - } - - private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { - - //we have a binary stored field, and it may be compressed - if (binary) { - int toRead = fieldsStream.readVInt(); - final byte[] b = new byte[toRead]; - fieldsStream.readBytes(b, 0, b.length); - if (compressed) - doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS)); - else - doc.add(new Field(fi.name, b, Field.Store.YES)); - - } else { - Field.Store store = Field.Store.YES; - Field.Index index = getIndexType(fi, tokenize); - Field.TermVector termVector = getTermVectorType(fi); - - Fieldable f; - if (compressed) { - store = Field.Store.COMPRESS; - int toRead = fieldsStream.readVInt(); - - final byte[] b = new byte[toRead]; - fieldsStream.readBytes(b, 0, b.length); - f = new Field(fi.name, // field name - new String(uncompress(b), "UTF-8"), // uncompress the value and add as string - store, - index, - termVector); - f.setOmitNorms(fi.omitNorms); - } else { - f = new Field(fi.name, // name - fieldsStream.readString(), // read value - store, - index, - termVector); - 
f.setOmitNorms(fi.omitNorms); + if (!skip) { + doc.add(field); } - doc.add(f); - } - } - - private Field.TermVector getTermVectorType(FieldInfo fi) { - Field.TermVector termVector = null; - if (fi.storeTermVector) { - if (fi.storeOffsetWithTermVector) { - if (fi.storePositionWithTermVector) { - termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; - } else { - termVector = Field.TermVector.WITH_OFFSETS; - } - } else if (fi.storePositionWithTermVector) { - termVector = Field.TermVector.WITH_POSITIONS; - } else { - termVector = Field.TermVector.YES; + if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)) { + break; } - } else { - termVector = Field.TermVector.NO; } - return termVector; - } - private Field.Index getIndexType(FieldInfo fi, boolean tokenize) { - Field.Index index; - if (fi.isIndexed && tokenize) - index = Field.Index.TOKENIZED; - else if (fi.isIndexed && !tokenize) - index = Field.Index.UN_TOKENIZED; - else - index = Field.Index.NO; - return index; + return doc; } - /** - * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is - * loaded. 
- */ - private class LazyField extends AbstractField implements Fieldable { - private int toRead; - private long pointer; + protected abstract Document createDocument(IndexInput in); - public LazyField(String name, Field.Store store, int toRead, long pointer) { - super(name, store, Field.Index.NO, Field.TermVector.NO); - this.toRead = toRead; - this.pointer = pointer; - lazy = true; - } + protected abstract FieldData createFieldData(FieldInfo fi); - public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) { - super(name, store, index, termVector); - this.toRead = toRead; - this.pointer = pointer; - lazy = true; - } + protected abstract Fieldable createFieldable(FieldInfo fi, FieldData fieldData); - private IndexInput getFieldStream() { - IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); - if (localFieldsStream == null) { - localFieldsStream = (IndexInput) cloneableFieldsStream.clone(); - fieldsStreamTL.set(localFieldsStream); - } - return localFieldsStream; - } - - /** - * The value of the field in Binary, or null. If null, the Reader or - * String value is used. Exactly one of stringValue(), readerValue() and - * binaryValue() must be set. - */ - public byte[] binaryValue() { - if (fieldsData == null) { - final byte[] b = new byte[toRead]; - IndexInput localFieldsStream = getFieldStream(); - //Throw this IO Exception since IndexREader.document does so anyway, so probably not that big of a change for people - //since they are already handling this exception when getting the document - try { - localFieldsStream.seek(pointer); - localFieldsStream.readBytes(b, 0, b.length); - if (isCompressed == true) { - fieldsData = uncompress(b); - } else { - fieldsData = b; - } - } catch (IOException e) { - throw new FieldReaderException(e); - } - } - return fieldsData instanceof byte[] ? (byte[]) fieldsData : null; - } - - /** - * The value of the field as a Reader, or null. 
If null, the String value - * or binary value is used. Exactly one of stringValue(), readerValue(), - * and binaryValue() must be set. - */ - public Reader readerValue() { - return fieldsData instanceof Reader ? (Reader) fieldsData : null; - } - - /** - * The value of the field as a String, or null. If null, the Reader value - * or binary value is used. Exactly one of stringValue(), readerValue(), and - * binaryValue() must be set. - */ - public String stringValue() { - if (fieldsData == null) { - IndexInput localFieldsStream = getFieldStream(); - try { - localFieldsStream.seek(pointer); - //read in chars b/c we already know the length we need to read - char[] chars = new char[toRead]; - localFieldsStream.readChars(chars, 0, toRead); - fieldsData = new String(chars); - } catch (IOException e) { - throw new FieldReaderException(e); - } - } - return fieldsData instanceof String ? (String) fieldsData : null; - } - - public long getPointer() { - return pointer; - } - - public void setPointer(long pointer) { - this.pointer = pointer; - } - - public int getToRead() { - return toRead; - } - - public void setToRead(int toRead) { - this.toRead = toRead; - } - } - - private final byte[] uncompress(final byte[] input) - throws IOException { - - Inflater decompressor = new Inflater(); - decompressor.setInput(input); - - // Create an expandable byte array to hold the decompressed data - ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); - - // Decompress the data - byte[] buf = new byte[1024]; - while (!decompressor.finished()) { - try { - int count = decompressor.inflate(buf); - bos.write(buf, 0, count); - } - catch (DataFormatException e) { - // this will happen if the field is not compressed - IOException newException = new IOException("field data are in wrong format: " + e.toString()); - newException.initCause(e); - throw newException; - } - } - - decompressor.end(); - - // Get the decompressed data - return bos.toByteArray(); - } - - // Instances of this 
class hold field properties and data - // for merge - final static class FieldForMerge extends AbstractField { - public String stringValue() { - return (String) this.fieldsData; - } - - public Reader readerValue() { - // not needed for merge - return null; - } - - public byte[] binaryValue() { - return (byte[]) this.fieldsData; - } - - public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) { - this.isStored = true; - this.fieldsData = value; - this.isCompressed = compressed; - this.isBinary = binary; - this.isTokenized = tokenize; - - this.name = fi.name.intern(); - this.isIndexed = fi.isIndexed; - this.omitNorms = fi.omitNorms; - this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector; - this.storePositionWithTermVector = fi.storePositionWithTermVector; - this.storeTermVector = fi.storeTermVector; - } - - } } Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (révision 493446) +++ src/java/org/apache/lucene/index/IndexReader.java (copie de travail) @@ -19,6 +19,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.search.Filter; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -30,6 +31,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collection; +import java.util.List; /** IndexReader is an abstract class, providing an interface for accessing an index. Search of an index is done entirely through this abstract interface, @@ -414,7 +416,7 @@ /** Expert: Resets the normalization factor for the named field of the named * document. 
The norm represents the product of the field's {@link - * Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, + * FieldData#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, * int) length normalization}. Thus, to preserve the length normalization * values when resetting this, one should base the new value upon the old. * Index: src/java/org/apache/lucene/index/IndexFileNames.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNames.java (révision 493446) +++ src/java/org/apache/lucene/index/IndexFileNames.java (copie de travail) @@ -34,28 +34,6 @@ /** Name of the index deletable file (only used in * pre-lockless indices) */ static final String DELETABLE = "deletable"; - - /** - * This array contains all filename extensions used by - * Lucene's index files, with two exceptions, namely the - * extension made up from .f + a number and - * from .s + a number. Also note that - * Lucene's segments_N files do not have any - * filename extension. 
- */ - static final String INDEX_EXTENSIONS[] = new String[] { - "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", - "tvx", "tvd", "tvf", "tvp", "gen"}; - - /** File extensions of old-style index files */ - static final String COMPOUND_EXTENSIONS[] = new String[] { - "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis" - }; - - /** File extensions for term vector support */ - static final String VECTOR_EXTENSIONS[] = new String[] { - "tvx", "tvd", "tvf" - }; /** * Computes the full file name from base, extension and Index: src/java/org/apache/lucene/index/FilterIndexReader.java =================================================================== --- src/java/org/apache/lucene/index/FilterIndexReader.java (révision 493446) +++ src/java/org/apache/lucene/index/FilterIndexReader.java (copie de travail) @@ -19,6 +19,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.store.IndexOutput; import java.io.IOException; Index: src/java/org/apache/lucene/index/FieldsWriter.java =================================================================== --- src/java/org/apache/lucene/index/FieldsWriter.java (révision 493446) +++ src/java/org/apache/lucene/index/FieldsWriter.java (copie de travail) @@ -16,35 +16,29 @@ * the License. 
*/ -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Iterator; -import java.util.zip.Deflater; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexOutput; -final class FieldsWriter -{ - static final byte FIELD_IS_TOKENIZED = 0x1; - static final byte FIELD_IS_BINARY = 0x2; - static final byte FIELD_IS_COMPRESSED = 0x4; - +public abstract class FieldsWriter { + private FieldInfos fieldInfos; private IndexOutput fieldsStream; private IndexOutput indexStream; - FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { + protected FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { fieldInfos = fn; fieldsStream = d.createOutput(segment + ".fdt"); indexStream = d.createOutput(segment + ".fdx"); } - final void close() throws IOException { + protected void close() throws IOException { fieldsStream.close(); indexStream.close(); } @@ -52,100 +46,29 @@ final void addDocument(Document doc) throws IOException { indexStream.writeLong(fieldsStream.getFilePointer()); + writeDocumentData(fieldsStream, doc); + int storedCount = 0; - Iterator fieldIterator = doc.getFields().iterator(); - while (fieldIterator.hasNext()) { - Fieldable field = (Fieldable) fieldIterator.next(); + Iterator fields = doc.getFields().iterator(); + while (fields.hasNext()) { + Fieldable field = (Fieldable) fields.next(); if (field.isStored()) storedCount++; } fieldsStream.writeVInt(storedCount); - fieldIterator = doc.getFields().iterator(); - while (fieldIterator.hasNext()) { - Fieldable field = (Fieldable) fieldIterator.next(); - // if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode - // and field.binaryValue() already returns the compressed value for a field - // with isCompressed()==true, so we disable compression in that case - boolean disableCompression = (field instanceof 
FieldsReader.FieldForMerge); + fields = doc.getFields().iterator(); + while (fields.hasNext()) { + Fieldable field = (Fieldable) fields.next(); if (field.isStored()) { fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name())); - - byte bits = 0; - if (field.isTokenized()) - bits |= FieldsWriter.FIELD_IS_TOKENIZED; - if (field.isBinary()) - bits |= FieldsWriter.FIELD_IS_BINARY; - if (field.isCompressed()) - bits |= FieldsWriter.FIELD_IS_COMPRESSED; - - fieldsStream.writeByte(bits); - - if (field.isCompressed()) { - // compression is enabled for the current field - byte[] data = null; - - if (disableCompression) { - // optimized case for merging, the data - // is already compressed - data = field.binaryValue(); - } else { - // check if it is a binary field - if (field.isBinary()) { - data = compress(field.binaryValue()); - } - else { - data = compress(field.stringValue().getBytes("UTF-8")); - } - } - final int len = data.length; - fieldsStream.writeVInt(len); - fieldsStream.writeBytes(data, len); - } - else { - // compression is disabled for the current field - if (field.isBinary()) { - byte[] data = field.binaryValue(); - final int len = data.length; - fieldsStream.writeVInt(len); - fieldsStream.writeBytes(data, len); - } - else { - fieldsStream.writeString(field.stringValue()); - } - } + writeFieldData(field.getFieldData(), fieldsStream); } } } - private final byte[] compress (byte[] input) { + abstract protected void writeDocumentData(IndexOutput out, Document doc) throws IOException; - // Create the compressor with highest level of compression - Deflater compressor = new Deflater(); - compressor.setLevel(Deflater.BEST_COMPRESSION); + abstract protected void writeFieldData(FieldData fieldData, IndexOutput out) throws IOException; - // Give the compressor the data to compress - compressor.setInput(input); - compressor.finish(); - - /* - * Create an expandable byte array to hold the compressed data. 
- * You cannot use an array that's the same size as the orginal because - * there is no guarantee that the compressed data will be smaller than - * the uncompressed data. - */ - ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); - - // Compress the data - byte[] buf = new byte[1024]; - while (!compressor.finished()) { - int count = compressor.deflate(buf); - bos.write(buf, 0, count); - } - - compressor.end(); - - // Get the compressed data - return bos.toByteArray(); - } } Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (révision 493448) +++ src/java/org/apache/lucene/index/SegmentMerger.java (copie de travail) @@ -17,6 +17,7 @@ * limitations under the License. */ +import java.util.List; import java.util.Vector; import java.util.Iterator; import java.util.Collection; @@ -47,7 +48,7 @@ private Vector readers = new Vector(); private FieldInfos fieldInfos; - /** This ctor used only by test code. + /** This constructor is used only by test code. * * @param dir The Directory to merge the other segments into * @param name The name of the new segment @@ -115,12 +116,13 @@ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName); - Vector files = - new Vector(IndexFileNames.COMPOUND_EXTENSIONS.length + fieldInfos.size()); - + List compoundExtensions = directory.getIndexFormat().getCompoundExtensions(); + + Vector files = new Vector(compoundExtensions.size() + fieldInfos.size()); + // Basic files - for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) { - files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]); + for (int i = 0; i < compoundExtensions.size(); i++) { + files.add(segment + "." 
+ compoundExtensions.get(i)); } // Fieldable norm files @@ -131,10 +133,12 @@ } } + List vectorExtensions = directory.getIndexFormat().getVectorExtensions(); + // Vector files if (fieldInfos.hasVectors()) { - for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.length; i++) { - files.add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]); + for (int i = 0; i < vectorExtensions.size(); i++) { + files.add(segment + "." + vectorExtensions.get(i)); } } @@ -178,14 +182,13 @@ } fieldInfos.write(directory, segment + ".fnm"); - FieldsWriter fieldsWriter = // merge field values - new FieldsWriter(directory, segment, fieldInfos); - + FieldsWriter fieldsWriter = directory.getIndexFormat().getFieldsWriter(directory, segment, fieldInfos); + // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new FieldSelector() { public FieldSelectorResult accept(String fieldName) { - return FieldSelectorResult.LOAD_FOR_MERGE; + return FieldSelectorResult.LAZY_LOAD; } }; Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (révision 493448) +++ src/java/org/apache/lucene/index/IndexWriter.java (copie de travail) @@ -106,7 +106,7 @@ private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory - private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs + private RAMDirectory ramDirectory; // for temp segs private IndexFileDeleter deleter; private Lock writeLock; @@ -323,6 +323,7 @@ private void init(Directory d, Analyzer a, final boolean create, boolean closeDir) throws IOException { + ramDirectory = new RAMDirectory(d.getIndexFormat()); this.closeDir = closeDir; directory = d; analyzer = a; Index: 
src/java/org/apache/lucene/index/DocumentWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentWriter.java (révision 493446) +++ src/java/org/apache/lucene/index/DocumentWriter.java (copie de travail) @@ -75,8 +75,7 @@ fieldInfos.write(directory, segment + ".fnm"); // write field values - FieldsWriter fieldsWriter = - new FieldsWriter(directory, segment, fieldInfos); + FieldsWriter fieldsWriter = directory.getIndexFormat().getFieldsWriter(directory, segment, fieldInfos); try { fieldsWriter.addDocument(doc); } finally { Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (révision 493446) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (copie de travail) @@ -81,7 +81,7 @@ // not referenced by the current segments info: String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - IndexFileNameFilter filter = IndexFileNameFilter.getFilter(); + IndexFileNameFilter filter = directory.getIndexFormat().getIndexFileNameFilter(); String[] files = directory.list(); Index: src/java/org/apache/lucene/index/IndexFileNameFilter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNameFilter.java (révision 493446) +++ src/java/org/apache/lucene/index/IndexFileNameFilter.java (copie de travail) @@ -29,14 +29,17 @@ */ public class IndexFileNameFilter implements FilenameFilter { - static IndexFileNameFilter singleton = new IndexFileNameFilter(); private HashSet extensions; - public IndexFileNameFilter() { + /** + * Constructor + * + * @param indexFormat the format of the index + */ + public IndexFileNameFilter(IndexFormat indexFormat) { extensions = new HashSet(); - for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) { - extensions.add(IndexFileNames.INDEX_EXTENSIONS[i]); - } + 
extensions.addAll(indexFormat.getIndexExtensions()); + extensions.addAll(indexFormat.getVectorExtensions()); } /* (non-Javadoc) @@ -85,8 +88,4 @@ } return false; } - - public static IndexFileNameFilter getFilter() { - return singleton; - } } Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (révision 493446) +++ src/java/org/apache/lucene/index/SegmentReader.java (copie de travail) @@ -155,8 +155,9 @@ // No compound file exists - use the multi-file format fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); - fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos); + fieldsReader = cfsDir.getIndexFormat().getFieldsReader(cfsDir, segment, fieldInfos); + tis = new TermInfosReader(cfsDir, segment, fieldInfos); // NOTE: the bitvector is stored using the regular directory, not cfs @@ -290,8 +291,9 @@ files.addElement(name); } } else { - for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) { - String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i]; + List indexExtension = directory().getIndexFormat().getIndexExtensions(); + for (int i = 0; i < indexExtension.size(); i++) { + String name = segment + "." + indexExtension.get(i); if (directory().fileExists(name)) files.addElement(name); } Index: src/java/org/apache/lucene/index/DefaultFieldsReader.java =================================================================== --- src/java/org/apache/lucene/index/DefaultFieldsReader.java (révision 0) +++ src/java/org/apache/lucene/index/DefaultFieldsReader.java (révision 0) @@ -0,0 +1,53 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; + +/** + * The default implementation of a {@link FieldsReader} + * + * $Id$ + */ +public class DefaultFieldsReader extends FieldsReader { + + protected DefaultFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { + super(d, segment, fn); + } + + /** + * There is no information stored at the document level + */ + protected Document createDocument(IndexInput fieldsStream) { + return new Document(); + } + + protected FieldData createFieldData(FieldInfo fi) { + return new DefaultFieldData(); + } + + protected Fieldable createFieldable(FieldInfo fi, FieldData fieldData) { + return new Field(fi, (DefaultFieldData) fieldData); + } +} Index: src/java/org/apache/lucene/store/Directory.java =================================================================== --- src/java/org/apache/lucene/store/Directory.java (révision 493446) +++ src/java/org/apache/lucene/store/Directory.java (copie de travail) @@ -19,6 +19,9 @@ import java.io.IOException; +import org.apache.lucene.index.DefaultIndexFormat; +import org.apache.lucene.index.IndexFormat; + /** A Directory is a flat list of files. 
Files may be written once, when they * are created. Once a file is created it may only be opened for read, or * deleted. Random access is permitted both when reading and writing. @@ -42,6 +45,12 @@ * this Directory instance). */ protected LockFactory lockFactory; + protected IndexFormat indexFormat = new DefaultIndexFormat(); + + public IndexFormat getIndexFormat() { + return indexFormat; + } + /** Returns an array of strings, one for each file in the directory. */ public abstract String[] list() throws IOException; @@ -124,4 +133,5 @@ public String getLockID() { return this.toString(); } + } Index: src/java/org/apache/lucene/store/RAMDirectory.java =================================================================== --- src/java/org/apache/lucene/store/RAMDirectory.java (révision 493446) +++ src/java/org/apache/lucene/store/RAMDirectory.java (copie de travail) @@ -27,6 +27,12 @@ import java.util.Iterator; import java.util.Set; +import org.apache.lucene.index.DefaultIndexFormat; +import org.apache.lucene.index.IndexFormat; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; + /** * A memory-resident {@link Directory} implementation. Locking * implementation is by default the {@link SingleInstanceLockFactory} @@ -47,8 +53,21 @@ // Lock acquisition sequence: RAMDirectory, then RAMFile // ***** - /** Constructs an empty {@link Directory}. */ + /** + * Constructs an empty {@link Directory}. 
+ * The index format used is the default one + */ public RAMDirectory() { + this(new DefaultIndexFormat()); + } + + /** + * Constructor + * + * @param indexFormat the format of the index + */ + public RAMDirectory(IndexFormat indexFormat) { + this.indexFormat = indexFormat; setLockFactory(new SingleInstanceLockFactory()); } @@ -69,11 +88,11 @@ * @exception IOException if an error occurs */ public RAMDirectory(Directory dir) throws IOException { - this(dir, false); + this(dir, false, new DefaultIndexFormat()); } - - private RAMDirectory(Directory dir, boolean closeDir) throws IOException { - this(); + + private RAMDirectory(Directory dir, boolean closeDir, IndexFormat indexFormat) throws IOException { + this(indexFormat); final String[] files = dir.list(); byte[] buf = new byte[BufferedIndexOutput.BUFFER_SIZE]; for (int i = 0; i < files.length; i++) { @@ -107,7 +126,7 @@ * @see #RAMDirectory(Directory) */ public RAMDirectory(File dir) throws IOException { - this(FSDirectory.getDirectory(dir, false), true); + this(FSDirectory.getDirectory(dir, false), true, new DefaultIndexFormat()); } /** @@ -118,7 +137,7 @@ * @see #RAMDirectory(Directory) */ public RAMDirectory(String dir) throws IOException { - this(FSDirectory.getDirectory(dir, false), true); + this(FSDirectory.getDirectory(dir, false), true, new DefaultIndexFormat()); } /** Returns an array of strings, one for each file in the directory. */ @@ -255,5 +274,21 @@ fileNames = null; files = null; } + + /** + * For debugging purposes, lists every file name of this directory. 
+ * The code was commented because the lockID is based on the toString() function + */ +// public String toString() { +// String[] f = list(); +// StringBuffer buffer = new StringBuffer(); +// for (int i = 0; i< f.length; i++) { +// buffer.append(f[i]); +// if (i != f.length - 1) { +// buffer.append(", "); +// } +// } +// return buffer.toString(); +// } } Index: src/java/org/apache/lucene/store/RAMFile.java =================================================================== --- src/java/org/apache/lucene/store/RAMFile.java (révision 493446) +++ src/java/org/apache/lucene/store/RAMFile.java (copie de travail) @@ -79,4 +79,25 @@ } } + + /** + * For debug purpose + */ + public String toString() { + StringBuffer buffer = new StringBuffer(); + int bufferNum = 0; + byte[] b = (byte[]) buffers.get(0); + int bufferPos = 0; + for (int i = 0; i < 100 && i < length; i++) { + buffer.append(b[bufferPos]); + buffer.append(','); + bufferPos++; + if (bufferPos == b.length) { + bufferPos = 0; + bufferNum++; + b = (byte[]) buffers.get(bufferNum); + } + } + return buffer.toString(); + } } Index: src/java/org/apache/lucene/store/IndexOutput.java =================================================================== --- src/java/org/apache/lucene/store/IndexOutput.java (révision 493446) +++ src/java/org/apache/lucene/store/IndexOutput.java (copie de travail) @@ -31,6 +31,17 @@ */ public abstract void writeByte(byte b) throws IOException; + /** + * Write a byte directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeByte(byte) + */ + public void writeByte(IndexInput in) throws IOException { + writeByte(in.readByte()); + } + /** Writes an array of bytes. * @param b the bytes to write * @param length the number of bytes to write @@ -38,6 +49,20 @@ */ public abstract void writeBytes(byte[] b, int length) throws IOException; + /** + * Write a batch of bytes directly from an input stream. 
+ * + * @param in the stream to read + * @param length the number of bytes to write + * @throws IOException + * @see #writeBytes(byte[], int) + */ + public void writeBytes(IndexInput in, long length) throws IOException { + while (length-- > 0) { + writeByte(in.readByte()); + } + } + /** Writes an int as four bytes. * @see IndexInput#readInt() */ @@ -48,6 +73,20 @@ writeByte((byte) i); } + /** + * Writes an int as four bytes directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeInt(int) + */ + public void writeInt(IndexInput in) throws IOException { + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + } + /** Writes an int in a variable-length format. Writes between one and * five bytes. Smaller values take fewer bytes. Negative numbers are not * supported. @@ -61,6 +100,22 @@ writeByte((byte)i); } + /** + * Writes an int in a variable-length format directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeVInt(int) + */ + public void writeVInt(IndexInput in) throws IOException { + byte b = in.readByte(); + writeByte(b); + while ((b & 0x80) != 0) { + b = in.readByte(); + writeByte(b); + } + } + /** Writes a long as eight bytes. * @see IndexInput#readLong() */ @@ -69,6 +124,24 @@ writeInt((int) i); } + /** + * Writes a long as eight bytes directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeLong(long) + */ + public void writeLong(IndexInput in) throws IOException { + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + writeByte(in.readByte()); + } + /** Writes an long in a variable-length format. Writes between one and five * bytes. Smaller values take fewer bytes. Negative numbers are not * supported. 
@@ -82,6 +155,22 @@ writeByte((byte)i); } + /** + * Writes an long in a variable-length format directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeVLong(long) + */ + public void writeVLong(IndexInput in) throws IOException { + byte b = in.readByte(); + writeByte(b); + while ((b & 0x80) != 0) { + b = in.readByte(); + writeByte(b); + } + } + /** Writes a string. * @see IndexInput#readString() */ @@ -91,6 +180,19 @@ writeChars(s, 0, length); } + /** + * Writes a string directly from an input stream. + * + * @param in the stream to read + * @throws IOException + * @see #writeString(String) + */ + public void writeString(IndexInput in) throws IOException { + int length = in.readVInt(); + writeVInt(length); + writeChars(in, length); + } + /** Writes a sequence of UTF-8 encoded characters from a string. * @param s the source of the characters * @param start the first character in the sequence @@ -103,18 +205,40 @@ for (int i = start; i < end; i++) { final int code = (int)s.charAt(i); if (code >= 0x01 && code <= 0x7F) - writeByte((byte)code); + writeByte((byte)code); else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) { - writeByte((byte)(0xC0 | (code >> 6))); - writeByte((byte)(0x80 | (code & 0x3F))); + writeByte((byte)(0xC0 | (code >> 6))); + writeByte((byte)(0x80 | (code & 0x3F))); } else { - writeByte((byte)(0xE0 | (code >>> 12))); - writeByte((byte)(0x80 | ((code >> 6) & 0x3F))); - writeByte((byte)(0x80 | (code & 0x3F))); + writeByte((byte)(0xE0 | (code >>> 12))); + writeByte((byte)(0x80 | ((code >> 6) & 0x3F))); + writeByte((byte)(0x80 | (code & 0x3F))); } } } + /** + * Writes a sequence of UTF-8 encoded characters directly from an input stream. 
+ * + * @param in the stream to read + * @param length the number of characters in the sequence + * @throws IOException + * @see #writeChars(String,int,int) + */ + public void writeChars(IndexInput in, int length) + throws IOException { + for (int i = 0; i < length; i++) { + byte b = in.readByte(); + writeByte(b); + if ((b & 0x80) != 0) { + writeByte(in.readByte()); + if ((b & 0xE0) == 0xE0) { + writeByte(in.readByte()); + } + } + } + } + /** Forces any buffered output to be written. */ public abstract void flush() throws IOException; Index: src/java/org/apache/lucene/store/FSDirectory.java =================================================================== --- src/java/org/apache/lucene/store/FSDirectory.java (révision 493446) +++ src/java/org/apache/lucene/store/FSDirectory.java (copie de travail) @@ -26,7 +26,9 @@ import java.security.NoSuchAlgorithmException; import java.util.Hashtable; +import org.apache.lucene.index.DefaultIndexFormat; import org.apache.lucene.index.IndexFileNameFilter; +import org.apache.lucene.index.IndexFormat; /** * Straightforward implementation of {@link Directory} as a directory of files. @@ -128,7 +130,7 @@ * @return the FSDirectory for the named file. 
*/ public static FSDirectory getDirectory(String path, boolean create) throws IOException { - return getDirectory(new File(path), create, null, true); + return getDirectory(new File(path), create, null, true, new DefaultIndexFormat()); } /** Returns the directory instance for the named location, using the @@ -146,13 +148,13 @@ public static FSDirectory getDirectory(String path, boolean create, LockFactory lockFactory, boolean doRemoveOldFiles) throws IOException { - return getDirectory(new File(path), create, lockFactory, doRemoveOldFiles); + return getDirectory(new File(path), create, lockFactory, doRemoveOldFiles, new DefaultIndexFormat()); } public static FSDirectory getDirectory(String path, boolean create, LockFactory lockFactory) throws IOException { - return getDirectory(new File(path), create, lockFactory, true); + return getDirectory(new File(path), create, lockFactory, true, new DefaultIndexFormat()); } /** Returns the directory instance for the named location. @@ -165,10 +167,11 @@ * @param create if true, create, or erase any existing contents. * @return the FSDirectory for the named file. */ public static FSDirectory getDirectory(File file, boolean create, boolean doRemoveOldFiles) - throws IOException { - return getDirectory(file, create, null, doRemoveOldFiles); + throws IOException { + return getDirectory(file, create, null, doRemoveOldFiles, new DefaultIndexFormat()); } + /** Returns the directory instance for the named location, using the * provided LockFactory implementation. * @@ -180,10 +183,34 @@ * @param create if true, create, or erase any existing contents. * @param lockFactory instance of {@link LockFactory} providing the * locking implementation. - * @return the FSDirectory for the named file. */ + * @param indexFormat the format of index + * @return the FSDirectory for the named file. 
+ * @throws IOException + */ public static FSDirectory getDirectory(File file, boolean create, LockFactory lockFactory, boolean doRemoveOldFiles) throws IOException { + return getDirectory(file, create, lockFactory, doRemoveOldFiles, new DefaultIndexFormat()); + } + + /** Returns the directory instance for the named location, using the + * provided LockFactory implementation. + * + *
<p>
Directories are cached, so that, for a given canonical path, the same + * FSDirectory instance will always be returned. This permits + * synchronization on directories. + * + * @param file the path to the directory. + * @param create if true, create, or erase any existing contents. + * @param lockFactory instance of {@link LockFactory} providing the + * locking implementation. + * @param indexFormat the format of index + * @return the FSDirectory for the named file. + * @throws IOException + */ + public static FSDirectory getDirectory(File file, boolean create, + LockFactory lockFactory, boolean doRemoveOldFiles, IndexFormat indexFormat) + throws IOException { file = new File(file.getCanonicalPath()); FSDirectory dir; synchronized (DIRECTORIES) { @@ -194,7 +221,7 @@ } catch (Exception e) { throw new RuntimeException("cannot load FSDirectory class: " + e.toString(), e); } - dir.init(file, create, lockFactory, doRemoveOldFiles); + dir.init(file, create, lockFactory, doRemoveOldFiles, indexFormat); DIRECTORIES.put(file, dir); } else { @@ -219,7 +246,7 @@ LockFactory lockFactory) throws IOException { - return getDirectory(file, create, lockFactory, true); + return getDirectory(file, create, lockFactory, true, new DefaultIndexFormat()); } public static FSDirectory getDirectory(File file, boolean create) @@ -243,8 +270,10 @@ throw new IOException(path + " not a directory"); } - private void init(File path, boolean create, LockFactory lockFactory, boolean doRemoveOldFiles) throws IOException { + private void init(File path, boolean create, LockFactory lockFactory, boolean doRemoveOldFiles, IndexFormat indexFormat) throws IOException { + this.indexFormat = indexFormat; + // Set up lockFactory with cascaded defaults: if an instance was passed in, // use that; else if locks are disabled, use NoLockFactory; else if the // system property org.apache.lucene.store.FSDirectoryLockFactoryClass is set, @@ -310,7 +339,7 @@ throw new IOException(directory + " not a directory"); if 
(doRemoveOldFiles) { - String[] files = directory.list(IndexFileNameFilter.getFilter()); // clear old files + String[] files = directory.list(getIndexFormat().getIndexFileNameFilter()); // clear old files if (files == null) throw new IOException("Cannot read directory " + directory.getAbsolutePath()); for (int i = 0; i < files.length; i++) { @@ -325,7 +354,7 @@ /** Returns an array of strings, one for each Lucene index file in the directory. */ public String[] list() { - return directory.list(IndexFileNameFilter.getFilter()); + return directory.list(getIndexFormat().getIndexFileNameFilter()); } /** Returns true iff a file with the given name exists. */ Index: src/java/org/apache/lucene/document/Field.java =================================================================== --- src/java/org/apache/lucene/document/Field.java (révision 493446) +++ src/java/org/apache/lucene/document/Field.java (copie de travail) @@ -17,11 +17,14 @@ * limitations under the License. */ -import org.apache.lucene.util.Parameter; - import java.io.Reader; import java.io.Serializable; +import org.apache.lucene.index.DefaultFieldData; +import org.apache.lucene.index.FieldData; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.util.Parameter; + /** A field is a section of a Document. Each field has two parts, a name and a value. Values may be free text, provided as a String or as a Reader, or they @@ -31,7 +34,7 @@ */ public final class Field extends AbstractField implements Fieldable, Serializable { - + /** Specifies whether and how a field should be stored. */ public static final class Store extends Parameter implements Serializable { @@ -127,24 +130,18 @@ */ public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS"); } - - - /** The value of the field as a String, or null. If null, the Reader value - * or binary value is used. Exactly one of stringValue(), readerValue(), and - * binaryValue() must be set. 
*/ - public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; } - - /** The value of the field as a Reader, or null. If null, the String value - * or binary value is used. Exactly one of stringValue(), readerValue(), - * and binaryValue() must be set. */ - public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; } - - /** The value of the field in Binary, or null. If null, the Reader or - * String value is used. Exactly one of stringValue(), readerValue() and - * binaryValue() must be set. */ - public byte[] binaryValue() { return fieldsData instanceof byte[] ? (byte[])fieldsData : null; } - + /** + * Contructor used when getting data from the index + * + * @param fi the info of the field + * @param fieldData the data of the field + */ + public Field(FieldInfo fi, DefaultFieldData fieldData) { + super(fi, fieldData); + } + + /** * Create a field by specifying its name, value and how it will * be saved in the index. Term vectors will not be stored in the index. 
* @@ -178,57 +175,9 @@ * */ public Field(String name, String value, Store store, Index index, TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - if (value == null) - throw new NullPointerException("value cannot be null"); + super(name, new DefaultFieldData(value), store, index, termVector); if (name.length() == 0 && value.length() == 0) throw new IllegalArgumentException("name and value cannot both be empty"); - if (index == Index.NO && store == Store.NO) - throw new IllegalArgumentException("it doesn't make sense to have a field that " - + "is neither indexed nor stored"); - if (index == Index.NO && termVector != TermVector.NO) - throw new IllegalArgumentException("cannot store term vector information " - + "for a field that is not indexed"); - - this.name = name.intern(); // field names are interned - this.fieldsData = value; - - if (store == Store.YES){ - this.isStored = true; - this.isCompressed = false; - } - else if (store == Store.COMPRESS) { - this.isStored = true; - this.isCompressed = true; - } - else if (store == Store.NO){ - this.isStored = false; - this.isCompressed = false; - } - else - throw new IllegalArgumentException("unknown store parameter " + store); - - if (index == Index.NO) { - this.isIndexed = false; - this.isTokenized = false; - } else if (index == Index.TOKENIZED) { - this.isIndexed = true; - this.isTokenized = true; - } else if (index == Index.UN_TOKENIZED) { - this.isIndexed = true; - this.isTokenized = false; - } else if (index == Index.NO_NORMS) { - this.isIndexed = true; - this.isTokenized = false; - this.omitNorms = true; - } else { - throw new IllegalArgumentException("unknown index parameter " + index); - } - - this.isBinary = false; - - setStoreTermVector(termVector); } /** @@ -253,23 +202,7 @@ * @throws NullPointerException if name or reader is null */ public Field(String name, Reader reader, TermVector termVector) { - if (name == null) - throw new NullPointerException("name 
cannot be null"); - if (reader == null) - throw new NullPointerException("reader cannot be null"); - - this.name = name.intern(); // field names are interned - this.fieldsData = reader; - - this.isStored = false; - this.isCompressed = false; - - this.isIndexed = true; - this.isTokenized = true; - - this.isBinary = false; - - setStoreTermVector(termVector); + super(name, new DefaultFieldData(reader), Store.NO, Index.TOKENIZED, termVector); } /** @@ -281,34 +214,91 @@ * @throws IllegalArgumentException if store is Store.NO */ public Field(String name, byte[] value, Store store) { - if (name == null) - throw new IllegalArgumentException("name cannot be null"); - if (value == null) - throw new IllegalArgumentException("value cannot be null"); - - this.name = name.intern(); - this.fieldsData = value; - - if (store == Store.YES){ - this.isStored = true; - this.isCompressed = false; + super(name, new DefaultFieldData(value), store, Index.NO, TermVector.NO); + } + + /** + * Override the store management to handle compression + */ + protected void setStore(Field.Store store) { + if (store == Field.Store.YES) { + isStored = true; + ((DefaultFieldData) fieldData).setCompressed(false); + } else if (store == Field.Store.COMPRESS) { + isStored = true; + ((DefaultFieldData) fieldData).setCompressed(true); + } else if (store == Field.Store.NO) { + if (isBinary()) { + throw new IllegalArgumentException("binary values can't be unstored"); + } + isStored = false; + ((DefaultFieldData) fieldData).setCompressed(false); + } else { + throw new IllegalArgumentException("unknown store parameter " + store); } - else if (store == Store.COMPRESS) { - this.isStored = true; - this.isCompressed = true; + } + + /** Prints a Field for human consumption. 
*/ + public String toString() { + StringBuffer result = new StringBuffer(); + if (isStored()) { + result.append("stored"); + if (((DefaultFieldData) fieldData).isCompressed()) + result.append("/compressed"); + else + result.append("/uncompressed"); } - else if (store == Store.NO) - throw new IllegalArgumentException("binary values can't be unstored"); - else - throw new IllegalArgumentException("unknown store parameter " + store); - - this.isIndexed = false; - this.isTokenized = false; - - this.isBinary = true; - - setStoreTermVector(TermVector.NO); + if (isIndexed()) { + if (result.length() > 0) + result.append(","); + result.append("indexed"); + } + if (isTokenized()) { + if (result.length() > 0) + result.append(","); + result.append("tokenized"); + } + if (isTermVectorStored()) { + if (result.length() > 0) + result.append(","); + result.append("termVector"); + } + if (isStoreOffsetWithTermVector()) { + if (result.length() > 0) + result.append(","); + result.append("termVectorOffsets"); + } + if (isStorePositionWithTermVector()) { + if (result.length() > 0) + result.append(","); + result.append("termVectorPosition"); + } + if (isBinary()) { + if (result.length() > 0) + result.append(","); + result.append("binary"); + } + if (getOmitNorms()) { + result.append(",omitNorms"); + } + if (isLazy()) { + result.append(",lazy"); + } + result.append('<'); + result.append(name()); + result.append(':'); + + result.append(getFieldData()); + + result.append('>'); + return result.toString(); } + /** + * @return true if the value of the field is stored and compressed within the index + */ + public final boolean isCompressed() { + return ((DefaultFieldData) fieldData).isCompressed(); + } } Index: src/java/org/apache/lucene/document/AbstractField.java =================================================================== --- src/java/org/apache/lucene/document/AbstractField.java (révision 493446) +++ src/java/org/apache/lucene/document/AbstractField.java (copie de travail) @@ -15,11 
+15,16 @@ * limitations under the License. */ +import java.io.Reader; +import org.apache.lucene.index.FieldData; +import org.apache.lucene.index.FieldInfo; + /** - * - * - **/ + * Default abstract implementation of a {@link Fieldable} + * + * $Id$ + */ public abstract class AbstractField implements Fieldable { protected String name = "body"; @@ -30,57 +35,41 @@ protected boolean isStored = false; protected boolean isIndexed = true; protected boolean isTokenized = true; - protected boolean isBinary = false; - protected boolean isCompressed = false; - protected boolean lazy = false; protected float boost = 1.0f; // the one and only data object for all different kind of field values - protected Object fieldsData = null; + protected FieldData fieldData = null; - protected AbstractField() - { - + protected AbstractField(FieldInfo fi, FieldData fieldData) { + this.name = fi.getName(); + storeTermVector = fi.storeTermVector(); + storeOffsetWithTermVector = fi.storeOffsetWithTermVector(); + storePositionWithTermVector = fi.storePositionWithTermVector(); + omitNorms = fi.omitNorms(); + isStored = true; + isIndexed = fi.isIndexed(); + isTokenized = fieldData.isTokenized(); + this.fieldData = fieldData; } - protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) { + protected AbstractField(String name, FieldData data, Field.Store store, Field.Index index, Field.TermVector termVector) { if (name == null) throw new NullPointerException("name cannot be null"); - this.name = name.intern(); // field names are interned + if (data == null) + throw new NullPointerException("data cannot be null"); - if (store == Field.Store.YES){ - this.isStored = true; - this.isCompressed = false; + this.name = name.intern(); // field names are interned + + fieldData = data; + + if (index == Field.Index.NO && store == Field.Store.NO) { + throw new IllegalArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored"); 
} - else if (store == Field.Store.COMPRESS) { - this.isStored = true; - this.isCompressed = true; + if (index == Field.Index.NO && termVector != Field.TermVector.NO) { + throw new IllegalArgumentException("cannot store term vector information " + "for a field that is not indexed"); } - else if (store == Field.Store.NO){ - this.isStored = false; - this.isCompressed = false; - } - else - throw new IllegalArgumentException("unknown store parameter " + store); - if (index == Field.Index.NO) { - this.isIndexed = false; - this.isTokenized = false; - } else if (index == Field.Index.TOKENIZED) { - this.isIndexed = true; - this.isTokenized = true; - } else if (index == Field.Index.UN_TOKENIZED) { - this.isIndexed = true; - this.isTokenized = false; - } else if (index == Field.Index.NO_NORMS) { - this.isIndexed = true; - this.isTokenized = false; - this.omitNorms = true; - } else { - throw new IllegalArgumentException("unknown index parameter " + index); - } - - this.isBinary = false; - + setStore(store); + setIndex(index); setStoreTermVector(termVector); } @@ -155,6 +144,39 @@ } } + protected void setIndex(Field.Index index) { + if (index == Field.Index.NO) { + isIndexed = false; + isTokenized = false; + } else if (index == Field.Index.TOKENIZED) { + isIndexed = true; + isTokenized = true; + } else if (index == Field.Index.UN_TOKENIZED) { + isIndexed = true; + isTokenized = false; + } else if (index == Field.Index.NO_NORMS) { + isIndexed = true; + isTokenized = false; + omitNorms = true; + } else { + throw new IllegalArgumentException("unknown index parameter " + index); + } + fieldData.setTokenized(isTokenized); + } + + protected void setStore(Field.Store store) { + if (store == Field.Store.YES) { + isStored = true; + } else if (store == Field.Store.NO) { + if (isBinary()) { + throw new IllegalArgumentException("binary values can't be unstored"); + } + isStored = false; + } else { + throw new IllegalArgumentException("unknown store parameter " + store); + } + } + /** True 
iff the value of the field is to be stored in the index for return with search hits. It is an error for this to be true if a field is Reader-valued. */ @@ -169,8 +191,6 @@ Reader-valued. */ public final boolean isTokenized() { return isTokenized; } - /** True if the value of the field is stored and compressed within the index */ - public final boolean isCompressed() { return isCompressed; } /** True iff the term or terms used to index this field are stored as a term * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. @@ -198,7 +218,9 @@ } /** True iff the value of the filed is stored as binary */ - public final boolean isBinary() { return isBinary; } + public final boolean isBinary() { + return fieldData.isBinary(); + } /** True if norms are omitted for this indexed field */ public boolean getOmitNorms() { return omitNorms; } @@ -211,18 +233,14 @@ public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; } public boolean isLazy() { - return lazy; + return fieldData.isLazy(); } /** Prints a Field for human consumption. 
*/ - public final String toString() { + public String toString() { StringBuffer result = new StringBuffer(); if (isStored) { result.append("stored"); - if (isCompressed) - result.append("/compressed"); - else - result.append("/uncompressed"); } if (isIndexed) { if (result.length() > 0) @@ -249,7 +267,7 @@ result.append(","); result.append("termVectorPosition"); } - if (isBinary) { + if (isBinary()) { if (result.length() > 0) result.append(","); result.append("binary"); @@ -257,18 +275,48 @@ if (omitNorms) { result.append(",omitNorms"); } - if (lazy){ + if (isLazy()) { result.append(",lazy"); } result.append('<'); result.append(name); result.append(':'); - if (fieldsData != null && lazy == false) { - result.append(fieldsData); + if (fieldData != null && !isLazy()) { + result.append(fieldData); } result.append('>'); return result.toString(); } + + /** The value of the field as a String, or null. If null, the Reader value + * or binary value is used. Exactly one of stringValue(), readerValue(), and + * binaryValue() must be set. */ + public final String stringValue() { + return fieldData.stringValue(); + } + + /** The value of the field as a Reader, or null. If null, the String value + * or binary value is used. Exactly one of stringValue(), readerValue(), + * and binaryValue() must be set. */ + public final Reader readerValue() { + return fieldData.readerValue(); + } + + /** The value of the field in Binary, or null. If null, the Reader or + * String value is used. Exactly one of stringValue(), readerValue() and + * binaryValue() must be set. 
*/ + public final byte[] binaryValue() { + return fieldData.binaryValue(); + } + + /** + * + * @return the data of the field + */ + public FieldData getFieldData() { + return fieldData; + } + } Index: src/java/org/apache/lucene/document/Fieldable.java =================================================================== --- src/java/org/apache/lucene/document/Fieldable.java (révision 493446) +++ src/java/org/apache/lucene/document/Fieldable.java (copie de travail) @@ -19,6 +19,8 @@ import java.io.Reader; import java.io.Serializable; +import org.apache.lucene.index.FieldData; + /** * Synonymous with {@link Field}. * @@ -90,7 +92,7 @@ boolean isTokenized(); /** True if the value of the field is stored and compressed within the index */ - boolean isCompressed(); + //boolean isCompressed(); /** True iff the term or terms used to index this field are stored as a term * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. @@ -134,4 +136,10 @@ * @return true if this field can be loaded lazily */ boolean isLazy(); + + /** + * + * @return the data of the field + */ + FieldData getFieldData(); } Index: src/java/org/apache/lucene/document/Document.java =================================================================== --- src/java/org/apache/lucene/document/Document.java (révision 493446) +++ src/java/org/apache/lucene/document/Document.java (copie de travail) @@ -37,8 +37,8 @@ * IndexReader#document(int)}. */ -public final class Document implements java.io.Serializable { - List fields = new Vector(); +public class Document implements java.io.Serializable { + protected List fields = new Vector(); private float boost = 1.0f; /** Constructs a new document with no fields. */