Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 755690) +++ CHANGES.txt (working copy) @@ -59,6 +59,10 @@ 9. LUCENE-1186: Add Analyzer.close() to free internal ThreadLocal resources. (Christian Kohlschütter via Mike McCandless) +10. LUCENE-652: Added org.apache.lucene.document.CompressionTools, to + enable compressing & decompressing binary content, external to + Lucene's indexing. Deprecated Field.Store.COMPRESS. + Bug fixes 1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals() Index: src/test/org/apache/lucene/document/TestBinaryDocument.java =================================================================== --- src/test/org/apache/lucene/document/TestBinaryDocument.java (revision 755690) +++ src/test/org/apache/lucene/document/TestBinaryDocument.java (working copy) @@ -5,7 +5,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.MockRAMDirectory; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -43,7 +43,7 @@ Fieldable binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS); Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO); - + try { // binary fields with store off are not allowed new Field("fail", binaryValCompressed.getBytes(), Field.Store.NO); @@ -60,12 +60,12 @@ doc.add(stringFldStored); doc.add(stringFldCompressed); - + /** test for field count */ assertEquals(4, doc.fields.size()); /** add the doc to a ram index */ - RAMDirectory dir = new RAMDirectory(); + MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.addDocument(doc); writer.close(); @@ -90,13 +90,40 @@ /** fetch the compressed string field and compare it's content with the original one */ String stringFldCompressedTest = docFromReader.get("stringCompressed"); assertTrue(stringFldCompressedTest.equals(binaryValCompressed)); - + /** delete the document from index */ reader.deleteDocument(0); assertEquals(0, reader.numDocs()); reader.close(); + dir.close(); + } + + public void testCompressionTools() + throws Exception + { + Fieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes()), Field.Store.YES); + Document doc = new Document(); + + doc.add(binaryFldCompressed); + + /** add the doc to a ram index */ + MockRAMDirectory dir = new MockRAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); + writer.addDocument(doc); + writer.close(); + + /** open a reader and fetch the document */ + IndexReader reader = IndexReader.open(dir); + Document docFromReader = reader.document(0); + assertTrue(docFromReader != null); + + /** fetch the binary compressed field and compare it's content with the original one */ + String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed"))); + assertTrue(binaryFldCompressedTest.equals(binaryValCompressed)); + + reader.close(); + dir.close(); } - } Index: src/java/org/apache/lucene/index/FieldsReader.java =================================================================== --- src/java/org/apache/lucene/index/FieldsReader.java (revision 755690) +++ src/java/org/apache/lucene/index/FieldsReader.java (working copy) @@ -25,11 +25,9 @@ import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.util.CloseableThreadLocal; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.Reader; import java.util.zip.DataFormatException; -import java.util.zip.Inflater; /** * Class responsible for access to stored document fields. @@ -596,40 +594,19 @@ return null; } } - - private final byte[] uncompress(final byte[] input) - throws CorruptIndexException, IOException { - // Create an expandable byte array to hold the decompressed data - ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); - - Inflater decompressor = new Inflater(); - + private byte[] uncompress(byte[] b) + throws CorruptIndexException { try { - decompressor.setInput(input); - - // Decompress the data - byte[] buf = new byte[1024]; - while (!decompressor.finished()) { - try { - int count = decompressor.inflate(buf); - bos.write(buf, 0, count); - } - catch (DataFormatException e) { - // this will happen if the field is not compressed - CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString()); - newException.initCause(e); - throw newException; - } - } - } finally { - decompressor.end(); + return CompressionTools.decompress(b); + } catch (DataFormatException e) { + // this will happen if the field is not compressed + CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString()); + newException.initCause(e); + throw newException; } - - // Get the decompressed data - return bos.toByteArray(); } - + // Instances of this class hold field properties and data // for merge final static class FieldForMerge extends AbstractField { Index: src/java/org/apache/lucene/index/FieldsWriter.java =================================================================== --- src/java/org/apache/lucene/index/FieldsWriter.java (revision 755704) +++ src/java/org/apache/lucene/index/FieldsWriter.java (working copy) @@ -16,13 +16,12 @@ * the License. */ -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Iterator; -import java.util.zip.Deflater; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.CompressionTools; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.store.IndexOutput; @@ -203,10 +202,10 @@ } else { // check if it is a binary field if (field.isBinary()) { - data = compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength()); + data = CompressionTools.compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength()); } else { byte x[] = field.stringValue().getBytes("UTF-8"); - data = compress(x, 0, x.length); + data = CompressionTools.compress(x, 0, x.length); } len = data.length; offset = 0; @@ -269,43 +268,4 @@ writeField(fieldInfos.fieldInfo(field.name()), field); } } - - private final byte[] compress (byte[] input, int offset, int length) { - // Create the compressor with highest level of compression - Deflater compressor = new Deflater(); - compressor.setLevel(Deflater.BEST_COMPRESSION); - - // Give the compressor the data to compress - compressor.setInput(input, offset, length); - compressor.finish(); - - /* - * Create an expandable byte array to hold the compressed data. - * You cannot use an array that's the same size as the orginal because - * there is no guarantee that the compressed data will be smaller than - * the uncompressed data. - */ - ByteArrayOutputStream bos = new ByteArrayOutputStream(length); - - try { - compressor.setLevel(Deflater.BEST_COMPRESSION); - - // Give the compressor the data to compress - compressor.setInput(input); - compressor.finish(); - - // Compress the data - byte[] buf = new byte[1024]; - while (!compressor.finished()) { - int count = compressor.deflate(buf); - bos.write(buf, 0, count); - } - - } finally { - compressor.end(); - } - - // Get the compressed data - return bos.toByteArray(); - } } Index: src/java/org/apache/lucene/document/CompressionTools.java =================================================================== --- src/java/org/apache/lucene/document/CompressionTools.java (revision 0) +++ src/java/org/apache/lucene/document/CompressionTools.java (revision 0) @@ -0,0 +1,110 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.zip.Deflater; +import java.util.zip.Inflater; +import java.util.zip.DataFormatException; +import java.io.ByteArrayOutputStream; + +/** Simple utility class providing static methods to + * compress and decompress binary data for stored fields. + * This class uses java.util.zip.Deflater and Inflater + * classes to compress and decompress. + * + * To compress a String: + *
+ *    String string = ...
+ *    byte[] bytes = compress(string.getBytes("UTF-8");
+ *  
+ * and to decompress: + *
+ *    new String(decompress(bytes), "UTF-8");
+ *  
+ */ + +public class CompressionTools { + + // Export only static methods + private CompressionTools() {} + + /** Compresses the specified byte range using the + * specified compressionLevel (constants are defined in + * java.util.zip.Deflater). */ + public static byte[] compress(byte[] value, int offset, int length, int compressionLevel) { + + /* Create an expandable byte array to hold the compressed data. + * You cannot use an array that's the same size as the orginal because + * there is no guarantee that the compressed data will be smaller than + * the uncompressed data. */ + ByteArrayOutputStream bos = new ByteArrayOutputStream(length); + + Deflater compressor = new Deflater(); + + try { + compressor.setLevel(compressionLevel); + compressor.setInput(value, offset, length); + compressor.finish(); + + // Compress the data + final byte[] buf = new byte[1024]; + while (!compressor.finished()) { + int count = compressor.deflate(buf); + bos.write(buf, 0, count); + } + } finally { + compressor.end(); + } + + return bos.toByteArray(); + } + + /** Compresses the specified byte range, with default BEST_COMPRESSION level */ + public static byte[] compress(byte[] value, int offset, int length) { + return compress(value, offset, length, Deflater.BEST_COMPRESSION); + } + + /** Compresses all bytes in the array, with default BEST_COMPRESSION level */ + public static byte[] compress(byte[] value) { + return compress(value, 0, value.length, Deflater.BEST_COMPRESSION); + } + + /** Decompress the byte array previously returned by + * compress */ + public static byte[] decompress(byte[] value) throws DataFormatException { + // Create an expandable byte array to hold the decompressed data + ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length); + + Inflater decompressor = new Inflater(); + + try { + decompressor.setInput(value); + + // Decompress the data + final byte[] buf = new byte[1024]; + while (!decompressor.finished()) { + int count = decompressor.inflate(buf); + bos.write(buf, 0, count); + } + } finally { + decompressor.end(); + } + + return bos.toByteArray(); + } +} Property changes on: src/java/org/apache/lucene/document/CompressionTools.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/document/Field.java =================================================================== --- src/java/org/apache/lucene/document/Field.java (revision 755690) +++ src/java/org/apache/lucene/document/Field.java (working copy) @@ -43,6 +43,7 @@ /** Store the original field value in the index in a compressed form. This is * useful for long documents and for binary valued fields. + * @deprecated Please use {@link CompressionTools} instead */ public static final Store COMPRESS = new Store("COMPRESS");