Index: src/java/org/apache/lucene/document/CompressionTools.java =================================================================== --- src/java/org/apache/lucene/document/CompressionTools.java (revision 756701) +++ src/java/org/apache/lucene/document/CompressionTools.java (working copy) @@ -21,21 +21,15 @@ import java.util.zip.Inflater; import java.util.zip.DataFormatException; import java.io.ByteArrayOutputStream; +import java.io.UnsupportedEncodingException; +import org.apache.lucene.util.UnicodeUtil; /** Simple utility class providing static methods to * compress and decompress binary data for stored fields. * This class uses java.util.zip.Deflater and Inflater - * classes to compress and decompress. - * - * To compress a String: - *
- * String string = ...
- * byte[] bytes = compress(string.getBytes("UTF-8");
- *
- * and to decompress:
- * - * new String(decompress(bytes), "UTF-8"); - *+ * classes to compress and decompress, which is the same + * format previously used by the now deprecated + * Field.Store.COMPRESS. */ public class CompressionTools { @@ -84,6 +78,20 @@ return compress(value, 0, value.length, Deflater.BEST_COMPRESSION); } + /** Compresses the String value, with default BEST_COMPRESSION level */ + public static byte[] compressString(String value) { + return compressString(value, Deflater.BEST_COMPRESSION); + } + + /** Compresses the String value using the specified + * compressionLevel (constants are defined in + * java.util.zip.Deflater). */ + public static byte[] compressString(String value, int compressionLevel) { + UnicodeUtil.UTF8Result result=new UnicodeUtil.UTF8Result(); + UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result); + return compress(result.result, 0, result.length, compressionLevel); + } + /** Decompress the byte array previously returned by * compress */ public static byte[] decompress(byte[] value) throws DataFormatException { @@ -107,4 +115,13 @@ return bos.toByteArray(); } + + /** Decompress the byte array previously returned by + * compressString back into a String */ + public static String decompressString(byte[] value) throws DataFormatException { + UnicodeUtil.UTF16Result result=new UnicodeUtil.UTF16Result(); + final byte[] bytes = decompress(value); + UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result); + return new String(result.result, 0, result.length); + } } Index: src/java/org/apache/lucene/document/Field.java =================================================================== --- src/java/org/apache/lucene/document/Field.java (revision 756701) +++ src/java/org/apache/lucene/document/Field.java (working copy) @@ -43,7 +43,11 @@ /** Store the original field value in the index in a compressed form. This is * useful for long documents and for binary valued fields. - * @deprecated Please use {@link CompressionTools} instead + * @deprecated Please use {@link CompressionTools} instead. + * For string fields that were previously indexed and stored using compression, + * the new way to achive this is: First add the field indexed-only (no store) + * and additionally using the same field name as a binary, stored field + * with {@link CompressionTools#compressString}. */ public static final Store COMPRESS = new Store("COMPRESS"); Index: src/test/org/apache/lucene/document/TestBinaryDocument.java =================================================================== --- src/test/org/apache/lucene/document/TestBinaryDocument.java (revision 756701) +++ src/test/org/apache/lucene/document/TestBinaryDocument.java (working copy) @@ -103,10 +103,12 @@ throws Exception { Fieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes()), Field.Store.YES); + Fieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.compressString(binaryValCompressed), Field.Store.YES); Document doc = new Document(); doc.add(binaryFldCompressed); + doc.add(stringFldCompressed); /** add the doc to a ram index */ MockRAMDirectory dir = new MockRAMDirectory(); @@ -122,7 +124,8 @@ /** fetch the binary compressed field and compare it's content with the original one */ String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed"))); assertTrue(binaryFldCompressedTest.equals(binaryValCompressed)); - + assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed)); + reader.close(); dir.close(); }