diff --git vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/io/CompressionUtil.java vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/io/CompressionUtil.java
new file mode 100644
index 0000000..b380dd1
--- /dev/null
+++ vault-core/src/main/java/org/apache/jackrabbit/vault/fs/impl/io/CompressionUtil.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.vault.fs.impl.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import javax.annotation.Nonnull;
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.vault.fs.api.Artifact;
+import org.apache.jackrabbit.vault.fs.api.SerializationType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@code CompressionUtil} is a utility class for estimating the
+ * compressibility of artifacts.
+ */
+public final class CompressionUtil {
+
+    /**
+     * default logger
+     */
+    private static final Logger log = LoggerFactory.getLogger(CompressionUtil.class);
+
+    /**
+     * Minimum artifact length, in bytes, required to run the auto-detection algorithm.
+     */
+    private static final long MIN_AUTO_DETECTION_LENGTH = 9932;
+
+    /**
+     * Length of the sample (in bytes) peeked from the artifact for running the auto-detection algorithm.
+     */
+    private static final int SAMPLE_LENGTH = 256;
+
+    // TODO extend the MIME type lists
+
+    /**
+     * List of well-known MIME types identifying already compressed formats.
+     */
+    private static final Set<String> INCOMPRESSIBLE_MIME_TYPES = new HashSet<String>(Arrays.asList(
+            "image/gif",
+            "image/jpeg",
+            "image/png",
+            "multipart/x-gzip",
+            "video/mp4",
+            "application/gzip",
+            "application/java-archive",
+            "application/mp4",
+            "application/x-7z-compressed",
+            "application/x-compressed",
+            "application/x-gzip",
+            "application/x-rar-compressed",
+            "application/zip",
+            "application/zlib",
+            "audio/mpeg"
+    ));
+
+    /**
+     * List of well-known MIME types identifying non-compressed formats.
+     */
+    private static final Set<String> COMPRESSIBLE_MIME_TYPES = new HashSet<String>(Arrays.asList(
+            "application/xml",
+            "application/java",
+            "application/json",
+            "application/javascript",
+            "application/ecmascript"
+    ));
+
+    /**
+     * Estimates if the provided artifact is compressible.
+     *
+     * @param artifact the artifact to be tested for compressibility
+     * @return a negative integer if the artifact is estimated to be incompressible,
+     *         a positive integer if it is estimated to be compressible, or zero if
+     *         computing the estimation would be too expensive.
+     */
+    public static int isCompressible(@Nonnull Artifact artifact) {
+
+        if (SerializationType.GENERIC == artifact.getSerializationType()) {
+
+            /*
+             * Test for known content types
+             */
+            String contentType = artifact.getContentType();
+            if (contentType != null) {
+                contentType = contentType.toLowerCase();
+                if (isCompressibleContentType(contentType)) {
+                    return 1;
+                }
+                if (isIncompressibleContentType(contentType)) {
+                    return -1;
+                }
+            }
+
+            /*
+             * Apply the compressibility prediction heuristic on a sample of the artifact.
+             *
+             * The heuristic is run only if its expected cost is smaller than 3% of the
+             * expected cost of compressing the artifact, so that the extra cost remains
+             * reasonable even in the worst case.
+             *
+             * The compression throughput is assumed to be 20 MB/s. The expected sampling
+             * cost is assumed to be constant and around 15 μs. The artifact size
+             * threshold is thus set to 9.7 KB.
+             *
+             * An improved implementation could measure those values and self-tune for a
+             * specific runtime.
+             */
+            long contentLength = artifact.getContentLength();
+            if (contentLength > MIN_AUTO_DETECTION_LENGTH) {
+                return seemsCompressible(artifact);
+            }
+        }
+        return 0;
+    }
+
+    static boolean isCompressibleContentType(@Nonnull String mimeType) {
+        return mimeType.startsWith("text/") || COMPRESSIBLE_MIME_TYPES.contains(mimeType);
+    }
+
+    static boolean isIncompressibleContentType(@Nonnull String mimeType) {
+        return INCOMPRESSIBLE_MIME_TYPES.contains(mimeType);
+    }
+
+    static int seemsCompressible(@Nonnull Artifact artifact) {
+        InputStream stream = null;
+        try {
+            stream = artifact.getInputStream();
+            byte[] sample = IOUtils.toByteArray(stream, SAMPLE_LENGTH);
+            return isCompressible(sample, SAMPLE_LENGTH) ? 1 : -1;
+        } catch (RepositoryException | IOException e) {
+            log.warn(e.getMessage(), e);
+        } finally {
+            IOUtils.closeQuietly(stream);
+        }
+        return 0;
+    }
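+
+    /*
+     * A sanity check on the 438 threshold used in isCompressible(byte[], int) below:
+     * for uniformly random bytes each of the 16 high-nibble buckets receives about
+     * len/16 of the samples, so v is roughly 2^32 / 16 = 2^28 and every bucket
+     * contributes 63 - numberOfLeadingZeros(v + 1) = 28 to r, giving r of about 448.
+     * Since len cancels out of the comparison, a sample is classified as compressible
+     * only when r < 438, i.e. when its high-nibble entropy falls measurably below the
+     * random-data maximum.
+     */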
+
+    /**
+     * Estimates the compressibility of the given data sample by computing the entropy of
+     * its high nibbles. Only 16 buckets are used because the buckets have to be zeroed out
+     * on each call (which is slow when the blocks to check are small), and
+     * {@code 63 - numberOfLeadingZeros} serves as an integer logarithm, avoiding floating
+     * point arithmetic. The result is slightly less accurate than a full floating-point
+     * entropy computation, a deliberate trade-off in favor of speed.
+     *
+     * Credits to Thomas Mueller for this solution, shared on StackOverflow at
+     * How To Efficiently Predict If Data Is Compressible
+     *
+     * @param data the sample data
+     * @param len the number of bytes of {@code data} to evaluate
+     * @return {@code true} if the sample is estimated to be compressible
+     */
+    static boolean isCompressible(byte[] data, int len) {
+        // the number of bytes with
+        // high nibble 0, 1,.., 15
+        int[] sum = new int[16];
+        for (int i = 0; i < len; i++) {
+            int x = (data[i] & 255) >> 4;
+            sum[x]++;
+        }
+        // see wikipedia to understand this formula :-)
+        int r = 0;
+        for (int x : sum) {
+            long v = ((long) x << 32) / len;
+            r += 63 - Long.numberOfLeadingZeros(v + 1);
+        }
+        return len * r < 438 * len;
+    }
+
+}
diff --git vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/JarExporter.java vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/JarExporter.java
index e272c9a..6822198 100644
--- vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/JarExporter.java
+++ vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/JarExporter.java
@@ -22,7 +22,11 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.jar.JarOutputStream;
+import java.util.zip.Deflater;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
@@ -32,28 +36,64 @@
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.output.CloseShieldOutputStream;
 import org.apache.jackrabbit.vault.fs.api.Artifact;
 import org.apache.jackrabbit.vault.fs.api.VaultFile;
+import org.apache.jackrabbit.vault.fs.impl.io.CompressionUtil;
 import org.apache.jackrabbit.vault.util.PlatformNameFormat;
 
+import static java.util.zip.Deflater.BEST_COMPRESSION;
+import static java.util.zip.Deflater.DEFAULT_COMPRESSION;
+import static java.util.zip.Deflater.NO_COMPRESSION;
+
 /**
  * Implements a Vault filesystem exporter that exports Vault files to a jar file.
- * It uses the {@link PlatformNameFormat} for formatting the jcr file
+ * The entries are stored compressed in the jar (as {@link ZipEntry} zip entries).
+ *
+ * The exporter can optimize the handling of binaries by not compressing binaries
+ * that are estimated to be incompressible.
+ * The optimization is disabled for the {@link Deflater#DEFAULT_COMPRESSION},
+ * {@link Deflater#NO_COMPRESSION} and {@link Deflater#BEST_COMPRESSION} levels,
+ * and enabled for the remaining ones.
+ *
+ * The exporter uses the {@link PlatformNameFormat} for formatting the jcr file
  * names to local ones.
  *
  */
 public class JarExporter extends AbstractExporter {
 
+    /**
+     * Contains the compression levels for which the binaries are always deflated
+     * independently of their compressibility.
+     */
+    private static final Set<Integer> COMPRESSED_LEVELS = new HashSet<Integer>(Arrays.asList(
+            DEFAULT_COMPRESSION, NO_COMPRESSION, BEST_COMPRESSION));
+
     private JarOutputStream jOut;
 
     private OutputStream out;
 
     private File jarFile;
 
+    private final int level;
+
+    private final boolean compressedLevel;
+
     /**
      * Constructs a new jar exporter that writes to the given file.
      * @param jarFile the jar file
      */
     public JarExporter(File jarFile) {
+        this(jarFile, DEFAULT_COMPRESSION);
+    }
+
+    /**
+     * Constructs a new jar exporter that writes to the given file.
+     * @param jarFile the jar file
+     * @param level the compression level
+     */
+    public JarExporter(File jarFile, int level) {
+        compressedLevel = COMPRESSED_LEVELS.contains(level);
         this.jarFile = jarFile;
+        this.level = level;
     }
 
     /**
@@ -61,7 +101,19 @@ public class JarExporter extends AbstractExporter {
      * @param out the output stream
      */
     public JarExporter(OutputStream out) {
+        this(out, DEFAULT_COMPRESSION);
+    }
+
+    /**
+     * Constructs a new jar exporter that writes to the output stream.
+     * @param out the output stream
+     * @param level the compression level
+     */
+    public JarExporter(OutputStream out, int level) {
+        compressedLevel = COMPRESSED_LEVELS.contains(level);
         this.out = out;
+        this.level = level;
     }
 
     /**
@@ -72,8 +124,10 @@ public class JarExporter extends AbstractExporter {
         if (jOut == null) {
             if (jarFile != null) {
                 jOut = new JarOutputStream(new FileOutputStream(jarFile));
+                jOut.setLevel(level);
             } else if (out != null) {
                 jOut = new JarOutputStream(out);
+                jOut.setLevel(level);
             } else {
                 throw new IllegalArgumentException("Either out or jarFile needs to be set.");
             }
@@ -107,6 +161,10 @@ public class JarExporter extends AbstractExporter {
             throws RepositoryException, IOException {
         ZipEntry e = new ZipEntry(getPlatformFilePath(file, relPath));
         Artifact a = file.getArtifact();
+        boolean compress = compressedLevel || CompressionUtil.isCompressible(a) >= 0;
+        if (!compress) {
+            jOut.setLevel(NO_COMPRESSION);
+        }
         if (a.getLastModified() > 0) {
             e.setTime(a.getLastModified());
         }
@@ -130,9 +188,13 @@ public class JarExporter extends AbstractExporter {
                 break;
         }
         jOut.closeEntry();
+        if (!compress) {
+            jOut.setLevel(level);
+        }
     }
 
     public void writeFile(InputStream in, String relPath) throws IOException {
+        // The file input stream to be written is assumed to be compressible
         ZipEntry e = new ZipEntry(relPath);
         exportInfo.update(ExportInfo.Type.ADD, e.getName());
         jOut.putNextEntry(e);
@@ -144,6 +206,7 @@ public class JarExporter extends AbstractExporter {
 
     public void write(ZipFile zip, ZipEntry entry) throws IOException {
         track("A", entry.getName());
+        jOut.setLevel(NO_COMPRESSION); // The entry to be written is assumed not to be compressible
         exportInfo.update(ExportInfo.Type.ADD, entry.getName());
         ZipEntry copy = new ZipEntry(entry);
         jOut.putNextEntry(copy);
@@ -154,6 +217,7 @@ public class JarExporter extends AbstractExporter {
             in.close();
         }
         jOut.closeEntry();
+        jOut.setLevel(level);
     }
 
diff --git vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/package-info.java vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/package-info.java
index aa4b5a3..ca2a316 100644
--- vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/package-info.java
+++ vault-core/src/main/java/org/apache/jackrabbit/vault/fs/io/package-info.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-@Version("2.4.0")
+@Version("2.5.0")
 package org.apache.jackrabbit.vault.fs.io;
 
 import aQute.bnd.annotation.Version;
diff --git vault-core/src/main/java/org/apache/jackrabbit/vault/packaging/ExportOptions.java vault-core/src/main/java/org/apache/jackrabbit/vault/packaging/ExportOptions.java
index bfabe8b..e092d48 100644
--- vault-core/src/main/java/org/apache/jackrabbit/vault/packaging/ExportOptions.java
+++ vault-core/src/main/java/org/apache/jackrabbit/vault/packaging/ExportOptions.java
@@ -35,6 +35,8 @@ public class ExportOptions {
 
     private String mountPath;
 
+    private int compressionLevel = -1;
+
     /**
      * Returns the progress tracker listener.
      * @return the progress tracker listener.
@@ -122,4 +124,20 @@ public class ExportOptions {
     public void setMountPath(String mountPath) {
         this.mountPath = mountPath;
     }
+
+    /**
+     * Defines the compression level for the export, as accepted by {@link java.util.zip.Deflater#setLevel(int)}.
+     * @param compressionLevel the compression level
+     */
+    public void setCompressionLevel(int compressionLevel) {
+        this.compressionLevel = compressionLevel;
+    }
+
+    /**
+     * Returns the compression level.
+     * @return the compression level
+     */
+    public int getCompressionLevel() {
+        return compressionLevel;
+    }
 }
\ No newline at end of file
diff --git vault-core/src/main/java/org/apache/jackrabbit/vault/packaging/impl/PackageManagerImpl.java vault-core/src/main/java/org/apache/jackrabbit/vault/packaging/impl/PackageManagerImpl.java
index 7bb2f05..58c85d8 100644
--- vault-core/src/main/java/org/apache/jackrabbit/vault/packaging/impl/PackageManagerImpl.java
+++ vault-core/src/main/java/org/apache/jackrabbit/vault/packaging/impl/PackageManagerImpl.java
@@ -133,7 +133,7 @@ public class PackageManagerImpl implements PackageManager {
         }
         VaultFileSystem jcrfs = Mounter.mount(config, metaInf.getFilter(), addr, opts.getRootPath(), s);
-        JarExporter exporter = new JarExporter(out);
+        JarExporter exporter = new JarExporter(out, opts.getCompressionLevel());
         exporter.setProperties(metaInf.getProperties());
         if (opts.getListener() != null) {
             exporter.setVerbose(opts.getListener());
         }
@@ -185,7 +185,7 @@ public class PackageManagerImpl implements PackageManager {
         if (metaInf == null) {
             metaInf = new DefaultMetaInf();
         }
-        JarExporter exporter = new JarExporter(out);
+        JarExporter exporter = new JarExporter(out, opts.getCompressionLevel());
         exporter.open();
         exporter.setProperties(metaInf.getProperties());
         ProgressTracker tracker = null;
diff --git vault-core/src/test/java/org/apache/jackrabbit/vault/fs/impl/io/CompressionUtilTest.java vault-core/src/test/java/org/apache/jackrabbit/vault/fs/impl/io/CompressionUtilTest.java
new file mode 100644
index 0000000..5b3ab1a
--- /dev/null
+++ vault-core/src/test/java/org/apache/jackrabbit/vault/fs/impl/io/CompressionUtilTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.vault.fs.impl.io;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Random;
+
+import javax.jcr.RepositoryException;
+
+import org.apache.jackrabbit.vault.fs.api.Artifact;
+import org.apache.jackrabbit.vault.fs.api.SerializationType;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+import static org.apache.jackrabbit.vault.fs.impl.io.CompressionUtil.*;
+
+public class CompressionUtilTest {
+
+    private static final Random RAND = new Random();
+
+    private static final String COMPRESSIBLE_MIME_TYPE = "text/plain";
+
+    private static final String INCOMPRESSIBLE_MIME_TYPE = "image/png";
+
+    private static final String UNKNOWN_MIME_TYPE = "unknown/unknown";
+
+    @Test
+    public void testCompressibilityByMimeType() {
+        assertTrue(isIncompressibleContentType(INCOMPRESSIBLE_MIME_TYPE));
+        assertFalse(isIncompressibleContentType(UNKNOWN_MIME_TYPE));
+        assertFalse(isIncompressibleContentType(COMPRESSIBLE_MIME_TYPE));
+        assertTrue(isCompressibleContentType(COMPRESSIBLE_MIME_TYPE));
+        assertFalse(isCompressibleContentType(UNKNOWN_MIME_TYPE));
+    }
+
+    @Test
+    public void testCompressibilityEstimation()
+            throws IOException, RepositoryException {
+        assertTrue(seemsCompressible(newArtifact(incompressibleData(5*1024), null)) < 0);
+        assertTrue(seemsCompressible(newArtifact(compressibleData(5*1024), null)) > 0);
+    }
+
+    @Test
+    public void testCompressibility()
+            throws IOException, RepositoryException {
+        byte[] comp50KB = compressibleData(50*1024);
+        byte[] incomp50KB = incompressibleData(50*1024);
+        assertTrue(isCompressible(newArtifact(comp50KB, COMPRESSIBLE_MIME_TYPE)) > 0);
+        assertTrue(isCompressible(newArtifact(comp50KB, UNKNOWN_MIME_TYPE)) > 0);
+        assertTrue(isCompressible(newArtifact(comp50KB, null)) > 0);
+        assertTrue(isCompressible(newArtifact(new byte[10], null)) == 0);
+        assertTrue(isCompressible(newArtifact(incomp50KB, UNKNOWN_MIME_TYPE)) < 0);
+        assertTrue(isCompressible(newArtifact(incomp50KB, INCOMPRESSIBLE_MIME_TYPE)) < 0);
+    }
+
+    private Artifact newArtifact(byte[] data, String contentType)
+            throws IOException, RepositoryException {
+        Artifact artifact = Mockito.mock(Artifact.class);
+        InputStream inputStream = new ByteArrayInputStream(data);
+        Mockito.when(artifact.getInputStream()).thenReturn(inputStream);
+        Mockito.when(artifact.getContentLength()).thenReturn((long) data.length);
+        Mockito.when(artifact.getContentType()).thenReturn(contentType);
+        Mockito.when(artifact.getSerializationType()).thenReturn(SerializationType.GENERIC);
+        return artifact;
+    }
+
+    private byte[] compressibleData(int length) {
+        byte[] data = new byte[length];
+        Arrays.fill(data, (byte)42);
+        return data;
+    }
+
+    private byte[] incompressibleData(int length) {
+        byte[] data = new byte[length];
+        RAND.nextBytes(data);
+        return data;
+    }
+}
\ No newline at end of file
diff --git vault-core/src/test/java/org/apache/jackrabbit/vault/packaging/integration/TestCompressionExport.java vault-core/src/test/java/org/apache/jackrabbit/vault/packaging/integration/TestCompressionExport.java
new file mode 100644
index 0000000..d6eff4c
--- /dev/null
+++ vault-core/src/test/java/org/apache/jackrabbit/vault/packaging/integration/TestCompressionExport.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.vault.packaging.integration;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.Properties;
+import java.util.Random;
+import java.util.zip.Deflater;
+
+import javax.jcr.Node;
+import javax.jcr.RepositoryException;
+
+import org.apache.jackrabbit.commons.JcrUtils;
+import org.apache.jackrabbit.vault.fs.api.PathFilterSet;
+import org.apache.jackrabbit.vault.fs.config.DefaultMetaInf;
+import org.apache.jackrabbit.vault.fs.config.DefaultWorkspaceFilter;
+import org.apache.jackrabbit.vault.packaging.ExportOptions;
+import org.apache.jackrabbit.vault.packaging.PackageException;
+import org.apache.jackrabbit.vault.packaging.VaultPackage;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Test cases for custom compression level on export
+ */
+public class TestCompressionExport extends IntegrationTestBase {
+
+    /**
+     * default logger
+     */
+    private static final Logger log = LoggerFactory.getLogger(TestCompressionExport.class);
+
+    private static final Random RAND = new Random();
+
+    private static final String TEST_PARENT_PATH = "/tmp/testCompressionExport";
+
+    private static final String COMPRESSIBLE_MIME_TYPE = "text/plain";
+
+    private static final String INCOMPRESSIBLE_MIME_TYPE = "image/png";
+
+    private static final String UNKNOWN_MIME_TYPE = "unknown/unknown";
+
+    private static final int NB_WARMUP_ITERATIONS = 100;
+
+    private static final int NB_TEST_ITERATIONS = 10000;
+
+    @Before
+    public void setup() throws RepositoryException, PackageException, IOException {
+        JcrUtils.getOrCreateByPath(TEST_PARENT_PATH, "nt:unstructured", admin);
+        admin.save();
+    }
+
+    @After
+    public void after() throws RepositoryException {
+        if (admin.nodeExists(TEST_PARENT_PATH)) {
+            admin.getNode(TEST_PARENT_PATH).remove();
+            admin.save();
+        }
+    }
+
+    @Test
+    public void test50KB_to_150KB() throws RepositoryException, IOException {
+        for (int size = 50 * 1024 ; size < 150 * 1024 ; size += 10 * 1024) {
+            runTestAndAssertGains(size);
+        }
+    }
+
+    @Test
+    public void test1MB() throws RepositoryException, IOException {
+        runTestAndAssertGains(1024 * 1024);
+    }
+
+    @Test
+    public void test10MB() throws RepositoryException, IOException {
+        runTestAndAssertGains(10 * 1024 * 1024);
+    }
+
+    private void runTestAndAssertGains(int size)
+            throws RepositoryException, IOException {
+        compareWithAndWithoutOptimization(storeFile(true, COMPRESSIBLE_MIME_TYPE, size));
+        compareWithAndWithoutOptimization(storeFile(false, INCOMPRESSIBLE_MIME_TYPE, size));
+        compareWithAndWithoutOptimization(storeFile(true, UNKNOWN_MIME_TYPE, size));
+        compareWithAndWithoutOptimization(storeFile(false, UNKNOWN_MIME_TYPE, size));
+    }
+
+    private void compareWithAndWithoutOptimization(String path)
+            throws IOException, RepositoryException {
+        SizeDuration noOptimization = measureExportDuration(path, Deflater.DEFAULT_COMPRESSION);
+        // level 6 is the deflate level effectively used by DEFAULT_COMPRESSION, but, unlike
+        // DEFAULT_COMPRESSION, an explicit level 6 enables the compressibility optimization
+        SizeDuration withOptimization = measureExportDuration(path, 6);
+        float durationGain = (noOptimization.duration - withOptimization.duration) / (float) noOptimization.duration;
+        float sizeGain = (noOptimization.size - withOptimization.size) / (float) noOptimization.size;
+        log.info("Path {} duration gain: {}, size gain: {}", new Object[]{path, durationGain, sizeGain});
+        // assert that the optimization does not decrease the throughput by more than 3%
+        assertTrue(noOptimization.duration * 1.03f > withOptimization.duration);
+    }
+
+    private SizeDuration measureExportDuration(String nodePath, int level)
+            throws IOException, RepositoryException {
+        ExportOptions opts = buildExportOptions(nodePath, level);
+        log.info("Warmup for path {} and compression level {}",
+                new Object[]{nodePath, level});
+        exportMultipleTimes(opts, NB_WARMUP_ITERATIONS);
+        log.info("Run for path {} and compression level {}",
+                new Object[]{nodePath, level});
+        long start = System.nanoTime();
+        long size = exportMultipleTimes(opts, NB_TEST_ITERATIONS);
+        long stop = System.nanoTime();
+        SizeDuration sd = new SizeDuration(size, stop - start);
+        float rate = (sd.size / (float)sd.duration * 1000);
+        log.info("Ran for path {} and compression level {} in {} ns, produced {} B ({} MB/s)",
+                new Object[]{nodePath, level, sd.duration, sd.size, rate});
+        return sd;
+    }
+
+    private long exportMultipleTimes(ExportOptions opts, int times)
+            throws IOException, RepositoryException {
+        long size = 0;
+        for (int i = 0 ; i < times ; i++) {
+            WriteCountOutputStream outputStream = new WriteCountOutputStream();
+            packMgr.assemble(admin, opts, outputStream);
+            size += outputStream.size();
+        }
+        return size;
+    }
+
+    private ExportOptions buildExportOptions(String nodePath, int level) {
+        ExportOptions opts = new ExportOptions();
+        opts.setCompressionLevel(level);
+        DefaultMetaInf inf = new DefaultMetaInf();
+        DefaultWorkspaceFilter filter = new DefaultWorkspaceFilter();
+        filter.add(new PathFilterSet(nodePath));
+        inf.setFilter(filter);
+        Properties props = new Properties();
+        props.setProperty(VaultPackage.NAME_GROUP, "jackrabbit/test");
+        props.setProperty(VaultPackage.NAME_NAME, "test-compression-package");
+        inf.setProperties(props);
+        opts.setMetaInf(inf);
+        return opts;
+    }
+
+    private String storeFile(boolean compressible, String mimeType, int size)
+            throws RepositoryException {
+        String path = String.format("%s/%s", TEST_PARENT_PATH, fileName(compressible, mimeType, size));
+        Node node = JcrUtils.getOrCreateByPath(path, "nt:unstructured", admin);
+        byte[] data = compressible ? compressibleData(size) : incompressibleData(size);
+        JcrUtils.putFile(node, "file", mimeType, new ByteArrayInputStream(data));
+        admin.save();
+        return node.getPath();
+    }
"compressible" : "incompressible"); + } + + private byte[] compressibleData(int length) { + byte[] data = new byte[length]; + Arrays.fill(data, (byte)42); // low entropy data + return data; + } + + private byte[] incompressibleData(int length) { + byte[] data = new byte[length]; + RAND.nextBytes(data); // high entropy data + return data; + } + + public class WriteCountOutputStream extends OutputStream { + + long size = 0; + + public void write(int b) throws IOException { + size++; + } + + public long size() { + return size; + } + } + + private class SizeDuration { + + long size; + + long duration; + + SizeDuration(long size, long duration) { + this.duration = duration; + this.size = size; + } + } + +}