Uploaded image for project: 'Commons Compress'
  1. Commons Compress
  2. COMPRESS-501

Possibility to introduce a fast Zip open with some caveats

    XMLWordPrintableJSON

Details

    • Improvement
    • Status: Open
    • Major
    • Resolution: Unresolved
    • 1.19
    • None
    • Archivers
    • None
    • OSX 10.14.6 and Linux

    • Patch

    Description

      About a year ago I created an improvement (https://issues.apache.org/jira/browse/COMPRESS-466) to speed up some things in commons-compress for Zip-files. This helped us quite a lot but we wanted it to be even faster so I optimised away some stuff that I thought was not that important for us.
      I was able to improve opening of a 34GB zip file from ~12s to ~2s.

      Now to my question, do you think it would be possible to introduce some of my fixes (diff included) into master?

      Yes, I know that I shortcut some features for some specific zip files and don't expose everything anymore.
      I haven't really made a good switchable solution for it because we just use our own build locally with this path.

      But with some hints from you I might be able to do it somehow. I'm happy to help and would love to get this speed open into master (it is always cumbersome with custom changes to public libraries). 

      diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
      index 767f615d..d441b12d 100644
      --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
      +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
      @@ -146,6 +146,7 @@
           private boolean isStreamContiguous = false;
           private NameSource nameSource = NameSource.NAME;
           private CommentSource commentSource = CommentSource.COMMENT;
      +    private byte[] cdExtraData = null;
       
       
           /**
      @@ -397,6 +398,14 @@ public void setAlignment(int alignment) {
               this.alignment = alignment;
           }
       
      +    public void setRawCentralDirectoryExtra(byte[] cdExtraData) {
      +        this.cdExtraData = cdExtraData;
      +    }
      +
      +    public byte[] getRawCentralDirectoryExtra() {
      +        return this.cdExtraData;
      +    }
      +
           /**
            * Replaces all currently attached extra fields with the new array.
            * @param fields an array of extra fields
      diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
      index 152272b5..bb33b50f 100644
      --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
      +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
      @@ -691,10 +691,10 @@ protected void finalize() throws Throwable {
               final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
                   new HashMap<>();
       
      -        positionAtCentralDirectory();
      +        ByteBuffer ceDir = positionAtCentralDirectory();
       
               wordBbuf.rewind();
      -        IOUtils.readFully(archive, wordBbuf);
      +        ceDir.get(wordBuf);
               long sig = ZipLong.getValue(wordBuf);
       
               if (sig != CFH_SIG && startsWithLocalFileHeader()) {
      @@ -703,9 +703,12 @@ protected void finalize() throws Throwable {
               }
       
               while (sig == CFH_SIG) {
      -            readCentralDirectoryEntry(noUTF8Flag);
      +            readCentralDirectoryEntry(ceDir, noUTF8Flag);
                   wordBbuf.rewind();
      -            IOUtils.readFully(archive, wordBbuf);
      +            if (ceDir.remaining() == 0) {
      +                break;
      +            }
      +            ceDir.get(wordBuf);
                   sig = ZipLong.getValue(wordBuf);
               }
               return noUTF8Flag;
      @@ -721,10 +724,10 @@ protected void finalize() throws Throwable {
            * added to this map.
            */
           private void
      -        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
      +        readCentralDirectoryEntry(ByteBuffer ceDir, final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
               throws IOException {
               cfhBbuf.rewind();
      -        IOUtils.readFully(archive, cfhBbuf);
      +        ceDir.get(cfhBuf);
               int off = 0;
               final Entry ze = new Entry();
       
      @@ -752,8 +755,9 @@ protected void finalize() throws Throwable {
               ze.setMethod(ZipShort.getValue(cfhBuf, off));
               off += SHORT;
       
      -        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
      -        ze.setTime(time);
      +        //long ts = ZipLong.getValue(cfhBuf, off);
      +        //final long time = ZipUtil.dosToJavaTime(ts);
      +        //ze.setTime(time);
               off += WORD;
       
               ze.setCrc(ZipLong.getValue(cfhBuf, off));
      @@ -784,7 +788,7 @@ protected void finalize() throws Throwable {
               off += WORD;
       
               final byte[] fileName = new byte[fileNameLen];
      -        IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
      +        ceDir.get(fileName);
               ze.setName(entryEncoding.decode(fileName), fileName);
       
               // LFH offset,
      @@ -792,19 +796,22 @@ protected void finalize() throws Throwable {
               // data offset will be filled later
               entries.add(ze);
       
      +//        ceDir.position(ceDir.position() + extraLen + commentLen);
               final byte[] cdExtraData = new byte[extraLen];
      -        IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
      -        ze.setCentralDirectoryExtra(cdExtraData);
      +        ceDir.get(cdExtraData);
      +        ze.setRawCentralDirectoryExtra(cdExtraData);
      +//        ze.setCentralDirectoryExtra(cdExtraData);
       
      -        setSizesAndOffsetFromZip64Extra(ze, diskStart);
      +//        setSizesAndOffsetFromZip64Extra(ze, diskStart);
       
      -        final byte[] comment = new byte[commentLen];
      -        IOUtils.readFully(archive, ByteBuffer.wrap(comment));
      -        ze.setComment(entryEncoding.decode(comment));
      -
      -        if (!hasUTF8Flag && useUnicodeExtraFields) {
      -            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
      -        }
      +        ceDir.position(ceDir.position() + commentLen);
      +//        final byte[] comment = new byte[commentLen];
      +//        ceDir.get(comment);
      +//        ze.setComment(entryEncoding.decode(comment));
      +//
      +//        if (!hasUTF8Flag && useUnicodeExtraFields) {
      +//            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
      +//        }
       
               ze.setStreamContiguous(true);
           }
      @@ -953,9 +960,10 @@ private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
            * it and positions the stream at the first central directory
            * record.
            */
      -    private void positionAtCentralDirectory()
      +    private ByteBuffer positionAtCentralDirectory()
               throws IOException {
               positionAtEndOfCentralDirectoryRecord();
      +        long end = archive.position();
               boolean found = false;
               final boolean searchedForZip64EOCD =
                   archive.position() > ZIP64_EOCDL_LENGTH;
      @@ -975,6 +983,12 @@ private void positionAtCentralDirectory()
               } else {
                   positionAtCentralDirectory64();
               }
      +        long start = archive.position();
      +        ByteBuffer bigb = ByteBuffer.allocateDirect((int)(end - start));
      +        archive.read(bigb);
      +        archive.position(start);
      +        bigb.rewind();
      +        return bigb;
           }
       
           /**
      @@ -1168,6 +1182,8 @@ private void fillNameMap() {
           private long getDataOffset(ZipArchiveEntry ze) throws IOException {
               long s = ze.getDataOffset();
               if (s == EntryStreamOffsets.OFFSET_UNKNOWN) {
      +            ze.setCentralDirectoryExtra(ze.getRawCentralDirectoryExtra());
      +            setSizesAndOffsetFromZip64Extra(ze, 0);
                   setDataOffset(ze);
                   return ze.getDataOffset();
               }
      

      Attachments

        1. zipfile-speed-improvements.diff
          6 kB
          Jakob Sultan Ericsson

        Activity

          People

            Unassigned Unassigned
            jakeri Jakob Sultan Ericsson
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated: