001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.nio.ByteBuffer;
028import java.util.zip.CRC32;
029import java.util.zip.DataFormatException;
030import java.util.zip.Inflater;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipException;
033
034import org.apache.commons.compress.archivers.ArchiveEntry;
035import org.apache.commons.compress.archivers.ArchiveInputStream;
036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
037import org.apache.commons.compress.utils.ArchiveUtils;
038import org.apache.commons.compress.utils.IOUtils;
039
040import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
041import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
042import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
043import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
044
045/**
046 * Implements an input stream that can read Zip archives.
047 *
048 * <p>As of Apache Commons Compress it transparently supports Zip64
049 * extensions and thus individual entries and archives larger than 4
050 * GB or with more than 65536 entries.</p>
051 *
052 * <p>The {@link ZipFile} class is preferred when reading from files
053 * as {@link ZipArchiveInputStream} is limited by not being able to
054 * read the central directory header before returning entries.  In
055 * particular {@link ZipArchiveInputStream}</p>
056 *
057 * <ul>
058 *
059 *  <li>may return entries that are not part of the central directory
060 *  at all and shouldn't be considered part of the archive.</li>
061 *
062 *  <li>may return several entries with the same name.</li>
063 *
064 *  <li>will not return internal or external attributes.</li>
065 *
066 *  <li>may return incomplete extra field data.</li>
067 *
068 *  <li>may return unknown sizes and CRC values for entries until the
069 *  next entry has been reached if the archive uses the data
070 *  descriptor feature.</li>
071 *
072 * </ul>
073 *
074 * @see ZipFile
075 * @NotThreadSafe
076 */
077public class ZipArchiveInputStream extends ArchiveInputStream {
078
079    /** The zip encoding to use for filenames and the file comment. */
080    private final ZipEncoding zipEncoding;
081
082    // the provided encoding (for unit tests)
083    final String encoding;
084
085    /** Whether to look for and use Unicode extra fields. */
086    private final boolean useUnicodeExtraFields;
087
088    /** Wrapped stream, will always be a PushbackInputStream. */
089    private final InputStream in;
090
091    /** Inflater used for all deflated entries. */
092    private final Inflater inf = new Inflater(true);
093
094    /** Buffer used to read from the wrapped stream. */
095    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
096
097    /** The entry that is currently being read. */
098    private CurrentEntry current = null;
099
100    /** Whether the stream has been closed. */
101    private boolean closed = false;
102
103    /** Whether the stream has reached the central directory - and thus found all entries. */
104    private boolean hitCentralDirectory = false;
105
106    /**
107     * When reading a stored entry that uses the data descriptor this
108     * stream has to read the full entry and caches it.  This is the
109     * cache.
110     */
111    private ByteArrayInputStream lastStoredEntry = null;
112
113    /** Whether the stream will try to read STORED entries that use a data descriptor. */
114    private boolean allowStoredEntriesWithDataDescriptor = false;
115
116    private static final int LFH_LEN = 30;
117    /*
118      local file header signature     WORD
119      version needed to extract       SHORT
120      general purpose bit flag        SHORT
121      compression method              SHORT
122      last mod file time              SHORT
123      last mod file date              SHORT
124      crc-32                          WORD
125      compressed size                 WORD
126      uncompressed size               WORD
127      file name length                SHORT
128      extra field length              SHORT
129    */
130
131    private static final int CFH_LEN = 46;
132    /*
133        central file header signature   WORD
134        version made by                 SHORT
135        version needed to extract       SHORT
136        general purpose bit flag        SHORT
137        compression method              SHORT
138        last mod file time              SHORT
139        last mod file date              SHORT
140        crc-32                          WORD
141        compressed size                 WORD
142        uncompressed size               WORD
143        file name length                SHORT
144        extra field length              SHORT
145        file comment length             SHORT
146        disk number start               SHORT
147        internal file attributes        SHORT
148        external file attributes        WORD
149        relative offset of local header WORD
150    */
151
152    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
153
154    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
155    private final byte[] LFH_BUF = new byte[LFH_LEN];
156    private final byte[] SKIP_BUF = new byte[1024];
157    private final byte[] SHORT_BUF = new byte[SHORT];
158    private final byte[] WORD_BUF = new byte[WORD];
159    private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];
160
    /** Number of entries handed out by getNextZipEntry() so far. */
161    private int entriesRead = 0;
162
163    /**
164     * Create an instance using UTF-8 encoding
165     * @param inputStream the stream to wrap
166     */
167    public ZipArchiveInputStream(final InputStream inputStream) {
168        this(inputStream, ZipEncodingHelper.UTF8);
169    }
170
171    /**
172     * Create an instance using the specified encoding
173     * @param inputStream the stream to wrap
174     * @param encoding the encoding to use for file names, use null
175     * for the platform's default encoding
176     * @since 1.5
177     */
178    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
179        this(inputStream, encoding, true);
180    }
181
182    /**
183     * Create an instance using the specified encoding
184     * @param inputStream the stream to wrap
185     * @param encoding the encoding to use for file names, use null
186     * for the platform's default encoding
187     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
188     * Extra Fields (if present) to set the file names.
189     */
190    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
191        this(inputStream, encoding, useUnicodeExtraFields, false);
192    }
193
194    /**
195     * Create an instance using the specified encoding
196     * @param inputStream the stream to wrap
197     * @param encoding the encoding to use for file names, use null
198     * for the platform's default encoding
199     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
200     * Extra Fields (if present) to set the file names.
201     * @param allowStoredEntriesWithDataDescriptor whether the stream
202     * will try to read STORED entries that use a data descriptor
203     * @since 1.1
204     */
205    public ZipArchiveInputStream(final InputStream inputStream,
206                                 final String encoding,
207                                 final boolean useUnicodeExtraFields,
208                                 final boolean allowStoredEntriesWithDataDescriptor) {
209        this.encoding = encoding;
210        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
211        this.useUnicodeExtraFields = useUnicodeExtraFields;
212        in = new PushbackInputStream(inputStream, buf.capacity());
213        this.allowStoredEntriesWithDataDescriptor =
214            allowStoredEntriesWithDataDescriptor;
215        // haven't read anything so far
216        buf.limit(0);
217    }
218
219    public ZipArchiveEntry getNextZipEntry() throws IOException {
220        boolean firstEntry = true;
221        if (closed || hitCentralDirectory) {
222            return null;
223        }
224        if (current != null) {
225            closeEntry();
226            firstEntry = false;
227        }
228
229        try {
230            if (firstEntry) {
231                // split archives have a special signature before the
232                // first local file header - look for it and fail with
233                // the appropriate error message if this is a split
234                // archive.
235                readFirstLocalFileHeader(LFH_BUF);
236            } else {
237                readFully(LFH_BUF);
238            }
239        } catch (final EOFException e) {
240            return null;
241        }
242
243        final ZipLong sig = new ZipLong(LFH_BUF);
244        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
245            hitCentralDirectory = true;
246            skipRemainderOfArchive();
247        }
248        if (!sig.equals(ZipLong.LFH_SIG)) {
249            return null;
250        }
251
252        int off = WORD;
253        current = new CurrentEntry();
254
255        final int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
256        off += SHORT;
257        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
258
259        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
260        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
261        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
262        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
263        current.entry.setGeneralPurposeBit(gpFlag);
264
265        off += SHORT;
266
267        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
268        off += SHORT;
269
270        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
271        current.entry.setTime(time);
272        off += WORD;
273
274        ZipLong size = null, cSize = null;
275        if (!current.hasDataDescriptor) {
276            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
277            off += WORD;
278
279            cSize = new ZipLong(LFH_BUF, off);
280            off += WORD;
281
282            size = new ZipLong(LFH_BUF, off);
283            off += WORD;
284        } else {
285            off += 3 * WORD;
286        }
287
288        final int fileNameLen = ZipShort.getValue(LFH_BUF, off);
289
290        off += SHORT;
291
292        final int extraLen = ZipShort.getValue(LFH_BUF, off);
293        off += SHORT;
294
295        final byte[] fileName = new byte[fileNameLen];
296        readFully(fileName);
297        current.entry.setName(entryEncoding.decode(fileName), fileName);
298
299        final byte[] extraData = new byte[extraLen];
300        readFully(extraData);
301        current.entry.setExtra(extraData);
302
303        if (!hasUTF8Flag && useUnicodeExtraFields) {
304            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
305        }
306
307        processZip64Extra(size, cSize);
308
309        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
310            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
311                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
312            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
313                current.in = new ExplodingInputStream(
314                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
315                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
316                        new BoundedInputStream(in, current.entry.getCompressedSize()));
317            } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
318                current.in = new BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
319            }
320        }
321        
322        entriesRead++;
323        return current.entry;
324    }
325
326    /**
327     * Fills the given array with the first local file header and
328     * deals with splitting/spanning markers that may prefix the first
329     * LFH.
330     */
331    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
332        readFully(lfh);
333        final ZipLong sig = new ZipLong(lfh);
334        if (sig.equals(ZipLong.DD_SIG)) {
335            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
336        }
337
338        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
339            // The archive is not really split as only one segment was
340            // needed in the end.  Just skip over the marker.
341            final byte[] missedLfhBytes = new byte[4];
342            readFully(missedLfhBytes);
343            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
344            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
345        }
346    }
347
348    /**
349     * Records whether a Zip64 extra is present and sets the size
350     * information from it if sizes are 0xFFFFFFFF and the entry
351     * doesn't use a data descriptor.
352     */
353    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
354        final Zip64ExtendedInformationExtraField z64 =
355            (Zip64ExtendedInformationExtraField) 
356            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
357        current.usesZip64 = z64 != null;
358        if (!current.hasDataDescriptor) {
359            if (z64 != null // same as current.usesZip64 but avoids NPE warning
360                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
361                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
362                current.entry.setSize(z64.getSize().getLongValue());
363            } else {
364                current.entry.setCompressedSize(cSize.getValue());
365                current.entry.setSize(size.getValue());
366            }
367        }
368    }
369
370    @Override
371    public ArchiveEntry getNextEntry() throws IOException {
372        return getNextZipEntry();
373    }
374
375    /**
376     * Whether this class is able to read the given entry.
377     *
378     * <p>May return false if it is set up to use encryption or a
379     * compression method that hasn't been implemented yet.</p>
380     * @since 1.1
381     */
382    @Override
383    public boolean canReadEntryData(final ArchiveEntry ae) {
384        if (ae instanceof ZipArchiveEntry) {
385            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
386            return ZipUtil.canHandleEntryData(ze)
387                && supportsDataDescriptorFor(ze);
388
389        }
390        return false;
391    }
392
393    @Override
394    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
395        if (closed) {
396            throw new IOException("The stream is closed");
397        }
398
399        if (current == null) {
400            return -1;
401        }
402
403        // avoid int overflow, check null buffer
404        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
405            throw new ArrayIndexOutOfBoundsException();
406        }
407        
408        ZipUtil.checkRequestedFeatures(current.entry);
409        if (!supportsDataDescriptorFor(current.entry)) {
410            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
411                    current.entry);
412        }
413
414        int read;
415        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
416            read = readStored(buffer, offset, length);
417        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
418            read = readDeflated(buffer, offset, length);
419        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
420                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
421                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
422            read = current.in.read(buffer, offset, length);
423        } else {
424            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
425                    current.entry);
426        }
427        
428        if (read >= 0) {
429            current.crc.update(buffer, offset, read);
430        }
431        
432        return read;
433    }
434
435    /**
436     * Implementation of read for STORED entries.
437     */
438    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
439
440        if (current.hasDataDescriptor) {
441            if (lastStoredEntry == null) {
442                readStoredEntry();
443            }
444            return lastStoredEntry.read(buffer, offset, length);
445        }
446
447        final long csize = current.entry.getSize();
448        if (current.bytesRead >= csize) {
449            return -1;
450        }
451
452        if (buf.position() >= buf.limit()) {
453            buf.position(0);
454            final int l = in.read(buf.array());
455            if (l == -1) {
456                buf.limit(0);
457                throw new IOException("Truncated ZIP file");
458            }
459            buf.limit(l);
460
461            count(l);
462            current.bytesReadFromStream += l;
463        }
464
465        int toRead = Math.min(buf.remaining(), length);
466        if ((csize - current.bytesRead) < toRead) {
467            // if it is smaller than toRead then it fits into an int
468            toRead = (int) (csize - current.bytesRead);
469        }
470        buf.get(buffer, offset, toRead);
471        current.bytesRead += toRead;
472        return toRead;
473    }
474
475    /**
476     * Implementation of read for DEFLATED entries.
477     */
478    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
479        final int read = readFromInflater(buffer, offset, length);
480        if (read <= 0) {
481            if (inf.finished()) {
482                return -1;
483            } else if (inf.needsDictionary()) {
484                throw new ZipException("This archive needs a preset dictionary"
485                                       + " which is not supported by Commons"
486                                       + " Compress.");
487            } else if (read == -1) {
488                throw new IOException("Truncated ZIP file");
489            }
490        }
491        return read;
492    }
493
494    /**
495     * Potentially reads more bytes to fill the inflater's buffer and
496     * reads from it.
497     */
498    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
499        int read = 0;
500        do {
501            if (inf.needsInput()) {
502                final int l = fill();
503                if (l > 0) {
504                    current.bytesReadFromStream += buf.limit();
505                } else if (l == -1) {
506                    return -1;
507                } else {
508                    break;
509                }
510            }
511            try {
512                read = inf.inflate(buffer, offset, length);
513            } catch (final DataFormatException e) {
514                throw (IOException) new ZipException(e.getMessage()).initCause(e);
515            }
516        } while (read == 0 && inf.needsInput());
517        return read;
518    }
519
520    @Override
521    public void close() throws IOException {
522        if (!closed) {
523            closed = true;
524            try {
525                in.close();
526            } finally {
527                inf.end();
528            }
529        }
530    }
531
532    /**
533     * Skips over and discards value bytes of data from this input
534     * stream.
535     *
536     * <p>This implementation may end up skipping over some smaller
537     * number of bytes, possibly 0, if and only if it reaches the end
538     * of the underlying stream.</p>
539     *
540     * <p>The actual number of bytes skipped is returned.</p>
541     *
542     * @param value the number of bytes to be skipped.
543     * @return the actual number of bytes skipped.
544     * @throws IOException - if an I/O error occurs.
545     * @throws IllegalArgumentException - if value is negative.
546     */
547    @Override
548    public long skip(final long value) throws IOException {
549        if (value >= 0) {
550            long skipped = 0;
551            while (skipped < value) {
552                final long rem = value - skipped;
553                final int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
554                if (x == -1) {
555                    return skipped;
556                }
557                skipped += x;
558            }
559            return skipped;
560        }
561        throw new IllegalArgumentException();
562    }
563
564    /**
565     * Checks if the signature matches what is expected for a zip file.
566     * Does not currently handle self-extracting zips which may have arbitrary
567     * leading content.
568     *
569     * @param signature the bytes to check
570     * @param length    the number of bytes to check
571     * @return true, if this stream is a zip archive stream, false otherwise
572     */
573    public static boolean matches(final byte[] signature, final int length) {
574        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
575            return false;
576        }
577
578        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
579            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
580            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
581            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
582    }
583
584    private static boolean checksig(final byte[] signature, final byte[] expected) {
585        for (int i = 0; i < expected.length; i++) {
586            if (signature[i] != expected[i]) {
587                return false;
588            }
589        }
590        return true;
591    }
592
593    /**
594     * Closes the current ZIP archive entry and positions the underlying
595     * stream to the beginning of the next entry. All per-entry variables
596     * and data structures are cleared.
597     * <p>
598     * If the compressed size of this entry is included in the entry header,
599     * then any outstanding bytes are simply skipped from the underlying
600     * stream without uncompressing them. This allows an entry to be safely
601     * closed even if the compression method is unsupported.
602     * <p>
603     * In case we don't know the compressed size of this entry or have
604     * already buffered too much data from the underlying stream to support
605     * uncompression, then the uncompression process is completed and the
606     * end position of the stream is adjusted based on the result of that
607     * process.
608     *
609     * @throws IOException if an error occurs
610     */
611    private void closeEntry() throws IOException {
612        if (closed) {
613            throw new IOException("The stream is closed");
614        }
615        if (current == null) {
616            return;
617        }
618
619        // Ensure all entry bytes are read
620        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
621                && !current.hasDataDescriptor) {
622            drainCurrentEntryData();
623        } else {
624            skip(Long.MAX_VALUE);
625
626            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
627                       ? getBytesInflated() : current.bytesRead;
628
629            // this is at most a single read() operation and can't
630            // exceed the range of int
631            final int diff = (int) (current.bytesReadFromStream - inB);
632
633            // Pushback any required bytes
634            if (diff > 0) {
635                pushback(buf.array(), buf.limit() - diff, diff);
636            }
637        }
638
639        if (lastStoredEntry == null && current.hasDataDescriptor) {
640            readDataDescriptor();
641        }
642
643        inf.reset();
644        buf.clear().flip();
645        current = null;
646        lastStoredEntry = null;
647    }
648
649    /**
650     * Read all data of the current entry from the underlying stream
651     * that hasn't been read, yet.
652     */
653    private void drainCurrentEntryData() throws IOException {
654        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
655        while (remaining > 0) {
656            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
657            if (n < 0) {
658                throw new EOFException("Truncated ZIP entry: "
659                                       + ArchiveUtils.sanitize(current.entry.getName()));
660            }
661            count(n);
662            remaining -= n;
663        }
664    }
665
666    /**
667     * Get the number of bytes Inflater has actually processed.
668     *
669     * <p>for Java &lt; Java7 the getBytes* methods in
670     * Inflater/Deflater seem to return unsigned ints rather than
671     * longs that start over with 0 at 2^32.</p>
672     *
673     * <p>The stream knows how many bytes it has read, but not how
674     * many the Inflater actually consumed - it should be between the
675     * total number of bytes read for the entry and the total number
676     * minus the last read operation.  Here we just try to make the
677     * value close enough to the bytes we've read by assuming the
678     * number of bytes consumed must be smaller than (or equal to) the
679     * number of bytes read but not smaller by more than 2^32.</p>
680     */
681    private long getBytesInflated() {
682        long inB = inf.getBytesRead();
683        if (current.bytesReadFromStream >= TWO_EXP_32) {
684            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
685                inB += TWO_EXP_32;
686            }
687        }
688        return inB;
689    }
690
691    private int fill() throws IOException {
692        if (closed) {
693            throw new IOException("The stream is closed");
694        }
695        final int length = in.read(buf.array());
696        if (length > 0) {
697            buf.limit(length);
698            count(buf.limit());
699            inf.setInput(buf.array(), 0, buf.limit());
700        }
701        return length;
702    }
703
704    private void readFully(final byte[] b) throws IOException {
705        final int count = IOUtils.readFully(in, b);
706        count(count);
707        if (count < b.length) {
708            throw new EOFException();
709        }
710    }
711
712    private void readDataDescriptor() throws IOException {
713        readFully(WORD_BUF);
714        ZipLong val = new ZipLong(WORD_BUF);
715        if (ZipLong.DD_SIG.equals(val)) {
716            // data descriptor with signature, skip sig
717            readFully(WORD_BUF);
718            val = new ZipLong(WORD_BUF);
719        }
720        current.entry.setCrc(val.getValue());
721
722        // if there is a ZIP64 extra field, sizes are eight bytes
723        // each, otherwise four bytes each.  Unfortunately some
724        // implementations - namely Java7 - use eight bytes without
725        // using a ZIP64 extra field -
726        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
727
728        // just read 16 bytes and check whether bytes nine to twelve
729        // look like one of the signatures of what could follow a data
730        // descriptor (ignoring archive decryption headers for now).
731        // If so, push back eight bytes and assume sizes are four
732        // bytes, otherwise sizes are eight bytes each.
733        readFully(TWO_DWORD_BUF);
734        final ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
735        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
736            pushback(TWO_DWORD_BUF, DWORD, DWORD);
737            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
738            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
739        } else {
740            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
741            current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
742        }
743    }
744
745    /**
746     * Whether this entry requires a data descriptor this library can work with.
747     *
748     * @return true if allowStoredEntriesWithDataDescriptor is true,
749     * the entry doesn't require any data descriptor or the method is
750     * DEFLATED.
751     */
752    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
753        return !entry.getGeneralPurposeBit().usesDataDescriptor()
754
755                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
756                || entry.getMethod() == ZipEntry.DEFLATED;
757    }
758
759    /**
760     * Caches a stored entry that uses the data descriptor.
761     *
762     * <ul>
763     *   <li>Reads a stored entry until the signature of a local file
764     *     header, central directory header or data descriptor has been
765     *     found.</li>
766     *   <li>Stores all entry data in lastStoredEntry.</p>
767     *   <li>Rewinds the stream to position at the data
768     *     descriptor.</li>
769     *   <li>reads the data descriptor</li>
770     * </ul>
771     *
772     * <p>After calling this method the entry should know its size,
773     * the entry's data is cached and the stream is positioned at the
774     * next local file or central directory header.</p>
775     */
776    private void readStoredEntry() throws IOException {
777        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
778        int off = 0;
779        boolean done = false;
780
781        // length of DD without signature
782        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
783
784        while (!done) {
785            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
786            if (r <= 0) {
787                // read the whole archive without ever finding a
788                // central directory
789                throw new IOException("Truncated ZIP file");
790            }
791            if (r + off < 4) {
792                // buffer too small to check for a signature, loop
793                off += r;
794                continue;
795            }
796
797            done = bufferContainsSignature(bos, off, r, ddLen);
798            if (!done) {
799                off = cacheBytesRead(bos, off, r, ddLen);
800            }
801        }
802
803        final byte[] b = bos.toByteArray();
804        lastStoredEntry = new ByteArrayInputStream(b);
805    }
806
807    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
808    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
809    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
810
811    /**
812     * Checks whether the current buffer contains the signature of a
813     * &quot;data descriptor&quot;, &quot;local file header&quot; or
814     * &quot;central directory entry&quot;.
815     *
816     * <p>If it contains such a signature, reads the data descriptor
817     * and positions the stream right after the data descriptor.</p>
818     */
819    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
820            throws IOException {
821
822        boolean done = false;
823        int readTooMuch = 0;
824        for (int i = 0; !done && i < lastRead - 4; i++) {
825            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
826                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
827                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
828                    // found a LFH or CFH:
829                    readTooMuch = offset + lastRead - i - expectedDDLen;
830                    done = true;
831                }
832                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
833                    // found DD:
834                    readTooMuch = offset + lastRead - i;
835                    done = true;
836                }
837                if (done) {
838                    // * push back bytes read in excess as well as the data
839                    //   descriptor
840                    // * copy the remaining bytes to cache
841                    // * read data descriptor
842                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
843                    bos.write(buf.array(), 0, i);
844                    readDataDescriptor();
845                }
846            }
847        }
848        return done;
849    }
850
851    /**
852     * If the last read bytes could hold a data descriptor and an
853     * incomplete signature then save the last bytes to the front of
854     * the buffer and cache everything in front of the potential data
855     * descriptor into the given ByteArrayOutputStream.
856     *
857     * <p>Data descriptor plus incomplete signature (3 bytes in the
858     * worst case) can be 20 bytes max.</p>
859     */
860    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
861        final int cacheable = offset + lastRead - expecteDDLen - 3;
862        if (cacheable > 0) {
863            bos.write(buf.array(), 0, cacheable);
864            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
865            offset = expecteDDLen + 3;
866        } else {
867            offset += lastRead;
868        }
869        return offset;
870    }
871
872    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
873        ((PushbackInputStream) in).unread(buf, offset, length);
874        pushedBackBytes(length);
875    }
876
877    // End of Central Directory Record
878    //   end of central dir signature    WORD
879    //   number of this disk             SHORT
880    //   number of the disk with the
881    //   start of the central directory  SHORT
882    //   total number of entries in the
883    //   central directory on this disk  SHORT
884    //   total number of entries in
885    //   the central directory           SHORT
886    //   size of the central directory   WORD
887    //   offset of start of central
888    //   directory with respect to
889    //   the starting disk number        WORD
890    //   .ZIP file comment length        SHORT
891    //   .ZIP file comment               up to 64KB
892    //
893
894    /**
895     * Reads the stream until it find the "End of central directory
896     * record" and consumes it as well.
897     */
898    private void skipRemainderOfArchive() throws IOException {
899        // skip over central directory. One LFH has been read too much
900        // already.  The calculation discounts file names and extra
901        // data so it will be too short.
902        realSkip(entriesRead * CFH_LEN - LFH_LEN);
903        findEocdRecord();
904        realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
905        readFully(SHORT_BUF);
906        // file comment
907        realSkip(ZipShort.getValue(SHORT_BUF));
908    }
909
910    /**
911     * Reads forward until the signature of the &quot;End of central
912     * directory&quot; record is found.
913     */
914    private void findEocdRecord() throws IOException {
915        int currentByte = -1;
916        boolean skipReadCall = false;
917        while (skipReadCall || (currentByte = readOneByte()) > -1) {
918            skipReadCall = false;
919            if (!isFirstByteOfEocdSig(currentByte)) {
920                continue;
921            }
922            currentByte = readOneByte();
923            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
924                if (currentByte == -1) {
925                    break;
926                }
927                skipReadCall = isFirstByteOfEocdSig(currentByte);
928                continue;
929            }
930            currentByte = readOneByte();
931            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
932                if (currentByte == -1) {
933                    break;
934                }
935                skipReadCall = isFirstByteOfEocdSig(currentByte);
936                continue;
937            }
938            currentByte = readOneByte();
939            if (currentByte == -1
940                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
941                break;
942            }
943            skipReadCall = isFirstByteOfEocdSig(currentByte);
944        }
945    }
946
947    /**
948     * Skips bytes by reading from the underlying stream rather than
949     * the (potentially inflating) archive stream - which {@link
950     * #skip} would do.
951     *
952     * Also updates bytes-read counter.
953     */
954    private void realSkip(final long value) throws IOException {
955        if (value >= 0) {
956            long skipped = 0;
957            while (skipped < value) {
958                final long rem = value - skipped;
959                final int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
960                if (x == -1) {
961                    return;
962                }
963                count(x);
964                skipped += x;
965            }
966            return;
967        }
968        throw new IllegalArgumentException();
969    }
970
971    /**
972     * Reads bytes by reading from the underlying stream rather than
973     * the (potentially inflating) archive stream - which {@link #read} would do.
974     *
975     * Also updates bytes-read counter.
976     */
977    private int readOneByte() throws IOException {
978        final int b = in.read();
979        if (b != -1) {
980            count(1);
981        }
982        return b;
983    }
984
985    private boolean isFirstByteOfEocdSig(final int b) {
986        return b == ZipArchiveOutputStream.EOCD_SIG[0];
987    }
988
989    /**
990     * Structure collecting information for the entry that is
991     * currently being read.
992     */
993    private static final class CurrentEntry {
994
995        /**
996         * Current ZIP entry.
997         */
998        private final ZipArchiveEntry entry = new ZipArchiveEntry();
999
1000        /**
1001         * Does the entry use a data descriptor?
1002         */
1003        private boolean hasDataDescriptor;
1004
1005        /**
1006         * Does the entry have a ZIP64 extended information extra field.
1007         */
1008        private boolean usesZip64;
1009
1010        /**
1011         * Number of bytes of entry content read by the client if the
1012         * entry is STORED.
1013         */
1014        private long bytesRead;
1015
1016        /**
1017         * Number of bytes of entry content read so from the stream.
1018         *
1019         * <p>This may be more than the actual entry's length as some
1020         * stuff gets buffered up and needs to be pushed back when the
1021         * end of the entry has been reached.</p>
1022         */
1023        private long bytesReadFromStream;
1024
1025        /**
1026         * The checksum calculated as the current entry is read.
1027         */
1028        private final CRC32 crc = new CRC32();
1029
1030        /**
1031         * The input stream decompressing the data for shrunk and imploded entries.
1032         */
1033        private InputStream in;
1034    }
1035
1036    /**
1037     * Bounded input stream adapted from commons-io
1038     */
1039    private class BoundedInputStream extends InputStream {
1040
1041        /** the wrapped input stream */
1042        private final InputStream in;
1043
1044        /** the max length to provide */
1045        private final long max;
1046
1047        /** the number of bytes already returned */
1048        private long pos = 0;
1049    
1050        /**
1051         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1052         * stream and limits it to a certain size.
1053         *
1054         * @param in The wrapped input stream
1055         * @param size The maximum number of bytes to return
1056         */
1057        public BoundedInputStream(final InputStream in, final long size) {
1058            this.max = size;
1059            this.in = in;
1060        }
1061
1062        @Override
1063        public int read() throws IOException {
1064            if (max >= 0 && pos >= max) {
1065                return -1;
1066            }
1067            final int result = in.read();
1068            pos++;
1069            count(1);
1070            current.bytesReadFromStream++;
1071            return result;
1072        }
1073
1074        @Override
1075        public int read(final byte[] b) throws IOException {
1076            return this.read(b, 0, b.length);
1077        }
1078
1079        @Override
1080        public int read(final byte[] b, final int off, final int len) throws IOException {
1081            if (max >= 0 && pos >= max) {
1082                return -1;
1083            }
1084            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1085            final int bytesRead = in.read(b, off, (int) maxRead);
1086
1087            if (bytesRead == -1) {
1088                return -1;
1089            }
1090
1091            pos += bytesRead;
1092            count(bytesRead);
1093            current.bytesReadFromStream += bytesRead;
1094            return bytesRead;
1095        }
1096
1097        @Override
1098        public long skip(final long n) throws IOException {
1099            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1100            final long skippedBytes = in.skip(toSkip);
1101            pos += skippedBytes;
1102            return skippedBytes;
1103        }
1104    
1105        @Override
1106        public int available() throws IOException {
1107            if (max >= 0 && pos >= max) {
1108                return 0;
1109            }
1110            return in.available();
1111        }
1112    }
1113}