001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.nio.ByteBuffer; 028import java.util.zip.CRC32; 029import java.util.zip.DataFormatException; 030import java.util.zip.Inflater; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipException; 033 034import org.apache.commons.compress.archivers.ArchiveEntry; 035import org.apache.commons.compress.archivers.ArchiveInputStream; 036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 037import org.apache.commons.compress.utils.ArchiveUtils; 038import org.apache.commons.compress.utils.IOUtils; 039 040import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 041import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 044 045/** 046 * Implements an input stream that can read Zip archives. 047 * 048 * <p>As of Apache Commons Compress it transparently supports Zip64 049 * extensions and thus individual entries and archives larger than 4 050 * GB or with more than 65536 entries.</p> 051 * 052 * <p>The {@link ZipFile} class is preferred when reading from files 053 * as {@link ZipArchiveInputStream} is limited by not being able to 054 * read the central directory header before returning entries. In 055 * particular {@link ZipArchiveInputStream}</p> 056 * 057 * <ul> 058 * 059 * <li>may return entries that are not part of the central directory 060 * at all and shouldn't be considered part of the archive.</li> 061 * 062 * <li>may return several entries with the same name.</li> 063 * 064 * <li>will not return internal or external attributes.</li> 065 * 066 * <li>may return incomplete extra field data.</li> 067 * 068 * <li>may return unknown sizes and CRC values for entries until the 069 * next entry has been reached if the archive uses the data 070 * descriptor feature.</li> 071 * 072 * </ul> 073 * 074 * @see ZipFile 075 * @NotThreadSafe 076 */ 077public class ZipArchiveInputStream extends ArchiveInputStream { 078 079 /** The zip encoding to use for filenames and the file comment. */ 080 private final ZipEncoding zipEncoding; 081 082 // the provided encoding (for unit tests) 083 final String encoding; 084 085 /** Whether to look for and use Unicode extra fields. */ 086 private final boolean useUnicodeExtraFields; 087 088 /** Wrapped stream, will always be a PushbackInputStream. */ 089 private final InputStream in; 090 091 /** Inflater used for all deflated entries. */ 092 private final Inflater inf = new Inflater(true); 093 094 /** Buffer used to read from the wrapped stream. */ 095 private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE); 096 097 /** The entry that is currently being read. */ 098 private CurrentEntry current = null; 099 100 /** Whether the stream has been closed. */ 101 private boolean closed = false; 102 103 /** Whether the stream has reached the central directory - and thus found all entries. */ 104 private boolean hitCentralDirectory = false; 105 106 /** 107 * When reading a stored entry that uses the data descriptor this 108 * stream has to read the full entry and caches it. This is the 109 * cache. 110 */ 111 private ByteArrayInputStream lastStoredEntry = null; 112 113 /** Whether the stream will try to read STORED entries that use a data descriptor. */ 114 private boolean allowStoredEntriesWithDataDescriptor = false; 115 116 private static final int LFH_LEN = 30; 117 /* 118 local file header signature WORD 119 version needed to extract SHORT 120 general purpose bit flag SHORT 121 compression method SHORT 122 last mod file time SHORT 123 last mod file date SHORT 124 crc-32 WORD 125 compressed size WORD 126 uncompressed size WORD 127 file name length SHORT 128 extra field length SHORT 129 */ 130 131 private static final int CFH_LEN = 46; 132 /* 133 central file header signature WORD 134 version made by SHORT 135 version needed to extract SHORT 136 general purpose bit flag SHORT 137 compression method SHORT 138 last mod file time SHORT 139 last mod file date SHORT 140 crc-32 WORD 141 compressed size WORD 142 uncompressed size WORD 143 file name length SHORT 144 extra field length SHORT 145 file comment length SHORT 146 disk number start SHORT 147 internal file attributes SHORT 148 external file attributes WORD 149 relative offset of local header WORD 150 */ 151 152 private static final long TWO_EXP_32 = ZIP64_MAGIC + 1; 153 154 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 155 private final byte[] LFH_BUF = new byte[LFH_LEN]; 156 private final byte[] SKIP_BUF = new byte[1024]; 157 private final byte[] SHORT_BUF = new byte[SHORT]; 158 private final byte[] WORD_BUF = new byte[WORD]; 159 private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD]; 160 161 private int entriesRead = 0; 162 163 /** 164 * Create an instance using UTF-8 encoding 165 * @param inputStream the stream to wrap 166 */ 167 public ZipArchiveInputStream(final InputStream inputStream) { 168 this(inputStream, ZipEncodingHelper.UTF8); 169 } 170 171 /** 172 * Create an instance using the specified encoding 173 * @param inputStream the stream to wrap 174 * @param encoding the encoding to use for file names, use null 175 * for the platform's default encoding 176 * @since 1.5 177 */ 178 public ZipArchiveInputStream(final InputStream inputStream, final String encoding) { 179 this(inputStream, encoding, true); 180 } 181 182 /** 183 * Create an instance using the specified encoding 184 * @param inputStream the stream to wrap 185 * @param encoding the encoding to use for file names, use null 186 * for the platform's default encoding 187 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 188 * Extra Fields (if present) to set the file names. 189 */ 190 public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) { 191 this(inputStream, encoding, useUnicodeExtraFields, false); 192 } 193 194 /** 195 * Create an instance using the specified encoding 196 * @param inputStream the stream to wrap 197 * @param encoding the encoding to use for file names, use null 198 * for the platform's default encoding 199 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 200 * Extra Fields (if present) to set the file names. 201 * @param allowStoredEntriesWithDataDescriptor whether the stream 202 * will try to read STORED entries that use a data descriptor 203 * @since 1.1 204 */ 205 public ZipArchiveInputStream(final InputStream inputStream, 206 final String encoding, 207 final boolean useUnicodeExtraFields, 208 final boolean allowStoredEntriesWithDataDescriptor) { 209 this.encoding = encoding; 210 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 211 this.useUnicodeExtraFields = useUnicodeExtraFields; 212 in = new PushbackInputStream(inputStream, buf.capacity()); 213 this.allowStoredEntriesWithDataDescriptor = 214 allowStoredEntriesWithDataDescriptor; 215 // haven't read anything so far 216 buf.limit(0); 217 } 218 219 public ZipArchiveEntry getNextZipEntry() throws IOException { 220 boolean firstEntry = true; 221 if (closed || hitCentralDirectory) { 222 return null; 223 } 224 if (current != null) { 225 closeEntry(); 226 firstEntry = false; 227 } 228 229 try { 230 if (firstEntry) { 231 // split archives have a special signature before the 232 // first local file header - look for it and fail with 233 // the appropriate error message if this is a split 234 // archive. 235 readFirstLocalFileHeader(LFH_BUF); 236 } else { 237 readFully(LFH_BUF); 238 } 239 } catch (final EOFException e) { 240 return null; 241 } 242 243 final ZipLong sig = new ZipLong(LFH_BUF); 244 if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) { 245 hitCentralDirectory = true; 246 skipRemainderOfArchive(); 247 } 248 if (!sig.equals(ZipLong.LFH_SIG)) { 249 return null; 250 } 251 252 int off = WORD; 253 current = new CurrentEntry(); 254 255 final int versionMadeBy = ZipShort.getValue(LFH_BUF, off); 256 off += SHORT; 257 current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK); 258 259 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off); 260 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 261 final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 262 current.hasDataDescriptor = gpFlag.usesDataDescriptor(); 263 current.entry.setGeneralPurposeBit(gpFlag); 264 265 off += SHORT; 266 267 current.entry.setMethod(ZipShort.getValue(LFH_BUF, off)); 268 off += SHORT; 269 270 final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off)); 271 current.entry.setTime(time); 272 off += WORD; 273 274 ZipLong size = null, cSize = null; 275 if (!current.hasDataDescriptor) { 276 current.entry.setCrc(ZipLong.getValue(LFH_BUF, off)); 277 off += WORD; 278 279 cSize = new ZipLong(LFH_BUF, off); 280 off += WORD; 281 282 size = new ZipLong(LFH_BUF, off); 283 off += WORD; 284 } else { 285 off += 3 * WORD; 286 } 287 288 final int fileNameLen = ZipShort.getValue(LFH_BUF, off); 289 290 off += SHORT; 291 292 final int extraLen = ZipShort.getValue(LFH_BUF, off); 293 off += SHORT; 294 295 final byte[] fileName = new byte[fileNameLen]; 296 readFully(fileName); 297 current.entry.setName(entryEncoding.decode(fileName), fileName); 298 299 final byte[] extraData = new byte[extraLen]; 300 readFully(extraData); 301 current.entry.setExtra(extraData); 302 303 if (!hasUTF8Flag && useUnicodeExtraFields) { 304 ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null); 305 } 306 307 processZip64Extra(size, cSize); 308 309 if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) { 310 if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) { 311 current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize())); 312 } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) { 313 current.in = new ExplodingInputStream( 314 current.entry.getGeneralPurposeBit().getSlidingDictionarySize(), 315 current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), 316 new BoundedInputStream(in, current.entry.getCompressedSize())); 317 } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 318 current.in = new BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize())); 319 } 320 } 321 322 entriesRead++; 323 return current.entry; 324 } 325 326 /** 327 * Fills the given array with the first local file header and 328 * deals with splitting/spanning markers that may prefix the first 329 * LFH. 330 */ 331 private void readFirstLocalFileHeader(final byte[] lfh) throws IOException { 332 readFully(lfh); 333 final ZipLong sig = new ZipLong(lfh); 334 if (sig.equals(ZipLong.DD_SIG)) { 335 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING); 336 } 337 338 if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) { 339 // The archive is not really split as only one segment was 340 // needed in the end. Just skip over the marker. 341 final byte[] missedLfhBytes = new byte[4]; 342 readFully(missedLfhBytes); 343 System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4); 344 System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4); 345 } 346 } 347 348 /** 349 * Records whether a Zip64 extra is present and sets the size 350 * information from it if sizes are 0xFFFFFFFF and the entry 351 * doesn't use a data descriptor. 352 */ 353 private void processZip64Extra(final ZipLong size, final ZipLong cSize) { 354 final Zip64ExtendedInformationExtraField z64 = 355 (Zip64ExtendedInformationExtraField) 356 current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 357 current.usesZip64 = z64 != null; 358 if (!current.hasDataDescriptor) { 359 if (z64 != null // same as current.usesZip64 but avoids NPE warning 360 && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) { 361 current.entry.setCompressedSize(z64.getCompressedSize().getLongValue()); 362 current.entry.setSize(z64.getSize().getLongValue()); 363 } else { 364 current.entry.setCompressedSize(cSize.getValue()); 365 current.entry.setSize(size.getValue()); 366 } 367 } 368 } 369 370 @Override 371 public ArchiveEntry getNextEntry() throws IOException { 372 return getNextZipEntry(); 373 } 374 375 /** 376 * Whether this class is able to read the given entry. 377 * 378 * <p>May return false if it is set up to use encryption or a 379 * compression method that hasn't been implemented yet.</p> 380 * @since 1.1 381 */ 382 @Override 383 public boolean canReadEntryData(final ArchiveEntry ae) { 384 if (ae instanceof ZipArchiveEntry) { 385 final ZipArchiveEntry ze = (ZipArchiveEntry) ae; 386 return ZipUtil.canHandleEntryData(ze) 387 && supportsDataDescriptorFor(ze); 388 389 } 390 return false; 391 } 392 393 @Override 394 public int read(final byte[] buffer, final int offset, final int length) throws IOException { 395 if (closed) { 396 throw new IOException("The stream is closed"); 397 } 398 399 if (current == null) { 400 return -1; 401 } 402 403 // avoid int overflow, check null buffer 404 if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) { 405 throw new ArrayIndexOutOfBoundsException(); 406 } 407 408 ZipUtil.checkRequestedFeatures(current.entry); 409 if (!supportsDataDescriptorFor(current.entry)) { 410 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR, 411 current.entry); 412 } 413 414 int read; 415 if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { 416 read = readStored(buffer, offset, length); 417 } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { 418 read = readDeflated(buffer, offset, length); 419 } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() 420 || current.entry.getMethod() == ZipMethod.IMPLODING.getCode() 421 || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 422 read = current.in.read(buffer, offset, length); 423 } else { 424 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), 425 current.entry); 426 } 427 428 if (read >= 0) { 429 current.crc.update(buffer, offset, read); 430 } 431 432 return read; 433 } 434 435 /** 436 * Implementation of read for STORED entries. 437 */ 438 private int readStored(final byte[] buffer, final int offset, final int length) throws IOException { 439 440 if (current.hasDataDescriptor) { 441 if (lastStoredEntry == null) { 442 readStoredEntry(); 443 } 444 return lastStoredEntry.read(buffer, offset, length); 445 } 446 447 final long csize = current.entry.getSize(); 448 if (current.bytesRead >= csize) { 449 return -1; 450 } 451 452 if (buf.position() >= buf.limit()) { 453 buf.position(0); 454 final int l = in.read(buf.array()); 455 if (l == -1) { 456 buf.limit(0); 457 throw new IOException("Truncated ZIP file"); 458 } 459 buf.limit(l); 460 461 count(l); 462 current.bytesReadFromStream += l; 463 } 464 465 int toRead = Math.min(buf.remaining(), length); 466 if ((csize - current.bytesRead) < toRead) { 467 // if it is smaller than toRead then it fits into an int 468 toRead = (int) (csize - current.bytesRead); 469 } 470 buf.get(buffer, offset, toRead); 471 current.bytesRead += toRead; 472 return toRead; 473 } 474 475 /** 476 * Implementation of read for DEFLATED entries. 477 */ 478 private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException { 479 final int read = readFromInflater(buffer, offset, length); 480 if (read <= 0) { 481 if (inf.finished()) { 482 return -1; 483 } else if (inf.needsDictionary()) { 484 throw new ZipException("This archive needs a preset dictionary" 485 + " which is not supported by Commons" 486 + " Compress."); 487 } else if (read == -1) { 488 throw new IOException("Truncated ZIP file"); 489 } 490 } 491 return read; 492 } 493 494 /** 495 * Potentially reads more bytes to fill the inflater's buffer and 496 * reads from it. 497 */ 498 private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException { 499 int read = 0; 500 do { 501 if (inf.needsInput()) { 502 final int l = fill(); 503 if (l > 0) { 504 current.bytesReadFromStream += buf.limit(); 505 } else if (l == -1) { 506 return -1; 507 } else { 508 break; 509 } 510 } 511 try { 512 read = inf.inflate(buffer, offset, length); 513 } catch (final DataFormatException e) { 514 throw (IOException) new ZipException(e.getMessage()).initCause(e); 515 } 516 } while (read == 0 && inf.needsInput()); 517 return read; 518 } 519 520 @Override 521 public void close() throws IOException { 522 if (!closed) { 523 closed = true; 524 try { 525 in.close(); 526 } finally { 527 inf.end(); 528 } 529 } 530 } 531 532 /** 533 * Skips over and discards value bytes of data from this input 534 * stream. 535 * 536 * <p>This implementation may end up skipping over some smaller 537 * number of bytes, possibly 0, if and only if it reaches the end 538 * of the underlying stream.</p> 539 * 540 * <p>The actual number of bytes skipped is returned.</p> 541 * 542 * @param value the number of bytes to be skipped. 543 * @return the actual number of bytes skipped. 544 * @throws IOException - if an I/O error occurs. 545 * @throws IllegalArgumentException - if value is negative. 546 */ 547 @Override 548 public long skip(final long value) throws IOException { 549 if (value >= 0) { 550 long skipped = 0; 551 while (skipped < value) { 552 final long rem = value - skipped; 553 final int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length)); 554 if (x == -1) { 555 return skipped; 556 } 557 skipped += x; 558 } 559 return skipped; 560 } 561 throw new IllegalArgumentException(); 562 } 563 564 /** 565 * Checks if the signature matches what is expected for a zip file. 566 * Does not currently handle self-extracting zips which may have arbitrary 567 * leading content. 568 * 569 * @param signature the bytes to check 570 * @param length the number of bytes to check 571 * @return true, if this stream is a zip archive stream, false otherwise 572 */ 573 public static boolean matches(final byte[] signature, final int length) { 574 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 575 return false; 576 } 577 578 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 579 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip 580 || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip 581 || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes()); 582 } 583 584 private static boolean checksig(final byte[] signature, final byte[] expected) { 585 for (int i = 0; i < expected.length; i++) { 586 if (signature[i] != expected[i]) { 587 return false; 588 } 589 } 590 return true; 591 } 592 593 /** 594 * Closes the current ZIP archive entry and positions the underlying 595 * stream to the beginning of the next entry. All per-entry variables 596 * and data structures are cleared. 597 * <p> 598 * If the compressed size of this entry is included in the entry header, 599 * then any outstanding bytes are simply skipped from the underlying 600 * stream without uncompressing them. This allows an entry to be safely 601 * closed even if the compression method is unsupported. 602 * <p> 603 * In case we don't know the compressed size of this entry or have 604 * already buffered too much data from the underlying stream to support 605 * uncompression, then the uncompression process is completed and the 606 * end position of the stream is adjusted based on the result of that 607 * process. 608 * 609 * @throws IOException if an error occurs 610 */ 611 private void closeEntry() throws IOException { 612 if (closed) { 613 throw new IOException("The stream is closed"); 614 } 615 if (current == null) { 616 return; 617 } 618 619 // Ensure all entry bytes are read 620 if (current.bytesReadFromStream <= current.entry.getCompressedSize() 621 && !current.hasDataDescriptor) { 622 drainCurrentEntryData(); 623 } else { 624 skip(Long.MAX_VALUE); 625 626 final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED 627 ? getBytesInflated() : current.bytesRead; 628 629 // this is at most a single read() operation and can't 630 // exceed the range of int 631 final int diff = (int) (current.bytesReadFromStream - inB); 632 633 // Pushback any required bytes 634 if (diff > 0) { 635 pushback(buf.array(), buf.limit() - diff, diff); 636 } 637 } 638 639 if (lastStoredEntry == null && current.hasDataDescriptor) { 640 readDataDescriptor(); 641 } 642 643 inf.reset(); 644 buf.clear().flip(); 645 current = null; 646 lastStoredEntry = null; 647 } 648 649 /** 650 * Read all data of the current entry from the underlying stream 651 * that hasn't been read, yet. 652 */ 653 private void drainCurrentEntryData() throws IOException { 654 long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream; 655 while (remaining > 0) { 656 final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining)); 657 if (n < 0) { 658 throw new EOFException("Truncated ZIP entry: " 659 + ArchiveUtils.sanitize(current.entry.getName())); 660 } 661 count(n); 662 remaining -= n; 663 } 664 } 665 666 /** 667 * Get the number of bytes Inflater has actually processed. 668 * 669 * <p>for Java < Java7 the getBytes* methods in 670 * Inflater/Deflater seem to return unsigned ints rather than 671 * longs that start over with 0 at 2^32.</p> 672 * 673 * <p>The stream knows how many bytes it has read, but not how 674 * many the Inflater actually consumed - it should be between the 675 * total number of bytes read for the entry and the total number 676 * minus the last read operation. Here we just try to make the 677 * value close enough to the bytes we've read by assuming the 678 * number of bytes consumed must be smaller than (or equal to) the 679 * number of bytes read but not smaller by more than 2^32.</p> 680 */ 681 private long getBytesInflated() { 682 long inB = inf.getBytesRead(); 683 if (current.bytesReadFromStream >= TWO_EXP_32) { 684 while (inB + TWO_EXP_32 <= current.bytesReadFromStream) { 685 inB += TWO_EXP_32; 686 } 687 } 688 return inB; 689 } 690 691 private int fill() throws IOException { 692 if (closed) { 693 throw new IOException("The stream is closed"); 694 } 695 final int length = in.read(buf.array()); 696 if (length > 0) { 697 buf.limit(length); 698 count(buf.limit()); 699 inf.setInput(buf.array(), 0, buf.limit()); 700 } 701 return length; 702 } 703 704 private void readFully(final byte[] b) throws IOException { 705 final int count = IOUtils.readFully(in, b); 706 count(count); 707 if (count < b.length) { 708 throw new EOFException(); 709 } 710 } 711 712 private void readDataDescriptor() throws IOException { 713 readFully(WORD_BUF); 714 ZipLong val = new ZipLong(WORD_BUF); 715 if (ZipLong.DD_SIG.equals(val)) { 716 // data descriptor with signature, skip sig 717 readFully(WORD_BUF); 718 val = new ZipLong(WORD_BUF); 719 } 720 current.entry.setCrc(val.getValue()); 721 722 // if there is a ZIP64 extra field, sizes are eight bytes 723 // each, otherwise four bytes each. Unfortunately some 724 // implementations - namely Java7 - use eight bytes without 725 // using a ZIP64 extra field - 726 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588 727 728 // just read 16 bytes and check whether bytes nine to twelve 729 // look like one of the signatures of what could follow a data 730 // descriptor (ignoring archive decryption headers for now). 731 // If so, push back eight bytes and assume sizes are four 732 // bytes, otherwise sizes are eight bytes each. 733 readFully(TWO_DWORD_BUF); 734 final ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD); 735 if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) { 736 pushback(TWO_DWORD_BUF, DWORD, DWORD); 737 current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF)); 738 current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD)); 739 } else { 740 current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF)); 741 current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD)); 742 } 743 } 744 745 /** 746 * Whether this entry requires a data descriptor this library can work with. 747 * 748 * @return true if allowStoredEntriesWithDataDescriptor is true, 749 * the entry doesn't require any data descriptor or the method is 750 * DEFLATED. 751 */ 752 private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { 753 return !entry.getGeneralPurposeBit().usesDataDescriptor() 754 755 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED) 756 || entry.getMethod() == ZipEntry.DEFLATED; 757 } 758 759 /** 760 * Caches a stored entry that uses the data descriptor. 761 * 762 * <ul> 763 * <li>Reads a stored entry until the signature of a local file 764 * header, central directory header or data descriptor has been 765 * found.</li> 766 * <li>Stores all entry data in lastStoredEntry.</p> 767 * <li>Rewinds the stream to position at the data 768 * descriptor.</li> 769 * <li>reads the data descriptor</li> 770 * </ul> 771 * 772 * <p>After calling this method the entry should know its size, 773 * the entry's data is cached and the stream is positioned at the 774 * next local file or central directory header.</p> 775 */ 776 private void readStoredEntry() throws IOException { 777 final ByteArrayOutputStream bos = new ByteArrayOutputStream(); 778 int off = 0; 779 boolean done = false; 780 781 // length of DD without signature 782 final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD; 783 784 while (!done) { 785 final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 786 if (r <= 0) { 787 // read the whole archive without ever finding a 788 // central directory 789 throw new IOException("Truncated ZIP file"); 790 } 791 if (r + off < 4) { 792 // buffer too small to check for a signature, loop 793 off += r; 794 continue; 795 } 796 797 done = bufferContainsSignature(bos, off, r, ddLen); 798 if (!done) { 799 off = cacheBytesRead(bos, off, r, ddLen); 800 } 801 } 802 803 final byte[] b = bos.toByteArray(); 804 lastStoredEntry = new ByteArrayInputStream(b); 805 } 806 807 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 808 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 809 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 810 811 /** 812 * Checks whether the current buffer contains the signature of a 813 * "data descriptor", "local file header" or 814 * "central directory entry". 815 * 816 * <p>If it contains such a signature, reads the data descriptor 817 * and positions the stream right after the data descriptor.</p> 818 */ 819 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 820 throws IOException { 821 822 boolean done = false; 823 int readTooMuch = 0; 824 for (int i = 0; !done && i < lastRead - 4; i++) { 825 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 826 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 827 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 828 // found a LFH or CFH: 829 readTooMuch = offset + lastRead - i - expectedDDLen; 830 done = true; 831 } 832 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 833 // found DD: 834 readTooMuch = offset + lastRead - i; 835 done = true; 836 } 837 if (done) { 838 // * push back bytes read in excess as well as the data 839 // descriptor 840 // * copy the remaining bytes to cache 841 // * read data descriptor 842 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch); 843 bos.write(buf.array(), 0, i); 844 readDataDescriptor(); 845 } 846 } 847 } 848 return done; 849 } 850 851 /** 852 * If the last read bytes could hold a data descriptor and an 853 * incomplete signature then save the last bytes to the front of 854 * the buffer and cache everything in front of the potential data 855 * descriptor into the given ByteArrayOutputStream. 856 * 857 * <p>Data descriptor plus incomplete signature (3 bytes in the 858 * worst case) can be 20 bytes max.</p> 859 */ 860 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 861 final int cacheable = offset + lastRead - expecteDDLen - 3; 862 if (cacheable > 0) { 863 bos.write(buf.array(), 0, cacheable); 864 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 865 offset = expecteDDLen + 3; 866 } else { 867 offset += lastRead; 868 } 869 return offset; 870 } 871 872 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 873 ((PushbackInputStream) in).unread(buf, offset, length); 874 pushedBackBytes(length); 875 } 876 877 // End of Central Directory Record 878 // end of central dir signature WORD 879 // number of this disk SHORT 880 // number of the disk with the 881 // start of the central directory SHORT 882 // total number of entries in the 883 // central directory on this disk SHORT 884 // total number of entries in 885 // the central directory SHORT 886 // size of the central directory WORD 887 // offset of start of central 888 // directory with respect to 889 // the starting disk number WORD 890 // .ZIP file comment length SHORT 891 // .ZIP file comment up to 64KB 892 // 893 894 /** 895 * Reads the stream until it find the "End of central directory 896 * record" and consumes it as well. 897 */ 898 private void skipRemainderOfArchive() throws IOException { 899 // skip over central directory. One LFH has been read too much 900 // already. The calculation discounts file names and extra 901 // data so it will be too short. 902 realSkip(entriesRead * CFH_LEN - LFH_LEN); 903 findEocdRecord(); 904 realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */); 905 readFully(SHORT_BUF); 906 // file comment 907 realSkip(ZipShort.getValue(SHORT_BUF)); 908 } 909 910 /** 911 * Reads forward until the signature of the "End of central 912 * directory" record is found. 913 */ 914 private void findEocdRecord() throws IOException { 915 int currentByte = -1; 916 boolean skipReadCall = false; 917 while (skipReadCall || (currentByte = readOneByte()) > -1) { 918 skipReadCall = false; 919 if (!isFirstByteOfEocdSig(currentByte)) { 920 continue; 921 } 922 currentByte = readOneByte(); 923 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) { 924 if (currentByte == -1) { 925 break; 926 } 927 skipReadCall = isFirstByteOfEocdSig(currentByte); 928 continue; 929 } 930 currentByte = readOneByte(); 931 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) { 932 if (currentByte == -1) { 933 break; 934 } 935 skipReadCall = isFirstByteOfEocdSig(currentByte); 936 continue; 937 } 938 currentByte = readOneByte(); 939 if (currentByte == -1 940 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) { 941 break; 942 } 943 skipReadCall = isFirstByteOfEocdSig(currentByte); 944 } 945 } 946 947 /** 948 * Skips bytes by reading from the underlying stream rather than 949 * the (potentially inflating) archive stream - which {@link 950 * #skip} would do. 951 * 952 * Also updates bytes-read counter. 953 */ 954 private void realSkip(final long value) throws IOException { 955 if (value >= 0) { 956 long skipped = 0; 957 while (skipped < value) { 958 final long rem = value - skipped; 959 final int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length)); 960 if (x == -1) { 961 return; 962 } 963 count(x); 964 skipped += x; 965 } 966 return; 967 } 968 throw new IllegalArgumentException(); 969 } 970 971 /** 972 * Reads bytes by reading from the underlying stream rather than 973 * the (potentially inflating) archive stream - which {@link #read} would do. 974 * 975 * Also updates bytes-read counter. 976 */ 977 private int readOneByte() throws IOException { 978 final int b = in.read(); 979 if (b != -1) { 980 count(1); 981 } 982 return b; 983 } 984 985 private boolean isFirstByteOfEocdSig(final int b) { 986 return b == ZipArchiveOutputStream.EOCD_SIG[0]; 987 } 988 989 /** 990 * Structure collecting information for the entry that is 991 * currently being read. 992 */ 993 private static final class CurrentEntry { 994 995 /** 996 * Current ZIP entry. 997 */ 998 private final ZipArchiveEntry entry = new ZipArchiveEntry(); 999 1000 /** 1001 * Does the entry use a data descriptor? 1002 */ 1003 private boolean hasDataDescriptor; 1004 1005 /** 1006 * Does the entry have a ZIP64 extended information extra field. 1007 */ 1008 private boolean usesZip64; 1009 1010 /** 1011 * Number of bytes of entry content read by the client if the 1012 * entry is STORED. 1013 */ 1014 private long bytesRead; 1015 1016 /** 1017 * Number of bytes of entry content read so from the stream. 1018 * 1019 * <p>This may be more than the actual entry's length as some 1020 * stuff gets buffered up and needs to be pushed back when the 1021 * end of the entry has been reached.</p> 1022 */ 1023 private long bytesReadFromStream; 1024 1025 /** 1026 * The checksum calculated as the current entry is read. 1027 */ 1028 private final CRC32 crc = new CRC32(); 1029 1030 /** 1031 * The input stream decompressing the data for shrunk and imploded entries. 1032 */ 1033 private InputStream in; 1034 } 1035 1036 /** 1037 * Bounded input stream adapted from commons-io 1038 */ 1039 private class BoundedInputStream extends InputStream { 1040 1041 /** the wrapped input stream */ 1042 private final InputStream in; 1043 1044 /** the max length to provide */ 1045 private final long max; 1046 1047 /** the number of bytes already returned */ 1048 private long pos = 0; 1049 1050 /** 1051 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1052 * stream and limits it to a certain size. 1053 * 1054 * @param in The wrapped input stream 1055 * @param size The maximum number of bytes to return 1056 */ 1057 public BoundedInputStream(final InputStream in, final long size) { 1058 this.max = size; 1059 this.in = in; 1060 } 1061 1062 @Override 1063 public int read() throws IOException { 1064 if (max >= 0 && pos >= max) { 1065 return -1; 1066 } 1067 final int result = in.read(); 1068 pos++; 1069 count(1); 1070 current.bytesReadFromStream++; 1071 return result; 1072 } 1073 1074 @Override 1075 public int read(final byte[] b) throws IOException { 1076 return this.read(b, 0, b.length); 1077 } 1078 1079 @Override 1080 public int read(final byte[] b, final int off, final int len) throws IOException { 1081 if (max >= 0 && pos >= max) { 1082 return -1; 1083 } 1084 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1085 final int bytesRead = in.read(b, off, (int) maxRead); 1086 1087 if (bytesRead == -1) { 1088 return -1; 1089 } 1090 1091 pos += bytesRead; 1092 count(bytesRead); 1093 current.bytesReadFromStream += bytesRead; 1094 return bytesRead; 1095 } 1096 1097 @Override 1098 public long skip(final long n) throws IOException { 1099 final long toSkip = max >= 0 ? Math.min(n, max - pos) : n; 1100 final long skippedBytes = in.skip(toSkip); 1101 pos += skippedBytes; 1102 return skippedBytes; 1103 } 1104 1105 @Override 1106 public int available() throws IOException { 1107 if (max >= 0 && pos >= max) { 1108 return 0; 1109 } 1110 return in.available(); 1111 } 1112 } 1113}