001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.EOFException; 024import java.io.File; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.SequenceInputStream; 028import java.nio.ByteBuffer; 029import java.nio.channels.FileChannel; 030import java.nio.channels.SeekableByteChannel; 031import java.nio.file.Files; 032import java.nio.file.StandardOpenOption; 033import java.util.Arrays; 034import java.util.Collections; 035import java.util.Comparator; 036import java.util.Enumeration; 037import java.util.EnumSet; 038import java.util.HashMap; 039import java.util.LinkedList; 040import java.util.List; 041import java.util.Map; 042import java.util.zip.Inflater; 043import java.util.zip.InflaterInputStream; 044import java.util.zip.ZipException; 045 046import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 047import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 048import org.apache.commons.compress.utils.IOUtils; 049 050import 
static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 051import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 052import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 053import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 054import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 055 056/** 057 * Replacement for <code>java.util.zip.ZipFile</code>. 058 * 059 * <p>This class adds support for file name encodings other than UTF-8 060 * (which is required to work on ZIP files created by native zip tools) 061 * and is able to skip a preamble like the one found in self 062 * extracting archives. Furthermore it returns instances of 063 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 064 * instead of <code>java.util.zip.ZipEntry</code>.</p> 065 * 066 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 067 * have to reimplement all methods anyway. Like 068 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the 069 * covers and supports compressed and uncompressed entries. 
As of 070 * Apache Commons Compress 1.3 it also transparently supports Zip64 071 * extensions and thus individual entries and archives larger than 4 072 * GB or with more than 65536 entries.</p> 073 * 074 * <p>The method signatures mimic the ones of 075 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 076 * 077 * <ul> 078 * <li>There is no getName method.</li> 079 * <li>entries has been renamed to getEntries.</li> 080 * <li>getEntries and getEntry return 081 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 082 * instances.</li> 083 * <li>close is allowed to throw IOException.</li> 084 * </ul> 085 * 086 */ 087public class ZipFile implements Closeable { 088 private static final int HASH_SIZE = 509; 089 static final int NIBLET_MASK = 0x0f; 090 static final int BYTE_SHIFT = 8; 091 private static final int POS_0 = 0; 092 private static final int POS_1 = 1; 093 private static final int POS_2 = 2; 094 private static final int POS_3 = 3; 095 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 096 097 /** 098 * List of entries in the order they appear inside the central 099 * directory. 100 */ 101 private final List<ZipArchiveEntry> entries = 102 new LinkedList<>(); 103 104 /** 105 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 106 */ 107 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 108 new HashMap<>(HASH_SIZE); 109 110 /** 111 * The encoding to use for filenames and the file comment. 112 * 113 * <p>For a list of possible values see <a 114 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 115 * Defaults to UTF-8.</p> 116 */ 117 private final String encoding; 118 119 /** 120 * The zip encoding to use for filenames and the file comment. 121 */ 122 private final ZipEncoding zipEncoding; 123 124 /** 125 * File name of actual source. 
126 */ 127 private final String archiveName; 128 129 /** 130 * The actual data source. 131 */ 132 private final SeekableByteChannel archive; 133 134 /** 135 * Whether to look for and use Unicode extra fields. 136 */ 137 private final boolean useUnicodeExtraFields; 138 139 /** 140 * Whether the file is closed. 141 */ 142 private volatile boolean closed = true; 143 144 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 145 private final byte[] dwordBuf = new byte[DWORD]; 146 private final byte[] wordBuf = new byte[WORD]; 147 private final byte[] cfhBuf = new byte[CFH_LEN]; 148 private final byte[] shortBuf = new byte[SHORT]; 149 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 150 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 151 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 152 153 /** 154 * Opens the given file for reading, assuming "UTF8" for file names. 155 * 156 * @param f the archive. 157 * 158 * @throws IOException if an error occurs while reading the file. 159 */ 160 public ZipFile(final File f) throws IOException { 161 this(f, ZipEncodingHelper.UTF8); 162 } 163 164 /** 165 * Opens the given file for reading, assuming "UTF8". 166 * 167 * @param name name of the archive. 168 * 169 * @throws IOException if an error occurs while reading the file. 170 */ 171 public ZipFile(final String name) throws IOException { 172 this(new File(name), ZipEncodingHelper.UTF8); 173 } 174 175 /** 176 * Opens the given file for reading, assuming the specified 177 * encoding for file names, scanning unicode extra fields. 178 * 179 * @param name name of the archive. 180 * @param encoding the encoding to use for file names, use null 181 * for the platform's default encoding 182 * 183 * @throws IOException if an error occurs while reading the file. 
184 */ 185 public ZipFile(final String name, final String encoding) throws IOException { 186 this(new File(name), encoding, true); 187 } 188 189 /** 190 * Opens the given file for reading, assuming the specified 191 * encoding for file names and scanning for unicode extra fields. 192 * 193 * @param f the archive. 194 * @param encoding the encoding to use for file names, use null 195 * for the platform's default encoding 196 * 197 * @throws IOException if an error occurs while reading the file. 198 */ 199 public ZipFile(final File f, final String encoding) throws IOException { 200 this(f, encoding, true); 201 } 202 203 /** 204 * Opens the given file for reading, assuming the specified 205 * encoding for file names. 206 * 207 * @param f the archive. 208 * @param encoding the encoding to use for file names, use null 209 * for the platform's default encoding 210 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 211 * Extra Fields (if present) to set the file names. 212 * 213 * @throws IOException if an error occurs while reading the file. 214 */ 215 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) 216 throws IOException { 217 this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), 218 f.getAbsolutePath(), encoding, useUnicodeExtraFields, true); 219 } 220 221 /** 222 * Opens the given channel for reading, assuming "UTF8" for file names. 223 * 224 * <p>{@link 225 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 226 * allows you to read from an in-memory archive.</p> 227 * 228 * @param channel the archive. 229 * 230 * @throws IOException if an error occurs while reading the file. 231 * @since 1.13 232 */ 233 public ZipFile(final SeekableByteChannel channel) 234 throws IOException { 235 this(channel, "unknown archive", ZipEncodingHelper.UTF8, true); 236 } 237 238 /** 239 * Opens the given channel for reading, assuming the specified 240 * encoding for file names. 
241 * 242 * <p>{@link 243 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 244 * allows you to read from an in-memory archive.</p> 245 * 246 * @param channel the archive. 247 * @param encoding the encoding to use for file names, use null 248 * for the platform's default encoding 249 * 250 * @throws IOException if an error occurs while reading the file. 251 * @since 1.13 252 */ 253 public ZipFile(final SeekableByteChannel channel, final String encoding) 254 throws IOException { 255 this(channel, "unknown archive", encoding, true); 256 } 257 258 /** 259 * Opens the given channel for reading, assuming the specified 260 * encoding for file names. 261 * 262 * <p>{@link 263 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 264 * allows you to read from an in-memory archive.</p> 265 * 266 * @param channel the archive. 267 * @param archiveName name of the archive, used for error messages only. 268 * @param encoding the encoding to use for file names, use null 269 * for the platform's default encoding 270 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 271 * Extra Fields (if present) to set the file names. 272 * 273 * @throws IOException if an error occurs while reading the file. 
274 * @since 1.13 275 */ 276 public ZipFile(final SeekableByteChannel channel, final String archiveName, 277 final String encoding, final boolean useUnicodeExtraFields) 278 throws IOException { 279 this(channel, archiveName, encoding, useUnicodeExtraFields, false); 280 } 281 282 private ZipFile(final SeekableByteChannel channel, final String archiveName, 283 final String encoding, final boolean useUnicodeExtraFields, 284 final boolean closeOnError) 285 throws IOException { 286 this.archiveName = archiveName; 287 this.encoding = encoding; 288 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 289 this.useUnicodeExtraFields = useUnicodeExtraFields; 290 archive = channel; 291 boolean success = false; 292 try { 293 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 294 populateFromCentralDirectory(); 295 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 296 success = true; 297 } finally { 298 closed = !success; 299 if (!success && closeOnError) { 300 IOUtils.closeQuietly(archive); 301 } 302 } 303 } 304 305 /** 306 * The encoding to use for filenames and the file comment. 307 * 308 * @return null if using the platform's default character encoding. 309 */ 310 public String getEncoding() { 311 return encoding; 312 } 313 314 /** 315 * Closes the archive. 316 * @throws IOException if an error occurs closing the archive. 317 */ 318 @Override 319 public void close() throws IOException { 320 // this flag is only written here and read in finalize() which 321 // can never be run in parallel. 322 // no synchronization needed. 323 closed = true; 324 325 archive.close(); 326 } 327 328 /** 329 * close a zipfile quietly; throw no io fault, do nothing 330 * on a null parameter 331 * @param zipfile file to close, can be null 332 */ 333 public static void closeQuietly(final ZipFile zipfile) { 334 IOUtils.closeQuietly(zipfile); 335 } 336 337 /** 338 * Returns all entries. 
339 * 340 * <p>Entries will be returned in the same order they appear 341 * within the archive's central directory.</p> 342 * 343 * @return all entries as {@link ZipArchiveEntry} instances 344 */ 345 public Enumeration<ZipArchiveEntry> getEntries() { 346 return Collections.enumeration(entries); 347 } 348 349 /** 350 * Returns all entries in physical order. 351 * 352 * <p>Entries will be returned in the same order their contents 353 * appear within the archive.</p> 354 * 355 * @return all entries as {@link ZipArchiveEntry} instances 356 * 357 * @since 1.1 358 */ 359 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 360 final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]); 361 Arrays.sort(allEntries, offsetComparator); 362 return Collections.enumeration(Arrays.asList(allEntries)); 363 } 364 365 /** 366 * Returns a named entry - or {@code null} if no entry by 367 * that name exists. 368 * 369 * <p>If multiple entries with the same name exist the first entry 370 * in the archive's central directory by that name is 371 * returned.</p> 372 * 373 * @param name name of the entry. 374 * @return the ZipArchiveEntry corresponding to the given name - or 375 * {@code null} if not present. 376 */ 377 public ZipArchiveEntry getEntry(final String name) { 378 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 379 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 380 } 381 382 /** 383 * Returns all named entries in the same order they appear within 384 * the archive's central directory. 385 * 386 * @param name name of the entry. 387 * @return the Iterable<ZipArchiveEntry> corresponding to the 388 * given name 389 * @since 1.6 390 */ 391 public Iterable<ZipArchiveEntry> getEntries(final String name) { 392 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 393 return entriesOfThatName != null ? 
entriesOfThatName 394 : Collections.<ZipArchiveEntry>emptyList(); 395 } 396 397 /** 398 * Returns all named entries in the same order their contents 399 * appear within the archive. 400 * 401 * @param name name of the entry. 402 * @return the Iterable<ZipArchiveEntry> corresponding to the 403 * given name 404 * @since 1.6 405 */ 406 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 407 ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; 408 if (nameMap.containsKey(name)) { 409 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 410 Arrays.sort(entriesOfThatName, offsetComparator); 411 } 412 return Arrays.asList(entriesOfThatName); 413 } 414 415 /** 416 * Whether this class is able to read the given entry. 417 * 418 * <p>May return false if it is set up to use encryption or a 419 * compression method that hasn't been implemented yet.</p> 420 * @since 1.1 421 * @param ze the entry 422 * @return whether this class is able to read the given entry. 423 */ 424 public boolean canReadEntryData(final ZipArchiveEntry ze) { 425 return ZipUtil.canHandleEntryData(ze); 426 } 427 428 /** 429 * Expose the raw stream of the archive entry (compressed form). 430 * 431 * <p>This method does not relate to how/if we understand the payload in the 432 * stream, since we really only intend to move it on to somewhere else.</p> 433 * 434 * @param ze The entry to get the stream for 435 * @return The raw input stream containing (possibly) compressed data. 436 * @since 1.11 437 */ 438 public InputStream getRawInputStream(final ZipArchiveEntry ze) { 439 if (!(ze instanceof Entry)) { 440 return null; 441 } 442 final long start = ze.getDataOffset(); 443 return createBoundedInputStream(start, ze.getCompressedSize()); 444 } 445 446 447 /** 448 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 449 * Compression and all other attributes will be as in this file. 
450 * <p>This method transfers entries based on the central directory of the zip file.</p> 451 * 452 * @param target The zipArchiveOutputStream to write the entries to 453 * @param predicate A predicate that selects which entries to write 454 * @throws IOException on error 455 */ 456 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 457 throws IOException { 458 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 459 while (src.hasMoreElements()) { 460 final ZipArchiveEntry entry = src.nextElement(); 461 if (predicate.test( entry)) { 462 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 463 } 464 } 465 } 466 467 /** 468 * Returns an InputStream for reading the contents of the given entry. 469 * 470 * @param ze the entry to get the stream for. 471 * @return a stream to read the entry from. 472 * @throws IOException if unable to create an input stream from the zipentry 473 */ 474 public InputStream getInputStream(final ZipArchiveEntry ze) 475 throws IOException { 476 if (!(ze instanceof Entry)) { 477 return null; 478 } 479 // cast validity is checked just above 480 ZipUtil.checkRequestedFeatures(ze); 481 final long start = ze.getDataOffset(); 482 483 // doesn't get closed if the method is not supported - which 484 // should never happen because of the checkRequestedFeatures 485 // call above 486 final InputStream is = 487 new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR 488 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 489 case STORED: 490 return is; 491 case UNSHRINKING: 492 return new UnshrinkingInputStream(is); 493 case IMPLODING: 494 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 495 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 496 case DEFLATED: 497 final Inflater inflater = new Inflater(true); 498 // Inflater with nowrap=true has this odd contract for a zero padding 499 // 
byte following the data stream; this used to be zlib's requirement 500 // and has been fixed a long time ago, but the contract persists so 501 // we comply. 502 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 503 return new InflaterInputStream(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), 504 inflater) { 505 @Override 506 public void close() throws IOException { 507 try { 508 super.close(); 509 } finally { 510 inflater.end(); 511 } 512 } 513 }; 514 case BZIP2: 515 return new BZip2CompressorInputStream(is); 516 case ENHANCED_DEFLATED: 517 return new Deflate64CompressorInputStream(is); 518 case AES_ENCRYPTED: 519 case EXPANDING_LEVEL_1: 520 case EXPANDING_LEVEL_2: 521 case EXPANDING_LEVEL_3: 522 case EXPANDING_LEVEL_4: 523 case JPEG: 524 case LZMA: 525 case PKWARE_IMPLODING: 526 case PPMD: 527 case TOKENIZATION: 528 case UNKNOWN: 529 case WAVPACK: 530 case XZ: 531 default: 532 throw new ZipException("Found unsupported compression method " 533 + ze.getMethod()); 534 } 535 } 536 537 /** 538 * <p> 539 * Convenience method to return the entry's content as a String if isUnixSymlink() 540 * returns true for it, otherwise returns null. 541 * </p> 542 * 543 * <p>This method assumes the symbolic link's file name uses the 544 * same encoding that as been specified for this ZipFile.</p> 545 * 546 * @param entry ZipArchiveEntry object that represents the symbolic link 547 * @return entry's content as a String 548 * @throws IOException problem with content's input stream 549 * @since 1.5 550 */ 551 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 552 if (entry != null && entry.isUnixSymlink()) { 553 try (InputStream in = getInputStream(entry)) { 554 return zipEncoding.decode(IOUtils.toByteArray(in)); 555 } 556 } 557 return null; 558 } 559 560 /** 561 * Ensures that the close method of this zipfile is called when 562 * there are no more references to it. 
563 * @see #close() 564 */ 565 @Override 566 protected void finalize() throws Throwable { 567 try { 568 if (!closed) { 569 System.err.println("Cleaning up unclosed ZipFile for archive " 570 + archiveName); 571 close(); 572 } 573 } finally { 574 super.finalize(); 575 } 576 } 577 578 /** 579 * Length of a "central directory" entry structure without file 580 * name, extra fields or comment. 581 */ 582 private static final int CFH_LEN = 583 /* version made by */ SHORT 584 /* version needed to extract */ + SHORT 585 /* general purpose bit flag */ + SHORT 586 /* compression method */ + SHORT 587 /* last mod file time */ + SHORT 588 /* last mod file date */ + SHORT 589 /* crc-32 */ + WORD 590 /* compressed size */ + WORD 591 /* uncompressed size */ + WORD 592 /* filename length */ + SHORT 593 /* extra field length */ + SHORT 594 /* file comment length */ + SHORT 595 /* disk number start */ + SHORT 596 /* internal file attributes */ + SHORT 597 /* external file attributes */ + WORD 598 /* relative offset of local header */ + WORD; 599 600 private static final long CFH_SIG = 601 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 602 603 /** 604 * Reads the central directory of the given archive and populates 605 * the internal tables with ZipArchiveEntry instances. 606 * 607 * <p>The ZipArchiveEntrys will know all data that can be obtained from 608 * the central directory alone, but not the data that requires the 609 * local file header or additional data to be read.</p> 610 * 611 * @return a map of zipentries that didn't have the language 612 * encoding flag set when read. 
*/
    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
        throws IOException {
        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
            new HashMap<>();

        positionAtCentralDirectory();

        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        long signature = ZipLong.getValue(wordBuf);

        if (signature != CFH_SIG) {
            if (startsWithLocalFileHeader()) {
                throw new IOException("central directory is empty, can't expand"
                                      + " corrupt archive.");
            }
            return noUTF8Flag;
        }

        // keep consuming entries for as long as each one is followed by
        // another central file header signature
        do {
            readCentralDirectoryEntry(noUTF8Flag);
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            signature = ZipLong.getValue(wordBuf);
        } while (signature == CFH_SIG);
        return noUTF8Flag;
    }

    /**
     * Reads an individual entry of the central directory, creates a
     * ZipArchiveEntry from it and adds it to the list of entries.
     *
     * <p>The signature of the entry must already have been consumed.</p>
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later. The current entry may be
     * added to this map.
     */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        final Entry entry = new Entry();
        int pos = 0;

        final int madeBy = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;
        entry.setVersionMadeBy(madeBy);
        entry.setPlatform((madeBy >> BYTE_SHIFT) & NIBLET_MASK);

        // version needed to extract
        entry.setVersionRequired(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        // general purpose bit flag; also decides how name and comment are decoded
        final GeneralPurposeBit flags = GeneralPurposeBit.parse(cfhBuf, pos);
        final boolean utf8Names = flags.usesUTF8ForNames();
        final ZipEncoding entryEncoding;
        if (utf8Names) {
            entryEncoding = ZipEncodingHelper.UTF8_ZIP_ENCODING;
            entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        } else {
            entryEncoding = zipEncoding;
        }
        entry.setGeneralPurposeBit(flags);
        entry.setRawFlag(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        //noinspection MagicConstant
        entry.setMethod(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        // last mod time and date are converted together as a single word
        entry.setTime(ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, pos)));
        pos += WORD;

        entry.setCrc(ZipLong.getValue(cfhBuf, pos));
        pos += WORD;

        entry.setCompressedSize(ZipLong.getValue(cfhBuf, pos));
        pos += WORD;

        entry.setSize(ZipLong.getValue(cfhBuf, pos));
        pos += WORD;

        final int nameLength = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;

        final int extraLength = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;

        final int commentLength = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;

        final int diskStart = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;

        entry.setInternalAttributes(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        entry.setExternalAttributes(ZipLong.getValue(cfhBuf, pos));
        pos += WORD;

        // the variable-length parts follow the fixed-size header in the channel
        final byte[] rawName = new byte[nameLength];
        IOUtils.readFully(archive, ByteBuffer.wrap(rawName));
        entry.setName(entryEncoding.decode(rawName), rawName);

        // offset of the local file header; the data offset is filled in later
        entry.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, pos));
        entries.add(entry);

        final byte[] rawExtra = new byte[extraLength];
        IOUtils.readFully(archive, ByteBuffer.wrap(rawExtra));
        entry.setCentralDirectoryExtra(rawExtra);

        setSizesAndOffsetFromZip64Extra(entry, diskStart);

        final byte[] rawComment = new byte[commentLength];
        IOUtils.readFully(archive, ByteBuffer.wrap(rawComment));
        entry.setComment(entryEncoding.decode(rawComment));

        if (useUnicodeExtraFields && !utf8Names) {
            noUTF8Flag.put(entry, new NameAndComment(rawName, rawComment));
        }
    }

    /**
     * If the entry holds a Zip64 extended information extra field,
     * read sizes from there if the entry's sizes are set to
     * ZIP64_MAGIC, do the same for the offset of the local file
     * header.
     *
     * <p>Ensures the Zip64 extra either knows both compressed and
     * uncompressed size or neither of both as the internal logic in
     * ExtraFieldUtils forces the field to create local header data
     * even if they are never used - and here a field with only one
     * size would be invalid.</p>
     *
     * @param ze the entry to update.
     * @param diskStart the "disk number start" value read for the entry.
     */
    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
                                                 final int diskStart)
        throws IOException {
        final Zip64ExtendedInformationExtraField zip64Extra =
            (Zip64ExtendedInformationExtraField)
            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        if (zip64Extra == null) {
            return;
        }

        final boolean sizeInExtra = ze.getSize() == ZIP64_MAGIC;
        final boolean compressedSizeInExtra = ze.getCompressedSize() == ZIP64_MAGIC;
        final boolean offsetInExtra = ze.getLocalHeaderOffset() == ZIP64_MAGIC;
        zip64Extra.reparseCentralDirectoryData(sizeInExtra,
                                               compressedSizeInExtra,
                                               offsetInExtra,
                                               diskStart == ZIP64_MAGIC_SHORT);

        if (sizeInExtra) {
            ze.setSize(zip64Extra.getSize().getLongValue());
        } else if (compressedSizeInExtra) {
            // only the compressed size came from the extra: mirror the size into it
            zip64Extra.setSize(new ZipEightByteInteger(ze.getSize()));
        }

        if (compressedSizeInExtra) {
            ze.setCompressedSize(zip64Extra.getCompressedSize().getLongValue());
        } else if (sizeInExtra) {
            // only the size came from the extra: mirror the compressed size into it
            zip64Extra.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
        }

        if (offsetInExtra) {
            ze.setLocalHeaderOffset(zip64Extra.getRelativeHeaderOffset().getLongValue());
        }
    }

    /**
     * Length of the "End of central directory record" - which is
     * supposed to be the last structure of the archive - without file
     * comment.
785 */ 786 static final int MIN_EOCD_SIZE = 787 /* end of central dir signature */ WORD 788 /* number of this disk */ + SHORT 789 /* number of the disk with the */ 790 /* start of the central directory */ + SHORT 791 /* total number of entries in */ 792 /* the central dir on this disk */ + SHORT 793 /* total number of entries in */ 794 /* the central dir */ + SHORT 795 /* size of the central directory */ + WORD 796 /* offset of start of central */ 797 /* directory with respect to */ 798 /* the starting disk number */ + WORD 799 /* zipfile comment length */ + SHORT; 800 801 /** 802 * Maximum length of the "End of central directory record" with a 803 * file comment. 804 */ 805 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 806 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 807 808 /** 809 * Offset of the field that holds the location of the first 810 * central directory entry inside the "End of central directory 811 * record" relative to the start of the "End of central directory 812 * record". 813 */ 814 private static final int CFD_LOCATOR_OFFSET = 815 /* end of central dir signature */ WORD 816 /* number of this disk */ + SHORT 817 /* number of the disk with the */ 818 /* start of the central directory */ + SHORT 819 /* total number of entries in */ 820 /* the central dir on this disk */ + SHORT 821 /* total number of entries in */ 822 /* the central dir */ + SHORT 823 /* size of the central directory */ + WORD; 824 825 /** 826 * Length of the "Zip64 end of central directory locator" - which 827 * should be right in front of the "end of central directory 828 * record" if one is present at all. 
829 */ 830 private static final int ZIP64_EOCDL_LENGTH = 831 /* zip64 end of central dir locator sig */ WORD 832 /* number of the disk with the start */ 833 /* start of the zip64 end of */ 834 /* central directory */ + WORD 835 /* relative offset of the zip64 */ 836 /* end of central directory record */ + DWORD 837 /* total number of disks */ + WORD; 838 839 /** 840 * Offset of the field that holds the location of the "Zip64 end 841 * of central directory record" inside the "Zip64 end of central 842 * directory locator" relative to the start of the "Zip64 end of 843 * central directory locator". 844 */ 845 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 846 /* zip64 end of central dir locator sig */ WORD 847 /* number of the disk with the start */ 848 /* start of the zip64 end of */ 849 /* central directory */ + WORD; 850 851 /** 852 * Offset of the field that holds the location of the first 853 * central directory entry inside the "Zip64 end of central 854 * directory record" relative to the start of the "Zip64 end of 855 * central directory record". 856 */ 857 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 858 /* zip64 end of central dir */ 859 /* signature */ WORD 860 /* size of zip64 end of central */ 861 /* directory record */ + DWORD 862 /* version made by */ + SHORT 863 /* version needed to extract */ + SHORT 864 /* number of this disk */ + WORD 865 /* number of the disk with the */ 866 /* start of the central directory */ + WORD 867 /* total number of entries in the */ 868 /* central directory on this disk */ + DWORD 869 /* total number of entries in the */ 870 /* central directory */ + DWORD 871 /* size of the central directory */ + DWORD; 872 873 /** 874 * Searches for either the "Zip64 end of central directory 875 * locator" or the "End of central dir record", parses 876 * it and positions the stream at the first central directory 877 * record. 
878 */ 879 private void positionAtCentralDirectory() 880 throws IOException { 881 positionAtEndOfCentralDirectoryRecord(); 882 boolean found = false; 883 final boolean searchedForZip64EOCD = 884 archive.position() > ZIP64_EOCDL_LENGTH; 885 if (searchedForZip64EOCD) { 886 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 887 wordBbuf.rewind(); 888 IOUtils.readFully(archive, wordBbuf); 889 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 890 wordBuf); 891 } 892 if (!found) { 893 // not a ZIP64 archive 894 if (searchedForZip64EOCD) { 895 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 896 } 897 positionAtCentralDirectory32(); 898 } else { 899 positionAtCentralDirectory64(); 900 } 901 } 902 903 /** 904 * Parses the "Zip64 end of central directory locator", 905 * finds the "Zip64 end of central directory record" using the 906 * parsed information, parses that and positions the stream at the 907 * first central directory record. 908 * 909 * Expects stream to be positioned right behind the "Zip64 910 * end of central directory locator"'s signature. 911 */ 912 private void positionAtCentralDirectory64() 913 throws IOException { 914 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 915 - WORD /* signature has already been read */); 916 dwordBbuf.rewind(); 917 IOUtils.readFully(archive, dwordBbuf); 918 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 919 wordBbuf.rewind(); 920 IOUtils.readFully(archive, wordBbuf); 921 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 922 throw new ZipException("archive's ZIP64 end of central " 923 + "directory locator is corrupt."); 924 } 925 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 926 - WORD /* signature has already been read */); 927 dwordBbuf.rewind(); 928 IOUtils.readFully(archive, dwordBbuf); 929 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 930 } 931 932 /** 933 * Parses the "End of central dir record" and positions 934 * the stream at the first central directory record. 
935 * 936 * Expects stream to be positioned at the beginning of the 937 * "End of central dir record". 938 */ 939 private void positionAtCentralDirectory32() 940 throws IOException { 941 skipBytes(CFD_LOCATOR_OFFSET); 942 wordBbuf.rewind(); 943 IOUtils.readFully(archive, wordBbuf); 944 archive.position(ZipLong.getValue(wordBuf)); 945 } 946 947 /** 948 * Searches for the and positions the stream at the start of the 949 * "End of central dir record". 950 */ 951 private void positionAtEndOfCentralDirectoryRecord() 952 throws IOException { 953 final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 954 ZipArchiveOutputStream.EOCD_SIG); 955 if (!found) { 956 throw new ZipException("archive is not a ZIP archive"); 957 } 958 } 959 960 /** 961 * Searches the archive backwards from minDistance to maxDistance 962 * for the given signature, positions the RandomaccessFile right 963 * at the signature if it has been found. 964 */ 965 private boolean tryToLocateSignature(final long minDistanceFromEnd, 966 final long maxDistanceFromEnd, 967 final byte[] sig) throws IOException { 968 boolean found = false; 969 long off = archive.size() - minDistanceFromEnd; 970 final long stopSearching = 971 Math.max(0L, archive.size() - maxDistanceFromEnd); 972 if (off >= 0) { 973 for (; off >= stopSearching; off--) { 974 archive.position(off); 975 try { 976 wordBbuf.rewind(); 977 IOUtils.readFully(archive, wordBbuf); 978 wordBbuf.flip(); 979 } catch (EOFException ex) { 980 break; 981 } 982 int curr = wordBbuf.get(); 983 if (curr == sig[POS_0]) { 984 curr = wordBbuf.get(); 985 if (curr == sig[POS_1]) { 986 curr = wordBbuf.get(); 987 if (curr == sig[POS_2]) { 988 curr = wordBbuf.get(); 989 if (curr == sig[POS_3]) { 990 found = true; 991 break; 992 } 993 } 994 } 995 } 996 } 997 } 998 if (found) { 999 archive.position(off); 1000 } 1001 return found; 1002 } 1003 1004 /** 1005 * Skips the given number of bytes or throws an EOFException if 1006 * skipping failed. 
1007 */ 1008 private void skipBytes(final int count) throws IOException { 1009 long currentPosition = archive.position(); 1010 long newPosition = currentPosition + count; 1011 if (newPosition > archive.size()) { 1012 throw new EOFException(); 1013 } 1014 archive.position(newPosition); 1015 } 1016 1017 /** 1018 * Number of bytes in local file header up to the "length of 1019 * filename" entry. 1020 */ 1021 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 1022 /* local file header signature */ WORD 1023 /* version needed to extract */ + SHORT 1024 /* general purpose bit flag */ + SHORT 1025 /* compression method */ + SHORT 1026 /* last mod file time */ + SHORT 1027 /* last mod file date */ + SHORT 1028 /* crc-32 */ + WORD 1029 /* compressed size */ + WORD 1030 /* uncompressed size */ + (long) WORD; 1031 1032 /** 1033 * Walks through all recorded entries and adds the data available 1034 * from the local file header. 1035 * 1036 * <p>Also records the offsets for the data to read from the 1037 * entries.</p> 1038 */ 1039 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1040 entriesWithoutUTF8Flag) 1041 throws IOException { 1042 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1043 // entries is filled in populateFromCentralDirectory and 1044 // never modified 1045 final Entry ze = (Entry) zipArchiveEntry; 1046 final long offset = ze.getLocalHeaderOffset(); 1047 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1048 wordBbuf.rewind(); 1049 IOUtils.readFully(archive, wordBbuf); 1050 wordBbuf.flip(); 1051 wordBbuf.get(shortBuf); 1052 final int fileNameLen = ZipShort.getValue(shortBuf); 1053 wordBbuf.get(shortBuf); 1054 final int extraFieldLen = ZipShort.getValue(shortBuf); 1055 skipBytes(fileNameLen); 1056 final byte[] localExtraData = new byte[extraFieldLen]; 1057 IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData)); 1058 ze.setExtra(localExtraData); 1059 ze.setDataOffset(offset + 
LFH_OFFSET_FOR_FILENAME_LENGTH 1060 + SHORT + SHORT + fileNameLen + extraFieldLen); 1061 ze.setStreamContiguous(true); 1062 1063 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1064 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1065 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1066 nc.comment); 1067 } 1068 1069 final String name = ze.getName(); 1070 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 1071 if (entriesOfThatName == null) { 1072 entriesOfThatName = new LinkedList<>(); 1073 nameMap.put(name, entriesOfThatName); 1074 } 1075 entriesOfThatName.addLast(ze); 1076 } 1077 } 1078 1079 /** 1080 * Checks whether the archive starts with a LFH. If it doesn't, 1081 * it may be an empty archive. 1082 */ 1083 private boolean startsWithLocalFileHeader() throws IOException { 1084 archive.position(0); 1085 wordBbuf.rewind(); 1086 IOUtils.readFully(archive, wordBbuf); 1087 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1088 } 1089 1090 /** 1091 * Creates new BoundedInputStream, according to implementation of 1092 * underlying archive channel. 1093 */ 1094 private BoundedInputStream createBoundedInputStream(long start, long remaining) { 1095 return archive instanceof FileChannel ? 1096 new BoundedFileChannelInputStream(start, remaining) : 1097 new BoundedInputStream(start, remaining); 1098 } 1099 1100 /** 1101 * InputStream that delegates requests to the underlying 1102 * SeekableByteChannel, making sure that only bytes from a certain 1103 * range can be read. 
1104 */ 1105 private class BoundedInputStream extends InputStream { 1106 private ByteBuffer singleByteBuffer; 1107 private final long end; 1108 private long loc; 1109 1110 BoundedInputStream(final long start, final long remaining) { 1111 this.end = start+remaining; 1112 if (this.end < start) { 1113 // check for potential vulnerability due to overflow 1114 throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining); 1115 } 1116 loc = start; 1117 } 1118 1119 @Override 1120 public synchronized int read() throws IOException { 1121 if (loc >= end) { 1122 return -1; 1123 } 1124 if (singleByteBuffer == null) { 1125 singleByteBuffer = ByteBuffer.allocate(1); 1126 } 1127 else { 1128 singleByteBuffer.rewind(); 1129 } 1130 int read = read(loc, singleByteBuffer); 1131 if (read < 0) { 1132 return read; 1133 } 1134 loc++; 1135 return singleByteBuffer.get() & 0xff; 1136 } 1137 1138 @Override 1139 public synchronized int read(final byte[] b, final int off, int len) throws IOException { 1140 if (len <= 0) { 1141 return 0; 1142 } 1143 1144 if (len > end-loc) { 1145 if (loc >= end) { 1146 return -1; 1147 } 1148 len = (int)(end-loc); 1149 } 1150 1151 ByteBuffer buf; 1152 buf = ByteBuffer.wrap(b, off, len); 1153 int ret = read(loc, buf); 1154 if (ret > 0) { 1155 loc += ret; 1156 return ret; 1157 } 1158 return ret; 1159 } 1160 1161 protected int read(long pos, ByteBuffer buf) throws IOException { 1162 int read; 1163 synchronized (archive) { 1164 archive.position(pos); 1165 read = archive.read(buf); 1166 } 1167 buf.flip(); 1168 return read; 1169 } 1170 } 1171 1172 /** 1173 * Lock-free implementation of BoundedInputStream. The 1174 * implementation uses positioned reads on the underlying archive 1175 * file channel and therefore performs significantly faster in 1176 * concurrent environment. 
1177 */ 1178 private class BoundedFileChannelInputStream extends BoundedInputStream { 1179 private final FileChannel archive; 1180 1181 BoundedFileChannelInputStream(final long start, final long remaining) { 1182 super(start, remaining); 1183 archive = (FileChannel)ZipFile.this.archive; 1184 } 1185 1186 @Override 1187 protected int read(long pos, ByteBuffer buf) throws IOException { 1188 int read = archive.read(buf, pos); 1189 buf.flip(); 1190 return read; 1191 } 1192 } 1193 1194 private static final class NameAndComment { 1195 private final byte[] name; 1196 private final byte[] comment; 1197 private NameAndComment(final byte[] name, final byte[] comment) { 1198 this.name = name; 1199 this.comment = comment; 1200 } 1201 } 1202 1203 /** 1204 * Compares two ZipArchiveEntries based on their offset within the archive. 1205 * 1206 * <p>Won't return any meaningful results if one of the entries 1207 * isn't part of the archive at all.</p> 1208 * 1209 * @since 1.1 1210 */ 1211 private final Comparator<ZipArchiveEntry> offsetComparator = 1212 new Comparator<ZipArchiveEntry>() { 1213 @Override 1214 public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) { 1215 if (e1 == e2) { 1216 return 0; 1217 } 1218 1219 final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null; 1220 final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null; 1221 if (ent1 == null) { 1222 return 1; 1223 } 1224 if (ent2 == null) { 1225 return -1; 1226 } 1227 final long val = (ent1.getLocalHeaderOffset() 1228 - ent2.getLocalHeaderOffset()); 1229 return val == 0 ? 0 : val < 0 ? -1 : +1; 1230 } 1231 }; 1232 1233 /** 1234 * Extends ZipArchiveEntry to store the offset within the archive. 
*/
    private static class Entry extends ZipArchiveEntry {

        Entry() {
        }

        /**
         * Mixes the hash of the superclass with both halves of the
         * local header offset.
         */
        @Override
        public int hashCode() {
            final long headerOffset = getLocalHeaderOffset();
            final int lowHalf = (int) headerOffset;
            final int highHalf = (int) (headerOffset >> 32);
            return 3 * super.hashCode() + lowHalf + highHalf;
        }

        /**
         * Two entries are equal if the superclass considers them
         * equal and both their local header offset and their data
         * offset agree.
         */
        @Override
        public boolean equals(final Object other) {
            if (!super.equals(other)) {
                return false;
            }
            // super.equals would return false if other were not an Entry
            final Entry that = (Entry) other;
            final boolean sameHeaderOffset =
                getLocalHeaderOffset() == that.getLocalHeaderOffset();
            return sameHeaderOffset && getDataOffset() == that.getDataOffset();
        }
    }
}