001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.sevenz; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.DataInputStream; 024import java.io.EOFException; 025import java.io.File; 026import java.io.FilterInputStream; 027import java.io.IOException; 028import java.io.InputStream; 029import java.nio.Buffer; 030import java.nio.ByteBuffer; 031import java.nio.ByteOrder; 032import java.nio.CharBuffer; 033import java.nio.channels.SeekableByteChannel; 034import java.nio.charset.StandardCharsets; 035import java.nio.charset.CharsetEncoder; 036import java.nio.file.Files; 037import java.nio.file.StandardOpenOption; 038import java.util.ArrayList; 039import java.util.Arrays; 040import java.util.BitSet; 041import java.util.EnumSet; 042import java.util.HashMap; 043import java.util.LinkedList; 044import java.util.List; 045import java.util.Map; 046import java.util.stream.Collectors; 047import java.util.zip.CRC32; 048 049import org.apache.commons.compress.MemoryLimitException; 050import org.apache.commons.compress.utils.BoundedInputStream; 051import 
org.apache.commons.compress.utils.ByteUtils; 052import org.apache.commons.compress.utils.CRC32VerifyingInputStream; 053import org.apache.commons.compress.utils.IOUtils; 054import org.apache.commons.compress.utils.InputStreamStatistics; 055 056/** 057 * Reads a 7z file, using SeekableByteChannel under 058 * the covers. 059 * <p> 060 * The 7z file format is a flexible container 061 * that can contain many compression and 062 * encryption types, but at the moment 063 * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256 064 * are supported. 065 * <p> 066 * The format is very Windows/Intel specific, 067 * so it uses little-endian byte order, 068 * doesn't store user/group or permission bits, 069 * and represents times using NTFS timestamps 070 * (100 nanosecond units since 1 January 1601). 071 * Hence the official tools recommend against 072 * using it for backup purposes on *nix, and 073 * recommend .tar.7z or .tar.lzma or .tar.xz 074 * instead. 075 * <p> 076 * Both the header and file contents may be 077 * compressed and/or encrypted. With both 078 * encrypted, neither file names nor file 079 * contents can be read, but the use of 080 * encryption isn't plausibly deniable. 
*
 * <p>Multi volume archives can be read by concatenating the parts in
 * correct order - either manually or by using {@link
 * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel}
 * for example.</p>
 *
 * @NotThreadSafe
 * @since 1.6
 */
public class SevenZFile implements Closeable {
    /** Offset added to the header and pack positions stored in the archive before seeking in the channel. */
    static final int SIGNATURE_HEADER_SIZE = 32;

    /** Archive name passed on by the channel-based constructors that do not receive a file name. */
    private static final String DEFAULT_FILE_NAME = "unknown archive";

    /** Name of the archive; the public constructors document it as used for error reporting only. */
    private final String fileName;
    /** Channel the archive is read from; set to {@code null} by {@link #close()}. */
    private SeekableByteChannel channel;
    /** Parsed archive structure produced by {@code readHeaders} in the constructor. */
    private final Archive archive;
    /** Index into {@code archive.files} of the entry last returned by {@link #getNextEntry()}, -1 before the first call. */
    private int currentEntryIndex = -1;
    private int currentFolderIndex = -1;
    private InputStream currentFolderInputStream;
    /** Private copy of the password bytes; overwritten with zeros and dropped by {@link #close()}. */
    private byte[] password;
    private final SevenZFileOptions options;

    private long compressedBytesReadFromCurrentEntry;
    private long uncompressedBytesReadFromCurrentEntry;

    private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>();

    // shared with SevenZOutputFile and tests, neither mutates it
    static final byte[] sevenZSignature = { //NOSONAR
        (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C
    };

    /**
     * Reads a file as 7z archive
     *
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final File fileName, final char[] password) throws IOException {
        this(fileName, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a file as 7z archive with additional options.
*
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final File fileName, final char[] password, final SevenZFileOptions options) throws IOException {
        // closeOnError is true: the channel is opened right here, so the delegate closes it
        // again when reading the headers fails
        this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)), // NOSONAR
                fileName.getAbsolutePath(), utf16Decode(password), true, options);
    }

    /**
     * Reads a file as 7z archive
     *
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final File fileName, final byte[] password) throws IOException {
        // closeOnError is true for the same reason as in the char[] variant: the channel is opened here
        this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)),
                fileName.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @throws IOException if reading the archive fails
     * @since 1.13
     */
    public SevenZFile(final SeekableByteChannel channel) throws IOException {
        this(channel, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
*
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException {
        // no archive name is available for a bare channel, so the placeholder name is passed on;
        // null means "no password"
        this(channel, DEFAULT_FILE_NAME, null, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel,
                      final char[] password) throws IOException {
        this(channel, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
*
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options)
            throws IOException {
        // no archive name is available for a bare channel, so the placeholder name is passed on
        this(channel, DEFAULT_FILE_NAME, password, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName,
                      final char[] password) throws IOException {
        this(channel, fileName, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
*
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password,
            final SevenZFileOptions options) throws IOException {
        // closeOnError is false: the channel was supplied by the caller, so it is left open
        // when reading the headers fails
        this(channel, fileName, utf16Decode(password), false, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName)
        throws IOException {
        this(channel, fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
*
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options)
            throws IOException {
        // null password, and closeOnError is false because the channel belongs to the caller
        this(channel, fileName, null, false, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel,
                      final byte[] password) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
* @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName,
                      final byte[] password) throws IOException {
        // closeOnError is false: the channel was supplied by the caller
        this(channel, fileName, password, false, SevenZFileOptions.DEFAULT);
    }

    /**
     * Common constructor all public constructors end up in: stores channel, name and
     * options, parses the archive headers and keeps a private copy of the password.
     *
     * @param channel the channel to read the archive from
     * @param filename name of the archive
     * @param password password bytes or {@code null}; copied, the caller's array is not retained
     * @param closeOnError whether the channel is closed when reading the headers fails
     * @param options the options to apply
     * @throws IOException if reading the headers fails
     */
    private SevenZFile(final SeekableByteChannel channel, final String filename,
                       final byte[] password, final boolean closeOnError, final SevenZFileOptions options) throws IOException {
        boolean succeeded = false;
        this.channel = channel;
        this.fileName = filename;
        this.options = options;
        try {
            archive = readHeaders(password);
            if (password != null) {
                // defensive copy, so close() can wipe it without touching the caller's array
                this.password = Arrays.copyOf(password, password.length);
            } else {
                this.password = null;
            }
            succeeded = true;
        } finally {
            // only channels flagged by the calling constructor are closed on failure
            if (!succeeded && closeOnError) {
                this.channel.close();
            }
        }
    }

    /**
     * Reads a file as unencrypted 7z archive
     *
     * @param fileName the file to read
     * @throws IOException if reading the archive fails
     */
    public SevenZFile(final File fileName) throws IOException {
        this(fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a file as unencrypted 7z archive
     *
     * @param fileName the file to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final File fileName, final SevenZFileOptions options) throws IOException {
        this(fileName, null, options);
    }

    /**
     * Closes the archive.
382 * @throws IOException if closing the file fails 383 */ 384 @Override 385 public void close() throws IOException { 386 if (channel != null) { 387 try { 388 channel.close(); 389 } finally { 390 channel = null; 391 if (password != null) { 392 Arrays.fill(password, (byte) 0); 393 } 394 password = null; 395 } 396 } 397 } 398 399 /** 400 * Returns the next Archive Entry in this archive. 401 * 402 * @return the next entry, 403 * or {@code null} if there are no more entries 404 * @throws IOException if the next entry could not be read 405 */ 406 public SevenZArchiveEntry getNextEntry() throws IOException { 407 if (currentEntryIndex >= archive.files.length - 1) { 408 return null; 409 } 410 ++currentEntryIndex; 411 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 412 if (entry.getName() == null && options.getUseDefaultNameForUnnamedEntries()) { 413 entry.setName(getDefaultName()); 414 } 415 buildDecodingStream(currentEntryIndex, false); 416 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 417 return entry; 418 } 419 420 /** 421 * Returns a copy of meta-data of all archive entries. 422 * 423 * <p>This method only provides meta-data, the entries can not be 424 * used to read the contents, you still need to process all 425 * entries in order using {@link #getNextEntry} for that.</p> 426 * 427 * <p>The content methods are only available for entries that have 428 * already been reached via {@link #getNextEntry}.</p> 429 * 430 * @return a copy of meta-data of all archive entries. 
* @since 1.11
     */
    public Iterable<SevenZArchiveEntry> getEntries() {
        // a new list with its own storage; the entry objects are the ones held in archive.files
        return new ArrayList<>(Arrays.asList(archive.files));
    }

    /**
     * Reads and validates the first 12 bytes of the channel (6 byte signature, major and
     * minor version, start header CRC) and then parses the archive headers.
     *
     * <p>If the start header CRC is 0 and the following 20 bytes are all zero as well, the
     * start header is considered missing; the archive is then only read if the
     * tryToRecoverBrokenArchives option is enabled, by searching for the end header.</p>
     *
     * @param password password bytes handed on to the header parsing, may be {@code null}
     * @return the parsed archive structure
     * @throws IOException if the signature or version is not accepted or the headers cannot be read
     */
    private Archive readHeaders(final byte[] password) throws IOException {
        final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */)
            .order(ByteOrder.LITTLE_ENDIAN);
        readFully(buf);
        final byte[] signature = new byte[6];
        buf.get(signature);
        if (!Arrays.equals(signature, sevenZSignature)) {
            throw new IOException("Bad 7z signature");
        }
        // 7zFormat.txt has it wrong - it's first major then minor
        final byte archiveVersionMajor = buf.get();
        final byte archiveVersionMinor = buf.get();
        if (archiveVersionMajor != 0) {
            throw new IOException(String.format("Unsupported 7z version (%d,%d)",
                archiveVersionMajor, archiveVersionMinor));
        }

        boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive"
        // mask keeps the CRC as an unsigned 32 bit value inside the long
        final long startHeaderCrc = 0xffffFFFFL & buf.getInt();
        if (startHeaderCrc == 0) {
            // This is an indication of a corrupt header - peek the next 20 bytes
            final long currentPosition = channel.position();
            final ByteBuffer peekBuf = ByteBuffer.allocate(20);
            readFully(peekBuf);
            // peeking only: restore the position so readStartHeader sees the same 20 bytes
            channel.position(currentPosition);
            // Header invalid if all data is 0
            while (peekBuf.hasRemaining()) {
                if (peekBuf.get()!=0) {
                    headerLooksValid = true;
                    break;
                }
            }
        } else {
            headerLooksValid = true;
        }

        if (headerLooksValid) {
            final StartHeader startHeader = readStartHeader(startHeaderCrc);
            return initializeArchive(startHeader, password, true);
        }
        // No valid header found - probably first file of multipart archive was removed too early. Scan for end header.
if (options.getTryToRecoverBrokenArchives()) {
            return tryToLocateEndHeader(password);
        }
        throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the"
            + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed"
            + " prematurely.");
    }

    /**
     * Recovery path for archives without a usable start header: walks backwards from the end
     * of the channel, and for every byte that equals the kHeader or kEncodedHeader id tries to
     * parse an archive starting at that position (without CRC verification).
     *
     * <p>At most the last {@code searchLimit} bytes (1 MiB) of the channel are examined and the
     * search never goes back beyond the current channel position.</p>
     *
     * @param password password bytes handed on to the header parsing, may be {@code null}
     * @return the first candidate that parses and contains at least one pack size and one file
     * @throws IOException if no candidate position yields a usable archive
     */
    private Archive tryToLocateEndHeader(final byte[] password) throws IOException {
        final ByteBuffer nidBuf = ByteBuffer.allocate(1);
        final long searchLimit = 1024L * 1024 * 1;
        // Main header, plus bytes that readStartHeader would read
        final long previousDataSize = channel.position() + 20;
        final long minPos;
        // Determine minimal position - can't start before current position
        if (channel.position() + searchLimit > channel.size()) {
            minPos = channel.position();
        } else {
            minPos = channel.size() - searchLimit;
        }
        long pos = channel.size() - 1;
        // Loop: Try from end of archive
        while (pos > minPos) {
            pos--;
            channel.position(pos);
            // NOTE(review): the cast to Buffer presumably keeps this call bound to Buffer.rewind()
            // regardless of the JDK compiled against - confirm
            ((Buffer)nidBuf).rewind();
            if (channel.read(nidBuf) < 1) {
                throw new EOFException();
            }
            final int nid = nidBuf.array()[0];
            // First indicator: Byte equals one of these header identifiers
            if (nid == NID.kEncodedHeader || nid == NID.kHeader) {
                try {
                    // Try to initialize Archive structure from here
                    final StartHeader startHeader = new StartHeader();
                    // initializeArchive adds SIGNATURE_HEADER_SIZE back when it positions the channel
                    startHeader.nextHeaderOffset = pos - previousDataSize;
                    startHeader.nextHeaderSize = channel.size() - pos;
                    final Archive result = initializeArchive(startHeader, password, false);
                    // Sanity check: There must be some data...
                    if (result.packSizes.length > 0 && result.files.length > 0) {
                        return result;
                    }
                } catch (final Exception ignore) {
                    // Wrong guess...
}
            }
        }
        throw new IOException("Start header corrupt and unable to guess end header");
    }

    /**
     * Reads the "next header" the given start header points at and parses it into an
     * {@link Archive}; an encoded header is decoded first and the decoded bytes are parsed.
     *
     * @param startHeader offset, size and CRC of the next header
     * @param password password bytes used when the header is encoded, may be {@code null}
     * @param verifyCrc whether the bytes read are checked against {@code startHeader.nextHeaderCrc}
     * @return the parsed archive, with {@code subStreamsInfo} reset to {@code null}
     * @throws IOException if the size does not fit into an int, the CRC does not match or
     * the data is not a header
     */
    private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException {
        assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize);
        final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize;
        channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset);
        ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN);
        readFully(buf);
        if (verifyCrc) {
            final CRC32 crc = new CRC32();
            crc.update(buf.array());
            if (startHeader.nextHeaderCrc != crc.getValue()) {
                throw new IOException("NextHeader CRC mismatch");
            }
        }

        Archive archive = new Archive();
        int nid = getUnsignedByte(buf);
        if (nid == NID.kEncodedHeader) {
            // the first Archive instance only receives the stream info needed to decode the real header
            buf = readEncodedHeader(buf, archive, password);
            // Archive gets rebuilt with the new header
            archive = new Archive();
            nid = getUnsignedByte(buf);
        }
        if (nid != NID.kHeader) {
            throw new IOException("Broken or unsupported archive: no Header");
        }
        readHeader(buf, archive);
        archive.subStreamsInfo = null;
        return archive;
    }

    /**
     * Reads the 20 byte start header (next header offset, size and CRC) at the current
     * channel position and checks that offset and size stay within the channel.
     *
     * @param startHeaderCrc expected CRC of the 20 bytes, handed to the CRC verifying stream
     * @return the populated start header
     * @throws IOException if offset or size are out of bounds or reading fails
     */
    private StartHeader readStartHeader(final long startHeaderCrc) throws IOException {
        final StartHeader startHeader = new StartHeader();
        // using Stream rather than ByteBuffer for the benefit of the
        // built-in CRC check
        try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream(
                new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) {
            // DataInputStream reads big-endian; reverseBytes yields the little-endian value
            startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong());
            if (startHeader.nextHeaderOffset < 0
                || startHeader.nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) {
                throw new IOException("nextHeaderOffset is out of bounds");
            }

            startHeader.nextHeaderSize =
Long.reverseBytes(dataInputStream.readLong());
            final long nextHeaderEnd = startHeader.nextHeaderOffset + startHeader.nextHeaderSize;
            // the first comparison catches overflow of the addition (and a negative size)
            if (nextHeaderEnd < startHeader.nextHeaderOffset
                || nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) {
                throw new IOException("nextHeaderSize is out of bounds");
            }

            startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt());

            return startHeader;
        }
    }

    /**
     * Parses a header into the given archive. The header is first run through
     * {@code sanityCheckAndCollectStatistics}, the collected statistics are validated against
     * the configured memory limit, and only then is the buffer rewound and actually read.
     *
     * @param header buffer positioned right after the kHeader id
     * @param archive the archive to populate
     * @throws IOException if the header is malformed or unsupported
     */
    private void readHeader(final ByteBuffer header, final Archive archive) throws IOException {
        final int pos = header.position();
        final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header);
        stats.assertValidity(options.getMaxMemoryLimitInKb());
        // rewind to where the sanity check started
        header.position(pos);

        int nid = getUnsignedByte(header);

        if (nid == NID.kArchiveProperties) {
            readArchiveProperties(header);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kAdditionalStreamsInfo) {
            throw new IOException("Additional streams unsupported");
            //nid = getUnsignedByte(header);
        }

        if (nid == NID.kMainStreamsInfo) {
            readStreamsInfo(header, archive);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kFilesInfo) {
            readFilesInfo(header, archive);
            // the terminating id is consumed but not inspected here; the sanity check pass verifies it
            nid = getUnsignedByte(header);
        }
    }

    /**
     * Walks over a header in the same order as {@code readHeader} without building the
     * archive, validating its structure and collecting statistics on the way.
     *
     * @param header buffer positioned right after the kHeader id
     * @return the statistics collected
     * @throws IOException if the header is malformed, unsupported or not terminated by kEnd
     */
    private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header)
        throws IOException {
        final ArchiveStatistics stats = new ArchiveStatistics();

        int nid = getUnsignedByte(header);

        if (nid == NID.kArchiveProperties) {
            sanityCheckArchiveProperties(header);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kAdditionalStreamsInfo) {
            throw new IOException("Additional streams unsupported");
            //nid = getUnsignedByte(header);
        }

        if (nid == NID.kMainStreamsInfo) {
            sanityCheckStreamsInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kFilesInfo) {
sanityCheckFilesInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated header, found " + nid);
        }

        return stats;
    }

    /**
     * Consumes the archive properties section: every property is read into a temporary
     * array and dropped, until the kEnd id is reached.
     *
     * @param input buffer positioned right after the kArchiveProperties id
     * @throws IOException if reading from the buffer fails
     */
    private void readArchiveProperties(final ByteBuffer input) throws IOException {
        // FIXME: the reference implementation just throws them away?
        int nid =  getUnsignedByte(input);
        while (nid != NID.kEnd) {
            final long propertySize = readUint64(input);
            // the unchecked cast relies on sanityCheckArchiveProperties, which readHeader runs
            // over the same bytes first
            final byte[] property = new byte[(int)propertySize];
            get(input, property);
            nid = getUnsignedByte(input);
        }
    }

    /**
     * Validation counterpart of {@code readArchiveProperties}: checks that every property
     * size fits into a non-negative int and that the buffer holds that many bytes.
     *
     * @param header buffer positioned right after the kArchiveProperties id
     * @throws IOException if a property size is invalid
     */
    private void sanityCheckArchiveProperties(final ByteBuffer header)
        throws IOException {
        int nid =  getUnsignedByte(header);
        while (nid != NID.kEnd) {
            final int propertySize =
                assertFitsIntoNonNegativeInt("propertySize", readUint64(header));
            if (skipBytesFully(header, propertySize) < propertySize) {
                throw new IOException("invalid property size");
            }
            nid = getUnsignedByte(header);
        }
    }

    /**
     * Decodes an encoded (compressed and/or encrypted) header: reads the streams info that
     * describes it, decodes the first folder of the archive and returns the decoded bytes.
     *
     * @param header buffer positioned right after the kEncodedHeader id
     * @param archive archive instance that receives the streams info of the encoded header
     * @param password password bytes handed to the decoders, may be {@code null}
     * @return a little-endian buffer wrapping the decoded header
     * @throws IOException if the streams info is invalid or decoding fails
     */
    private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive,
                                         final byte[] password) throws IOException {
        // same two pass approach as readHeader: validate first, then rewind and read
        final int pos = header.position();
        ArchiveStatistics stats = new ArchiveStatistics();
        sanityCheckStreamsInfo(header, stats);
        stats.assertValidity(options.getMaxMemoryLimitInKb());
        header.position(pos);

        readStreamsInfo(header, archive);

        if (archive.folders == null || archive.folders.length == 0) {
            throw new IOException("no folders, can't read encoded header");
        }
        if (archive.packSizes == null || archive.packSizes.length == 0) {
            throw new IOException("no packed streams, can't read encoded header");
        }

        // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage?
final Folder folder = archive.folders[0];
        final int firstPackStreamIndex = 0;
        final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos +
                0;

        channel.position(folderOffset);
        // build the decoder chain on top of the packed bytes of the first pack stream
        InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel,
                archive.packSizes[firstPackStreamIndex]);
        for (final Coder coder : folder.getOrderedCoders()) {
            if (coder.numInStreams != 1 || coder.numOutStreams != 1) {
                throw new IOException("Multi input/output stream coders are not yet supported");
            }
            inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR
                    folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb());
        }
        if (folder.hasCrc) {
            inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack,
                    folder.getUnpackSize(), folder.crc);
        }
        final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize());
        final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize);
        if (nextHeader.length < unpackSize) {
            throw new IOException("premature end of stream");
        }
        // NOTE(review): the stream stack is only closed on this success path, not when one of
        // the exceptions above is thrown - confirm whether that matters for the decoders used
        inputStreamStack.close();
        return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN);
    }

    /**
     * Validates a StreamsInfo structure (optional PackInfo, UnpackInfo and SubStreamsInfo
     * sections followed by kEnd) and records what it finds in {@code stats}.
     *
     * @param header buffer positioned at the first section id
     * @param stats statistics to update
     * @throws IOException if a section is invalid or the structure is not terminated by kEnd
     */
    private void sanityCheckStreamsInfo(final ByteBuffer header,
        final ArchiveStatistics stats) throws IOException {
        int nid = getUnsignedByte(header);

        if (nid == NID.kPackInfo) {
            sanityCheckPackInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kUnpackInfo) {
            sanityCheckUnpackInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kSubStreamsInfo) {
            sanityCheckSubStreamsInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated StreamsInfo");
        }
    }

    /**
     * Reads a StreamsInfo structure into the archive; expects the same bytes to have passed
     * {@code sanityCheckStreamsInfo} before, as both callers in this class do.
     *
     * @param header buffer positioned at the first section id
     * @param archive the archive to populate
     * @throws IOException if reading fails
     */
    private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException {
        int
nid = getUnsignedByte(header);

        if (nid == NID.kPackInfo) {
            readPackInfo(header, archive);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kUnpackInfo) {
            readUnpackInfo(header, archive);
            nid = getUnsignedByte(header);
        } else {
            // archive without unpack/coders info
            archive.folders = Folder.EMPTY_FOLDER_ARRAY;
        }

        if (nid == NID.kSubStreamsInfo) {
            readSubStreamsInfo(header, archive);
            // the terminating id is consumed but not inspected; the sanity check pass verifies it
            nid = getUnsignedByte(header);
        }
    }

    /**
     * Validates a PackInfo section: the pack position and every cumulative pack size must
     * stay within the channel, the number of pack streams must fit into an int and the
     * optional CRC list must be fully present.
     *
     * @param header buffer positioned right after the kPackInfo id
     * @param stats receives the number of packed streams
     * @throws IOException if a value is out of range or the section is not terminated by kEnd
     */
    private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException {
        final long packPos = readUint64(header);
        // the last comparison catches overflow of the addition
        if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size()
            || SIGNATURE_HEADER_SIZE + packPos < 0) {
            throw new IOException("packPos (" + packPos + ") is out of range");
        }
        final long numPackStreams = readUint64(header);
        stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams);
        int nid = getUnsignedByte(header);
        if (nid == NID.kSize) {
            long totalPackSizes = 0;
            for (int i = 0; i < stats.numberOfPackedStreams; i++) {
                final long packSize = readUint64(header);
                totalPackSizes += packSize;
                final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes;
                // endOfPackStreams < packPos catches overflow of the running sum
                if (packSize < 0
                    || endOfPackStreams > channel.size()
                    || endOfPackStreams < packPos) {
                    throw new IOException("packSize (" + packSize + ") is out of range");
                }
            }
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kCRC) {
            final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams)
                .cardinality();
            // each defined CRC occupies 4 bytes
            if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) {
                throw new IOException("invalid number of CRCs in PackInfo");
            }
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated PackInfo (" + nid + ")");
        }
    }

    private void
readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 800 archive.packPos = readUint64(header); 801 final int numPackStreamsInt = (int) readUint64(header); 802 int nid = getUnsignedByte(header); 803 if (nid == NID.kSize) { 804 archive.packSizes = new long[numPackStreamsInt]; 805 for (int i = 0; i < archive.packSizes.length; i++) { 806 archive.packSizes[i] = readUint64(header); 807 } 808 nid = getUnsignedByte(header); 809 } 810 811 if (nid == NID.kCRC) { 812 archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt); 813 archive.packCrcs = new long[numPackStreamsInt]; 814 for (int i = 0; i < numPackStreamsInt; i++) { 815 if (archive.packCrcsDefined.get(i)) { 816 archive.packCrcs[i] = 0xffffFFFFL & getInt(header); 817 } 818 } 819 820 nid = getUnsignedByte(header); 821 } 822 } 823 824 private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) 825 throws IOException { 826 int nid = getUnsignedByte(header); 827 if (nid != NID.kFolder) { 828 throw new IOException("Expected kFolder, got " + nid); 829 } 830 final long numFolders = readUint64(header); 831 stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders); 832 final int external = getUnsignedByte(header); 833 if (external != 0) { 834 throw new IOException("External unsupported"); 835 } 836 837 final List<Integer> numberOfOutputStreamsPerFolder = new LinkedList<>(); 838 for (int i = 0; i < stats.numberOfFolders; i++) { 839 numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats)); 840 } 841 842 final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders; 843 final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs; 844 if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) { 845 throw new IOException("archive doesn't contain enough packed streams"); 846 } 847 848 nid = getUnsignedByte(header); 849 if (nid != NID.kCodersUnpackSize) { 850 throw new 
IOException("Expected kCodersUnpackSize, got " + nid); 851 } 852 853 for (int numberOfOutputStreams : numberOfOutputStreamsPerFolder) { 854 for (int i = 0; i < numberOfOutputStreams; i++) { 855 final long unpackSize = readUint64(header); 856 if (unpackSize < 0) { 857 throw new IllegalArgumentException("negative unpackSize"); 858 } 859 } 860 } 861 862 nid = getUnsignedByte(header); 863 if (nid == NID.kCRC) { 864 stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders); 865 final int crcsDefined = stats.folderHasCrc.cardinality(); 866 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 867 throw new IOException("invalid number of CRCs in UnpackInfo"); 868 } 869 nid = getUnsignedByte(header); 870 } 871 872 if (nid != NID.kEnd) { 873 throw new IOException("Badly terminated UnpackInfo"); 874 } 875 } 876 877 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 878 int nid = getUnsignedByte(header); 879 final int numFoldersInt = (int) readUint64(header); 880 final Folder[] folders = new Folder[numFoldersInt]; 881 archive.folders = folders; 882 /* final int external = */ getUnsignedByte(header); 883 for (int i = 0; i < numFoldersInt; i++) { 884 folders[i] = readFolder(header); 885 } 886 887 nid = getUnsignedByte(header); 888 for (final Folder folder : folders) { 889 assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams); 890 folder.unpackSizes = new long[(int)folder.totalOutputStreams]; 891 for (int i = 0; i < folder.totalOutputStreams; i++) { 892 folder.unpackSizes[i] = readUint64(header); 893 } 894 } 895 896 nid = getUnsignedByte(header); 897 if (nid == NID.kCRC) { 898 final BitSet crcsDefined = readAllOrBits(header, numFoldersInt); 899 for (int i = 0; i < numFoldersInt; i++) { 900 if (crcsDefined.get(i)) { 901 folders[i].hasCrc = true; 902 folders[i].crc = 0xffffFFFFL & getInt(header); 903 } else { 904 folders[i].hasCrc = false; 905 } 906 } 907 908 nid = getUnsignedByte(header); 909 
} 910 } 911 912 private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 913 914 int nid = getUnsignedByte(header); 915 final List<Integer> numUnpackSubStreamsPerFolder = new LinkedList<>(); 916 if (nid == NID.kNumUnpackStream) { 917 for (int i = 0; i < stats.numberOfFolders; i++) { 918 numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); 919 } 920 stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().collect(Collectors.summingLong(Integer::longValue)); 921 nid = getUnsignedByte(header); 922 } else { 923 stats.numberOfUnpackSubStreams = stats.numberOfFolders; 924 } 925 926 assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); 927 928 if (nid == NID.kSize) { 929 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 930 if (numUnpackSubStreams == 0) { 931 continue; 932 } 933 for (int i = 0; i < numUnpackSubStreams - 1; i++) { 934 final long size = readUint64(header); 935 if (size < 0) { 936 throw new IOException("negative unpackSize"); 937 } 938 } 939 } 940 nid = getUnsignedByte(header); 941 } 942 943 int numDigests = 0; 944 if (numUnpackSubStreamsPerFolder.isEmpty()) { 945 numDigests = stats.folderHasCrc == null ? 
stats.numberOfFolders 946 : stats.numberOfFolders - stats.folderHasCrc.cardinality(); 947 } else { 948 int folderIdx = 0; 949 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 950 if (numUnpackSubStreams != 1 || stats.folderHasCrc == null 951 || !stats.folderHasCrc.get(folderIdx++)) { 952 numDigests += numUnpackSubStreams; 953 } 954 } 955 } 956 957 if (nid == NID.kCRC) { 958 assertFitsIntoNonNegativeInt("numDigests", numDigests); 959 final int missingCrcs = readAllOrBits(header, numDigests) 960 .cardinality(); 961 if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { 962 throw new IOException("invalid number of missing CRCs in SubStreamInfo"); 963 } 964 nid = getUnsignedByte(header); 965 } 966 967 if (nid != NID.kEnd) { 968 throw new IOException("Badly terminated SubStreamsInfo"); 969 } 970 } 971 972 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 973 for (final Folder folder : archive.folders) { 974 folder.numUnpackSubStreams = 1; 975 } 976 long unpackStreamsCount = archive.folders.length; 977 978 int nid = getUnsignedByte(header); 979 if (nid == NID.kNumUnpackStream) { 980 unpackStreamsCount = 0; 981 for (final Folder folder : archive.folders) { 982 final long numStreams = readUint64(header); 983 folder.numUnpackSubStreams = (int)numStreams; 984 unpackStreamsCount += numStreams; 985 } 986 nid = getUnsignedByte(header); 987 } 988 989 final int totalUnpackStreams = (int) unpackStreamsCount; 990 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); 991 subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; 992 subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); 993 subStreamsInfo.crcs = new long[totalUnpackStreams]; 994 995 int nextUnpackStream = 0; 996 for (final Folder folder : archive.folders) { 997 if (folder.numUnpackSubStreams == 0) { 998 continue; 999 } 1000 long sum = 0; 1001 if (nid == NID.kSize) { 1002 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { 
1003 final long size = readUint64(header); 1004 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 1005 sum += size; 1006 } 1007 } 1008 if (sum > folder.getUnpackSize()) { 1009 throw new IOException("sum of unpack sizes of folder exceeds total unpack size"); 1010 } 1011 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 1012 } 1013 if (nid == NID.kSize) { 1014 nid = getUnsignedByte(header); 1015 } 1016 1017 int numDigests = 0; 1018 for (final Folder folder : archive.folders) { 1019 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 1020 numDigests += folder.numUnpackSubStreams; 1021 } 1022 } 1023 1024 if (nid == NID.kCRC) { 1025 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 1026 final long[] missingCrcs = new long[numDigests]; 1027 for (int i = 0; i < numDigests; i++) { 1028 if (hasMissingCrc.get(i)) { 1029 missingCrcs[i] = 0xffffFFFFL & getInt(header); 1030 } 1031 } 1032 int nextCrc = 0; 1033 int nextMissingCrc = 0; 1034 for (final Folder folder: archive.folders) { 1035 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 1036 subStreamsInfo.hasCrc.set(nextCrc, true); 1037 subStreamsInfo.crcs[nextCrc] = folder.crc; 1038 ++nextCrc; 1039 } else { 1040 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 1041 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 1042 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 1043 ++nextCrc; 1044 ++nextMissingCrc; 1045 } 1046 } 1047 } 1048 1049 nid = getUnsignedByte(header); 1050 } 1051 1052 archive.subStreamsInfo = subStreamsInfo; 1053 } 1054 1055 private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) 1056 throws IOException { 1057 1058 final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header)); 1059 if (numCoders == 0) { 1060 throw new IOException("Folder without coders"); 1061 } 1062 stats.numberOfCoders += numCoders; 1063 1064 long totalOutStreams = 0; 1065 long totalInStreams = 0; 
1066 for (int i = 0; i < numCoders; i++) { 1067 final int bits = getUnsignedByte(header); 1068 final int idSize = bits & 0xf; 1069 get(header, new byte[idSize]); 1070 1071 final boolean isSimple = (bits & 0x10) == 0; 1072 final boolean hasAttributes = (bits & 0x20) != 0; 1073 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1074 if (moreAlternativeMethods) { 1075 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1076 "The reference implementation doesn't support them either."); 1077 } 1078 1079 if (isSimple) { 1080 totalInStreams++; 1081 totalOutStreams++; 1082 } else { 1083 totalInStreams += 1084 assertFitsIntoNonNegativeInt("numInStreams", readUint64(header)); 1085 totalOutStreams += 1086 assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header)); 1087 } 1088 1089 if (hasAttributes) { 1090 final int propertiesSize = 1091 assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header)); 1092 if (skipBytesFully(header, propertiesSize) < propertiesSize) { 1093 throw new IOException("invalid propertiesSize in folder"); 1094 } 1095 } 1096 } 1097 assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams); 1098 assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams); 1099 stats.numberOfOutStreams += totalOutStreams; 1100 stats.numberOfInStreams += totalInStreams; 1101 1102 if (totalOutStreams == 0) { 1103 throw new IOException("Total output streams can't be 0"); 1104 } 1105 1106 final int numBindPairs = 1107 assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1); 1108 if (totalInStreams < numBindPairs) { 1109 throw new IOException("Total input streams can't be less than the number of bind pairs"); 1110 } 1111 final BitSet inStreamsBound = new BitSet((int) totalInStreams); 1112 for (int i = 0; i < numBindPairs; i++) { 1113 final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header)); 1114 if (totalInStreams <= inIndex) { 1115 throw new IOException("inIndex is bigger than 
number of inStreams"); 1116 } 1117 inStreamsBound.set(inIndex); 1118 final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header)); 1119 if (totalOutStreams <= outIndex) { 1120 throw new IOException("outIndex is bigger than number of outStreams"); 1121 } 1122 } 1123 1124 final int numPackedStreams = 1125 assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs); 1126 1127 if (numPackedStreams == 1) { 1128 if (inStreamsBound.nextClearBit(0) == -1) { 1129 throw new IOException("Couldn't find stream's bind pair index"); 1130 } 1131 } else { 1132 for (int i = 0; i < numPackedStreams; i++) { 1133 final int packedStreamIndex = 1134 assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header)); 1135 if (packedStreamIndex >= totalInStreams) { 1136 throw new IOException("packedStreamIndex is bigger than number of totalInStreams"); 1137 } 1138 } 1139 } 1140 1141 return (int) totalOutStreams; 1142 } 1143 1144 private Folder readFolder(final ByteBuffer header) throws IOException { 1145 final Folder folder = new Folder(); 1146 1147 final long numCoders = readUint64(header); 1148 final Coder[] coders = new Coder[(int)numCoders]; 1149 long totalInStreams = 0; 1150 long totalOutStreams = 0; 1151 for (int i = 0; i < coders.length; i++) { 1152 coders[i] = new Coder(); 1153 final int bits = getUnsignedByte(header); 1154 final int idSize = bits & 0xf; 1155 final boolean isSimple = (bits & 0x10) == 0; 1156 final boolean hasAttributes = (bits & 0x20) != 0; 1157 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1158 1159 coders[i].decompressionMethodId = new byte[idSize]; 1160 get(header, coders[i].decompressionMethodId); 1161 if (isSimple) { 1162 coders[i].numInStreams = 1; 1163 coders[i].numOutStreams = 1; 1164 } else { 1165 coders[i].numInStreams = readUint64(header); 1166 coders[i].numOutStreams = readUint64(header); 1167 } 1168 totalInStreams += coders[i].numInStreams; 1169 totalOutStreams += coders[i].numOutStreams; 1170 if 
(hasAttributes) { 1171 final long propertiesSize = readUint64(header); 1172 coders[i].properties = new byte[(int)propertiesSize]; 1173 get(header, coders[i].properties); 1174 } 1175 // would need to keep looping as above: 1176 while (moreAlternativeMethods) { 1177 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1178 "The reference implementation doesn't support them either."); 1179 } 1180 } 1181 folder.coders = coders; 1182 folder.totalInputStreams = totalInStreams; 1183 folder.totalOutputStreams = totalOutStreams; 1184 1185 final long numBindPairs = totalOutStreams - 1; 1186 final BindPair[] bindPairs = new BindPair[(int)numBindPairs]; 1187 for (int i = 0; i < bindPairs.length; i++) { 1188 bindPairs[i] = new BindPair(); 1189 bindPairs[i].inIndex = readUint64(header); 1190 bindPairs[i].outIndex = readUint64(header); 1191 } 1192 folder.bindPairs = bindPairs; 1193 1194 final long numPackedStreams = totalInStreams - numBindPairs; 1195 final long[] packedStreams = new long[(int)numPackedStreams]; 1196 if (numPackedStreams == 1) { 1197 int i; 1198 for (i = 0; i < (int)totalInStreams; i++) { 1199 if (folder.findBindPairForInStream(i) < 0) { 1200 break; 1201 } 1202 } 1203 packedStreams[0] = i; 1204 } else { 1205 for (int i = 0; i < (int)numPackedStreams; i++) { 1206 packedStreams[i] = readUint64(header); 1207 } 1208 } 1209 folder.packedStreams = packedStreams; 1210 1211 return folder; 1212 } 1213 1214 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 1215 final int areAllDefined = getUnsignedByte(header); 1216 final BitSet bits; 1217 if (areAllDefined != 0) { 1218 bits = new BitSet(size); 1219 for (int i = 0; i < size; i++) { 1220 bits.set(i, true); 1221 } 1222 } else { 1223 bits = readBits(header, size); 1224 } 1225 return bits; 1226 } 1227 1228 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 1229 final BitSet bits = new BitSet(size); 1230 int mask = 
0; 1231 int cache = 0; 1232 for (int i = 0; i < size; i++) { 1233 if (mask == 0) { 1234 mask = 0x80; 1235 cache = getUnsignedByte(header); 1236 } 1237 bits.set(i, (cache & mask) != 0); 1238 mask >>>= 1; 1239 } 1240 return bits; 1241 } 1242 1243 private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1244 stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header)); 1245 1246 int emptyStreams = -1; 1247 while (true) { 1248 final int propertyType = getUnsignedByte(header); 1249 if (propertyType == 0) { 1250 break; 1251 } 1252 final long size = readUint64(header); 1253 switch (propertyType) { 1254 case NID.kEmptyStream: { 1255 emptyStreams = readBits(header, stats.numberOfEntries).cardinality(); 1256 break; 1257 } 1258 case NID.kEmptyFile: { 1259 if (emptyStreams == -1) { 1260 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 1261 } 1262 readBits(header, emptyStreams); 1263 break; 1264 } 1265 case NID.kAnti: { 1266 if (emptyStreams == -1) { 1267 throw new IOException("Header format error: kEmptyStream must appear before kAnti"); 1268 } 1269 readBits(header, emptyStreams); 1270 break; 1271 } 1272 case NID.kName: { 1273 final int external = getUnsignedByte(header); 1274 if (external != 0) { 1275 throw new IOException("Not implemented"); 1276 } 1277 final int namesLength = 1278 assertFitsIntoNonNegativeInt("file names length", size - 1); 1279 if ((namesLength & 1) != 0) { 1280 throw new IOException("File names length invalid"); 1281 } 1282 1283 int filesSeen = 0; 1284 for (int i = 0; i < namesLength; i += 2) { 1285 final char c = getChar(header); 1286 if (c == 0) { 1287 filesSeen++; 1288 } 1289 } 1290 if (filesSeen != stats.numberOfEntries) { 1291 throw new IOException("Invalid number of file names (" + filesSeen + " instead of " 1292 + stats.numberOfEntries + ")"); 1293 } 1294 break; 1295 } 1296 case NID.kCTime: { 1297 final int timesDefined = 
readAllOrBits(header, stats.numberOfEntries) 1298 .cardinality(); 1299 final int external = getUnsignedByte(header); 1300 if (external != 0) { 1301 throw new IOException("Not implemented"); 1302 } 1303 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1304 throw new IOException("invalid creation dates size"); 1305 } 1306 break; 1307 } 1308 case NID.kATime: { 1309 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1310 .cardinality(); 1311 final int external = getUnsignedByte(header); 1312 if (external != 0) { 1313 throw new IOException("Not implemented"); 1314 } 1315 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1316 throw new IOException("invalid access dates size"); 1317 } 1318 break; 1319 } 1320 case NID.kMTime: { 1321 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1322 .cardinality(); 1323 final int external = getUnsignedByte(header); 1324 if (external != 0) { 1325 throw new IOException("Not implemented"); 1326 } 1327 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1328 throw new IOException("invalid modification dates size"); 1329 } 1330 break; 1331 } 1332 case NID.kWinAttributes: { 1333 final int attributesDefined = readAllOrBits(header, stats.numberOfEntries) 1334 .cardinality(); 1335 final int external = getUnsignedByte(header); 1336 if (external != 0) { 1337 throw new IOException("Not implemented"); 1338 } 1339 if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) { 1340 throw new IOException("invalid windows attributes size"); 1341 } 1342 break; 1343 } 1344 case NID.kStartPos: { 1345 throw new IOException("kStartPos is unsupported, please report"); 1346 } 1347 case NID.kDummy: { 1348 // 7z 9.20 asserts the content is all zeros and ignores the property 1349 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1350 1351 if (skipBytesFully(header, size) < size) { 1352 throw new IOException("Incomplete kDummy 
property"); 1353 } 1354 break; 1355 } 1356 1357 default: { 1358 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1359 if (skipBytesFully(header, size) < size) { 1360 throw new IOException("Incomplete property of type " + propertyType); 1361 } 1362 break; 1363 } 1364 } 1365 } 1366 stats.numberOfEntriesWithStream = stats.numberOfEntries - (emptyStreams > 0 ? emptyStreams : 0); 1367 } 1368 1369 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 1370 final int numFilesInt = (int) readUint64(header);; 1371 final Map<Integer, SevenZArchiveEntry> fileMap = new HashMap<>(); 1372 BitSet isEmptyStream = null; 1373 BitSet isEmptyFile = null; 1374 BitSet isAnti = null; 1375 while (true) { 1376 final int propertyType = getUnsignedByte(header); 1377 if (propertyType == 0) { 1378 break; 1379 } 1380 final long size = readUint64(header); 1381 switch (propertyType) { 1382 case NID.kEmptyStream: { 1383 isEmptyStream = readBits(header, numFilesInt); 1384 break; 1385 } 1386 case NID.kEmptyFile: { 1387 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 1388 break; 1389 } 1390 case NID.kAnti: { 1391 isAnti = readBits(header, isEmptyStream.cardinality()); 1392 break; 1393 } 1394 case NID.kName: { 1395 /* final int external = */ getUnsignedByte(header); 1396 final byte[] names = new byte[(int) (size - 1)]; 1397 final int namesLength = names.length; 1398 get(header, names); 1399 int nextFile = 0; 1400 int nextName = 0; 1401 for (int i = 0; i < namesLength; i += 2) { 1402 if (names[i] == 0 && names[i + 1] == 0) { 1403 checkEntryIsInitialized(fileMap, nextFile); 1404 fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, StandardCharsets.UTF_16LE)); 1405 nextName = i + 2; 1406 nextFile++; 1407 } 1408 } 1409 if (nextName != namesLength || nextFile != numFilesInt) { 1410 throw new IOException("Error parsing file names"); 1411 } 1412 break; 1413 } 1414 case NID.kCTime: { 1415 final BitSet 
timesDefined = readAllOrBits(header, numFilesInt); 1416 /* final int external = */ getUnsignedByte(header); 1417 for (int i = 0; i < numFilesInt; i++) { 1418 checkEntryIsInitialized(fileMap, i); 1419 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1420 entryAtIndex.setHasCreationDate(timesDefined.get(i)); 1421 if (entryAtIndex.getHasCreationDate()) { 1422 entryAtIndex.setCreationDate(getLong(header)); 1423 } 1424 } 1425 break; 1426 } 1427 case NID.kATime: { 1428 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1429 /* final int external = */ getUnsignedByte(header); 1430 for (int i = 0; i < numFilesInt; i++) { 1431 checkEntryIsInitialized(fileMap, i); 1432 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1433 entryAtIndex.setHasAccessDate(timesDefined.get(i)); 1434 if (entryAtIndex.getHasAccessDate()) { 1435 entryAtIndex.setAccessDate(getLong(header)); 1436 } 1437 } 1438 break; 1439 } 1440 case NID.kMTime: { 1441 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1442 /* final int external = */ getUnsignedByte(header); 1443 for (int i = 0; i < numFilesInt; i++) { 1444 checkEntryIsInitialized(fileMap, i); 1445 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1446 entryAtIndex.setHasLastModifiedDate(timesDefined.get(i)); 1447 if (entryAtIndex.getHasLastModifiedDate()) { 1448 entryAtIndex.setLastModifiedDate(getLong(header)); 1449 } 1450 } 1451 break; 1452 } 1453 case NID.kWinAttributes: { 1454 final BitSet attributesDefined = readAllOrBits(header, numFilesInt); 1455 /* final int external = */ getUnsignedByte(header); 1456 for (int i = 0; i < numFilesInt; i++) { 1457 checkEntryIsInitialized(fileMap, i); 1458 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1459 entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i)); 1460 if (entryAtIndex.getHasWindowsAttributes()) { 1461 entryAtIndex.setWindowsAttributes(getInt(header)); 1462 } 1463 } 1464 break; 1465 } 1466 case NID.kDummy: { 1467 // 7z 9.20 asserts the 
content is all zeros and ignores the property 1468 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1469 1470 skipBytesFully(header, size); 1471 break; 1472 } 1473 1474 default: { 1475 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1476 skipBytesFully(header, size); 1477 break; 1478 } 1479 } 1480 } 1481 int nonEmptyFileCounter = 0; 1482 int emptyFileCounter = 0; 1483 for (int i = 0; i < numFilesInt; i++) { 1484 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1485 if (entryAtIndex == null) { 1486 continue; 1487 } 1488 entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 1489 if (entryAtIndex.hasStream()) { 1490 if (archive.subStreamsInfo == null) { 1491 throw new IOException("Archive contains file with streams but no subStreamsInfo"); 1492 } 1493 entryAtIndex.setDirectory(false); 1494 entryAtIndex.setAntiItem(false); 1495 entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 1496 entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 1497 entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 1498 if (entryAtIndex.getSize() < 0) { 1499 throw new IOException("broken archive, entry with negative size"); 1500 } 1501 ++nonEmptyFileCounter; 1502 } else { 1503 entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 1504 entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 1505 entryAtIndex.setHasCrc(false); 1506 entryAtIndex.setSize(0); 1507 ++emptyFileCounter; 1508 } 1509 } 1510 final List<SevenZArchiveEntry> entries = new ArrayList<>(); 1511 for (final SevenZArchiveEntry e : fileMap.values()) { 1512 if (e != null) { 1513 entries.add(e); 1514 } 1515 } 1516 archive.files = entries.toArray(SevenZArchiveEntry.EMPTY_SEVEN_Z_ARCHIVE_ENTRY_ARRAY); 1517 calculateStreamMap(archive); 1518 } 1519 1520 private void checkEntryIsInitialized(final Map<Integer, 
SevenZArchiveEntry> archiveEntries, final int index) { 1521 if (archiveEntries.get(index) == null) { 1522 archiveEntries.put(index, new SevenZArchiveEntry()); 1523 } 1524 } 1525 1526 private void calculateStreamMap(final Archive archive) throws IOException { 1527 final StreamMap streamMap = new StreamMap(); 1528 1529 int nextFolderPackStreamIndex = 0; 1530 final int numFolders = archive.folders != null ? archive.folders.length : 0; 1531 streamMap.folderFirstPackStreamIndex = new int[numFolders]; 1532 for (int i = 0; i < numFolders; i++) { 1533 streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 1534 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 1535 } 1536 1537 long nextPackStreamOffset = 0; 1538 final int numPackSizes = archive.packSizes.length; 1539 streamMap.packStreamOffsets = new long[numPackSizes]; 1540 for (int i = 0; i < numPackSizes; i++) { 1541 streamMap.packStreamOffsets[i] = nextPackStreamOffset; 1542 nextPackStreamOffset += archive.packSizes[i]; 1543 } 1544 1545 streamMap.folderFirstFileIndex = new int[numFolders]; 1546 streamMap.fileFolderIndex = new int[archive.files.length]; 1547 int nextFolderIndex = 0; 1548 int nextFolderUnpackStreamIndex = 0; 1549 for (int i = 0; i < archive.files.length; i++) { 1550 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 1551 streamMap.fileFolderIndex[i] = -1; 1552 continue; 1553 } 1554 if (nextFolderUnpackStreamIndex == 0) { 1555 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 1556 streamMap.folderFirstFileIndex[nextFolderIndex] = i; 1557 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 1558 break; 1559 } 1560 } 1561 if (nextFolderIndex >= archive.folders.length) { 1562 throw new IOException("Too few folders in archive"); 1563 } 1564 } 1565 streamMap.fileFolderIndex[i] = nextFolderIndex; 1566 if (!archive.files[i].hasStream()) { 1567 continue; 1568 } 1569 ++nextFolderUnpackStreamIndex; 1570 if 
(nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 1571 ++nextFolderIndex; 1572 nextFolderUnpackStreamIndex = 0; 1573 } 1574 } 1575 1576 archive.streamMap = streamMap; 1577 } 1578 1579 /** 1580 * Build the decoding stream for the entry to be read. 1581 * This method may be called from a random access(getInputStream) or 1582 * sequential access(getNextEntry). 1583 * If this method is called from a random access, some entries may 1584 * need to be skipped(we put them to the deferredBlockStreams and 1585 * skip them when actually needed to improve the performance) 1586 * 1587 * @param entryIndex the index of the entry to be read 1588 * @param isRandomAccess is this called in a random access 1589 * @throws IOException if there are exceptions when reading the file 1590 */ 1591 private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException { 1592 if (archive.streamMap == null) { 1593 throw new IOException("Archive doesn't contain stream information to read entries"); 1594 } 1595 final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 1596 if (folderIndex < 0) { 1597 deferredBlockStreams.clear(); 1598 // TODO: previously it'd return an empty stream? 1599 // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0); 1600 return; 1601 } 1602 final SevenZArchiveEntry file = archive.files[entryIndex]; 1603 boolean isInSameFolder = false; 1604 if (currentFolderIndex == folderIndex) { 1605 // (COMPRESS-320). 1606 // The current entry is within the same (potentially opened) folder. The 1607 // previous stream has to be fully decoded before we can start reading 1608 // but don't do it eagerly -- if the user skips over the entire folder nothing 1609 // is effectively decompressed. 
1610 if (entryIndex > 0) { 1611 file.setContentMethods(archive.files[entryIndex - 1].getContentMethods()); 1612 } 1613 1614 // if this is called in a random access, then the content methods of previous entry may be null 1615 // the content methods should be set to methods of the first entry as it must not be null, 1616 // and the content methods would only be set if the content methods was not set 1617 if(isRandomAccess && file.getContentMethods() == null) { 1618 final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; 1619 final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex]; 1620 file.setContentMethods(folderFirstFile.getContentMethods()); 1621 } 1622 isInSameFolder = true; 1623 } else { 1624 currentFolderIndex = folderIndex; 1625 // We're opening a new folder. Discard any queued streams/ folder stream. 1626 reopenFolderInputStream(folderIndex, file); 1627 } 1628 1629 boolean haveSkippedEntries = false; 1630 if (isRandomAccess) { 1631 // entries will only need to be skipped if it's a random access 1632 haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex); 1633 } 1634 1635 if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) { 1636 // we don't need to add another entry to the deferredBlockStreams when : 1637 // 1. If this method is called in a random access and the entry index 1638 // to be read equals to the current entry index, the input stream 1639 // has already been put in the deferredBlockStreams 1640 // 2. 
If this entry has not been read(which means no entries are skipped) 1641 return; 1642 } 1643 1644 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 1645 if (file.getHasCrc()) { 1646 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 1647 } 1648 1649 deferredBlockStreams.add(fileStream); 1650 } 1651 1652 /** 1653 * Discard any queued streams/ folder stream, and reopen the current folder input stream. 1654 * 1655 * @param folderIndex the index of the folder to reopen 1656 * @param file the 7z entry to read 1657 * @throws IOException if exceptions occur when reading the 7z file 1658 */ 1659 private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException { 1660 deferredBlockStreams.clear(); 1661 if (currentFolderInputStream != null) { 1662 currentFolderInputStream.close(); 1663 currentFolderInputStream = null; 1664 } 1665 final Folder folder = archive.folders[folderIndex]; 1666 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 1667 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 1668 archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 1669 1670 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 1671 } 1672 1673 /** 1674 * Skip all the entries if needed. 1675 * Entries need to be skipped when: 1676 * <p> 1677 * 1. it's a random access 1678 * 2. 
one of these 2 condition is meet : 1679 * <p> 1680 * 2.1 currentEntryIndex != entryIndex : this means there are some entries 1681 * to be skipped(currentEntryIndex < entryIndex) or the entry has already 1682 * been read(currentEntryIndex > entryIndex) 1683 * <p> 1684 * 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead: 1685 * if the entry to be read is the current entry, but some data of it has 1686 * been read before, then we need to reopen the stream of the folder and 1687 * skip all the entries before the current entries 1688 * 1689 * @param entryIndex the entry to be read 1690 * @param isInSameFolder are the entry to be read and the current entry in the same folder 1691 * @param folderIndex the index of the folder which contains the entry 1692 * @return true if there are entries actually skipped 1693 * @throws IOException there are exceptions when skipping entries 1694 * @since 1.21 1695 */ 1696 private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException { 1697 final SevenZArchiveEntry file = archive.files[entryIndex]; 1698 // if the entry to be read is the current entry, and the entry has not 1699 // been read yet, then there's nothing we need to do 1700 if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) { 1701 return false; 1702 } 1703 1704 // 1. if currentEntryIndex < entryIndex : 1705 // this means there are some entries to be skipped(currentEntryIndex < entryIndex) 1706 // 2. 
if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) : 1707 // this means the entry has already been read before, and we need to reopen the 1708 // stream of the folder and skip all the entries before the current entries 1709 int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex]; 1710 if (isInSameFolder) { 1711 if (currentEntryIndex < entryIndex) { 1712 // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped 1713 filesToSkipStartIndex = currentEntryIndex + 1; 1714 } else { 1715 // the entry is in the same folder of current entry, but it has already been read before, we need to reset 1716 // the position of the currentFolderInputStream to the beginning of folder, and then skip the files 1717 // from the start entry of the folder again 1718 reopenFolderInputStream(folderIndex, file); 1719 } 1720 } 1721 1722 for (int i = filesToSkipStartIndex; i < entryIndex; i++) { 1723 final SevenZArchiveEntry fileToSkip = archive.files[i]; 1724 InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize()); 1725 if (fileToSkip.getHasCrc()) { 1726 fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue()); 1727 } 1728 deferredBlockStreams.add(fileStreamToSkip); 1729 1730 // set the content methods as well, it equals to file.getContentMethods() because they are in same folder 1731 fileToSkip.setContentMethods(file.getContentMethods()); 1732 } 1733 return true; 1734 } 1735 1736 /** 1737 * Find out if any data of current entry has been read or not. 1738 * This is achieved by comparing the bytes remaining to read 1739 * and the size of the file. 
1740 * 1741 * @return true if any data of current entry has been read 1742 * @since 1.21 1743 */ 1744 private boolean hasCurrentEntryBeenRead() { 1745 boolean hasCurrentEntryBeenRead = false; 1746 if (!deferredBlockStreams.isEmpty()) { 1747 final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); 1748 // get the bytes remaining to read, and compare it with the size of 1749 // the file to figure out if the file has been read 1750 if (currentEntryInputStream instanceof CRC32VerifyingInputStream) { 1751 hasCurrentEntryBeenRead = ((CRC32VerifyingInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 1752 } 1753 1754 if (currentEntryInputStream instanceof BoundedInputStream) { 1755 hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 1756 } 1757 } 1758 return hasCurrentEntryBeenRead; 1759 } 1760 1761 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, 1762 final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { 1763 channel.position(folderOffset); 1764 InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( 1765 new BoundedSeekableByteChannelInputStream(channel, 1766 archive.packSizes[firstPackStreamIndex]))) { 1767 @Override 1768 public int read() throws IOException { 1769 final int r = in.read(); 1770 if (r >= 0) { 1771 count(1); 1772 } 1773 return r; 1774 } 1775 @Override 1776 public int read(final byte[] b) throws IOException { 1777 return read(b, 0, b.length); 1778 } 1779 @Override 1780 public int read(final byte[] b, final int off, final int len) throws IOException { 1781 if (len == 0) { 1782 return 0; 1783 } 1784 final int r = in.read(b, off, len); 1785 if (r >= 0) { 1786 count(r); 1787 } 1788 return r; 1789 } 1790 private void count(final int c) { 1791 compressedBytesReadFromCurrentEntry += c; 1792 } 
1793 }; 1794 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 1795 for (final Coder coder : folder.getOrderedCoders()) { 1796 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1797 throw new IOException("Multi input/output stream coders are not yet supported"); 1798 } 1799 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 1800 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, 1801 folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); 1802 methods.addFirst(new SevenZMethodConfiguration(method, 1803 Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 1804 } 1805 entry.setContentMethods(methods); 1806 if (folder.hasCrc) { 1807 return new CRC32VerifyingInputStream(inputStreamStack, 1808 folder.getUnpackSize(), folder.crc); 1809 } 1810 return inputStreamStack; 1811 } 1812 1813 /** 1814 * Reads a byte of data. 1815 * 1816 * @return the byte read, or -1 if end of input is reached 1817 * @throws IOException 1818 * if an I/O error has occurred 1819 */ 1820 public int read() throws IOException { 1821 final int b = getCurrentStream().read(); 1822 if (b >= 0) { 1823 uncompressedBytesReadFromCurrentEntry++; 1824 } 1825 return b; 1826 } 1827 1828 private InputStream getCurrentStream() throws IOException { 1829 if (archive.files[currentEntryIndex].getSize() == 0) { 1830 return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); 1831 } 1832 if (deferredBlockStreams.isEmpty()) { 1833 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 1834 } 1835 1836 while (deferredBlockStreams.size() > 1) { 1837 // In solid compression mode we need to decompress all leading folder' 1838 // streams to get access to an entry. We defer this until really needed 1839 // so that entire blocks can be skipped without wasting time for decompression. 
1840 try (final InputStream stream = deferredBlockStreams.remove(0)) { 1841 IOUtils.skip(stream, Long.MAX_VALUE); 1842 } 1843 compressedBytesReadFromCurrentEntry = 0; 1844 } 1845 1846 return deferredBlockStreams.get(0); 1847 } 1848 1849 /** 1850 * Returns an InputStream for reading the contents of the given entry. 1851 * 1852 * <p>For archives using solid compression randomly accessing 1853 * entries will be significantly slower than reading the archive 1854 * sequentially.</p> 1855 * 1856 * @param entry the entry to get the stream for. 1857 * @return a stream to read the entry from. 1858 * @throws IOException if unable to create an input stream from the zipentry 1859 * @since Compress 1.20 1860 */ 1861 public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { 1862 int entryIndex = -1; 1863 for (int i = 0; i < this.archive.files.length;i++) { 1864 if (entry == this.archive.files[i]) { 1865 entryIndex = i; 1866 break; 1867 } 1868 } 1869 1870 if (entryIndex < 0) { 1871 throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName); 1872 } 1873 1874 buildDecodingStream(entryIndex, true); 1875 currentEntryIndex = entryIndex; 1876 currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 1877 return getCurrentStream(); 1878 } 1879 1880 /** 1881 * Reads data into an array of bytes. 1882 * 1883 * @param b the array to write data to 1884 * @return the number of bytes read, or -1 if end of input is reached 1885 * @throws IOException 1886 * if an I/O error has occurred 1887 */ 1888 public int read(final byte[] b) throws IOException { 1889 return read(b, 0, b.length); 1890 } 1891 1892 /** 1893 * Reads data into an array of bytes. 
1894 * 1895 * @param b the array to write data to 1896 * @param off offset into the buffer to start filling at 1897 * @param len of bytes to read 1898 * @return the number of bytes read, or -1 if end of input is reached 1899 * @throws IOException 1900 * if an I/O error has occurred 1901 */ 1902 public int read(final byte[] b, final int off, final int len) throws IOException { 1903 if (len == 0) { 1904 return 0; 1905 } 1906 final int cnt = getCurrentStream().read(b, off, len); 1907 if (cnt > 0) { 1908 uncompressedBytesReadFromCurrentEntry += cnt; 1909 } 1910 return cnt; 1911 } 1912 1913 /** 1914 * Provides statistics for bytes read from the current entry. 1915 * 1916 * @return statistics for bytes read from the current entry 1917 * @since 1.17 1918 */ 1919 public InputStreamStatistics getStatisticsForCurrentEntry() { 1920 return new InputStreamStatistics() { 1921 @Override 1922 public long getCompressedCount() { 1923 return compressedBytesReadFromCurrentEntry; 1924 } 1925 @Override 1926 public long getUncompressedCount() { 1927 return uncompressedBytesReadFromCurrentEntry; 1928 } 1929 }; 1930 } 1931 1932 private static long readUint64(final ByteBuffer in) throws IOException { 1933 // long rather than int as it might get shifted beyond the range of an int 1934 final long firstByte = getUnsignedByte(in); 1935 int mask = 0x80; 1936 long value = 0; 1937 for (int i = 0; i < 8; i++) { 1938 if ((firstByte & mask) == 0) { 1939 return value | ((firstByte & (mask - 1)) << (8 * i)); 1940 } 1941 final long nextByte = getUnsignedByte(in); 1942 value |= nextByte << (8 * i); 1943 mask >>>= 1; 1944 } 1945 return value; 1946 } 1947 1948 private static char getChar(final ByteBuffer buf) throws IOException { 1949 if (buf.remaining() < 2) { 1950 throw new EOFException(); 1951 } 1952 return buf.getChar(); 1953 } 1954 1955 private static int getInt(final ByteBuffer buf) throws IOException { 1956 if (buf.remaining() < 4) { 1957 throw new EOFException(); 1958 } 1959 return buf.getInt(); 
1960 } 1961 1962 private static long getLong(final ByteBuffer buf) throws IOException { 1963 if (buf.remaining() < 8) { 1964 throw new EOFException(); 1965 } 1966 return buf.getLong(); 1967 } 1968 1969 private static void get(final ByteBuffer buf, final byte[] to) throws IOException { 1970 if (buf.remaining() < to.length) { 1971 throw new EOFException(); 1972 } 1973 buf.get(to); 1974 } 1975 1976 private static int getUnsignedByte(final ByteBuffer buf) throws IOException { 1977 if (!buf.hasRemaining()) { 1978 throw new EOFException(); 1979 } 1980 return buf.get() & 0xff; 1981 } 1982 1983 /** 1984 * Checks if the signature matches what is expected for a 7z file. 1985 * 1986 * @param signature 1987 * the bytes to check 1988 * @param length 1989 * the number of bytes to check 1990 * @return true, if this is the signature of a 7z archive. 1991 * @since 1.8 1992 */ 1993 public static boolean matches(final byte[] signature, final int length) { 1994 if (length < sevenZSignature.length) { 1995 return false; 1996 } 1997 1998 for (int i = 0; i < sevenZSignature.length; i++) { 1999 if (signature[i] != sevenZSignature[i]) { 2000 return false; 2001 } 2002 } 2003 return true; 2004 } 2005 2006 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) throws IOException { 2007 if (bytesToSkip < 1) { 2008 return 0; 2009 } 2010 final int current = input.position(); 2011 final int maxSkip = input.remaining(); 2012 if (maxSkip < bytesToSkip) { 2013 bytesToSkip = maxSkip; 2014 } 2015 input.position(current + (int) bytesToSkip); 2016 return bytesToSkip; 2017 } 2018 2019 private void readFully(final ByteBuffer buf) throws IOException { 2020 ((Buffer)buf).rewind(); 2021 IOUtils.readFully(channel, buf); 2022 ((Buffer)buf).flip(); 2023 } 2024 2025 @Override 2026 public String toString() { 2027 return archive.toString(); 2028 } 2029 2030 /** 2031 * Derives a default file name from the archive name - if known. 
2032 * 2033 * <p>This implements the same heuristics the 7z tools use. In 2034 * 7z's case if an archive contains entries without a name - 2035 * i.e. {@link SevenZArchiveEntry#getName} returns {@code null} - 2036 * then its command line and GUI tools will use this default name 2037 * when extracting the entries.</p> 2038 * 2039 * @return null if the name of the archive is unknown. Otherwise 2040 * if the name of the archive has got any extension, it is 2041 * stripped and the remainder returned. Finally if the name of the 2042 * archive hasn't got any extension then a {@code ~} character is 2043 * appended to the archive name. 2044 * 2045 * @since 1.19 2046 */ 2047 public String getDefaultName() { 2048 if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { 2049 return null; 2050 } 2051 2052 final String lastSegment = new File(fileName).getName(); 2053 final int dotPos = lastSegment.lastIndexOf("."); 2054 if (dotPos > 0) { // if the file starts with a dot then this is not an extension 2055 return lastSegment.substring(0, dotPos); 2056 } 2057 return lastSegment + "~"; 2058 } 2059 2060 private static final CharsetEncoder PASSWORD_ENCODER = StandardCharsets.UTF_16LE.newEncoder(); 2061 2062 private static byte[] utf16Decode(final char[] chars) throws IOException { 2063 if (chars == null) { 2064 return null; 2065 } 2066 final ByteBuffer encoded = PASSWORD_ENCODER.encode(CharBuffer.wrap(chars)); 2067 if (encoded.hasArray()) { 2068 return encoded.array(); 2069 } 2070 final byte[] e = new byte[encoded.remaining()]; 2071 encoded.get(e); 2072 return e; 2073 } 2074 2075 private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { 2076 if (value > Integer.MAX_VALUE || value < 0) { 2077 throw new IOException("Cannot handle " + what + " " + value); 2078 } 2079 return (int) value; 2080 } 2081 2082 private static class ArchiveStatistics { 2083 private int numberOfPackedStreams; 2084 private long numberOfCoders; 2085 private long 
numberOfOutStreams; 2086 private long numberOfInStreams; 2087 private long numberOfUnpackSubStreams; 2088 private int numberOfFolders; 2089 private BitSet folderHasCrc; 2090 private int numberOfEntries; 2091 private int numberOfEntriesWithStream; 2092 2093 @Override 2094 public String toString() { 2095 return "Archive with " + numberOfEntries + " entries in " + numberOfFolders 2096 + " folders. Estimated size " + (estimateSize()/1024l) + " kB."; 2097 } 2098 2099 long estimateSize() { 2100 long lowerBound = 16l * numberOfPackedStreams /* packSizes, packCrcs in Archive */ 2101 + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ 2102 + numberOfFolders * folderSize() /* folders in Archive */ 2103 + numberOfCoders * coderSize() /* coders in Folder */ 2104 + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ 2105 + 8l * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ 2106 + 8l * numberOfOutStreams /* unpackSizes in Folder */ 2107 + numberOfEntries * entrySize() /* files in Archive */ 2108 + streamMapSize() 2109 ; 2110 return 2 * lowerBound /* conservative guess */; 2111 } 2112 2113 void assertValidity(int maxMemoryLimitInKb) throws IOException { 2114 if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { 2115 throw new IOException("archive with entries but no folders"); 2116 } 2117 if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { 2118 throw new IOException("archive doesn't contain enough substreams for entries"); 2119 } 2120 2121 final long memoryNeededInKb = estimateSize() / 1024; 2122 if (maxMemoryLimitInKb < memoryNeededInKb) { 2123 throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb); 2124 } 2125 } 2126 2127 private long folderSize() { 2128 return 30; /* nested arrays are accounted for separately */ 2129 } 2130 2131 private long coderSize() { 2132 return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ 2133 
+ 16 2134 + 4 /* properties, guess */ 2135 ; 2136 } 2137 2138 private long bindPairSize() { 2139 return 16; 2140 } 2141 2142 private long entrySize() { 2143 return 100; /* real size depends on name length, everything without name is about 70 bytes */ 2144 } 2145 2146 private long streamMapSize() { 2147 return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ 2148 + 8 * numberOfPackedStreams /* packStreamOffsets */ 2149 + 4 * numberOfEntries /* fileFolderIndex */ 2150 ; 2151 } 2152 } 2153}