001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lz4; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorInputStream; 026import org.apache.commons.compress.utils.BoundedInputStream; 027import org.apache.commons.compress.utils.ByteUtils; 028import org.apache.commons.compress.utils.ChecksumCalculatingInputStream; 029import org.apache.commons.compress.utils.CountingInputStream; 030import org.apache.commons.compress.utils.IOUtils; 031import org.apache.commons.compress.utils.InputStreamStatistics; 032 033/** 034 * CompressorInputStream for the LZ4 frame format. 035 * 036 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p> 037 * 038 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a> 039 * @since 1.14 040 * @NotThreadSafe 041 */ 042public class FramedLZ4CompressorInputStream extends CompressorInputStream 043 implements InputStreamStatistics { 044 045 // used by FramedLZ4CompressorOutputStream as well 046 static final byte[] LZ4_SIGNATURE = new byte[] { //NOSONAR 047 4, 0x22, 0x4d, 0x18 048 }; 049 private static final byte[] SKIPPABLE_FRAME_TRAILER = new byte[] { 050 0x2a, 0x4d, 0x18 051 }; 052 private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50; 053 054 static final int VERSION_MASK = 0xC0; 055 static final int SUPPORTED_VERSION = 0x40; 056 static final int BLOCK_INDEPENDENCE_MASK = 0x20; 057 static final int BLOCK_CHECKSUM_MASK = 0x10; 058 static final int CONTENT_SIZE_MASK = 0x08; 059 static final int CONTENT_CHECKSUM_MASK = 0x04; 060 static final int BLOCK_MAX_SIZE_MASK = 0x70; 061 static final int UNCOMPRESSED_FLAG_MASK = 0x80000000; 062 063 // used in no-arg read method 064 private final byte[] oneByte = new byte[1]; 065 066 private final ByteUtils.ByteSupplier supplier = this::readOneByte; 067 068 private final CountingInputStream inputStream; 069 private final boolean decompressConcatenated; 070 071 private boolean expectBlockChecksum; 072 private boolean expectBlockDependency; 073 private boolean expectContentSize; 074 private boolean expectContentChecksum; 075 076 private InputStream currentBlock; 077 private boolean endReached, inUncompressed; 078 079 // used for frame header checksum and content checksum, if present 080 private final XXHash32 contentHash = new XXHash32(); 081 082 // used for block checksum, if present 083 private final XXHash32 blockHash = new XXHash32(); 084 085 // only created if the frame doesn't set the block independence flag 086 private byte[] blockDependencyBuffer; 087 088 /** 089 * Creates a new input stream that decompresses streams compressed 090 * using the LZ4 frame format and stops after decompressing the 091 * first frame. 092 * @param in the InputStream from which to read the compressed data 093 * @throws IOException if reading fails 094 */ 095 public FramedLZ4CompressorInputStream(final InputStream in) throws IOException { 096 this(in, false); 097 } 098 099 /** 100 * Creates a new input stream that decompresses streams compressed 101 * using the LZ4 frame format. 102 * @param in the InputStream from which to read the compressed data 103 * @param decompressConcatenated if true, decompress until the end 104 * of the input; if false, stop after the first LZ4 frame 105 * and leave the input position to point to the next byte 106 * after the frame stream 107 * @throws IOException if reading fails 108 */ 109 public FramedLZ4CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException { 110 this.inputStream = new CountingInputStream(in); 111 this.decompressConcatenated = decompressConcatenated; 112 init(true); 113 } 114 115 /** {@inheritDoc} */ 116 @Override 117 public int read() throws IOException { 118 return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; 119 } 120 121 /** {@inheritDoc} */ 122 @Override 123 public void close() throws IOException { 124 try { 125 if (currentBlock != null) { 126 currentBlock.close(); 127 currentBlock = null; 128 } 129 } finally { 130 inputStream.close(); 131 } 132 } 133 134 /** {@inheritDoc} */ 135 @Override 136 public int read(final byte[] b, final int off, final int len) throws IOException { 137 if (len == 0) { 138 return 0; 139 } 140 if (endReached) { 141 return -1; 142 } 143 int r = readOnce(b, off, len); 144 if (r == -1) { 145 nextBlock(); 146 if (!endReached) { 147 r = readOnce(b, off, len); 148 } 149 } 150 if (r != -1) { 151 if (expectBlockDependency) { 152 appendToBlockDependencyBuffer(b, off, r); 153 } 154 if (expectContentChecksum) { 155 contentHash.update(b, off, r); 156 } 157 } 158 return r; 159 } 160 161 /** 162 * @since 1.17 163 */ 164 @Override 165 public long getCompressedCount() { 166 return inputStream.getBytesRead(); 167 } 168 169 private void init(final boolean firstFrame) throws IOException { 170 if (readSignature(firstFrame)) { 171 readFrameDescriptor(); 172 nextBlock(); 173 } 174 } 175 176 private boolean readSignature(final boolean firstFrame) throws IOException { 177 final String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage"; 178 final byte[] b = new byte[4]; 179 int read = IOUtils.readFully(inputStream, b); 180 count(read); 181 if (0 == read && !firstFrame) { 182 // good LZ4 frame and nothing after it 183 endReached = true; 184 return false; 185 } 186 if (4 != read) { 187 throw new IOException(garbageMessage); 188 } 189 190 read = skipSkippableFrame(b); 191 if (0 == read && !firstFrame) { 192 // good LZ4 frame with only some skippable frames after it 193 endReached = true; 194 return false; 195 } 196 if (4 != read || !matches(b, 4)) { 197 throw new IOException(garbageMessage); 198 } 199 return true; 200 } 201 202 private void readFrameDescriptor() throws IOException { 203 final int flags = readOneByte(); 204 if (flags == -1) { 205 throw new IOException("Premature end of stream while reading frame flags"); 206 } 207 contentHash.update(flags); 208 if ((flags & VERSION_MASK) != SUPPORTED_VERSION) { 209 throw new IOException("Unsupported version " + (flags >> 6)); 210 } 211 expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0; 212 if (expectBlockDependency) { 213 if (blockDependencyBuffer == null) { 214 blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]; 215 } 216 } else { 217 blockDependencyBuffer = null; 218 } 219 expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0; 220 expectContentSize = (flags & CONTENT_SIZE_MASK) != 0; 221 expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0; 222 final int bdByte = readOneByte(); 223 if (bdByte == -1) { // max size is irrelevant for this implementation 224 throw new IOException("Premature end of stream while reading frame BD byte"); 225 } 226 contentHash.update(bdByte); 227 if (expectContentSize) { // for now we don't care, contains the uncompressed size 228 final byte[] contentSize = new byte[8]; 229 final int skipped = IOUtils.readFully(inputStream, contentSize); 230 count(skipped); 231 if (8 != skipped) { 232 throw new IOException("Premature end of stream while reading content size"); 233 } 234 contentHash.update(contentSize, 0, contentSize.length); 235 } 236 final int headerHash = readOneByte(); 237 if (headerHash == -1) { // partial hash of header. 238 throw new IOException("Premature end of stream while reading frame header checksum"); 239 } 240 final int expectedHash = (int) ((contentHash.getValue() >> 8) & 0xff); 241 contentHash.reset(); 242 if (headerHash != expectedHash) { 243 throw new IOException("Frame header checksum mismatch"); 244 } 245 } 246 247 private void nextBlock() throws IOException { 248 maybeFinishCurrentBlock(); 249 final long len = ByteUtils.fromLittleEndian(supplier, 4); 250 final boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0; 251 final int realLen = (int) (len & (~UNCOMPRESSED_FLAG_MASK)); 252 if (realLen < 0) { 253 throw new IOException("Found illegal block with negative size"); 254 } 255 if (realLen == 0) { 256 verifyContentChecksum(); 257 if (!decompressConcatenated) { 258 endReached = true; 259 } else { 260 init(false); 261 } 262 return; 263 } 264 InputStream capped = new BoundedInputStream(inputStream, realLen); 265 if (expectBlockChecksum) { 266 capped = new ChecksumCalculatingInputStream(blockHash, capped); 267 } 268 if (uncompressed) { 269 inUncompressed = true; 270 currentBlock = capped; 271 } else { 272 inUncompressed = false; 273 final BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped); 274 if (expectBlockDependency) { 275 s.prefill(blockDependencyBuffer); 276 } 277 currentBlock = s; 278 } 279 } 280 281 private void maybeFinishCurrentBlock() throws IOException { 282 if (currentBlock != null) { 283 currentBlock.close(); 284 currentBlock = null; 285 if (expectBlockChecksum) { 286 verifyChecksum(blockHash, "block"); 287 blockHash.reset(); 288 } 289 } 290 } 291 292 private void verifyContentChecksum() throws IOException { 293 if (expectContentChecksum) { 294 verifyChecksum(contentHash, "content"); 295 } 296 contentHash.reset(); 297 } 298 299 private void verifyChecksum(final XXHash32 hash, final String kind) throws IOException { 300 final byte[] checksum = new byte[4]; 301 final int read = IOUtils.readFully(inputStream, checksum); 302 count(read); 303 if (4 != read) { 304 throw new IOException("Premature end of stream while reading " + kind + " checksum"); 305 } 306 final long expectedHash = hash.getValue(); 307 if (expectedHash != ByteUtils.fromLittleEndian(checksum)) { 308 throw new IOException(kind + " checksum mismatch."); 309 } 310 } 311 312 private int readOneByte() throws IOException { 313 final int b = inputStream.read(); 314 if (b != -1) { 315 count(1); 316 return b & 0xFF; 317 } 318 return -1; 319 } 320 321 private int readOnce(final byte[] b, final int off, final int len) throws IOException { 322 if (inUncompressed) { 323 final int cnt = currentBlock.read(b, off, len); 324 count(cnt); 325 return cnt; 326 } 327 final BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock; 328 final long before = l.getBytesRead(); 329 final int cnt = currentBlock.read(b, off, len); 330 count(l.getBytesRead() - before); 331 return cnt; 332 } 333 334 private static boolean isSkippableFrameSignature(final byte[] b) { 335 if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) { 336 return false; 337 } 338 for (int i = 1; i < 4; i++) { 339 if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) { 340 return false; 341 } 342 } 343 return true; 344 } 345 346 /** 347 * Skips over the contents of a skippable frame as well as 348 * skippable frames following it. 349 * 350 * <p>It then tries to read four more bytes which are supposed to 351 * hold an LZ4 signature and returns the number of bytes read 352 * while storing the bytes in the given array.</p> 353 */ 354 private int skipSkippableFrame(final byte[] b) throws IOException { 355 int read = 4; 356 while (read == 4 && isSkippableFrameSignature(b)) { 357 final long len = ByteUtils.fromLittleEndian(supplier, 4); 358 if (len < 0) { 359 throw new IOException("Found illegal skippable frame with negative size"); 360 } 361 final long skipped = IOUtils.skip(inputStream, len); 362 count(skipped); 363 if (len != skipped) { 364 throw new IOException("Premature end of stream while skipping frame"); 365 } 366 read = IOUtils.readFully(inputStream, b); 367 count(read); 368 } 369 return read; 370 } 371 372 private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) { 373 len = Math.min(len, blockDependencyBuffer.length); 374 if (len > 0) { 375 final int keep = blockDependencyBuffer.length - len; 376 if (keep > 0) { 377 // move last keep bytes towards the start of the buffer 378 System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep); 379 } 380 // append new data 381 System.arraycopy(b, off, blockDependencyBuffer, keep, len); 382 } 383 } 384 385 /** 386 * Checks if the signature matches what is expected for a .lz4 file. 387 * 388 * <p>.lz4 files start with a four byte signature.</p> 389 * 390 * @param signature the bytes to check 391 * @param length the number of bytes to check 392 * @return true if this is a .sz stream, false otherwise 393 */ 394 public static boolean matches(final byte[] signature, final int length) { 395 396 if (length < LZ4_SIGNATURE.length) { 397 return false; 398 } 399 400 byte[] shortenedSig = signature; 401 if (signature.length > LZ4_SIGNATURE.length) { 402 shortenedSig = new byte[LZ4_SIGNATURE.length]; 403 System.arraycopy(signature, 0, shortenedSig, 0, LZ4_SIGNATURE.length); 404 } 405 406 return Arrays.equals(shortenedSig, LZ4_SIGNATURE); 407 } 408}