001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018 019package org.apache.commons.compress.archivers.zip; 020 021import org.apache.commons.compress.archivers.ArchiveStreamFactory; 022import org.apache.commons.compress.utils.FileNameUtils; 023import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel; 024 025import java.io.File; 026import java.io.IOException; 027import java.io.Serializable; 028import java.nio.ByteBuffer; 029import java.nio.channels.SeekableByteChannel; 030import java.nio.file.Files; 031import java.nio.file.StandardOpenOption; 032import java.util.ArrayList; 033import java.util.Arrays; 034import java.util.Comparator; 035import java.util.List; 036import java.util.Objects; 037import java.util.regex.Pattern; 038 039/** 040 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like. 041 * 042 * <p>If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of 043 * the archive.</p> 044 * 045 * @since 1.20 046 */ 047public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel { 048 private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4; 049 private final ByteBuffer zipSplitSignatureByteBuffer = 050 ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH); 051 052 /** 053 * Concatenates the given channels. 054 * 055 * <p>The channels should be add in ascending order, e.g. z01, 056 * z02, ... z99, zip please note that the .zip file is the last 057 * segment and should be added as the last one in the channels</p> 058 * 059 * @param channels the channels to concatenate 060 * @throws NullPointerException if channels is null 061 * @throws IOException if the first channel doesn't seem to hold 062 * the beginning of a split archive 063 */ 064 public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) 065 throws IOException { 066 super(channels); 067 068 // the first split zip segment should begin with zip split signature 069 assertSplitSignature(channels); 070 } 071 072 /** 073 * Based on the zip specification: 074 * 075 * <p> 076 * 8.5.3 Spanned/Split archives created using PKZIP for Windows 077 * (V2.50 or greater), PKZIP Command Line (V2.50 or greater), 078 * or PKZIP Explorer will include a special spanning 079 * signature as the first 4 bytes of the first segment of 080 * the archive. This signature (0x08074b50) will be 081 * followed immediately by the local header signature for 082 * the first file in the archive. 083 * 084 * <p> 085 * the first 4 bytes of the first zip split segment should be the zip split signature(0x08074B50) 086 * 087 * @param channels channels to be validated 088 * @throws IOException 089 */ 090 private void assertSplitSignature(final List<SeekableByteChannel> channels) 091 throws IOException { 092 final SeekableByteChannel channel = channels.get(0); 093 // the zip split file signature is at the beginning of the first split segment 094 channel.position(0L); 095 096 zipSplitSignatureByteBuffer.rewind(); 097 channel.read(zipSplitSignatureByteBuffer); 098 final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array()); 099 if (!signature.equals(ZipLong.DD_SIG)) { 100 channel.position(0L); 101 throw new IOException("The first zip split segment does not begin with split zip file signature"); 102 } 103 104 channel.position(0L); 105 } 106 107 /** 108 * Concatenates the given channels. 109 * 110 * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) 111 * and theses channels should be added in correct order (e.g. .z01, .z02... .z99, .zip) 112 * @return SeekableByteChannel that concatenates all provided channels 113 * @throws NullPointerException if channels is null 114 * @throws IOException if reading channels fails 115 */ 116 public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException { 117 if (Objects.requireNonNull(channels, "channels must not be null").length == 1) { 118 return channels[0]; 119 } 120 return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels)); 121 } 122 123 /** 124 * Concatenates the given channels. 125 * 126 * @param lastSegmentChannel channel of the last segment of split zip segments, its extension should be .zip 127 * @param channels the channels to concatenate except for the last segment, 128 * note theses channels should be added in correct order (e.g. .z01, .z02... .z99) 129 * @return SeekableByteChannel that concatenates all provided channels 130 * @throws NullPointerException if lastSegmentChannel or channels is null 131 * @throws IOException if the first channel doesn't seem to hold 132 * the beginning of a split archive 133 */ 134 public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, 135 final Iterable<SeekableByteChannel> channels) throws IOException { 136 Objects.requireNonNull(channels, "channels"); 137 Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel"); 138 139 final List<SeekableByteChannel> channelsList = new ArrayList<>(); 140 for (final SeekableByteChannel channel : channels) { 141 channelsList.add(channel); 142 } 143 channelsList.add(lastSegmentChannel); 144 145 return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0])); 146 } 147 148 /** 149 * Concatenates zip split files from the last segment(the extension SHOULD be .zip) 150 * 151 * @param lastSegmentFile the last segment of zip split files, note that the extension SHOULD be .zip 152 * @return SeekableByteChannel that concatenates all zip split files 153 * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip 154 * @throws IOException if the first channel doesn't seem to hold 155 * the beginning of a split archive 156 */ 157 public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException { 158 final String extension = FileNameUtils.getExtension(lastSegmentFile.getCanonicalPath()); 159 if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) { 160 throw new IllegalArgumentException("The extension of last zip split segment should be .zip"); 161 } 162 163 final File parent = lastSegmentFile.getParentFile(); 164 final String fileBaseName = FileNameUtils.getBaseName(lastSegmentFile.getCanonicalPath()); 165 final ArrayList<File> splitZipSegments = new ArrayList<>(); 166 167 // zip split segments should be like z01,z02....z(n-1) based on the zip specification 168 final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+"); 169 final File[] children = parent.listFiles(); 170 if (children != null) { 171 for (final File file : children) { 172 if (!pattern.matcher(file.getName()).matches()) { 173 continue; 174 } 175 176 splitZipSegments.add(file); 177 } 178 } 179 180 splitZipSegments.sort(new ZipSplitSegmentComparator()); 181 return forFiles(lastSegmentFile, splitZipSegments); 182 } 183 184 /** 185 * Concatenates the given files. 186 * 187 * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) 188 * and theses files should be added in correct order (e.g. .z01, .z02... .z99, .zip) 189 * @return SeekableByteChannel that concatenates all provided files 190 * @throws NullPointerException if files is null 191 * @throws IOException if opening a channel for one of the files fails 192 * @throws IOException if the first channel doesn't seem to hold 193 * the beginning of a split archive 194 */ 195 public static SeekableByteChannel forFiles(final File... files) throws IOException { 196 final List<SeekableByteChannel> channels = new ArrayList<>(); 197 for (final File f : Objects.requireNonNull(files, "files must not be null")) { 198 channels.add(Files.newByteChannel(f.toPath(), StandardOpenOption.READ)); 199 } 200 if (channels.size() == 1) { 201 return channels.get(0); 202 } 203 return new ZipSplitReadOnlySeekableByteChannel(channels); 204 } 205 206 /** 207 * Concatenates the given files. 208 * 209 * @param lastSegmentFile the last segment of split zip segments, its extension should be .zip 210 * @param files the files to concatenate except for the last segment, 211 * note theses files should be added in correct order (e.g. .z01, .z02... .z99) 212 * @return SeekableByteChannel that concatenates all provided files 213 * @throws IOException if the first channel doesn't seem to hold 214 * the beginning of a split archive 215 * @throws NullPointerException if files or lastSegmentFile is null 216 */ 217 public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException { 218 Objects.requireNonNull(files, "files"); 219 Objects.requireNonNull(lastSegmentFile, "lastSegmentFile"); 220 221 final List<File> filesList = new ArrayList<>(); 222 for (final File f : files) { 223 filesList.add(f); 224 } 225 filesList.add(lastSegmentFile); 226 227 return forFiles(filesList.toArray(new File[0])); 228 } 229 230 private static class ZipSplitSegmentComparator implements Comparator<File>, Serializable { 231 private static final long serialVersionUID = 20200123L; 232 @Override 233 public int compare(final File file1, final File file2) { 234 final String extension1 = FileNameUtils.getExtension(file1.getPath()); 235 final String extension2 = FileNameUtils.getExtension(file2.getPath()); 236 237 if (!extension1.startsWith("z")) { 238 return -1; 239 } 240 241 if (!extension2.startsWith("z")) { 242 return 1; 243 } 244 245 final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1)); 246 final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1)); 247 248 return splitSegmentNumber1.compareTo(splitSegmentNumber2); 249 } 250 } 251}