001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.harmony.pack200;
018
019import java.io.IOException;
020import java.io.InputStream;
021
022/**
023 * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa).
024 *
025 * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are
026 * used in the implementation of many bands; but there are a variety of other ones, and indeed the specification assumes
027 * that other combinations of values can result in more specific and efficient formats. There are also a sequence of
028 * canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical number.
029 * {@link CodecEncoding#getCodec(int, InputStream, Codec)})
030 */
031public abstract class Codec {
032
033    /**
034     * BCI5 = (5,4): Used for storing branching information in bytecode.
035     */
036    public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);
037
038    /**
039     * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
040     */
041    public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);
042
043    /**
044     * BYTE1 = (1,256): Used for storing plain bytes.
045     */
046    public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);
047
048    /**
049     * CHAR3 = (3,128): Used for storing text (UTF-8) strings. NB This isn't quite the same as UTF-8, but has similar
050     * properties; ASCII characters < 127 are stored in a single byte.
051     */
052    public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);
053
054    /**
055     * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed
056     * values.
057     */
058    public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);
059
060    /**
061     * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed
062     * values, but where most of them are expected to be non-negative.
063     */
064    public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);
065
066    /**
067     * SIGNED5 = (5,64,1): Used for small signed values.
068     */
069    public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);
070
071    /**
072     * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned
073     * values.
074     */
075    public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);
076
077    /**
078     * UNSIGNED5 = (5,64): Used for small unsigned values.
079     */
080    public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);
081
082    public int lastBandLength;
083
084    /**
085     * Decode a sequence of bytes from the given input stream, returning the value as a long. Note that this method can
086     * only be applied for non-delta encodings.
087     *
088     * @param in the input stream to read from
089     * @return the value as a long
090     * @throws IOException if there is a problem reading from the underlying input stream
091     * @throws Pack200Exception if the encoding is a delta encoding
092     */
093    public abstract int decode(InputStream in) throws IOException, Pack200Exception;
094
095    /**
096     * Encode a single value into a sequence of bytes.
097     *
098     * @param value the value to encode
099     * @param last the previous value encoded (for delta encodings)
100     * @return the encoded bytes
101     * @throws Pack200Exception TODO
102     */
103    public abstract byte[] encode(int value, int last) throws Pack200Exception;
104
105    /**
106     * Encode a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings.
107     *
108     * @param value the value to encode
109     * @return the encoded bytes
110     * @throws Pack200Exception TODO
111     */
112    public abstract byte[] encode(int value) throws Pack200Exception;
113
114    /**
115     * Decode a sequence of bytes from the given input stream, returning the value as a long. If this encoding is a
116     * delta encoding (d=1) then the previous value must be passed in as a parameter. If it is a non-delta encoding,
117     * then it does not matter what value is passed in, so it makes sense for the value to be passed in by default using
118     * code similar to:
119     *
120     * <pre>
121     * long last = 0;
122     * while (condition) {
123     *     last = codec.decode(in, last);
124     *     // do something with last
125     * }
126     * </pre>
127     *
128     * @param in the input stream to read from
129     * @param last the previous value read, which must be supplied if the codec is a delta encoding
130     * @return the value as a long
131     * @throws IOException if there is a problem reading from the underlying input stream
132     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
133     */
134    public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;
135
136    /**
137     * Decodes a sequence of <code>n</code> values from <code>in</code>. This should probably be used in most cases,
138     * since some codecs (such as {@link PopulationCodec}) only work when the number of values to be read is known.
139     *
140     * @param n the number of values to decode
141     * @param in the input stream to read from
142     * @return an array of <code>int</code> values corresponding to values decoded
143     * @throws IOException if there is a problem reading from the underlying input stream
144     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
145     */
146    public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
147        lastBandLength = 0;
148        final int result[] = new int[n];
149        int last = 0;
150        for (int i = 0; i < n; i++) {
151            result[i] = last = decode(in, last);
152        }
153        return result;
154    }
155
156    /**
157     * Decodes a sequence of <code>n</code> values from <code>in</code>.
158     *
159     * @param n the number of values to decode
160     * @param in the input stream to read from
161     * @param firstValue the first value in the band if it has already been read
162     * @return an array of <code>int</code> values corresponding to values decoded, with firstValue as the first value
163     *         in the array.
164     * @throws IOException if there is a problem reading from the underlying input stream
165     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
166     */
167    public int[] decodeInts(final int n, final InputStream in, final int firstValue)
168        throws IOException, Pack200Exception {
169        final int result[] = new int[n + 1];
170        result[0] = firstValue;
171        int last = firstValue;
172        for (int i = 1; i < n + 1; i++) {
173            result[i] = last = decode(in, last);
174        }
175        return result;
176    }
177
178    /**
179     * Encode a sequence of integers into a byte array
180     *
181     * @param ints the values to encode
182     * @return byte[] encoded bytes
183     * @throws Pack200Exception if there is a problem encoding any of the values
184     */
185    public byte[] encode(final int[] ints) throws Pack200Exception {
186        int total = 0;
187        final byte[][] bytes = new byte[ints.length][];
188        for (int i = 0; i < ints.length; i++) {
189            bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0);
190            total += bytes[i].length;
191        }
192        final byte[] encoded = new byte[total];
193        int index = 0;
194        for (int i = 0; i < bytes.length; i++) {
195            System.arraycopy(bytes[i], 0, encoded, index, bytes[i].length);
196            index += bytes[i].length;
197        }
198        return encoded;
199    }
200}