ProteoWizard
MSNumpress.hpp
Go to the documentation of this file.
1 //
2 // $Id$
3 //
4 //
5 // Original author: Johan Teleman <johan.teleman@immun.lth.se>
6 //
7 // Copyright 2013 Johan Teleman
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // Unless required by applicable law or agreed to in writing, software
16 // distributed under the License is distributed on an "AS IS" BASIS,
17 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 // See the License for the specific language governing permissions and
19 // limitations under the License.
20 //
21 
22 
23 /*
24  ==================== encodeInt ====================
25  Some of the encodings described below use an integer compression referred to simply as
26 
27  encodeInt()
28 
29  This encoding works on a 4 byte integer, by truncating initial zeros or ones.
30  If the initial (most significant) half byte is 0x0 or 0xf, the number of such
31  halfbytes starting from the most significant is stored in a halfbyte. This initial
32  count is then followed by the rest of the int's halfbytes, in little-endian order.
33  A count halfbyte c of
34 
35  0 <= c <= 8 is interpreted as an initial c 0x0 halfbytes
36  9 <= c <= 15 is interpreted as an initial (c-8) 0xf halfbytes
37 
38  Ex:
39  int c rest
40  0 => 0x8
41  -1 => 0xf 0xf
42  23 => 0x6 0x7 0x1
43  */
44 
45 #ifndef _MSNUMPRESS_HPP_
46 #define _MSNUMPRESS_HPP_
47 
48 
50 #include <cstddef>
51 #include <vector>
52 
53 
54 namespace pwiz {
55 namespace msdata {
56 
57 namespace MSNumpress {
58 
60  const double *data,
61  size_t dataSize);
62 
64  const double *data,
65  size_t dataSize,
66  double mass_acc
67  );
68 
69 
70  /**
71  * Encodes the doubles in data by first using a
72  * - lossy conversion to a 4 byte 5 decimal fixed point representation
73  * - storing the residuals from a linear prediction after the first two values
74  * - encoding by encodeInt (see above)
75  *
76  * The resulting binary is maximally dataSize * 5 bytes, but much less if the
77  * data is reasonably smooth on the first order.
78  *
79  * This encoding is suitable for typical m/z or retention time binary arrays.
80  * For masses above 100 m/z the encoding is accurate to at least 0.1 ppm.
81  *
82  * @data pointer to array of doubles to be encoded (must be contiguous in memory)
83  * @dataSize number of doubles from *data to encode
84  * @result pointer to where the resulting bytes should be stored
85  * @fixedPoint the scaling factor used for getting the fixed point repr.
86  * This is stored in the binary and automatically extracted
87  * on decoding. Automatically (and maybe slowly) determined if 0.
88  * @return the number of encoded bytes
89  */
91  const double *data,
92  const size_t dataSize,
93  unsigned char *result,
94  double fixedPoint);
95 
96  /**
97  * Calls lower level encodeLinear while handling vector sizes appropriately
98  *
99  * @data vector of doubles to be encoded
100  * @result vector of resulting bytes (will be resized to the number of bytes)
101  */
103  const std::vector<double> &data,
104  std::vector<unsigned char> &result,
105  double fixedPoint);
106 
107  /**
108  * Decodes data encoded by encodeLinear. Note that the compression
109  * discards any information < 1e-5, so data is only guaranteed
110  * to be within +- 5e-6 of the original value.
111  *
112  * Further, values > ~42000 will also be truncated because of the
113  * fixed point representation, so this scheme is strongly discouraged
114  * if values might exceed this size.
115  *
116  * The result vector is guaranteed to be shorter than twice the data length (in number of values)
117  *
118  * @data pointer to array of bytes to be decoded (must be contiguous in memory)
119  * @dataSize number of bytes from *data to decode
120  * @result pointer to where the resulting doubles should be stored
121  * @return the number of decoded doubles, or -1 if dataSize < 4 or 4 < dataSize < 8
122  */
124  const unsigned char *data,
125  const size_t dataSize,
126  double *result);
127 
128  /**
129  * Calls lower level decodeLinear while handling vector sizes appropriately
130  *
131  * @data vector of bytes to be decoded
132  * @result vector of resulting double (will be resized to the number of doubles)
133  */
135  const std::vector<unsigned char> &data,
136  std::vector<double> &result);
137 
138 /////////////////////////////////////////////////////////////
139 
140  /**
141  * Encodes ion counts by simply rounding to the nearest 4 byte integer,
142  * and compressing each integer with encodeInt.
143  *
144  * The handleable range is therefore 0 -> 4294967294.
145  * The resulting binary is maximally dataSize * 5 bytes, but much less if the
146  * data is close to 0 on average.
147  *
148  * @data pointer to array of doubles to be encoded (must be contiguous in memory)
149  * @dataSize number of doubles from *data to encode
150  * @result pointer to where the resulting bytes should be stored
151  * @return the number of encoded bytes
152  */
154  const double *data,
155  const size_t dataSize,
156  unsigned char *result);
157 
158  /**
159  * Calls lower level encodePic while handling vector sizes appropriately
160  *
161  * @data vector of doubles to be encoded
162  * @result vector of resulting bytes (will be resized to the number of bytes)
163  */
165  const std::vector<double> &data,
166  std::vector<unsigned char> &result);
167 
168  /**
169  * Decodes data encoded by encodePic
170  *
171  * The result vector is guaranteed to be shorter than twice the data length (in number of values)
172  *
173  * @data pointer to array of bytes to be decoded (must be contiguous in memory)
174  * @dataSize number of bytes from *data to decode
175  * @result pointer to where the resulting doubles should be stored
176  * @return the number of decoded doubles
177  */
179  const std::vector<unsigned char> &data,
180  std::vector<double> &result);
181 
182  /**
183  * Calls lower level decodePic while handling vector sizes appropriately
184  *
185  * @data vector of bytes to be decoded
186  * @result vector of resulting double (will be resized to the number of doubles)
187  */
189  const unsigned char *data,
190  const size_t dataSize,
191  double *result);
192 
193 /////////////////////////////////////////////////////////////
194 
195 
197  const double *data,
198  size_t dataSize);
199 
200  /**
201  * Encodes ion counts by taking the natural logarithm, and storing a
202  * fixed point representation of this. This is calculated as
203  *
204  * unsigned short fp = log(d + 1) * 3000.0 + 0.5
205  *
206  * Note that this fixed point will mean any d < 0.00016667 will be
207  * stored as a zero and mapped back to a zero.
208  *
209  * result vector is exactly twice the data length (in nbr of values)
210  *
211  * @data pointer to array of doubles to be encoded (must be contiguous in memory)
212  * @dataSize number of doubles from *data to encode
213  * @result pointer to where the resulting bytes should be stored
214  * @fixedPoint automatically (and maybe slowly) determined if 0.
215  * @return the number of encoded bytes
216  */
218  const double *data,
219  const size_t dataSize,
220  unsigned char *result,
221  double fixedPoint);
222 
223  /**
224  * Calls lower level encodeSlof while handling vector sizes appropriately
225  *
226  * @data vector of doubles to be encoded
227  * @result vector of resulting bytes (will be resized to the number of bytes)
228  */
230  const std::vector<double> &data,
231  std::vector<unsigned char> &result,
232  double fixedPoint);
233 
234  /**
235  * Decodes data encoded by encodeSlof
236  *
237  * @data pointer to array of bytes to be decoded (must be contiguous in memory)
238  * @dataSize number of bytes from *data to decode
239  * @result pointer to where the resulting doubles should be stored
240  * @return the number of decoded doubles
241  */
243  const unsigned char *data,
244  const size_t dataSize,
245  double *result);
246 
247  /**
248  * Calls lower level decodeSlof while handling vector sizes appropriately
249  *
250  * @data vector of bytes to be decoded
251  * @result vector of resulting double (will be resized to the number of doubles)
252  */
254  const std::vector<unsigned char> &data,
255  std::vector<double> &result);
256 
257 } // namespace MSNumpress
258 } // namespace msdata
259 } // namespace pwiz
260 
261 #endif // _MSNUMPRESS_HPP_
pwiz::msdata::MSNumpress::optimalLinearFixedPointMass
double PWIZ_API_DECL optimalLinearFixedPointMass(const double *data, size_t dataSize, double mass_acc)
pwiz
Definition: ChromatogramList_Filter.hpp:36
pwiz::msdata::MSNumpress::decodePic
void PWIZ_API_DECL decodePic(const std::vector< unsigned char > &data, std::vector< double > &result)
Decodes data encoded by encodePic.
PWIZ_API_DECL
#define PWIZ_API_DECL
Definition: Export.hpp:32
pwiz::msdata::MSNumpress::decodeLinear
size_t PWIZ_API_DECL decodeLinear(const unsigned char *data, const size_t dataSize, double *result)
Decodes data encoded by encodeLinear.
Export.hpp
pwiz::msdata::MSNumpress::encodeLinear
size_t PWIZ_API_DECL encodeLinear(const double *data, const size_t dataSize, unsigned char *result, double fixedPoint)
Encodes the doubles in data by first using a.
pwiz::msdata::MSNumpress::optimalSlofFixedPoint
double PWIZ_API_DECL optimalSlofFixedPoint(const double *data, size_t dataSize)
pwiz::msdata::MSNumpress::encodeSlof
size_t PWIZ_API_DECL encodeSlof(const double *data, const size_t dataSize, unsigned char *result, double fixedPoint)
Encodes ion counts by taking the natural logarithm, and storing a fixed point representation of this.
pwiz::msdata::MSNumpress::encodePic
size_t PWIZ_API_DECL encodePic(const double *data, const size_t dataSize, unsigned char *result)
Encodes ion counts by simply rounding to the nearest 4 byte integer, and compressing each integer wit...
pwiz::msdata::MSNumpress::decodeSlof
size_t PWIZ_API_DECL decodeSlof(const unsigned char *data, const size_t dataSize, double *result)
Decodes data encoded by encodeSlof.
pwiz::msdata::MSNumpress::optimalLinearFixedPoint
double PWIZ_API_DECL optimalLinearFixedPoint(const double *data, size_t dataSize)