BamTools  2.5.1
BamConstants.h
Go to the documentation of this file.
1 // ***************************************************************************
2 // BamConstants.h (c) 2011 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 16 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides basic constants for handling BAM files.
8 // ***************************************************************************
9 
10 #ifndef BAM_CONSTANTS_H
11 #define BAM_CONSTANTS_H
12 
13 #include <cassert>
14 #include <string>
15 #include "api/api_global.h"
16 
21 namespace BamTools {
22 namespace Constants {
23 
// Width of one integer field. Note that BAM_CORE_SIZE below equals
// BAM_CORE_BUFFER_SIZE * BAM_SIZEOF_INT (8 * 4 = 32).
24 const uint8_t BAM_SIZEOF_INT = 4;
25 
26 // header magic number: the four bytes 'B', 'A', 'M', 0x01 ("\1" is an octal escape)
27 const char* const BAM_HEADER_MAGIC = "BAM\1";
28 const uint8_t BAM_HEADER_MAGIC_LENGTH = 4;  // length of the magic without the literal's terminating NUL
29 
30 // BAM alignment core size
// NOTE(review): presumably the fixed-size core is 32 bytes and is handled as a
// buffer of 8 four-byte integers -- confirm against the reader/writer code.
31 const uint8_t BAM_CORE_SIZE = 32;
32 const uint8_t BAM_CORE_BUFFER_SIZE = 8;
33 
34 // BAM alignment flags
35 const int BAM_ALIGNMENT_PAIRED = 0x0001;
36 const int BAM_ALIGNMENT_PROPER_PAIR = 0x0002;
37 const int BAM_ALIGNMENT_UNMAPPED = 0x0004;
38 const int BAM_ALIGNMENT_MATE_UNMAPPED = 0x0008;
39 const int BAM_ALIGNMENT_REVERSE_STRAND = 0x0010;
40 const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 0x0020;
41 const int BAM_ALIGNMENT_READ_1 = 0x0040;
42 const int BAM_ALIGNMENT_READ_2 = 0x0080;
43 const int BAM_ALIGNMENT_SECONDARY = 0x0100;
44 const int BAM_ALIGNMENT_QC_FAILED = 0x0200;
45 const int BAM_ALIGNMENT_DUPLICATE = 0x0400;
46 
47 // CIGAR constants
// BAM_CIGAR_LOOKUP maps a numeric operation code to its character: each
// BAM_CIGAR_* code below (0-8) is the index of the corresponding
// BAM_CIGAR_*_CHAR inside the lookup string.
48 const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X";
49 const uint8_t BAM_CIGAR_MATCH = 0;  // 'M'
50 const uint8_t BAM_CIGAR_INS = 1;  // 'I'
51 const uint8_t BAM_CIGAR_DEL = 2;  // 'D'
52 const uint8_t BAM_CIGAR_REFSKIP = 3;  // 'N'
53 const uint8_t BAM_CIGAR_SOFTCLIP = 4;  // 'S'
54 const uint8_t BAM_CIGAR_HARDCLIP = 5;  // 'H'
55 const uint8_t BAM_CIGAR_PAD = 6;  // 'P'
56 const uint8_t BAM_CIGAR_SEQMATCH = 7;  // '='
57 const uint8_t BAM_CIGAR_MISMATCH = 8;  // 'X'
58 
// Character form of each operation, in the same order as the codes above.
59 const char BAM_CIGAR_MATCH_CHAR = 'M';
60 const char BAM_CIGAR_INS_CHAR = 'I';
61 const char BAM_CIGAR_DEL_CHAR = 'D';
62 const char BAM_CIGAR_REFSKIP_CHAR = 'N';
63 const char BAM_CIGAR_SOFTCLIP_CHAR = 'S';
64 const char BAM_CIGAR_HARDCLIP_CHAR = 'H';
65 const char BAM_CIGAR_PAD_CHAR = 'P';
66 const char BAM_CIGAR_SEQMATCH_CHAR = '=';
67 const char BAM_CIGAR_MISMATCH_CHAR = 'X';
68 
// BAM_CIGAR_MASK evaluates to 0xF, i.e. the low BAM_CIGAR_SHIFT (4) bits.
// NOTE(review): presumably a packed CIGAR value keeps the operation code in
// those low bits and the length in the bits above -- confirm at the use site.
69 const int BAM_CIGAR_SHIFT = 4;
70 const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1);
71 
72 // BAM tag types & sizes
// One-character type codes. For the integer types the lower-case letter is the
// signed variant and the upper-case letter the unsigned one
// (c/C = 8-bit, s/S = 16-bit, i/I = 32-bit).
73 const char BAM_TAG_TYPE_ASCII = 'A';
74 const char BAM_TAG_TYPE_INT8 = 'c';
75 const char BAM_TAG_TYPE_UINT8 = 'C';
76 const char BAM_TAG_TYPE_INT16 = 's';
77 const char BAM_TAG_TYPE_UINT16 = 'S';
78 const char BAM_TAG_TYPE_INT32 = 'i';
79 const char BAM_TAG_TYPE_UINT32 = 'I';
80 const char BAM_TAG_TYPE_FLOAT = 'f';
81 const char BAM_TAG_TYPE_STRING = 'Z';
82 const char BAM_TAG_TYPE_HEX = 'H';
83 const char BAM_TAG_TYPE_ARRAY = 'B';
84 
// NOTE(review): presumably byte counts -- a 2-character tag name, a
// 1-character type code, and 8 bytes of fixed overhead in front of an array
// tag's elements. Confirm against the tag parsing code.
85 const uint8_t BAM_TAG_TAGSIZE = 2;
86 const uint8_t BAM_TAG_TYPESIZE = 1;
87 const uint8_t BAM_TAG_ARRAYBASE_SIZE = 8;
88 
89 // DNA bases
// BAM_DNA_LOOKUP maps a numeric base code to its character: each
// BAM_BASECODE_* value below (0-15) is the index of the corresponding
// character inside the 16-character lookup string.
90 const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
91 const uint8_t BAM_BASECODE_EQUAL = 0;  // '='
92 const uint8_t BAM_BASECODE_A = 1;
93 const uint8_t BAM_BASECODE_C = 2;
94 const uint8_t BAM_BASECODE_M = 3;
95 const uint8_t BAM_BASECODE_G = 4;
96 const uint8_t BAM_BASECODE_R = 5;
97 const uint8_t BAM_BASECODE_S = 6;
98 const uint8_t BAM_BASECODE_V = 7;
99 const uint8_t BAM_BASECODE_T = 8;
100 const uint8_t BAM_BASECODE_W = 9;
101 const uint8_t BAM_BASECODE_Y = 10;
102 const uint8_t BAM_BASECODE_H = 11;
103 const uint8_t BAM_BASECODE_K = 12;
104 const uint8_t BAM_BASECODE_D = 13;
105 const uint8_t BAM_BASECODE_B = 14;
106 const uint8_t BAM_BASECODE_N = 15;
107 
// Character form of each base. BAM_DNA_DEL ('-') and BAM_DNA_PAD ('*') have no
// entry in BAM_DNA_LOOKUP and therefore no numeric base code above.
108 const char BAM_DNA_EQUAL = '=';
109 const char BAM_DNA_A = 'A';
110 const char BAM_DNA_C = 'C';
111 const char BAM_DNA_M = 'M';
112 const char BAM_DNA_G = 'G';
113 const char BAM_DNA_R = 'R';
114 const char BAM_DNA_S = 'S';
115 const char BAM_DNA_V = 'V';
116 const char BAM_DNA_T = 'T';
117 const char BAM_DNA_W = 'W';
118 const char BAM_DNA_Y = 'Y';
119 const char BAM_DNA_H = 'H';
120 const char BAM_DNA_K = 'K';
121 const char BAM_DNA_D = 'D';
122 const char BAM_DNA_B = 'B';
123 const char BAM_DNA_N = 'N';
124 const char BAM_DNA_DEL = '-';
125 const char BAM_DNA_PAD = '*';
126 
127 // zlib & BGZF constants
// NOTE(review): the GZIP_*/CM_*/FLG_*/OS_* values look like gzip member-header
// field values (RFC 1952) -- confirm where the block header is assembled.
128 const char GZIP_ID1 = 31;  // 0x1f
129 const char GZIP_ID2 = static_cast<char>(139);  // 0x8b; explicit cast because 139 does not fit a signed 8-bit char
130 const char CM_DEFLATE = 8;
131 const char FLG_FEXTRA = 4;
132 const char OS_UNKNOWN = static_cast<char>(255);  // 0xff; cast for the same reason as GZIP_ID2
133 const char BGZF_XLEN = 6;
134 const char BGZF_ID1 = 66;  // ASCII 'B'
135 const char BGZF_ID2 = 67;  // ASCII 'C'
136 const char BGZF_LEN = 2;
137 
// NOTE(review): GZIP_WINDOW_BITS and Z_DEFAULT_MEM_LEVEL are presumably passed
// to zlib's deflateInit2/inflateInit2, where a negative windowBits selects raw
// deflate -- confirm at the call site.
138 const int8_t GZIP_WINDOW_BITS = -15;
139 const int8_t Z_DEFAULT_MEM_LEVEL = 8;
140 const uint8_t BGZF_BLOCK_HEADER_LENGTH = 18;
141 const uint8_t BGZF_BLOCK_FOOTER_LENGTH = 8;
// Both block sizes are 65536 (64 KiB); the value needs more than 16 bits,
// hence uint32_t.
142 const uint32_t BGZF_MAX_BLOCK_SIZE = 65536;
143 const uint32_t BGZF_DEFAULT_BLOCK_SIZE = 65536;
144 
145 } // namespace Constants
146 
148 // -------------------------
149 // tag-type helper structs
150 // -------------------------
151 
152 // fail on any types not specified below
// Primary template: the fallback used for every T that has no explicit
// specialization. Each member trips assert(false); when assertions are
// compiled out (NDEBUG) the members instead report "no conversion possible"
// and a type code of 0.
153 template <typename T>
154 struct TagTypeHelper
155 {
// Unsupported T: asserts, otherwise returns false for every type code.
156  static bool CanConvertFrom(const char)
157  {
158  assert(false);
159  return false;
160  }
// Unsupported T: asserts, otherwise returns false for every type code.
161  static bool CanConvertTo(const char)
162  {
163  assert(false);
164  return false;
165  }
// Unsupported T: asserts, otherwise returns 0 (not one of the BAM_TAG_TYPE_* codes).
166  static char TypeCode()
167  {
168  assert(false);
169  return 0;
170  }
171 };
172 
// Tag-type traits specialized for uint8_t.
// NOTE(review): this listing omits all three function bodies (listing lines
// 178, 182-183 and 188 are absent), so the accepted type codes cannot be read
// off here -- consult the original header before relying on them.
173 template <>
174 struct TagTypeHelper<uint8_t>
175 {
176  static bool CanConvertFrom(const char c)
177  {
179  }
180  static bool CanConvertTo(const char c)
181  {
184  }
185 
186  static char TypeCode()
187  {
189  }
190 };
191 
// Tag-type traits specialized for int8_t.
// NOTE(review): this listing omits all three function bodies (listing lines
// 197, 201-202 and 206 are absent), so the accepted type codes cannot be read
// off here -- consult the original header before relying on them.
192 template <>
193 struct TagTypeHelper<int8_t>
194 {
195  static bool CanConvertFrom(const char c)
196  {
198  }
199  static bool CanConvertTo(const char c)
200  {
203  }
204  static char TypeCode()
205  {
207  }
208 };
209 
// Tag-type traits specialized for uint16_t.
// NOTE(review): this listing omits all three function bodies (listing lines
// 215-216, 220 and 224 are absent), so the accepted type codes cannot be read
// off here -- consult the original header before relying on them.
210 template <>
211 struct TagTypeHelper<uint16_t>
212 {
213  static bool CanConvertFrom(const char c)
214  {
217  }
218  static bool CanConvertTo(const char c)
219  {
221  }
222  static char TypeCode()
223  {
225  }
226 };
227 
// Tag-type traits specialized for int16_t.
// NOTE(review): this listing omits all three function bodies (listing lines
// 233-234, 238 and 242 are absent), so the accepted type codes cannot be read
// off here -- consult the original header before relying on them.
228 template <>
229 struct TagTypeHelper<int16_t>
230 {
231  static bool CanConvertFrom(const char c)
232  {
235  }
236  static bool CanConvertTo(const char c)
237  {
239  }
240  static char TypeCode()
241  {
243  }
244 };
245 
// Tag-type traits specialized for uint32_t.
// NOTE(review): the bodies of CanConvertFrom and TypeCode are missing from
// this listing (listing lines 251-252 and 260 are absent); only CanConvertTo
// is visible.
246 template <>
247 struct TagTypeHelper<uint32_t>
248 {
249  static bool CanConvertFrom(const char c)
250  {
253  }
// True only when c is the unsigned 32-bit type code (BAM_TAG_TYPE_UINT32, 'I').
254  static bool CanConvertTo(const char c)
255  {
256  return (c == Constants::BAM_TAG_TYPE_UINT32);
257  }
258  static char TypeCode()
259  {
261  }
262 };
263 
// Tag-type traits specialized for int32_t.
// NOTE(review): the bodies of CanConvertFrom and TypeCode are missing from
// this listing (listing lines 269-270 and 278 are absent); only CanConvertTo
// is visible.
264 template <>
265 struct TagTypeHelper<int32_t>
266 {
267  static bool CanConvertFrom(const char c)
268  {
271  }
// True only when c is the signed 32-bit type code (BAM_TAG_TYPE_INT32, 'i').
272  static bool CanConvertTo(const char c)
273  {
274  return (c == Constants::BAM_TAG_TYPE_INT32);
275  }
276  static char TypeCode()
277  {
279  }
280 };
281 
// Tag-type traits specialized for float.
// NOTE(review): the bodies of CanConvertFrom and TypeCode are missing from
// this listing (listing lines 287-290 and 298 are absent); only CanConvertTo
// is visible.
282 template <>
283 struct TagTypeHelper<float>
284 {
285  static bool CanConvertFrom(const char c)
286  {
291  }
// True only when c is the float type code (BAM_TAG_TYPE_FLOAT, 'f').
292  static bool CanConvertTo(const char c)
293  {
294  return (c == Constants::BAM_TAG_TYPE_FLOAT);
295  }
296  static char TypeCode()
297  {
299  }
300 };
301 
// Tag-type traits specialized for std::string.
// NOTE(review): this listing omits all three function bodies (listing lines
// 307, 311 and 315 are absent), so the accepted type codes cannot be read off
// here -- consult the original header before relying on them.
302 template <>
303 struct TagTypeHelper<std::string>
304 {
305  static bool CanConvertFrom(const char c)
306  {
308  }
309  static bool CanConvertTo(const char c)
310  {
312  }
313  static char TypeCode()
314  {
316  }
317 };
318 
320 
321 } // namespace BamTools
322 
323 #endif // BAM_CONSTANTS_H
const uint8_t BAM_BASECODE_R
Definition: BamConstants.h:96
const uint8_t BAM_CIGAR_MISMATCH
Definition: BamConstants.h:57
const char BAM_CIGAR_HARDCLIP_CHAR
Definition: BamConstants.h:64
const uint8_t BAM_BASECODE_H
Definition: BamConstants.h:102
const uint8_t BAM_SIZEOF_INT
Definition: BamConstants.h:24
const char BAM_TAG_TYPE_FLOAT
Definition: BamConstants.h:80
const uint8_t BAM_CIGAR_HARDCLIP
Definition: BamConstants.h:54
const uint8_t BGZF_BLOCK_FOOTER_LENGTH
Definition: BamConstants.h:141
const char BAM_CIGAR_PAD_CHAR
Definition: BamConstants.h:65
const uint8_t BAM_BASECODE_EQUAL
Definition: BamConstants.h:91
const char BAM_TAG_TYPE_UINT16
Definition: BamConstants.h:77
const uint8_t BAM_TAG_ARRAYBASE_SIZE
Definition: BamConstants.h:87
const int BAM_ALIGNMENT_SECONDARY
Definition: BamConstants.h:43
const int8_t Z_DEFAULT_MEM_LEVEL
Definition: BamConstants.h:139
const uint8_t BAM_BASECODE_M
Definition: BamConstants.h:94
const int BAM_CIGAR_SHIFT
Definition: BamConstants.h:69
const char BAM_DNA_M
Definition: BamConstants.h:111
const uint8_t BAM_BASECODE_N
Definition: BamConstants.h:106
const uint8_t BAM_TAG_TYPESIZE
Definition: BamConstants.h:86
const char BAM_TAG_TYPE_INT8
Definition: BamConstants.h:74
const uint8_t BAM_CIGAR_INS
Definition: BamConstants.h:50
const char *const BAM_CIGAR_LOOKUP
Definition: BamConstants.h:48
const uint8_t BAM_CORE_SIZE
Definition: BamConstants.h:31
const int BAM_ALIGNMENT_DUPLICATE
Definition: BamConstants.h:45
const uint8_t BAM_BASECODE_S
Definition: BamConstants.h:97
const int BAM_ALIGNMENT_REVERSE_STRAND
Definition: BamConstants.h:39
const char BAM_TAG_TYPE_INT32
Definition: BamConstants.h:78
const char BAM_DNA_S
Definition: BamConstants.h:114
const char BAM_DNA_R
Definition: BamConstants.h:113
const uint8_t BGZF_BLOCK_HEADER_LENGTH
Definition: BamConstants.h:140
const int8_t GZIP_WINDOW_BITS
Definition: BamConstants.h:138
const char BAM_DNA_PAD
Definition: BamConstants.h:125
const char BAM_DNA_Y
Definition: BamConstants.h:118
const uint8_t BAM_BASECODE_W
Definition: BamConstants.h:100
const char BGZF_XLEN
Definition: BamConstants.h:133
const int BAM_CIGAR_MASK
Definition: BamConstants.h:70
const uint8_t BAM_BASECODE_T
Definition: BamConstants.h:99
const int BAM_ALIGNMENT_MATE_UNMAPPED
Definition: BamConstants.h:38
const char BAM_DNA_H
Definition: BamConstants.h:119
const int BAM_ALIGNMENT_READ_1
Definition: BamConstants.h:41
const char BAM_DNA_D
Definition: BamConstants.h:121
const char BAM_CIGAR_INS_CHAR
Definition: BamConstants.h:60
const char *const BAM_DNA_LOOKUP
Definition: BamConstants.h:90
const char BAM_DNA_A
Definition: BamConstants.h:109
const char BAM_DNA_T
Definition: BamConstants.h:116
const uint8_t BAM_BASECODE_D
Definition: BamConstants.h:104
const uint8_t BAM_CIGAR_DEL
Definition: BamConstants.h:51
const uint8_t BAM_CIGAR_SOFTCLIP
Definition: BamConstants.h:53
const char BAM_DNA_DEL
Definition: BamConstants.h:124
const char BAM_TAG_TYPE_ARRAY
Definition: BamConstants.h:83
const uint8_t BAM_CORE_BUFFER_SIZE
Definition: BamConstants.h:32
const char GZIP_ID1
Definition: BamConstants.h:128
const char BAM_CIGAR_SOFTCLIP_CHAR
Definition: BamConstants.h:63
const uint8_t BAM_HEADER_MAGIC_LENGTH
Definition: BamConstants.h:28
const uint8_t BAM_BASECODE_K
Definition: BamConstants.h:103
const int BAM_ALIGNMENT_UNMAPPED
Definition: BamConstants.h:37
const int BAM_ALIGNMENT_QC_FAILED
Definition: BamConstants.h:44
const uint8_t BAM_CIGAR_PAD
Definition: BamConstants.h:55
const char BGZF_ID2
Definition: BamConstants.h:135
const uint8_t BAM_CIGAR_MATCH
Definition: BamConstants.h:49
const uint8_t BAM_BASECODE_Y
Definition: BamConstants.h:101
const char CM_DEFLATE
Definition: BamConstants.h:130
const char BAM_CIGAR_MISMATCH_CHAR
Definition: BamConstants.h:67
const uint8_t BAM_CIGAR_SEQMATCH
Definition: BamConstants.h:56
const char BAM_TAG_TYPE_HEX
Definition: BamConstants.h:82
const char BAM_CIGAR_SEQMATCH_CHAR
Definition: BamConstants.h:66
const char BAM_TAG_TYPE_ASCII
Definition: BamConstants.h:73
const uint32_t BGZF_DEFAULT_BLOCK_SIZE
Definition: BamConstants.h:143
const char BGZF_ID1
Definition: BamConstants.h:134
const uint8_t BAM_BASECODE_G
Definition: BamConstants.h:95
const uint8_t BAM_BASECODE_V
Definition: BamConstants.h:98
const uint8_t BAM_BASECODE_B
Definition: BamConstants.h:105
const char FLG_FEXTRA
Definition: BamConstants.h:131
const char BAM_CIGAR_MATCH_CHAR
Definition: BamConstants.h:59
const int BAM_ALIGNMENT_READ_2
Definition: BamConstants.h:42
const int BAM_ALIGNMENT_MATE_REVERSE_STRAND
Definition: BamConstants.h:40
const char BAM_CIGAR_REFSKIP_CHAR
Definition: BamConstants.h:62
const char BAM_TAG_TYPE_INT16
Definition: BamConstants.h:76
const char BAM_DNA_EQUAL
Definition: BamConstants.h:108
const char BAM_DNA_G
Definition: BamConstants.h:112
const char BGZF_LEN
Definition: BamConstants.h:136
const char BAM_DNA_W
Definition: BamConstants.h:117
const char GZIP_ID2
Definition: BamConstants.h:129
const char BAM_TAG_TYPE_STRING
Definition: BamConstants.h:81
const char BAM_DNA_V
Definition: BamConstants.h:115
const uint8_t BAM_CIGAR_REFSKIP
Definition: BamConstants.h:52
Contains all BamTools classes & methods.
Definition: Sort.h:24
const char BAM_DNA_C
Definition: BamConstants.h:110
const uint8_t BAM_BASECODE_C
Definition: BamConstants.h:93
const uint8_t BAM_TAG_TAGSIZE
Definition: BamConstants.h:85
const uint32_t BGZF_MAX_BLOCK_SIZE
Definition: BamConstants.h:142
const char BAM_TAG_TYPE_UINT8
Definition: BamConstants.h:75
const char BAM_CIGAR_DEL_CHAR
Definition: BamConstants.h:61
const char BAM_DNA_B
Definition: BamConstants.h:122
const char BAM_DNA_N
Definition: BamConstants.h:123
const char BAM_TAG_TYPE_UINT32
Definition: BamConstants.h:79
const char OS_UNKNOWN
Definition: BamConstants.h:132
const uint8_t BAM_BASECODE_A
Definition: BamConstants.h:92
const int BAM_ALIGNMENT_PAIRED
Definition: BamConstants.h:35
const int BAM_ALIGNMENT_PROPER_PAIR
Definition: BamConstants.h:36
const char BAM_DNA_K
Definition: BamConstants.h:120
const char *const BAM_HEADER_MAGIC
Definition: BamConstants.h:27