// Include directives, followed by the definitions of SamRecord's static
// members: the default read name, the string that marks a field as absent,
// and the shared warning counter (starts at 0).
24 #include "SamRecord.h" 25 #include "SamValidation.h" 27 #include "BaseUtilities.h" 28 #include "SamQuerySeqWithRefHelper.h" 30 const char* SamRecord::DEFAULT_READ_NAME =
"UNKNOWN";
31 const char* SamRecord::FIELD_ABSENT_STRING =
"=";
32 int SamRecord::myNumWarns = 0;
// Constructor fragment (the signature line is not visible in this extract;
// presumably the default SamRecord constructor - TODO confirm).
// The sequence translation starts out as NONE.
37 mySequenceTranslation(NONE)
// Initial allocation size: DEFAULT_BLOCK_SIZE plus one int32_t (presumably
// the block-size field that precedes the block - TODO confirm).
39 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE +
sizeof(int32_t);
// No temporary CIGAR buffer has been allocated yet.
44 myCigarTempBuffer = NULL;
45 myCigarTempBufferAllocatedSize = 0;
// Record how many bytes the record structure is taken to hold.
47 allocatedSize = defaultAllocSize;
// Constructor fragment whose member-initializer list is partly visible: the
// status member is constructed from errorHandlingType and the sequence
// translation starts as NONE. (The signature line is not visible here.)
54 : myStatus(errorHandlingType),
56 mySequenceTranslation(
NONE)
// Initial allocation size: DEFAULT_BLOCK_SIZE plus one int32_t (presumably
// the block-size field that precedes the block - TODO confirm).
58 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE +
sizeof(int32_t);
// No temporary CIGAR buffer has been allocated yet.
63 myCigarTempBuffer = NULL;
64 myCigarTempBufferAllocatedSize = 0;
// Record how many bytes the record structure is taken to hold.
66 allocatedSize = defaultAllocSize;
// Cleanup fragment (presumably from the destructor - TODO confirm; the
// enclosing signature and the body of this first check are not visible).
76 if(myRecordPtr != NULL)
// Release the temporary CIGAR buffer and clear its bookkeeping so the
// pointer and size no longer describe freed memory.
81 if(myCigarTempBuffer != NULL)
83 free(myCigarTempBuffer);
84 myCigarTempBuffer = NULL;
85 myCigarTempBufferAllocatedSize = 0;
// Fragment that puts the record back into a default, empty state (the
// enclosing signature is not visible in this extract).
// The buffer is flagged as synced; the default header values below are
// written directly into it.
93 myIsBufferSynced =
true;
// Default header fields: -1 for both reference IDs and both positions,
// zero-length CIGAR and read, zero flag / map quality / insert size.
95 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
96 myRecordPtr->myReferenceID = -1;
97 myRecordPtr->myPosition = -1;
98 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
99 myRecordPtr->myMapQuality = 0;
100 myRecordPtr->myBin = DEFAULT_BIN;
101 myRecordPtr->myCigarLength = 0;
102 myRecordPtr->myFlag = 0;
103 myRecordPtr->myReadLength = 0;
104 myRecordPtr->myMateReferenceID = -1;
105 myRecordPtr->myMatePosition = -1;
106 myRecordPtr->myInsertSize = 0;
// String-side copies of the defaults: default read name and "*" for both
// reference names.
111 myReadName = DEFAULT_READ_NAME;
112 myReferenceName =
"*";
113 myMateReferenceName =
"*";
// Drop the cached sequence variant held in mySeqWithoutEq.
117 mySeqWithoutEq.clear();
// No tag transfer is pending in either direction.
119 myNeedToSetTagsFromBuffer =
false;
120 myNeedToSetTagsInBuffer =
false;
// -1 is the "not yet computed" marker that other fragments test for before
// using these CIGAR-derived values.
123 myAlignmentLength = -1;
124 myUnclippedStartOffset = -1;
125 myUnclippedEndOffset = -1;
// Copy the default read name into the start of the record's data area.
133 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
134 myRecordPtr->myReadNameLength);
137 myIsReadNameBufferValid =
true;
138 myIsCigarBufferValid =
true;
// Address just past the read name and the CIGAR entries in the data area.
// The left-hand side of this assignment is on a line not visible here.
140 (
unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength +
141 myRecordPtr->myCigarLength *
sizeof(
int);
142 myIsSequenceBufferValid =
true;
143 myBufferSequenceTranslation =
NONE;
// The quality pointer is given the same address as the packed sequence
// (the read length was set to 0 above).
145 myPackedQuality = myPackedSequence;
146 myIsQualityBufferValid =
true;
147 myIsTagsBufferValid =
true;
// -1 marks the temporary CIGAR buffer as not holding a parsed CIGAR.
150 myCigarTempBufferLength = -1;
// Values that the tag getters return when a requested tag is not found.
154 NOT_FOUND_TAG_STRING =
"";
155 NOT_FOUND_TAG_INT = -1;
// The lines below are fragments of several short member functions whose
// signatures are not visible in this extract (the embedded listing numbers
// jump between them).

// Local error-message string, initially empty (enclosing function not visible).
168 std::string errorMessage =
"";
// Store the caller-supplied reference pointer.
180 myRefPtr = reference;
// Store the caller-supplied sequence translation setting.
189 mySequenceTranslation = translation;
// Store the new read name; the buffer copy of the read name is now stale.
195 myReadName = readName;
196 myIsBufferSynced =
false;
197 myIsReadNameBufferValid =
false;
// An empty read name is replaced by the default name and default length.
202 if(myReadName.Length() == 0)
205 myReadName = DEFAULT_READ_NAME;
206 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
// Store the flag directly in the record structure.
218 myRecordPtr->myFlag = flag;
// Tail of a parameter list, then: store the reference name and look up its
// ID through the header. The meaning of the second argument (true) is not
// visible here - TODO confirm against getReferenceID's declaration.
224 const char* referenceName)
228 myReferenceName = referenceName;
230 myRecordPtr->myReferenceID = header.
getReferenceID(referenceName,
true);
// Store the position; the bin is flagged invalid as a result.
245 myRecordPtr->myPosition = position;
246 myIsBinValid =
false;
// Store the mapping quality directly in the record structure.
254 myRecordPtr->myMapQuality = mapQuality;
// Two near-identical invalidation sequences (enclosing signatures are not
// visible; presumably two CIGAR-setting overloads - TODO confirm). Each one
// marks the buffer and its CIGAR section as stale, discards the temporary
// CIGAR buffer contents (-1), invalidates the bin, and resets the
// CIGAR-derived values to the "not yet computed" marker (-1).
264 myIsBufferSynced =
false;
265 myIsCigarBufferValid =
false;
266 myCigarTempBufferLength = -1;
267 myIsBinValid =
false;
270 myAlignmentLength = -1;
271 myUnclippedStartOffset = -1;
272 myUnclippedEndOffset = -1;
// Second occurrence of the same invalidation sequence.
283 myIsBufferSynced =
false;
284 myIsCigarBufferValid =
false;
285 myCigarTempBufferLength = -1;
286 myIsBinValid =
false;
289 myAlignmentLength = -1;
290 myUnclippedStartOffset = -1;
291 myUnclippedEndOffset = -1;
// Fragments of several setters (signatures not visible in this extract).

// Tail of a parameter list, then: if the supplied mate reference name equals
// FIELD_ABSENT_STRING, the mate reference name is copied from the record's
// own reference name; the other visible assignment stores the supplied name.
298 const char* mateReferenceName)
304 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
306 myMateReferenceName = myReferenceName;
310 myMateReferenceName = mateReferenceName;
// Start of the mate reference ID assignment; its right-hand side is on a
// line not visible here.
315 myRecordPtr->myMateReferenceID =
// Store the mate position directly in the record structure.
331 myRecordPtr->myMatePosition = matePosition;
// Store the insert size directly in the record structure.
339 myRecordPtr->myInsertSize = insertSize;
// Sequence change: clear the cached mySeqWithoutEq string and mark the
// buffer's sequence section as stale.
349 mySeqWithoutEq.clear();
351 myIsBufferSynced =
false;
352 myIsSequenceBufferValid =
false;
// Quality change: mark the buffer's quality section as stale.
361 myIsBufferSynced =
false;
362 myIsQualityBufferValid =
false;
// Fragment of a routine that walks the operations in myCigarRoller and moves
// an operation's start earlier (judging by the local names insertStartPos /
// shiftLen, an insertion is being shifted - TODO confirm; the signature and
// many statements, including the loop bodies' braces, are not visible).

// -1 means the CIGAR-derived values have not been computed yet.
374 if(myAlignmentLength == -1)
// Tracks whether any operation was moved.
381 bool shifted =
false;
// Running position; advanced by the count of each operation that is passed.
385 uint32_t currentPos = 0;
392 currentPos += myCigarRoller[0].count;
395 int numOps = myCigarRoller.
size();
// Start at index 1 so that a previous operation always exists.
399 for(
int currentOp = 1; currentOp < numOps; currentOp++)
404 int prevOpIndex = currentOp-1;
// For the last operation there is no next one; nextOpIndex then refers to
// the current operation itself.
407 int nextOpIndex = currentOp+1;
408 if(nextOpIndex == numOps)
411 nextOpIndex = currentOp;
// Position at which the previous operation began.
415 uint32_t prevOpStart =
416 currentPos - myCigarRoller[prevOpIndex].count;
423 currentPos += myCigarRoller[currentOp].count;
// Inclusive end and initial start of the current operation's span.
433 uint32_t insertEndPos =
434 currentPos + myCigarRoller[currentOp].count - 1;
437 uint32_t insertStartPos = currentPos;
// The start may move back only while it stays after the previous
// operation's start; the rest of this condition is not visible here.
446 while((insertStartPos > prevOpStart) &&
// Number of positions the start was moved.
456 int shiftLen = currentPos - insertStartPos;
// Compares the operation types on either side of the current operation.
467 if(myCigarRoller[nextOpIndex].operation ==
468 myCigarRoller[prevOpIndex].operation)
// A previous operation left with a zero count is removed.
477 if(myCigarRoller[prevOpIndex].count == 0)
479 myCigarRoller.
Remove(prevOpIndex);
// Start moved all the way back to the previous operation's start: the
// current slot takes over the previous operation's type and count, and the
// previous slot is updated (its new values are on lines not visible here).
488 if(insertStartPos == prevOpStart)
492 myCigarRoller.
Update(currentOp,
493 myCigarRoller[prevOpIndex].operation,
494 myCigarRoller[prevOpIndex].count);
497 myCigarRoller.
Update(prevOpIndex,
505 currentPos += myCigarRoller[currentOp].count;
510 currentPos += myCigarRoller[currentOp].count;
// Fragment of a routine that loads the record from a caller-supplied memory
// buffer (the signature is only partly visible).
526 uint32_t fromBufferSize,
// A null or zero-length source buffer is rejected with the message below.
530 if((fromBuffer == NULL) || (fromBufferSize == 0))
534 "Cannot parse an empty file.");
// Make sure the record structure can hold fromBufferSize bytes.
542 if(!allocateRecordStructure(fromBufferSize))
// Copy the raw bytes over the record structure, then refresh the state that
// depends on the buffer contents.
548 memcpy(myRecordPtr, fromBuffer, fromBufferSize);
550 setVariablesForNewBuffer(header);
// Fragment of a routine that reads one record from an open file (the
// signature and the error-reporting calls around the message strings are
// not visible in this extract).

// The file handle must be non-null and open.
562 if((filePtr == NULL) || (filePtr->
isOpen() ==
false))
566 "Can't read from an unopened file.");
// Read the int32_t block size first. The variable that receives the byte
// count (numBytes, tested below) is assigned on a line not visible here.
575 ifread(filePtr, &(myRecordPtr->myBlockSize),
sizeof(int32_t));
// End of file with nothing read: reported as no more records.
578 if(
ifeof(filePtr) && (numBytes == 0))
582 "No more records left to read.");
// Any other short read of the size field is an error.
586 if(numBytes !=
sizeof(int32_t))
595 "EOF reached in the middle of a record.");
602 "Failed to read the record size.");
// Ensure room for the block plus the int32_t size field itself.
608 if(!allocateRecordStructure(myRecordPtr->myBlockSize +
sizeof(int32_t)))
// Read myBlockSize bytes, starting at the myReferenceID field.
616 if(
ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
617 != (
unsigned int)myRecordPtr->myBlockSize)
622 "Failed to read the record");
// Refresh the state that depends on the new buffer contents.
626 setVariablesForNewBuffer(header);
// Fragment of a routine that adds (or overwrites) an integer-valued tag
// (the signature and the branch bodies that choose bamvtype are not visible).

// Net change, in bytes, that this call makes to the tag section's size.
642 int tagBufferSize = 0;
// Tags still held only in the buffer are parsed before being modified.
645 if(myNeedToSetTagsFromBuffer)
647 if(!setTagsFromBuffer())
// Range tests against the limits of the signed types.
// NOTE(review): these use strict '>' so a value equal to the type's minimum
// falls through to the next type - confirm this is intended.
662 if(value > ((std::numeric_limits<char>::min)()))
668 else if(value > ((std::numeric_limits<short>::min)()))
// Range tests against the limits of the unsigned types.
// NOTE(review): strict '<' likewise excludes the type's maximum - confirm.
684 if(value < ((std::numeric_limits<unsigned char>::max)()))
690 else if(value < ((std::numeric_limits<unsigned short>::max)()))
// Look the tag up by a key built from its two characters and the chosen type.
705 key = MAKEKEY(tag[0], tag[1], bamvtype);
706 unsigned int hashIndex = extras.Find(key);
707 if(hashIndex != LH_NOTFOUND)
// Tag already present: index is its slot in the integer arrays.
710 index = extras[hashIndex];
714 switch(intType[index])
731 "unknown tag inttype type found.\n");
// Warn about the overwrite; myNumWarns / myMaxWarns cap how many such
// warnings are printed.
737 if(myNumWarns++ < myMaxWarns)
741 appendIntArrayValue(index, origVal);
742 appendIntArrayValue(bamvtype, value, newVal);
743 fprintf(stderr,
"WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
744 tag[0], tag[1], intType[index], origVal.c_str(), tag[0], tag[1], bamvtype, newVal.c_str());
745 if(myNumWarns == myMaxWarns)
747 fprintf(stderr,
"Suppressing rest of Duplicate Tag warnings.\n");
// Overwrite the stored value and type in place.
752 integers[index] = value;
753 intType[index] = bamvtype;
// New tag: append value and type, then register key -> index.
758 index = integers.Length();
760 integers.Push(value);
761 intType.push_back(bamvtype);
763 extras.Add(key, index);
// The buffer's tag section is now stale; apply the size change.
767 myNeedToSetTagsInBuffer =
true;
768 myIsTagsBufferValid =
false;
769 myIsBufferSynced =
false;
770 myTagBufferSize += tagBufferSize;
// Fragment of the tag-adding routine that takes the value as text (its error
// strings name it samRecord::addTag(); the signature, the switch/case labels
// and the braces are not visible in this extract).

// Integer interpretation of the text value.
784 int intVal = atoi(valuePtr);
// Net change, in bytes, that this call makes to the tag section's size.
794 int tagBufferSize = 0;
// Tags still held only in the buffer are parsed before being modified.
797 if(myNeedToSetTagsFromBuffer)
799 if(!setTagsFromBuffer())
// Look the tag up by a key built from its two characters and its type.
807 key = MAKEKEY(tag[0], tag[1], vtype);
808 unsigned int hashIndex = extras.Find(key);
809 if(hashIndex != LH_NOTFOUND)
// Tag already present: index is its slot in the per-type storage.
812 index = extras[hashIndex];
815 char origType = vtype;
// Stored integer is compared with the first character of valuePtr (not with
// intVal); an identical value and type is detected by this test.
822 if((integers[index] == (
const int)*(valuePtr)) &&
823 (intType[index] == vtype))
// Value differs: remember the old type/value for the warning, adjust the
// size by the difference between old and new numeric type sizes, overwrite.
831 origType = intType[index];
832 appendIntArrayValue(index, origTag);
833 tagBufferSize -= getNumericTagTypeSize(intType[index]);
834 tagBufferSize += getNumericTagTypeSize(vtype);
835 integers[index] = (
const int)*(valuePtr);
836 intType[index] = vtype;
// String-valued tag: compare, then swap the old length for the new one.
841 if(strings[index] == valuePtr)
849 origTag = strings[index];
850 tagBufferSize -= strings[index].Length();
851 strings[index] = valuePtr;
853 tagBufferSize += strings[index].Length();
// Tag stored as a string but sized with getBtagBufferSize.
858 if(strings[index] == valuePtr)
866 origTag = strings[index];
867 tagBufferSize -= getBtagBufferSize(strings[index]);
868 strings[index] = valuePtr;
870 tagBufferSize += getBtagBufferSize(strings[index]);
// Float-valued tag: compare against the parsed text, then overwrite.
875 if(floats[index] == (
float)atof(valuePtr))
883 origTag.appendFullFloat(floats[index]);
884 floats[index] = (float)atof(valuePtr);
// Unrecognised type for an existing tag.
889 "samRecord::addTag() - Unknown custom field of type %c\n",
892 "Unknown custom field in a tag");
// Warn about the overwrite; myNumWarns / myMaxWarns cap how many such
// warnings are printed.
900 if(myNumWarns++ < myMaxWarns)
902 fprintf(stderr,
"WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
903 tag[0], tag[1], origType, origTag.c_str(), tag[0], tag[1], vtype, valuePtr);
904 if(myNumWarns == myMaxWarns)
906 fprintf(stderr,
"Suppressing rest of Duplicate Tag warnings.\n");
// New tag: append to the storage that matches its type.
// Integer storage receives the first character of valuePtr.
916 index = integers.Length();
917 integers.Push((
const int)*(valuePtr));
918 intType.push_back(vtype);
// String storage; size grows by 4 plus the string length.
922 index = strings.Length();
923 strings.Push(valuePtr);
924 tagBufferSize += 4 + strings.Last().Length();
// String storage sized via getBtagBufferSize; size grows by 3 plus that.
927 index = strings.Length();
928 strings.Push(valuePtr);
929 tagBufferSize += 3 + getBtagBufferSize(strings[index]);
// Float storage.
932 index = floats.size();
933 floats.push_back((
float)atof(valuePtr));
// Unrecognised type for a new tag.
938 "samRecord::addTag() - Unknown custom field of type %c\n",
941 "Unknown custom field in a tag");
// Register key -> index for the new tag.
948 extras.Add(key, index);
// The buffer's tag section is now stale; apply the size change.
956 myNeedToSetTagsInBuffer =
true;
957 myIsTagsBufferValid =
false;
958 myIsBufferSynced =
false;
959 myTagBufferSize += tagBufferSize;
// Stray line from a separate, earlier definition (the listing numbers jump
// after it): tests whether any tags are stored in extras.
967 if(extras.Entries() != 0)
// Fragment of the single-tag removal routine (its messages name it rmTag;
// the signature, case labels and braces are not visible in this extract).
987 "rmTag called with tag that is not 2 characters\n");
// Tags still held only in the buffer are parsed before being modified.
992 if(myNeedToSetTagsFromBuffer)
994 if(!setTagsFromBuffer())
// Look the tag up by a key built from its two characters and its type.
1003 int key = MAKEKEY(tag[0], tag[1], type);
1005 int offset = extras.Find(key);
// Recover the stored type from the key; a further call refines it to the
// stored integer type.
1017 getTypeFromKey(key, vtype);
1020 vtype = getIntegerType(offset);
// Bytes this tag occupies in the buffer: 4 plus the string length for one
// type, 3 plus the getBtagBufferSize result for another.
1047 rmBuffSize = 4 +
getString(offset).Length();
1050 rmBuffSize = 3 + getBtagBufferSize(
getString(offset));
1054 "rmTag called with unknown type.\n");
// The buffer's tag section is now stale and shrinks by the removed size.
1060 myNeedToSetTagsInBuffer =
true;
1061 myIsTagsBufferValid =
false;
1062 myIsBufferSynced =
false;
1063 myTagBufferSize -= rmBuffSize;
// Remove the tag's entry from the lookup table.
1066 extras.Delete(offset);
// Fragment of the multi-tag removal routine (its messages name it rmTags;
// the signature, case labels and braces are not visible in this extract).
// The input text is scanned as entries of the form "XY:T" separated by
// ';' or ','.
1073 const char* currentTagPtr = tags;
// Tags still held only in the buffer are parsed before being modified.
1076 if(myNeedToSetTagsFromBuffer)
1078 if(!setTagsFromBuffer())
// Turned false when a malformed entry or an unknown type is met.
1086 bool returnStatus =
true;
1089 while(*currentTagPtr !=
'\0')
// Each entry needs two tag characters, a ':' at index 2 and a type
// character at index 3.
1095 if((currentTagPtr[0] ==
'\0') || (currentTagPtr[1] ==
'\0') ||
1096 (currentTagPtr[2] !=
':') || (currentTagPtr[3] ==
'\0'))
1099 "rmTags called with improperly formatted tags.\n");
1100 returnStatus =
false;
// Look the entry up; the third MAKEKEY argument is on a line not visible here.
1105 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
1108 int offset = extras.Find(key);
1115 getTypeFromKey(key, vtype);
1118 vtype = getIntegerType(offset);
// Accumulate the removed bytes over all entries (note '+=').
1144 rmBuffSize += 4 +
getString(offset).Length();
1147 rmBuffSize += 3 + getBtagBufferSize(
getString(offset));
1151 "rmTag called with unknown type.\n");
1152 returnStatus =
false;
// Remove the tag's entry from the lookup table.
1157 extras.Delete(offset);
// After the 4-character entry, only ';', ',' or the end of the string is
// accepted.
1160 if((currentTagPtr[4] ==
';') || (currentTagPtr[4] ==
','))
1165 else if(currentTagPtr[4] !=
'\0')
1169 "rmTags called with improperly formatted tags.\n");
1170 returnStatus =
false;
// The buffer's tag section is now stale and shrinks by the removed total.
1181 myNeedToSetTagsInBuffer =
true;
1182 myIsTagsBufferValid =
false;
1183 myIsBufferSynced =
false;
1184 myTagBufferSize -= rmBuffSize;
1187 return(returnStatus);
// Fragments of three buffer-facing routines (signatures not visible).

// Raw-buffer accessor: rebuild the buffer when it is out of sync or was
// built with a different sequence translation, flush pending tags, then
// hand out the record pointer.
1205 if((myIsBufferSynced ==
false) ||
1206 (myBufferSequenceTranslation != translation))
1208 status &= fixBuffer(translation);
1211 if(myNeedToSetTagsInBuffer)
1213 status &= setTagsInBuffer();
1219 return (
const void *)myRecordPtr;
// Writer: the file handle must be non-null and open.
1236 if((filePtr == NULL) || (filePtr->
isOpen() ==
false))
1240 "Can't write to an unopened file.");
// Rebuild the buffer first if it is stale or uses another translation.
1244 if((myIsBufferSynced ==
false) ||
1245 (myBufferSequenceTranslation != translation))
1247 if(!fixBuffer(translation))
// Write the block plus its leading int32_t size field and compare the
// number of bytes written with the number requested.
1254 unsigned int numBytesToWrite = myRecordPtr->myBlockSize +
sizeof(int32_t);
1255 unsigned int numBytesWritten =
1256 ifwrite(filePtr, myRecordPtr, numBytesToWrite);
1259 if(numBytesToWrite == numBytesWritten)
// Block-size accessor: resync (keeping the buffer's current translation)
// before reporting the size. The result of fixBuffer is not checked here.
1274 if(myIsBufferSynced ==
false)
1279 fixBuffer(myBufferSequenceTranslation);
1281 return myRecordPtr->myBlockSize;
// Fragments of a run of accessor functions; each signature is on a line not
// visible in this extract, so the routines are described by what they return.

// Reference name as a C string.
1289 return myReferenceName.c_str();
// Reference ID as stored in the record.
1296 return myRecordPtr->myReferenceID;
// Stored position plus one, and the stored position as is (presumably the
// 1-based and 0-based forms - TODO confirm against the header).
1303 return (myRecordPtr->myPosition + 1);
1310 return myRecordPtr->myPosition;
// Read name length: from the buffer when that is valid, otherwise the
// string length plus one.
1319 if(myIsReadNameBufferValid)
1321 return(myRecordPtr->myReadNameLength);
1324 return(myReadName.Length() + 1);
// Mapping quality as stored in the record.
1331 return myRecordPtr->myMapQuality;
// Bin: assigned (right-hand side not visible), flagged valid, then returned.
1342 myRecordPtr->myBin =
1344 myIsBinValid =
true;
1346 return(myRecordPtr->myBin);
// CIGAR length: from the buffer when that is valid, otherwise from the
// temporary CIGAR buffer (-1 there means it has not been filled yet).
1355 if(myIsCigarBufferValid)
1357 return myRecordPtr->myCigarLength;
1360 if(myCigarTempBufferLength == -1)
1368 return(myCigarTempBufferLength);
// Flag as stored in the record.
1375 return myRecordPtr->myFlag;
// Read length: when the buffer's sequence is stale the string length is
// used, with a lone "*" tested separately (that branch's body is not
// visible); otherwise the length stored in the record.
1382 if(myIsSequenceBufferValid ==
false)
1385 if((mySequence.Length() == 1) && (mySequence[0] ==
'*'))
1390 return(mySequence.Length());
1392 return(myRecordPtr->myReadLength);
// Mate reference name as a C string.
1401 return myMateReferenceName.c_str();
// Mate reference name variant that can return FIELD_ABSENT_STRING; "*" is
// returned unchanged. The condition selecting FIELD_ABSENT_STRING is on a
// line not visible here.
1411 if(myMateReferenceName ==
"*")
1413 return(myMateReferenceName);
1417 return(FIELD_ABSENT_STRING);
1421 return(myMateReferenceName);
// Mate reference ID as stored in the record.
1429 return myRecordPtr->myMateReferenceID;
// Stored mate position plus one, and the stored mate position as is.
1436 return (myRecordPtr->myMatePosition + 1);
1443 return myRecordPtr->myMatePosition;
// Insert size as stored in the record.
1450 return myRecordPtr->myInsertSize;
// Alignment end: -1 means the alignment length is not computed yet; a zero
// length yields the start position, otherwise start + length - 1.
1458 if(myAlignmentLength == -1)
1464 if(myAlignmentLength == 0)
1467 return(myRecordPtr->myPosition);
1469 return(myRecordPtr->myPosition + myAlignmentLength - 1);
// Alignment length, computed on demand when still -1.
1484 if(myAlignmentLength == -1)
1490 return(myAlignmentLength);
// Unclipped start: stored position minus the unclipped start offset.
1497 if(myUnclippedStartOffset == -1)
1502 return(myRecordPtr->myPosition - myUnclippedStartOffset);
// Fragments of the string getters for read name, CIGAR, sequence and
// quality (signatures not visible in this extract).

// Read name: when the string copy is empty it is taken from the start of
// the record's data area.
1533 if(myReadName.Length() == 0)
1537 myReadName = (
char*)&(myRecordPtr->myData);
1539 return myReadName.c_str();
// CIGAR: an empty string copy triggers work on lines not visible here.
1546 if(myCigar.Length() == 0)
1552 return myCigar.c_str();
// Sequence: an empty string copy is filled from the buffer first.
1565 if(mySequence.Length() == 0)
1569 setSequenceAndQualityFromBuffer();
// With no translation requested, or no reference set, the stored sequence
// is returned unchanged.
1573 if((translation ==
NONE) || (myRefPtr == NULL))
1575 return mySequence.c_str();
// EQUAL translation: mySeqWithEq is filled on demand (empty means not yet
// built) and returned. The call that builds it is only partly visible.
1577 else if(translation ==
EQUAL)
1579 if(mySeqWithEq.length() == 0)
1582 if(mySequence ==
"*")
1591 myRecordPtr->myPosition,
1598 return(mySeqWithEq.c_str());
// Remaining translation: mySeqWithoutEq is filled on demand; a "*" sequence
// is stored as '*'.
1603 if(mySeqWithoutEq.length() == 0)
1605 if(mySequence ==
"*")
1608 mySeqWithoutEq =
'*';
1614 myRecordPtr->myPosition,
1621 return(mySeqWithoutEq.c_str());
// Quality: an empty string copy is filled from the buffer first.
1629 if(myQuality.Length() == 0)
1633 setSequenceAndQualityFromBuffer();
1635 return myQuality.c_str();
// Single-base getter without an explicit translation: forwards to the
// two-argument form using the record's configured translation.
1641 return(
getSequence(index, mySequenceTranslation));
// Fragment of the single-base getter that takes an index and a translation
// (its exception text names it SamRecord::getSequence; the signature and
// several conditions are not visible in this extract).

// Lookup table from a 4-bit packed base code to its ASCII character.
1647 static const char * asciiBases =
"=AC.G...T......N";
// Thrown for a '*' sequence (the guarding condition is not visible here).
1655 String exceptionString =
"SamRecord::getSequence(";
1656 exceptionString += index;
1657 exceptionString +=
") is not allowed since sequence = '*'";
1658 throw std::runtime_error(exceptionString.c_str());
// Index must lie in [0, readLen).
1660 else if((index < 0) || (index >= readLen))
1663 String exceptionString =
"SamRecord::getSequence(";
1664 exceptionString += index;
1665 exceptionString +=
") is out of range. Index must be between 0 and ";
1666 exceptionString += (readLen - 1);
1667 throw std::runtime_error(exceptionString.c_str());
// No translation requested, or no reference to translate against.
1671 if((translation ==
NONE) || (myRefPtr == NULL))
1674 if(mySequence.Length() == 0)
// No string copy: decode straight from the packed buffer. Two bases share a
// byte; the low nibble and the high nibble are selected by the start of this
// conditional expression, which is on a line not visible here.
1677 if(myIsSequenceBufferValid)
1680 asciiBases[myPackedSequence[index / 2] & 0xF] :
1681 asciiBases[myPackedSequence[index / 2] >> 4]);
// Neither a string copy nor a valid buffer exists.
1685 String exceptionString =
"SamRecord::getSequence(";
1686 exceptionString += index;
1687 exceptionString +=
") called with no sequence set";
1688 throw std::runtime_error(exceptionString.c_str());
1692 return(mySequence[index]);
// Translation needed: make sure the string copy exists first.
1699 if(mySequence.Length() == 0)
1703 setSequenceAndQualityFromBuffer();
// EQUAL translation: mySeqWithEq is built on demand, then indexed.
1707 if(translation ==
EQUAL)
1711 if(mySeqWithEq.length() == 0)
1716 if(mySequence ==
"*")
1725 myRecordPtr->myPosition,
1733 return(mySeqWithEq[index]);
// Remaining translation: mySeqWithoutEq is built on demand, then indexed.
1740 if(mySeqWithoutEq.length() == 0)
1745 if(mySequence ==
"*")
1748 mySeqWithoutEq =
'*';
1756 myRecordPtr->myPosition,
1764 return(mySeqWithoutEq[index]);
// Fragment of the single-quality getter (its exception text names it
// SamRecord::getQuality; the signature and leading checks are not visible).

// Index must lie in [0, readLen).
1781 else if((index < 0) || (index >= readLen))
1784 String exceptionString =
"SamRecord::getQuality(";
1785 exceptionString += index;
1786 exceptionString +=
") is out of range. Index must be between 0 and ";
1787 exceptionString += (readLen - 1);
1788 throw std::runtime_error(exceptionString.c_str());
// No string copy: read the packed value and add 33 to form the character.
1791 if(myQuality.Length() == 0)
1795 return(myPackedQuality[index] + 33);
// String copy present: a lone "*" is tested separately (that branch's body
// is not visible here).
1800 if((myQuality.Length() == 1) && (myQuality[0] ==
'*'))
// The quality string may be shorter than the read.
1805 else if(index >= myQuality.Length())
1810 String exceptionString =
"SamRecord::getQuality(";
1811 exceptionString += index;
1812 exceptionString +=
") is out of range. Index must be between 0 and ";
1813 exceptionString += (myQuality.Length() - 1);
1814 throw std::runtime_error(exceptionString.c_str());
1818 return(myQuality[index]);
// Fragments of three routines (signatures not visible in this extract).

// Hands out a pointer to the internal myCigarRoller; -1 means the
// CIGAR-derived values have not been computed yet.
1830 if(myAlignmentLength == -1)
1835 return(&myCigarRoller);
// A second routine starts with the same "not yet computed" test.
1845 if(myAlignmentLength == -1)
// Forwards to the getFields form that takes a translation, passing the
// record's configured translation.
1857 return(
getFields(recStruct, readName, cigar, sequence, quality,
1858 mySequenceTranslation));
// In the translation-taking form: resync the buffer when it is stale.
1868 if(myIsBufferSynced ==
false)
1870 if(!fixBuffer(translation))
// Fragment of the routine that reports the size of the tag section (the
// signature and the return for the first branch are not visible here).

// Tags not parsed yet: derive the size from the buffer layout.
1920 if(myNeedToSetTagsFromBuffer)
// Tags start after the read name, the CIGAR words, the packed sequence
// ((readLength + 1) / 2 bytes) and the qualities (readLength bytes).
1924 unsigned char * tagStart =
1925 (
unsigned char *)myRecordPtr->myData
1926 + myRecordPtr->myReadNameLength
1927 + myRecordPtr->myCigarLength *
sizeof(
int)
1928 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
// Bytes from the myReferenceID field up to the first tag; myBlockSize is
// measured from that same field, so the remainder is the tag size.
1933 uint32_t nonTagSize =
1934 tagStart - (
unsigned char*)&(myRecordPtr->myReferenceID);
1936 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
// Tags already parsed: use the tracked size.
1941 return(myTagBufferSize);
// Fragment of a routine that steps through the stored tags one slot at a
// time using myLastTagIndex as the cursor (the signature, case labels and
// cursor increment are not visible in this extract).

// Tags still held only in the buffer are parsed first.
1953 if(myNeedToSetTagsFromBuffer)
1955 if(!setTagsFromBuffer())
// Nothing left once the cursor reaches the table capacity.
1966 int maxTagIndex = extras.Capacity();
1967 if(myLastTagIndex >= maxTagIndex)
1975 bool tagFound =
false;
// Scan forward until an occupied slot is found or the table ends.
1977 while((tagFound ==
false) && (myLastTagIndex < maxTagIndex))
1979 if(extras.SlotInUse(myLastTagIndex))
1982 int key = extras.GetKey(myLastTagIndex);
1984 getTypeFromKey(key, vtype);
// Point *value at the stored float, integer or string for this slot.
1990 *value = getFloatPtr(myLastTagIndex);
1993 *value = getIntegerPtr(myLastTagIndex, vtype);
2002 *value = getStringPtr(myLastTagIndex);
2006 "Unknown tag type");
// Separate fragment: the cursor is reset to -1.
2024 myLastTagIndex = -1;
// Separate fragment: tests for the six integer type characters.
2030 if((vtype ==
'c') || (vtype ==
'C') ||
2031 (vtype ==
's') || (vtype ==
'S') ||
2032 (vtype ==
'i') || (vtype ==
'I'))
// Separate fragment: tests for the two types kept in string storage.
2062 if((vtype ==
'Z') || (vtype ==
'B'))
// Fragment of the routine that renders selected tags into returnString (its
// first message names it getTagsString; the signature, case labels and
// braces are not visible). The request text is scanned as entries of the
// form "XY:T" separated by ';' or ','.
2072 const char* currentTagPtr = tags;
2074 returnString.Clear();
// Tags still held only in the buffer are parsed first.
2076 if(myNeedToSetTagsFromBuffer)
2078 if(!setTagsFromBuffer())
// Turned false when a malformed entry or an unknown type is met.
2086 bool returnStatus =
true;
2088 while(*currentTagPtr !=
'\0')
// Each entry needs two tag characters, a ':' at index 2 and a type
// character at index 3.
2093 if((currentTagPtr[0] ==
'\0') || (currentTagPtr[1] ==
'\0') ||
2094 (currentTagPtr[2] !=
':') || (currentTagPtr[3] ==
'\0'))
2097 "getTagsString called with improperly formatted tags.\n");
2098 returnStatus =
false;
// Look the entry up; the third MAKEKEY argument is on a line not visible here.
2103 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
2106 int offset = extras.Find(key);
// Entries after the first are preceded by the delimiter.
2111 if(!returnString.IsEmpty())
2113 returnString += delim;
// Emit "XY:T:" and then the value.
2115 returnString += currentTagPtr[0];
2116 returnString += currentTagPtr[1];
2117 returnString +=
':';
2118 returnString += currentTagPtr[3];
2119 returnString +=
':';
2123 getTypeFromKey(key, vtype);
2128 returnString += *(
int*)getIntegerPtr(offset, vtype);
2131 returnString += *(
float*)getFloatPtr(offset);
2135 returnString += *(
String*)getStringPtr(offset);
// NOTE(review): this message and the one further down mention rmTag/rmTags
// although the first message in this routine says getTagsString - confirm
// whether the wording is intended.
2139 "rmTag called with unknown type.\n");
2140 returnStatus =
false;
// After the 4-character entry, only ';', ',' or the end of the string is
// accepted.
2145 if((currentTagPtr[4] ==
';') || (currentTagPtr[4] ==
','))
2150 else if(currentTagPtr[4] !=
'\0')
2154 "rmTags called with improperly formatted tags.\n");
2155 returnStatus =
false;
2164 return(returnStatus);
// Fragments of several tag lookup routines (signatures and not-found
// branches are mostly not visible). All follow the same pattern: parse tags
// from the buffer if that is still pending, build a key from the two tag
// characters plus a type character, look it up in extras, and use the stored
// index to reach the value.

// String lookup returning a pointer: tries type 'Z', then type 'B'.
2171 if(myNeedToSetTagsFromBuffer)
2173 if(!setTagsFromBuffer())
2182 int key = MAKEKEY(tag[0], tag[1],
'Z');
2183 int offset = extras.Find(key);
2189 key = MAKEKEY(tag[0], tag[1],
'B');
2190 offset = extras.Find(key);
2199 value = extras[offset];
2200 return(&(strings[value]));
// Integer lookup returning a pointer (key built with type 'i').
2209 if(myNeedToSetTagsFromBuffer)
2211 if(!setTagsFromBuffer())
2220 int key = MAKEKEY(tag[0], tag[1],
'i');
2221 int offset = extras.Find(key);
2230 value = extras[offset];
2232 return(&(integers[value]));
// Integer lookup that writes the result to tagVal.
2241 if(myNeedToSetTagsFromBuffer)
2243 if(!setTagsFromBuffer())
2252 int key = MAKEKEY(tag[0], tag[1],
'i');
2253 int offset = extras.Find(key);
2262 value = extras[offset];
2264 tagVal = integers[value];
// Float lookup that writes the result to tagVal (key built with type 'f').
2274 if(myNeedToSetTagsFromBuffer)
2276 if(!setTagsFromBuffer())
2285 int key = MAKEKEY(tag[0], tag[1],
'f');
2286 int offset = extras.Find(key);
2295 value = extras[offset];
2297 tagVal = floats[value];
// String lookup returning a reference: tries 'Z', then 'B', and returns
// NOT_FOUND_TAG_STRING when neither exists.
2307 if(myNeedToSetTagsFromBuffer)
2309 if(!setTagsFromBuffer())
2317 int key = MAKEKEY(tag[0], tag[1],
'Z');
2318 int offset = extras.Find(key);
2324 key = MAKEKEY(tag[0], tag[1],
'B');
2325 offset = extras.Find(key);
2329 return(NOT_FOUND_TAG_STRING);
2332 value = extras[offset];
2334 return strings[value];
// Integer lookup returning a reference; NOT_FOUND_TAG_INT when missing.
2343 if(myNeedToSetTagsFromBuffer)
2345 if(!setTagsFromBuffer())
2353 int key = MAKEKEY(tag[0], tag[1],
'i');
2354 int offset = extras.Find(key);
2360 return NOT_FOUND_TAG_INT;
2363 value = extras[offset];
2365 return integers[value];
// Existence check for a tag of a caller-specified type.
2374 if(myNeedToSetTagsFromBuffer)
2376 if(!setTagsFromBuffer())
2384 int key = MAKEKEY(tag[0], tag[1], type);
2386 return (extras.Find(key) != LH_NOTFOUND);
// Makes sure the record structure is at least `size` bytes. The visible
// lines only act when the current allocation is smaller than requested.
// (The statement that obtains tmpRecordPtr and the return statements are on
// lines not visible in this extract.)
2400 bool SamRecord::allocateRecordStructure(
int size)
2402 if (allocatedSize < size)
// Allocation failure is reported on stderr.
2406 if(tmpRecordPtr == NULL)
2409 fprintf(stderr,
"FAILED TO ALLOCATE MEMORY!!!");
// Adopt the new block.
2414 myRecordPtr = tmpRecordPtr;
// The cached pointers into the data area are recomputed from the new
// record pointer: the packed sequence follows the read name and CIGAR words.
2417 if(myIsSequenceBufferValid)
2419 myPackedSequence = (
unsigned char *)myRecordPtr->myData +
2420 myRecordPtr->myReadNameLength +
2421 myRecordPtr->myCigarLength *
sizeof(
int);
// The packed quality follows the packed sequence ((readLength + 1) / 2 bytes).
2423 if(myIsQualityBufferValid)
2425 myPackedQuality = (
unsigned char *)myRecordPtr->myData +
2426 myRecordPtr->myReadNameLength +
2427 myRecordPtr->myCigarLength *
sizeof(
int) +
2428 (myRecordPtr->myReadLength + 1) / 2;
// Remember the new capacity.
2431 allocatedSize = size;
// Returns the address of the stored string for the tag at the given extras
// slot. extras maps the slot to an index into the strings array.
2438 void* SamRecord::getStringPtr(
int index)
2440 int value = extras[index];
2442 return &(strings[value]);
// Returns the address of the stored integer for the tag at the given extras
// slot and reports its stored integer type through `type`.
2445 void* SamRecord::getIntegerPtr(
int offset,
char& type)
// extras maps the slot to an index shared by integers and intType.
2447 int value = extras[offset];
2449 type = intType[value];
2451 return &(integers[value]);
// Returns the address of the stored float for the tag at the given extras
// slot. extras maps the slot to an index into the floats container.
2454 void* SamRecord::getFloatPtr(
int offset)
2456 int value = extras[offset];
2458 return &(floats[value]);
// Fragment of the routine that rebuilds the record buffer from the
// string-side fields for a requested sequence translation (callers visible
// elsewhere in this file invoke it as fixBuffer(translation); the signature,
// the computation of the new lengths, case labels and braces are not
// visible in this extract).

// Nothing to do when the buffer is in sync and already uses this translation.
2466 if(myIsBufferSynced &&
2467 (myBufferSequenceTranslation == translation))
// Bin assignment (right-hand side not visible), then flagged valid.
2478 myRecordPtr->myBin =
2480 myIsBinValid =
true;
// Two bases are packed per byte.
2491 uint32_t bamSequenceLen = (newReadLen+1)/2;
// New total size: the fixed part before myData, then read name, CIGAR
// words (4 bytes each), qualities, packed sequence and tags.
2497 ((
unsigned char*)(&(myRecordPtr->myData)) -
2498 (
unsigned char*)myRecordPtr) +
2499 newReadNameLen + ((newCigarLen)*4) +
2500 newReadLen + bamSequenceLen + newTagLen;
2502 if(!allocateRecordStructure(newBufferSize))
// Any of these length changes moves the sections that follow in the buffer.
2513 bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
2514 bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
2515 bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
// Tags that live only in the buffer are pulled out before their position
// shifts, and will be rewritten below.
2519 if(myIsTagsBufferValid &&
2520 (readNameLenChange | cigarLenChange | readLenChange))
2522 status &= setTagsFromBuffer();
2525 myIsTagsBufferValid =
false;
// Same for sequence and quality.
2531 if((myIsQualityBufferValid | myIsSequenceBufferValid) &&
2532 (readNameLenChange | cigarLenChange | readLenChange))
2534 setSequenceAndQualityFromBuffer();
2537 myIsQualityBufferValid =
false;
2538 myIsSequenceBufferValid =
false;
// The CIGAR only moves when the read name length changes.
2543 if((myIsCigarBufferValid) &&
2544 (readNameLenChange))
2546 status &= parseCigarBinary();
2547 myIsCigarBufferValid =
false;
// Write the read name section if it is stale.
2551 if(!myIsReadNameBufferValid)
2553 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
2557 myRecordPtr->myReadNameLength = newReadNameLen;
2558 myIsReadNameBufferValid =
true;
// The CIGAR words begin right after the read name.
2561 unsigned char * readNameEnds = (
unsigned char*)(&(myRecordPtr->myData)) +
2562 myRecordPtr->myReadNameLength;
2565 unsigned int * packedCigar = (
unsigned int *) (
void *) readNameEnds;
// Write the CIGAR section from the temporary CIGAR buffer if it is stale.
2567 if(!myIsCigarBufferValid)
2571 myRecordPtr->myCigarLength = newCigarLen;
2572 memcpy(packedCigar, myCigarTempBuffer,
2573 myRecordPtr->myCigarLength *
sizeof(uint32_t));
2575 myIsCigarBufferValid =
true;
// Packed sequence follows the CIGAR; qualities follow the packed sequence.
2578 unsigned char * packedSequence = readNameEnds +
2579 myRecordPtr->myCigarLength *
sizeof(int);
2580 unsigned char * packedQuality = packedSequence + bamSequenceLen;
// Rewrite sequence and/or quality when stale or when the translation differs.
2582 if(!myIsSequenceBufferValid || !myIsQualityBufferValid ||
2583 (myBufferSequenceTranslation != translation))
2585 myRecordPtr->myReadLength = newReadLen;
// A lone "*" quality string is tested here (branch body not visible).
2588 bool noQuality =
false;
2589 if((myQuality.Length() == 1) && (myQuality[0] ==
'*'))
2594 const char* translatedSeq = NULL;
2598 if((!myIsSequenceBufferValid) ||
2599 (translation != myBufferSequenceTranslation))
2604 for (
int i = 0; i < myRecordPtr->myReadLength; i++)
2606 if((!myIsSequenceBufferValid) ||
2607 (translation != myBufferSequenceTranslation))
// Map the base character to its code (case labels not visible).
2611 switch(translatedSeq[i])
2639 "Unknown Sequence character found.");
// One assignment ORs the code into the low nibble; the other stores it in
// the high nibble and thereby overwrites the byte. Which index parity
// selects which is on a line not visible here.
2648 packedSequence[i/2] |= seqVal;
2653 packedSequence[i/2] = seqVal << 4;
2657 if(!myIsQualityBufferValid)
// Missing quality (a "*" string or a string shorter than the read) is
// stored as 0xFF; otherwise the character minus 33 is stored.
2660 if((noQuality) || (myQuality.Length() <= i))
2664 packedQuality[i] = 0xFF;
2669 packedQuality[i] = myQuality[i] - 33;
// Refresh the cached pointers into the rebuilt buffer.
2673 myPackedSequence = (
unsigned char *)myRecordPtr->myData +
2674 myRecordPtr->myReadNameLength +
2675 myRecordPtr->myCigarLength *
sizeof(
int);
2676 myPackedQuality = myPackedSequence +
2677 (myRecordPtr->myReadLength + 1) / 2;
2678 myIsSequenceBufferValid =
true;
2679 myIsQualityBufferValid =
true;
2680 myBufferSequenceTranslation = translation;
// Write the tag section if it is stale.
2683 if(!myIsTagsBufferValid)
2685 status &= setTagsInBuffer();
// Record the final lengths in the header fields.
2689 myRecordPtr->myReadNameLength = newReadNameLen;
2690 myRecordPtr->myCigarLength = newCigarLen;
2691 myRecordPtr->myReadLength = newReadLen;
// The stored block size excludes the int32_t size field itself.
2695 myRecordPtr->myBlockSize = newBufferSize -
sizeof(int32_t);
2699 myIsBufferSynced =
true;
// Fills the string copies of the sequence and/or quality from the packed
// buffer. A field is extracted only when its buffer section is valid and
// its string copy is still empty. (The guards inside the loop, the early
// return and the branch bodies at the end are on lines not visible here.)
2709 void SamRecord::setSequenceAndQualityFromBuffer()
2717 bool extractSequence =
false;
2718 if(myIsSequenceBufferValid && (mySequence.Length() == 0))
2720 extractSequence =
true;
2724 bool extractQuality =
false;
2725 if(myIsQualityBufferValid && (myQuality.Length() == 0))
2727 extractQuality =
true;
// Nothing to extract.
2732 if(!extractSequence && !extractQuality)
// Size the destination strings to the read length.
2740 mySequence.SetLength(myRecordPtr->myReadLength);
2744 myQuality.SetLength(myRecordPtr->myReadLength);
// Lookup table from a 4-bit packed base code to its ASCII character.
2747 const char * asciiBases =
"=AC.G...T......N";
// Becomes true once any quality byte other than 0xFF is seen.
2751 bool qualitySpecified =
false;
2753 for (
int i = 0; i < myRecordPtr->myReadLength; i++)
// Two bases per byte: odd indexes use the low nibble, even the high nibble.
2757 mySequence[i] = i & 1 ?
2758 asciiBases[myPackedSequence[i / 2] & 0xF] :
2759 asciiBases[myPackedSequence[i / 2] >> 4];
2764 if(myPackedQuality[i] != 0xFF)
2767 qualitySpecified =
true;
// Stored quality plus 33 gives the printable character.
2770 myQuality[i] = myPackedQuality[i] + 33;
// Special cases handled after the loop: a zero-length read, and a quality
// section in which no value was specified (their bodies are not visible).
2775 if(myRecordPtr->myReadLength == 0)
2786 else if(extractQuality && !qualitySpecified)
// Parses the CIGAR from whichever representation is available: the binary
// form in the buffer when the string copy is empty, otherwise the string.
// Returns the chosen parser's result.
2795 bool SamRecord::parseCigar()
2798 if(myCigar.Length() == 0)
2801 return(parseCigarBinary());
2803 return(parseCigarString());
// Parses the binary CIGAR held in the record buffer into myCigarRoller and
// copies the packed words into the temporary CIGAR buffer. (Branch bodies
// and return statements are on lines not visible in this extract.)
2807 bool SamRecord::parseCigarBinary()
// A non-empty string copy is tested first (body not visible).
2812 if(myCigar.Length() != 0)
// The CIGAR words begin right after the read name in the data area.
2818 unsigned char * readNameEnds =
2819 (
unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
2821 unsigned int * packedCigar = (
unsigned int *) (
void *) readNameEnds;
2823 myCigarRoller.
Set(packedCigar, myRecordPtr->myCigarLength);
// A zero-length CIGAR is tested separately (body not visible).
2833 if(myRecordPtr->myCigarLength == 0)
// Grow the temporary buffer only when the required size exceeds what is
// already allocated.
2840 int newBufferSize = myRecordPtr->myCigarLength *
sizeof(uint32_t);
2841 if(newBufferSize > myCigarTempBufferAllocatedSize)
// realloc's result goes to a temporary first, so the existing pointer is
// still held if the allocation fails.
2843 uint32_t* tempBufferPtr =
2844 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
2845 if(tempBufferPtr == NULL)
2849 fprintf(stderr,
"FAILED TO ALLOCATE MEMORY!!!");
2851 "Failed to Allocate Memory.");
2854 myCigarTempBuffer = tempBufferPtr;
2855 myCigarTempBufferAllocatedSize = newBufferSize;
// Copy the packed words and record how many there are.
2858 memcpy(myCigarTempBuffer, packedCigar,
2859 myRecordPtr->myCigarLength *
sizeof(uint32_t));
2862 myCigarTempBufferLength = myRecordPtr->myCigarLength;
// Parses the CIGAR string copy into myCigarRoller and into packed words in
// the temporary CIGAR buffer. (The operation-character switch, the pointer
// advance and the return statements are on lines not visible here.)
2868 bool SamRecord::parseCigarString()
// Start from an empty result.
2870 myCigarTempBufferLength = 0;
2874 myAlignmentLength = 0;
2875 myUnclippedStartOffset = 0;
2876 myUnclippedEndOffset = 0;
2877 myCigarRoller.
clear();
2881 myCigarRoller.
Set(myCigar);
// One uint32_t per character of the string is reserved (presumably a safe
// upper bound on the number of operations - TODO confirm).
2891 int newBufferSize = myCigar.Length() *
sizeof(uint32_t);
2892 if(newBufferSize > myCigarTempBufferAllocatedSize)
// realloc's result goes to a temporary first, so the existing pointer is
// still held if the allocation fails.
2894 uint32_t* tempBufferPtr =
2895 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
2896 if(tempBufferPtr == NULL)
2900 fprintf(stderr,
"FAILED TO ALLOCATE MEMORY!!!");
2902 "Failed to Allocate Memory.");
2905 myCigarTempBuffer = tempBufferPtr;
2906 myCigarTempBufferAllocatedSize = newBufferSize;
// Walk the string one "<length><operation>" entry at a time.
2914 const char* cigarEntryStart = myCigar.c_str();
2918 unsigned int * packedCigar = myCigarTempBuffer;
2921 const char* endCigarString = cigarEntryStart + myCigar.Length();
2922 while(cigarEntryStart < endCigarString)
2924 bool validCigarEntry =
true;
// strtol reads the decimal length and leaves cigarOp at the first
// character after it.
2927 opLen = strtol(cigarEntryStart, &cigarOp, 10);
// Unrecognised operation character.
2955 fprintf(stderr,
"ERROR parsing cigar\n");
2956 validCigarEntry =
false;
2959 "Unknown operation found when parsing the Cigar.");
// Pack the entry: length in the upper bits, operation code in the low 4 bits.
2965 ++myCigarTempBufferLength;
2966 *packedCigar = (opLen << 4) | op;
// The next entry starts after the operation character.
2970 cigarEntryStart = ++cigarOp;
2978 bool SamRecord::setTagsFromBuffer()
2981 if(myNeedToSetTagsFromBuffer ==
false)
2988 myNeedToSetTagsFromBuffer =
false;
2990 unsigned char * extraPtr = myPackedQuality + myRecordPtr->myReadLength;
2997 while (myRecordPtr->myBlockSize + 4 -
2998 (extraPtr - (
unsigned char *)myRecordPtr) > 0)
3002 void * content = extraPtr + 3;
3003 int tagBufferSize = 0;
3005 key = MAKEKEY(extraPtr[0], extraPtr[1], extraPtr[2]);
3008 unsigned int location = extras.Find(key);
3010 String* duplicate = NULL;
3012 if(location != LH_NOTFOUND)
3016 origIndex = extras[location];
3018 *duplicate = (char)(extraPtr[0]);
3019 *duplicate += (char)(extraPtr[1]);
3022 *origTag = *duplicate;
3023 *duplicate += (char)(extraPtr[2]);
3027 switch (extraPtr[2])
3030 if(duplicate != NULL)
3032 *duplicate += (* (
char *) content);
3033 *origTag += intType[origIndex];
3035 appendIntArrayValue(origIndex, *origTag);
3036 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3037 integers[origIndex] = *(
char *)content;
3038 intType[origIndex] = extraPtr[2];
3039 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3043 value = integers.Length();
3044 integers.Push(* (
char *) content);
3045 intType.push_back(extraPtr[2]);
3051 if(duplicate != NULL)
3053 *duplicate += (* (
char *) content);
3054 *origTag += intType[origIndex];
3056 appendIntArrayValue(origIndex, *origTag);
3057 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3058 integers[origIndex] = *(
char *)content;
3059 intType[origIndex] = extraPtr[2];
3060 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3064 value = integers.Length();
3065 integers.Push(* (
char *) content);
3066 intType.push_back(extraPtr[2]);
3072 if(duplicate != NULL)
3074 *duplicate += (* (
unsigned char *) content);
3075 *origTag += intType[origIndex];
3077 appendIntArrayValue(origIndex, *origTag);
3078 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3079 integers[origIndex] = *(
unsigned char *)content;
3080 intType[origIndex] = extraPtr[2];
3081 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3085 value = integers.Length();
3086 integers.Push(* (
unsigned char *) content);
3087 intType.push_back(extraPtr[2]);
3093 if(duplicate != NULL)
3095 *duplicate += (* (
short *) content);
3096 *origTag += intType[origIndex];
3098 appendIntArrayValue(origIndex, *origTag);
3099 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3100 integers[origIndex] = *(
short *)content;
3101 intType[origIndex] = extraPtr[2];
3102 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3106 value = integers.Length();
3107 integers.Push(* (
short *) content);
3108 intType.push_back(extraPtr[2]);
3114 if(duplicate != NULL)
3116 *duplicate += (* (
unsigned short *) content);
3117 *origTag += intType[origIndex];
3119 appendIntArrayValue(origIndex, *origTag);
3120 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3121 integers[origIndex] = *(
unsigned short *)content;
3122 intType[origIndex] = extraPtr[2];
3123 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3127 value = integers.Length();
3128 integers.Push(* (
unsigned short *) content);
3129 intType.push_back(extraPtr[2]);
3135 if(duplicate != NULL)
3137 *duplicate += (* (
int *) content);
3138 *origTag += intType[origIndex];
3140 appendIntArrayValue(origIndex, *origTag);
3141 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3142 integers[origIndex] = *(
int *)content;
3143 intType[origIndex] = extraPtr[2];
3144 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3148 value = integers.Length();
3149 integers.Push(* (
int *) content);
3150 intType.push_back(extraPtr[2]);
3156 if(duplicate != NULL)
3158 *duplicate += (* (
unsigned int *) content);
3159 *origTag += intType[origIndex];
3161 appendIntArrayValue(origIndex, *origTag);
3162 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3163 integers[origIndex] = *(
unsigned int *)content;
3164 intType[origIndex] = extraPtr[2];
3165 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3169 value = integers.Length();
3170 integers.Push((
int) * (
unsigned int *) content);
3171 intType.push_back(extraPtr[2]);
3177 if(duplicate != NULL)
3179 *duplicate += ((
const char *) content);
3182 *origTag += (
char*)(strings[origIndex]);
3183 tagBufferSize -= strings[origIndex].Length();
3184 strings[origIndex] = (
const char *) content;
3185 extraPtr += 4 + strings[origIndex].Length();
3186 tagBufferSize += strings[origIndex].Length();
3190 value = strings.Length();
3191 strings.Push((
const char *) content);
3192 tagBufferSize += 4 + strings.Last().Length();
3193 extraPtr += 4 + strings.Last().Length();
3197 if(duplicate != NULL)
3201 *origTag += (
char*)(strings[origIndex]);
3203 getBtagBufferSize(strings[origIndex]);
3205 getStringFromBtagBuffer((
unsigned char*)content,
3206 strings[origIndex]);
3207 *duplicate += (
char *)(strings[origIndex]);
3208 tagBufferSize += bufferSize;
3209 extraPtr += 3 + bufferSize;
3213 value = strings.Length();
3216 getStringFromBtagBuffer((
unsigned char*)content,
3218 strings.Push(tempBTag);
3219 tagBufferSize += 3 + bufferSize;
3220 extraPtr += 3 + bufferSize;
3224 if(duplicate != NULL)
3226 duplicate->appendFullFloat(* (
float *) content);
3229 origTag->appendFullFloat(floats[origIndex]);
3230 floats[origIndex] = *(
float *)content;
3234 value = floats.size();
3235 floats.push_back(* (
float *) content);
3242 "parsing BAM - Unknown custom field of type %c%c:%c\n",
3243 extraPtr[0], extraPtr[1], extraPtr[2]);
3244 fprintf(stderr,
"BAM Tags: \n");
3246 unsigned char* tagInfo = myPackedQuality + myRecordPtr->myReadLength;
3248 fprintf(stderr,
"\n\n");
3249 tagInfo = myPackedQuality + myRecordPtr->myReadLength;
3250 while(myRecordPtr->myBlockSize + 4 -
3251 (tagInfo - (
unsigned char *)myRecordPtr) > 0)
3253 fprintf(stderr,
"%02x",tagInfo[0]);
3256 fprintf(stderr,
"\n");
3262 "Unknown tag type.");
3266 if(duplicate != NULL)
3271 if(myNumWarns++ < myMaxWarns)
3273 fprintf(stderr,
"WARNING: Duplicate Tags, overwritting %s with %s\n",
3274 origTag->c_str(), duplicate->c_str());
3275 if(myNumWarns == myMaxWarns)
3277 fprintf(stderr,
"Suppressing rest of Duplicate Tag warnings.\n");
3288 extras.Add(key, value);
3289 myTagBufferSize += tagBufferSize;
// Write the tags tracked in `extras` into the record buffer, starting right
// after the quality bytes, and then mark the tag portion of the buffer valid.
// NOTE(review): this listing omits the brace, case-label and return lines
// between the numbered statements, so the exact branch structure and the
// returned values are not visible here.
3296 bool SamRecord::setTagsInBuffer()
// Half the read length, rounded up.
3301 int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
// Total size = offset of myData inside the record + read name length +
// 4 bytes per cigar entry + myReadLength + bamSequenceLength + tag bytes.
3302 int newBufferSize = ((
unsigned char*)(&(myRecordPtr->myData)) -
3303 (
unsigned char*)myRecordPtr) +
3304 myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
3305 myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
// Ask for a record buffer of newBufferSize bytes; the failure branch body is
// not shown in this listing.
3308 if(!allocateRecordStructure(newBufferSize))
// Tag bytes begin myReadLength bytes past myPackedQuality.
3314 char * extraPtr = (
char*)myPackedQuality + myRecordPtr->myReadLength;
// Visit every slot of the extras table; only slots in use are written.
3319 if (extras.Entries())
3321 for (
int i = 0; i < extras.Capacity(); i++)
3323 if (extras.SlotInUse(i))
// The key packs the two tag-name characters (see getTag) and the type
// character (see getTypeFromKey).
3325 int key = extras.GetKey(i);
3326 getTag(key, extraPtr);
3329 getTypeFromKey(key, vtype);
// Presumably only reached for integer tags, replacing the key's type with
// the stored integer width type - the guarding condition is not visible.
3333 vtype = getIntegerType(i);
3336 extraPtr[0] = vtype;
// The stored int is narrowed to a fixed-width type before being written;
// presumably one assignment per vtype case (case labels not shown).
3354 *(uint8_t*)extraPtr = (uint8_t)
getInteger(i);
3359 *(int16_t*)extraPtr = (int16_t)
getInteger(i);
3364 *(uint16_t*)extraPtr = (uint16_t)
getInteger(i);
3369 *(int32_t*)extraPtr = (int32_t)
getInteger(i);
3374 *(uint32_t*)extraPtr = (uint32_t)
getInteger(i);
// String value copied including its terminating NUL.
// NOTE(review): sprintf is unbounded; this relies on myTagBufferSize having
// accounted for the string length - confirm against the code that sets it.
3379 sprintf(extraPtr,
"%s",
getString(i).c_str());
// Array value: advance by whatever setBtagBuffer reports.
3383 extraPtr += setBtagBuffer(
getString(i), extraPtr);
3389 *(
float*)extraPtr = getFloat(i);
3394 "Unknown tag type.");
// Sanity check: the write cursor must end exactly newBufferSize bytes from
// the start of the record.
3404 if(extraPtr != (
char*)myRecordPtr + newBufferSize)
3406 fprintf(stderr,
"ERROR updating the buffer. Incorrect size.");
3408 "ERROR updating the buffer. Incorrect size.");
// The buffer now holds the tags, so no further tag write is pending.
3414 myNeedToSetTagsInBuffer =
false;
3415 myIsTagsBufferValid =
true;
// Reset the cached per-field state after the record buffer has been given
// new contents: empties the cached strings, flags the tags as needing to be
// read from the buffer, marks each buffer section valid, and recomputes the
// packed sequence/quality pointers from the lengths stored in the record.
// NOTE(review): some lines are missing from this listing (for example the
// right-hand side of the myMateReferenceName assignment), so how `header`
// is used is not visible here.
3423 void SamRecord::setVariablesForNewBuffer(
SamFileHeader& header)
3429 myMateReferenceName =
// Drop the cached string forms of the fields.
3433 myReadName.SetLength(0);
3434 myCigar.SetLength(0);
3435 mySequence.SetLength(0);
3436 mySeqWithEq.clear();
3437 mySeqWithoutEq.clear();
3438 myQuality.SetLength(0);
// Tags must be read from the buffer before use; nothing is pending to be
// written back into it.
3439 myNeedToSetTagsFromBuffer =
true;
3440 myNeedToSetTagsInBuffer =
false;
3443 myIsBufferSynced =
true;
3445 myIsReadNameBufferValid =
true;
3446 myIsCigarBufferValid =
true;
// The packed sequence follows the read name and the cigar entries in myData.
// NOTE(review): uses sizeof(int) per cigar entry while setTagsInBuffer uses a
// literal 4 - these agree only where sizeof(int) == 4.
3447 myPackedSequence = (
unsigned char *)myRecordPtr->myData +
3448 myRecordPtr->myReadNameLength + myRecordPtr->myCigarLength *
sizeof(
int);
3449 myIsSequenceBufferValid =
true;
3450 myBufferSequenceTranslation =
NONE;
// Quality starts (myReadLength + 1) / 2 bytes after the packed sequence.
3451 myPackedQuality = myPackedSequence +
3452 (myRecordPtr->myReadLength + 1) / 2;
3453 myIsQualityBufferValid =
true;
3454 myIsTagsBufferValid =
true;
3455 myIsBinValid =
true;
// Extract the type character from a tag key: bits 16-23 of `key` are stored
// into `type`.
3460 void SamRecord::getTypeFromKey(
int key,
char& type)
const 3463 type = (key >> 16) & 0xFF;
// Extract the two tag-name characters from a tag key: the lowest byte of
// `key` goes to tag[0] and the next byte to tag[1].
// NOTE(review): lines after 3472 are not shown in this listing, so whether
// the output is NUL-terminated cannot be confirmed here.
3468 void SamRecord::getTag(
int key,
char* tag)
const 3471 tag[0] = key & 0xFF;
3472 tag[1] = (key >> 8) & 0xFF;
// NOTE(review): the signature line of this accessor is missing from the
// listing; presumably SamRecord::getString(int index) - confirm.
// Looks up `index` in extras to get a position, then returns the entry of
// `strings` at that position.
3480 int value = extras[index];
3482 return strings[value];
// NOTE(review): the signature line of this accessor is missing from the
// listing; presumably SamRecord::getInteger(int offset) - confirm.
// Looks up `offset` in extras to get a position, then returns the entry of
// `integers` at that position.
3487 int value = extras[offset];
3489 return integers[value];
// Return the type character recorded for an integer tag: `offset` is looked
// up in extras to get a position, and the entry of intType at that position
// is returned by const reference.
3492 const char & SamRecord::getIntegerType(
int offset)
const 3494 int value = extras[offset];
3496 return intType[value];
// Return a reference to the stored float for a tag: `offset` is looked up in
// extras to get a position, and the entry of `floats` at that position is
// returned.
3499 float & SamRecord::getFloat(
int offset)
3501 int value = extras[offset];
3503 return floats[value];
// Append `value` to strVal in a form selected by `type`.
// NOTE(review): only one statement of the body is visible in this listing
// (the one that appends the value cast to char); the branches for other
// types and the conditions selecting them are not shown.
3507 void SamRecord::appendIntArrayValue(
char type,
int value,
String& strVal)
const 3512 strVal += (char)value;
// Compute a buffer size for an array tag held in string form. The first
// character of tagStr is the element subtype; the result is
// numElements * elementSize + 5.
// NOTE(review): the loop that counts elements and the error-path returns are
// not shown in this listing.
3529 int SamRecord::getBtagBufferSize(
String& tagStr)
// An empty string has no subtype character.
3531 if(tagStr.Length() < 1)
3535 "SamRecord::getBtagBufferSize no tag subtype specified");
3538 char type = tagStr[0];
// A non-positive size is treated as an invalid subtype.
3539 int elementSize = getNumericTagTypeSize(type);
3540 if(elementSize <= 0)
3543 String errorMsg =
"SamRecord::getBtagBufferSize invalid tag subtype, ";
// Commas are located one after another; presumably numElements is
// incremented once per comma found (loop body not visible).
3550 int numElements = 0;
3551 int index = tagStr.FastFindChar(
',', 0);
3555 index = tagStr.FastFindChar(
',', index+1);
// The extra 5 is presumably the 1-byte subtype plus the 4-byte element count
// that setBtagBuffer writes ahead of the elements - confirm.
3558 return(numElements * elementSize + 5);
// Write an array tag, given in string form, into the buffer at extraPtr:
// first the subtype character, then a uint32_t element count, then each
// element converted from text to the subtype's binary width.
// NOTE(review): the pointer advances after the subtype/count writes, the
// declaration of totalInc, the case labels and the return statement are not
// shown in this listing.
3562 int SamRecord::setBtagBuffer(
String& tagStr,
char* extraPtr)
// An empty string has no subtype character.
// NOTE(review): both message strings below name getBtagBufferSize although
// this is setBtagBuffer - looks like a copy/paste leftover; confirm before
// changing, since the text is emitted at runtime.
3564 if(tagStr.Length() < 1)
3568 "SamRecord::getBtagBufferSize no tag subtype specified");
3571 char type = tagStr[0];
// A non-positive size is treated as an invalid subtype.
3572 int elementSize = getNumericTagTypeSize(type);
3573 if(elementSize <= 0)
3576 String errorMsg =
"SamRecord::getBtagBufferSize invalid tag subtype, ";
// Subtype character goes first.
3584 *(
char*)extraPtr = type;
// Commas are located one after another; presumably numElements is
// incremented once per comma found (loop body not visible).
3589 uint32_t numElements = 0;
3590 int index = tagStr.FastFindChar(
',', 0);
3594 index = tagStr.FastFindChar(
',', index+1);
3596 *(uint32_t*)extraPtr = numElements;
// Parse the textual elements up to the end of the string.
3600 const char* stringPtr = tagStr.c_str();
3601 const char* endPtr = stringPtr + tagStr.Length();
3605 char* newPtr = NULL;
3606 while(stringPtr < endPtr)
// One conversion per subtype (case labels not shown). Base 0 lets
// strtol/strtoul accept decimal, octal (leading 0) and hex (leading 0x).
3611 *(
float*)extraPtr = (
float)(strtod(stringPtr, &newPtr));
3614 *(int8_t*)extraPtr = (int8_t)strtol(stringPtr, &newPtr, 0);
3617 *(int16_t*)extraPtr = (int16_t)strtol(stringPtr, &newPtr, 0);
3620 *(int32_t*)extraPtr = (int32_t)strtol(stringPtr, &newPtr, 0);
3623 *(uint8_t*)extraPtr = (uint8_t)strtoul(stringPtr, &newPtr, 0);
3626 *(uint16_t*)extraPtr = (uint16_t)strtoul(stringPtr, &newPtr, 0);
3629 *(uint32_t*)extraPtr = (uint32_t)strtoul(stringPtr, &newPtr, 0);
3633 "Unknown 'B' tag subtype.");
// Advance the output by one element and track the bytes written.
3636 extraPtr += elementSize;
3637 totalInc += elementSize;
// Continue one character past where the conversion stopped (presumably
// skipping the ',' separator).
3638 stringPtr = newPtr + 1;
// Build the string form of an array tag from its binary form in `buffer`:
// reads the subtype character and an unsigned int element count, then
// appends each element to tagStr, advancing by the subtype's size.
// NOTE(review): the second parameter line (presumably the output string
// tagStr), the pointer advances after the subtype/count reads, the case
// labels, any separators appended between elements and the return statement
// are not shown in this listing.
3644 int SamRecord::getStringFromBtagBuffer(
unsigned char* buffer,
3652 char type = *buffer;
3658 unsigned int numEntries = *(
unsigned int *)buffer;
3663 int subtypeSize = getNumericTagTypeSize(type);
3665 for(
unsigned int i = 0; i < numEntries; i++)
// One append per subtype (case labels not shown).
// NOTE(review): int8_t/uint8_t are character types; confirm that the String
// += overload chosen here appends a number rather than a single character.
3671 tagStr.appendFullFloat(*(
float *)buffer);
3674 tagStr += *(int8_t *)buffer;
3677 tagStr += *(int16_t *)buffer;
3680 tagStr += *(int32_t *)buffer;
3683 tagStr += *(uint8_t *)buffer;
3686 tagStr += *(uint16_t *)buffer;
3689 tagStr += *(uint32_t *)buffer;
3693 "Unknown 'B' tag subtype.");
// Step to the next element and count the bytes consumed.
3696 buffer += subtypeSize;
3697 bufferSize += subtypeSize;
static bool isMatchOrMismatch(Operation op)
Return true if the specified operation is a match/mismatch operation, false if not.
static const char UNKNOWN_QUALITY_CHAR
Character used when the quality is unknown.
void Set(const char *cigarString)
Sets this object to the specified cigarString.
static void seqWithEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence with '=' in any position where the sequence matches the reference.
bool shiftIndelsLeft()
Shift the indels (if any) to the left by updating the CIGAR.
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
void getErrorString(std::string &errorString) const
Append the error messages contained in this container to the passed in string.
uint8_t getMapQuality()
Get the mapping quality (MAPQ) of the record.
SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader &header)
Read the BAM record from a file.
int32_t get0BasedUnclippedStart()
Returns the 0-based inclusive left-most position adjusted for clipped bases.
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
void resetRecord()
Reset the fields of the record to a default value.
bool getNextSamTag(char *tag, char &vtype, void **value)
Get the next tag from the record.
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
int getNumEndClips() const
Return the number of clips that are at the end of the cigar.
uint16_t getCigarLength()
Get the length of the BAM formatted CIGAR.
This class is used to track the status results of some methods in the BAM classes.
int32_t get0BasedMatePosition()
Get the 0-based(BAM) leftmost mate/next fragment's position.
Translate '=' to the actual base.
bool getFloatTag(const char *tag, float &tagVal)
Get the float value for the specified tag.
int32_t get1BasedAlignmentEnd()
Returns the 1-based inclusive rightmost position of the clipped sequence.
bool getFields(bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality)
Returns the values of all fields except the tags.
int32_t get0BasedUnclippedEnd()
Returns the 0-based inclusive right-most position adjusted for clipped bases.
int & getInteger(const char *tag)
Get the integer value for the specified tag, DEPRECATED, use getIntegerTag that returns a bool...
method failed due to an I/O issue.
bool Update(int index, Operation op, int count)
Updates the operation at the specified index to be the specified operation and have the specified cou...
const String * getStringTag(const char *tag)
Get the string value for the specified tag.
bool setMapQuality(uint8_t mapQuality)
Set the mapping quality (MAPQ).
static bool isStringType(char vtype)
Returns whether or not the specified vtype is a string type.
bool checkTag(const char *tag, char type)
Check if the specified tag contains a value of the specified vtype.
Status getStatus() const
Return the enum for this status object.
bool addIntTag(const char *tag, int32_t value)
Add the specified integer tag to the record.
uint32_t getNumOverlaps(int32_t start, int32_t end)
Return the number of bases in this read that overlap the passed in region.
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
int32_t get1BasedUnclippedEnd()
Returns the 1-based inclusive right-most position adjusted for clipped bases.
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
bool set1BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value...
method completed successfully.
failed to parse a record/header - invalid format.
uint16_t getBin()
Get the BAM bin for the record.
bool set0BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value...
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
void setStatus(Status newStatus, const char *newMessage)
Set the status with the specified status enum and message.
int size() const
Return the number of cigar operations.
int getNumBeginClips() const
Return the number of clips that are at the beginning of the cigar.
const char * getMateReferenceNameOrEqual()
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the ...
int32_t getAlignmentLength()
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
void clearTags()
Clear the tags in this record.
static bool isCharType(char vtype)
Returns whether or not the specified vtype is a char type.
uint32_t getNumOverlaps(int32_t start, int32_t end, int32_t queryStartPos)
Return the number of bases that overlap the reference and the read associated with this cigar that fa...
int32_t get1BasedUnclippedStart()
Returns the 1-based inclusive left-most position adjusted for clipped bases.
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTran...
bool set1BasedPosition(int32_t position)
Set the leftmost position (POS) using the specified 1-based (SAM format) value.
invalid other than for sorting.
int32_t getReadLength()
Get the length of the read.
bool setSequence(const char *seq)
Sets the sequence (SEQ) to the specified SAM formatted sequence string.
uint32_t getTagLength()
Returns the length of the BAM formatted tags.
int * getIntegerTag(const char *tag)
Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure...
static bool foundInQuery(Operation op)
Return true if the specified operation is found in the query sequence, false if not.
NO_MORE_RECS: failed to read a record since there are no more to read either in the file or section i...
void setSequenceTranslation(SequenceTranslation translation)
Set the type of sequence translation to use when getting the sequence.
static bool isFloatType(char vtype)
Returns whether or not the specified vtype is a float type.
void clear()
Clear this object so that it has no Cigar Operations.
Structure of a BAM record.
void setReference(GenomeSequence *reference)
Set the reference to the specified genome sequence object.
void getCigarString(String &cigarString) const
Set the passed in String to the string representation of the Cigar operations in this object...
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that)...
uint8_t getReadNameLength()
Get the length of the readname (QNAME) including the null.
int32_t get1BasedPosition()
Get the 1-based(SAM) leftmost position (POS) of the record.
bool setQuality(const char *quality)
Sets the quality (QUAL) to the specified SAM formatted quality string.
const void * getRecordBuffer()
Get a const pointer to the buffer that contains the BAM representation of the record.
const SamStatus & getStatus()
Returns the status associated with the last method that sets the status.
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
bool rmTags(const char *tags)
Remove tags.
Leave the sequence as is.
bool setFlag(uint16_t flag)
Set the bitwise FLAG to the specified value.
HandlingType
This specifies how this class should respond to errors.
bool getTagsString(const char *tags, String &returnString, char delim='\t')
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE...
void resetTagIter()
Reset the tag iterator to the beginning of the tags.
bool setMateReferenceName(SamFileHeader &header, const char *mateReferenceName)
Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id.
fail a memory allocation.
int getExpectedReferenceBaseCount() const
Return the number of bases in the reference that this CIGAR "spans".
bool isValid(SamFileHeader &header)
Returns whether or not the record is valid, setting the status to indicate success or failure...
bool setReferenceName(SamFileHeader &header, const char *referenceName)
Set the reference sequence name (RNAME) to the specified name, using the header to determine the refe...
static bool isIntegerType(char vtype)
Returns whether or not the specified vtype is an integer type.
bool IncrementCount(int index, int increment)
Increments the count for the operation at the specified index by the specified value, specify a negative value to decrement.
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without open...
const String & getString(const char *tag)
Get the string value for the specified tag.
int32_t getBlockSize()
Get the block size of the record (BAM format).
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.
SamStatus::Status setBuffer(const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header)
Sets the SamRecord to contain the information in the BAM formatted fromBuffer.
SamRecord()
Default Constructor.
static bool isValid(SamFileHeader &samHeader, SamRecord &samRecord, SamValidationErrors &validationErrors)
Validates whether or not the specified SamRecord is valid, calling all of the other validations...
The SamValidationErrors class is a container class that holds SamValidationError Objects, allowing a validation method to return all of the invalid errors rather than just one.
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
bool addTag(const char *tag, char vtype, const char *value)
Add the specified tag,vtype,value to the record.
int32_t getMateReferenceID()
Get the mate reference id of the record (BAM format: mate_rid/next_refID).
const char * getMateReferenceName()
Get the mate/next fragment's reference sequence name (RNEXT).
SamStatus::Status writeRecordBuffer(IFILE filePtr)
Write the record as a BAM into the specified already opened file.
Translate bases that match the reference to '='.
GenomeSequence * getReference()
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it...
int32_t get1BasedMatePosition()
Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).
uint16_t getFlag()
Get the flag (FLAG).
int32_t getInsertSize()
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN)...
static void seqWithoutEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence converting '=' to the appropriate base using the reference.
bool set0BasedPosition(int32_t position)
Set the leftmost position using the specified 0-based (BAM format) value.
bool setReadName(const char *readName)
Set QNAME to the passed in name.
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
Status
Return value enum for StatGenFile methods.
void addError(Status newStatus, const char *newMessage)
Add the specified error message to the status message, setting the status to newStatus if the current...
bool Remove(int index)
Remove the operation at the specified index.
const char * getCigar()
Returns the SAM formatted CIGAR string.
bool rmTag(const char *tag, char type)
Remove a tag.
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
bool setInsertSize(int32_t insertSize)
Sets the inferred insert size (ISIZE)/observed template length (TLEN).