libStatGen Software 1
Loading...
Searching...
No Matches
SamRecord.cpp
1/*
2 * Copyright (C) 2010-2012 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <stdlib.h>
19#include <limits>
20#include <stdexcept>
21
22#include "bam.h"
23
24#include "SamRecord.h"
25#include "SamValidation.h"
26
27#include "BaseUtilities.h"
28#include "SamQuerySeqWithRefHelper.h"
29
30const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
31const char* SamRecord::FIELD_ABSENT_STRING = "=";
32int SamRecord::myNumWarns = 0;
33
35 : myStatus(),
36 myRefPtr(NULL),
37 mySequenceTranslation(NONE)
38{
39 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
40
41 myRecordPtr =
42 (bamRecordStruct *) malloc(defaultAllocSize);
43
44 myCigarTempBuffer = NULL;
45 myCigarTempBufferAllocatedSize = 0;
46
47 allocatedSize = defaultAllocSize;
48
50}
51
52
54 : myStatus(errorHandlingType),
55 myRefPtr(NULL),
56 mySequenceTranslation(NONE)
57{
58 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
59
60 myRecordPtr =
61 (bamRecordStruct *) malloc(defaultAllocSize);
62
63 myCigarTempBuffer = NULL;
64 myCigarTempBufferAllocatedSize = 0;
65
66 allocatedSize = defaultAllocSize;
67
69}
70
71
73{
75
76 if(myRecordPtr != NULL)
77 {
78 free(myRecordPtr);
79 myRecordPtr = NULL;
80 }
81 if(myCigarTempBuffer != NULL)
82 {
83 free(myCigarTempBuffer);
84 myCigarTempBuffer = NULL;
85 myCigarTempBufferAllocatedSize = 0;
86 }
87}
88
89
90// Resets the fields of the record to a default value.
92{
93 myIsBufferSynced = true;
94
95 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
96 myRecordPtr->myReferenceID = -1;
97 myRecordPtr->myPosition = -1;
98 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
99 myRecordPtr->myMapQuality = 0;
100 myRecordPtr->myBin = DEFAULT_BIN;
101 myRecordPtr->myCigarLength = 0;
102 myRecordPtr->myFlag = 0;
103 myRecordPtr->myReadLength = 0;
104 myRecordPtr->myMateReferenceID = -1;
105 myRecordPtr->myMatePosition = -1;
106 myRecordPtr->myInsertSize = 0;
107
108 // Set the sam values for the variable length fields.
109 // TODO - one way to speed this up might be to not set to "*" and just
110 // clear them, and write out a '*' for SAM if it is empty.
111 myReadName = DEFAULT_READ_NAME;
112 myReferenceName = "*";
113 myMateReferenceName = "*";
114 myCigar = "*";
115 mySequence = "*";
116 mySeqWithEq.clear();
117 mySeqWithoutEq.clear();
118 myQuality = "*";
119 myNeedToSetTagsFromBuffer = false;
120 myNeedToSetTagsInBuffer = false;
121
122 // Initialize the calculated alignment info to the uncalculated value.
123 myAlignmentLength = -1;
124 myUnclippedStartOffset = -1;
125 myUnclippedEndOffset = -1;
126
127 clearTags();
128
129 // Set the bam values for the variable length fields.
130 // Only the read name needs to be set, the others are a length of 0.
131 // Set the read name. The min size of myRecordPtr includes the size for
132 // the default read name.
133 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
134 myRecordPtr->myReadNameLength);
135
136 // Set that the variable length buffer fields are valid.
137 myIsReadNameBufferValid = true;
138 myIsCigarBufferValid = true;
139 myPackedSequence =
140 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength +
141 myRecordPtr->myCigarLength * sizeof(int);
142 myIsSequenceBufferValid = true;
143 myBufferSequenceTranslation = NONE;
144
145 myPackedQuality = myPackedSequence;
146 myIsQualityBufferValid = true;
147 myIsTagsBufferValid = true;
148 myIsBinValid = true;
149
150 myCigarTempBufferLength = -1;
151
152 myStatus = SamStatus::SUCCESS;
153
154 NOT_FOUND_TAG_STRING = "";
155 NOT_FOUND_TAG_INT = -1; // TODO - deprecate
156}
157
158
159// Returns whether or not the record is valid.
160// Header is needed to perform some validation against it.
162{
163 myStatus = SamStatus::SUCCESS;
164 SamValidationErrors invalidSamErrors;
165 if(!SamValidator::isValid(header, *this, invalidSamErrors))
166 {
167 // The record is not valid.
168 std::string errorMessage = "";
169 invalidSamErrors.getErrorString(errorMessage);
170 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
171 return(false);
172 }
173 // The record is valid.
174 return(true);
175}
176
177
179{
180 myRefPtr = reference;
181}
182
183
184// Set the type of sequence translation to use when getting
185// the sequence. The default type (if this method is never called) is
186// NONE (the sequence is left as-is). This is used
188{
189 mySequenceTranslation = translation;
190}
191
192
193bool SamRecord::setReadName(const char* readName)
194{
195 myReadName = readName;
196 myIsBufferSynced = false;
197 myIsReadNameBufferValid = false;
198 myStatus = SamStatus::SUCCESS;
199
200 // The read name must at least have some length, otherwise this is a parsing
201 // error.
202 if(myReadName.Length() == 0)
203 {
204 // Invalid - reset ReadName return false.
205 myReadName = DEFAULT_READ_NAME;
206 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
207 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
208 return(false);
209 }
210
211 return true;
212}
213
214
215bool SamRecord::setFlag(uint16_t flag)
216{
217 myStatus = SamStatus::SUCCESS;
218 myRecordPtr->myFlag = flag;
219 return true;
220}
221
222
224 const char* referenceName)
225{
226 myStatus = SamStatus::SUCCESS;
227
228 myReferenceName = referenceName;
229 // If the reference ID does not already exist, add it (pass true)
230 myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true);
231
232 return true;
233}
234
235
236bool SamRecord::set1BasedPosition(int32_t position)
237{
238 return(set0BasedPosition(position - 1));
239}
240
241
242bool SamRecord::set0BasedPosition(int32_t position)
243{
244 myStatus = SamStatus::SUCCESS;
245 myRecordPtr->myPosition = position;
246 myIsBinValid = false;
247 return true;
248}
249
250
251bool SamRecord::setMapQuality(uint8_t mapQuality)
252{
253 myStatus = SamStatus::SUCCESS;
254 myRecordPtr->myMapQuality = mapQuality;
255 return true;
256}
257
258
259bool SamRecord::setCigar(const char* cigar)
260{
261 myStatus = SamStatus::SUCCESS;
262 myCigar = cigar;
263
264 myIsBufferSynced = false;
265 myIsCigarBufferValid = false;
266 myCigarTempBufferLength = -1;
267 myIsBinValid = false;
268
269 // Initialize the calculated alignment info to the uncalculated value.
270 myAlignmentLength = -1;
271 myUnclippedStartOffset = -1;
272 myUnclippedEndOffset = -1;
273
274 return true;
275}
276
277
278bool SamRecord::setCigar(const Cigar& cigar)
279{
280 myStatus = SamStatus::SUCCESS;
281 cigar.getCigarString(myCigar);
282
283 myIsBufferSynced = false;
284 myIsCigarBufferValid = false;
285 myCigarTempBufferLength = -1;
286 myIsBinValid = false;
287
288 // Initialize the calculated alignment info to the uncalculated value.
289 myAlignmentLength = -1;
290 myUnclippedStartOffset = -1;
291 myUnclippedEndOffset = -1;
292
293 return true;
294}
295
296
298 const char* mateReferenceName)
299{
300 myStatus = SamStatus::SUCCESS;
301 // Set the mate reference, if it is "=", set it to be equal
302 // to myReferenceName. This assumes that myReferenceName has already
303 // been called.
304 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
305 {
306 myMateReferenceName = myReferenceName;
307 }
308 else
309 {
310 myMateReferenceName = mateReferenceName;
311 }
312
313 // Set the Mate Reference ID.
314 // If the reference ID does not already exist, add it (pass true)
315 myRecordPtr->myMateReferenceID =
316 header.getReferenceID(myMateReferenceName, true);
317
318 return true;
319}
320
321
322bool SamRecord::set1BasedMatePosition(int32_t matePosition)
323{
324 return(set0BasedMatePosition(matePosition - 1));
325}
326
327
328bool SamRecord::set0BasedMatePosition(int32_t matePosition)
329{
330 myStatus = SamStatus::SUCCESS;
331 myRecordPtr->myMatePosition = matePosition;
332 return true;
333}
334
335
336bool SamRecord::setInsertSize(int32_t insertSize)
337{
338 myStatus = SamStatus::SUCCESS;
339 myRecordPtr->myInsertSize = insertSize;
340 return true;
341}
342
343
344bool SamRecord::setSequence(const char* seq)
345{
346 myStatus = SamStatus::SUCCESS;
347 mySequence = seq;
348 mySeqWithEq.clear();
349 mySeqWithoutEq.clear();
350
351 myIsBufferSynced = false;
352 myIsSequenceBufferValid = false;
353 return true;
354}
355
356
357bool SamRecord::setQuality(const char* quality)
358{
359 myStatus = SamStatus::SUCCESS;
360 myQuality = quality;
361 myIsBufferSynced = false;
362 myIsQualityBufferValid = false;
363 return true;
364}
365
366
367//Shift indels to the left
369{
370 // Check to see whether or not the Cigar has already been
371 // set - this is determined by checking if alignment length
372 // is set since alignment length and the cigar are set
373 // at the same time.
374 if(myAlignmentLength == -1)
375 {
376 // Not been set, so calculate it.
377 parseCigar();
378 }
379
380 // Track whether or not there was a shift.
381 bool shifted = false;
382
383 // Cigar is set, so now myCigarRoller can be used.
384 // Track where in the read we are.
385 uint32_t currentPos = 0;
386
387 // Since the loop starts at 1 because the first operation can't be shifted,
388 // increment the currentPos past the first operation.
389 if(Cigar::foundInQuery(myCigarRoller[0]))
390 {
391 // This op was found in the read, increment the current position.
392 currentPos += myCigarRoller[0].count;
393 }
394
395 int numOps = myCigarRoller.size();
396
397 // Loop through the cigar operations from the 2nd operation since
398 // the first operation is already on the end and can't shift.
399 for(int currentOp = 1; currentOp < numOps; currentOp++)
400 {
401 if(myCigarRoller[currentOp].operation == Cigar::insert)
402 {
403 // For now, only shift a max of 1 operation.
404 int prevOpIndex = currentOp-1;
405 // Track the next op for seeing if it is the same as the
406 // previous for merging reasons.
407 int nextOpIndex = currentOp+1;
408 if(nextOpIndex == numOps)
409 {
410 // There is no next op, so set it equal to the current one.
411 nextOpIndex = currentOp;
412 }
413 // The start of the previous operation, so we know when we hit it
414 // so we don't shift past it.
415 uint32_t prevOpStart =
416 currentPos - myCigarRoller[prevOpIndex].count;
417
418 // We can only shift if the previous operation
419 if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
420 {
421 // TODO - shift past pads
422 // An insert is in the read, so increment the position.
423 currentPos += myCigarRoller[currentOp].count;
424 // Not a match/mismatch, so can't shift into it.
425 continue;
426 }
427
428 // It is a match or mismatch, so check to see if we can
429 // shift into it.
430
431 // The end of the insert is calculated by adding the size
432 // of this insert minus 1 to the start of the insert.
433 uint32_t insertEndPos =
434 currentPos + myCigarRoller[currentOp].count - 1;
435
436 // The insert starts at the current position.
437 uint32_t insertStartPos = currentPos;
438
439 // Loop as long as the position before the insert start
440 // matches the last character in the insert. If they match,
441 // the insert can be shifted one index left because the
442 // implied reference will not change. If they do not match,
443 // we can't shift because the implied reference would change.
444 // Stop loop when insertStartPos = prevOpStart, because we
445 // don't want to move past that.
446 while((insertStartPos > prevOpStart) &&
447 (getSequence(insertEndPos,BASES) ==
448 getSequence(insertStartPos - 1, BASES)))
449 {
450 // We can shift, so move the insert start & end one left.
451 --insertEndPos;
452 --insertStartPos;
453 }
454
455 // Determine if a shift has occurred.
456 int shiftLen = currentPos - insertStartPos;
457 if(shiftLen > 0)
458 {
459 // Shift occured, so adjust the cigar if the cigar will
460 // not become more operations.
461 // If the next operation is the same as the previous or
462 // if the insert and the previous operation switch positions
463 // then the cigar has the same number of operations.
464 // If the next operation is different, and the shift splits
465 // the previous operation in 2, then the cigar would
466 // become longer, so we do not want to shift.
467 if(myCigarRoller[nextOpIndex].operation ==
468 myCigarRoller[prevOpIndex].operation)
469 {
470 // The operations are the same, so merge them by adding
471 // the length of the shift to the next operation.
472 myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
473 myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);
474
475 // If the previous op length is 0, just remove that
476 // operation.
477 if(myCigarRoller[prevOpIndex].count == 0)
478 {
479 myCigarRoller.Remove(prevOpIndex);
480 }
481 shifted = true;
482 }
483 else
484 {
485 // Can only shift if the insert shifts past the
486 // entire previous operation, otherwise an operation
487 // would need to be added.
488 if(insertStartPos == prevOpStart)
489 {
490 // Swap the positions of the insert and the
491 // previous operation.
492 myCigarRoller.Update(currentOp,
493 myCigarRoller[prevOpIndex].operation,
494 myCigarRoller[prevOpIndex].count);
495 // Size of the previous op is the entire
496 // shift length.
497 myCigarRoller.Update(prevOpIndex,
499 shiftLen);
500 shifted = true;
501 }
502 }
503 }
504 // An insert is in the read, so increment the position.
505 currentPos += myCigarRoller[currentOp].count;
506 }
507 else if(Cigar::foundInQuery(myCigarRoller[currentOp]))
508 {
509 // This op was found in the read, increment the current position.
510 currentPos += myCigarRoller[currentOp].count;
511 }
512 }
513 if(shifted)
514 {
515 // TODO - setCigar is currently inefficient because later the cigar
516 // roller will be recalculated, but for now it will work.
517 setCigar(myCigarRoller);
518 }
519 return(shifted);
520}
521
522
523// Set the BAM record from the passeed in buffer of the specified size.
524// Note: The size includes the block size.
526 uint32_t fromBufferSize,
527 SamFileHeader& header)
528{
529 myStatus = SamStatus::SUCCESS;
530 if((fromBuffer == NULL) || (fromBufferSize == 0))
531 {
532 // Buffer is empty.
533 myStatus.setStatus(SamStatus::FAIL_PARSE,
534 "Cannot parse an empty file.");
535 return(SamStatus::FAIL_PARSE);
536 }
537
538 // Clear the record.
539 resetRecord();
540
541 // allocate space for the record size.
542 if(!allocateRecordStructure(fromBufferSize))
543 {
544 // Failed to allocate space.
545 return(SamStatus::FAIL_MEM);
546 }
547
548 memcpy(myRecordPtr, fromBuffer, fromBufferSize);
549
550 setVariablesForNewBuffer(header);
551
552 // Return the status of the record.
553 return(SamStatus::SUCCESS);
554}
555
556
557// Read the BAM record from a file.
559 SamFileHeader& header)
560{
561 myStatus = SamStatus::SUCCESS;
562 if((filePtr == NULL) || (filePtr->isOpen() == false))
563 {
564 // File is not open, return failure.
565 myStatus.setStatus(SamStatus::FAIL_ORDER,
566 "Can't read from an unopened file.");
567 return(SamStatus::FAIL_ORDER);
568 }
569
570 // Clear the record.
571 resetRecord();
572
573 // read the record size.
574 int numBytes =
575 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
576
577 // Check to see if the end of the file was hit and no bytes were read.
578 if(ifeof(filePtr) && (numBytes == 0))
579 {
580 // End of file, nothing was read, no more records.
581 std::string statusMsg = "No more records left to read, ";
582 statusMsg += filePtr->getFileName();
583 statusMsg += ".";
584 myStatus.setStatus(SamStatus::NO_MORE_RECS,
585 statusMsg.c_str());
587 }
588
589 if(numBytes != sizeof(int32_t))
590 {
591 // Failed to read the entire block size. Either the end of the file
592 // was reached early or there was an error.
593 if(ifeof(filePtr))
594 {
595 // Error: end of the file reached prior to reading the rest of the
596 // record.
597 std::string statusMsg = "EOF reached in the middle of a record, ";
598 statusMsg += filePtr->getFileName();
599 statusMsg += ".";
600 myStatus.setStatus(SamStatus::FAIL_PARSE,
601 statusMsg.c_str());
602 return(SamStatus::FAIL_PARSE);
603 }
604 else
605 {
606 // Error reading.
607 std::string statusMsg = "Failed to read the record size, ";
608 statusMsg += filePtr->getFileName();
609 statusMsg += ".";
610 myStatus.setStatus(SamStatus::FAIL_IO,
611 statusMsg.c_str());
612 return(SamStatus::FAIL_IO);
613 }
614 }
615
616 // allocate space for the record size.
617 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
618 {
619 // Failed to allocate space.
620 // Status is set by allocateRecordStructure.
621 return(SamStatus::FAIL_MEM);
622 }
623
624 // Read the rest of the alignment block, starting at the reference id.
625 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
626 != (unsigned int)myRecordPtr->myBlockSize)
627 {
628 // Error reading the record. Reset it and return failure.
629 resetRecord();
630 std::string statusMsg = "Failed to read the record, ";
631 statusMsg += filePtr->getFileName();
632 statusMsg += ".";
633 myStatus.setStatus(SamStatus::FAIL_IO,
634 statusMsg.c_str());
635 return(SamStatus::FAIL_IO);
636 }
637
638 setVariablesForNewBuffer(header);
639
640 // Return the status of the record.
641 return(SamStatus::SUCCESS);
642}
643
644
645// Add the specified tag to the record.
646// Returns true if the tag was successfully added, false otherwise.
647bool SamRecord::addIntTag(const char* tag, int32_t value)
648{
649 myStatus = SamStatus::SUCCESS;
650 int key = 0;
651 int index = 0;
652 char bamvtype;
653
654 int tagBufferSize = 0;
655
656 // First check to see if the tags need to be synced to the buffer.
657 if(myNeedToSetTagsFromBuffer)
658 {
659 if(!setTagsFromBuffer())
660 {
661 // Failed to read tags from the buffer, so cannot add new ones.
662 return(false);
663 }
664 }
665
666 // Ints come in as int. But it can be represented in fewer bits.
667 // So determine a more specific type that is in line with the
668 // types for BAM files.
669 // First check to see if it is a negative.
670 if(value < 0)
671 {
672 // The int is negative, so it will need to use a signed type.
673 // See if it is greater than the min value for a char.
674 if(value > ((std::numeric_limits<char>::min)()))
675 {
676 // It can be stored in a signed char.
677 bamvtype = 'c';
678 tagBufferSize += 4;
679 }
680 else if(value > ((std::numeric_limits<short>::min)()))
681 {
682 // It fits in a signed short.
683 bamvtype = 's';
684 tagBufferSize += 5;
685 }
686 else
687 {
688 // Just store it as a signed int.
689 bamvtype = 'i';
690 tagBufferSize += 7;
691 }
692 }
693 else
694 {
695 // It is positive, so an unsigned type can be used.
696 if(value < ((std::numeric_limits<unsigned char>::max)()))
697 {
698 // It is under the max of an unsigned char.
699 bamvtype = 'C';
700 tagBufferSize += 4;
701 }
702 else if(value < ((std::numeric_limits<unsigned short>::max)()))
703 {
704 // It is under the max of an unsigned short.
705 bamvtype = 'S';
706 tagBufferSize += 5;
707 }
708 else
709 {
710 // Just store it as an unsigned int.
711 bamvtype = 'I';
712 tagBufferSize += 7;
713 }
714 }
715
716 // Check to see if the tag is already there.
717 key = MAKEKEY(tag[0], tag[1], bamvtype);
718 unsigned int hashIndex = extras.Find(key);
719 if(hashIndex != LH_NOTFOUND)
720 {
721 // Tag was already found.
722 index = extras[hashIndex];
723
724 // Since the tagBufferSize was already updated with the new value,
725 // subtract the size for the previous tag (even if they are the same).
726 switch(intType[index])
727 {
728 case 'c':
729 case 'C':
730 case 'A':
731 tagBufferSize -= 4;
732 break;
733 case 's':
734 case 'S':
735 tagBufferSize -= 5;
736 break;
737 case 'i':
738 case 'I':
739 tagBufferSize -= 7;
740 break;
741 default:
742 myStatus.setStatus(SamStatus::INVALID,
743 "unknown tag inttype type found.\n");
744 return(false);
745 }
746
747 // Tag already existed, print message about overwriting.
748 // WARN about dropping duplicate tags.
749 if(myNumWarns++ < myMaxWarns)
750 {
751 String newVal;
752 String origVal;
753 appendIntArrayValue(index, origVal);
754 appendIntArrayValue(bamvtype, value, newVal);
755 fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
756 tag[0], tag[1], intType[index], origVal.c_str(), tag[0], tag[1], bamvtype, newVal.c_str());
757 if(myNumWarns == myMaxWarns)
758 {
759 fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
760 }
761 }
762
763 // Update the integer value and type.
764 integers[index] = value;
765 intType[index] = bamvtype;
766 }
767 else
768 {
769 // Tag is not already there, so add it.
770 index = integers.Length();
771
772 integers.Push(value);
773 intType.push_back(bamvtype);
774
775 extras.Add(key, index);
776 }
777
778 // The buffer tags are now out of sync.
779 myNeedToSetTagsInBuffer = true;
780 myIsTagsBufferValid = false;
781 myIsBufferSynced = false;
782 myTagBufferSize += tagBufferSize;
783
784 return(true);
785}
786
787
788// Add the specified tag to the record, replacing it if it is already there and
789// is different from the previous value.
790// Returns true if the tag was successfully added (or was already there), false otherwise.
791bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
792{
793 if(vtype == 'i')
794 {
795 // integer type. Call addIntTag to handle it.
796 int intVal = atoi(valuePtr);
797 return(addIntTag(tag, intVal));
798 }
799
800 // Non-int type.
801 myStatus = SamStatus::SUCCESS;
802 bool status = true; // default to successful.
803 int key = 0;
804 int index = 0;
805
806 int tagBufferSize = 0;
807
808 // First check to see if the tags need to be synced to the buffer.
809 if(myNeedToSetTagsFromBuffer)
810 {
811 if(!setTagsFromBuffer())
812 {
813 // Failed to read tags from the buffer, so cannot add new ones.
814 return(false);
815 }
816 }
817
818 // First check to see if the tag is already there.
819 key = MAKEKEY(tag[0], tag[1], vtype);
820 unsigned int hashIndex = extras.Find(key);
821 if(hashIndex != LH_NOTFOUND)
822 {
823 // The key was found in the hash, so get the lookup index.
824 index = extras[hashIndex];
825
826 String origTag;
827 char origType = vtype;
828
829 // Adjust the currently pointed to value to the new setting.
830 switch (vtype)
831 {
832 case 'A' :
833 // First check to see if the value changed.
834 if((integers[index] == (const int)*(valuePtr)) &&
835 (intType[index] == vtype))
836 {
837 // The value & type has not changed, so do nothing.
838 return(true);
839 }
840 else
841 {
842 // Tag buffer size changes if type changes, so subtract & add.
843 origType = intType[index];
844 appendIntArrayValue(index, origTag);
845 tagBufferSize -= getNumericTagTypeSize(intType[index]);
846 tagBufferSize += getNumericTagTypeSize(vtype);
847 integers[index] = (const int)*(valuePtr);
848 intType[index] = vtype;
849 }
850 break;
851 case 'Z' :
852 // First check to see if the value changed.
853 if(strings[index] == valuePtr)
854 {
855 // The value has not changed, so do nothing.
856 return(true);
857 }
858 else
859 {
860 // Adjust the tagBufferSize by removing the size of the old string.
861 origTag = strings[index];
862 tagBufferSize -= strings[index].Length();
863 strings[index] = valuePtr;
864 // Adjust the tagBufferSize by adding the size of the new string.
865 tagBufferSize += strings[index].Length();
866 }
867 break;
868 case 'B' :
869 // First check to see if the value changed.
870 if(strings[index] == valuePtr)
871 {
872 // The value has not changed, so do nothing.
873 return(true);
874 }
875 else
876 {
877 // Adjust the tagBufferSize by removing the size of the old field.
878 origTag = strings[index];
879 tagBufferSize -= getBtagBufferSize(strings[index]);
880 strings[index] = valuePtr;
881 // Adjust the tagBufferSize by adding the size of the new field.
882 tagBufferSize += getBtagBufferSize(strings[index]);
883 }
884 break;
885 case 'f' :
886 // First check to see if the value changed.
887 if(floats[index] == (float)atof(valuePtr))
888 {
889 // The value has not changed, so do nothing.
890 return(true);
891 }
892 else
893 {
894 // Tag buffer size doesn't change between different 'f' entries.
895 origTag.appendFullFloat(floats[index]);
896 floats[index] = (float)atof(valuePtr);
897 }
898 break;
899 default :
900 fprintf(stderr,
901 "samRecord::addTag() - Unknown custom field of type %c\n",
902 vtype);
903 myStatus.setStatus(SamStatus::FAIL_PARSE,
904 "Unknown custom field in a tag");
905 status = false;
906 break;
907 }
908
909 // Duplicate tag in this record.
910 // Tag already existed, print message about overwriting.
911 // WARN about dropping duplicate tags.
912 if(myNumWarns++ < myMaxWarns)
913 {
914 fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
915 tag[0], tag[1], origType, origTag.c_str(), tag[0], tag[1], vtype, valuePtr);
916 if(myNumWarns == myMaxWarns)
917 {
918 fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
919 }
920 }
921 }
922 else
923 {
924 // The key was not found in the hash, so add it.
925 switch (vtype)
926 {
927 case 'A' :
928 index = integers.Length();
929 integers.Push((const int)*(valuePtr));
930 intType.push_back(vtype);
931 tagBufferSize += 4;
932 break;
933 case 'Z' :
934 index = strings.Length();
935 strings.Push(valuePtr);
936 tagBufferSize += 4 + strings.Last().Length();
937 break;
938 case 'B' :
939 index = strings.Length();
940 strings.Push(valuePtr);
941 tagBufferSize += 3 + getBtagBufferSize(strings[index]);
942 break;
943 case 'f' :
944 index = floats.size();
945 floats.push_back((float)atof(valuePtr));
946 tagBufferSize += 7;
947 break;
948 default :
949 fprintf(stderr,
950 "samRecord::addTag() - Unknown custom field of type %c\n",
951 vtype);
952 myStatus.setStatus(SamStatus::FAIL_PARSE,
953 "Unknown custom field in a tag");
954 status = false;
955 break;
956 }
957 if(status)
958 {
959 // If successful, add the key to extras.
960 extras.Add(key, index);
961 }
962 }
963
964 // Only add the tag if it has so far been successfully processed.
965 if(status)
966 {
967 // The buffer tags are now out of sync.
968 myNeedToSetTagsInBuffer = true;
969 myIsTagsBufferValid = false;
970 myIsBufferSynced = false;
971 myTagBufferSize += tagBufferSize;
972 }
973 return(status);
974}
975
976
978{
979 if(extras.Entries() != 0)
980 {
981 extras.Clear();
982 }
983 strings.Clear();
984 integers.Clear();
985 intType.clear();
986 floats.clear();
987 myTagBufferSize = 0;
988 resetTagIter();
989}
990
991
992bool SamRecord::rmTag(const char* tag, char type)
993{
994 // Check the length of tag.
995 if(strlen(tag) != 2)
996 {
997 // Tag is the wrong length.
998 myStatus.setStatus(SamStatus::INVALID,
999 "rmTag called with tag that is not 2 characters\n");
1000 return(false);
1001 }
1002
1003 myStatus = SamStatus::SUCCESS;
1004 if(myNeedToSetTagsFromBuffer)
1005 {
1006 if(!setTagsFromBuffer())
1007 {
1008 // Failed to read the tags from the buffer, so cannot
1009 // get tags.
1010 return(false);
1011 }
1012 }
1013
1014 // Construct the key.
1015 int key = MAKEKEY(tag[0], tag[1], type);
1016 // Look to see if the key exsists in the hash.
1017 int offset = extras.Find(key);
1018
1019 if(offset < 0)
1020 {
1021 // Not found, so return true, successfully removed since
1022 // it is not in tag.
1023 return(true);
1024 }
1025
1026 // Offset is set, so the key was found.
1027 // First if it is an integer, determine the actual type of the int.
1028 char vtype;
1029 getTypeFromKey(key, vtype);
1030 if(vtype == 'i')
1031 {
1032 vtype = getIntegerType(offset);
1033 }
1034
1035 // Offset is set, so recalculate the buffer size without this entry.
1036 // Do NOT remove from strings, integers, or floats because then
1037 // extras would need to be updated for all entries with the new indexes
1038 // into those variables.
1039 int rmBuffSize = 0;
1040 switch(vtype)
1041 {
1042 case 'A':
1043 case 'c':
1044 case 'C':
1045 rmBuffSize = 4;
1046 break;
1047 case 's':
1048 case 'S':
1049 rmBuffSize = 5;
1050 break;
1051 case 'i':
1052 case 'I':
1053 rmBuffSize = 7;
1054 break;
1055 case 'f':
1056 rmBuffSize = 7;
1057 break;
1058 case 'Z':
1059 rmBuffSize = 4 + getString(offset).Length();
1060 break;
1061 case 'B':
1062 rmBuffSize = 3 + getBtagBufferSize(getString(offset));
1063 break;
1064 default:
1065 myStatus.setStatus(SamStatus::INVALID,
1066 "rmTag called with unknown type.\n");
1067 return(false);
1068 break;
1069 };
1070
1071 // The buffer tags are now out of sync.
1072 myNeedToSetTagsInBuffer = true;
1073 myIsTagsBufferValid = false;
1074 myIsBufferSynced = false;
1075 myTagBufferSize -= rmBuffSize;
1076
1077 // Remove from the hash.
1078 extras.Delete(offset);
1079 return(true);
1080}
1081
1082
1083bool SamRecord::rmTags(const char* tags)
1084{
1085 const char* currentTagPtr = tags;
1086
1087 myStatus = SamStatus::SUCCESS;
1088 if(myNeedToSetTagsFromBuffer)
1089 {
1090 if(!setTagsFromBuffer())
1091 {
1092 // Failed to read the tags from the buffer, so cannot
1093 // get tags.
1094 return(false);
1095 }
1096 }
1097
1098 bool returnStatus = true;
1099
1100 int rmBuffSize = 0;
1101 while(*currentTagPtr != '\0')
1102 {
1103
1104 // Tags are formatted as: XY:Z
1105 // Where X is [A-Za-z], Y is [A-Za-z], and
1106 // Z is A,i,f,Z,H (cCsSI are also excepted)
1107 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
1108 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
1109 {
1110 myStatus.setStatus(SamStatus::INVALID,
1111 "rmTags called with improperly formatted tags.\n");
1112 returnStatus = false;
1113 break;
1114 }
1115
1116 // Construct the key.
1117 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
1118 currentTagPtr[3]);
1119 // Look to see if the key exsists in the hash.
1120 int offset = extras.Find(key);
1121
1122 if(offset >= 0)
1123 {
1124 // Offset is set, so the key was found.
1125 // First if it is an integer, determine the actual type of the int.
1126 char vtype;
1127 getTypeFromKey(key, vtype);
1128 if(vtype == 'i')
1129 {
1130 vtype = getIntegerType(offset);
1131 }
1132
1133 // Offset is set, so recalculate the buffer size without this entry.
1134 // Do NOT remove from strings, integers, or floats because then
1135 // extras would need to be updated for all entries with the new indexes
1136 // into those variables.
1137 switch(vtype)
1138 {
1139 case 'A':
1140 case 'c':
1141 case 'C':
1142 rmBuffSize += 4;
1143 break;
1144 case 's':
1145 case 'S':
1146 rmBuffSize += 5;
1147 break;
1148 case 'i':
1149 case 'I':
1150 rmBuffSize += 7;
1151 break;
1152 case 'f':
1153 rmBuffSize += 7;
1154 break;
1155 case 'Z':
1156 rmBuffSize += 4 + getString(offset).Length();
1157 break;
1158 case 'B':
1159 rmBuffSize += 3 + getBtagBufferSize(getString(offset));
1160 break;
1161 default:
1162 myStatus.setStatus(SamStatus::INVALID,
1163 "rmTag called with unknown type.\n");
1164 returnStatus = false;
1165 break;
1166 };
1167
1168 // Remove from the hash.
1169 extras.Delete(offset);
1170 }
1171 // Increment to the next tag.
1172 if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
1173 {
1174 // Increment once more.
1175 currentTagPtr += 5;
1176 }
1177 else if(currentTagPtr[4] != '\0')
1178 {
1179 // Invalid tag format.
1180 myStatus.setStatus(SamStatus::INVALID,
1181 "rmTags called with improperly formatted tags.\n");
1182 returnStatus = false;
1183 break;
1184 }
1185 else
1186 {
1187 // Last Tag.
1188 currentTagPtr += 4;
1189 }
1190 }
1191
1192 // The buffer tags are now out of sync.
1193 myNeedToSetTagsInBuffer = true;
1194 myIsTagsBufferValid = false;
1195 myIsBufferSynced = false;
1196 myTagBufferSize -= rmBuffSize;
1197
1198
1199 return(returnStatus);
1200}
1201
1202
1203// Get methods for record fields.
1205{
1206 return(getRecordBuffer(mySequenceTranslation));
1207}
1208
1209
1210// Get methods for record fields.
1212{
1213 myStatus = SamStatus::SUCCESS;
1214 bool status = true;
1215 // If the buffer is not synced or the sequence in the buffer is not
1216 // properly translated, fix the buffer.
1217 if((myIsBufferSynced == false) ||
1218 (myBufferSequenceTranslation != translation))
1219 {
1220 status &= fixBuffer(translation);
1221 }
1222 // If the buffer is synced, check to see if the tags need to be synced.
1223 if(myNeedToSetTagsInBuffer)
1224 {
1225 status &= setTagsInBuffer();
1226 }
1227 if(!status)
1228 {
1229 return(NULL);
1230 }
1231 return (const void *)myRecordPtr;
1232}
1233
1234
1235// Write the record as a buffer into the file using the class's
1236// sequence translation setting.
1238{
1239 return(writeRecordBuffer(filePtr, mySequenceTranslation));
1240}
1241
1242
1243// Write the record as a buffer into the file using the specified translation.
1245 SequenceTranslation translation)
1246{
1247 myStatus = SamStatus::SUCCESS;
1248 if((filePtr == NULL) || (filePtr->isOpen() == false))
1249 {
1250 // File is not open, return failure.
1251 myStatus.setStatus(SamStatus::FAIL_ORDER,
1252 "Can't write to an unopened file.");
1253 return(SamStatus::FAIL_ORDER);
1254 }
1255
1256 if((myIsBufferSynced == false) ||
1257 (myBufferSequenceTranslation != translation))
1258 {
1259 if(!fixBuffer(translation))
1260 {
1261 return(myStatus.getStatus());
1262 }
1263 }
1264
1265 // Write the record.
1266 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
1267 unsigned int numBytesWritten =
1268 ifwrite(filePtr, myRecordPtr, numBytesToWrite);
1269
1270 // Return status based on if the correct number of bytes were written.
1271 if(numBytesToWrite == numBytesWritten)
1272 {
1273 return(SamStatus::SUCCESS);
1274 }
1275 // The correct number of bytes were not written.
1276 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
1277 return(SamStatus::FAIL_IO);
1278}
1279
1280
1282{
1283 myStatus = SamStatus::SUCCESS;
1284 // If the buffer isn't synced, sync the buffer to determine the
1285 // block size.
1286 if(myIsBufferSynced == false)
1287 {
1288 // Since this just returns the block size, the translation of
1289 // the sequence does not matter, so just use the currently set
1290 // value.
1291 fixBuffer(myBufferSequenceTranslation);
1292 }
1293 return myRecordPtr->myBlockSize;
1294}
1295
1296
1297// This method returns the reference name.
1299{
1300 myStatus = SamStatus::SUCCESS;
1301 return myReferenceName.c_str();
1302}
1303
1304
1306{
1307 myStatus = SamStatus::SUCCESS;
1308 return myRecordPtr->myReferenceID;
1309}
1310
1311
1313{
1314 myStatus = SamStatus::SUCCESS;
1315 return (myRecordPtr->myPosition + 1);
1316}
1317
1318
1320{
1321 myStatus = SamStatus::SUCCESS;
1322 return myRecordPtr->myPosition;
1323}
1324
1325
1327{
1328 myStatus = SamStatus::SUCCESS;
1329 // If the buffer is valid, return the size from there, otherwise get the
1330 // size from the string length + 1 (ending null).
1331 if(myIsReadNameBufferValid)
1332 {
1333 return(myRecordPtr->myReadNameLength);
1334 }
1335
1336 return(myReadName.Length() + 1);
1337}
1338
1339
1341{
1342 myStatus = SamStatus::SUCCESS;
1343 return myRecordPtr->myMapQuality;
1344}
1345
1346
1348{
1349 myStatus = SamStatus::SUCCESS;
1350 if(!myIsBinValid)
1351 {
1352 // The bin that is set in the record is not valid, so
1353 // reset it.
1354 myRecordPtr->myBin =
1355 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
1356 myIsBinValid = true;
1357 }
1358 return(myRecordPtr->myBin);
1359}
1360
1361
1363{
1364 myStatus = SamStatus::SUCCESS;
1365 // If the cigar buffer is valid
1366 // then get the length from there.
1367 if(myIsCigarBufferValid)
1368 {
1369 return myRecordPtr->myCigarLength;
1370 }
1371
1372 if(myCigarTempBufferLength == -1)
1373 {
1374 // The cigar buffer is not valid and the cigar temp buffer is not set,
1375 // so parse the string.
1376 parseCigarString();
1377 }
1378
1379 // The temp buffer is now set, so return the size.
1380 return(myCigarTempBufferLength);
1381}
1382
1383
1385{
1386 myStatus = SamStatus::SUCCESS;
1387 return myRecordPtr->myFlag;
1388}
1389
1390
1392{
1393 myStatus = SamStatus::SUCCESS;
1394 if(myIsSequenceBufferValid == false)
1395 {
1396 // If the sequence is "*", then return 0.
1397 if((mySequence.Length() == 1) && (mySequence[0] == '*'))
1398 {
1399 return(0);
1400 }
1401 // Do not add 1 since it is not null terminated.
1402 return(mySequence.Length());
1403 }
1404 return(myRecordPtr->myReadLength);
1405}
1406
1407
1408// This method returns the mate reference name. If it is equal to the
1409// reference name, it still returns the reference name.
1411{
1412 myStatus = SamStatus::SUCCESS;
1413 return myMateReferenceName.c_str();
1414}
1415
1416
1417// This method returns the mate reference name. If it is equal to the
1418// reference name, it returns "=", unless they are both "*" in which case
1419// "*" is returned.
1421{
1422 myStatus = SamStatus::SUCCESS;
1423 if(myMateReferenceName == "*")
1424 {
1425 return(myMateReferenceName);
1426 }
1427 if(myMateReferenceName == getReferenceName())
1428 {
1429 return(FIELD_ABSENT_STRING);
1430 }
1431 else
1432 {
1433 return(myMateReferenceName);
1434 }
1435}
1436
1437
1439{
1440 myStatus = SamStatus::SUCCESS;
1441 return myRecordPtr->myMateReferenceID;
1442}
1443
1444
1446{
1447 myStatus = SamStatus::SUCCESS;
1448 return (myRecordPtr->myMatePosition + 1);
1449}
1450
1451
1453{
1454 myStatus = SamStatus::SUCCESS;
1455 return myRecordPtr->myMatePosition;
1456}
1457
1458
1460{
1461 myStatus = SamStatus::SUCCESS;
1462 return myRecordPtr->myInsertSize;
1463}
1464
1465
1466// Returns the inclusive rightmost position of the clipped sequence.
1468{
1469 myStatus = SamStatus::SUCCESS;
1470 if(myAlignmentLength == -1)
1471 {
1472 // Alignment end has not been set, so calculate it.
1473 parseCigar();
1474 }
1475 // If alignment length > 0, subtract 1 from it to get the end.
1476 if(myAlignmentLength == 0)
1477 {
1478 // Length is 0, just return the start position.
1479 return(myRecordPtr->myPosition);
1480 }
1481 return(myRecordPtr->myPosition + myAlignmentLength - 1);
1482}
1483
1484
1485// Returns the inclusive rightmost position of the clipped sequence.
1487{
1488 return(get0BasedAlignmentEnd() + 1);
1489}
1490
1491
1492// Return the length of the alignment.
1494{
1495 myStatus = SamStatus::SUCCESS;
1496 if(myAlignmentLength == -1)
1497 {
1498 // Alignment end has not been set, so calculate it.
1499 parseCigar();
1500 }
1501 // Return the alignment length.
1502 return(myAlignmentLength);
1503}
1504
1505// Returns the inclusive left-most position adjust for clipped bases.
1507{
1508 myStatus = SamStatus::SUCCESS;
1509 if(myUnclippedStartOffset == -1)
1510 {
1511 // Unclipped has not yet been calculated, so parse the cigar to get it
1512 parseCigar();
1513 }
1514 return(myRecordPtr->myPosition - myUnclippedStartOffset);
1515}
1516
1517
1518// Returns the inclusive left-most position adjust for clipped bases.
1520{
1521 return(get0BasedUnclippedStart() + 1);
1522}
1523
1524
1525// Returns the inclusive right-most position adjust for clipped bases.
1527{
1528 // myUnclippedEndOffset will be set by get0BasedAlignmentEnd if the
1529 // cigar has not yet been parsed, so no need to check it here.
1530 return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
1531}
1532
1533
1534// Returns the inclusive right-most position adjust for clipped bases.
1536{
1537 return(get0BasedUnclippedEnd() + 1);
1538}
1539
1540
1541// Get the read name.
1543{
1544 myStatus = SamStatus::SUCCESS;
1545 if(myReadName.Length() == 0)
1546 {
1547 // 0 Length, means that it is in the buffer, but has not yet
1548 // been synced to the string, so do the sync.
1549 myReadName = (char*)&(myRecordPtr->myData);
1550 }
1551 return myReadName.c_str();
1552}
1553
1554
1556{
1557 myStatus = SamStatus::SUCCESS;
1558 if(myCigar.Length() == 0)
1559 {
1560 // 0 Length, means that it is in the buffer, but has not yet
1561 // been synced to the string, so do the sync.
1562 parseCigarBinary();
1563 }
1564 return myCigar.c_str();
1565}
1566
1567
1569{
1570 return(getSequence(mySequenceTranslation));
1571}
1572
1573
1575{
1576 myStatus = SamStatus::SUCCESS;
1577 if(mySequence.Length() == 0)
1578 {
1579 // 0 Length, means that it is in the buffer, but has not yet
1580 // been synced to the string, so do the sync.
1581 setSequenceAndQualityFromBuffer();
1582 }
1583
1584 // Determine if translation needs to be done.
1585 if((translation == NONE) || (myRefPtr == NULL))
1586 {
1587 return mySequence.c_str();
1588 }
1589 else if(translation == EQUAL)
1590 {
1591 if(mySeqWithEq.length() == 0)
1592 {
1593 // Check to see if the sequence is defined.
1594 if(mySequence == "*")
1595 {
1596 // Sequence is undefined, so no translation necessary.
1597 mySeqWithEq = '*';
1598 }
1599 else
1600 {
1601 // Sequence defined, so translate it.
1602 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
1603 myRecordPtr->myPosition,
1604 *(getCigarInfo()),
1606 *myRefPtr,
1607 mySeqWithEq);
1608 }
1609 }
1610 return(mySeqWithEq.c_str());
1611 }
1612 else
1613 {
1614 // translation == BASES
1615 if(mySeqWithoutEq.length() == 0)
1616 {
1617 if(mySequence == "*")
1618 {
1619 // Sequence is undefined, so no translation necessary.
1620 mySeqWithoutEq = '*';
1621 }
1622 else
1623 {
1624 // Sequence defined, so translate it.
1625 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
1626 myRecordPtr->myPosition,
1627 *(getCigarInfo()),
1629 *myRefPtr,
1630 mySeqWithoutEq);
1631 }
1632 }
1633 return(mySeqWithoutEq.c_str());
1634 }
1635}
1636
1637
1639{
1640 myStatus = SamStatus::SUCCESS;
1641 if(myQuality.Length() == 0)
1642 {
1643 // 0 Length, means that it is in the buffer, but has not yet
1644 // been synced to the string, so do the sync.
1645 setSequenceAndQualityFromBuffer();
1646 }
1647 return myQuality.c_str();
1648}
1649
1650
1652{
1653 return(getSequence(index, mySequenceTranslation));
1654}
1655
1656
1658{
1659 static const char * asciiBases = "=AC.G...T......N";
1660
1661 // Determine the read length.
1662 int32_t readLen = getReadLength();
1663
1664 // If the read length is 0, this method should not be called.
1665 if(readLen == 0)
1666 {
1667 String exceptionString = "SamRecord::getSequence(";
1668 exceptionString += index;
1669 exceptionString += ") is not allowed since sequence = '*'";
1670 throw std::runtime_error(exceptionString.c_str());
1671 }
1672 else if((index < 0) || (index >= readLen))
1673 {
1674 // Only get here if the index was out of range, so thow an exception.
1675 String exceptionString = "SamRecord::getSequence(";
1676 exceptionString += index;
1677 exceptionString += ") is out of range. Index must be between 0 and ";
1678 exceptionString += (readLen - 1);
1679 throw std::runtime_error(exceptionString.c_str());
1680 }
1681
1682 // Determine if translation needs to be done.
1683 if((translation == NONE) || (myRefPtr == NULL))
1684 {
1685 // No translation needs to be done.
1686 if(mySequence.Length() == 0)
1687 {
1688 // Parse BAM sequence.
1689 if(myIsSequenceBufferValid)
1690 {
1691 return(index & 1 ?
1692 asciiBases[myPackedSequence[index / 2] & 0xF] :
1693 asciiBases[myPackedSequence[index / 2] >> 4]);
1694 }
1695 else
1696 {
1697 String exceptionString = "SamRecord::getSequence(";
1698 exceptionString += index;
1699 exceptionString += ") called with no sequence set";
1700 throw std::runtime_error(exceptionString.c_str());
1701 }
1702 }
1703 // Already have string.
1704 return(mySequence[index]);
1705 }
1706 else
1707 {
1708 // Need to translate the sequence either to have '=' or to not
1709 // have it.
1710 // First check to see if the sequence has been set.
1711 if(mySequence.Length() == 0)
1712 {
1713 // 0 Length, means that it is in the buffer, but has not yet
1714 // been synced to the string, so do the sync.
1715 setSequenceAndQualityFromBuffer();
1716 }
1717
1718 // Check the type of translation.
1719 if(translation == EQUAL)
1720 {
1721 // Check whether or not the string has already been
1722 // retrieved that has the '=' in it.
1723 if(mySeqWithEq.length() == 0)
1724 {
1725 // The string with '=' has not yet been determined,
1726 // so get the string.
1727 // Check to see if the sequence is defined.
1728 if(mySequence == "*")
1729 {
1730 // Sequence is undefined, so no translation necessary.
1731 mySeqWithEq = '*';
1732 }
1733 else
1734 {
1735 // Sequence defined, so translate it.
1736 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
1737 myRecordPtr->myPosition,
1738 *(getCigarInfo()),
1740 *myRefPtr,
1741 mySeqWithEq);
1742 }
1743 }
1744 // Sequence is set, so return it.
1745 return(mySeqWithEq[index]);
1746 }
1747 else
1748 {
1749 // translation == BASES
1750 // Check whether or not the string has already been
1751 // retrieved that does not have the '=' in it.
1752 if(mySeqWithoutEq.length() == 0)
1753 {
1754 // The string with '=' has not yet been determined,
1755 // so get the string.
1756 // Check to see if the sequence is defined.
1757 if(mySequence == "*")
1758 {
1759 // Sequence is undefined, so no translation necessary.
1760 mySeqWithoutEq = '*';
1761 }
1762 else
1763 {
1764 // Sequence defined, so translate it.
1765 // The string without '=' has not yet been determined,
1766 // so get the string.
1767 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
1768 myRecordPtr->myPosition,
1769 *(getCigarInfo()),
1771 *myRefPtr,
1772 mySeqWithoutEq);
1773 }
1774 }
1775 // Sequence is set, so return it.
1776 return(mySeqWithoutEq[index]);
1777 }
1778 }
1779}
1780
1781
1783{
1784 // Determine the read length.
1785 int32_t readLen = getReadLength();
1786
1787 // If the read length is 0, return ' ' whose ascii code is below
1788 // the minimum ascii code for qualities.
1789 if(readLen == 0)
1790 {
1792 }
1793 else if((index < 0) || (index >= readLen))
1794 {
1795 // Only get here if the index was out of range, so thow an exception.
1796 String exceptionString = "SamRecord::getQuality(";
1797 exceptionString += index;
1798 exceptionString += ") is out of range. Index must be between 0 and ";
1799 exceptionString += (readLen - 1);
1800 throw std::runtime_error(exceptionString.c_str());
1801 }
1802
1803 if(myQuality.Length() == 0)
1804 {
1805 // Parse BAM Quality.
1806 // Know that myPackedQuality is correct since readLen != 0.
1807 return(myPackedQuality[index] + 33);
1808 }
1809 else
1810 {
1811 // Already have string.
1812 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
1813 {
1814 // Return the unknown quality character.
1816 }
1817 else if(index >= myQuality.Length())
1818 {
1819 // Only get here if the index was out of range, so thow an exception.
1820 // Technically the myQuality string is not guaranteed to be the same length
1821 // as the sequence, so this catches that error.
1822 String exceptionString = "SamRecord::getQuality(";
1823 exceptionString += index;
1824 exceptionString += ") is out of range. Index must be between 0 and ";
1825 exceptionString += (myQuality.Length() - 1);
1826 throw std::runtime_error(exceptionString.c_str());
1827 }
1828 else
1829 {
1830 return(myQuality[index]);
1831 }
1832 }
1833}
1834
1835
1837{
1838 // Check to see whether or not the Cigar has already been
1839 // set - this is determined by checking if alignment length
1840 // is set since alignment length and the cigar are set
1841 // at the same time.
1842 if(myAlignmentLength == -1)
1843 {
1844 // Not been set, so calculate it.
1845 parseCigar();
1846 }
1847 return(&myCigarRoller);
1848}
1849
1850
1851// Return the number of bases in this read that overlap the passed in
1852// region. (start & end are 0-based)
1853uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
1854{
1855 // Determine whether or not the cigar has been parsed, which sets up
1856 // the cigar roller. This is determined by checking the alignment length.
1857 if(myAlignmentLength == -1)
1858 {
1859 parseCigar();
1860 }
1861 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
1862}
1863
1864
1865// Returns the values of all fields except the tags.
1866bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
1867 String& cigar, String& sequence, String& quality)
1868{
1869 return(getFields(recStruct, readName, cigar, sequence, quality,
1870 mySequenceTranslation));
1871}
1872
1873
1874// Returns the values of all fields except the tags.
1875bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
1876 String& cigar, String& sequence, String& quality,
1877 SequenceTranslation translation)
1878{
1879 myStatus = SamStatus::SUCCESS;
1880 if(myIsBufferSynced == false)
1881 {
1882 if(!fixBuffer(translation))
1883 {
1884 // failed to set the buffer, return false.
1885 return(false);
1886 }
1887 }
1888 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
1889
1890 readName = getReadName();
1891 // Check the status.
1892 if(myStatus != SamStatus::SUCCESS)
1893 {
1894 // Failed to set the fields, return false.
1895 return(false);
1896 }
1897 cigar = getCigar();
1898 // Check the status.
1899 if(myStatus != SamStatus::SUCCESS)
1900 {
1901 // Failed to set the fields, return false.
1902 return(false);
1903 }
1904 sequence = getSequence(translation);
1905 // Check the status.
1906 if(myStatus != SamStatus::SUCCESS)
1907 {
1908 // Failed to set the fields, return false.
1909 return(false);
1910 }
1911 quality = getQuality();
1912 // Check the status.
1913 if(myStatus != SamStatus::SUCCESS)
1914 {
1915 // Failed to set the fields, return false.
1916 return(false);
1917 }
1918 return(true);
1919}
1920
1921
1922// Returns the reference pointer.
1924{
1925 return(myRefPtr);
1926}
1927
1928
1930{
1931 myStatus = SamStatus::SUCCESS;
1932 if(myNeedToSetTagsFromBuffer)
1933 {
1934 // Tags are only set in the buffer, so the size of the tags is
1935 // the length of the record minus the starting location of the tags.
1936 unsigned char * tagStart =
1937 (unsigned char *)myRecordPtr->myData
1938 + myRecordPtr->myReadNameLength
1939 + myRecordPtr->myCigarLength * sizeof(int)
1940 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
1941
1942 // The non-tags take up from the start of the record to the tag start.
1943 // Do not include the block size part of the record since it is not
1944 // included in the size.
1945 uint32_t nonTagSize =
1946 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
1947 // Tags take up the size of the block minus the non-tag section.
1948 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
1949 return(tagSize);
1950 }
1951
1952 // Tags are stored outside the buffer, so myTagBufferSize is set.
1953 return(myTagBufferSize);
1954}
1955
1956
1957// Returns true if there is another tag and sets tag and vtype to the
1958// appropriate values, and returns a pointer to the value.
1959// Sets the Status to SUCCESS when a tag is successfully returned or
1960// when there are no more tags. Otherwise the status is set to describe
1961// why it failed (parsing, etc).
1962bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
1963{
1964 myStatus = SamStatus::SUCCESS;
1965 if(myNeedToSetTagsFromBuffer)
1966 {
1967 if(!setTagsFromBuffer())
1968 {
1969 // Failed to read the tags from the buffer, so cannot
1970 // get tags.
1971 return(false);
1972 }
1973 }
1974
1975 // Increment the tag index to start looking at the next tag.
1976 // At the beginning, it is set to -1.
1977 myLastTagIndex++;
1978 int maxTagIndex = extras.Capacity();
1979 if(myLastTagIndex >= maxTagIndex)
1980 {
1981 // Hit the end of the tags, return false, no more tags.
1982 // Status is still success since this is not an error,
1983 // it is just the end of the list.
1984 return(false);
1985 }
1986
1987 bool tagFound = false;
1988 // Loop until a tag is found or the end of extras is hit.
1989 while((tagFound == false) && (myLastTagIndex < maxTagIndex))
1990 {
1991 if(extras.SlotInUse(myLastTagIndex))
1992 {
1993 // Found a slot to use.
1994 int key = extras.GetKey(myLastTagIndex);
1995 getTag(key, tag);
1996 getTypeFromKey(key, vtype);
1997 tagFound = true;
1998 // Get the value associated with the key based on the vtype.
1999 switch (vtype)
2000 {
2001 case 'f' :
2002 *value = getFloatPtr(myLastTagIndex);
2003 break;
2004 case 'i' :
2005 *value = getIntegerPtr(myLastTagIndex, vtype);
2006 if(vtype != 'A')
2007 {
2008 // Convert all int types to 'i'
2009 vtype = 'i';
2010 }
2011 break;
2012 case 'Z' :
2013 case 'B' :
2014 *value = getStringPtr(myLastTagIndex);
2015 break;
2016 default:
2017 myStatus.setStatus(SamStatus::FAIL_PARSE,
2018 "Unknown tag type");
2019 tagFound = false;
2020 break;
2021 }
2022 }
2023 if(!tagFound)
2024 {
2025 // Increment the index since a tag was not found.
2026 myLastTagIndex++;
2027 }
2028 }
2029 return(tagFound);
2030}
2031
2032
2033// Reset the tag iterator to the beginning of the tags.
2035{
2036 myLastTagIndex = -1;
2037}
2038
2039
2041{
2042 if((vtype == 'c') || (vtype == 'C') ||
2043 (vtype == 's') || (vtype == 'S') ||
2044 (vtype == 'i') || (vtype == 'I'))
2045 {
2046 return(true);
2047 }
2048 return(false);
2049}
2050
2051
2053{
2054 if(vtype == 'f')
2055 {
2056 return(true);
2057 }
2058 return(false);
2059}
2060
2061
2063{
2064 if(vtype == 'A')
2065 {
2066 return(true);
2067 }
2068 return(false);
2069}
2070
2071
2073{
2074 if((vtype == 'Z') || (vtype == 'B'))
2075 {
2076 return(true);
2077 }
2078 return(false);
2079}
2080
2081
2082bool SamRecord::getTagsString(const char* tags, String& returnString, char delim)
2083{
2084 const char* currentTagPtr = tags;
2085
2086 returnString.Clear();
2087 myStatus = SamStatus::SUCCESS;
2088 if(myNeedToSetTagsFromBuffer)
2089 {
2090 if(!setTagsFromBuffer())
2091 {
2092 // Failed to read the tags from the buffer, so cannot
2093 // get tags.
2094 return(false);
2095 }
2096 }
2097
2098 bool returnStatus = true;
2099
2100 while(*currentTagPtr != '\0')
2101 {
2102 // Tags are formatted as: XY:Z
2103 // Where X is [A-Za-z], Y is [A-Za-z], and
2104 // Z is A,i,f,Z,H (cCsSI are also excepted)
2105 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
2106 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
2107 {
2108 myStatus.setStatus(SamStatus::INVALID,
2109 "getTagsString called with improperly formatted tags.\n");
2110 returnStatus = false;
2111 break;
2112 }
2113
2114 // Construct the key.
2115 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
2116 currentTagPtr[3]);
2117 // Look to see if the key exsists in the hash.
2118 int offset = extras.Find(key);
2119
2120 if(offset >= 0)
2121 {
2122 // Offset is set, so the key was found.
2123 if(!returnString.IsEmpty())
2124 {
2125 returnString += delim;
2126 }
2127 returnString += currentTagPtr[0];
2128 returnString += currentTagPtr[1];
2129 returnString += ':';
2130 returnString += currentTagPtr[3];
2131 returnString += ':';
2132
2133 // First if it is an integer, determine the actual type of the int.
2134 char vtype;
2135 getTypeFromKey(key, vtype);
2136
2137 switch(vtype)
2138 {
2139 case 'i':
2140 returnString += *(int*)getIntegerPtr(offset, vtype);
2141 break;
2142 case 'f':
2143 returnString += *(float*)getFloatPtr(offset);
2144 break;
2145 case 'Z':
2146 case 'B':
2147 returnString += *(String*)getStringPtr(offset);
2148 break;
2149 default:
2150 myStatus.setStatus(SamStatus::INVALID,
2151 "rmTag called with unknown type.\n");
2152 returnStatus = false;
2153 break;
2154 };
2155 }
2156 // Increment to the next tag.
2157 if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
2158 {
2159 // Increment once more.
2160 currentTagPtr += 5;
2161 }
2162 else if(currentTagPtr[4] != '\0')
2163 {
2164 // Invalid tag format.
2165 myStatus.setStatus(SamStatus::INVALID,
2166 "rmTags called with improperly formatted tags.\n");
2167 returnStatus = false;
2168 break;
2169 }
2170 else
2171 {
2172 // Last Tag.
2173 currentTagPtr += 4;
2174 }
2175 }
2176 return(returnStatus);
2177}
2178
2179
2180const String* SamRecord::getStringTag(const char * tag)
2181{
2182 // Parse the buffer if necessary.
2183 if(myNeedToSetTagsFromBuffer)
2184 {
2185 if(!setTagsFromBuffer())
2186 {
2187 // Failed to read the tags from the buffer, so cannot
2188 // get tags. setTagsFromBuffer set the errors,
2189 // so just return null.
2190 return(NULL);
2191 }
2192 }
2193
2194 int key = MAKEKEY(tag[0], tag[1], 'Z');
2195 int offset = extras.Find(key);
2196
2197 int value;
2198 if (offset < 0)
2199 {
2200 // Check for 'B' tag.
2201 key = MAKEKEY(tag[0], tag[1], 'B');
2202 offset = extras.Find(key);
2203 if(offset < 0)
2204 {
2205 // Tag not found.
2206 return(NULL);
2207 }
2208 }
2209
2210 // Offset is valid, so return the tag.
2211 value = extras[offset];
2212 return(&(strings[value]));
2213}
2214
2215
2216int* SamRecord::getIntegerTag(const char * tag)
2217{
2218 // Init to success.
2219 myStatus = SamStatus::SUCCESS;
2220 // Parse the buffer if necessary.
2221 if(myNeedToSetTagsFromBuffer)
2222 {
2223 if(!setTagsFromBuffer())
2224 {
2225 // Failed to read the tags from the buffer, so cannot
2226 // get tags. setTagsFromBuffer set the errors,
2227 // so just return NULL.
2228 return(NULL);
2229 }
2230 }
2231
2232 int key = MAKEKEY(tag[0], tag[1], 'i');
2233 int offset = extras.Find(key);
2234
2235 int value;
2236 if (offset < 0)
2237 {
2238 // Failed to find the tag.
2239 return(NULL);
2240 }
2241 else
2242 value = extras[offset];
2243
2244 return(&(integers[value]));
2245}
2246
2247
2248bool SamRecord::getIntegerTag(const char * tag, int& tagVal)
2249{
2250 // Init to success.
2251 myStatus = SamStatus::SUCCESS;
2252 // Parse the buffer if necessary.
2253 if(myNeedToSetTagsFromBuffer)
2254 {
2255 if(!setTagsFromBuffer())
2256 {
2257 // Failed to read the tags from the buffer, so cannot
2258 // get tags. setTagsFromBuffer set the errors,
2259 // so just return false.
2260 return(false);
2261 }
2262 }
2263
2264 int key = MAKEKEY(tag[0], tag[1], 'i');
2265 int offset = extras.Find(key);
2266
2267 int value;
2268 if (offset < 0)
2269 {
2270 // Failed to find the tag.
2271 return(false);
2272 }
2273 else
2274 value = extras[offset];
2275
2276 tagVal = integers[value];
2277 return(true);
2278}
2279
2280
2281bool SamRecord::getFloatTag(const char * tag, float& tagVal)
2282{
2283 // Init to success.
2284 myStatus = SamStatus::SUCCESS;
2285 // Parse the buffer if necessary.
2286 if(myNeedToSetTagsFromBuffer)
2287 {
2288 if(!setTagsFromBuffer())
2289 {
2290 // Failed to read the tags from the buffer, so cannot
2291 // get tags. setTagsFromBuffer set the errors,
2292 // so just return false.
2293 return(false);
2294 }
2295 }
2296
2297 int key = MAKEKEY(tag[0], tag[1], 'f');
2298 int offset = extras.Find(key);
2299
2300 int value;
2301 if (offset < 0)
2302 {
2303 // Failed to find the tag.
2304 return(false);
2305 }
2306 else
2307 value = extras[offset];
2308
2309 tagVal = floats[value];
2310 return(true);
2311}
2312
2313
2314const String & SamRecord::getString(const char * tag)
2315{
2316 // Init to success.
2317 myStatus = SamStatus::SUCCESS;
2318 // Parse the buffer if necessary.
2319 if(myNeedToSetTagsFromBuffer)
2320 {
2321 if(!setTagsFromBuffer())
2322 {
2323 // Failed to read the tags from the buffer, so cannot
2324 // get tags.
2325 // TODO - what do we want to do on failure?
2326 }
2327 }
2328
2329 int key = MAKEKEY(tag[0], tag[1], 'Z');
2330 int offset = extras.Find(key);
2331
2332 int value;
2333 if (offset < 0)
2334 {
2335
2336 key = MAKEKEY(tag[0], tag[1], 'B');
2337 offset = extras.Find(key);
2338 if (offset < 0)
2339 {
2340 // TODO - what do we want to do on failure?
2341 return(NOT_FOUND_TAG_STRING);
2342 }
2343 }
2344 value = extras[offset];
2345
2346 return strings[value];
2347}
2348
2349
2350int & SamRecord::getInteger(const char * tag)
2351{
2352 // Init to success.
2353 myStatus = SamStatus::SUCCESS;
2354 // Parse the buffer if necessary.
2355 if(myNeedToSetTagsFromBuffer)
2356 {
2357 if(!setTagsFromBuffer())
2358 {
2359 // Failed to read the tags from the buffer, so cannot
2360 // get tags. setTagsFromBuffer set the error.
2361 // TODO - what do we want to do on failure?
2362 }
2363 }
2364
2365 int key = MAKEKEY(tag[0], tag[1], 'i');
2366 int offset = extras.Find(key);
2367
2368 int value;
2369 if (offset < 0)
2370 {
2371 // TODO - what do we want to do on failure?
2372 return NOT_FOUND_TAG_INT;
2373 }
2374 else
2375 value = extras[offset];
2376
2377 return integers[value];
2378}
2379
2380
2381bool SamRecord::checkTag(const char * tag, char type)
2382{
2383 // Init to success.
2384 myStatus = SamStatus::SUCCESS;
2385 // Parse the buffer if necessary.
2386 if(myNeedToSetTagsFromBuffer)
2387 {
2388 if(!setTagsFromBuffer())
2389 {
2390 // Failed to read the tags from the buffer, so cannot
2391 // get tags. setTagsFromBuffer set the error.
2392 return("");
2393 }
2394 }
2395
2396 int key = MAKEKEY(tag[0], tag[1], type);
2397
2398 return (extras.Find(key) != LH_NOTFOUND);
2399}
2400
2401
2402// Return the error after a failed SamRecord call.
2403const SamStatus& SamRecord::getStatus()
2404{
2405 return(myStatus);
2406}
2407
2408
2409// Allocate space for the record - does a realloc.
2410// The passed in size is the size of the entire record including the
2411// block size field.
2412bool SamRecord::allocateRecordStructure(int size)
2413{
2414 if (allocatedSize < size)
2415 {
2416 bamRecordStruct* tmpRecordPtr =
2417 (bamRecordStruct *)realloc(myRecordPtr, size);
2418 if(tmpRecordPtr == NULL)
2419 {
2420 // FAILED to allocate memory
2421 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
2422 myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
2423 return(false);
2424 }
2425 // Successfully allocated memory, so set myRecordPtr.
2426 myRecordPtr = tmpRecordPtr;
2427
2428 // Reset the pointers into the record.
2429 if(myIsSequenceBufferValid)
2430 {
2431 myPackedSequence = (unsigned char *)myRecordPtr->myData +
2432 myRecordPtr->myReadNameLength +
2433 myRecordPtr->myCigarLength * sizeof(int);
2434 }
2435 if(myIsQualityBufferValid)
2436 {
2437 myPackedQuality = (unsigned char *)myRecordPtr->myData +
2438 myRecordPtr->myReadNameLength +
2439 myRecordPtr->myCigarLength * sizeof(int) +
2440 (myRecordPtr->myReadLength + 1) / 2;
2441 }
2442
2443 allocatedSize = size;
2444 }
2445 return(true);
2446}
2447
2448
2449// Index is the index into the strings array.
2450void* SamRecord::getStringPtr(int index)
2451{
2452 int value = extras[index];
2453
2454 return &(strings[value]);
2455}
2456
2457void* SamRecord::getIntegerPtr(int offset, char& type)
2458{
2459 int value = extras[offset];
2460
2461 type = intType[value];
2462
2463 return &(integers[value]);
2464}
2465
2466void* SamRecord::getFloatPtr(int offset)
2467{
2468 int value = extras[offset];
2469
2470 return &(floats[value]);
2471}
2472
2473
2474// Fixes the buffer to match the variable length fields if they are set.
2475bool SamRecord::fixBuffer(SequenceTranslation translation)
2476{
2477 // Check to see if the buffer is already synced.
2478 if(myIsBufferSynced &&
2479 (myBufferSequenceTranslation == translation))
2480 {
2481 // Already synced, nothing to do.
2482 return(true);
2483 }
2484
2485 // Set the bin if necessary.
2486 if(!myIsBinValid)
2487 {
2488 // The bin that is set in the record is not valid, so
2489 // reset it.
2490 myRecordPtr->myBin =
2491 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
2492 myIsBinValid = true;
2493 }
2494
2495 // Not synced.
2496 bool status = true;
2497
2498 // First determine the size the buffer needs to be.
2499 uint8_t newReadNameLen = getReadNameLength();
2500 uint16_t newCigarLen = getCigarLength();
2501 int32_t newReadLen = getReadLength();
2502 uint32_t newTagLen = getTagLength();
2503 uint32_t bamSequenceLen = (newReadLen+1)/2;
2504
2505 // The buffer size extends from the start of the record to data
2506 // plus the length of the variable fields,
2507 // Multiply the cigar length by 4 since it is the number of uint32_t fields.
2508 int newBufferSize =
2509 ((unsigned char*)(&(myRecordPtr->myData)) -
2510 (unsigned char*)myRecordPtr) +
2511 newReadNameLen + ((newCigarLen)*4) +
2512 newReadLen + bamSequenceLen + newTagLen;
2513
2514 if(!allocateRecordStructure(newBufferSize))
2515 {
2516 // Failed to allocate space.
2517 return(false);
2518 }
2519
2520 // Now that space has been added to the buffer, check to see what if
2521 // any fields need to be extracted from the buffer prior to starting to
2522 // overwrite it. Fields need to be extracted from the buffer if the
2523 // buffer is valid for the field and a previous variable length field has
2524 // changed length.
2525 bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
2526 bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
2527 bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
2528
2529 // If the tags are still stored in the buffer and any other fields changed
2530 // lengths, they need to be extracted.
2531 if(myIsTagsBufferValid &&
2532 (readNameLenChange | cigarLenChange | readLenChange))
2533 {
2534 status &= setTagsFromBuffer();
2535 // The tag buffer will not be valid once the other fields
2536 // are written, so set it to not valid.
2537 myIsTagsBufferValid = false;
2538 }
2539
2540 // If the sequence or quality strings are still stored in the buffer, and
2541 // any of the previous fields have changed length, extract it from the
2542 // current buffer.
2543 if((myIsQualityBufferValid | myIsSequenceBufferValid) &&
2544 (readNameLenChange | cigarLenChange | readLenChange))
2545 {
2546 setSequenceAndQualityFromBuffer();
2547 // The quality and sequence buffers will not be valid once the other
2548 // fields are written, so set them to not valid.
2549 myIsQualityBufferValid = false;
2550 myIsSequenceBufferValid = false;
2551 }
2552
2553 // If the cigar is still stored in the buffer, and any of the
2554 // previous fields have changed length, extract it from the current buffer.
2555 if((myIsCigarBufferValid) &&
2556 (readNameLenChange))
2557 {
2558 status &= parseCigarBinary();
2559 myIsCigarBufferValid = false;
2560 }
2561
2562 // Set each value in the buffer if it is not already valid.
2563 if(!myIsReadNameBufferValid)
2564 {
2565 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
2566 newReadNameLen);
2567
2568 // Set the new ReadNameLength.
2569 myRecordPtr->myReadNameLength = newReadNameLen;
2570 myIsReadNameBufferValid = true;
2571 }
2572
2573 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
2574 myRecordPtr->myReadNameLength;
2575
2576 // Set the Cigar. Need to reformat from the string to
2577 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
2578
2579 if(!myIsCigarBufferValid)
2580 {
2581 // The cigar was already parsed when it was set, so just copy
2582 // data from the temporary buffer.
2583 myRecordPtr->myCigarLength = newCigarLen;
2584 memcpy(packedCigar, myCigarTempBuffer,
2585 myRecordPtr->myCigarLength * sizeof(uint32_t));
2586
2587 myIsCigarBufferValid = true;
2588 }
2589
2590 unsigned char * packedSequence = readNameEnds +
2591 myRecordPtr->myCigarLength * sizeof(int);
2592 unsigned char * packedQuality = packedSequence + bamSequenceLen;
2593
2594 if(!myIsSequenceBufferValid || !myIsQualityBufferValid ||
2595 (myBufferSequenceTranslation != translation))
2596 {
2597 myRecordPtr->myReadLength = newReadLen;
2598 // Determine if the quality needs to be set and is just a * and needs to
2599 // be set to 0xFF.
2600 bool noQuality = false;
2601 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
2602 {
2603 noQuality = true;
2604 }
2605
2606 const char* translatedSeq = NULL;
2607 // If the sequence is not valid in the buffer or it is not
2608 // properly translated, get the properly translated sequence
2609 // that needs to be put into the buffer.
2610 if((!myIsSequenceBufferValid) ||
2611 (translation != myBufferSequenceTranslation))
2612 {
2613 translatedSeq = getSequence(translation);
2614 }
2615
2616 for (int i = 0; i < myRecordPtr->myReadLength; i++)
2617 {
2618 if((!myIsSequenceBufferValid) ||
2619 (translation != myBufferSequenceTranslation))
2620 {
2621 // Sequence buffer is not valid, so set the sequence.
2622 int seqVal = 0;
2623 switch(translatedSeq[i])
2624 {
2625 case '=':
2626 seqVal = 0;
2627 break;
2628 case 'A':
2629 case 'a':
2630 seqVal = 1;
2631 break;
2632 case 'C':
2633 case 'c':
2634 seqVal = 2;
2635 break;
2636 case 'G':
2637 case 'g':
2638 seqVal = 4;
2639 break;
2640 case 'T':
2641 case 't':
2642 seqVal = 8;
2643 break;
2644 case 'N':
2645 case 'n':
2646 case '.':
2647 seqVal = 15;
2648 break;
2649 default:
2650 myStatus.addError(SamStatus::FAIL_PARSE,
2651 "Unknown Sequence character found.");
2652 status = false;
2653 break;
2654 };
2655
2656 if(i & 1)
2657 {
2658 // Odd number i's go in the lower 4 bits, so OR in the
2659 // lower bits
2660 packedSequence[i/2] |= seqVal;
2661 }
2662 else
2663 {
2664 // Even i's go in the upper 4 bits and are always set first.
2665 packedSequence[i/2] = seqVal << 4;
2666 }
2667 }
2668
2669 if(!myIsQualityBufferValid)
2670 {
2671 // Set the quality.
2672 if((noQuality) || (myQuality.Length() <= i))
2673 {
2674 // No quality or the quality is smaller than the sequence,
2675 // so set it to 0xFF
2676 packedQuality[i] = 0xFF;
2677 }
2678 else
2679 {
2680 // Copy the quality string.
2681 packedQuality[i] = myQuality[i] - 33;
2682 }
2683 }
2684 }
2685 myPackedSequence = (unsigned char *)myRecordPtr->myData +
2686 myRecordPtr->myReadNameLength +
2687 myRecordPtr->myCigarLength * sizeof(int);
2688 myPackedQuality = myPackedSequence +
2689 (myRecordPtr->myReadLength + 1) / 2;
2690 myIsSequenceBufferValid = true;
2691 myIsQualityBufferValid = true;
2692 myBufferSequenceTranslation = translation;
2693 }
2694
2695 if(!myIsTagsBufferValid)
2696 {
2697 status &= setTagsInBuffer();
2698 }
2699
2700 // Set the lengths in the buffer.
2701 myRecordPtr->myReadNameLength = newReadNameLen;
2702 myRecordPtr->myCigarLength = newCigarLen;
2703 myRecordPtr->myReadLength = newReadLen;
2704
2705 // Set the buffer block size to the size of the buffer minus the
2706 // first field.
2707 myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
2708
2709 if(status)
2710 {
2711 myIsBufferSynced = true;
2712 }
2713
2714 return(status);
2715}
2716
2717
2718// Sets the Sequence and Quality strings from the buffer.
2719// They are done together in one method because they require the same
2720// loop, so might as well be done at the same time.
2721void SamRecord::setSequenceAndQualityFromBuffer()
2722{
2723 // NOTE: If the sequence buffer is not valid, do not set the sequence
2724 // string from the buffer.
2725 // NOTE: If the quality buffer is not valid, do not set the quality string
2726 // from the buffer.
2727
2728 // Extract the sequence if the buffer is valid and the string's length is 0.
2729 bool extractSequence = false;
2730 if(myIsSequenceBufferValid && (mySequence.Length() == 0))
2731 {
2732 extractSequence = true;
2733 }
2734
2735 // Extract the quality if the buffer is valid and the string's length is 0.
2736 bool extractQuality = false;
2737 if(myIsQualityBufferValid && (myQuality.Length() == 0))
2738 {
2739 extractQuality = true;
2740 }
2741
2742 // If neither the quality nor the sequence need to be extracted,
2743 // just return.
2744 if(!extractSequence && !extractQuality)
2745 {
2746 return;
2747 }
2748
2749 // Set the sequence and quality strings..
2750 if(extractSequence)
2751 {
2752 mySequence.SetLength(myRecordPtr->myReadLength);
2753 }
2754 if(extractQuality)
2755 {
2756 myQuality.SetLength(myRecordPtr->myReadLength);
2757 }
2758
2759 const char * asciiBases = "=AC.G...T......N";
2760
2761 // Flag to see if the quality is specified - the quality contains a value
2762 // other than 0xFF. If all values are 0xFF, then there is no quality.
2763 bool qualitySpecified = false;
2764
2765 for (int i = 0; i < myRecordPtr->myReadLength; i++)
2766 {
2767 if(extractSequence)
2768 {
2769 mySequence[i] = i & 1 ?
2770 asciiBases[myPackedSequence[i / 2] & 0xF] :
2771 asciiBases[myPackedSequence[i / 2] >> 4];
2772 }
2773
2774 if(extractQuality)
2775 {
2776 if(myPackedQuality[i] != 0xFF)
2777 {
2778 // Quality is specified, so mark the flag.
2779 qualitySpecified = true;
2780 }
2781
2782 myQuality[i] = myPackedQuality[i] + 33;
2783 }
2784 }
2785
2786 // If the read length is 0, then set the sequence and quality to '*'
2787 if(myRecordPtr->myReadLength == 0)
2788 {
2789 if(extractSequence)
2790 {
2791 mySequence = "*";
2792 }
2793 if(extractQuality)
2794 {
2795 myQuality = "*";
2796 }
2797 }
2798 else if(extractQuality && !qualitySpecified)
2799 {
2800 // No quality was specified, so set it to "*"
2801 myQuality = "*";
2802 }
2803}
2804
2805
2806// Parse the cigar to calculate the alignment/unclipped end.
2807bool SamRecord::parseCigar()
2808{
2809 // Determine if the cigar string or cigar binary needs to be parsed.
2810 if(myCigar.Length() == 0)
2811 {
2812 // The cigar string is not yet set, so parse the binary.
2813 return(parseCigarBinary());
2814 }
2815 return(parseCigarString());
2816}
2817
2818// Parse the cigar to calculate the alignment/unclipped end.
2819bool SamRecord::parseCigarBinary()
2820{
2821 // Only need to parse if the string is not already set.
2822 // The length of the cigar string is set to zero when the
2823 // record is read from a file into the buffer.
2824 if(myCigar.Length() != 0)
2825 {
2826 // Already parsed.
2827 return(true);
2828 }
2829
2830 unsigned char * readNameEnds =
2831 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
2832
2833 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
2834
2835 myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
2836
2837 myCigarRoller.getCigarString(myCigar);
2838
2839 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
2840
2841 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
2842 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
2843
2844 // if the cigar length is 0, then set the cigar string to "*"
2845 if(myRecordPtr->myCigarLength == 0)
2846 {
2847 myCigar = "*";
2848 return(true);
2849 }
2850
2851 // Copy the cigar into a temporary buffer.
2852 int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
2853 if(newBufferSize > myCigarTempBufferAllocatedSize)
2854 {
2855 uint32_t* tempBufferPtr =
2856 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
2857 if(tempBufferPtr == NULL)
2858 {
2859 // Failed to allocate memory.
2860 // Do not parse, just return.
2861 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
2862 myStatus.addError(SamStatus::FAIL_MEM,
2863 "Failed to Allocate Memory.");
2864 return(false);
2865 }
2866 myCigarTempBuffer = tempBufferPtr;
2867 myCigarTempBufferAllocatedSize = newBufferSize;
2868 }
2869
2870 memcpy(myCigarTempBuffer, packedCigar,
2871 myRecordPtr->myCigarLength * sizeof(uint32_t));
2872
2873 // Set the length of the temp buffer.
2874 myCigarTempBufferLength = myRecordPtr->myCigarLength;
2875
2876 return(true);
2877}
2878
2879// Parse the cigar string to calculate the cigar length and alignment end.
2880bool SamRecord::parseCigarString()
2881{
2882 myCigarTempBufferLength = 0;
2883 if(myCigar == "*")
2884 {
2885 // Cigar is empty, so initialize the variables.
2886 myAlignmentLength = 0;
2887 myUnclippedStartOffset = 0;
2888 myUnclippedEndOffset = 0;
2889 myCigarRoller.clear();
2890 return(true);
2891 }
2892
2893 myCigarRoller.Set(myCigar);
2894
2895 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
2896
2897 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
2898 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
2899
2900 // Check to see if the Temporary Cigar Buffer is large enough to contain
2901 // this cigar. If we make it the size of the length of the cigar string,
2902 // it will be more than large enough.
2903 int newBufferSize = myCigar.Length() * sizeof(uint32_t);
2904 if(newBufferSize > myCigarTempBufferAllocatedSize)
2905 {
2906 uint32_t* tempBufferPtr =
2907 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
2908 if(tempBufferPtr == NULL)
2909 {
2910 // Failed to allocate memory.
2911 // Do not parse, just return.
2912 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
2913 myStatus.addError(SamStatus::FAIL_MEM,
2914 "Failed to Allocate Memory.");
2915 return(false);
2916 }
2917 myCigarTempBuffer = tempBufferPtr;
2918 myCigarTempBufferAllocatedSize = newBufferSize;
2919 }
2920
2921 // Track if there were any errors.
2922 bool status = true;
2923
2924 // Track the index into the cigar string that is being parsed.
2925 char *cigarOp;
2926 const char* cigarEntryStart = myCigar.c_str();
2927 int opLen = 0;
2928 int op = 0;
2929
2930 unsigned int * packedCigar = myCigarTempBuffer;
2931 // TODO - maybe one day make a cigar list... or maybe make a
2932 // reference cigar string for ease of lookup....
2933 const char* endCigarString = cigarEntryStart + myCigar.Length();
2934 while(cigarEntryStart < endCigarString)
2935 {
2936 bool validCigarEntry = true;
2937 // Get the opLen from the string. cigarOp will then point to
2938 // the operation.
2939 opLen = strtol(cigarEntryStart, &cigarOp, 10);
2940 // Switch on the type of operation.
2941 switch(*cigarOp)
2942 {
2943 case('M'):
2944 op = 0;
2945 break;
2946 case('I'):
2947 // Insert into the reference position, so do not increment the
2948 // reference end position.
2949 op = 1;
2950 break;
2951 case('D'):
2952 op = 2;
2953 break;
2954 case('N'):
2955 op = 3;
2956 break;
2957 case('S'):
2958 op = 4;
2959 break;
2960 case('H'):
2961 op = 5;
2962 break;
2963 case('P'):
2964 op = 6;
2965 break;
2966 default:
2967 fprintf(stderr, "ERROR parsing cigar\n");
2968 validCigarEntry = false;
2969 status = false;
2970 myStatus.addError(SamStatus::FAIL_PARSE,
2971 "Unknown operation found when parsing the Cigar.");
2972 break;
2973 };
2974 if(validCigarEntry)
2975 {
2976 // Increment the cigar length.
2977 ++myCigarTempBufferLength;
2978 *packedCigar = (opLen << 4) | op;
2979 packedCigar++;
2980 }
2981 // The next Entry starts at one past the cigar op, so set the start.
2982 cigarEntryStart = ++cigarOp;
2983 }
2984
2985 // Check clipLength to adjust the end position.
2986 return(status);
2987}
2988
2989
2990bool SamRecord::setTagsFromBuffer()
2991{
2992 // If the tags do not need to be set from the buffer, return true.
2993 if(myNeedToSetTagsFromBuffer == false)
2994 {
2995 // Already been set from the buffer.
2996 return(true);
2997 }
2998
2999 // Mark false, as they are being set now.
3000 myNeedToSetTagsFromBuffer = false;
3001
3002 unsigned char * extraPtr = myPackedQuality + myRecordPtr->myReadLength;
3003
3004 // Default to success, will be changed to false on failure.
3005 bool status = true;
3006
3007 // Clear any previously set tags.
3008 clearTags();
3009 while (myRecordPtr->myBlockSize + 4 -
3010 (extraPtr - (unsigned char *)myRecordPtr) > 0)
3011 {
3012 int key = 0;
3013 int value = 0;
3014 void * content = extraPtr + 3;
3015 int tagBufferSize = 0;
3016
3017 key = MAKEKEY(extraPtr[0], extraPtr[1], extraPtr[2]);
3018
3019 // First check if the tag already exists.
3020 unsigned int location = extras.Find(key);
3021 int origIndex = 0;
3022 String* duplicate = NULL;
3023 String* origTag = NULL;
3024 if(location != LH_NOTFOUND)
3025 {
3026 duplicate = new String;
3027 origTag = new String;
3028 origIndex = extras[location];
3029
3030 *duplicate = (char)(extraPtr[0]);
3031 *duplicate += (char)(extraPtr[1]);
3032 *duplicate += ':';
3033
3034 *origTag = *duplicate;
3035 *duplicate += (char)(extraPtr[2]);
3036 *duplicate += ':';
3037 }
3038
3039 switch (extraPtr[2])
3040 {
3041 case 'A' :
3042 if(duplicate != NULL)
3043 {
3044 *duplicate += (* (char *) content);
3045 *origTag += intType[origIndex];
3046 *origTag += ':';
3047 appendIntArrayValue(origIndex, *origTag);
3048 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3049 integers[origIndex] = *(char *)content;
3050 intType[origIndex] = extraPtr[2];
3051 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3052 }
3053 else
3054 {
3055 value = integers.Length();
3056 integers.Push(* (char *) content);
3057 intType.push_back(extraPtr[2]);
3058 tagBufferSize += 4;
3059 }
3060 extraPtr += 4;
3061 break;
3062 case 'c' :
3063 if(duplicate != NULL)
3064 {
3065 *duplicate += (* (char *) content);
3066 *origTag += intType[origIndex];
3067 *origTag += ':';
3068 appendIntArrayValue(origIndex, *origTag);
3069 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3070 integers[origIndex] = *(char *)content;
3071 intType[origIndex] = extraPtr[2];
3072 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3073 }
3074 else
3075 {
3076 value = integers.Length();
3077 integers.Push(* (char *) content);
3078 intType.push_back(extraPtr[2]);
3079 tagBufferSize += 4;
3080 }
3081 extraPtr += 4;
3082 break;
3083 case 'C' :
3084 if(duplicate != NULL)
3085 {
3086 *duplicate += (* (unsigned char *) content);
3087 *origTag += intType[origIndex];
3088 *origTag += ':';
3089 appendIntArrayValue(origIndex, *origTag);
3090 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3091 integers[origIndex] = *(unsigned char *)content;
3092 intType[origIndex] = extraPtr[2];
3093 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3094 }
3095 else
3096 {
3097 value = integers.Length();
3098 integers.Push(* (unsigned char *) content);
3099 intType.push_back(extraPtr[2]);
3100 tagBufferSize += 4;
3101 }
3102 extraPtr += 4;
3103 break;
3104 case 's' :
3105 if(duplicate != NULL)
3106 {
3107 *duplicate += (* (short *) content);
3108 *origTag += intType[origIndex];
3109 *origTag += ':';
3110 appendIntArrayValue(origIndex, *origTag);
3111 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3112 integers[origIndex] = *(short *)content;
3113 intType[origIndex] = extraPtr[2];
3114 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3115 }
3116 else
3117 {
3118 value = integers.Length();
3119 integers.Push(* (short *) content);
3120 intType.push_back(extraPtr[2]);
3121 tagBufferSize += 5;
3122 }
3123 extraPtr += 5;
3124 break;
3125 case 'S' :
3126 if(duplicate != NULL)
3127 {
3128 *duplicate += (* (unsigned short *) content);
3129 *origTag += intType[origIndex];
3130 *origTag += ':';
3131 appendIntArrayValue(origIndex, *origTag);
3132 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3133 integers[origIndex] = *(unsigned short *)content;
3134 intType[origIndex] = extraPtr[2];
3135 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3136 }
3137 else
3138 {
3139 value = integers.Length();
3140 integers.Push(* (unsigned short *) content);
3141 intType.push_back(extraPtr[2]);
3142 tagBufferSize += 5;
3143 }
3144 extraPtr += 5;
3145 break;
3146 case 'i' :
3147 if(duplicate != NULL)
3148 {
3149 *duplicate += (* (int *) content);
3150 *origTag += intType[origIndex];
3151 *origTag += ':';
3152 appendIntArrayValue(origIndex, *origTag);
3153 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3154 integers[origIndex] = *(int *)content;
3155 intType[origIndex] = extraPtr[2];
3156 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3157 }
3158 else
3159 {
3160 value = integers.Length();
3161 integers.Push(* (int *) content);
3162 intType.push_back(extraPtr[2]);
3163 tagBufferSize += 7;
3164 }
3165 extraPtr += 7;
3166 break;
3167 case 'I' :
3168 if(duplicate != NULL)
3169 {
3170 *duplicate += (* (unsigned int *) content);
3171 *origTag += intType[origIndex];
3172 *origTag += ':';
3173 appendIntArrayValue(origIndex, *origTag);
3174 tagBufferSize -= getNumericTagTypeSize(intType[origIndex]);
3175 integers[origIndex] = *(unsigned int *)content;
3176 intType[origIndex] = extraPtr[2];
3177 tagBufferSize += getNumericTagTypeSize(intType[origIndex]);
3178 }
3179 else
3180 {
3181 value = integers.Length();
3182 integers.Push((int) * (unsigned int *) content);
3183 intType.push_back(extraPtr[2]);
3184 tagBufferSize += 7;
3185 }
3186 extraPtr += 7;
3187 break;
3188 case 'Z' :
3189 if(duplicate != NULL)
3190 {
3191 *duplicate += ((const char *) content);
3192 *origTag += 'Z';
3193 *origTag += ':';
3194 *origTag += (char*)(strings[origIndex]);
3195 tagBufferSize -= strings[origIndex].Length();
3196 strings[origIndex] = (const char *) content;
3197 extraPtr += 4 + strings[origIndex].Length();
3198 tagBufferSize += strings[origIndex].Length();
3199 }
3200 else
3201 {
3202 value = strings.Length();
3203 strings.Push((const char *) content);
3204 tagBufferSize += 4 + strings.Last().Length();
3205 extraPtr += 4 + strings.Last().Length();
3206 }
3207 break;
3208 case 'B' :
3209 if(duplicate != NULL)
3210 {
3211 *origTag += 'B';
3212 *origTag += ':';
3213 *origTag += (char*)(strings[origIndex]);
3214 tagBufferSize -=
3215 getBtagBufferSize(strings[origIndex]);
3216 int bufferSize =
3217 getStringFromBtagBuffer((unsigned char*)content,
3218 strings[origIndex]);
3219 *duplicate += (char *)(strings[origIndex]);
3220 tagBufferSize += bufferSize;
3221 extraPtr += 3 + bufferSize;
3222 }
3223 else
3224 {
3225 value = strings.Length();
3226 String tempBTag;
3227 int bufferSize =
3228 getStringFromBtagBuffer((unsigned char*)content,
3229 tempBTag);
3230 strings.Push(tempBTag);
3231 tagBufferSize += 3 + bufferSize;
3232 extraPtr += 3 + bufferSize;
3233 }
3234 break;
3235 case 'f' :
3236 if(duplicate != NULL)
3237 {
3238 duplicate->appendFullFloat(* (float *) content);
3239 *origTag += 'f';
3240 *origTag += ':';
3241 origTag->appendFullFloat(floats[origIndex]);
3242 floats[origIndex] = *(float *)content;
3243 }
3244 else
3245 {
3246 value = floats.size();
3247 floats.push_back(* (float *) content);
3248 tagBufferSize += 7;
3249 }
3250 extraPtr += 7;
3251 break;
3252 default :
3253 fprintf(stderr,
3254 "parsing BAM - Unknown custom field of type %c%c:%c\n",
3255 extraPtr[0], extraPtr[1], extraPtr[2]);
3256 fprintf(stderr, "BAM Tags: \n");
3257
3258 unsigned char* tagInfo = myPackedQuality + myRecordPtr->myReadLength;
3259
3260 fprintf(stderr, "\n\n");
3261 tagInfo = myPackedQuality + myRecordPtr->myReadLength;
3262 while(myRecordPtr->myBlockSize + 4 -
3263 (tagInfo - (unsigned char *)myRecordPtr) > 0)
3264 {
3265 fprintf(stderr, "%02x",tagInfo[0]);
3266 ++tagInfo;
3267 }
3268 fprintf(stderr, "\n");
3269
3270 // Failed on read.
3271 // Increment extraPtr just by the size of the 3 known fields
3272 extraPtr += 3;
3273 myStatus.addError(SamStatus::FAIL_PARSE,
3274 "Unknown tag type.");
3275 status = false;
3276 }
3277
3278 if(duplicate != NULL)
3279 {
3280 // Duplicate tag in this record.
3281 // Tag already existed, print message about overwriting.
3282 // WARN about dropping duplicate tags.
3283 if(myNumWarns++ < myMaxWarns)
3284 {
3285 fprintf(stderr, "WARNING: Duplicate Tags, overwritting %s with %s\n",
3286 origTag->c_str(), duplicate->c_str());
3287 if(myNumWarns == myMaxWarns)
3288 {
3289 fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
3290 }
3291 }
3292
3293 continue;
3294 }
3295
3296 // Only add the tag if it has so far been successfully processed.
3297 if(status)
3298 {
3299 // Add the tag.
3300 extras.Add(key, value);
3301 myTagBufferSize += tagBufferSize;
3302 }
3303 }
3304 return(status);
3305}
3306
3307
3308bool SamRecord::setTagsInBuffer()
3309{
3310 // The buffer size extends from the start of the record to data
3311 // plus the length of the variable fields,
3312 // Multiply the cigar length by 4 since it is the number of uint32_t fields.
3313 int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
3314 int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) -
3315 (unsigned char*)myRecordPtr) +
3316 myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
3317 myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
3318
3319 // Make sure the buffer is big enough.
3320 if(!allocateRecordStructure(newBufferSize))
3321 {
3322 // Failed to allocate space.
3323 return(false);
3324 }
3325
3326 char * extraPtr = (char*)myPackedQuality + myRecordPtr->myReadLength;
3327
3328 bool status = true;
3329
3330 // Set the tags in the buffer.
3331 if (extras.Entries())
3332 {
3333 for (int i = 0; i < extras.Capacity(); i++)
3334 {
3335 if (extras.SlotInUse(i))
3336 {
3337 int key = extras.GetKey(i);
3338 getTag(key, extraPtr);
3339 extraPtr += 2;
3340 char vtype;
3341 getTypeFromKey(key, vtype);
3342
3343 if(vtype == 'i')
3344 {
3345 vtype = getIntegerType(i);
3346 }
3347
3348 extraPtr[0] = vtype;
3349
3350 // increment the pointer to where the value is.
3351 extraPtr += 1;
3352
3353 switch (vtype)
3354 {
3355 case 'A' :
3356 *(char*)extraPtr = (char)getInteger(i);
3357 // sprintf(extraPtr, "%d", getInteger(i));
3358 extraPtr += 1;
3359 break;
3360 case 'c' :
3361 *(int8_t*)extraPtr = (int8_t)getInteger(i);
3362 // sprintf(extraPtr, "%.4d", getInteger(i));
3363 extraPtr += 1;
3364 break;
3365 case 'C' :
3366 *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
3367 // sprintf(extraPtr, "%.4d", getInteger(i));
3368 extraPtr += 1;
3369 break;
3370 case 's' :
3371 *(int16_t*)extraPtr = (int16_t)getInteger(i);
3372 // sprintf(extraPtr, "%.4d", getInteger(i));
3373 extraPtr += 2;
3374 break;
3375 case 'S' :
3376 *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
3377 // sprintf(extraPtr, "%.4d", getInteger(i));
3378 extraPtr += 2;
3379 break;
3380 case 'i' :
3381 *(int32_t*)extraPtr = (int32_t)getInteger(i);
3382 // sprintf(extraPtr, "%.4d", getInteger(i));
3383 extraPtr += 4;
3384 break;
3385 case 'I' :
3386 *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
3387 // sprintf(extraPtr, "%.4d", getInteger(i));
3388 extraPtr += 4;
3389 break;
3390 case 'Z' :
3391 sprintf(extraPtr, "%s", getString(i).c_str());
3392 extraPtr += getString(i).Length() + 1;
3393 break;
3394 case 'B' :
3395 extraPtr += setBtagBuffer(getString(i), extraPtr);
3396 //--TODO-- Set buffer with correct B tag
3397 //sprintf(extraPtr, "%s", getString(i).c_str());
3398 // extraPtr += getBtagBufferSize(getString(i));
3399 break;
3400 case 'f' :
3401 *(float*)extraPtr = getFloat(i);
3402 extraPtr += 4;
3403 break;
3404 default :
3405 myStatus.addError(SamStatus::FAIL_PARSE,
3406 "Unknown tag type.");
3407 status = false;
3408 break;
3409 }
3410 }
3411 }
3412 }
3413
3414 // Validate that the extra pointer is at the end of the allocated buffer.
3415 // If not then there was a problem.
3416 if(extraPtr != (char*)myRecordPtr + newBufferSize)
3417 {
3418 fprintf(stderr, "ERROR updating the buffer. Incorrect size.");
3419 myStatus.addError(SamStatus::FAIL_PARSE,
3420 "ERROR updating the buffer. Incorrect size.");
3421 status = false;
3422 }
3423
3424
3425 // The buffer tags are now in sync.
3426 myNeedToSetTagsInBuffer = false;
3427 myIsTagsBufferValid = true;
3428 return(status);
3429}
3430
3431
3432// Reset the variables for a newly set buffer. The buffer must be set first
3433// since this looks up the reference ids in the buffer to set the reference
3434// names.
3435void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
3436{
3437 // Lookup the reference name & mate reference name associated with this
3438 // record.
3439 myReferenceName =
3440 header.getReferenceLabel(myRecordPtr->myReferenceID);
3441 myMateReferenceName =
3442 header.getReferenceLabel(myRecordPtr->myMateReferenceID);
3443
3444 // Clear the SAM Strings that are now not in-sync with the buffer.
3445 myReadName.SetLength(0);
3446 myCigar.SetLength(0);
3447 mySequence.SetLength(0);
3448 mySeqWithEq.clear();
3449 mySeqWithoutEq.clear();
3450 myQuality.SetLength(0);
3451 myNeedToSetTagsFromBuffer = true;
3452 myNeedToSetTagsInBuffer = false;
3453
3454 //Set that the buffer is valid.
3455 myIsBufferSynced = true;
3456 // Set that the variable length buffer fields are valid.
3457 myIsReadNameBufferValid = true;
3458 myIsCigarBufferValid = true;
3459 myPackedSequence = (unsigned char *)myRecordPtr->myData +
3460 myRecordPtr->myReadNameLength + myRecordPtr->myCigarLength * sizeof(int);
3461 myIsSequenceBufferValid = true;
3462 myBufferSequenceTranslation = NONE;
3463 myPackedQuality = myPackedSequence +
3464 (myRecordPtr->myReadLength + 1) / 2;
3465 myIsQualityBufferValid = true;
3466 myIsTagsBufferValid = true;
3467 myIsBinValid = true;
3468}
3469
3470
3471// Extract the vtype from the key.
3472void SamRecord::getTypeFromKey(int key, char& type) const
3473{
3474 // Extract the type from the key.
3475 type = (key >> 16) & 0xFF;
3476}
3477
3478
3479// Extract the tag from the key.
3480void SamRecord::getTag(int key, char* tag) const
3481{
3482 // Extract the tag from the key.
3483 tag[0] = key & 0xFF;
3484 tag[1] = (key >> 8) & 0xFF;
3485 tag[2] = 0;
3486}
3487
3488
3489// Index is the index into the strings array.
3490String & SamRecord::getString(int index)
3491{
3492 int value = extras[index];
3493
3494 return strings[value];
3495}
3496
3497int & SamRecord::getInteger(int offset)
3498{
3499 int value = extras[offset];
3500
3501 return integers[value];
3502}
3503
3504const char & SamRecord::getIntegerType(int offset) const
3505{
3506 int value = extras[offset];
3507
3508 return intType[value];
3509}
3510
3511float & SamRecord::getFloat(int offset)
3512{
3513 int value = extras[offset];
3514
3515 return floats[value];
3516 }
3517
3518
3519void SamRecord::appendIntArrayValue(char type, int value, String& strVal) const
3520{
3521 switch(type)
3522 {
3523 case 'A':
3524 strVal += (char)value;
3525 break;
3526 case 'c':
3527 case 's':
3528 case 'i':
3529 case 'C':
3530 case 'S':
3531 case 'I':
3532 strVal += value;
3533 break;
3534 default:
3535 // Do nothing.
3536 ;
3537 }
3538}
3539
3540
3541int SamRecord::getBtagBufferSize(String& tagStr)
3542{
3543 if(tagStr.Length() < 1)
3544 {
3545 // ERROR, needs at least the type.
3546 myStatus.addError(SamStatus::FAIL_PARSE,
3547 "SamRecord::getBtagBufferSize no tag subtype specified");
3548 return(0);
3549 }
3550 char type = tagStr[0];
3551 int elementSize = getNumericTagTypeSize(type);
3552 if(elementSize <= 0)
3553 {
3554 // ERROR, 'B' tag subtype must be numeric, so should be non-zero
3555 String errorMsg = "SamRecord::getBtagBufferSize invalid tag subtype, ";
3556 errorMsg += type;
3557 myStatus.addError(SamStatus::FAIL_PARSE, errorMsg.c_str());
3558 return(0);
3559 }
3560
3561 // Separated by ',', so count the number of commas.
3562 int numElements = 0;
3563 int index = tagStr.FastFindChar(',', 0);
3564 while(index > 0)
3565 {
3566 ++numElements;
3567 index = tagStr.FastFindChar(',', index+1);
3568 }
3569 // Add 5 bytes: type & numElements
3570 return(numElements * elementSize + 5);
3571}
3572
3573
3574int SamRecord::setBtagBuffer(String& tagStr, char* extraPtr)
3575{
3576 if(tagStr.Length() < 1)
3577 {
3578 // ERROR, needs at least the type.
3579 myStatus.addError(SamStatus::FAIL_PARSE,
3580 "SamRecord::getBtagBufferSize no tag subtype specified");
3581 return(0);
3582 }
3583 char type = tagStr[0];
3584 int elementSize = getNumericTagTypeSize(type);
3585 if(elementSize <= 0)
3586 {
3587 // ERROR, 'B' tag subtype must be numeric, so should be non-zero
3588 String errorMsg = "SamRecord::getBtagBufferSize invalid tag subtype, ";
3589 errorMsg += type;
3590 myStatus.addError(SamStatus::FAIL_PARSE, errorMsg.c_str());
3591 return(0);
3592 }
3593
3594 int totalInc = 0;
3595 // Write the type.
3596 *(char*)extraPtr = type;
3597 ++extraPtr;
3598 ++totalInc;
3599
3600 // Get the number of elements by counting ','s
3601 uint32_t numElements = 0;
3602 int index = tagStr.FastFindChar(',', 0);
3603 while(index > 0)
3604 {
3605 ++numElements;
3606 index = tagStr.FastFindChar(',', index+1);
3607 }
3608 *(uint32_t*)extraPtr = numElements;
3609 extraPtr += 4;
3610 totalInc += 4;
3611
3612 const char* stringPtr = tagStr.c_str();
3613 const char* endPtr = stringPtr + tagStr.Length();
3614 // increment past the subtype and ','.
3615 stringPtr += 2;
3616
3617 char* newPtr = NULL;
3618 while(stringPtr < endPtr)
3619 {
3620 switch(type)
3621 {
3622 case 'f':
3623 *(float*)extraPtr = (float)(strtod(stringPtr, &newPtr));
3624 break;
3625 case 'c':
3626 *(int8_t*)extraPtr = (int8_t)strtol(stringPtr, &newPtr, 0);
3627 break;
3628 case 's':
3629 *(int16_t*)extraPtr = (int16_t)strtol(stringPtr, &newPtr, 0);
3630 break;
3631 case 'i':
3632 *(int32_t*)extraPtr = (int32_t)strtol(stringPtr, &newPtr, 0);
3633 break;
3634 case 'C':
3635 *(uint8_t*)extraPtr = (uint8_t)strtoul(stringPtr, &newPtr, 0);
3636 break;
3637 case 'S':
3638 *(uint16_t*)extraPtr = (uint16_t)strtoul(stringPtr, &newPtr, 0);
3639 break;
3640 case 'I':
3641 *(uint32_t*)extraPtr = (uint32_t)strtoul(stringPtr, &newPtr, 0);
3642 break;
3643 default :
3644 myStatus.addError(SamStatus::FAIL_PARSE,
3645 "Unknown 'B' tag subtype.");
3646 break;
3647 }
3648 extraPtr += elementSize;
3649 totalInc += elementSize;
3650 stringPtr = newPtr + 1; // skip the ','
3651 }
3652 return(totalInc);
3653}
3654
3655
3656int SamRecord::getStringFromBtagBuffer(unsigned char* buffer,
3657 String& tagStr)
3658{
3659 tagStr.Clear();
3660
3661 int bufferSize = 0;
3662
3663 // 1st byte is the type.
3664 char type = *buffer;
3665 ++buffer;
3666 ++bufferSize;
3667 tagStr = type;
3668
3669 // 2nd-5th bytes are the size
3670 unsigned int numEntries = *(unsigned int *)buffer;
3671 buffer += 4;
3672 bufferSize += 4;
3673 // Num Entries is not included in the string.
3674
3675 int subtypeSize = getNumericTagTypeSize(type);
3676
3677 for(unsigned int i = 0; i < numEntries; i++)
3678 {
3679 tagStr += ',';
3680 switch(type)
3681 {
3682 case 'f':
3683 tagStr.appendFullFloat(*(float *)buffer);
3684 break;
3685 case 'c':
3686 tagStr += *(int8_t *)buffer;
3687 break;
3688 case 's':
3689 tagStr += *(int16_t *)buffer;
3690 break;
3691 case 'i':
3692 tagStr += *(int32_t *)buffer;
3693 break;
3694 case 'C':
3695 tagStr += *(uint8_t *)buffer;
3696 break;
3697 case 'S':
3698 tagStr += *(uint16_t *)buffer;
3699 break;
3700 case 'I':
3701 tagStr += *(uint32_t *)buffer;
3702 break;
3703 default :
3704 myStatus.addError(SamStatus::FAIL_PARSE,
3705 "Unknown 'B' tag subtype.");
3706 break;
3707 }
3708 buffer += subtypeSize;
3709 bufferSize += subtypeSize;
3710 }
3711 return(bufferSize);
3712}
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition InputFile.h:654
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition InputFile.h:600
unsigned int ifwrite(IFILE file, const void *buffer, unsigned int size)
Write the specified number of bytes from the specified buffer into the file.
Definition InputFile.h:669
InputFile * IFILE
Define IFILE as a pointer to an InputFile object.
Definition InputFile.h:551
static const char UNKNOWN_QUALITY_CHAR
Character used when the quality is unknown.
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
Definition Cigar.h:84
static bool isMatchOrMismatch(Operation op)
Return true if the specified operation is a match/mismatch operation, false if not.
Definition Cigar.h:298
static bool foundInQuery(Operation op)
Return true if the specified operation is found in the query sequence, false if not.
Definition Cigar.h:219
@ insert
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
Definition Cigar.h:91
void getCigarString(String &cigarString) const
Set the passed in String to the string reprentation of the Cigar operations in this object.
Definition Cigar.cpp:52
HandlingType
This specifies how this class should respond to errors.
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
bool isOpen() const
Returns whether or not the file was successfully opened.
Definition InputFile.h:423
const char * getFileName() const
Get the filename that is currently opened.
Definition InputFile.h:473
This class allows a user to get/set the fields in a SAM/BAM Header.
int getReferenceID(const String &referenceName, bool addID=false)
Get the reference ID for the specified reference name (chromosome).
const String & getReferenceLabel(int id) const
Return the reference name (chromosome) for the specified reference id.
static void seqWithoutEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence converting '=' to the appropriate base using the reference.
static void seqWithEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence with '=' in any position where the sequence matches the reference.
int32_t getBlockSize()
Get the block size of the record (BAM format).
uint16_t getCigarLength()
Get the length of the BAM formatted CIGAR.
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
Definition SamRecord.h:57
@ NONE
Leave the sequence as is.
Definition SamRecord.h:58
@ BASES
Translate '=' to the actual base.
Definition SamRecord.h:60
@ EQUAL
Translate bases that match the reference to '='.
Definition SamRecord.h:59
bool setReadName(const char *readName)
Set QNAME to the passed in name.
int32_t getInsertSize()
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).
int32_t get0BasedMatePosition()
Get the 0-based(BAM) leftmost mate/next fragment's position.
int32_t get1BasedPosition()
Get the 1-based(SAM) leftmost position (POS) of the record.
void clearTags()
Clear the tags in this record.
bool addIntTag(const char *tag, int32_t value)
Add the specified integer tag to the record.
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
bool getTagsString(const char *tags, String &returnString, char delim='\t')
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE...
GenomeSequence * getReference()
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it...
int32_t getAlignmentLength()
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
int & getInteger(const char *tag)
Get the integer value for the specified tag, DEPRECATED, use getIntegerTag that returns a bool.
bool setInsertSize(int32_t insertSize)
Sets the inferred insert size (ISIZE)/observed template length (TLEN).
int32_t get1BasedAlignmentEnd()
Returns the 1-based inclusive rightmost position of the clipped sequence.
uint32_t getTagLength()
Returns the length of the BAM formatted tags.
SamRecord()
Default Constructor.
Definition SamRecord.cpp:34
static bool isIntegerType(char vtype)
Returns whether or not the specified vtype is an integer type.
bool rmTag(const char *tag, char type)
Remove a tag.
bool setMateReferenceName(SamFileHeader &header, const char *mateReferenceName)
Set the mate/next fragment's reference sequence name (RNEXT) to the specified name,...
uint8_t getReadNameLength()
Get the length of the readname (QNAME) including the null.
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
bool getFloatTag(const char *tag, float &tagVal)
Get the float value for the specified tag.
SamStatus::Status writeRecordBuffer(IFILE filePtr)
Write the record as a BAM into the specified already opened file.
const char * getMateReferenceNameOrEqual()
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the ...
bool setMapQuality(uint8_t mapQuality)
Set the mapping quality (MAPQ).
static bool isFloatType(char vtype)
Returns whether or not the specified vtype is a float type.
SamStatus::Status setBuffer(const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header)
Sets the SamRecord to contain the information in the BAM formatted fromBuffer.
int32_t get1BasedUnclippedStart()
Returns the 1-based inclusive left-most position adjusted for clipped bases.
bool addTag(const char *tag, char vtype, const char *value)
Add the specified tag,vtype,value to the record.
uint16_t getBin()
Get the BAM bin for the record.
bool isValid(SamFileHeader &header)
Returns whether or not the record is valid, setting the status to indicate success or failure.
int32_t getMateReferenceID()
Get the mate reference id of the record (BAM format: mate_rid/next_refID).
bool getFields(bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality)
Returns the values of all fields except the tags.
bool set0BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.
void resetRecord()
Reset the fields of the record to a default value.
Definition SamRecord.cpp:91
bool setFlag(uint16_t flag)
Set the bitwise FLAG to the specified value.
bool set1BasedPosition(int32_t position)
Set the leftmost position (POS) using the specified 1-based (SAM format) value.
SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader &header)
Read the BAM record from a file.
uint16_t getFlag()
Get the flag (FLAG).
const void * getRecordBuffer()
Get a const pointer to the buffer that contains the BAM representation of the record.
void setSequenceTranslation(SequenceTranslation translation)
Set the type of sequence translation to use when getting the sequence.
int32_t get1BasedMatePosition()
Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).
int32_t get0BasedUnclippedEnd()
Returns the 0-based inclusive right-most position adjusted for clipped bases.
bool shiftIndelsLeft()
Shift the indels (if any) to the left by updating the CIGAR.
int * getIntegerTag(const char *tag)
Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure...
const SamStatus & getStatus()
Returns the status associated with the last method that sets the status.
static bool isCharType(char vtype)
Returns whether or not the specified vtype is a char type.
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.
int32_t get1BasedUnclippedEnd()
Returns the 1-based inclusive right-most position adjusted for clipped bases.
uint32_t getNumOverlaps(int32_t start, int32_t end)
Return the number of bases in this read that overlap the passed in region.
const char * getMateReferenceName()
Get the mate/next fragment's reference sequence name (RNEXT).
bool checkTag(const char *tag, char type)
Check if the specified tag contains a value of the specified vtype.
bool getNextSamTag(char *tag, char &vtype, void **value)
Get the next tag from the record.
void setReference(GenomeSequence *reference)
Set the reference to the specified genome sequence object.
bool setSequence(const char *seq)
Sets the sequence (SEQ) to the specified SAM formatted sequence string.
int32_t get0BasedUnclippedStart()
Returns the 0-based inclusive left-most position adjusted for clipped bases.
int32_t getReadLength()
Get the length of the read.
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
const String * getStringTag(const char *tag)
Get the string value for the specified tag.
bool set1BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value...
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
const char * getCigar()
Returns the SAM formatted CIGAR string.
uint8_t getMapQuality()
Get the mapping quality (MAPQ) of the record.
const String & getString(const char *tag)
Get the string value for the specified tag.
bool set0BasedPosition(int32_t position)
Set the leftmost position using the specified 0-based (BAM format) value.
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
void resetTagIter()
Reset the tag iterator to the beginning of the tags.
bool setQuality(const char *quality)
Sets the quality (QUAL) to the specified SAM formatted quality string.
bool setReferenceName(SamFileHeader &header, const char *referenceName)
Set the reference sequence name (RNAME) to the specified name, using the header to determine the refe...
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
~SamRecord()
Destructor.
Definition SamRecord.cpp:72
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTran...
bool rmTags(const char *tags)
Remove tags.
static bool isStringType(char vtype)
Returns whether or not the specified vtype is a string type.
The SamValidationErrors class is a container class that holds SamValidationError Objects,...
void getErrorString(std::string &errorString) const
Append the error messages contained in this container to the passed in string.
static bool isValid(SamFileHeader &samHeader, SamRecord &samRecord, SamValidationErrors &validationErrors)
Validates whether or not the specified SamRecord is valid, calling all of the other validations.
Status
Return value enum for StatGenFile methods.
@ FAIL_MEM
fail a memory allocation.
@ NO_MORE_RECS
NO_MORE_RECS: failed to read a record since there are no more to read either in the file or section i...
@ SUCCESS
method completed successfully.
@ FAIL_IO
method failed due to an I/O issue.
@ INVALID
invalid other than for sorting.
@ FAIL_PARSE
failed to parse a record/header - invalid format.
@ FAIL_ORDER
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without open...
void addError(Status newStatus, const char *newMessage)
Add the specified error message to the status message, setting the status to newStatus if the current...
Structure of a BAM record.
Definition SamRecord.h:34