18 #include "PedigreeGlobals.h" 26 int PedigreeGlobals::traitCount = 0;
27 int PedigreeGlobals::affectionCount = 0;
28 int PedigreeGlobals::covariateCount = 0;
29 int PedigreeGlobals::markerCount = 0;
30 int PedigreeGlobals::stringCount = 0;
33 bool PedigreeGlobals::chromosomeX =
false;
34 bool PedigreeGlobals::sexSpecificMap =
false;
47 int PedigreeGlobals::markerInfoCount = 0;
48 int PedigreeGlobals::markerInfoSize = 0;
50 MarkerInfo ** PedigreeGlobals::markerInfo = NULL;
51 MarkerInfo ** PedigreeGlobals::markerInfoByInteger = NULL;
54 int MarkerInfo::count = 0;
58 if ((*left)->chromosome != (*right)->chromosome)
59 return (*left)->chromosome - (*right)->chromosome;
61 double difference = (*left)->position - (*right)->position;
65 else if (difference == 0.0)
66 return (*left)->serial - (*right)->serial;
71 String MarkerInfo::GetAlleleLabel(
int allele)
73 if (alleleLabels.Length() > allele && alleleLabels[allele].Length())
74 return alleleLabels[allele];
75 else if (alleleLabels.Length() <= allele)
76 alleleLabels.Dimension(allele + 1);
77 return alleleLabels[allele] = allele;
80 bool MarkerInfo::AdjustFrequencies()
82 if (freq.Length() <= 1)
89 error(
"Locus %s has negative allele frequencies\n", (
const char *) name);
91 double sum = freq.Sum();
94 error(
"Locus %s frequencies sum to %f, which doesn't make sense\n",
95 (
const char *) name, sum);
100 if (fabs(sum - 1.0) > 1.2e-5)
102 printf(
"Locus %s frequencies sum to %f, adjusted to 1.0\n",
103 (
const char *) name, sum);
110 void MarkerInfo::IndexAlleles()
112 if (alleleLabels.Length() >= 255)
113 error(
"Marker %s has more than 254 distinct alleles\n",
114 (
const char *) name);
116 alleleNumbers.Clear();
118 for (
int i = 1; i < alleleLabels.Length(); i++)
119 alleleNumbers.SetInteger(alleleLabels[i], i);
122 int MarkerInfo::NewAllele(
const String & label)
124 if (alleleLabels.Length() == 0)
125 alleleLabels.Push(
"");
127 if (alleleLabels.Length() >= 255)
128 error(
"Marker %s has more than 254 distinct alleles\n",
129 (
const char *) name);
131 alleleNumbers.SetInteger(label, alleleLabels.Length());
132 alleleLabels.Push(label);
134 return alleleLabels.Length() - 1;
137 int PedigreeGlobals::GetTraitID(
const char * name)
139 int idx = traitLookup.Integer(name);
141 if (idx != -1)
return idx;
143 traitNames.Add(name);
144 traitLookup.SetInteger(name, traitCount);
148 int PedigreeGlobals::GetAffectionID(
const char * name)
150 int idx = affectionLookup.Integer(name);
152 if (idx != -1)
return idx;
154 affectionNames.Add(name);
155 affectionLookup.SetInteger(name, affectionCount);
156 return affectionCount++;
159 int PedigreeGlobals::GetCovariateID(
const char * name)
161 int idx = covariateLookup.Integer(name);
163 if (idx != -1)
return idx;
165 covariateNames.Add(name);
166 covariateLookup.SetInteger(name, covariateCount);
167 return covariateCount++;
170 int PedigreeGlobals::GetStringID(
const char * name)
172 int idx = stringLookup.Integer(name);
174 if (idx != -1)
return idx;
176 stringNames.Add(name);
177 stringLookup.SetInteger(name, stringCount);
178 return stringCount++;
181 int PedigreeGlobals::GetMarkerID(
const char * name)
183 int idx = markerLookup.Integer(name);
185 if (idx != -1)
return idx;
187 markerNames.Add(name);
188 markerLookup.SetInteger(name, markerCount);
191 if (markerCount == 0)
195 for (
int i = 0; i < 16; i++)
196 markerInfoByInteger[i] = NULL;
198 else if ((markerCount & (markerCount - 1)) == 0 && markerCount > 15)
202 for (
int i = 0; i < markerCount; i++)
203 newKey[i] = markerInfoByInteger[i];
205 for (
int i = markerCount; i < markerCount * 2; i++)
208 delete [] markerInfoByInteger;
210 markerInfoByInteger = newKey;
213 return markerCount++;
220 if (info != NULL)
return info;
223 markerInfoByName.Add(name, info);
225 if (markerInfoCount >= markerInfoSize)
228 markerInfo[markerInfoCount++] = info;
230 int markerId = LookupMarker(name);
231 if (markerId >= 0) markerInfoByInteger[markerId] = info;
236 MarkerInfo * PedigreeGlobals::GetMarkerInfo(
int markerId)
238 if (markerId >= markerCount)
239 error(
"Attempted to retrieve MarkerInfo using out-of-bounds index\n");
241 if (markerInfoByInteger[markerId] != NULL)
242 return markerInfoByInteger[markerId];
244 return GetMarkerInfo(markerNames[markerId]);
247 void PedigreeGlobals::GrowMarkerInfo()
249 int newSize = markerInfoSize ? 2 * markerInfoSize : 32;
255 memcpy(newArray, markerInfo,
sizeof(
MarkerInfo *) * markerInfoSize);
256 delete [] markerInfo;
259 markerInfo = newArray;
260 markerInfoSize = newSize;
263 void PedigreeGlobals::FlagMissingMarkers(
IntArray & missingMarkers)
265 int skipped_markers = 0;
267 if (missingMarkers.Length())
271 printf(
"These markers couldn't be placed and won't be analysed:");
273 for (
int i = 0; i < missingMarkers.Length(); i++)
274 names.Push(GetMarkerInfo(missingMarkers[i])->name);
277 for (
int i = 0, line = 80, lines = 0; i < missingMarkers.Length(); i++)
279 if (line + names[i].Length() + 1 > 79)
280 printf(
"\n "), line = 3, lines++;
284 printf(
"%s ", (
const char *) names[i]);
285 line += names[i].Length() + 1;
292 printf(
"as well as %d other unlisted markers...", skipped_markers);
298 void PedigreeGlobals::GetOrderedMarkers(
IntArray & markers)
300 if (markers.Length() == 0)
302 markers.Dimension(markerCount);
303 markers.SetSequence(0, 1);
311 for (
int i = 0; i < markers.Length(); i++)
313 MarkerInfo * info = GetMarkerInfo(markers[i]);
315 if (info->chromosome != -1)
316 subset[count++] = info;
318 missingMarkers.Push(i);
321 FlagMissingMarkers(missingMarkers);
323 QuickSort(subset, count,
sizeof(
MarkerInfo *),
324 COMPAREFUNC MarkerInfo::ComparePosition);
327 for (
int i = 0; i < count; i++)
328 markers.Push(GetMarkerID(subset[i]->name));
331 int PedigreeGlobals::SortMarkersInMapOrder(
IntArray & markers,
int chromosome)
333 if (markers.Length() == 0)
335 markers.Dimension(markerCount);
336 markers.SetSequence(0, 1);
344 for (
int i = 0; i < markers.Length(); i++)
346 MarkerInfo * info = GetMarkerInfo(markers[i]);
348 if (info->chromosome != -1)
349 subset[count++] = info;
350 else if (chromosome == -1)
351 missingMarkers.Push(i);
354 if (chromosome == -1)
355 FlagMissingMarkers(missingMarkers);
357 QuickSort(subset, count,
sizeof(
MarkerInfo *),
358 COMPAREFUNC MarkerInfo::ComparePosition);
362 int current_chromosome = -1, next_chromosome = 0;
364 for (
int i = 0; i < count; i++)
365 if (subset[i]->chromosome < chromosome)
367 else if (current_chromosome == -1 ||
368 subset[i]->chromosome == current_chromosome)
370 markers.Push(GetMarkerID(subset[i]->name));
371 current_chromosome = subset[i]->chromosome;
373 else if (!next_chromosome)
375 next_chromosome = subset[i]->chromosome;
381 return next_chromosome;
384 void PedigreeGlobals::VerifySexSpecificOrder()
386 if (markerCount <= 1)
391 for (
int i = 0; i < markerCount; i++)
392 sortedMarkers[i] = GetMarkerInfo(i);
394 QuickSort(sortedMarkers, markerCount,
sizeof(
MarkerInfo *),
395 COMPAREFUNC MarkerInfo::ComparePosition);
397 double prev_female = sortedMarkers[0]->positionFemale;
398 double prev_male = sortedMarkers[0]->positionMale;
399 double curr_female, curr_male;
401 int prev_chromosome = sortedMarkers[0]->chromosome;
404 for (
int i = 1; i < markerCount; i++)
406 curr_chromosome = sortedMarkers[i]->chromosome;
407 curr_female = sortedMarkers[i]->positionFemale;
408 curr_male = sortedMarkers[i]->positionMale;
410 if (curr_chromosome == prev_chromosome &&
411 (curr_female < prev_female || curr_male < prev_male))
412 error(
"Sex-specific and sex-averaged maps are inconsistent.\n\n" 413 "In the sex-averaged map, marker %s (%.2f cM) follows marker %s (%.2f cM).\n" 414 "In the %smale map, marker %s (%.2f cM) PRECEDES marker %s (%.2f cM).\n",
415 (
const char *) sortedMarkers[i]->name,
416 sortedMarkers[i]->position * 100,
417 (
const char *) sortedMarkers[i-1]->name,
418 sortedMarkers[i-1]->position * 100,
419 curr_female < prev_female ?
"fe" :
"",
420 (
const char *) sortedMarkers[i]->name,
421 (curr_female < prev_female ? curr_female : curr_male) * 100,
422 (
const char *) sortedMarkers[i-1]->name,
423 (curr_female < prev_female ? prev_female : prev_male) * 100);
425 prev_chromosome = curr_chromosome;
426 prev_female = curr_female;
427 prev_male = curr_male;
430 delete [] sortedMarkers;
433 void PedigreeGlobals::LoadAlleleFrequencies(
const char * filename,
bool required)
437 if (filename[0] == 0)
440 error(
"No name provided for required allele freuquency file\n");
452 error(
"Failed to open required allele frequency file '%s'",
453 (
const char *) filename);
458 LoadAlleleFrequencies(f);
462 void PedigreeGlobals::LoadAlleleFrequencies(
IFILE & input)
469 bool need_blank_line =
false;
470 int allele_size, old_max, next_allele = 0;
472 while (!
ifeof(input) && !done)
476 buffer.ReadLine(input);
479 tokens.AddTokens(buffer, WHITESPACE);
481 if (tokens.Length() < 1)
continue;
483 switch (toupper(tokens[0][0]))
486 if (tokens.Length() == 1)
487 error(
"Unnamed marker in allele frequency file");
489 need_blank_line |= info->AdjustFrequencies();
490 info = GetMarkerInfo(tokens[1]);
492 info->freq.Push(0.0);
497 for (i = 1; i < tokens.Length(); i++)
499 buffer = next_allele++;
501 int allele = LoadAllele(info, buffer);
503 if (allele >= info->freq.Length())
505 old_max = info->freq.Length();
506 info->freq.Dimension(allele + 1);
507 for (j = old_max; j < allele; j++)
511 info->freq[allele] = tokens[i].AsDouble();
515 if (info == NULL)
continue;
517 if (tokens.Length() != 3)
518 error(
"Error reading named allele frequencies for locus %s\n" 519 "Lines with named alleles should have the format\n" 520 " A allele_label allele_frequency\n\n" 521 "But the following line was read:\n%s\n",
522 (
const char *) info->name, (
const char *) buffer);
524 allele_size = LoadAllele(info, tokens[1]);
525 next_allele = atoi(tokens[1]) + 1;
528 error(
"Error reading named allele frequencies for locus %s\n" 529 "An invalid allele label was encountered\n",
530 (
const char *) info->name);
532 if (allele_size >= info->freq.Length())
534 old_max = info->freq.Length();
535 info->freq.Dimension(allele_size + 1);
536 for (i = old_max; i < allele_size; i++)
540 info->freq[allele_size] = tokens[2];
546 error(
"Problem in allele frequency file.\n" 547 "Lines in this file should be of two types:\n" 548 " -- Marker name lines begin with an M\n" 549 " -- Frequency lines begin with an F\n\n" 550 "However the following line is different:\n%s\n",
551 (
const char *) buffer);
556 need_blank_line |= info->AdjustFrequencies();
558 if (need_blank_line) printf(
"\n");
561 void PedigreeGlobals::LoadMarkerMap(
const char * filename,
bool filter)
564 if (f == NULL)
return;
565 LoadMarkerMap(f, filter);
569 void PedigreeGlobals::LoadMarkerMap(
IFILE & input,
bool filter)
573 bool first_pass =
true;
575 while (!
ifeof(input))
577 buffer.ReadLine(input);
580 tokens.AddTokens(buffer, WHITESPACE);
582 if (tokens.Length() < 1)
continue;
586 sexSpecificMap = (tokens.Length() == 5);
594 if (tokens.Length() != 3 && !sexSpecificMap)
595 error(
"Error reading map file\n" 596 "Each line in this file should include 3 fields:\n" 597 "CHROMOSOME, MARKER_NAME, and POSITION\n" 598 "However the following line has %d fields\n%s\n",
599 tokens.Length(), (
const char *) buffer);
601 if (tokens.Length() != 5 && sexSpecificMap)
602 error(
"Error reading map file\n" 603 "Each line in this file should include 5 fields:\n\n" 604 "CHROMOSOME, MARKER_NAME, SEX_AVERAGED_POS, FEMALE_POS AND MALE_POS\n\n" 605 "However the following line has %d fields\n%s\n",
606 tokens.Length(), (
const char *) buffer);
608 bool previous_state = String::caseSensitive;
609 String::caseSensitive =
false;
611 if ((tokens[0] ==
"CHR" || tokens[0] ==
"CHROMOSOME") &&
612 (tokens[1] ==
"MARKER" || tokens[1] ==
"MARKER_NAME" || tokens[1] ==
"MRK") &&
613 (tokens[2] ==
"KOSAMBI" || tokens[2] ==
"POS" || tokens[2] ==
"POSITION" ||
614 tokens[2] ==
"SEX_AVERAGED_POS" || tokens[2] ==
"CM" || tokens[2] ==
"HALDANE"))
617 String::caseSensitive = previous_state;
620 if (LookupMarker(tokens[1]) < 0)
625 int chr = (tokens[0][0] ==
'x' || tokens[0][0] ==
'X') ? 999 : (
int) tokens[0];
627 info->chromosome = chr;
628 info->position = (double) tokens[2] * 0.01;
634 double female = strtod(tokens[3], &flag);
636 error(
"In the map file, the female cM position for marker\n" 637 "%s is %s. This is not a valid number.",
638 (
const char *) tokens[1], (
const char *) tokens[3]);
640 double male = strtod(tokens[4], &flag);
642 error(
"In the map file, the male cM position for marker\n" 643 "%s is %s. This is not a valid number.",
644 (
const char *) tokens[1], (
const char *) tokens[4]);
646 info->positionFemale = (double) female * 0.01;
647 info->positionMale = (double) male * 0.01;
650 info->positionFemale = info->positionMale = info->position;
653 if (sexSpecificMap) VerifySexSpecificOrder();
656 void PedigreeGlobals::LoadBasepairMap(
const char * filename)
660 error(
"The map file [%s] could not be opened\n\n" 661 "Please check that the filename is correct and that the file is\n" 662 "not being used by another program", filename);
667 void PedigreeGlobals::LoadBasepairMap(
IFILE & input)
672 sexSpecificMap =
false;
674 while (!
ifeof(input))
676 buffer.ReadLine(input);
679 tokens.AddTokens(buffer, WHITESPACE);
681 if (tokens.Length() < 1)
continue;
683 if (tokens.Length() != 3)
684 error(
"Error reading map file\n" 685 "Each line in this file should include 3 fields:\n" 686 "CHROMOSOME, MARKER_NAME, and POSITION\n" 687 "However the following line has %d fields\n%s\n",
688 tokens.Length(), (
const char *) buffer);
690 bool previous_state = String::caseSensitive;
691 String::caseSensitive =
false;
693 if ((tokens[0] ==
"CHR" || tokens[0] ==
"CHROMOSOME") &&
694 (tokens[1] ==
"MARKER" || tokens[1] ==
"MARKER_NAME" || tokens[1] ==
"MRK") &&
695 (tokens[2] ==
"BASEPAIR" || tokens[2] ==
"POS" || tokens[2] ==
"POSITION"))
698 String::caseSensitive = previous_state;
702 int chr = (tokens[0][0] ==
'x' || tokens[0][0] ==
'X') ? 999 : (
int) tokens[0];
704 info->chromosome = chr;
705 info->position = (double) tokens[2];
709 int PedigreeGlobals::instanceCount = 0;
711 PedigreeGlobals::~PedigreeGlobals()
713 if (--instanceCount == 0 && markerInfoSize)
715 for (
int i = 0; i < markerInfoCount; i++)
716 delete markerInfo[i];
717 delete [] markerInfo;
718 delete [] markerInfoByInteger;
722 void PedigreeGlobals::WriteMapFile(
const char * filename)
724 if (!MarkerPositionsAvailable())
727 FILE * output = fopen(filename,
"wt");
730 error(
"Creating map file \"%s\"", filename);
732 WriteMapFile(output);
736 void PedigreeGlobals::WriteMapFile(FILE * output)
739 fprintf(output,
"CHR MARKER POS\n");
741 fprintf(output,
"CHR MARKER POS POSF POSM\n");
743 for (
int i = 0; i < markerInfoCount; i++)
745 if (markerInfo[i]->chromosome != -1)
748 fprintf(output,
"%3d %-10s %g\n",
749 markerInfo[i]->chromosome,
750 (
const char *) markerInfo[i]->name,
751 markerInfo[i]->position * 100.0);
753 fprintf(output,
"%3d %-10s %g %g %g\n",
754 markerInfo[i]->chromosome,
755 (
const char *) markerInfo[i]->name,
756 markerInfo[i]->position * 100.0,
757 markerInfo[i]->positionFemale * 100.0,
758 markerInfo[i]->positionMale * 100.0);
763 void PedigreeGlobals::WriteFreqFile(
const char * filename,
bool old_format)
765 FILE * output = fopen(filename,
"wt");
768 error(
"Creating allele frequency file \"%s\"", filename);
770 WriteFreqFile(output, old_format);
774 void PedigreeGlobals::WriteFreqFile(FILE * output,
bool old_format)
776 for (
int i = 0; i < markerInfoCount; i++)
780 if (info->freq.Length() == 0)
continue;
782 fprintf(output,
"M %s\n", (
const char *) info->name);
784 if (old_format && info->alleleLabels.Length() == 0)
785 for (
int j = 1; j < info->freq.Length(); j++)
786 fprintf(output,
"%s%.5f%s",
787 j % 7 == 1 ?
"F " :
"", info->freq[j],
788 j == info->freq.Length() - 1 ?
"\n" : j % 7 == 0 ?
"\n" :
" ");
790 for (
int j = 1; j < info->freq.Length(); j++)
791 if (info->freq[j] > 1e-7)
792 fprintf(output,
"A %5s %.5f\n",
793 (
const char *) info->GetAlleleLabel(j), info->freq[j]);
797 bool PedigreeGlobals::MarkerPositionsAvailable()
799 for (
int i = 0; i < markerInfoCount; i++)
800 if (markerInfo[i]->chromosome != -1)
806 bool PedigreeGlobals::AlleleFrequenciesAvailable()
808 for (
int i = 0; i < markerInfoCount; i++)
809 if (markerInfo[i]->freq.Length() > 1)
815 int PedigreeGlobals::LoadAllele(
int marker,
String & token)
817 return LoadAllele(GetMarkerInfo(marker), token);
822 int allele = info->GetAlleleNumber(token);
824 if (allele >= 0)
return allele;
826 static unsigned char lookup[128];
827 static bool init =
false;
833 for (
int i = 0; i < 128; i++)
836 for (
int i =
'1'; i <=
'9'; i++)
839 lookup[int(
'a')] = lookup[int(
'A')] = lookup[int(
'c')] = lookup[int(
'C')] = 2;
840 lookup[int(
'g')] = lookup[int(
'G')] = lookup[int(
't')] = lookup[int(
'T')] = 2;
843 int first = token[0];
844 bool goodstart = first > 0 && first < 128;
846 if (token.Length() == 1 && goodstart && lookup[int(token[0])])
847 return info->NewAllele(token);
849 if (!goodstart || lookup[
int(token[0])] != 1)
852 int integer = token.AsInteger();
855 allele = info->GetAlleleNumber(token);
860 if (integer <= 0)
return 0;
862 if (integer > 1000000)
864 static bool warn_user =
true;
868 printf(
"Some allele numbers for marker %s are > 1000000\n" 869 "All allele numbers >1000000 will be treated as missing\n\n",
870 (
const char *) info->name);
877 return info->NewAllele(token);
882 stream <<
"MarkerInfo for marker " << m.name << std::endl;
883 stream <<
" located on chromsome " << m.chromosome <<
":" << (int64_t)(100 * m.position) << std::endl;
884 stream <<
" allele count = " << m.freq.Length() << std::endl;
885 stream <<
" label count = " << m.alleleLabels.Length() << std::endl;
886 if (m.freq.Length() == m.alleleLabels.Length())
888 for (
int i=0; i<m.freq.Length(); i++)
890 stream <<
" " << m.alleleLabels[i] <<
":" << m.freq[i];
896 stream <<
" output truncated - counts appear corrupt." << std::endl;