Package core :: Module ReadAndWrite
[hide private]
[frames] | no frames]

Source Code for Module core.ReadAndWrite

  1  #!/usr/bin/env python
 
  2  '''
 
  3  Created 2012
 
  4  
 
  5  Contains various helper functions that read input or produce output.
 
  6  
 
  7  
 
  8  @author: Sven Giese
 
  9  ''' 
 10  import os 
 11  import random 
 12  import HTSeq 
 13  
 
 14  
 
def readdna(filename):
    """
    Reads in the dna sequence of the given fasta.

    Every record of the file is visited; when the file holds more than one
    record, the last one is returned.
    NOTE(review): the original indentation of the ``return`` was lost, so
    "last record" (rather than "first record") should be confirmed.

    @type  filename: string
    @param filename: Fasta-file used as input.
    @rtype:   HTSeq Sequence object
    @return:  Reference Fasta.
    @raise ValueError: If the file does not contain any fasta record.
    """
    reference = None
    for record in HTSeq.FastaReader(filename):
        reference = HTSeq.Sequence(record.seq, record.name)

    # Without this guard an empty file surfaced as an UnboundLocalError.
    if reference is None:
        raise ValueError("no fasta record found in %r" % (filename,))
    return reference
28 29
def writefile(sequenceObject, filename):
    """
    Writes a given sequence object to a fasta file.

    The target file is opened in write mode (an existing file is
    overwritten) and is closed even if writing fails.

    @type  sequenceObject: HTSeq Sequence object
    @param sequenceObject: Reference sequence as fasta. Only its
        C{write_to_fasta_file(handle)} method is used.
    @type  filename: string
    @param filename: Path of the fasta file to write.
    """
    with open(filename, "w") as outfasta:
        sequenceObject.write_to_fasta_file(outfasta)
41 42
def writeoverview(Ndic_G, aadic_G, Ndic_AR, aadic_AR, filename):
    """
    Creates the "delta" file for the comparison of the two chromosomes.

    The file starts with a header line. It is followed by one line per
    nucleotide (key, genome value, artificial value, genome - artificial)
    and a line holding the sum of the absolute deltas. The same layout is
    then repeated for the amino acids.

    @type  Ndic_G: dictionary
    @param Ndic_G: Nucleotide dictionary genome.
    @type  aadic_G: dictionary
    @param aadic_G: AA dictionary genome.
    @type  Ndic_AR: dictionary
    @param Ndic_AR: Nucleotide dictionary artificial.
    @type  aadic_AR: dictionary
    @param aadic_AR: AA dictionary artificial.
    @type  filename: string
    @param filename: Output filename.
    @raise KeyError: If an artificial dictionary lacks a key that is
        present in the matching genome dictionary.
    """
    # "with" guarantees the file is flushed and closed; the handle used to
    # be left open.
    with open(filename, "w") as fobj:
        fobj.write("NUC /AA \t Genom \t Artificial Reference \t Delta \n")

        for genome, artificial in ((Ndic_G, Ndic_AR), (aadic_G, aadic_AR)):
            total = 0
            for item in genome:
                delta = genome[item] - artificial[item]
                columns = [str(item), str(genome[item]),
                           str(artificial[item]), str(delta)]
                fobj.write("\t".join(columns) + "\n")
                total += abs(delta)
            # Sum of absolute differences for this section.
            fobj.write(str(total) + "\n")
73 74 75 76
def nucleotide_dist_seq(seq, txt_file, shallwrite):
    """
    Counts the nucleotides of a sequence, optionally writes the result to a
    file and returns the dictionary.

    The counts are divided by the local divisor C{s}. It is fixed to 1, so
    the returned values are absolute counts stored as floats; set it to
    C{len(seq)} to obtain fractions instead.

    @type  seq: string
    @param seq: Nucleotide sequence.
    @type  txt_file: string
    @param txt_file: Output compare file (only opened if shallwrite is 1).
    @type  shallwrite: Bool
    @param shallwrite: If 1, one "key<TAB>value" line per nucleotide is
        written to txt_file.
    @rtype:   dictionary
    @return:  Values for the keys A, C, G, T and N. N can be used for
        checking: it should be the same number in the real and in the
        artificial chromosome.
    @raise KeyError: If seq contains a character other than A, C, G, T, N.
    """
    Nndic = {"A": 0, "C": 0, "G": 0, "T": 0, "N": 0}
    for base in seq:
        Nndic[base] += 1

    # Divisor for every count: 1 keeps absolute numbers.
    s = 1
    for item in Nndic:
        Nndic[item] = Nndic[item] / float(s)

    if shallwrite == 1:
        # "with" closes the file even if a write fails.
        with open(txt_file, 'w') as output_file:
            for item in Nndic:
                output_file.write(item + "\t" + str(Nndic[item]) + "\n")

    return Nndic
def aa_dist_seq(seq, txt_file, shallwrite):
    """
    Counts the amino acids of a sequence, optionally writes the result to a
    file and returns the dictionary.

    Characters that are not one of the 20 amino acid letters or the stop
    symbol "*" are skipped. The counts are divided by the local divisor
    C{n}. It is fixed to 1, so the returned values are absolute counts
    stored as floats; set it to C{len(seq)} to obtain fractions instead.

    @type  seq: string
    @param seq: Amino acid sequence (one-letter code).
    @type  txt_file: string
    @param txt_file: Output compare file (only opened if shallwrite is 1).
    @type  shallwrite: Bool
    @param shallwrite: If 1, one "key<TAB>value" line per amino acid is
        written to txt_file.
    @rtype:   dictionary
    @return:  Values for every amino acid letter and "*".
    """
    aadic = {"A": 0, "R": 0, "N": 0, "D": 0, "C": 0, "E": 0, "Q": 0,
             "G": 0, "H": 0, "I": 0, "L": 0, "K": 0, "M": 0, "F": 0,
             "P": 0, "S": 0, "T": 0, "W": 0, "Y": 0, "V": 0, "*": 0}

    for residue in seq:
        # Skip symbols outside the table, e.g. 'n' sequences.
        if residue in aadic:
            aadic[residue] += 1

    # Divisor for every count: 1 keeps absolute numbers.
    n = 1
    for item in aadic:
        aadic[item] = aadic[item] / float(n)

    if shallwrite == 1:
        # "with" closes the file even if a write fails.
        with open(txt_file, 'w') as output_file:
            for item in aadic:
                output_file.write(item + "\t" + str(aadic[item]) + "\n")

    return aadic
''' input: DNA sequence, output filename, and 1/0 for writing / not writing the output file '''
def nucleotide_dist_file(file_fasta, txt_file):
    """
    Appends the nucleotide distribution of a fasta file to a text file.

    All lines that do not start with ">" are joined (trailing whitespace
    removed) into one sequence. The dictionary returned by
    nucleotide_dist_seq for that sequence is appended to txt_file as a
    single line.

    @type  file_fasta: string
    @param file_fasta: Path of the fasta file holding the DNA sequence.
    @type  txt_file: string
    @param txt_file: Filename for output (opened in append mode).
    """
    with open(file_fasta, 'r') as input_file:
        seq = ''.join(line.rstrip() for line in input_file
                      if not line.startswith('>'))

    # nucleotide_dist_seq needs all three arguments. Passing 0 keeps it
    # from writing txt_file itself, because the result is appended below.
    distribution = nucleotide_dist_seq(seq, txt_file, 0)

    with open(txt_file, 'a') as output_file:
        output_file.write(str(distribution))
        output_file.write('\n')
'''Gets the number of mismatches between two sequences.
input: original sequence, decoy sequence'''
def gethammingdistance(original, artificial):
    """
    Calculates the hamming distance between two sequences and prints it.

    Three lines are printed: the number of mismatching positions, the
    average distance (length of the original divided by the number of
    mismatches) and a separator line. If the sequences are identical, the
    average distance is reported as "inf" instead of failing with a
    division by zero.

    @type  original: list
    @param original: Nucleotide sequence from the reference.
    @type  artificial: list
    @param artificial: Nucleotide sequence from the artificial reference.
        Positions beyond the length of original are ignored.
    @raise IndexError: If artificial is shorter than original.
    """
    hamming = 0
    for i, base in enumerate(original):
        if base != artificial[i]:
            hamming += 1

    if hamming:
        avg_distance = len(original) / float(hamming)
    else:
        # No mismatch at all: there is no finite spacing between mismatches.
        avg_distance = float("inf")

    print("#hamming distance REF-ART\t" + str(hamming))
    print("avg. distance:\t" + str(avg_distance))
    print("###########################\r\n")
189