Author: Laszlo Kajan <lkajan@rostlab.org>
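Description: allow parsing of Stockholm format alignments as well
 The CLUSTAL reader now also skips the '# STOCKHOLM' header line, the '//'
 terminator and '#=' markup lines, and treats '.' as a gap character.
 The 'CLUSTAL' header check is anchored to the start of the line.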
Forwarded: no
--- a/score_conservation.py
+++ b/score_conservation.py
@@ -84,6 +84,7 @@
 ################################################################################
 
 import math, sys, getopt
+import re
 # numarray imported below
 
 PSEUDOCOUNT = .0000001
@@ -98,7 +99,7 @@
 
 
 def usage():
-    print """\nUSAGE:\nscore_conservation [options] alignfile\n\t -alignfile must be in fasta or clustal format.\n\nOPTIONS:\n\t
+    print """\nUSAGE:\nscore_conservation [options] alignfile\n\t -alignfile must be in fasta, Stockholm or clustal format.\n\nOPTIONS:\n\t
     -a\treference sequence. Print scores in reference to a specific sequence (ignoring gaps). Default prints the entire column. [sequence name]\n\t
     -b\tlambda for window heuristic linear combination. Default=.5 [real in [0,1]]\n
     -d\tbackground distribution file, e.g., swissprot.distribution. Default=BLOSUM62 background [filename]\n\t
@@ -679,12 +680,14 @@
     return names, alignment
 	
 def read_clustal_alignment(filename):
-    """ Read in the alignment stored in the CLUSTAL file, filename. Return
+    """ Read in the alignment stored in the CLUSTAL or Stockholm file, filename. Return
     two lists: the names and sequences. """
 
     names = []
     alignment = []
 
+    re_stock_markup = re.compile('^#=')
+
     f = open(filename)
 
     for line in f:
@@ -692,16 +695,21 @@
 	if len(line) == 0: continue
 	if '*' in line: continue
 
-	if 'CLUSTAL' in line: continue
+	if line[0:7] == 'CLUSTAL': continue
+	if line[0:11] == '# STOCKHOLM': continue
+	if line[0:2] == '//': continue
+
+	if re_stock_markup.match(line): continue
 
 	t = line.split()
 
 	if len(t) == 2 and t[1][0] in iupac_alphabet:
+	    ali = t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X', '-').replace('\r', '').replace('.', '-')
 	    if t[0] not in names:
 		names.append(t[0])
-		alignment.append(t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X', '-').replace('\r', ''))
+		alignment.append(ali)
 	    else:
-		alignment[names.index(t[0])] += t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X','-').replace('\r', '')
+		alignment[names.index(t[0])] += ali
 		   
     return names, alignment
 
