Author: Michael R. Crusoe <michael.crusoe@gmail.com>
Description: Port the evaluation scripts to Python 3 (automated 2to3 conversion)
--- hisat2.orig/evaluation/build_indexes.py
+++ hisat2/evaluation/build_indexes.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os
 use_message = '''
@@ -47,7 +47,7 @@
                 assert False
             else:
                 assert False
-            print >> sys.stderr, cmd
+            print(cmd, file=sys.stderr)
             os.system(cmd)
             os.chdir("..")
 
--- hisat2.orig/evaluation/generate_reads.py
+++ hisat2/evaluation/generate_reads.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, random
 from argparse import ArgumentParser, FileType
@@ -18,7 +18,7 @@
     assert len(random_list) == len(reads)
     for i in random_list:
         read = reads[random_list[i]]
-        print >> read_file_out, "\n".join(read)
+        print("\n".join(read), file=read_file_out)
     read_file_out.close()
 
 
@@ -73,11 +73,11 @@
     data_dir_base = "../../../data"
 
     def generate_reads(cmd):
-        print >> sys.stderr, cmd
+        print(cmd, file=sys.stderr)
         os.system(cmd)
 
         random.seed(0)
-        print >> sys.stderr, "shuffle reads sim_1.fa and sim_2.fa"
+        print("shuffle reads sim_1.fa and sim_2.fa", file=sys.stderr)
         shuffle_pairs("sim_1.fa", "sim_2.fa")
         shuffle_reads_cmd = " mv sim_1.fa.shuffle sim_1.fa"
         shuffle_reads_cmd += "; mv sim_2.fa.shuffle sim_2.fa"
--- hisat2.orig/evaluation/get_data.py
+++ hisat2/evaluation/get_data.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os
 from argparse import ArgumentParser, FileType
@@ -21,7 +21,7 @@
         if os.path.exists(file):
             continue
         wget_cmd = "wget %s/data/%s" % (data_root, file)
-        print >> sys.stderr, wget_cmd
+        print(wget_cmd, file=sys.stderr)
         os.system(wget_cmd)
     os.chdir("..")
 
@@ -42,7 +42,7 @@
                 continue
             cmd = "wget %s/indexes/%s.tar.gz; tar xvzf %s.tar.gz; rm %s.tar.gz" % \
                 (data_root, aligner_dir, aligner_dir, aligner_dir)
-            print >> sys.stderr, cmd
+            print(cmd, file=sys.stderr)
             os.system(cmd)
     os.chdir("..")
 
@@ -83,7 +83,7 @@
                 continue
             cmd = "wget %s/reads/%s/%s.tar.gz; tar xvzf %s.tar.gz; rm %s.tar.gz" % \
                 (data_root, type, file, file, file)
-            print >> sys.stderr, cmd
+            print(cmd, file=sys.stderr)
             os.system(cmd)
         os.chdir("..")
     
--- hisat2.orig/evaluation/get_programs.py
+++ hisat2/evaluation/get_programs.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os
 use_message = '''
@@ -112,7 +112,7 @@
             cmd += "; source ./source_me.sh; make; cp bin/%s ../bin; cd .." % (program)
         else:
             assert False
-        print >> sys.stderr, cmd
+        print(cmd, file=sys.stderr)
         os.system(cmd)
 
     files = ["hisat2", "hisat2-align-s", "hisat2-build", "hisat2-build-s", "hisat2-inspect", "hisat2-inspect-s", "extract_splice_sites.py", "hisat2_extract_snps_haplotypes_UCSC.py", "hisat2_simulate_reads.py"]
--- hisat2.orig/evaluation/real/calculate_read_cost.py
+++ hisat2/evaluation/real/calculate_read_cost.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, subprocess, signal
 import multiprocessing
@@ -86,7 +86,7 @@
 
     genome_file.close()
 
-    print >> sys.stderr, "genome is loaded"
+    print("genome is loaded", file=sys.stderr)
     
     return chr_dic
 
@@ -111,7 +111,7 @@
             data = int(data)
         snps[chr].append([snpID, type, int(pos), data])
 
-    print >> sys.stderr, "snp is loaded"
+    print("snp is loaded", file=sys.stderr)
 
     return snps
 
@@ -158,7 +158,7 @@
     gtf_file.close()
     
     # Sort exons and merge where separating introns are <=5 bps
-    for tran, [chrom, strand, exons] in trans.items():
+    for tran, [chrom, strand, exons] in list(trans.items()):
             exons.sort()
             tmp_exons = [exons[0]]
             for i in range(1, len(exons)):
@@ -170,7 +170,7 @@
 
     # Calculate and print the unique junctions
     junctions = set()
-    for chrom, strand, exons in trans.values():
+    for chrom, strand, exons in list(trans.values()):
         for i in range(1, len(exons)):
             junctions.add(to_junction_str([chrom, exons[i-1][1], exons[i][0]]))
 
@@ -411,7 +411,7 @@
             high = mid - 1
 
     snps = []
-    for i in xrange(low, len(chr_snps)):
+    for i in range(low, len(chr_snps)):
         snp = chr_snps[i]
         snpID, type, pos, data = snp
         pos2 = pos
@@ -539,7 +539,7 @@
 
             if cigar_op == "S":
                 if i != 0 and i != len(cigars) - 1:
-                    print >> sys.stderr, "S is located at %dth out of %d %s" % (i+1, len(cigars), cigar_str)
+                    print("S is located at %dth out of %d %s" % (i+1, len(cigars), cigar_str), file=sys.stderr)
 
             if cigar_op in "MS":
                 ref_pos = right_pos
@@ -596,8 +596,8 @@
         if hisat2:
             XM, NM = HISAT2_XM, HISAT2_NM
         if NM < MAX_EDIT:
-            print >> temp_read_file, "%s\t%d\t%s\t%s\t%s\tXM:i:%d\tNM:i:%d" % \
-                  (read_id, flag, chr, pos, cigar_str, XM, NM)
+            print("%s\t%d\t%s\t%s\t%s\tXM:i:%d\tNM:i:%d" % \
+                  (read_id, flag, chr, pos, cigar_str, XM, NM), file=temp_read_file)
 
             found = False
             me = "%s\t%s\t%d" % (read_id, chr, pos)
@@ -611,8 +611,8 @@
                             flag, chr, pos, cigar_str, XM, NM, mate_flag, mate_chr_str, mate_pos, mate_cigar_str, mate_XM, mate_NM = \
                                   mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM, flag, chr, pos, cigar_str, XM, NM
 
-                        print >> temp_pair_file, "%s\t%d\t%s\t%d\t%s\tXM:i:%d\tNM:i:%d\t%d\t%s\t%d\t%s\tXM:i:%d\tNM:i:%d" % \
-                              (read_id, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM, flag, chr, pos, cigar_str, XM, NM)
+                        print("%s\t%d\t%s\t%d\t%s\tXM:i:%d\tNM:i:%d\t%d\t%s\t%d\t%s\tXM:i:%d\tNM:i:%d" % \
+                              (read_id, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM, flag, chr, pos, cigar_str, XM, NM), file=temp_pair_file)
                         found = True
                         break
 
@@ -764,7 +764,7 @@
 
 def is_concordantly(read_id, flag, chr, pos, cigar_str, XM, NM, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM):
     concord_length = 1000
-    segment_length = sys.maxint
+    segment_length = sys.maxsize
 
     pairs = {}
     pairs[0] = [flag, chr, pos, cigar_str, XM, NM]
@@ -819,7 +819,7 @@
 
         # check concordantly
         concord_align, segment_len = is_concordantly(read_id, flag, chr, pos, cigar_str, XM, NM, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM)
-        print >> (con_file if concord_align else discon_file), line.strip(), ('none', 'first')[(flag & 0x40 == 0x40)], ('none', 'last')[(mate_flag & 0x80 == 0x80)], segment_len
+        print(line.strip(), ('none', 'first')[(flag & 0x40 == 0x40)], ('none', 'last')[(mate_flag & 0x80 == 0x80)], segment_len, file=(con_file if concord_align else discon_file))
 
         if junction_pair:
             for junction_str, is_gtf_junction in pair_junctions:
@@ -911,7 +911,7 @@
 
 def create_sql_db(sql_db):
     if os.path.exists(sql_db):
-        print >> sys.stderr, sql_db, "already exists!"
+        print(sql_db, "already exists!", file=sys.stderr)
         return
 
     columns = [
@@ -961,14 +961,14 @@
     database_fname = database_name + "_" + paired + ".analysis"
     database_file = open(database_fname, "w")
 
-    print >> database_file, "aligner\tuse_annotation\tend_type\tedit_distance\tmapped_reads\tjunction_reads\tgtf_junction_reads\tjunctions\tgtf_junctions\truntime"
+    print("aligner\tuse_annotation\tend_type\tedit_distance\tmapped_reads\tjunction_reads\tgtf_junction_reads\tjunctions\tgtf_junctions\truntime", file=database_file)
     for aligner in aligners:
         for edit_distance in range(MAX_EDIT):
             sql_row = "SELECT aligner, use_annotation, end_type, edit_distance, mapped_reads, junction_reads, gtf_junction_reads, junctions, gtf_junctions, runtime FROM Mappings"
             sql_row += " WHERE reads = '%s' and aligner = '%s' and edit_distance = %d and end_type = '%s' ORDER BY created DESC LIMIT 1" % (database_name, aligner, edit_distance, paired)
             output = sql_execute(sql_db, sql_row)
             if output:
-                print >> database_file, output
+                print(output, file=database_file)
             
     database_file.close()
 
@@ -1072,7 +1072,7 @@
         gtf_junctions.append(junction)
     gtf_junctions = sorted(gtf_junctions, cmp=junction_cmp)            
 
-    print >> sys.stderr, "aligner\tuse_annotation\tend_type\tedit_distance\tmapped_reads\tjunction_reads\tgtf_junction_reads\tjunctions\tgtf_junctions\truntime"
+    print("aligner\tuse_annotation\tend_type\tedit_distance\tmapped_reads\tjunction_reads\tgtf_junction_reads\tjunctions\tgtf_junctions\truntime", file=sys.stderr)
     
     for paired in [False, True]:
         if not paired and not single_end:
@@ -1219,7 +1219,7 @@
                 if version != "":
                     version = int(version)
                 else:
-                    version = sys.maxint
+                    version = sys.maxsize
 
                 if not RNA:
                     cmd += ["--no-spliced-alignment"]
@@ -1441,7 +1441,7 @@
                         dummy_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../one.fq", "../two.fq", "/dev/null")
                         start_time = datetime.now()
                         if verbose:
-                            print >> sys.stderr, start_time, "\t", " ".join(dummy_cmd)
+                            print(start_time, "\t", " ".join(dummy_cmd), file=sys.stderr)
                         if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa"]:
                             proc = subprocess.Popen(dummy_cmd, stdout=open("/dev/null", "w"), stderr=subprocess.PIPE)
                         else:
@@ -1451,7 +1451,7 @@
                         duration = finish_time - start_time
                         duration = duration.total_seconds()
                         if verbose:
-                            print >> sys.stderr, finish_time, "duration:", duration
+                            print(finish_time, "duration:", duration, file=sys.stderr)
                         loading_time = duration
 
                 # align all reads
@@ -1459,7 +1459,7 @@
                     sweep_read_cmd = "cat ../%s ../%s > /dev/null" % (type_read1_fname, type_read2_fname)
                 else:
                     sweep_read_cmd = "cat ../%s > /dev/null" % (type_read1_fname)
-                print >> sys.stderr, datetime.now(), "\t", sweep_read_cmd
+                print(datetime.now(), "\t", sweep_read_cmd, file=sys.stderr)
                 os.system(sweep_read_cmd)
 
                 skip_alignment = False
@@ -1470,7 +1470,7 @@
                     aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname)
                     start_time = datetime.now()
                     if verbose:
-                        print >> sys.stderr, start_time, "\t", " ".join(aligner_cmd)
+                        print(start_time, "\t", " ".join(aligner_cmd), file=sys.stderr)
                     if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa", "vg", "minimap2"]:
                         proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
                     else:
@@ -1483,14 +1483,14 @@
                     if duration < 0.1:
                         duration = 0.1
                     if verbose:
-                        print >> sys.stderr, finish_time, "duration:", duration
+                        print(finish_time, "duration:", duration, file=sys.stderr)
 
                     if verbose:
-                        print >> sys.stderr, finish_time, "Memory Usage: %dMB" % (int(mem_usage) / 1024)
+                        print(finish_time, "Memory Usage: %dMB" % (int(mem_usage) / 1024), file=sys.stderr)
 
                     if debug and aligner == "hisat" and type == "x1":
                         os.system("cat metrics.out")
-                        print >> sys.stderr, "\ttime: %.4f" % (duration)
+                        print("\ttime: %.4f" % (duration), file=sys.stderr)
                         # break
 
                 if aligner == "star" and type in ["", "gtf"]:
@@ -1498,7 +1498,7 @@
                 elif aligner in ["hisat2", "hisat"] and type == "x2":
                     aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1)
                     if verbose:
-                        print >> sys.stderr, start_time, "\t", " ".join(aligner_cmd)
+                        print(start_time, "\t", " ".join(aligner_cmd), file=sys.stderr)
                     start_time = datetime.now()
                     proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
                     proc.communicate()
@@ -1508,7 +1508,7 @@
                     if duration < 0.1:
                         duration = 0.1
                     if verbose:
-                        print >> sys.stderr, finish_time, "duration:", duration
+                        print(finish_time, "duration:", duration, file=sys.stderr)
                 elif aligner == "star" and type == "x2":
                     assert os.path.exists("SJ.out.tab")
                     os.system("awk 'BEGIN {OFS=\"\t\"; strChar[0]=\".\"; strChar[1]=\"+\"; strChar[2]=\"-\";} {if($5>0){print $1,$2,$3,strChar[$4]}}' SJ.out.tab > SJ.out.tab.Pass1.sjdb")
@@ -1517,18 +1517,18 @@
                             continue
                         os.remove(file)
                     star_index_cmd = "STAR --genomeDir ./ --runMode genomeGenerate --genomeFastaFiles ../../../../data/genome.fa --sjdbFileChrStartEnd SJ.out.tab.Pass1.sjdb --sjdbOverhang 100 --runThreadN %d" % (num_threads)
-                    print >> sys.stderr, "\t", datetime.now(), star_index_cmd
+                    print("\t", datetime.now(), star_index_cmd, file=sys.stderr)
                     os.system(star_index_cmd)
                     if verbose:
-                        print >> sys.stderr, "\t", datetime.now(), " ".join(dummy_cmd)
+                        print("\t", datetime.now(), " ".join(dummy_cmd), file=sys.stderr)
                     proc = subprocess.Popen(dummy_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                     proc.communicate()
                     if verbose:
-                        print >> sys.stderr, "\t", datetime.now(), "finished"
+                        print("\t", datetime.now(), "finished", file=sys.stderr)
                     aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1)
                     start_time = datetime.now()
                     if verbose:
-                        print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd)
+                        print("\t", start_time, " ".join(aligner_cmd), file=sys.stderr)
                     proc = subprocess.Popen(aligner_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                     proc.communicate()
                     finish_time = datetime.now()
@@ -1537,7 +1537,7 @@
                     if duration < 0.1:
                         duration = 0.1
                     if verbose:
-                        print >> sys.stderr, "\t", finish_time, "finished:", duration
+                        print("\t", finish_time, "finished:", duration, file=sys.stderr)
                     os.system("mv Aligned.out.sam %s" % out_fname)
                 elif aligner == "tophat2":
                     os.system("samtools sort -n tophat_out/accepted_hits.bam accepted_hits; samtools view -h accepted_hits.bam > %s" % out_fname)
@@ -1597,8 +1597,8 @@
                             sql_execute("../" + sql_db_name, sql_insert)     
                     
 
-                    print >> sys.stderr, output,
-                    print >> done_file, output
+                    print(output, end=' ', file=sys.stderr)
+                    print(output, file=done_file)
                 else:
                     sum = [0, 0, 0, 0, 0]
                     stat = read_stat(read_sam, gtf_junctions, chr_dic)
@@ -1616,8 +1616,8 @@
                                     (workdir, genome, "single", aligner_name, get_aligner_version(aligner), "no", i, mapped_reads, junction_reads, gtf_junction_reads, num_junctions, num_gtf_junctions, duration, platform.node(), " ".join(aligner_cmd))
                             sql_execute("../" + sql_db_name, sql_insert)                    
                         
-                    print >> sys.stderr, output,
-                    print >> done_file, output
+                    print(output, end=' ', file=sys.stderr)
+                    print(output, file=done_file)
                     
                 done_file.close()
 
--- hisat2.orig/evaluation/real/init.py
+++ hisat2/evaluation/real/init.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, signal
 import string, re
@@ -51,7 +51,7 @@
                  not os.path.exists(read_dir_base + read_dir + "/1.fq"):
                 continue
 
-        print >> sys.stderr, "Processing", read_dir, "..."
+        print("Processing", read_dir, "...", file=sys.stderr)
 
         os.mkdir(read_dir)
         os.chdir(read_dir)
@@ -74,18 +74,18 @@
 
             if dir_name == "whole":
                 ln_cmd = "ln -s ../../%s%s/%s ." % (read_dir_base, read_dir, fq_1_name)
-                print >> sys.stderr, ln_cmd
+                print(ln_cmd, file=sys.stderr)
                 os.system(ln_cmd)
                 ln_cmd = "ln -s ../../%s%s/%s ." % (read_dir_base, read_dir, fq_2_name)
-                print >> sys.stderr, ln_cmd
+                print(ln_cmd, file=sys.stderr)
                 os.system(ln_cmd)
             else:
                 cmd = make_cat_cmd(gz_file, read_dir_base, read_dir, fq_1_name, num_reads)
-                print >> sys.stderr, cmd
+                print(cmd, file=sys.stderr)
                 os.system(cmd)
 
                 cmd = make_cat_cmd(gz_file, read_dir_base, read_dir, fq_2_name, num_reads)
-                print >> sys.stderr, cmd
+                print(cmd, file=sys.stderr)
                 os.system(cmd)
 
             os.system("ln -s ../../calculate_read_cost.py .")
--- hisat2.orig/evaluation/simulation/calculate_read_cost.py
+++ hisat2/evaluation/simulation/calculate_read_cost.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, subprocess
 import multiprocessing
@@ -170,18 +170,18 @@
                     repeat_db[rptRefName][rptName].allele[allele_idx].add_position(chr, pos, strand)
 
     else:
-        print >> sys.stderr, 'Cannot open file', repeat_filename
+        print('Cannot open file', repeat_filename, file=sys.stderr)
 
 
-    print >> sys.stderr, 'Build repeatMap'
+    print('Build repeatMap', file=sys.stderr)
     repeat_map = {}
-    for rptRefName, repeats in repeat_db.items():
+    for rptRefName, repeats in list(repeat_db.items()):
         #print 'Processing', rptRefName
         repeat_pos_list = []
 
-        for repeatName, repeat in repeats.items():
+        for repeatName, repeat in list(repeats.items()):
             #print 'Common Allele:', repeatName, repeat.repeat_name
-            repeat_left = sys.maxint
+            repeat_left = sys.maxsize
             repeat_right = 0
 
             #for allele_id, repeatAllele in repeat.allele.items():
@@ -242,11 +242,11 @@
 
     i = find_leftmost_pos(rmap, left)
     if i >= len(rmap):
-        print >> sys.stderr, 'Value Error'
+        print('Value Error', file=sys.stderr)
         return
 
     if right > rmap[i][0]:
-        print >> sys.stderr, 'Not repeat'
+        print('Not repeat', file=sys.stderr)
         return
 
     repeat = repeats[rmap[i][1]]
@@ -260,7 +260,7 @@
         if (left >= rpos) and (right <= rpos + rlen):
             offset = left - rpos
             for genome_pos in allele.positions:
-                print genome_pos[0], genome_pos[1] + offset + 1, genome_pos[2], genome_pos[1]
+                print(genome_pos[0], genome_pos[1] + offset + 1, genome_pos[2], genome_pos[1])
 
 """
 """
@@ -284,7 +284,7 @@
 
     genome_file.close()
 
-    print >> sys.stderr, "genome is loaded"
+    print("genome is loaded", file=sys.stderr)
     
     return chr_dic
 
@@ -331,7 +331,7 @@
     gtf_file.close()
     
     # Sort exons and merge where separating introns are <=5 bps
-    for tran, [chrom, strand, exons] in trans.items():
+    for tran, [chrom, strand, exons] in list(trans.items()):
             exons.sort()
             tmp_exons = [exons[0]]
             for i in range(1, len(exons)):
@@ -343,7 +343,7 @@
 
     # Calculate and print the unique junctions
     junctions = set()
-    for chrom, strand, exons in trans.values():
+    for chrom, strand, exons in list(trans.values()):
         for i in range(1, len(exons)):
             junctions.add(to_junction_str([chrom, exons[i-1][1], exons[i][0]]))
 
@@ -381,7 +381,7 @@
                     repeat_dic[rep][allele].append([chr, pos, strand])
                     repeat_pos[rep][allele].add(pos)
 
-    for rep, repeats in repeat_info.items():
+    for rep, repeats in list(repeat_info.items()):
         def my_cmp(a, b):
             if a[1] < b[1]:
                 return -1
@@ -687,11 +687,11 @@
 
     i = find_leftmost_pos(rmap, left)
     if i >= len(rmap):
-        print >> sys.stderr, 'Value Error'
+        print('Value Error', file=sys.stderr)
         return alignments
 
     if right > rmap[i][0]:
-        print >> sys.stderr, 'Not repeat'
+        print('Not repeat', file=sys.stderr)
         return alignments
 
     repeat = repeats[rmap[i][1]]
@@ -775,7 +775,7 @@
             prev_read_id = read_id
             continue
 
-        NH, NM, XA = "", sys.maxint, []
+        NH, NM, XA = "", sys.maxsize, []
         for i in range(11, len(cols)):
             col = cols[i]
             # "nM" from STAR
@@ -867,7 +867,7 @@
             chr, pos, cigar_str = alignment
             pos, cigar_str, NM_real = adjust_alignment(chr, pos, cigar_str)
             p_str = "%s\t%s\t%d\t%s\tNM:i:%d" % (read_id, chr, pos, cigar_str, NM_real)
-            print >> outfile, p_str
+            print(p_str, file=outfile)
 
         if aligner == "hisat2":
             if prev_read_id != read_id:
@@ -940,7 +940,7 @@
     num_aligned_reads, num_ualigned_reads = 0, 0
     
     prev_read_id, pair_list = "", set()
-    prev_NM = sys.maxint
+    prev_NM = sys.maxsize
     prev_NH1, prev_NH2 = 0, 0
     NH1_real, NH2_real = 0, 0
 
@@ -969,7 +969,7 @@
         if read_id != prev_read_id:
             num_pairs += 1
             pair_list = set()
-            prev_NM = sys.maxint
+            prev_NM = sys.maxsize
 
         flag = int(flag)
         canonical_pos1, canonical_pos2 = int(pos1), int(pos2)
@@ -982,7 +982,7 @@
             continue
 
         concordant = (flag & 0x2 != 0)        
-        NH, NM1, YT, XA = sys.maxint, sys.maxint, "", []
+        NH, NM1, YT, XA = sys.maxsize, sys.maxsize, "", []
         for i in range(11, len(cols)):
             col = cols[i]
             # "nM" from STAR
@@ -1151,7 +1151,7 @@
 
                         if p_str not in pair_reported:
                             pair_reported.add(p_str)
-                            print >> outfile, p_str
+                            print(p_str, file=outfile)
 
             if not me in read_dic:
                 read_dic[me] = []
@@ -1348,7 +1348,7 @@
     file.close()
 
     temp_junctions, temp_gtf_junctions = set(), set()
-    for read_name, can_junctions in db_junction_dic.items():
+    for read_name, can_junctions in list(db_junction_dic.items()):
         if len(can_junctions) <= 0:
             continue
 
@@ -1492,7 +1492,7 @@
                     break
 
         if found:
-            print >> mapped_file, read_name
+            print(read_name, file=mapped_file)
             mapped += 1
             if snp_included:
                 snp_mapped += 1
@@ -1501,7 +1501,7 @@
                 if snp_included:
                     snp_unique_mapped += 1
             if found_at_first:
-                print >> first_mapped_file, read_name
+                print(read_name, file=first_mapped_file)
                 first_mapped += 1
                 if snp_included:
                     snp_first_mapped += 1
@@ -1527,7 +1527,7 @@
             false_can_junctions += 1
         else:
             false_noncan_junctions += 1
-    print >> sys.stderr, "\t\t\tfalse junctions: %d (canonical), %d (non-canonical)" % (false_can_junctions, false_noncan_junctions)
+    print("\t\t\tfalse junctions: %d (canonical), %d (non-canonical)" % (false_can_junctions, false_noncan_junctions), file=sys.stderr)
     
     return mapped, unique_mapped, first_mapped, unmapped, aligned, multi_aligned, \
         snp_mapped, snp_unique_mapped, snp_first_mapped, snp_unmapped, \
@@ -1587,7 +1587,7 @@
     file.close()
 
     temp_junctions, temp_gtf_junctions = set(), set()
-    for read_name, can_junctions in db_junction_dic.items():
+    for read_name, can_junctions in list(db_junction_dic.items()):
         if len(can_junctions) <= 0:
             continue
 
@@ -1767,17 +1767,17 @@
                     break
 
         if found:
-            print >> mapped_file, read_name
+            print(read_name, file=mapped_file)
             mapped += 1
             if snp_included:
                 snp_mapped += 1
             if len(maps) == 1:
                 unique_mapped += 1
-                print >> uniq_mapped_file, read_name
+                print(read_name, file=uniq_mapped_file)
                 if snp_included:
                     snp_unique_mapped += 1
             if found_at_first:
-                print >> first_mapped_file, read_name
+                print(read_name, file=first_mapped_file)
                 first_mapped += 1
                 if snp_included:
                     snp_first_mapped += 1
@@ -1802,7 +1802,7 @@
             false_can_junctions += 1
         else:
             false_noncan_junctions += 1
-    print >> sys.stderr, "\t\t\tfalse junctions: %d (canonical), %d (non-canonical)" % (false_can_junctions, false_noncan_junctions)
+    print("\t\t\tfalse junctions: %d (canonical), %d (non-canonical)" % (false_can_junctions, false_noncan_junctions), file=sys.stderr)
         
     
     return mapped, unique_mapped, first_mapped, unmapped, aligned, multi_aligned, \
@@ -1832,9 +1832,9 @@
                 write = read_id in mapped_ids
 
             if write:
-                print >> mapped_file, line[:-1]
+                print(line[:-1], file=mapped_file)
             else:
-                print >> unmapped_file, line[:-1]
+                print(line[:-1], file=unmapped_file)
 
         read_file.close()
         mapped_file.close()
@@ -1865,7 +1865,7 @@
 """
 def create_sql_db(sql_db):
     if os.path.exists(sql_db):
-        print >> sys.stderr, sql_db, "already exists!"
+        print(sql_db, "already exists!", file=sys.stderr)
         return
     
     columns = [
@@ -1936,14 +1936,14 @@
             database_fname += "_single"
         database_fname += ".analysis"
         database_file = open(database_fname, "w")
-        print >> database_file, "end_type\ttype\taligner\tnum_reads\ttime\tmem\tmapped_reads\tunique_mapped_reads\tunmapped_reads\tmapping_point\ttrue_gtf_junctions\ttemp_junctions\ttemp_gtf_junctions"
+        print("end_type\ttype\taligner\tnum_reads\ttime\tmem\tmapped_reads\tunique_mapped_reads\tunmapped_reads\tmapping_point\ttrue_gtf_junctions\ttemp_junctions\ttemp_gtf_junctions", file=database_file)
         for aligner in aligners:
             for read_type in read_types:
                 sql_row = "SELECT end_type, type, aligner, num_reads, time, mem, mapped_reads, unique_mapped_reads, unmapped_reads, mapping_point, snp_mapped_reads, snp_unique_mapped_reads, snp_unmapped_reads, true_gtf_junctions, temp_junctions, temp_gtf_junctions FROM ReadCosts"
                 sql_row += " WHERE genome = '%s' and head = '%s' and aligner = '%s' and type = '%s' and end_type = '%s' ORDER BY created DESC LIMIT 1" % (genome_name, database_name, aligner, read_type, end_type)
                 output = sql_execute(sql_db, sql_row)
                 if output:
-                    print >> database_file, output
+                    print(output, file=database_file)
 
         database_file.close()
 
@@ -2069,7 +2069,7 @@
             type_sam_file.close()
             if numreads <= 0:
                 continue
-            print >> sys.stderr, "%s\t%d" % (readtype, numreads)
+            print("%s\t%d" % (readtype, numreads), file=sys.stderr)
 
             junctions, junctions_set = [], set()
             type_junction_file = open(type_junction_fname)
@@ -2208,7 +2208,7 @@
                     if version != "":
                         version = int(version)
                     else:
-                        version = sys.maxint
+                        version = sys.maxsize
 
                     if not RNA:
                         cmd += ["--no-spliced-alignment"]
@@ -2417,7 +2417,7 @@
                 if not RNA and readtype != "all":
                     continue
 
-                print >> sys.stderr, "\t%s\t%s" % (aligner_name, str(datetime.now()))
+                print("\t%s\t%s" % (aligner_name, str(datetime.now())), file=sys.stderr)
                 if options != "":
                     option_name = options.replace(' ', '').replace('-', '').replace(',', '')
                     aligner_name = aligner_name + '_' + option_name
@@ -2457,7 +2457,7 @@
                             dummy_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../one.fa", "../two.fa", "/dev/null")
                             start_time = datetime.now()
                             if verbose:
-                                print >> sys.stderr, start_time, "\t", " ".join(dummy_cmd)
+                                print(start_time, "\t", " ".join(dummy_cmd), file=sys.stderr)
                             if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa"]:
                                 proc = subprocess.Popen(dummy_cmd, stdout=open("/dev/null", "w"), stderr=subprocess.PIPE)
                             else:
@@ -2467,14 +2467,14 @@
                             duration = finish_time - start_time
                             duration = duration.total_seconds()
                             if verbose:
-                                print >> sys.stderr, finish_time, "duration:", duration
+                                print(finish_time, "duration:", duration, file=sys.stderr)
                             loading_time = duration
 
                     # Align all reads
                     aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname)
                     start_time = datetime.now()
                     if verbose:
-                        print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd)
+                        print("\t", start_time, " ".join(aligner_cmd), file=sys.stderr)
                     if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa", "vg", "minimap2"]:
                         proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
                     else:
@@ -2487,11 +2487,11 @@
                     if duration < 0.1:
                         duration = 0.1
                     if verbose:
-                        print >> sys.stderr, "\t", finish_time, "finished:", duration
+                        print("\t", finish_time, "finished:", duration, file=sys.stderr)
 
                     if debug and aligner == "hisat2":
                         os.system("cat metrics.out")
-                        print >> sys.stderr, "\ttime: %.4f" % (duration)
+                        print("\ttime: %.4f" % (duration), file=sys.stderr)
 
                     if aligner == "star" and type in ["", "gtf"]:
                         os.system("mv Aligned.out.sam %s" % out_fname)
@@ -2499,7 +2499,7 @@
                         aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1)
                         start_time = datetime.now()
                         if verbose:
-                            print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd)
+                            print("\t", start_time, " ".join(aligner_cmd), file=sys.stderr)
                         proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
                         proc.communicate()
                         finish_time = datetime.now()
@@ -2508,7 +2508,7 @@
                         if duration < 0.1:
                             duration = 0.1
                         if verbose:
-                            print >> sys.stderr, "\t", finish_time, "finished:", duration
+                            print("\t", finish_time, "finished:", duration, file=sys.stderr)
                     elif aligner == "star" and type == "x2":
                         assert os.path.exists("SJ.out.tab")
                         os.system("awk 'BEGIN {OFS=\"\t\"; strChar[0]=\".\"; strChar[1]=\"+\"; strChar[2]=\"-\";} {if($5>0){print $1,$2,$3,strChar[$4]}}' SJ.out.tab > SJ.out.tab.Pass1.sjdb")
@@ -2518,18 +2518,18 @@
                             os.remove(file)
                         star_index_cmd = "%s/STAR --genomeDir ./ --runMode genomeGenerate --genomeFastaFiles ../../../data/%s.fa --sjdbFileChrStartEnd SJ.out.tab.Pass1.sjdb --sjdbOverhang 99 --runThreadN %d" % (aligner_bin_base, genome, num_threads)
                         if verbose:
-                            print >> sys.stderr, "\t", datetime.now(), star_index_cmd
+                            print("\t", datetime.now(), star_index_cmd, file=sys.stderr)
                         os.system(star_index_cmd)
                         if verbose:
-                            print >> sys.stderr, "\t", datetime.now(), " ".join(dummy_cmd)
+                            print("\t", datetime.now(), " ".join(dummy_cmd), file=sys.stderr)
                         proc = subprocess.Popen(dummy_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                         proc.communicate()
                         if verbose:
-                            print >> sys.stderr, "\t", datetime.now(), "finished"
+                            print("\t", datetime.now(), "finished", file=sys.stderr)
                         aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1)
                         start_time = datetime.now()
                         if verbose:
-                            print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd)
+                            print("\t", start_time, " ".join(aligner_cmd), file=sys.stderr)
                         proc = subprocess.Popen(aligner_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                         proc.communicate()
                         finish_time = datetime.now()
@@ -2538,7 +2538,7 @@
                         if duration < 0.1:
                             duration = 0.1
                         if verbose:
-                            print >> sys.stderr, "\t", finish_time, "finished:", duration
+                            print("\t", finish_time, "finished:", duration, file=sys.stderr)
                         os.system("mv Aligned.out.sam %s" % out_fname)
                     elif aligner == "tophat2":
                         os.system("samtools sort -n tophat_out/accepted_hits.bam accepted_hits; samtools view -h accepted_hits.bam > %s" % out_fname)
@@ -2551,7 +2551,7 @@
                         os.system("tar cvzf %s.tar.gz %s &> /dev/null" % (out_fname, out_fname))
 
                 if runtime_only:
-                    print >> sys.stderr, "\t\t\tMemory Usage: %dMB" % (int(mem_usage) / 1024)
+                    print("\t\t\tMemory Usage: %dMB" % (int(mem_usage) / 1024), file=sys.stderr)
                     os.chdir("..")
                     continue
 
@@ -2560,7 +2560,7 @@
                     pid_list = []
                     if paired:
                         if mp_mode:
-                            for i in xrange(mp_num):
+                            for i in range(mp_num):
                                 p = Process(target=extract_pair, args=(out_fname, out_fname2, chr_dic, RNA, aligner, version, repeat_db, repeat_map, debug_dic, i))
                                 pid_list.append(p)
                                 p.start()
@@ -2570,7 +2570,7 @@
 
                             # merge 
                             os.system("mv %s %s" % (out_fname2 + ".0", out_fname2))
-                            for i in xrange(1, mp_num):
+                            for i in range(1, mp_num):
                                 os.system("cat %s >> %s" % (out_fname2 + "." + str(i), out_fname2))
                                 os.system("rm %s" % (out_fname2 + "." + str(i)))
 
@@ -2581,7 +2581,7 @@
                     else:
                         if mp_mode:
                             # Prepare queues
-                            for i in xrange(mp_num): 
+                            for i in range(mp_num): 
                                 p = Process(target=extract_single, args=(out_fname, out_fname2, chr_dic, aligner, version, repeat_db, repeat_map, debug_dic, i))
                                 pid_list.append(p)
                                 p.start()
@@ -2592,7 +2592,7 @@
 
                             # merge 
                             os.system("mv %s %s" % (out_fname2 + ".0", out_fname2))
-                            for i in xrange(1, mp_num):
+                            for i in range(1, mp_num):
                                 os.system("cat %s >> %s" % (out_fname2 + "." + str(i), out_fname2))
                                 os.system("rm %s" % (out_fname2 + "." + str(i)))
                             
@@ -2628,25 +2628,25 @@
                     assert mapped + unmapped == numreads
                     
                     if two_step:
-                        print >> sys.stderr, "\t\t%s" % readtype2
-                    print >> sys.stderr, "\t\taligned: %d, multi aligned: %d" % (aligned, multi_aligned)
-                    print >> sys.stderr, "\t\tcorrectly mapped: %d (%.2f%%) mapping_point: %.2f" % (mapped, float(mapped) * 100.0 / numreads, mapping_point * 100.0 / numreads)
-                    print >> sys.stderr, "\t\tcorrectly mapped at first: %d (%.2f%%)" % (first_mapped, float(first_mapped) * 100.0 / numreads)
-                    print >> sys.stderr, "\t\tuniquely and correctly mapped: %d (%.2f%%)" % (unique_mapped, float(unique_mapped) * 100.0 / numreads)
+                        print("\t\t%s" % readtype2, file=sys.stderr)
+                    print("\t\taligned: %d, multi aligned: %d" % (aligned, multi_aligned), file=sys.stderr)
+                    print("\t\tcorrectly mapped: %d (%.2f%%) mapping_point: %.2f" % (mapped, float(mapped) * 100.0 / numreads, mapping_point * 100.0 / numreads), file=sys.stderr)
+                    print("\t\tcorrectly mapped at first: %d (%.2f%%)" % (first_mapped, float(first_mapped) * 100.0 / numreads), file=sys.stderr)
+                    print("\t\tuniquely and correctly mapped: %d (%.2f%%)" % (unique_mapped, float(unique_mapped) * 100.0 / numreads), file=sys.stderr)
                     snp_numreads = snp_mapped + snp_unmapped
                     if snp_numreads > 0:
-                        print >> sys.stderr, "\t\t\t\tSNP: reads: %d" % (snp_numreads)
-                        print >> sys.stderr, "\t\t\t\tSNP: correctly mapped: %d (%.2f%%)" % (snp_mapped, float(snp_mapped) * 100.0 / snp_numreads)
-                        print >> sys.stderr, "\t\t\t\tSNP: correctly mapped at first: %d (%.2f%%)" % (snp_first_mapped, float(snp_first_mapped) * 100.0 / snp_numreads)
-                        print >> sys.stderr, "\t\t\t\tSNP: uniquely and correctly mapped: %d (%.2f%%)" % (snp_unique_mapped, float(snp_unique_mapped) * 100.0 / snp_numreads)
+                        print("\t\t\t\tSNP: reads: %d" % (snp_numreads), file=sys.stderr)
+                        print("\t\t\t\tSNP: correctly mapped: %d (%.2f%%)" % (snp_mapped, float(snp_mapped) * 100.0 / snp_numreads), file=sys.stderr)
+                        print("\t\t\t\tSNP: correctly mapped at first: %d (%.2f%%)" % (snp_first_mapped, float(snp_first_mapped) * 100.0 / snp_numreads), file=sys.stderr)
+                        print("\t\t\t\tSNP: uniquely and correctly mapped: %d (%.2f%%)" % (snp_unique_mapped, float(snp_unique_mapped) * 100.0 / snp_numreads), file=sys.stderr)
                     if readtype == readtype2:
-                        print >> sys.stderr, "\t\t\t%d reads per sec (all)" % (numreads / max(1.0, duration))
+                        print("\t\t\t%d reads per sec (all)" % (numreads / max(1.0, duration)), file=sys.stderr)
                     if RNA:
-                        print >> sys.stderr, "\t\tjunc. sensitivity %d / %d (%.2f%%), junc. accuracy: %d / %d (%.2f%%)" % \
+                        print("\t\tjunc. sensitivity %d / %d (%.2f%%), junc. accuracy: %d / %d (%.2f%%)" % \
                             (temp_gtf_junctions, len(junctions), float(temp_gtf_junctions) * 100.0 / max(1, len(junctions)), \
-                                 temp_gtf_junctions, temp_junctions, float(temp_gtf_junctions) * 100.0 / max(1, temp_junctions))
+                                 temp_gtf_junctions, temp_junctions, float(temp_gtf_junctions) * 100.0 / max(1, temp_junctions)), file=sys.stderr)
 
-                    print >> sys.stderr, "\t\t\tMemory Usage: %dMB" % (int(mem_usage) / 1024)
+                    print("\t\t\tMemory Usage: %dMB" % (int(mem_usage) / 1024), file=sys.stderr)
 
                     if duration > 0.0:
                         if sql_write and os.path.exists("../" + sql_db_name):
@@ -2669,14 +2669,14 @@
 
                 os.chdir("..")
 
-    print >> sys.stdout, "\t".join(["type", "aligner", "all", "all_time", "mem", "mapped", "unique_mapped", "unmapped", "mapping point", "snp_mapped", "snp_unique_mapped", "snp_unmapped", "true_gtf_junctions", "temp_junctions", "temp_gtf_junctions"])
+    print("\t".join(["type", "aligner", "all", "all_time", "mem", "mapped", "unique_mapped", "unmapped", "mapping point", "snp_mapped", "snp_unique_mapped", "snp_unmapped", "true_gtf_junctions", "temp_junctions", "temp_gtf_junctions"]), file=sys.stdout)
     for line in align_stat:
         outstr = ""
         for item in line:
             if outstr != "":
                 outstr += "\t"
             outstr += str(item)
-        print >> sys.stdout, outstr
+        print(outstr, file=sys.stdout)
 
     if os.path.exists(sql_db_name):
         write_analysis_data(sql_db_name, genome, data_base)
--- hisat2.orig/evaluation/simulation/init.py
+++ hisat2/evaluation/simulation/init.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os
 import string, re
@@ -50,7 +50,7 @@
 
                     if p_str not in pair_reported:
                         pair_reported.add(p_str)
-                        print >> out_file, p_str
+                        print(p_str, file=out_file)
 
         if not me in read_dic:
             read_dic[me] = []
@@ -83,8 +83,7 @@
             elif field.startswith("Zs"):
                 Zs = "\t" + field
 
-        print >> (sim1_file if left_read else sim2_file), \
-            "%s\t%s\t%s\t%s%s%s%s" % (read_id, chr, pos, cigar, TI, NM, Zs)
+        print("%s\t%s\t%s\t%s%s%s%s" % (read_id, chr, pos, cigar, TI, NM, Zs), file=(sim1_file if left_read else sim2_file))
 
     sim1_file.close()
     sim2_file.close()
@@ -194,7 +193,7 @@
                             break
                     assert left_anchor > 0
                     right_anchor = 0
-                    for ci in reversed(range(len(cigars))):
+                    for ci in reversed(list(range(len(cigars)))):
                         c = cigars[ci][-1]
                         c_len = int(cigars[ci][:-1])
                         if c in "MI":
@@ -262,7 +261,7 @@
 
                 if readtype == "all" or readtype == readtype2:
                     read_ids.add(read_id)
-                    print >> type_sam_file, line[:-1]
+                    print(line[:-1], file=type_sam_file)
                     junctions += get_junctions(chr, int(pos), cigar)
                     if paired:
                         junctions += get_junctions(chr2, int(pos2), cigar2)
@@ -285,7 +284,7 @@
             # write the junctions into a file
             type_junction_file = open(type_junction_fname, "w")
             for junction in junctions:
-                print >> type_junction_file, "%s\t%d\t%d" % (junction[0], junction[1], junction[2])
+                print("%s\t%d\t%d" % (junction[0], junction[1], junction[2]), file=type_junction_file)
             type_junction_file.close()
 
             def write_reads(read_fname, type_read_fname):
@@ -299,7 +298,7 @@
                         write = read_id in read_ids
 
                     if write:
-                        print >> type_read_file, line[:-1]
+                        print(line[:-1], file=type_read_file)
 
                 read_file.close()
                 type_read_file.close()
@@ -323,7 +322,7 @@
                 not os.path.exists(read_dir_base + read_dir + "/sim_2.fa"):
             continue
 
-        print >> sys.stderr, "Processing", read_dir, "..."
+        print("Processing", read_dir, "...", file=sys.stderr)
 
         os.mkdir(read_dir)
         os.chdir(read_dir)
--- hisat2.orig/evaluation/tests/HLA_novel/hisatgenotype_locus_prev.py
+++ hisat2/evaluation/tests/HLA_novel/hisatgenotype_locus_prev.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
 #
@@ -79,7 +79,7 @@
         aligner_cmd += ["-1", "%s" % read_fname[0],
                         "-2", "%s" % read_fname[1]]
     if verbose >= 1:
-        print >> sys.stderr, ' '.join(aligner_cmd)
+        print(' '.join(aligner_cmd), file=sys.stderr)
     align_proc = subprocess.Popen(aligner_cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=open("/dev/null", 'w'))
@@ -116,7 +116,7 @@
 """ 
 def normalize(prob):
     total = sum(prob.values())
-    for allele, mass in prob.items():
+    for allele, mass in list(prob.items()):
         prob[allele] = mass / total
 
         
@@ -124,7 +124,7 @@
 """
 def prob_diff(prob1, prob2):
     diff = 0.0
-    for allele in prob1.keys():
+    for allele in list(prob1.keys()):
         if allele in prob2:
             diff += abs(prob1[allele] - prob2[allele])
         else:
@@ -153,15 +153,15 @@
                      Gene_length):
     def normalize2(prob, length):
         total = 0
-        for allele, mass in prob.items():
+        for allele, mass in list(prob.items()):
             assert allele in length
             total += (mass / length[allele])
-        for allele, mass in prob.items():
+        for allele, mass in list(prob.items()):
             assert allele in length
             prob[allele] = mass / length[allele] / total
 
     Gene_prob, Gene_prob_next = {}, {}
-    for cmpt, count in Gene_cmpt.items():
+    for cmpt, count in list(Gene_cmpt.items()):
         alleles = cmpt.split('-')
         for allele in alleles:
             if allele not in Gene_prob:
@@ -171,7 +171,7 @@
     normalize(Gene_prob)
     def next_prob(Gene_cmpt, Gene_prob, Gene_length):
         Gene_prob_next = {}
-        for cmpt, count in Gene_cmpt.items():
+        for cmpt, count in list(Gene_cmpt.items()):
             alleles = cmpt.split('-')
             alleles_prob = 0.0
             for allele in alleles:
@@ -201,14 +201,14 @@
             Gene_prob_next2 = next_prob(Gene_cmpt, Gene_prob_next, Gene_length)
             sum_squared_r, sum_squared_v = 0.0, 0.0
             p_r, p_v = {}, {}
-            for a in Gene_prob.keys():
+            for a in list(Gene_prob.keys()):
                 p_r[a] = Gene_prob_next[a] - Gene_prob[a]
                 sum_squared_r += (p_r[a] * p_r[a])
                 p_v[a] = Gene_prob_next2[a] - Gene_prob_next[a] - p_r[a]
                 sum_squared_v += (p_v[a] * p_v[a])
             if sum_squared_v > 0.0:
                 gamma = -math.sqrt(sum_squared_r / sum_squared_v)
-                for a in Gene_prob.keys():
+                for a in list(Gene_prob.keys()):
                     Gene_prob_next2[a] = max(0.0, Gene_prob[a] - 2 * gamma * p_r[a] + gamma * gamma * p_v[a]);
                 Gene_prob_next = next_prob(Gene_cmpt, Gene_prob_next2, Gene_length)
 
@@ -219,17 +219,17 @@
         if iter >= 10:
             Gene_prob2 = {}
             avg_prob = sum(Gene_prob.values()) / len(Gene_prob)
-            for allele, prob in Gene_prob.items():
+            for allele, prob in list(Gene_prob.items()):
                 if prob >= 0.005 or prob > avg_prob:
                     Gene_prob2[allele] = prob
             Gene_prob = Gene_prob2
 
         # DK - debugging purposes
         if iter % 10 == 0 and False:
-            print "iter", iter
-            for allele, prob in Gene_prob.items():
+            print("iter", iter)
+            for allele, prob in list(Gene_prob.items()):
                 if prob >= 0.01:
-                    print >> sys.stderr, "\t", iter, allele, prob, str(datetime.now())
+                    print("\t", iter, allele, prob, str(datetime.now()), file=sys.stderr)
         
         iter += 1
         
@@ -241,7 +241,7 @@
     
     # normalize(Gene_prob)
     normalize2(Gene_prob, Gene_length)
-    Gene_prob = [[allele, prob] for allele, prob in Gene_prob.items()]
-    Gene_prob = sorted(Gene_prob, cmp=Gene_prob_cmp)
+    from functools import cmp_to_key  # Python 3: sorted() no longer accepts cmp=
+    Gene_prob = sorted(([allele, prob] for allele, prob in Gene_prob.items()), key=cmp_to_key(Gene_prob_cmp))
     return Gene_prob
 
@@ -288,7 +288,7 @@
 """
 def get_exonic_vars(Vars, exons):
     vars = set()
-    for var_id, var in Vars.items():
+    for var_id, var in list(Vars.items()):
         var_type, var_left, var_data = var
         var_right = var_left
         if var_type == "deletion":
@@ -305,7 +305,7 @@
 """
 def get_rep_alleles(Links, exon_vars):
     allele_vars = {}
-    for var, alleles in Links.items():
+    for var, alleles in list(Links.items()):
         if var not in exon_vars:
             continue
         for allele in alleles:
@@ -314,7 +314,7 @@
             allele_vars[allele].add(var)
 
     allele_groups = {}
-    for allele, vars in allele_vars.items():
+    for allele, vars in list(allele_vars.items()):
         vars = '-'.join(vars)
         if vars not in allele_groups:
             allele_groups[vars] = []
@@ -322,7 +322,7 @@
 
     allele_reps = {} # allele representatives
     allele_rep_groups = {} # allele groups by allele representatives
-    for allele_members in allele_groups.values():
+    for allele_members in list(allele_groups.values()):
         assert len(allele_members) > 0
         allele_rep = allele_members[0]
         allele_rep_groups[allele_rep] = allele_members
@@ -402,9 +402,9 @@
                 if j_type == "single" and j_pos == latest_pos - 1:
                     j_cmp_pos = j_pos - del_len + other_del_len
                     if debug:
-                        print Vars[j_id]
-                        print j_pos, ref_seq[j_pos]
-                        print j_cmp_pos, ref_seq[j_cmp_pos]
+                        print(Vars[j_id])
+                        print(j_pos, ref_seq[j_pos])
+                        print(j_cmp_pos, ref_seq[j_cmp_pos])
                     if j_data == ref_seq[j_cmp_pos]:
                         add_alt(Alts, alt_list, var_id, j_id)
                         latest_pos = j_pos
@@ -465,9 +465,9 @@
                     # DK - debugging purposes
                     if debug:
                         pos2_ = latest_pos + 1 + del_len - other_del_len
-                        print "DK: latest_pos:", latest_pos + 1, pos2_
-                        print "DK: var_pos:", var_pos, "del_len:", del_len, "other_del_len:", other_del_len
-                        print "DK:", ref_seq[latest_pos + 1], ref_seq[pos2_]
+                        print("DK: latest_pos:", latest_pos + 1, pos2_)
+                        print("DK: var_pos:", var_pos, "del_len:", del_len, "other_del_len:", other_del_len)
+                        print("DK:", ref_seq[latest_pos + 1], ref_seq[pos2_])
                     
                     latest_pos += 1
                     add_alt(Alts, alt_list, var_id, str(latest_pos))
@@ -479,9 +479,9 @@
                 if j_type == "single" and j_pos == latest_pos + 1:
                     j_cmp_pos = j_pos + del_len - other_del_len
                     if debug:
-                        print Vars[j_id]
-                        print j_pos, ref_seq[j_pos]
-                        print j_cmp_pos, ref_seq[j_cmp_pos]
+                        print(Vars[j_id])
+                        print(j_pos, ref_seq[j_pos])
+                        print(j_cmp_pos, ref_seq[j_cmp_pos])
 
                     if j_data == ref_seq[j_cmp_pos]:
                         add_alt(Alts, alt_list, var_id, j_id)
@@ -542,7 +542,7 @@
             continue
         debug = (var_id == "hv454a")
         if debug:
-            print Vars[var_id]
+            print(Vars[var_id])
 
         alt_list = []
         var_j = lower_bound(Var_list, var_pos + del_len - 1)
@@ -578,7 +578,7 @@
                                debug)
 
         if debug:
-            print "DK :-)"
+            print("DK :-)")
             sys.exit(1)
 
     def assert_print_alts(Alts, dir):
@@ -640,8 +640,8 @@
                         
             return seq, seq_left, seq_right
         
-        for alt_list1, alt_list2 in Alts.items():
-            if verbose >= 2: print >> sys.stderr, "\t", dir, ":", alt_list1, alt_list2
+        for alt_list1, alt_list2 in list(Alts.items()):
+            if verbose >= 2: print("\t", dir, ":", alt_list1, alt_list2, file=sys.stderr)
             out_str = "\t\t"
             alt_list1 = alt_list1.split('-')            
             for i in range(len(alt_list1)):
@@ -664,7 +664,7 @@
                     if j + 1 < len(alt_list3):
                         out_str += ", "
                 out_str += "]"
-            if verbose >= 2: print >> sys.stderr, out_str
+            if verbose >= 2: print(out_str, file=sys.stderr)
 
             for i in range(len(alt_list2)):
                 alt_list3 = alt_list2[i]
@@ -702,9 +702,9 @@
                         else:
                             seq2 += ref_seq[seq2_right:seq2_right+len_diff]
                 if verbose >= 3:
-                    print >> sys.stderr, "\t\t", alt_list1, alt_list3
-                    print >> sys.stderr, "\t\t\t", seq1, seq1_left, seq1_right
-                    print >> sys.stderr, "\t\t\t", seq2, seq2_left, seq2_right
+                    print("\t\t", alt_list1, alt_list3, file=sys.stderr)
+                    print("\t\t\t", seq1, seq1_left, seq1_right, file=sys.stderr)
+                    print("\t\t\t", seq2, seq2_left, seq2_right, file=sys.stderr)
                 assert seq1 == seq2            
             
     assert_print_alts(Alts_left, "left")
@@ -739,7 +739,7 @@
             # Left direction
             id_str = var_id
             total_del_len = length if type == "deletion" else 0
-            for j in reversed(range(0, i)):
+            for j in reversed(list(range(0, i))):
                 cmp_j = cmp_list[j]
                 j_type, j_pos, j_len = cmp_j[:3]
                 if j_type != "match":
@@ -792,16 +792,16 @@
                             alt_left_pos += alt_total_del_len
                         if left_pos >= alt_left_pos:
                             if verbose >= 2:
-                                print "LEFT:", cmp_list
-                                print "\t", type, "id_str:", id_str, "=>", alts_id_str, "=>", back_alts, "left_pos:", left_pos, "alt_left_pos:", alt_left_pos
+                                print("LEFT:", cmp_list)
+                                print("\t", type, "id_str:", id_str, "=>", alts_id_str, "=>", back_alts, "left_pos:", left_pos, "alt_left_pos:", alt_left_pos)
                             cmp_left = i + 1
                             break
 
             # DK - debugging purposes
             if debug:
-                print "DK: var_id:", var_id
-                print "DK: cmp_list:", cmp_list
-                print "DK: cmp_right:", cmp_right
+                print("DK: var_id:", var_id)
+                print("DK: cmp_list:", cmp_list)
+                print("DK: cmp_right:", cmp_right)
                 # sys.exit(1)
     
             # Right direction
@@ -824,7 +824,7 @@
 
                 # DK - debugging purposes
                 if debug:
-                    print "DK: id_str:", id_str
+                    print("DK: id_str:", id_str)
                 
                 if id_str in Alts_right:
                     orig_alts = id_str.split('-')
@@ -877,8 +877,8 @@
                                     
                         if right_pos <= alt_right_pos:
                             if verbose >= 2:
-                                print "RIGHT:", cmp_list
-                                print "\t", type, "id_str:", id_str, "=>", alts_id_str, "right_pos:", right_pos, "alt_right_pos:", alt_right_pos
+                                print("RIGHT:", cmp_list)
+                                print("\t", type, "id_str:", id_str, "=>", alts_id_str, "right_pos:", right_pos, "alt_right_pos:", alt_right_pos)
                             cmp_right = i - 1
                             break
         i += 1
@@ -971,7 +971,7 @@
         num_nt = sum(nt_dic.values())
         nt_set = []
         if num_nt >= 20:
-            for nt, count in nt_dic.items():
+            for nt, count in list(nt_dic.items()):
                 if nt not in "ACGT":
                     continue
                 if count >= num_nt * 0.2 or count >= 7:
@@ -980,7 +980,7 @@
 
     # Sort variants
     var_list = [[] for i in range(len(mpileup))]
-    for var_id, value in vars.items():
+    for var_id, value in list(vars.items()):
         var_type, var_pos, var_data = value
         assert var_pos < len(var_list)
         var_list[var_pos].append([var_id, var_type, var_data])
@@ -991,7 +991,7 @@
         nt_dic = mpileup[i][1]
         ref_nt = ref_seq[i]
         new_nt_dic = {}
-        for nt, count in nt_dic.items():
+        for nt, count in list(nt_dic.items()):
             var_id = ""
             if nt == 'D':
                 if i <= skip_i:
@@ -1041,8 +1041,8 @@
                   cmp_list,
                   debug = False):
     if debug:
-        print cmp_list
-        print read_seq
+        print(cmp_list)
+        print(read_seq)
 
     i = 0
     while i < len(cmp_list):
@@ -1089,7 +1089,7 @@
             nt_set = mpileup[left][0]
 
             if debug:
-                print left, read_bp, ref_bp, mpileup[left]
+                print(left, read_bp, ref_bp, mpileup[left])
 
             if len(nt_set) > 0 and read_bp not in nt_set:
                 read_bp = 'N' if len(nt_set) > 1 else nt_set[0]
@@ -1113,8 +1113,8 @@
                         var_idx += 1
 
                 if debug:
-                    print left, read_bp, ref_bp, mpileup[left]
-                    print cmp_list[i]
+                    print(left, read_bp, ref_bp, mpileup[left])
+                    print(cmp_list[i])
 
         read_pos += length
         i += 1
@@ -1132,8 +1132,8 @@
         i += 1
 
     if debug:
-        print cmp_list
-        print read_seq
+        print(cmp_list)
+        print(read_seq)
                             
     return cmp_list, read_seq
 
@@ -1267,9 +1267,9 @@
     for aligner, index_type in aligners:
         for f_ in [sys.stderr, report_file]:
             if index_type == "graph":
-                print >> f_, "\n\t\t%s %s" % (aligner, index_type)
+                print("\n\t\t%s %s" % (aligner, index_type), file=f_)
             else:
-                print >> f_, "\n\t\t%s %s" % (aligner, index_type)
+                print("\n\t\t%s %s" % (aligner, index_type), file=f_)
 
         remove_alignment_file = False
         if alignment_fname == "":
@@ -1364,7 +1364,7 @@
 
             # List of nodes that represent alleles
             allele_vars = {}
-            for var_id, allele_list in Links.items():
+            for var_id, allele_list in list(Links.items()):
                 for allele_id in allele_list:
                     if allele_id not in Genes[gene]:
                         continue
@@ -1480,8 +1480,8 @@
                     # Unalined?
                     if flag & 0x4 != 0:
                         if simulation and verbose >= 2:
-                            print "Unaligned"
-                            print "\t", line
+                            print("Unaligned")
+                            print("\t", line)
                         continue
 
                     # Concordantly mapped?
@@ -1686,7 +1686,7 @@
                             # Check if this deletion is artificial alignment
                             assert right_pos < mpileup
                             del_count, nt_count = 0, 0
-                            for nt, value in mpileup[right_pos][1].items():
+                            for nt, value in list(mpileup[right_pos][1].items()):
                                 count = value[0]
                                 if nt == 'D':
                                     del_count += count
@@ -1775,7 +1775,7 @@
                     def add_stat(Gene_cmpt, Gene_counts, Gene_count_per_read, include_alleles = set()):
                         max_count = max(Gene_count_per_read.values())
                         cur_cmpt = set()
-                        for allele, count in Gene_count_per_read.items():
+                        for allele, count in list(Gene_count_per_read.items()):
                             if count < max_count:
                                 continue
 
@@ -1796,7 +1796,7 @@
                         # alleles = ["A*24:36N", "A*24:359N"]
                         allele1_found, allele2_found = False, False
                         if alleles[0] != "":
-                            for allele, count in Gene_count_per_read.items():
+                            for allele, count in list(Gene_count_per_read.items()):
                                 if count < max_count:
                                     continue
                                 if allele == alleles[0]:
@@ -1804,13 +1804,13 @@
                                 elif allele == alleles[1]:
                                     allele2_found = True
                             if allele1_found != allele2_found:
-                                print alleles[0], Gene_count_per_read[alleles[0]]
-                                print alleles[1], Gene_count_per_read[alleles[1]]
+                                print(alleles[0], Gene_count_per_read[alleles[0]])
+                                print(alleles[1], Gene_count_per_read[alleles[1]])
                                 if allele1_found:
-                                    print ("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, Gene_count_per_read[alleles[1]]))
+                                    print("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, Gene_count_per_read[alleles[1]]))
                                 else:
-                                    print ("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, Gene_count_per_read[alleles[0]]))
-                                print read_seq
+                                    print("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, Gene_count_per_read[alleles[0]]))
+                                print(read_seq)
 
                         cur_cmpt = sorted(list(cur_cmpt))
                         cur_cmpt = '-'.join(cur_cmpt)
@@ -1832,7 +1832,7 @@
                             if debug_read_count == debug_max_read_count and \
                                Gene_count_per_read["A*24:02:01:02L"] < debug_max_read_count and \
                                Gene_count_per_read["A*01:01:01:01"] < debug_max_read_count:
-                                print prev_read_id
+                                print(prev_read_id)
                                 None
 
                             if prev_read_id == "HSQ1008:175:C0JVFACXX:7:1208:5604:41201":
@@ -1861,9 +1861,9 @@
                                base_fname == "hla":
                                 cur_cmpt = cur_cmpt.split('-')
                                 if not(set(cur_cmpt) & set(test_Gene_names)):
-                                    print "%s are chosen instead of %s" % ('-'.join(cur_cmpt), '-'.join(test_Gene_names))
+                                    print("%s are chosen instead of %s" % ('-'.join(cur_cmpt), '-'.join(test_Gene_names)))
                                     for prev_line in prev_lines:
-                                        print "\t", prev_line
+                                        print("\t", prev_line)
 
                             prev_lines = []
 
@@ -1880,18 +1880,18 @@
                         alleles = Links[var_id]
                         if verbose >= 2:
                             if add > 0 and not (set(alleles) & debug_allele_names):
-                                print "Add:", add, debug_allele_names, "-", var_id
-                                print "\t", line
-                                print "\t", alleles
+                                print("Add:", add, debug_allele_names, "-", var_id)
+                                print("\t", line)
+                                print("\t", alleles)
                             if add < 0 and set(alleles) & debug_allele_names:
-                                print "Add:", add, debug_allele_names, "-", var_id
-                                print "\t", line
+                                print("Add:", add, debug_allele_names, "-", var_id)
+                                print("\t", line)
 
                         for allele in alleles:
                             count_per_read[allele] += add
 
                     # Decide which allele(s) a read most likely came from
-                    for var_id, data in gene_vars.items():
+                    for var_id, data in list(gene_vars.items()):
                         if var_id == "unknown" or var_id.startswith("nv"):
                             continue
                         var_type, var_pos, var_data = data
@@ -1924,14 +1924,14 @@
                     DK_debug = False
                     if orig_read_id == "a46|L_451_88M12D12M_88|D|hv2":
                         DK_debug = True
-                        print line
-                        print cmp_list
-                        print "positive vars:", positive_vars
-                        print "negative vars:", negative_vars
-                        print "cmp_list[%d, %d]" % (cmp_list_left, cmp_list_right)
+                        print(line)
+                        print(cmp_list)
+                        print("positive vars:", positive_vars)
+                        print("negative vars:", negative_vars)
+                        print("cmp_list[%d, %d]" % (cmp_list_left, cmp_list_right))
 
                     # Deletions at 5' and 3' ends
-                    for var_id, data in gene_vars.items():
+                    for var_id, data in list(gene_vars.items()):
                         var_type, var_pos, var_data = data
                         if var_type != "deletion":
                             continue
@@ -2002,8 +2002,8 @@
                                     positive_vars.add(var_id)
 
                                     if read_id == "HSQ1008:175:C0JVFACXX:6:2207:13481:60924" and False:
-                                        print "add positive var:", var_id
-                                        print "\tcmp_list:", cmp_list_left, cmp_list_right, cmp_list
+                                        print("add positive var:", var_id)
+                                        print("\tcmp_list:", cmp_list_left, cmp_list_right, cmp_list)
 
                             
                             cmp_MD += ("%d%s" % (MD_match_len, ref_seq[ref_pos]))
@@ -2069,15 +2069,15 @@
                     if read_pos != len(read_seq) or \
                             cmp_cigar_str != cigar_str:
                             # cmp_MD != MD: # Disabled due to error correction
-                        print >> sys.stderr, "Error:", cigar_str, MD
-                        print >> sys.stderr, "\tcomputed:", cmp_cigar_str, cmp_MD
-                        print >> sys.stderr, "\tcmp list:", cmp_list
+                        print("Error:", cigar_str, MD, file=sys.stderr)
+                        print("\tcomputed:", cmp_cigar_str, cmp_MD, file=sys.stderr)
+                        print("\tcmp list:", cmp_list, file=sys.stderr)
                         assert False
 
                     # DK - debugging purposes
                     if DK_debug:
-                        print "positive:", positive_vars
-                        print "negative:", negative_vars
+                        print("positive:", positive_vars)
+                        print("negative:", negative_vars)
 
                     # Node
                     if assembly:
@@ -2100,8 +2100,8 @@
                         add_count(Gene_gen_count_per_read, positive_var, 1)
 
                     if read_id == "HSQ1008:175:C0JVFACXX:6:2207:13481:60924" and False:
-                        print "positive_vars:", positive_vars
-                        print "negative_vars:", negative_vars
+                        print("positive_vars:", positive_vars)
+                        print("negative_vars:", negative_vars)
 
 
                     for negative_var in negative_vars:
@@ -2118,7 +2118,7 @@
                     continue
 
                 for f_ in [sys.stderr, report_file]:
-                    print >> f_, "\t\t\tNumber of reads aligned: %d" % num_reads
+                    print("\t\t\tNumber of reads aligned: %d" % num_reads, file=f_)
 
                 if prev_read_id != None:
                     if base_fname == "hla":
@@ -2182,7 +2182,7 @@
 
             if base_fname != "hla":
                 Gene_counts = Gene_gen_counts
-            Gene_counts = [[allele, count] for allele, count in Gene_counts.items()]
+            Gene_counts = [[allele, count] for allele, count in list(Gene_counts.items())]
             def Gene_count_cmp(a, b):
                 if a[1] != b[1]:
                     return b[1] - a[1]
@@ -2199,7 +2199,7 @@
                     for test_Gene_name in test_Gene_names:
                         if count[0] == test_Gene_name:
                             for f_ in [sys.stderr, report_file]:
-                                print >> f_, "\t\t\t*** %d ranked %s (count: %d)" % (count_i + 1, test_Gene_name, count[1])
+                                print("\t\t\t*** %d ranked %s (count: %d)" % (count_i + 1, test_Gene_name, count[1]), file=f_)
                             found = True
                             """
                             if count_i > 0 and Gene_counts[0][1] > count[1]:
@@ -2210,14 +2210,14 @@
                             """
                     if count_i < 5 and not found:
                         for f_ in [sys.stderr, report_file]:
-                            print >> f_, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1])
+                            print("\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]), file=f_)
                 else:
                     for f_ in [sys.stderr, report_file]:
-                        print >> f_, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1])
+                        print("\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]), file=f_)
                     if count_i >= 9:
                         break
             for f_ in [sys.stderr, report_file]:
-                print >> f_
+                print(file=f_)
 
             # Calculate the abundance of representative alleles on exonic sequences
             if base_fname == "hla":
@@ -2238,7 +2238,7 @@
 
                 if len(gen_alleles) > 0:
                     Gene_gen_cmpt2 = {}
-                    for cmpt, value in Gene_gen_cmpt.items():
+                    for cmpt, value in list(Gene_gen_cmpt.items()):
                         cmpt2 = []
                         for allele in cmpt.split('-'):
                             if allele in gen_alleles:
@@ -2262,7 +2262,7 @@
                             Gene_combined_prob[allele] = prob
                     for allele, prob in Gene_gen_prob:
                         Gene_combined_prob[allele] = prob * gen_prob_sum
-                    Gene_prob = [[allele, prob] for allele, prob in Gene_combined_prob.items()]
+                    Gene_prob = [[allele, prob] for allele, prob in list(Gene_combined_prob.items())]
                     Gene_prob = sorted(Gene_prob, cmp=Gene_prob_cmp)
             else:
                 Gene_prob = single_abundance(Gene_gen_cmpt, Gene_lengths[gene])
@@ -2302,8 +2302,8 @@
                                                             ref_allele)
 
                     def get_best_alleles(left, right, vars):
-                        max_alleles, max_common = [], -sys.maxint
-                        for allele_name, allele_node in predicted_allele_nodes.items():
+                        max_alleles, max_common = [], -sys.maxsize
+                        for allele_name, allele_node in list(predicted_allele_nodes.items()):
                             tmp_vars = allele_node.get_var_ids(left, right)
                             tmp_common = len(set(vars) & set(tmp_vars))
                             tmp_common -= len(set(vars) | set(tmp_vars))
@@ -2315,13 +2315,13 @@
                         return max_alleles
 
                     for run, plus, minus in run_alignments:
-                        print run
-                        print "\tplus:"
+                        print(run)
+                        print("\tplus:")
                         for left, right, vars in plus:
-                            print "\t\t", left, right, vars, get_best_alleles(left, right, vars)
-                        print "\tminus:"
+                            print("\t\t", left, right, vars, get_best_alleles(left, right, vars))
+                        print("\tminus:")
                         for left, right, vars in minus:
-                            print "\t\t", left, right, vars, get_best_alleles(left, right, vars)
+                            print("\t\t", left, right, vars, get_best_alleles(left, right, vars))
                             
                     assert False
 
@@ -2342,7 +2342,7 @@
                 # Compare two alleles
                 if simulation and len(test_Gene_names) == 2:
                     allele_name1, allele_name2 = test_Gene_names
-                    print >> sys.stderr, allele_name1, "vs.", allele_name2
+                    print(allele_name1, "vs.", allele_name2, file=sys.stderr)
                     asm_graph.print_node_comparison(asm_graph.true_allele_nodes)
 
                 def compare_alleles(vars1, vars2, print_output = True):
@@ -2374,8 +2374,8 @@
                             skip = False
                             if print_output:
                                 if cmp_var_in_exon:
-                                    print >> sys.stderr, "\033[94mexon%d\033[00m" % (exon_i + 1),
-                                print >> sys.stderr, cmp_var_id, cmp_var, "\t\t\t", mpileup[cmp_var[1]]
+                                    print("\033[94mexon%d\033[00m" % (exon_i + 1), end=' ', file=sys.stderr)
+                                print(cmp_var_id, cmp_var, "\t\t\t", mpileup[cmp_var[1]], file=sys.stderr)
                             var_i += 1; var_j += 1
                             continue
                         if cmp_var[1] <= node_var[1]:
@@ -2384,42 +2384,42 @@
                                     if print_output:
                                         if cmp_var_in_exon:
                                             for f_ in [sys.stderr, report_file]:
-                                                print >> f_, "\033[94mexon%d\033[00m" % (exon_i + 1),
+                                                print("\033[94mexon%d\033[00m" % (exon_i + 1), end=' ', file=f_)
                                         for f_ in [sys.stderr, report_file]:
-                                            print >> f_, "***", cmp_var_id, cmp_var, "==", "\t\t\t", mpileup[cmp_var[1]]
+                                            print("***", cmp_var_id, cmp_var, "==", "\t\t\t", mpileup[cmp_var[1]], file=f_)
                                     mismatches += 1
                             var_i += 1
                         else:
                             if print_output:
                                 if node_var_in_exon:
                                     for f_ in [sys.stderr, report_file]:
-                                        print >> f_, "\033[94mexon%d\033[00m" % (exon_i + 1),
+                                        print("\033[94mexon%d\033[00m" % (exon_i + 1), end=' ', file=f_)
                                 for f_ in [sys.stderr, report_file]:
-                                    print >> f_, "*** ==", node_var_id, node_var, "\t\t\t", mpileup[node_var[1]]
+                                    print("*** ==", node_var_id, node_var, "\t\t\t", mpileup[node_var[1]], file=f_)
                             mismatches += 1
                             var_j += 1
                             
                     return mismatches
                     
                 tmp_nodes = asm_graph.nodes
-                print >> sys.stderr, "Number of tmp nodes:", len(tmp_nodes)
+                print("Number of tmp nodes:", len(tmp_nodes), file=sys.stderr)
                 count = 0
-                for id, node in tmp_nodes.items():
+                for id, node in list(tmp_nodes.items()):
                     count += 1
                     if count > 10:
                         break
                     node_vars = node.get_var_ids()
-                    node.print_info(); print >> sys.stderr
+                    node.print_info(); print(file=sys.stderr)
                     if node.id in asm_graph.to_node:
                         for id2, at in asm_graph.to_node[node.id]:
-                            print >> sys.stderr, "\tat %d ==> %s" % (at, id2)
+                            print("\tat %d ==> %s" % (at, id2), file=sys.stderr)
 
                     if simulation:
                         cmp_Gene_names = test_Gene_names
                     else:
                         cmp_Gene_names = [allele_name for allele_name, _ in allele_node_order]
                         
-                    alleles, cmp_vars, max_common = [], [], -sys.maxint
+                    alleles, cmp_vars, max_common = [], [], -sys.maxsize
                     for cmp_Gene_name in cmp_Gene_names:
                         tmp_vars = allele_nodes[cmp_Gene_name].get_var_ids(node.left, node.right)
                         tmp_common = len(set(node_vars) & set(tmp_vars))
@@ -2432,19 +2432,19 @@
 
                     for allele_name, cmp_vars in alleles:
                         for f_ in [sys.stderr, report_file]:
-                            print >> f_, "vs.", allele_name
+                            print("vs.", allele_name, file=f_)
                         compare_alleles(cmp_vars, node_vars)
 
-                    print >> sys.stderr
-                    print >> sys.stderr
+                    print(file=sys.stderr)
+                    print(file=sys.stderr)
 
 
             # Identify alleles that perfectly or closesly match assembled alleles
-            for node_name, node in asm_graph.nodes.items():
+            for node_name, node in list(asm_graph.nodes.items()):
                 vars = set(node.get_var_ids())
 
-                max_allele_names, max_common = [], -sys.maxint
-                for allele_name, vars2 in allele_vars.items():
+                max_allele_names, max_common = [], -sys.maxsize
+                for allele_name, vars2 in list(allele_vars.items()):
                     vars2 = set(vars2)
                     tmp_common = len(vars & vars2) - len(vars | vars2)
                     if tmp_common > max_common:
@@ -2454,21 +2454,21 @@
                         max_allele_names.append(allele_name)
 
                 for f_ in [sys.stderr, report_file]:
-                    print >> f_, "Genomic:", node_name
+                    print("Genomic:", node_name, file=f_)
                     node_vars = node.get_var_ids()
-                    min_mismatches = sys.maxint
+                    min_mismatches = sys.maxsize
                     for max_allele_name in max_allele_names:
                         cmp_vars = allele_vars[max_allele_name]
                         cmp_vars = sorted(cmp_vars, cmp=lambda a, b: int(a[2:]) - int(b[2:]))
                         print_output = False
                         tmp_mismatches = compare_alleles(cmp_vars, node_vars, print_output)
-                        print >> f_, "\t\t%s:" % max_allele_name, max_common, tmp_mismatches
+                        print("\t\t%s:" % max_allele_name, max_common, tmp_mismatches, file=f_)
                         if tmp_mismatches < min_mismatches:
                             min_mismatches = tmp_mismatches
                     if min_mismatches > 0:
-                        print >> f_, "Novel allele"
+                        print("Novel allele", file=f_)
                     else:
-                        print >> f_, "Known allele"
+                        print("Known allele", file=f_)
 
             """
             allele_exon_vars = {}
@@ -2520,7 +2520,7 @@
                                 else:
                                     break
                             for f_ in [sys.stderr, report_file]:
-                                print >> f_, "\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, test_Gene_name, prob[1] * 100.0)
+                                print("\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, test_Gene_name, prob[1] * 100.0), file=f_)
                             if rank_i < len(success):
                                 success[rank_i] = True
                             found_list[name_i] = True
@@ -2530,15 +2530,15 @@
                         break
                 if not found:
                     for f_ in [sys.stderr, report_file]:
-                        print >> f_, "\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, _allele_rep, prob[1] * 100.0)
+                        print("\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, _allele_rep, prob[1] * 100.0), file=f_)
                     if best_alleles and prob_i < 2:
                         for f_ in [sys.stderr, report_file]:
-                            print >> f_, "SingleModel %s (abundance: %.2f%%)" % (_allele_rep, prob[1] * 100.0)
+                            print("SingleModel %s (abundance: %.2f%%)" % (_allele_rep, prob[1] * 100.0), file=f_)
                 if not simulation and prob_i >= 9:
                     break
                 if prob_i >= 19:
                     break
-            print >> sys.stderr
+            print(file=sys.stderr)
 
             if simulation and not False in success:
                 aligner_type = "%s %s" % (aligner, index_type)
@@ -2589,7 +2589,7 @@
         Vars[gene][var_id] = [var_type, pos - left, data]
         Var_list[gene].append([pos - left, var_id])
         
-    for gene, in_var_list in Var_list.items():
+    for gene, in_var_list in list(Var_list.items()):
         Var_list[gene] = sorted(in_var_list)
 
     return Vars, Var_list
@@ -2707,7 +2707,7 @@
                    base_fname + ".link"]
     
     if verbose >= 1:
-        print >> sys.stderr, Gene_fnames
+        print(Gene_fnames, file=sys.stderr)
     
     if not typing_common.check_files(Gene_fnames):
         extract_hla_script = os.path.join(ex_path, "hisatgenotype_extract_vars.py")
@@ -2731,12 +2731,12 @@
         # DK - debugging purposes
         # extract_cmd += ["--ext-seq", "300"]
         if verbose >= 1:
-            print >> sys.stderr, "\tRunning:", ' '.join(extract_cmd)
+            print("\tRunning:", ' '.join(extract_cmd), file=sys.stderr)
         proc = subprocess.Popen(extract_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
         proc.communicate()
         
         if not typing_common.check_files(Gene_fnames):
-            print >> sys.stderr, "Error: hisatgenotype_extract_vars failed!"
+            print("Error: hisatgenotype_extract_vars failed!", file=sys.stderr)
             sys.exit(1)
 
     for aligner, index_type in aligners:
@@ -2753,11 +2753,11 @@
                                  "%s_backbone.fa" % base_fname,
                                  "%s.graph" % base_fname]
                     if verbose >= 1:
-                        print >> sys.stderr, "\tRunning:", ' '.join(build_cmd)
+                        print("\tRunning:", ' '.join(build_cmd), file=sys.stderr)
                     proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
                     proc.communicate()        
                     if not typing_common.check_files(Gene_hisat2_graph_index_fnames):
-                        print >> sys.stderr, "Error: indexing HLA failed!  Perhaps, you may have forgotten to build hisat2 executables?"
+                        print("Error: indexing HLA failed!  Perhaps, you may have forgotten to build hisat2 executables?", file=sys.stderr)
                         sys.exit(1)
             # Build HISAT2 linear indexes based on the above information
             else:
@@ -2771,7 +2771,7 @@
                     proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
                     proc.communicate()        
                     if not typing_common.check_files(Gene_hisat2_linear_index_fnames):
-                        print >> sys.stderr, "Error: indexing HLA failed!"
+                        print("Error: indexing HLA failed!", file=sys.stderr)
                         sys.exit(1)
         else:
             assert aligner == "bowtie2" and index_type == "linear"
@@ -2785,7 +2785,7 @@
                 proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'))
                 proc.communicate()        
                 if not typing_common.check_files(Gene_bowtie2_index_fnames):
-                    print >> sys.stderr, "Error: indexing HLA failed!"
+                    print("Error: indexing HLA failed!", file=sys.stderr)
                     sys.exit(1)
 
     # Read partial alleles from hla.data (temporary)
@@ -2815,21 +2815,21 @@
         refGene_loci[Gene_gene] = [Gene_name, chr, left, right, exons]
     Genes = {}
     if len(locus_list) == 0:
-        locus_list = refGene_loci.keys()
+        locus_list = list(refGene_loci.keys())
 
     read_Gene_alleles(base_fname + "_backbone.fa", Genes)
     read_Gene_alleles(base_fname + "_sequences.fa", Genes)
 
     # HLA gene alleles
     Gene_names = {}
-    for Gene_gene, data in Genes.items():
+    for Gene_gene, data in list(Genes.items()):
         Gene_names[Gene_gene] = list(data.keys())
 
     # HLA gene allele lengths
     Gene_lengths = {}
-    for Gene_gene, Gene_alleles in Genes.items():
+    for Gene_gene, Gene_alleles in list(Genes.items()):
         Gene_lengths[Gene_gene] = {}
-        for allele_name, seq in Gene_alleles.items():
+        for allele_name, seq in list(Gene_alleles.items()):
             Gene_lengths[Gene_gene][allele_name] = len(seq)
 
     # Read HLA variants, and link information
@@ -2886,7 +2886,7 @@
                 if str(test_i + 1) not in test_ids:
                     continue
 
-            print >> sys.stderr, "Test %d" % (test_i + 1), str(datetime.now())
+            print("Test %d" % (test_i + 1), str(datetime.now()), file=sys.stderr)
             test_locus_list = test_list[test_i]
             num_frag_list = typing_common.simulate_reads(Genes,
                                                          base_fname,
@@ -2910,7 +2910,7 @@
                     gene = test_Gene_name.split('*')[0]
                     test_Gene_seq = Genes[gene][test_Gene_name]
                     seq_type = "partial" if test_Gene_name in partial_alleles else "full"
-                    print >> sys.stderr, "\t%s - %d bp (%s sequence, %d pairs)" % (test_Gene_name, len(test_Gene_seq), seq_type, num_frag_list_i[j_])
+                    print("\t%s - %d bp (%s sequence, %d pairs)" % (test_Gene_name, len(test_Gene_seq), seq_type, num_frag_list_i[j_]), file=sys.stderr)
 
             if "single-end" in debug_instr:
                 read_fname = ["%s_input_1.fa" % base_fname]
@@ -2948,20 +2948,20 @@
                                      best_alleles,
                                      verbose)
 
-            for aligner_type, passed in tmp_test_passed.items():
+            for aligner_type, passed in list(tmp_test_passed.items()):
                 if aligner_type in test_passed:
                     test_passed[aligner_type] += passed
                 else:
                     test_passed[aligner_type] = passed
 
-                print >> sys.stderr, "\t\tPassed so far: %d/%d (%.2f%%)" % (test_passed[aligner_type], test_i + 1, (test_passed[aligner_type] * 100.0 / (test_i + 1)))
+                print("\t\tPassed so far: %d/%d (%.2f%%)" % (test_passed[aligner_type], test_i + 1, (test_passed[aligner_type] * 100.0 / (test_i + 1))), file=sys.stderr)
 
 
-        for aligner_type, passed in test_passed.items():
-            print >> sys.stderr, "%s:\t%d/%d passed (%.2f%%)" % (aligner_type, passed, len(test_list), passed * 100.0 / len(test_list))
+        for aligner_type, passed in list(test_passed.items()):
+            print("%s:\t%d/%d passed (%.2f%%)" % (aligner_type, passed, len(test_list), passed * 100.0 / len(test_list)), file=sys.stderr)
     
     else: # With real reads or BAMs
-        print >> sys.stderr, "\t", ' '.join(locus_list)
+        print("\t", ' '.join(locus_list), file=sys.stderr)
         fastq = True
         typing(ex_path,
                simulation,
@@ -3125,7 +3125,7 @@
     else:
         locus_list = args.locus_list.split(',')
     if args.aligners == "":
-        print >> sys.stderr, "Error: --aligners must be non-empty."
+        print("Error: --aligners must be non-empty.", file=sys.stderr)
         sys.exit(1)    
     args.aligners = args.aligners.split(',')
     for i in range(len(args.aligners)):
@@ -3136,7 +3136,7 @@
         args.read_fname = []
     if args.alignment_fname != "" and \
             not os.path.exists(args.alignment_fname):
-        print >> sys.stderr, "Error: %s doesn't exist." % args.alignment_fname
+        print("Error: %s doesn't exist." % args.alignment_fname, file=sys.stderr)
         sys.exit(1)
 
     if args.verbose and args.verbose_level == 0:
@@ -3154,10 +3154,10 @@
                 debug[item] = 1
 
     if not args.partial:
-        print >> sys.stderr, "Warning: --no-partial will be no longer supported!"
+        print("Warning: --no-partial will be no longer supported!", file=sys.stderr)
 
     if args.read_len * 2 > args.fragment_len:
-        print >> sys.stderr, "Warning: fragment might be too short (%d)" % (args.fragment_len)
+        print("Warning: fragment might be too short (%d)" % (args.fragment_len), file=sys.stderr)
 
     skip_fragment_regions = []
     if args.skip_fragment_regions != "":
@@ -3178,7 +3178,7 @@
     if args.stranded_seq != "":
         stranded_seq = args.stranded_seq.split(',')
         if len(stranded_seq) != 2:
-            print >> sys.stderr, "Error: --stranded-seq is incorrectly specified"
+            print("Error: --stranded-seq is incorrectly specified", file=sys.stderr)
             sys.exit(1)
     else:
         stranded_seq = []
--- hisat2.orig/evaluation/tests/genotype_genome/hisatgenotype_prev.py
+++ hisat2/evaluation/tests/genotype_genome/hisatgenotype_prev.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2016, Daehwan Kim <infphilo@gmail.com>
@@ -51,9 +51,9 @@
         aligner_cmd += ["-1", read_fnames[0],
                         "-2", read_fnames[1]]
 
-    print >> sys.stderr, "Aligning %s to %s ..." % (' '.join(read_fnames), base_fname)
+    print("Aligning %s to %s ..." % (' '.join(read_fnames), base_fname), file=sys.stderr)
     if verbose:
-        print >> sys.stderr, "\t%s" % (' '.join(aligner_cmd))
+        print("\t%s" % (' '.join(aligner_cmd)), file=sys.stderr)
 
     align_proc = subprocess.Popen(aligner_cmd,
                                   stdout=subprocess.PIPE,
@@ -69,7 +69,7 @@
                                    stderr=open("/dev/null", 'w'))
     sambam_proc.communicate()
 
-    print >> sys.stderr, "Sorting %s ..." % "TBD"
+    print("Sorting %s ..." % "TBD", file=sys.stderr)
     bamsort_cmd = ["samtools",
                    "sort",
                    "--threads", str(threads),
@@ -79,7 +79,7 @@
                                     stderr=open("/dev/null", 'w'))
     bamsort_proc.communicate()
 
-    print >> sys.stderr, "Indexing %s ..." % "TBD"
+    print("Indexing %s ..." % "TBD", file=sys.stderr)
 
     bamindex_cmd = ["samtools",
                     "index",
@@ -116,9 +116,9 @@
     # hisat2 graph index files
     genotype_fnames += ["%s.%d.ht2" % (base_fname, i+1) for i in range(8)]
     if not typing_common.check_files(genotype_fnames):
-        print >> sys.stderr, "Error: some of the following files are missing!"
+        print("Error: some of the following files are missing!", file=sys.stderr)
         for fname in genotype_fnames:
-            print >> sys.stderr, "\t%s" % fname
+            print("\t%s" % fname, file=sys.stderr)
         sys.exit(1)
 
     # Align reads, and sort the alignments into a BAM file
@@ -165,11 +165,11 @@
 
     # gene alleles
     allele_names = {}
-    for gene_name in genes.keys():
+    for gene_name in list(genes.keys()):
         if gene_name not in allele_names:
             allele_names[gene_name] = []
         gene_name2 = gene_name.split('-')[1]
-        for allele_name in allele_vars.keys():
+        for allele_name in list(allele_vars.keys()):
             allele_name1 = allele_name.split('*')[0]
             if gene_name2 == allele_name1:
                 allele_names[gene_name].append(allele_name)
@@ -196,7 +196,7 @@
         Vars[gene_name][var_id] = [var_type, pos, data]
         Var_list[gene_name].append([pos, var_id])
 
-    for gene_name, in_var_list in Var_list.items():
+    for gene_name, in_var_list in list(Var_list.items()):
         Var_list[gene_name] = sorted(in_var_list)
     def lower_bound(Var_list, pos):
         low, high = 0, len(Var_list)
@@ -233,7 +233,7 @@
     for test_i in range(len(test_list)):
         test_HLA_list = test_list[test_i]
         for test_HLA_names in test_HLA_list:
-            print >> sys.stderr, "\t%s" % (test_HLA_names)
+            print("\t%s" % (test_HLA_names), file=sys.stderr)
             for gene in test_HLA_names:
                 ref_allele = genes[gene]
                 ref_seq = gene_seqs[gene]
@@ -303,8 +303,8 @@
                     debug = False
                     if read_id in ["2339"] and False:
                         debug = True
-                        print "read_id: %s)" % read_id, pos, cigar_str, "NM:", NM, MD, Zs
-                        print "            ", read_seq
+                        print("read_id: %s)" % read_id, pos, cigar_str, "NM:", NM, MD, Zs)
+                        print("            ", read_seq)
 
                     vars = []
                     if Zs:
@@ -394,7 +394,7 @@
                     def add_stat(HLA_cmpt, HLA_counts, HLA_count_per_read, exon = True):
                         max_count = max(HLA_count_per_read.values())
                         cur_cmpt = set()
-                        for allele, count in HLA_count_per_read.items():
+                        for allele, count in list(HLA_count_per_read.items()):
                             if count < max_count:
                                 continue
                             """
@@ -414,7 +414,7 @@
                         alleles = ["", ""]
                         # alleles = ["B*40:304", "B*40:02:01"]
                         allele1_found, allele2_found = False, False
-                        for allele, count in HLA_count_per_read.items():
+                        for allele, count in list(HLA_count_per_read.items()):
                             if count < max_count:
                                 continue
                             if allele == alleles[0]:
@@ -422,13 +422,13 @@
                             elif allele == alleles[1]:
                                 allele2_found = True
                         if allele1_found != allele2_found:
-                            print alleles[0], HLA_count_per_read[alleles[0]]
-                            print alleles[1], HLA_count_per_read[alleles[1]]
+                            print(alleles[0], HLA_count_per_read[alleles[0]])
+                            print(alleles[1], HLA_count_per_read[alleles[1]])
                             if allele1_found:
-                                print ("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, HLA_count_per_read[alleles[1]]))
+                                print(("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, HLA_count_per_read[alleles[1]])))
                             else:
-                                print ("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, HLA_count_per_read[alleles[0]]))
-                            print read_seq
+                                print(("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, HLA_count_per_read[alleles[0]])))
+                            print(read_seq)
 
                         cur_cmpt = sorted(list(cur_cmpt))
                         cur_cmpt = '-'.join(cur_cmpt)
@@ -462,11 +462,11 @@
                             # daehwan - for debugging purposes
                             if debug:
                                 if allele in ["DQA1*05:05:01:01", "DQA1*05:05:01:02"]:
-                                    print allele, add, var_id
+                                    print(allele, add, var_id)
 
                     # Decide which allele(s) a read most likely came from
                     # also sanity check - read length, cigar string, and MD string
-                    for var_id, data in Vars[gene].items():
+                    for var_id, data in list(Vars[gene].items()):
                         var_type, var_pos, var_data = data
                         if var_type != "deletion":
                             continue
@@ -490,13 +490,13 @@
                                             add_count(var_id, -1)
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, Links[var_id]
+                                                print(cmp, var_id, Links[var_id])
                                     elif var_type == "deletion":
                                         del_len = int(var_data)
                                         if ref_pos < var_pos and ref_pos + length > var_pos + del_len:
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, Links[var_id], -1, Vars[gene][var_id]
+                                                print(cmp, var_id, Links[var_id], -1, Vars[gene][var_id])
                                             # Check if this might be one of the two tandem repeats (the same left coordinate)
                                             cmp_left, cmp_right = cmp[1], cmp[1] + cmp[2]
                                             test1_seq1 = ref_seq[cmp_left-base_locus:cmp_right-base_locus]
@@ -510,7 +510,7 @@
                                                 add_count(var_id, -1)
                                     else:
                                         if debug:
-                                            print cmp, var_id, Links[var_id], -1
+                                            print(cmp, var_id, Links[var_id], -1)
                                         add_count(var_id, -1)
                                 var_idx += 1
 
@@ -531,7 +531,7 @@
                                         if var_data == read_base:
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, 1, var_data, read_base, Links[var_id]
+                                                print(cmp, var_id, 1, var_data, read_base, Links[var_id])
 
                                             # daehwan - for debugging purposes
                                             if False:
@@ -553,8 +553,8 @@
                             var_idx = lower_bound(Var_list[gene], ref_pos)
                             # daehwan - for debugging purposes
                             if debug:
-                                print left_pos, cigar_str, MD, vars
-                                print ref_pos, ins_seq, Var_list[gene][var_idx], Vars[gene][Var_list[gene][var_idx][1]]
+                                print(left_pos, cigar_str, MD, vars)
+                                print(ref_pos, ins_seq, Var_list[gene][var_idx], Vars[gene][Var_list[gene][var_idx][1]])
                                 # sys.exit(1)
                             while var_idx < len(Var_list[gene]):
                                 var_pos, var_id = Var_list[gene][var_idx]
@@ -566,7 +566,7 @@
                                         if var_data == ins_seq:
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, 1, Links[var_id]
+                                                print(cmp, var_id, 1, Links[var_id])
                                             add_count(var_id, 1)
                                 var_idx += 1
 
@@ -602,8 +602,8 @@
                                         var_len = int(var_data)
                                         if var_len == length:
                                             if debug:
-                                                print cmp, var_id, 1, Links[var_id]
-                                                print ref_seq[var_pos - 10-base_locus:var_pos-base_locus], ref_seq[var_pos-base_locus:var_pos+int(var_data)-base_locus], ref_seq[var_pos+int(var_data)-base_locus:var_pos+int(var_data)+10-base_locus]
+                                                print(cmp, var_id, 1, Links[var_id])
+                                                print(ref_seq[var_pos - 10-base_locus:var_pos-base_locus], ref_seq[var_pos-base_locus:var_pos+int(var_data)-base_locus], ref_seq[var_pos+int(var_data)-base_locus:var_pos+int(var_data)+10-base_locus])
                                             add_count(var_id, 1)
                                 var_idx += 1
 
@@ -634,9 +634,9 @@
                     if read_pos != len(read_seq) or \
                             cmp_cigar_str != cigar_str or \
                             cmp_MD != MD:
-                        print >> sys.stderr, "Error:", cigar_str, MD
-                        print >> sys.stderr, "\tcomputed:", cmp_cigar_str, cmp_MD
-                        print >> sys.stderr, "\tcmp list:", cmp_list
+                        print("Error:", cigar_str, MD, file=sys.stderr)
+                        print("\tcomputed:", cmp_cigar_str, cmp_MD, file=sys.stderr)
+                        print("\tcmp list:", cmp_list, file=sys.stderr)
                         assert False            
 
                     prev_read_id = read_id
@@ -648,7 +648,7 @@
                 if prev_read_id != None:
                     add_stat(HLA_cmpt, HLA_counts, HLA_count_per_read)
 
-                HLA_counts = [[allele, count] for allele, count in HLA_counts.items()]
+                HLA_counts = [[allele, count] for allele, count in list(HLA_counts.items())]
                 def HLA_count_cmp(a, b):
                     if a[1] != b[1]:
                         return b[1] - a[1]
@@ -660,28 +660,29 @@
-                HLA_counts = sorted(HLA_counts, cmp=HLA_count_cmp)
+                from functools import cmp_to_key
+                HLA_counts = sorted(HLA_counts, key=cmp_to_key(HLA_count_cmp))
                 for count_i in range(len(HLA_counts)):
                     count = HLA_counts[count_i]
-                    print >> sys.stderr, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1])
+                    print("\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]), file=sys.stderr)
                     if count_i >= 9:
                         break
-                print >> sys.stderr
+                print(file=sys.stderr)
 
                 def normalize(prob):
                     total = sum(prob.values())
-                    for allele, mass in prob.items():
+                    for allele, mass in list(prob.items()):
                         prob[allele] = mass / total
 
                 def normalize2(prob, length):
                     total = 0
-                    for allele, mass in prob.items():
+                    for allele, mass in list(prob.items()):
                         assert allele in length
                         total += (mass / length[allele])
-                    for allele, mass in prob.items():
+                    for allele, mass in list(prob.items()):
                         assert allele in length
                         prob[allele] = mass / length[allele] / total
 
                 def prob_diff(prob1, prob2):
                     diff = 0.0
-                    for allele in prob1.keys():
+                    for allele in list(prob1.keys()):
                         if allele in prob2:
                             diff += abs(prob1[allele] - prob2[allele])
                         else:
@@ -701,7 +701,7 @@
                         return 1
 
                 HLA_prob, HLA_prob_next = {}, {}
-                for cmpt, count in HLA_cmpt.items():
+                for cmpt, count in list(HLA_cmpt.items()):
                     alleles = cmpt.split('-')
                     for allele in alleles:
                         if allele not in HLA_prob:
@@ -718,7 +718,7 @@
                 normalize(HLA_prob)
                 def next_prob(HLA_cmpt, HLA_prob, HLA_length):
                     HLA_prob_next = {}
-                    for cmpt, count in HLA_cmpt.items():
+                    for cmpt, count in list(HLA_cmpt.items()):
                         alleles = cmpt.split('-')
                         alleles_prob = 0.0
                         for allele in alleles:
@@ -745,17 +745,18 @@
                     HLA_prob[allele] /= float(allele_len)
                 normalize(HLA_prob)
                 """
-                HLA_prob = [[allele, prob] for allele, prob in HLA_prob.items()]
+                HLA_prob = [[allele, prob] for allele, prob in list(HLA_prob.items())]
 
-                HLA_prob = sorted(HLA_prob, cmp=HLA_prob_cmp)
+                from functools import cmp_to_key
+                HLA_prob = sorted(HLA_prob, key=cmp_to_key(HLA_prob_cmp))
                 success = [False for i in range(len(test_HLA_names))]
                 found_list = [False for i in range(len(test_HLA_names))]
                 for prob_i in range(len(HLA_prob)):
                     prob = HLA_prob[prob_i]
-                    print >> sys.stderr, "\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, prob[0], prob[1] * 100.0)
+                    print("\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, prob[0], prob[1] * 100.0), file=sys.stderr)
                     if prob_i >= 9:
                         break
-                print >> sys.stderr
+                print(file=sys.stderr)
 
                 """
                 if len(test_HLA_names) == 2:
@@ -963,15 +963,15 @@
             if cigar_op in "MIS":
                 read_pos += length
 
-    for var_id, counts in var_counts.items():
+    for var_id, counts in list(var_counts.items()):
         if counts[0] < 2: # or counts[0] * 3 < counts[1]:
             continue
         assert var_id in vars
         var_chr, var_left, var_type, var_data = vars[var_id]
         assert var_id in clnsigs
         var_gene, var_clnsig = clnsigs[var_id]
-        print >> sys.stderr, "\t\t\t%s %s: %s:%d %s %s (%s): %d-%d" % \
-                (var_gene, var_id, var_chr, var_left, var_type, var_data, var_clnsig, counts[0], counts[1])
+        print("\t\t\t%s %s: %s:%d %s %s (%s): %d-%d" % \
+                (var_gene, var_id, var_chr, var_left, var_type, var_data, var_clnsig, counts[0], counts[1]), file=sys.stderr)
 
 
                 
@@ -1038,7 +1038,7 @@
         read_fnames = [args.read_fname_U]
     else:
         if args.read_fname_1 == "" or args.read_fname_2 == "":
-            print >> sys.stderr, "Error: please specify read file names correctly: -U or -1 and -2"
+            print("Error: please specify read file names correctly: -U or -1 and -2", file=sys.stderr)
             sys.exit(1)
         read_fnames = [args.read_fname_1, args.read_fname_2] 
 
--- hisat2.orig/evaluation/tests/genotype_genome/paper_sensitivity/sensitivity.py
+++ hisat2/evaluation/tests/genotype_genome/paper_sensitivity/sensitivity.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # Copyright 2017, Daehwan Kim <infphilo@gmail.com>
 #
@@ -69,7 +69,7 @@
             aligner_cmd += ["-1", fq_fnames[0],
                             "-2", fq_fnames[1]]
             # print >> sys.stderr, "Running:", ' '.join(aligner_cmd)
-            print sample, aligner, type
+            print(sample, aligner, type)
             align_proc = subprocess.Popen(aligner_cmd,
                                           stdout=subprocess.PIPE,
                                           stderr=open("/dev/null", 'w'))
@@ -113,7 +113,7 @@
 
                 if ((aligner == "hisat2" and NH == 1) or (aligner == "bowtie2" and AS > XS and read1_first if flag & 0x40 else read2_first)):
                     if chr in region_loci:
-                        for region, loci in region_loci[chr].items():
+                        for region, loci in list(region_loci[chr].items()):
                             _, _, loci_left, loci_right = loci
                             # there might be a different candidate region for each of left and right reads
                             if pos >= loci_left and pos < loci_right:
@@ -136,5 +136,5 @@
                 gene = "HLA-" + gene
                 if gene not in region_count:
                     continue
-                print "\t%s pair: %d, left+right: %d" % (gene, region_count[gene], region_read1_count[gene] + region_read2_count[gene])
+                print("\t%s pair: %d, left+right: %d" % (gene, region_count[gene], region_read1_count[gene] + region_read2_count[gene]))
             
--- hisat2.orig/evaluation/tests/one_snp_test/evaluate_one_snp_reads.py
+++ hisat2/evaluation/tests/one_snp_test/evaluate_one_snp_reads.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, subprocess
 import multiprocessing
@@ -154,7 +154,7 @@
             if aligner == "hisat2" and index_type != "":
                 aligner_name += ("_" + index_type)
             two_step = (aligner == "tophat2" or type == "x2" or (aligner in ["hisat2", "hisat"] and type == ""))
-            print >> sys.stderr, "\t%s\t%s" % (aligner_name, str(datetime.now()))
+            print("\t%s\t%s" % (aligner_name, str(datetime.now())), file=sys.stderr)
             if paired:
                 aligner_dir = aligner_name + "_paired"
             else:
@@ -170,7 +170,7 @@
             aligner_cmd = get_aligner_cmd(aligner, type, index_type, version, "../" + type_read1_fname, "../" + type_read2_fname, out_fname)
             start_time = datetime.now()
             if verbose:
-                print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd)
+                print("\t", start_time, " ".join(aligner_cmd), file=sys.stderr)
             if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa"]:
                 proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
             else:
@@ -180,7 +180,7 @@
             duration = finish_time - start_time
             duration = duration.total_seconds()
             if verbose:
-                print >> sys.stderr, "\t", finish_time, "finished:", duration
+                print("\t", finish_time, "finished:", duration, file=sys.stderr)
 
             assert os.path.exists(out_fname)
             correct_reads, correct_multi_reads, num_reads = 0, 0, 0
@@ -205,8 +205,8 @@
 
                 prev_read_id = read_id
 
-            print >> sys.stderr, "\tfirst: %d / %d (%.2f%%)" % (correct_reads, num_reads, float(correct_reads)/num_reads*100)
-            print >> sys.stderr, "\tall: %d / %d (%.2f%%)" % (correct_multi_reads, num_reads, float(correct_multi_reads)/num_reads*100)
+            print("\tfirst: %d / %d (%.2f%%)" % (correct_reads, num_reads, float(correct_reads)/num_reads*100), file=sys.stderr)
+            print("\tall: %d / %d (%.2f%%)" % (correct_multi_reads, num_reads, float(correct_multi_reads)/num_reads*100), file=sys.stderr)
 
             os.chdir("..")
 
--- hisat2.orig/evaluation/tests/one_snp_test/simulate_one_snp_reads.py
+++ hisat2/evaluation/tests/one_snp_test/simulate_one_snp_reads.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
 #
@@ -125,8 +125,8 @@
         Zs = "%d|I|%s" % (left_read_len, snp_id)
 
     if len(read_seq) != read_len:
-        print >> sys.stderr, "read length differs:", len(read_seq), "vs.", read_len
-        print >> sys.stderr, pos, cigar_str, MD, Zs
+        print("read length differs:", len(read_seq), "vs.", read_len, file=sys.stderr)
+        print(pos, cigar_str, MD, Zs, file=sys.stderr)
         assert False
 
     ref_read_seq = chr_seq[pos:pos+read_len]
@@ -257,8 +257,8 @@
         tMD += ("{}".format(match_len))
 
     if tMD != MD:
-        print >> sys.stderr, chr, pos, cigar, MD, Zs
-        print >> sys.stderr, tMD
+        print(chr, pos, cigar, MD, Zs, file=sys.stderr)
+        print(tMD, file=sys.stderr)
         assert False
         
         
@@ -272,14 +272,14 @@
 
     genome_seq = read_genome(genome_file)
     snps = read_snp(snp_file)
-    chr_ids = genome_seq.keys()
+    chr_ids = list(genome_seq.keys())
 
     sam_file = open(base_fname + ".sam", "w")
 
     # Write SAM header
-    print >> sam_file, "@HD\tVN:1.0\tSO:unsorted"
-    for chr in genome_seq.keys():
-        print >> sam_file, "@SQ\tSN:%s\tLN:%d" % (chr, len(genome_seq[chr]))
+    print("@HD\tVN:1.0\tSO:unsorted", file=sam_file)
+    for chr in list(genome_seq.keys()):
+        print("@SQ\tSN:%s\tLN:%d" % (chr, len(genome_seq[chr])), file=sam_file)
     
     read_file = open(base_fname + "_snp_1.fa", "w")
     ref_read_file = open(base_fname + "_ref_1.fa", "w")
@@ -312,12 +312,12 @@
             #    Zs2 = ("\tZs:Z:{}".format(Zs2))
 
             read_id_str = "{}_{}_{}_{}".format(cur_read_id, chr, pos, cigar_str)
-            print >> read_file, ">{}".format(read_id_str)
-            print >> read_file, read_seq
-            print >> sam_file, "{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:0\tNM:i:0\tMD:Z:{}{}".format(read_id_str, flag, chr, pos + 1, cigar_str, chr, pos + 1, read_seq, MD, Zs)
+            print(">{}".format(read_id_str), file=read_file)
+            print(read_seq, file=read_file)
+            print("{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:0\tNM:i:0\tMD:Z:{}{}".format(read_id_str, flag, chr, pos + 1, cigar_str, chr, pos + 1, read_seq, MD, Zs), file=sam_file)
 
-            print >> ref_read_file, ">{}_{}_{}_100M".format(cur_read_id, chr, pos)
-            print >> ref_read_file, ref_read_seq
+            print(">{}_{}_{}_100M".format(cur_read_id, chr, pos), file=ref_read_file)
+            print(ref_read_seq, file=ref_read_file)
             """
             if paired_end:
                 print >> read2_file, ">{}".format(cur_read_id)
--- hisat2.orig/evaluation/tests/repeat/generate_repeats.py
+++ hisat2/evaluation/tests/repeat/generate_repeats.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys
 import struct
@@ -25,7 +25,7 @@
     chr_sa.append(num)
 
     if len(chr_sa) % 5000000 == 0:
-        print len(chr_sa)
+        print(len(chr_sa))
 f.close()
 
 assert chr_sa[0] + 1 == len(chr_sa)
@@ -41,7 +41,7 @@
 while i < len(chr_sa) - 1:
     pos = chr_sa[i]
     base_seq = chr_seq[pos:pos+seq_len]
-    for j in xrange(i+1, len(chr_sa)):
+    for j in range(i+1, len(chr_sa)):
         pos2 = chr_sa[j]
         cmp_seq = chr_seq[pos2:pos2+seq_len]
         if base_seq != cmp_seq:
@@ -53,13 +53,13 @@
     i = j
 
     if i % 5000000 == 0:
-        print i
+        print(i)
 
 found = False
-print len(repeats), "repeats"
+print(len(repeats), "repeats")
 deleted = set()
-for i in xrange(len(repeats) - 1):
-    for j in xrange(i + 1, len(repeats)):
+for i in range(len(repeats) - 1):
+    for j in range(i + 1, len(repeats)):
         if j in deleted:
             continue
         
@@ -84,17 +84,17 @@
 
         if num_close == 1 and num_close2 < 5:
             found = True
-            print pos_set
-            print pos_set2
-            print pos_seq
-            print pos_seq2
+            print(pos_set)
+            print(pos_set2)
+            print(pos_seq)
+            print(pos_seq2)
 
             file1 = open("1.fa", "w")
             file2 = open("2.fa", "w")
 
             pos_seq2_rc = list(pos_seq2)
             pos_seq2_rc = pos_seq2_rc[::-1]
-            for k in xrange(seq_len):
+            for k in range(seq_len):
                 nt = pos_seq2_rc[k]
                 if nt == 'A':
                     nt = 'T'
@@ -108,11 +108,11 @@
                 pos_seq2_rc[k] = nt
             pos_seq2_rc = ''.join(pos_seq2_rc)
 
-            for k in xrange(1000000):
-                print >> file1, ">%d" % k
-                print >> file2, ">%d" % k
-                print >> file1, pos_seq
-                print >> file2, pos_seq2_rc
+            for k in range(1000000):
+                print(">%d" % k, file=file1)
+                print(">%d" % k, file=file2)
+                print(pos_seq, file=file1)
+                print(pos_seq2_rc, file=file2)
 
             file1.close()
             file2.close()
@@ -121,7 +121,7 @@
     if found:
         break
 
-    print i
+    print(i)
 
 chr_seq = ""
 for line in open("%s.fa" % chr_name):
@@ -132,7 +132,7 @@
 
 N_ranges = []
 prev_nt = None
-for i in xrange(len(chr_seq)):
+for i in range(len(chr_seq)):
     nt = chr_seq[i]
     if nt == 'N':
         if prev_nt != 'N':
@@ -158,7 +158,7 @@
 to_genome_list = [[y, x] for x, y in to_joined_list]
 
 N_ranges_tmp = []
-for i in xrange(len(to_genome_list)):
+for i in range(len(to_genome_list)):
     to_genome = to_genome_list[i]
     if i == 0:
         if to_genome[1] > 0:
@@ -171,8 +171,8 @@
 
 file = open("%s_rep.info" % chr_name, "w")
 def print_rep_info(rep_name, rep_pos, rep_len, pos_set, pos_seq):
-    print >> file, ">%s*0\trep\t%d\t%d\t%d\t0" % (rep_name, rep_pos, rep_len, len(pos_set))
-    for i in xrange(0, len(pos_set), 10):
+    print(">%s*0\trep\t%d\t%d\t%d\t0" % (rep_name, rep_pos, rep_len, len(pos_set)), file=file)
+    for i in range(0, len(pos_set), 10):
         output = ""
         for j in range(i, i + 10):
             if j >= len(pos_set):
@@ -181,7 +181,7 @@
                 output += " "
 
             def convert(pos):
-                for i in xrange(len(to_genome_list)):
+                for i in range(len(to_genome_list)):
                     if i + 1 == len(to_genome_list) or (pos >= to_genome_list[i][0] and pos < to_genome_list[i+1][0]):
                         return pos - to_genome_list[i][0] + to_genome_list[i][1]
 
@@ -190,7 +190,7 @@
             pos = convert(pos_set[j])
             assert chr_seq[pos:pos+seq_len] == pos_seq
             output += ("%s:%d:+" % (chr_name, pos))
-        print >> file, output
+        print(output, file=file)
 print_rep_info("rep1", 0, seq_len, pos_set, pos_seq)
 print_rep_info("rep2", seq_len, seq_len, pos_set2, pos_seq2)
 file.close()
@@ -198,15 +198,15 @@
 chr_seq = chr_seq.replace(pos_seq, 'N' * seq_len)
 chr_seq = chr_seq.replace(pos_seq2, 'N' * seq_len)
 file = open("%s_mask.fa" % chr_name, "w")
-print >> file, ">%s_mask" % chr_name
-for i in xrange(0, len(chr_seq), 60):
-    print >> file, chr_seq[i:i+60]
+print(">%s_mask" % chr_name, file=file)
+for i in range(0, len(chr_seq), 60):
+    print(chr_seq[i:i+60], file=file)
 file.close()
 
 file = open("%s_rep.fa" % chr_name, "w")
 rep_seq = pos_seq + pos_seq2
-print >> file, ">rep"
-for i in xrange(0, len(rep_seq), 60):
-    print >> file, rep_seq[i:i+60]
+print(">rep", file=file)
+for i in range(0, len(rep_seq), 60):
+    print(rep_seq[i:i+60], file=file)
 file.close()
     
--- hisat2.orig/evaluation/tests/repeat/test_repeat.py
+++ hisat2/evaluation/tests/repeat/test_repeat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 import sys, os, subprocess, random
 from argparse import ArgumentParser, FileType
 
@@ -52,7 +52,7 @@
 
     genome_file.close()
 
-    print >> sys.stderr, "genome is loaded"
+    print("genome is loaded", file=sys.stderr)
     
     return chr_dic
 
@@ -62,7 +62,7 @@
 def generate_random_seq(seq_len):
     assert seq_len > 0
     random_seq = ""
-    for i in xrange(seq_len):
+    for i in range(seq_len):
         random_seq += "ACGT"[random.randint(0, 3)]
     return random_seq
 
@@ -109,10 +109,10 @@
     ]
     
     for id, seq in seqs:
-        print ">%s" % id
-        print generate_random_seq(20)
-        print seq
-        print generate_random_seq(20)
+        print(">%s" % id)
+        print(generate_random_seq(20))
+        print(seq)
+        print(generate_random_seq(20))
 
 
 """
--- hisat2.orig/hisat2-build
+++ hisat2/hisat2-build
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 """
  Copyright 2015, Daehwan Kim <infphilo@gmail.com>
--- hisat2.orig/hisat2-build-new
+++ hisat2/hisat2-build-new
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 """
  Copyright 2018, Chanhee Park <parkchanhee@gmail.com> and Daehwan Kim <infphilo@gmail.com>
--- hisat2.orig/hisat2-inspect
+++ hisat2/hisat2-inspect
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 """
  Copyright 2015, Daehwan Kim <infphilo@gmail.com>
--- hisat2.orig/hisat2_extract_exons.py
+++ hisat2/hisat2_extract_exons.py
@@ -63,7 +63,7 @@
             trans[transcript_id][2].append([left, right])
 
     # Sort exons and merge where separating introns are <=5 bps
-    for tran, [chrom, strand, exons] in trans.items():
+    for tran, [chrom, strand, exons] in list(trans.items()):
             exons.sort()
             tmp_exons = [exons[0]]
             for i in range(1, len(exons)):
@@ -75,7 +75,7 @@
 
     # Calculate and print the unique junctions
     tmp_exons = set()
-    for chrom, strand, texons in trans.values():
+    for chrom, strand, texons in list(trans.values()):
         for i in range(len(texons)):
             tmp_exons.add((chrom, texons[i][0], texons[i][1], strand))
     tmp_exons = sorted(tmp_exons)
--- hisat2.orig/hisat2_extract_snps_haplotypes_UCSC.py
+++ hisat2/hisat2_extract_snps_haplotypes_UCSC.py
@@ -155,7 +155,7 @@
             vars_count[id] = 0
         vars_count[id] += 1
     vars_duplicate = set()
-    for id, count in vars_count.items():
+    for id, count in list(vars_count.items()):
         if count <= 1:
             continue
         vars_duplicate.add(id)
@@ -302,7 +302,7 @@
             h_end += (int(h2_data) - 1)
         assert h_begin <= h_end
         h_new_begin = h_begin
-        for h_j in reversed(range(0, h_i)):
+        for h_j in reversed(list(range(0, h_i))):
             hc = haplotypes[h_j].split('#')
             _, hc_begin, hc_type, hc_data, _ = vars[int(hc[-1])]
             hc_begin = int(hc_begin)
--- hisat2.orig/hisat2_extract_snps_haplotypes_VCF.py
+++ hisat2/hisat2_extract_snps_haplotypes_VCF.py
@@ -409,7 +409,7 @@
             h_end += (int(h2_data) - 1)
         assert h_begin <= h_end
         h_new_begin = h_begin
-        for h_j in reversed(range(0, h_i)):
+        for h_j in reversed(list(range(0, h_i))):
             hc = haplotypes[h_j].split('#')
             _, hc_begin, hc_type, hc_data, _ = vars[int(hc[-1])]
             hc_begin = int(hc_begin)
@@ -776,7 +776,7 @@
                 vars = []
 
         else:            
-            for chr in genotype_var_list.keys():
+            for chr in list(genotype_var_list.keys()):
                 chr_seq = chr_dic[chr]
                 chr_genotype_vars = genotype_var_list[chr]
                 curr_right = -1
--- hisat2.orig/hisat2_extract_splice_sites.py
+++ hisat2/hisat2_extract_splice_sites.py
@@ -64,7 +64,7 @@
             trans[transcript_id][2].append([left, right])
 
     # Sort exons and merge where separating introns are <=5 bps
-    for tran, [chrom, strand, exons] in trans.items():
+    for tran, [chrom, strand, exons] in list(trans.items()):
             exons.sort()
             tmp_exons = [exons[0]]
             for i in range(1, len(exons)):
@@ -76,7 +76,7 @@
 
     # Calculate and print the unique junctions
     junctions = set()
-    for chrom, strand, exons in trans.values():
+    for chrom, strand, exons in list(trans.values()):
         for i in range(1, len(exons)):
             junctions.add((chrom, exons[i-1][1], exons[i][0], strand))
     junctions = sorted(junctions)
@@ -88,7 +88,7 @@
     if verbose:
         exon_lengths, intron_lengths, trans_lengths = \
             Counter(), Counter(), Counter()
-        for chrom, strand, exons in trans.values():
+        for chrom, strand, exons in list(trans.values()):
             tran_len = 0
             for i, exon in enumerate(exons):
                 exon_len = exon[1]-exon[0]+1
@@ -100,7 +100,7 @@
             trans_lengths[tran_len] += 1
 
         print('genes: {}, genes with multiple isoforms: {}'.format(
-                len(genes), sum(len(v) > 1 for v in genes.values())),
+                len(genes), sum(len(v) > 1 for v in list(genes.values()))),
               file=stderr)
         print('transcripts: {}, transcript avg. length: {:.0f}'.format(
                 len(trans), sum(trans_lengths.elements())//len(trans)),
--- hisat2.orig/hisat2_simulate_reads.py
+++ hisat2/hisat2_simulate_reads.py
@@ -152,7 +152,7 @@
             transcripts[transcript_id][2].append([left, right])
 
     # Sort exons and merge where separating introns are <=5 bps
-    for tran, [chr, strand, exons] in transcripts.items():
+    for tran, [chr, strand, exons] in list(transcripts.items()):
             exons.sort()
             tmp_exons = [exons[0]]
             for i in range(1, len(exons)):
@@ -163,7 +163,7 @@
             transcripts[tran] = [chr, strand, tmp_exons]
 
     tmp_transcripts = {}
-    for tran, [chr, strand, exons] in transcripts.items():
+    for tran, [chr, strand, exons] in list(transcripts.items()):
         exon_lens = [e[1] - e[0] + 1 for e in exons]
         transcript_len = sum(exon_lens)
         if transcript_len >= frag_len:
@@ -199,7 +199,7 @@
 """
 def sanity_check_input(genome_seq, genes, transcripts, snps, frag_len):
     num_canon_ss, num_ss = 0, 0
-    for transcript, [chr, strand, transcript_len, exons] in transcripts.items():
+    for transcript, [chr, strand, transcript_len, exons] in list(transcripts.items()):
         assert transcript_len >= frag_len
         if len(exons) <= 1:
             continue
@@ -274,7 +274,7 @@
 """
 def generate_dna_expr_profile(genome_seq):
     expr_profile = []
-    for chr_id, chr_seq in genome_seq.items():
+    for chr_id, chr_seq in list(genome_seq.items()):
         expr_profile.append(len(chr_seq))
     expr_sum = float(sum(expr_profile))
     expr_profile = [expr_profile[i] / expr_sum for i in range(len(expr_profile))]
--- hisat2.orig/hisat2lib/pymodule/ht2example.py
+++ hisat2/hisat2lib/pymodule/ht2example.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 #
 # Copyright 2018, Chanhee Park <parkchanhee@gmail.com> and Daehwan Kim <infphilo@gmail.com>
@@ -30,7 +30,7 @@
 # Get default options
 ht2_options = ht2py.get_options()
 
-print ht2_options
+print(ht2_options)
 ht2_options['gVerbose'] = 1
 ht2_options['startVerbose'] = 1
 # or
@@ -38,7 +38,7 @@
 
 handle = ht2py.init(ht2_index, ht2_options)
 
-print ht2py.index_getrefnamebyid(handle, 0)
+print(ht2py.index_getrefnamebyid(handle, 0))
 
 #print ht2py.index_getrefnamebyid(handle, 0, 1, 3, 5, 7, 9)
 # outofindex
@@ -62,7 +62,7 @@
     if direction == 1:
         chr_dir = '-'
 
-    print refnames[chr_id].split()[0] + ":" + str(chr_pos) + ':' + chr_dir
+    print(refnames[chr_id].split()[0] + ":" + str(chr_pos) + ':' + chr_dir)
 
 # close handle
 ht2py.close(handle)
--- hisat2.orig/hisat2lib/pymodule/setup.py
+++ hisat2/hisat2lib/pymodule/setup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 #
 # Copyright 2018, Chanhee Park <parkchanhee@gmail.com> and Daehwan Kim <infphilo@gmail.com>
--- hisat2.orig/scripts/sa.py
+++ hisat2/scripts/sa.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 """
 sa.py
@@ -15,7 +15,7 @@
 def loadBowtieSa(fh):
 	""" Load a .sa file from handle into an array of ints """
 	nsa = struct.unpack('I', fh.read(4))[0]
-	return [ struct.unpack('I', fh.read(4))[0] for i in xrange(0, nsa) ]
+	return [ struct.unpack('I', fh.read(4))[0] for i in range(0, nsa) ]
 
 def loadBowtieSaFilename(fn):
 	""" Load a .sa file from filename into an array of ints """
@@ -58,7 +58,7 @@
 		# Suffix array is in sas; note that $ is considered greater than all
 		# other characters
 		if ref is not None:
-			for i in xrange(1, len(sas)):
+			for i in range(1, len(sas)):
 				sa1, sa2 = sas[i-1], sas[i]
 				assert sa1 != sa2
 				# Sanity check that suffixes are really in order
@@ -76,4 +76,4 @@
 			assert sas[-1] == len(ref)
 	
 	go()
-	
\ No newline at end of file
+	
--- hisat2.orig/scripts/validate_repeat.py
+++ hisat2/scripts/validate_repeat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 import sys, subprocess
 import re
 from argparse import ArgumentParser, FileType
@@ -203,7 +203,7 @@
                     seq = reverse_complement(seq)
 
                 if seq != repeat_sequence:
-                    print 'Mismatch', seq, repeat_sequence, snp_cnt, coord, snp_id_list, repeat_length
+                    print('Mismatch', seq, repeat_sequence, snp_cnt, coord, snp_id_list, repeat_length)
                     
     fp.close()
 
