[med-svn] [Git][med-team/htseq][upstream] New upstream version 0.11.2
Andreas Tille
gitlab at salsa.debian.org
Fri Jan 11 22:40:05 GMT 2019
Andreas Tille pushed to branch upstream at Debian Med / htseq
Commits:
237b3f13 by Andreas Tille at 2019-01-11T22:31:44Z
New upstream version 0.11.2
- - - - -
15 changed files:
- PKG-INFO
- README.rst
- VERSION
- python2/HTSeq/__init__.py
- python2/HTSeq/_version.py
- python2/HTSeq/scripts/count.py
- python2/HTSeq/scripts/qa.py
- python2/src/HTSeq/_HTSeq.pxd
- python2/src/HTSeq/_HTSeq.pyx
- python3/HTSeq/__init__.py
- python3/HTSeq/_version.py
- python3/HTSeq/scripts/count.py
- python3/HTSeq/scripts/qa.py
- python3/src/HTSeq/_HTSeq.pxd
- python3/src/HTSeq/_HTSeq.pyx
Changes:
=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: HTSeq
-Version: 0.10.0
+Version: 0.11.2
Summary: A framework to process and analyze data from high-throughput sequencing (HTS) assays
Home-page: https://github.com/simon-anders/htseq
Author: Simon Anders
=====================================
README.rst
=====================================
@@ -29,13 +29,15 @@ and are transferring the binaries onto another machine with a compatible
environment (architecture, shared libraries). If you are not sure,
chances are you need them.
-Both ``Linux`` and ``OSX`` are supported and binaries are provided for virtually
-all ``Linux`` versions and for some ``OSX`` versions (the latter only for ``Python 2.7``
-and ``Python 3.6``). A source package which should not require ``Cython`` nor ``SWIG``
-is provided for all other cases. ``Windows`` is not officially supported as we don't
-have access to a Continuous Integration ``Windows`` machine that supports ``pysam``.
-However, if you have built ``HTSeq`` for ``Windows``, please open an issue and we'll
-try and include it in the release.
+Both **Linux** and **OSX** are supported and binaries are provided for virtually
+all Linux versions and for some OSX versions (the latter only for Python 2.7
+and 3.6). A source package which should not require ``Cython`` nor ``SWIG``
+is provided for all other cases.
+
+**Windows is not officially supported** as we don't have access to a Continuous
+Integration Windows machine that supports ``pysam``. However, if you have built
+``HTSeq`` for Windows, please open an issue and we'll try and include it in the
+release.
Installation
~~~~~~~~~~~~
@@ -57,6 +59,20 @@ To install directly from PyPI:
</div>
+To install a specific version (e.g. version 0.11.0):
+
+.. raw:: html
+
+ <div class="highlight highlight-source-shell">
+
+::
+
+ pip install 'HTSeq==0.11.0'
+
+.. raw:: html
+
+ </div>
+
If this fails, please install all dependencies first:
.. raw:: html
=====================================
VERSION
=====================================
@@ -1 +1 @@
-0.10.0
+0.11.2
=====================================
python2/HTSeq/__init__.py
=====================================
@@ -7,21 +7,14 @@ import itertools
import warnings
import os
import shlex
+import sys
-try:
- from _HTSeq import *
-except ImportError:
- if os.path.isfile("setup.py"):
- raise ImportError("Cannot import 'HTSeq' when working directory is HTSeq's own build directory.")
- else:
- raise
+from _HTSeq import *
from _version import __version__
-#from vcf_reader import *
-
#########################
-## Utils
+# Utils
#########################
@@ -82,7 +75,7 @@ class FileOrSequence(object):
#########################
-## Features
+# Features
#########################
class GenomicFeature(object):
@@ -137,9 +130,10 @@ class GenomicFeature(object):
sep = "="
else:
sep = " "
- attr_str = '; '.join(['%s%s\"%s\"' % (ak, sep, attr[ak]) for ak in attr])
+ attr_str = '; '.join(
+ ['%s%s\"%s\"' % (ak, sep, attr[ak]) for ak in attr])
return "\t".join(str(a) for a in (self.iv.chrom, source,
- self.type, self.iv.start+1, self.iv.end, score,
+ self.type, self.iv.start + 1, self.iv.end, score,
self.iv.strand, frame, attr_str)) + "\n"
@@ -151,24 +145,27 @@ def parse_GFF_attribute_string(attrStr, extra_return_first_value=False):
"""Parses a GFF attribute string and returns it as a dictionary.
If 'extra_return_first_value' is set, a pair is returned: the dictionary
- and the value of the first attribute. This might be useful if this is the ID.
+ and the value of the first attribute. This might be useful if this is the
+ ID.
"""
if attrStr.endswith("\n"):
attrStr = attrStr[:-1]
d = {}
first_val = "_unnamed_"
- for (i, attr) in itertools.izip(itertools.count(), _HTSeq.quotesafe_split(attrStr)):
+ for (i, attr) in itertools.izip(
+ itertools.count(),
+ _HTSeq.quotesafe_split(attrStr)):
if _re_attr_empty.match(attr):
continue
if attr.count('"') not in (0, 2):
- raise ValueError("The attribute string seems to contain mismatched quotes.")
+ raise ValueError(
+ "The attribute string seems to contain mismatched quotes.")
mo = _re_attr_main.match(attr)
if not mo:
raise ValueError("Failure parsing GFF attribute line")
val = mo.group(2)
if val.startswith('"') and val.endswith('"'):
val = val[1:-1]
- # val = urllib.unquote(val)
d[intern(mo.group(1))] = intern(val)
if extra_return_first_value and i == 0:
first_val = val
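
For reference, a minimal usage sketch of the attribute parser above (Python 3 spelling; the attribute string is illustrative):

    import HTSeq

    attrs = HTSeq.parse_GFF_attribute_string(
        'gene_id "ENSG00000223972"; gene_name "DDX11L1"')
    # attrs == {'gene_id': 'ENSG00000223972', 'gene_name': 'DDX11L1'}
    attrs, first = HTSeq.parse_GFF_attribute_string(
        'gene_id "ENSG00000223972"; gene_name "DDX11L1"', True)
    # first == 'ENSG00000223972', handy when the first attribute is the ID
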
@@ -210,9 +207,15 @@ class GFF_Reader(FileOrSequence):
strand, frame, attributeStr) = line.split("\t", 8)
(attr, name) = parse_GFF_attribute_string(attributeStr, True)
if self.end_included:
- iv = GenomicInterval(seqname, int(start)-1, int(end), strand)
+ iv = GenomicInterval(
+ seqname,
+ int(start) - 1, int(end),
+ strand)
else:
- iv = GenomicInterval(seqname, int(start)-1, int(end)-1, strand)
+ iv = GenomicInterval(
+ seqname,
+ int(start) - 1, int(end) - 1,
+ strand)
f = GenomicFeature(name, feature, iv)
if score != ".":
score = float(score)
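
The two branches implement the usual coordinate shift: GFF/GTF positions are 1-based with inclusive ends, while GenomicInterval is 0-based and half-open. A quick sketch:

    import HTSeq

    # end_included=True (the GFF/GTF convention): feature 1001..2000
    iv = HTSeq.GenomicInterval("chr1", 1001 - 1, 2000, "+")  # [1000, 2000)
    assert iv.length == 1000
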
@@ -224,6 +227,7 @@ class GFF_Reader(FileOrSequence):
f.attr = attr
yield f
+
def make_feature_dict(feature_sequence):
"""A feature dict is a convenient way to organize a sequence of Feature
object (which you have got, e.g., from parse_GFF).
@@ -260,16 +264,17 @@ def make_feature_dict(feature_sequence):
#########################
-## GenomicArray
+# GenomicArray
#########################
def read_chrom_lens(filename, delimiter="\t"):
- return dict(((chrom, int(len))
- for chrom, len in csv.reader(open(filename), delimiter=delimiter)))
+ return dict(
+ ((chrom, int(len))
+ for chrom, len in csv.reader(open(filename), delimiter=delimiter)))
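
read_chrom_lens expects a two-column, delimiter-separated file of chromosome names and lengths; a sketch with illustrative values:

    import HTSeq

    # chrom_lens.tsv (tab-separated):
    #   chr1    248956422
    #   chr2    242193529
    lens = HTSeq.read_chrom_lens("chrom_lens.tsv")
    # lens == {'chr1': 248956422, 'chr2': 242193529}
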
#########################
-## Sequence readers
+# Sequence readers
#########################
_re_fasta_header_line = re.compile(r'>\s*(\S+)\s*(.*)')
@@ -286,87 +291,96 @@ class FastaReader(FileOrSequence):
self.raw_iterator = raw_iterator
def __iter__(self):
- seq = None
- for line in FileOrSequence.__iter__(self):
- if line.startswith(">"):
- if seq:
- if self.raw_iterator:
- s = (seq, name, descr)
- else:
- s = Sequence(seq, name)
- s.descr = descr
- yield s
- mo = _re_fasta_header_line.match(line)
- name = mo.group(1)
- descr = mo.group(2)
- seq = ""
- else:
- assert seq is not None, "FASTA file does not start with '>'."
- seq += line[:-1]
- if seq is not None:
- if self.raw_iterator:
- s = (seq, name, descr)
- else:
- s = Sequence(seq, name)
- s.descr = descr
- yield s
+ seq = None
+ name = None
+ descr = None
+ for line in FileOrSequence.__iter__(self):
+ if line.startswith(">"):
+ if seq:
+ if self.raw_iterator:
+ s = (seq, name, descr)
+ else:
+ s = Sequence(seq, name)
+ s.descr = descr
+ yield s
+ mo = _re_fasta_header_line.match(line)
+ name = mo.group(1)
+ descr = mo.group(2)
+ seq = ""
+ else:
+ assert seq is not None, "FASTA file does not start with '>'."
+ seq += line[:-1]
+ if seq is not None:
+ if self.raw_iterator:
+ s = (seq, name, descr)
+ else:
+ s = Sequence(seq, name)
+ s.descr = descr
+ yield s
def get_sequence_lengths(self):
- seqname = None
- seqlengths = {}
- for line in FileOrSequence.__iter__(self):
- if line.startswith(">"):
- if seqname is not None:
- seqlengths[ seqname ] = length
- mo = _re_fasta_header_line.match(line)
- seqname = mo.group(1)
- length = 0
- else:
- assert seqname is not None, "FASTA file does not start with '>'."
- length += len(line.rstrip())
- if seqname is not None:
- seqlengths[ seqname ] = length
- return seqlengths
+ seqname = None
+ length = 0
+ seqlengths = {}
+ for line in FileOrSequence.__iter__(self):
+ if line.startswith(">"):
+ if seqname is not None:
+ seqlengths[seqname] = length
+ mo = _re_fasta_header_line.match(line)
+ seqname = mo.group(1)
+ length = 0
+ else:
+ assert seqname is not None, "FASTA file does not start with '>'."
+ length += len(line.rstrip())
+ if seqname is not None:
+ seqlengths[seqname] = length
+ return seqlengths
@staticmethod
def _import_pysam():
- global pysam
- try:
- import pysam
- except ImportError:
- sys.stderr.write("Please install the 'pysam' package to be able to use the Fasta indexing functionality.")
- raise
-
- def build_index(self, force = False):
- self._import_pysam()
- if not isinstance(self.fos, str):
- raise TypeError, "This function only works with FastaReader objects " + \
- "connected to a fasta file via file name"
- index_filename = self.fos + ".fai"
- if os.access(index_filename, os.R_OK):
- if (not force) and os.stat(self.filename_or_sequence).st_mtime <= \
- os.stat(index_filename).st_mtime:
- # index is up to date
- return
- pysam.faidx(self.fos)
- if not os.access(index_filename, os.R_OK):
- raise SystemError, "Building of Fasta index failed due to unknown error."
+ global pysam
+ try:
+ import pysam
+ except ImportError:
+ sys.stderr.write(
+ "Please install the 'pysam' package to be able to use the Fasta indexing functionality.")
+ raise
+
+ def build_index(self, force=False):
+ self._import_pysam()
+ if not isinstance(self.fos, str):
+ raise TypeError(
+ "This function only works with FastaReader objects " +
+ "connected to a fasta file via file name")
+ index_filename = self.fos + ".fai"
+ if os.access(index_filename, os.R_OK):
+ if (not force) and os.stat(self.filename_or_sequence).st_mtime <= \
+ os.stat(index_filename).st_mtime:
+ # index is up to date
+ return
+ pysam.faidx(self.fos)
+ if not os.access(index_filename, os.R_OK):
+ raise SystemError(
+ "Building of Fasta index failed due to unknown error.")
def __getitem__(self, iv):
- if not isinstance(iv, GenomicInterval):
- raise TypeError, "GenomicInterval expected as key."
- if not isinstance(self.fos, str):
- raise TypeError, "This function only works with FastaReader objects " + \
- "connected to a fasta file via file name"
- self._import_pysam()
- fasta = pysam.faidx(self.fos, "%s:%d-%d" % (iv.chrom, iv.start, iv.end-1))
- ans = list(FastaReader(fasta))
- assert len(ans) == 1
- ans[0].name = str(iv)
- if iv.strand != "-":
- return ans[0]
- else:
- return ans[0].get_reverse_complement()
+ if not isinstance(iv, GenomicInterval):
+ raise TypeError("GenomicInterval expected as key.")
+ if not isinstance(self.fos, str):
+ raise TypeError(
+ "This function only works with FastaReader objects " +
+ "connected to a fasta file via file name")
+ self._import_pysam()
+ fasta = pysam.faidx(
+ self.fos,
+ "%s:%d-%d" % (iv.chrom, iv.start, iv.end - 1))
+ ans = list(FastaReader(fasta))
+ assert len(ans) == 1
+ ans[0].name = str(iv)
+ if iv.strand != "-":
+ return ans[0]
+ else:
+ return ans[0].get_reverse_complement()
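
A usage sketch for the reader above (file name hypothetical; building the index and slicing by interval both require pysam):

    import HTSeq

    fa = HTSeq.FastaReader("genome.fa")
    for s in fa:                          # yields Sequence objects
        print(s.name, len(s.seq))
    lengths = fa.get_sequence_lengths()   # {name: length}
    piece = fa[HTSeq.GenomicInterval("chr1", 100, 200, "-")]
    # returned reverse-complemented, since the interval is on '-'
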
class FastqReader(FileOrSequence):
@@ -401,20 +415,26 @@ class FastqReader(FileOrSequence):
if not qual.endswith("\n"):
qual += "\n"
if not id1.startswith("@"):
- raise ValueError("Primary ID line in FASTQ file does"
- "not start with '@'. Either this is not FASTQ data or the parser got out of sync.")
+ raise ValueError(
+ "Primary ID line in FASTQ file does "
+ "not start with '@'. Either this is not FASTQ data or the "
+ "parser got out of sync.")
if not id2.startswith("+"):
- raise ValueError("Secondary ID line in FASTQ file does"
- "not start with '+'. Maybe got out of sync.")
+ raise ValueError(
+ "Secondary ID line in FASTQ file does"
+ "not start with '+'. Maybe got out of sync.")
if len(id2) > 2 and id1[1:] != id2[1:]:
- raise ValueError("Primary and secondary ID line in FASTQ"
- "disagree.")
+ raise ValueError(
+ "Primary and secondary ID line in FASTQ"
+ "disagree.")
if self.raw_iterator:
s = (seq[:-1], id1[1:-1], qual[:-1], self.qual_scale)
else:
- s = SequenceWithQualities(seq[:-1], id1[1:-1], qual[:-1],
- self.qual_scale)
+ s = SequenceWithQualities(
+ seq[:-1], id1[1:-1],
+ qual[:-1],
+ self.qual_scale)
yield s
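
The FASTQ counterpart, as a sketch (file name hypothetical):

    import HTSeq

    for read in HTSeq.FastqReader("reads.fastq"):
        print(read.name, read.seq[:10], read.qual[:10])  # qual: int array
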
@@ -430,119 +450,145 @@ class BowtieReader(FileOrSequence):
except ValueError:
if line.startswith("Reported "):
continue
- warnings.warn("BowtieReader: Ignoring the following line, which could not be parsed:\n%s\n" % line,
- RuntimeWarning)
+ warnings.warn(
+ "BowtieReader: Ignoring the following line, which could "
+ "not be parsed:\n%s\n" % line,
+ RuntimeWarning)
yield algnt
def bundle_multiple_alignments(sequence_of_alignments):
- """Some alignment programs, e.g., Bowtie, can output multiple alignments,
- i.e., the same read is reported consecutively with different alignments.
- This function takes an iterator over alignments and bundles consecutive
- alignments regarding the same read to a list of Alignment objects and
- returns an iterator over these.
- """
- alignment_iter = iter(sequence_of_alignments)
- algnt = alignment_iter.next()
- ma = [ algnt ]
- for algnt in alignment_iter:
- if algnt.read.name != ma[0].read.name:
- yield ma
- ma = [ algnt ]
- else:
- ma.append(algnt)
- yield ma
+ """Some alignment programs, e.g., Bowtie, can output multiple alignments,
+ i.e., the same read is reported consecutively with different alignments.
+ This function takes an iterator over alignments and bundles consecutive
+ alignments regarding the same read to a list of Alignment objects and
+ returns an iterator over these.
+ """
+ alignment_iter = iter(sequence_of_alignments)
+ algnt = alignment_iter.next()
+ ma = [algnt]
+ for algnt in alignment_iter:
+ if algnt.read.name != ma[0].read.name:
+ yield ma
+ ma = [algnt]
+ else:
+ ma.append(algnt)
+ yield ma
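
A sketch of the bundling helper in use, assuming Bowtie output in a hypothetical file 'hits.bwtout':

    import HTSeq

    for bundle in HTSeq.bundle_multiple_alignments(
            HTSeq.BowtieReader("hits.bwtout")):
        # every alignment in the bundle shares one read name
        print(bundle[0].read.name, len(bundle))
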
class SolexaExportAlignment(Alignment):
- """Iterating over SolexaExportReader objects will yield SoelxaExportRecord
- objects. These have four fields:
- read - a SequenceWithQualities object
- aligned - a boolean, indicating whether the object was aligned
- iv - a GenomicInterval giving the alignment (or None, if not aligned)
- passed_filter - a boolean, indicating whether the object passed the filter
- nomatch_code - a code indicating why no match was found (or None, if the
- read was aligned)
-
- As long as 'aligned' is True, a SolexaExportRecord can be treated as an
- Alignment object.
- """
-
- def __init__(self):
- # Data is filled in by SolexaExportRecord
- pass
-
- def __repr__(self):
- if self.aligned:
- return "< %s object: Read '%s', aligned to %s >" % (
- self.__class__.__name__, self.read.name, self.iv)
- else:
- return "< %s object: Non-aligned read '%s' >" % (
- self.__class__.__name__, self.read.name)
+ """Iterating over SolexaExportReader objects will yield SoelxaExportRecord
+ objects. These have four fields:
+ read - a SequenceWithQualities object
+ aligned - a boolean, indicating whether the object was aligned
+ iv - a GenomicInterval giving the alignment (or None, if not aligned)
+ passed_filter - a boolean, indicating whether the object passed the filter
+ nomatch_code - a code indicating why no match was found (or None, if the
+ read was aligned)
+
+ As long as 'aligned' is True, a SolexaExportRecord can be treated as an
+ Alignment object.
+ """
+
+ def __init__(self):
+ # Data is filled in by SolexaExportRecord
+ pass
+
+ def __repr__(self):
+ if self.aligned:
+ return "< %s object: Read '%s', aligned to %s >" % (
+ self.__class__.__name__, self.read.name, self.iv)
+ else:
+ return "< %s object: Non-aligned read '%s' >" % (
+ self.__class__.__name__, self.read.name)
+
class SolexaExportReader(FileOrSequence):
- """Parser for *_export.txt files from the SolexaPipeline software.
-
- Iterating over a SolexaExportReader yields SolexaExportRecord objects.
- """
-
- def __init__(self, filename_or_sequence, solexa_old = False):
- FileOrSequence.__init__(self, filename_or_sequence)
- if solexa_old:
- self.qualscale = "solexa-old"
- else:
- self.qualscale = "solexa"
-
- @classmethod
- def parse_line_bare(dummy, line):
- if line[-1] == "\n":
- line = line[:-1]
- res = {}
- (res['machine'], res['run_number'], res['lane'], res['tile'], res['x_coord'],
- res['y_coord'], res['index_string'], res['read_nbr'], res['read_seq'],
- res['qual_str'], res['chrom'], res['contig'], res['pos'], res['strand'],
- res['match_descr'], res['single_read_algnt_score'],
- res['paired_read_algnt_score'], res['partner_chrom'], res['partner_contig'],
- res['partner_offset'], res['partner_strand'], res['passed_filtering']) \
- = line.split("\t")
- return res
-
- def __iter__(self):
- for line in FileOrSequence.__iter__(self):
- record = SolexaExportAlignment()
- fields = SolexaExportReader.parse_line_bare(line)
- if fields['read_nbr'] != "1":
- warnings.warn("Paired-end read encountered. PE is so far supported only for " +
- "SAM files, not yet for SolexaExport. All PE-related fields are ignored. ")
- record.read = SequenceWithQualities(
- fields['read_seq'],
- "%s:%s:%s:%s:%s#0" % (fields['machine'], fields['lane'], fields['tile'],
- fields['x_coord'], fields['y_coord']),
- fields['qual_str'], self.qualscale)
- if fields['passed_filtering'] == 'Y':
- record.passed_filter = True
- elif fields['passed_filtering'] == 'N':
- record.passed_filter = False
- else:
- raise ValueError, "Illegal 'passed filter' value in Solexa export data: '%s'." % fields['passed_filtering']
- record.index_string = fields['index_string']
- if fields['pos'] == '':
- record.iv = None
- record.nomatch_code = fields['chrom']
- else:
- if fields['strand'] == 'F':
- strand = '+'
- elif fields['strand'] == 'R':
- strand = '-'
+ """Parser for *_export.txt files from the SolexaPipeline software.
+
+ Iterating over a SolexaExportReader yields SolexaExportRecord objects.
+ """
+
+ def __init__(self, filename_or_sequence, solexa_old=False):
+ FileOrSequence.__init__(self, filename_or_sequence)
+ if solexa_old:
+ self.qualscale = "solexa-old"
+ else:
+ self.qualscale = "solexa"
+
+ @classmethod
+ def parse_line_bare(dummy, line):
+ if line[-1] == "\n":
+ line = line[:-1]
+ res = {}
+ (res['machine'],
+ res['run_number'],
+ res['lane'],
+ res['tile'],
+ res['x_coord'],
+ res['y_coord'],
+ res['index_string'],
+ res['read_nbr'],
+ res['read_seq'],
+ res['qual_str'],
+ res['chrom'],
+ res['contig'],
+ res['pos'],
+ res['strand'],
+ res['match_descr'],
+ res['single_read_algnt_score'],
+ res['paired_read_algnt_score'],
+ res['partner_chrom'],
+ res['partner_contig'],
+ res['partner_offset'],
+ res['partner_strand'],
+ res['passed_filtering']) = line.split("\t")
+ return res
+
+ def __iter__(self):
+ for line in FileOrSequence.__iter__(self):
+ record = SolexaExportAlignment()
+ fields = SolexaExportReader.parse_line_bare(line)
+ if fields['read_nbr'] != "1":
+ warnings.warn(
+ "Paired-end read encountered. PE is so far supported only "
+ "for SAM files, not yet for SolexaExport. All PE-related "
+ "fields are ignored.")
+ record.read = SequenceWithQualities(
+ fields['read_seq'],
+ "%s:%s:%s:%s:%s#0" % (fields['machine'],
+ fields['lane'],
+ fields['tile'],
+ fields['x_coord'],
+ fields['y_coord']),
+ fields['qual_str'], self.qualscale)
+ if fields['passed_filtering'] == 'Y':
+ record.passed_filter = True
+ elif fields['passed_filtering'] == 'N':
+ record.passed_filter = False
+ else:
+ raise ValueError(
+ "Illegal 'passed filter' value in Solexa export data: '%s'." % fields['passed_filtering'])
+ record.index_string = fields['index_string']
+ if fields['pos'] == '':
+ record.iv = None
+ record.nomatch_code = fields['chrom']
else:
- raise ValueError, "Illegal strand value in Solexa export data."
- start = int(fields['pos'])
- chrom = fields['chrom']
- if fields['chrom'] == "":
- chrom = fields['contig']
- record.iv = GenomicInterval(chrom, start,
- start + len(fields['read_seq']), strand)
- yield record
+ if fields['strand'] == 'F':
+ strand = '+'
+ elif fields['strand'] == 'R':
+ strand = '-'
+ else:
+ raise ValueError(
+ "Illegal strand value in Solexa export data.")
+ start = int(fields['pos'])
+ chrom = fields['chrom']
+ if fields['chrom'] == "":
+ chrom = fields['contig']
+ record.iv = GenomicInterval(
+ chrom, start,
+ start + len(fields['read_seq']), strand)
+ yield record
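
A reading sketch for this legacy format (file name hypothetical):

    import HTSeq

    for rec in HTSeq.SolexaExportReader("s_1_export.txt"):
        if rec.passed_filter and rec.aligned:
            print(rec.read.name, rec.iv)
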
class SAM_Reader(FileOrSequence):
@@ -582,15 +628,53 @@ class GenomicArrayOfSets(GenomicArray):
###########################
-## paired-end handling
+# paired-end handling
###########################
-
-def pair_SAM_alignments(alignments, bundle=False):
+def pair_SAM_alignments(
+ alignments,
+ bundle=False,
+ primary_only=False):
+'''Iterate over SAM alignments, name-sorted paired-end
+
+ Args:
+ alignments (iterator of SAM/BAM alignments): the alignments to wrap
+ bundle (bool): if True, bundle all alignments from one read pair into a
+ single yield. If False (default), each pair of alignments is
+ yielded separately.
+ primary_only (bool): for each read, consider only the primary line
+ (SAM flag 0x900 = 0). The SAM specification requires one and only
+ one of those for each read.
+
+ Yields:
+ 2-tuples with each pair of alignments or, if bundle==True, each bundled
+ list of alignments.
+ '''
mate_missing_count = [0]
def process_list(almnt_list):
+ '''Transform a list of alignment with the same read name into pairs
+
+ Args:
+ almnt_list (list): alignments to process
+
+ Yields:
+ each pair of alignments.
+
+ This function is needed because each line of a BAM file is not a read
+ but an alignment. For uniquely mapped and unmapped reads, those two are
+ the same. For multimapped reads, however, there can be more than one
+ alignment for each read. Also, it is normal for a mapper to uniquely
+ map one read and multimap its mate.
+
+ This function goes down the list of alignments for a given read name
+ and tries to find the first mate. So if read 1 is uniquely mapped but
+ read 2 is mapped 4 times, only (read 1, read 2 - first occurrence) will
+ yield; the other 3 alignments of read 2 are ignored.
+ '''
+
+
while len(almnt_list) > 0:
a1 = almnt_list.pop(0)
# Find its mate
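
A sketch of the pairing iterator documented above, over a name-sorted BAM (file name hypothetical):

    import HTSeq

    bam = HTSeq.BAM_Reader("namesorted.bam")
    for first, second in HTSeq.pair_SAM_alignments(bam, primary_only=True):
        if first is None or second is None:
            continue                      # mate record was missing
        print(first.read.name)
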
@@ -608,7 +692,8 @@ def pair_SAM_alignments(alignments, bundle=False):
if a1.mate_aligned:
mate_missing_count[0] += 1
if mate_missing_count[0] == 1:
- warnings.warn("Read " + a1.read.name + " claims to have an aligned mate " +
+ warnings.warn(
+ "Read " + a1.read.name + " claims to have an aligned mate " +
"which could not be found in an adjacent line.")
a2 = None
if a2 is not None:
@@ -623,9 +708,14 @@ def pair_SAM_alignments(alignments, bundle=False):
current_name = None
for almnt in alignments:
if not almnt.paired_end:
- raise ValueError, "'pair_alignments' needs a sequence of paired-end alignments"
+ raise ValueError(
+ "'pair_alignments' needs a sequence of paired-end alignments")
if almnt.pe_which == "unknown":
- raise ValueError, "Paired-end read found with 'unknown' 'pe_which' status."
+ raise ValueError(
+ "Paired-end read found with 'unknown' 'pe_which' status.")
+ # FIXME: almnt.not_primary_alignment currently means secondary
+ if primary_only and (almnt.not_primary_alignment or almnt.supplementary):
+ continue
if almnt.read.name == current_name:
almnt_list.append(almnt)
else:
@@ -642,118 +732,144 @@ def pair_SAM_alignments(alignments, bundle=False):
for p in process_list(almnt_list):
yield p
if mate_missing_count[0] > 1:
- warnings.warn("%d reads with missing mate encountered." % mate_missing_count[0])
-
-
-def pair_SAM_alignments_with_buffer(alignments, max_buffer_size=30000000):
-
- almnt_buffer = {}
- ambiguous_pairing_counter = 0
- for almnt in alignments:
-
- if not almnt.paired_end:
- raise ValueError, "Sequence of paired-end alignments expected, but got single-end alignment."
- if almnt.pe_which == "unknown":
- raise ValueError, "Cannot process paired-end alignment found with 'unknown' 'pe_which' status."
-
- matekey = (
- almnt.read.name,
- "second" if almnt.pe_which == "first" else "first",
- almnt.mate_start.chrom if almnt.mate_aligned else None,
- almnt.mate_start.pos if almnt.mate_aligned else None,
- almnt.iv.chrom if almnt.aligned else None,
- almnt.iv.start if almnt.aligned else None,
- -almnt.inferred_insert_size if almnt.aligned and almnt.mate_aligned else None)
-
- if matekey in almnt_buffer:
- if len(almnt_buffer[ matekey ]) == 1:
- mate = almnt_buffer[ matekey ][ 0 ]
- del almnt_buffer[ matekey ]
- else:
- mate = almnt_buffer[ matekey ].pop(0)
- if ambiguous_pairing_counter == 0:
- ambiguous_pairing_first_occurance = matekey
- ambiguous_pairing_counter += 1
- if almnt.pe_which == "first":
- yield (almnt, mate)
- else:
- yield (mate, almnt)
- else:
- almntkey = (
- almnt.read.name, almnt.pe_which,
- almnt.iv.chrom if almnt.aligned else None,
- almnt.iv.start if almnt.aligned else None,
+ warnings.warn("%d reads with missing mate encountered." %
+ mate_missing_count[0])
+
+
+def pair_SAM_alignments_with_buffer(
+ alignments,
+ max_buffer_size=30000000,
+ primary_only=False):
+ '''Iterate over SAM alignments with buffer, position-sorted paired-end
+
+ Args:
+ alignments (iterator of SAM/BAM alignments): the alignments to wrap
+ max_buffer_size (int): maximal number of alignments to keep in memory.
+ primary_only (bool): for each read, consider only the primary line
+ (SAM flag 0x900 = 0). The SAM specification requires one and only
+ one of those for each read.
+
+ Yields:
+ 2-tuples with each pair of alignments.
+ '''
+
+ almnt_buffer = {}
+ ambiguous_pairing_counter = 0
+ for almnt in alignments:
+ if not almnt.paired_end:
+ raise ValueError(
+ "Sequence of paired-end alignments expected, but got single-end alignment.")
+ if almnt.pe_which == "unknown":
+ raise ValueError(
+ "Cannot process paired-end alignment found with 'unknown' 'pe_which' status.")
+ # FIXME: almnt.not_primary_alignment currently means secondary
+ if primary_only and (almnt.not_primary_alignment or almnt.supplementary):
+ continue
+
+ matekey = (
+ almnt.read.name,
+ "second" if almnt.pe_which == "first" else "first",
almnt.mate_start.chrom if almnt.mate_aligned else None,
almnt.mate_start.pos if almnt.mate_aligned else None,
- almnt.inferred_insert_size if almnt.aligned and almnt.mate_aligned else None)
- if almntkey not in almnt_buffer:
- almnt_buffer[ almntkey ] = [ almnt ]
- else:
- almnt_buffer[ almntkey ].append(almnt)
- if len(almnt_buffer) > max_buffer_size:
- raise ValueError, "Maximum alignment buffer size exceeded while pairing SAM alignments."
-
- if len(almnt_buffer) > 0:
- warnings.warn("Mate records missing for %d records; first such record: %s." %
- (len(almnt_buffer), str(almnt_buffer.values()[0][0])))
- for almnt_list in almnt_buffer.values():
- for almnt in almnt_list:
+ almnt.iv.chrom if almnt.aligned else None,
+ almnt.iv.start if almnt.aligned else None,
+ -almnt.inferred_insert_size if almnt.aligned and almnt.mate_aligned else None)
+
+ if matekey in almnt_buffer:
+ if len(almnt_buffer[matekey]) == 1:
+ mate = almnt_buffer[matekey][0]
+ del almnt_buffer[matekey]
+ else:
+ mate = almnt_buffer[matekey].pop(0)
+ if ambiguous_pairing_counter == 0:
+ ambiguous_pairing_first_occurance = matekey
+ ambiguous_pairing_counter += 1
if almnt.pe_which == "first":
- yield (almnt, None)
+ yield (almnt, mate)
+ else:
+ yield (mate, almnt)
+ else:
+ almntkey = (
+ almnt.read.name, almnt.pe_which,
+ almnt.iv.chrom if almnt.aligned else None,
+ almnt.iv.start if almnt.aligned else None,
+ almnt.mate_start.chrom if almnt.mate_aligned else None,
+ almnt.mate_start.pos if almnt.mate_aligned else None,
+ almnt.inferred_insert_size if almnt.aligned and almnt.mate_aligned else None)
+ if almntkey not in almnt_buffer:
+ almnt_buffer[almntkey] = [almnt]
else:
- yield (None, almnt)
+ almnt_buffer[almntkey].append(almnt)
+ if len(almnt_buffer) > max_buffer_size:
+ raise ValueError(
+ "Maximum alignment buffer size exceeded while pairing SAM alignments.")
+
+ if len(almnt_buffer) > 0:
+ warnings.warn(
+ "Mate records missing for %d records; first such record: %s." %
+ (len(almnt_buffer), str(almnt_buffer.values()[0][0])))
+ for almnt_list in almnt_buffer.values():
+ for almnt in almnt_list:
+ if almnt.pe_which == "first":
+ yield (almnt, None)
+ else:
+ yield (None, almnt)
- if ambiguous_pairing_counter > 0:
- warnings.warn("Mate pairing was ambiguous for %d records; mate key for first such record: %s." %
- (ambiguous_pairing_counter, str(ambiguous_pairing_first_occurance)))
+ if ambiguous_pairing_counter > 0:
+ warnings.warn(
+ "Mate pairing was ambiguous for %d records; mate key for first such record: %s." %
+ (ambiguous_pairing_counter, str(ambiguous_pairing_first_occurance)))
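
The buffered variant targets position-sorted input; a matching sketch (file name hypothetical):

    import HTSeq

    bam = HTSeq.BAM_Reader("possorted.bam")
    pairs = HTSeq.pair_SAM_alignments_with_buffer(
        bam, max_buffer_size=3000000, primary_only=True)
    for first, second in pairs:
        pass       # either side may be None if the mate never turns up
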
###########################
-## variant calls
+# variant calls
###########################
_re_vcf_meta_comment = re.compile("^##([a-zA-Z]+)\=(.*)$")
-_re_vcf_meta_descr = re.compile('ID=[^,]+,?|Number=[^,]+,?|Type=[^,]+,?|Description="[^"]+",?')
+_re_vcf_meta_descr = re.compile(
+ 'ID=[^,]+,?|Number=[^,]+,?|Type=[^,]+,?|Description="[^"]+",?')
_re_vcf_meta_types = re.compile("[INFO|FILTER|FORMAT]")
_vcf_typemap = {
- "Integer":int,
- "Float":float,
- "String":str,
- "Flag":bool
+ "Integer": int,
+ "Float": float,
+ "String": str,
+ "Flag": bool
}
+
class VariantCall(object):
- def __init__(self, chrom = None, pos = None, identifier = None, ref = None, alt = None, qual = None, filtr = None, info = None):
- self.chrom = chrom
- self.pos = pos
- self.id = identifier
- self.ref = ref
- self.alt = alt
- self.qual = qual
+ def __init__(self, chrom=None, pos=None, identifier=None, ref=None,
+ alt=None, qual=None, filtr=None, info=None):
+ self.chrom = chrom
+ self.pos = pos
+ self.id = identifier
+ self.ref = ref
+ self.alt = alt
+ self.qual = qual
self.filter = filtr
- self.info = info
+ self.info = info
self._original_line = None
@classmethod
def fromdict(cls, dictionary):
ret = cls()
- ret.chrom = dictionary["chrom"]
- ret.pos = dictionary["pos"]
- ret.id = dictionary["id"]
- ret.ref = dictionary["ref"]
- ret.alt = dictionary["alt"]
- ret.qual = dictionary["qual"]
- ret.filter = dictionary["filter"]
- ret.info = dictionary["info"]
+ ret.chrom = dictionary["chrom"]
+ ret.pos = dictionary["pos"]
+ ret.id = dictionary["id"]
+ ret.ref = dictionary["ref"]
+ ret.alt = dictionary["alt"]
+ ret.qual = dictionary["qual"]
+ ret.filter = dictionary["filter"]
+ ret.info = dictionary["info"]
ret._original_line = None
@classmethod
- def fromline(cls, line, nsamples = 0, sampleids = []):
+ def fromline(cls, line, nsamples=0, sampleids=[]):
ret = cls()
if nsamples == 0:
ret.format = None
@@ -763,9 +879,10 @@ class VariantCall(object):
ret.chrom, ret.pos, ret.id, ret.ref, ret.alt, ret.qual, ret.filter, ret.info = lsplit[:8]
ret.format = lsplit[8].split(":")
ret.samples = {}
- spos=9
+ spos = 9
for sid in sampleids:
- ret.samples[ sid ] = dict((name, value) for (name, value) in itertools.izip(ret.format, lsplit[spos].split(":")))
+ ret.samples[sid] = dict((name, value) for (
+ name, value) in itertools.izip(ret.format, lsplit[spos].split(":")))
spos += 1
ret.pos = GenomicPosition(ret.chrom, int(ret.pos))
ret.alt = ret.alt.split(",")
@@ -779,28 +896,29 @@ class VariantCall(object):
return self.info
def get_original_line(self):
- warnings.warn("Original line is empty, probably this object was created from scratch and not from a line in a .vcf file!")
- return self._original_line
+ warnings.warn(
+ "Original line is empty, probably this object was created from scratch and not from a line in a .vcf file!")
+ return self._original_line
def sampleline(self):
- if self.format == None:
- print >> sys.stderr, "No samples in this variant call!"
- return ""
- keys = self.format
- ret = [ ":".join(keys) ]
- for sid in self.samples:
- tmp = []
- for k in keys:
- if k in self.samples[sid]:
- tmp.append(self.samples[sid][k])
- ret.append(":".join(tmp))
- return "\t".join(ret)
+ if self.format == None:
+ sys.stderr.write("No samples in this variant call!\n")
+ return ""
+ keys = self.format
+ ret = [":".join(keys)]
+ for sid in self.samples:
+ tmp = []
+ for k in keys:
+ if k in self.samples[sid]:
+ tmp.append(self.samples[sid][k])
+ ret.append(":".join(tmp))
+ return "\t".join(ret)
def to_line(self):
- if self.format == None:
- return "\t".join(map(str, [ self.pos.chrom, self.pos.pos, self.id, self.ref, ",".join(self.alt), self.qual, self.filter, self.infoline() ])) + "\n"
- else:
- return "\t".join(map(str, [ self.pos.chrom, self.pos.pos, self.id, self.ref, ",".join(self.alt), self.qual, self.filter, self.infoline(), self.sampleline() ])) + "\n"
+ if self.format == None:
+ return "\t".join(map(str, [self.pos.chrom, self.pos.pos, self.id, self.ref, ",".join(self.alt), self.qual, self.filter, self.infoline()])) + "\n"
+ else:
+ return "\t".join(map(str, [self.pos.chrom, self.pos.pos, self.id, self.ref, ",".join(self.alt), self.qual, self.filter, self.infoline(), self.sampleline()])) + "\n"
def __descr__(self):
return "<VariantCall at %s, ref '%s', alt %s >" % (str(self.pos).rstrip("/."), self.ref, str(self.alt).strip("[]"))
@@ -817,9 +935,9 @@ class VariantCall(object):
tmp[token[0]] = map(infodict[token[0]], token[1].split(","))
else:
tmp[token[0]] = token[1].split(",")
- if len(tmp[ token[0] ]) == 1:
+ if len(tmp[token[0]]) == 1:
tmp[token[0]] = tmp[token[0]][0]
- else: #Flag attribute found
+ else: # Flag attribute found
tmp[token] = True
diff = set(infodict.keys()).difference(set(tmp.keys()))
for key in diff:
@@ -827,6 +945,7 @@ class VariantCall(object):
tmp[key] = False
self.info = tmp
+
class VCF_Reader(FileOrSequence):
def __init__(self, filename_or_sequence):
@@ -839,10 +958,12 @@ class VCF_Reader(FileOrSequence):
self.sampleids = []
def make_info_dict(self):
- self.infodict = dict((key, _vcf_typemap[self.info[key]["Type"]]) for key in self.info.keys())
+ self.infodict = dict(
+ (key,
+ _vcf_typemap[self.info[key]["Type"]]) for key in self.info.keys())
- def parse_meta(self, header_filename = None):
- if header_filename == None:
+ def parse_meta(self, header_filename=None):
+ if header_filename is None:
the_iter = FileOrSequence.__iter__(self)
else:
the_iter = open(header_filename, "r")
@@ -854,22 +975,25 @@ class VCF_Reader(FileOrSequence):
if mo:
value = mo.group(2)
if mo.group(1) == "INFO":
- value = dict(e.rstrip(",").split("=",1) for e in _re_vcf_meta_descr.findall(value))
+ value = dict(e.rstrip(",").split("=", 1)
+ for e in _re_vcf_meta_descr.findall(value))
key = value["ID"]
del value["ID"]
- self.info[ key ] = value
+ self.info[key] = value
elif mo.group(1) == "FILTER":
- value = dict(e.rstrip(",").split("=",1) for e in _re_vcf_meta_descr.findall(value))
+ value = dict(e.rstrip(",").split("=", 1)
+ for e in _re_vcf_meta_descr.findall(value))
key = value["ID"]
del value["ID"]
- self.filters[ key ] = value
+ self.filters[key] = value
elif mo.group(1) == "FORMAT":
- value = dict(e.rstrip(",").split("=",1) for e in _re_vcf_meta_descr.findall(value))
+ value = dict(e.rstrip(",").split("=", 1)
+ for e in _re_vcf_meta_descr.findall(value))
key = value["ID"]
del value["ID"]
- self.formats[ key ] = value
+ self.formats[key] = value
else:
- self.metadata[ mo.group(1) ] = mo.group(2)
+ self.metadata[mo.group(1)] = mo.group(2)
else:
self.sampleids = line.rstrip("\t\n").split("\t")[9:]
self.nsamples = len(self.sampleids)
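
Putting the VCF machinery together, a sketch (file name hypothetical):

    import HTSeq

    vcf = HTSeq.VCF_Reader("calls.vcf")
    vcf.parse_meta()        # fills .metadata, .info, .filters, .formats
    vcf.make_info_dict()    # maps INFO keys to Python types via _vcf_typemap
    for vc in vcf:          # yields VariantCall objects
        vc.unpack_info(vcf.infodict)
        print(vc.pos, vc.ref, vc.alt, vc.filter)
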
@@ -961,14 +1085,14 @@ class BAM_Reader(object):
try:
import pysam
except ImportError:
- sys.stderr.write("Please Install PySam to use the BAM_Reader Class (http://code.google.com/p/pysam/)")
+ sys.stderr.write(
+ "Please Install PySam to use the BAM_Reader Class (http://code.google.com/p/pysam/)")
raise
def __iter__(self):
sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
self.record_no = 0
for pa in sf:
- #yield SAM_Alignment.from_pysam_AlignedRead(pa, sf)
yield SAM_Alignment.from_pysam_AlignedSegment(pa, sf)
self.record_no += 1
@@ -996,13 +1120,15 @@ class BAM_Reader(object):
def __getitem__(self, iv):
if not isinstance(iv, GenomicInterval):
- raise TypeError("Use a HTSeq.GenomicInterval to access regions within .bam-file!")
+ raise TypeError(
+ "Use a HTSeq.GenomicInterval to access regions within .bam-file!")
if self.sf is None:
self.sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
# NOTE: pysam 0.9 has renames _hasIndex into has_index
if (hasattr(self.sf, '_hasIndex') and (not self.sf._hasIndex())) or (not self.sf.has_index()):
- raise ValueError("The .bam-file has no index, random-access is disabled!")
- for pa in self.sf.fetch(iv.chrom, iv.start+1, iv.end):
+ raise ValueError(
+ "The .bam-file has no index, random-access is disabled!")
+ for pa in self.sf.fetch(iv.chrom, iv.start + 1, iv.end):
yield SAM_Alignment.from_pysam_AlignedRead(pa, self.sf)
def get_header_dict(self):
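
Random access hinges on that index check; a sketch (file name hypothetical, with a .bai index next to the BAM):

    import HTSeq

    bam = HTSeq.BAM_Reader("sample.bam")
    window = HTSeq.GenomicInterval("chr1", 100000, 101000, ".")
    for almnt in bam[window]:
        print(almnt.read.name, almnt.iv)
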
@@ -1011,11 +1137,13 @@ class BAM_Reader(object):
class BAM_Writer(object):
- def __init__(self, filename, template=None, referencenames=None, referencelengths=None, text=None, header=None):
+ def __init__(self, filename, template=None, referencenames=None,
+ referencelengths=None, text=None, header=None):
try:
import pysam
except ImportError:
- sys.stderr.write("Please Install PySam to use the BAM_Writer Class (http://code.google.com/p/pysam/)")
+ sys.stderr.write(
+ "Please Install PySam to use the BAM_Writer Class (http://code.google.com/p/pysam/)")
raise
self.filename = filename
@@ -1038,7 +1166,6 @@ class BAM_Writer(object):
return BAM_Writer(filename=fn, header=br.get_header_dict())
def write(self, alnmt):
- #self.sf.write(alnmt.to_pysam_AlignedRead(self.sf))
self.sf.write(alnmt.to_pysam_AlignedSegment(self.sf))
def close(self):
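
A writer sketch built on the classmethod above (method name as in the HTSeq sources; file names hypothetical):

    import HTSeq

    br = HTSeq.BAM_Reader("in.bam")
    bw = HTSeq.BAM_Writer.from_BAM_Reader("out.bam", br)
    for almnt in br:
        bw.write(almnt)   # writes via to_pysam_AlignedSegment
    bw.close()
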
@@ -1059,9 +1186,21 @@ class BED_Reader(FileOrSequence):
raise ValueError("BED file line contains less than 3 fields")
if len(fields) > 9:
raise ValueError("BED file line contains more than 9 fields")
- iv = GenomicInterval(fields[0], int(fields[1]), int(fields[2]), fields[5] if len(fields) > 5 else ".")
- f = GenomicFeature(fields[3] if len(fields) > 3 else "unnamed", "BED line", iv)
+ iv = GenomicInterval(
+ fields[0],
+ int(fields[1]),
+ int(fields[2]),
+ fields[5] if len(fields) > 5 else ".")
+ f = GenomicFeature(
+ fields[3] if len(fields) > 3 else "unnamed",
+ "BED line",
+ iv)
f.score = float(fields[4]) if len(fields) > 4 else None
- f.thick = GenomicInterval(iv.chrom, int(fields[6]), int(fields[7]), iv.strand) if len(fields) > 7 else None
- f.itemRgb = [int(a) for a in fields[8].split(",")] if len(fields) > 8 else None
+ f.thick = GenomicInterval(
+ iv.chrom,
+ int(fields[6]),
+ int(fields[7]),
+ iv.strand) if len(fields) > 7 else None
+ f.itemRgb = [int(a) for a in fields[8].split(",")
+ ] if len(fields) > 8 else None
yield(f)
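
A BED sketch; fields past the third are optional and default as above (file name hypothetical):

    import HTSeq

    for f in HTSeq.BED_Reader("regions.bed"):
        print(f.name, f.iv, f.score, f.thick)   # score/thick may be None
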
=====================================
python2/HTSeq/_version.py
=====================================
@@ -1 +1 @@
-__version__ = "0.10.0"
\ No newline at end of file
+__version__ = "0.11.2"
\ No newline at end of file
=====================================
python2/HTSeq/scripts/count.py
=====================================
@@ -41,12 +41,13 @@ def count_reads_in_features(sam_filenames, gff_filename,
r = (r,)
for read in r:
if read is not None:
- samoutfile.write(read.original_sam_line.rstrip() +
- "\tXF:Z:" + assignment + "\n")
+ read.optional_fields.append(('XF', assignment))
+ samoutfile.write(read.get_sam_line() + "\n")
- if samouts != "":
+ if samouts != []:
if len(samouts) != len(sam_filenames):
- raise ValueError('Select the same number of SAM input and output files')
+ raise ValueError(
+ 'Select the same number of SAM input and output files')
# Try to open samout files early in case any of them has issues
for samout in samouts:
with open(samout, 'w'):
@@ -73,12 +74,14 @@ def count_reads_in_features(sam_filenames, gff_filename,
try:
feature_id = f.attr[id_attribute]
except KeyError:
- raise ValueError("Feature %s does not contain a '%s' attribute" %
- (f.name, id_attribute))
+ raise ValueError(
+ "Feature %s does not contain a '%s' attribute" %
+ (f.name, id_attribute))
if stranded != "no" and f.iv.strand == ".":
- raise ValueError("Feature %s at %s does not have strand information but you are "
- "running htseq-count in stranded mode. Use '--stranded=no'." %
- (f.name, f.iv))
+ raise ValueError(
+ "Feature %s at %s does not have strand information but you are "
+ "running htseq-count in stranded mode. Use '--stranded=no'." %
+ (f.name, f.iv))
features[f.iv] += feature_id
counts[f.attr[id_attribute]] = 0
attributes[f.attr[id_attribute]] = [
@@ -116,22 +119,28 @@ def count_reads_in_features(sam_filenames, gff_filename,
lowqual_all = []
nonunique_all = []
for isam, (sam_filename) in enumerate(sam_filenames):
- if samouts != '':
+ if samouts != []:
samoutfile = open(samouts[isam], 'w')
else:
samoutfile = None
try:
- if sam_filename != "-":
- read_seq_file = SAM_or_BAM_Reader(sam_filename)
- read_seq = read_seq_file
- first_read = next(iter(read_seq))
- else:
+ if sam_filename == "-":
read_seq_file = SAM_or_BAM_Reader(sys.stdin)
- read_seq_iter = iter(read_seq_file)
+ else:
+ read_seq_file = SAM_or_BAM_Reader(sam_filename)
+ read_seq_iter = iter(read_seq_file)
+ # Catch empty BAM files
+ try:
first_read = next(read_seq_iter)
+ pe_mode = first_read.paired_end
+ except:
+ first_read = None
+ pe_mode = False
+ if first_read is not None:
read_seq = itertools.chain([first_read], read_seq_iter)
- pe_mode = first_read.paired_end
+ else:
+ read_seq = []
except:
sys.stderr.write(
"Error occured when reading beginning of SAM/BAM file.\n")
@@ -175,7 +184,10 @@ def count_reads_in_features(sam_filenames, gff_filename,
try:
if r.optional_field("NH") > 1:
nonunique += 1
- write_to_samout(r, "__alignment_not_unique", samoutfile)
+ write_to_samout(
+ r,
+ "__alignment_not_unique",
+ samoutfile)
if multimapped_mode == 'none':
continue
except KeyError:
@@ -325,7 +337,8 @@ def count_reads_in_features(sam_filenames, gff_filename,
print('\t'.join(["__alignment_not_unique"] + pad + [str(c) for c in nonunique_all]))
-def my_showwarning(message, category, filename, lineno=None, file=None, line=None):
+def my_showwarning(message, category, filename, lineno=None, file=None,
+ line=None):
sys.stderr.write("Warning: %s\n" % message)
@@ -396,9 +409,11 @@ def main():
"suitable for Ensembl GTF files: gene_id)")
pa.add_argument(
- "--additional-attr", type=str, nargs='+',
- default=(), help="Additional feature attributes (default: none, " +
- "suitable for Ensembl GTF files: gene_name)")
+ "--additional-attr", type=str,
+ action='append',
+ default=[], help="Additional feature attributes (default: none, " +
+ "suitable for Ensembl GTF files: gene_name). Use multiple times " +
+ "for each different attribute")
pa.add_argument(
"-m", "--mode", dest="mode",
@@ -414,19 +429,20 @@ def main():
pa.add_argument(
"--secondary-alignments", dest="secondary_alignments", type=str,
- choices=("score", "ignore"), default="score",
+ choices=("score", "ignore"), default="ignore",
help="Whether to score secondary alignments (0x100 flag)")
pa.add_argument(
"--supplementary-alignments", dest="supplementary_alignments", type=str,
- choices=("score", "ignore"), default="score",
+ choices=("score", "ignore"), default="ignore",
help="Whether to score supplementary alignments (0x800 flag)")
pa.add_argument(
- "-o", "--samout", type=str, dest="samouts", nargs='+',
- default="", help="write out all SAM alignment records into an output " +
- "SAM file called SAMOUT, annotating each line with its feature assignment " +
- "(as an optional field with tag 'XF')")
+ "-o", "--samout", type=str, dest="samouts",
+ action='append',
+ default=[], help="write out all SAM alignment records into " +
+ "SAM files (one per input file needed), annotating each line " +
+ "with its feature assignment (as an optional field with tag 'XF')")
pa.add_argument(
"-q", "--quiet", action="store_true", dest="quiet",
=====================================
python2/HTSeq/scripts/qa.py
=====================================
@@ -5,18 +5,21 @@
# (c) Simon Anders, European Molecular Biology Laboratory, 2010
# released under GNU General Public License
-import sys, time, os.path, optparse
+import sys
+import os.path
+import optparse
from itertools import *
import numpy
import HTSeq
+
def main():
try:
import matplotlib
except ImportError:
sys.stderr.write("This script needs the 'matplotlib' library, which ")
- sys.stderr.write("was not found. Please install it." )
+ sys.stderr.write("was not found. Please install it.")
matplotlib.use('PDF')
from matplotlib import pyplot
@@ -26,10 +29,10 @@ def main():
except ImportError:
from matplotlib.pyplot import normalize as Normalize
-
# **** Parse command line ****
- optParser = optparse.OptionParser( usage = "%prog [options] read_file",
+ optParser = optparse.OptionParser(
+ usage="%prog [options] read_file",
description=
"This script take a file with high-throughput sequencing reads " +
"(supported formats: SAM, Solexa _export.txt, FASTQ, Solexa " +
@@ -40,62 +43,69 @@ def main():
epilog =
"Written by Simon Anders (sanders at fs.tum.de), European Molecular Biology " +
" Laboratory (EMBL). (c) 2010. Released under the terms of the GNU General " +
- " Public License v3. Part of the 'HTSeq' framework, version %s." % HTSeq.__version__ )
- optParser.add_option( "-t", "--type", type="choice", dest="type",
- choices = ("sam", "bam", "solexa-export", "fastq", "solexa-fastq"),
- default = "sam", help="type of read_file (one of: sam [default], bam, " +
- "solexa-export, fastq, solexa-fastq)" )
- optParser.add_option( "-o", "--outfile", type="string", dest="outfile",
- help="output filename (default is <read_file>.pdf)" )
- optParser.add_option( "-r", "--readlength", type="int", dest="readlen",
- help="the maximum read length (when not specified, the script guesses from the file" )
- optParser.add_option( "-g", "--gamma", type="float", dest="gamma",
+ " Public License v3. Part of the 'HTSeq' framework, version %s." % HTSeq.__version__)
+ optParser.add_option(
+ "-t", "--type", type="choice", dest="type",
+ choices=("sam", "bam", "solexa-export", "fastq", "solexa-fastq"),
+ default="sam", help="type of read_file (one of: sam [default], bam, " +
+ "solexa-export, fastq, solexa-fastq)")
+ optParser.add_option(
+ "-o", "--outfile", type="string", dest="outfile",
+ help="output filename (default is <read_file>.pdf)")
+ optParser.add_option(
+ "-r", "--readlength", type="int", dest="readlen",
+ help="the maximum read length (when not specified, the script guesses from the file")
+ optParser.add_option(
+ "-g", "--gamma", type="float", dest="gamma",
default = 0.3,
- help="the gamma factor for the contrast adjustment of the quality score plot" )
- optParser.add_option( "-n", "--nosplit", action="store_true", dest="nosplit",
- help="do not split reads in unaligned and aligned ones" )
- optParser.add_option( "-m", "--maxqual", type="int", dest="maxqual", default=41,
- help="the maximum quality score that appears in the data (default: 41)" )
-
- if len( sys.argv ) == 1:
+ help="the gamma factor for the contrast adjustment of the quality score plot")
+ optParser.add_option(
+ "-n", "--nosplit", action="store_true", dest="nosplit",
+ help="do not split reads in unaligned and aligned ones")
+ optParser.add_option(
+ "-m", "--maxqual", type="int", dest="maxqual", default=41,
+ help="the maximum quality score that appears in the data (default: 41)")
+
+ if len(sys.argv) == 1:
optParser.print_help()
sys.exit(1)
(opts, args) = optParser.parse_args()
- if len( args ) != 1:
- sys.stderr.write( sys.argv[0] + ": Error: Please provide one argument (the read_file).\n" )
- sys.stderr.write( " Call with '-h' to get usage information.\n" )
- sys.exit( 1 )
+ if len(args) != 1:
+ sys.stderr.write(
+ sys.argv[0] +
+ ": Error: Please provide one argument (the read_file).\n")
+ sys.stderr.write(" Call with '-h' to get usage information.\n")
+ sys.exit(1)
readfilename = args[0]
if opts.type == "sam":
- readfile = HTSeq.SAM_Reader( readfilename )
+ readfile = HTSeq.SAM_Reader(readfilename)
isAlnmntFile = True
elif opts.type == "bam":
- readfile = HTSeq.BAM_Reader( readfilename )
+ readfile = HTSeq.BAM_Reader(readfilename)
isAlnmntFile = True
elif opts.type == "solexa-export":
- readfile = HTSeq.SolexaExportReader( readfilename )
+ readfile = HTSeq.SolexaExportReader(readfilename)
isAlnmntFile = True
elif opts.type == "fastq":
- readfile = HTSeq.FastqReader( readfilename )
+ readfile = HTSeq.FastqReader(readfilename)
isAlnmntFile = False
elif opts.type == "solexa-fastq":
- readfile = HTSeq.FastqReader( readfilename, "solexa" )
+ readfile = HTSeq.FastqReader(readfilename, "solexa")
isAlnmntFile = False
else:
- sys.error( "Oops." )
+ sys.error("Oops.")
twoColumns = isAlnmntFile and not opts.nosplit
if opts.outfile is None:
- outfilename = os.path.basename( readfilename ) + ".pdf"
+ outfilename = os.path.basename(readfilename) + ".pdf"
else:
outfilename = opts.outfile
-
# **** Get read length ****
if opts.readlen is not None:
@@ -103,25 +113,23 @@ def main():
else:
readlen = 0
if isAlnmntFile:
- reads = ( a.read for a in readfile )
+ reads = (a.read for a in readfile)
else:
reads = readfile
- for r in islice( reads, 10000 ):
- if len( r ) > readlen:
- readlen = len( r )
+ for r in islice(reads, 10000):
+ if len(r) > readlen:
+ readlen = len(r)
max_qual = opts.maxqual
gamma = opts.gamma
-
# **** Initialize count arrays ****
- base_arr_U = numpy.zeros( ( readlen, 5 ), numpy.int )
- qual_arr_U = numpy.zeros( ( readlen, max_qual+1 ), numpy.int )
+ base_arr_U = numpy.zeros((readlen, 5), numpy.int)
+ qual_arr_U = numpy.zeros((readlen, max_qual+1), numpy.int)
if twoColumns:
- base_arr_A = numpy.zeros( ( readlen, 5 ), numpy.int )
- qual_arr_A = numpy.zeros( ( readlen, max_qual+1 ), numpy.int )
-
+ base_arr_A = numpy.zeros((readlen, 5), numpy.int)
+ qual_arr_A = numpy.zeros((readlen, max_qual+1), numpy.int)
# **** Main counting loop ****
@@ -130,110 +138,107 @@ def main():
for a in readfile:
if isAlnmntFile:
r = a.read
- else:
- r = a
- if twoColumns and (isAlnmntFile and a.aligned):
- r.add_bases_to_count_array( base_arr_A )
- r.add_qual_to_count_array( qual_arr_A )
- else:
- r.add_bases_to_count_array( base_arr_U )
- r.add_qual_to_count_array( qual_arr_U )
- i += 1
- if (i % 200000) == 0:
- print i, "reads processed"
+ else:
+ r = a
+ if twoColumns and (isAlnmntFile and a.aligned):
+ r.add_bases_to_count_array(base_arr_A)
+ r.add_qual_to_count_array(qual_arr_A)
+ else:
+ r.add_bases_to_count_array(base_arr_U)
+ r.add_qual_to_count_array(qual_arr_U)
+ i += 1
+ if (i % 200000) == 0:
+ print i, "reads processed"
except:
- sys.stderr.write( "Error occured in: %s\n" %
- readfile.get_line_number_string() )
+ sys.stderr.write("Error occured in: %s\n" %
+ readfile.get_line_number_string())
raise
print i, "reads processed"
-
# **** Normalize result ****
- def norm_by_pos( arr ):
- arr = numpy.array( arr, numpy.float )
- arr_n = ( arr.T / arr.sum( 1 ) ).T
- arr_n[ arr == 0 ] = 0
+ def norm_by_pos(arr):
+ arr = numpy.array(arr, numpy.float)
+ arr_n = (arr.T / arr.sum(1)).T
+ arr_n[arr == 0] = 0
return arr_n
- def norm_by_start( arr ):
- arr = numpy.array( arr, numpy.float )
- arr_n = ( arr.T / arr.sum( 1 )[ 0 ] ).T
- arr_n[ arr == 0 ] = 0
+ def norm_by_start(arr):
+ arr = numpy.array(arr, numpy.float)
+ arr_n = (arr.T / arr.sum(1)[0]).T
+ arr_n[arr == 0] = 0
return arr_n
-
- base_arr_U_n = norm_by_pos( base_arr_U )
- qual_arr_U_n = norm_by_start( qual_arr_U )
- nreads_U = base_arr_U[0,:].sum()
+ base_arr_U_n = norm_by_pos(base_arr_U)
+ qual_arr_U_n = norm_by_start(qual_arr_U)
+ nreads_U = base_arr_U[0, :].sum()
if twoColumns:
- base_arr_A_n = norm_by_pos( base_arr_A )
- qual_arr_A_n = norm_by_start( qual_arr_A )
- nreads_A = base_arr_A[0,:].sum()
-
+ base_arr_A_n = norm_by_pos(base_arr_A)
+ qual_arr_A_n = norm_by_start(qual_arr_A)
+ nreads_A = base_arr_A[0, :].sum()
# **** Make plot ****
- def plot_bases( arr ):
- xg = numpy.arange( readlen )
- pyplot.plot( xg, arr[ : , 0 ], marker='.', color='red')
- pyplot.plot( xg, arr[ : , 1 ], marker='.', color='darkgreen')
- pyplot.plot( xg, arr[ : , 2 ], marker='.',color='lightgreen')
- pyplot.plot( xg, arr[ : , 3 ], marker='.',color='orange')
- pyplot.plot( xg, arr[ : , 4 ], marker='.',color='grey')
- pyplot.axis( (0, readlen-1, 0, 1 ) )
- pyplot.text( readlen*.70, .9, "A", color="red" )
- pyplot.text( readlen*.75, .9, "C", color="darkgreen" )
- pyplot.text( readlen*.80, .9, "G", color="lightgreen" )
- pyplot.text( readlen*.85, .9, "T", color="orange" )
- pyplot.text( readlen*.90, .9, "N", color="grey" )
+ def plot_bases(arr):
+ xg = numpy.arange(readlen)
+ pyplot.plot(xg, arr[:, 0], marker='.', color='red')
+ pyplot.plot(xg, arr[:, 1], marker='.', color='darkgreen')
+ pyplot.plot(xg, arr[:, 2], marker='.', color='lightgreen')
+ pyplot.plot(xg, arr[:, 3], marker='.', color='orange')
+ pyplot.plot(xg, arr[:, 4], marker='.', color='grey')
+ pyplot.axis((0, readlen-1, 0, 1))
+ pyplot.text(readlen*.70, .9, "A", color="red")
+ pyplot.text(readlen*.75, .9, "C", color="darkgreen")
+ pyplot.text(readlen*.80, .9, "G", color="lightgreen")
+ pyplot.text(readlen*.85, .9, "T", color="orange")
+ pyplot.text(readlen*.90, .9, "N", color="grey")
pyplot.figure()
- pyplot.subplots_adjust( top=.85 )
- pyplot.suptitle( os.path.basename(readfilename), fontweight='bold' )
+ pyplot.subplots_adjust(top=.85)
+ pyplot.suptitle(os.path.basename(readfilename), fontweight='bold')
if twoColumns:
- pyplot.subplot( 221 )
- plot_bases( base_arr_U_n )
- pyplot.ylabel( "proportion of base" )
- pyplot.title( "non-aligned reads\n%.0f%% (%.3f million)" %
- ( 100. * nreads_U / (nreads_U+nreads_A), nreads_U / 1e6 ) )
-
- pyplot.subplot( 222 )
- plot_bases( base_arr_A_n )
- pyplot.title( "aligned reads\n%.0f%% (%.3f million)" %
- ( 100. * nreads_A / (nreads_U+nreads_A), nreads_A / 1e6 ) )
-
- pyplot.subplot( 223 )
- pyplot.pcolor( qual_arr_U_n.T ** gamma, cmap=pyplot.cm.Greens,
- norm=Normalize( 0, 1 ) )
- pyplot.axis( (0, readlen-1, 0, max_qual+1 ) )
- pyplot.xlabel( "position in read" )
- pyplot.ylabel( "base-call quality score" )
-
- pyplot.subplot( 224 )
- pyplot.pcolor( qual_arr_A_n.T ** gamma, cmap=pyplot.cm.Greens,
- norm=Normalize( 0, 1 ) )
- pyplot.axis( (0, readlen-1, 0, max_qual+1 ) )
- pyplot.xlabel( "position in read" )
+ pyplot.subplot(221)
+ plot_bases(base_arr_U_n)
+ pyplot.ylabel("proportion of base")
+ pyplot.title("non-aligned reads\n%.0f%% (%.4f million)" %
+ (100. * nreads_U / (nreads_U+nreads_A), nreads_U / 1e6))
+
+ pyplot.subplot(222)
+ plot_bases(base_arr_A_n)
+ pyplot.title("aligned reads\n%.0f%% (%.4f million)" %
+ (100. * nreads_A / (nreads_U+nreads_A), nreads_A / 1e6))
+
+ pyplot.subplot(223)
+ pyplot.pcolor(qual_arr_U_n.T ** gamma, cmap=pyplot.cm.Greens,
+ norm=Normalize(0, 1))
+ pyplot.axis((0, readlen-1, 0, max_qual+1))
+ pyplot.xlabel("position in read")
+ pyplot.ylabel("base-call quality score")
+
+ pyplot.subplot(224)
+ pyplot.pcolor(qual_arr_A_n.T ** gamma, cmap=pyplot.cm.Greens,
+ norm=Normalize(0, 1))
+ pyplot.axis((0, readlen-1, 0, max_qual+1))
+ pyplot.xlabel("position in read")
else:
- pyplot.subplot( 211 )
- plot_bases( base_arr_U_n )
- pyplot.ylabel( "proportion of base" )
- pyplot.title( "%.3f million reads" % ( nreads_U / 1e6 ) )
+ pyplot.subplot(211)
+ plot_bases(base_arr_U_n)
+ pyplot.ylabel("proportion of base")
+ pyplot.title("%.3f million reads" % (nreads_U / 1e6))
- pyplot.subplot( 212 )
- pyplot.pcolor( qual_arr_U_n.T ** gamma, cmap=pyplot.cm.Greens,
- norm=Normalize( 0, 1 ) )
- pyplot.axis( (0, readlen-1, 0, max_qual+1 ) )
- pyplot.xlabel( "position in read" )
- pyplot.ylabel( "base-call quality score" )
+ pyplot.subplot(212)
+ pyplot.pcolor(qual_arr_U_n.T ** gamma, cmap=pyplot.cm.Greens,
+ norm=Normalize(0, 1))
+ pyplot.axis((0, readlen-1, 0, max_qual+1))
+ pyplot.xlabel("position in read")
+ pyplot.ylabel("base-call quality score")
+ pyplot.savefig(outfilename)
- pyplot.savefig( outfilename )
if __name__ == "__main__":
main()
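
For reference, this script ships as the htseq-qa entry point; an illustrative invocation: htseq-qa -t bam -o sample.pdf sample.bam (without -o, the output defaults to <read_file>.pdf).
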
=====================================
python2/src/HTSeq/_HTSeq.pxd
=====================================
@@ -7,47 +7,47 @@ cdef class GenomicInterval:
cdef public long end
cdef str _strand
- cpdef is_contained_in( self, GenomicInterval iv )
- cpdef contains( self, GenomicInterval iv )
- cpdef overlaps( self, GenomicInterval iv )
- cpdef extend_to_include( GenomicInterval self, GenomicInterval iv )
+ cpdef is_contained_in(self, GenomicInterval iv)
+ cpdef contains(self, GenomicInterval iv)
+ cpdef overlaps(self, GenomicInterval iv)
+ cpdef extend_to_include(GenomicInterval self, GenomicInterval iv)
-cdef class GenomicPosition( GenomicInterval ):
+cdef class GenomicPosition(GenomicInterval):
pass
-cdef class Sequence( object ):
+cdef class Sequence(object):
cdef public bytes seq
cdef public str name
cdef public str descr
- cpdef Sequence get_reverse_complement( self )
- cpdef object add_bases_to_count_array( Sequence self, numpy.ndarray count_array_ )
- cpdef Sequence trim_left_end( Sequence self, Sequence pattern, float mismatch_prop = ? )
- cpdef Sequence trim_right_end( Sequence self, Sequence pattern, float mismatch_prop = ? )
+ cpdef Sequence get_reverse_complement(self, bint rename=?)
+ cpdef object add_bases_to_count_array(Sequence self, numpy.ndarray count_array_)
+ cpdef Sequence trim_left_end(Sequence self, Sequence pattern, float mismatch_prop=?)
+ cpdef Sequence trim_right_end(Sequence self, Sequence pattern, float mismatch_prop=?)
-cdef class SequenceWithQualities( Sequence ):
+cdef class SequenceWithQualities(Sequence):
cdef readonly bytes _qualstr
cdef readonly bytes _qualstr_phred
cdef readonly str _qualscale
cdef readonly object _qualarr
- cdef _fill_qual_arr( SequenceWithQualities self )
- cpdef object add_qual_to_count_array( SequenceWithQualities self, numpy.ndarray count_array_ )
- cpdef SequenceWithQualities trim_left_end_with_quals( SequenceWithQualities self,
- Sequence pattern, int max_mm_qual = ? )
- cpdef SequenceWithQualities trim_right_end_with_quals( SequenceWithQualities self,
- Sequence pattern, int max_mm_qual = ? )
+ cdef _fill_qual_arr(SequenceWithQualities self)
+ cpdef object add_qual_to_count_array(SequenceWithQualities self, numpy.ndarray count_array_)
+ cpdef SequenceWithQualities trim_left_end_with_quals(SequenceWithQualities self,
+ Sequence pattern, int max_mm_qual=?)
+ cpdef SequenceWithQualities trim_right_end_with_quals(SequenceWithQualities self,
+ Sequence pattern, int max_mm_qual=?)
-cdef class Alignment( object ):
+cdef class Alignment(object):
cdef public SequenceWithQualities _read
cdef public GenomicInterval iv
-cdef class AlignmentWithSequenceReversal( Alignment ):
+cdef class AlignmentWithSequenceReversal(Alignment):
cdef public SequenceWithQualities read_as_aligned
cdef public SequenceWithQualities _read_as_sequenced
-cdef class SAM_Alignment( AlignmentWithSequenceReversal ):
+cdef class SAM_Alignment(AlignmentWithSequenceReversal):
cdef public list cigar
cdef public int aQual
cdef public GenomicPosition mate_start
=====================================
python2/src/HTSeq/_HTSeq.pyx
=====================================
@@ -651,10 +651,15 @@ cdef class Sequence(object):
self.name = name
self.descr = None
- cpdef Sequence get_reverse_complement(self):
- return Sequence(
- reverse_complement(self.seq),
- "revcomp_of_" + self.name)
+ cpdef Sequence get_reverse_complement(self, bint rename=True):
+ if rename:
+ return Sequence(
+ reverse_complement(self.seq),
+ "revcomp_of_" + self.name)
+ else:
+ return Sequence(
+ reverse_complement(self.seq),
+ self.name)
def __str__(self):
return self.seq
@@ -903,13 +908,20 @@ cdef class SequenceWithQualities(Sequence):
self.write_to_fastq_file(sio, convert_to_phred)
return sio.getvalue()
- cpdef SequenceWithQualities get_reverse_complement(self):
+ cpdef SequenceWithQualities get_reverse_complement(self, bint rename=True):
cdef SequenceWithQualities res
- res = SequenceWithQualities(
- reverse_complement(self.seq),
- "revcomp_of_" + self.name,
- self._qualstr[::-1],
- self._qualscale)
+ if rename:
+ res = SequenceWithQualities(
+ reverse_complement(self.seq),
+ "revcomp_of_" + self.name,
+ self._qualstr[::-1],
+ self._qualscale)
+ else:
+ res = SequenceWithQualities(
+ reverse_complement(self.seq),
+ self.name,
+ self._qualstr[::-1],
+ self._qualscale)
if self._qualarr is not None:
res._qualarr = self._qualarr[::-1]
return res
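
The rename flag added above (mirrored in the python3 sources further down) controls
whether the reverse complement gets the "revcomp_of_" name prefix. A minimal sketch
with a made-up read:

    import HTSeq

    s = HTSeq.Sequence(b"ACGTN", "read_1")              # toy read
    print(s.get_reverse_complement().seq)               # b'NACGT'
    print(s.get_reverse_complement().name)              # revcomp_of_read_1
    print(s.get_reverse_complement(rename=False).name)  # read_1, name kept
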
=====================================
python3/HTSeq/__init__.py
=====================================
@@ -9,21 +9,10 @@ import os
import shlex
import sys
-try:
- import HTSeq
- from HTSeq._HTSeq import *
- #from _HTSeq import *
-except ImportError:
- if os.path.isfile("setup.py"):
- raise ImportError(
- "Cannot import 'HTSeq' when working directory is HTSeq's own build directory.")
- else:
- raise
-
+import HTSeq
+from HTSeq._HTSeq import *
from HTSeq._version import __version__
-#from vcf_reader import *
-
#########################
# Utils
#########################
@@ -32,14 +21,14 @@ from HTSeq._version import __version__
class FileOrSequence(object):
    """ The constructor takes one argument, which may either be a string,
which is interpreted as a file name (possibly with path), or a
- connection, by which we mean a text file opened for reading, or
- any other object that can provide an iterator over strings
+ connection, by which we mean a text file opened for reading, or
+ any other object that can provide an iterator over strings
(lines of the file).
The advantage of passing a file name instead of an already opened file
is that if an iterator is requested several times, the file will be
re-opened each time. If the file is already open, its lines can be read
- only once, and then, the iterator stays exhausted.
+ only once, and then, the iterator stays exhausted.
    Furthermore, if a file name is passed that ends in ".gz" or ".gzip"
(case insensitive), it is transparently gunzipped.
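
In other words, both a path and any iterable of lines are accepted; a sketch with a
placeholder file name:

    import HTSeq

    # Names ending in ".gz" or ".gzip" (any case) are gunzipped on the fly.
    for line in HTSeq.FileOrSequence("annotation.gtf.gz"):
        print(line, end="")
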
@@ -90,14 +79,13 @@ class FileOrSequence(object):
#########################
class GenomicFeature(object):
-
"""A genomic feature, i.e., an interval on a genome with metadata.
At minimum, the following information should be provided by slots:
name: a string identifying the feature (e.g., a gene symbol)
type: a string giving the feature type (e.g., "gene", "exon")
- iv: a GenomicInterval object specifying the feature locus
+ iv: a GenomicInterval object specifying the feature locus
"""
def __init__(self, name, type_, interval):
@@ -145,8 +133,8 @@ class GenomicFeature(object):
attr_str = '; '.join(
['%s%s\"%s\"' % (ak, sep, attr[ak]) for ak in attr])
return "\t".join(str(a) for a in (self.iv.chrom, source,
- self.type, self.iv.start + 1, self.iv.end, score,
- self.iv.strand, frame, attr_str)) + "\n"
+ self.type, self.iv.start + 1, self.iv.end, score,
+ self.iv.strand, frame, attr_str)) + "\n"
_re_attr_main = re.compile("\s*([^\s\=]+)[\s=]+(.*)")
@@ -157,13 +145,16 @@ def parse_GFF_attribute_string(attrStr, extra_return_first_value=False):
"""Parses a GFF attribute string and returns it as a dictionary.
If 'extra_return_first_value' is set, a pair is returned: the dictionary
- and the value of the first attribute. This might be useful if this is the ID.
+ and the value of the first attribute. This might be useful if this is the
+ ID.
"""
if attrStr.endswith("\n"):
attrStr = attrStr[:-1]
d = {}
first_val = "_unnamed_"
- for (i, attr) in zip(itertools.count(), _HTSeq.quotesafe_split(attrStr.encode())):
+ for (i, attr) in zip(
+ itertools.count(),
+ _HTSeq.quotesafe_split(attrStr.encode())):
attr = attr.decode()
if _re_attr_empty.match(attr):
continue
@@ -176,7 +167,6 @@ def parse_GFF_attribute_string(attrStr, extra_return_first_value=False):
val = mo.group(2)
if val.startswith('"') and val.endswith('"'):
val = val[1:-1]
- #val = urllib.unquote( val )
d[sys.intern(mo.group(1))] = sys.intern(val)
if extra_return_first_value and i == 0:
first_val = val
@@ -185,14 +175,14 @@ def parse_GFF_attribute_string(attrStr, extra_return_first_value=False):
else:
return d
+
_re_gff_meta_comment = re.compile("##\s*(\S+)\s+(\S*)")
class GFF_Reader(FileOrSequence):
-
"""Parse a GFF file
- Pass the constructor either a file name or an iterator of lines of a
+ Pass the constructor either a file name or an iterator of lines of a
    GFF file. If a file name is specified, it may refer to a gzip compressed
file.
@@ -208,7 +198,6 @@ class GFF_Reader(FileOrSequence):
for line in FileOrSequence.__iter__(self):
if isinstance(line, bytes):
line = line.decode()
-
if line == "\n":
continue
if line.startswith('#'):
@@ -221,10 +210,15 @@ class GFF_Reader(FileOrSequence):
strand, frame, attributeStr) = line.split("\t", 8)
(attr, name) = parse_GFF_attribute_string(attributeStr, True)
if self.end_included:
- iv = GenomicInterval(seqname, int(start) - 1, int(end), strand)
+ iv = GenomicInterval(
+ seqname,
+ int(start) - 1, int(end),
+ strand)
else:
- iv = GenomicInterval(seqname, int(
- start) - 1, int(end) - 1, strand)
+ iv = GenomicInterval(
+ seqname,
+ int(start) - 1, int(end) - 1,
+ strand)
f = GenomicFeature(name, feature, iv)
if score != ".":
score = float(score)
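
A usage sketch for the reader as restructured above; the file name is a placeholder,
and end_included=True matches the GTF convention:

    import HTSeq

    gff = HTSeq.GFF_Reader("annotation.gtf", end_included=True)
    for feature in gff:
        if feature.type == "exon":
            print(feature.name, feature.iv)
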
@@ -238,7 +232,7 @@ class GFF_Reader(FileOrSequence):
def make_feature_dict(feature_sequence):
- """A feature dict is a convenient way to organize a sequence of Feature
+ """A feature dict is a convenient way to organize a sequence of Feature
    objects (which you have got, e.g., from parse_GFF).
The function returns a dict with all the feature types as keys. Each value
@@ -248,8 +242,8 @@ def make_feature_dict(feature_sequence):
An example makes this clear. Let's say you load the C. elegans GTF file
    from Ensembl and make a feature dict:
- >>> worm_features_dict = HTSeq.make_feature_dict( HTSeq.parse_GFF(
- ... "test_data/Caenorhabditis_elegans.WS200.55.gtf.gz" ) )
+ >>> worm_features_dict = HTSeq.make_feature_dict(HTSeq.parse_GFF(
+ ... "test_data/Caenorhabditis_elegans.WS200.55.gtf.gz"))
(This command may take a few minutes to deal with the 430,000 features
in the GTF file. Note that you may need a lot of RAM if you have millions
@@ -277,8 +271,9 @@ def make_feature_dict(feature_sequence):
#########################
def read_chrom_lens(filename, delimiter="\t"):
- return dict(((chrom, int(len))
- for chrom, len in csv.reader(open(filename), delimiter=delimiter)))
+ return dict(
+ ((chrom, int(len))
+ for chrom, len in csv.reader(open(filename), delimiter=delimiter)))
#########################
@@ -300,6 +295,8 @@ class FastaReader(FileOrSequence):
def __iter__(self):
seq = None
+ name = None
+ descr = None
for line in FileOrSequence.__iter__(self):
if line.startswith(">"):
if seq:
@@ -326,6 +323,7 @@ class FastaReader(FileOrSequence):
def get_sequence_lengths(self):
seqname = None
+ length = 0
seqlengths = {}
for line in FileOrSequence.__iter__(self):
if line.startswith(">"):
@@ -354,12 +352,13 @@ class FastaReader(FileOrSequence):
def build_index(self, force=False):
self._import_pysam()
if not isinstance(self.fos, str):
- raise TypeError("This function only works with FastaReader objects " +
- "connected to a fasta file via file name")
+ raise TypeError(
+ "This function only works with FastaReader objects " +
+ "connected to a fasta file via file name")
index_filename = self.fos + ".fai"
if os.access(index_filename, os.R_OK):
- if (not force) and os.stat( self.filename_or_sequence ).st_mtime <= \
- os.stat(index_filename).st_mtime:
+ if (not force) and os.stat(self.filename_or_sequence).st_mtime <= \
+ os.stat(index_filename).st_mtime:
# index is up to date
return
pysam.faidx(self.fos)
@@ -371,11 +370,13 @@ class FastaReader(FileOrSequence):
if not isinstance(iv, GenomicInterval):
raise TypeError("GenomicInterval expected as key.")
if not isinstance(self.fos, str):
- raise TypeError("This function only works with FastaReader objects " +
- "connected to a fasta file via file name")
+ raise TypeError(
+ "This function only works with FastaReader objects " +
+ "connected to a fasta file via file name")
self._import_pysam()
- fasta = pysam.faidx(self.fos, "%s:%d-%d" %
- (iv.chrom, iv.start, iv.end - 1))
+ fasta = pysam.faidx(
+ self.fos,
+ "%s:%d-%d" % (iv.chrom, iv.start, iv.end - 1))
ans = list(FastaReader(fasta))
assert len(ans) == 1
ans[0].name = str(iv)
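
The two methods reformatted above work together: build_index creates or refreshes a
pysam ".fai" index, after which GenomicInterval objects can be used as keys. A sketch
with placeholder names:

    import HTSeq

    fa = HTSeq.FastaReader("genome.fa")
    fa.build_index()                    # writes genome.fa.fai if missing or stale
    iv = HTSeq.GenomicInterval("chr1", 1000, 1050, ".")
    print(fa[iv].seq)                   # 50 bp from 0-based position 1000
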
@@ -408,28 +409,35 @@ class FastqReader(FileOrSequence):
qual = next(fin)
if qual == "":
if id1 != "":
- warnings.warn("Number of lines in FASTQ file is not "
- "a multiple of 4. Discarding the last, "
- "incomplete record")
+ warnings.warn(
+ "Number of lines in FASTQ file is not "
+ "a multiple of 4. Discarding the last, "
+ "incomplete record")
break
if not qual.endswith("\n"):
qual += "\n"
if not id1.startswith("@"):
- raise ValueError("Primary ID line in FASTQ file does"
- "not start with '@'. Either this is not FASTQ data or the parser got out of sync.")
+ raise ValueError(
+ "Primary ID line in FASTQ file does "
+ "not start with '@'. Either this is not FASTQ data or the "
+ "parser got out of sync.")
if not id2.startswith("+"):
- raise ValueError("Secondary ID line in FASTQ file does"
- "not start with '+'. Maybe got out of sync.")
+ raise ValueError(
+ "Secondary ID line in FASTQ file does"
+                " not start with '+'. Maybe the parser got out of sync.")
if len(id2) > 2 and id1[1:] != id2[1:]:
- raise ValueError("Primary and secondary ID line in FASTQ"
- "disagree.")
+ raise ValueError(
+                "Primary and secondary ID lines in FASTQ "
+                "file disagree.")
if self.raw_iterator:
s = (seq[:-1], id1[1:-1], qual[:-1], self.qual_scale)
else:
- s = SequenceWithQualities(seq[:-1].encode(), id1[1:-1],
- qual[:-1].encode(), self.qual_scale)
+ s = SequenceWithQualities(
+ seq[:-1].encode(), id1[1:-1],
+ qual[:-1].encode(),
+ self.qual_scale)
yield s
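
A sketch of the iterator above, with a placeholder path; the qual attribute of each
yielded read is a numpy array of Phred scores:

    import HTSeq

    for read in HTSeq.FastqReader("reads.fastq.gz"):
        if read.qual.min() >= 20:
            print(read.name)
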
@@ -445,8 +453,10 @@ class BowtieReader(FileOrSequence):
except ValueError:
if line.startswith("Reported "):
continue
- warnings.warn("BowtieReader: Ignoring the following line, which could not be parsed:\n%s\n" % line,
- RuntimeWarning)
+ warnings.warn(
+ "BowtieReader: Ignoring the following line, which could "
+ "not be parsed:\n%s\n" % line,
+ RuntimeWarning)
yield algnt
@@ -472,14 +482,14 @@ def bundle_multiple_alignments(sequence_of_alignments):
class SolexaExportAlignment(Alignment):
    """Iterating over SolexaExportReader objects will yield SolexaExportAlignment
    objects. These have five fields:
- read - a SequenceWithQualities object
+ read - a SequenceWithQualities object
aligned - a boolean, indicating whether the object was aligned
iv - a GenomicInterval giving the alignment (or None, if not aligned)
passed_filter - a boolean, indicating whether the object passed the filter
nomatch_code - a code indicating why no match was found (or None, if the
read was aligned)
- As long as 'aligned' is True, a SolexaExportRecord can be treated as an
+ As long as 'aligned' is True, a SolexaExportRecord can be treated as an
Alignment object.
"""
@@ -490,10 +500,10 @@ class SolexaExportAlignment(Alignment):
def __repr__(self):
if self.aligned:
return "< %s object: Read '%s', aligned to %s >" % (
- self.__class__.__name__, self.read.name, self.iv)
+ self.__class__.__name__, self.read.name, self.iv)
else:
return "< %s object: Non-aligned read '%s' >" % (
- self.__class__.__name__, self.read.name)
+ self.__class__.__name__, self.read.name)
class SolexaExportReader(FileOrSequence):
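
A sketch of iterating over an export file, using only the fields listed in the
docstring above (the path is a placeholder):

    import HTSeq

    for record in HTSeq.SolexaExportReader("run1_export.txt"):
        if record.passed_filter and record.aligned:
            print(record.read.name, record.iv)
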
@@ -514,15 +524,28 @@ class SolexaExportReader(FileOrSequence):
if line[-1] == "\n":
line = line[:-1]
res = {}
- (res['machine'], res['run_number'], res['lane'], res['tile'], res['x_coord'],
- res['y_coord'], res['index_string'], res['read_nbr'], res['read_seq'],
- res['qual_str'], res['chrom'], res[
- 'contig'], res['pos'], res['strand'],
- res['match_descr'], res['single_read_algnt_score'],
- res['paired_read_algnt_score'], res[
- 'partner_chrom'], res['partner_contig'],
- res['partner_offset'], res['partner_strand'], res['passed_filtering'] ) \
- = line.split("\t")
+ (res['machine'],
+ res['run_number'],
+ res['lane'],
+ res['tile'],
+ res['x_coord'],
+ res['y_coord'],
+ res['index_string'],
+ res['read_nbr'],
+ res['read_seq'],
+ res['qual_str'],
+ res['chrom'],
+ res['contig'],
+ res['pos'],
+ res['strand'],
+ res['match_descr'],
+ res['single_read_algnt_score'],
+ res['paired_read_algnt_score'],
+ res['partner_chrom'],
+ res['partner_contig'],
+ res['partner_offset'],
+ res['partner_strand'],
+ res['passed_filtering']) = line.split("\t")
return res
def __iter__(self):
@@ -530,20 +553,25 @@ class SolexaExportReader(FileOrSequence):
record = SolexaExportAlignment()
fields = SolexaExportReader.parse_line_bare(line)
if fields['read_nbr'] != "1":
- warnings.warn("Paired-end read encountered. PE is so far supported only for " +
- "SAM files, not yet for SolexaExport. All PE-related fields are ignored. ")
+ warnings.warn(
+ "Paired-end read encountered. PE is so far supported only "
+ "for SAM files, not yet for SolexaExport. All PE-related "
+ "fields are ignored.")
record.read = SequenceWithQualities(
fields['read_seq'],
- "%s:%s:%s:%s:%s#0" % (fields['machine'], fields['lane'], fields['tile'],
- fields['x_coord'], fields['y_coord']),
+ "%s:%s:%s:%s:%s#0" % (fields['machine'],
+ fields['lane'],
+ fields['tile'],
+ fields['x_coord'],
+ fields['y_coord']),
fields['qual_str'], self.qualscale)
if fields['passed_filtering'] == 'Y':
record.passed_filter = True
elif fields['passed_filtering'] == 'N':
record.passed_filter = False
else:
- raise ValueError("Illegal 'passed filter' value in Solexa export data: '%s'." % fields[
- 'passed_filtering'])
+ raise ValueError(
+ "Illegal 'passed filter' value in Solexa export data: '%s'." % fields['passed_filtering'])
record.index_string = fields['index_string']
if fields['pos'] == '':
record.iv = None
@@ -560,13 +588,14 @@ class SolexaExportReader(FileOrSequence):
chrom = fields['chrom']
if fields['chrom'] == "":
chrom = fields['contig']
- record.iv = GenomicInterval(chrom, start,
- start + len(fields['read_seq']), strand)
+ record.iv = GenomicInterval(
+ chrom, start,
+ start + len(fields['read_seq']), strand)
yield record
class SAM_Reader(FileOrSequence):
- """A SAM_Reader object is associated with a SAM file that
+ """A SAM_Reader object is associated with a SAM file that
contains short read alignments. It can generate an iterator of Alignment
objects."""
@@ -584,7 +613,6 @@ class SAM_Reader(FileOrSequence):
class GenomicArrayOfSets(GenomicArray):
-
    """A GenomicArrayOfSets is a specialization of GenomicArray that allows storing
sets of objects. On construction, the step vectors are initialized with empty sets.
By using the 'add_value' method, objects can be added to intervals. If an object
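
A minimal sketch of the set semantics described in the docstring, with made-up
intervals; overlapping features share the middle step:

    import HTSeq

    gas = HTSeq.GenomicArrayOfSets("auto", stranded=False)
    gas[HTSeq.GenomicInterval("chr1", 100, 200, ".")] += "gene_A"
    gas[HTSeq.GenomicInterval("chr1", 150, 250, ".")] += "gene_B"
    for iv, genes in gas[HTSeq.GenomicInterval("chr1", 100, 250, ".")].steps():
        print(iv, sorted(genes))    # {gene_A}, {gene_A, gene_B}, {gene_B}
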
@@ -606,11 +634,50 @@ class GenomicArrayOfSets(GenomicArray):
# paired-end handling
###########################
-def pair_SAM_alignments(alignments, bundle=False):
+def pair_SAM_alignments(
+ alignments,
+ bundle=False,
+ primary_only=False):
+    '''Iterate over SAM alignments, name-sorted paired-end
+
+ Args:
+ alignments (iterator of SAM/BAM alignments): the alignments to wrap
+ bundle (bool): if True, bundle all alignments from one read pair into a
+ single yield. If False (default), each pair of alignments is
+ yielded separately.
+ primary_only (bool): for each read, consider only the primary line
+ (SAM flag 0x900 = 0). The SAM specification requires one and only
+ one of those for each read.
+
+ Yields:
+ 2-tuples with each pair of alignments or, if bundle==True, each bundled
+ list of alignments.
+ '''
mate_missing_count = [0]
def process_list(almnt_list):
+        '''Transform a list of alignments with the same read name into pairs
+
+ Args:
+ almnt_list (list): alignments to process
+
+ Yields:
+ each pair of alignments.
+
+ This function is needed because each line of a BAM file is not a read
+ but an alignment. For uniquely mapped and unmapped reads, those two are
+ the same. For multimapped reads, however, there can be more than one
+ alignment for each read. Also, it is normal for a mapper to uniquely
+ map one read and multimap its mate.
+
+ This function goes down the list of alignments for a given read name
+ and tries to find the first mate. So if read 1 is uniquely mapped but
+ read 2 is mapped 4 times, only (read 1, read 2 - first occurrence) will
+    be yielded; the other 3 alignments of read 2 are ignored.
+ '''
+
+
while len(almnt_list) > 0:
a1 = almnt_list.pop(0)
# Find its mate
@@ -622,14 +689,15 @@ def pair_SAM_alignments(alignments, bundle=False):
if not (a1.aligned and a2.aligned):
break
if a1.iv.chrom == a2.mate_start.chrom and a1.iv.start == a2.mate_start.pos and \
- a2.iv.chrom == a1.mate_start.chrom and a2.iv.start == a1.mate_start.pos:
+ a2.iv.chrom == a1.mate_start.chrom and a2.iv.start == a1.mate_start.pos:
break
else:
if a1.mate_aligned:
mate_missing_count[0] += 1
if mate_missing_count[0] == 1:
- warnings.warn("Read " + a1.read.name + " claims to have an aligned mate " +
- "which could not be found in an adjacent line.")
+ warnings.warn(
+ "Read " + a1.read.name + " claims to have an aligned mate " +
+ "which could not be found in an adjacent line.")
a2 = None
if a2 is not None:
almnt_list.remove(a2)
@@ -648,6 +716,9 @@ def pair_SAM_alignments(alignments, bundle=False):
if almnt.pe_which == "unknown":
raise ValueError(
"Paired-end read found with 'unknown' 'pe_which' status.")
+ # FIXME: almnt.not_primary_alignment currently means secondary
+ if primary_only and (almnt.not_primary_alignment or almnt.supplementary):
+ continue
if almnt.read.name == current_name:
almnt_list.append(almnt)
else:
@@ -668,18 +739,35 @@ def pair_SAM_alignments(alignments, bundle=False):
mate_missing_count[0])
-def pair_SAM_alignments_with_buffer(alignments, max_buffer_size=30000000):
+def pair_SAM_alignments_with_buffer(
+ alignments,
+ max_buffer_size=30000000,
+ primary_only=False):
+    '''Iterate over SAM alignments with buffer, position-sorted paired-end
+
+ Args:
+ alignments (iterator of SAM/BAM alignments): the alignments to wrap
+        max_buffer_size (int): maximal number of alignments to keep in memory.
+ primary_only (bool): for each read, consider only the primary line
+ (SAM flag 0x900 = 0). The SAM specification requires one and only
+ one of those for each read.
+
+ Yields:
+ 2-tuples with each pair of alignments.
+ '''
almnt_buffer = {}
ambiguous_pairing_counter = 0
for almnt in alignments:
-
if not almnt.paired_end:
raise ValueError(
"Sequence of paired-end alignments expected, but got single-end alignment.")
if almnt.pe_which == "unknown":
raise ValueError(
"Cannot process paired-end alignment found with 'unknown' 'pe_which' status.")
+ # FIXME: almnt.not_primary_alignment currently means secondary
+ if primary_only and (almnt.not_primary_alignment or almnt.supplementary):
+ continue
matekey = (
almnt.read.name,
@@ -720,8 +808,9 @@ def pair_SAM_alignments_with_buffer(alignments, max_buffer_size=30000000):
"Maximum alignment buffer size exceeded while pairing SAM alignments.")
if len(almnt_buffer) > 0:
- warnings.warn("Mate records missing for %d records; first such record: %s." %
- (len(almnt_buffer), str(list(almnt_buffer.values())[0][0])))
+ warnings.warn(
+ "Mate records missing for %d records; first such record: %s." %
+ (len(almnt_buffer), str(list(almnt_buffer.values())[0][0])))
for almnt_list in list(almnt_buffer.values()):
for almnt in almnt_list:
if almnt.pe_which == "first":
@@ -730,8 +819,9 @@ def pair_SAM_alignments_with_buffer(alignments, max_buffer_size=30000000):
yield (None, almnt)
if ambiguous_pairing_counter > 0:
- warnings.warn("Mate pairing was ambiguous for %d records; mate key for first such record: %s." %
- (ambiguous_pairing_counter, str(ambiguous_pairing_first_occurance)))
+ warnings.warn(
+ "Mate pairing was ambiguous for %d records; mate key for first such record: %s." %
+ (ambiguous_pairing_counter, str(ambiguous_pairing_first_occurance)))
###########################
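
How the new primary_only flag is meant to be used, sketched with a placeholder BAM
file; pair_SAM_alignments expects name-sorted input, while
pair_SAM_alignments_with_buffer takes the same flag for position-sorted input:

    import HTSeq

    bam = HTSeq.BAM_Reader("pairs.bam")   # name-sorted paired-end BAM
    for first, second in HTSeq.pair_SAM_alignments(bam, primary_only=True):
        if first is None or second is None:
            continue                      # mate record was not found
        print(first.read.name, first.iv, second.iv)
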
@@ -756,7 +846,8 @@ _vcf_typemap = {
class VariantCall(object):
- def __init__(self, chrom=None, pos=None, identifier=None, ref=None, alt=None, qual=None, filtr=None, info=None):
+ def __init__(self, chrom=None, pos=None, identifier=None, ref=None,
+ alt=None, qual=None, filtr=None, info=None):
self.chrom = chrom
self.pos = pos
self.id = identifier
@@ -785,12 +876,10 @@ class VariantCall(object):
ret = cls()
if nsamples == 0:
ret.format = None
- ret.chrom, ret.pos, ret.id, ret.ref, ret.alt, ret.qual, ret.filter, ret.info = line.rstrip(
- "\n").split("\t", 7)
+ ret.chrom, ret.pos, ret.id, ret.ref, ret.alt, ret.qual, ret.filter, ret.info = line.rstrip("\n").split("\t", 7)
else:
lsplit = line.rstrip("\n").split("\t")
- ret.chrom, ret.pos, ret.id, ret.ref, ret.alt, ret.qual, ret.filter, ret.info = lsplit[
- :8]
+ ret.chrom, ret.pos, ret.id, ret.ref, ret.alt, ret.qual, ret.filter, ret.info = lsplit[:8]
ret.format = lsplit[8].split(":")
ret.samples = {}
spos = 9
@@ -874,10 +963,11 @@ class VCF_Reader(FileOrSequence):
def make_info_dict(self):
self.infodict = dict(
- (key, _vcf_typemap[self.info[key]["Type"]]) for key in list(self.info.keys()))
+ (key,
+ _vcf_typemap[self.info[key]["Type"]]) for key in list(self.info.keys()))
def parse_meta(self, header_filename=None):
- if header_filename == None:
+ if header_filename is None:
the_iter = FileOrSequence.__iter__(self)
else:
the_iter = open(header_filename, "r")
@@ -917,7 +1007,7 @@ class VCF_Reader(FileOrSequence):
def meta_info(self, header_filename=None):
ret = []
- if header_filename == None:
+ if header_filename is None:
the_iter = FileOrSequence.__iter__(self)
else:
the_iter = open(header_filename, "r")
@@ -1010,7 +1100,6 @@ class BAM_Reader(object):
sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
self.record_no = 0
for pa in sf:
- # yield SAM_Alignment.from_pysam_AlignedRead( pa, sf )
yield SAM_Alignment.from_pysam_AlignedSegment(pa, sf)
self.record_no += 1
@@ -1056,8 +1145,8 @@ class BAM_Reader(object):
class BAM_Writer(object):
-
- def __init__(self, filename, template=None, referencenames=None, referencelengths=None, text=None, header=None):
+ def __init__(self, filename, template=None, referencenames=None,
+ referencelengths=None, text=None, header=None):
try:
import pysam
except ImportError:
@@ -1071,15 +1160,20 @@ class BAM_Writer(object):
self.referencelengths = referencelengths
self.text = text
self.header = header
- self.sf = pysam.Samfile(self.filename, mode="wb", template=self.template, referencenames=self.referencenames,
- referencelengths=self.referencelengths, text=self.text, header=self.header)
+ self.sf = pysam.Samfile(
+ self.filename,
+ mode="wb",
+ template=self.template,
+ referencenames=self.referencenames,
+ referencelengths=self.referencelengths,
+ text=self.text,
+ header=self.header)
@classmethod
def from_BAM_Reader(cls, fn, br):
return BAM_Writer(filename=fn, header=br.get_header_dict())
def write(self, alnmt):
- #self.sf.write( alnmt.to_pysam_AlignedRead( self.sf ) )
self.sf.write(alnmt.to_pysam_AlignedSegment(self.sf))
def close(self):
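
A sketch of the writer after the reformatting above, copying one BAM file into
another (both paths are placeholders); from_BAM_Reader reuses the input header:

    import HTSeq

    inbam = HTSeq.BAM_Reader("in.bam")
    outbam = HTSeq.BAM_Writer.from_BAM_Reader("out.bam", inbam)
    for almnt in inbam:
        outbam.write(almnt)
    outbam.close()
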
@@ -1100,13 +1194,21 @@ class BED_Reader(FileOrSequence):
raise ValueError("BED file line contains less than 3 fields")
if len(fields) > 9:
raise ValueError("BED file line contains more than 9 fields")
- iv = GenomicInterval(fields[0], int(fields[1]), int(
- fields[2]), fields[5] if len(fields) > 5 else ".")
- f = GenomicFeature(fields[3] if len(
- fields) > 3 else "unnamed", "BED line", iv)
+ iv = GenomicInterval(
+ fields[0],
+ int(fields[1]),
+ int(fields[2]),
+ fields[5] if len(fields) > 5 else ".")
+ f = GenomicFeature(
+ fields[3] if len(fields) > 3 else "unnamed",
+ "BED line",
+ iv)
f.score = float(fields[4]) if len(fields) > 4 else None
- f.thick = GenomicInterval(iv.chrom, int(fields[6]), int(
- fields[7]), iv.strand) if len(fields) > 7 else None
+ f.thick = GenomicInterval(
+ iv.chrom,
+ int(fields[6]),
+ int(fields[7]),
+ iv.strand) if len(fields) > 7 else None
f.itemRgb = [int(a) for a in fields[8].split(",")
] if len(fields) > 8 else None
yield(f)
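
A sketch of the reader as restructured above; the path is a placeholder, and lines
with 3 to 9 columns are accepted:

    import HTSeq

    for feature in HTSeq.BED_Reader("peaks.bed"):
        print(feature.name, feature.iv, feature.score)
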
=====================================
python3/HTSeq/_version.py
=====================================
@@ -1 +1 @@
-__version__ = "0.10.0"
\ No newline at end of file
+__version__ = "0.11.2"
\ No newline at end of file
=====================================
python3/HTSeq/scripts/count.py
=====================================
@@ -41,10 +41,10 @@ def count_reads_in_features(sam_filenames, gff_filename,
r = (r,)
for read in r:
if read is not None:
- samoutfile.write(read.original_sam_line.rstrip() +
- "\tXF:Z:" + assignment + "\n")
+ read.optional_fields.append(('XF', assignment))
+ samoutfile.write(read.get_sam_line() + "\n")
- if samouts != "":
+ if samouts != []:
if len(samouts) != len(sam_filenames):
raise ValueError('Select the same number of SAM input and output files')
# Try to open samout files early in case any of them has issues
@@ -116,22 +116,28 @@ def count_reads_in_features(sam_filenames, gff_filename,
lowqual_all = []
nonunique_all = []
for isam, (sam_filename) in enumerate(sam_filenames):
- if samouts != '':
+ if samouts != []:
samoutfile = open(samouts[isam], 'w')
else:
samoutfile = None
try:
- if sam_filename != "-":
- read_seq_file = SAM_or_BAM_Reader(sam_filename)
- read_seq = read_seq_file
- first_read = next(iter(read_seq))
- else:
+ if sam_filename == "-":
read_seq_file = SAM_or_BAM_Reader(sys.stdin)
- read_seq_iter = iter(read_seq_file)
+ else:
+ read_seq_file = SAM_or_BAM_Reader(sam_filename)
+ read_seq_iter = iter(read_seq_file)
+ # Catch empty BAM files
+ try:
first_read = next(read_seq_iter)
+ pe_mode = first_read.paired_end
+ except:
+ first_read = None
+ pe_mode = False
+ if first_read is not None:
read_seq = itertools.chain([first_read], read_seq_iter)
- pe_mode = first_read.paired_end
+ else:
+ read_seq = []
except:
        sys.stderr.write(
            "Error occurred when reading beginning of SAM/BAM file.\n")
@@ -139,12 +145,20 @@ def count_reads_in_features(sam_filenames, gff_filename,
try:
if pe_mode:
+ if ((supplementary_alignment_mode == 'ignore') and
+ (secondary_alignment_mode == 'ignore')):
+ primary_only = True
+ else:
+ primary_only = False
if order == "name":
- read_seq = HTSeq.pair_SAM_alignments(read_seq)
+ read_seq = HTSeq.pair_SAM_alignments(
+ read_seq,
+ primary_only=primary_only)
elif order == "pos":
read_seq = HTSeq.pair_SAM_alignments_with_buffer(
read_seq,
- max_buffer_size=max_buffer_size)
+ max_buffer_size=max_buffer_size,
+ primary_only=primary_only)
else:
raise ValueError("Illegal order specified.")
empty = 0
@@ -396,9 +410,11 @@ def main():
"suitable for Ensembl GTF files: gene_id)")
pa.add_argument(
- "--additional-attr", type=str, nargs='+',
- default=(), help="Additional feature attributes (default: none, " +
- "suitable for Ensembl GTF files: gene_name)")
+ "--additional-attr", type=str,
+ action='append',
+ default=[], help="Additional feature attributes (default: none, " +
+        "suitable for Ensembl GTF files: gene_name). Use the option " +
+        "multiple times, once for each additional attribute")
pa.add_argument(
"-m", "--mode", dest="mode",
@@ -414,19 +430,20 @@ def main():
pa.add_argument(
"--secondary-alignments", dest="secondary_alignments", type=str,
- choices=("score", "ignore"), default="score",
+ choices=("score", "ignore"), default="ignore",
help="Whether to score secondary alignments (0x100 flag)")
pa.add_argument(
"--supplementary-alignments", dest="supplementary_alignments", type=str,
- choices=("score", "ignore"), default="score",
+ choices=("score", "ignore"), default="ignore",
help="Whether to score supplementary alignments (0x800 flag)")
pa.add_argument(
- "-o", "--samout", type=str, dest="samouts", nargs='+',
- default="", help="write out all SAM alignment records into an output " +
- "SAM file called SAMOUT, annotating each line with its feature assignment " +
- "(as an optional field with tag 'XF')")
+ "-o", "--samout", type=str, dest="samouts",
+ action='append',
+ default=[], help="write out all SAM alignment records into " +
+        "SAM files (one output file per input file), annotating each line " +
+ "with its feature assignment (as an optional field with tag 'XF')")
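
Taken together, the option changes above mean that --additional-attr and -o/--samout
are now given once per value, and that secondary (0x100) and supplementary (0x800)
alignments are skipped by default. A command-line sketch with placeholder file names:

    htseq-count \
        --additional-attr gene_name --additional-attr gene_biotype \
        -o sample1.annotated.sam -o sample2.annotated.sam \
        sample1.sam sample2.sam annotation.gtf

Passing --secondary-alignments score and/or --supplementary-alignments score restores
the previous scoring behaviour.
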
pa.add_argument(
"-q", "--quiet", action="store_true", dest="quiet",
=====================================
python3/HTSeq/scripts/qa.py
=====================================
@@ -5,18 +5,21 @@
# (c) Simon Anders, European Molecular Biology Laboratory, 2010
# released under GNU General Public License
-import sys, time, os.path, optparse
+import sys
+import os.path
+import optparse
from itertools import *
import numpy
import HTSeq
+
def main():
try:
import matplotlib
except ImportError:
sys.stderr.write("This script needs the 'matplotlib' library, which ")
- sys.stderr.write("was not found. Please install it." )
+ sys.stderr.write("was not found. Please install it.")
matplotlib.use('PDF')
from matplotlib import pyplot
@@ -26,10 +29,10 @@ def main():
except ImportError:
from matplotlib.pyplot import normalize as Normalize
-
# **** Parse command line ****
- optParser = optparse.OptionParser( usage = "%prog [options] read_file",
+ optParser = optparse.OptionParser(
+ usage="%prog [options] read_file",
        description=
        "This script takes a file with high-throughput sequencing reads " +
"(supported formats: SAM, Solexa _export.txt, FASTQ, Solexa " +
@@ -40,62 +43,68 @@ def main():
epilog =
"Written by Simon Anders (sanders at fs.tum.de), European Molecular Biology " +
" Laboratory (EMBL). (c) 2010. Released under the terms of the GNU General " +
- " Public License v3. Part of the 'HTSeq' framework, version %s." % HTSeq.__version__ )
- optParser.add_option( "-t", "--type", type="choice", dest="type",
- choices = ("sam", "bam", "solexa-export", "fastq", "solexa-fastq"),
- default = "sam", help="type of read_file (one of: sam [default], bam, " +
- "solexa-export, fastq, solexa-fastq)" )
- optParser.add_option( "-o", "--outfile", type="string", dest="outfile",
- help="output filename (default is <read_file>.pdf)" )
- optParser.add_option( "-r", "--readlength", type="int", dest="readlen",
- help="the maximum read length (when not specified, the script guesses from the file" )
- optParser.add_option( "-g", "--gamma", type="float", dest="gamma",
- default = 0.3,
- help="the gamma factor for the contrast adjustment of the quality score plot" )
- optParser.add_option( "-n", "--nosplit", action="store_true", dest="nosplit",
- help="do not split reads in unaligned and aligned ones" )
- optParser.add_option( "-m", "--maxqual", type="int", dest="maxqual", default=41,
- help="the maximum quality score that appears in the data (default: 41)" )
-
- if len( sys.argv ) == 1:
+ " Public License v3. Part of the 'HTSeq' framework, version %s." % HTSeq.__version__)
+ optParser.add_option(
+ "-t", "--type", type="choice", dest="type",
+ choices=("sam", "bam", "solexa-export", "fastq", "solexa-fastq"),
+ default="sam", help="type of read_file (one of: sam [default], bam, " +
+ "solexa-export, fastq, solexa-fastq)")
+ optParser.add_option(
+ "-o", "--outfile", type="string", dest="outfile",
+ help="output filename (default is <read_file>.pdf)")
+ optParser.add_option(
+ "-r", "--readlength", type="int", dest="readlen",
+        help="the maximum read length (when not specified, the script guesses from the file)")
+ optParser.add_option(
+ "-g", "--gamma", type="float", dest="gamma",
+ default=0.3,
+ help="the gamma factor for the contrast adjustment of the quality score plot")
+ optParser.add_option(
+ "-n", "--nosplit", action="store_true", dest="nosplit",
+        help="do not split reads into unaligned and aligned ones")
+ optParser.add_option(
+ "-m", "--maxqual", type="int", dest="maxqual", default=41,
+ help="the maximum quality score that appears in the data (default: 41)")
+
+ if len(sys.argv) == 1:
optParser.print_help()
sys.exit(1)
(opts, args) = optParser.parse_args()
- if len( args ) != 1:
- sys.stderr.write( sys.argv[0] + ": Error: Please provide one argument (the read_file).\n" )
- sys.stderr.write( " Call with '-h' to get usage information.\n" )
- sys.exit( 1 )
+ if len(args) != 1:
+ sys.stderr.write(
+ sys.argv[0] + ": Error: Please provide one argument (the read_file).\n")
+ sys.stderr.write(" Call with '-h' to get usage information.\n")
+ sys.exit(1)
readfilename = args[0]
if opts.type == "sam":
- readfile = HTSeq.SAM_Reader( readfilename )
+ readfile = HTSeq.SAM_Reader(readfilename)
isAlnmntFile = True
elif opts.type == "bam":
- readfile = HTSeq.BAM_Reader( readfilename )
+ readfile = HTSeq.BAM_Reader(readfilename)
isAlnmntFile = True
elif opts.type == "solexa-export":
- readfile = HTSeq.SolexaExportReader( readfilename )
+ readfile = HTSeq.SolexaExportReader(readfilename)
isAlnmntFile = True
elif opts.type == "fastq":
- readfile = HTSeq.FastqReader( readfilename )
+ readfile = HTSeq.FastqReader(readfilename)
isAlnmntFile = False
elif opts.type == "solexa-fastq":
- readfile = HTSeq.FastqReader( readfilename, "solexa" )
+ readfile = HTSeq.FastqReader(readfilename, "solexa")
isAlnmntFile = False
else:
- sys.error( "Oops." )
+        sys.exit("Oops.")
twoColumns = isAlnmntFile and not opts.nosplit
if opts.outfile is None:
- outfilename = os.path.basename( readfilename ) + ".pdf"
+ outfilename = os.path.basename(readfilename) + ".pdf"
else:
outfilename = opts.outfile
-
# **** Get read length ****
if opts.readlen is not None:
@@ -103,25 +112,23 @@ def main():
else:
readlen = 0
if isAlnmntFile:
- reads = ( a.read for a in readfile )
+ reads = (a.read for a in readfile)
else:
reads = readfile
- for r in islice( reads, 10000 ):
- if len( r ) > readlen:
- readlen = len( r )
+ for r in islice(reads, 10000):
+ if len(r) > readlen:
+ readlen = len(r)
max_qual = opts.maxqual
gamma = opts.gamma
-
# **** Initialize count arrays ****
- base_arr_U = numpy.zeros( ( readlen, 5 ), numpy.int )
- qual_arr_U = numpy.zeros( ( readlen, max_qual+1 ), numpy.int )
+ base_arr_U = numpy.zeros((readlen, 5), numpy.int)
+ qual_arr_U = numpy.zeros((readlen, max_qual+1), numpy.int)
if twoColumns:
- base_arr_A = numpy.zeros( ( readlen, 5 ), numpy.int )
- qual_arr_A = numpy.zeros( ( readlen, max_qual+1 ), numpy.int )
-
+ base_arr_A = numpy.zeros((readlen, 5), numpy.int)
+ qual_arr_A = numpy.zeros((readlen, max_qual+1), numpy.int)
# **** Main counting loop ****
@@ -130,110 +137,107 @@ def main():
for a in readfile:
if isAlnmntFile:
r = a.read
- else:
- r = a
- if twoColumns and (isAlnmntFile and a.aligned):
- r.add_bases_to_count_array( base_arr_A )
- r.add_qual_to_count_array( qual_arr_A )
- else:
- r.add_bases_to_count_array( base_arr_U )
- r.add_qual_to_count_array( qual_arr_U )
- i += 1
- if (i % 200000) == 0:
- print(i, "reads processed")
+ else:
+ r = a
+ if twoColumns and (isAlnmntFile and a.aligned):
+ r.add_bases_to_count_array(base_arr_A)
+ r.add_qual_to_count_array(qual_arr_A)
+ else:
+ r.add_bases_to_count_array(base_arr_U)
+ r.add_qual_to_count_array(qual_arr_U)
+ i += 1
+ if (i % 200000) == 0:
+ print(i, "reads processed")
except:
- sys.stderr.write( "Error occured in: %s\n" %
- readfile.get_line_number_string() )
+        sys.stderr.write("Error occurred in: %s\n" %
+ readfile.get_line_number_string())
raise
print(i, "reads processed")
-
# **** Normalize result ****
- def norm_by_pos( arr ):
- arr = numpy.array( arr, numpy.float )
- arr_n = ( arr.T / arr.sum( 1 ) ).T
- arr_n[ arr == 0 ] = 0
+ def norm_by_pos(arr):
+ arr = numpy.array(arr, numpy.float)
+ arr_n = (arr.T / arr.sum(1)).T
+ arr_n[arr == 0] = 0
return arr_n
- def norm_by_start( arr ):
- arr = numpy.array( arr, numpy.float )
- arr_n = ( arr.T / arr.sum( 1 )[ 0 ] ).T
- arr_n[ arr == 0 ] = 0
+ def norm_by_start(arr):
+ arr = numpy.array(arr, numpy.float)
+ arr_n = (arr.T / arr.sum(1)[0]).T
+ arr_n[arr == 0] = 0
return arr_n
-
- base_arr_U_n = norm_by_pos( base_arr_U )
- qual_arr_U_n = norm_by_start( qual_arr_U )
- nreads_U = base_arr_U[0,:].sum()
+ base_arr_U_n = norm_by_pos(base_arr_U)
+ qual_arr_U_n = norm_by_start(qual_arr_U)
+ nreads_U = base_arr_U[0, :].sum()
if twoColumns:
- base_arr_A_n = norm_by_pos( base_arr_A )
- qual_arr_A_n = norm_by_start( qual_arr_A )
- nreads_A = base_arr_A[0,:].sum()
-
+ base_arr_A_n = norm_by_pos(base_arr_A)
+ qual_arr_A_n = norm_by_start(qual_arr_A)
+ nreads_A = base_arr_A[0, :].sum()
# **** Make plot ****
- def plot_bases( arr ):
- xg = numpy.arange( readlen )
- pyplot.plot( xg, arr[ : , 0 ], marker='.', color='red')
- pyplot.plot( xg, arr[ : , 1 ], marker='.', color='darkgreen')
- pyplot.plot( xg, arr[ : , 2 ], marker='.',color='lightgreen')
- pyplot.plot( xg, arr[ : , 3 ], marker='.',color='orange')
- pyplot.plot( xg, arr[ : , 4 ], marker='.',color='grey')
- pyplot.axis( (0, readlen-1, 0, 1 ) )
- pyplot.text( readlen*.70, .9, "A", color="red" )
- pyplot.text( readlen*.75, .9, "C", color="darkgreen" )
- pyplot.text( readlen*.80, .9, "G", color="lightgreen" )
- pyplot.text( readlen*.85, .9, "T", color="orange" )
- pyplot.text( readlen*.90, .9, "N", color="grey" )
+ def plot_bases(arr):
+ xg = numpy.arange(readlen)
+ pyplot.plot(xg, arr[:, 0], marker='.', color='red')
+ pyplot.plot(xg, arr[:, 1], marker='.', color='darkgreen')
+ pyplot.plot(xg, arr[:, 2], marker='.', color='lightgreen')
+ pyplot.plot(xg, arr[:, 3], marker='.', color='orange')
+ pyplot.plot(xg, arr[:, 4], marker='.', color='grey')
+ pyplot.axis((0, readlen-1, 0, 1))
+ pyplot.text(readlen*.70, .9, "A", color="red")
+ pyplot.text(readlen*.75, .9, "C", color="darkgreen")
+ pyplot.text(readlen*.80, .9, "G", color="lightgreen")
+ pyplot.text(readlen*.85, .9, "T", color="orange")
+ pyplot.text(readlen*.90, .9, "N", color="grey")
pyplot.figure()
- pyplot.subplots_adjust( top=.85 )
- pyplot.suptitle( os.path.basename(readfilename), fontweight='bold' )
+ pyplot.subplots_adjust(top=.85)
+ pyplot.suptitle(os.path.basename(readfilename), fontweight='bold')
if twoColumns:
- pyplot.subplot( 221 )
- plot_bases( base_arr_U_n )
- pyplot.ylabel( "proportion of base" )
- pyplot.title( "non-aligned reads\n%.0f%% (%.3f million)" %
- ( 100. * nreads_U / (nreads_U+nreads_A), nreads_U / 1e6 ) )
-
- pyplot.subplot( 222 )
- plot_bases( base_arr_A_n )
- pyplot.title( "aligned reads\n%.0f%% (%.3f million)" %
- ( 100. * nreads_A / (nreads_U+nreads_A), nreads_A / 1e6 ) )
-
- pyplot.subplot( 223 )
- pyplot.pcolor( qual_arr_U_n.T ** gamma, cmap=pyplot.cm.Greens,
- norm=Normalize( 0, 1 ) )
- pyplot.axis( (0, readlen-1, 0, max_qual+1 ) )
- pyplot.xlabel( "position in read" )
- pyplot.ylabel( "base-call quality score" )
-
- pyplot.subplot( 224 )
- pyplot.pcolor( qual_arr_A_n.T ** gamma, cmap=pyplot.cm.Greens,
- norm=Normalize( 0, 1 ) )
- pyplot.axis( (0, readlen-1, 0, max_qual+1 ) )
- pyplot.xlabel( "position in read" )
+ pyplot.subplot(221)
+ plot_bases(base_arr_U_n)
+ pyplot.ylabel("proportion of base")
+        pyplot.title("non-aligned reads\n%.0f%% (%.3f million)" %
+ (100. * nreads_U / (nreads_U+nreads_A), nreads_U / 1e6))
+
+ pyplot.subplot(222)
+ plot_bases(base_arr_A_n)
+        pyplot.title("aligned reads\n%.0f%% (%.3f million)" %
+ (100. * nreads_A / (nreads_U+nreads_A), nreads_A / 1e6))
+
+ pyplot.subplot(223)
+ pyplot.pcolor(qual_arr_U_n.T ** gamma, cmap=pyplot.cm.Greens,
+ norm=Normalize(0, 1))
+ pyplot.axis((0, readlen-1, 0, max_qual+1))
+ pyplot.xlabel("position in read")
+ pyplot.ylabel("base-call quality score")
+
+ pyplot.subplot(224)
+ pyplot.pcolor(qual_arr_A_n.T ** gamma, cmap=pyplot.cm.Greens,
+ norm=Normalize(0, 1))
+ pyplot.axis((0, readlen-1, 0, max_qual+1))
+ pyplot.xlabel("position in read")
else:
- pyplot.subplot( 211 )
- plot_bases( base_arr_U_n )
- pyplot.ylabel( "proportion of base" )
- pyplot.title( "%.3f million reads" % ( nreads_U / 1e6 ) )
+ pyplot.subplot(211)
+ plot_bases(base_arr_U_n)
+ pyplot.ylabel("proportion of base")
+ pyplot.title("%.3f million reads" % (nreads_U / 1e6))
- pyplot.subplot( 212 )
- pyplot.pcolor( qual_arr_U_n.T ** gamma, cmap=pyplot.cm.Greens,
- norm=Normalize( 0, 1 ) )
- pyplot.axis( (0, readlen-1, 0, max_qual+1 ) )
- pyplot.xlabel( "position in read" )
- pyplot.ylabel( "base-call quality score" )
+ pyplot.subplot(212)
+ pyplot.pcolor(qual_arr_U_n.T ** gamma, cmap=pyplot.cm.Greens,
+ norm=Normalize(0, 1))
+ pyplot.axis((0, readlen-1, 0, max_qual+1))
+ pyplot.xlabel("position in read")
+ pyplot.ylabel("base-call quality score")
-    pyplot.savefig( outfilename )
+    pyplot.savefig(outfilename)
if __name__ == "__main__":
main()
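
A command-line sketch for the reworked script, with a placeholder input file; the
output name defaults to the input's base name plus ".pdf":

    htseq-qa -t bam -m 41 reads.bam    # writes reads.bam.pdf
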
=====================================
python3/src/HTSeq/_HTSeq.pxd
=====================================
@@ -7,47 +7,47 @@ cdef class GenomicInterval:
cdef public long end
cdef str _strand
- cpdef is_contained_in( self, GenomicInterval iv )
- cpdef contains( self, GenomicInterval iv )
- cpdef overlaps( self, GenomicInterval iv )
- cpdef extend_to_include( GenomicInterval self, GenomicInterval iv )
+ cpdef is_contained_in(self, GenomicInterval iv)
+ cpdef contains(self, GenomicInterval iv)
+ cpdef overlaps(self, GenomicInterval iv)
+ cpdef extend_to_include(GenomicInterval self, GenomicInterval iv)
cdef class GenomicPosition( GenomicInterval ):
pass
-cdef class Sequence( object ):
+cdef class Sequence(object):
cdef public bytes seq
cdef public str name
cdef public str descr
- cpdef Sequence get_reverse_complement( self )
- cpdef object add_bases_to_count_array( Sequence self, numpy.ndarray count_array_ )
- cpdef Sequence trim_left_end( Sequence self, Sequence pattern, float mismatch_prop = ? )
- cpdef Sequence trim_right_end( Sequence self, Sequence pattern, float mismatch_prop = ? )
+ cpdef Sequence get_reverse_complement(self, bint rename=?)
+ cpdef object add_bases_to_count_array(Sequence self, numpy.ndarray count_array_)
+ cpdef Sequence trim_left_end(Sequence self, Sequence pattern, float mismatch_prop=?)
+ cpdef Sequence trim_right_end(Sequence self, Sequence pattern, float mismatch_prop=?)
-cdef class SequenceWithQualities( Sequence ):
+cdef class SequenceWithQualities(Sequence):
cdef readonly bytes _qualstr
cdef readonly bytes _qualstr_phred
cdef readonly str _qualscale
cdef readonly object _qualarr
- cdef _fill_qual_arr( SequenceWithQualities self )
- cpdef object add_qual_to_count_array( SequenceWithQualities self, numpy.ndarray count_array_ )
- cpdef SequenceWithQualities trim_left_end_with_quals( SequenceWithQualities self,
- Sequence pattern, int max_mm_qual = ? )
- cpdef SequenceWithQualities trim_right_end_with_quals( SequenceWithQualities self,
- Sequence pattern, int max_mm_qual = ? )
+ cdef _fill_qual_arr(SequenceWithQualities self)
+ cpdef object add_qual_to_count_array(SequenceWithQualities self, numpy.ndarray count_array_)
+ cpdef SequenceWithQualities trim_left_end_with_quals(SequenceWithQualities self,
+ Sequence pattern, int max_mm_qual=?)
+ cpdef SequenceWithQualities trim_right_end_with_quals(SequenceWithQualities self,
+ Sequence pattern, int max_mm_qual=?)
-cdef class Alignment( object ):
+cdef class Alignment(object):
cdef public SequenceWithQualities _read
cdef public GenomicInterval iv
-cdef class AlignmentWithSequenceReversal( Alignment ):
+cdef class AlignmentWithSequenceReversal(Alignment):
cdef public SequenceWithQualities read_as_aligned
cdef public SequenceWithQualities _read_as_sequenced
-cdef class SAM_Alignment( AlignmentWithSequenceReversal ):
+cdef class SAM_Alignment(AlignmentWithSequenceReversal):
cdef public list cigar
cdef public int aQual
cdef public GenomicPosition mate_start
=====================================
python3/src/HTSeq/_HTSeq.pyx
=====================================
@@ -645,10 +645,15 @@ cdef class Sequence(object):
self.name = name
self.descr = None
- cpdef Sequence get_reverse_complement(self):
- return Sequence(
- reverse_complement(self.seq),
- "revcomp_of_" + self.name)
+ cpdef Sequence get_reverse_complement(self, bint rename=True):
+ if rename:
+ return Sequence(
+ reverse_complement(self.seq),
+ "revcomp_of_" + self.name)
+ else:
+ return Sequence(
+ reverse_complement(self.seq),
+ self.name)
def __str__(self):
return self.seq.decode()
@@ -898,13 +903,20 @@ cdef class SequenceWithQualities(Sequence):
self.write_to_fastq_file(sio, convert_to_phred)
return sio.getvalue()
- cpdef SequenceWithQualities get_reverse_complement(self):
+ cpdef SequenceWithQualities get_reverse_complement(self, bint rename=True):
cdef SequenceWithQualities res
- res = SequenceWithQualities(
- reverse_complement(self.seq),
- "revcomp_of_" + self.name,
- self._qualstr[::-1],
- self._qualscale)
+ if rename:
+ res = SequenceWithQualities(
+ reverse_complement(self.seq),
+ "revcomp_of_" + self.name,
+ self._qualstr[::-1],
+ self._qualscale)
+ else:
+ res = SequenceWithQualities(
+ reverse_complement(self.seq),
+ self.name,
+ self._qualstr[::-1],
+ self._qualscale)
if self._qualarr is not None:
res._qualarr = self._qualarr[::-1]
return res
View it on GitLab: https://salsa.debian.org/med-team/htseq/commit/237b3f1303e6f7f84c205db94eb8919765a26a34