[med-svn] [Git][med-team/changeo][master] 2 commits: routine-update: New upstream version

Andreas Tille gitlab at salsa.debian.org
Sun Mar 8 07:25:03 GMT 2020



Andreas Tille pushed to branch master at Debian Med / changeo


Commits:
23e323c9 by Andreas Tille at 2020-03-08T08:03:55+01:00
routine-update: New upstream version

- - - - -
eb1a2bdd by Andreas Tille at 2020-03-08T08:03:55+01:00
New upstream version 0.4.6
- - - - -


16 changed files:

- NEWS.rst
- PKG-INFO
- bin/AlignRecords.py
- bin/AssignGenes.py
- bin/BuildTrees.py
- bin/ConvertDb.py
- bin/CreateGermlines.py
- bin/DefineClones.py
- bin/MakeDb.py
- changeo.egg-info/PKG-INFO
- changeo/Commandline.py
- changeo/Gene.py
- changeo/IO.py
- changeo/Receptor.py
- changeo/Version.py
- debian/changelog


Changes:

=====================================
NEWS.rst
=====================================
@@ -1,6 +1,50 @@
 Release Notes
 ===============================================================================
 
+Version 0.4.6:  July 19, 2019
+-------------------------------------------------------------------------------
+
+BuildTrees:
+
++ Added the ability to run IgPhyML on the output data (``--igphyml``) and
+  support for passing IgPhyML arguments through BuildTrees.
++ Added the ``--clean`` argument to force deletion of all intermediate files
+  after IgPhyML execution.
++ Added the ``--format`` argument to allow specification of either the
+  Change-O standard (``changeo``) or the AIRR Rearrangement standard
+  (``airr``) for input and output.
+
+CreateGermlines:
+
++ Fixed a bug causing incorrect reporting of the germline format in the
+  console log.
+
+ConvertDb:
+
++ Removed requirement for the ``NP1_LENGTH`` and ``NP2_LENGTH`` fields from
+  the genbank subcommand.
+
+DefineClones:
+
++ Fixed a biopython warning arising when applying ``--model aa`` to junction
+  sequences that are not a multiple of three. The junction will now be
+  padded with an appropriate number of Ns (usually resulting in a translation
+  to X).
+
+MakeDb:
+
++ Added the ``--10x`` argument to all subcommands to support merging of
+  Cell Ranger annotation data, such as UMI count and C-region assignment,
+  with the output of the supported alignment tools.
++ Added inference of the receptor locus from the alignment data to all
+  subcommands, which is output in the ``LOCUS`` field.
++ Combined the extended field arguments of all subcommands (``--scores``,
+  ``--regions``, ``--cdr3``, and ``--junction``) into a single ``--extended``
+  argument.
++ Removed parsing of old IgBLAST v1.5 CDR3 fields
+  (``CDR3_IGBLAST``, ``CDR3_IGBLAST_AA``).
+
+
 Version 0.4.5:  January 9, 2019
 -------------------------------------------------------------------------------
 


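As context for the new --10x merge described in the MakeDb notes above, the sketch below shows the underlying idea: a Cell Ranger contig annotation table is indexed by contig_id and its per-contig fields (UMI count, C-region call, and so on) are attached to alignment records with matching ids. This is an illustrative sketch, not the actual MakeDb code; the column names follow the Cell Ranger annotation CSV handled by readCellRanger further down, and the file path and contig id shown are hypothetical.

    import csv

    def load_10x_annotations(path):
        """Index a Cell Ranger contig annotation CSV by contig_id."""
        with open(path) as handle:
            return {row['contig_id']: row for row in csv.DictReader(handle)}

    # Attach the UMI count and C-region call to an aligned record whose
    # sequence id matches the Cell Ranger contig id (the id below is made up).
    annotations = load_10x_annotations('filtered_contig_annotations.csv')
    record = annotations.get('AAACCTGAGAAACGAG-1_contig_1', {})
    umi_count, c_call = record.get('umis', ''), record.get('c_gene', '')
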
=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: changeo
-Version: 0.4.5
+Version: 0.4.6
 Summary: A bioinformatics toolkit for processing high-throughput lymphocyte receptor sequencing data.
 Home-page: http://changeo.readthedocs.io
 Author: Namita Gupta, Jason Anthony Vander Heiden


=====================================
bin/AlignRecords.py
=====================================
@@ -320,7 +320,7 @@ def getArgParser():
     subparsers.required = True
 
     # Parent parser
-    parser_parent = getCommonArgParser(format=False, multiproc=True)
+    parser_parent = getCommonArgParser(format=True, multiproc=True)
 
     # Argument parser for column-wise alignment across records
     parser_across = subparsers.add_parser('across', parents=[parser_parent],
@@ -400,12 +400,6 @@ if __name__ == '__main__':
     args = parser.parse_args()
     args_dict = parseCommonArgs(args)
 
-    # Convert case of fields
-    # if 'seq_fields' in args_dict:
-    #     args_dict['seq_fields'] = [f.upper() for f in args_dict['seq_fields']]
-    # if 'group_fields' in args_dict and args_dict['group_fields'] is not None:
-    #     args_dict['group_fields'] = [f.upper() for f in args_dict['group_fields']]
-
     # Check if a valid MUSCLE executable was specified for muscle mode
     if not shutil.which(args.muscle_exec):
         parser.error('%s does not exist or is not executable.' % args.muscle_exec)


=====================================
bin/AssignGenes.py
=====================================
@@ -169,8 +169,8 @@ def getArgParser():
                                     Specifying "airr" will output the AIRR TSV format provided by
                                     the IgBLAST argument "-outfmt 19".''')
     group_igblast.add_argument('--exec', action='store', dest='igblast_exec',
-                              default=default_igblast_exec,
-                              help='Path to the igblastn executable.')
+                               default=default_igblast_exec,
+                               help='Path to the igblastn executable.')
     parser_igblast.set_defaults(func=assignIgBLAST)
 
     return parser


=====================================
bin/BuildTrees.py
=====================================
@@ -9,18 +9,21 @@ from changeo import __version__, __date__
 
 # Imports
 import os
-import sys
+import random
+import subprocess
+import multiprocessing as mp
 from argparse import ArgumentParser
 from collections import OrderedDict
 from textwrap import dedent
 from time import time
 from Bio.Seq import Seq
+from functools import partial
 
 # Presto and changeo imports
 from presto.Defaults import default_out_args
 from presto.IO import  printLog, printMessage, printWarning, printError, printDebug
 from changeo.Defaults import default_format
-from changeo.IO import splitName, getDbFields, getFormatOperators, getOutputHandle
+from changeo.IO import splitName, getDbFields, getFormatOperators, getOutputHandle, getOutputName
 from changeo.Alignment import getRegions
 from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs
 
@@ -110,7 +113,7 @@ def checkFrameShifts(receptor, oqpos, ospos, log, debug):
     return frameshifts
 
 
-def findAndMask(receptor, scodons, qcodons, spos, s_end, qpos, log, debug, recursive):
+def findAndMask(receptor, scodons, qcodons, spos, s_end, qpos, log, debug, recursive=False):
     """
     Find and mask split codons
 
@@ -123,7 +126,7 @@ def findAndMask(receptor, scodons, qcodons, spos, s_end, qpos, log, debug, recur
       qpos (int): starting position of input sequence in IMGT sequence
       log (dict): log of information for each sequence
       debug (bool): print debugging statements?
-
+      recursive (bool): was this function called recursively?
     """
     frameshifts = 0
     while spos < s_end and qpos < len(qcodons):
@@ -333,6 +336,7 @@ def deduplicate(useqs, receptors, log=None, meta_data=None, delim=":"):
       receptors (dict): receptors within a clone (index is value in useqs dict).
       log (collections.OrderedDict): log of sequence errors.
       meta_data (str): Field to append to sequence IDs. Splits identical sequences with different meta_data.
       delim (str): delimited to use when appending meta_data.
 
     Returns:
@@ -357,8 +361,12 @@ def deduplicate(useqs, receptors, log=None, meta_data=None, delim=":"):
             rj = receptors[useqs[kj]]
             dist = unAmbigDist(ski, skj, True)
             m_match = True
-            if meta_data is not None and meta_data[0] != "DUPCOUNT":
-                m_match = ri.getField(meta_data[0]) == rj.getField(meta_data[0])
+            if meta_data is not None:
+                matches = 0
+                for m in meta_data:
+                    if ri.getField(m) == rj.getField(m) and m != "DUPCOUNT":
+                        matches += 1
+                m_match = (matches == len(meta_data))
             if dist == 0 and m_match:
                 ncounti = ki.count("A") + ki.count("T") + ki.count("G") + ki.count("C")
                 ncountj = kj.count("A") + kj.count("T") + kj.count("G") + kj.count("C")
@@ -417,6 +425,44 @@ def hasPTC(sequence):
     return -1
 
 
+
+def rmCDR3(sequences, clones):
+    """
+    Remove CDR3 from all sequences and germline of a clone
+
+     Arguments:
+       sequences (list): list of sequences in clones.
+       clones (list): list of Receptor objects.
+    """
+
+    for i in range(0,len(sequences)):
+        imgtar = clones[i].getField("imgtpartlabels")
+        germline = clones[i].getField("germline_imgt_d_mask")
+        nseq = []
+        nimgtar = []
+        ngermline = []
+        ncdr3 = 0
+        #print("imgtarlen: " + str(len(imgtar)))
+        #print("seqlen: " + str(len(sequences[i])))
+        #print("germline: " + str(len(germline)))
+        #if len(germline) < len(sequences[i]):
+        #    print("\n" + str(clones[i].sequence_id))
+        #    print("\n " + str((sequences[i])) )
+        #    print("\n" + str((germline)))
+        for j in range(0,len(imgtar)):
+            if imgtar[j] != 108:
+                nseq.append(sequences[i][j])
+                if j < len(germline):
+                    ngermline.append(germline[j])
+                nimgtar.append(imgtar[j])
+            else:
+                ncdr3 += 1
+        clones[i].setField("imgtpartlabels",nimgtar)
+        clones[i].setField("germline_imgt_d_mask", "".join(ngermline))
+        sequences[i] = "".join(nseq)
+        #print("Length: " + str(ncdr3))
+
+
 def characterizePartitionErrors(sequences, clones, meta_data):
     """
     Characterize potential mismatches between IMGT labels within a clone
@@ -435,8 +481,8 @@ def characterizePartitionErrors(sequences, clones, meta_data):
     nseqs = len(sequences)
     imgtar = clones[0].getField("imgtpartlabels")
     germline = clones[0].getField("germline_imgt_d_mask")
-
     correctseqs = False
+
     for seqi in range(0, len(sequences)):
         i = sequences[seqi]
         if len(i) != sites or len(clones[seqi].getField("imgtpartlabels")) != len(imgtar):
@@ -498,12 +544,10 @@ def characterizePartitionErrors(sequences, clones, meta_data):
         germline = germline + "N" * (seqdiff)
 
     if sites % 3 != 0:
-        printError("number of sites must be divisible by 3! len: %d, clone: %s , seq: %s" %(len(sequences[0]),\
-        clones[0].clone,sequences[0]))
-
+        printError("number of sites must be divisible by 3! len: %d, clone: %s , id: %s, seq: %s" %(len(sequences[0]),\
+        clones[0].clone,clones[0].sequence_id,sequences[0]))
     return imgtar, germline, sites, nseqs
 
-
 def outputSeqPartFiles(out_dir, useqs_f, meta_data, clones, collapse, nseqs, delim, newgerm, conseqs, duplicate, imgt):
     """
     Create intermediate sequence alignment and partition files for IgPhyML output
@@ -513,7 +557,7 @@ def outputSeqPartFiles(out_dir, useqs_f, meta_data, clones, collapse, nseqs, del
         useqs_f (dict): unique sequences mapped to ids.
         meta_data (str): Field to append to sequence IDs. Splits identical sequences with different meta_data.
         clones (list) : list of receptor objects.
-        collpase (bool) : deduplicate sequences.
+        collapse (bool) : deduplicate sequences.
         nseqs (int): number of sequences.
         delim (str) : delimiter for extracting metadata from ID.
         newgerm (str) : modified germline of clonal lineage.
@@ -521,6 +565,11 @@ def outputSeqPartFiles(out_dir, useqs_f, meta_data, clones, collapse, nseqs, del
         duplicate (bool) : duplicate sequence if only one in a clone.
         imgt (list) : IMGT numbering of clonal positions .
     """
+
+    # bootstrap these data if desired
+    lg = len(newgerm)
+    sites = range(0, lg)
+
     transtable = clones[0].sequence_id.maketrans(" ", "_")
     outfile = os.path.join(out_dir, "%s.fasta" % clones[0].clone)
     with open(outfile, "w") as clonef:
@@ -530,32 +579,37 @@ def outputSeqPartFiles(out_dir, useqs_f, meta_data, clones, collapse, nseqs, del
                 cid = ""
                 if meta_data is not None:
                     seq, cid = seq_f.split(delim)
-                    clones[num].setField(meta_data[0], clones[num].getField(meta_data[0]).replace(":", "_"))
-                    cid = delim + str(clones[num].getField(meta_data[0]))
+                    cid = delim + cid.replace(":", "_")
                 sid = clones[num].sequence_id.translate(transtable) + cid
-                clonef.write(">%s\n%s\n" % (sid, seq.replace(".", "-")))
+                clonef.write(">%s\n%s\n" % (sid.replace(":","-"), seq.replace(".", "-")))
                 if len(useqs_f) == 1 and duplicate:
                     if meta_data is not None:
                         if meta_data[0] == "DUPCOUNT":
                             cid = delim + "0"
                     sid = clones[num].sequence_id.translate(transtable) + "_1" + cid
-                    clonef.write(">%s\n%s\n" % (sid, seq.replace(".", "-")))
+                    clonef.write(">%s\n%s\n" % (sid.replace(":","-"), seq.replace(".", "-")))
         else:
             for j in range(0, nseqs):
                 cid = ""
                 if meta_data is not None:
-                    clones[j].setField(meta_data[0], clones[j].getField(meta_data[0]).replace(":", "_"))
-                    cid = delim+str(clones[j].getField(meta_data[0]))
+                    meta_data_list = []
+                    for m in meta_data:
+                        meta_data_list.append(clones[j].getField(m).replace(":", "_"))
+                    cid = delim + str(delim.join(meta_data_list))
                 sid = clones[j].sequence_id.translate(transtable) + cid
-                clonef.write(">%s\n%s\n" % (sid, conseqs[j].replace(".", "-")))
+                clonef.write(">%s\n%s\n" % (sid.replace(":","-"), conseqs[j].replace(".", "-")))
                 if nseqs == 1 and duplicate:
                     if meta_data is not None:
                         if meta_data[0] == "DUPCOUNT":
                             cid = delim + "0"
                     sid = clones[j].sequence_id.translate(transtable)+"_1" + cid
-                    clonef.write(">%s\n%s\n" % (sid, conseqs[j].replace(".", "-")))
+                    clonef.write(">%s\n%s\n" % (sid.replace(":","-"), conseqs[j].replace(".", "-")))
 
-        clonef.write(">%s_GERM\n" % clones[0].clone)
+        germ_id = ["GERM"]
+        if meta_data is not None:
+            for i in range(1,len(meta_data)):
+                germ_id.append("GERM")
+        clonef.write(">%s_%s\n" % (clones[0].clone,"_".join(germ_id)))
         for i in range(0, len(newgerm)):
             clonef.write("%s" % newgerm[i].replace(".","-"))
         clonef.write("\n")
@@ -572,7 +626,7 @@ def outputSeqPartFiles(out_dir, useqs_f, meta_data, clones, collapse, nseqs, del
         partf.write("\n")
 
 
-def outputIgPhyML(clones, sequences, meta_data=None, collapse=False, logs=None,
+def outputIgPhyML(clones, sequences, meta_data=None, collapse=False, ncdr3=False, logs=None,
                   fail_writer=None, out_dir=None, min_seq=1):
     """
     Create intermediate sequence alignment and partition files for IgPhyML output
@@ -582,6 +636,7 @@ def outputIgPhyML(clones, sequences, meta_data=None, collapse=False, logs=None,
       sequences (list): sequences within the same clone (share indexes with clones parameter).
       meta_data (str): Field to append to sequence IDs. Splits identical sequences with different meta_data
       collapse (bool): if True collapse identical sequences.
+      ncdr3 (bool): if True remove CDR3
       logs (dict): contains log information for each sequence
       out_dir (str): directory for output files.
       fail_writer (changeo.IO.TSVWriter): failed sequences writer object.
@@ -624,14 +679,36 @@ def outputIgPhyML(clones, sequences, meta_data=None, collapse=False, logs=None,
     elif len(lcodon) == 1:
         newgerm[-1] = newgerm[-1] + "NN"
 
+    if ncdr3:
+        ngerm = []
+        nimgt = []
+        for i in range(0, len(newseqs)):
+            nseq = []
+            ncdr3 = 0
+            for j in range(0, len(imgt)):
+                if imgt[j] != 108:
+                    nseq.append(newseqs[i][j])
+                    if i == 0:
+                        ngerm.append(newgerm[j])
+                        nimgt.append(imgt[j])
+                else:
+                    ncdr3 += 1
+            newseqs[i] = nseq
+        newgerm = ngerm
+        imgt = nimgt
+            #print("Length: " + str(ncdr3))
+
     useqs_f = OrderedDict()
     conseqs = []
     for j in range(0, nseqs):
         conseq = "".join([str(seq_rec) for seq_rec in newseqs[j]])
         if meta_data is not None:
-            if isinstance(clones[j].getField(meta_data[0]), str):
-                clones[j].setField(meta_data[0],clones[j].getField(meta_data[0]).replace("_", ""))
-            conseq_f = "".join([str(seq_rec) for seq_rec in newseqs[j]])+delim+str(clones[j].getField(meta_data[0]))
+            meta_data_list = []
+            for m in range(0,len(meta_data)):
+                if isinstance(clones[j].getField(meta_data[m]), str):
+                    clones[j].setField(meta_data[m],clones[j].getField(meta_data[m]).replace("_", ""))
+                meta_data_list.append(str(clones[j].getField(meta_data[m])))
+            conseq_f = "".join([str(seq_rec) for seq_rec in newseqs[j]])+delim+":".join(meta_data_list)
         else:
             conseq_f = conseq
         if conseq_f in useqs_f and collapse:
@@ -679,9 +756,12 @@ def maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer):
       cloneseqs (list): list of masked clone sequences.
       logs (dict): contains log information for each sequence.
       fails (dict): counts of various sequence processing failures.
+      out_args (dict): arguments for output preferences.
+      fail_writer (changeo.IO.TSVWriter): failed sequences writer object.
 
     Returns:
-      None: returns None if an error occurs.
+      0: returns 0 if an error occurs or masking fails.
+      1: returns 1 if masking succeeds.
     """
     if r.clone is None:
         printError("Cannot export datasets until sequences are clustered into clones.")
@@ -703,13 +783,13 @@ def maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer):
         logs[r.sequence_id]["FAIL"] = "Germline PTC"
         fails["seq_fail"] += 1
         fails["germlineptc"] += 1
-        return None
+        return 0
 
     if r.functional and ptcs < 0:
         #If IMGT regions are provided, record their positions
         regions = getRegions(r.sequence_imgt, r.junction_length)
         #print(regions["cdr1_imgt"]+regions["fwr4_imgt"])
-        if regions["cdr3_imgt"] is not "":
+        if regions["cdr3_imgt"] != "" and regions["cdr3_imgt"] is not None:
             simgt = regions["fwr1_imgt"] + regions["cdr1_imgt"] + regions["fwr2_imgt"] + regions["cdr2_imgt"] + \
                     regions["fwr3_imgt"] + regions["cdr3_imgt"] + regions["fwr4_imgt"]
             if len(simgt) < len(r.sequence_imgt):
@@ -733,12 +813,11 @@ def maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer):
                 logs[r.sequence_id]["FWRCDRSEQ"] = simgt
                 fails["seq_fail"] += 1
                 fails["region_fail"] += 1
-                return None
+                return 0
         else:
             #imgt_warn = "\n! IMGT FWR/CDR sequence columns not detected.\n! Cannot run CDR/FWR partitioned model on this data.\n"
             imgtpartlabels = [0] * len(r.sequence_imgt)
             r.setField("imgtpartlabels", imgtpartlabels)
-        #print(r.sequence_imgt)
         mout = maskSplitCodons(r)
         mask_seq = mout[0]
         ptcs = hasPTC(mask_seq)
@@ -755,6 +834,7 @@ def maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer):
             else:
                 clones[r.clone] = [r]
                 cloneseqs[r.clone] = [mask_seq]
+            return 1
         else:
             if out_args["failed"]:
                 fail_writer.writeReceptor(r)
@@ -779,16 +859,140 @@ def maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer):
         fails["seq_fail"] += 1
         fails["nf_fail"] += 1
 
+    return 0
+
+# Run IgPhyML on output data
+def runIgPhyML(outfile, igphyml_out, clone_dir, nproc=1, optimization="lr", omega="e,e", kappa="e", motifs="FCH",
+               hotness="e,e,e,e,e,e",oformat="tab", nohlp=False, clean="none"):
+    """
+    Run IgPhyML on outputted data
+
+    Arguments:
+      outfile (str): Output file name.
+      igphyml_out (str): IgPhyML output file name.
+      clone_dir (str): directory containing the per-clone intermediate files.
+      nproc (int): Number of threads to parallelize IgPhyML across.
+      optimization (str): Optimize combination of topology (t), branch lengths (l) and parameters (r) in IgPhyML.
+      omega (str): omega optimization in IgPhyML (--omega).
+      kappa (str): kappa optimization in IgPhyML (-t).
+      motifs (str): motifs to use in IgPhyML (--motifs).
+      hotness (str): hotspot mutability parameters to estimate in IgPhyML (--hotness).
+      oformat (str): output format for IgPhyML (tab or txt)
+      nohlp (bool): If True, only estimate GY94 trees and parameters
+      clean (str): delete intermediate files? (none, all)
+    """
+    osplit = outfile.split(".")
+    outrep = ".".join(osplit[0:(len(osplit)-1)]) + "_gy.tsv"
+    gyout = outfile + "_igphyml_stats_gy.txt"
+
+    gy_args = ["igphyml", "--repfile", outfile, "-m", "GY", "--run_id", "gy", "--outrep", outrep, "--threads",
+               str(nproc),"--outname",gyout]
+
+
+    hlp_args = ["igphyml","--repfile", outrep, "-m", "HLP", "--run_id", "hlp", "--threads", str(nproc), "-o",
+                optimization, "--omega", omega, "-t", kappa, "--motifs", motifs, "--hotness", hotness, "--oformat",
+                oformat, "--outname", igphyml_out]
+
+    log = OrderedDict()
+    log["START"] = "IgPhyML GY94 tree estimation"
+    printLog(log)
+
+    try: #check for igphyml executable
+        subprocess.check_output(["igphyml"])
+    except:
+        printError("igphyml not found :-/")
+    try: #get GY94 starting topologies
+        p = subprocess.check_output(gy_args)
+    except subprocess.CalledProcessError as e:
+        print(" ".join(gy_args))
+        print('error>', e.output, '<')
+        printError("GY94 tree building in IgPhyML failed")
+
+    log = OrderedDict()
+    log["START"] = "IgPhyML HLP analysis"
+    log["OPTIMIZE"] = optimization
+    log["TS/TV"] = kappa
+    log["wFWR,wCDR"] = omega
+    log["MOTIFS"] = motifs
+    log["HOTNESS"] = hotness
+    log["NPROC"] = nproc
+    printLog(log)
+
+    if not nohlp:
+        try: #estimate HLP parameters/trees
+            p = subprocess.check_output(hlp_args)
+        except subprocess.CalledProcessError as e:
+            print(" ".join(hlp_args))
+            print('error>', e.output, '<')
+            printError("HLP tree building failed")
+
+    log = OrderedDict()
+    log["OUTPUT"] = igphyml_out
+    if oformat == "tab":
+        igf = open(igphyml_out)
+        names = igf.readline().split("\t")
+        vals = igf.readline().split("\t")
+        for i in range(3,len(names)-1):
+            log[names[i]] = round(float(vals[i]),2)
+    printLog(log)
+
+    if clean != "none":
+        log = OrderedDict()
+        log["START"] = "CLEANING"
+        log["SCOPE"] = clean
+        printLog(log)
+        todelete = open(outrep)
+        for line in todelete:
+            line = line.rstrip("\n")
+            line = line.rstrip("\r")
+            lsplit = line.split("\t")
+            if len(lsplit) == 4:
+                os.remove(lsplit[0])
+                os.remove(lsplit[1])
+                os.remove(lsplit[3])
+        todelete.close()
+        os.remove(outrep)
+        os.remove(outfile)
+        os.remove(gyout)
+        cilog = outrep + "_igphyml_CIlog.txt_hlp"
+        if os.path.isfile(cilog):
+            os.remove(cilog)
+        if oformat == "tab":
+            os.rmdir(clone_dir)
+        else:
+            printWarning("Using --clean all with --oformat txt will delete all tree file results.\n"
+                         "You'll have to do that yourself.")
+        log = OrderedDict()
+        log["END"] = "IgPhyML analysis"
+        printLog(log)
+
 
 # Note: Collapse can give misleading dupcount information if some sequences have ambiguous characters at polymorphic sites
-def buildTrees(db_file, meta_data=None, collapse=False, min_seq=1, format=default_format, out_args=default_out_args):
+def buildTrees(db_file, meta_data=None, target_clones=None, collapse=False, ncdr3=False, sample_depth=-1, min_seq=1,append=None,
+               igphyml=False, nproc=1, optimization="lr", omega="e,e", kappa="e", motifs="FCH",
+               hotness="e,e,e,e,e,e", oformat="tab", clean="none", nohlp=False,
+               format=default_format, out_args=default_out_args):
     """
     Masks codons split by alignment to IMGT reference, then produces input files for IgPhyML
 
     Arguments:
       db_file (str): input tab-delimited database file.
       meta_data (str): Field to append to sequence IDs. Splits identical sequences with different meta_data
+      target_clones (str): List of clone IDs to analyze.
       collapse (bool): if True collapse identical sequences.
+      ncdr3 (bool): if True remove all CDR3s.
+      sample_depth (int): depth of subsampling before deduplication
+      min_seq (int): minimum number of sequences per clone
+      append (str): column name to append to sequence_id
+      igphyml (bool): If True, run IgPhyML on outputted data
+      nproc (int) : Number of threads to parallelize IgPhyML across
+      optimization (str): Optimize combination of topology (t), branch lengths (l) and parameters (r) in IgPhyML.
+      omega (str): omega optimization in IgPhyML (--omega).
+      kappa (str): kappa optimization in IgPhyML (-t).
+      motifs (str): motifs to use in IgPhyML (--motifs).
+      hotness (str): hotspot mutability parameters to estimate in IgPhyML (--hotness).
+      oformat (str): output format for IgPhyML (tab or txt)
+      clean (str): delete intermediate files? (none, all)
+      nohlp (bool): If True, only estimate GY94 trees and parameters
       format (str): input and output format.
       out_args (dict): arguments for output preferences.
 
@@ -803,19 +1007,27 @@ def buildTrees(db_file, meta_data=None, collapse=False, min_seq=1, format=defaul
     printLog(log)
 
     # Open output files
+    out_label = "lineages"
+
     pass_handle = getOutputHandle(db_file,
-                                  out_label="lineages",
+                                  out_label=out_label,
                                   out_dir=out_args["out_dir"],
-                                  out_name=out_args["out_name"],
+                                  out_name= out_args["out_name"],
                                   out_type="tsv")
 
+    igphyml_out = None
+    if igphyml:
+        igphyml_out = getOutputName(db_file, out_label="igphyml-pass",
+                                    out_dir=out_args["out_dir"],
+                                    out_name=out_args["out_name"],
+                                    out_type=oformat)
+
     dir_name, __ = os.path.split(pass_handle.name)
 
     if out_args["out_name"] is None:
         __, clone_name, __ = splitName(db_file)
     else:
         clone_name = out_args["out_name"]
-    # clone_dir = outdir/out_name
     if dir_name is None:
         clone_dir = clone_name
     else:
@@ -853,8 +1065,52 @@ def buildTrees(db_file, meta_data=None, collapse=False, min_seq=1, format=defaul
     # Mask codons split by indels
     start_time = time()
     printMessage("Correcting frames and indels of sequences", start_time=start_time, width=50)
+
+    #subsampling loop
+    init_clone_sizes = {}
+    big_enough = []
+    all_records = []
+    found_no_funct = False
     for r in records:
-        maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer)
+        if r.functional is None:
+            r.functional = True
+            if found_no_funct is False:
+                printWarning("FUNCTIONAL column not found.")
+                found_no_funct = True
+        all_records.append(r)
+        if r.clone in init_clone_sizes:
+            init_clone_sizes[r.clone] += 1
+        else:
+            init_clone_sizes[r.clone] = 1
+
+    for r in all_records:
+        if target_clones is None or r.clone in target_clones:
+            if init_clone_sizes[r.clone] >= min_seq:
+               big_enough.append(r)
+
+    if len(big_enough) == 0:
+        printError("\n\nNo sequences found that match specified criteria.",1)
+
+    if sample_depth > 0:
+        random.shuffle(big_enough)
+
+    total = 0
+    for r in big_enough:
+        if r.functional is None:
+            r.functional = True
+            if found_no_funct is False:
+                printWarning("FUNCTIONAL column not found.")
+                found_no_funct = True
+
+        r.sequence_id = r.sequence_id.replace(",","-") #remove commas from sequence ID
+        r.sequence_id = r.sequence_id.replace(":","-") #remove colons from sequence ID
+        if append is not None:
+            for m in append:
+                r.sequence_id = r.sequence_id + "_" + r.getField(m)
+        total += maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer)
+        if total == sample_depth:
+            break
 
     # Start processing clones
     clonesizes = {}
@@ -868,7 +1124,9 @@ def buildTrees(db_file, meta_data=None, collapse=False, min_seq=1, format=defaul
             clonesizes[str(k)] = -len(cloneseqs[str(k)])
         else:
             clonesizes[str(k)] = outputIgPhyML(clones[str(k)], cloneseqs[str(k)], meta_data=meta_data, collapse=collapse,
-                                           logs=logs, fail_writer=fail_writer, out_dir=clone_dir, min_seq=min_seq)
+                                           ncdr3=ncdr3, logs=logs, fail_writer=fail_writer,
+                                           out_dir=clone_dir, min_seq=min_seq)
+
         #If clone is too small, size is returned as a negative
         if clonesizes[str(k)] > 0:
             nclones += 1
@@ -893,7 +1151,11 @@ def buildTrees(db_file, meta_data=None, collapse=False, min_seq=1, format=defaul
         outfile = os.path.join(clone_dir, "%s.fasta" % key)
         partfile = os.path.join(clone_dir, "%s.part.txt" % key)
         if clonesizes[key] > 0:
-            pass_handle.write("%s\t%s\t%s\t%s\n" % (outfile, "N", key+"_GERM", partfile))
+            germ_id = ["GERM"]
+            if meta_data is not None:
+                for i in range(1, len(meta_data)):
+                    germ_id.append("GERM")
+            pass_handle.write("%s\t%s\t%s_%s\t%s\n" % (outfile, "N", key,"_".join(germ_id), partfile))
 
     handle.close()
     output = {"pass": None, "fail": None}
@@ -925,8 +1187,15 @@ def buildTrees(db_file, meta_data=None, collapse=False, min_seq=1, format=defaul
     log["END"] = "BuildTrees"
     printLog(log)
 
+    #Run IgPhyML on outputted data?
+    if igphyml:
+        runIgPhyML(pass_handle.name, igphyml_out=igphyml_out, clone_dir=clone_dir, nproc=nproc,
+                   optimization=optimization, omega=omega, kappa=kappa, motifs=motifs,
+                   hotness=hotness, oformat=oformat, nohlp=nohlp,clean=clean)
+
     return output
 
+
 def getArgParser():
     """
     Defines the ArgumentParser
@@ -944,29 +1213,73 @@ def getArgParser():
                     successfully processed records.
                  lineages-fail
                     database records failed processing.
+                 igphyml-pass
+                    parameter estimates and lineage trees from running IgPhyML, if specified
 
              required fields:
                  SEQUENCE_ID, SEQUENCE_INPUT, SEQUENCE_IMGT,
-                 GERMLINE_IMGT_D_MASK, V_CALL, J_CALL
+                 GERMLINE_IMGT_D_MASK, V_CALL, J_CALL, CLONE,
+                 V_SEQ_START
               """)
 
     # Parent parser
-    parser_parent = getCommonArgParser(out_file=False, log=True, format=False)
+    parser_parent = getCommonArgParser(out_file=False, log=True, format=True)
 
     # Define argument parser
     parser = ArgumentParser(description=__doc__, epilog=fields,
                             parents=[parser_parent],
                             formatter_class=CommonHelpFormatter, add_help=False)
 
-    group = parser.add_argument_group("tree building arguments")
+    group = parser.add_argument_group("sequence processing arguments")
     group.add_argument("--collapse", action="store_true", dest="collapse",
                         help="""If specified, collapse identical sequences before exporting to fasta.""")
+    group.add_argument("--ncdr3", action="store_true", dest="ncdr3",
+                        help="""If specified, remove CDR3 from all sequences.""")
     group.add_argument("--md", nargs="+", action="store", dest="meta_data",
                        help="""List of fields containing metadata to include in output fasta file 
                             sequence headers.""")
+    group.add_argument("--clones", nargs="+", action="store", dest="target_clones",
+                       help="""List of clone IDs to output, if specified.""")
     group.add_argument("--minseq", action="store", dest="min_seq", type=int, default=1,
                        help="""Minimum number of data sequences. Any clones with fewer than the specified
                             number of sequences will be excluded.""")
+    group.add_argument("--sample", action="store", dest="sample_depth", type=int, default=-1,
+                       help="""Depth of reads to be subsampled (before deduplication).""")
+    group.add_argument("--append", nargs="+", action="store", dest="append",
+                       help="""List of columns to append to sequence ID to ensure uniqueness.""")
+
+    igphyml_group = parser.add_argument_group("IgPhyML arguments (see igphyml -h for details)")
+    igphyml_group.add_argument("--igphyml", action="store_true", dest="igphyml",
+                               help="""Run IgPhyML on output?""")
+    igphyml_group.add_argument("--nproc", action="store", dest="nproc", type=int, default=1,
+                               help="""Number of threads to parallelize IgPhyML across.""")
+    igphyml_group.add_argument("--clean", action="store", choices=("none", "all"),
+                               dest="clean", type=str, default="none",
+                               help="""Delete intermediate files? 
+                               none: leave all intermediate files; all: delete all intermediate files.""")
+    igphyml_group.add_argument("--optimize", action="store", dest="optimization", type=str, default="lr",
+                               choices=("n","r","l","lr","tl","tlr"),
+                               help="""Optimize combination of topology (t) branch lengths (l) and parameters (r), or 
+                               nothing (n), for IgPhyML.""")
+    igphyml_group.add_argument("--omega", action="store", dest="omega", type=str, default="e,e",
+                               choices = ("e", "ce", "e,e", "ce,e", "e,ce", "ce,ce"),
+                               help="""Omega parameters to estimate for FWR,CDR respectively: 
+                               e = estimate, ce = estimate + confidence interval""")
+    igphyml_group.add_argument("-t", action="store", dest="kappa", type=str, default="e",
+                               choices=("e", "ce"),
+                               help="""Kappa parameters to estimate: 
+                               e = estimate, ce = estimate + confidence interval""")
+    igphyml_group.add_argument("--motifs", action="store", dest="motifs", type=str,
+                               default="WRC_2:0,GYW_0:1,WA_1:2,TW_0:3,SYC_2:4,GRS_0:5",
+                               help="""Motifs for which to estimate mutability.""")
+    igphyml_group.add_argument("--hotness", action="store", dest="hotness", type=str, default="e,e,e,e,e,e",
+                               help="""Mutability parameters to estimate: 
+                               e = estimate, ce = estimate + confidence interval""")
+    igphyml_group.add_argument("--oformat", action="store", dest="oformat", type=str, default="tab",
+                               choices=("tab", "txt"),
+                               help="""IgPhyML output format.""")
+    igphyml_group.add_argument("--nohlp", action="store_true", dest="nohlp",
+                               help="""Don't run HLP model?""")
 
     return parser
 
@@ -985,4 +1298,4 @@ if __name__ == "__main__":
     # Call main for each input file
     for f in args.__dict__["db_files"]:
         args_dict["db_file"] = f
-        buildTrees(**args_dict)
+        buildTrees(**args_dict)
\ No newline at end of file
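
For orientation, here is a minimal sketch of driving the new IgPhyML integration through the buildTrees() signature added above, rather than via the command line. It assumes BuildTrees.py is importable as a module and that the igphyml executable is on the PATH; the database path is hypothetical and the options shown are only a subset of those documented in the docstring.

    from BuildTrees import buildTrees   # assumes bin/ is on the module path

    buildTrees('clones_clone-pass.tsv', # hypothetical Change-O database file
               collapse=True,           # deduplicate identical sequences
               min_seq=2,               # drop clones with fewer than 2 sequences
               igphyml=True,            # run the GY94 and HLP IgPhyML stages
               nproc=4,                 # threads passed through to IgPhyML
               clean='all')             # delete intermediate files afterwards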


=====================================
bin/ConvertDb.py
=====================================
@@ -534,7 +534,7 @@ def convertToFasta(db_file, id_field=default_id_field, seq_field=default_seq_fie
 
 
 def makeGenbankFeatures(record, start=None, end=None, product=default_product,
-                        inference=None, db_xref=default_db_xref,
+                        inference=None, db_xref=None,
                         c_field=None, allow_stop=False, asis_calls=False,
                         allele_delim=default_allele_delim):
     """
@@ -616,8 +616,9 @@ def makeGenbankFeatures(record, start=None, end=None, product=default_product,
     c_region_length = len(record.sequence_input[(c_region_start + start_trim - 1):]) - end_trim
     if c_region_length > 0:
         if c_gene is not None:
-            c_region = [('gene', c_gene),
-                        ('db_xref', '%s:%s' % (db_xref, c_gene))]
+            c_region = [('gene', c_gene)]
+            if db_xref is not None:
+                c_region.append(('db_xref', '%s:%s' % (db_xref, c_gene)))
         else:
             c_region = []
 
@@ -838,8 +839,7 @@ def convertToGenbank(db_file, inference=None, db_xref=None, molecule=default_mol
     try:
         required = ['sequence_input',
                     'v_call', 'd_call', 'j_call',
-                    'v_seq_start', 'd_seq_start', 'j_seq_start',
-                    'np1_length', 'np2_length']
+                    'v_seq_start', 'd_seq_start', 'j_seq_start']
         checkFields(required, db_iter.fields, schema=schema)
     except LookupError as e:
         printError(e)
@@ -1050,77 +1050,78 @@ def getArgParser():
                                        description='Creates files for GenBank/TLS submissions.')
     # Genbank source information arguments
     group_gb_src = parser_gb.add_argument_group('source information arguments')
+    group_gb_src.add_argument('--mol', action='store', dest='molecule', default=default_molecule,
+                              help='''The source molecule type. Usually one of "mRNA" or "genomic DNA".''')
     group_gb_src.add_argument('--product', action='store', dest='product', default=default_product,
-                          help='''The product name, such as "immunoglobulin heavy chain".''')
+                              help='''The product name, such as "immunoglobulin heavy chain".''')
+    group_gb_src.add_argument('--db', action='store', dest='db_xref', default=None,
+                              help='''Name of the reference database used for alignment. 
+                                   Usually "IMGT/GENE-DB".''')
     group_gb_src.add_argument('--inf', action='store', dest='inference', default=None,
-                          help='''Name and version of the inference tool used for reference alignment in the 
-                               form tool:version.''')
-    group_gb_src.add_argument('--db', action='store', dest='db_xref', default=default_db_xref,
-                          help='Name of the reference database used for alignment.')
-    group_gb_src.add_argument('--mol', action='store', dest='molecule', default=default_molecule,
-                          help='''The source molecule type. Usually one of "mRNA" or "genomic DNA".''')
+                              help='''Name and version of the inference tool used for reference alignment in the 
+                                   form tool:version.''')
     # Genbank sample information arguments
     group_gb_sam = parser_gb.add_argument_group('sample information arguments')
     group_gb_sam.add_argument('--organism', action='store', dest='organism', default=None,
-                          help='The scientific name of the organism.')
+                              help='The scientific name of the organism.')
     group_gb_sam.add_argument('--sex', action='store', dest='sex', default=None,
-                          help='''If specified, adds the given sex annotation 
-                               to the fasta headers.''')
+                              help='''If specified, adds the given sex annotation 
+                                   to the fasta headers.''')
     group_gb_sam.add_argument('--isolate', action='store', dest='isolate', default=None,
-                          help='''If specified, adds the given isolate annotation 
-                               (sample label) to the fasta headers.''')
+                              help='''If specified, adds the given isolate annotation 
+                                   (sample label) to the fasta headers.''')
     group_gb_sam.add_argument('--tissue', action='store', dest='tissue', default=None,
-                          help='''If specified, adds the given tissue-type annotation 
-                               to the fasta headers.''')
+                              help='''If specified, adds the given tissue-type annotation 
+                                   to the fasta headers.''')
     group_gb_sam.add_argument('--cell-type', action='store', dest='cell_type', default=None,
-                          help='''If specified, adds the given cell-type annotation 
-                               to the fasta headers.''')
+                              help='''If specified, adds the given cell-type annotation 
+                                   to the fasta headers.''')
     group_gb_sam.add_argument('-y', action='store', dest='yaml_config', default=None,
-                          help='''A yaml file specifying sample features (BioSample attributes) 
-                               in the form \'variable: value\'. If specified, any features provided in the 
-                               yaml file will override those provided at the commandline. Note,
-                               this config file applies to sample features only and
-                               cannot be used for required source features such as 
-                               the --product or --mol argument.''')
+                              help='''A yaml file specifying sample features (BioSample attributes) 
+                                   in the form \'variable: value\'. If specified, any features provided in the 
+                                   yaml file will override those provided at the commandline. Note,
+                                   this config file applies to sample features only and
+                                   cannot be used for required source features such as 
+                                   the --product or --mol argument.''')
     # General genbank conversion arguments
     group_gb_cvt = parser_gb.add_argument_group('conversion arguments')
     group_gb_cvt.add_argument('--label', action='store', dest='label', default=None,
                               help='''If specified, add a field name to the sequence identifier. 
-                                Sequence identifiers will be output in the form <label>=<id>.''')
+                                   Sequence identifiers will be output in the form <label>=<id>.''')
     group_gb_cvt.add_argument('--cf', action='store', dest='c_field', default=None,
                               help='''Field containing the C region call. If unspecified, the C region gene 
-                                call will be excluded from the feature table.''')
+                                   call will be excluded from the feature table.''')
     group_gb_cvt.add_argument('--nf', action='store', dest='count_field', default=None,
                               help='''If specified, use the provided column to add the AIRR_READ_COUNT 
-                                note to the feature table.''')
+                                   note to the feature table.''')
     group_gb_cvt.add_argument('--if', action='store', dest='index_field', default=None,
                               help='''If specified, use the provided column to add the AIRR_CELL_INDEX 
-                                note to the feature table.''')
+                                   note to the feature table.''')
     group_gb_cvt.add_argument('--allow-stop', action='store_true', dest='allow_stop',
                               help='''If specified, retain records in the output with stop codons in the junction region.
-                                In such records the CDS will be removed and replaced with a similar misc_feature in 
-                                the feature table.''')
+                                   In such records the CDS will be removed and replaced with a similar misc_feature in 
+                                   the feature table.''')
     group_gb_cvt.add_argument('--asis-id', action='store_true', dest='asis_id',
                               help='''If specified, use the existing sequence identifier for the output identifier. 
-                                By default, only the row number will be used as the identifier to avoid
-                                the 50 character limit.''')
+                                   By default, only the row number will be used as the identifier to avoid
+                                   the 50 character limit.''')
     group_gb_cvt.add_argument('--asis-calls', action='store_true', dest='asis_calls',
                               help='''Specify to prevent alleles from being parsed using the IMGT nomenclature.
-                                Note, this requires the gene assignments to be exact matches to valid 
-                                records in the references database specified by the --db argument.''')
+                                   Note, this requires the gene assignments to be exact matches to valid 
+                                   records in the references database specified by the --db argument.''')
     group_gb_cvt.add_argument('--allele-delim', action='store', dest='allele_delim', default=default_allele_delim,
                               help='''The delimiter to use for splitting the gene name from the allele number.
-                                Note, this only applies when specifying --asis-calls. By default,
-                                this argument will be ignored and allele numbers extracted under the
-                                expectation of IMGT nomenclature consistency.''')
+                                   Note, this only applies when specifying --asis-calls. By default,
+                                   this argument will be ignored and allele numbers extracted under the
+                                   expectation of IMGT nomenclature consistency.''')
     group_gb_cvt.add_argument('--asn', action='store_true', dest='build_asn',
-                          help='''If specified, run tbl2asn to generate the .sqn submission file after making 
-                               the .fsa and .tbl files.''')
+                              help='''If specified, run tbl2asn to generate the .sqn submission file after making 
+                                   the .fsa and .tbl files.''')
     group_gb_cvt.add_argument('--sbt', action='store', dest='asn_template', default=None,
-                          help='''If provided along with --asn, use the specified file for the template file
-                               argument to tbl2asn.''')
+                              help='''If provided along with --asn, use the specified file for the template file
+                                   argument to tbl2asn.''')
     group_gb_cvt.add_argument('--exec', action='store', dest='tbl2asn_exec', default=default_tbl2asn_exec,
-                          help='The name or location of the tbl2asn executable.')
+                              help='The name or location of the tbl2asn executable.')
     parser_gb.set_defaults(func=convertToGenbank)
 
     return parser
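
The change to makeGenbankFeatures() above makes the db_xref qualifier optional. A small stand-alone illustration (not the upstream function itself) of how the C region qualifiers are now assembled; the gene name is an example and "IMGT/GENE-DB" is the value suggested by the new --db help text:

    def c_region_qualifiers(c_gene, db_xref=None):
        """Build GenBank qualifier tuples for a C region gene call."""
        qualifiers = [('gene', c_gene)]
        if db_xref is not None:
            qualifiers.append(('db_xref', '%s:%s' % (db_xref, c_gene)))
        return qualifiers

    c_region_qualifiers('IGHG1')                         # [('gene', 'IGHG1')]
    c_region_qualifiers('IGHG1', db_xref='IMGT/GENE-DB')
    # [('gene', 'IGHG1'), ('db_xref', 'IMGT/GENE-DB:IGHG1')]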


=====================================
bin/CreateGermlines.py
=====================================
@@ -20,13 +20,14 @@ from presto.Defaults import default_out_args
 from presto.IO import printLog, printMessage, printProgress, printError, printWarning
 from changeo.Defaults import default_v_field, default_d_field, default_j_field, default_clone_field, \
                              default_seq_field, default_format
-from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs
+from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs, \
+                                setDefaultFields
 from changeo.Gene import buildGermline, buildClonalGermline
 from changeo.IO import countDbFile, getDbFields, getFormatOperators, getOutputHandle, readGermlines, \
                        checkFields
 
 # Defaults
-default_germ_types = 'dmask'
+default_germ_types = ['dmask']
 
 
 def createGermlines(db_file, references, seq_field=default_seq_field, v_field=default_v_field,
@@ -276,7 +277,7 @@ def getArgParser():
               ''')
     # Define argument parser
     parser = ArgumentParser(description=__doc__, epilog=fields,
-                            parents=[getCommonArgParser(format=False)],
+                            parents=[getCommonArgParser(format=True)],
                             formatter_class=CommonHelpFormatter, add_help=False)
 
     # Germlines arguments
@@ -298,14 +299,18 @@ def getArgParser():
                              calls are ambiguous within a clonal group, this will place the germline call 
                              used for the entire clone within the
                              GERMLINE_V_CALL, GERMLINE_D_CALL and GERMLINE_J_CALL fields.''')
-    group.add_argument('--sf', action='store', dest='seq_field', default=default_seq_field,
-                        help='Field containing the aligned sequence.')
-    group.add_argument('--vf', action='store', dest='v_field', default=default_v_field,
-                        help='Field containing the germline V segment call.')
-    group.add_argument('--df', action='store', dest='d_field', default=default_d_field,
-                        help='Field containing the germline D segment call.')
-    group.add_argument('--jf', action='store', dest='j_field', default=default_j_field,
-                        help='Field containing the germline J segment call.')
+    group.add_argument('--sf', action='store', dest='seq_field', default=None,
+                        help='''Field containing the aligned sequence.
+                             Defaults to SEQUENCE_IMGT (changeo) or sequence_alignment (airr).''')
+    group.add_argument('--vf', action='store', dest='v_field', default=None,
+                        help='''Field containing the germline V segment call.
+                             Defaults to V_CALL (changeo) or v_call (airr).''')
+    group.add_argument('--df', action='store', dest='d_field', default=None,
+                        help='''Field containing the germline D segment call.
+                             Defaults to D_CALL (changeo) or d_call (airr).''')
+    group.add_argument('--jf', action='store', dest='j_field', default=None,
+                        help='''Field containing the germline J segment call.
+                             Defaults to J_CALL (changeo) or j_call (airr).''')
 
     return parser
 
@@ -321,23 +326,12 @@ if __name__ == '__main__':
     args = parser.parse_args()
     args_dict = parseCommonArgs(args)
 
-    # # Set default fields if not specified.
-    # default_fields = {'seq_field': default_seq_field,
-    #                   'v_field': default_v_field,
-    #                   'd_field': default_d_field,
-    #                   'j_field': default_j_field}
-    #
-    # # Default Change-O fields
-    # if args_dict['format'] == 'changeo':
-    #     for f in default_fields:
-    #         if args_dict[f] is None:  args_dict[f] = default_fields[f]
-    #         else: args_dict[f] = args_dict[f].upper()
-    #
-    # # Default AIRR fields
-    # if args_dict['format'] == 'airr':
-    #     for f in default_fields:
-    #         if args_dict[f] is None:  args_dict[f] = ChangeoSchema.fromReceptor(default_fields[f])
-    #         else: args_dict[f] = args_dict[f].lower()
+    # Set default fields
+    default_fields = {'seq_field': default_seq_field,
+                      'v_field': default_v_field,
+                      'd_field': default_d_field,
+                      'j_field': default_j_field}
+    args_dict = setDefaultFields(args_dict, default_fields, format=args_dict['format'])
 
     # Check that reference files exist
     for f in args_dict['references']:
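
The setDefaultFields() helper used above lives in changeo.Commandline and is not shown in this diff. The sketch below only mirrors the per-script logic this commit removes (see the deleted comments here and in DefineClones.py below, which use ChangeoSchema.toAIRR); it is a plausible approximation, not the actual helper.

    from changeo.Receptor import ChangeoSchema

    def set_default_fields(args_dict, defaults, format='changeo'):
        """Fill unset field arguments with format-appropriate column names."""
        for arg, field in defaults.items():
            if format == 'changeo':
                # Change-O columns are upper case, e.g. SEQUENCE_IMGT
                args_dict[arg] = field if args_dict[arg] is None \
                    else args_dict[arg].upper()
            elif format == 'airr':
                # AIRR columns are lower case, e.g. sequence_alignment
                args_dict[arg] = ChangeoSchema.toAIRR(field) if args_dict[arg] is None \
                    else args_dict[arg].lower()
        return args_dict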


=====================================
bin/DefineClones.py
=====================================
@@ -23,7 +23,8 @@ from presto.Defaults import default_out_args
 from presto.IO import printLog, printProgress, printCount, printWarning, printError
 from presto.Multiprocessing import manageProcesses
 from changeo.Defaults import default_format, default_v_field, default_j_field, default_junction_field
-from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs
+from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs, \
+                                setDefaultFields
 from changeo.Distance import distance_models, calcDistances, formClusters
 from changeo.IO import countDbFile, getDbFields, getFormatOperators, getOutputHandle, \
                        AIRRWriter, ChangeoWriter, checkFields
@@ -296,7 +297,11 @@ def distanceClones(result, seq_field=default_junction_field, model=default_dista
     for rec in result.data_pass:
         seq = rec.getField(seq_field)
         seq = re.sub('[\.-]', 'N', seq)
-        if model == 'aa':  seq = translate(seq)
+        # Translate sequence for amino acid model
+        if model == 'aa':
+            # Check for valid translation
+            if len(seq) % 3 > 0:  seq = seq + 'N' * (3 - len(seq) % 3)
+            seq = translate(seq)
         seq_map.setdefault(seq, []).append(rec)
 
     # Define sequences
@@ -618,17 +623,20 @@ def getArgParser():
               ''')
     # Define argument parser
     parser = ArgumentParser(description=__doc__, epilog=fields,
-                            parents=[getCommonArgParser(format=False, multiproc=True)],
+                            parents=[getCommonArgParser(format=True, multiproc=True)],
                             formatter_class=CommonHelpFormatter, add_help=False)
 
     # Distance cloning method
     group = parser.add_argument_group('cloning arguments')
-    group.add_argument('--sf', action='store', dest='seq_field', default=default_junction_field,
-                        help='Field to be used to calculate distance between records.')
-    group.add_argument('--vf', action='store', dest='v_field', default=default_v_field,
-                        help='Field containing the germline V segment call.')
-    group.add_argument('--jf', action='store', dest='j_field', default=default_j_field,
-                        help='Field containing the germline J segment call.')
+    group.add_argument('--sf', action='store', dest='seq_field', default=None,
+                        help='''Field to be used to calculate distance between records.
+                              Defaults to JUNCTION (changeo) or junction (airr).''')
+    group.add_argument('--vf', action='store', dest='v_field', default=None,
+                        help='''Field containing the germline V segment call.
+                             Defaults to V_CALL (changeo) or v_call (airr).''')
+    group.add_argument('--jf', action='store', dest='j_field', default=None,
+                        help='''Field containing the germline J segment call.
+                             Defaults to J_CALL (changeo) or j_call (airr).''')
     group.add_argument('--gf', nargs='+', action='store', dest='group_fields', default=None,
                         help='Additional fields to use for grouping clones aside from V, J and junction length.')
     group.add_argument('--mode', action='store', dest='mode',
@@ -693,22 +701,11 @@ if __name__ == '__main__':
     args = parser.parse_args()
     args_dict = parseCommonArgs(args)
 
-    # # Set default fields if not specified.
-    # default_fields = {'seq_field': default_junction_field,
-    #                   'v_field': default_v_field,
-    #                   'j_field': default_j_field}
-    #
-    # # Default Change-O fields
-    # if args_dict['format'] == 'changeo':
-    #     for f in default_fields:
-    #         if args_dict[f] is None:  args_dict[f] = default_fields[f]
-    #         else: args_dict[f] = args_dict[f].upper()
-    #
-    # # Default AIRR fields
-    # if args_dict['format'] == 'airr':
-    #     for f in default_fields:
-    #         if args_dict[f] is None:  args_dict[f] = ChangeoSchema.toAIRR(default_fields[f])
-    #         else: args_dict[f] = args_dict[f].lower()
+    # Set default fields
+    default_fields = {'seq_field': default_junction_field,
+                      'v_field': default_v_field,
+                      'j_field': default_j_field}
+    args_dict = setDefaultFields(args_dict, default_fields, format=args_dict['format'])
 
     # Define grouping and cloning function arguments
     args_dict['group_args'] = {'action': args_dict['action'],


=====================================
bin/MakeDb.py
=====================================
@@ -10,6 +10,7 @@ from changeo import __version__, __date__
 # Imports
 import os
 import re
+import csv
 from argparse import ArgumentParser
 from collections import OrderedDict
 from textwrap import dedent
@@ -26,6 +27,55 @@ from changeo.IO import countDbFile, extractIMGT, readGermlines, getFormatOperato
                        AIRRWriter, ChangeoWriter, IgBLASTReader, IMGTReader, IHMMuneReader
 from changeo.Receptor import ChangeoSchema, AIRRSchema
 
+# 10X Receptor attributes
+cellranger_base = ['cell', 'c_call', 'conscount', 'umicount']
+cellranger_extended = ['cell', 'c_call', 'conscount', 'umicount',
+                       'v_call_10x', 'd_call_10x', 'j_call_10x',
+                       'junction_10x', 'junction_10x_aa']
+
+
+def readCellRanger(cellranger_file, fields=cellranger_base):
+    """
+    Load a Cell Ranger annotation table
+
+    Arguments:
+      cellranger_file (str): path to the annotation file.
+      fields (list): list of fields to keep.
+
+    Returns:
+      dict: dict of dicts with contig_id as the primary key.
+    """
+    # Mapping of 10X annotations to Receptor attributes
+    cellranger_map = {'cell':  'barcode',
+                      'c_call': 'c_gene',
+                      'locus': 'chain',
+                      'conscount': 'reads',
+                      'umicount': 'umis',
+                      'v_call_10x': 'v_gene',
+                      'd_call_10x': 'd_gene',
+                      'j_call_10x': 'j_gene',
+                      'junction_10x': 'cdr3_nt',
+                      'junction_10x_aa': 'cdr3'}
+
+    # Function to parse individual fields
+    def _parse(x):
+        return '' if x == 'None' else x
+
+    # Generate annotation dictionary
+    ann_dict = {}
+    with open(cellranger_file) as csv_file:
+        # Detect delimiters
+        dialect = csv.Sniffer().sniff(csv_file.readline())
+        csv_file.seek(0)
+        # Read in annotation file
+        csv_reader = csv.DictReader(csv_file, dialect=dialect)
+
+        # Generate annotation dictionary
+        for row in csv_reader:
+            ann_dict[row['contig_id']] = {f: _parse(row[cellranger_map[f]]) for f in fields}
+
+    return ann_dict
+
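
For reference, a hedged usage sketch of readCellRanger(); the file name, contig id,
and values below are hypothetical, and the source columns (barcode, c_gene, reads,
umis, ...) are the ones listed in cellranger_map:

    ann = readCellRanger('filtered_contig_annotations.csv', fields=cellranger_base)

    # Each contig id maps to Receptor-style annotations, e.g. (values invented):
    # ann['AAACCTGAGAACTCGG-1_contig_1']
    # -> {'cell': 'AAACCTGAGAACTCGG-1', 'c_call': 'IGHM',
    #     'conscount': '1023', 'umicount': '12'}
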
 
 def addGermline(receptor, references):
     """
@@ -83,17 +133,18 @@ def getSeqDict(seq_file):
     return seq_dict
 
 
-def writeDb(records, fields, aligner_file, total_count, id_dict=None, partial=False, asis_id=True,
-            writer=ChangeoWriter, out_file=None, out_args=default_out_args):
+def writeDb(records, fields, aligner_file, total_count, id_dict=None, annotations=None,
+            partial=False, asis_id=True, writer=ChangeoWriter, out_file=None, out_args=default_out_args):
     """
     Writes parsed records to an output file
     
-    Arguments:
+    Arguments: 
       records : an iterator of Receptor objects containing alignment data.
       fields : a list of ordered field names to write.
       aligner_file : input file name.
       total_count : number of records (for progress bar).
       id_dict : a dictionary of the truncated sequence ID mapped to the full sequence ID.
+      annotations : additional annotation dictionary.
       partial : if True put incomplete alignments in the pass file.
       asis_id : if ID is to be parsed for pRESTO output with default delimiters.
       writer : writer class.
@@ -157,6 +208,22 @@ def writeDb(records, fields, aligner_file, total_count, id_dict=None, partial=Fa
     else:
         printError('Invalid output writer.')
 
+    # Additional annotation (e.g. 10X cell calls)
+    # _append_table = None
+    # if cellranger_file is not None:
+    #     with open(cellranger_file) as csv_file:
+    #         # Read in annotation file (use Sniffer to discover file delimiters)
+    #         dialect = csv.Sniffer().sniff(csv_file.readline())
+    #         csv_file.seek(0)
+    #         csv_reader = csv.DictReader(csv_file, dialect = dialect)
+    #
+    #         # Generate annotation dictionary
+    #         anntab_dict = {entry['contig_id']: {cellranger_map[field]: entry[field] \
+    #                        for field in cellranger_map.keys()} for entry in csv_reader}
+    #
+    #     fields = _annotate(fields, cellranger_map.values())
+    #     _append_table = lambda sequence_id: anntab_dict[sequence_id]
+
     # Set pass criteria
     _pass = _gentle if partial else _strict
 
@@ -190,16 +257,21 @@ def writeDb(records, fields, aligner_file, total_count, id_dict=None, partial=Fa
                 for k, v in ann_raw.items():
                     ann_parsed[ChangeoSchema.toReceptor(k)] = v
 
-                # If first record, use parsed description to define extra columns
-                if i == 1:  fields = _annotate(fields, ann_parsed.keys())
-
-                # Update Receptor record
+                # Add annotations to Receptor and update field list
                 record.setDict(ann_parsed, parse=True)
+                if i == 1:  fields = _annotate(fields, ann_parsed.keys())
             except IndexError:
                 # Could not parse pRESTO-style annotations so fall back to no parse
                 asis_id = True
                 printWarning('Sequence annotation format not recognized. Sequence headers will not be parsed.')
 
+        # Add supplemental annotation fields
+        # if _append_table is not None:
+        #     record.setDict(_append_table(record.sequence_id), parse=True)
+        if annotations is not None:
+            record.setDict(annotations[record.sequence_id], parse=True)
+            if i == 1:  fields = _annotate(fields, annotations[record.sequence_id].keys())
+
         # Count pass or fail and write to appropriate file
         if _pass(record):
             pass_count += 1
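
Note: the supplemental table is keyed by the raw contig_id, so the sequence ids in
the aligner output must match the ids in the 10X table; as written, a record missing
from the table raises a KeyError. Schematically (names as in the diff):

    extra = annotations[record.sequence_id]       # KeyError if the contig is absent
    record.setDict(extra, parse=True)             # attach as Receptor attributes
    if i == 1:
        fields = _annotate(fields, extra.keys())  # extend the output header once
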
@@ -234,7 +306,7 @@ def writeDb(records, fields, aligner_file, total_count, id_dict=None, partial=Fa
         # Print progress
         printProgress(i, total_count, 0.05, start_time=start_time)
 
-    # Print consol log
+    # Print console log
     log = OrderedDict()
     log['OUTPUT'] = os.path.basename(pass_handle.name) if pass_handle is not None else None
     log['PASS'] = pass_count
@@ -254,9 +326,8 @@ def writeDb(records, fields, aligner_file, total_count, id_dict=None, partial=Fa
     return output
 
 
-def parseIMGT(aligner_file, seq_file=None, repo=None, partial=False, asis_id=True,
-              parse_scores=False, parse_regions=False, parse_junction=False,
-              format=default_format, out_file=None, out_args=default_out_args):
+def parseIMGT(aligner_file, seq_file=None, repo=None, cellranger_file=None, partial=False, asis_id=True,
+              extended=False, format=default_format, out_file=None, out_args=default_out_args):
     """
     Main for IMGT aligned sample sequences.
 
@@ -266,8 +337,7 @@ def parseIMGT(aligner_file, seq_file=None, repo=None, partial=False, asis_id=Tru
       repo : folder with germline repertoire files.
       partial : If True put incomplete alignments in the pass file.
       asis_id : if ID is to be parsed for pRESTO output with default delimiters.
-      parse_scores : if True add alignment score fields to output file.
-      parse_regions : if True add FWR and CDR region fields to output file.
+      extended : if True add alignment score, FWR, CDR and junction fields to output file.
       format : output format. one of 'changeo' or 'airr'.
       out_file : output file name. Automatically generated from the input file if None.
       out_args : common output argument dictionary from parseCommonArgs.
@@ -283,19 +353,28 @@ def parseIMGT(aligner_file, seq_file=None, repo=None, partial=False, asis_id=Tru
     log['SEQ_FILE'] = os.path.basename(seq_file) if seq_file else ''
     log['ASIS_ID'] = asis_id
     log['PARTIAL'] = partial
-    log['SCORES'] = parse_scores
-    log['REGIONS'] = parse_regions
-    log['JUNCTION'] = parse_junction
+    log['EXTENDED'] = extended
     printLog(log)
 
     start_time = time()
     printMessage('Loading files', start_time=start_time, width=20)
+
     # Extract IMGT files
     temp_dir, imgt_files = extractIMGT(aligner_file)
+
     # Count records in IMGT files
     total_count = countDbFile(imgt_files['summary'])
+
     # Get (parsed) IDs from fasta file submitted to IMGT
     id_dict = getIDforIMGT(seq_file) if seq_file else {}
+
+    # Load supplementary annotation table
+    if cellranger_file is not None:
+        f = cellranger_extended if extended else cellranger_base
+        annotations = readCellRanger(cellranger_file, fields=f)
+    else:
+        annotations = None
+
     printMessage('Done', start_time=start_time, end=True, width=20)
 
     # Define format operators
@@ -307,9 +386,9 @@ def parseIMGT(aligner_file, seq_file=None, repo=None, partial=False, asis_id=Tru
 
     # Define output fields
     fields = list(schema.standard_fields)
-    custom = IMGTReader.customFields(scores=parse_scores, regions=parse_regions,
-                                     junction=parse_junction, schema=schema)
-    fields.extend(custom)
+    if extended:
+        custom = IMGTReader.customFields(scores=True, regions=True, junction=True, schema=schema)
+        fields.extend(custom)
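
Note: --extended folds the former --scores/--regions/--junction switches into one
flag. Going by the removed help text, for the IMGT parser the call above adds
roughly the following Change-O columns (sketch, not an exhaustive list):

    custom = IMGTReader.customFields(scores=True, regions=True, junction=True,
                                     schema=schema)
    # scores   -> <VDJ>_SCORE, <VDJ>_IDENTITY
    # regions  -> FWR1_IMGT..FWR4_IMGT, CDR1_IMGT..CDR3_IMGT
    # junction -> N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH,
    #             P3D_LENGTH, P5J_LENGTH, D_FRAME
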
 
     # Parse IMGT output and write db
     with open(imgt_files['summary'], 'r') as summary_handle, \
@@ -331,8 +410,8 @@ def parseIMGT(aligner_file, seq_file=None, repo=None, partial=False, asis_id=Tru
             germ_iter = (addGermline(x, references) for x in parse_iter)
 
         # Write db
-        output = writeDb(germ_iter, fields=fields, aligner_file=aligner_file, total_count=total_count,
-                         id_dict=id_dict, asis_id=asis_id, partial=partial,
+        output = writeDb(germ_iter, fields=fields, aligner_file=aligner_file, total_count=total_count, 
+                         annotations=annotations, id_dict=id_dict, asis_id=asis_id, partial=partial,
                          writer=writer, out_file=out_file, out_args=out_args)
 
     # Cleanup temp directory
@@ -341,9 +420,8 @@ def parseIMGT(aligner_file, seq_file=None, repo=None, partial=False, asis_id=Tru
     return output
 
 
-def parseIgBLAST(aligner_file, seq_file, repo, partial=False, asis_id=True, asis_calls=False,
-                 parse_regions=False, parse_scores=False, parse_igblast_cdr3=False,
-                 format='changeo', out_file=None, out_args=default_out_args):
+def parseIgBLAST(aligner_file, seq_file, repo, cellranger_file=None, partial=False, asis_id=True, asis_calls=False,
+                 extended=False, format='changeo', out_file=None, out_args=default_out_args):
     """
     Main for IgBLAST aligned sample sequences.
 
@@ -354,9 +432,7 @@ def parseIgBLAST(aligner_file, seq_file, repo, partial=False, asis_id=True, asis
       partial : If True put incomplete alignments in the pass file.
       asis_id : if ID is to be parsed for pRESTO output with default delimiters.
       asis_calls : if True do not parse gene calls for allele names.
-      parse_regions : if True add FWR and CDR fields to output file.
-      parse_scores : if True add alignment score fields to output file.
-      parse_igblast_cdr3 : if True parse CDR3 sequences generated by IgBLAST.
+      extended : if True add alignment scores, FWR, IMGT CDR, and IgBLAST CDR3 to the output.
       format : output format. one of 'changeo' or 'airr'.
       out_file : output file name. Automatically generated from the input file if None.
       out_args : common output argument dictionary from parseCommonArgs.
@@ -370,21 +446,31 @@ def parseIgBLAST(aligner_file, seq_file, repo, partial=False, asis_id=True, asis
     log['ALIGNER'] = 'IgBLAST'
     log['ALIGNER_FILE'] = os.path.basename(aligner_file)
     log['SEQ_FILE'] = os.path.basename(seq_file)
-    log['PARTIAL'] = partial
-    log['SCORES'] = parse_scores
-    log['REGIONS'] = parse_regions
     log['ASIS_ID'] = asis_id
     log['ASIS_CALLS'] = asis_calls
+    log['PARTIAL'] = partial
+    log['EXTENDED'] = extended
     printLog(log)
 
     start_time = time()
     printMessage('Loading files', start_time=start_time, width=20)
+
     # Count records in sequence file
     total_count = countSeqFile(seq_file)
+
     # Get input sequence dictionary
     seq_dict = getSeqDict(seq_file)
+
     # Create germline repo dictionary
     references = readGermlines(repo, asis=asis_calls)
+
+    # Load supplementary annotation table
+    if cellranger_file is not None:
+        f = cellranger_extended if extended else cellranger_base
+        annotations = readCellRanger(cellranger_file, fields=f)
+    else:
+        annotations = None
+
     printMessage('Done', start_time=start_time, end=True, width=20)
 
     # Check for IMGT-gaps in germlines
@@ -400,24 +486,23 @@ def parseIgBLAST(aligner_file, seq_file, repo, partial=False, asis_id=True, asis
 
     # Define output fields
     fields = list(schema.standard_fields)
-    custom = IgBLASTReader.customFields(scores=parse_scores, regions=parse_regions,
-                                        cdr3=parse_igblast_cdr3, schema=schema)
-    fields.extend(custom)
+    if extended:
+        custom = IgBLASTReader.customFields(scores=True, regions=True, cdr3=False, schema=schema)
+        fields.extend(custom)
 
     # Parse and write output
     with open(aligner_file, 'r') as f:
         parse_iter = IgBLASTReader(f, seq_dict, references, asis_calls=asis_calls)
         germ_iter = (addGermline(x, references) for x in parse_iter)
-        output = writeDb(germ_iter, fields=fields, aligner_file=aligner_file, total_count=total_count,
-                         partial=partial, asis_id=asis_id,
-                         writer=writer, out_file=out_file, out_args=out_args)
+        output = writeDb(germ_iter, fields=fields, aligner_file=aligner_file, total_count=total_count, 
+                        annotations=annotations, partial=partial, asis_id=asis_id,
+                        writer=writer, out_file=out_file, out_args=out_args)
 
     return output
 
 
-def parseIHMM(aligner_file, seq_file, repo, partial=False, asis_id=True,
-              parse_scores=False, parse_regions=False,
-              format=default_format, out_file=None, out_args=default_out_args):
+def parseIHMM(aligner_file, seq_file, repo, cellranger_file=None, partial=False, asis_id=True,
+              extended=False, format=default_format, out_file=None, out_args=default_out_args):
     """
     Main for iHMMuneAlign aligned sample sequences.
 
@@ -426,9 +511,8 @@ def parseIHMM(aligner_file, seq_file, repo, partial=False, asis_id=True,
       seq_file : fasta file input to iHMMuneAlign (from which to get sequence).
       repo : folder with germline repertoire files.
       partial : If True put incomplete alignments in the pass file.
-      parse_scores : if True parse alignment scores.
-      parse_regions : if True add FWR and CDR region fields.
       asis_id : if ID is to be parsed for pRESTO output with default delimiters.
+      extended : if True add alignment score, FWR, and CDR region fields to the output.
       format : output format. One of 'changeo' or 'airr'.
       out_file : output file name. Automatically generated from the input file if None.
       out_args : common output argument dictionary from parseCommonArgs.
@@ -444,18 +528,28 @@ def parseIHMM(aligner_file, seq_file, repo, partial=False, asis_id=True,
     log['SEQ_FILE'] = os.path.basename(seq_file)
     log['ASIS_ID'] = asis_id
     log['PARTIAL'] = partial
-    log['SCORES'] = parse_scores
-    log['REGIONS'] = parse_regions
+    log['EXTENDED'] = extended
     printLog(log)
 
     start_time = time()
     printMessage('Loading files', start_time=start_time, width=20)
+
     # Count records in sequence file
     total_count = countSeqFile(seq_file)
+
     # Get input sequence dictionary
     seq_dict = getSeqDict(seq_file)
+
     # Create germline repo dictionary
     references = readGermlines(repo)
+
+    # Load supplementary annotation table
+    if cellranger_file is not None:
+        f = cellranger_extended if extended else cellranger_base
+        annotations = readCellRanger(cellranger_file, fields=f)
+    else:
+        annotations = None
+
     printMessage('Done', start_time=start_time, end=True, width=20)
 
     # Check for IMGT-gaps in germlines
@@ -471,17 +565,17 @@ def parseIHMM(aligner_file, seq_file, repo, partial=False, asis_id=True,
 
     # Define output fields
     fields = list(schema.standard_fields)
-    custom = IHMMuneReader.customFields(scores=parse_scores, regions=parse_regions,
-                                        schema=schema)
-    fields.extend(custom)
+    if extended:
+        custom = IHMMuneReader.customFields(scores=True, regions=True, schema=schema)
+        fields.extend(custom)
 
     # Parse and write output
     with open(aligner_file, 'r') as f:
         parse_iter = IHMMuneReader(f, seq_dict, references)
         germ_iter = (addGermline(x, references) for x in parse_iter)
-        output = writeDb(germ_iter, fields=fields, aligner_file=aligner_file, total_count=total_count,
-                         asis_id=asis_id, partial=partial,
-                         writer=writer, out_file=out_file, out_args=out_args)
+        output = writeDb(germ_iter, fields=fields, aligner_file=aligner_file, total_count=total_count, 
+                        annotations=annotations, asis_id=asis_id, partial=partial,
+                        writer=writer, out_file=out_file, out_args=out_args)
 
     return output
 
@@ -529,6 +623,11 @@ def getArgParser():
 
               ihmm specific output fields:
                   V_GERM_START_VDJ, V_GERM_LENGTH_VDJ, VDJ_SCORE
+                  
+              10X specific output fields:
+                  CELL, C_CALL, CONSCOUNT, UMICOUNT, 
+                  V_CALL_10X, D_CALL_10X, J_CALL_10X,
+                  JUNCTION_10X, JUNCTION_10X_AA
               ''')
                 
     # Define ArgumentParser
@@ -565,38 +664,32 @@ def getArgParser():
                                 required=True,
                                 help='''List of input FASTA files (with .fasta, .fna or .fa
                                      extension), containing sequences.''')
-    group_igblast.add_argument('--partial', action='store_true', dest='partial',
-                                help='''If specified, include incomplete V(D)J alignments in
-                                     the pass file instead of the fail file. An incomplete alignment
-                                     is defined as a record for which a valid IMGT-gapped sequence 
-                                     cannot be built or that is missing a V gene assignment, 
-                                     J gene assignment, junction region, or productivity call.''')
-    group_igblast.add_argument('--scores', action='store_true', dest='parse_scores',
-                                help='''Specify if alignment score metrics should be
-                                     included in the output. Adds the <VDJ>_SCORE, <VDJ>_IDENTITY,
-                                     <VDJ>_EVALUE, <VDJ>_CIGAR columns.''')
-    group_igblast.add_argument('--regions', action='store_true', dest='parse_regions',
-                                help='''Specify if IMGT FWR and CDRs should be
-                                     included in the output. Adds the FWR1_IMGT, FWR2_IMGT,
-                                     FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and
-                                     CDR3_IMGT columns.''')
-    group_igblast.add_argument('--cdr3', action='store_true',
-                                dest='parse_igblast_cdr3', 
-                                help='''Specify if the CDR3 sequences generated by IgBLAST 
-                                     should be included in the output. Adds the columns
-                                     CDR3_IGBLAST_NT and CDR3_IGBLAST_AA. Requires IgBLAST
-                                     version 1.5 or greater.''')
+    group_igblast.add_argument('--10x', action='store', nargs='+', dest='cellranger_file',
+                                help='''Table file containing 10X annotations (with .csv or .tsv
+                                     extension).''')
     group_igblast.add_argument('--asis-id', action='store_true', dest='asis_id',
                                 help='''Specify to prevent input sequence headers from being parsed
-                                    to add new columns to database. Parsing of sequence headers requires
-                                    headers to be in the pRESTO annotation format, so this should be specified
-                                    when sequence headers are incompatible with the pRESTO annotation scheme.
-                                    Note, unrecognized header formats will default to this behavior.''')
+                                     to add new columns to database. Parsing of sequence headers requires
+                                     headers to be in the pRESTO annotation format, so this should be specified
+                                     when sequence headers are incompatible with the pRESTO annotation scheme.
+                                     Note, unrecognized header formats will default to this behavior.''')
     group_igblast.add_argument('--asis-calls', action='store_true', dest='asis_calls',
                                 help='''Specify to prevent gene calls from being parsed into standard allele names
                                      in both the IgBLAST output and reference database. Note, this requires
                                      the sequence identifiers in the reference sequence set and the IgBLAST
                                      database to be exact string matches.''')
+    group_igblast.add_argument('--partial', action='store_true', dest='partial',
+                                help='''If specified, include incomplete V(D)J alignments in
+                                     the pass file instead of the fail file. An incomplete alignment
+                                     is defined as a record for which a valid IMGT-gapped sequence 
+                                     cannot be built or that is missing a V gene assignment, 
+                                     J gene assignment, junction region, or productivity call.''')
+    group_igblast.add_argument('--extended', action='store_true', dest='extended',
+                               help='''Specify to include additional aligner specific fields in the output. 
+                                     Adds the <VDJ>_SCORE, <VDJ>_IDENTITY, <VDJ>_EVALUE, and <VDJ>_CIGAR;
+                                     FWR1_IMGT, FWR2_IMGT, FWR3_IMGT, and FWR4_IMGT; CDR1_IMGT, CDR2_IMGT,
+                                     and CDR3_IMGT columns.''')
     parser_igblast.set_defaults(func=parseIgBLAST)
 
     # IMGT aligner
@@ -608,10 +701,10 @@ def getArgParser():
                                              (does not work with V-QUEST).''')
     group_imgt = parser_imgt.add_argument_group('aligner parsing arguments')
     group_imgt.add_argument('-i', nargs='+', action='store', dest='aligner_files',
-                             help='''Either zipped IMGT output files (.zip or .txz) or a
-                                  folder containing unzipped IMGT output files (which must
-                                  include 1_Summary, 2_IMGT-gapped, 3_Nt-sequences,
-                                  and 6_Junction).''')
+                            help='''Either zipped IMGT output files (.zip or .txz) or a
+                                 folder containing unzipped IMGT output files (which must
+                                 include 1_Summary, 2_IMGT-gapped, 3_Nt-sequences,
+                                 and 6_Junction).''')
     group_imgt.add_argument('-s', nargs='*', action='store', dest='seq_files', required=False,
                             help='''List of FASTA files (with .fasta, .fna or .fa
                                   extension) that were submitted to IMGT/HighV-QUEST. 
@@ -623,31 +716,25 @@ def getArgParser():
                                  These reference sequences must contain IMGT-numbering spacers (gaps)
                                  in the V segment. If unspecified, the germline sequence reconstruction 
                                  will not be included in the output.''')
+    group_imgt.add_argument('--10x', action='store', nargs='+', dest='cellranger_file',
+                            help='''Table file containing 10X annotations (with .csv or .tsv
+                                 extension).''')
     group_imgt.add_argument('--asis-id', action='store_true', dest='asis_id',
-                             help='''Specify to prevent input sequence headers from being parsed
-                                  to add new columns to database. Parsing of sequence headers requires
-                                  headers to be in the pRESTO annotation format, so this should be specified
-                                  when sequence headers are incompatible with the pRESTO annotation scheme.
-                                  Note, unrecognized header formats will default to this behavior.''')
+                            help='''Specify to prevent input sequence headers from being parsed
+                                 to add new columns to database. Parsing of sequence headers requires
+                                 headers to be in the pRESTO annotation format, so this should be specified
+                                 when sequence headers are incompatible with the pRESTO annotation scheme.
+                                 Note, unrecognized header formats will default to this behavior.''')
     group_imgt.add_argument('--partial', action='store_true', dest='partial',
-                             help='''If specified, include incomplete V(D)J alignments in
-                                  the pass file instead of the fail file. An incomplete alignment
-                                  is defined as a record that is missing a V gene assignment, 
-                                  J gene assignment, junction region, or productivity call.''')
-    group_imgt.add_argument('--scores', action='store_true', dest='parse_scores',
-                             help='''Specify if alignment score metrics should be
-                                  included in the output. Adds the <VDJ>_SCORE and 
-                                  <VDJ>_IDENTITY> columns.''')
-    group_imgt.add_argument('--regions', action='store_true', dest='parse_regions',
-                             help='''Specify if IMGT FWRs and CDRs should be
-                                  included in the output. Adds the FWR1_IMGT, FWR2_IMGT,
-                                  FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and
-                                  CDR3_IMGT columns.''')
-    group_imgt.add_argument('--junction', action='store_true', dest='parse_junction',
-                             help='''Specify if detailed junction fields should be
-                                  included in the output. Adds the columns 
-                                  N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH, P3D_LENGTH,
-                                  P5J_LENGTH, D_FRAME.''')
+                            help='''If specified, include incomplete V(D)J alignments in
+                                 the pass file instead of the fail file. An incomplete alignment
+                                 is defined as a record that is missing a V gene assignment, 
+                                 J gene assignment, junction region, or productivity call.''')
+    group_imgt.add_argument('--extended', action='store_true', dest='extended',
+                            help='''Specify to include additional aligner specific fields in the output. 
+                                 Adds <VDJ>_SCORE and <VDJ>_IDENTITY; FWR1_IMGT, FWR2_IMGT, FWR3_IMGT, and FWR4_IMGT;
+                                 CDR1_IMGT, CDR2_IMGT, and CDR3_IMGT; and N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH, 
+                                 P3D_LENGTH, P5J_LENGTH, and D_FRAME.''')
     parser_imgt.set_defaults(func=parseIMGT)
 
     # iHMMuneAlign Aligner
@@ -668,6 +755,9 @@ def getArgParser():
                              required=True,
                              help='''List of input FASTA files (with .fasta, .fna or .fa
                                   extension) containing sequences.''')
+    group_ihmm.add_argument('--10x', action='store', nargs='+', dest='cellranger_file',
+                                help='''Table file containing 10X annotations (with .csv or .tsv
+                                     extension).''')
     group_ihmm.add_argument('--asis-id', action='store_true', dest='asis_id',
                              help='''Specify to prevent input sequence headers from being parsed
                                   to add new columns to database. Parsing of sequence headers requires
@@ -680,15 +770,11 @@ def getArgParser():
                                      is defined as a record for which a valid IMGT-gapped sequence 
                                      cannot be built or that is missing a V gene assignment, 
                                      J gene assignment, junction region, or productivity call.''')
-    group_ihmm.add_argument('--scores', action='store_true', dest='parse_scores',
-                             help='''Specify if alignment score metrics should be
-                                  included in the output. Adds the path score of the
-                                  iHMMune-Align hidden Markov model to VDJ_SCORE.''')
-    group_ihmm.add_argument('--regions', action='store_true', dest='parse_regions',
-                             help='''Specify if IMGT FWRs and CDRs should be
-                                  included in the output. Adds the FWR1_IMGT, FWR2_IMGT,
-                                  FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and
-                                  CDR3_IMGT columns.''')
+    group_ihmm.add_argument('--extended', action='store_true', dest='extended',
+                             help='''Specify to include additional aligner specific fields in the output. 
+                                  Adds the path score of the iHMMune-Align hidden Markov model as VDJ_SCORE;
+                                  FWR1_IMGT, FWR2_IMGT, FWR3_IMGT, and FWR4_IMGT; CDR1_IMGT, CDR2_IMGT, and
+                                  CDR3_IMGT.''')
     parser_ihmm.set_defaults(func=parseIHMM)
 
     return parser
@@ -713,7 +799,7 @@ if __name__ == "__main__":
     if 'out_files' in args_dict: del args_dict['out_files']
     if 'command' in args_dict: del args_dict['command']
     if 'func' in args_dict: del args_dict['func']           
-    
+
     # Call main
     for i, f in enumerate(args.__dict__['aligner_files']):
         args_dict['aligner_file'] = f
@@ -721,4 +807,6 @@ if __name__ == "__main__":
                                 if args.__dict__['seq_files'] else None
         args_dict['out_file'] = args.__dict__['out_files'][i] \
                                 if args.__dict__['out_files'] else None
+        args_dict['cellranger_file'] = args.__dict__['cellranger_file'][i] \
+                                if args.__dict__['cellranger_file'] else None
         args.func(**args_dict)
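
A hedged end-to-end sketch of how the new options reach a parser, equivalent to
something like "MakeDb.py igblast ... --10x ... --extended"; all paths below are
placeholders:

    output = parseIgBLAST('sample_igblast.fmt7', 'sample.fasta', 'germlines/',
                          cellranger_file='filtered_contig_annotations.csv',
                          extended=True, format='airr')
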


=====================================
changeo.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: changeo
-Version: 0.4.5
+Version: 0.4.6
 Summary: A bioinformatics toolkit for processing high-throughput lymphocyte receptor sequencing data.
 Home-page: http://changeo.readthedocs.io
 Author: Namita Gupta, Jason Anthony Vander Heiden


=====================================
changeo/Commandline.py
=====================================
@@ -16,6 +16,7 @@ from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, \
 # Changeo imports
 from presto.IO import printWarning, printError
 from changeo.Defaults import choices_format, default_format
+from changeo.Receptor import AIRRSchema, ChangeoSchema
 
 
 class CommonHelpFormatter(RawDescriptionHelpFormatter, ArgumentDefaultsHelpFormatter):
@@ -242,4 +243,25 @@ def checkArgs(parser):
         parser.print_help()
         sys.exit(1)
 
-    return True
\ No newline at end of file
+    return True
+
+def setDefaultFields(args, defaults, format='changeo'):
+    """
+    Sets default field arguments by format
+
+    Arguments:
+      args (dict): parsed argument dictionary.
+      defaults (dict): default variables to set, with keys as argument variables and values
+                       as Change-O field names.
+      format (str): one of 'changeo' or 'airr' which defines the file format.
+
+    Returns:
+      dict: modified input args.
+    """
+    if format == 'airr':
+        defaults = {k: AIRRSchema.fromReceptor(ChangeoSchema.toReceptor(v)) \
+                    for k, v in defaults.items()}
+    for f in defaults:
+        if args[f] is None:  args[f] = defaults[f]
+
+    return args
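
A hedged sketch of setDefaultFields() using the DefineClones defaults; literal
Change-O names are used for clarity, the AIRR names follow the mapping implied by
the updated --sf/--vf/--jf help text, and V_CALL_GENOTYPED stands in for any
user-supplied override:

    from changeo.Commandline import setDefaultFields

    defaults = {'seq_field': 'JUNCTION', 'v_field': 'V_CALL', 'j_field': 'J_CALL'}
    args = {'seq_field': None, 'v_field': 'V_CALL_GENOTYPED', 'j_field': None}

    args = setDefaultFields(args, defaults, format='airr')
    # Unset arguments pick up the format-specific defaults, explicit ones are kept:
    # {'seq_field': 'junction', 'v_field': 'V_CALL_GENOTYPED', 'j_field': 'j_call'}
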


=====================================
changeo/Gene.py
=====================================
@@ -16,6 +16,7 @@ from changeo.Defaults import default_v_field, default_d_field, default_j_field,
 allele_regex = re.compile(r'((IG[HLK]|TR[ABGD])([VDJ][A-Z0-9]+[-/\w]*[-\*][\.\w]+))')
 gene_regex = re.compile(r'((IG[HLK]|TR[ABGD])([VDJ][A-Z0-9]+[-/\w]*))')
 family_regex = re.compile(r'((IG[HLK]|TR[ABGD])([VDJ][A-Z0-9]+))')
+locus_regex = re.compile(r'(IG[HLK]|TR[ABGD])')
 
 v_allele_regex = re.compile(r'((IG[HLK]|TR[ABGD])V[A-Z0-9]+[-/\w]*[-\*][\.\w]+)')
 d_allele_regex = re.compile(r'((IG[HLK]|TR[ABGD])D[A-Z0-9]+[-/\w]*[-\*][\.\w]+)')
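
The new locus_regex is meant to be used through parseAllele() (see the IMGT and
iHMMune readers below); a small example with made-up gene calls:

    from changeo.Gene import locus_regex, parseAllele

    parseAllele('IGHV4-59*01', locus_regex, action='first')   # expected 'IGH'
    parseAllele('TRBJ2-7*01', locus_regex, action='first')    # expected 'TRB'
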
@@ -24,7 +25,6 @@ j_allele_regex = re.compile(r'((IG[HLK]|TR[ABGD])J[A-Z0-9]+[-/\w]*[-\*][\.\w]+)'
 allele_number_regex = re.compile(r'(?<=\*)([\.\w]+)')
 c_gene_regex = re.compile(r'((IG[HLK]|TR[ABGD])([DMAGEC][P0-9]?[A-Z]?))')
 
-
 # TODO:  might be cleaner as getAllele(), getGene(), getFamily()
 def parseAllele(alleles, regex, action='first'):
     """


=====================================
changeo/IO.py
=====================================
@@ -20,7 +20,7 @@ from Bio.Seq import Seq
 # Presto and changeo imports
 from presto.IO import getFileType, printError, printWarning
 from changeo.Defaults import default_csv_size
-from changeo.Gene import allele_regex, v_allele_regex, d_allele_regex, j_allele_regex, \
+from changeo.Gene import allele_regex, v_allele_regex, d_allele_regex, j_allele_regex, locus_regex, \
                          parseAllele
 from changeo.Receptor import AIRRSchema, ChangeoSchema, Receptor, ReceptorData
 from changeo.Alignment import decodeBTOP, encodeCIGAR, padAlignment, gapV, inferJunction, getRegions
@@ -410,6 +410,7 @@ class IMGTReader:
                            'p5j_length',
                            'd_frame']
 
+
         fields = []
         if scores:  fields.extend(score_fields)
         if regions:  fields.extend(region_fields)
@@ -529,13 +530,23 @@ class IMGTReader:
         delim_regex = re.compile('\sor\s')
 
         # Gene calls
-        result = {}
-        v_call = summary['V-GENE and allele']
-        d_call = summary['D-GENE and allele']
-        j_call = summary['J-GENE and allele']
-        result['v_call'] = delim_regex.sub(',', clean_regex.sub('', v_call)) if v_call else None
-        result['d_call'] = delim_regex.sub(',', clean_regex.sub('', d_call)) if d_call else None
-        result['j_call'] = delim_regex.sub(',', clean_regex.sub('', j_call)) if j_call else None
+        v_str = summary['V-GENE and allele']
+        d_str = summary['D-GENE and allele']
+        j_str = summary['J-GENE and allele']
+        v_call = delim_regex.sub(',', clean_regex.sub('', v_str)) if v_str else None
+        d_call = delim_regex.sub(',', clean_regex.sub('', d_str)) if d_str else None
+        j_call = delim_regex.sub(',', clean_regex.sub('', j_str)) if j_str else None
+
+        # Locus
+        locus_list = [parseAllele(v_call, locus_regex, action='first'),
+                      parseAllele(j_call, locus_regex, action='first')]
+        locus = set(filter(None, locus_list))
+
+        # Result
+        result = {'v_call': v_call,
+                  'd_call': d_call,
+                  'j_call': j_call,
+                  'locus': locus.pop() if len(locus) == 1 else None}
 
         return result
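
Note: the locus is only reported when the V and J calls agree; disagreement or
missing calls yield None. A compact sketch with hypothetical calls:

    locus_list = [parseAllele('IGHV3-23*01', locus_regex, action='first'),  # 'IGH'
                  parseAllele('IGHJ4*02', locus_regex, action='first')]     # 'IGH'
    locus = set(filter(None, locus_list))
    locus.pop() if len(locus) == 1 else None                                # 'IGH'
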
 
@@ -873,7 +884,7 @@ class IgBLASTReader:
         # IgBLAST CDR3 fields
         cdr3_fields = ['cdr3_igblast',
                        'cdr3_igblast_aa']
-
+        
         fields = []
         if scores:  fields.extend(score_fields)
         if regions:  fields.extend(region_fields)
@@ -1051,7 +1062,10 @@ class IgBLASTReader:
             result['j_call'] = None if summary['j_match'] == 'N/A' else summary['j_match']
 
         # Parse locus
-        result['locus'] = None if summary['chain_type'] == 'N/A' else summary['chain_type']
+        locus = None if summary['chain_type'] == 'N/A' else summary['chain_type']
+        locus_map = {'VH': 'IGH', 'VK': 'IGK', 'VL': 'IGL',
+                     'VB': 'TRB', 'VD': 'TRD', 'VA': 'TRA', 'VG': 'TRG'}
+        result['locus'] = locus_map.get(locus, locus)
 
         # Parse quality information
         result['stop'] = 'T' if summary['stop_codon'] == 'Yes' else 'F'
@@ -1624,7 +1638,7 @@ class IHMMuneReader:
 
     # Ordered list of known fields
     @staticmethod
-    def customFields(scores=False, regions=False, schema=None):
+    def customFields(scores=False, regions=False, cell=False, schema=None):
         """
         Returns non-standard Receptor attributes defined by the parser
 
@@ -1749,10 +1763,16 @@ class IHMMuneReader:
         d_call = parseAllele(record['D_CALL'], d_allele_regex, action='list')
         j_call = parseAllele(record['J_CALL'], j_allele_regex, action='list')
 
+        # Locus
+        locus_list = [parseAllele(record['V_CALL'], locus_regex, action='first'),
+                      parseAllele(record['J_CALL'], locus_regex, action='first')]
+        locus = set(filter(None, locus_list))
+
         # Build return object
         result = {'v_call': ','.join(v_call) if v_call else None,
                   'd_call': ','.join(d_call) if d_call else None,
-                  'j_call': ','.join(j_call) if j_call else None}
+                  'j_call': ','.join(j_call) if j_call else None,
+                  'locus': locus.pop() if len(locus) == 1 else None}
 
         return result
 
@@ -2312,7 +2332,7 @@ def yamlDict(file):
       dict: dictionary of key:value pairs in the file.
     """
     try:
-        yaml_dict = dict(yaml.load(open(file, 'r')))
+        yaml_dict = dict(yaml.load(open(file, 'r'), Loader=yaml.FullLoader))
     except:
         printError('YAML file is invalid.')
 


=====================================
changeo/Receptor.py
=====================================
@@ -32,7 +32,7 @@ from changeo.Gene import allele_number_regex, allele_regex, gene_regex, family_r
 #           changeo.Receptor.Schema
 #         """
 #         with resource_stream(__name__, 'data/receptor.yaml') as f:
-#             data = yaml.load(f)
+#             data = yaml.load(f, Loader=yaml.FullLoader)
 #             receptor = {v[schema]: k for k, v in data['receptor'].items()}
 #             definition = data[schema]
 #
@@ -92,6 +92,7 @@ class AIRRSchema:
                                  ('productive', 'functional'),
                                  ('stop_codon', 'stop'),
                                  ('vj_in_frame', 'in_frame'),
+                                 ('locus', 'locus'),
                                  ('v_call', 'v_call'),
                                  ('d_call', 'd_call'),
                                  ('j_call', 'j_call'),
@@ -116,7 +117,6 @@ class AIRRSchema:
 
     # Custom fields
     _custom_map = OrderedDict([('c_call', 'c_call'),
-                               ('locus', 'locus'),
                                ('germline_alignment_d_mask', 'germline_imgt_d_mask'),
                                ('v_score', 'v_score'),
                                ('v_identity', 'v_identity'),
@@ -138,6 +138,8 @@ class AIRRSchema:
                                ('fwr2', 'fwr2_imgt'),
                                ('fwr3', 'fwr3_imgt'),
                                ('fwr4', 'fwr4_imgt'),
+                               ('junction_start', 'junction_start'),
+                               ('junction_end', 'junction_end'),
                                ('cdr1_start', 'cdr1_start'),
                                ('cdr1_end', 'cdr1_end'),
                                ('cdr2_start', 'cdr2_start'),
@@ -163,6 +165,7 @@ class AIRRSchema:
                                ('cdr3_igblast_aa', 'cdr3_igblast_aa'),
                                ('duplicate_count', 'dupcount'),
                                ('consensus_count', 'conscount'),
+                               ('umi_count', 'umicount'),
                                ('clone_id', 'clone'),
                                ('cell_id', 'cell')])
     custom_fields = list(_custom_map.keys())
@@ -219,6 +222,7 @@ class ChangeoSchema:
                                  ('STOP', 'stop'),
                                  ('MUTATED_INVARIANT', 'mutated_invariant'),
                                  ('INDELS', 'indels'),
+                                 ('LOCUS', 'locus'),
                                  ('V_CALL', 'v_call'),
                                  ('D_CALL', 'd_call'),
                                  ('J_CALL', 'j_call'),
@@ -246,7 +250,8 @@ class ChangeoSchema:
     standard_fields = list(_standard_map.keys())
 
     # Custom fields
-    _custom_map = OrderedDict([('V_SCORE', 'v_score'),
+    _custom_map = OrderedDict([('JUNCTION_START', 'junction_start'),
+                               ('V_SCORE', 'v_score'),
                                ('V_IDENTITY', 'v_identity'),
                                ('V_EVALUE', 'v_evalue'),
                                ('V_BTOP', 'v_btop'),
@@ -276,10 +281,12 @@ class ChangeoSchema:
                                ('P3D_LENGTH', 'p3d_length'),
                                ('P5J_LENGTH', 'p5j_length'),
                                ('D_FRAME', 'd_frame'),
+                               ('C_CALL', 'c_call'),
                                ('CDR3_IGBLAST', 'cdr3_igblast'),
                                ('CDR3_IGBLAST_AA', 'cdr3_igblast_aa'),
                                ('CONSCOUNT', 'conscount'),
                                ('DUPCOUNT', 'dupcount'),
+                               ('UMICOUNT', 'umicount'),
                                ('CLONE', 'clone'),
                                ('CELL', 'cell')])
     custom_fields = list(_custom_map.keys())
@@ -340,6 +347,7 @@ class ReceptorData:
 
       junction (Bio.Seq.Seq): ungapped junction region nucletide sequence.
       junction_aa (Bio.Seq.Seq): ungapped junction region amino acid sequence.
+      junction_start (int): start position of the junction in the input nucleotide sequence.
       junction_length (int): length of the junction in nucleotides.
 
       germline_vdj (Bio.Seq.Seq): full ungapped germline V(D)J nucleotide sequence.
@@ -415,6 +423,7 @@ class ReceptorData:
 
       conscount (int): number of reads contributing to the UMI consensus sequence.
       dupcount (int): copy number of the sequence.
+      umicount (int): number of UMIs representing the sequence.
 
       clone (str): clonal cluster identifier.
       cell (str): origin cell identifier.
@@ -422,7 +431,7 @@ class ReceptorData:
       annotations (dict): dictionary containing all unknown fields.
     """
     #with resource_stream(__name__, 'data/receptor.yaml') as f:
-    #    data = yaml.load(f)
+    #    data = yaml.load(f, Loader=yaml.FullLoader)
     #
     # # Define type parsers
     # parsers = {k: v['type'] for k, v in data['receptor'].items()}
@@ -458,6 +467,7 @@ class ReceptorData:
                'sequence_vdj': 'nucleotide',
                'junction': 'nucleotide',
                'junction_aa': 'aminoacid',
+               'junction_start': 'integer',
                'junction_length': 'integer',
                'germline_imgt': 'nucleotide',
                'germline_imgt_d_mask': 'nucleotide',
@@ -519,6 +529,7 @@ class ReceptorData:
                'cdr3_igblast_aa': 'aminoacid',
                'conscount': 'integer',
                'dupcount': 'integer',
+               'umicount': 'integer',
                'clone': 'identity',
                'cell': 'identity'}
 
@@ -533,6 +544,7 @@ class ReceptorData:
                        ('d_germ_start', 'd_germ_length', 'd_germ_end'),
                        ('j_seq_start', 'j_seq_length', 'j_seq_end'),
                        ('j_germ_start', 'j_germ_length', 'j_germ_end'),
+                       ('junction_start', 'junction_length', 'junction_end'),
                        ('fwr1_start', 'fwr1_length', 'fwr1_end'),
                        ('fwr2_start', 'fwr2_length', 'fwr2_end'),
                        ('fwr3_start', 'fwr3_length', 'fwr3_end'),
@@ -619,10 +631,21 @@ class Receptor:
     _derived = {'v_seq_end': 'integer',
                 'v_germ_end_vdj': 'integer',
                 'v_germ_end_imgt': 'integer',
+                'd_seq_end': 'integer',
+                'd_germ_end': 'integer',
                 'j_seq_end': 'integer',
                 'j_germ_end': 'integer',
-                'd_seq_end': 'integer',
-                'd_germ_end': 'integer'}
+                'junction_end': 'integer'}
+
+    def _junction_start(self):
+        """
+        Determine the position of the first junction nucleotide in the input sequence
+        """
+        try:
+            x = self.v_germ_end_imgt - 310
+            return self.v_seq_end - x if x >= 0 else None
+        except TypeError:
+            return None
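
The 310 offset corresponds to IMGT nucleotide position 310, the first base of the
conserved 2nd-CYS codon (amino acid 104) that opens the junction. A worked example
with hypothetical coordinates:

    # V germline alignment reaches IMGT position 312, two bases into the junction,
    # and the V segment ends at read position 296.
    v_germ_end_imgt, v_seq_end = 312, 296
    x = v_germ_end_imgt - 310          # junction bases already covered by the V
    junction_start = v_seq_end - x     # -> 294, first junction base in the read
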
 
     def __init__(self, data):
         """
@@ -654,6 +677,10 @@ class Receptor:
             f = getattr(ReceptorData, ReceptorData.parsers[k])
             setattr(self, k, f(data.pop(k, None)))
 
+        # Derive junction_start if not provided
+        if not hasattr(self, 'junction_start'):
+            setattr(self, 'junction_start', self._junction_start())
+
         # Add remaining elements as annotations dictionary
         self.annotations = data
 
@@ -1055,7 +1082,7 @@ class Receptor:
     @property
     def v_seq_end(self):
         """
-        position of the last V nucleotide in the input sequence.
+        Position of the last V nucleotide in the input sequence
         """
         try:  return self.v_seq_start + self.v_seq_length - 1
         except TypeError:  return None
@@ -1063,7 +1090,7 @@ class Receptor:
     @property
     def v_germ_end_imgt(self):
         """
-        position of the last nucleotide in the IMGT-gapped V germline sequence alignment.
+        Position of the last nucleotide in the IMGT-gapped V germline sequence alignment
         """
         try:  return self.v_germ_start_imgt + self.v_germ_length_imgt - 1
         except TypeError:  return None
@@ -1071,7 +1098,7 @@ class Receptor:
     @property
     def v_germ_end_vdj(self):
         """
-        position of the last nucleotide in the ungapped V germline sequence alignment.
+        Position of the last nucleotide in the ungapped V germline sequence alignment
         """
         try:  return self.v_germ_start_vdj + self.v_germ_length_vdj - 1
         except TypeError:  return None
@@ -1079,7 +1106,7 @@ class Receptor:
     @property
     def d_seq_end(self):
         """
-        position of the last D nucleotide in the input sequence.
+        Position of the last D nucleotide in the input sequence
         """
         try:  return self.d_seq_start + self.d_seq_length - 1
         except TypeError:  return None
@@ -1087,7 +1114,7 @@ class Receptor:
     @property
     def d_germ_end(self):
         """
-        position of the last nucleotide in the D germline sequence alignment.
+        Position of the last nucleotide in the D germline sequence alignment
         """
         try:  return self.d_germ_start + self.d_germ_length - 1
         except TypeError:  return None
@@ -1095,7 +1122,7 @@ class Receptor:
     @property
     def j_seq_end(self):
         """
-        position of the last J nucleotide in the input sequence.
+        Position of the last J nucleotide in the input sequence
         """
         try:  return self.j_seq_start + self.j_seq_length - 1
         except TypeError:  return None
@@ -1103,26 +1130,15 @@ class Receptor:
     @property
     def j_germ_end(self):
         """
-        position of the last nucleotide in the J germline sequence alignment.
+        Position of the last nucleotide in the J germline sequence alignment
         """
         try:  return self.j_germ_start + self.j_germ_length - 1
         except TypeError:  return None
 
-    @property
-    def junction_start(self):
-        """
-        position of the first junction nucleotide in the input sequence.
-        """
-        try:
-            x = self.v_germ_end_imgt - 310
-            return self.v_seq_end - x if x >= 0 else None
-        except TypeError:
-            return None
-
     @property
     def junction_end(self):
         """
-        position of the last junction nucleotide in the input sequence.
+        Position of the last junction nucleotide in the input sequence
         """
         try:
             gaps = self.junction.count('.')


=====================================
changeo/Version.py
=====================================
@@ -3,7 +3,7 @@ Version and authorship information
 """
 
 __author__    = 'Namita Gupta, Jason Anthony Vander Heiden'
-__copyright__ = 'Copyright 2017 Kleinstein Lab, Yale University. All rights reserved.'
+__copyright__ = 'Copyright 2019 Kleinstein Lab, Yale University. All rights reserved.'
 __license__   = 'Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)'
-__version__   = '0.4.5'
-__date__      = '2019.01.09'
+__version__   = '0.4.6'
+__date__      = '2019.07.19'


=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+changeo (0.4.6-1) UNRELEASED; urgency=medium
+
+  * Team upload.
+  * New upstream version
+
+ -- Andreas Tille <tille at debian.org>  Sun, 08 Mar 2020 07:57:50 +0100
+
 changeo (0.4.5-1) unstable; urgency=medium
 
   * Team upload.



View it on GitLab: https://salsa.debian.org/med-team/changeo/-/compare/c151bc479469689f71edad06b80f5a38e67ea434...eb1a2bdd1e9b3a0f667c5df7716e69fcb04d4dd6
