[med-svn] [SCM] cufflinks branch, master, updated. upstream/0.9.3-21-g7df8ccc
Alex Mestiashvili
alex at biotec.tu-dresden.de
Wed May 25 07:53:10 UTC 2011
The following commit has been merged in the master branch:
commit 35a00c5ca500d3cb2173c03292191bd6e6ada7db
Author: Alex Mestiashvili <alex at biotec.tu-dresden.de>
Date: Wed May 25 09:44:50 2011 +0200
fixing repository layout
diff --git a/debian/changelog.orig b/debian/changelog.orig
deleted file mode 100644
index fe60738..0000000
--- a/debian/changelog.orig
+++ /dev/null
@@ -1,17 +0,0 @@
-cufflinks (1.0.2-1) unstable; urgency=low
-
-<<<<<<< HEAD
- * Upstream release
-
- -- Alex Mestiashvili <alex at biotec.tu-dresden.de> Tue, 24 May 2011 13:43:09 +0200
-
-cufflinks (0.9.3-1) unstable; urgency=low
-
- * Initial release (Closes: #nnnn) <nnnn is the bug number of your ITP>
-
- -- Alex Mestiashvili <alex at biotec.tu-dresden.de> Tue, 03 May 2011 13:53:19 +0000
-=======
- * Initial release (Closes: #627799)
-
- -- Alex Mestiashvili <alex at biotec.tu-dresden.de> Tue, 24 May 2011 13:43:09 +0200
->>>>>>> fedd24b9bf5c659622fa3386243a60f6197a83ac
diff --git a/src/hits.h.orig b/src/hits.h.orig
deleted file mode 100644
index 56b791a..0000000
--- a/src/hits.h.orig
+++ /dev/null
@@ -1,1056 +0,0 @@
-#ifndef BWT_MAP_H
-#define BWT_MAP_H
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string>
-#include <cstring>
-#include <map>
-#include <vector>
-#include <cassert>
-
-#include <boost/shared_ptr.hpp>
-
-<<<<<<< HEAD
-#include <bam/sam.h>
-=======
-#include <samtools/sam.h>
->>>>>>> fedd24b9bf5c659622fa3386243a60f6197a83ac
-
-#include "common.h"
-#include "multireads.h"
-
-using namespace std;
-using boost::shared_ptr;
-
-/*
- * hits.h
- * Cufflinks
- *
- * Created by Cole Trapnell on 3/23/09.
- * Copyright 2009 Cole Trapnell. All rights reserved.
- *
- */
-
-enum CuffStrand { CUFF_STRAND_UNKNOWN = 0, CUFF_FWD = 1, CUFF_REV = 2, CUFF_BOTH = 3 };
-
-
-enum CigarOpCode
-{
- MATCH = BAM_CMATCH,
- INS = BAM_CINS,
- DEL = BAM_CDEL,
- REF_SKIP = BAM_CREF_SKIP,
- SOFT_CLIP = BAM_CSOFT_CLIP,
- HARD_CLIP = BAM_CHARD_CLIP,
- PAD = BAM_CPAD
-};
-
-struct CigarOp
-{
- CigarOp(CigarOpCode o, uint32_t l) : opcode(o), length(l) {}
- CigarOpCode opcode : 3;
- uint32_t length : 29;
-
- bool operator==(const CigarOp& rhs) const { return opcode == rhs.opcode && length == rhs.length; }
-
-};
-
-typedef uint64_t InsertID;
-typedef uint64_t RefID;
-
-extern int num_deleted;
-
-/* Stores the information from a single record of the bowtie map. A given read
- may have many of these.
-*/
-struct ReadHit
-{
- ReadHit() :
- _ref_id(0),
- _insert_id(0),
- _error_prob(1.0),
- _base_mass(1.0),
- _edit_dist(0xFFFFFFFF),
- _num_hits(1)
- {
- num_deleted++;
- }
-
- ReadHit(RefID ref_id,
- InsertID insert_id,
- int left,
- int read_len,
- bool antisense,
- CuffStrand source_strand,
- RefID partner_ref,
- int partner_pos,
- double error_prob,
- unsigned int edit_dist,
- int num_hits,
- float base_mass) :
- _ref_id(ref_id),
- _insert_id(insert_id),
- _left(left),
- _partner_ref_id(partner_ref),
- _partner_pos(partner_pos),
- _cigar(vector<CigarOp>(1,CigarOp(MATCH,read_len))),
- _source_strand(source_strand),
- _antisense_aln(antisense),
- _error_prob(error_prob),
- _base_mass(base_mass),
- _edit_dist(edit_dist),
- _num_hits(num_hits)
- {
- assert(_cigar.capacity() == _cigar.size());
- _right = get_right();
- num_deleted++;
- }
-
- ReadHit(RefID ref_id,
- InsertID insert_id,
- int left,
- const vector<CigarOp>& cigar,
- bool antisense_aln,
- CuffStrand source_strand,
- RefID partner_ref,
- int partner_pos,
- double error_prob,
- unsigned int edit_dist,
- int num_hits,
- float base_mass) :
- _ref_id(ref_id),
- _insert_id(insert_id),
- _left(left),
- _partner_ref_id(partner_ref),
- _partner_pos(partner_pos),
- _cigar(cigar),
- _source_strand(source_strand),
- _antisense_aln(antisense_aln),
- _error_prob(error_prob),
- _base_mass(base_mass),
- _edit_dist(edit_dist),
- _num_hits(num_hits)
- {
- assert(_cigar.capacity() == _cigar.size());
- _right = get_right();
- num_deleted++;
- }
-
- ReadHit(const ReadHit& other)
- {
- _ref_id = other._ref_id;
- _insert_id = other._insert_id;
- _left = other._left;
- _partner_ref_id = other._partner_ref_id;
- _partner_pos = other._partner_pos;
- _cigar = other._cigar;
- _source_strand = other._source_strand;
- _antisense_aln = other._antisense_aln;
- _error_prob = other._error_prob;
- _num_hits = other._num_hits;
- _base_mass = other._base_mass;
- _edit_dist = other._edit_dist;
- _right = get_right();
- num_deleted++;
- }
-
- ~ReadHit()
- {
- --num_deleted;
- }
-
- int read_len() const
- {
- int len = 0;
- for (size_t i = 0; i < _cigar.size(); ++i)
- {
- const CigarOp& op = _cigar[i];
- switch(op.opcode)
- {
- case MATCH:
- case INS:
- case SOFT_CLIP:
- len += op.length;
- break;
- default:
- break;
- }
- }
-
- return len;
- }
-
- bool contains_splice() const
- {
- for (size_t i = 0; i < _cigar.size(); ++i)
- {
- if (_cigar[i].opcode == REF_SKIP)
- return true;
- }
- return false;
- }
-
- bool is_singleton() const
- {
- return (partner_ref_id() == 0 ||
- partner_ref_id() != ref_id() ||
- abs(partner_pos() - left()) > max_partner_dist);
- }
-
- bool operator==(const ReadHit& rhs) const
- {
- return (_insert_id == rhs._insert_id &&
- _ref_id == rhs._ref_id &&
- _antisense_aln == rhs._antisense_aln &&
- _left == rhs._left &&
- _source_strand == rhs._source_strand &&
- /* DO NOT USE ACCEPTED IN COMPARISON */
- _cigar == rhs._cigar);
- }
-
- RefID ref_id() const { return _ref_id; }
- InsertID insert_id() const { return _insert_id; }
-
- RefID partner_ref_id() const { return _partner_ref_id; }
- int partner_pos() const { return _partner_pos; }
-
- int left() const { return _left; }
- int right() const { return _right; }
- CuffStrand source_strand() const { return _source_strand; }
- bool antisense_align() const { return _antisense_aln; }
-
- double error_prob() const { return _error_prob; }
-
- // Number of multi-hits for this read
- int num_hits() const { return _num_hits; }
-
- // We are ignoring the _base_mass and re-calculating based on multi-hits
- double mass() const
- {
- if (is_singleton())
- return 1.0/_num_hits;
- return 0.5 / _num_hits;
- }
-
- // For convenience, if you just want a copy of the gap intervals
- // for this hit.
- void gaps(vector<pair<int,int> >& gaps_out) const
- {
- gaps_out.clear();
- int pos = _left;
- for (size_t i = 0; i < _cigar.size(); ++i)
- {
- const CigarOp& op = _cigar[i];
-
- switch(op.opcode)
- {
- case REF_SKIP:
- gaps_out.push_back(make_pair(pos, pos + op.length - 1));
- pos += op.length;
- break;
- case SOFT_CLIP:
- pos += op.length;
- break;
- case HARD_CLIP:
- break;
- case MATCH:
- pos += op.length;
- break;
- case INS:
- pos -= op.length;
- break;
- case DEL:
- pos += op.length;
- break;
- default:
- break;
- }
- }
- }
-
- const vector<CigarOp>& cigar() const { return _cigar; }
-
- bool contiguous() const
- {
- return _cigar.size() == 1 && _cigar[0].opcode == MATCH;
- }
-
- unsigned int edit_dist() const { return _edit_dist; }
-
- //const string& hitfile_rec() const { return _hitfile_rec; }
- //void hitfile_rec(const string& rec) { _hitfile_rec = rec; }
-
-private:
-
- int get_right() const
- {
- int r = _left;
- for (size_t i = 0; i < _cigar.size(); ++i)
- {
- const CigarOp& op = _cigar[i];
-
- switch(op.opcode)
- {
- case MATCH:
- case REF_SKIP:
- case SOFT_CLIP:
- case DEL:
- r += op.length;
- break;
- case INS:
- case HARD_CLIP:
- default:
- break;
- }
- }
- return r;
- }
-
- RefID _ref_id;
- InsertID _insert_id; // Id of the sequencing insert
- int _left; // Position in the reference of the left side of the alignment
- int _right;
-
- RefID _partner_ref_id; // Reference contig on which we expect the mate
- int _partner_pos; // Position at which we expect the mate of this hit
-
-
- vector<CigarOp> _cigar;
-
- CuffStrand _source_strand; // Which strand the read really came from, if known
- bool _antisense_aln; // Whether the alignment is to the reverse strand
- float _error_prob; // Probability that this alignment is incorrect
- float _base_mass;
- unsigned int _edit_dist; // Number of mismatches
- int _num_hits; // Number of multi-hits (1 by default)
- //string _hitfile_rec; // Points to the buffer for the record from which this hit came
-};
-
-class ReadTable
-{
-public:
-
- ReadTable() {}
-
- // This function should NEVER return zero
- InsertID get_id(const string& name)
- {
- uint64_t _id = hash_string(name.c_str());
- assert(_id);
- return _id;
- }
-
-private:
-
- // This is FNV-1, see http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash
- inline uint64_t hash_string(const char* __s)
- {
- uint64_t hash = 0xcbf29ce484222325ull;
- for ( ; *__s; ++__s)
- {
- hash *= 1099511628211ull;
- hash ^= *__s;
- }
- return hash;
- }
-};
-
-class RefSequenceTable
-{
-public:
-
- typedef std::string Sequence;
-
- struct SequenceInfo
- {
- SequenceInfo(uint32_t _order,
- char* _name,
- Sequence* _seq) :
- observation_order(_order),
- name(_name),
- seq(_seq) {}
- uint32_t observation_order;
- char* name;
- Sequence* seq;
- };
-
- typedef map<string, RefID> IDTable;
- typedef map<RefID, SequenceInfo> InvertedIDTable;
- typedef InvertedIDTable::iterator iterator;
- typedef InvertedIDTable::const_iterator const_iterator;
-
- RefSequenceTable(bool keep_names, bool keep_seqs = false) :
- _next_id(1),
- _keep_names(keep_names) {}
-
- ~RefSequenceTable()
- {
- for (InvertedIDTable::iterator itr = _by_id.begin();
- itr != _by_id.end();
- ++itr)
- {
- free(itr->second.name);
- }
- }
-
- RefID get_id(const string& name,
- Sequence* seq)
- {
- if (name.empty())
- return 0;
-#if ENABLE_THREADS
- table_lock.lock();
-#endif
- uint64_t _id = hash_string(name.c_str());
- pair<InvertedIDTable::iterator, bool> ret =
- _by_id.insert(make_pair(_id, SequenceInfo(_next_id, NULL, NULL)));
- if (ret.second == true)
- {
- char* _name = NULL;
- if (_keep_names)
- _name = strdup(name.c_str());
- ret.first->second.name = _name;
- ret.first->second.seq = seq;
- ++_next_id;
- }
- assert (_id);
-#if ENABLE_THREADS
- table_lock.unlock();
-#endif
- return _id;
- }
-
- // You must call invert() before using this function
- const char* get_name(RefID ID) const
- {
- InvertedIDTable::const_iterator itr = _by_id.find(ID);
- if (itr != _by_id.end())
- {
- //const SequenceInfo& info = itr->second;
- return itr->second.name;
- }
- else
- {
- return NULL;
- }
- }
-
- Sequence* get_seq(RefID ID) const
- {
- InvertedIDTable::const_iterator itr = _by_id.find(ID);
- if (itr != _by_id.end())
- return itr->second.seq;
- else
- return NULL;
- }
-
- const SequenceInfo* get_info(RefID ID) const
- {
-
- InvertedIDTable::const_iterator itr = _by_id.find(ID);
- if (itr != _by_id.end())
- {
- return &(itr->second);
- }
- else
- return NULL;
- }
-
- int observation_order(RefID ID) const
- {
- InvertedIDTable::const_iterator itr = _by_id.find(ID);
- if (itr != _by_id.end())
- {
- return itr->second.observation_order;
- }
- else
- return -1;
- }
-
- void order_recs_lexicographically()
- {
- map<string, RefID> str_to_id;
-
- for (InvertedIDTable::iterator i = _by_id.begin(); i != _by_id.end(); ++i)
- {
- str_to_id[i->second.name] = i->first;
- //fprintf(stderr, "%d: %s\n", i->second.observation_order, i->second.name);
- }
-
- size_t new_order = 1;
- for (map<string, RefID>::iterator i = str_to_id.begin(); i != str_to_id.end(); ++i, ++new_order)
- {
- _by_id.find(get_id(i->first, NULL))->second.observation_order = new_order;
- verbose_msg( "%lu: %s\n", new_order, i->first.c_str());
- }
- }
-
- void print_rec_ordering()
- {
- for (InvertedIDTable::iterator i = _by_id.begin(); i != _by_id.end(); ++i)
- {
- verbose_msg( "%lu: %s\n", i->second.observation_order, i->second.name);
- }
- }
-
- iterator begin() { return _by_id.begin(); }
- iterator end() { return _by_id.end(); }
-
- const_iterator begin() const { return _by_id.begin(); }
- const_iterator end() const { return _by_id.end(); }
-
- size_t size() const { return _by_id.size(); }
-
- void clear()
- {
- //_by_name.clear();
- _by_id.clear();
- }
-
-private:
-
- // This is FNV-1, see http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash
- inline uint64_t hash_string(const char* __s)
- {
- uint64_t hash = 0xcbf29ce484222325ull;
- for ( ; *__s; ++__s)
- {
- hash *= 1099511628211ull;
- hash ^= *__s;
- }
- return hash;
- }
-// inline uint32_t hash_string(const char* __s)
-// {
-// uint32_t hash = 0x811c9dc5;
-// for ( ; *__s; ++__s)
-// {
-// hash *= 16777619;
-// hash ^= *__s;
-// }
-// return hash;
-// }
-
- //IDTable _by_name;
- RefID _next_id;
- bool _keep_names;
- InvertedIDTable _by_id;
-#if ENABLE_THREADS
- static boost::mutex table_lock;
-#endif
-};
-
-
-bool hit_insert_id_lt(const ReadHit& h1, const ReadHit& h2);
-
-/******************************************************************************
- The HitFactory abstract class is responsible for returning a single ReadHit
- from an alignment file. The only class that actually implements this interface
- right now in Cufflinks is SAMHitFactory
-*******************************************************************************/
-class HitFactory
-{
-public:
-
- HitFactory(ReadTable& insert_table,
- RefSequenceTable& reference_table) :
- _insert_table(insert_table),
- _ref_table(reference_table),
- _num_seq_header_recs(0) {}
-
- HitFactory& operator=(const HitFactory& rhs)
- {
- if (this != &rhs)
- {
- //_hit_file = rhs._hit_file;
- _insert_table = rhs._insert_table;
- _ref_table = rhs._ref_table;
- }
- return *this;
- }
- virtual ~HitFactory() {}
-
- ReadHit create_hit(const string& insert_name,
- const string& ref_name,
- int left,
- const vector<CigarOp>& cigar,
- bool antisense_aln,
- CuffStrand source_strand,
- const string& partner_ref,
- int partner_pos,
- double error_prob,
- unsigned int edit_dist,
- int num_hits,
- float base_mass);
-
- ReadHit create_hit(const string& insert_name,
- const string& ref_name,
- uint32_t left,
- uint32_t read_len,
- bool antisense_aln,
- CuffStrand source_strand,
- const string& partner_ref,
- int partner_pos,
- double error_prob,
- unsigned int edit_dist,
- int num_hits,
- float base_mass);
-
- virtual void reset() = 0;
-
- virtual void undo_hit() = 0;
-
- // next_record() should always set _curr_pos before reading the
- // next_record so undo_hit() will work properly.
- virtual bool next_record(const char*& buf, size_t& buf_size) = 0;
-
- virtual bool records_remain() const = 0;
-
- virtual bool get_hit_from_buf(const char* bwt_buf,
- ReadHit& bh,
- bool strip_slash,
- char* name_out = NULL,
- char* name_tags = NULL) = 0;
-
- RefSequenceTable& ref_table() { return _ref_table; }
-
- //FILE* hit_file() { return _hit_file; }
-
- virtual bool inspect_header() = 0;
-
- const ReadGroupProperties& read_group_properties()
- {
- return _rg_props;
- }
-
-protected:
-
- bool parse_header_string(const string& header_rec,
- ReadGroupProperties& rg_props);
-
- void finalize_rg_props();
-
- // TODO: We want to keep a collection of these, indexed by RG ID. See #180
- ReadGroupProperties _rg_props;
-
-private:
-
-
- ReadTable& _insert_table;
- RefSequenceTable& _ref_table;
- uint32_t _num_seq_header_recs;
-
-
-};
-
-/******************************************************************************
- SAMHitFactory turns SAM alignments into ReadHits
-*******************************************************************************/
-class SAMHitFactory : public HitFactory
-{
-public:
- SAMHitFactory(const string& hit_file_name,
- ReadTable& insert_table,
- RefSequenceTable& reference_table) :
- HitFactory(insert_table, reference_table),
- _line_num(0),
- _curr_pos(0)
- {
- _hit_file = fopen(hit_file_name.c_str(), "r");
- if (_hit_file == NULL)
- {
- throw std::runtime_error("Error: could not open file for reading");
- }
-
- if (inspect_header() == false)
- {
- throw std::runtime_error("Error: could not parse SAM header");
- }
-
- // Override header-inferred read group properities with whatever
- // the user supplied.
- if (global_read_properties != NULL)
- {
- _rg_props = *global_read_properties;
- }
- }
-
- ~SAMHitFactory()
- {
- if (_hit_file)
- {
- fclose(_hit_file);
- }
- }
-
- virtual void undo_hit()
- {
- fseeko(_hit_file, _curr_pos, SEEK_SET);
- --_line_num;
- }
-
- void reset() { rewind(_hit_file); }
-
- void mark_curr_pos() { _curr_pos = ftell(_hit_file); }
- bool records_remain() const { return !feof(_hit_file); }
-
- bool next_record(const char*& buf, size_t& buf_size);
-
- bool get_hit_from_buf(const char* bwt_buf,
- ReadHit& bh,
- bool strip_slash,
- char* name_out = NULL,
- char* name_tags = NULL);
-
- bool inspect_header();
-
-private:
- static const size_t _hit_buf_max_sz = 10 * 1024;
- char _hit_buf[_hit_buf_max_sz];
- int _line_num;
-
- FILE* _hit_file;
- off_t _curr_pos;
-};
-
-/******************************************************************************
- BAMHitFactory turns SAM alignments into ReadHits
- *******************************************************************************/
-class BAMHitFactory : public HitFactory
-{
-public:
- BAMHitFactory(const string& hit_file_name,
- ReadTable& insert_table,
- RefSequenceTable& reference_table) :
- HitFactory(insert_table, reference_table)
- {
- _hit_file = samopen(hit_file_name.c_str(), "rb", 0);
-
- memset(&_next_hit, 0, sizeof(_next_hit));
-
- if (_hit_file == NULL || _hit_file->header == NULL)
- {
- throw std::runtime_error("Fail to open BAM file");
- }
-
- _beginning = bgzf_tell(_hit_file->x.bam);
- _eof_encountered = false;
-
- if (inspect_header() == false)
- {
- throw std::runtime_error("Error: could not parse BAM header");
- }
-
- // Override header-inferred read group properities with whatever
- // the user supplied.
- if (global_read_properties != NULL)
- {
- _rg_props = *global_read_properties;
- }
- }
-
- ~BAMHitFactory()
- {
- if (_hit_file)
- {
- samclose(_hit_file);
- }
- }
-
- void mark_curr_pos()
- {
- _curr_pos = bgzf_tell(_hit_file->x.bam);
- }
-
-
- void undo_hit()
- {
- bgzf_seek(_hit_file->x.bam, _curr_pos, SEEK_SET);
- //--_line_num;
- }
-
- bool records_remain() const
- {
- return !_eof_encountered;
- }
-
- void reset()
- {
- if (_hit_file && _hit_file->x.bam)
- {
- bgzf_seek(_hit_file->x.bam, _beginning, SEEK_SET);
- _eof_encountered = false;
- }
- }
-
-
- bool next_record(const char*& buf, size_t& buf_size);
-
- bool get_hit_from_buf(const char* bwt_buf,
- ReadHit& bh,
- bool strip_slash,
- char* name_out = NULL,
- char* name_tags = NULL);
-
- bool inspect_header();
-
-private:
- samfile_t* _hit_file;
- int64_t _curr_pos;
- int64_t _beginning;
-
- bam1_t _next_hit;
- bool _eof_encountered;
-};
-
-// Forward declaration of BundleFactory, because MateHit will need a pointer
-// back to the Factory that created. Ultimately, we should replace this
-// with a pointer back to the ReadGroupProperty object corresponding to each
-// MateHit. That, however, requires that we link fragment length distributions
-// and bias models, etc, with each read group, and that's more than we can
-// afford to implement right now.
-
-/*******************************************************************************
- MateHit is a class that encapsulates a paired-end alignment as a single object.
- MateHits can be "open" when one hit has been read from a stream of individual
- read alignments, but the other hasn't. A "closed" MateHit is one where either
- both read alignments have been installed in the MateHit, or one read hit has,
- but the other will never come (i.e. singletons)
-*******************************************************************************/
-class MateHit
-{
-public:
- MateHit() :
- _refid(0),
- _left_alignment(NULL),
- _right_alignment(NULL),
- _collapse_mass(0.0),
- _is_mapped(false){}
-
- MateHit(shared_ptr<ReadGroupProperties const> rg_props,
- RefID refid,
- const ReadHit* left_alignment,
- const ReadHit* right_alignment) :
- _rg_props(rg_props),
- _refid(refid),
- _left_alignment(left_alignment),
- _right_alignment(right_alignment),
- _collapse_mass(0.0),
- _is_mapped(false)
- {
- //_expected_inner_dist = min(genomic_inner_dist(), _expected_inner_dist);
- }
- ~MateHit()
- {
- //fprintf(stderr, "Killing hit %lx\n",this);
- }
-
- //bool closed() {return _closed;}
-
- shared_ptr<ReadGroupProperties const> read_group_props() const { return _rg_props; }
-
- const ReadHit* left_alignment() const {return _left_alignment;}
- void left_alignment(const ReadHit* left_alignment)
- {
- _left_alignment = left_alignment;
- }
-
- const ReadHit* right_alignment() const {return _right_alignment;}
- void right_alignment(const ReadHit* right_alignment)
- {
- _right_alignment = right_alignment;
- }
-
- bool is_mapped() const {return _is_mapped;}
- void is_mapped(bool mapped)
- {
- _is_mapped = mapped;
- }
-
- int num_hits() const
- {
- assert(_left_alignment);
- return _left_alignment->num_hits();
- }
-
- bool is_multi() const
- {
- return num_hits() > 1;
- }
-
- bool is_pair() const
- {
- return (_left_alignment && _right_alignment);
- }
-
- int left() const
- {
- if (_right_alignment && _left_alignment)
- {
- return min(_right_alignment->left(),_left_alignment->left());
- }
- if (_left_alignment)
- return _left_alignment->left();
- else if (_right_alignment)
- return _right_alignment->left();
- return -1;
- }
-
- int right() const
- {
- if (_right_alignment && _left_alignment)
- {
- return max(_right_alignment->right(),_left_alignment->right());
- }
- if (_right_alignment)
- return _right_alignment->right();
- else if (_left_alignment)
- return _left_alignment->right();
- return -1;
- }
-
- CuffStrand strand() const
- {
- CuffStrand left_strand = CUFF_STRAND_UNKNOWN;
- CuffStrand right_strand = CUFF_STRAND_UNKNOWN;
- if (_left_alignment)
- {
- left_strand = _left_alignment->source_strand();
- }
- if (_right_alignment)
- {
- right_strand = _right_alignment->source_strand();
- //assert ( s != CUFF_STRAND_UNKNOWN ? s == r : true);
- }
- assert (left_strand == right_strand ||
- left_strand == CUFF_STRAND_UNKNOWN ||
- right_strand == CUFF_STRAND_UNKNOWN);
-
- return max(left_strand, right_strand);
- }
-
-
- bool contains_splice() const
- {
- if (_right_alignment)
- return (_left_alignment->contains_splice() || _right_alignment->contains_splice());
- return (_left_alignment->contains_splice());
- }
-
- InsertID insert_id() const
- {
- if (_left_alignment) return _left_alignment->insert_id();
- if (_right_alignment) return _right_alignment->insert_id();
- return 0;
- }
-
- RefID ref_id() const { return _refid; }
-
- int genomic_inner_dist() const
- {
- if (_left_alignment && _right_alignment)
- {
- return _right_alignment->left() - _left_alignment->right();
- }
- else
- {
- return -1;
- }
- return -1;
- }
-
- pair<int,int> genomic_outer_span() const
- {
- if (_left_alignment && _right_alignment)
- {
- return make_pair(left(),
- right() - 1);
- }
-
- return make_pair(-1,-1);
- }
-
- pair<int,int> genomic_inner_span() const
- {
- if (_left_alignment && _right_alignment)
- {
- return make_pair(_left_alignment->right(),
- _right_alignment->left() - 1);
- }
-
- return make_pair(-1,-1);
- }
-
- // MRT is incorrect and not added to rg_props until after inspect_map
- // We are ignoring the mass reported by the ReadHits and re-calculating based on multi-hits
- double mass() const
- {
- double base_mass = 1.0;
-
- if (is_multi())
- {
- shared_ptr<MultiReadTable> mrt = _rg_props->multi_read_table();
- if (mrt)
- return mrt->get_mass(*this);
- else
- return base_mass/num_hits();
- }
- return base_mass;
- }
-
- double common_scale_mass() const
- {
- double m = mass();
- m *= _rg_props->mass_scale_factor();
-
- return m;
- }
-
- unsigned int edit_dist() const
- {
- unsigned int edits = 0;
- if (_left_alignment)
- edits += _left_alignment->edit_dist();
- if (_right_alignment)
- edits += _right_alignment->edit_dist();
- return edits;
- }
-
- double collapse_mass() const { return _collapse_mass; }
- void collapse_mass(double m) { _collapse_mass = m; }
- void incr_collapse_mass(double incr) { _collapse_mass += incr; }
-
-private:
-
- shared_ptr<ReadGroupProperties const> _rg_props;
- RefID _refid;
- const ReadHit* _left_alignment;
- const ReadHit* _right_alignment;
- double _collapse_mass;
- bool _is_mapped;
- //bool _closed;
-};
-
-bool mate_hit_lt(const MateHit& lhs, const MateHit& rhs);
-
-bool hits_eq_mod_id(const ReadHit& lhs, const ReadHit& rhs);
-
-bool hits_eq_non_multi(const MateHit& lhs, const MateHit& rhs);
-
-bool hits_equals(const MateHit& lhs, const MateHit& rhs);
-
-bool has_no_collapse_mass(const MateHit& hit);
-
-// Assumes hits are sorted by mate_hit_lt
-void collapse_hits(const vector<MateHit>& hits,
- vector<MateHit>& non_redundant);
-
-
-
-#endif
--
Transcript assembly, differential expression, and differential regulation for RNA-Seq.
More information about the debian-med-commit
mailing list