src/biu/RNAStructure.cc
Go to the documentation of this file.00001 #include <biu/RNAStructure.hh>
00002 #include <stack>
00003 #include <biu/assertbiu.hh>
00004 #include <limits>
00005
00006 namespace biu
00007 {
00008 const size_t RNAStructure::MIN_LOOP_LENGTH = 3;
00009 const size_t RNAStructure::INVALID_INDEX =
00010 std::numeric_limits<size_t>::max();
00011 const Alphabet RNAStructure::STRUCT_ALPH("().", 1);
00012 const Alphabet::AlphElem RNAStructure::STRUCT_BND_OPEN = RNAStructure::STRUCT_ALPH.getElement("(");
00013 const Alphabet::AlphElem RNAStructure::STRUCT_BND_CLOSE = RNAStructure::STRUCT_ALPH.getElement(")");
00014 const Alphabet::AlphElem RNAStructure::STRUCT_UNBOUND = RNAStructure::STRUCT_ALPH.getElement(".");
00015
00017
00019
00020 RNAStructure::RNAStructure( const std::string& rnaSeqStr,
00021 const std::string& rnaStructBracketDotStr,
00022 const AllowedBasePairs* const _bPair)
00023 : rnaSeq(NULL),
00024 seqShared(false),
00025 rnaStructBracketDot(NULL),
00026 rnaBonds(),
00027 bPair(_bPair)
00028 {
00029 assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00030 assertbiu(bPair->getAlphabet()->isAlphabetString(rnaSeqStr),
00031 "RNA sequence contains characters which are not in the alphabet.");
00032 assertbiu(STRUCT_ALPH.isAlphabetString(rnaStructBracketDotStr),
00033 "RNA structure contains characters which are not in the alphabet.");
00034
00035
00036 rnaSeq = new Sequence(bPair->getAlphabet()->getSequence(rnaSeqStr));
00037 rnaStructBracketDot = new Structure(
00038 STRUCT_ALPH.getSequence(rnaStructBracketDotStr));
00039
00040 assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00041 "RNA sequence and structure have to have the same length.");
00042
00043
00044 initBonds();
00045 }
00046
00047 RNAStructure::RNAStructure( Sequence* rnaSeq_,
00048 const Structure* const rnaStructBracketDot_,
00049 const AllowedBasePairs* const bPair_,
00050 const bool seqIsShared)
00051 : rnaSeq(NULL),
00052 seqShared(seqIsShared),
00053 rnaStructBracketDot(NULL),
00054 bPair(bPair_)
00055 {
00056 assertbiu(rnaSeq_ != NULL, "no RNA sequence given.");
00057 assertbiu(rnaStructBracketDot_ != NULL, "no RNA structure given.");
00058 assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00059 assertbiu(bPair->getAlphabet()->isAlphabetSequence(*rnaSeq_),
00060 "RNA sequence contains characters which are not in the alphabet.");
00061 assertbiu(STRUCT_ALPH.isAlphabetSequence(*rnaStructBracketDot_),
00062 "RNA structure contains characters which are not in the alphabet.");
00063 assertbiu(rnaSeq_->size() == rnaStructBracketDot_->size(),
00064 "RNA sequence and structure have to have the same length.");
00065
00066
00067
00068 if (seqShared) {
00069 rnaSeq = rnaSeq_;
00070 } else {
00071 rnaSeq = new Sequence(*rnaSeq_);
00072 }
00073
00074 rnaStructBracketDot = new Structure(*rnaStructBracketDot_);
00075
00076
00077 initBonds();
00078 }
00079
00080 RNAStructure::RNAStructure(const RNAStructure& rnaStruct)
00081 : rnaSeq(rnaStruct.rnaSeq),
00082 seqShared(rnaStruct.seqShared),
00083 rnaStructBracketDot(new Structure(*(rnaStruct.rnaStructBracketDot))),
00084 rnaBonds(rnaStruct.rnaBonds), bPair(rnaStruct.bPair)
00085 {
00086 assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00087 assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00088 "RNA sequence and structure have to have the same length.");
00089
00090 if (!seqShared) {
00091 rnaSeq = new Sequence(*(rnaStruct.rnaSeq));
00092 }
00093 }
00094
00095 RNAStructure::~RNAStructure() {
00096 if(!seqShared && rnaSeq != NULL) {
00097 delete rnaSeq; rnaSeq = NULL;
00098 }
00099 if (rnaStructBracketDot != NULL) {
00100 delete rnaStructBracketDot; rnaStructBracketDot = NULL;
00101 }
00102 }
00103
00104 bool
00105 RNAStructure::isAllowedBasePair( size_t first,
00106 size_t second) const
00107 {
00108 if (first == RNAStructure::INVALID_INDEX || second == RNAStructure::INVALID_INDEX)
00109 return false;
00110 assertbiu(first < rnaSeq->size() && second < rnaSeq->size(),
00111 "First or second is not a base position in the RNA sequence.");
00112 return bPair->allowedBasePair((*rnaSeq)[first], (*rnaSeq)[second]);
00113 }
00114
00115 RNAStructure&
00116 RNAStructure::operator= (const RNAStructure& rnaStruct2) {
00117 assertbiu( seqShared == rnaStruct2.seqShared,
00118 "sequence of both has to be shared or not");
00119 if (this != &rnaStruct2) {
00120
00121 if (seqShared) {
00122 rnaSeq = rnaStruct2.rnaSeq;
00123 } else {
00124 rnaSeq->resize(rnaStruct2.rnaSeq->size());
00125 std::copy( rnaStruct2.rnaSeq->begin(),
00126 rnaStruct2.rnaSeq->end(),
00127 rnaSeq->begin());
00128 }
00129
00130 rnaStructBracketDot->resize(rnaStruct2.rnaStructBracketDot->size());
00131 std::copy( rnaStruct2.rnaStructBracketDot->begin(),
00132 rnaStruct2.rnaStructBracketDot->end(),
00133 rnaStructBracketDot->begin());
00134
00135 bPair = rnaStruct2.bPair;
00136 rnaBonds = rnaStruct2.rnaBonds;
00137 }
00138 return *this;
00139 }
00140
00141 bool
00142 RNAStructure::operator== (const RNAStructure& rnaStruct2)
00143 const {
00144 return (rnaSeq == rnaStruct2.rnaSeq || *rnaSeq == *(rnaStruct2.rnaSeq))
00145 && *rnaStructBracketDot == *(rnaStruct2.rnaStructBracketDot)
00146 && rnaBonds == rnaStruct2.rnaBonds
00147 && *bPair == *(rnaStruct2.bPair);
00148 }
00149
00150 bool
00151 RNAStructure::operator!= (const RNAStructure& rnaStruct2)
00152 const {
00153 return (rnaSeq != rnaStruct2.rnaSeq && *rnaSeq != *(rnaStruct2.rnaSeq))
00154 || *rnaStructBracketDot != *(rnaStruct2.rnaStructBracketDot)
00155 || rnaBonds != rnaStruct2.rnaBonds
00156 || *bPair != *(rnaStruct2.bPair);
00157 }
00158
00159 bool
00160 RNAStructure::hasValidStructure() const {
00161 int count = 0;
00162
00163 for (size_t i=0; i < rnaStructBracketDot->size(); i++) {
00164 if ((*rnaStructBracketDot)[i] == STRUCT_BND_OPEN ) {
00165 count++;
00166 }
00167 else if ((*rnaStructBracketDot)[i] == STRUCT_BND_CLOSE) {
00168 count--;
00169 }
00170 if (count < 0) {
00171 return false;
00172 }
00173 }
00174 return (count == 0);
00175 }
00176
00177 bool
00178 RNAStructure::hasValidBasePairs() const{
00179 for (size_t i=0; i < rnaBonds.size(); i++) {
00180
00181 if (rnaBonds[i] != RNAStructure::INVALID_INDEX
00182 && !isAllowedBasePair(i, rnaBonds[i])) {
00183 return false;
00184 }
00185 }
00186 return true;
00187 }
00188
00189 bool
00190 RNAStructure::hasMinLoopLength() const {
00191 for (size_t i=0; i < rnaBonds.size(); i++) {
00192
00193 if (rnaBonds[i] != RNAStructure::INVALID_INDEX
00194 && (rnaBonds[i]-i) <= RNAStructure::MIN_LOOP_LENGTH) {
00195 return false;
00196 }
00197 }
00198 return true;
00199 }
00200
00201
00202 bool
00203 RNAStructure::isValid() const{
00204 assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00205 "RNA sequence and structure have to have the same length.");
00206 return ( hasValidStructure() && hasMinLoopLength()
00207 && hasValidBasePairs() );
00208 }
00209
00210 std::string
00211 RNAStructure::getStringRepresentation() const {
00212 return ( getStructureString() + "(" + getSequenceString() + ")" );
00213 }
00214
00215 void
00216 RNAStructure::initBonds() {
00217
00218 rnaBonds = std::vector<size_t>
00219 (rnaStructBracketDot->size(), RNAStructure::INVALID_INDEX);
00220
00221
00222 if (!hasValidStructure()) {
00223 return;
00224 }
00225
00226
00227 std::stack<size_t> openingBonds;
00228
00229
00230 for (size_t i=0; i < rnaStructBracketDot->size(); i++) {
00231 if ( (*rnaStructBracketDot)[i] == STRUCT_BND_OPEN ) {
00232 openingBonds.push(i);
00233 }
00234 else if ( (*rnaStructBracketDot)[i] == STRUCT_BND_CLOSE ) {
00235 assertbiu(!openingBonds.empty(),
00236 "Closing without opening bond.");
00237
00238 rnaBonds[openingBonds.top()] = i;
00239 openingBonds.pop();
00240 }
00241 }
00242 assertbiu(openingBonds.empty(), "Opening without closing bond.");
00243 }
00244
00245 }