src/biu/RNAStructure.cc
Go to the documentation of this file.00001 #include <biu/RNAStructure.hh>
00002 #include <stack>
00003 #include <biu/assertbiu.hh>
00004
00005 namespace biu
00006 {
00007 const size_t RNAStructure::MIN_LOOP_LENGTH = 3;
00008 const size_t RNAStructure::INVALID_INDEX =
00009 std::numeric_limits<size_t>::max();
00010 const Alphabet RNAStructure::STRUCT_ALPH("().", 1);
00011 const Alphabet::AlphElem RNAStructure::STRUCT_BND_OPEN = RNAStructure::STRUCT_ALPH.getElement("(");
00012 const Alphabet::AlphElem RNAStructure::STRUCT_BND_CLOSE = RNAStructure::STRUCT_ALPH.getElement(")");
00013 const Alphabet::AlphElem RNAStructure::STRUCT_UNBOUND = RNAStructure::STRUCT_ALPH.getElement(".");
00014
00016
00018
00019 RNAStructure::RNAStructure( const std::string& rnaSeqStr,
00020 const std::string& rnaStructBracketDotStr,
00021 const AllowedBasePairs* const _bPair)
00022 : rnaSeq(NULL),
00023 seqShared(false),
00024 rnaStructBracketDot(NULL),
00025 rnaBonds(),
00026 bPair(_bPair)
00027 {
00028 assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00029 assertbiu(bPair->getAlphabet()->isAlphabetString(rnaSeqStr),
00030 "RNA sequence contains characters which are not in the alphabet.");
00031 assertbiu(STRUCT_ALPH.isAlphabetString(rnaStructBracketDotStr),
00032 "RNA structure contains characters which are not in the alphabet.");
00033
00034
00035 rnaSeq = new Sequence(bPair->getAlphabet()->getSequence(rnaSeqStr));
00036 rnaStructBracketDot = new Structure(
00037 STRUCT_ALPH.getSequence(rnaStructBracketDotStr));
00038
00039 assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00040 "RNA sequence and structure have to have the same length.");
00041
00042
00043 initBonds();
00044 }
00045
00046 RNAStructure::RNAStructure( Sequence* rnaSeq_,
00047 const Structure* const rnaStructBracketDot_,
00048 const AllowedBasePairs* const bPair_,
00049 const bool seqIsShared)
00050 : rnaSeq(NULL),
00051 seqShared(seqIsShared),
00052 rnaStructBracketDot(NULL),
00053 bPair(bPair_)
00054 {
00055 assertbiu(rnaSeq_ != NULL, "no RNA sequence given.");
00056 assertbiu(rnaStructBracketDot_ != NULL, "no RNA structure given.");
00057 assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00058 assertbiu(bPair->getAlphabet()->isAlphabetSequence(*rnaSeq_),
00059 "RNA sequence contains characters which are not in the alphabet.");
00060 assertbiu(STRUCT_ALPH.isAlphabetSequence(*rnaStructBracketDot_),
00061 "RNA structure contains characters which are not in the alphabet.");
00062 assertbiu(rnaSeq_->size() == rnaStructBracketDot_->size(),
00063 "RNA sequence and structure have to have the same length.");
00064
00065
00066
00067 if (seqShared) {
00068 rnaSeq = rnaSeq_;
00069 } else {
00070 rnaSeq = new Sequence(*rnaSeq_);
00071 }
00072
00073 rnaStructBracketDot = new Structure(*rnaStructBracketDot_);
00074
00075
00076 initBonds();
00077 }
00078
00079 RNAStructure::RNAStructure(const RNAStructure& rnaStruct)
00080 : rnaSeq(rnaStruct.rnaSeq),
00081 seqShared(rnaStruct.seqShared),
00082 rnaStructBracketDot(new Structure(*(rnaStruct.rnaStructBracketDot))),
00083 rnaBonds(rnaStruct.rnaBonds), bPair(rnaStruct.bPair)
00084 {
00085 assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00086 assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00087 "RNA sequence and structure have to have the same length.");
00088
00089 if (!seqShared) {
00090 rnaSeq = new Sequence(*(rnaStruct.rnaSeq));
00091 }
00092 }
00093
00094 RNAStructure::~RNAStructure() {
00095 if(!seqShared && rnaSeq != NULL) {
00096 delete rnaSeq; rnaSeq = NULL;
00097 }
00098 if (rnaStructBracketDot != NULL) {
00099 delete rnaStructBracketDot; rnaStructBracketDot = NULL;
00100 }
00101 }
00102
00103 bool
00104 RNAStructure::isAllowedBasePair( size_t first,
00105 size_t second) const
00106 {
00107 if (first == RNAStructure::INVALID_INDEX || second == RNAStructure::INVALID_INDEX)
00108 return false;
00109 assertbiu(first < rnaSeq->size() && second < rnaSeq->size(),
00110 "First or second is not a base position in the RNA sequence.");
00111 return bPair->allowedBasePair((*rnaSeq)[first], (*rnaSeq)[second]);
00112 }
00113
00114 RNAStructure&
00115 RNAStructure::operator= (const RNAStructure& rnaStruct2) {
00116 assertbiu( seqShared == rnaStruct2.seqShared,
00117 "sequence of both has to be shared or not");
00118 if (this != &rnaStruct2) {
00119
00120 if (seqShared) {
00121 rnaSeq = rnaStruct2.rnaSeq;
00122 } else {
00123 rnaSeq->resize(rnaStruct2.rnaSeq->size());
00124 std::copy( rnaStruct2.rnaSeq->begin(),
00125 rnaStruct2.rnaSeq->end(),
00126 rnaSeq->begin());
00127 }
00128
00129 rnaStructBracketDot->resize(rnaStruct2.rnaStructBracketDot->size());
00130 std::copy( rnaStruct2.rnaStructBracketDot->begin(),
00131 rnaStruct2.rnaStructBracketDot->end(),
00132 rnaStructBracketDot->begin());
00133
00134 bPair = rnaStruct2.bPair;
00135 rnaBonds = rnaStruct2.rnaBonds;
00136 }
00137 return *this;
00138 }
00139
00140 bool
00141 RNAStructure::operator== (const RNAStructure& rnaStruct2)
00142 const {
00143 return (rnaSeq == rnaStruct2.rnaSeq || *rnaSeq == *(rnaStruct2.rnaSeq))
00144 && *rnaStructBracketDot == *(rnaStruct2.rnaStructBracketDot)
00145 && rnaBonds == rnaStruct2.rnaBonds
00146 && *bPair == *(rnaStruct2.bPair);
00147 }
00148
00149 bool
00150 RNAStructure::operator!= (const RNAStructure& rnaStruct2)
00151 const {
00152 return (rnaSeq != rnaStruct2.rnaSeq && *rnaSeq != *(rnaStruct2.rnaSeq))
00153 || *rnaStructBracketDot != *(rnaStruct2.rnaStructBracketDot)
00154 || rnaBonds != rnaStruct2.rnaBonds
00155 || *bPair != *(rnaStruct2.bPair);
00156 }
00157
00158 bool
00159 RNAStructure::hasValidStructure() const {
00160 int count = 0;
00161
00162 for (size_t i=0; i < rnaStructBracketDot->size(); i++) {
00163 if ((*rnaStructBracketDot)[i] == STRUCT_BND_OPEN ) {
00164 count++;
00165 }
00166 else if ((*rnaStructBracketDot)[i] == STRUCT_BND_CLOSE) {
00167 count--;
00168 }
00169 if (count < 0) {
00170 return false;
00171 }
00172 }
00173 return (count == 0);
00174 }
00175
00176 bool
00177 RNAStructure::hasValidBasePairs() const{
00178 for (size_t i=0; i < rnaBonds.size(); i++) {
00179
00180 if (rnaBonds[i] != RNAStructure::INVALID_INDEX
00181 && !isAllowedBasePair(i, rnaBonds[i])) {
00182 return false;
00183 }
00184 }
00185 return true;
00186 }
00187
00188 bool
00189 RNAStructure::hasMinLoopLength() const {
00190 for (size_t i=0; i < rnaBonds.size(); i++) {
00191
00192 if (rnaBonds[i] != RNAStructure::INVALID_INDEX
00193 && (rnaBonds[i]-i) <= RNAStructure::MIN_LOOP_LENGTH) {
00194 return false;
00195 }
00196 }
00197 return true;
00198 }
00199
00200
00201 bool
00202 RNAStructure::isValid() const{
00203 assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00204 "RNA sequence and structure have to have the same length.");
00205 return ( hasValidStructure() && hasMinLoopLength()
00206 && hasValidBasePairs() );
00207 }
00208
00209 std::string
00210 RNAStructure::getStringRepresentation() const {
00211 return ( getStructureString() + "(" + getSequenceString() + ")" );
00212 }
00213
00214 void
00215 RNAStructure::initBonds() {
00216
00217 rnaBonds = std::vector<size_t>
00218 (rnaStructBracketDot->size(), RNAStructure::INVALID_INDEX);
00219
00220
00221 if (!hasValidStructure()) {
00222 return;
00223 }
00224
00225
00226 std::stack<size_t> openingBonds;
00227
00228
00229 for (size_t i=0; i < rnaStructBracketDot->size(); i++) {
00230 if ( (*rnaStructBracketDot)[i] == STRUCT_BND_OPEN ) {
00231 openingBonds.push(i);
00232 }
00233 else if ( (*rnaStructBracketDot)[i] == STRUCT_BND_CLOSE ) {
00234 assertbiu(!openingBonds.empty(),
00235 "Closing without opening bond.");
00236
00237 rnaBonds[openingBonds.top()] = i;
00238 openingBonds.pop();
00239 }
00240 }
00241 assertbiu(openingBonds.empty(), "Opening without closing bond.");
00242 }
00243
00244 }