LocARNA-1.9.2
|
00001 #ifndef LOCARNA_ARC_MATCHES_HH 00002 #define LOCARNA_ARC_MATCHES_HH 00003 00004 #ifdef HAVE_CONFIG_H 00005 #include <config.h> 00006 #endif 00007 00008 #include <algorithm> 00009 #include <vector> 00010 00011 #include "scoring_fwd.hh" 00012 #include "aux.hh" 00013 #include "matrix.hh" 00014 #include "basepairs.hh" 00015 00016 #include <assert.h> 00017 00018 namespace LocARNA { 00019 class Scoring; 00020 class Sequence; 00021 class RnaData; 00022 class AnchorConstraints; 00023 class TraceController; 00024 class MatchController; 00025 00034 class ArcMatch { 00035 public: 00036 typedef std::vector<int>::size_type size_type; 00037 typedef size_type idx_type; 00038 typedef BasePairs__Arc Arc; 00039 private: 00040 const Arc *arcA_; 00041 const Arc *arcB_; 00042 idx_type idx_; 00043 00044 public: 00052 ArcMatch(const Arc *arcA, const Arc *arcB, idx_type idx) 00053 : arcA_(arcA), arcB_(arcB), idx_(idx) {} 00054 00060 const Arc & 00061 arcA() const { 00062 return *arcA_; 00063 } 00064 00070 const Arc & 00071 arcB() const { 00072 return *arcB_; 00073 } 00074 00080 idx_type 00081 idx() const { 00082 return idx_; 00083 } 00084 }; 00085 00087 typedef std::vector<ArcMatch> ArcMatchVec; 00088 00090 typedef std::vector<ArcMatch::idx_type> ArcMatchIdxVec; 00091 00115 class ArcMatches { 00116 public: 00117 typedef std::vector<int>::size_type size_type; 00118 typedef BasePairs__Arc Arc; 00119 protected: 00120 size_type lenA; 00121 size_type lenB; 00122 00123 BasePairs *bpsA; 00124 BasePairs *bpsB; 00125 00126 /* Constraints and Heuristics */ 00127 00128 size_type max_length_diff; 00129 00130 size_type max_diff_at_am; 00131 00132 const MatchController &match_controller; 00133 00134 00135 const AnchorConstraints &constraints; 00136 00149 bool 00150 is_valid_arcmatch(const Arc &arcA, const Arc &arcB) const; 00151 00152 /* END constraints and heuristics */ 00153 00154 bool maintain_explicit_scores; 00155 00156 00157 00159 ArcMatchVec arc_matches_vec; 00160 00165 size_type number_of_arcmatches; 00166 00168 std::vector<score_t> scores; 00169 00172 Matrix<ArcMatchIdxVec> common_right_end_lists; 00173 00176 Matrix<ArcMatchIdxVec> common_left_end_lists; 00177 00179 ArcMatchIdxVec inner_arcmatch_idxs; 00180 00182 void 00183 init_inner_arc_matchs(); 00184 00187 class lex_greater_left_ends { 00188 const ArcMatches &arc_matches; 00189 00190 public: 00196 explicit lex_greater_left_ends(const ArcMatches &arc_matches_) 00197 : arc_matches(arc_matches_) {} 00198 00209 bool 00210 operator()(const ArcMatch::idx_type &i, 00211 const ArcMatch::idx_type &j) const { 00212 size_type ali = arc_matches.arcmatch(i).arcA().left(); 00213 size_type bli = arc_matches.arcmatch(i).arcB().left(); 00214 size_type alj = arc_matches.arcmatch(j).arcA().left(); 00215 size_type blj = arc_matches.arcmatch(j).arcB().left(); 00216 00217 return (ali > alj) || (ali == alj && bli > blj); 00218 } 00219 }; 00220 00225 class tuple5 { 00226 public: 00227 typedef std::vector<int>::size_type size_type; 00228 00229 size_type i; 00230 size_type j; 00231 size_type k; 00232 size_type l; 00233 score_t score; 00234 00235 00245 tuple5(size_type i_, 00246 size_type j_, 00247 size_type k_, 00248 size_type l_, 00249 score_t score_) 00250 : i(i_), j(j_), k(k_), l(l_), score(score_) {} 00251 }; 00252 00253 public: 00276 ArcMatches(const Sequence &seqA_, 00277 const Sequence &seqB_, 00278 const std::string &arcmatch_scores_file, 00279 int probability_scale, 00280 size_type max_length_diff, 00281 size_type max_diff_at_am, 00282 const MatchController &trace_controller, 00283 const AnchorConstraints &constraints); 00284 00306 ArcMatches(const RnaData &rnadataA, 00307 const RnaData &rnadataB, 00308 double min_prob, 00309 size_type max_length_diff, 00310 size_type max_diff_at_am, 00311 const MatchController &trace_controller, 00312 const AnchorConstraints &constraints); 00313 00315 ~ArcMatches(); 00316 00317 // for the mea probabilistic consistency transformation, support to read 00318 // and write the arcmatch scores 00319 // this allows in general to have user defined arc-match scores 00320 00333 void 00334 read_arcmatch_scores(const std::string &arcmatch_scores_file, 00335 int probability_scale); 00336 00342 void 00343 write_arcmatch_scores(const std::string &arcmatch_scores_file, 00344 const Scoring &scoring) const; 00345 00347 const BasePairs & 00348 get_base_pairsA() const { 00349 return *bpsA; 00350 } 00351 00353 const BasePairs & 00354 get_base_pairsB() const { 00355 return *bpsB; 00356 } 00357 00360 bool 00361 explicit_scores() const { 00362 return maintain_explicit_scores; 00363 } 00364 00370 void 00371 make_scores_explicit(const Scoring &scoring); 00372 00379 score_t 00380 get_score(const ArcMatch &am) const { 00381 assert(maintain_explicit_scores); 00382 return scores[am.idx()]; 00383 } 00384 00386 size_type 00387 num_arc_matches() const { 00388 return number_of_arcmatches; 00389 } 00390 00392 const ArcMatch & 00393 arcmatch(size_type idx) const { 00394 assert(idx < number_of_arcmatches); 00395 return arc_matches_vec[idx]; 00396 } 00397 00398 // ============================================================ 00399 // Iteration over arc matches 00400 // 00401 00403 const ArcMatchIdxVec & 00404 common_right_end_list(size_type i, size_type j) const { 00405 return common_right_end_lists(i, j); 00406 } 00407 00409 const ArcMatchIdxVec & 00410 common_left_end_list(size_type i, size_type j) const { 00411 return common_left_end_lists(i, j); 00412 } 00413 00414 // ============================================================ 00415 00437 void 00438 get_max_right_ends(size_type al, 00439 size_type bl, 00440 size_type *max_ar, 00441 size_type *max_br, 00442 bool no_lonely_pairs) const; 00443 00450 void 00451 get_min_right_ends(size_type al, 00452 size_type bl, 00453 size_type *min_ar, 00454 size_type *min_br) const; 00455 00456 // ------------------------------------------------------------ 00457 // inner arc matches 00458 00463 bool 00464 exists_inner_arc_match(const ArcMatch &am) const { 00465 return inner_arcmatch_idxs[am.idx()] < num_arc_matches(); 00466 } 00467 00473 const ArcMatch & 00474 inner_arc_match(const ArcMatch &am) const { 00475 return arcmatch(inner_arcmatch_idxs[am.idx()]); 00476 } 00477 00484 void 00485 sort_right_adjacency_lists(); 00486 00487 // ------------------------------------------------------------ 00488 // iteration (in no specific order) 00489 00491 typedef ArcMatchVec::const_iterator const_iterator; 00492 00494 const_iterator 00495 begin() const { 00496 return arc_matches_vec.begin(); 00497 } 00498 00500 const_iterator 00501 end() const { 00502 return arc_matches_vec.begin() + number_of_arcmatches; 00503 } 00504 }; 00505 00513 class ArcMatchesIndexed : public ArcMatches { 00514 public: 00537 ArcMatchesIndexed(const Sequence &seqA_, 00538 const Sequence &seqB_, 00539 const std::string &arcmatch_scores_file, 00540 int probability_scale, 00541 size_type max_length_diff, 00542 size_type max_diff_at_am, 00543 const MatchController &trace_controller, 00544 const AnchorConstraints &constraints) 00545 : ArcMatches(seqA_, 00546 seqB_, 00547 arcmatch_scores_file, 00548 probability_scale, 00549 max_length_diff, 00550 max_diff_at_am, 00551 trace_controller, 00552 constraints), 00553 am_index_() { 00554 build_arcmatch_index(); 00555 } 00556 00578 ArcMatchesIndexed(const RnaData &rnadataA, 00579 const RnaData &rnadataB, 00580 double min_prob, 00581 size_type max_length_diff, 00582 size_type max_diff_at_am, 00583 const MatchController &trace_controller, 00584 const AnchorConstraints &constraints) 00585 : ArcMatches(rnadataA, 00586 rnadataB, 00587 min_prob, 00588 max_length_diff, 00589 max_diff_at_am, 00590 trace_controller, 00591 constraints), 00592 am_index_() { 00593 build_arcmatch_index(); 00594 } 00595 00596 private: 00598 typedef std::pair<size_type, size_type> idx_pair_t; 00599 00601 typedef unordered_map<idx_pair_t, 00602 ArcMatch::idx_type, 00603 pair_of_size_t_hash>::type am_index_type; 00604 00606 am_index_type am_index_; 00607 00612 void 00613 build_arcmatch_index(); 00614 00615 public: 00624 const ArcMatch::idx_type 00625 invalid_am_index() const { 00626 // The invalid arc match index is implemented to be the 00627 // maximum valid index + 1. 00628 // This allows an efficient implementation, where we push 00629 // an invalid arc match to the end of vector arc_matches_vec. 00630 // Thus, we return size-1! 00631 00632 return number_of_arcmatches; 00633 } 00634 00643 const ArcMatch::idx_type 00644 am_index(const size_type &arcAIdx, const size_type &arcBIdx) const { 00645 am_index_type::const_iterator it = 00646 am_index_.find(idx_pair_t(arcAIdx, arcBIdx)); 00647 if (am_index_.end() != it) { 00648 return it->second; 00649 } else { 00650 return invalid_am_index(); 00651 } 00652 } 00653 00663 const ArcMatch & 00664 am_index(const Arc &arcA, const Arc &arcB) const { 00665 return arc_matches_vec[am_index(arcA.idx(), arcB.idx())]; 00666 } 00667 }; 00668 00669 } // end namespace LocARNA 00670 00671 #endif // LOCARNA_ARC_MATCHES_HH