LocARNA-1.9.2
|
00001 #ifndef LOCARNA_SCORING_HH 00002 #define LOCARNA_SCORING_HH 00003 00004 #ifdef HAVE_CONFIG_H 00005 #include <config.h> 00006 #endif 00007 00008 #include <math.h> 00009 #include <vector> 00010 00011 #include "aux.hh" 00012 00013 #include "scoring_fwd.hh" 00014 #include "matrix.hh" 00015 00016 #ifndef NDEBUG 00017 #include "sequence.hh" 00018 #endif 00019 00020 #include "arc_matches.hh" 00021 00022 namespace LocARNA { 00023 00024 //#define MEA_SCORING_OLD 00025 00026 class RibosumFreq; 00027 class Ribofit; 00028 class Scoring; 00029 class Sequence; 00030 class BasePairs; 00031 class BasePairs__Arc; 00032 class ArcMatch; 00033 class MatchProbs; 00034 class RnaData; 00035 00037 typedef std::vector<infty_score_t> ScoreVector; 00038 00040 typedef std::vector<pf_score_t> PFScoreVector; 00041 00043 typedef Matrix<infty_score_t> ScoreMatrix; 00044 00046 typedef Matrix<pf_score_t> PFScoreMatrix; 00047 00049 typedef Matrix<double> ProbMatrix; 00050 00060 class ScoringParams { 00061 public: 00068 const score_t basematch; 00069 00071 const score_t basemismatch; 00072 00074 const score_t indel; 00075 00077 const score_t indel_loop; 00078 00080 const score_t indel_opening; 00081 00084 const score_t indel_opening_loop; 00085 00091 const RibosumFreq *ribosum; 00092 00099 const Ribofit *ribofit; 00100 00102 const score_t unpaired_penalty; 00103 00108 const score_t struct_weight; 00109 00114 const score_t tau_factor; 00115 00117 const score_t exclusion; 00118 00119 const double exp_probA; 00120 00121 const double exp_probB; 00122 00123 const double temperature_alipf; 00124 00126 const bool stacking; 00127 00129 const bool new_stacking; 00130 00132 const bool mea_scoring; 00133 00135 const score_t alpha_factor; 00136 00138 const score_t beta_factor; 00139 00141 const score_t gamma_factor; 00142 00150 const score_t probability_scale; 00151 00152 /* 00153 The mea score is composed in the following way: 00154 00155 params->probability_scale * 00156 ( 00157 sum_basematchs (i,j) 00158 P(i~j) 00159 + alpha_factor/100 * (P(i unstructured) + P(j unstructured)) 00160 + 00161 sum_arcmatchs (a,b) 00162 beta_factor/100 * (P(a)+P(b)) * P(al~bl) * P(ar~br) * 00163 ribosum_arcmatch(a,b) 00164 ) 00165 */ 00166 00167 public: 00194 ScoringParams(score_t basematch_, 00195 score_t basemismatch_, 00196 score_t indel_, 00197 score_t indel_loop_, 00198 score_t indel_opening_, 00199 score_t indel_opening_loop_, 00200 RibosumFreq *ribosum_, 00201 Ribofit *ribofit_, 00202 score_t unpaired_penalty_, 00203 score_t struct_weight_, 00204 score_t tau_factor_, 00205 score_t exclusion_, 00206 double exp_probA_, 00207 double exp_probB_, 00208 double temp_, 00209 bool stacking_, 00210 bool new_stacking_, 00211 bool mea_scoring_, 00212 score_t alpha_factor_, 00213 score_t beta_factor_, 00214 score_t gamma_factor_, 00215 score_t probability_scale_) 00216 : basematch(basematch_), 00217 basemismatch(basemismatch_), 00218 indel(indel_), 00219 indel_loop(indel_loop_), 00220 indel_opening(indel_opening_), 00221 indel_opening_loop(indel_opening_loop_), 00222 ribosum(ribosum_), 00223 ribofit(ribofit_), 00224 unpaired_penalty(unpaired_penalty_), 00225 struct_weight(struct_weight_), 00226 tau_factor(tau_factor_), 00227 exclusion(exclusion_), 00228 exp_probA(exp_probA_), 00229 exp_probB(exp_probB_), 00230 temperature_alipf(temp_), 00231 stacking(stacking_), 00232 new_stacking(new_stacking_), 00233 mea_scoring(mea_scoring_), 00234 alpha_factor(alpha_factor_), 00235 beta_factor(beta_factor_), 00236 gamma_factor(gamma_factor_), 00237 probability_scale(probability_scale_) {} 00238 }; 00239 00278 class Scoring { 00279 public: 00280 typedef BasePairs__Arc Arc; 00281 00282 private: 00283 const ScoringParams *params; 00284 00285 const ArcMatches *arc_matches; 00286 00287 const MatchProbs *match_probs; 00288 00289 const RnaData &rna_dataA; 00290 const RnaData &rna_dataB; 00291 const Sequence &seqA; 00292 const Sequence &seqB; 00293 00298 score_t lambda_; 00299 00300 public: 00317 Scoring(const Sequence &seqA, 00318 const Sequence &seqB, 00319 const RnaData &rna_dataA, 00320 const RnaData &rna_dataB, 00321 const ArcMatches &arc_matches, 00322 const MatchProbs *match_probs, 00323 const ScoringParams ¶ms, 00324 bool exp_scores = false); 00325 00336 void 00337 modify_by_parameter(score_t lambda); 00338 00346 void 00347 apply_unpaired_penalty(); 00348 00354 score_t 00355 lambda() const { 00356 return lambda_; 00357 } 00358 00359 private: 00360 // ------------------------------ 00361 // tables for precomputed score contributions 00362 // 00363 Matrix<score_t> 00364 sigma_tab; 00365 00366 std::vector<score_t> gapcost_tabA; 00367 std::vector<score_t> gapcost_tabB; 00368 00369 std::vector<score_t> weightsA; //<! weights of base pairs in A 00370 std::vector<score_t> weightsB; //<! weights of base pairs in B 00371 00372 std::vector<score_t> stack_weightsA; //<! weights of stacked base 00373 //<! pairs in A 00374 std::vector<score_t> stack_weightsB; //<! weights of stacked base 00375 //<! pairs in B 00376 00377 // ------------------------------ 00378 // tables for precomputed exp score contributions for partition function 00379 // 00380 Matrix<pf_score_t> 00381 exp_sigma_tab; 00382 pf_score_t exp_indel_opening_score; 00383 00384 pf_score_t exp_indel_opening_loop_score; 00385 00386 00387 std::vector<pf_score_t> 00388 exp_gapcost_tabA; 00389 std::vector<pf_score_t> 00390 exp_gapcost_tabB; 00391 00392 Matrix<size_t> identity; 00393 00394 void 00395 precompute_sequence_identities(); 00396 00405 score_t 00406 round2score(double d) const { 00407 return (score_t)((d < 0) ? (d - 0.5) : (d + 0.5)); 00408 } 00409 00419 score_t 00420 sigma_(int i, int j) const; 00421 00427 void 00428 precompute_sigma(); 00429 00435 void 00436 precompute_exp_sigma(); 00437 00439 void 00440 precompute_gapcost(); 00441 00443 void 00444 precompute_exp_gapcost(); 00445 00447 void 00448 precompute_weights(); 00449 00459 void 00460 precompute_weights(const RnaData &rna_data, 00461 const BasePairs &bps, 00462 double exp_prob, 00463 std::vector<score_t> &weights, 00464 std::vector<score_t> &stack_weights); 00465 00475 score_t 00476 probToWeight(double p, double prob_exp) const; 00477 00482 double 00483 ribosum_arcmatch_prob(const Arc &arcA, const Arc &arcB) const; 00484 00498 score_t 00499 riboX_arcmatch_score(const Arc &arcA, const Arc &arcB) const; 00500 00501 pf_score_t 00502 boltzmann_weight(score_t s) const { 00503 return exp(s / (pf_score_t)params->temperature_alipf); 00504 } 00505 00507 void 00508 subtract(std::vector<score_t> &v, score_t x) const; 00509 00511 void 00512 subtract(Matrix<score_t> &m, score_t x) const; 00513 00514 public: 00515 // ------------------------------------------------------------ 00516 // SCORE CONTRIBUTIONS 00517 00526 score_t 00527 basematch(size_type i, size_type j) const { 00528 return sigma_tab(i, j); 00529 } 00530 00539 pf_score_t 00540 exp_basematch(size_type i, size_type j) const { 00541 return exp_sigma_tab(i, j); 00542 } 00543 00561 score_t 00562 arcmatch(const ArcMatch &am, bool stacked = false) const; 00563 00581 score_t 00582 arcmatch(const BasePairs__Arc &arcA, 00583 const BasePairs__Arc &arcB, 00584 bool stacked = false) const; 00585 00595 template <bool gapAorB> 00596 score_t 00597 arcDel(const BasePairs__Arc &arc, bool stacked = false) const; 00598 00606 pf_score_t 00607 exp_arcmatch(const ArcMatch &am) const { 00608 return boltzmann_weight(arcmatch(am)); 00609 } 00610 00618 score_t 00619 arcmatch_stacked(const ArcMatch &am) const { 00620 return arcmatch(am, true); 00621 } 00622 00632 template <bool gapInA> 00633 inline 00634 score_t 00635 gapX(size_type alignedToGap) const; 00636 00644 score_t 00645 gapA(size_type posA) const { 00646 assert(1 <= posA && posA <= seqA.length()); 00647 00648 return gapcost_tabA[posA]; 00649 } 00650 00658 pf_score_t 00659 exp_gapA(size_type posA) const { 00660 assert(1 <= posA && posA <= seqA.length()); 00661 return exp_gapcost_tabA[posA]; 00662 } 00663 00671 score_t 00672 gapB(size_type posB) const { 00673 assert(1 <= posB && posB <= seqB.length()); 00674 00675 return gapcost_tabB[posB]; 00676 } 00677 00685 pf_score_t 00686 exp_gapB(size_type posB) const { 00687 assert(1 <= posB && posB <= seqB.length()); 00688 00689 return exp_gapcost_tabB[posB]; 00690 } 00691 00693 score_t 00694 exclusion() const { 00695 return params->exclusion; 00696 } 00697 00699 score_t 00700 indel_opening() const { 00701 return params->indel_opening; 00702 } 00703 00705 score_t 00706 loop_indel_score(const score_t score) const { 00707 return round2score(score * params->indel_loop / params->indel); 00708 } 00710 score_t 00711 indel_opening_loop() const { 00712 return params->indel_opening_loop; 00713 } 00714 00716 pf_score_t 00717 exp_indel_opening() const { 00718 return exp_indel_opening_score; 00719 } 00720 00722 pf_score_t 00723 exp_indel_opening_loop() const { 00724 return exp_indel_opening_loop_score; 00725 } 00726 00727 // 00728 // ------------------------------------------------------------ 00729 00739 double 00740 prob_exp(size_type len) const; 00741 00747 bool 00748 stacking() const { 00749 return params->stacking || params->new_stacking; 00750 } 00751 00759 bool 00760 is_stackable_arcA(const Arc &a) const; 00761 00769 bool 00770 is_stackable_arcB(const Arc &a) const; 00771 00779 bool 00780 is_stackable_am(const ArcMatch &am) const; 00781 00782 }; // end class Scoring 00783 00784 template <bool isA> 00785 score_t 00786 Scoring::arcDel(const Arc &arcX, 00787 bool stacked) const { // TODO Important Scoring scheme for 00788 // aligning an arc to a gap is not 00789 // defined and implemented! 00790 00791 if (arc_matches 00792 ->explicit_scores()) { // will not take stacking into account!!! 00793 std::cerr 00794 << "ERROR sparse explicit scores is not supported!" 00795 << std::endl; // TODO: Supporting explicit scores for arcgap 00796 assert(!arc_matches->explicit_scores()); 00797 } 00798 00799 if (!params->mea_scoring) { 00800 return 00801 // base pair weights 00802 loop_indel_score( 00803 gapX<isA>(arcX.left()) + 00804 gapX<isA>(arcX.right())) + // score of aligining base-pair 00805 // ends wo gap, such that it is 00806 // multiply by gamma_loop ratio 00807 (stacked ? (isA ? stack_weightsA[arcX.idx()] 00808 : stack_weightsB[arcX.idx()]) 00809 : (isA ? weightsA[arcX.idx()] : weightsB[arcX.idx()])); 00810 } else { 00811 std::cerr << "ERROR sparse mea_scoring is not supported!" 00812 << std::endl; // TODO: Supporting mea_scoring for arcgap 00813 assert(!params->mea_scoring); 00814 return 0; 00815 } 00816 } 00817 00818 template <> 00819 inline 00820 score_t 00821 Scoring::gapX<true>(size_type alignedToGap) const { 00822 return gapA(alignedToGap); 00823 } 00824 00825 template <> 00826 inline 00827 score_t 00828 Scoring::gapX<false>(size_type alignedToGap) const { 00829 return gapB(alignedToGap); 00830 } 00831 00832 } // end namespace LocARNA 00833 00834 #endif // LOCARNA_SCORING_HH