LocARNA-1.9.2: src/LocARNA/scoring.hh Source File

00001 #ifndef LOCARNA_SCORING_HH
00002 #define LOCARNA_SCORING_HH
00003 
00004 #ifdef HAVE_CONFIG_H
00005 #include <config.h>
00006 #endif
00007 
00008 #include <math.h>
00009 #include <vector>
00010 
00011 #include "aux.hh"
00012 
00013 #include "scoring_fwd.hh"
00014 #include "matrix.hh"
00015 
00016 #ifndef NDEBUG
00017 #include "sequence.hh"
00018 #endif
00019 
00020 #include "arc_matches.hh"
00021 
00022 namespace LocARNA {
00023 
00024     //#define MEA_SCORING_OLD
00025 
00026     class RibosumFreq;
00027     class Ribofit;
00028     class Scoring;
00029     class Sequence;
00030     class BasePairs;
00031     class BasePairs__Arc;
00032     class ArcMatch;
00033     class MatchProbs;
00034     class RnaData;
00035 
00037     typedef std::vector<infty_score_t> ScoreVector;
00038 
00040     typedef std::vector<pf_score_t> PFScoreVector;
00041 
00043     typedef Matrix<infty_score_t> ScoreMatrix;
00044 
00046     typedef Matrix<pf_score_t> PFScoreMatrix;
00047 
00049     typedef Matrix<double> ProbMatrix;
00050 
00060     class ScoringParams {
00061     public:
00068         const score_t basematch;
00069 
00071         const score_t basemismatch;
00072 
00074         const score_t indel;
00075 
00077         const score_t indel_loop;
00078 
00080         const score_t indel_opening;
00081 
00084         const score_t indel_opening_loop;
00085 
00091         const RibosumFreq *ribosum;
00092 
00099         const Ribofit *ribofit;
00100 
00102         const score_t unpaired_penalty;
00103 
00108         const score_t struct_weight;
00109 
00114         const score_t tau_factor;
00115 
00117         const score_t exclusion;
00118 
00119         const double exp_probA;
00120 
00121         const double exp_probB;
00122 
00123         const double temperature_alipf;
00124 
00126         const bool stacking;
00127 
00129         const bool new_stacking;
00130 
00132         const bool mea_scoring;
00133 
00135         const score_t alpha_factor;
00136 
00138         const score_t beta_factor;
00139 
00141         const score_t gamma_factor;
00142 
00150         const score_t probability_scale;
00151 
00152         /*
00153           The mea score is composed in the following way:
00154 
00155           params->probability_scale *
00156           (
00157           sum_basematchs (i,j)
00158           P(i~j)
00159           + alpha_factor/100 * (P(i unstructured) + P(j unstructured))
00160           +
00161           sum_arcmatchs (a,b)
00162           beta_factor/100 * (P(a)+P(b)) * P(al~bl) * P(ar~br) *
00163           ribosum_arcmatch(a,b)
00164           )
00165         */
00166 
00167     public:
00194         ScoringParams(score_t basematch_,
00195                       score_t basemismatch_,
00196                       score_t indel_,
00197                       score_t indel_loop_,
00198                       score_t indel_opening_,
00199                       score_t indel_opening_loop_,
00200                       RibosumFreq *ribosum_,
00201                       Ribofit *ribofit_,
00202                       score_t unpaired_penalty_,
00203                       score_t struct_weight_,
00204                       score_t tau_factor_,
00205                       score_t exclusion_,
00206                       double exp_probA_,
00207                       double exp_probB_,
00208                       double temp_,
00209                       bool stacking_,
00210                       bool new_stacking_,
00211                       bool mea_scoring_,
00212                       score_t alpha_factor_,
00213                       score_t beta_factor_,
00214                       score_t gamma_factor_,
00215                       score_t probability_scale_)
00216             : basematch(basematch_),
00217               basemismatch(basemismatch_),
00218               indel(indel_),
00219               indel_loop(indel_loop_),
00220               indel_opening(indel_opening_),
00221               indel_opening_loop(indel_opening_loop_),
00222               ribosum(ribosum_),
00223               ribofit(ribofit_),
00224               unpaired_penalty(unpaired_penalty_),
00225               struct_weight(struct_weight_),
00226               tau_factor(tau_factor_),
00227               exclusion(exclusion_),
00228               exp_probA(exp_probA_),
00229               exp_probB(exp_probB_),
00230               temperature_alipf(temp_),
00231               stacking(stacking_),
00232               new_stacking(new_stacking_),
00233               mea_scoring(mea_scoring_),
00234               alpha_factor(alpha_factor_),
00235               beta_factor(beta_factor_),
00236               gamma_factor(gamma_factor_),
00237               probability_scale(probability_scale_) {}
00238     };
00239 
00278     class Scoring {
00279     public:
00280         typedef BasePairs__Arc Arc; 
00281 
00282     private:
00283         const ScoringParams *params; 
00284 
00285         const ArcMatches *arc_matches; 
00286 
00287         const MatchProbs *match_probs; 
00288 
00289         const RnaData &rna_dataA; 
00290         const RnaData &rna_dataB; 
00291         const Sequence &seqA;     
00292         const Sequence &seqB;     
00293 
00298         score_t lambda_;
00299 
00300     public:
00317         Scoring(const Sequence &seqA,
00318                 const Sequence &seqB,
00319                 const RnaData &rna_dataA,
00320                 const RnaData &rna_dataB,
00321                 const ArcMatches &arc_matches,
00322                 const MatchProbs *match_probs,
00323                 const ScoringParams &params,
00324                 bool exp_scores = false);
00325 
00336         void
00337         modify_by_parameter(score_t lambda);
00338 
00346         void
00347         apply_unpaired_penalty();
00348 
00354         score_t
00355         lambda() const {
00356             return lambda_;
00357         }
00358 
00359     private:
00360         // ------------------------------
00361         // tables for precomputed score contributions
00362         //
00363         Matrix<score_t>
00364             sigma_tab; 
00365 
00366         std::vector<score_t> gapcost_tabA; 
00367         std::vector<score_t> gapcost_tabB; 
00368 
00369         std::vector<score_t> weightsA; //<! weights of base pairs in A
00370         std::vector<score_t> weightsB; //<! weights of base pairs in B
00371 
00372         std::vector<score_t> stack_weightsA; //<! weights of stacked base
00373         //<! pairs in A
00374         std::vector<score_t> stack_weightsB; //<! weights of stacked base
00375         //<! pairs in B
00376 
00377         // ------------------------------
00378         // tables for precomputed exp score contributions for partition function
00379         //
00380         Matrix<pf_score_t>
00381             exp_sigma_tab; 
00382         pf_score_t exp_indel_opening_score; 
00383 
00384         pf_score_t exp_indel_opening_loop_score; 
00385 
00386 
00387         std::vector<pf_score_t>
00388             exp_gapcost_tabA; 
00389         std::vector<pf_score_t>
00390             exp_gapcost_tabB; 
00391 
00392         Matrix<size_t> identity; 
00393 
00394         void
00395         precompute_sequence_identities();
00396 
00405         score_t
00406         round2score(double d) const {
00407             return (score_t)((d < 0) ? (d - 0.5) : (d + 0.5));
00408         }
00409 
00419         score_t
00420         sigma_(int i, int j) const;
00421 
00427         void
00428         precompute_sigma();
00429 
00435         void
00436         precompute_exp_sigma();
00437 
00439         void
00440         precompute_gapcost();
00441 
00443         void
00444         precompute_exp_gapcost();
00445 
00447         void
00448         precompute_weights();
00449 
00459         void
00460         precompute_weights(const RnaData &rna_data,
00461                            const BasePairs &bps,
00462                            double exp_prob,
00463                            std::vector<score_t> &weights,
00464                            std::vector<score_t> &stack_weights);
00465 
00475         score_t
00476         probToWeight(double p, double prob_exp) const;
00477 
00482         double
00483         ribosum_arcmatch_prob(const Arc &arcA, const Arc &arcB) const;
00484 
00498         score_t
00499         riboX_arcmatch_score(const Arc &arcA, const Arc &arcB) const;
00500 
00501         pf_score_t
00502         boltzmann_weight(score_t s) const {
00503             return exp(s / (pf_score_t)params->temperature_alipf);
00504         }
00505 
00507         void
00508         subtract(std::vector<score_t> &v, score_t x) const;
00509 
00511         void
00512         subtract(Matrix<score_t> &m, score_t x) const;
00513 
00514     public:
00515         // ------------------------------------------------------------
00516         // SCORE CONTRIBUTIONS
00517 
00526         score_t
00527         basematch(size_type i, size_type j) const {
00528             return sigma_tab(i, j);
00529         }
00530 
00539         pf_score_t
00540         exp_basematch(size_type i, size_type j) const {
00541             return exp_sigma_tab(i, j);
00542         }
00543 
00561         score_t
00562         arcmatch(const ArcMatch &am, bool stacked = false) const;
00563 
00581         score_t
00582         arcmatch(const BasePairs__Arc &arcA,
00583                  const BasePairs__Arc &arcB,
00584                  bool stacked = false) const;
00585 
00595         template <bool gapAorB>
00596         score_t
00597         arcDel(const BasePairs__Arc &arc, bool stacked = false) const;
00598 
00606         pf_score_t
00607         exp_arcmatch(const ArcMatch &am) const {
00608             return boltzmann_weight(arcmatch(am));
00609         }
00610 
00618         score_t
00619         arcmatch_stacked(const ArcMatch &am) const {
00620             return arcmatch(am, true);
00621         }
00622 
00632         template <bool gapInA>
00633         inline
00634         score_t
00635         gapX(size_type alignedToGap) const;
00636 
00644         score_t
00645         gapA(size_type posA) const {
00646             assert(1 <= posA && posA <= seqA.length());
00647 
00648             return gapcost_tabA[posA];
00649         }
00650 
00658         pf_score_t
00659         exp_gapA(size_type posA) const {
00660             assert(1 <= posA && posA <= seqA.length());
00661             return exp_gapcost_tabA[posA];
00662         }
00663 
00671         score_t
00672         gapB(size_type posB) const {
00673             assert(1 <= posB && posB <= seqB.length());
00674 
00675             return gapcost_tabB[posB];
00676         }
00677 
00685         pf_score_t
00686         exp_gapB(size_type posB) const {
00687             assert(1 <= posB && posB <= seqB.length());
00688 
00689             return exp_gapcost_tabB[posB];
00690         }
00691 
00693         score_t
00694         exclusion() const {
00695             return params->exclusion;
00696         }
00697 
00699         score_t
00700         indel_opening() const {
00701             return params->indel_opening;
00702         }
00703 
00705         score_t
00706         loop_indel_score(const score_t score) const {
00707             return round2score(score * params->indel_loop / params->indel);
00708         }
00710         score_t
00711         indel_opening_loop() const {
00712             return params->indel_opening_loop;
00713         }
00714 
00716         pf_score_t
00717         exp_indel_opening() const {
00718             return exp_indel_opening_score;
00719         }
00720 
00722         pf_score_t
00723         exp_indel_opening_loop() const {
00724             return exp_indel_opening_loop_score;
00725         }
00726 
00727         //
00728         // ------------------------------------------------------------
00729 
00739         double
00740         prob_exp(size_type len) const;
00741 
00747         bool
00748         stacking() const {
00749             return params->stacking || params->new_stacking;
00750         }
00751 
00759         bool
00760         is_stackable_arcA(const Arc &a) const;
00761 
00769         bool
00770         is_stackable_arcB(const Arc &a) const;
00771 
00779         bool
00780         is_stackable_am(const ArcMatch &am) const;
00781 
00782     }; // end class Scoring
00783 
00784     template <bool isA>
00785     score_t
00786     Scoring::arcDel(const Arc &arcX,
00787                     bool stacked) const { // TODO Important Scoring scheme for
00788                                           // aligning an arc to a gap is not
00789                                           // defined and implemented!
00790 
00791         if (arc_matches
00792                 ->explicit_scores()) { // will not take stacking into account!!!
00793             std::cerr
00794                 << "ERROR sparse explicit scores is not supported!"
00795                 << std::endl; // TODO: Supporting explicit scores for arcgap
00796             assert(!arc_matches->explicit_scores());
00797         }
00798 
00799         if (!params->mea_scoring) {
00800             return
00801                 // base pair weights
00802                 loop_indel_score(
00803                     gapX<isA>(arcX.left()) +
00804                     gapX<isA>(arcX.right())) + // score of aligining base-pair
00805                                                // ends wo gap, such that it is
00806                                                // multiply by gamma_loop ratio
00807                 (stacked ? (isA ? stack_weightsA[arcX.idx()]
00808                                 : stack_weightsB[arcX.idx()])
00809                          : (isA ? weightsA[arcX.idx()] : weightsB[arcX.idx()]));
00810         } else {
00811             std::cerr << "ERROR sparse mea_scoring is not supported!"
00812                       << std::endl; // TODO: Supporting mea_scoring for arcgap
00813             assert(!params->mea_scoring);
00814             return 0;
00815         }
00816     }
00817 
00818     template <>
00819     inline
00820     score_t
00821     Scoring::gapX<true>(size_type alignedToGap) const {
00822         return gapA(alignedToGap);
00823     }
00824 
00825     template <>
00826     inline
00827     score_t
00828     Scoring::gapX<false>(size_type alignedToGap) const {
00829         return gapB(alignedToGap);
00830     }
00831 
00832 } // end namespace LocARNA
00833 
00834 #endif // LOCARNA_SCORING_HH