% Frontiers journals identify articles by article number instead of a page
% range; the number is stored in the pages field.
@article{Wallach-2023,
  author   = {Wallach, Thomas and Raden, Martin and Hinkelmann, Lukas and Brehm, Mariam and Rabsch, Dominik and Weidling, Hannah and Kr{\"u}ger, Christina and Kettenmann, Helmut and Backofen, Rolf and Lehnardt, Seija},
  title    = {Distinct {SARS-CoV-2} {RNA} fragments activate {Toll}-like receptors 7 and 8 and induce cytokine release from human macrophages and microglia},
  journal  = {Frontiers in Immunology},
  volume   = {13},
  pages    = {1066456},
  year     = {2023},
  doi      = {10.3389/fimmu.2022.1066456},
  issn     = {1664-3224},
  abstract = {The pandemic coronavirus disease 19 (COVID-19) is caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) and is marked by thromboembolic events and an inflammatory response throughout the body, including the brain. Employing the machine learning approach BrainDead we systematically screened for SARS-CoV-2 genome-derived single-stranded (ss) RNA fragments with high potential to activate the viral RNA-sensing innate immune receptors Toll-like receptor (TLR)7 and/or TLR8. Analyzing HEK TLR7/8 reporter cells we tested such RNA fragments with respect to their potential to induce activation of human TLR7 and TLR8 and to activate human macrophages, as well as iPSC-derived human microglia, the resident immune cells in the brain. We experimentally validated several sequence-specific RNA fragment candidates out of the SARS-CoV-2 RNA fragments predicted in silico as activators of human TLR7 and TLR8. Moreover, these SARS-CoV-2 ssRNAs induced cytokine release from human macrophages and iPSC-derived human microglia in a sequence- and species-specific fashion.
Our findings determine TLR7 and TLR8 as key sensors of SARS-CoV-2-derived ssRNAs and may deepen our understanding of the mechanisms how this virus triggers, but also modulates an inflammatory response through innate immune signaling.},
  user     = {mmann}
}

% The doi field holds the bare identifier; styles add the resolver prefix.
@article{Rodrigues2022,
  author   = {Gleice Gomes Rodrigues and Martin Raden and Luciana Duque Silva and Hans-Peter Kahle},
  title    = {Temporal annotation of high-resolution intra-annual wood density information of {Eucalyptus} urophylla and its correlation with hydroclimatic conditions},
  journal  = {Dendrochronologia},
  volume   = {74},
  pages    = {125978},
  year     = {2022},
  issn     = {1125-7865},
  doi      = {10.1016/j.dendro.2022.125978},
  user     = {mmann},
  abstract = {Three different Eucalyptus urophylla clones grown under two different spacing regimes in an experimental site in the state of São Paulo, Brazil, were analyzed to test effects of clone identity, spacing, cambial age and hydroclimatic conditions on high-resolution intra-annual wood density profiles. Since distinct periodic tree-ring boundaries were not visible on the stem cross-sectional surfaces, finding an alternative method for synchronization of density profiles was crucial for the analysis. The challenge was to generate intra- and inter-tree synchronized density profiles that possess high amplitude variation and low phase variation. Thus, we developed a protocol and workflow of how such high-resolution density profiles can be spatially aligned and temporally annotated to enable correlation analyses between trees and with time series of environmental stimuli. Mean wood density was significantly different between clones, but not between the spacings. Wood density increased significantly with increasing cambial age and decreasing growth rate. Principal component analysis showed that the overall variability in the temporally annotated density profiles is dominated by a highly significant common signal.
We found significant negative correlation values for precipitation, indicating that water supply is the main driver of stem growth at the site, and providing evidence for the correctness of the method. The developed workflow can easily be adjusted to the analysis of other intra-annual tree-ring features like anatomical xylem cell traits or isotopic signals in the wood. It has a large potential to be used as a general guideline for the synchronization of intra-annual tree-ring traits, especially when distinct tree-ring boundaries are missing, as it is often the case under tropical climatic conditions. The workflow supports the development of spatially aligned and temporally annotated chronologies under non-annual growth rhythms.}
}

% Web-server issue of Nucleic Acids Research: issue and page labels carry a
% W prefix. The electronic ISSN is kept in a separate eissn field because a
% repeated issn field is rejected or ignored by BibTeX tools.
@article{Alkhnbashi-CRISPRloci,
  author   = {Alkhnbashi, Omer S. and Alexander Mitrofanov and Robson Bonidia and Martin Raden and Tran, Van Dinh and Florian Eggenhofer and Shah, Shiraz A. and {\"O}zt{\"u}rk, Ekrem and Padilha, Victor A. and Sanches, Danilo S. and de Carvalho, Andre C. P. L. F. and Rolf Backofen},
  title    = {{CRISPRloci}: comprehensive and accurate annotation of {CRISPR-Cas} system},
  journal  = {Nucleic Acids Research},
  year     = {2021},
  volume   = {49},
  number   = {W1},
  pages    = {W125--W130},
  user     = {mmann},
  doi      = {10.1093/nar/gkab456},
  issn     = {0305-1048},
  eissn    = {1362-4962},
  abstract = {CRISPR-Cas systems are adaptive immune systems in prokaryotes, providing resistance against invading viruses and plasmids. The identification of CRISPR loci is currently a non-standardized, ambiguous process, requiring the manual combination of multiple tools, where existing tools detect only parts of the CRISPR-systems, and lack quality control, annotation and assessment capabilities of the detected CRISPR loci. Our CRISPRloci server provides the first resource for the prediction and assessment of all possible CRISPR loci.
The server integrates a series of advanced Machine Learning tools within a seamless web interface featuring: (i) prediction of all CRISPR arrays in the correct orientation; (ii) definition of CRISPR leaders for each locus; and (iii) annotation of cas genes and their unambiguous classification. As a result, CRISPRloci is able to accurately determine the CRISPR array and associated information, such as: the Cas subtypes; cassette boundaries; accuracy of the repeat structure, orientation and leader sequence; virus-host interactions; self-targeting; as well as the annotation of cas genes, all of which have been missing from existing tools. This annotation is presented in an interactive interface, making it easy for scientists to gain an overview of the CRISPR system in their organism of interest. Predictions are also rendered in GFF format, enabling in-depth genome browser inspection. In summary, CRISPRloci constitutes a full suite for CRISPR-Cas system characterization that offers annotation quality previously available only after manual inspection. }
}

% Frontiers article: the article number from the DOI suffix is stored in the
% pages field; there is no issue number.
@article{Parise-2021,
  year      = {2021},
  publisher = {Frontiers},
  volume    = {12},
  pages     = {656435},
  doi       = {10.3389/fmicb.2021.656435},
  issn      = {1664-302X},
  abstract  = "Small RNAs (sRNAs) are one of the key players in the post-transcriptional regulation of bacterial gene expression. These molecules, together with transcription factors, form regulatory networks and greatly influence the bacterial regulatory landscape. Little is known concerning sRNAs and their influence on the regulatory machinery in the genus Corynebacterium, despite its medical, veterinary and biotechnological importance. We integrate sRNAs and their regulatory interactions into the transcriptional regulatory networks of six corynebacterial species, covering four human and animal pathogens, and integrate this data into the CoryneRegNet database.
To this end, we predicted sRNAs to regulate 754 genes, including 206 transcription factors, in corynebacterial gene regulatory networks. Amongst them, the sRNA Cd-NCTC13129-sRNA-2 is predicted to directly regulate ydfH , which indirectly regulates 66 genes, including the global regulator glxR in C. diphtheriae . All of the sRNA-enriched regulatory networks of the genus Corynebacterium are made publicly available in the newest release of CoryneRegNet ( www.exbio.wzw.tum.de/coryneregnet/ ) to aid in providing valuable insights and to guide future experiments.",
  author    = {Mariana T. D. Parise and Doglas Parise and Flavia F. Aburjaile and Anne C. P. Gomide and Rodrigo B. Kato and Martin Raden and Rolf Backofen and Vasco A. {de Carvalho Azevedo} and Jan Baumbach},
  title     = {An integrated database of small {RNA}s and their interplay with transcriptional gene regulatory networks in corynebacteria},
  journal   = {Frontiers in Microbiology},
  user      = {mmann},
  note      = {(epub ahead of print)}
}

% Molecular Neurodegeneration is an online-only journal: 80 is the article
% number within volume 16, so it is stored in pages rather than number.
% NOTE(review): the ISSN was replaced with that journal's own; verify it
% against the journal record.
@article{Wallach-2021,
  year      = {2021},
  publisher = {Springer Nature},
  volume    = {16},
  pages     = {80},
  doi       = {10.1186/s13024-021-00498-5},
  issn      = {1750-1326},
  abstract  = "Results: We identified a specific pattern of miRNAs released from apoptotic cortical neurons that activate TLR7 and/or TLR8, depending on sequence and species. Exposure of microglia and macrophages to certain miRNA classes released from apoptotic neurons resulted in the sequence-specific production of distinct cytokines/ chemokines and increased phagocytic activity. Out of those miRNAs miR-100-5p and miR-298-5p, which have consistently been linked to neurodegenerative diseases, entered microglia, located to their endosomes, and directly bound to human TLR8. The miRNA-TLR interaction required novel sequence features, but no specific structure formation of mature miRNA. As a consequence of miR-100-5p- and miR-298-5p-induced TLR activation, cortical neurons underwent cell-autonomous apoptosis.
Presence of miR-100-5p and miR-298-5p in cerebrospinal fluid led to neurodegeneration and microglial accumulation in the murine cerebral cortex through TLR7 signaling. Conclusion: Our data demonstrate that specific miRNAs are released from apoptotic cortical neurons, serve as endogenous TLR7/8 ligands, and thereby trigger further neuronal apoptosis in the CNS. Our findings underline the recently discovered role of miRNAs as extracellular signaling molecules, particularly in the context of neurodegeneration. Keywords: Extracellular microRNAs, Endogenous Toll-like receptor ligands, Cortical neurons, Neuronal apoptosis, Microglia, Neurodegeneration, miRNA microarray",
  author    = {Thomas Wallach and Zoe Mossmann and Michal Szczepek and Max Wetzel and Rui Machado and Martin Raden and Milad Miladi and Gunnar Kleinau and Christina Kr{\"u}ger and Paul Dembny and Drew Adler and Yuanyuan Zhai and Omar Dzaye and Matthias Futschik and Rolf Backofen and Patrick Scheerer and Seija Lehnardt},
  title     = {MicroRNA-100-5p and microRNA-298-5p released from apoptotic cortical neurons are endogenous {Toll}-like receptor 7/8 ligands that contribute to neurodegeneration},
  journal   = {Molecular Neurodegeneration},
  user      = {mmann}
}

% MDPI article: pages holds the article number within the issue.
@article{Stangler-2021,
  year      = {2021},
  publisher = {MDPI},
  volume    = {12},
  number    = {3},
  pages     = {274},
  doi       = {10.3390/f12030274},
  issn      = {1999-4907},
  abstract  = "{Research Highlights: Our results provide novel perspectives on the effectiveness and collapse of compensatory mechanisms of tracheid development of Norway spruce during intra-seasonal drought and the environmental control of intra-annual density fluctuations. Background and Objectives: This study aimed to compare and integrate complementary methods of investigating intra-annual wood formation dynamics to gain a better understanding of the endogenous and environmental control of tree-ring development and the impact of anticipated climatic changes on forest growth and productivity.
Materials and Methods: We performed an integrated analysis of xylogenesis observations, quantitative wood anatomy and point-dendrometer measurements of Norway spruce (Picea abies (L.) Karst.) trees growing along an elevational gradient in south-western Germany during a growing season with an anomalous dry June followed by an extraordinary humid July. Results: Strong endogenous control of tree-ring formation was suggested at the highest elevation where the decreasing rates of tracheid enlargement and wall thickening during drought were effectively compensated by increased cell differentiation duration. A shift to environmental control of tree-ring formation during drought was indicated at the lowest elevation, where we detected absence of compensatory mechanisms, eventually stimulating the formation of an intra-annual density fluctuation. Transient drought stress in June also led to bimodal patterns and decreasing daily rates of stem radial displacement, radial xylem growth and woody biomass production. Comparing xylogenesis data with dendrometer measurements showed ambivalent results and it appears that with decreasing daily rates of radial xylem growth, the signal-to-noise ratio in dendrometer time series between growth and fluctuations of tree water status becomes increasingly detrimental. Conclusions: Our study provides new perspectives into the complex interplay between rates and durations of tracheid development during dry-wet cycles, and thereby contributes to an improved and mechanistic understanding of the environmental control of wood formation processes leading to the formation of intra-annual density fluctuations in tree-rings of Norway spruce.}", author = {Dominik F. 
Stangler and Hans-Peter Kahle and Martin Raden and Elena Larysch and Thomas Seifert and Heinrich Spiecker},
  title     = {Effects of intra-seasonal drought on kinetics of tracheid differentiation and seasonal growth dynamics of {Norway} spruce along an elevational gradient},
  journal   = {Forests},
  user      = {mmann}
}

% The electronic ISSN is kept in a separate eissn field because a repeated
% issn field is rejected or ignored by BibTeX tools.
@article{Raden-2021-BrainDead,
  year      = {2021},
  volume    = {18},
  number    = {sup1},
  pages     = {268--277},
  doi       = {10.1080/15476286.2021.1940697},
  issn      = {1547-6286},
  eissn     = {1555-8584},
  abstract  = "{MicroRNAs (miRNAs) are about 22 nucleotides long and have been linked to various human diseases. They can serve as activation signals for membrane receptors, a recently discovered function that is independent of the miRNAs' conventional role in post-transcriptional gene regulation. Here, we introduce a machine learning approach, BrainDead, to identify oligonucleotides that act as ligands for single-stranded RNA-detecting Toll-like receptors (TLR)7/8, thereby triggering an immune response. BrainDead was trained on activation data obtained from in vitro experiments on murine microglia, the resident immune cells in the brain, incorporating sequence, intra-molecular structure, as well as inter-molecular homo-dimerization potential of candidate RNAs. It was applied to analyze all known human miRNAs regarding their potential to induce TLR7/8 signaling and microglia activation. We validated the predicted functional activity of subsets of high- and low-scoring miRNAs experimentally, of which a selection has been linked to Alzheimer's disease, the most common cause of dementia in humans. High agreement of predictions and experiments confirms the robustness and power of BrainDead. The results provide new insight into the mechanisms how miRNAs act as TLR ligands.
Eventually, BrainDead implements a generic machine learning methodology for learning and predicting functions of short RNAs in any context.}",
  author    = {Martin Raden and Thomas Wallach and Milad Miladi and Yuanyuan Zhai and Christina Kr{\"u}ger and Zoe J. Mossmann and Paul Dembny and Rolf Backofen and Seija Lehnardt},
  title     = {Structure-aware machine learning classification of oligonucleotide-induced immune response identifies {microRNAs} operating as {Toll}-like receptor 7/8 ligands},
  journal   = {RNA Biology},
  user      = {mmann},
  note      = {(MR, TW, MM contributed equally)}
}

% The electronic ISSN is kept in a separate eissn field because a repeated
% issn field is rejected or ignored by BibTeX tools.
@article{Raden-2020-CopomuS,
  author   = {Raden, Martin and Gutmann, Fabio and Uhl, Michael and Backofen, Rolf},
  title    = {{CopomuS} - ranking compensatory mutations to guide {RNA-RNA} interaction verification experiments},
  journal  = {International Journal of Molecular Sciences},
  volume   = {21},
  number   = {11},
  pages    = {3852},
  doi      = {10.3390/ijms21113852},
  year     = {2020},
  note     = {(This article belongs to the Special Issue RNA Structure Prediction)},
  issn     = {1661-6596},
  eissn    = {1422-0067},
  user     = {mmann},
  abstract = { In silico RNA-RNA interaction prediction is widely applied to identify putative interaction partners and to assess interaction details in base pair resolution. To verify specific interactions, in vitro evidence can be obtained via compensatory mutation experiments. Unfortunately, the selection of compensatory mutations is non-trivial and typically based on subjective ad hoc decisions. To support the decision process, we introduce our COmPensatOry MUtation Selector CopomuS. CopomuS evaluates the effects of mutations on RNA-RNA interaction formation using a set of objective criteria, and outputs a reliable ranking of compensatory mutation candidates. For RNA-RNA interaction assessment, the state-of-the-art IntaRNA prediction tool is applied. We investigate characteristics of successfully verified RNA-RNA interactions from the literature, which guided the design of CopomuS.
Finally, we evaluate its performance based on experimentally validated compensatory mutations of prokaryotic sRNAs and their target mRNAs. CopomuS predictions highly agree with known results, making it a valuable tool to support the design of verification experiments for RNA-RNA interactions. It is part of the IntaRNA package and available as stand-alone webserver for ad hoc application. }
}

% Web-server issue of Nucleic Acids Research: issue and page labels carry a
% W prefix. The month is given as the unquoted three-letter macro so that
% styles can localise it.
@article{Miladi-2020-MutaRNA,
  author   = {Miladi, Milad and Raden, Martin and Diederichs, Sven and Backofen, Rolf},
  title    = {{MutaRNA}: analysis and visualization of mutation-induced changes in {RNA} structure},
  journal  = {Nucleic Acids Research},
  volume   = {48},
  number   = {W1},
  pages    = {W287--W291},
  note     = {(MM and MR contributed equally)},
  year     = {2020},
  month    = may,
  issn     = {0305-1048},
  doi      = {10.1093/nar/gkaa331},
  url      = {https://doi.org/10.1093/nar/gkaa331},
  eprint   = {https://academic.oup.com/nar/advance-article-pdf/doi/10.1093/nar/gkaa331/33202973/gkaa331.pdf},
  user     = {mmann},
  abstract = {RNA molecules fold into complex structures as a result of intramolecular interactions between their nucleotides. The function of many non-coding RNAs and some cis-regulatory elements of messenger RNAs highly depends on their fold. Single-nucleotide variants (SNVs) and other types of mutations can disrupt the native function of an RNA element by altering its base pairing pattern. Identifying the effect of a mutation on an RNA’s structure is, therefore, a crucial step in evaluating the impact of mutations on the post-transcriptional regulation and function of RNAs within the cell. Even though a single nucleotide variation can have striking impacts on the structure formation, interpreting and comparing the impact usually needs expertise and meticulous efforts. Here, we present MutaRNA, a web server for visualization and interpretation of mutation-induced changes on the RNA structure in an intuitive and integrative fashion.
To this end, probabilities of base pairing and position-wise unpaired probabilities of wildtype and mutated RNA sequences are computed and compared. Differential heatmap-like dot plot representations in combination with circular plots and arc diagrams help to identify local structure abberations, which are otherwise hidden in standard outputs. Eventually, MutaRNA provides a comprehensive and comparative overview of the mutation-induced changes in base pairing potentials and accessibility. The MutaRNA web server is freely available at http://rna.informatik.uni-freiburg.de/MutaRNA.}
}

% The citation key keeps its 2019 suffix for existing citations; the year
% field records 2020. The article has no issue number, so the empty number
% field is omitted.
@article{Raden-crossdating-2019,
  author   = {Martin Raden and Alexander Mattheis and Hans-Peter Kahle and Heinrich Spiecker and Rolf Backofen},
  title    = {The potential of intra-annual density information for crossdating of short tree-ring series},
  journal  = {Dendrochronologia},
  year     = {2020},
  volume   = {60},
  issn     = {1125-7865},
  pages    = {125679},
  doi      = {10.1016/j.dendro.2020.125679},
  user     = {mmann},
  abstract = {The crossdating of tree-ring series is typically based on tree-ring width sequences, which is a crude abstraction of the growth signal stored in tree rings. In contrast, intra-annual wood density data allows a much more detailed comparison of wood growth processes and new measurement techniques scale well to measure large amounts of samples. Thus, chronologies of intra-annual densitometric curves can be built. Here, we investigate to what extent intra-annual wood density information can improve crossdating. We evaluate different approaches on a data set of Norway spruce trees (Picea abies) and compare the results to standard methods that are based on ring width or maximum density.
Our results show that intra-annual densitometric data indeed increases crossdating success rate notably for short tree ring series that cover less than 25 years.}
}

% BMC Bioinformatics article; pages holds the article number within the
% volume.
@article{Raden-IntaRNA-benchmark-2019,
  author   = {Martin Raden and Teresa M{\"u}ller and Stefan Mautner and Rick Gelhausen and Rolf Backofen},
  title    = {The impact of various seed, accessibility and interaction constraints on {sRNA} target prediction - a systematic assessment},
  journal  = {BMC Bioinformatics},
  year     = {2020},
  volume   = {21},
  pages    = {15},
  doi      = {10.1186/s12859-019-3143-4},
  issn     = {1471-2105},
  user     = {mmann},
  abstract = { Seed and accessibility constraints are core features to enable highly accurate sRNA target screens based on RNA-RNA interaction prediction. Currently, available tools provide different (sets of) constraints and default parameter sets. Thus, it is hard to impossible for users to estimate the influence of individual restrictions on the prediction results. Here, we present a systematic assessment of the impact of established and new constraints on sRNA target prediction both on a qualitative as well as computational level. This is done exemplarily based on the performance of IntaRNA, one of the most exact sRNA target prediction tools. IntaRNA provides various ways to constrain considered seed interactions, e.g. based on seed length, its accessibility, minimal unpaired probabilities, or energy thresholds, beside analogous constraints for the overall interaction. Thus, our results reveal the impact of individual constraints and their combinations. This provides both a guide for users what is important and recommendations for existing and upcoming sRNA target prediction approaches. We show on a large sRNA target screen benchmark data set that only by altering the parameter set, IntaRNA recovers 30 percent more verified interactions while becoming 5-times faster. This exemplifies the potential of seed, accessibility and interaction constraints for sRNA target prediction.
}
}

% The electronic ISSN is kept in a separate eissn field because a repeated
% issn field is rejected or ignored by BibTeX tools.
@article{Gelhausen-IntaRNAhelix-2019,
  author   = {Rick Gelhausen and Sebastian Will and Ivo L. Hofacker and Rolf Backofen and Martin Raden},
  title    = {{IntaRNAhelix} - Composing {RNA-RNA} interactions from stable inter-molecular helices boosts bacterial {sRNA} target prediction},
  journal  = {Journal of Bioinformatics and Computational Biology},
  year     = {2019},
  volume   = {17},
  number   = {5},
  pages    = {1940009},
  issn     = {0219-7200},
  eissn    = {1757-6334},
  doi      = {10.1142/S0219720019400092},
  user     = {mmann},
  abstract = {Efficient computational tools for the identification of putative target RNAs regulated by prokaryotic sRNAs rely on thermodynamic models of RNA secondary structures. While they typically predict RNA-RNA interaction complexes accurately, they yield many highly-ranked false positives in target screens. One obvious source of this low specificity appears to be the disability of current secondary-structure-based models to reflect steric constraints, which nevertheless govern the kinetic formation of RNA-RNA interactions. For example, often -- even thermodynamically favorable -- extensions of short initial kissing hairpin interactions are kinetically prohibited, since this would require unwinding of intra-molecular helices as well as sterically impossible bending of the interaction helix. Another source is the consideration of instable and thus unlikely subinteractions that enable better scoring of longer interactions. In consequence, the efficient prediction methods that do not consider such effects show a high false positive rate. To increase the prediction accuracy we devise IntaRNAhelix, a dynamic programming algorithm that length-restricts the runs of consecutive inter-molecular base pairs (perfect canonical stackings), which we hypothesize to implicitely model the steric and kinetic effects. The novel method is implemented by extending the state-of-the-art tool IntaRNA.
Our comprehensive bacterial sRNA target prediction benchmark demonstrates significant improvements of the prediction accuracy and enables more than 40-times faster computations. These results indicate -- supporting our hypothesis -- that stable helix composition increases the accuracy of interaction prediction models compared to the current state-of-the-art approach.}
}

% The citation key keeps its 2019 suffix for existing citations; the year
% field records 2020. The electronic ISSN is kept in a separate eissn field
% because a repeated issn field is rejected or ignored by BibTeX tools.
@article{Entzian-2019,
  author   = {Gregor Entzian and Martin Raden},
  title    = {{pourRNA} - a time- and memory-efficient approach for the guided exploration of {RNA} energy landscapes},
  journal  = {Bioinformatics},
  year     = {2020},
  volume   = {36},
  number   = {2},
  pages    = {462--469},
  user     = {mmann},
  doi      = {10.1093/bioinformatics/btz583},
  issn     = {1367-4803},
  eissn    = {1367-4811},
  abstract = {Motivation: The folding dynamics of RNAs are typically studied via coarse-grained models of the underlying energy landscape to face the exponential growths of the RNA secondary structure space. Still, studies of exact folding kinetics based on gradient basin abstractions are currently limited to short sequence lengths due to vast memory requirements. In order to compute exact transition rates between gradient basins, state-of-the-art approaches apply global flooding schemes that require to memorize the whole structure space at once. pourRNA tackles this problem via local flooding techniques where memorization is limited to the structure ensembles of individual gradient basins. Results: Compared to the only available tool for exact gradient basin based macro state transition rates (namely barriers), pourRNA computes the same exact transition rates up to ten times faster and requires two orders of magnitude less memory for sequences that are still computationally accessible for exhaustive enumeration. Parallelized computation as well as additional heuristics further speed up computations while still producing high quality transition model approximations.
The introduced heuristics enable a guided trade-off between model quality and required computational resources. We introduce and evaluate a macroscopic direct-path heuristics to efficiently compute refolding energy barrier estimations for the co-transcriptionally trapped RNA sv11 of length 115nt. Finally, we also show how pourRNA can be used to identify folding funnels and their respective energetically lowest minima. Availability: pourRNA is freely available at https://github.com/ViennaRNA/pourRNA}
}

% LIPIcs proceedings paper: pages uses the series numbering of the form
% article:page, and volume is the LIPIcs volume number.
@inproceedings{Miladi-Pankov-2019,
  author    = {Milad Miladi and Martin Raden and Sebastian Will and Rolf Backofen},
  title     = {Fast and Accurate Structure Probability Estimation for Simultaneous Alignment and Folding of {RNAs}},
  booktitle = {19th International Workshop on Algorithms in Bioinformatics (WABI 2019)},
  pages     = {14:1--14:13},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  isbn      = {978-3-95977-123-8},
  issn      = {1868-8969},
  year      = {2019},
  volume    = {143},
  editor    = {Katharina T. Huber and Dan Gusfield},
  publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
  address   = {Dagstuhl, Germany},
  doi       = {10.4230/LIPIcs.WABI.2019.14},
  user      = {mmann},
  abstract  = {Motivation: Simultaneous alignment and folding (SAF) of RNAs is the indispensable gold standard for inferring the structure of non-coding RNAs and their general analysis. The original algorithm, proposed by Sankoff, solves the theoretical problem exactly with a complexity of $O(n^6)$ in the full energy model. Over the last two decades, several variants and improvements of the Sankoff algorithm have been proposed to reduce its extreme complexity by proposing simplified energy models or imposing restrictions on the predicted alignments. Results: Here we introduce a novel variant of Sankoff's algorithm that reconciles the simplifications of PMcomp, namely moving from the full energy model to a simpler base pair-based model, with the accuracy of the loop-based full energy model.
Instead of estimating pseudo-energies from unconditional base pair probabilities, our model calculates energies from conditional base pair probabilities that allow to accurately capture structure probabilities, which obey a conditional dependency. Supporting modifications with surgical precision, this model gives rise to the fast and highly accurate novel algorithm Pankov (Probabilistic Sankoff-like simultaneous alignment and folding of RNAs inspired by Markov chains). Pankov benefits from the speed-up of excluding unreliable base-pairing without compromising the loop-based free energy model of the Sankoff's algorithm. We show that Pankov outperforms its predecessors LocARNA and SPARSE in folding quality and is faster than LocARNA. Pankov is developed as a branch of the LocARNA package and available at https://github.com/mmiladi/Pankov.}
}

% Tool name and acronyms in the title are brace-protected so that
% sentence-casing styles keep their capitalisation. The month is given as
% the unquoted three-letter macro.
@article{Mautner-SHAKER-2019,
  author   = {Stefan Mautner and Soheila Montaseri and Milad Miladi and Martin Raden and Fabrizio Costa and Rolf Backofen},
  title    = {{ShaKer}: {RNA} {SHAPE} prediction using graph kernel},
  journal  = {Bioinformatics},
  year     = {2019},
  volume   = {35},
  number   = {14},
  pages    = {i354--i359},
  month    = jul,
  user     = {mmann},
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/btz395},
  abstract = {SHAPE experiments are used to probe the structure of RNA molecules. We present ShaKer to predict SHAPE data for RNA using a graph-kernel-based machine learning approach that is trained on experimental SHAPE information. While other available methods require a manually curated reference structure, ShaKer predicts reactivity data based on sequence input only and by sampling the ensemble of possible structures. Thus, ShaKer is well placed to enable experiment-driven, transcriptome-wide SHAPE data prediction to enable the study of RNA structuredness and to improve RNA structure and RNA-RNA interaction prediction. For performance evaluation we use accuracy and accessibility comparing to experimental SHAPE data and competing methods.
We can show that Shaker outperforms its competitors and is able to predict high quality SHAPE annotations even when no reference structure is provided. ShaKer is freely available at https://github.com/BackofenLab/ShaKer}
}

% Conference paper; the city where the conference took place is stored in
% venue, since address/location denote the publisher's city.
@inproceedings{Gelhausen-helixLength-2019,
  author       = {Rick Gelhausen and Sebastian Will and Hofacker, Ivo L. and Rolf Backofen and Martin Raden},
  title        = {Constraint Maximal Inter-molecular Helix Lengths within {RNA-RNA} Interaction Prediction Improves Bacterial {sRNA} Target Prediction},
  booktitle    = {Proceedings of the 12th International Joint Conference on Biomedical Engineering Systems and Technologies - Volume 3: BIOINFORMATICS},
  year         = {2019},
  pages        = {131--140},
  publisher    = {SciTePress},
  organization = {INSTICC},
  venue        = {Prague, Czech Republic},
  doi          = {10.5220/0007689701310140},
  isbn         = {978-989-758-353-7},
  user         = {mmann},
  abstract     = {Efficient computational tools for the identification of putative target RNAs regulated by prokaryotic sRNAs rely on thermodynamic models of RNA secondary structures. While they typically predict RNA-RNA interaction complexes accurately, they yield many highly-ranked false positives in target screens. One obvious source of this low specificity appears to be the disability of current secondary-structure-based models to reflect steric constraints, which nevertheless govern the kinetic formation of RNA-RNA interactions. For example, often -- even thermodynamically favorable -- extensions of short initial kissing hairpin interactions are kinetically prohibited, since this would require unwinding of intra-molecular helices as well as sterically impossible bending of the interaction helix. In consequence, the efficient prediction methods, which do not consider such effects, predict over-long helices.
To increase the prediction accuracy, we devise a dynamic programming algorithm that length-restricts the runs of consecutive inter-molecular base pairs (perfect canonical stackings), which we hypothesize to implicitely model the steric and kinetic effects. The novel method is implemented by extending the state-of-the-art tool IntaRNA. Our comprehensive bacterial sRNA target prediction benchmark demonstrates significant improvements of the prediction accuracy and enables 3-4 times faster computations. These results indicate -- supporting our hypothesis -- that length-limitations on inter-molecular subhelices increase the accuracy of interaction prediction models compared to the current state-of-the-art approach.}
}

% The citation key keeps its 2018 suffix for existing citations; the year
% field records 2019.
@article{Miladi-2018-shape,
  author    = {Miladi, Milad and Montaseri, Soheila and Backofen, Rolf and Raden, Martin},
  title     = {Integration of accessibility data from structure probing into {RNA-RNA} interaction prediction},
  journal   = {Bioinformatics},
  volume    = {35},
  number    = {16},
  pages     = {2862--2864},
  year      = {2019},
  issn      = {1367-4803},
  doi       = {10.1093/bioinformatics/bty1029},
  publisher = {Oxford University Press},
  abstract  = { Experimental structure probing data has been shown to improve thermodynamics-based RNA secondary structure prediction. To this end, chemical reactivity information (as provided e.g. by SHAPE) is incorporated, which encodes whether or not individual nucleotides are involved in intra-molecular structure. Since inter-molecular RNA-RNA interactions are often confined to unpaired RNA regions, SHAPE data is even more promising to improve interaction prediction. Here we show how such experimental data can be incorporated seamlessly into accessibility-based RNA-RNA interaction prediction approaches, as implemented in IntaRNA. This is possible via the computation and use of unpaired probabilities that incorporate the structure probing information. We show that experimental SHAPE data can significantly improve RNA-RNA interaction prediction.
We evaluate our approach by investigating interactions of a spliceosomal U1 snRNA transcript with its target splice sites. When SHAPE data is incorporated, known target sites are predicted with increased precision and specificity. Keywords: RNA-RNA interaction prediction, accessibility, RNA structure probing, RNA secondary structure, chemical footprinting, SHAPE. Availability: https://github.com/BackofenLab/IntaRNA},
  user = {miladim}
}

@article{Gruening-2018-bioconda,
  author = {Gr{\"u}ning, Bj{\"o}rn and Dale, Ryan and Sj{\"o}din, Andreas and Chapman, Brad A. and Rowe, Jillian and Tomkins-Tinch, Christopher H. and Valieris, Renan and {Bioconda Team} and K{\"o}ster, Johannes},
  title = {{Bioconda}: sustainable and comprehensive software distribution for the life sciences},
  year = {2018},
  doi = {10.1038/s41592-018-0046-7},
  abstract = {We present Bioconda (https://bioconda.github.io), a distribution of bioinformatics software for the lightweight, multi-platform and language-agnostic package manager, Conda. Currently, Bioconda offers a collection of over 2900 software tools, which are continuously maintained, updated, and extended by a growing global community of more than 200 contributors. Bioconda improves analysis reproducibility by allowing users to define isolated environments with defined software versions, all of which are easily installed and managed without administrative privileges.},
  journal = {Nature Methods},
  issn = {1548-7105},
  volume = {15},
  number = {7},
  pages = {475-476},
  user = {mmann},
  note = {preprint at bioRxiv https://doi.org/10.1101/207092}
}

@article{Raden-2018-teaching,
  author = {Raden, Martin and Mohamed, Mostafa M and Ali, Syed M and Backofen, Rolf},
  title = {Interactive implementations of thermodynamics-based {RNA} structure and {RNA-RNA} interaction prediction approaches for example-driven teaching},
  journal = {PLOS Comput.
Biol},
  volume = {14},
  number = {8},
  pages = {e1006341},
  year = {2018},
  doi = {10.1371/journal.pcbi.1006341},
  issn = {1553-734X},
  issn = {1553-7358},
  user = {mmann},
  abstract = {The investigation of RNA-based regulation of cellular processes is becoming an increasingly important part of biological or medical research. For the analysis of this type of data, RNA-related prediction tools are integrated into many pipelines and workflows. In order to correctly apply and tune these programs, the user has to have a precise understanding of their limitations and concepts. Within this manuscript, we provide the mathematical foundations and extract the algorithmic ideas that are core to state-of-the-art RNA structure and RNA-RNA interaction prediction algorithms. To allow the reader to change and adapt the algorithms or to play with different inputs, we provide an open-source web interface to JavaScript implementations and visualizations of each algorithm. The conceptual, teaching-focused presentation enables a high-level survey of the approaches while providing sufficient details for understanding important concepts. This is boosted by the simple generation and study of examples using the web interface available under http://rna.informatik.uni-freiburg.de/Teaching/.
In combination, we provide a valuable resource for teaching, learning and understanding the discussed prediction tools and thus enable a more informed analysis of RNA-related effects.}
}

@article{Raden-2018-websrv,
  author = {Raden, Martin and Ali, Syed M and Alkhnbashi, Omer S and Busch, Anke and Costa, Fabrizio and Davis, Jason A and Eggenhofer, Florian and Gelhausen, Rick and Georg, Jens and Heyne, Steffen and Hiller, Michael and Kundu, Kousik and Kleinkauf, Robert and Lott, Steffen C and Mohamed, Mostafa M and Mattheis, Alexander and Miladi, Milad and Richter, Andreas S and Will, Sebastian and Wolff, Joachim and Wright, Patrick R and Backofen, Rolf},
  title = {{Freiburg} {RNA} {tools}: a central online resource for {RNA}-focused research and teaching},
  journal = {Nucleic Acids Research},
  volume = {46},
  number = {W1},
  pages = {W25-W29},
  year = {2018},
  doi = {10.1093/nar/gky329},
  issn = {0305-1048},
  issn = {1362-4962},
  user = {mmann},
  abstract = {The Freiburg RNA tools webserver is a well established online resource for RNA-focused research. It provides a unified user interface and comprehensive result visualization for efficient command line tools. The webserver includes RNA-RNA interaction prediction (IntaRNA, CopraRNA, metaMIR), sRNA homology search (GLASSgo), sequence-structure alignments (LocARNA, MARNA, CARNA, ExpaRNA), CRISPR repeat classification (CRISPRmap), sequence design (antaRNA, INFO-RNA, SECISDesign), structure aberration evaluation of point mutations (RaSE), and RNA/protein-family models visualization (CMV), and other methods. Open education resources offer interactive visualizations of RNA structure and RNA-RNA interaction prediction as well as basic and advanced sequence alignment algorithms. The services are freely available at http://rna.informatik.uni-freiburg.de.}
}

@article{Georg:2018,
  author = {Steffen C.
Lott and Richard A Sch{\"a}fer and Martin Mann and Rolf Backofen and Wolfgang R Hess and Bj{\"o}rn Voss and Jens Georg},
  title = {{GLASSgo} - Automated and reliable detection of {sRNA} homologs from a single input sequence},
  journal = {Frontiers in Genetics},
  issn = {1664-8021},
  volume = {9},
  number = {},
  pages = {124},
  year = {2018},
  doi = {10.3389/fgene.2018.00124},
  user = {mmann},
  abstract = {Bacterial small RNAs (sRNAs) are important post-transcriptional regulators of gene expression. The functional and evolutionary characterization of sRNAs requires the identification of homologs, which is frequently challenging due to their heterogeneity, short length and partly, little sequence conservation. We developed the GLobal Automatic Small RNA Search go (GLASSgo) algorithm to identify sRNA homologs in complex genomic databases starting from a single sequence. GLASSgo combines an iterative BLAST strategy with pairwise identity filtering and a graph-based clustering method that utilizes RNA secondary structure information. We tested the specificity, sensitivity and runtime of GLASSgo, BLAST and the combination RNAlien/cmsearch in a typical use case scenario on 40 bacterial sRNA families. The sensitivity of the tested methods was similar, while the specificity of GLASSgo and RNAlien/cmsearch was significantly higher than that of BLAST. GLASSgo was on average about 87 times faster than RNAlien/cmsearch, and only about 7.5 times slower than BLAST, which shows that GLASSgo optimizes the trade-off between speed and accuracy in the task of finding sRNA homologs. GLASSgo is fully automated, whereas BLAST often recovers only parts of homologs and RNAlien/cmsearch requires extensive additional bioinformatic work to get a comprehensive set of homologs. GLASSgo is available as an easy-to-use web server to find homologous sRNAs in large databases.}
}

@article{Wright-2018,
  author = {Wright, Patrick R.
and Mann, Martin and Backofen, Rolf},
  title = {Structure and interaction prediction in prokaryotic {RNA} biology},
  volume = {6},
  number = {2},
  pages = {},
  year = {2018},
  doi = {10.1128/microbiolspec.RWR-0001-2017},
  journal = {Microbiol Spectrum},
  issn = {2165-0497},
  abstract = {Many years of research in RNA biology have soundly established the importance of RNA based regulation far beyond most early traditional presumptions. Importantly, the advances in "wet" laboratory techniques have produced unprecedented amounts of data that require efficient and precise computational analysis schemes and algorithms. Hence, many \textit{in silico} methods that attempt topological and functional classification of novel putative RNA based regulators are available. In this review we technically outline thermodynamics-based standard RNA secondary structure and RNA-RNA interaction prediction approaches that have proven valuable to the RNA research community in the past and present. For these, we highlight their usability with a special focus on prokaryotic organisms and also briefly mention recent advances in whole genome interactomics and how this may influence the field of predictive RNA research.},
  user = {mmann}
}

@article{Mann-MICA-2018,
  title = "{MICA}: Multiple interval-based curve alignment",
  journal = "SoftwareX",
  volume = "7",
  number = "",
  pages = "53-58",
  year = "2018",
  issn = "2352-7110",
  doi = "10.1016/j.softx.2018.02.003",
  user = "mmann",
  author = "Martin Mann and Hans-Peter Kahle and Matthias Beck and Bela Johannes Bender and Heinrich Spiecker and Rolf Backofen",
  abstract = "MICA enables the automatic synchronization of discrete data curves. To this end, characteristic points of the curves’ shapes are identified. These landmarks are used within a heuristic curve registration approach to align profile pairs by mapping similar characteristics onto each other.
In combination with a progressive alignment scheme, this enables the computation of multiple curve alignments. Multiple curve alignments are needed to derive meaningful representative consensus data of measured time or data series. MICA was already successfully applied to generate representative profiles of tree growth data based on intra-annual wood density profiles or cell formation data. The MICA package provides a command-line and graphical user interface. The R interface enables the direct embedding of multiple curve alignment computation into larger analyses pipelines. Source code, binaries and documentation are freely available at https://github.com/BackofenLab/MICA"
}

@article{Davis_Saunders_Mann-Combi_ensem_miRNA-NAR2017,
  author = {Davis, Jason A. and Saunders, Sita J. and Mann, Martin and Backofen, Rolf},
  title = {Combinatorial ensemble {miRNA} target prediction of co-regulation networks with non-prediction data},
  journal = NAR,
  year = {2017},
  volume = {45},
  number = {15},
  pages = {8745-8757},
  user = {mmann},
  pmid = {28911111},
  doi = {10.1093/nar/gkx605},
  issn = {0305-1048},
  issn = {1362-4962},
  abstract = {MicroRNAs (miRNAs) are key regulators of cell-fate decisions in development and disease with a vast array of target interactions that can be investigated using computational approaches. For this study, we developed metaMIR, a combinatorial approach to identify miRNAs that co-regulate identified subsets of genes from a user-supplied list. We based metaMIR predictions on an improved dataset of human miRNA-target interactions, compiled using a machine-learning-based meta-analysis of established algorithms. Simultaneously, the inverse dataset of negative interactions not likely to occur was extracted to increase classifier performance, as measured using an expansive set of experimentally validated interactions from a variety of sources.
In a second differential mode, candidate miRNAs are predicted by indicating genes to be targeted and others to be avoided to potentially increase specificity of results. As an example, we investigate the neural crest, a transient structure in vertebrate development where miRNAs play a pivotal role. Patterns of metaMIR-predicted miRNA regulation alone partially recapitulated functional relationships among genes, and separate differential analysis revealed miRNA candidates that would downregulate components implicated in cancer progression while not targeting tumour suppressors. Such an approach could aid in therapeutic application of miRNAs to reduce unintended effects. The utility is available at http://rna.informatik.uni-freiburg.de/metaMIR/.}
}

@article{Mann_Wright_Backofen-IntaR_enhan_and-NAR2017,
  author = {Mann, Martin and Wright, Patrick R. and Backofen, Rolf},
  title = {{IntaRNA} 2.0: enhanced and customizable prediction of {RNA}-{RNA} interactions},
  journal = NAR,
  year = {2017},
  volume = {45},
  number = {W1},
  pages = {W435-W439},
  user = {wrightp},
  pmid = {28472523},
  doi = {10.1093/nar/gkx279},
  issn = {1362-4962},
  issn = {0305-1048},
  abstract = {The IntaRNA algorithm enables fast and accurate prediction of RNA-RNA hybrids by incorporating seed constraints and interaction site accessibility. Here, we introduce IntaRNAv2, which enables enhanced parameterization as well as fully customizable control over the prediction modes and output formats. Based on up to date benchmark data, the enhanced predictive quality is shown and further improvements due to more restrictive seed constraints are highlighted. The extended web interface provides visualizations of the new minimal energy profiles for RNA-RNA interactions. These allow a detailed investigation of interaction alternatives and can reveal potential interaction site multiplicity. IntaRNAv2 is freely available (source and binary), and distributed via the conda package manager.
Furthermore, it has been included into the Galaxy workflow framework and its already established web interface enables ad hoc usage.}
}

@article{Stangler:16,
  author = {Stangler, D. F. and Mann, M. and Kahle, H.-P. and Rosskopf, E. and Fink, S. and Spiecker, H.},
  title = {Spatiotemporal alignment of radial tracheid diameter profiles of submontane {Norway} spruce},
  journal = {Dendrochronologia},
  issn = {1125-7865},
  year = {2016},
  volume = {37},
  number = {},
  pages = {33-45},
  doi = {10.1016/j.dendro.2015.12.001},
  user = {mmann},
  abstract = {Studying intra-annual wood formation dynamics provides valuable information on how tree growth and forests are affected by environmental changes and climatic extreme events. This study has the aim to evaluate and to quantify synergetic potentials emerging from a combination of current state of the art techniques used to monitor intra-annual wood formation processes. Norway spruce trees were studied in detail during the growing season 2009 with weekly sampling of microcores, high resolution point-dendrometers and wood anatomical analysis. The combination of the applied techniques allowed us to convert the spatial scales of radial tracheid diameter profiles to seasonal time scales and to synchronize fluctuations in intra-annual cell diameter profiles. This spatiotemporal information was used to validate the recently introduced software MICA (Multiple interval-based curve alignment). In comparison to the conventional approach of averaging profiles of tree ring variables, the MICA aligned profiles exhibit a significantly higher synchronicity of the averaged data points. We also demonstrate two new features in the MICA application that enable to extrapolate spatiotemporal information between intra-annual profiles for the construction of robust mean (consensus) profiles that are representative for the population dynamics.
By using a set of complementary techniques in an integrated approach, this study highlights a new methodological framework that can contribute to a better understanding of the environmental control of wood formation during the growing season.}
}

@article{Kleinkauf-web-2015,
  author = {Kleinkauf, R. and Houwaart, T. and Backofen, R. and Mann, M.},
  title = {{antaRNA} - Multi-objective inverse folding of pseudoknot {RNA} using ant-colony optimization},
  journal = {BMC Bioinformatics},
  volume = {16},
  number = {1},
  doi = {10.1186/s12859-015-0815-6},
  pages = {1-7},
  year = {2015},
  user = {mmann},
  issn = {1471-2105},
  abstract = { Background: Many functional RNA molecules fold into pseudoknot structures, which are often essential for the formation of an RNA's 3D structure. Currently the design of RNA molecules, which fold into a specific structure (known as RNA inverse folding) within biotechnological applications, is lacking the feature of incorporating pseudoknot structures into the design. Hairpin-(H)- and kissing hairpin-(K)-type pseudoknots cover a wide range of biologically functional pseudoknots and can be represented on a secondary structure level. Results: The RNA inverse folding program antaRNA, which takes secondary structure, target GC-content and sequence constraints as input, is extended to provide solutions for such H- and K-type pseudoknotted secondary structure constraint. We demonstrate the easy and flexible interchangeability of modules within the antaRNA framework by incorporating pKiss as structure prediction tool capable of predicting the mentioned pseudoknot types. The performance of the approach is demonstrated on a subset of the Pseudobase++ dataset. Conclusions: This new service is available via a standalone version and is also part of the Freiburg RNA Tools webservice. Furthermore, antaRNA is available in Galaxy and is part of the RNA-workbench Docker image. }
}

@article{Kleinkauf2015,
  author = {Kleinkauf, R. and Mann, M.
and Backofen, R.},
  title = {{antaRNA} - Ant Colony Based {RNA} Sequence Design},
  journal = {Bioinformatics},
  volume = {31},
  number = {19},
  doi = {10.1093/bioinformatics/btv319},
  pages = {3114-3121},
  year = {2015},
  user = {robertk},
  issn = {1367-4811},
  issn = {1367-4803},
  abstract = {Motivation: RNA sequence design is studied at least as long as the classical folding problem. While for the latter the functional fold of an RNA molecule is to be found, inverse folding tries to identify RNA sequences that fold into a function-specific target structure. In combination with RNA-based biotechnology and synthetic biology, reliable RNA sequence design becomes a crucial step to generate novel biochemical components. Results: In this article, the computational tool antaRNA is presented. It is capable of compiling RNA sequences for a given structure that comply in addition with an adjustable full range objective GC-content distribution, specific sequence constraints and additional fuzzy structure constraints. antaRNA applies ant colony optimization meta-heuristics and its superior performance is shown on a biological datasets. Availability: http://www.bioinf.uni-freiburg.de/Software/antaRNA}
}

@article{Mann-CAM-14,
  author = {Martin Mann and Feras Nahar and Norah Schnorr and Rolf Backofen and Peter F. Stadler and Christoph Flamm},
  title = {Atom Mapping with Constraint Programming},
  journal = {BMC Algorithms for Molecular Biology},
  year = {2014},
  volume = {9},
  number = {1},
  pages = {23},
  doi = {10.1186/s13015-014-0023-3},
  issn = {1748-7188},
  user = {mmann},
  note = {In Thematic series on Constraints and Bioinformatics},
  abstract = { Chemical reactions are rearrangements of chemical bonds. Each atom in an educt molecule thus appears again in a specific position of one of the reaction products.
This bijection between educt and product atoms is not reported by chemical reaction databases, however, so that the 'Atom Mapping Problem' of finding this bijection is left as an important computational task for many practical applications in computational chemistry and systems biology. Elementary chemical reactions feature a cyclic imaginary transition state (ITS) that imposes additional restrictions on the bijection between educt and product atoms that are not taken into account by previous approaches. We demonstrate that Constraint Programming is well-suited to solving the Atom Mapping Problem in this setting. The performance of our approach is evaluated for a manually curated subset of chemical reactions from the KEGG database featuring various ITS cycle layouts and reaction mechanisms.}
}

@article{Mann-Backofen_2014,
  author = {Mann, Martin and Backofen, Rolf},
  title = {Exact methods for lattice protein models},
  journal = {Bio-Algorithms and Med-Systems},
  year = {2014},
  volume = {10},
  number = {4},
  pages = {213-225},
  doi = {10.1515/bams-2014-0014},
  issn = {1896-530X},
  issn = {1895-9091},
  user = {mmann},
  abstract = {Lattice protein models are well studied abstractions of globular proteins. By discretizing the structure space and simplifying the energy model over regular proteins, they enable detailed studies of protein structure formation and evolution. But even in the simplest lattice protein models, the prediction of optimal structures is computationally hard. Therefore, often heuristic approaches are applied to find such conformations. Commonly, heuristic methods find only locally optimal solutions. Nevertheless, there exist methods that guarantee to predict globally optimal structures. Currently only one such exact approach is publicly available, namely the Constraint-based Protein Structure Prediction (CPSP) method and variants. Here, we review exact approaches and derived methods.
We discuss fundamental concepts like hydrophobic core construction and their use in optimal structure prediction as well as possible applications like combinations of different energy models.}
}

@article{Wright_Georg_Mann-Copra_and_IntaR-NAR2014,
  author = {Wright, Patrick R. and Georg, Jens and Mann, Martin and Sorescu, Dragos A. and Richter, Andreas S. and Lott, Steffen and Kleinkauf, Robert and Hess, Wolfgang R. and Backofen, Rolf},
  title = {{CopraRNA} and {IntaRNA}: predicting small {RNA} targets, networks and interaction domains},
  journal = NAR,
  year = {2014},
  volume = {42},
  number = {Web Server issue},
  pages = {W119-23},
  note = {PRW, JG and MM contributed equally to this work},
  user = {wrightp},
  pmid = {24838564},
  doi = {10.1093/nar/gku359},
  issn = {0305-1048},
  issn = {1362-4962},
  abstract = {CopraRNA (Comparative prediction algorithm for small RNA targets) is the most recent asset to the Freiburg RNA Tools webserver. It incorporates and extends the functionality of the existing tool IntaRNA (Interacting RNAs) in order to predict targets, interaction domains and consequently the regulatory networks of bacterial small RNA molecules. The CopraRNA prediction results are accompanied by extensive postprocessing methods such as functional enrichment analysis and visualization of interacting regions. Here, we introduce the functionality of the CopraRNA and IntaRNA webservers and give detailed explanations on their postprocessing functionalities.
Both tools are freely accessible at http://rna.informatik.uni-freiburg.de.}
}

@article{Kundu_Mann_Costa-MoDPe_inter_web-2014,
  author = {Kundu, Kousik and Mann, Martin and Costa, Fabrizio and Backofen, Rolf},
  title = {{MoDPepInt}: an interactive web server for prediction of modular domain-peptide interactions},
  journal = {Bioinformatics},
  year = {2014},
  volume = {30},
  number = {18},
  pages = {2668-2669},
  user = {kousik},
  pmid = {24872426},
  doi = {10.1093/bioinformatics/btu350},
  issn = {1367-4811},
  issn = {1367-4803},
  abstract = {Summary: MoDPepInt (Modular Domain Peptide Interaction) is a new easy-to-use web server for the prediction of binding partners for modular protein domains. Currently, we offer models for SH2, SH3 and PDZ domains via the tools SH2PepInt, SH3PepInt and PDZPepInt, respectively. More specifically, our server offers predictions for 51 SH2 human domains and 69 SH3 human domains via single domain models, and predictions for 226 PDZ domains across several species, via 43 multidomain models. All models are based on support vector machines with different kernel functions ranging from polynomial, to Gaussian, to advanced graph kernels. In this way, we model non-linear interactions between amino acid residues. Results were validated on manually curated datasets achieving competitive performance against various state-of-the-art approaches. Availability and implementation: The MoDPepInt server is available under the URL http://modpepint.informatik.uni-freiburg.de/ Contact: backofen@informatik.uni-freiburg.de Supplementary information: Supplementary data are available at Bioinformatics online.}
}

@article{Mann_basin_14,
  author = {Martin Mann and Marcel Kucharik and Christoph Flamm and Michael T.
Wolfinger},
  title = {Memory efficient {RNA} energy landscape exploration},
  journal = {Bioinformatics},
  year = {2014},
  pages = {2584-2591},
  volume = {30},
  number = {18},
  arxiv = {1404.0270},
  doi = {10.1093/bioinformatics/btu337},
  pmid = {24833804},
  issn = {1367-4811},
  issn = {1367-4803},
  user = {mmann},
  abstract = {Energy landscapes provide a valuable means for studying the folding dynamics of short RNA molecules in detail by modeling all possible structures and their transitions. Higher abstraction levels based on a macro-state decomposition of the landscape enable the study of larger systems, however they are still restricted by huge memory requirements of exact approaches. We present a highly parallelizable local enumeration scheme that enables the computation of exact macro-state transition models with highly reduced memory requirements. The approach is evaluated on RNA secondary structure landscapes using a gradient basin definition for macro-states. Furthermore, we demonstrate the need for exact transition models by comparing two barrier-based approaches and perform a detailed investigation of gradient basins in RNA energy landscapes. Source code is part of the C++ Energy Landscape Library available at http://www.bioinf.uni-freiburg.de/Software/.}
}

@article{Wright_Richter_Papenfort-Compa_genom_boost-PNAS2013,
  author = {Wright, Patrick R. and Richter, Andreas S. and Papenfort, Kai and Mann, Martin and Vogel, J{\"o}rg and Hess, Wolfgang R. and Backofen, Rolf and Georg, Jens},
  title = {Comparative genomics boosts target prediction for bacterial small {RNAs}},
  journal = PNAS,
  year = 2013,
  volume = 110,
  number = 37,
  pages = {E3487-96},
  user = {wrightp},
  pmid = 23980183,
  doi = {10.1073/pnas.1303248110},
  issn = {1091-6490},
  issn = {0027-8424},
  abstract = {Small RNAs (sRNAs) constitute a large and heterogeneous class of bacterial gene expression regulators.
Much like eukaryotic microRNAs, these sRNAs typically target multiple mRNAs through short seed pairing, thereby acting as global posttranscriptional regulators. In some bacteria, evidence for hundreds to possibly more than 1,000 different sRNAs has been obtained by transcriptome sequencing. However, the experimental identification of possible targets and, therefore, their confirmation as functional regulators of gene expression has remained laborious. Here, we present a strategy that integrates phylogenetic information to predict sRNA targets at the genomic scale and reconstructs regulatory networks upon functional enrichment and network analysis (CopraRNA, for Comparative Prediction Algorithm for sRNA Targets). Furthermore, CopraRNA precisely predicts the sRNA domains for target recognition and interaction. When applied to several model sRNAs, CopraRNA revealed additional targets and functions for the sRNAs CyaR, FnrS, RybB, RyhB, SgrS, and Spot42. Moreover, the mRNAs gdhA, lrp, marA, nagZ, ptsI, sdhA, and yobF-cspC were suggested as regulatory hubs targeted by up to seven different sRNAs. The verification of many previously undetected targets by CopraRNA, even for extensively investigated sRNAs, demonstrates its advantages and shows that CopraRNA-based analyses can compete with experimental target prediction approaches. A Web interface allows high-confidence target prediction and efficient classification of bacterial sRNAs.}
}

@inproceedings{Mann_kekule_13,
  author = {Martin Mann and Bernhard Thiel},
  title = {Kekule structure enumeration yields unique {SMILES}},
  booktitle = {Proceedings of the Workshop on Constraint Based Methods for Bioinformatics (WCB 2013)},
  year = {2013},
  pages = {1-9},
  location = {Uppsala, S},
  user = {mmann},
  abstract = {A standard representation of molecules is based on graphs where atoms correspond to vertices and covalent bonds are represented by a number of edges according to the bond order.
This depiction reaches its limitations for aromatic molecules where the aromatic ring can be encoded by different bond order layouts, i.e. Kekule structures, since electrons are shared within the ring rather than fixed to a specific bond. Thus, several Kekule structures are possible for aromatic molecules. Here, we propose a new constraint programming based approach to enumerate all Kekule structures for a given molecule. Furthermore, the ambiguity information derived is used to enable a unique Kekule-based SMILES encoding of the molecule independent of any aromaticity detection algorithm. This is of importance, since there is no generally accepted aromaticity definition available that covers all cases.}
}

@inproceedings{Mann_atommapping_13,
  author = {Martin Mann and Feras Nahar and Heinz Ekker and Rolf Backofen and Peter F. Stadler and Christoph Flamm},
  title = {Atom Mapping with Constraint Programming},
  booktitle = {Proc. of the 19th International Conference on Principles and Practice of Constraint Programming (CP'13)},
  series = {LNCS},
  publisher = {Springer},
  year = {2013},
  pages = {805-822},
  volume = {8124},
  doi = {10.1007/978-3-642-40627-0_59},
  location = {Uppsala, Sweden},
  editor = {C. Schulte},
  isbn = {978-3-642-40626-3},
  user = {mmann},
  abstract = {Chemical reactions consist of a rearrangement of bonds so that each atom in an educt molecule appears again in a specific position of a reaction product. In general this bijection between educt and product atoms is not reported by chemical reaction databases, leaving the Atom Mapping Problem as an important computational task for many practical applications in computational chemistry and systems biology. Elementary chemical reactions feature a cyclic imaginary transition state (ITS) that imposes additional restrictions on the bijection between educt and product atoms that are not taken into account by previous approaches.
We demonstrate that Constraint Programming is well-suited to solving the Atom Mapping Problem in this setting. The performance of our approach is evaluated for a subset of chemical reactions from the KEGG database featuring various ITS cycle layouts and reaction mechanisms.}
}

@inproceedings{Mann_GGL_13,
  author = {Martin Mann and Heinz Ekker and Christoph Flamm},
  title = {The Graph Grammar Library - a generic framework for chemical graph rewrite systems},
  isbn = {978-3-642-38882-8},
  booktitle = {Theory and Practice of Model Transformations, Proc. of ICMT 2013},
  volume = {7909},
  series = {LNCS},
  publisher = {Springer},
  editor = {Duddy, Keith and Kappel, Gerti},
  doi = {10.1007/978-3-642-38883-5_5},
  year = {2013},
  pages = {52-53},
  note = {Extended abstract and poster at ICMT, full article at arXiv.},
  arxiv = {1304.1356},
  location = {Budapest, HU},
  user = {mmann},
  abstract = {Graph rewrite systems are powerful tools to model and study complex problems in various fields of research. Their successful application to chemical reaction modelling on a molecular level was shown but no appropriate and simple system is available at the moment. The presented Graph Grammar Library (GGL) implements a generic Double Push Out approach for general graph rewrite systems. The framework focuses on a high level of modularity as well as high performance, using state-of-the-art algorithms and data structures, and comes with extensive documentation. The large GGL chemistry module enables extensive and detailed studies of chemical systems. It well meets the requirements and abilities envisioned by Yadav et al. (2004) for such chemical rewrite systems. Here, molecules are represented as undirected labeled graphs while chemical reactions are described by according graph grammar rules. Beside the graph transformation, the GGL offers advanced cheminformatics algorithms for instance to estimate energies of molecules or aromaticity perception.
These features are illustrated using a set of reactions from polyketide chemistry, a huge class of natural compounds of medical relevance. The graph grammar based simulation of chemical reactions offered by the GGL is a powerful tool for extensive cheminformatics studies on a molecular level. The GGL already provides rewrite rules for all enzymes listed in the KEGG LIGAND database and is freely available at http://www.tbi.univie.ac.at/software/GGL/.}
}

@inproceedings{Mann_atomMapping_12,
  author = {Martin Mann and Heinz Ekker and Peter F. Stadler and Christoph Flamm},
  title = {Atom Mapping with Constraint Programming},
  booktitle = {Proceedings of the Workshop on Constraint Based Methods for Bioinformatics (WCB 2012)},
  year = {2012},
  pages = {23-29},
  location = {Budapest, HU},
  user = {mmann},
  note = {http://www.bioinf.uni-freiburg.de/Events/WCB12/proceedings.pdf},
  abstract = {The mass flow in a chemical reaction network is determined by the propagation of atoms from educt to product molecules within each of the constituent chemical reactions. The Atom Mapping Problem for a given chemical reaction is the computational task of determining the correspondences of the atoms between educt and product molecules. We propose here a Constraint Programming approach to identify atom mappings for "elementary" reactions. These feature a cyclic imaginary transition state (ITS) imposing an additional strong constraint on the bijection between educt and product atoms. The ongoing work presented here identifies only chemically feasible ITSs by integrating the cyclic structure of the chemical transformation into the search.}
}

@article{Mann-Saunders:12,
  author = {Martin Mann and Rhodri Saunders and Cameron Smith and Rolf Backofen and Charlotte M.
Deane}, title = {Producing high-accuracy lattice models from protein atomic co-ordinates including side chains}, journal = {Advances in Bioinformatics}, issn = {1687-8027}, issn = {1687-8035}, year = {2012}, volume = {2012}, number = {Article ID 148045}, pages = {6}, doi = {10.1155/2012/148045}, user = {mmann}, note = {MM and RS contributed equally to this work.}, abstract= {Lattice models are a common abstraction used in the study of protein structure, folding, and refinement. They are advantageous because the discretisation of space can make extensive protein evaluations computationally feasible. Various approaches to the protein chain lattice fitting problem have been suggested but only a single backbone-only tool is available currently. We introduce LatFit, a new tool to produce high-accuracy lattice protein models. It generates both backbone-only and backbone-side-chain models in any user defined lattice. LatFit implements a new distance RMSD-optimisation fitting procedure in addition to the known coordinate RMSD method. The program is freely available for academic download and as a web-server: http://cpsp.informatik.uni-freiburg.de/LatFit/ We tested LatFit's accuracy and speed using a large non-redundant set of high resolution proteins (SCOP database) on three commonly used lattices: 3D cubic, face-centred cubic, and knight's walk. Fitting speed compared favourably to other methods, and both backbone-only and backbone-side-chain models show low deviation from the original data about 1.5A RMSD in the FCC lattice). To our knowledge this represents the first comprehensive study of lattice quality for on-lattice protein models including side chains while LatFit is the only available tool for such models.} } @Article{Sorescu_Mohl_Mann-CARNA_RNA_struc-NAR2012, author = {Dragos A. 
Sorescu and Mathias M{\"o}hl and Martin Mann and Rolf Backofen and Sebastian Will}, title = {{CARNA} - alignment of {RNA} structure ensembles}, journal = {Nucleic Acids Res}, year = {2012}, volume = {40}, number = {W1}, pages = {W49-W53}, note = {DAS, MM{\"o}, and MMa contributed equally to this work.}, user = {will}, pmid = {22689637}, doi = {10.1093/nar/gks491}, issn = {1362-4962}, issn = {0305-1048}, abstract = {Due to recent algorithmic progress, tools for the gold standard of comparative RNA analysis, namely Sankoff-style simultaneous alignment and folding, are now readily applicable. Such approaches, however, compare RNAs with respect to a simultaneously predicted, single, nested consensus structure. To make multiple alignment of RNAs available in cases, where this limitation of the standard approach is critical, we introduce a web server that provides a complete and convenient interface to the RNA structure alignment tool 'CARNA'. This tool uniquely supports RNAs with multiple conserved structures per RNA and aligns pseudoknots intrinsically; these features are highly desirable for aligning riboswitches, RNAs with conserved folding pathways, or pseudoknots. We represent structural input and output information as base pair probability dot plots; this provides large flexibility in the input, ranging from fixed structures to structure ensembles, and enables immediate visual analysis of the results. In contrast to conventional Sankoff-style approaches, 'CARNA' optimizes all structural similarities in the input simultaneously, for example across an entire RNA structure ensemble. Even compared with already costly Sankoff-style alignment, 'CARNA' solves an intrinsically much harder problem by applying advanced, constraint-based, algorithmic techniques. 
Although 'CARNA' is specialized to the alignment of RNAs with several conserved structures, its performance on RNAs in general is on par with state-of-the-art general-purpose RNA alignment tools, as we show in a Bralibase 2.1 benchmark. The web server is freely available at http://rna.informatik.uni-freiburg.de/CARNA.} }

@comment{A field may occur only once per entry: issn holds the print ISSN, the non-standard eissn field holds the electronic ISSN.}
@Article{Bender:12,
  author   = {Bela Bender and Martin Mann and Rolf Backofen and Heinrich Spiecker},
  title    = {Microstructure Alignment of Wood Density Profiles: an Approach to Equalize Radial Differences in Growth Rate},
  journal  = {Trees - Structure and Function},
  year     = {2012},
  volume   = {26},
  number   = {4},
  pages    = {1267--1274},
  issn     = {0931-1890},
  eissn    = {1432-2285},
  doi      = {10.1007/s00468-012-0702-y},
  note     = {BB and MM contributed equally to this work.},
  user     = {mmann},
  abstract = {We studied intra-annual wood density profiles of Douglas-fir tree rings (Pseudotsuga menziesii [Mirb.] Franco) in southwestern Germany. Growth rate varies differently over time throughout the circumference of trees. This leads to differences in wood formation, which can be observed in the shape of the density profiles of the same tree ring measured in different radial directions. Due to this spatial variation in density profiles, we need a reliable method to determine an average profile, which preserves the common characteristics of the data. To this end, we developed a multiple interval-based curve alignment (MICA) procedure. It identifies characteristic points within the profiles such as minima, maxima and inflection points. These reference points are shifted gradually against each other within a proportionally defined base line interval. Using our progressive alignment approach, we are able to calculate an average profile that represents very well the characteristics of all measured curves of a specific tree ring. We applied the procedure to get year-specific average profiles using various trees.
This results in representative mean density profiles that preserve the density variations common to all aligned profiles. Individual noise is reduced thereby enabling the analysis of the impact of weather variations on wood density.} }

@PhdThesis{Mann:PhDThesis:2011,
  author   = {Martin Mann},
  title    = {Computational Methods for Lattice Protein Models},
  school   = {Albert-Ludwigs-University Freiburg},
  year     = {2011},
  month    = jun,
  doi      = {10.6094/UNIFR/8156},
  user     = {mmann},
  abstract = { Proteins are involved in almost all processes in living cells. They act as regulators, catalyzers, transporters, and in many other functions that are determined by their three-dimensional structures. This thesis studies the fundamental concepts that define and guide the folding processes of proteins. Therein, the prediction of a protein's native fold as well as the modeling of its folding process are of great importance. To enable large scale studies, lattice protein models are used that are available at different levels of abstraction. Central to this thesis is the development and implementation of efficient methods to study proteins represented in complex three-dimensional lattices. A major focus is the development of procedures that enable the usage of more realistic side chain models. An important task when studying protein models is the transfer of real protein structures into the model. This NP-complete problem is tackled in the first part of the thesis. A combination of efficient heuristics and constraint-based search yields models of high quality and low runtimes. The second part of the thesis presents methods to determine minimum energy structures. Here, a constraint-based approach is introduced that for the first time makes it possible to predict energetically optimal structures within hydrophobic-polar (HP) side chain models. This enables the first study of optimal structures within the model revealing an immense degeneracy.
Since many structures cannot be distinguished by the energy function, an equivalence relation for the grouping of optimal structures is introduced. An extension of the constraint-based structure prediction approach enables the efficient and direct computation of the resulting equivalence classes. HP-optimal structures from different classes can be used for the initialization of local search methods that tackle more advanced energy functions. The superiority of such an approach compared to standard strategies is demonstrated. In addition, to enable local search methods in side chain models, the definition and efficient implementation of a neighborhood relation between structures is needed. The third part of the thesis covers the presentation of an interval-based local neighborhood relation for arbitrary lattices. An efficient procedure for the enumeration of neighbored structures opens the door for new studies in side chain lattice protein models. Subsequently, the phenomenon of co-translational folding is explored, i.e. the folding of the emerging protein while it is assembled at the ribosome. Co-translational folding is assumed to guide the folding process into the native structure. The introduced methods enable a classification of protein sequences based on their co-translational folding potential. An extensive, comparative study identifies new characteristics in sequence and structure that are exclusive to co-translationally folding proteins. Furthermore, some hypotheses from literature are disproved that have been proposed based on thought experiments. An extension of the study to real protein structures and domains highlights the alpha/beta-domain proteins. This class shows the strongest bias towards the identified characteristics of co-translational folding proteins. In the final part of the thesis the focus is shifted to evolutionary studies. Therein, intensive analyses of neutral networks are done that are graph-based tools to study neutral evolution.
Neutral networks describe the possible evolutionary pathways that preserve a given function and thus the associated structure. A new sequence design approach is introduced that enables the neutral network exploration without a full sequence space enumeration. This is the first method that is able to design non-degenerated sequences for a given structure, which is known to be a difficult, NP-complete problem. A thorough analysis of the resulting neutral networks in three-dimensional lattice models reveals considerable differences, e.g. in network sizes, compared to two-dimensional models. To focus the investigation of neutral evolution on the structural core of proteins an according H-fold definition is presented. The H-folds enable additional evolutionary studies of the flexible loop regions of proteins. In conclusion, this thesis describes a variety of new and efficient methods that enable extensive studies of structures and sequences in lattice protein models. All methods are freely available for further research within two software packages and via a web frontend for ad hoc usage. The implemented tools as well as the studies presented thus provide an important contribution to in silico protein research. }
}

@Article{SaundersMann_11,
  author   = {Rhodri Saunders and Martin Mann and Charlotte Deane},
  title    = {Signatures of Co-translational Folding},
  journal  = {Biotechnology Journal, Special issue: Protein folding in vivo},
  year     = {2011},
  month    = mar,
  volume   = {6},
  number   = {6},
  pages    = {742--751},
  issn     = {1860-7314},
  doi      = {10.1002/biot.201000330},
  user     = {mmann},
  abstract = {Global and co-translational protein folding may both occur in vivo, and understanding the relationship between these folding mechanisms is pivotal to our understanding of protein structure formation.
Within this study, over 1.5 million hydrophobic-polar sequences were classified based on their ability to attain a unique but not necessarily minimal energy conformation via co-translational folding. The sequence and structure properties of the sets were then compared to elucidate signatures of co-translational folding. The strongest signature of co-translational folding is a reduced number of possible favourable contacts in the amino-terminus. There is no evidence of fewer contacts, more local contacts, nor less compact structures. Co-translational folding does produce a more compact amino- than carboxy-terminal region and an amino-terminal biased set of core residues. In real proteins these signatures are also observed and found most strongly in proteins of the SCOP alpha/beta class where 71% have an amino-terminal set of core residues. The prominence of co-translational features in experimentally determined protein structures suggests that the importance of co-translational folding is currently underestimated.}, note = {RS and MM have contributed equally to this work.} }

@comment{Phys. Rev. E identifies articles by number, not page range: pages carries the article number taken from the DOI suffix.}
@article{Mann:Klemm:11,
  author   = {Martin Mann and Konstantin Klemm},
  title    = {Efficient exploration of discrete energy landscapes},
  journal  = {Phys. Rev. E},
  year     = {2011},
  month    = jan,
  volume   = {83},
  number   = {1},
  pages    = {011113},
  issn     = {1539-3755},
  doi      = {10.1103/PhysRevE.83.011113},
  arxiv    = {0910.2559},
  user     = {mmann},
  abstract = {Many physical and chemical processes, such as folding of biopolymers, are best described as dynamics on large combinatorial energy landscapes. A concise approximate description of the dynamics is obtained by partitioning the micro-states of the landscape into macro-states. Since most landscapes of interest are not tractable analytically, the probabilities of transitions between macro-states need to be extracted numerically from the microscopic ones, typically by full enumeration of the state space or approximations using the Arrhenius law.
Here we propose to approximate transition probabilities by a Markov chain Monte-Carlo method. For landscapes of the number partitioning problem and an RNA switch molecule we show that the method allows for accurate probability estimates with significantly reduced computational cost.} }

@InProceedings{Bogomolov:shapeTriples:2010,
  author    = {Sergiy Bogomolov and Martin Mann and Björn Voss and Andreas Podelski and Rolf Backofen},
  title     = {Shape-based barrier estimation for {RNA}s},
  booktitle = {Proceedings of German Conference on Bioinformatics GCB'10},
  series    = {LNI},
  volume    = {173},
  publisher = {GI},
  year      = 2010,
  pages     = {42--51},
  note      = {SB and MM contributed equally to this work.},
  user      = {mmann},
  abstract  = {The ability of some RNA molecules to switch between different metastable conformations plays an important role in cellular processes. In order to identify such molecules and to predict their conformational changes one has to investigate the refolding pathways. As a qualitative measure of these transitions, the barrier height marks the energy peak along such refolding paths. We introduce a meta-heuristic to estimate such barriers, which is an NP-complete problem. To guide an arbitrary path heuristic, the method uses RNA shape representative structures as intermediate checkpoints for detours. This enables a broad but efficient search for refolding pathways. The resulting Shape Triples meta-heuristic enables a close to optimal estimation of the barrier height that outperforms the precision of the employed path heuristic.}
}

@article{Flamm_etal_10,
  author   = {Christoph Flamm and Alexander Ullrich and Heinz Ekker and Martin Mann and Daniel Hoegerl and Markus Rohrschneider and Sebastian Sauer and Gerik Scheuermann and Konstantin Klemm and Ivo L. Hofacker and Peter F. Stadler},
  title    = {Evolution of Metabolic Networks: A Computational Framework},
  journal  = {Journal of Systems Chemistry},
  year     = {2010},
  volume   = {1},
  number   = {1},
  pages    = {4},
  issn     = {1759-2208},
  doi      = {10.1186/1759-2208-1-4},
  user     = {mmann},
  abstract = {Background: The metabolic architectures of extant organisms share many key pathways such as the citric acid cycle, glycolysis, or the biosynthesis of most amino acids. Several competing hypotheses for the evolutionary mechanisms that shape metabolic networks have been discussed in the literature, each of which finds support from comparative analysis of extant genomes. Alternatively, the principles of metabolic evolution can be studied by direct computer simulation. This requires, however, an explicit implementation of all pertinent components: a universe of chemical reactions upon which the metabolism is built, an explicit representation of the enzymes that implement the metabolism, of a genetic system that encodes these enzymes, and of a fitness function that can be selected for. Results: We describe here a simulation environment that implements all these components in a simplified way so that large-scale evolutionary studies are feasible. We employ an artificial chemistry that views chemical reactions as graph rewriting operations and utilizes a toy-version of quantum chemistry to derive thermodynamic parameters. Minimalist organisms with simple string encoded genomes produce model ribozymes whose catalytic activity is determined by an ad hoc mapping between their secondary structure and the transition state graphs that they stabilize. Fitness is computed utilizing the ideas of metabolic flux analysis. We present an implementation of the complete system and first simulation results.
Conclusions: The simulation system presented here allows coherent investigations into the evolutionary mechanisms of the first steps of metabolic evolution using a self-consistent toy universe.} }

@comment{The second author is given in comma form so that the two-word surname Dal Palu is not split into given name and surname.}
@InProceedings{MannPalu_LatFitCOLA_10,
  author    = {Martin Mann and Dal Pal{\`u}, Alessandro},
  title     = {Lattice model refinement of protein structures},
  booktitle = {Proceedings of the Workshop on Constraint Based Methods for Bioinformatics (WCB 2010)},
  year      = {2010},
  pages     = {7},
  location  = {Edinburgh, UK},
  arxiv     = {1005.1853},
  user      = {mmann},
  abstract  = {In this paper we model and implement a Constraint Programming method to refine a lattice fitting of a protein structure produced by a greedy search. We show that the model is able to provide better quality solutions. The prototype is implemented in COLA and it is based on a limited discrepancy approach. Finally, some promising extensions based on local search are discussed.}
}

@InProceedings{Mann:Backofen:Will:_equivalence_classes:WCB09,
  author    = {Martin Mann and Rolf Backofen and Sebastian Will},
  title     = {Equivalence Classes of Optimal Structures in {HP}~Protein Models Including Side Chains},
  booktitle = {Proceedings of the Fifth Workshop on Constraint Based Methods for Bioinformatics (WCB09)},
  year      = 2009,
  arxiv     = {0910.3848},
  user      = {will},
  abstract  = {Lattice protein models, as the Hydrophobic-Polar (HP) model, are a common abstraction to enable exhaustive studies on structure, function, or evolution of proteins. A main issue is the high number of optimal structures, resulting from the hydrophobicity-based energy function applied. We introduce an equivalence relation on protein structures that correlates to the energy function.
We discuss the efficient enumeration of optimal representatives of the corresponding equivalence classes and the application of the results.} }

@InProceedings{Mann:etal:_constraint-based_local_move:WCB09,
  author    = {Martin Mann and Mohamed Abou Hamra and Kathleen Steinh{\"o}fel and Rolf Backofen},
  title     = {Constraint-based Local Move Definitions for Lattice Protein Models Including Side Chains},
  booktitle = {Proceedings of the Fifth Workshop on Constraint Based Methods for Bioinformatics (WCB09)},
  year      = {2009},
  arxiv     = {0910.3880},
  user      = {will},
  abstract  = {The simulation of a protein's folding process is often done via stochastic local search, which requires a procedure to apply structural changes onto a given conformation. Here, we introduce a constraint-based approach to enumerate lattice protein structures according to k-local moves in arbitrary lattices. Our declarative description is much more flexible for extensions than standard operational formulations. It enables a generic calculation of k-local neighbors in backbone-only and side chain models. We exemplify the procedure using a simple hierarchical folding scheme.}
}

@InProceedings{Ullah:CPSP_LS:09,
  author    = {Abu Dayem Ullah and Leonidas Kapsokalivas and Martin Mann and Kathleen Steinh{\"o}fel},
  title     = {Protein Folding Simulation by Two-Stage Optimization},
  booktitle = {Proc. of ISICA'09},
  series    = {CCIS},
  volume    = {51},
  publisher = {Springer},
  address   = {Wuhan, China},
  year      = {2009},
  month     = oct,
  pages     = {138--145},
  doi       = {10.1007/978-3-642-04962-0_16},
  abstract  = {We propose a two-stage optimization approach for protein folding simulation in the FCC lattice, inspired from the phenomenon of hydrophobic collapse. Given a protein sequence, the first stage of the approach produces compact protein structures with the maximal number of contacts among hydrophobic monomers, using the CPSP tools for optimal structure prediction in the HP model.
The second stage uses those compact structures as starting points to further optimize the protein structure for the input sequence by employing simulated annealing local search and a 20 amino acid pairwise interactions energy function. Experimental results with PDB sequences show that compact structures produced by the CPSP tools are up to two orders of magnitude better, in terms of the pairwise energy function, than randomly generated ones. Also, initializing simulated annealing with these compact structures, produces better structures in fewer iterations than initializing with random structures. Hence, the proposed two-stage optimization outperforms a local search procedure based on simulated annealing alone.}, user = {mmann} }

@Article{Mann_CPSPweb_2009,
  author   = {Mann, Martin and Smith, Cameron and Rabbath, Mohamad and Edwards, Marlien and Will, Sebastian and Backofen, Rolf},
  title    = {{CPSP}-web-tools: a server for {3D} lattice protein studies},
  journal  = {Bioinformatics},
  year     = {2009},
  volume   = {25},
  number   = {5},
  pages    = {676--677},
  issn     = {1367-4803},
  pmid     = {19151096},
  doi      = {10.1093/bioinformatics/btp034},
  user     = {arichter},
  abstract = {Studies on proteins are often restricted to highly simplified models to face the immense computational complexity of the associated problems. Constraint-based protein structure prediction (CPSP) tools is a package of very fast algorithms for ab initio optimal structure prediction and related problems in 3D HP-models [cubic and face centered cubic (FCC)]. Here, we present CPSP-web-tools, an interactive online interface of these programs for their immediate use. They include the first method for the direct prediction of optimal energies and structures in 3D HP side-chain models. This newest extension of the CPSP approach is described here for the first time.
AVAILABILITY AND IMPLEMENTATION: Free access at http://cpsp.informatik.uni-freiburg.de} }

@Article{Mann:Will:Backofen:CPSP-tools:BMCB:2008,
  author   = {Martin Mann and Sebastian Will and Rolf Backofen},
  title    = {{CPSP}-tools - Exact and Complete Algorithms for High-throughput {3D} Lattice Protein Studies},
  journal  = {BMC Bioinformatics},
  year     = {2008},
  volume   = {9},
  pages    = {230},
  issn     = {1471-2105},
  doi      = {10.1186/1471-2105-9-230},
  user     = {mmann},
  abstract = {Background: The principles of protein folding and evolution pose problems of very high inherent complexity. Often these problems are tackled using simplified protein models, e.g. lattice proteins. The CPSP-tools package provides programs to solve exactly and completely the problems typical of studies using 3D lattice protein models. Among the tasks addressed are the prediction of (all) globally optimal and/or suboptimal structures as well as sequence design and neutral network exploration. Results: In contrast to stochastic approaches, which are not capable of answering many fundamental questions, our methods are based on fast, non-heuristic techniques. The resulting tools are designed for high-throughput studies of 3D-lattice proteins utilizing the Hydrophobic-Polar (HP) model. The source bundle is freely available at http://www.bioinf.uni-freiburg.de/sw/cpsp/ Conclusions: The CPSP-tools package is the first set of exact and complete methods for extensive, high-throughput studies of non-restricted 3D-lattice protein models.
In particular, our package deals with cubic and face centered cubic (FCC) lattices.} }

@Article{Mann_LatPack_HFSP_08,
  author   = {Martin Mann and Daniel Maticzka and Rhodri Saunders and Rolf Backofen},
  title    = {Classifying protein-like sequences in arbitrary lattice protein models using {LatPack}},
  journal  = {HFSP Journal},
  year     = {2008},
  volume   = {2},
  number   = {6},
  pages    = {396--404},
  issn     = {1955-2068},
  doi      = {10.2976/1.3027681},
  note     = {Special issue on protein folding: experimental and theoretical approaches},
  user     = {mmann},
  abstract = {Knowledge of a protein's 3-dimensional native structure is vital in determining its chemical properties and functionality. However, experimental methods to determine structure are very costly and time-consuming. Computational approaches, such as folding simulations and structure prediction algorithms, are quicker and cheaper but lack consistent accuracy. This currently restricts extensive computational studies to abstract protein models. It is thus essential that simplifications induced by the models do not negate scientific value. Key to this is the use of thoroughly defined protein-like sequences. In such cases abstract models can allow for the investigation of important biological questions. Here we present a procedure to generate and classify protein-like sequence data sets. Our LatPack tools, and the approach in general, are applicable to arbitrary lattice protein models. Identification is based on thermodynamic and kinetic features. Further LatPack can incorporate the sequential assembly of proteins by addressing co-translational folding. We demonstrate the approach in the widely used, unrestricted 3D-cubic HP-model. The resulting sequence set is the first large data set for this model exhibiting the protein-like properties required. Our data and tools are freely available and can be used to investigate protein-related problems.
Furthermore our data sets can serve as the first benchmark sequence sets for folding algorithms that have traditionally only been tested on random sequences.} }

@InProceedings{ZampelliMann_DDSgraph_JFPC08,
  author    = {Stephane Zampelli and Martin Mann and Yves Deville and Rolf Backofen},
  title     = {Techniques de Décomposition pour l’Isomorphisme de Sous-Graphe},
  booktitle = {Proc. of the 4th Journées Francophones de Programmation par Contraintes (JFPC'08)},
  year      = {2008},
  user      = {mmann},
  abstract  = {French article about the use of Decomposition During Search (DDS) when solving the subgraph monomorphism problem with CP. An English version of the article is available at http://arxiv.org/abs/0805.1030v1 },
}

@comment{The report identifier INGI2008/03 is stored in the number field so that styles print it as the technical report number.}
@TechReport{Zampelli:Mann:DDSgraph:INGI2008,
  author      = {Stephane Zampelli and Martin Mann and Yves Deville and Rolf Backofen},
  title       = {Decomposition Techniques for Subgraph Matching},
  institution = {Universit{\'e} catholique de Louvain},
  number      = {INGI2008/03},
  year        = 2008,
  pages       = 15,
  arxiv       = {0805.1030v1},
  user        = {mmann},
  abstract    = {In the constraint programming framework, state-of-the-art static and dynamic decomposition techniques are hard to apply to problems with complete initial constraint graphs. For such problems, we propose a hybrid approach of these techniques in the presence of global constraints. In particular, we solve the subgraph isomorphism problem. Further we design specific heuristics for this hard problem, exploiting its special structure to achieve decomposition. The underlying idea is to precompute a static heuristic on a subset of its constraint network, to follow this static ordering until a first problem decomposition is available, and to switch afterwards to a fully propagated, dynamically decomposing search.
Experimental results show that, for sparse graphs, our decomposition method solves more instances than dedicated, state-of-the-art matching algorithms or standard constraint programming approaches.} }

@InProceedings{Mann_ELL_BIRD07,
  author    = {Martin Mann and Sebastian Will and Rolf Backofen},
  title     = {The Energy Landscape Library - A Platform for Generic Algorithms},
  booktitle = {BIRD'07 - 1st international Conference on Bioinformatics Research and Development},
  volume    = {217},
  publisher = {Oesterreichische Computer Gesellschaft},
  year      = {2007},
  pages     = {83--86},
  isbn      = {978-3-85403-217-5},
  location  = {Berlin, Germany},
  abstract  = {The study of energy landscapes of biopolymers and their models is an important field in bioinformatics. For instance the investigation of kinetics or folding simulations are done using methods that are based on sampling or exhaustive enumeration. Most of such algorithms are independent of the underlying landscape model. Therefore frameworks for generic algorithms to investigate the landscape properties are needed. Here, we present the Energy Landscape Library (ELL) that allows such a model-independent formulation of generic algorithms dealing with discrete states. The ELL is a completely object-oriented C++ library that is highly modular, easy to extend, and freely available online.
It can be used for a fast and easy implementation of new generic algorithms (possibly based on the provided basic method pool) or as a framework to test their properties for different landscape models, which can be formulated straightforward.}, user = {mmann} }

@TechReport{Mann:Tack:Will:DDSintegration:CoRR2007,
  author      = {Martin Mann and Guido Tack and Sebastian Will},
  title       = {Decomposition During Search for Propagation-Based Constraint Solvers},
  institution = {ALU Freiburg},
  year        = 2007,
  pages       = 15,
  arxiv       = {0712.2389},
  user        = {mmann},
  abstract    = {We describe decomposition during search (DDS), an integration of and/or tree search into propagation-based constraint solvers. The presented search algorithm dynamically decomposes sub-problems of a constraint satisfaction problem into independent partial problems, avoiding redundant work. The paper discusses how DDS interacts with key features that make propagation-based solvers successful: constraint propagation, especially for global constraints, and dynamic search heuristics. We have implemented DDS for the Gecode constraint programming library. Two applications, solution counting in graph coloring and protein structure prediction, exemplify the benefits of DDS in practice.}
}

@TechReport{Mann:Tack:Will:_decom_durin_searc:CPAIOR2007,
  author      = {Martin Mann and Guido Tack and Sebastian Will},
  title       = {Decomposition During Search},
  institution = {ALU Freiburg},
  year        = 2007,
  pages       = 16,
  user        = {will},
  abstract    = {We describe decomposition during search (DDS) as a novel search algorithm for counting the solutions of a CSP. DDS enhances standard tree search by dynamically decomposing sub-problems into independent problems, avoiding redundant work. The paper provides formal definitions and analysis of the introduced method. We integrate DDS into a modern constraint programming system, using Gecode as an example.
Two applications, graph coloring and protein structure prediction, show the potential for huge benefits of DDS in practice.} }

@comment{The proceedings link lives in the url field: its underscores are not valid LaTeX text when typeset as part of booktitle.}
@InProceedings{Will:Mann:WCB2006,
  author    = {Sebastian Will and Martin Mann},
  title     = {Counting Protein Structures by DFS with Dynamic Decomposition},
  booktitle = {Proc. of the Workshop on Constraint Based Methods for Bioinformatics},
  year      = 2006,
  pages     = {83--90},
  url       = {http://www.dimi.uniud.it/dovier/WCB06/WCB06_proceedings.pdf},
  user      = {will},
  abstract  = {We introduce depth-first search with dynamic decomposition for counting the solutions of a binary CSP completely. In particular, we use the method for computing the number of minimal energy structures for model proteins.},
}