@PhdThesis{Mann:PhDThesis:2011, author = {Martin Mann}, title = {Computational Methods for Lattice Protein Models}, school = {Albert-Ludwigs-University Freiburg}, year = {2011}, doi = {10.6094/UNIFR/8156}, month = {June}, user = {mmann}, abstract = { Proteins are involved in almost all processes in living cells. They act as regulators, catalyzers, transporters, and in many other functions that are determined by their three-dimensional structures. This thesis studies the fundamental concepts that define and guide the folding processes of proteins. Therein, the prediction of a protein's native fold as well as the modeling of its folding process are of great importance. To enable large scale studies, lattice protein models are used that are available at different levels of abstraction. Central to this thesis is the development and implementation of efficient methods to study proteins represented in complex three-dimensional lattices. A major focus is the development of procedures that enable the usage of more realistic side chain models. An important task when studying protein models is the transfer of real protein structures into the model. This NP-complete problem is tackled in the first part of the thesis. A combination of efficient heuristics and constraint-based search yields models of high quality and low runtimes. The second part of the thesis presents methods to determine minimum energy structures. Here, a constraint-based approach is introduced that for the first time makes it possible to predict energetically optimal structures within hydrophobic-polar (HP) side chain models. This enables the first study of optimal structures within the model revealing an immense degeneracy. Since many structures cannot be distinguished by the energy function, an equivalence relation for the grouping of optimal structures is introduced. An extension of the constraint-based structure prediction approach enables the efficient and direct computation of the resulting equivalence classes. HP-optimal structures from different classes can be used for the initialization of local search methods that tackle more advanced energy functions. The superiority of such an approach compared to standard strategies is demonstrated. In addition, to enable local search methods in side chain models, the definition and efficient implementation of a neighborhood relation between structures is needed. The third part of the thesis covers the presentation of an interval-based local neighborhood relation for arbitrary lattices. An efficient procedure for the enumeration of neighbored structures opens the door for new studies in side chain lattice protein models. Subsequently, the phenomenon of co-translational folding is explored, i.e. the folding of the emerging protein while it is assembled at the ribosome. Co-translational folding is assumed to guide the folding process into the native structure. The introduced methods enable a classification of protein sequences based on their co-translational folding potential. An extensive, comparative study identifies new characteristics in sequence and structure that are exclusive to co-translationally folding proteins. Furthermore, some hypotheses from literature are disproved that have been proposed based on thought experiments. An extension of the study to real protein structures and domains highlights the alpha/beta-domain proteins. This class shows the strongest bias towards the identified characteristics of co-translational folding proteins. In the final part of the thesis the focus is shifted to evolutionary studies. Therein, intensive analyses of neutral networks are done that are graph-based tools to study neutral evolution. Neutral networks describe the possible evolutionary pathways that preserve a given function and thus the associated structure. A new sequence design approach is introduced that enables the neutral network exploration without a full sequence space enumeration. This is the first method that is able to design non-degenerated sequences for a given structure, which is known to be a difficult, NP-complete problem. A thorough analysis of the resulting neutral networks in three-dimensional lattice models reveals considerable differences, e.g. in network sizes, compared to two-dimensional models. To focus the investigation of neutral evolution on the structural core of proteins an according H-fold definition is presented. The H-folds enable additional evolutionary studies of the flexible loop regions of proteins. In conclusion, this thesis describes a variety of new and efficient methods that enable extensive studies of structures and sequences in lattice protein models. All methods are freely available for further research within two software packages and via a web frontend for ad hoc usage. The implemented tools as well as the studies presented thus provide an important contribution to in silico protein research. } }