src/biu/Alphabet.hh
Go to the documentation of this file.00001 #ifndef ALPHABET_HH_ 00002 #define ALPHABET_HH_ 00003 00004 00005 #include <vector> 00006 #include <string> 00007 00008 #include "biu/HashMap.hh" 00009 00010 namespace biu 00011 { 00018 class Alphabet 00019 { 00020 public: 00022 typedef size_t AlphElem; 00024 typedef unsigned char CAlphElem; 00025 00027 typedef std::vector<AlphElem> Sequence; 00028 00030 typedef std::vector<CAlphElem> CSequence; 00031 00032 private: 00033 00034 #ifdef HAVE_GNU_HASH_MAP 00035 00039 class hash_string { 00040 public: 00041 00042 size_t operator()(const std::string& str) const 00043 { 00044 size_t hash = 5381; 00045 00046 for (size_t i = 0; i < str.size(); i++) { 00047 hash = ((hash << 5) + hash) + (size_t)str[i]; // hash * 33 + str[i] 00048 } 00049 00050 return hash; 00051 } 00052 00053 }; 00054 00055 typedef __gnu_cxx::hash_map< std::string, AlphElem, hash_string > STR2ALPH_MAP; 00056 #else 00057 typedef std::map< std::string, AlphElem > STR2ALPH_MAP; 00058 #endif 00059 00062 STR2ALPH_MAP string2alph; 00063 00065 std::vector<std::string> alph2string; 00066 00068 size_t elementLength; 00069 00073 std::vector<int> compressBase; 00074 00075 public: 00076 00084 Alphabet( const std::string& alphabetString, 00085 const size_t elementLength); 00086 00092 Alphabet(const std::vector<std::string> & alphabetStrings); 00093 00094 virtual ~Alphabet(); 00095 00096 bool operator== (const Alphabet& alph2) const; 00097 bool operator!= (const Alphabet& alph2) const; 00098 00101 size_t getAlphabetSize() const; 00102 00105 size_t getElementLength() const; 00106 00108 // string to sequence to string 00110 00114 Sequence getSequence(const std::string& seqString) const; 00115 00119 AlphElem getElement(const std::string& alphElemStr) const; 00120 00124 std::string getString(const Alphabet::Sequence& sequence) const; 00125 00129 std::string getString(const Alphabet::AlphElem& elem) const; 00130 00134 CSequence compress(const Alphabet::Sequence& sequence) const; 00135 00139 CSequence compressS(const std::string& sequence) const; 00140 00145 Sequence decompress(const CSequence& sequence, const size_t seqLength) const; 00146 00151 std::string decompressS(const CSequence& sequence, const size_t seqLength) const; 00152 00153 00155 // miscellaneous 00157 00163 bool isAlphabetString(const std::string& str) const; 00164 00170 bool isAlphabetSequence(const Sequence& seq) const; 00171 00176 size_t getIndex(const AlphElem& elem) const; 00177 00183 size_t getIndex(const std::string& elemStr) const; 00184 00188 AlphElem getElement(const size_t index) const; 00189 00190 }; 00191 00192 } // namespace biu 00193 00194 // include definitions 00195 #include "biu/Alphabet.icc" 00196 00197 #endif /*ALPHABET_HH_*/