Generated on Mon Jun 23 16:24:56 2008 for BIU-1.7.0 by doxygen 1.5.1

src/biu/Alphabet.cc

Go to the documentation of this file.
00001 #include <biu/Alphabet.hh>
00002 #include <biu/assertbiu.hh>
00003 
00004 #include <iostream>
00005 #include <math.h>
00006 
00007 namespace biu {
00008     
00009     Alphabet::Alphabet( const std::string& alphabetString, 
00010                         const size_t _elementLength)
00011      : elementLength(_elementLength) 
00012     {
00013         assertbiu (alphabetString.length() % elementLength == 0,
00014     "length of the alphabetstring has to be multiple of the element length");
00015             
00016              // initialisieren
00017         alph2string = std::vector<std::string>( 
00018                             alphabetString.length() / elementLength, "");
00019         AlphElem actElem = 0;
00020         std::string actStr = "";
00021         for (std::string::size_type i = 0; i<alphabetString.length(); 
00022                     i+= elementLength) {
00023             actStr = alphabetString.substr(i,elementLength);
00024             string2alph[actStr] = actElem;
00025             alph2string[actElem] = actStr;
00026             actElem++;
00027         }
00028     }
00029 
00030     Alphabet::Alphabet(const std::vector<std::string> & alphabetStrings) {
00031 
00032         size_t alphabetSize=alphabetStrings.size();
00033 
00034         assertbiu (alphabetSize>0,"No init of empty alphabet allowed");
00035         
00036         elementLength = alphabetStrings[0].length();
00037         
00038 #ifdef NDEBUG
00039         {
00040             size_t i;
00041             for(i=0; i<alphabetSize; i++) {
00042                 assertbiu(alphabetStrings[i].length() == elementLength, 
00043                           "All strings must have equal length");
00044             }
00045         }
00046 #endif
00047         
00048         alph2string = std::vector<std::string>(alphabetSize, "");
00049         
00050         AlphElem actElem = 0;
00051         std::string actStr = "";
00052         for (std::string::size_type i = 0; i<alphabetSize; i++) {
00053             actStr = alphabetStrings[i];
00054             string2alph[actStr] = actElem;
00055             alph2string[actElem] = actStr;
00056             actElem++;
00057         }
00058     }
00059 
00060     Alphabet::~Alphabet() {     
00061     }
00062 
00063     bool            
00064     Alphabet::operator== (const Alphabet& alph2) const {
00065         return string2alph == alph2.string2alph
00066             && alph2string == alph2.alph2string
00067             && elementLength == alph2.elementLength;
00068     }
00069 
00070     bool            
00071     Alphabet::operator!= (const Alphabet& alph2) const {
00072         return string2alph != alph2.string2alph
00073             && alph2string != alph2.alph2string
00074             && elementLength != alph2.elementLength;
00075     }
00076 
00077     // string to sequence to string
00078 
00079     Alphabet::Sequence 
00080     Alphabet::getSequence(const std::string& seqString) const {
00081         assertbiu (seqString.length() % elementLength == 0,
00082             "the sequence length has to be a multiple of the element length");
00083             
00084         Alphabet::Sequence seq(seqString.length() / elementLength,0);
00085         Alphabet::Sequence::size_type seqPos = 0; 
00086         for (std::string::size_type i=0; i<seqString.length(); 
00087                     i+= elementLength) {
00088             seq[seqPos++] = getElement(seqString.substr(i,elementLength));
00089         }
00090         return seq;
00091     }
00092     
00093     std::string 
00094     Alphabet::getString(const Alphabet::Sequence& sequence) const {
00095         std::string str(sequence.size()*elementLength,' ');
00096         std::string::size_type i = 0, j = 0; 
00097         for (Alphabet::Sequence::const_iterator it = sequence.begin(); 
00098                     it != sequence.end(); it++) {
00099             for (j=0; j<elementLength; j++) {
00100                 str[i++] = alph2string.at(*it)[j];
00101             }
00102         }
00103         return str;
00104     }
00105     
00106     std::string 
00107     Alphabet::getString(const Alphabet::AlphElem& elem) const {
00108         assertbiu( elem < alph2string.size(), 
00109             "elem is no valid element of the alphabet");
00110         return alph2string.at(elem);
00111     }
00112 
00113     Alphabet::AlphElem 
00114     Alphabet::getElement(const std::string& alphElemStr) const {
00115         STR2ALPH_TYPE::const_iterator member = string2alph.find(alphElemStr);
00116         assertbiu(member != string2alph.end(),
00117             "element '"+alphElemStr+"' is no member of the alphabet");
00118         return member->second;
00119     }
00120 
00121     bool    
00122     Alphabet::isAlphabetString(const std::string& str) const {
00123                 // muss vielfaches der elementlaenge sein
00124         bool retVal = (str.length() % elementLength == 0);
00125         for (std::string::size_type i = 0; retVal && i< str.size(); 
00126                 i+=elementLength) {
00127             retVal =    string2alph.find(str.substr(i,elementLength)) 
00128                         != string2alph.end();
00129         }
00130         return retVal;
00131     }
00132 
00133     size_t 
00134     Alphabet::getIndex(const AlphElem& elem) const {
00135         return (size_t)elem;
00136     }
00137     
00138     size_t 
00139     Alphabet::getIndex(const std::string& elemStr) const {
00140         return getIndex(getElement(elemStr));
00141     }
00142     
00143     Alphabet::AlphElem 
00144     Alphabet::getElement(const size_t index) const {
00145         assertbiu(index < getAlphabetSize(), 
00146             "the index leaves the valid indices of the alphabet");
00147         return index;
00148     }
00149     
00150     
00151     Alphabet::CSequence
00152     Alphabet::compress(const Alphabet::Sequence& seq) const 
00153     {
00154             // base of the number system used for the compression
00155         size_t base = this->getAlphabetSize();
00156             // the number of positions that can be saved in a byte
00157         size_t numOfPos = (size_t) (log(256.) / log((double) base));
00158             // ceil (s->size() / numOfPos) bytes are necessary to save the
00159             // sequence
00160         CSequence cs(
00161          (size_t) ceil((double) seq.size() / (double) numOfPos), 0);
00162 
00163             // 1 byte = seq[0] * base^0 + ... + seq[numOfPos] * base^numOfPos
00164         for (size_t i = 0; i < seq.size(); i++) {
00165             cs[i/numOfPos] += seq.at(i) * (int) pow(base, i%numOfPos);
00166         }
00167         return cs;
00168     }
00169 
00170     Alphabet::Sequence
00171     Alphabet::decompress(const Alphabet::CSequence & seq, 
00172         const size_t seqLength) const 
00173     {
00174             // base of the number system used for the compression
00175         size_t base = this->getAlphabetSize();
00176             // the number of positions that can be saved in a byte
00177         size_t numOfPos = (size_t) (log(256.) / log((double) base));
00178         Sequence uncompressedSeq(seqLength, this->getElement(0));
00179             // element of cs which is currently processed
00180         unsigned char c = seq[0];
00181 
00182         assertbiu(seqLength <= seq.size()*numOfPos,
00183          "The compressed sequence can't hold <seqLength> many elements.");
00184 
00185             // 1 byte = s[0] * base^0 + ... + s[numOfPos-1] * base^(numOfPos-1)
00186         for (size_t i = 0; i < seqLength; i++) {
00187                 // update c if new element of cs is processed
00188             if (i%numOfPos == 0) {
00189                 c = seq[i/numOfPos];
00190             }
00191             uncompressedSeq[i] = this->getElement( c % base );
00192             c /= base;
00193         }
00194 
00195         return uncompressedSeq;
00196     }
00197     
00198     bool 
00199     Alphabet::isAlphabetSequence(const Alphabet::Sequence& seq) const {
00200         const size_t size  = getAlphabetSize();
00201         for (Sequence::const_iterator it = seq.begin(); it != seq.end(); it++) {
00202             if ((size_t)*it > size)
00203                 return false;
00204         }
00205         return true;
00206     }
00207     
00208     
00209     
00210 } // namespace biu
00211