Column-oriented GPU-accelerated Database Management System
CoGaDB
/home/sebastian/gpudbms/trunk/cogadb/include/compression/bit_vector_compressed_column.hpp
Go to the documentation of this file.
00001 #pragma once
00002 #include <iostream>
00003 #include <core/compressed_column.hpp>
00004 #include <map>
00005 #include <string>
00006 #include <boost/archive/binary_iarchive.hpp>
00007 #include <boost/archive/binary_oarchive.hpp>
00008 #include <boost/serialization/map.hpp>
00009 
00010 namespace CoGaDB{       
00011 
00012     const char one = '1';
00013     const char zero = '0';
00014 
00015     typedef std::string sstring;
00016     
00020     template<class T>
00021     class BitVectorCompressedColumn : public CompressedColumn<T> {
00022         public:
00023         /***************** constructors and destructor *****************/
00024         BitVectorCompressedColumn(const sstring& name, AttributeType db_type);
00025         ~BitVectorCompressedColumn();
00026         
00027         bool insert(const boost::any& new_value);
00028         bool insert(const T& new_value);
00029         template <typename InputIterator>
00030         bool insert(InputIterator first, InputIterator last);
00031         
00032         bool update(TID tid, const boost::any& new_value);
00033         bool update(PositionListPtr tids, const boost::any& new_value); 
00034         
00035         bool remove(TID tid);
00036         //assumes tid list is sorted ascending
00037         bool remove(PositionListPtr tids);
00038         bool clearContent();
00039         
00040         const boost::any get(TID tid);
00041         void print() const throw();
00042         size_t size() const throw();
00043         unsigned int getSizeinBytes() const throw();
00044         
00045         const ColumnPtr copy() const;
00046         
00047         bool store(const sstring& path_);
00048         bool load(const sstring& path_);
00049         
00050         T& operator[](const int index);
00051     private:  
00052         // Methods
00053         bool reorganizeAfterInsert(const T& new_Value);
00054         void reorganizeAfterRemove(TID tid); 
00055         sstring buildNewKey();
00056         sstring buildNewKey(TID tid);
00057         sstring getPath(const sstring& path_);        
00058         int countValueForKey(const sstring key);
00059         sstring getKey(const int index);
00060                 
00061         // Attributes
00062         std::map<sstring, T> bitVectorMap;
00063     };     
00064     
00065     /***************** Start of Implementation Section ******************/
00066         
00067         template<class T>
00068         BitVectorCompressedColumn<T>::BitVectorCompressedColumn(const sstring& name, AttributeType db_type) : CompressedColumn<T>(name, db_type), bitVectorMap() {}
00069     
00070         template<class T>
00071         BitVectorCompressedColumn<T>::~BitVectorCompressedColumn() {}
00072     
00073         template<class T>
00074         bool BitVectorCompressedColumn<T>::insert(const boost::any& new_value)
00075     {
00076         if (new_value.empty()) return false;
00077         
00078                 if (typeid(T) == new_value.type())
00079         {
00080             T value = boost::any_cast<T>(new_value);
00081             return this->insert(value);
00082                 }
00083                 return false;
00084         }
00085     
00086         template<class T>
00087         bool BitVectorCompressedColumn<T>::insert(const T& new_value)
00088     {        
00089         bool found = reorganizeAfterInsert(new_value);
00090         
00091         if (found) return true;
00092                 
00093         sstring new_key = buildNewKey();
00094 
00095         bitVectorMap[new_key] = new_value;
00096                 
00097                 return true;
00098         }
00099     
00100     template <typename T> 
00101     sstring BitVectorCompressedColumn<T>::buildNewKey(TID tid)
00102     {
00103         sstring key = "";
00104         sstring firstEntry = bitVectorMap.begin()->first;
00105         for (unsigned int i = 0; i < firstEntry.length(); ++i) 
00106         {
00107             if (i == tid) key += one;
00108             else          key += zero;
00109         }
00110                 
00111         return key;
00112     }
00113     
00114     template <typename T> 
00115     sstring BitVectorCompressedColumn<T>::buildNewKey()
00116     {        
00117         if (bitVectorMap.empty()) 
00118         {
00119             return "1";
00120         }
00121         
00122         return buildNewKey((bitVectorMap.begin()->first.length())-1);
00123     }
00124     
00125     template <typename T> 
00126         bool BitVectorCompressedColumn<T>::reorganizeAfterInsert(const T& new_Value)
00127     {
00128         std::map<sstring, T> temp;
00129         bool found = false;
00130         sstring key;
00131         T value;
00132         
00133         while (!bitVectorMap.empty())
00134         {
00135             key = bitVectorMap.begin()->first;
00136             value = bitVectorMap.begin()->second;
00137             
00138             bitVectorMap.erase(key);  
00139             
00140             if (value == new_Value) 
00141             {
00142                 key += one;
00143                 found = true;
00144             }
00145             else 
00146             {
00147                 key += zero;
00148             }
00149             
00150             temp[key] = value;
00151         }
00152         
00153         bitVectorMap = temp;
00154         
00155         return found;
00156     }
00157     
00158         template <typename T> 
00159         template <typename InputIterator>
00160         bool BitVectorCompressedColumn<T>::insert(InputIterator first, InputIterator last)
00161     {
00162         for (InputIterator it = first; it != last; it++) 
00163         {
00164             insert(*it);
00165         }
00166 
00167                 return true;
00168         }
00169     
00170         template<class T>
00171         const boost::any BitVectorCompressedColumn<T>::get(TID tid)
00172     {        
00173         return boost::any(operator[](tid));
00174         }
00175     
00176         template<class T>
00177         void BitVectorCompressedColumn<T>::print() const throw()
00178     {
00179         typename std::map<sstring, T>::const_iterator iter;
00180         for(iter = bitVectorMap.begin(); iter != bitVectorMap.end(); iter++)
00181         {
00182             std::cout << "key: " << iter->first << " value: " << iter->second << std::endl; 
00183         }
00184         }
00185     
00186         template<class T>
00187         size_t BitVectorCompressedColumn<T>::size() const throw()
00188     {
00189         if (!bitVectorMap.empty()) 
00190         {
00191             sstring firstEntry = bitVectorMap.begin()->first;
00192             return firstEntry.length();
00193         }
00194         
00195                 return 0;
00196         }
00197     
00198         template<class T>
00199         const ColumnPtr BitVectorCompressedColumn<T>::copy() const
00200     {
00201                 return ColumnPtr(new BitVectorCompressedColumn<T>(*this));
00202         }
00203     
00204     template<class T>
00205         int BitVectorCompressedColumn<T>::countValueForKey(const sstring key)
00206     {
00207         int result = 0;
00208  
00209         for (unsigned int i = 0; i < key.length(); i++) 
00210         {                    
00211             if (key[i] == one) result++;
00212         }
00213         
00214         return result;
00215     }
00216     
00217         template<class T>
00218         bool BitVectorCompressedColumn<T>::update(TID tid, const boost::any& new_value)
00219     {
00220         if (new_value.empty()) return false;
00221         
00222         if (typeid(T) != new_value.type())
00223         {
00224             std::cout << "Fatal Error!!! Typemismatch for column " << this->name_ << std::endl; 
00225             return false;
00226         }
00227         
00228         sstring old_key = getKey(tid);
00229             
00230         if (old_key.empty()) 
00231         {
00232             std::cout << "Error!!! TID not found " << tid << std::endl; 
00233             return false;
00234         }            
00235        
00236         T old_value = bitVectorMap[old_key];
00237         T value = boost::any_cast<T>(new_value);
00238         
00239         int sumElements = countValueForKey(old_key);
00240         
00241         if (sumElements == 1)
00242         {
00243             bitVectorMap[old_key] = value;
00244         }
00245         else if (sumElements > 1)
00246         {
00247             old_key[tid] = zero;
00248             bitVectorMap[old_key] = old_value;
00249                
00250             sstring new_Key = buildNewKey(tid);
00251             bitVectorMap[new_Key] = value;
00252         }
00253            
00254                 return true;
00255         }
00256     
00257         template<class T>
00258         bool BitVectorCompressedColumn<T>::update(PositionListPtr tids, const boost::any& new_value)
00259     {
00260         if(!tids || tids->empty()) return false;
00261         
00262         for(unsigned int i = 0; i < tids->size(); i++)
00263         {
00264             update((*tids)[i], new_value);
00265         }
00266         return true;
00267         }
00268         
00269         template<class T>
00270         bool BitVectorCompressedColumn<T>::remove(TID tid)
00271     {
00272         sstring key = getKey(tid);
00273        
00274         if (key.empty()) 
00275         {
00276             std::cout << "Error!!! TID not found " << tid << std::endl; 
00277             return false;
00278         }            
00279         
00280         if (countValueForKey(key) == 1) bitVectorMap.erase(key);
00281         
00282         reorganizeAfterRemove(tid);
00283     
00284                 return true;    
00285         }
00286     
00287     template<class T>
00288     void BitVectorCompressedColumn<T>::reorganizeAfterRemove(TID tid)
00289     {
00290         std::map<sstring, T> temp;
00291         sstring key;
00292         T value;
00293         
00294         while (!bitVectorMap.empty())
00295         {
00296             key = bitVectorMap.begin()->first;
00297             value = bitVectorMap.begin()->second;
00298             
00299             bitVectorMap.erase(key); 
00300             
00301             temp[key.erase(tid,1)] = value;
00302         }
00303         
00304         bitVectorMap = temp;
00305     }
00306     
00307         template<class T>
00308         bool BitVectorCompressedColumn<T>::remove(PositionListPtr tids)
00309     {
00310         if(!tids || tids->empty()) return false;
00311         
00312         typename PositionList::reverse_iterator rit;
00313         
00314                 for (rit = tids->rbegin(); rit!=tids->rend(); ++rit)
00315         {
00316             remove(*rit);
00317         }
00318     
00319         return true;                    
00320         }
00321     
00322         template<class T>
00323         bool BitVectorCompressedColumn<T>::clearContent()
00324     {
00325                 bitVectorMap.clear();
00326                 return true;
00327         }
00328     
00329     template<class T>
00330     sstring BitVectorCompressedColumn<T>::getPath(const sstring& path_)
00331     {
00332         sstring path(path_);
00333         path += "/";
00334                 path += this->getName();
00335         
00336         return path;
00337     }
00338     
00339         template<class T>
00340         bool BitVectorCompressedColumn<T>::store(const sstring& path_)
00341     {
00342                 sstring path = getPath(path_);
00343         
00344                 std::ofstream outfile(path.c_str(), std::ios_base::binary | std::ios_base::out);
00345                 boost::archive::binary_oarchive oa(outfile);
00346         
00347                 oa << bitVectorMap;
00348         
00349                 outfile.flush();
00350                 outfile.close();
00351                 return true;
00352         }
00353     
00354         template<class T>
00355         bool BitVectorCompressedColumn<T>::load(const sstring& path_)
00356     {
00357         sstring path = getPath(path_);
00358                 
00359                 std::ifstream infile (path.c_str(), std::ios_base::binary | std::ios_base::in);
00360                 boost::archive::binary_iarchive ia(infile);
00361         
00362                 ia >> bitVectorMap;
00363 
00364                 infile.close();
00365         
00366                 return true;
00367         }
00368     
00369         template<class T>
00370         T& BitVectorCompressedColumn<T>::operator[](const int index)
00371     {
00372         return bitVectorMap[getKey(index)];
00373         }
00374     
00375     template<class T>
00376     sstring BitVectorCompressedColumn<T>::getKey(const int index)
00377     {
00378         sstring key; 
00379         typename std::map<sstring, T>::iterator iter;
00380         for(iter = bitVectorMap.begin(); iter != bitVectorMap.end(); iter++) 
00381         {
00382             key = iter->first;
00383             if (key[index] == one) return key;
00384         }
00385         
00386         return "";
00387         }
00388     
00389         template<class T>
00390         unsigned int BitVectorCompressedColumn<T>::getSizeinBytes() const throw()
00391     {
00392                 return bitVectorMap.size() * (sizeof(T) + sizeof(sstring));
00393         }  
00394 };
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines