Column-oriented GPU-accelerated Database Management System
CoGaDB
/home/sebastian/gpudbms/trunk/cogadb/include/compression/dictionary_compressed_column.hpp
Go to the documentation of this file.
00001 
00002 #pragma once
00003 
00004 #include <core/compressed_column.hpp>
00005 
00006 namespace CoGaDB{
00007         
00015 template<class T>
00016 class DictionaryCompressedColumn : public CompressedColumn<T>{
00017         public:
00018         /***************** constructors and destructor *****************/
00019         DictionaryCompressedColumn(const std::string& name, AttributeType db_type);
00020         virtual ~DictionaryCompressedColumn();
00021 
00022         bool insert(const boost::any& new_Value);
00023         bool insert(const T& new_value);
00024                         
00025         template <typename InputIterator>
00026         bool insert(InputIterator first, InputIterator last);
00027         
00028         
00029         virtual bool update(TID tid, const boost::any& new_value);
00030         virtual bool update(PositionListPtr tid, const boost::any& new_value);  
00031         
00032         virtual bool remove(TID tid);
00033         //assumes tid list is sorted ascending
00034         virtual bool remove(PositionListPtr tid);
00035         bool clearContent();
00036 
00037         virtual const boost::any get(TID tid);
00038         //virtual const boost::any* const getRawData()=0;
00039         virtual void print() const throw();
00040         virtual size_t size() const throw();
00041         virtual unsigned int getSizeinBytes() const throw();
00042 
00043         virtual const ColumnPtr copy() const;
00044 
00045         virtual bool store(const std::string& path);
00046         virtual bool load(const std::string& path);
00047         virtual bool isMaterialized() const  throw();
00048         
00049         virtual bool isCompressed() const  throw();     
00050 
00051         
00052         virtual T& operator[](const int index);
00053 
00054         private:
00055             std::vector<uint32_t> ids_;
00056             typedef std::map<T,uint32_t> Dictionary;
00057             Dictionary dictionary_;
00058             std::vector<T> reverse_lookup_vector_; 
00059             uint32_t maximal_id_;
00060             
00061         
00062 };
00063 
00064 
00065 /***************** Start of Implementation Section ******************/
00066 
00067         
00068         template<class T>
00069         DictionaryCompressedColumn<T>::DictionaryCompressedColumn(const std::string& name, AttributeType db_type) : CompressedColumn<T>(name,db_type), ids_(), dictionary_(), reverse_lookup_vector_(), maximal_id_(0){
00070 
00071         }
00072 
00073         template<class T>
00074         DictionaryCompressedColumn<T>::~DictionaryCompressedColumn(){
00075 
00076         }
00077 
00078         template<class T>
00079         bool DictionaryCompressedColumn<T>::insert(const boost::any& new_Value){
00080 
00081             T value = boost::any_cast<T>(new_Value);
00082             
00083             return this->insert(value);
00084         }
00085         
00086         template<class T>
00087         bool DictionaryCompressedColumn<T>::insert(const T& value){
00088             typename Dictionary::iterator it = dictionary_.find(value);
00089             if(it!=dictionary_.end()){
00090                 ids_.push_back(it->second);
00091             }else{
00092                 ids_.push_back(maximal_id_);
00093                 dictionary_.insert(std::make_pair(value,maximal_id_));
00094                 //element id is position in reverse lookup vector to get the real value in O(1) time
00095                 reverse_lookup_vector_.push_back(value); 
00096                 maximal_id_++;
00097                 
00098             }
00099             
00100             return true;            
00101         }
00102         
00103         template <typename T> 
00104         template <typename InputIterator>
00105         bool DictionaryCompressedColumn<T>::insert(InputIterator first, InputIterator last) {
00106                 for (; first != last; ++first) {
00107                         if (!this->insert(*first)) {
00108                                 return false;
00109                         }
00110                 }
00111                 return true;
00112         }        
00113 
00114         
00115 
00116         template<class T>
00117         const boost::any DictionaryCompressedColumn<T>::get(TID tid){
00118 
00119                 return boost::any(this->operator [](tid));
00120         }
00121 
00122         template<class T>
00123         void DictionaryCompressedColumn<T>::print() const throw(){
00124                 std::cout << "| " << this->name_ << " (Dictionary Compressed) |" << std::endl;
00125                 std::cout << "________________________" << std::endl;
00126                 for(unsigned int i=0;i<this->size();i++){
00127                         std::cout << "| " << reverse_lookup_vector_[ids_[i]] << " |" << std::endl;
00128                 }           
00129         }
00130         template<class T>
00131         size_t DictionaryCompressedColumn<T>::size() const throw(){
00132 
00133                 return ids_.size();
00134         }
00135         template<class T>
00136         const ColumnPtr DictionaryCompressedColumn<T>::copy() const{
00137                 return ColumnPtr(new DictionaryCompressedColumn<T>(*this));
00138         }
00139 
00140         template<class T>
00141         bool DictionaryCompressedColumn<T>::update(TID index, const boost::any& new_Value){
00142             T value = boost::any_cast<T>(new_Value);
00143             if(index>=this->size()) return false;
00144           
00145             typename Dictionary::iterator it = dictionary_.find(value);
00146             if(it!=dictionary_.end()){
00147                 //ids_.push_back(it->second);
00148                 ids_[index]=it->second;
00149                 //reverse_lookup_vector_[ids[index]]=it->first;
00150             }else{
00151                 //ids_.push_back(maximal_id_);
00152                 ids_[index]=maximal_id_;
00153                 dictionary_.insert(std::make_pair(value,maximal_id_));
00154                 //element id is position in reverse lookup vector to get the real value in O(1) time
00155                 reverse_lookup_vector_.push_back(value); 
00156                 maximal_id_++;
00157                 
00158             }
00159         }
00160 
00161         template<class T>
00162         bool DictionaryCompressedColumn<T>::update(PositionListPtr tids, const boost::any& new_Value){
00163             if(!tids)
00164                     return false;
00165             //test whether tid list has at least one element, if not, return with error
00166             if(tids->empty())
00167                     return false;
00168             
00169             bool result=true;
00170             for(unsigned int i=0;i<tids->size();i++){
00171                 result = result && this->update((*tids)[i],new_Value);
00172             }
00173                 return result;          
00174         }
00175         
00176         template<class T>
00177         bool DictionaryCompressedColumn<T>::remove(TID tid){
00178                 ids_.erase(ids_.begin()+tid);
00179                 return true;
00180         
00181         }
00182         
00183         template<class T>
00184         bool DictionaryCompressedColumn<T>::remove(PositionListPtr tids){
00185                 if(!tids)
00186                         return false;
00187                 //test whether tid list has at least one element, if not, return with error
00188                 if(tids->empty())
00189                         return false;           
00190 
00191                 typename PositionList::reverse_iterator rit;
00192                 //delete tuples in reverse order, otherwise the first deletion would invalidate all other tids
00193                 for (rit = tids->rbegin(); rit!=tids->rend(); ++rit){
00194                         ids_.erase(ids_.begin()+(*rit));
00195                 }
00196                         
00197                 return true;                    
00198         }
00199 
00200 
00201         template<class T>
00202         bool DictionaryCompressedColumn<T>::clearContent(){
00203             this->ids_.clear();
00204             this->dictionary_.clear();
00205             this->reverse_lookup_vector_.clear();
00206             this->maximal_id_=0;
00207             return true;
00208         }
00209         
00210         template<class T>
00211         bool DictionaryCompressedColumn<T>::store(const std::string& path_){
00212                 std::string path(path_);
00213                 path += "/";
00214                 path += this->name_;
00215                            if(!quiet && verbose && debug) std::cout << "Writing Column " << this->getName() << " to File " << path << std::endl;
00216                 std::ofstream outfile (path.c_str(),std::ios_base::binary | std::ios_base::out);
00217                 boost::archive::binary_oarchive oa(outfile);
00218 
00219                 oa << ids_;
00220                 oa << dictionary_;
00221                 oa << reverse_lookup_vector_;
00222                 oa << this->maximal_id_;
00223 
00224                 outfile.flush();
00225                 outfile.close();
00226                 return true;
00227         }
00228         template<class T>
00229         bool DictionaryCompressedColumn<T>::load(const std::string& path_){
00230                 std::string path(path_);
00231                 if(!quiet && verbose && debug) std::cout << "Loading column '" << this->name_ << "' from path '" << path << "'..." << std::endl;
00232                 //string path("data/");
00233                 path += "/";
00234                 path += this->name_;
00235                 
00236                 if(!quiet && verbose && debug) std::cout << "Opening File '" << path << "'..." << std::endl;
00237                 std::ifstream infile (path.c_str(),std::ios_base::binary | std::ios_base::in);
00238                 boost::archive::binary_iarchive ia(infile);
00239                 ia >> ids_;
00240                 ia >> dictionary_;
00241                 ia >> reverse_lookup_vector_;
00242                 ia >> this->maximal_id_;
00243                 infile.close();
00244 
00245 
00246                 return true;
00247         }
00248         template<class T>
00249         bool DictionaryCompressedColumn<T>::isMaterialized() const  throw(){
00250                 return false;
00251         }
00252         
00253         template<class T>
00254         bool DictionaryCompressedColumn<T>::isCompressed() const  throw(){
00255                 return true;
00256         }
00257 
00258         template<class T>
00259         T& DictionaryCompressedColumn<T>::operator[](const int index){
00260                 return this->reverse_lookup_vector_[ids_[index]];
00261         }
00262 
00263         template<class T>
00264         unsigned int DictionaryCompressedColumn<T>::getSizeinBytes() const throw(){
00265                 return 0; //ids_.capacity()+this->reverse_lookup_vector_.capacity()+this->dictionary_.capacity()+sizeof(DictionaryCompressedColumn<T>) ; //return values_.capacity()*sizeof(T);
00266         }
00267 
00268       
00269         
00270 /***************** End of Implementation Section ******************/
00271 
00272 
00273 
00274 }; //end namespace CoGaDB
00275 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines