// CoGaDB - Column-oriented GPU-accelerated Database Management System
00001 00002 #pragma once 00003 00004 #include <core/compressed_column.hpp> 00005 00006 namespace CoGaDB{ 00007 00015 template<class T> 00016 class DictionaryCompressedColumn : public CompressedColumn<T>{ 00017 public: 00018 /***************** constructors and destructor *****************/ 00019 DictionaryCompressedColumn(const std::string& name, AttributeType db_type); 00020 virtual ~DictionaryCompressedColumn(); 00021 00022 bool insert(const boost::any& new_Value); 00023 bool insert(const T& new_value); 00024 00025 template <typename InputIterator> 00026 bool insert(InputIterator first, InputIterator last); 00027 00028 00029 virtual bool update(TID tid, const boost::any& new_value); 00030 virtual bool update(PositionListPtr tid, const boost::any& new_value); 00031 00032 virtual bool remove(TID tid); 00033 //assumes tid list is sorted ascending 00034 virtual bool remove(PositionListPtr tid); 00035 bool clearContent(); 00036 00037 virtual const boost::any get(TID tid); 00038 //virtual const boost::any* const getRawData()=0; 00039 virtual void print() const throw(); 00040 virtual size_t size() const throw(); 00041 virtual unsigned int getSizeinBytes() const throw(); 00042 00043 virtual const ColumnPtr copy() const; 00044 00045 virtual bool store(const std::string& path); 00046 virtual bool load(const std::string& path); 00047 virtual bool isMaterialized() const throw(); 00048 00049 virtual bool isCompressed() const throw(); 00050 00051 00052 virtual T& operator[](const int index); 00053 00054 private: 00055 std::vector<uint32_t> ids_; 00056 typedef std::map<T,uint32_t> Dictionary; 00057 Dictionary dictionary_; 00058 std::vector<T> reverse_lookup_vector_; 00059 uint32_t maximal_id_; 00060 00061 00062 }; 00063 00064 00065 /***************** Start of Implementation Section ******************/ 00066 00067 00068 template<class T> 00069 DictionaryCompressedColumn<T>::DictionaryCompressedColumn(const std::string& name, AttributeType db_type) : 
CompressedColumn<T>(name,db_type), ids_(), dictionary_(), reverse_lookup_vector_(), maximal_id_(0){ 00070 00071 } 00072 00073 template<class T> 00074 DictionaryCompressedColumn<T>::~DictionaryCompressedColumn(){ 00075 00076 } 00077 00078 template<class T> 00079 bool DictionaryCompressedColumn<T>::insert(const boost::any& new_Value){ 00080 00081 T value = boost::any_cast<T>(new_Value); 00082 00083 return this->insert(value); 00084 } 00085 00086 template<class T> 00087 bool DictionaryCompressedColumn<T>::insert(const T& value){ 00088 typename Dictionary::iterator it = dictionary_.find(value); 00089 if(it!=dictionary_.end()){ 00090 ids_.push_back(it->second); 00091 }else{ 00092 ids_.push_back(maximal_id_); 00093 dictionary_.insert(std::make_pair(value,maximal_id_)); 00094 //element id is position in reverse lookup vector to get the real value in O(1) time 00095 reverse_lookup_vector_.push_back(value); 00096 maximal_id_++; 00097 00098 } 00099 00100 return true; 00101 } 00102 00103 template <typename T> 00104 template <typename InputIterator> 00105 bool DictionaryCompressedColumn<T>::insert(InputIterator first, InputIterator last) { 00106 for (; first != last; ++first) { 00107 if (!this->insert(*first)) { 00108 return false; 00109 } 00110 } 00111 return true; 00112 } 00113 00114 00115 00116 template<class T> 00117 const boost::any DictionaryCompressedColumn<T>::get(TID tid){ 00118 00119 return boost::any(this->operator [](tid)); 00120 } 00121 00122 template<class T> 00123 void DictionaryCompressedColumn<T>::print() const throw(){ 00124 std::cout << "| " << this->name_ << " (Dictionary Compressed) |" << std::endl; 00125 std::cout << "________________________" << std::endl; 00126 for(unsigned int i=0;i<this->size();i++){ 00127 std::cout << "| " << reverse_lookup_vector_[ids_[i]] << " |" << std::endl; 00128 } 00129 } 00130 template<class T> 00131 size_t DictionaryCompressedColumn<T>::size() const throw(){ 00132 00133 return ids_.size(); 00134 } 00135 template<class T> 
00136 const ColumnPtr DictionaryCompressedColumn<T>::copy() const{ 00137 return ColumnPtr(new DictionaryCompressedColumn<T>(*this)); 00138 } 00139 00140 template<class T> 00141 bool DictionaryCompressedColumn<T>::update(TID index, const boost::any& new_Value){ 00142 T value = boost::any_cast<T>(new_Value); 00143 if(index>=this->size()) return false; 00144 00145 typename Dictionary::iterator it = dictionary_.find(value); 00146 if(it!=dictionary_.end()){ 00147 //ids_.push_back(it->second); 00148 ids_[index]=it->second; 00149 //reverse_lookup_vector_[ids[index]]=it->first; 00150 }else{ 00151 //ids_.push_back(maximal_id_); 00152 ids_[index]=maximal_id_; 00153 dictionary_.insert(std::make_pair(value,maximal_id_)); 00154 //element id is position in reverse lookup vector to get the real value in O(1) time 00155 reverse_lookup_vector_.push_back(value); 00156 maximal_id_++; 00157 00158 } 00159 } 00160 00161 template<class T> 00162 bool DictionaryCompressedColumn<T>::update(PositionListPtr tids, const boost::any& new_Value){ 00163 if(!tids) 00164 return false; 00165 //test whether tid list has at least one element, if not, return with error 00166 if(tids->empty()) 00167 return false; 00168 00169 bool result=true; 00170 for(unsigned int i=0;i<tids->size();i++){ 00171 result = result && this->update((*tids)[i],new_Value); 00172 } 00173 return result; 00174 } 00175 00176 template<class T> 00177 bool DictionaryCompressedColumn<T>::remove(TID tid){ 00178 ids_.erase(ids_.begin()+tid); 00179 return true; 00180 00181 } 00182 00183 template<class T> 00184 bool DictionaryCompressedColumn<T>::remove(PositionListPtr tids){ 00185 if(!tids) 00186 return false; 00187 //test whether tid list has at least one element, if not, return with error 00188 if(tids->empty()) 00189 return false; 00190 00191 typename PositionList::reverse_iterator rit; 00192 //delete tuples in reverse order, otherwise the first deletion would invalidate all other tids 00193 for (rit = tids->rbegin(); 
rit!=tids->rend(); ++rit){ 00194 ids_.erase(ids_.begin()+(*rit)); 00195 } 00196 00197 return true; 00198 } 00199 00200 00201 template<class T> 00202 bool DictionaryCompressedColumn<T>::clearContent(){ 00203 this->ids_.clear(); 00204 this->dictionary_.clear(); 00205 this->reverse_lookup_vector_.clear(); 00206 this->maximal_id_=0; 00207 return true; 00208 } 00209 00210 template<class T> 00211 bool DictionaryCompressedColumn<T>::store(const std::string& path_){ 00212 std::string path(path_); 00213 path += "/"; 00214 path += this->name_; 00215 if(!quiet && verbose && debug) std::cout << "Writing Column " << this->getName() << " to File " << path << std::endl; 00216 std::ofstream outfile (path.c_str(),std::ios_base::binary | std::ios_base::out); 00217 boost::archive::binary_oarchive oa(outfile); 00218 00219 oa << ids_; 00220 oa << dictionary_; 00221 oa << reverse_lookup_vector_; 00222 oa << this->maximal_id_; 00223 00224 outfile.flush(); 00225 outfile.close(); 00226 return true; 00227 } 00228 template<class T> 00229 bool DictionaryCompressedColumn<T>::load(const std::string& path_){ 00230 std::string path(path_); 00231 if(!quiet && verbose && debug) std::cout << "Loading column '" << this->name_ << "' from path '" << path << "'..." << std::endl; 00232 //string path("data/"); 00233 path += "/"; 00234 path += this->name_; 00235 00236 if(!quiet && verbose && debug) std::cout << "Opening File '" << path << "'..." 
<< std::endl; 00237 std::ifstream infile (path.c_str(),std::ios_base::binary | std::ios_base::in); 00238 boost::archive::binary_iarchive ia(infile); 00239 ia >> ids_; 00240 ia >> dictionary_; 00241 ia >> reverse_lookup_vector_; 00242 ia >> this->maximal_id_; 00243 infile.close(); 00244 00245 00246 return true; 00247 } 00248 template<class T> 00249 bool DictionaryCompressedColumn<T>::isMaterialized() const throw(){ 00250 return false; 00251 } 00252 00253 template<class T> 00254 bool DictionaryCompressedColumn<T>::isCompressed() const throw(){ 00255 return true; 00256 } 00257 00258 template<class T> 00259 T& DictionaryCompressedColumn<T>::operator[](const int index){ 00260 return this->reverse_lookup_vector_[ids_[index]]; 00261 } 00262 00263 template<class T> 00264 unsigned int DictionaryCompressedColumn<T>::getSizeinBytes() const throw(){ 00265 return 0; //ids_.capacity()+this->reverse_lookup_vector_.capacity()+this->dictionary_.capacity()+sizeof(DictionaryCompressedColumn<T>) ; //return values_.capacity()*sizeof(T); 00266 } 00267 00268 00269 00270 /***************** End of Implementation Section ******************/ 00271 00272 00273 00274 }; //end namespace CogaDB 00275