Column-oriented GPU-accelerated Database Management System
CoGaDB
|
00001 #pragma once 00002 00003 #include <core/column_base_typed.hpp> 00004 #include <iostream> 00005 #include <fstream> 00006 00007 #include <stdint.h> 00008 00009 #include <gpu/gpu_base_column.hpp> 00010 #include <util/begin_ptr.hpp> 00011 00012 #include <hardware_optimizations/simd_acceleration.hpp> 00013 00014 namespace CoGaDB{ 00015 00016 template<typename T> 00017 class Column : public ColumnBaseTyped<T>{ 00018 public: 00019 /***************** constructors and destructor *****************/ 00020 Column(const std::string& name, AttributeType db_type); 00021 virtual ~Column(); 00022 00023 virtual bool insert(const boost::any& new_value); 00024 bool insert(const T& new_value); 00025 template <typename InputIterator> 00026 bool insert(InputIterator first, InputIterator last); 00027 00028 virtual bool update(TID tid, const boost::any& new_value); 00029 virtual bool update(PositionListPtr tid, const boost::any& new_value); 00030 00031 virtual bool remove(TID tid); 00032 //assumes tid list is sorted ascending 00033 virtual bool remove(PositionListPtr tid); 00034 virtual bool clearContent(); 00035 00036 virtual const boost::any get(TID tid); 00037 //virtual const boost::any* const getRawData(); 00038 virtual void print() const throw(); 00039 virtual size_t size() const throw(); 00040 virtual unsigned int getSizeinBytes() const throw(); 00041 00042 virtual const ColumnPtr copy() const; 00043 virtual const ColumnPtr materialize() throw(); 00044 virtual const ColumnPtr gather(PositionListPtr tid_list); 00045 00046 virtual bool store(const std::string& path); 00047 virtual bool load(const std::string& path); 00048 virtual bool isMaterialized() const throw(); 00049 virtual bool isCompressed() const throw(); 00050 00051 virtual T& operator[](const int index); 00052 //inline T& operator[](const int index) __attribute__((always_inline)); 00053 00054 virtual const PositionListPtr selection(const boost::any& value_for_comparison, const ValueComparator comp); 00055 virtual const PositionListPtr selection(ColumnPtr comparison_column, const ValueComparator comp); 00056 00057 std::vector<T>& getContent(); 00058 00059 private: 00060 00061 struct Type_TID_Comparator { 00062 inline bool operator() (std::pair<T,TID> i, std::pair<T,TID> j) { return (i.first<j.first);} 00063 } type_tid_comparator; 00064 00065 00067 std::vector<T> values_; 00068 }; 00069 00070 00071 00072 /***************** Start of Implementation Section ******************/ 00073 00074 00075 template<class T> 00076 Column<T>::Column(const std::string& name, AttributeType db_type) : ColumnBaseTyped<T>(name,db_type), type_tid_comparator(), values_(){ 00077 00078 } 00079 00080 template<class T> 00081 Column<T>::~Column(){ 00082 00083 } 00084 00085 template<class T> 00086 std::vector<T>& Column<T>::getContent(){ 00087 return values_; 00088 } 00089 /* 00090 template<class T> 00091 const PositionListPtr Column<T>::selection(const boost::any& value_for_comparison, const ValueComparator comp){ 00092 00093 T value; 00094 00095 if(value_for_comparison.type()!=typeid(T)){ 00096 //catch some special cases 00097 if(typeid(T)==typeid(float) && value_for_comparison.type()==typeid(int)){ 00098 value = boost::any_cast<int>(value_for_comparison); 00099 }else{ 00100 std::cout << "Fatal Error!!! Typemismatch for column " << this->name_ << std::endl; 00101 std::cout << "Column Type: " << typeid(T).name() << " filter value type: " << value_for_comparison.type().name() << std::endl; 00102 std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl; 00103 exit(-1); 00104 } 00105 }else{ 00106 //everything fine, filter value matches type of column 00107 value = boost::any_cast<T>(value_for_comparison); 00108 } 00109 00110 //T value = boost::any_cast<T>(value_for_comparison); 00111 PositionListPtr result_tids; 00112 00113 result_tids = PositionListPtr(new PositionList()); 00114 //one third rule for selections: assume a selectivity of 0.3, meaning we need roughly 0.3 times of the input to store the result 00115 //this optimizatio nshould minimize the number of reallocations during the insertion process 00116 //result_tids->reserve(0.3*this->size()); 00117 00118 unsigned int array_size=this->size()+1; 00119 //calls new internally 00120 result_tids->resize(array_size); 00121 //tids.reserve(array_size); 00122 //get pointer 00123 unsigned int* array_tids=hype::util::begin_ptr(*result_tids); 00124 assert(array_tids!=NULL); 00125 unsigned int pos=0; 00126 00127 if(!quiet) std::cout << "Using CPU for Selection..." << std::endl; 00128 //unsigned int array_size = this->size(); 00129 //for(TID i=0;i<array_size;i++){ 00130 00131 //boost::any value = column->get(i); 00132 //val = values_[i]; 00133 T* array=hype::util::begin_ptr(values_); 00134 00135 if(comp==EQUAL){ 00136 for(TID i=0;i<array_size;i++){ 00137 if(value==array[i]){ 00138 //result_tids->push_back(i); 00139 array_tids[pos++]=i; 00140 } 00141 } 00142 }else if(comp==LESSER){ 00143 for(TID i=0;i<array_size;i++){ 00144 if(array[i]<value){ 00145 //result_table->insert(this->fetchTuple(i)); 00146 //result_tids->push_back(i); 00147 array_tids[pos++]=i; 00148 } 00149 } 00150 }else if(comp==LESSER_EQUAL){ 00151 for(TID i=0;i<array_size;i++){ 00152 if(array[i]<=value){ 00153 //result_tids->push_back(i); 00154 //result_table->insert(this->fetchTuple(i)); 00155 array_tids[pos++]=i; 00156 } 00157 } 00158 }else if(comp==GREATER){ 00159 for(TID i=0;i<array_size;i++){ 00160 if(array[i]>value){ 00161 //result_tids->push_back(i); 00162 //result_table->insert(this->fetchTuple(i)); 00163 array_tids[pos++]=i; 00164 } 00165 } 00166 }else if(comp==GREATER_EQUAL){ 00167 for(TID i=0;i<array_size;i++){ 00168 if(array[i]>=value){ 00169 //result_tids->push_back(i); 00170 //result_table->insert(this->fetchTuple(i)); 00171 array_tids[pos++]=i; 00172 } 00173 } 00174 }else{ 00175 00176 } 00177 //shrink to actual result size 00178 result_tids->resize(pos); 00179 return result_tids; 00180 }*/ 00181 00182 00183 00184 00185 00186 00187 template<class T> 00188 const PositionListPtr Column<T>::selection(ColumnPtr comparison_column, const ValueComparator comp){ 00189 //same behaviour as in parent class 00190 return CoGaDB::ColumnBaseTyped<T>::selection(comparison_column, comp); 00191 } 00192 00193 00194 template<class T> 00195 bool Column<T>::insert(const boost::any& new_value){ 00196 if(new_value.empty()) return false; 00197 if(typeid(T)==new_value.type()){ 00198 T value = boost::any_cast<T>(new_value); 00199 values_.push_back(value); 00200 return true; 00201 } 00202 return false; 00203 } 00204 00205 template<class T> 00206 bool Column<T>::insert(const T& new_value){ 00207 values_.push_back(new_value); 00208 return true; 00209 } 00210 00211 00212 template <typename T> 00213 template <typename InputIterator> 00214 bool Column<T>::insert(InputIterator first, InputIterator last){ 00215 this->values_.insert(this->values_.end(),first,last); 00216 return true; 00217 } 00218 00219 template<class T> 00220 bool Column<T>::update(TID tid, const boost::any& new_value){ 00221 if(new_value.empty()) return false; 00222 if(typeid(T)==new_value.type()){ 00223 T value = boost::any_cast<T>(new_value); 00224 values_[tid]=value; 00225 return true; 00226 }else{ 00227 std::cout << "Fatal Error!!! Typemismatch for column " << this->name_ << std::endl; 00228 } 00229 return false; 00230 } 00231 00232 template<class T> 00233 bool Column<T>::update(PositionListPtr tids, const boost::any& new_value){ 00234 if(!tids) 00235 return false; 00236 if(new_value.empty()) return false; 00237 if(typeid(T)==new_value.type()){ 00238 T value = boost::any_cast<T>(new_value); 00239 for(unsigned int i=0;i<tids->size();i++){ 00240 TID tid=(*tids)[i]; 00241 values_[tid]=value; 00242 } 00243 return true; 00244 }else{ 00245 std::cout << "Fatal Error!!! Typemismatch for column " << this->name_ << std::endl; 00246 } 00247 return false; 00248 } 00249 00250 00251 00252 00253 template<class T> 00254 bool Column<T>::remove(TID tid){ 00255 values_.erase(values_.begin()+tid); 00256 return true; 00257 } 00258 00259 template<class T> 00260 bool Column<T>::remove(PositionListPtr tids){ 00261 if(!tids) 00262 return false; 00263 //test whether tid list has at least one element, if not, return with error 00264 if(tids->empty()) 00265 return false; 00266 00267 //assert(); 00268 00269 typename PositionList::reverse_iterator rit; 00270 00271 for (rit = tids->rbegin(); rit!=tids->rend(); ++rit) 00272 values_.erase(values_.begin()+(*rit)); 00273 00274 /* 00275 //delete tuples in reverse order, otherwise the first deletion would invalidate all other tids 00276 unsigned int i=tids->size()-1; 00277 while(true) 00278 TID = (*tids)[i]; 00279 values_.erase(values_.begin()+tid); 00280 if(i==0) break; 00281 }*/ 00282 00283 00284 return true; 00285 } 00286 00287 template<class T> 00288 bool Column<T>::clearContent(){ 00289 values_.clear(); 00290 return true; 00291 } 00292 00293 template<class T> 00294 const boost::any Column<T>::get(TID tid){ 00295 if(tid<values_.size()) 00296 return boost::any(values_[tid]); 00297 else{ 00298 std::cout << "fatal Error!!! Invalid TID!!! Attribute: " << this->name_ << " TID: " << tid << std::endl; 00299 } 00300 return boost::any(); 00301 } 00302 00303 template<class T> 00304 void Column<T>::print() const throw(){ 00305 std::cout << "| " << this->name_ << " |" << std::endl; 00306 std::cout << "________________________" << std::endl; 00307 for(unsigned int i=0;i<values_.size();i++){ 00308 std::cout << "| " << values_[i] << " |" << std::endl; 00309 } 00310 } 00311 template<class T> 00312 size_t Column<T>::size() const throw(){ 00313 return values_.size(); 00314 } 00315 00316 template<class T> 00317 const ColumnPtr Column<T>::materialize() throw(){ 00318 return this->copy(); 00319 } 00320 template<class T> 00321 const ColumnPtr Column<T>::copy() const{ 00322 return ColumnPtr(new Column<T>(*this)); 00323 } 00324 00325 template<class T> 00326 const ColumnPtr Column<T>::gather(PositionListPtr tid_list){ 00327 Column<T>* result = new Column<T>(this->name_,this->db_type_); 00328 std::vector<T>& data = result->getContent(); 00329 data.resize(tid_list->size()); 00330 //if(!CoGaDB::quiet && CoGaDB::verbose && CoGaDB::debug) std::cout << "Result size:" << tid_list->size() << std::endl; 00331 //if(!CoGaDB::quiet && CoGaDB::verbose && CoGaDB::debug) std::cout << "Column size: " << this->size() << std::endl; 00332 for(unsigned int i=0;i<tid_list->size();i++){ 00333 //std::cout << "tid: " << (*tid_list)[i] << std::endl; 00334 //std::cout << "value: " << (*this)[(*tid_list)[i]] << std::endl; 00335 data[i]=(*this)[(*tid_list)[i]]; 00336 } 00337 return ColumnPtr(result); 00338 } 00339 00340 00341 /***************** relational operations on Columns which return lookup tables *****************/ 00342 // template<class T> 00343 // const std::vector<TID> Column<T>::sort(const ComputeDevice comp_dev) const { 00344 00345 // return std::vector<TID>(); 00346 // } 00347 00348 // template<class T> 00349 // const std::vector<TID> Column<T>::selection(const boost::any& value_for_comparison, const ValueComparator comp, const ComputeDevice comp_dev) const { 00350 00351 // return std::vector<TID>(); 00352 // } 00353 // //join algorithms 00354 // template<class T> 00355 // const std::vector<TID_Pair> Column<T>::sort_merge_join(ColumnPtr join_Column, const ComputeDevice comp_dev) const{ 00356 00357 // return std::vector<TID_Pair>(); 00358 // } 00359 // template<class T> 00360 // const std::vector<TID_Pair> Column<T>::nested_loop_join(ColumnPtr join_Column, const ComputeDevice comp_dev) const{ 00361 00362 // return std::vector<TID_Pair>(); 00363 // } 00364 template<class T> 00365 bool Column<T>::store(const std::string& path_){ 00366 //string path("data/"); 00367 std::string path(path_); 00368 path += "/"; 00369 path += this->name_; 00370 if(!quiet && verbose && debug) std::cout << "Writing Column " << this->getName() << " to File " << path << std::endl; 00371 std::ofstream outfile (path.c_str(),std::ios_base::binary | std::ios_base::out); 00372 boost::archive::binary_oarchive oa(outfile); 00373 00374 oa << values_; 00375 00376 outfile.flush(); 00377 outfile.close(); 00378 return true; 00379 } 00380 template<class T> 00381 bool Column<T>::load(const std::string& path_){ 00382 std::string path(path_); 00383 if(!quiet && verbose && debug) std::cout << "Loading column '" << this->name_ << "' from path '" << path << "'..." << std::endl; 00384 //string path("data/"); 00385 path += "/"; 00386 path += this->name_; 00387 00388 if(!quiet && verbose && debug) std::cout << "Opening File '" << path << "'..." << std::endl; 00389 std::ifstream infile (path.c_str(),std::ios_base::binary | std::ios_base::in); 00390 boost::archive::binary_iarchive ia(infile); 00391 ia >> values_; 00392 infile.close(); 00393 00394 00395 return true; 00396 } 00397 template<class T> 00398 bool Column<T>::isMaterialized() const throw(){ 00399 return true; 00400 } 00401 00402 template<class T> 00403 bool Column<T>::isCompressed() const throw(){ 00404 return false; 00405 } 00406 00407 template<class T> 00408 T& Column<T>::operator[](const int index){ 00409 00410 return values_[index]; 00411 } 00412 00413 00414 00415 /* 00416 template<> 00417 bool& Column<bool>::operator[](const int index) const{ 00418 00419 return false; // values_[index]; 00420 }*/ 00421 template<class T> 00422 unsigned int Column<T>::getSizeinBytes() const throw(){ 00423 return values_.capacity()*sizeof(T); 00424 } 00425 00426 //total template specialization 00427 template<> 00428 inline unsigned int Column<std::string>::getSizeinBytes() const throw(){ 00429 unsigned int size_in_bytes=0; 00430 for(unsigned int i=0;i<values_.size();++i){ 00431 size_in_bytes+=values_[i].capacity(); 00432 } 00433 //return values_.size()*sizeof(T); 00434 return size_in_bytes; 00435 } 00436 00437 /***************** End of Implementation Section ******************/ 00438 00439 00440 00441 00442 }; //end namespace CogaDB 00443