// Column-oriented GPU-accelerated Database Management System
// CoGaDB
00001 00002 #pragma once 00003 00004 #include <core/base_column.hpp> 00005 #include <iostream> 00006 00007 #include <utility> 00008 #include <functional> 00009 #include <algorithm> 00010 00011 #include <boost/unordered_map.hpp> 00012 #include <boost/any.hpp> 00013 00014 //#include <core/column.hpp> 00015 00017 namespace CoGaDB { 00018 00032 template<class T> 00033 class ColumnBaseTyped : public ColumnBase { 00034 public: 00035 //typedef boost::shared_ptr<ColumnBaseTyped> ColumnPtr; 00036 /***************** constructors and destructor *****************/ 00037 ColumnBaseTyped(const std::string& name, AttributeType db_type); 00038 virtual ~ColumnBaseTyped(); 00039 00040 virtual bool insert(const boost::any& new_Value) = 0; 00041 virtual bool insert(const T& new_Value) = 0; 00042 00043 virtual bool update(TID tid, const boost::any& new_value) = 0; 00044 virtual bool update(PositionListPtr tid, const boost::any& new_value) = 0; 00045 00046 virtual bool remove(TID tid)=0; 00047 //assumes tid list is sorted ascending 00048 virtual bool remove(PositionListPtr tid)=0; 00049 virtual bool clearContent()=0; 00050 00051 virtual const boost::any get(TID tid)=0; 00052 //virtual const boost::any* const getRawData()=0; 00053 virtual void print() const throw()=0; 00054 virtual size_t size() const throw()=0; 00055 virtual unsigned int getSizeinBytes() const throw()=0; 00056 00057 virtual const ColumnPtr copy() const=0; 00058 /***************** relational operations on Columns which return lookup tables *****************/ 00059 virtual const PositionListPtr sort(SortOrder order); 00060 virtual const PositionListPtr selection(const boost::any& value_for_comparison, const ValueComparator comp); 00061 virtual const PositionListPtr parallel_selection(const boost::any& value_for_comparison, const ValueComparator comp, unsigned int number_of_threads); 00062 //join algorithms 00063 virtual const PositionListPairPtr hash_join(ColumnPtr join_column); 00064 virtual const PositionListPairPtr 
sort_merge_join(ColumnPtr join_column); 00065 virtual const PositionListPairPtr nested_loop_join(ColumnPtr join_column); 00066 00067 00068 virtual bool add(const boost::any& new_Value); 00069 //vector addition between columns 00070 virtual bool add(ColumnPtr join_column); 00071 00072 virtual bool minus(const boost::any& new_Value); 00073 virtual bool minus(ColumnPtr join_column); 00074 00075 virtual bool multiply(const boost::any& new_Value); 00076 virtual bool multiply(ColumnPtr join_column); 00077 00078 virtual bool division(const boost::any& new_Value); 00079 virtual bool division(ColumnPtr join_column); 00080 00081 //template <typename U, typename BinaryOperator> 00082 //std::pair<ColumnPtr,ColumnPtr> aggregate_by_keys(ColumnBaseTyped<U>* keys, BinaryOperator binary_op) const; 00083 00084 virtual bool store(const std::string& path) = 0; 00085 virtual bool load(const std::string& path) = 0; 00086 virtual bool isMaterialized() const throw() = 0; 00087 virtual bool isCompressed() const throw() = 0; 00089 virtual const std::type_info& type() const throw(); 00094 virtual T& operator[](const int index) = 0; 00095 inline bool operator==(ColumnBaseTyped<T>& column); 00096 }; 00097 00098 00099 template<class T> 00100 ColumnBaseTyped<T>::ColumnBaseTyped(const std::string& name, AttributeType db_type) : ColumnBase(name,db_type){ 00101 00102 } 00103 00104 template<class T> 00105 ColumnBaseTyped<T>::~ColumnBaseTyped(){ 00106 00107 } 00108 00109 template<class T> 00110 const std::type_info& ColumnBaseTyped<T>::type() const throw(){ 00111 return typeid(T); 00112 } 00113 00114 template<class T> 00115 const PositionListPtr ColumnBaseTyped<T>::sort(SortOrder order){ 00116 00117 PositionListPtr ids = PositionListPtr( new PositionList()); 00118 std::vector<std::pair<T,TID> > v; 00119 00120 for(unsigned int i=0;i<this->size();i++){ 00121 v.push_back (std::pair<T,TID>((*this)[i],i) ); 00122 } 00123 00124 //TODO: change implementation, so that no copy operations are required -> use 
boost zip iterators! 00125 00126 if(order==ASCENDING){ 00127 //tbb::parallel_sort(v.begin(),v.end(),std::less_equal<std::pair<T,TID> >()); 00128 std::stable_sort(v.begin(),v.end(),std::less_equal<std::pair<T,TID> >()); 00129 }else if(order==DESCENDING){ 00130 //tbb::parallel_sort(v.begin(),v.end(),std::greater_equal<std::pair<T,TID> >()); 00131 std::stable_sort(v.begin(),v.end(),std::greater_equal<std::pair<T,TID> >()); 00132 }else{ 00133 std::cout << "FATAL ERROR: ColumnBaseTyped<T>::sort(): Unknown Sorting Order!" << std::endl; 00134 } 00135 00136 for(unsigned int i=0;i<v.size();i++){ 00137 ids->push_back(v[i].second); 00138 } 00139 00140 return ids; 00141 } 00142 00143 00144 template<class T> 00145 const PositionListPtr ColumnBaseTyped<T>::parallel_selection(const boost::any&, const ValueComparator, unsigned int){ 00146 00147 PositionListPtr result_tids( new PositionList()); 00148 00149 return result_tids; 00150 } 00151 00152 00153 00154 template<class T> 00155 const PositionListPtr ColumnBaseTyped<T>::selection(const boost::any& value_for_comparison, const ValueComparator comp){ 00156 if(value_for_comparison.type()!=typeid(T)){ 00157 std::cout << "Fatal Error!!! Typemismatch for column " << name_ << std::endl; 00158 std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl; 00159 exit(-1); 00160 } 00161 00162 T value = boost::any_cast<T>(value_for_comparison); 00163 00164 00165 PositionListPtr result_tids; 00166 00167 result_tids = PositionListPtr(new PositionList()); 00168 00169 if(!quiet) std::cout << "Using CPU for Selection..." 
<< std::endl; 00170 for(TID i=0;i<this->size();i++){ 00171 00172 //boost::any value = column->get(i); 00173 //val = values_[i]; 00174 00175 if(comp==EQUAL){ 00176 if(value==(*this)[i]){ 00177 //result_table->insert(this->fetchTuple(i)); 00178 result_tids->push_back(i); 00179 } 00180 }else if(comp==LESSER){ 00181 if((*this)[i]<value){ 00182 //result_table->insert(this->fetchTuple(i)); 00183 result_tids->push_back(i); 00184 } 00185 }else if(comp==GREATER){ 00186 if((*this)[i]>value){ 00187 result_tids->push_back(i); 00188 //result_table->insert(this->fetchTuple(i)); 00189 } 00190 }else{ 00191 00192 } 00193 } 00194 00195 //} 00196 return result_tids; 00197 } 00198 00199 00200 template<class T> 00201 const PositionListPairPtr ColumnBaseTyped<T>::hash_join(ColumnPtr join_column_){ 00202 00203 typedef boost::unordered_multimap<T,TID,boost::hash<T>, std::equal_to<T> > HashTable; 00204 00205 if(join_column_->type()!=typeid(T)){ 00206 std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_ << " and " << join_column_->getName() << std::endl; 00207 std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl; 00208 exit(-1); 00209 } 00210 00211 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<T> > join_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<T> >(join_column_); //static_cast<IntColumnPtr>(column1); 00212 00213 PositionListPairPtr join_tids( new PositionListPair()); 00214 join_tids->first = PositionListPtr( new PositionList() ); 00215 join_tids->second = PositionListPtr( new PositionList() ); 00216 00217 00218 //create hash table 00219 HashTable hashtable; 00220 for(unsigned int i=0;i<this->size();i++) 00221 hashtable.insert( 00222 std::pair<T,TID> ((*this)[i],i) 00223 ); 00224 00225 //probe larger relation 00226 for(unsigned int i=0;i<join_column->size();i++){ 00227 std::pair<typename HashTable::iterator, typename HashTable::iterator> range = hashtable.equal_range((*join_column)[i]); 00228 for(typename 
HashTable::iterator it=range.first ; it!=range.second;it++){ 00229 if(it->first==(*join_column)[i]){ 00230 join_tids->first->push_back(it->second); 00231 join_tids->second->push_back(i); 00232 //cout << "match! " << it->second << ", " << i << " " << it->first << endl; 00233 } 00234 } 00235 } 00236 00237 return join_tids; 00238 } 00239 00240 template<class Type> 00241 const PositionListPairPtr ColumnBaseTyped<Type>::sort_merge_join(ColumnPtr join_column_){ 00242 00243 if(join_column_->type()!=typeid(Type)){ 00244 std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_ << " and " << join_column_->getName() << std::endl; 00245 std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl; 00246 exit(-1); 00247 } 00248 00249 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > join_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(join_column_); //static_cast<IntColumnPtr>(column1); 00250 00251 PositionListPairPtr join_tids( new PositionListPair()); 00252 join_tids->first = PositionListPtr( new PositionList() ); 00253 join_tids->second = PositionListPtr( new PositionList() ); 00254 00255 return join_tids; 00256 } 00257 00258 00259 template<class Type> 00260 const PositionListPairPtr ColumnBaseTyped<Type>::nested_loop_join(ColumnPtr join_column_){ 00261 assert(join_column_!=NULL); 00262 if(join_column_->type()!=typeid(Type)){ 00263 std::cout << "Fatal Error!!! 
Typemismatch for columns " << this->name_ << " and " << join_column_->getName() << std::endl; 00264 std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl; 00265 exit(-1); 00266 } 00267 00268 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > join_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(join_column_); //static_cast<IntColumnPtr>(column1); 00269 00270 PositionListPairPtr join_tids( new PositionListPair()); 00271 join_tids->first = PositionListPtr( new PositionList() ); 00272 join_tids->second = PositionListPtr( new PositionList() ); 00273 00274 for(unsigned int i=0;i<this->size();i++){ 00275 for(unsigned int j=0;j<join_column->size();j++){ 00276 if((*this)[i]==(*join_column)[j]){ 00277 if(debug) std::cout << "MATCH: (" << i << "," << j << ")" << std::endl; 00278 join_tids->first->push_back(i); 00279 join_tids->second->push_back(j); 00280 } 00281 } 00282 } 00283 00284 return join_tids; 00285 } 00286 00287 template<class T> 00288 bool ColumnBaseTyped<T>::operator==(ColumnBaseTyped<T>& column){ 00289 if(this->size()!=column.size()) return false; 00290 for(unsigned int i=0;i<this->size();i++){ 00291 if((*this)[i]!=column[i]){ 00292 return false; 00293 } 00294 } 00295 return true; 00296 } 00297 00298 template<class Type> 00299 bool ColumnBaseTyped<Type>::add(const boost::any& new_value){ 00300 if(new_value.empty()) return false; 00301 if(typeid(Type)==new_value.type()){ 00302 Type value = boost::any_cast<Type>(new_value); 00303 //std::transform(myvec.begin(), myvec.end(), myvec.begin(), 00304 //bind2nd(std::plus<double>(), 1.0)); 00305 for(unsigned int i=0;i<this->size();i++){ 00306 this->operator[](i)+=value; 00307 } 00308 return true; 00309 } 00310 return false; 00311 } 00312 00313 00314 00315 template<class Type> 00316 bool ColumnBaseTyped<Type>::add(ColumnPtr column){ 00317 //std::transform ( first, first+5, second, results, std::plus<int>() ); 00318 
shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > typed_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(column); 00319 if(!column) return false; 00320 for(unsigned int i=0;i<this->size();i++){ 00321 this->operator[](i)+=typed_column->operator[](i); 00322 } 00323 return true; 00324 } 00325 00326 00327 00328 template<class Type> 00329 bool ColumnBaseTyped<Type>::minus(const boost::any& new_value){ 00330 //shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > typed_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(column); 00331 if(new_value.empty()) return false; 00332 if(typeid(Type)==new_value.type()){ 00333 Type value = boost::any_cast<Type>(new_value); 00334 for(unsigned int i=0;i<this->size();i++){ 00335 this->operator[](i)-=value; 00336 } 00337 return true; 00338 } 00339 return false; 00340 } 00341 00342 template<class Type> 00343 bool ColumnBaseTyped<Type>::minus(ColumnPtr column){ 00344 //std::transform ( first, first+5, second, results, std::plus<int>() ); 00345 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > typed_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(column); 00346 if(!column) return false; 00347 for(unsigned int i=0;i<this->size();i++){ 00348 this->operator[](i)-=typed_column->operator[](i); 00349 } 00350 return true; 00351 } 00352 00353 00354 template<class Type> 00355 bool ColumnBaseTyped<Type>::multiply(const boost::any& new_value){ 00356 if(new_value.empty()) return false; 00357 if(typeid(Type)==new_value.type()){ 00358 Type value = boost::any_cast<Type>(new_value); 00359 for(unsigned int i=0;i<this->size();i++){ 00360 this->operator[](i)*=value; 00361 } 00362 return true; 00363 } 00364 return false; 00365 } 00366 00367 template<class Type> 00368 bool ColumnBaseTyped<Type>::multiply(ColumnPtr column){ 00369 //std::transform ( first, first+5, second, results, std::plus<int>() ); 00370 
shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > typed_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(column); 00371 if(!column) return false; 00372 for(unsigned int i=0;i<this->size();i++){ 00373 this->operator[](i)*=typed_column->operator[](i); 00374 } 00375 return true; 00376 } 00377 00378 00379 00380 template<class Type> 00381 bool ColumnBaseTyped<Type>::division(const boost::any& new_value){ 00382 if(new_value.empty()) return false; 00383 if(typeid(Type)==new_value.type()){ 00384 Type value = boost::any_cast<Type>(new_value); 00385 //check that we do not devide by zero 00386 if(value==0) return false; 00387 for(unsigned int i=0;i<this->size();i++){ 00388 this->operator[](i)/=value; 00389 } 00390 return true; 00391 } 00392 return false; 00393 } 00394 00395 template<class Type> 00396 bool ColumnBaseTyped<Type>::division(ColumnPtr column){ 00397 //std::transform ( first, first+5, second, results, std::plus<int>() ); 00398 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > typed_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(column); 00399 if(!column) return false; 00400 for(unsigned int i=0;i<this->size();i++){ 00401 this->operator[](i)/=typed_column->operator[](i); 00402 } 00403 return true; 00404 } 00405 00406 //total tempalte specializations, because numeric computations are undefined on strings 00407 template<> 00408 inline bool ColumnBaseTyped<std::string>::add(const boost::any&){ return false; } 00409 template<> 00410 inline bool ColumnBaseTyped<std::string>::add(ColumnPtr){ return false; } 00411 00412 template<> 00413 inline bool ColumnBaseTyped<std::string>::minus(const boost::any&){ return false; } 00414 template<> 00415 inline bool ColumnBaseTyped<std::string>::minus(ColumnPtr){ return false; } 00416 00417 00418 template<> 00419 inline bool ColumnBaseTyped<std::string>::multiply(const boost::any&){ return false; } 00420 template<> 00421 inline bool 
ColumnBaseTyped<std::string>::multiply(ColumnPtr){ return false; } 00422 00423 template<> 00424 inline bool ColumnBaseTyped<std::string>::division(const boost::any&){ return false; } 00425 template<> 00426 inline bool ColumnBaseTyped<std::string>::division(ColumnPtr){ return false; } 00427 00428 }; //end namespace CogaDB 00429