Column-oriented GPU-accelerated Database Management System
CoGaDB
/home/sebastian/gpudbms/trunk/cogadb/include/compression/tmp/db2/core/column_base_typed.hpp
Go to the documentation of this file.
00001 
00002 #pragma once
00003 
00004 #include <core/base_column.hpp>
00005 #include <iostream>
00006 
00007 #include <utility>
00008 #include <functional>
00009 #include <algorithm>
00010 
00011 #include <boost/unordered_map.hpp>
00012 #include <boost/any.hpp>
00013 
00014 //#include <core/column.hpp>
00015 
00017 namespace CoGaDB {
00018 
00032         template<class T>
              /**
               * Typed layer between ColumnBase and concrete column classes.
               *
               * T is the element type of the column. Storage and maintenance
               * (insert/update/remove/get/store/load, operator[]) are pure virtual
               * here; the relational and arithmetic operations below have default
               * implementations in this header that are written purely in terms of
               * size() and operator[].
               */
00033         class ColumnBaseTyped : public ColumnBase {
00034         public:
00035                 //typedef boost::shared_ptr<ColumnBaseTyped> ColumnPtr;
00036                 /***************** constructors and destructor *****************/
                      // Forwards name and db_type unchanged to the ColumnBase constructor.
00037                 ColumnBaseTyped(const std::string& name, AttributeType db_type);
00038                 virtual ~ColumnBaseTyped();
00039 
                      // Insert one value, either type-erased (boost::any) or as a T.
00040                 virtual bool insert(const boost::any& new_Value) = 0;
00041                 virtual bool insert(const T& new_Value) = 0;
00042                 
                      // Update a single position or every position in a position list.
00043                 virtual bool update(TID tid, const boost::any& new_value) = 0;
00044                 virtual bool update(PositionListPtr tid, const boost::any& new_value) = 0;      
00045 
00046                 virtual bool remove(TID tid)=0;
00047                 //assumes tid list is sorted ascending
00048                 virtual bool remove(PositionListPtr tid)=0;
00049                 virtual bool clearContent()=0;
00050 
                      // Type-erased read access to the value at position tid.
00051                 virtual const boost::any get(TID tid)=0;
00052                 //virtual const boost::any* const getRawData()=0;
00053                 virtual void print() const throw()=0;
                      // Number of elements; the default implementations iterate over [0, size()).
00054                 virtual size_t size() const throw()=0;
00055                 virtual unsigned int getSizeinBytes() const throw()=0;
00056 
00057                 virtual const ColumnPtr copy() const=0;
00058                 /***************** relational operations on Columns which return lookup tables *****************/
                      // Returns the TIDs of this column in the requested sort order.
00059                 virtual const PositionListPtr sort(SortOrder order); 
                      // Returns the TIDs whose value satisfies comp against value_for_comparison;
                      // the default implementation terminates the process via exit(-1) if the
                      // boost::any does not hold a T.
00060                 virtual const PositionListPtr selection(const boost::any& value_for_comparison, const ValueComparator comp);
                      // The default implementation in this header ignores its arguments and
                      // returns an empty position list.
00061                 virtual const PositionListPtr parallel_selection(const boost::any& value_for_comparison, const ValueComparator comp, unsigned int number_of_threads);
00062                 //join algorithms
                      // Each join returns a pair of position lists (first: TIDs of this column,
                      // second: TIDs of join_column). The default implementations terminate the
                      // process via exit(-1) when join_column has a different element type.
                      // The default sort_merge_join in this header returns two empty lists.
00063                 virtual const PositionListPairPtr hash_join(ColumnPtr join_column);
00064                 virtual const PositionListPairPtr sort_merge_join(ColumnPtr join_column);
00065                 virtual const PositionListPairPtr nested_loop_join(ColumnPtr join_column);
00066 
00067 
                      // In-place arithmetic. The boost::any overloads apply one scalar to every
                      // element and return false if the any is empty or does not hold a T.
                      // The ColumnPtr overloads combine element i of this column with element i
                      // of the argument and return false for a null pointer.
                      // For T = std::string all eight functions are specialized to return false.
00068                 virtual bool add(const boost::any& new_Value);
00069                 //vector addition between columns                       
00070                 virtual bool add(ColumnPtr join_column);
00071 
00072                 virtual bool minus(const boost::any& new_Value);
00073                 virtual bool minus(ColumnPtr join_column);      
00074 
00075                 virtual bool multiply(const boost::any& new_Value);
00076                 virtual bool multiply(ColumnPtr join_column);
00077 
                      // The scalar overload additionally returns false for a zero divisor.
00078                 virtual bool division(const boost::any& new_Value);     
00079                 virtual bool division(ColumnPtr join_column);   
00080 
00081                 //template <typename U, typename BinaryOperator>
00082                 //std::pair<ColumnPtr,ColumnPtr> aggregate_by_keys(ColumnBaseTyped<U>* keys, BinaryOperator binary_op) const;
00083 
00084                 virtual bool store(const std::string& path) = 0;
00085                 virtual bool load(const std::string& path) = 0;
00086                 virtual bool isMaterialized() const  throw() = 0;
00087                 virtual bool isCompressed() const  throw() = 0; 
                      // Returns typeid(T), i.e. the element type of this column.
00089                 virtual const std::type_info& type() const throw();
                      // Mutable access to the element at position index; every default
                      // implementation in this header reads and writes elements through it.
00094                 virtual T& operator[](const int index) = 0;
                      // Element-wise comparison: true iff both columns have the same size and
                      // no pair of elements at the same position compares unequal.
00095                 inline bool operator==(ColumnBaseTyped<T>& column);
00096         };
00097 
00098 
00099         template<class T>
00100         ColumnBaseTyped<T>::ColumnBaseTyped(const std::string& name, AttributeType db_type) : ColumnBase(name,db_type){
00101 
00102         }
00103 
00104         template<class T>
00105         ColumnBaseTyped<T>::~ColumnBaseTyped(){
00106 
00107         }
00108 
00109         template<class T>
00110         const std::type_info& ColumnBaseTyped<T>::type() const throw(){
00111                 return typeid(T);
00112         }
00113 
00114         template<class T>
00115         const PositionListPtr ColumnBaseTyped<T>::sort(SortOrder order){
00116 
00117                 PositionListPtr ids = PositionListPtr( new PositionList());
00118                 std::vector<std::pair<T,TID> > v;
00119 
00120                 for(unsigned int i=0;i<this->size();i++){
00121                         v.push_back (std::pair<T,TID>((*this)[i],i) );
00122                 }
00123 
00124                 //TODO: change implementation, so that no copy operations are required -> use boost zip iterators!
00125 
00126                 if(order==ASCENDING){
00127                         //tbb::parallel_sort(v.begin(),v.end(),std::less_equal<std::pair<T,TID> >());
00128                         std::stable_sort(v.begin(),v.end(),std::less_equal<std::pair<T,TID> >());
00129                 }else if(order==DESCENDING){
00130                         //tbb::parallel_sort(v.begin(),v.end(),std::greater_equal<std::pair<T,TID> >());
00131                         std::stable_sort(v.begin(),v.end(),std::greater_equal<std::pair<T,TID> >()); 
00132                 }else{
00133                         std::cout << "FATAL ERROR: ColumnBaseTyped<T>::sort(): Unknown Sorting Order!" << std::endl;
00134                 }
00135 
00136                 for(unsigned int i=0;i<v.size();i++){
00137                         ids->push_back(v[i].second);
00138                 }
00139 
00140                 return ids;
00141         }
00142 
00143 
00144         template<class T>
00145         const PositionListPtr ColumnBaseTyped<T>::parallel_selection(const boost::any&, const ValueComparator, unsigned int){
00146 
00147                 PositionListPtr result_tids( new PositionList());
00148 
00149                 return result_tids;
00150         }
00151 
00152 
00153 
00154         template<class T>
00155         const PositionListPtr ColumnBaseTyped<T>::selection(const boost::any& value_for_comparison, const ValueComparator comp){
00156                 if(value_for_comparison.type()!=typeid(T)){
00157                         std::cout << "Fatal Error!!! Typemismatch for column " << name_ << std::endl;
00158                         std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
00159                         exit(-1);
00160                 }
00161 
00162                 T value = boost::any_cast<T>(value_for_comparison);
00163 
00164 
00165                 PositionListPtr result_tids;
00166 
00167                 result_tids = PositionListPtr(new PositionList());
00168 
00169                 if(!quiet) std::cout << "Using CPU for Selection..." << std::endl;
00170                 for(TID i=0;i<this->size();i++){
00171 
00172                         //boost::any value = column->get(i);
00173                         //val = values_[i];
00174 
00175                         if(comp==EQUAL){
00176                                 if(value==(*this)[i]){
00177                                         //result_table->insert(this->fetchTuple(i));
00178                                         result_tids->push_back(i);
00179                                 }
00180                         }else if(comp==LESSER){
00181                                 if((*this)[i]<value){
00182                                         //result_table->insert(this->fetchTuple(i));
00183                                         result_tids->push_back(i);
00184                                 }
00185                         }else if(comp==GREATER){
00186                                 if((*this)[i]>value){
00187                                         result_tids->push_back(i);
00188                                         //result_table->insert(this->fetchTuple(i));
00189                                 }
00190                         }else{
00191 
00192                         }
00193                 }       
00194 
00195                 //}
00196                 return result_tids;
00197         }
00198 
00199 
00200         template<class T>
00201         const PositionListPairPtr ColumnBaseTyped<T>::hash_join(ColumnPtr join_column_){
00202 
00203                 typedef boost::unordered_multimap<T,TID,boost::hash<T>, std::equal_to<T> > HashTable;
00204 
00205                 if(join_column_->type()!=typeid(T)){
00206                         std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_  << " and " << join_column_->getName() << std::endl;
00207                         std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
00208                         exit(-1);
00209                 }
00210 
00211                 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<T> > join_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<T> >(join_column_); //static_cast<IntColumnPtr>(column1);
00212 
00213                 PositionListPairPtr join_tids( new PositionListPair());
00214                 join_tids->first = PositionListPtr( new PositionList() );
00215                 join_tids->second = PositionListPtr( new PositionList() );
00216 
00217 
00218                 //create hash table
00219                 HashTable hashtable;
00220                 for(unsigned int i=0;i<this->size();i++)        
00221                         hashtable.insert(
00222                         std::pair<T,TID> ((*this)[i],i)
00223                         );
00224 
00225                 //probe larger relation
00226                 for(unsigned int i=0;i<join_column->size();i++){
00227                         std::pair<typename HashTable::iterator, typename HashTable::iterator> range =  hashtable.equal_range((*join_column)[i]);
00228                         for(typename HashTable::iterator it=range.first ; it!=range.second;it++){
00229                                 if(it->first==(*join_column)[i]){
00230                                         join_tids->first->push_back(it->second);
00231                                         join_tids->second->push_back(i);
00232                                         //cout << "match! " << it->second << ", " << i << "     "  << it->first << endl;
00233                                 }
00234                         }
00235                 }
00236 
00237                 return join_tids;
00238         }
00239 
00240         template<class Type>
00241         const PositionListPairPtr ColumnBaseTyped<Type>::sort_merge_join(ColumnPtr join_column_){
00242 
00243                 if(join_column_->type()!=typeid(Type)){
00244                         std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_  << " and " << join_column_->getName() << std::endl;
00245                         std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
00246                         exit(-1);
00247                 }
00248 
00249                 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > join_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(join_column_); //static_cast<IntColumnPtr>(column1);
00250 
00251                 PositionListPairPtr join_tids( new PositionListPair());
00252                 join_tids->first = PositionListPtr( new PositionList() );
00253                 join_tids->second = PositionListPtr( new PositionList() );
00254 
00255                 return join_tids;
00256         }
00257 
00258 
00259         template<class Type>
00260         const PositionListPairPtr ColumnBaseTyped<Type>::nested_loop_join(ColumnPtr join_column_){
00261                 assert(join_column_!=NULL);
00262                 if(join_column_->type()!=typeid(Type)){
00263                         std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_  << " and " << join_column_->getName() << std::endl;
00264                         std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
00265                         exit(-1);
00266                 }
00267 
00268                 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > join_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(join_column_); //static_cast<IntColumnPtr>(column1);
00269 
00270                 PositionListPairPtr join_tids( new PositionListPair());
00271                 join_tids->first = PositionListPtr( new PositionList() );
00272                 join_tids->second = PositionListPtr( new PositionList() );
00273 
00274                 for(unsigned int i=0;i<this->size();i++){
00275                         for(unsigned int j=0;j<join_column->size();j++){
00276                                 if((*this)[i]==(*join_column)[j]){
00277                                         if(debug) std::cout << "MATCH: (" << i << "," << j << ")" << std::endl;
00278                                         join_tids->first->push_back(i);
00279                                         join_tids->second->push_back(j);
00280                                 }
00281                         }
00282                 }
00283 
00284                 return join_tids;
00285         }
00286 
00287         template<class T>
00288         bool ColumnBaseTyped<T>::operator==(ColumnBaseTyped<T>& column){
00289                 if(this->size()!=column.size()) return false;
00290                 for(unsigned int i=0;i<this->size();i++){
00291                         if((*this)[i]!=column[i]){      
00292                                 return false;
00293                         }
00294                 }
00295                 return true;
00296         }
00297 
00298         template<class Type>
00299         bool ColumnBaseTyped<Type>::add(const boost::any& new_value){
00300                 if(new_value.empty()) return false;
00301                 if(typeid(Type)==new_value.type()){
00302                         Type value = boost::any_cast<Type>(new_value);
00303                         //std::transform(myvec.begin(), myvec.end(), myvec.begin(),
00304                         //bind2nd(std::plus<double>(), 1.0));
00305                         for(unsigned int i=0;i<this->size();i++){
00306                                 this->operator[](i)+=value;
00307                         }
00308                         return true;
00309                 }
00310                 return false;
00311         }
00312 
00313 
00314 
00315         template<class Type>
00316         bool ColumnBaseTyped<Type>::add(ColumnPtr column){
00317                 //std::transform ( first, first+5, second, results, std::plus<int>() );         
00318                 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > typed_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(column);
00319                 if(!column) return false;
00320                 for(unsigned int i=0;i<this->size();i++){
00321                         this->operator[](i)+=typed_column->operator[](i);
00322                 }                       
00323                 return true;
00324         }
00325 
00326 
00327 
00328         template<class Type>
00329         bool ColumnBaseTyped<Type>::minus(const boost::any& new_value){
00330                 //shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > typed_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(column);    
00331                 if(new_value.empty()) return false;
00332                 if(typeid(Type)==new_value.type()){
00333                         Type value = boost::any_cast<Type>(new_value);
00334                         for(unsigned int i=0;i<this->size();i++){
00335                                 this->operator[](i)-=value;
00336                         }
00337                         return true;
00338                 }
00339                 return false;
00340         }
00341 
00342         template<class Type>
00343         bool ColumnBaseTyped<Type>::minus(ColumnPtr column){
00344                 //std::transform ( first, first+5, second, results, std::plus<int>() );         
00345                 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > typed_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(column);
00346                 if(!column) return false;
00347                 for(unsigned int i=0;i<this->size();i++){
00348                         this->operator[](i)-=typed_column->operator[](i);
00349                 }                       
00350                 return true;
00351         }       
00352 
00353 
00354         template<class Type>
00355         bool ColumnBaseTyped<Type>::multiply(const boost::any& new_value){
00356                 if(new_value.empty()) return false;
00357                 if(typeid(Type)==new_value.type()){
00358                         Type value = boost::any_cast<Type>(new_value);
00359                         for(unsigned int i=0;i<this->size();i++){
00360                                 this->operator[](i)*=value;
00361                         }
00362                         return true;
00363                 }
00364                 return false;
00365         }
00366 
00367         template<class Type>
00368         bool ColumnBaseTyped<Type>::multiply(ColumnPtr column){
00369                 //std::transform ( first, first+5, second, results, std::plus<int>() );         
00370                 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > typed_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(column);
00371                 if(!column) return false;
00372                 for(unsigned int i=0;i<this->size();i++){
00373                         this->operator[](i)*=typed_column->operator[](i);
00374                 }                       
00375                 return true;
00376         }
00377 
00378 
00379 
00380         template<class Type>
00381         bool ColumnBaseTyped<Type>::division(const boost::any& new_value){
00382                 if(new_value.empty()) return false;
00383                 if(typeid(Type)==new_value.type()){
00384                         Type value = boost::any_cast<Type>(new_value);
00385                         //check that we do not devide by zero
00386                         if(value==0) return false;
00387                         for(unsigned int i=0;i<this->size();i++){
00388                                 this->operator[](i)/=value;
00389                         }
00390                         return true;
00391                 }
00392                 return false;
00393         }
00394 
00395         template<class Type>
00396         bool ColumnBaseTyped<Type>::division(ColumnPtr column){
00397                 //std::transform ( first, first+5, second, results, std::plus<int>() );         
00398                 shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > typed_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(column);
00399                 if(!column) return false;
00400                 for(unsigned int i=0;i<this->size();i++){
00401                         this->operator[](i)/=typed_column->operator[](i);
00402                 }                       
00403                 return true;
00404         }
00405 
00406         //total tempalte specializations, because numeric computations are undefined on strings 
00407         template<>
00408         inline bool ColumnBaseTyped<std::string>::add(const boost::any&){ return false; }
00409         template<>
00410         inline bool ColumnBaseTyped<std::string>::add(ColumnPtr){ return false; }
00411 
00412         template<>
00413         inline bool ColumnBaseTyped<std::string>::minus(const boost::any&){ return false;       }
00414         template<>
00415         inline bool ColumnBaseTyped<std::string>::minus(ColumnPtr){ return false;       }
00416 
00417 
00418         template<>
00419         inline bool ColumnBaseTyped<std::string>::multiply(const boost::any&){ return false;    }
00420         template<>
00421         inline bool ColumnBaseTyped<std::string>::multiply(ColumnPtr){ return false;    }
00422 
00423         template<>
00424         inline bool ColumnBaseTyped<std::string>::division(const boost::any&){ return false;    }
00425         template<>
00426         inline bool ColumnBaseTyped<std::string>::division(ColumnPtr){ return false;    }
00427 
00428 }; //end namespace CogaDB
00429 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines