Column-oriented GPU-accelerated Database Management System
CoGaDB
/home/sebastian/gpudbms/trunk/cogadb/include/core/column.hpp
Go to the documentation of this file.
00001 #pragma once
00002 
00003 #include <core/column_base_typed.hpp>
00004 #include <iostream>
00005 #include <fstream>
00006 
00007 #include <stdint.h>
00008 
00009 #include <gpu/gpu_base_column.hpp>
00010 #include <util/begin_ptr.hpp>
00011 
00012 #include <hardware_optimizations/simd_acceleration.hpp>
00013 
00014 namespace CoGaDB{
00015 
00016 template<typename T>
00017 class Column : public ColumnBaseTyped<T>{
00018         public:
00019         /***************** constructors and destructor *****************/
00020         Column(const std::string& name, AttributeType db_type);
00021         virtual ~Column();
00022 
00023         virtual bool insert(const boost::any& new_value);
00024         bool insert(const T& new_value);        
00025         template <typename InputIterator>
00026         bool insert(InputIterator first, InputIterator last);
00027 
00028         virtual bool update(TID tid, const boost::any& new_value);
00029         virtual bool update(PositionListPtr tid, const boost::any& new_value);  
00030         
00031         virtual bool remove(TID tid);
00032         //assumes tid list is sorted ascending
00033         virtual bool remove(PositionListPtr tid);
00034         virtual bool clearContent();
00035 
00036         virtual const boost::any get(TID tid);
00037         //virtual const boost::any* const getRawData();
00038         virtual void print() const throw();
00039         virtual size_t size() const throw();
00040         virtual unsigned int getSizeinBytes() const throw();
00041 
00042         virtual const ColumnPtr copy() const;
00043         virtual const ColumnPtr materialize() throw();
00044         virtual const ColumnPtr gather(PositionListPtr tid_list);
00045 
00046         virtual bool store(const std::string& path);
00047         virtual bool load(const std::string& path);
00048         virtual bool isMaterialized() const  throw();
00049         virtual bool isCompressed() const  throw();     
00050         
00051         virtual T& operator[](const int index);
00052         //inline T& operator[](const int index) __attribute__((always_inline));
00053 
00054         virtual const PositionListPtr selection(const boost::any& value_for_comparison, const ValueComparator comp);  
00055         virtual const PositionListPtr selection(ColumnPtr comparison_column, const ValueComparator comp);
00056         
00057         std::vector<T>& getContent();
00058 
00059         private:
00060 
00061                 struct Type_TID_Comparator {
00062                         inline bool operator() (std::pair<T,TID> i, std::pair<T,TID> j) { return (i.first<j.first);}
00063                 } type_tid_comparator;
00064 
00065         
00067         std::vector<T> values_;
00068 };
00069 
00070 
00071 
00072 /***************** Start of Implementation Section ******************/
00073 
00074         
00075         template<class T>
00076         Column<T>::Column(const std::string& name, AttributeType db_type) : ColumnBaseTyped<T>(name,db_type), type_tid_comparator(), values_(){
00077 
00078         }
00079 
00080         template<class T>
00081         Column<T>::~Column(){
00082 
00083         }
00084 
00085         template<class T>
00086         std::vector<T>& Column<T>::getContent(){
00087                 return values_;
00088         }
00089         /*
00090         template<class T>
00091         const PositionListPtr Column<T>::selection(const boost::any& value_for_comparison, const ValueComparator comp){
00092             
00093             T value;
00094             
00095             if(value_for_comparison.type()!=typeid(T)){ 
00096                 //catch some special cases
00097                 if(typeid(T)==typeid(float) && value_for_comparison.type()==typeid(int)){
00098                     value = boost::any_cast<int>(value_for_comparison);
00099                 }else{
00100                     std::cout << "Fatal Error!!! Typemismatch for column " << this->name_ << std::endl;
00101                     std::cout << "Column Type: " << typeid(T).name() << " filter value type: " << value_for_comparison.type().name() << std::endl;
00102                     std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
00103                     exit(-1);
00104                 }
00105             }else{
00106                 //everything fine, filter value matches type of column
00107                 value = boost::any_cast<T>(value_for_comparison);
00108             }
00109 
00110             //T value = boost::any_cast<T>(value_for_comparison);
00111             PositionListPtr result_tids;
00112 
00113             result_tids = PositionListPtr(new PositionList());
00114             //one third rule for selections: assume a selectivity of 0.3, meaning we need roughly 0.3 times of the input to store the result 
00115             //this optimization should minimize the number of reallocations during the insertion process
00116             //result_tids->reserve(0.3*this->size()); 
00117             
00118              unsigned int array_size=this->size()+1;
00119             //calls new internally   
00120              result_tids->resize(array_size);
00121              //tids.reserve(array_size);
00122              //get pointer
00123              unsigned int* array_tids=hype::util::begin_ptr(*result_tids);
00124              assert(array_tids!=NULL);
00125              unsigned int pos=0;
00126 
00127             if(!quiet) std::cout << "Using CPU for Selection..." << std::endl;
00128             //unsigned int array_size = this->size();
00129             //for(TID i=0;i<array_size;i++){
00130 
00131                     //boost::any value = column->get(i);
00132                     //val = values_[i];
00133             T* array=hype::util::begin_ptr(values_);
00134             
00135             if(comp==EQUAL){
00136                 for(TID i=0;i<array_size;i++){
00137                     if(value==array[i]){
00138                             //result_tids->push_back(i);
00139                             array_tids[pos++]=i;
00140                     }
00141                 }
00142             }else if(comp==LESSER){
00143                 for(TID i=0;i<array_size;i++){
00144                     if(array[i]<value){
00145                             //result_table->insert(this->fetchTuple(i));
00146                             //result_tids->push_back(i);
00147                             array_tids[pos++]=i;
00148                     }
00149                 }
00150              }else if(comp==LESSER_EQUAL){
00151                 for(TID i=0;i<array_size;i++){
00152                     if(array[i]<=value){
00153                             //result_tids->push_back(i);
00154                             //result_table->insert(this->fetchTuple(i));
00155                             array_tids[pos++]=i;
00156                     }
00157                 } 
00158             }else if(comp==GREATER){
00159                 for(TID i=0;i<array_size;i++){
00160                     if(array[i]>value){
00161                             //result_tids->push_back(i);
00162                             //result_table->insert(this->fetchTuple(i));
00163                             array_tids[pos++]=i;
00164                     }
00165                 }    
00166             }else if(comp==GREATER_EQUAL){
00167                 for(TID i=0;i<array_size;i++){
00168                     if(array[i]>=value){
00169                             //result_tids->push_back(i);
00170                             //result_table->insert(this->fetchTuple(i));
00171                             array_tids[pos++]=i;
00172                     }
00173                 }      
00174             }else{
00175 
00176             }
00177             //shrink to actual result size
00178             result_tids->resize(pos);
00179             return result_tids;
00180         }*/
00181   
00182         
00183     
00184         
00185         
00186 
00187         template<class T>
00188         const PositionListPtr Column<T>::selection(ColumnPtr comparison_column, const ValueComparator comp){
00189             //same behaviour as in parent class
00190             return CoGaDB::ColumnBaseTyped<T>::selection(comparison_column, comp);
00191         }
00192         
00193 
00194         template<class T>
00195         bool Column<T>::insert(const boost::any& new_value){
00196                 if(new_value.empty()) return false;
00197                 if(typeid(T)==new_value.type()){
00198                          T value = boost::any_cast<T>(new_value);
00199                          values_.push_back(value);
00200                          return true;
00201                 }
00202                 return false;
00203         }
00204 
00205         template<class T>
00206         bool Column<T>::insert(const T& new_value){
00207                 values_.push_back(new_value);
00208                 return true;
00209         }
00210 
00211 
00212         template <typename T> 
00213         template <typename InputIterator>
00214         bool Column<T>::insert(InputIterator first, InputIterator last){
00215                 this->values_.insert(this->values_.end(),first,last);
00216                 return true;
00217         }
00218 
00219         template<class T>
00220         bool Column<T>::update(TID tid, const boost::any& new_value){
00221                 if(new_value.empty()) return false;
00222                 if(typeid(T)==new_value.type()){
00223                          T value = boost::any_cast<T>(new_value);
00224                          values_[tid]=value;
00225                          return true;
00226                 }else{
00227                         std::cout << "Fatal Error!!! Typemismatch for column " << this->name_ << std::endl; 
00228                 }
00229                 return false;
00230         }
00231 
00232         template<class T>
00233         bool Column<T>::update(PositionListPtr tids, const boost::any& new_value){
00234                 if(!tids)
00235                         return false;
00236         if(new_value.empty()) return false;
00237                 if(typeid(T)==new_value.type()){
00238                          T value = boost::any_cast<T>(new_value);
00239                          for(unsigned int i=0;i<tids->size();i++){
00240                                 TID tid=(*tids)[i];
00241                                 values_[tid]=value;
00242                          }
00243                          return true;
00244                 }else{
00245                         std::cout << "Fatal Error!!! Typemismatch for column " << this->name_ << std::endl; 
00246                 }
00247                 return false;           
00248         }
00249         
00250 
00251 
00252 
00253         template<class T>
00254         bool Column<T>::remove(TID tid){
00255                 values_.erase(values_.begin()+tid);
00256                 return true;
00257         }
00258         
00259         template<class T>
00260         bool Column<T>::remove(PositionListPtr tids){
00261                 if(!tids)
00262                         return false;
00263                 //test whether tid list has at least one element, if not, return with error
00264                 if(tids->empty())
00265                         return false;           
00266 
00267                 //assert();
00268 
00269                 typename PositionList::reverse_iterator rit;
00270 
00271                 for (rit = tids->rbegin(); rit!=tids->rend(); ++rit)
00272                         values_.erase(values_.begin()+(*rit));
00273 
00274                 /*
00275                 //delete tuples in reverse order, otherwise the first deletion would invalidate all other tids
00276                 unsigned int i=tids->size()-1;
00277                 while(true)     
00278                         TID = (*tids)[i];
00279                         values_.erase(values_.begin()+tid);             
00280                         if(i==0) break;
00281                 }*/
00282                 
00283                 
00284                 return true;                    
00285         }
00286 
00287         template<class T>
00288         bool Column<T>::clearContent(){
00289                 values_.clear();
00290                 return true;
00291         }
00292 
00293         template<class T>
00294         const boost::any Column<T>::get(TID tid){
00295                 if(tid<values_.size())
00296                         return boost::any(values_[tid]);
00297                 else{
00298                         std::cout << "fatal Error!!! Invalid TID!!! Attribute: " << this->name_ << " TID: " << tid  << std::endl;
00299                 }
00300                 return boost::any();
00301         }
00302 
00303         template<class T>
00304         void Column<T>::print() const throw(){
00305                 std::cout << "| " << this->name_ << " |" << std::endl;
00306                 std::cout << "________________________" << std::endl;
00307                 for(unsigned int i=0;i<values_.size();i++){
00308                         std::cout << "| " << values_[i] << " |" << std::endl;
00309                 }
00310         }
00311         template<class T>
00312         size_t Column<T>::size() const throw(){
00313                 return values_.size();
00314         }
00315 
00316         template<class T>
00317         const ColumnPtr Column<T>::materialize() throw(){
00318             return this->copy(); 
00319          }
00320         template<class T>
00321         const ColumnPtr Column<T>::copy() const{
00322                 return ColumnPtr(new Column<T>(*this));
00323         }
00324         
00325         template<class T>
00326         const ColumnPtr Column<T>::gather(PositionListPtr tid_list){
00327             Column<T>* result = new Column<T>(this->name_,this->db_type_);
00328             std::vector<T>& data = result->getContent();
00329             data.resize(tid_list->size());
00330             //if(!CoGaDB::quiet && CoGaDB::verbose && CoGaDB::debug) std::cout << "Result size:" << tid_list->size() << std::endl;
00331             //if(!CoGaDB::quiet && CoGaDB::verbose && CoGaDB::debug) std::cout << "Column size: " << this->size() << std::endl;
00332             for(unsigned int i=0;i<tid_list->size();i++){
00333                 //std::cout << "tid: " << (*tid_list)[i] << std::endl;
00334                 //std::cout << "value: " << (*this)[(*tid_list)[i]] << std::endl;
00335                 data[i]=(*this)[(*tid_list)[i]];
00336             }
00337             return ColumnPtr(result); 
00338         }        
00339         
00340         
00341         /***************** relational operations on Columns which return lookup tables *****************/
00342 //      template<class T>
00343 //      const std::vector<TID> Column<T>::sort(const ComputeDevice comp_dev) const {
00344 
00345 //              return std::vector<TID>();
00346 //      }
00347 
00348 //      template<class T> 
00349 //      const std::vector<TID> Column<T>::selection(const boost::any& value_for_comparison, const ValueComparator comp, const ComputeDevice comp_dev) const {
00350 
00351 //              return std::vector<TID>();
00352 //      }
00353 //      //join algorithms
00354 //      template<class T>
00355 //      const std::vector<TID_Pair> Column<T>::sort_merge_join(ColumnPtr join_Column, const ComputeDevice comp_dev) const{
00356 
00357 //              return std::vector<TID_Pair>();
00358 //      }
00359 //      template<class T>
00360 //      const std::vector<TID_Pair> Column<T>::nested_loop_join(ColumnPtr join_Column, const ComputeDevice comp_dev) const{
00361 
00362 //              return std::vector<TID_Pair>();
00363 //      }
00364         template<class T>
00365         bool Column<T>::store(const std::string& path_){
00366                 //string path("data/");
00367                 std::string path(path_);
00368                 path += "/";
00369                 path += this->name_;
00370                            if(!quiet && verbose && debug) std::cout << "Writing Column " << this->getName() << " to File " << path << std::endl;
00371                 std::ofstream outfile (path.c_str(),std::ios_base::binary | std::ios_base::out);
00372                 boost::archive::binary_oarchive oa(outfile);
00373 
00374                 oa << values_;
00375 
00376                 outfile.flush();
00377                 outfile.close();
00378                 return true;
00379         }
00380         template<class T>
00381         bool Column<T>::load(const std::string& path_){
00382                 std::string path(path_);
00383                 if(!quiet && verbose && debug) std::cout << "Loading column '" << this->name_ << "' from path '" << path << "'..." << std::endl;
00384                 //string path("data/");
00385                 path += "/";
00386                 path += this->name_;
00387                 
00388                 if(!quiet && verbose && debug) std::cout << "Opening File '" << path << "'..." << std::endl;
00389                 std::ifstream infile (path.c_str(),std::ios_base::binary | std::ios_base::in);
00390                 boost::archive::binary_iarchive ia(infile);
00391                 ia >> values_;
00392                 infile.close();
00393 
00394 
00395                 return true;
00396         }
00397         template<class T>
00398         bool Column<T>::isMaterialized() const  throw(){
00399                 return true;
00400         }
00401         
00402         template<class T>
00403         bool Column<T>::isCompressed() const  throw(){
00404                 return false;
00405         }
00406 
00407         template<class T>
00408         T& Column<T>::operator[](const int index){
00409                 
00410                 return values_[index];
00411         }
00412 
00413 
00414 
00415 /*
00416         template<>
00417         bool& Column<bool>::operator[](const int index) const{
00418                 
00419                 return false; // values_[index];
00420         }*/
00421         template<class T>
00422         unsigned int Column<T>::getSizeinBytes() const throw(){
00423                         return values_.capacity()*sizeof(T);
00424         }
00425 
00426         //total template specialization
00427         template<>
00428         inline unsigned int Column<std::string>::getSizeinBytes() const throw(){
00429                 unsigned int size_in_bytes=0;
00430                 for(unsigned int i=0;i<values_.size();++i){
00431                         size_in_bytes+=values_[i].capacity();
00432                 }
00433                 //return values_.size()*sizeof(T);
00434                 return size_in_bytes;
00435         }
00436 
00437 /***************** End of Implementation Section ******************/
00438 
00439 
00440 
00441 
00442 }; //end namespace CogaDB
00443 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines