Column-oriented GPU-accelerated Database Management System
CoGaDB
|
00001 #pragma once 00002 #include <iostream> 00003 #include <core/compressed_column.hpp> 00004 #include <map> 00005 #include <string> 00006 #include <boost/archive/binary_iarchive.hpp> 00007 #include <boost/archive/binary_oarchive.hpp> 00008 #include <boost/serialization/map.hpp> 00009 00010 namespace CoGaDB{ 00011 00012 const char one = '1'; 00013 const char zero = '0'; 00014 00015 typedef std::string sstring; 00016 00020 template<class T> 00021 class BitVectorCompressedColumn : public CompressedColumn<T> { 00022 public: 00023 /***************** constructors and destructor *****************/ 00024 BitVectorCompressedColumn(const sstring& name, AttributeType db_type); 00025 ~BitVectorCompressedColumn(); 00026 00027 bool insert(const boost::any& new_value); 00028 bool insert(const T& new_value); 00029 template <typename InputIterator> 00030 bool insert(InputIterator first, InputIterator last); 00031 00032 bool update(TID tid, const boost::any& new_value); 00033 bool update(PositionListPtr tids, const boost::any& new_value); 00034 00035 bool remove(TID tid); 00036 //assumes tid list is sorted ascending 00037 bool remove(PositionListPtr tids); 00038 bool clearContent(); 00039 00040 const boost::any get(TID tid); 00041 void print() const throw(); 00042 size_t size() const throw(); 00043 unsigned int getSizeinBytes() const throw(); 00044 00045 const ColumnPtr copy() const; 00046 00047 bool store(const sstring& path_); 00048 bool load(const sstring& path_); 00049 00050 T& operator[](const int index); 00051 private: 00052 // Methods 00053 bool reorganizeAfterInsert(const T& new_Value); 00054 void reorganizeAfterRemove(TID tid); 00055 sstring buildNewKey(); 00056 sstring buildNewKey(TID tid); 00057 sstring getPath(const sstring& path_); 00058 int countValueForKey(const sstring key); 00059 sstring getKey(const int index); 00060 00061 // Attributes 00062 std::map<sstring, T> bitVectorMap; 00063 }; 00064 00065 /***************** Start of Implementation Section ******************/ 00066 00067 template<class T> 00068 BitVectorCompressedColumn<T>::BitVectorCompressedColumn(const sstring& name, AttributeType db_type) : CompressedColumn<T>(name, db_type), bitVectorMap() {} 00069 00070 template<class T> 00071 BitVectorCompressedColumn<T>::~BitVectorCompressedColumn() {} 00072 00073 template<class T> 00074 bool BitVectorCompressedColumn<T>::insert(const boost::any& new_value) 00075 { 00076 if (new_value.empty()) return false; 00077 00078 if (typeid(T) == new_value.type()) 00079 { 00080 T value = boost::any_cast<T>(new_value); 00081 return this->insert(value); 00082 } 00083 return false; 00084 } 00085 00086 template<class T> 00087 bool BitVectorCompressedColumn<T>::insert(const T& new_value) 00088 { 00089 bool found = reorganizeAfterInsert(new_value); 00090 00091 if (found) return true; 00092 00093 sstring new_key = buildNewKey(); 00094 00095 bitVectorMap[new_key] = new_value; 00096 00097 return true; 00098 } 00099 00100 template <typename T> 00101 sstring BitVectorCompressedColumn<T>::buildNewKey(TID tid) 00102 { 00103 sstring key = ""; 00104 sstring firstEntry = bitVectorMap.begin()->first; 00105 for (unsigned int i = 0; i < firstEntry.length(); ++i) 00106 { 00107 if (i == tid) key += one; 00108 else key += zero; 00109 } 00110 00111 return key; 00112 } 00113 00114 template <typename T> 00115 sstring BitVectorCompressedColumn<T>::buildNewKey() 00116 { 00117 if (bitVectorMap.empty()) 00118 { 00119 return "1"; 00120 } 00121 00122 return buildNewKey((bitVectorMap.begin()->first.length())-1); 00123 } 00124 00125 template <typename T> 00126 bool BitVectorCompressedColumn<T>::reorganizeAfterInsert(const T& new_Value) 00127 { 00128 std::map<sstring, T> temp; 00129 bool found = false; 00130 sstring key; 00131 T value; 00132 00133 while (!bitVectorMap.empty()) 00134 { 00135 key = bitVectorMap.begin()->first; 00136 value = bitVectorMap.begin()->second; 00137 00138 bitVectorMap.erase(key); 00139 00140 if (value == new_Value) 00141 { 00142 key += one; 00143 found = true; 00144 } 00145 else 00146 { 00147 key += zero; 00148 } 00149 00150 temp[key] = value; 00151 } 00152 00153 bitVectorMap = temp; 00154 00155 return found; 00156 } 00157 00158 template <typename T> 00159 template <typename InputIterator> 00160 bool BitVectorCompressedColumn<T>::insert(InputIterator first, InputIterator last) 00161 { 00162 for (InputIterator it = first; it != last; it++) 00163 { 00164 insert(*it); 00165 } 00166 00167 return true; 00168 } 00169 00170 template<class T> 00171 const boost::any BitVectorCompressedColumn<T>::get(TID tid) 00172 { 00173 return boost::any(operator[](tid)); 00174 } 00175 00176 template<class T> 00177 void BitVectorCompressedColumn<T>::print() const throw() 00178 { 00179 typename std::map<sstring, T>::const_iterator iter; 00180 for(iter = bitVectorMap.begin(); iter != bitVectorMap.end(); iter++) 00181 { 00182 std::cout << "key: " << iter->first << " value: " << iter->second << std::endl; 00183 } 00184 } 00185 00186 template<class T> 00187 size_t BitVectorCompressedColumn<T>::size() const throw() 00188 { 00189 if (!bitVectorMap.empty()) 00190 { 00191 sstring firstEntry = bitVectorMap.begin()->first; 00192 return firstEntry.length(); 00193 } 00194 00195 return 0; 00196 } 00197 00198 template<class T> 00199 const ColumnPtr BitVectorCompressedColumn<T>::copy() const 00200 { 00201 return ColumnPtr(new BitVectorCompressedColumn<T>(*this)); 00202 } 00203 00204 template<class T> 00205 int BitVectorCompressedColumn<T>::countValueForKey(const sstring key) 00206 { 00207 int result = 0; 00208 00209 for (unsigned int i = 0; i < key.length(); i++) 00210 { 00211 if (key[i] == one) result++; 00212 } 00213 00214 return result; 00215 } 00216 00217 template<class T> 00218 bool BitVectorCompressedColumn<T>::update(TID tid, const boost::any& new_value) 00219 { 00220 if (new_value.empty()) return false; 00221 00222 if (typeid(T) != new_value.type()) 00223 { 00224 std::cout << "Fatal Error!!! Typemismatch for column " << this->name_ << std::endl; 00225 return false; 00226 } 00227 00228 sstring old_key = getKey(tid); 00229 00230 if (old_key.empty()) 00231 { 00232 std::cout << "Error!!! TID not found " << tid << std::endl; 00233 return false; 00234 } 00235 00236 T old_value = bitVectorMap[old_key]; 00237 T value = boost::any_cast<T>(new_value); 00238 00239 int sumElements = countValueForKey(old_key); 00240 00241 if (sumElements == 1) 00242 { 00243 bitVectorMap[old_key] = value; 00244 } 00245 else if (sumElements > 1) 00246 { 00247 old_key[tid] = zero; 00248 bitVectorMap[old_key] = old_value; 00249 00250 sstring new_Key = buildNewKey(tid); 00251 bitVectorMap[new_Key] = value; 00252 } 00253 00254 return true; 00255 } 00256 00257 template<class T> 00258 bool BitVectorCompressedColumn<T>::update(PositionListPtr tids, const boost::any& new_value) 00259 { 00260 if(!tids || tids->empty()) return false; 00261 00262 for(unsigned int i = 0; i < tids->size(); i++) 00263 { 00264 update((*tids)[i], new_value); 00265 } 00266 return true; 00267 } 00268 00269 template<class T> 00270 bool BitVectorCompressedColumn<T>::remove(TID tid) 00271 { 00272 sstring key = getKey(tid); 00273 00274 if (key.empty()) 00275 { 00276 std::cout << "Error!!! TID not found " << tid << std::endl; 00277 return false; 00278 } 00279 00280 if (countValueForKey(key) == 1) bitVectorMap.erase(key); 00281 00282 reorganizeAfterRemove(tid); 00283 00284 return true; 00285 } 00286 00287 template<class T> 00288 void BitVectorCompressedColumn<T>::reorganizeAfterRemove(TID tid) 00289 { 00290 std::map<sstring, T> temp; 00291 sstring key; 00292 T value; 00293 00294 while (!bitVectorMap.empty()) 00295 { 00296 key = bitVectorMap.begin()->first; 00297 value = bitVectorMap.begin()->second; 00298 00299 bitVectorMap.erase(key); 00300 00301 temp[key.erase(tid,1)] = value; 00302 } 00303 00304 bitVectorMap = temp; 00305 } 00306 00307 template<class T> 00308 bool BitVectorCompressedColumn<T>::remove(PositionListPtr tids) 00309 { 00310 if(!tids || tids->empty()) return false; 00311 00312 typename PositionList::reverse_iterator rit; 00313 00314 for (rit = tids->rbegin(); rit!=tids->rend(); ++rit) 00315 { 00316 remove(*rit); 00317 } 00318 00319 return true; 00320 } 00321 00322 template<class T> 00323 bool BitVectorCompressedColumn<T>::clearContent() 00324 { 00325 bitVectorMap.clear(); 00326 return true; 00327 } 00328 00329 template<class T> 00330 sstring BitVectorCompressedColumn<T>::getPath(const sstring& path_) 00331 { 00332 sstring path(path_); 00333 path += "/"; 00334 path += this->getName(); 00335 00336 return path; 00337 } 00338 00339 template<class T> 00340 bool BitVectorCompressedColumn<T>::store(const sstring& path_) 00341 { 00342 sstring path = getPath(path_); 00343 00344 std::ofstream outfile(path.c_str(), std::ios_base::binary | std::ios_base::out); 00345 boost::archive::binary_oarchive oa(outfile); 00346 00347 oa << bitVectorMap; 00348 00349 outfile.flush(); 00350 outfile.close(); 00351 return true; 00352 } 00353 00354 template<class T> 00355 bool BitVectorCompressedColumn<T>::load(const sstring& path_) 00356 { 00357 sstring path = getPath(path_); 00358 00359 std::ifstream infile (path.c_str(), std::ios_base::binary | std::ios_base::in); 00360 boost::archive::binary_iarchive ia(infile); 00361 00362 ia >> bitVectorMap; 00363 00364 infile.close(); 00365 00366 return true; 00367 } 00368 00369 template<class T> 00370 T& BitVectorCompressedColumn<T>::operator[](const int index) 00371 { 00372 return bitVectorMap[getKey(index)]; 00373 } 00374 00375 template<class T> 00376 sstring BitVectorCompressedColumn<T>::getKey(const int index) 00377 { 00378 sstring key; 00379 typename std::map<sstring, T>::iterator iter; 00380 for(iter = bitVectorMap.begin(); iter != bitVectorMap.end(); iter++) 00381 { 00382 key = iter->first; 00383 if (key[index] == one) return key; 00384 } 00385 00386 return ""; 00387 } 00388 00389 template<class T> 00390 unsigned int BitVectorCompressedColumn<T>::getSizeinBytes() const throw() 00391 { 00392 return bitVectorMap.size() * (sizeof(T) + sizeof(sstring)); 00393 } 00394 };