HyPE — Hybrid Query Processing Engine for Coprocessing in Database Systems
00001 00002 00003 #include "definitions.hpp" 00004 00005 #include <algorithm> 00006 #include <cstdlib> 00007 #include <iostream> 00008 #include <fstream> 00009 #include <cassert> 00010 #include <vector> 00011 00012 #include <stdint.h> 00013 00014 #include <tbb/parallel_sort.h> 00015 #include <tbb/task_scheduler_init.h> 00016 00017 #include <boost/chrono.hpp> 00018 00019 #include <sys/mman.h> 00020 00021 #include <hype.hpp> 00022 00023 #include <config/configuration.hpp> 00024 #include <query_processing/operator.hpp> 00025 #include <query_processing/processing_device.hpp> 00026 00027 //using namespace std; 00028 using namespace boost::chrono; 00029 00030 00031 enum SchedulingConfiguration{CPU_ONLY,GPU_ONLY,HYBRID}; 00032 00033 00034 uint64_t getTimestamp() 00035 { 00036 high_resolution_clock::time_point tp = high_resolution_clock::now(); 00037 nanoseconds dur = tp.time_since_epoch(); 00038 00039 return (uint64_t)dur.count(); 00040 } 00041 00042 struct Random_Number_Generator{ 00043 00044 Random_Number_Generator(unsigned int max_value_size) : max_value_size_(max_value_size){} 00045 00046 ElementType operator() (){ 00047 return (ElementType) rand()%max_value_size_; 00048 } 00049 private: 00050 unsigned int max_value_size_; 00051 }; 00052 00053 enum Architecture{Architecture_32Bit,Architecture_64Bit}; 00054 00055 Architecture getArchitecture(){ 00056 #ifdef __LP64__ 00057 //64-bit Intel or PPC 00058 //#warning "Compiling for 64 Bit" 00059 return Architecture_64Bit; 00060 #else 00061 //32-bit Intel, PPC or ARM 00062 //#warning "Compiling for 32 Bit" 00063 return Architecture_32Bit; 00064 #endif 00065 } 00066 00067 00068 void CPU_Sort(VecPtr dataset){ 00069 assert(dataset!=NULL); 00070 Vec data_copy(dataset->begin(),dataset->end()); 00071 std::sort(data_copy.begin(),data_copy.end()); 00072 } 00073 00074 void CPU_Sort_Parallel(VecPtr dataset){ 00075 assert(dataset!=NULL); 00076 //tbb::task_scheduler_init init(4); 00077 Vec data_copy(dataset->begin(),dataset->end()); 00078 
tbb::parallel_sort(data_copy.begin(),data_copy.end()); 00079 } 00080 00081 void GPU_Sort(VecPtr dataset); 00082 00083 //void GPU_Sort(VecPtr dataset){ 00084 // assert(dataset!=NULL); 00085 // // transfer data to the device 00086 // thrust::device_vector<ElementType> d_vec (dataset->begin(),dataset->end()); 00087 00088 // // sort data on the device (846M keys per second on GeForce GTX 480) 00089 // thrust::sort(d_vec.begin(), d_vec.end()); 00090 00091 // // transfer data back to host 00092 // thrust::copy(d_vec.begin(), d_vec.end(), dataset->begin()); 00093 //} 00094 00095 class CPU_Serial_Sort_Operator : public hype::queryprocessing::Operator{ 00096 public: 00097 CPU_Serial_Sort_Operator(const hype::SchedulingDecision& sd, VecPtr input_data) : Operator(sd), input_data_(input_data){ 00098 00099 } 00100 00101 virtual bool execute(){ 00102 CPU_Sort(input_data_); 00103 return true; 00104 } 00105 00106 VecPtr input_data_; 00107 }; 00108 00109 class CPU_Parallel_Sort_Operator : public hype::queryprocessing::Operator{ 00110 public: 00111 CPU_Parallel_Sort_Operator(const hype::SchedulingDecision& sd, VecPtr input_data) : Operator(sd), input_data_(input_data){ 00112 00113 } 00114 00115 virtual bool execute(){ 00116 CPU_Sort_Parallel(input_data_); 00117 return true; 00118 } 00119 00120 VecPtr input_data_; 00121 }; 00122 00123 class GPU_Sort_Operator : public hype::queryprocessing::Operator{ 00124 public: 00125 GPU_Sort_Operator(const hype::SchedulingDecision& sd, VecPtr input_data) : Operator(sd), input_data_(input_data){ 00126 00127 } 00128 00129 virtual bool execute(){ 00130 //std::cout << "[GPU_Sort_Operator] Exec GPU Sort" << std::endl; 00131 GPU_Sort(input_data_); 00132 return true; 00133 } 00134 00135 VecPtr input_data_; 00136 }; 00137 00138 00139 VecPtr generate_dataset(unsigned int size_in_number_of_elements){ 00140 VecPtr data(new Vec()); 00141 for(unsigned int i=0;i<size_in_number_of_elements;i++){ 00142 ElementType e = (ElementType) rand(); 00143 
data->push_back(e); 00144 } 00145 assert(data!=NULL); 00146 //std::cout << "created new data set: " << data.get() << " of size: " << data->size() << std::endl; 00147 return data; 00148 } 00149 00150 vector<VecPtr> generate_random_datasets(unsigned int max_size_in_number_of_elements, unsigned int number_of_datasets){ 00151 vector<VecPtr> datasets; 00152 for(unsigned int i=0;i<number_of_datasets;i++){ 00153 VecPtr vec_ptr = generate_dataset((unsigned int) (rand()%max_size_in_number_of_elements) ); 00154 assert(vec_ptr!=NULL); 00155 datasets.push_back(vec_ptr); 00156 } 00157 return datasets; 00158 } 00159 00160 int main(int argc, char* argv[]){ 00161 00162 //we don't want the OS to swap out our data to disc that's why we lock it 00163 mlockall(MCL_CURRENT|MCL_FUTURE); 00164 00165 // tbb::task_scheduler_init init(8); 00166 00167 00168 //tbb::task_scheduler_init (2); 00169 // tbb::task_scheduler_init init(1); 00170 // 00171 00172 00173 cout << "TBB use " << tbb::task_scheduler_init::default_num_threads() << " number of threads as a default" << endl; 00174 unsigned int MAX_DATASET_SIZE_IN_MB=10; //MB //(10*1000*1000)/sizeof(int); //1000000; 00175 unsigned int NUMBER_OF_DATASETS=10; //3; //100; 00176 unsigned int NUMBER_OF_SORT_OPERATIONS_IN_WORKLOAD=100; //3; //1000; 00177 unsigned int RANDOM_SEED=0; 00178 //unsigned int READY_QUEUE_LENGTH=100; 00179 00180 SchedulingConfiguration sched_config=HYBRID; //CPU_ONLY,GPU_ONLY,HYBRID 00181 //SchedulingConfiguration sched_config=GPU_ONLY; 00182 //SchedulingConfiguration sched_config=CPU_ONLY; 00183 00184 std::string stemod_optimization_criterion="Response Time"; 00185 std::string stemod_statistical_method="Least Squares 1D"; 00186 std::string stemod_recomputation_heuristic="Periodic Recomputation"; 00187 00188 // Declare the supported options. 
00189 boost::program_options::options_description desc("Allowed options"); 00190 desc.add_options() 00191 ("help", "produce help message") 00192 ("number_of_datasets", boost::program_options::value<unsigned int>(), "set the number of data sets for workload") 00193 ("number_of_operations", boost::program_options::value<unsigned int>(), "set the number of operations in workload") 00194 ("max_dataset_size_in_MB", boost::program_options::value<unsigned int>(), "set the maximal dataset size in MB") 00195 //("ready_queue_length", boost::program_options::value<unsigned int>(), "set the queue length of operators that may be concurrently scheduled (clients are blocked on a processing device)") 00196 ("scheduling_method", boost::program_options::value<std::string>(), "set the decision model (CPU_ONLY, GPU_ONLY, HYBRID)") 00197 ("random_seed", boost::program_options::value<unsigned int>(), "seed to use before for generating datasets and operation workload") 00198 ("optimization_criterion", boost::program_options::value<std::string>(), "set the decision models optimization_criterion for all algorithms") 00199 ("statistical_method", boost::program_options::value<std::string>(), "set the decision models statistical_method for all algorithms") 00200 ("recomputation_heuristic", boost::program_options::value<std::string>(), "set the decision models recomputation_heuristic for all algorithms") 00201 ; 00202 00203 boost::program_options::variables_map vm; 00204 boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); 00205 boost::program_options::notify(vm); 00206 00207 if (vm.count("help")) { 00208 cout << desc << "\n"; 00209 return 1; 00210 } 00211 00212 if (vm.count("number_of_datasets")) { 00213 cout << "Number of Datasets: " 00214 << vm["number_of_datasets"].as<unsigned int>() << "\n"; 00215 NUMBER_OF_DATASETS=vm["number_of_datasets"].as<unsigned int>(); 00216 } else { 00217 cout << "number_of_datasets was not specified, using default 
value...\n"; 00218 } 00219 00220 if (vm.count("number_of_operations")) { 00221 cout << "Number of Operations: " 00222 << vm["number_of_operations"].as<unsigned int>() << "\n"; 00223 NUMBER_OF_SORT_OPERATIONS_IN_WORKLOAD=vm["number_of_operations"].as<unsigned int>(); 00224 } else { 00225 cout << "number_of_operations was not specified, using default value...\n"; 00226 } 00227 00228 if (vm.count("max_dataset_size_in_MB")) { 00229 cout << "max_dataset_size_in_MB: " 00230 << vm["max_dataset_size_in_MB"].as<unsigned int>() << "MB \n"; 00231 MAX_DATASET_SIZE_IN_MB=vm["max_dataset_size_in_MB"].as<unsigned int>(); //*1024*1024)/sizeof(int); //convert value in MB to equivalent number of integer elements 00232 } else { 00233 cout << "max_dataset_size_in_MB was not specified, using default value...\n"; 00234 } 00235 00236 if (vm.count("random_seed")) { 00237 cout << "Random Seed: " 00238 << vm["random_seed"].as<unsigned int>() << "\n"; 00239 RANDOM_SEED=vm["random_seed"].as<unsigned int>(); 00240 } else { 00241 cout << "random_seed was not specified, using default value...\n"; 00242 } 00243 00244 00245 if (vm.count("scheduling_method")) { 00246 cout << "scheduling_method: " 00247 << vm["scheduling_method"].as<std::string>() << "\n"; 00248 std::string scheduling_method=vm["scheduling_method"].as<std::string>(); 00249 if(scheduling_method=="CPU_ONLY"){ 00250 sched_config=CPU_ONLY; 00251 }else if(scheduling_method=="GPU_ONLY"){ 00252 sched_config=GPU_ONLY; 00253 }else if(scheduling_method=="HYBRID"){ 00254 sched_config=HYBRID; 00255 } 00256 00257 } else { 00258 cout << "scheduling_method was not specified, using default value...\n"; 00259 } 00260 00261 if (vm.count("optimization_criterion")) { 00262 cout << "optimization_criterion: " 00263 << vm["optimization_criterion"].as<std::string>() << "\n"; 00264 stemod_optimization_criterion=vm["optimization_criterion"].as<std::string>(); 00265 00266 if(sched_config!=HYBRID){ 00267 cout << "Specification of STEMOD Parameter needs hybrid 
scheduling (scheduling_method=HYBRID)" << endl; 00268 return -1; 00269 } 00270 00271 } else { 00272 cout << "optimization_criterion was not specified, using default value...\n"; 00273 } 00274 00275 if (vm.count("statistical_method")) { 00276 cout << "statistical_method: " 00277 << vm["statistical_method"].as<std::string>() << "\n"; 00278 stemod_statistical_method=vm["statistical_method"].as<std::string>(); 00279 if(sched_config!=HYBRID){ 00280 cout << "Specification of STEMOD Parameter needs hybrid scheduling (scheduling_method=HYBRID)" << endl; 00281 return -1; 00282 } 00283 00284 } else { 00285 cout << "statistical_method was not specified, using default value...\n"; 00286 } 00287 00288 if (vm.count("recomputation_heuristic")) { 00289 cout << "recomputation_heuristic: " 00290 << vm["recomputation_heuristic"].as<std::string>() << "\n"; 00291 stemod_recomputation_heuristic=vm["recomputation_heuristic"].as<std::string>(); 00292 if(sched_config!=HYBRID){ 00293 cout << "Specification of STEMOD Parameter needs hybrid scheduling (scheduling_method=HYBRID)" << endl; 00294 return -1; 00295 } 00296 00297 } else { 00298 cout << "recomputation_heuristic was not specified, using default value...\n"; 00299 } 00300 00301 /* 00302 if (vm.count("ready_queue_length")) { 00303 cout << "Ready Queue Length: " 00304 << vm["ready_queue_length"].as<std::string>() << "\n"; 00305 READY_QUEUE_LENGTH=vm["ready_queue_length"].as<unsigned int>(); 00306 if(sched_config!=HYBRID){ 00307 cout << "Specification of STEMOD Parameter needs hybrid scheduling (scheduling_method=HYBRID)" << endl; 00308 return -1; 00309 } 00310 00311 } else { 00312 cout << "ready_queue_length was not specified, using default value...\n"; 00313 }*/ 00314 00315 00316 00317 00318 00319 //"if (vm.count(\"$VAR\")) { 00320 // cout << \"$VAR: \" 00321 // << vm[\"$VAR\"].as<std::string>() << \"\n\"; 00322 // std::string s=vm[\"$VAR\"].as<std::string>(); 00323 00324 // 00325 //} else { 00326 // cout << \"$VAR was not specified, 
using default value...\n\"; 00327 //}" 00328 00329 00330 00331 00332 srand(RANDOM_SEED); 00333 00334 cout << "Generating Data sets..." << endl; 00335 cout << "Estimated RAM usage: " << MAX_DATASET_SIZE_IN_MB*NUMBER_OF_DATASETS << "MB" << endl; 00336 if(MAX_DATASET_SIZE_IN_MB*NUMBER_OF_DATASETS>1024*3.7 && getArchitecture()==Architecture_32Bit){ 00337 cout << "Memory for Datasets to generate exceeds 32 bit adress space! (" << MAX_DATASET_SIZE_IN_MB*NUMBER_OF_DATASETS << "MB)" << endl; 00338 return -1; 00339 } 00340 //generate_random_datasets expects data size in number of integer elements, while MAX_DATASET_SIZE_IN_MB specifies data size in Mega Bytes 00341 vector<VecPtr> datasets=generate_random_datasets( (MAX_DATASET_SIZE_IN_MB*1024*1024)/sizeof(int), NUMBER_OF_DATASETS); 00342 vector<unsigned int> query_indeces(NUMBER_OF_SORT_OPERATIONS_IN_WORKLOAD); 00343 00344 std::generate(query_indeces.begin(), query_indeces.end(), Random_Number_Generator(NUMBER_OF_DATASETS)); 00345 00346 //std::copy(query_indeces.begin(), query_indeces.end(), std::ostream_iterator<unsigned int>(std::cout, "\n")); 00347 00348 //setup STEMOD 00349 //stemod::Scheduler::instance().addAlgorithm("SORT","CPU_Algorithm_serial","Least Squares 1D","Periodic Recomputation"); 00350 hype::Scheduler::instance().addAlgorithm("SORT","CPU_Algorithm_parallel", hype::CPU, "Least Squares 1D", "Periodic Recomputation"); 00351 hype::Scheduler::instance().addAlgorithm("SORT","GPU_Algorithm", hype::GPU, "Least Squares 1D", "Periodic Recomputation"); 00352 00353 cout << "Setting Optimization Criterion '" << stemod_optimization_criterion << "'..."; 00354 if(!hype::Scheduler::instance().setOptimizationCriterion("SORT",stemod_optimization_criterion)){ 00355 std::cout << "Error: Could not set '" << stemod_optimization_criterion << "' as Optimization Criterion!" << std::endl; return -1;} 00356 else cout << "Success..." 
<< endl; 00357 //if(!scheduler.setOptimizationCriterion("MERGE","Throughput")) std::cout << "Error" << std::endl; 00358 00359 if(!hype::Scheduler::instance().setStatisticalMethod("CPU_Algorithm_parallel",stemod_statistical_method)){ 00360 std::cout << "Error" << std::endl; return -1; 00361 } else cout << "Success..." << endl; 00362 if(!hype::Scheduler::instance().setStatisticalMethod("GPU_Algorithm",stemod_statistical_method)){ 00363 std::cout << "Error" << std::endl; return -1; 00364 } else cout << "Success..." << endl; 00365 00366 if(!hype::Scheduler::instance().setRecomputationHeuristic("CPU_Algorithm_parallel",stemod_recomputation_heuristic)){ 00367 std::cout << "Error" << std::endl; return -1; 00368 } else cout << "Success..." << endl; 00369 if(!hype::Scheduler::instance().setRecomputationHeuristic("GPU_Algorithm",stemod_recomputation_heuristic)){ 00370 std::cout << "Error" << std::endl; return -1; 00371 } else cout << "Success..." << endl; 00372 00373 00374 hype::queryprocessing::ProcessingDevice& cpu = hype::queryprocessing::getProcessingDevice(hype::CPU); 00375 hype::queryprocessing::ProcessingDevice& gpu = hype::queryprocessing::getProcessingDevice(hype::GPU); 00376 00377 cpu.start(); 00378 gpu.start(); 00379 00380 //boost::this_thread::sleep( boost::posix_time::seconds(30) ); 00381 00382 cout << "Starting Benchmark..." 
<< endl; 00383 00384 uint64_t begin_benchmark_timestamp = getTimestamp(); 00385 uint64_t end_training_timestamp=0; 00386 00387 for(unsigned int i=0;i<NUMBER_OF_SORT_OPERATIONS_IN_WORKLOAD;i++){ 00388 unsigned int index = query_indeces[i]; 00389 VecPtr dataset = datasets[index]; 00390 00391 assert(NUMBER_OF_SORT_OPERATIONS_IN_WORKLOAD==query_indeces.size()); 00392 assert(index<NUMBER_OF_DATASETS); //NUMBER_OF_SORT_OPERATIONS_IN_WORKLOAD); 00393 00394 hype::Tuple t; 00395 t.push_back(dataset->size()); 00396 //stemod::SchedulingDecision sched_dec_local("",stemod::core::EstimatedTime(0),t); 00397 00398 00399 //cout << "RUN: " << i << "/" << NUMBER_OF_SORT_OPERATIONS_IN_WORKLOAD << endl; 00400 00401 if(sched_config==HYBRID){ //CPU_ONLY,GPU_ONLY,HYBRID) 00402 //cout << "scheduling operator " << i << endl; 00403 const unsigned int number_of_training_operations = (hype::core::Runtime_Configuration::instance().getTrainingLength()*2)+1; //*number of algortihms per operation (2) 00404 if(number_of_training_operations==i){ 00405 if(!hype::core::quiet) 00406 cout << "waiting for training to complete" << endl; 00407 //wait until training operations finished 00408 while(!cpu.isIdle() || !gpu.isIdle()){ 00409 boost::this_thread::sleep(boost::posix_time::microseconds(20)); 00410 } 00411 end_training_timestamp = getTimestamp(); 00412 //cout << "stat: cpu " << !cpu.isIdle() << " gpu " << !gpu.isIdle() << endl; 00413 if(!hype::core::quiet) 00414 cout << "training completed! 
Time: " << end_training_timestamp-begin_benchmark_timestamp << "ns (" 00415 << double(end_training_timestamp-begin_benchmark_timestamp)/(1000*1000*1000) <<"s)" << endl; 00416 } 00417 00418 hype::SchedulingDecision sched_dec = hype::Scheduler::instance().getOptimalAlgorithmName("SORT",t); 00419 00420 //cout << "Estimated Time: " << sched_dec.getEstimatedExecutionTimeforAlgorithm().getTimeinNanoseconds() << endl; 00421 //cout << "Decision: " << sched_dec.getNameofChoosenAlgorithm() << endl; 00422 00423 // if(sched_dec.getNameofChoosenAlgorithm()=="CPU_Algorithm_serial"){ 00424 // stemod::AlgorithmMeasurement alg_measure(sched_dec); 00425 // CPU_Sort(dataset); 00426 // alg_measure.afterAlgorithmExecution(); 00427 // }else if(sched_dec.getNameofChoosenAlgorithm()=="CPU_Algorithm_parallel"){ 00428 // stemod::AlgorithmMeasurement alg_measure(sched_dec); 00429 // CPU_Sort_Parallel(dataset); 00430 // alg_measure.afterAlgorithmExecution(); 00431 // }else if(sched_dec.getNameofChoosenAlgorithm()=="GPU_Algorithm"){ 00432 // stemod::AlgorithmMeasurement alg_measure(sched_dec); 00433 // GPU_Sort(dataset); 00434 // alg_measure.afterAlgorithmExecution(); 00435 // } 00436 00437 if(sched_dec.getNameofChoosenAlgorithm()=="CPU_Algorithm_serial"){ 00438 cpu.addOperator( boost::shared_ptr<CPU_Serial_Sort_Operator>( new CPU_Serial_Sort_Operator(sched_dec, dataset) ) ); 00439 // stemod::AlgorithmMeasurement alg_measure(sched_dec); 00440 // CPU_Sort(dataset); 00441 // alg_measure.afterAlgorithmExecution(); 00442 }else if(sched_dec.getNameofChoosenAlgorithm()=="CPU_Algorithm_parallel"){ 00443 cpu.addOperator( boost::shared_ptr<CPU_Parallel_Sort_Operator>( new CPU_Parallel_Sort_Operator(sched_dec, dataset) ) ); 00444 // stemod::AlgorithmMeasurement alg_measure(sched_dec); 00445 // CPU_Sort_Parallel(dataset); 00446 // alg_measure.afterAlgorithmExecution(); 00447 }else if(sched_dec.getNameofChoosenAlgorithm()=="GPU_Algorithm"){ 00448 gpu.addOperator( boost::shared_ptr<GPU_Sort_Operator>( new 
GPU_Sort_Operator(sched_dec, dataset) ) ); 00449 // stemod::AlgorithmMeasurement alg_measure(sched_dec); 00450 // GPU_Sort(dataset); 00451 // alg_measure.afterAlgorithmExecution(); 00452 } 00453 00454 }else if(sched_config==CPU_ONLY){ 00455 CPU_Sort_Parallel(dataset); 00456 //std::cout << "Assigning Operator to CPU... " << std::endl; 00457 //cpu.addOperator( boost::shared_ptr<CPU_Parallel_Sort_Operator>( new CPU_Parallel_Sort_Operator(sched_dec_local, dataset) ) ); 00458 }else if(sched_config==GPU_ONLY){ 00459 GPU_Sort(dataset); 00460 //std::cout << "Assigning Operator to GPU... " << std::endl; 00461 //gpu.addOperator( boost::shared_ptr<GPU_Sort_Operator>( new GPU_Sort_Operator(sched_dec_local, dataset) ) ); 00462 } 00463 00464 } 00465 00466 // boost::this_thread::sleep( boost::posix_time::seconds(3) ); 00467 00468 // cpu.stop(); 00469 // gpu.stop(); 00470 00471 while(!cpu.isIdle() || !gpu.isIdle()){ 00472 00473 } 00474 uint64_t end_benchmark_timestamp = getTimestamp(); 00475 cout << "stat: cpu " << !cpu.isIdle() << " gpu " << !gpu.isIdle() << endl; 00476 cout << "[Main Thread] Processing Devices finished..." 
<< endl; 00477 00478 cpu.stop(); 00479 gpu.stop(); 00480 00481 00482 //if one of the following assertiosn are not fulfilled, then abort, because results are rubbish 00483 assert(end_benchmark_timestamp>=begin_benchmark_timestamp); 00484 double time_for_training_phase=0; 00485 double relative_error_cpu_parallel_algorithm = 0; 00486 double relative_error_gpu_algorithm = 0; 00487 00488 if(sched_config==HYBRID){ //a training phase only exists when the decision model is used 00489 assert(end_training_timestamp>=begin_benchmark_timestamp); 00490 assert(end_benchmark_timestamp>=end_training_timestamp); 00491 time_for_training_phase=end_training_timestamp-begin_benchmark_timestamp; 00492 relative_error_cpu_parallel_algorithm = hype::Report::instance().getRelativeEstimationError("CPU_Algorithm_parallel"); 00493 relative_error_gpu_algorithm = hype::Report::instance().getRelativeEstimationError("GPU_Algorithm"); 00494 } 00495 00496 cout << "Time for Training: " << time_for_training_phase << "ns (" 00497 << double(time_for_training_phase)/(1000*1000*1000) <<"s)" << endl; 00498 00499 cout << "Time for Workload: " << end_benchmark_timestamp-begin_benchmark_timestamp << "ns (" 00500 << double(end_benchmark_timestamp-begin_benchmark_timestamp)/(1000*1000*1000) << "s)" << endl; 00501 00502 00503 00504 double total_time_cpu=cpu.getTotalProcessingTime(); 00505 double total_time_gpu=gpu.getTotalProcessingTime(); 00506 double total_processing_time_forall_devices=total_time_cpu + total_time_gpu; 00507 00508 unsigned int total_dataset_size_in_bytes = 0; 00509 00510 for(unsigned int i=0;i<datasets.size();i++){ 00511 total_dataset_size_in_bytes += datasets[i]->size()*sizeof(ElementType); 00512 } 00513 00514 double percentaged_execution_time_on_cpu=0; 00515 double percentaged_execution_time_on_gpu=0; 00516 00517 if(total_processing_time_forall_devices>0){ 00518 percentaged_execution_time_on_cpu = total_time_cpu/total_processing_time_forall_devices; 00519 percentaged_execution_time_on_gpu = 
total_time_gpu/total_processing_time_forall_devices; 00520 } 00521 00522 00523 00524 cout << "Time for CPU: " << total_time_cpu << "ns \tTime for GPU: " << total_time_gpu << "ns" << endl 00525 << "CPU Utilization: " << percentaged_execution_time_on_cpu << endl 00526 << "GPU Utilization: " << percentaged_execution_time_on_gpu << endl; 00527 00528 cout << "Relative Error CPU_Algorithm_parallel: " << relative_error_cpu_parallel_algorithm << endl; 00529 cout << "Relative Error GPU_Algorithm: " << relative_error_gpu_algorithm << endl; 00530 00531 cout << "Total Size of Datasets: " << total_dataset_size_in_bytes << " Byte (" << total_dataset_size_in_bytes/(1024*1024) << "MB)" << endl; 00532 00533 00534 00535 cout << MAX_DATASET_SIZE_IN_MB << "\t" 00536 << NUMBER_OF_DATASETS << "\t" 00537 << NUMBER_OF_SORT_OPERATIONS_IN_WORKLOAD << "\t" 00538 << sched_config << "\t" 00539 << total_dataset_size_in_bytes << "\t" 00540 << RANDOM_SEED << "\t" 00541 << stemod_optimization_criterion << "\t" 00542 << stemod_statistical_method << "\t" 00543 << stemod_recomputation_heuristic << "\t" 00544 << hype::core::Runtime_Configuration::instance().getMaximalReadyQueueLength() << "\t" 00545 << hype::core::Runtime_Configuration::instance().getHistoryLength() << "\t" 00546 << hype::core::Runtime_Configuration::instance().getRecomputationPeriod() << "\t" 00547 << hype::core::Runtime_Configuration::instance().getTrainingLength() << "\t" 00548 << hype::core::Runtime_Configuration::instance().getOutlinerThreshold()<< "\t" 00549 << hype::core::Runtime_Configuration::instance().getMaximalSlowdownOfNonOptimalAlgorithm() << "\t" 00550 << end_benchmark_timestamp-begin_benchmark_timestamp << "\t" 00551 << time_for_training_phase << "\t" 00552 << total_time_cpu << "\t" 00553 << total_time_gpu << "\t" 00554 << percentaged_execution_time_on_cpu << "\t" 00555 << percentaged_execution_time_on_gpu << "\t" 00556 << relative_error_cpu_parallel_algorithm << "\t" 00557 << relative_error_gpu_algorithm 00558 << 
endl; 00559 00560 std::fstream file("benchmark_results.log",std::ios_base::out | std::ios_base::app); 00561 00562 file.seekg(0, ios::end); // put the "cursor" at the end of the file 00563 unsigned int file_length = file.tellg(); // find the position of the cursor 00564 00565 if(file_length==0){ //if file empty, write header 00566 file << "MAX_DATASET_SIZE_IN_MB" << "\t" 00567 << "NUMBER_OF_DATASETS" << "\t" 00568 << "NUMBER_OF_SORT_OPERATIONS_IN_WORKLOAD" << "\t" 00569 << "sched_config" << "\t" 00570 << "total_size_of_datasets_in_bytes" << "\t" 00571 << "RANDOM_SEED" << "\t" 00572 << "stemod_optimization_criterion" << "\t" 00573 << "stemod_statistical_method" << "\t" 00574 << "stemod_recomputation_heuristic" << "\t" 00575 << "stemod_maximal_ready_queue_length" << "\t" 00576 << "stemod_history_length" << "\t" 00577 << "stemod_recomputation_period" << "\t" 00578 << "stemod_length_of_training_phase" << "\t" 00579 << "stemod_outliner_threshold_in_percent" << "\t" 00580 << "stemod_maximal_slowdown_of_non_optimal_algorithm" << "\t" 00581 << "workload_execution_time_in_ns" << "\t" 00582 << "execution_time_training_only_in_ns" << "\t" 00583 << "total_time_cpu" << "\t" 00584 << "total_time_gpu" << "\t" 00585 << "spent_time_on_cpu_in_percent" << "\t" 00586 << "spent_time_on_gpu_in_percent" << "\t" 00587 << "average_estimation_error_CPU_Algorithm_parallel" << "\t" 00588 << "average_estimation_error_GPU_Algorithm" 00589 << endl; 00590 } 00591 00592 file << MAX_DATASET_SIZE_IN_MB << "\t" 00593 << NUMBER_OF_DATASETS << "\t" 00594 << NUMBER_OF_SORT_OPERATIONS_IN_WORKLOAD << "\t" 00595 << sched_config << "\t" 00596 << total_dataset_size_in_bytes << "\t" 00597 << RANDOM_SEED << "\t" 00598 << stemod_optimization_criterion << "\t" 00599 << stemod_statistical_method << "\t" 00600 << stemod_recomputation_heuristic << "\t" 00601 << hype::core::Runtime_Configuration::instance().getMaximalReadyQueueLength() << "\t" 00602 << hype::core::Runtime_Configuration::instance().getHistoryLength() 
<< "\t" 00603 << hype::core::Runtime_Configuration::instance().getRecomputationPeriod() << "\t" 00604 << hype::core::Runtime_Configuration::instance().getTrainingLength() << "\t" 00605 << hype::core::Runtime_Configuration::instance().getOutlinerThreshold()<< "\t" 00606 << hype::core::Runtime_Configuration::instance().getMaximalSlowdownOfNonOptimalAlgorithm() << "\t" 00607 << end_benchmark_timestamp-begin_benchmark_timestamp << "\t" 00608 << time_for_training_phase << "\t" 00609 << total_time_cpu << "\t" 00610 << total_time_gpu << "\t" 00611 << percentaged_execution_time_on_cpu << "\t" 00612 << percentaged_execution_time_on_gpu << "\t" 00613 << relative_error_cpu_parallel_algorithm << "\t" 00614 << relative_error_gpu_algorithm 00615 << endl; 00616 00617 file.close(); 00618 00619 return 0; 00620 } 00621