forked from lijiext/lammps
261 lines
8.4 KiB
C++
261 lines
8.4 KiB
C++
//@HEADER
|
|
// ************************************************************************
|
|
//
|
|
// Kokkos v. 2.0
|
|
// Copyright (2014) Sandia Corporation
|
|
//
|
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
// the U.S. Government retains certain rights in this software.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
//
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
//
|
|
// 3. Neither the name of the Corporation nor the names of the
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
//
|
|
// ************************************************************************
|
|
//@HEADER
|
|
|
|
#ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
|
|
#define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
|
|
|
|
#include <impl/Kokkos_Timer.hpp>
|
|
|
|
#include <iostream>
|
|
#include <iomanip>
|
|
#include <fstream>
|
|
#include <string>
|
|
#include <sstream>
|
|
|
|
|
|
namespace Perf {
|
|
|
|
template <typename Device, bool Near>
|
|
struct UnorderedMapTest
|
|
{
|
|
typedef Device execution_space;
|
|
typedef Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space> map_type;
|
|
typedef typename map_type::histogram_type histogram_type;
|
|
|
|
struct value_type {
|
|
uint32_t failed_count;
|
|
uint32_t max_list;
|
|
};
|
|
|
|
uint32_t capacity;
|
|
uint32_t inserts;
|
|
uint32_t collisions;
|
|
double seconds;
|
|
map_type map;
|
|
histogram_type histogram;
|
|
|
|
UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions)
|
|
: capacity(arg_capacity)
|
|
, inserts(arg_inserts)
|
|
, collisions(arg_collisions)
|
|
, seconds(0)
|
|
, map(capacity)
|
|
, histogram(map.get_histogram())
|
|
{
|
|
Kokkos::Timer wall_clock ;
|
|
wall_clock.reset();
|
|
|
|
value_type v = {};
|
|
int loop_count = 0;
|
|
do {
|
|
++loop_count;
|
|
|
|
v = value_type();
|
|
Kokkos::parallel_reduce(inserts, *this, v);
|
|
|
|
if (v.failed_count > 0u) {
|
|
const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ;
|
|
map.rehash( new_capacity );
|
|
}
|
|
} while (v.failed_count > 0u);
|
|
|
|
seconds = wall_clock.seconds();
|
|
|
|
switch (loop_count)
|
|
{
|
|
case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break;
|
|
case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break;
|
|
default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break;
|
|
}
|
|
std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush;
|
|
|
|
histogram.calculate();
|
|
Device::fence();
|
|
}
|
|
|
|
void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out)
|
|
{
|
|
metrics_out << map.capacity() << " , ";
|
|
metrics_out << inserts/collisions << " , ";
|
|
metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , ";
|
|
metrics_out << inserts << " , ";
|
|
metrics_out << (map.failed_insert() ? "true" : "false") << " , ";
|
|
metrics_out << collisions << " , ";
|
|
metrics_out << 1e9*(seconds/inserts) << " , ";
|
|
metrics_out << seconds << std::endl;
|
|
|
|
length_out << map.capacity() << " , ";
|
|
length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
|
|
length_out << collisions << " , ";
|
|
histogram.print_length(length_out);
|
|
|
|
distance_out << map.capacity() << " , ";
|
|
distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
|
|
distance_out << collisions << " , ";
|
|
histogram.print_distance(distance_out);
|
|
|
|
block_distance_out << map.capacity() << " , ";
|
|
block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
|
|
block_distance_out << collisions << " , ";
|
|
histogram.print_block_distance(block_distance_out);
|
|
}
|
|
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void init( value_type & v ) const
|
|
{
|
|
v.failed_count = 0;
|
|
v.max_list = 0;
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void join( volatile value_type & dst, const volatile value_type & src ) const
|
|
{
|
|
dst.failed_count += src.failed_count;
|
|
dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list;
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()(uint32_t i, value_type & v) const
|
|
{
|
|
const uint32_t key = Near ? i/collisions : i%(inserts/collisions);
|
|
typename map_type::insert_result result = map.insert(key,i);
|
|
v.failed_count += !result.failed() ? 0 : 1;
|
|
v.max_list = result.list_position() < v.max_list ? v.max_list : result.list_position();
|
|
}
|
|
|
|
};
|
|
|
|
// Driver for the UnorderedMap insertion benchmark.
//
// As built, the benchmark body is compiled out (#if 0): the function ignores
// base_file_name and only writes "skipping test" to std::cout.
//
// When the disabled branch is re-enabled it:
//   - opens four CSV files named base_file_name + "-metrics.csv",
//     "-length.csv", "-distance.csv" and "-block_distance.csv";
//   - for every collision count and fill ratio, constructs an
//     UnorderedMapTest<Device, Near> for each power-of-two capacity from
//     2^14 up to (not including) 2^25 and appends its record to each file.
//
// Template parameters
//   Device - Kokkos execution space forwarded to UnorderedMapTest.
//   Near   - key-layout switch forwarded to UnorderedMapTest.
//
// Parameters
//   base_file_name - prefix of the CSV output files (unused while disabled).
template <typename Device, bool Near>
void run_performance_tests(std::string const & base_file_name)
{
#if 0
  std::string metrics_file_name        = base_file_name + std::string("-metrics.csv");
  std::string length_file_name         = base_file_name + std::string("-length.csv");
  std::string distance_file_name       = base_file_name + std::string("-distance.csv");
  std::string block_distance_file_name = base_file_name + std::string("-block_distance.csv");

  std::ofstream metrics_out( metrics_file_name.c_str(), std::ofstream::out );
  std::ofstream length_out( length_file_name.c_str(), std::ofstream::out );
  std::ofstream distance_out( distance_file_name.c_str(), std::ofstream::out );
  std::ofstream block_distance_out( block_distance_file_name.c_str(), std::ofstream::out );

  /*
  Full sweep of fill ratios:
  const double test_ratios[] = {
     0.50
   , 0.75
   , 0.80
   , 0.85
   , 0.90
   , 0.95
   , 1.00
   , 1.25
   , 2.00
  };
  */

  const double test_ratios[] = { 1.00 };

  const int num_ratios = sizeof(test_ratios) / sizeof(double);

  /*
  Full sweep of collision counts:
  const uint32_t collisions[] {
      1
    , 4
    , 16
    , 64
  };
  */

  const uint32_t collisions[] = { 16 };

  const int num_collisions = sizeof(collisions) / sizeof(uint32_t);

  // set up file headers
  metrics_out << "Capacity , Unique , Percent Full , Attempted Inserts , Failed Inserts , Collision Ratio , Nanoseconds/Inserts, Seconds" << std::endl;

  // UnorderedMapTest::print() writes capacity, percent full and the collision
  // count before the histogram values, so the header names all three columns.
  length_out         << "Capacity , Percent Full , Collision Ratio";
  distance_out       << "Capacity , Percent Full , Collision Ratio";
  block_distance_out << "Capacity , Percent Full , Collision Ratio";

  // Separator goes in front of each bucket label, so the line ends cleanly
  // without writing backspace characters into the files.
  for (int i=0; i<100; ++i) {
    length_out         << " , " << i;
    distance_out       << " , " << i;
    block_distance_out << " , " << i;
  }

  length_out         << std::endl;
  distance_out       << std::endl;
  block_distance_out << std::endl;

  Kokkos::Timer wall_clock ;
  for (int i=0; i < num_collisions ; ++i) {
    wall_clock.reset();
    std::cout << "Collisions: " << collisions[i] << std::endl;
    for (int j = 0; j < num_ratios; ++j) {
      std::cout << std::setprecision(1) << std::fixed << std::setw(5) << (100.0*test_ratios[j]) << "% " << std::flush;
      for (uint32_t capacity = 1<<14; capacity < (1<<25); capacity = capacity << 1) {
        uint32_t inserts = static_cast<uint32_t>(test_ratios[j]*(capacity));
        std::cout << capacity << std::flush;
        UnorderedMapTest<Device, Near> test(capacity, inserts*collisions[i], collisions[i]);
        Device::fence();
        test.print(metrics_out, length_out, distance_out, block_distance_out);
      }
      std::cout << "\b\b " << std::endl;

    }
    std::cout << " " << wall_clock.seconds() << " secs" << std::endl;
  }
  metrics_out.close();
  length_out.close();
  distance_out.close();
  block_distance_out.close();
#else
  // Benchmark disabled: keep the parameter referenced and say so on stdout.
  (void)base_file_name;
  std::cout << "skipping test" << std::endl;
#endif
}
|
|
|
|
|
|
} // namespace Perf
|
|
|
|
#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
|