Template Numerical Library version\ main:f17d0c8
TNL::Algorithms::SegmentsReductionKernels::CSRScalarKernel< Index, Device > Struct Template Reference

Public Types

using ConstViewType = CSRScalarKernel< Index, Device >
 
using DeviceType = Device
 
using IndexType = Index
 
using ViewType = CSRScalarKernel< Index, Device >
 

Public Member Functions

__cuda_callable__ ConstViewType getConstView () const
 
__cuda_callable__ ViewType getView ()
 
template<typename Segments >
void init (const Segments &segments)
 
void reset ()
 

Static Public Member Functions

static std::string getKernelType ()
 
template<typename SegmentsView , typename Fetch , typename Reduction , typename ResultKeeper , typename Value = typename detail::FetchLambdaAdapter< Index, Fetch >::ReturnType>
static void reduceAllSegments (const SegmentsView &segments, Fetch &fetch, const Reduction &reduction, ResultKeeper &keeper, const Value &identity=Reduction::template getIdentity< Value >())
 Call reduceSegments for all segments.
 
template<typename SegmentsView , typename Fetch , typename Reduction , typename ResultKeeper , typename Value = typename detail::FetchLambdaAdapter< Index, Fetch >::ReturnType>
static void reduceSegments (const SegmentsView &segments, Index begin, Index end, Fetch &fetch, const Reduction &reduction, ResultKeeper &keeper, const Value &identity=Reduction::template getIdentity< Value >())
 Compute reduction in each segment.
 

Member Function Documentation

◆ reduceAllSegments()

template<typename Index , typename Device >
template<typename SegmentsView , typename Fetch , typename Reduction , typename ResultKeeper , typename Value >
static void
TNL::Algorithms::SegmentsReductionKernels::CSRScalarKernel< Index, Device >::reduceAllSegments(
    const SegmentsView& segments,
    Fetch& fetch,
    const Reduction& reduction,
    ResultKeeper& keeper,
    const Value& identity = Reduction::template getIdentity< Value >() )

Call reduceSegments for all segments.

See reduceSegments for more details.

◆ reduceSegments()

template<typename Index , typename Device >
template<typename SegmentsView , typename Fetch , typename Reduction , typename ResultKeeper , typename Value >
static void
TNL::Algorithms::SegmentsReductionKernels::CSRScalarKernel< Index, Device >::reduceSegments(
    const SegmentsView& segments,
    Index begin,
    Index end,
    Fetch& fetch,
    const Reduction& reduction,
    ResultKeeper& keeper,
    const Value& identity = Reduction::template getIdentity< Value >() )

Compute reduction in each segment.

Template Parameters
Fetch is the type of the lambda function used for data fetching.
Reduction is the reduction operation.
ResultKeeper is the type of the lambda function used for storing results from particular segments.
Parameters
segments is the segments data structure to be reduced.
begin defines the beginning of the interval [ begin, end ) of segments in which we want to perform the reduction.
end defines the end of the interval [ begin, end ) of segments in which we want to perform the reduction.
fetch is a lambda function for fetching data. It is supposed to have one of the following forms:
  1. Full form
    auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) { ... }
  2. Brief form
    auto fetch = [=] __cuda_callable__ ( IndexType globalIdx, bool& compute ) { ... }
    where, for both variants, segmentIdx is the segment index, localIdx is the rank of the element within the segment, globalIdx is the index of the element in the related container, and compute is a boolean variable that serves for stopping the reduction if it is set to false. It is, however, only a hint, and the real behaviour depends on the type of kernel used for the reduction. Some kernels are optimized so that they can be significantly faster with the brief variant of the fetch lambda function.
reduction is a lambda function representing the reduction operation. It is supposed to be defined as:
auto reduction = [=] __cuda_callable__ ( const Value& a, const Value& b ) -> Value { ... }

where a and b are the values to be reduced and the lambda function returns the result of the reduction.

keeper is a lambda function for saving results from particular segments. It is supposed to be defined as:
auto keeper = [=] __cuda_callable__ ( IndexType segmentIdx, const Value& value ) { ... }

where segmentIdx is the index of the segment and value is the result of the reduction in the given segment to be stored.

identity is the initial value for the reduction operation. If Reduction does not have a static member function template getIdentity, it must be supplied explicitly by the user.
Example
#include <iostream>
#include <TNL/Functional.h>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/Segments/CSR.h>
#include <TNL/Algorithms/SegmentsReductionKernels/DefaultKernel.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>

template< typename Device >
void
SegmentsExample()
{
   using SegmentsType = typename TNL::Algorithms::Segments::CSR< Device, int >;
   // This page documents CSRScalarKernel, so we use it directly.
   using SegmentsReductionKernel = TNL::Algorithms::SegmentsReductionKernels::CSRScalarKernel< int, Device >;

   /***
    * Create segments with given segments sizes.
    */
   const int size( 5 );
   SegmentsType segments{ 1, 2, 3, 4, 5 };

   /***
    * Allocate array for the segments.
    */
   TNL::Containers::Array< double, Device > data( segments.getStorageSize(), 0.0 );

   /***
    * Insert data into particular segments.
    */
   auto data_view = data.getView();
   segments.forElements( 0,
                         size,
                         [ = ] __cuda_callable__( int segmentIdx, int localIdx, int globalIdx ) mutable
                         {
                            if( localIdx <= segmentIdx )
                               data_view[ globalIdx ] = segmentIdx;
                         } );

   /***
    * Compute sums of elements in each segment.
    */
   TNL::Containers::Vector< double, Device > sums( size );
   auto sums_view = sums.getView();
   auto fetch_full = [ = ] __cuda_callable__( int segmentIdx, int localIdx, int globalIdx, bool& compute ) -> double
   {
      if( localIdx <= segmentIdx )
         return data_view[ globalIdx ];
      else {
         compute = false;
         return 0.0;
      }
   };
   auto fetch_brief = [ = ] __cuda_callable__( int globalIdx, bool& compute ) -> double
   {
      return data_view[ globalIdx ];
   };
   auto keep = [ = ] __cuda_callable__( int segmentIdx, const double& value ) mutable
   {
      sums_view[ segmentIdx ] = value;
   };
   SegmentsReductionKernel kernel;
   kernel.init( segments );
   kernel.reduceAllSegments( segments, fetch_full, TNL::Plus{}, keep );
   std::cout << "The sums with full fetch form are: " << sums << std::endl;
   kernel.reduceAllSegments( segments, fetch_brief, TNL::Plus{}, keep );
   std::cout << "The sums with brief fetch form are: " << sums << std::endl;
}

int
main( int argc, char* argv[] )
{
   std::cout << "Example of CSR segments on host: " << std::endl;
   SegmentsExample< TNL::Devices::Host >();
#ifdef __CUDACC__
   std::cout << "Example of CSR segments on CUDA GPU: " << std::endl;
   SegmentsExample< TNL::Devices::Cuda >();
#endif
   return EXIT_SUCCESS;
}
Output
Example of CSR segments on host:
The sums with full fetch form are: [ 0, 2, 6, 12, 20 ]
The sums with brief fetch form are: [ 0, 2, 6, 12, 20 ]
Example of CSR segments on CUDA GPU:
The sums with full fetch form are: [ 0, 2, 6, 12, 20 ]
The sums with brief fetch form are: [ 0, 2, 6, 12, 20 ]
