Template Numerical Library version main:bb09b17
TNL::Containers::DistributedArray< Value, Device, Index, Allocator > Class Template Reference

Distributed array. More...

#include <TNL/Containers/DistributedArray.h>

Collaboration diagram for TNL::Containers::DistributedArray< Value, Device, Index, Allocator >:

Public Types

using AllocatorType = Allocator
 
using ConstLocalViewType = Containers::ArrayView< std::add_const_t< Value >, Device, Index >
 
using ConstViewType = DistributedArrayView< std::add_const_t< Value >, Device, Index >
 
using DeviceType = Device
 
using IndexType = Index
 
using LocalRangeType = Subrange< Index >
 
using LocalViewType = Containers::ArrayView< Value, Device, Index >
 
template<typename _Value , typename _Device = Device, typename _Index = Index, typename _Allocator = typename Allocators::Default< _Device >::template Allocator< _Value >>
using Self = DistributedArray< _Value, _Device, _Index, _Allocator >
 A template alias which allows one to quickly obtain a DistributedArray type with changed template parameters; a usage sketch follows this list.
 
using SynchronizerType = typename ViewType::SynchronizerType
 
using ValueType = Value
 
using ViewType = DistributedArrayView< Value, Device, Index >
 
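The following is a minimal sketch of how the Self alias can be used to derive related array types; it is not part of the TNL documentation and the alias names are illustrative.

#include <type_traits>

#include <TNL/Containers/DistributedArray.h>

// base type: int values on the host with the default index type (int)
using HostIntArray = TNL::Containers::DistributedArray< int, TNL::Devices::Host >;

// the same container, but storing double values
using HostDoubleArray = HostIntArray::Self< double >;

// the same container, but with a 64-bit index type
using LongIndexArray = HostIntArray::Self< int, TNL::Devices::Host, long int >;

static_assert( std::is_same_v< HostDoubleArray::ValueType, double > );
static_assert( std::is_same_v< LongIndexArray::IndexType, long int > );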

Public Member Functions

 DistributedArray ()=default
 Constructs an empty array with zero size.
 
 DistributedArray (const AllocatorType &allocator)
 Constructs an empty array and sets the provided allocator.
 
 DistributedArray (const DistributedArray &array)
 Copy constructor (makes a deep copy).
 
 DistributedArray (const DistributedArray &array, const AllocatorType &allocator)
 Copy constructor with a specific allocator (makes a deep copy).
 
 DistributedArray (DistributedArray &&) noexcept=default
 
 DistributedArray (LocalRangeType localRange, Index ghosts, Index globalSize, const MPI::Comm &communicator, const AllocatorType &allocator=AllocatorType())
 
void copyFromGlobal (ConstLocalViewType globalArray)
 
bool empty () const
 
template<typename Function >
void forElements (IndexType begin, IndexType end, Function &&f)
 Evaluates the lambda function f for each array element in the interval [ begin, end ).
 
template<typename Function >
void forElements (IndexType begin, IndexType end, Function &&f) const
 Evaluates the lambda function f for each array element in the interval [ begin, end ) for constant instances of the array.
 
AllocatorType getAllocator () const
 Returns the allocator associated with the array.
 
const MPI::Comm & getCommunicator () const
 Returns the MPI communicator associated with the array.
 
ConstLocalViewType getConstLocalView () const
 Returns a non-modifiable view of the local part of the array.
 
ConstLocalViewType getConstLocalViewWithGhosts () const
 Returns a non-modifiable view of the local part of the array, including ghost values.
 
ConstViewType getConstView () const
 Returns a non-modifiable view of the array.
 
ValueType getElement (IndexType i) const
 
IndexType getGhosts () const
 
const LocalRangeType & getLocalRange () const
 Returns the local range of the distributed array.
 
LocalViewType getLocalView ()
 Returns a modifiable view of the local part of the array.
 
LocalViewType getLocalViewWithGhosts ()
 Returns a modifiable view of the local part of the array, including ghost values.
 
IndexType getSize () const
 
std::shared_ptr< SynchronizerType > getSynchronizer () const
 
int getValuesPerElement () const
 
ViewType getView ()
 Returns a modifiable view of the array.
 
void loadFromGlobalFile (const String &fileName, bool allowCasting=false)
 
void loadFromGlobalFile (File &file, bool allowCasting=false)
 
 operator ConstViewType () const
 Conversion operator to a non-modifiable view of the array.
 
 operator ViewType ()
 Conversion operator to a modifiable view of the array.
 
template<typename Array >
bool operator!= (const Array &array) const
 
template<typename Array , typename... , typename = std::enable_if_t< HasSubscriptOperator< Array >::value >>
DistributedArray & operator= (const Array &array)
 
template<typename Array , typename... , typename >
DistributedArray< Value, Device, Index, Allocator > & operator= (const Array &array)
 
DistributedArray & operator= (const DistributedArray &array)
 
DistributedArray & operator= (DistributedArray &&) noexcept(false)=default
 
template<typename Array >
bool operator== (const Array &array) const
 
__cuda_callable__ ValueType & operator[] (IndexType i)
 
__cuda_callable__ const ValueType & operator[] (IndexType i) const
 
void reset ()
 
void setDistribution (LocalRangeType localRange, Index ghosts, Index globalSize, const MPI::Comm &communicator)
 Set new global size and distribution of the array.
 
void setElement (IndexType i, ValueType value)
 
template<typename Array >
void setLike (const Array &array)
 
void setSynchronizer (std::shared_ptr< SynchronizerType > synchronizer, int valuesPerElement=1)
 
void setValue (ValueType value)
 
void startSynchronization ()
 
void waitForSynchronization () const
 

Protected Attributes

LocalArrayType localData
 
ViewType view
 

Detailed Description

template<typename Value, typename Device = Devices::Host, typename Index = int, typename Allocator = typename Allocators::Default< Device >::template Allocator< Value >>
class TNL::Containers::DistributedArray< Value, Device, Index, Allocator >

Distributed array.

Example
#include <iostream>

#include <TNL/Containers/BlockPartitioning.h>
#include <TNL/Containers/DistributedArray.h>
#include <TNL/MPI/ScopedInitializer.h>

/***
 * The following works for any device (CPU, GPU ...).
 */
template< typename Device >
void
distributedArrayExample()
{
   using ArrayType = TNL::Containers::DistributedArray< int, Device >;
   using IndexType = typename ArrayType::IndexType;
   using LocalRangeType = typename ArrayType::LocalRangeType;

   const TNL::MPI::Comm communicator = MPI_COMM_WORLD;

   // We set the global array size to a prime number to force non-uniform distribution.
   const int size = 97;
   const int ghosts = ( communicator.size() > 1 ) ? 4 : 0;
   const LocalRangeType localRange = TNL::Containers::splitRange< IndexType >( size, communicator );

   ArrayType a( localRange, ghosts, size, communicator );
   a.forElements( 0,
                  size,
                  [] __cuda_callable__( int idx, int& value )
                  {
                     value = idx;
                  } );

   ArrayType b( localRange, ghosts, size, communicator );
   b.forElements( 0,
                  size,
                  [] __cuda_callable__( int idx, int& value )
                  {
                     value = idx - ( idx == 90 );
                  } );

   for( int i = 0; i < communicator.size(); i++ ) {
      if( communicator.rank() == i )
         std::cout << "MPI rank = " << communicator.rank() << std::endl
                   << " size = " << a.getSize() << std::endl
                   << " local range = " << a.getLocalRange().getBegin() << " - " << a.getLocalRange().getEnd() << std::endl
                   << " ghosts = " << a.getGhosts() << std::endl
                   << " local data = " << a.getLocalView() << std::endl
                   << " local data with ghosts = " << a.getLocalViewWithGhosts() << std::endl;
      TNL::MPI::Barrier();
   }
}

int
main( int argc, char* argv[] )
{
   TNL::MPI::ScopedInitializer mpi( argc, argv );

   if( TNL::MPI::GetRank() == 0 )
      std::cout << "The first test runs on CPU ..." << std::endl;
   distributedArrayExample< TNL::Devices::Host >();

#ifdef __CUDACC__
   TNL::MPI::Barrier();

   if( TNL::MPI::GetRank() == 0 )
      std::cout << "The second test runs on GPU ..." << std::endl;
   distributedArrayExample< TNL::Devices::Cuda >();
#endif
}
Output
Rank 2: rank on node is 2, using GPU id 0 of 2
Environment:
CUDA_VISIBLE_DEVICES=
Rank 1: rank on node is 1, using GPU id 1 of 2
Environment:
CUDA_VISIBLE_DEVICES=
Rank 0: rank on node is 0, using GPU id 0 of 2
Environment:
CUDA_VISIBLE_DEVICES=
Rank 3: rank on node is 3, using GPU id 1 of 2
Environment:
CUDA_VISIBLE_DEVICES=
The first test runs on CPU ...
MPI rank = 0
size = 97
local range = 0 - 25
ghosts = 4
local data = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 ]
local data with ghosts = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 0, 0, 0 ]
MPI rank = 1
size = 97
local range = 25 - 49
ghosts = 4
local data = [ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48 ]
local data with ghosts = [ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 0, 0, 0, 0 ]
MPI rank = 2
size = 97
local range = 49 - 73
ghosts = 4
local data = [ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72 ]
local data with ghosts = [ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 0, 0, 0, 0 ]
MPI rank = 3
size = 97
local range = 73 - 97
ghosts = 4
local data = [ 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96 ]
local data with ghosts = [ 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 0, 0, 0, 0 ]
The second test runs on GPU ...
MPI rank = 0
size = 97
local range = 0 - 25
ghosts = 4
local data = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 ]
local data with ghosts = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 0, 0, 0 ]
MPI rank = 1
size = 97
local range = 25 - 49
ghosts = 4
local data = [ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48 ]
local data with ghosts = [ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 0, 0, 0, 0 ]
MPI rank = 2
size = 97
local range = 49 - 73
ghosts = 4
local data = [ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72 ]
local data with ghosts = [ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 0, 0, 0, 0 ]
MPI rank = 3
size = 97
local range = 73 - 97
ghosts = 4
local data = [ 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96 ]
local data with ghosts = [ 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 0, 0, 0, 0 ]

Constructor & Destructor Documentation

◆ DistributedArray() [1/3]

template<typename Value , typename Device , typename Index , typename Allocator >
TNL::Containers::DistributedArray< Value, Device, Index, Allocator >::DistributedArray ( const AllocatorType & allocator)
explicit

Constructs an empty array and sets the provided allocator.

Parameters
  allocator: The allocator to be associated with this array.

◆ DistributedArray() [2/3]

template<typename Value , typename Device , typename Index , typename Allocator >
TNL::Containers::DistributedArray< Value, Device, Index, Allocator >::DistributedArray ( const DistributedArray< Value, Device, Index, Allocator > & array)
explicit

Copy constructor (makes a deep copy).

Parameters
  array: The array to be copied.

◆ DistributedArray() [3/3]

template<typename Value , typename Device , typename Index , typename Allocator >
TNL::Containers::DistributedArray< Value, Device, Index, Allocator >::DistributedArray ( const DistributedArray< Value, Device, Index, Allocator > & array,
const AllocatorType & allocator )
explicit

Copy constructor with a specific allocator (makes a deep copy).

Parameters
  array: The array to be copied.
  allocator: The allocator to be associated with this array.
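For illustration, a possible use of these constructors might look like the following sketch; it is not part of the TNL documentation, the function name is made up, and the default template parameters (host device, int index) are assumed.

#include <TNL/Containers/DistributedArray.h>

using ArrayType = TNL::Containers::DistributedArray< double >;

void
allocatorConstructorsSketch( const ArrayType& source )
{
   // an empty array that will allocate its local data with the provided allocator
   ArrayType::AllocatorType allocator;
   ArrayType a( allocator );

   // deep copy of source
   ArrayType b( source );

   // deep copy of source which also uses the provided allocator
   ArrayType c( source, allocator );
}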

Member Function Documentation

◆ forElements() [1/2]

template<typename Value , typename Device , typename Index , typename Allocator >
template<typename Function >
void TNL::Containers::DistributedArray< Value, Device, Index, Allocator >::forElements ( IndexType begin,
IndexType end,
Function && f )

Evaluates the lambda function f for each array element in the interval [ begin, end ).

The lambda function is supposed to be declared as

f( IndexType elementIdx, ValueType& elementValue )

where

  • elementIdx is an index of the array element being currently processed
  • elementValue is a value of the array element being currently processed

The loop is executed on the device where the array is allocated, so it is efficient even on GPUs.

Parameters
  begin: The beginning of the interval of array elements.
  end: The end of the interval of array elements.
  f: The lambda function to be evaluated for each element.
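For illustration, a call of forElements might look like the following sketch; it is not part of the TNL documentation, the function name is made up, and the array is assumed to be already distributed (local range, ghosts, global size and communicator set), as in the example above.

#include <TNL/Containers/DistributedArray.h>

template< typename Device >
void
fillWithSquares( TNL::Containers::DistributedArray< int, Device >& array )
{
   // the lambda receives the global element index and a reference to the element value;
   // as in the example above, each rank updates only the elements it owns
   array.forElements( 0,
                      array.getSize(),
                      [] __cuda_callable__( int idx, int& value )
                      {
                         value = idx * idx;
                      } );
}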

◆ forElements() [2/2]

template<typename Value , typename Device , typename Index , typename Allocator >
template<typename Function >
void TNL::Containers::DistributedArray< Value, Device, Index, Allocator >::forElements ( IndexType begin,
IndexType end,
Function && f ) const

Evaluates the lambda function f for each array element in the interval [ begin, end ) for constant instances of the array.

The lambda function is supposed to be declared as

f( IndexType elementIdx, const ValueType& elementValue )

where

  • elementIdx is an index of the array element being currently processed
  • elementValue is a value of the array element being currently processed

The loop is executed on the device where the array is allocated, so it is efficient even on GPUs.

Parameters
  begin: The beginning of the interval of array elements.
  end: The end of the interval of array elements.
  f: The lambda function to be evaluated for each element.

◆ setDistribution()

template<typename Value , typename Device , typename Index , typename Allocator >
void TNL::Containers::DistributedArray< Value, Device, Index, Allocator >::setDistribution ( LocalRangeType localRange,
Index ghosts,
Index globalSize,
const MPI::Comm & communicator )

Set new global size and distribution of the array.

Parameters
  localRange: The range of elements in the global array that is owned by this rank.
  ghosts: The number of ghost elements allocated by this rank.
  globalSize: The size of the global array.
  communicator: Reference to the MPI communicator on which the array is distributed.
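For illustration, redistributing an existing array might look like the following sketch; it is not part of the TNL documentation, the function name is made up, and MPI is assumed to be already initialized. It reuses the block partitioning helper from the example above.

#include <TNL/Containers/BlockPartitioning.h>
#include <TNL/Containers/DistributedArray.h>

void
redistributeSketch( TNL::Containers::DistributedArray< double >& array, int newGlobalSize )
{
   const TNL::MPI::Comm communicator = MPI_COMM_WORLD;

   // no ghost elements in this sketch
   const int ghosts = 0;

   // block partitioning of the new global range among the ranks of the communicator
   const auto localRange = TNL::Containers::splitRange< int >( newGlobalSize, communicator );

   // allocate local storage according to the new distribution
   array.setDistribution( localRange, ghosts, newGlobalSize, communicator );
}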
