33 #include <dpu_log_internals.h> 34 #include <dpu_management.h> 50 __get_block(
struct sg_block_info *out, uint32_t dpu_index, uint32_t block_index,
void *args)
52 auto f =
static_cast<F *
>(args);
53 return (*f)(out, dpu_index, block_index);
71 what() const noexcept
override 80 explicit DpuError(dpu_error_t ErrorId)
83 msg = dpu_error_to_string(errorId);
89 throwOnErr(dpu_error_t Error)
91 if (Error != DPU_OK) {
111 : cSymbol({ .address = Address, .size = Size })
117 struct dpu_symbol_t cSymbol;
134 get(
const std::string &SymbolName)
137 DpuError::throwOnErr(dpu_get_symbol(cProgram, SymbolName.c_str(), &symbol.cSymbol));
142 struct dpu_program_t *cProgram {
nullptr };
169 template <
typename T>
171 copy(
const std::string &DstSymbol,
unsigned Offset,
const std::vector<T> &SrcBuffer,
unsigned Size)
173 dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
174 DpuError::throwOnErr(dpu_broadcast_to(cSet, DstSymbol.c_str(), Offset, SrcBuffer.data(), Size, flags));
184 template <
typename T>
186 copy(
const std::string &DstSymbol,
unsigned Offset,
const std::vector<T> &SrcBuffer)
188 copy(DstSymbol, Offset, SrcBuffer, SrcBuffer.size() *
sizeof(T));
198 template <
typename T>
200 copy(
const std::string &DstSymbol,
const std::vector<T> &SrcBuffer,
unsigned Size)
202 copy(DstSymbol, 0, SrcBuffer, Size);
211 template <
typename T>
213 copy(
const std::string &DstSymbol,
const std::vector<T> &SrcBuffer)
215 copy(DstSymbol, 0, SrcBuffer, SrcBuffer.size() *
sizeof(T));
226 template <
typename T>
228 copy(
DpuSymbol &DstSymbol,
unsigned Offset,
const std::vector<T> &SrcBuffer,
unsigned Size)
230 dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
231 DpuError::throwOnErr(dpu_broadcast_to_symbol(cSet, DstSymbol.cSymbol, Offset, SrcBuffer.data(), Size, flags));
241 template <
typename T>
243 copy(
DpuSymbol &DstSymbol,
unsigned Offset,
const std::vector<T> &SrcBuffer)
245 copy(DstSymbol, Offset, SrcBuffer, SrcBuffer.size() *
sizeof(T));
255 template <
typename T>
259 copy(DstSymbol, 0, SrcBuffer, Size);
268 template <
typename T>
272 copy(DstSymbol, 0, SrcBuffer, SrcBuffer.size() *
sizeof(T));
283 template <
typename T>
285 copy(
const std::string &DstSymbol,
unsigned Offset,
const std::vector<std::vector<T>> &SrcBuffers,
unsigned Size)
287 struct dpu_set_t dpu;
290 DPU_FOREACH (cSet, dpu, dpuIdx) {
291 DpuError::throwOnErr(dpu_prepare_xfer(dpu, (
void *)SrcBuffers[dpuIdx].data()));
294 dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
295 DpuError::throwOnErr(dpu_push_xfer(cSet, DPU_XFER_TO_DPU, DstSymbol.c_str(), Offset, Size, flags));
305 template <
typename T>
307 copy(
const std::string &DstSymbol,
unsigned Offset,
const std::vector<std::vector<T>> &SrcBuffers)
309 if (SrcBuffers.size() == 0) {
310 DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
313 unsigned nrElements = SrcBuffers[0].size();
314 for (
auto buf : SrcBuffers) {
315 if (nrElements != buf.size()) {
316 DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
320 copy(DstSymbol, Offset, SrcBuffers, nrElements *
sizeof(T));
330 template <
typename T>
332 copy(
const std::string &DstSymbol,
const std::vector<std::vector<T>> &SrcBuffers,
unsigned Size)
334 copy(DstSymbol, 0, SrcBuffers, Size);
343 template <
typename T>
345 copy(
const std::string &DstSymbol,
const std::vector<std::vector<T>> &SrcBuffers)
347 copy(DstSymbol, 0, SrcBuffers);
362 get_block_t &get_block_info,
364 bool length_check =
true)
366 dpu_sg_xfer_flags_t flags = async ? DPU_SG_XFER_ASYNC : DPU_SG_XFER_DEFAULT;
368 flags =
static_cast<dpu_sg_xfer_flags_t
>(flags | DPU_SG_XFER_DISABLE_LENGTH_CHECK);
370 DpuError::throwOnErr(dpu_push_sg_xfer(cSet, DPU_XFER_TO_DPU, DstSymbol.c_str(), Offset, Size, &get_block_info, flags));
382 copyScatterGather(
const std::string &DstSymbol, get_block_t &get_block_info,
unsigned Size,
bool length_check =
true)
384 copyScatterGather(DstSymbol, 0, get_block_info, Size, length_check);
398 copyScatterGather(
const std::string &DstSymbol,
unsigned Offset, F f,
unsigned Size,
bool length_check =
true)
400 get_block_t get_block_info { __get_block<F>, &f,
sizeof(f) };
401 copyScatterGather(DstSymbol, Offset, get_block_info, Size, length_check);
416 copyScatterGather(DstSymbol, 0, f, Size, length_check);
427 template <
typename T>
429 copy(
DpuSymbol &DstSymbol,
unsigned Offset,
const std::vector<std::vector<T>> &SrcBuffers,
unsigned Size)
431 struct dpu_set_t dpu;
434 DPU_FOREACH (cSet, dpu, dpuIdx) {
435 DpuError::throwOnErr(dpu_prepare_xfer(dpu, (
void *)SrcBuffers[dpuIdx].data()));
438 dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
439 DpuError::throwOnErr(dpu_push_xfer_symbol(cSet, DPU_XFER_TO_DPU, DstSymbol.cSymbol, Offset, Size, flags));
449 template <
typename T>
451 copy(
DpuSymbol &DstSymbol,
unsigned Offset,
const std::vector<std::vector<T>> &SrcBuffers)
453 if (SrcBuffers.size() == 0) {
454 DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
457 unsigned nrElements = SrcBuffers[0].size();
458 for (
auto buf : SrcBuffers) {
459 if (nrElements != buf.size()) {
460 DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
464 copy(DstSymbol, Offset, SrcBuffers, nrElements *
sizeof(T));
474 template <
typename T>
476 copy(
DpuSymbol &DstSymbol,
const std::vector<std::vector<T>> &SrcBuffers,
unsigned Size)
478 copy(DstSymbol, 0, SrcBuffers, Size);
487 template <
typename T>
491 copy(DstSymbol, 0, SrcBuffers);
505 dpu_sg_xfer_flags_t flags = async ? DPU_SG_XFER_ASYNC : DPU_SG_XFER_DEFAULT;
507 flags =
static_cast<dpu_sg_xfer_flags_t
>(flags | DPU_SG_XFER_DISABLE_LENGTH_CHECK);
509 DpuError::throwOnErr(
510 dpu_push_sg_xfer_symbol(cSet, DPU_XFER_TO_DPU, DstSymbol.cSymbol, Offset, Size, &get_block_info, flags));
523 copyScatterGather(DstSymbol, 0, get_block_info, Size, length_check);
538 get_block_t get_block_info { __get_block<F>, &f,
sizeof(f) };
539 copyScatterGather(DstSymbol, Offset, get_block_info, Size, length_check);
553 copyScatterGather(DstSymbol, 0, f, Size, length_check);
564 template <
typename T>
566 copy(std::vector<std::vector<T>> &DstBuffers,
unsigned Size,
const std::string &SrcSymbol,
unsigned Offset)
568 struct dpu_set_t dpu;
571 DPU_FOREACH (cSet, dpu, dpuIdx) {
572 DpuError::throwOnErr(dpu_prepare_xfer(dpu, DstBuffers[dpuIdx].data()));
575 dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
576 DpuError::throwOnErr(dpu_push_xfer(cSet, DPU_XFER_FROM_DPU, SrcSymbol.c_str(), Offset, Size, flags));
586 template <
typename T>
588 copy(std::vector<std::vector<T>> &DstBuffers,
unsigned Size,
const std::string &SrcSymbol)
590 copy(DstBuffers, Size, SrcSymbol, 0);
600 template <
typename T>
602 copy(std::vector<std::vector<T>> &DstBuffers,
const std::string &SrcSymbol,
unsigned Offset)
604 if (DstBuffers.size() == 0) {
605 DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
608 unsigned nrElements = DstBuffers[0].size();
609 for (
auto buf : DstBuffers) {
610 if (nrElements != buf.size()) {
611 DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
615 copy(DstBuffers, nrElements *
sizeof(T), SrcSymbol, Offset);
624 template <
typename T>
626 copy(std::vector<std::vector<T>> &DstBuffers,
const std::string &SrcSymbol)
628 copy(DstBuffers, SrcSymbol, 0);
643 const std::string &SrcSymbol,
645 bool length_check =
true)
647 dpu_sg_xfer_flags_t flags = async ? DPU_SG_XFER_ASYNC : DPU_SG_XFER_DEFAULT;
649 flags =
static_cast<dpu_sg_xfer_flags_t
>(flags | DPU_SG_XFER_DISABLE_LENGTH_CHECK);
651 DpuError::throwOnErr(dpu_push_sg_xfer(cSet, DPU_XFER_FROM_DPU, SrcSymbol.c_str(), Offset, Size, &get_block_info, flags));
663 copyScatterGather(get_block_t &get_block_info,
unsigned Size,
const std::string &SrcSymbol,
bool length_check =
true)
665 copyScatterGather(get_block_info, Size, SrcSymbol, 0, length_check);
679 copyScatterGather(F f,
unsigned Size,
const std::string &SrcSymbol,
unsigned Offset,
bool length_check =
true)
681 get_block_t get_block_info { __get_block<F>, &f,
sizeof(f) };
682 copyScatterGather(get_block_info, Size, SrcSymbol, Offset, length_check);
697 copyScatterGather(f, Size, SrcSymbol, 0, length_check);
708 template <
typename T>
710 copy(std::vector<std::vector<T>> &DstBuffers,
unsigned Size,
DpuSymbol &SrcSymbol,
unsigned Offset)
712 struct dpu_set_t dpu;
715 DPU_FOREACH (cSet, dpu, dpuIdx) {
716 DpuError::throwOnErr(dpu_prepare_xfer(dpu, DstBuffers[dpuIdx].data()));
719 dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
720 DpuError::throwOnErr(dpu_push_xfer_symbol(cSet, DPU_XFER_FROM_DPU, SrcSymbol.cSymbol, Offset, Size, flags));
730 template <
typename T>
732 copy(std::vector<std::vector<T>> &DstBuffers,
unsigned Size,
DpuSymbol &SrcSymbol)
734 copy(DstBuffers, Size, SrcSymbol, 0);
744 template <
typename T>
746 copy(std::vector<std::vector<T>> &DstBuffers,
DpuSymbol &SrcSymbol,
unsigned Offset)
748 if (DstBuffers.size() == 0) {
749 DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
752 unsigned nrElements = DstBuffers[0].size();
753 for (
auto buf : DstBuffers) {
754 if (nrElements != buf.size()) {
755 DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
759 copy(DstBuffers, nrElements *
sizeof(T), SrcSymbol, Offset);
768 template <
typename T>
772 copy(DstBuffers, SrcSymbol, 0);
787 dpu_sg_xfer_flags_t flags = async ? DPU_SG_XFER_ASYNC : DPU_SG_XFER_DEFAULT;
789 flags =
static_cast<dpu_sg_xfer_flags_t
>(flags | DPU_SG_XFER_DISABLE_LENGTH_CHECK);
791 DpuError::throwOnErr(
792 dpu_push_sg_xfer_symbol(cSet, DPU_XFER_FROM_DPU, SrcSymbol.cSymbol, Offset, Size, &get_block_info, flags));
806 copyScatterGather(get_block_info, Size, SrcSymbol, 0, length_check);
822 get_block_t get_block_info { __get_block<F>, &f,
sizeof(f) };
823 copyScatterGather(get_block_info, Size, SrcSymbol, Offset, length_check);
838 copyScatterGather(f, Size, SrcSymbol, 0, length_check);
849 dpu_launch_policy_t policy = async ? DPU_ASYNCHRONOUS : DPU_SYNCHRONOUS;
850 DpuError::throwOnErr(dpu_launch(cSet, policy));
854 struct dpu_set_t cSet;
857 DpuSetOps(
const struct dpu_set_t &CSet,
bool Async)
877 for (
auto rank : _ranks) {
880 for (
auto dpu : _dpus) {
890 std::vector<DpuSet *> &
899 std::vector<DpuSet *> &
913 allocate(
unsigned NrDpus = ALLOCATE_ALL,
const std::string &Profile =
"")
915 struct dpu_set_t cSet;
916 DpuError::throwOnErr(dpu_alloc(NrDpus, Profile.c_str(), &cSet));
928 allocateRanks(
unsigned NrRanks = ALLOCATE_ALL,
const std::string &Profile =
"")
930 struct dpu_set_t cSet;
931 DpuError::throwOnErr(dpu_alloc_ranks(NrRanks, Profile.c_str(), &cSet));
942 load(
const std::string &Executable)
945 DpuError::throwOnErr(dpu_load(cSet, Executable.c_str(), &Program.cProgram));
955 log(std::ostream &LogStream)
957 for (
DpuSet *dpuSet : _dpus) {
958 struct dpu_t *
dpu = dpu_from_set(dpuSet->cSet);
959 DpuError::throwOnErr(ostreamPrint(&LogStream, DPU_LOG_FORMAT_HEADER));
960 DpuError::throwOnErr(dpulog_read_for_dpu_(dpu, ostreamPrint, &LogStream));
972 std::vector<DpuSet *> _dpus;
973 std::vector<DpuSet *> _ranks;
975 DpuSet(
struct dpu_set_t CSet,
bool ManageCSet =
true,
bool DetectChildren =
true)
977 , manageCSet(ManageCSet)
980 if (DetectChildren) {
981 struct dpu_set_t cRank;
982 DPU_RANK_FOREACH (CSet, cRank) {
984 struct dpu_set_t cDpu;
986 DPU_FOREACH (cRank, cDpu) {
989 dpu->_dpus.push_back(dpu);
990 dpu->_ranks.push_back(rank);
991 rank->_dpus.push_back(dpu);
992 _dpus.push_back(dpu);
995 rank->_ranks.push_back(rank);
996 _ranks.push_back(rank);
1002 ostreamPrint(
void *Arg,
const char *Fmt, ...)
1004 std::ostream *LogStream = (std::ostream *)Arg;
1008 if (vasprintf(&str, Fmt, ap) == -1) {
1010 return DPU_ERR_SYSTEM;
1026 struct CallContext {
1028 std::atomic_uint count;
1043 unsigned flags = DPU_CALLBACK_ASYNC;
1045 flags |= DPU_CALLBACK_NONBLOCKING;
1048 flags |= DPU_CALLBACK_SINGLE_CALL;
1051 CallContext *context =
new CallContext;
1052 context->callback = Callback;
1053 context->count = SingleCall ? 1 :
set->_ranks.size();
1055 DpuError::throwOnErr(dpu_callback(cSet, cbWrapper, (
void *)context, (dpu_callback_flags_t)flags));
1069 call(Callback,
true,
false);
1079 DpuError::throwOnErr(dpu_sync(set->cSet));
1092 cbWrapper(
struct dpu_set_t CSet,
unsigned Idx,
void *Arg)
1094 DpuSet dpuSet(CSet,
false,
true);
1095 CallContext *context =
static_cast<CallContext *
>(Arg);
1096 context->callback(dpuSet, Idx);
1097 if (--context->count == 0) {
void copy(std::vector< std::vector< T >> &DstBuffers, unsigned Size, DpuSymbol &SrcSymbol)
Copy data from the DPUs in the set.
Definition: dpu.hpp:732
void copy(std::vector< std::vector< T >> &DstBuffers, unsigned Size, DpuSymbol &SrcSymbol, unsigned Offset)
Copy data from the DPUs in the set.
Definition: dpu.hpp:710
void copy(DpuSymbol &DstSymbol, const std::vector< std::vector< T >> &SrcBuffers)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:489
void copyScatterGather(get_block_t &get_block_info, unsigned Size, const std::string &SrcSymbol, unsigned Offset, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:641
std::function< void(DpuSet &, unsigned)> CallbackFn
Function used in DpuSetAsync::call as callback.
Definition: dpu.hpp:151
void copy(const std::string &DstSymbol, const std::vector< T > &SrcBuffer)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:213
void copy(const std::string &DstSymbol, unsigned Offset, const std::vector< T > &SrcBuffer)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:186
Exception thrown by the methods of this module.
Definition: dpu.hpp:59
static DpuSet allocateRanks(unsigned NrRanks=ALLOCATE_ALL, const std::string &Profile="")
Allocate a number of DPU ranks with the given profile.
Definition: dpu.hpp:928
DpuProgram load(const std::string &Executable)
Load a DPU program on each DPU of the set.
Definition: dpu.hpp:942
Interface of a DPU set for asynchronous operations.
Definition: dpu.hpp:1023
void copy(DpuSymbol &DstSymbol, const std::vector< T > &SrcBuffer)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:270
Operations on a DPU set that can be run synchronously or asynchronously.
Definition: dpu.hpp:156
const unsigned ALLOCATE_ALL
Constant used to allocate all available DPUs in DpuSet::allocate and DpuSet::allocateRanks.
Definition: dpu.hpp:46
void copyScatterGather(get_block_t &get_block_info, unsigned Size, const std::string &SrcSymbol, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:663
std::vector< DpuSet * > & dpus()
Definition: dpu.hpp:891
void copy(DpuSymbol &DstSymbol, const std::vector< T > &SrcBuffer, unsigned Size)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:257
void copy(DpuSymbol &DstSymbol, const std::vector< std::vector< T >> &SrcBuffers, unsigned Size)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:476
void copy(const std::string &DstSymbol, const std::vector< std::vector< T >> &SrcBuffers, unsigned Size)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:332
DpuSetAsync async()
Definition: dpu.hpp:1105
void copyScatterGather(DpuSymbol &DstSymbol, get_block_t &get_block_info, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:521
void copyScatterGather(get_block_t &get_block_info, unsigned Size, DpuSymbol &SrcSymbol, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:804
void copyScatterGather(get_block_t &get_block_info, unsigned Size, DpuSymbol &SrcSymbol, unsigned Offset, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:785
void copyScatterGather(F f, unsigned Size, const std::string &SrcSymbol, unsigned Offset, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:679
void copyScatterGather(F f, unsigned Size, DpuSymbol &SrcSymbol, unsigned Offset, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:820
void copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector< T > &SrcBuffer, unsigned Size)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:228
void copyScatterGather(const std::string &DstSymbol, unsigned Offset, get_block_t &get_block_info, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:360
void copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector< std::vector< T >> &SrcBuffers, unsigned Size)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:429
Representation of a symbol in a DPU program.
Definition: dpu.hpp:100
void sync()
Wait for the end of all queued asynchronous operations.
Definition: dpu.hpp:1077
void copy(const std::string &DstSymbol, unsigned Offset, const std::vector< std::vector< T >> &SrcBuffers, unsigned Size)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:285
void exec()
Execute a DPU program.
Definition: dpu.hpp:847
void copy(const std::string &DstSymbol, unsigned Offset, const std::vector< std::vector< T >> &SrcBuffers)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:307
void copyScatterGather(F f, unsigned Size, const std::string &SrcSymbol, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:695
void copyScatterGather(const std::string &DstSymbol, unsigned Offset, F f, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:398
void copy(std::vector< std::vector< T >> &DstBuffers, const std::string &SrcSymbol)
Copy data from the DPUs in the set.
Definition: dpu.hpp:626
void copyScatterGather(const std::string &DstSymbol, F f, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:414
void call(const CallbackFn &Callback)
Call the given function on each DPU rank.
Definition: dpu.hpp:1067
void copy(const std::string &DstSymbol, unsigned Offset, const std::vector< T > &SrcBuffer, unsigned Size)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:171
Contains all that is needed to manage DPUs.
Definition: dpu.hpp:40
void log(std::ostream &LogStream)
Display the DPU logs on the given stream.
Definition: dpu.hpp:955
std::vector< DpuSet * > & ranks()
Definition: dpu.hpp:900
void copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector< std::vector< T >> &SrcBuffers)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:451
void call(const CallbackFn &Callback, bool IsBlocking, bool SingleCall)
Call the given function on each DPU rank, or the whole set.
Definition: dpu.hpp:1041
void copyScatterGather(DpuSymbol &DstSymbol, F f, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:551
void copyScatterGather(const std::string &DstSymbol, get_block_t &get_block_info, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:382
void copyScatterGather(DpuSymbol &DstSymbol, unsigned Offset, get_block_t &get_block_info, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:503
void copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector< T > &SrcBuffer)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:243
void copyScatterGather(F f, unsigned Size, DpuSymbol &SrcSymbol, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:836
A set of DPUs.
Definition: dpu.hpp:869
void copy(std::vector< std::vector< T >> &DstBuffers, unsigned Size, const std::string &SrcSymbol)
Copy data from the DPUs in the set.
Definition: dpu.hpp:588
void copy(std::vector< std::vector< T >> &DstBuffers, DpuSymbol &SrcSymbol, unsigned Offset)
Copy data from the DPUs in the set.
Definition: dpu.hpp:746
void copy(std::vector< std::vector< T >> &DstBuffers, const std::string &SrcSymbol, unsigned Offset)
Copy data from the DPUs in the set.
Definition: dpu.hpp:602
DpuSymbol(unsigned Address, unsigned Size)
Construct DPU symbol from explicit address and size.
Definition: dpu.hpp:110
void copy(std::vector< std::vector< T >> &DstBuffers, DpuSymbol &SrcSymbol)
Copy data from the DPUs in the set.
Definition: dpu.hpp:770
void copy(std::vector< std::vector< T >> &DstBuffers, unsigned Size, const std::string &SrcSymbol, unsigned Offset)
Copy data from the DPUs in the set.
Definition: dpu.hpp:566
virtual const char * what() const noexcept override
Definition: dpu.hpp:71
Representation of a DPU program.
Definition: dpu.hpp:123
static DpuSet allocate(unsigned NrDpus=ALLOCATE_ALL, const std::string &Profile="")
Allocate a number of DPUs with the given profile.
Definition: dpu.hpp:913
void copy(const std::string &DstSymbol, const std::vector< T > &SrcBuffer, unsigned Size)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:200
void copy(const std::string &DstSymbol, const std::vector< std::vector< T >> &SrcBuffers)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:345
void copyScatterGather(DpuSymbol &DstSymbol, unsigned Offset, F f, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:536