DPU C++ Host API  2025.1.0
dpu.hpp
1 /* Copyright 2020 UPMEM. All rights reserved.
2  * Use of this source code is governed by a BSD-style license that can be
3  * found in the LICENSE file.
4  */
5 
23 #include <atomic>
24 #include <cstdarg>
25 #include <climits>
26 #include <functional>
27 #include <ostream>
28 #include <string>
29 #include <vector>
30 
31 extern "C" {
32 #include <dpu.h>
33 #include <dpu_log_internals.h>
34 #include <dpu_management.h>
35 }
36 
40 namespace dpu {
41 
46 const unsigned ALLOCATE_ALL = DPU_ALLOCATE_ALL;
47 
/**
 * Plain-function trampoline around a callable of type F.
 *
 * The copyScatterGather overloads taking a callable store this function in a
 * get_block_t together with a pointer to the callable; this trampoline recovers
 * the callable from the opaque pointer and forwards the call to it.
 *
 * @param out block description handed to the callable
 * @param dpu_index first index forwarded to the callable
 * @param block_index second index forwarded to the callable
 * @param args pointer to the callable of type F
 * @return whatever the callable returns, converted to bool
 */
template <class F>
static bool
__get_block(struct sg_block_info *out, uint32_t dpu_index, uint32_t block_index, void *args)
{
    F &callable = *static_cast<F *>(args);
    return callable(out, dpu_index, block_index);
}
55 
59 class DpuError : public std::exception {
60  friend class DpuProgram;
61  friend class DpuSetOps;
62  friend class DpuSet;
63  friend class DpuSetAsync;
64 
65 public:
70  virtual const char *
71  what() const noexcept override
72  {
73  return msg;
74  }
75 
76 private:
77  dpu_error_t errorId;
78  const char *msg;
79 
80  explicit DpuError(dpu_error_t ErrorId)
81  : errorId(ErrorId)
82  {
83  msg = dpu_error_to_string(errorId);
84  }
85 
86  ~DpuError() { free((void *)msg); }
87 
88  static void
89  throwOnErr(dpu_error_t Error)
90  {
91  if (Error != DPU_OK) {
92  throw DpuError(Error);
93  }
94  }
95 };
96 
100 class DpuSymbol {
101  friend class DpuProgram;
102  friend class DpuSetOps;
103 
104 public:
110  DpuSymbol(unsigned Address, unsigned Size)
111  : cSymbol({ .address = Address, .size = Size })
112  {
113  }
114 
115 private:
116  DpuSymbol() { }
117  struct dpu_symbol_t cSymbol;
118 };
119 
123 class DpuProgram {
124  friend class DpuSet;
125 
126 public:
133  DpuSymbol
134  get(const std::string &SymbolName)
135  {
136  DpuSymbol symbol;
137  DpuError::throwOnErr(dpu_get_symbol(cProgram, SymbolName.c_str(), &symbol.cSymbol));
138  return symbol;
139  }
140 
141 private:
142  struct dpu_program_t *cProgram { nullptr };
143 };
144 
// Forward declarations: both classes are referenced before they are defined.
class DpuSet;
class DpuSetAsync;

/**
 * Function used in DpuSetAsync::call as callback.
 * It receives the DPU set the callback is invoked for and an index.
 */
using CallbackFn = std::function<void(DpuSet &, unsigned int)>;
152 
156 class DpuSetOps {
157  friend class DpuSet;
158  friend class DpuSetAsync;
159 
160 public:
169  template <typename T>
170  void
171  copy(const std::string &DstSymbol, unsigned Offset, const std::vector<T> &SrcBuffer, unsigned Size)
172  {
173  dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
174  DpuError::throwOnErr(dpu_broadcast_to(cSet, DstSymbol.c_str(), Offset, SrcBuffer.data(), Size, flags));
175  }
176 
184  template <typename T>
185  void
186  copy(const std::string &DstSymbol, unsigned Offset, const std::vector<T> &SrcBuffer)
187  {
188  copy(DstSymbol, Offset, SrcBuffer, SrcBuffer.size() * sizeof(T));
189  }
190 
198  template <typename T>
199  void
200  copy(const std::string &DstSymbol, const std::vector<T> &SrcBuffer, unsigned Size)
201  {
202  copy(DstSymbol, 0, SrcBuffer, Size);
203  }
204 
211  template <typename T>
212  void
213  copy(const std::string &DstSymbol, const std::vector<T> &SrcBuffer)
214  {
215  copy(DstSymbol, 0, SrcBuffer, SrcBuffer.size() * sizeof(T));
216  }
217 
226  template <typename T>
227  void
228  copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector<T> &SrcBuffer, unsigned Size)
229  {
230  dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
231  DpuError::throwOnErr(dpu_broadcast_to_symbol(cSet, DstSymbol.cSymbol, Offset, SrcBuffer.data(), Size, flags));
232  }
233 
241  template <typename T>
242  void
243  copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector<T> &SrcBuffer)
244  {
245  copy(DstSymbol, Offset, SrcBuffer, SrcBuffer.size() * sizeof(T));
246  }
247 
255  template <typename T>
256  void
257  copy(DpuSymbol &DstSymbol, const std::vector<T> &SrcBuffer, unsigned Size)
258  {
259  copy(DstSymbol, 0, SrcBuffer, Size);
260  }
261 
268  template <typename T>
269  void
270  copy(DpuSymbol &DstSymbol, const std::vector<T> &SrcBuffer)
271  {
272  copy(DstSymbol, 0, SrcBuffer, SrcBuffer.size() * sizeof(T));
273  }
274 
283  template <typename T>
284  void
285  copy(const std::string &DstSymbol, unsigned Offset, const std::vector<std::vector<T>> &SrcBuffers, unsigned Size)
286  {
287  struct dpu_set_t dpu;
288  unsigned dpuIdx;
289 
290  DPU_FOREACH (cSet, dpu, dpuIdx) {
291  DpuError::throwOnErr(dpu_prepare_xfer(dpu, (void *)SrcBuffers[dpuIdx].data()));
292  }
293 
294  dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
295  DpuError::throwOnErr(dpu_push_xfer(cSet, DPU_XFER_TO_DPU, DstSymbol.c_str(), Offset, Size, flags));
296  }
297 
305  template <typename T>
306  void
307  copy(const std::string &DstSymbol, unsigned Offset, const std::vector<std::vector<T>> &SrcBuffers)
308  {
309  if (SrcBuffers.size() == 0) {
310  DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
311  }
312 
313  unsigned nrElements = SrcBuffers[0].size();
314  for (auto buf : SrcBuffers) {
315  if (nrElements != buf.size()) {
316  DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
317  }
318  }
319 
320  copy(DstSymbol, Offset, SrcBuffers, nrElements * sizeof(T));
321  }
322 
330  template <typename T>
331  void
332  copy(const std::string &DstSymbol, const std::vector<std::vector<T>> &SrcBuffers, unsigned Size)
333  {
334  copy(DstSymbol, 0, SrcBuffers, Size);
335  }
336 
343  template <typename T>
344  void
345  copy(const std::string &DstSymbol, const std::vector<std::vector<T>> &SrcBuffers)
346  {
347  copy(DstSymbol, 0, SrcBuffers);
348  }
349 
359  void
360  copyScatterGather(const std::string &DstSymbol,
361  unsigned Offset,
362  get_block_t &get_block_info,
363  unsigned Size,
364  bool length_check = true)
365  {
366  dpu_sg_xfer_flags_t flags = async ? DPU_SG_XFER_ASYNC : DPU_SG_XFER_DEFAULT;
367  if (!length_check) {
368  flags = static_cast<dpu_sg_xfer_flags_t>(flags | DPU_SG_XFER_DISABLE_LENGTH_CHECK);
369  }
370  DpuError::throwOnErr(dpu_push_sg_xfer(cSet, DPU_XFER_TO_DPU, DstSymbol.c_str(), Offset, Size, &get_block_info, flags));
371  }
372 
381  void
382  copyScatterGather(const std::string &DstSymbol, get_block_t &get_block_info, unsigned Size, bool length_check = true)
383  {
384  copyScatterGather(DstSymbol, 0, get_block_info, Size, length_check);
385  }
386 
396  template <class F>
397  void
398  copyScatterGather(const std::string &DstSymbol, unsigned Offset, F f, unsigned Size, bool length_check = true)
399  {
400  get_block_t get_block_info { __get_block<F>, &f, sizeof(f) };
401  copyScatterGather(DstSymbol, Offset, get_block_info, Size, length_check);
402  }
403 
412  template <class F>
413  void
414  copyScatterGather(const std::string &DstSymbol, F f, unsigned Size, bool length_check = true)
415  {
416  copyScatterGather(DstSymbol, 0, f, Size, length_check);
417  }
418 
427  template <typename T>
428  void
429  copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector<std::vector<T>> &SrcBuffers, unsigned Size)
430  {
431  struct dpu_set_t dpu;
432  unsigned dpuIdx;
433 
434  DPU_FOREACH (cSet, dpu, dpuIdx) {
435  DpuError::throwOnErr(dpu_prepare_xfer(dpu, (void *)SrcBuffers[dpuIdx].data()));
436  }
437 
438  dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
439  DpuError::throwOnErr(dpu_push_xfer_symbol(cSet, DPU_XFER_TO_DPU, DstSymbol.cSymbol, Offset, Size, flags));
440  }
441 
449  template <typename T>
450  void
451  copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector<std::vector<T>> &SrcBuffers)
452  {
453  if (SrcBuffers.size() == 0) {
454  DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
455  }
456 
457  unsigned nrElements = SrcBuffers[0].size();
458  for (auto buf : SrcBuffers) {
459  if (nrElements != buf.size()) {
460  DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
461  }
462  }
463 
464  copy(DstSymbol, Offset, SrcBuffers, nrElements * sizeof(T));
465  }
466 
474  template <typename T>
475  void
476  copy(DpuSymbol &DstSymbol, const std::vector<std::vector<T>> &SrcBuffers, unsigned Size)
477  {
478  copy(DstSymbol, 0, SrcBuffers, Size);
479  }
480 
487  template <typename T>
488  void
489  copy(DpuSymbol &DstSymbol, const std::vector<std::vector<T>> &SrcBuffers)
490  {
491  copy(DstSymbol, 0, SrcBuffers);
492  }
493 
502  void
503  copyScatterGather(DpuSymbol &DstSymbol, unsigned Offset, get_block_t &get_block_info, unsigned Size, bool length_check = true)
504  {
505  dpu_sg_xfer_flags_t flags = async ? DPU_SG_XFER_ASYNC : DPU_SG_XFER_DEFAULT;
506  if (!length_check) {
507  flags = static_cast<dpu_sg_xfer_flags_t>(flags | DPU_SG_XFER_DISABLE_LENGTH_CHECK);
508  }
509  DpuError::throwOnErr(
510  dpu_push_sg_xfer_symbol(cSet, DPU_XFER_TO_DPU, DstSymbol.cSymbol, Offset, Size, &get_block_info, flags));
511  }
512 
520  void
521  copyScatterGather(DpuSymbol &DstSymbol, get_block_t &get_block_info, unsigned Size, bool length_check = true)
522  {
523  copyScatterGather(DstSymbol, 0, get_block_info, Size, length_check);
524  }
525 
534  template <class F>
535  void
536  copyScatterGather(DpuSymbol &DstSymbol, unsigned Offset, F f, unsigned Size, bool length_check = true)
537  {
538  get_block_t get_block_info { __get_block<F>, &f, sizeof(f) };
539  copyScatterGather(DstSymbol, Offset, get_block_info, Size, length_check);
540  }
541 
549  template <class F>
550  void
551  copyScatterGather(DpuSymbol &DstSymbol, F f, unsigned Size, bool length_check = true)
552  {
553  copyScatterGather(DstSymbol, 0, f, Size, length_check);
554  }
555 
564  template <typename T>
565  void
566  copy(std::vector<std::vector<T>> &DstBuffers, unsigned Size, const std::string &SrcSymbol, unsigned Offset)
567  {
568  struct dpu_set_t dpu;
569  unsigned dpuIdx;
570 
571  DPU_FOREACH (cSet, dpu, dpuIdx) {
572  DpuError::throwOnErr(dpu_prepare_xfer(dpu, DstBuffers[dpuIdx].data()));
573  }
574 
575  dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
576  DpuError::throwOnErr(dpu_push_xfer(cSet, DPU_XFER_FROM_DPU, SrcSymbol.c_str(), Offset, Size, flags));
577  }
578 
586  template <typename T>
587  void
588  copy(std::vector<std::vector<T>> &DstBuffers, unsigned Size, const std::string &SrcSymbol)
589  {
590  copy(DstBuffers, Size, SrcSymbol, 0);
591  }
592 
600  template <typename T>
601  void
602  copy(std::vector<std::vector<T>> &DstBuffers, const std::string &SrcSymbol, unsigned Offset)
603  {
604  if (DstBuffers.size() == 0) {
605  DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
606  }
607 
608  unsigned nrElements = DstBuffers[0].size();
609  for (auto buf : DstBuffers) {
610  if (nrElements != buf.size()) {
611  DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
612  }
613  }
614 
615  copy(DstBuffers, nrElements * sizeof(T), SrcSymbol, Offset);
616  }
617 
624  template <typename T>
625  void
626  copy(std::vector<std::vector<T>> &DstBuffers, const std::string &SrcSymbol)
627  {
628  copy(DstBuffers, SrcSymbol, 0);
629  }
630 
640  void
641  copyScatterGather(get_block_t &get_block_info,
642  unsigned Size,
643  const std::string &SrcSymbol,
644  unsigned Offset,
645  bool length_check = true)
646  {
647  dpu_sg_xfer_flags_t flags = async ? DPU_SG_XFER_ASYNC : DPU_SG_XFER_DEFAULT;
648  if (!length_check) {
649  flags = static_cast<dpu_sg_xfer_flags_t>(flags | DPU_SG_XFER_DISABLE_LENGTH_CHECK);
650  }
651  DpuError::throwOnErr(dpu_push_sg_xfer(cSet, DPU_XFER_FROM_DPU, SrcSymbol.c_str(), Offset, Size, &get_block_info, flags));
652  }
653 
662  void
663  copyScatterGather(get_block_t &get_block_info, unsigned Size, const std::string &SrcSymbol, bool length_check = true)
664  {
665  copyScatterGather(get_block_info, Size, SrcSymbol, 0, length_check);
666  }
667 
677  template <class F>
678  void
679  copyScatterGather(F f, unsigned Size, const std::string &SrcSymbol, unsigned Offset, bool length_check = true)
680  {
681  get_block_t get_block_info { __get_block<F>, &f, sizeof(f) };
682  copyScatterGather(get_block_info, Size, SrcSymbol, Offset, length_check);
683  }
684 
693  template <class F>
694  void
695  copyScatterGather(F f, unsigned Size, const std::string &SrcSymbol, bool length_check = true)
696  {
697  copyScatterGather(f, Size, SrcSymbol, 0, length_check);
698  }
699 
708  template <typename T>
709  void
710  copy(std::vector<std::vector<T>> &DstBuffers, unsigned Size, DpuSymbol &SrcSymbol, unsigned Offset)
711  {
712  struct dpu_set_t dpu;
713  unsigned dpuIdx;
714 
715  DPU_FOREACH (cSet, dpu, dpuIdx) {
716  DpuError::throwOnErr(dpu_prepare_xfer(dpu, DstBuffers[dpuIdx].data()));
717  }
718 
719  dpu_xfer_flags_t flags = async ? DPU_XFER_ASYNC : DPU_XFER_DEFAULT;
720  DpuError::throwOnErr(dpu_push_xfer_symbol(cSet, DPU_XFER_FROM_DPU, SrcSymbol.cSymbol, Offset, Size, flags));
721  }
722 
730  template <typename T>
731  void
732  copy(std::vector<std::vector<T>> &DstBuffers, unsigned Size, DpuSymbol &SrcSymbol)
733  {
734  copy(DstBuffers, Size, SrcSymbol, 0);
735  }
736 
744  template <typename T>
745  void
746  copy(std::vector<std::vector<T>> &DstBuffers, DpuSymbol &SrcSymbol, unsigned Offset)
747  {
748  if (DstBuffers.size() == 0) {
749  DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
750  }
751 
752  unsigned nrElements = DstBuffers[0].size();
753  for (auto buf : DstBuffers) {
754  if (nrElements != buf.size()) {
755  DpuError::throwOnErr(DPU_ERR_INVALID_MEMORY_TRANSFER);
756  }
757  }
758 
759  copy(DstBuffers, nrElements * sizeof(T), SrcSymbol, Offset);
760  }
761 
768  template <typename T>
769  void
770  copy(std::vector<std::vector<T>> &DstBuffers, DpuSymbol &SrcSymbol)
771  {
772  copy(DstBuffers, SrcSymbol, 0);
773  }
774 
784  void
785  copyScatterGather(get_block_t &get_block_info, unsigned Size, DpuSymbol &SrcSymbol, unsigned Offset, bool length_check = true)
786  {
787  dpu_sg_xfer_flags_t flags = async ? DPU_SG_XFER_ASYNC : DPU_SG_XFER_DEFAULT;
788  if (!length_check) {
789  flags = static_cast<dpu_sg_xfer_flags_t>(flags | DPU_SG_XFER_DISABLE_LENGTH_CHECK);
790  }
791  DpuError::throwOnErr(
792  dpu_push_sg_xfer_symbol(cSet, DPU_XFER_FROM_DPU, SrcSymbol.cSymbol, Offset, Size, &get_block_info, flags));
793  }
794 
803  void
804  copyScatterGather(get_block_t &get_block_info, unsigned Size, DpuSymbol &SrcSymbol, bool length_check = true)
805  {
806  copyScatterGather(get_block_info, Size, SrcSymbol, 0, length_check);
807  }
808 
818  template <class F>
819  void
820  copyScatterGather(F f, unsigned Size, DpuSymbol &SrcSymbol, unsigned Offset, bool length_check = true)
821  {
822  get_block_t get_block_info { __get_block<F>, &f, sizeof(f) };
823  copyScatterGather(get_block_info, Size, SrcSymbol, Offset, length_check);
824  }
825 
834  template <class F>
835  void
836  copyScatterGather(F f, unsigned Size, DpuSymbol &SrcSymbol, bool length_check = true)
837  {
838  copyScatterGather(f, Size, SrcSymbol, 0, length_check);
839  }
840 
846  void
848  {
849  dpu_launch_policy_t policy = async ? DPU_ASYNCHRONOUS : DPU_SYNCHRONOUS;
850  DpuError::throwOnErr(dpu_launch(cSet, policy));
851  }
852 
853 private:
854  struct dpu_set_t cSet;
855  bool async;
856 
857  DpuSetOps(const struct dpu_set_t &CSet, bool Async)
858  : cSet(CSet)
859  , async(Async)
860  {
861  }
862 };
863 
869 class DpuSet : public DpuSetOps {
870  friend class DpuSetOps;
871  friend class DpuSetAsync;
872 
873 public:
874  ~DpuSet()
875  {
876  if (manageCSet) {
877  for (auto rank : _ranks) {
878  delete rank;
879  }
880  for (auto dpu : _dpus) {
881  delete dpu;
882  }
883  dpu_free(cSet);
884  }
885  }
886 
890  std::vector<DpuSet *> &
892  {
893  return _dpus;
894  }
895 
899  std::vector<DpuSet *> &
901  {
902  return _ranks;
903  }
904 
912  static DpuSet
913  allocate(unsigned NrDpus = ALLOCATE_ALL, const std::string &Profile = "")
914  {
915  struct dpu_set_t cSet;
916  DpuError::throwOnErr(dpu_alloc(NrDpus, Profile.c_str(), &cSet));
917  return DpuSet(cSet);
918  }
919 
927  static DpuSet
928  allocateRanks(unsigned NrRanks = ALLOCATE_ALL, const std::string &Profile = "")
929  {
930  struct dpu_set_t cSet;
931  DpuError::throwOnErr(dpu_alloc_ranks(NrRanks, Profile.c_str(), &cSet));
932  return DpuSet(cSet);
933  }
934 
941  DpuProgram
942  load(const std::string &Executable)
943  {
944  DpuProgram Program;
945  DpuError::throwOnErr(dpu_load(cSet, Executable.c_str(), &Program.cProgram));
946  return Program;
947  }
948 
954  void
955  log(std::ostream &LogStream)
956  {
957  for (DpuSet *dpuSet : _dpus) {
958  struct dpu_t *dpu = dpu_from_set(dpuSet->cSet);
959  DpuError::throwOnErr(ostreamPrint(&LogStream, DPU_LOG_FORMAT_HEADER));
960  DpuError::throwOnErr(dpulog_read_for_dpu_(dpu, ostreamPrint, &LogStream));
961  }
962  }
963 
968  async();
969 
970 private:
971  bool manageCSet;
972  std::vector<DpuSet *> _dpus;
973  std::vector<DpuSet *> _ranks;
974 
975  DpuSet(struct dpu_set_t CSet, bool ManageCSet = true, bool DetectChildren = true)
976  : DpuSetOps(CSet, false)
977  , manageCSet(ManageCSet)
978  {
979 
980  if (DetectChildren) {
981  struct dpu_set_t cRank;
982  DPU_RANK_FOREACH (CSet, cRank) {
983  DpuSet *rank = new DpuSet(cRank, false, false);
984  struct dpu_set_t cDpu;
985 
986  DPU_FOREACH (cRank, cDpu) {
987  DpuSet *dpu = new DpuSet(cDpu, false, false);
988 
989  dpu->_dpus.push_back(dpu);
990  dpu->_ranks.push_back(rank);
991  rank->_dpus.push_back(dpu);
992  _dpus.push_back(dpu);
993  }
994 
995  rank->_ranks.push_back(rank);
996  _ranks.push_back(rank);
997  }
998  }
999  }
1000 
1001  static dpu_error_t
1002  ostreamPrint(void *Arg, const char *Fmt, ...)
1003  {
1004  std::ostream *LogStream = (std::ostream *)Arg;
1005  char *str;
1006  va_list ap;
1007  va_start(ap, Fmt);
1008  if (vasprintf(&str, Fmt, ap) == -1) {
1009  va_end(ap);
1010  return DPU_ERR_SYSTEM;
1011  }
1012 
1013  *LogStream << str;
1014 
1015  free(str);
1016  return DPU_OK;
1017  }
1018 };
1019 
1023 class DpuSetAsync : public DpuSetOps {
1024  friend class DpuSet;
1025 
1026  struct CallContext {
1027  CallbackFn callback;
1028  std::atomic_uint count;
1029  };
1030 
1031 public:
1040  void
1041  call(const CallbackFn &Callback, bool IsBlocking, bool SingleCall)
1042  {
1043  unsigned flags = DPU_CALLBACK_ASYNC;
1044  if (!IsBlocking) {
1045  flags |= DPU_CALLBACK_NONBLOCKING;
1046  }
1047  if (SingleCall) {
1048  flags |= DPU_CALLBACK_SINGLE_CALL;
1049  }
1050 
1051  CallContext *context = new CallContext;
1052  context->callback = Callback;
1053  context->count = SingleCall ? 1 : set->_ranks.size();
1054 
1055  DpuError::throwOnErr(dpu_callback(cSet, cbWrapper, (void *)context, (dpu_callback_flags_t)flags));
1056  }
1057 
1066  void
1067  call(const CallbackFn &Callback)
1068  {
1069  call(Callback, true, false);
1070  }
1071 
1076  void
1078  {
1079  DpuError::throwOnErr(dpu_sync(set->cSet));
1080  }
1081 
1082 private:
1083  DpuSet *set;
1084 
1085  explicit DpuSetAsync(DpuSet *Set)
1086  : DpuSetOps(Set->cSet, true)
1087  , set(Set)
1088  {
1089  }
1090 
1091  static dpu_error_t
1092  cbWrapper(struct dpu_set_t CSet, unsigned Idx, void *Arg)
1093  {
1094  DpuSet dpuSet(CSet, false, true);
1095  CallContext *context = static_cast<CallContext *>(Arg);
1096  context->callback(dpuSet, Idx);
1097  if (--context->count == 0) {
1098  delete context;
1099  }
1100  return DPU_OK;
1101  }
1102 };
1103 
1104 inline DpuSetAsync
1106 {
1107  return DpuSetAsync(this);
1108 }
1109 
1110 }
void copy(std::vector< std::vector< T >> &DstBuffers, unsigned Size, DpuSymbol &SrcSymbol)
Copy data from the DPUs in the set.
Definition: dpu.hpp:732
void copy(std::vector< std::vector< T >> &DstBuffers, unsigned Size, DpuSymbol &SrcSymbol, unsigned Offset)
Copy data from the DPUs in the set.
Definition: dpu.hpp:710
void copy(DpuSymbol &DstSymbol, const std::vector< std::vector< T >> &SrcBuffers)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:489
void copyScatterGather(get_block_t &get_block_info, unsigned Size, const std::string &SrcSymbol, unsigned Offset, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:641
std::function< void(DpuSet &, unsigned)> CallbackFn
Function used in DpuSetAsync::call as callback.
Definition: dpu.hpp:151
void copy(const std::string &DstSymbol, const std::vector< T > &SrcBuffer)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:213
void copy(const std::string &DstSymbol, unsigned Offset, const std::vector< T > &SrcBuffer)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:186
Exception thrown by the methods of this module.
Definition: dpu.hpp:59
static DpuSet allocateRanks(unsigned NrRanks=ALLOCATE_ALL, const std::string &Profile="")
Allocate a number of DPU ranks with the given profile.
Definition: dpu.hpp:928
DpuProgram load(const std::string &Executable)
Load a DPU program on each DPU of the set.
Definition: dpu.hpp:942
Interface of a DPU set for asynchronous operations.
Definition: dpu.hpp:1023
void copy(DpuSymbol &DstSymbol, const std::vector< T > &SrcBuffer)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:270
Operations on a DPU set that can be run synchronously or asynchronously.
Definition: dpu.hpp:156
const unsigned ALLOCATE_ALL
Constant used to allocate all available DPUs in DpuSet::allocate and DpuSet::allocateRanks.
Definition: dpu.hpp:46
void copyScatterGather(get_block_t &get_block_info, unsigned Size, const std::string &SrcSymbol, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:663
std::vector< DpuSet * > & dpus()
Definition: dpu.hpp:891
void copy(DpuSymbol &DstSymbol, const std::vector< T > &SrcBuffer, unsigned Size)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:257
void copy(DpuSymbol &DstSymbol, const std::vector< std::vector< T >> &SrcBuffers, unsigned Size)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:476
void copy(const std::string &DstSymbol, const std::vector< std::vector< T >> &SrcBuffers, unsigned Size)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:332
DpuSetAsync async()
Definition: dpu.hpp:1105
void copyScatterGather(DpuSymbol &DstSymbol, get_block_t &get_block_info, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:521
void copyScatterGather(get_block_t &get_block_info, unsigned Size, DpuSymbol &SrcSymbol, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:804
void copyScatterGather(get_block_t &get_block_info, unsigned Size, DpuSymbol &SrcSymbol, unsigned Offset, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:785
void copyScatterGather(F f, unsigned Size, const std::string &SrcSymbol, unsigned Offset, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:679
void copyScatterGather(F f, unsigned Size, DpuSymbol &SrcSymbol, unsigned Offset, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:820
void copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector< T > &SrcBuffer, unsigned Size)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:228
void copyScatterGather(const std::string &DstSymbol, unsigned Offset, get_block_t &get_block_info, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:360
void copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector< std::vector< T >> &SrcBuffers, unsigned Size)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:429
Representation of a symbol in a DPU program.
Definition: dpu.hpp:100
void sync()
Wait for the end of all queued asynchronous operations.
Definition: dpu.hpp:1077
void copy(const std::string &DstSymbol, unsigned Offset, const std::vector< std::vector< T >> &SrcBuffers, unsigned Size)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:285
void exec()
Execute a DPU program.
Definition: dpu.hpp:847
void copy(const std::string &DstSymbol, unsigned Offset, const std::vector< std::vector< T >> &SrcBuffers)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:307
void copyScatterGather(F f, unsigned Size, const std::string &SrcSymbol, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:695
void copyScatterGather(const std::string &DstSymbol, unsigned Offset, F f, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:398
void copy(std::vector< std::vector< T >> &DstBuffers, const std::string &SrcSymbol)
Copy data from the DPUs in the set.
Definition: dpu.hpp:626
void copyScatterGather(const std::string &DstSymbol, F f, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:414
void call(const CallbackFn &Callback)
Call the given function on each DPU rank.
Definition: dpu.hpp:1067
void copy(const std::string &DstSymbol, unsigned Offset, const std::vector< T > &SrcBuffer, unsigned Size)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:171
Contains all that is needed to manage DPUs.
Definition: dpu.hpp:40
void log(std::ostream &LogStream)
Display the DPU logs on the given stream.
Definition: dpu.hpp:955
std::vector< DpuSet * > & ranks()
Definition: dpu.hpp:900
void copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector< std::vector< T >> &SrcBuffers)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:451
void call(const CallbackFn &Callback, bool IsBlocking, bool SingleCall)
Call the given function on each DPU rank, or the whole set.
Definition: dpu.hpp:1041
void copyScatterGather(DpuSymbol &DstSymbol, F f, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:551
void copyScatterGather(const std::string &DstSymbol, get_block_t &get_block_info, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:382
void copyScatterGather(DpuSymbol &DstSymbol, unsigned Offset, get_block_t &get_block_info, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:503
void copy(DpuSymbol &DstSymbol, unsigned Offset, const std::vector< T > &SrcBuffer)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:243
void copyScatterGather(F f, unsigned Size, DpuSymbol &SrcSymbol, bool length_check=true)
Copy data from the DPUs in the set with a scatter-gather transfer.
Definition: dpu.hpp:836
A set of DPUs.
Definition: dpu.hpp:869
void copy(std::vector< std::vector< T >> &DstBuffers, unsigned Size, const std::string &SrcSymbol)
Copy data from the DPUs in the set.
Definition: dpu.hpp:588
void copy(std::vector< std::vector< T >> &DstBuffers, DpuSymbol &SrcSymbol, unsigned Offset)
Copy data from the DPUs in the set.
Definition: dpu.hpp:746
void copy(std::vector< std::vector< T >> &DstBuffers, const std::string &SrcSymbol, unsigned Offset)
Copy data from the DPUs in the set.
Definition: dpu.hpp:602
DpuSymbol(unsigned Address, unsigned Size)
Construct DPU symbol from explicit address and size.
Definition: dpu.hpp:110
void copy(std::vector< std::vector< T >> &DstBuffers, DpuSymbol &SrcSymbol)
Copy data from the DPUs in the set.
Definition: dpu.hpp:770
void copy(std::vector< std::vector< T >> &DstBuffers, unsigned Size, const std::string &SrcSymbol, unsigned Offset)
Copy data from the DPUs in the set.
Definition: dpu.hpp:566
virtual const char * what() const noexcept override
Definition: dpu.hpp:71
Representation of a DPU program.
Definition: dpu.hpp:123
static DpuSet allocate(unsigned NrDpus=ALLOCATE_ALL, const std::string &Profile="")
Allocate a number of DPUs with the given profile.
Definition: dpu.hpp:913
void copy(const std::string &DstSymbol, const std::vector< T > &SrcBuffer, unsigned Size)
Copy the same data to all the DPUs in the set.
Definition: dpu.hpp:200
void copy(const std::string &DstSymbol, const std::vector< std::vector< T >> &SrcBuffers)
Copy the different buffers to the DPUs in the set.
Definition: dpu.hpp:345
void copyScatterGather(DpuSymbol &DstSymbol, unsigned Offset, F f, unsigned Size, bool length_check=true)
Copy the different buffers to the DPUs in the set with a scatter/gather transfer. ...
Definition: dpu.hpp:536