Macros | |
| #define | FORALL_HERE_OVERLOAD(...) |
| Overload. More... | |
| #define | FORALL_OVERLOAD(...) |
Functions | |
| GlobalCompletionEvent & | Grappa::default_gce () |
| template<SyncMode S = SyncMode::Blocking, TaskMode B = TaskMode::Bound, GlobalCompletionEvent * GCE = nullptr, int64_t Threshold = impl::USE_LOOP_THRESHOLD_FLAG, typename F = decltype(nullptr)> | |
| void | Grappa::forall_here (int64_t start, int64_t iters, F loop_body) |
| Grappa::FORALL_HERE_OVERLOAD (TaskMode B, SyncMode S=SyncMode::Blocking, GlobalCompletionEvent *GCE=nullptr, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) | |
| Grappa::FORALL_HERE_OVERLOAD (SyncMode S, GlobalCompletionEvent *GCE, TaskMode B=TaskMode::Bound, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) | |
| Grappa::FORALL_HERE_OVERLOAD (SyncMode S, GlobalCompletionEvent *GCE, int64_t Threshold, TaskMode B=TaskMode::Bound) | |
| Grappa::FORALL_HERE_OVERLOAD (GlobalCompletionEvent *GCE, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG, TaskMode B=TaskMode::Bound, SyncMode S=SyncMode::Blocking) | |
| Grappa::FORALL_HERE_OVERLOAD (int64_t Threshold, GlobalCompletionEvent *GCE=nullptr, TaskMode B=TaskMode::Bound, SyncMode S=SyncMode::Blocking) | |
| Grappa::FORALL_OVERLOAD (TaskMode B=TaskMode::Bound, SyncMode S=SyncMode::Blocking, GlobalCompletionEvent *C=&impl::local_gce, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) | |
| Grappa::FORALL_OVERLOAD (SyncMode S, TaskMode B=TaskMode::Bound, GlobalCompletionEvent *C=&impl::local_gce, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) | |
| Grappa::FORALL_OVERLOAD (GlobalCompletionEvent *C, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG, TaskMode B=TaskMode::Bound, SyncMode S=SyncMode::Blocking) | |
| Grappa::FORALL_OVERLOAD (int64_t Threshold, GlobalCompletionEvent *C=&impl::local_gce, TaskMode B=TaskMode::Bound, SyncMode S=SyncMode::Blocking) | |
| Grappa::FORALL_OVERLOAD (TaskMode B, GlobalCompletionEvent *C, SyncMode S=SyncMode::Blocking, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) | |
| template<typename T > | |
| std::pair< Core, Core > | Grappa::cores_with_elements (GlobalAddress< T > base, size_t nelem) |
| Return range of cores that have elements for the given linear address range. More... | |
| template<GlobalCompletionEvent * GCE = &impl::local_gce, int64_t Threshold = impl::USE_LOOP_THRESHOLD_FLAG, typename T = decltype(nullptr), typename F = decltype(nullptr)> | |
| void | Grappa::on_cores_localized_async (GlobalAddress< T > base, int64_t nelems, F do_on_core) |
| Run privateTasks on each core that contains elements of the given region of global memory. More... | |
| template<TaskMode B = TaskMode::Bound, SyncMode S = SyncMode::Blocking, GlobalCompletionEvent * GCE = &impl::local_gce, int64_t Threshold = impl::USE_LOOP_THRESHOLD_FLAG, typename T = decltype(nullptr), typename F = decltype(nullptr)> | |
| void | Grappa::forall (GlobalAddress< T > base, int64_t nelems, F loop_body) |
| Parallel loop over a global array. More... | |
| #define FORALL_HERE_OVERLOAD | ( | ... | ) |
Overload.
Definition at line 191 of file ParallelLoop.hpp.
| #define FORALL_OVERLOAD | ( | ... | ) |
Definition at line 277 of file ParallelLoop.hpp.
| std::pair<Core,Core> Grappa::cores_with_elements | ( | GlobalAddress< T > | base, |
| size_t | nelem | ||
| ) |
Return range of cores that have elements for the given linear address range.
Definition at line 329 of file ParallelLoop.hpp.
|
inline |
Definition at line 60 of file ParallelLoop.hpp.
| void Grappa::forall | ( | GlobalAddress< T > | base, |
| int64_t | nelems, | ||
| F | loop_body | ||
| ) |
Parallel loop over a global array.
Overload for specifying GCE only.
Overload to allow using default GCE but specifying threshold.
Overload for specifying just SyncMode (or SyncMode first).
Spawned from a single core, fans out and runs tasks on elements that are local to each core.
Subject to "may-parallelism"; see loop_threshold.
Takes an optional pointer to a global static GlobalCompletionEvent as a template parameter to allow for programmer-specified task joining (to potentially allow more than one in flight simultaneously, though this call is itself synchronous).
Takes a lambda/functor that operates on a range of iterations: void(int64_t first_index, int64_t niters, T * first_element)
Warning: you cannot simply increment first_index niters times and get the correct global index, because a single task may span more than one block. Example:
Alternatively, forall can take a lambda/functor with signature: void(int64_t index, T& element) (internally wraps this call in a loop and passes to the other version of forall)
This is meant to make it easy to make a loop where you don't care about amortizing anything for a single task. If you would like to do something that will be used by multiple iterations, use the other version of Grappa::forall that takes a lambda that operates on a range.
Example:
Definition at line 498 of file ParallelLoop.hpp.
| void Grappa::forall_here | ( | int64_t | start, |
| int64_t | iters, | ||
| F | loop_body | ||
| ) |
Definition at line 186 of file ParallelLoop.hpp.
| Grappa::FORALL_HERE_OVERLOAD | ( | TaskMode | B, |
| SyncMode | S = SyncMode::Blocking, |
||
| GlobalCompletionEvent * | GCE = nullptr, |
||
| int64_t | Threshold = impl::USE_LOOP_THRESHOLD_FLAG |
||
| ) |
| Grappa::FORALL_HERE_OVERLOAD | ( | SyncMode | S, |
| GlobalCompletionEvent * | GCE, | ||
| TaskMode | B = TaskMode::Bound, |
||
| int64_t | Threshold = impl::USE_LOOP_THRESHOLD_FLAG |
||
| ) |
| Grappa::FORALL_HERE_OVERLOAD | ( | SyncMode | S, |
| GlobalCompletionEvent * | GCE, | ||
| int64_t | Threshold, | ||
| TaskMode | B = TaskMode::Bound |
||
| ) |
| Grappa::FORALL_HERE_OVERLOAD | ( | GlobalCompletionEvent * | GCE, |
| int64_t | Threshold = impl::USE_LOOP_THRESHOLD_FLAG, |
||
| TaskMode | B = TaskMode::Bound, |
||
| SyncMode | S = SyncMode::Blocking |
||
| ) |
| Grappa::FORALL_HERE_OVERLOAD | ( | int64_t | Threshold, |
| GlobalCompletionEvent * | GCE = nullptr, |
||
| TaskMode | B = TaskMode::Bound, |
||
| SyncMode | S = SyncMode::Blocking |
||
| ) |
| Grappa::FORALL_OVERLOAD | ( | TaskMode | B = TaskMode::Bound, |
| SyncMode | S = SyncMode::Blocking, |
||
| GlobalCompletionEvent * | C = &impl::local_gce, |
||
| int64_t | Threshold = impl::USE_LOOP_THRESHOLD_FLAG |
||
| ) |
| Grappa::FORALL_OVERLOAD | ( | SyncMode | S, |
| TaskMode | B = TaskMode::Bound, |
||
| GlobalCompletionEvent * | C = &impl::local_gce, |
||
| int64_t | Threshold = impl::USE_LOOP_THRESHOLD_FLAG |
||
| ) |
| Grappa::FORALL_OVERLOAD | ( | GlobalCompletionEvent * | C, |
| int64_t | Threshold = impl::USE_LOOP_THRESHOLD_FLAG, |
||
| TaskMode | B = TaskMode::Bound, |
||
| SyncMode | S = SyncMode::Blocking |
||
| ) |
| Grappa::FORALL_OVERLOAD | ( | int64_t | Threshold, |
| GlobalCompletionEvent * | C = &impl::local_gce, |
||
| TaskMode | B = TaskMode::Bound, |
||
| SyncMode | S = SyncMode::Blocking |
||
| ) |
| Grappa::FORALL_OVERLOAD | ( | TaskMode | B, |
| GlobalCompletionEvent * | C, | ||
| SyncMode | S = SyncMode::Blocking, |
||
| int64_t | Threshold = impl::USE_LOOP_THRESHOLD_FLAG |
||
| ) |
| void Grappa::on_cores_localized_async | ( | GlobalAddress< T > | base, |
| int64_t | nelems, | ||
| F | do_on_core | ||
| ) |
Run privateTasks on each core that contains elements of the given region of global memory.
Signature of do_on_core: void(T* local_base, size_t nlocal). Internally creates a privateTask with 2*8-byte words, so do_on_core can be 8 bytes and not cause heap allocation.
Definition at line 363 of file ParallelLoop.hpp.