VTK-m  2.0
UnionFind.h
Go to the documentation of this file.
1 //============================================================================
2 // Copyright (c) Kitware, Inc.
3 // All rights reserved.
4 // See LICENSE.txt for details.
5 //
6 // This software is distributed WITHOUT ANY WARRANTY; without even
7 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8 // PURPOSE. See the above copyright notice for more information.
9 //============================================================================
10 
11 #ifndef vtk_m_worklet_connectivity_union_find_h
12 #define vtk_m_worklet_connectivity_union_find_h
13 
15 
16 namespace vtkm
17 {
18 namespace worklet
19 {
20 namespace connectivity
21 {
22 
23 // Reference:
24 // Jayanti, Siddhartha V., and Robert E. Tarjan.
25 // "Concurrent Disjoint Set Union." arXiv preprint arXiv:2003.01203 (2020).
26 class UnionFind
27 {
28 public:
29  // This is the naive findRoot() without path compaction in SV Jayanti et. al.
30  // Since the parents array is read-only in this function, there is no data
31  // race when it is called by multiple treads concurrently. We can just call
32  // Get(). For cases where findRoot() is used in other functions that write
33  // to parents (e.g. Unite()), Get() actually calls Load() with
34  // memory_order_acquire ordering, which in turn ensure writes by other threads
35  // are reflected.
36  template <typename Parents>
37  static VTKM_EXEC vtkm::Id findRoot(const Parents& parents, vtkm::Id index)
38  {
39  while (parents.Get(index) != index)
40  index = parents.Get(index);
41  return index;
42  }
43 
44  template <typename Parents>
45  static VTKM_EXEC void Unite(Parents& parents, vtkm::Id u, vtkm::Id v)
46  {
47  // Data Race Resolutions
48  // Since this function modifies the Union-Find data structure, concurrent
49  // invocation of it by 2 or more threads causes potential data race. Here
50  // is a case analysis why the potential data race does no harm in the
51  // context of the single pass connected component algorithm.
52 
53  // Case 1, Two threads calling Unite(u, v) (and/or Unite(v, u)) concurrently.
54  // Problem: One thread might attach u to v while the other thread attach
55  // v to u, causing a cycle in the Union-Find data structure.
56  // Resolution: This is not so much of a race condition but a problem with
57  // the consistency of the algorithm. This can also happen in serial.
58  // This is resolved by "linking by index" as in SV Jayanti et.al. with less
59  // than as the total order. The two threads will make the same decision on
60  // how to Unite the two trees (e.g. from root with larger id to root with
61  // smaller id.) This avoids cycles in the resulting graph and maintains the
62  // rooted forest structure of Union-Find at the expense of duplicated (but
63  // benign) work.
64 
65  // Case 2, T0 calling Unite(u, v) and T1 calling Unite(u, w) concurrently.
66  // Problem I: There is a potential write after read data race. After T0
67  // calls findRoot() for u but before actually updating the parent of root_u,
68  // T1 might have changed root_u to root_w and made root_u "obsolete".
69  // When the root of the tree to be attached to (e.g. root_u, when root_u < root_v)
70  // is changed, there is no hazard, since we are just attaching a tree to a
71  // now a non-root node, root_u, (thus, root_w <- root_u <- root_v).
72  // However, when the root of the attaching tree (root_v) is changed, it
73  // means that the root_u has been attached to yet some other root_s and became
74  // a non-root node. If we are now attaching this non-root node to root_w we
75  // would leave root_s behind and undoing previous work.
76  // Atomic Load/Store with memory_order_acquire are not able to detect this
77  // data race. While Load sees all previous Stores by other threads, it can not
78  // be aware of any Store after the Load.
79  // Resolution: Use atomic Compare and Swap in a loop when updating root_u.
80  // CAS will check if root of u has been updated by some other thread between
81  // findRoot(u) is called and when root of u is going to be updated. This is
82  // done by comparing the root_u = findRoot(u) to the current value at
83  // parents[root_u]. If they are the same, no data race has happened and the
84  // value in parents[root_u] is updated. However, if root_u != parent[root_u],
85  // it means parent[root_u] has been updated by some other thread. CAS returns
86  // the new value of parent[root_u] (root_s in the Problem description) which
87  // we can use as the new root_u. We keep retrying until there is no more
88  // data race and root_u == root_v i.e. they are in the same component.
89 
90  // Problem II: There is a potential concurrent write data race as it is
91  // possible for the two threads to try to change the same old root to
92  // different new roots, e.g. T0 calls parents.Set(root_u, root_v) while T1
93  // calls parents.Set(root_u, root_w) where root_v < root_u and root_w < root_u
94  // (but the order of root_v and root_w is unspecified.) Each thread assumes
95  // success while the outcome is actually unspecified.
96  // Resolution: Use an atomic Compare and Swap is suggested in SV Janati et. al.
97  // as well as J. Jaiganesht et. al. to resolve the data race. The CAS
98  // checks if the old root is the same as what we expected. If so, there is
99  // no data race, CAS will set the root to the desired new value. The return
100  // value from CAS will equal to our expected old root and signifies a
101  // successful write which terminates the while loop.
102  // If the old root is not what we expected, it has been updated by some
103  // other thread and the update by this thread fails. The root as updated by
104  // the other thread is returned. This returned value would not equal to
105  // our desired new root, signifying the need to retry with the while loop.
106  // We can use this return "new root" as is without calling findRoot() to
107  // find the "new root". The while loop terminates when both u and v have
108  // the same root (thus united).
109  vtkm::Id root_u = UnionFind::findRoot(parents, u);
110  vtkm::Id root_v = UnionFind::findRoot(parents, v);
111 
112  while (root_u != root_v)
113  {
114  // FIXME: we might be executing the loop one extra time than necessary.
115  if (root_u < root_v)
116  parents.CompareExchange(root_v, &root_v, root_u);
117  else if (root_u > root_v)
118  parents.CompareExchange(root_u, &root_u, root_v);
119  }
120  }
121 
122  // This compresses the path from each node to its root thus flattening the
123  // trees and guarantees that the output trees will be rooted stars, i.e.
124  // they all have depth of 1.
125 
126  // There is a "seemly" data race for this function. The root returned by
127  // findRoot() in one thread might become out of date if some other
128  // thread changed it before this function calls parents.Set() making the
129  // result tree not "short" enough and thus calls for a CompareAndSwap retry
130  // loop. However, this data race does not happen for the following reasons:
131  // 1. Since the only way for a root of a tree to be changed is through Unite(),
132  // as long as there is no concurrent invocation of Unite() and Flatten() there
133  // is no data race. This applies even for a compacting findRoot() which can
134  // still only change the parents of non-root nodes.
135  // 2. By the same token, since findRoot() does not change root and most
136  // "damage" parents.Set() can do is resetting root's parent to itself,
137  // the root of a tree can never be changed by this function. Thus, here
138  // is no data race between concurrent invocations of this function.
139 
140  // Since the current findRoot() does not do path compaction, this algorithm
141  // has O(n) depth with O(n^2) of total work on a Parallel Random Access
142  // Machine (PRAM). However, we don't live in a synchronous, infinite number
143  // of processor PRAM world. In reality, since we put "parent pointers" in a
144  // array and all the pointers are pointing from larger indices to smaller
145  // ones, invocation for nodes with smaller ids are mostly likely be scheduled
146  // before and completes earlier than nodes with larger ids. This makes
147  // the "effective" path length shorter for nodes with larger ids.
148  // In this way, concurrency actually helps with algorithm complexity.
149  template <typename Parents>
150  static VTKM_EXEC void Flatten(Parents& parents, vtkm::Id index)
151  {
152  auto root = findRoot(parents, index);
153  parents.Set(index, root);
154  }
155 };
156 
158 {
159 public:
160  using ControlSignature = void(WholeArrayInOut comp);
161  using ExecutionSignature = void(WorkIndex, _1);
162  using InputDomain = _1;
163 
164  template <typename InOutPortalType>
165  VTKM_EXEC void operator()(vtkm::Id index, InOutPortalType& comps) const
166  {
167  UnionFind::Flatten(comps, index);
168  }
169 };
170 
171 } // connectivity
172 } // worklet
173 } // vtkm
174 #endif // vtk_m_worklet_connectivity_union_find_h
vtkm::worklet::connectivity::PointerJumping::ControlSignature
void(WholeArrayInOut comp) ControlSignature
Definition: UnionFind.h:160
VTKM_EXEC
#define VTKM_EXEC
Definition: ExportMacros.h:51
vtkm
Groups connected points that have the same field value.
Definition: Atomic.h:19
WorkletMapField.h
vtkm::worklet::connectivity::UnionFind::Unite
static VTKM_EXEC void Unite(Parents &parents, vtkm::Id u, vtkm::Id v)
Definition: UnionFind.h:45
vtkm::worklet::connectivity::UnionFind
Definition: UnionFind.h:26
vtkm::Id
vtkm::Int32 Id
Represents an ID (index into arrays).
Definition: Types.h:191
vtkm::worklet::connectivity::UnionFind::Flatten
static VTKM_EXEC void Flatten(Parents &parents, vtkm::Id index)
Definition: UnionFind.h:150
vtkm::worklet::connectivity::PointerJumping::ExecutionSignature
void(WorkIndex, _1) ExecutionSignature
Definition: UnionFind.h:161
vtkm::worklet::connectivity::PointerJumping::InputDomain
_1 InputDomain
Definition: UnionFind.h:162
vtkm::worklet::connectivity::PointerJumping
Definition: UnionFind.h:157
vtkm::worklet::connectivity::UnionFind::findRoot
static VTKM_EXEC vtkm::Id findRoot(const Parents &parents, vtkm::Id index)
Definition: UnionFind.h:37
vtkm::worklet::connectivity::PointerJumping::operator()
VTKM_EXEC void operator()(vtkm::Id index, InOutPortalType &comps) const
Definition: UnionFind.h:165
vtkm::worklet::WorkletMapField
Base class for worklets that do a simple mapping of field arrays.
Definition: WorkletMapField.h:38
vtkm::exec::arg::WorkIndex
The ExecutionSignature tag to use to get the work index.
Definition: WorkIndex.h:39