VTK-m  2.0
ArrayPortalFromThrust.h
Go to the documentation of this file.
1 //============================================================================
2 // Copyright (c) Kitware, Inc.
3 // All rights reserved.
4 // See LICENSE.txt for details.
5 //
6 // This software is distributed WITHOUT ANY WARRANTY; without even
7 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8 // PURPOSE. See the above copyright notice for more information.
9 //============================================================================
10 #ifndef vtk_m_exec_cuda_internal_ArrayPortalFromThrust_h
11 #define vtk_m_exec_cuda_internal_ArrayPortalFromThrust_h
12 
13 #include <vtkm/Types.h>
15 
16 #include <iterator>
17 #include <type_traits>
18 
20 VTKM_THIRDPARTY_PRE_INCLUDE
21 #include <thrust/system/cuda/memory.h>
22 VTKM_THIRDPARTY_POST_INCLUDE
23 
24 namespace vtkm
25 {
26 namespace exec
27 {
28 namespace cuda
29 {
30 namespace internal
31 {
32 
33 // The clang-format rules want to put the curly braces on separate lines. Since
34 // these declarations are a type-level truth table, minimize the amount of
35 // space it takes up.
36 // clang-format off
37 template <typename T> struct UseScalarTextureLoad : public std::false_type {};
38 template <typename T> struct UseVecTextureLoads : public std::false_type {};
39 template <typename T> struct UseMultipleScalarTextureLoads : public std::false_type {};
40 
41 //currently CUDA doesn't support texture loading of signed char's so that is why
42 //you don't see vtkm::Int8 in any of the lists.
43 template <> struct UseScalarTextureLoad<const vtkm::UInt8> : std::true_type {};
44 template <> struct UseScalarTextureLoad<const vtkm::Int16> : std::true_type {};
45 template <> struct UseScalarTextureLoad<const vtkm::UInt16> : std::true_type {};
46 template <> struct UseScalarTextureLoad<const vtkm::Int32> : std::true_type {};
47 template <> struct UseScalarTextureLoad<const vtkm::UInt32> : std::true_type {};
48 template <> struct UseScalarTextureLoad<const vtkm::Float32> : std::true_type {};
49 template <> struct UseScalarTextureLoad<const vtkm::Float64> : std::true_type {};
50 
51 //CUDA needs vec types converted to CUDA types ( float2, uint2), so we have a special
52 //case for these vec texture loads.
53 template <> struct UseVecTextureLoads<const vtkm::Vec2i_32> : std::true_type {};
54 template <> struct UseVecTextureLoads<const vtkm::Vec2ui_32> : std::true_type {};
55 template <> struct UseVecTextureLoads<const vtkm::Vec2f_32> : std::true_type {};
56 template <> struct UseVecTextureLoads<const vtkm::Vec2f_64> : std::true_type {};
57 
58 template <> struct UseVecTextureLoads<const vtkm::Vec4i_32> : std::true_type {};
59 template <> struct UseVecTextureLoads<const vtkm::Vec4ui_32> : std::true_type {};
60 template <> struct UseVecTextureLoads<const vtkm::Vec4f_32> : std::true_type {};
61 
62 //CUDA doesn't support loading 3 wide values through a texture unit by default,
63 //so instead we fetch through texture three times and store the result
64 //currently CUDA doesn't support texture loading of signed char's so that is why
65 //you don't see vtkm::Int8 in any of the lists.
66 
67 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec2ui_8> : std::true_type {};
68 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec2i_16> : std::true_type {};
69 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec2ui_16> : std::true_type {};
70 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec2i_64> : std::true_type {};
71 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec2ui_64> : std::true_type {};
72 
73 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec3ui_8> : std::true_type {};
74 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec3i_16> : std::true_type {};
75 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec3ui_16> : std::true_type {};
76 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec3i_32> : std::true_type {};
77 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec3ui_32> : std::true_type {};
78 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec3f_32> : std::true_type {};
79 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec3f_64> : std::true_type {};
80 
81 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec4ui_8> : std::true_type {};
82 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec4i_16> : std::true_type {};
83 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec4ui_16> : std::true_type {};
84 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec4i_64> : std::true_type {};
85 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec4ui_64> : std::true_type {};
86 template <> struct UseMultipleScalarTextureLoads<const vtkm::Vec4f_64> : std::true_type {};
87 // clang-format on
88 
89 //this T type is not one that is valid to be loaded through texture memory
90 template <typename T, typename Enable = void>
91 struct load_through_texture
92 {
93  static constexpr vtkm::IdComponent WillUseTexture = 0;
94 
95  __device__ static T get(const T* const data) { return *data; }
96 };
97 
98 //only load through a texture if we have sm 35 support
99 
100 // this T type is valid to be loaded through a single texture memory fetch
101 template <typename T>
102 struct load_through_texture<T, typename std::enable_if<UseScalarTextureLoad<const T>::value>::type>
103 {
104 
105  static constexpr vtkm::IdComponent WillUseTexture = 1;
106 
107  __device__ static T get(const T* const data)
108  {
109 #if __CUDA_ARCH__ >= 350
110  // printf("__CUDA_ARCH__ UseScalarTextureLoad");
111  return __ldg(data);
112 #else
113  return *data;
114 #endif
115  }
116 };
117 
118 // this T type is valid to be loaded through a single vec texture memory fetch
119 template <typename T>
120 struct load_through_texture<T, typename std::enable_if<UseVecTextureLoads<const T>::value>::type>
121 {
122  static constexpr vtkm::IdComponent WillUseTexture = 1;
123 
124  __device__ static T get(const T* const data)
125  {
126 #if __CUDA_ARCH__ >= 350
127  // printf("__CUDA_ARCH__ UseVecTextureLoads");
128  return getAs(data);
129 #else
130  return *data;
131 #endif
132  }
133 
134  __device__ static vtkm::Vec2i_32 getAs(const vtkm::Vec2i_32* const data)
135  {
136  const int2 temp = __ldg((const int2*)data);
137  return vtkm::Vec2i_32(temp.x, temp.y);
138  }
139 
140  __device__ static vtkm::Vec2ui_32 getAs(const vtkm::Vec2ui_32* const data)
141  {
142  const uint2 temp = __ldg((const uint2*)data);
143  return vtkm::Vec2ui_32(temp.x, temp.y);
144  }
145 
146  __device__ static vtkm::Vec4i_32 getAs(const vtkm::Vec4i_32* const data)
147  {
148  const int4 temp = __ldg((const int4*)data);
149  return vtkm::Vec4i_32(temp.x, temp.y, temp.z, temp.w);
150  }
151 
152  __device__ static vtkm::Vec4ui_32 getAs(const vtkm::Vec4ui_32* const data)
153  {
154  const uint4 temp = __ldg((const uint4*)data);
155  return vtkm::Vec4ui_32(temp.x, temp.y, temp.z, temp.w);
156  }
157 
158  __device__ static vtkm::Vec2f_32 getAs(const vtkm::Vec2f_32* const data)
159  {
160  const float2 temp = __ldg((const float2*)data);
161  return vtkm::Vec2f_32(temp.x, temp.y);
162  }
163 
164  __device__ static vtkm::Vec4f_32 getAs(const vtkm::Vec4f_32* const data)
165  {
166  const float4 temp = __ldg((const float4*)data);
167  return vtkm::Vec4f_32(temp.x, temp.y, temp.z, temp.w);
168  }
169 
170  __device__ static vtkm::Vec2f_64 getAs(const vtkm::Vec2f_64* const data)
171  {
172  const double2 temp = __ldg((const double2*)data);
173  return vtkm::Vec2f_64(temp.x, temp.y);
174  }
175 };
176 
177 //this T type is valid to be loaded through multiple texture memory fetches
178 template <typename T>
179 struct load_through_texture<
180  T,
181  typename std::enable_if<UseMultipleScalarTextureLoads<const T>::value>::type>
182 {
183  static constexpr vtkm::IdComponent WillUseTexture = 1;
184 
185  using NonConstT = typename std::remove_const<T>::type;
186 
187  __device__ static T get(const T* const data)
188  {
189 #if __CUDA_ARCH__ >= 350
190  // printf("__CUDA_ARCH__ UseMultipleScalarTextureLoads");
191  return getAs(data);
192 #else
193  return *data;
194 #endif
195  }
196 
197  __device__ static T getAs(const T* const data)
198  {
199  //we need to fetch each component individually
200  const vtkm::IdComponent NUM_COMPONENTS = T::NUM_COMPONENTS;
201  using ComponentType = typename T::ComponentType;
202  const ComponentType* recasted_data = (const ComponentType*)(data);
203  NonConstT result;
204 #pragma unroll
205  for (vtkm::IdComponent i = 0; i < NUM_COMPONENTS; ++i)
206  {
207  result[i] = __ldg(recasted_data + i);
208  }
209  return result;
210  }
211 };
212 
213 class ArrayPortalFromThrustBase
214 {
215 };
216 
220 template <typename T>
221 class ArrayPortalFromThrust : public ArrayPortalFromThrustBase
222 {
223 public:
224  using ValueType = T;
225  using IteratorType = T*;
226  using difference_type = std::ptrdiff_t;
227 
228  VTKM_EXEC_CONT ArrayPortalFromThrust() {}
229 
230  VTKM_CONT
231  ArrayPortalFromThrust(IteratorType begin, IteratorType end)
232  : BeginIterator(begin)
233  , EndIterator(end)
234  {
235  }
236 
241  template <typename OtherT>
242  VTKM_EXEC_CONT ArrayPortalFromThrust(const ArrayPortalFromThrust<OtherT>& src)
243  : BeginIterator(src.GetIteratorBegin())
244  , EndIterator(src.GetIteratorEnd())
245  {
246  }
247 
249  vtkm::Id GetNumberOfValues() const
250  {
251  // Not using std::distance because on CUDA it cannot be used on a device.
252  return static_cast<vtkm::Id>((this->EndIterator - this->BeginIterator));
253  }
254 
256  ValueType Get(vtkm::Id index) const
257  {
258  return *(this->BeginIterator + static_cast<difference_type>(index));
259  }
260 
262  void Set(vtkm::Id index, ValueType value) const
263  {
264  *(this->BeginIterator + static_cast<difference_type>(index)) = value;
265  }
266 
268  IteratorType GetIteratorBegin() const { return this->BeginIterator; }
269 
271  IteratorType GetIteratorEnd() const { return this->EndIterator; }
272 
273 private:
274  IteratorType BeginIterator;
275  IteratorType EndIterator;
276 };
277 
278 template <typename T>
279 class ConstArrayPortalFromThrust : public ArrayPortalFromThrustBase
280 {
281 public:
282  using ValueType = T;
283  using IteratorType = const T*;
284  using difference_type = std::ptrdiff_t;
285 
286  VTKM_EXEC_CONT ConstArrayPortalFromThrust()
287  : BeginIterator(nullptr)
288  , EndIterator(nullptr)
289  {
290  }
291 
292  VTKM_CONT
293  ConstArrayPortalFromThrust(IteratorType begin, IteratorType end)
294  : BeginIterator(begin)
295  , EndIterator(end)
296  {
297  // printf("ConstArrayPortalFromThrust() %s \n", __PRETTY_FUNCTION__ );
298  }
299 
304  // template<typename OtherT>
306  ConstArrayPortalFromThrust(const ArrayPortalFromThrust<T>& src)
307  : BeginIterator(src.GetIteratorBegin())
308  , EndIterator(src.GetIteratorEnd())
309  {
310  }
311 
313  vtkm::Id GetNumberOfValues() const
314  {
315  // Not using std::distance because on CUDA it cannot be used on a device.
316  return static_cast<vtkm::Id>((this->EndIterator - this->BeginIterator));
317  }
318 
319 //The VTKM_CUDA_DEVICE_PASS define makes sure that the device only signature
320 //only shows up for the device compilation. This allows the nvcc compiler
321 //to have separate host and device code paths for the same method. This
322 //solves the problem of trying to call a device only method from a
323 //device/host method
324 #ifdef VTKM_CUDA_DEVICE_PASS
325  __device__ ValueType Get(vtkm::Id index) const
326  {
328  index);
329  }
330 
331  __device__ void Set(vtkm::Id vtkmNotUsed(index), ValueType vtkmNotUsed(value)) const {}
332 
333 #else
334  ValueType Get(vtkm::Id vtkmNotUsed(index)) const { return ValueType(); }
335 
336  void Set(vtkm::Id vtkmNotUsed(index), ValueType vtkmNotUsed(value)) const
337  {
338 #if !(defined(VTKM_MSVC) && defined(VTKM_CUDA))
339  VTKM_ASSERT(true && "Cannot set to const array.");
340 #endif
341  }
342 #endif
343 
345  IteratorType GetIteratorBegin() const { return this->BeginIterator; }
346 
348  IteratorType GetIteratorEnd() const { return this->EndIterator; }
349 
350 private:
351  IteratorType BeginIterator;
352  IteratorType EndIterator;
353 };
354 }
355 }
356 }
357 } // namespace vtkm::exec::cuda::internal
358 
359 #endif //vtk_m_exec_cuda_internal_ArrayPortalFromThrust_h
vtkm::get
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT auto get(const vtkm::Tuple< Ts... > &tuple) -> decltype(vtkm::Get< static_cast< vtkm::IdComponent >(Index)>(tuple))
Compatible with std::get for vtkm::Tuple.
Definition: Tuple.h:101
vtkm
Groups connected points that have the same field value.
Definition: Atomic.h:19
vtkm::Vec4i_32
vtkm::Vec< vtkm::Int32, 4 > Vec4i_32
Vec4i_32 corresponds to a 4-dimensional vector of 32-bit integer values.
Definition: Types.h:1166
Types.h
VTKM_ASSERT
#define VTKM_ASSERT(condition)
Definition: Assert.h:43
VTKM_EXEC_CONT
#define VTKM_EXEC_CONT
Definition: ExportMacros.h:52
vtkm::Get
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT auto Get(const vtkm::Tuple< Ts... > &tuple) -> decltype(tuple.template Get< Index >())
Retrieve the object from a vtkm::Tuple at the given index.
Definition: Tuple.h:83
vtkm::IdComponent
vtkm::Int32 IdComponent
Represents a component ID (index of component in a vector).
Definition: Types.h:168
ArrayPortalToIterators.h
vtkm::Vec2ui_32
vtkm::Vec< vtkm::UInt32, 2 > Vec2ui_32
Vec2ui_32 corresponds to a 2-dimensional vector of 32-bit unsigned integer values.
Definition: Types.h:966
vtkm::Int16
int16_t Int16
Definition: Types.h:158
vtkm::Vec4f_32
vtkm::Vec< vtkm::Float32, 4 > Vec4f_32
Vec4f_32 corresponds to a 4-dimensional vector of 32-bit floating point values.
Definition: Types.h:1136
vtkm::Vec2i_32
vtkm::Vec< vtkm::Int32, 2 > Vec2i_32
Vec2i_32 corresponds to a 2-dimensional vector of 32-bit integer values.
Definition: Types.h:932
vtkm::Id
vtkm::Int32 Id
Represents an ID (index into arrays).
Definition: Types.h:191
ThrustPatches.h
vtkm::Vec2f_32
vtkm::Vec< vtkm::Float32, 2 > Vec2f_32
Vec2f_32 corresponds to a 2-dimensional vector of 32-bit floating point values.
Definition: Types.h:902
vtkm::Vec2f_64
vtkm::Vec< vtkm::Float64, 2 > Vec2f_64
Vec2f_64 corresponds to a 2-dimensional vector of 64-bit floating point values.
Definition: Types.h:908
VTKM_CONT
#define VTKM_CONT
Definition: ExportMacros.h:57
vtkm::UInt8
uint8_t UInt8
Definition: Types.h:157
vtkmNotUsed
#define vtkmNotUsed(parameter_name)
Simple macro to identify a parameter as unused.
Definition: ExportMacros.h:128
vtkm::Vec
A short fixed-length array.
Definition: Types.h:767
vtkm::UInt32
uint32_t UInt32
Definition: Types.h:161
vtkm::Float32
float Float32
Definition: Types.h:154
vtkm::Int32
int32_t Int32
Definition: Types.h:160
vtkm::Float64
double Float64
Definition: Types.h:155
vtkm::UInt16
uint16_t UInt16
Definition: Types.h:159
vtkm::Vec4ui_32
vtkm::Vec< vtkm::UInt32, 4 > Vec4ui_32
Vec4ui_32 corresponds to a 4-dimensional vector of 32-bit unsigned integer values.
Definition: Types.h:1200