array
C++ library for multi-dimensional arrays
ein_reduce.h
Go to the documentation of this file.
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
19 #ifndef NDARRAY_EIN_REDUCE_H
20 #define NDARRAY_EIN_REDUCE_H
21 
22 #include "array/array.h"
23 
24 namespace nda {
25 
26 namespace internal {
27 
// TODO: Find a way to enable operations with non-op types? e.g. scalars.
// SFINAE helper: well-formed (void) only for types flagged as Einstein
// expression nodes, i.e. T::is_ein_op is exactly std::true_type.
template <class T>
using enable_if_ein_op =
    std::enable_if_t<std::is_same<typename T::is_ein_op, std::true_type>::value>;

// SFINAE helper: well-formed only for assignment expression nodes
// (those produced by operator=, +=, -=, *= on a leaf operand).
template <class T>
using enable_if_ein_assign =
    std::enable_if_t<std::is_same<typename T::is_assign, std::true_type>::value>;
36 
37 // Briefly, the high level goal of the next few classes is to enable construction of
38 // expressions describing Einstein summations or other reductions. This is done using
39 // a small expression template system. Normally, expression templates are troublesome
40 // due to overwhelming the compiler's ability to do CSE and other optimizations. In
41 // the case of Einstein reductions, the expressions will usually be very small...
42 
43 template <class Derived>
44 struct ein_op_base {
45  using is_ein_op = std::true_type;
46  using is_assign = std::false_type;
47 
48  // We need to be able to get the derived type when creating binary operations using
49  // this operation as an operand.
50  const Derived& derived() const { return *static_cast<const Derived*>(this); }
51 
52  auto operator-() const { return make_ein_op_negate(derived()); }
53  template <class T, class = enable_if_ein_op<T>>
54  auto operator+(const T& r) const {
55  return make_ein_op_add(derived(), r);
56  }
57  template <class T, class = enable_if_ein_op<T>>
58  auto operator-(const T& r) const {
59  return make_ein_op_sub(derived(), r);
60  }
61  template <class T, class = enable_if_ein_op<T>>
62  auto operator*(const T& r) const {
63  return make_ein_op_mul(derived(), r);
64  }
65  template <class T, class = enable_if_ein_op<T>>
66  auto operator/(const T& r) const {
67  return make_ein_op_div(derived(), r);
68  }
69 };
70 
71 // A leaf operand of an Einstein reduction expression. The Is... indicate the
72 // dimension of the reduction to use to address this operand.
73 template <class Op, size_t... Is>
74 struct ein_op : public ein_op_base<ein_op<Op, Is...>> {
75  Op op;
76  ein_op(Op op) : op(std::move(op)) {}
77 
78  // The largest dimension used by this operand.
79  static constexpr index_t MaxIndex = sizeof...(Is) == 0 ? -1 : variadic_max(Is...);
80 
81  // auto doesn't work here because it doesn't include the reference type of operator() when we
82  // need it, but it writing it includes it when we can't, e.g. if op(...) doesn't return a
83  // reference.
84  template <class Idx>
85  NDARRAY_INLINE decltype(op(Is...)) operator()(const Idx& i) const {
86  return op(std::get<Is>(i)...);
87  }
88 
89  // Only ein_op has assignment operators.
90  template <class T, class = enable_if_ein_op<T>>
91  auto operator=(const T& r) const {
92  return make_ein_op_assign(*this, r);
93  }
94  template <class T, class = enable_if_ein_op<T>>
95  auto operator+=(const T& r) const {
96  return make_ein_op_add_assign(*this, r);
97  }
98  template <class T, class = enable_if_ein_op<T>>
99  auto operator-=(const T& r) const {
100  return make_ein_op_sub_assign(*this, r);
101  }
102  template <class T, class = enable_if_ein_op<T>>
103  auto operator*=(const T& r) const {
104  return make_ein_op_mul_assign(*this, r);
105  }
106 };
107 
108 // A unary operation on an Einstein operand.
109 template <class Op, class Derived>
110 struct ein_unary_op : public ein_op_base<Derived> {
111  Op op;
112  ein_unary_op(const Op& op) : op(op) {}
113  static constexpr index_t MaxIndex = Op::MaxIndex;
114 };
115 
116 // Unary negate.
117 template <class Op>
118 struct ein_negate_op : public ein_unary_op<Op, ein_negate_op<Op>> {
119  using base = ein_unary_op<Op, ein_negate_op<Op>>;
120  ein_negate_op(const Op& op) : base(op) {}
121  template <class Idx>
122  NDARRAY_INLINE auto operator()(const Idx& i) const {
123  return -base::op(i);
124  }
125 };
126 
127 template <class Op>
128 auto make_ein_op_negate(const Op& op) {
129  return ein_negate_op<Op>(op);
130 }
131 
132 // Cast to a different type.
133 template <class Type, class Op>
134 struct ein_cast_op : public ein_unary_op<Op, ein_cast_op<Type, Op>> {
135  using base = ein_unary_op<Op, ein_cast_op<Type, Op>>;
136  ein_cast_op(const Op& op) : base(op) {}
137  template <class Idx>
138  NDARRAY_INLINE auto operator()(const Idx& i) const {
139  return static_cast<Type>(base::op(i));
140  }
141 };
142 
143 // A binary operation of two operands.
144 template <class OpA, class OpB, class Derived>
145 struct ein_binary_op : public ein_op_base<Derived> {
146  OpA op_a;
147  OpB op_b;
148  ein_binary_op(const OpA& a, const OpB& b) : op_a(a), op_b(b) {}
149  static constexpr index_t MaxIndex = internal::max(OpA::MaxIndex, OpB::MaxIndex);
150 };
151 
// Defines a make_<name>(a, b) factory for the binary expression type <name>.
// The second argument is unused; it is accepted so this helper can be
// invoked uniformly from both of the node-defining macros below.
#define NDARRAY_MAKE_EIN_BINARY_HELPERS(name, op)                                                  \
  template <class OpA, class OpB>                                                                  \
  auto make_##name(const OpA& a, const OpB& b) {                                                   \
    return name<OpA, OpB>(a, b);                                                                   \
  }
157 
// Defines an expression node type applying the infix operator `op`
// elementwise, along with its make_<name> factory. `is_assign_` marks
// whether the operation is an assignment (the root of a reduction).
#define NDARRAY_MAKE_EIN_BINARY_OP(name, op, is_assign_)                                           \
  template <class OpA, class OpB>                                                                  \
  struct name : public ein_binary_op<OpA, OpB, name<OpA, OpB>> {                                   \
    using base = ein_binary_op<OpA, OpB, name>;                                                    \
    name(const OpA& a, const OpB& b) : base(a, b) {}                                               \
    using is_assign = is_assign_;                                                                  \
    template <class Idx>                                                                           \
    NDARRAY_INLINE auto operator()(const Idx& i) const {                                           \
      return base::op_a(i) op base::op_b(i);                                                       \
    }                                                                                              \
  };                                                                                               \
  NDARRAY_MAKE_EIN_BINARY_HELPERS(name, op)
170 
// Defines an expression node type applying the binary function `fn`
// (e.g. min or max) elementwise, along with its make_<name> factory.
// The using-declarations make internal::min/max visible alongside any
// ADL candidates when `fn` is called unqualified.
//
// NOTE: the helper invocation below previously passed the stray token `op`
// (not a parameter of this macro); it only compiled because the helper
// ignores its second argument. Pass `fn` for consistency with
// NDARRAY_MAKE_EIN_BINARY_OP.
#define NDARRAY_MAKE_EIN_BINARY_FN(name, fn)                                                       \
  template <class OpA, class OpB>                                                                  \
  struct name : public ein_binary_op<OpA, OpB, name<OpA, OpB>> {                                   \
    using base = ein_binary_op<OpA, OpB, name>;                                                    \
    name(const OpA& a, const OpB& b) : base(a, b) {}                                               \
    template <class Idx>                                                                           \
    NDARRAY_INLINE auto operator()(const Idx& i) const {                                           \
      using internal::min;                                                                         \
      using internal::max;                                                                         \
      return fn(base::op_a(i), base::op_b(i));                                                     \
    }                                                                                              \
  };                                                                                               \
  NDARRAY_MAKE_EIN_BINARY_HELPERS(name, fn)
184 
185 // Define the expression types for the operations we support.
186 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_add, +, std::false_type);
187 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_sub, -, std::false_type);
188 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_mul, *, std::false_type);
189 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_div, /, std::false_type);
190 NDARRAY_MAKE_EIN_BINARY_FN(ein_op_min, min);
191 NDARRAY_MAKE_EIN_BINARY_FN(ein_op_max, max);
192 
193 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_assign, =, std::true_type);
194 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_add_assign, +=, std::true_type);
195 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_sub_assign, -=, std::true_type);
196 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_mul_assign, *=, std::true_type);
197 
198 #undef NDARRAY_MAKE_EIN_BINARY_FN
199 #undef NDARRAY_MAKE_EIN_BINARY_OP
200 #undef NDARRAY_MAKE_EIN_BINARY_HELPERS
201 
202 } // namespace internal
203 
206 template <class Type, class Op>
207 auto cast(const internal::ein_op_base<Op>& op) {
208  return internal::ein_cast_op<Type, Op>(op.derived());
209 }
210 
212 template <class OpA, class OpB>
213 auto min(const internal::ein_op_base<OpA>& a, const internal::ein_op_base<OpB>& b) {
214  return internal::make_ein_op_min(a.derived(), b.derived());
215 }
216 template <class OpA, class OpB>
217 auto max(const internal::ein_op_base<OpA>& a, const internal::ein_op_base<OpB>& b) {
218  return internal::make_ein_op_max(a.derived(), b.derived());
219 }
220 
221 namespace internal {
222 
223 // Let these be found via ADL too.
224 using nda::cast;
225 using nda::max;
226 using nda::min;
227 
228 // If multiple operands provide the same dim, we need to reconcile them
229 // to one dim. If you follow a compiler or runtime error here, your
230 // Einstein expression tries to address two dimensions that have different
231 // bounds with the same loop variable.
232 // TODO: It would be nice if this error would appear when constructing the
233 // Einstein expression when possible, but that's really hard to do.
234 template <class Dim0, class... Dims,
235  class = std::enable_if_t<!any(not_equal(Dim0::Min, Dims::Min)...)>,
236  class = std::enable_if_t<!any(not_equal(Dim0::Extent, Dims::Extent)...)>>
237 NDARRAY_INLINE const Dim0& reconcile_dim(const Dim0& dim0, const Dims&... dims) {
238  if (dim0.stride() != 0) {
239  // If the first dim is an output dimension, just require the other dims
240  // to be in-bounds. This is a slightly relaxed requirement compared to the
241  // compile-time constraints.
242  assert(all(dims.is_in_range(dim0)...));
243  } else {
244  // If this is a reduction dimension, all of the dims must match. For
245  // example, this is the constraint that checks that the inner dimensions of
246  // a matrix multiplication are compatible.
247  assert(all(dim0.min() == dims.min()...));
248  assert(all(dim0.extent() == dims.extent()...));
249  }
250  return dim0;
251 }
252 // If we have zero dims, the user skipped a dim index, so we need a dummy
253 // loop.
254 NDARRAY_INLINE dim<0, 1, 0> reconcile_dim() { return {}; }
255 
256 template <class... Dims, size_t... Is>
257 NDARRAY_INLINE auto reconcile_dim(const std::tuple<Dims...>& dims, index_sequence<Is...>) {
258  return reconcile_dim(std::get<Is>(dims)...);
259 }
260 template <class... Dims>
261 NDARRAY_INLINE auto reconcile_dim(const std::tuple<Dims...>& dims) {
262  return reconcile_dim(dims, make_index_sequence<sizeof...(Dims)>());
263 }
264 
265 // Get the shape of an ein_reduce operand, or an empty shape if not an array.
266 template <class T, class Shape>
267 NDARRAY_INLINE const auto& dims_of(const array_ref<T, Shape>& op) {
268  return op.shape().dims();
269 }
270 template <class T>
271 NDARRAY_INLINE std::tuple<> dims_of(const T& op) {
272  return std::tuple<>();
273 }
274 
275 // Helper to reinterpret a dim with a new stride.
276 template <index_t NewStride, index_t Min, index_t Extent, index_t Stride>
277 NDARRAY_INLINE auto with_stride(const dim<Min, Extent, Stride>& d) {
278  return dim<Min, Extent, NewStride>(d.min(), d.extent());
279 }
280 template <index_t NewStride, class Dim>
281 NDARRAY_INLINE auto with_stride(const std::tuple<Dim>& maybe_dim) {
282  return std::make_tuple(with_stride<NewStride>(std::get<0>(maybe_dim)));
283 }
284 template <index_t NewStride>
285 NDARRAY_INLINE std::tuple<> with_stride(std::tuple<> maybe_dim) {
286  return maybe_dim;
287 }
288 
// Tag types used to overload gather_dims on the intended use of the
// gathered shape: the inferred result shape, the declared result shape, or
// an operand's (stride-0) shape.
class is_inferred_shape {};
class is_result_shape {};
class is_operand_shape {};
293 
294 // Get a dim from an operand, depending on the intended use of the shape.
295 template <size_t Dim, class Dims, size_t... Is>
296 NDARRAY_INLINE auto gather_dims(is_result_shape, const ein_op<Dims, Is...>& op) {
297  // If this is part of the result, we want to keep its strides.
298  return get_or_empty<index_of<Dim, Is...>()>(dims_of(op.op));
299 }
300 template <size_t Dim, class Dims, size_t... Is>
301 NDARRAY_INLINE auto gather_dims(is_inferred_shape, const ein_op<Dims, Is...>& op) {
302  // For inferred shapes, we want shapes without any constexpr strides, so it can be reshaped.
303  return with_stride<dynamic>(get_or_empty<index_of<Dim, Is...>()>(dims_of(op.op)));
304 }
305 template <size_t Dim, class Dims, size_t... Is>
306 NDARRAY_INLINE auto gather_dims(is_operand_shape, const ein_op<Dims, Is...>& op) {
307  // If this is an operand shape, we want all of its dimensions to be stride 0.
308  return with_stride<0>(get_or_empty<index_of<Dim, Is...>()>(dims_of(op.op)));
309 }
310 
311 template <size_t Dim, class Kind, class Op, class X>
312 NDARRAY_INLINE auto gather_dims(Kind kind, const ein_unary_op<Op, X>& op) {
313  return gather_dims<Dim>(kind, op.op);
314 }
315 template <size_t Dim, class Kind, class OpA, class OpB, class X>
316 NDARRAY_INLINE auto gather_dims(Kind kind, const ein_binary_op<OpA, OpB, X>& op) {
317  return std::tuple_cat(gather_dims<Dim>(kind, op.op_a), gather_dims<Dim>(kind, op.op_b));
318 }
319 
320 template <size_t Dim, class Kind0, class Op0, class Kind1, class Op1>
321 NDARRAY_INLINE auto gather_dims(Kind0 kind0, const Op0& op0, Kind1 kind1, const Op1& op1) {
322  return std::tuple_cat(gather_dims<Dim>(kind0, op0), gather_dims<Dim>(kind1, op1));
323 }
324 template <size_t... Is, class... KindAndOps>
325 NDARRAY_UNIQUE auto make_ein_reduce_shape(
326  index_sequence<Is...>, const KindAndOps&... kind_and_ops) {
327  return make_shape(reconcile_dim(gather_dims<Is>(kind_and_ops...))...);
328 }
329 
330 } // namespace internal
331 
338 template <size_t... Is, class Op, class = internal::enable_if_callable<Op, decltype(Is)...>>
339 auto ein(Op op) {
340  return internal::ein_op<Op, Is...>(std::move(op));
341 }
342 template <size_t... Is, class T, class Shape, class Alloc,
343  class = std::enable_if_t<sizeof...(Is) == Shape::rank()>>
344 auto ein(array<T, Shape, Alloc>& op) {
345  return ein<Is...>(op.ref());
346 }
347 template <size_t... Is, class T, class Shape, class Alloc,
348  class = std::enable_if_t<sizeof...(Is) == Shape::rank()>>
349 auto ein(const array<T, Shape, Alloc>& op) {
350  return ein<Is...>(op.ref());
351 }
352 
358 template <class T>
359 auto ein(T& scalar) {
360  return ein<>(array_ref<T, shape<>>(&scalar, {}));
361 }
362 
364 template <size_t I0, class T>
365 auto ein(T* x, size_t N) {
366  return ein<I0>(make_array_ref(&x[0], shape<dim<0, dynamic, 1>>(static_cast<index_t>(N))));
367 }
368 
370 template <size_t I0, class T, size_t N>
371 auto ein(T (&x)[N]) {
372  return ein<I0>(make_array_ref(&x[0], shape<dim<0, N, 1>>()));
373 }
374 
418 template <class Expr, class = internal::enable_if_ein_assign<Expr>>
419 NDARRAY_INLINE auto ein_reduce(const Expr& expr) {
420  constexpr index_t LoopRank = Expr::MaxIndex + 1;
421 
422  // Gather the dimensions identified by the indices. gather_dims keeps the
423  // first dimension it finds, so we want that to be the result dimension if it
424  // is present. If not, this selects one of the operand dimensions, which are
425  // given stride 0.
426  auto reduction_shape = internal::make_ein_reduce_shape(internal::make_index_sequence<LoopRank>(),
427  internal::is_result_shape(), expr.op_a, internal::is_operand_shape(), expr.op_b);
428 
429  // TODO: Try to compile-time optimize reduction_shape? :)
430  // This is maybe actually somewhat doable, simply moving the for_each_index
431  // call below into a function that could be overloaded based on the type of
432  // the shape would enable different optimizations. This function could be a
433  // member of a template parameter/parameter of this function, enabling the
434  // caller to customize the optimization. However, it's still very difficult
435  // to make useful optimizations without making some assumptions about the
436  // dimensions of the shape.
437 
438  // Perform the reduction.
439  for_each_index_in_order(reduction_shape, expr);
440 
441  // Assume the expr is an assignment, and return the left-hand side.
442  return expr.op_a.op;
443 }
444 
446 template <size_t... ResultIs, class Expr, class = internal::enable_if_ein_op<Expr>>
447 NDARRAY_UNIQUE auto make_ein_reduce_shape(const Expr& expr) {
448  auto result_shape = internal::make_ein_reduce_shape(
449  internal::index_sequence<ResultIs...>(), internal::is_inferred_shape(), expr);
450  return make_compact(result_shape);
451 }
452 
472 // TODO: It would be nice to be able to express reductions other than sums.
473 // TODO: Add an overload with a default ResultIs... = 0, 1, 2, ... This requires
474 // also inferring the rank of the result.
475 template <class T, size_t... ResultIs, class Expr, class Alloc = std::allocator<T>,
476  class = internal::enable_if_ein_op<Expr>>
477 NDARRAY_INLINE auto make_ein_sum(
478  const Expr& expr, const T& init = T(), const Alloc& alloc = Alloc()) {
479  auto result_shape = make_ein_reduce_shape<ResultIs...>(expr);
480  auto result = make_array<T>(result_shape, init, alloc);
481  ein_reduce(ein<ResultIs...>(result) += expr);
482  return result;
483 }
484 
485 } // namespace nda
486 
487 #endif // NDARRAY_EIN_REDUCE_H
NDARRAY_HOST_DEVICE Shape & shape()
Definition: array.h:2105
NDARRAY_INLINE auto ein_reduce(const Expr &expr)
Definition: ein_reduce.h:419
Definition: array.h:1036
NDARRAY_HOST_DEVICE array_ref< T, Shape > make_array_ref(T *base, const Shape &shape)
Definition: array.h:1970
const interval< 0,-1 > all
Definition: array.h:363
auto ein(Op op)
Definition: ein_reduce.h:339
Definition: array.h:1961
Definition: array.h:1963
NDARRAY_INLINE auto make_ein_sum(const Expr &expr, const T &init=T(), const Alloc &alloc=Alloc())
Definition: ein_reduce.h:477
auto cast(const internal::ein_op_base< Op > &op)
Definition: ein_reduce.h:207
Main header for array library.
Definition: absl.h:10
void move(const array_ref< TSrc, ShapeSrc > &src, const array_ref< TDst, ShapeDst > &dst)
Definition: array.h:2804
NDARRAY_HOST_DEVICE auto make_shape(Dims...dims)
Definition: array.h:1040
std::ptrdiff_t index_t
Definition: array.h:87
NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t min() const
Definition: array.h:293
Definition: array.h:231
NDARRAY_HOST_DEVICE auto make_compact(const Shape &s)
Definition: array.h:1620
auto min(const internal::ein_op_base< OpA > &a, const internal::ein_op_base< OpB > &b)
Definition: ein_reduce.h:213
array_ref< T, Shape > ref()
Definition: array.h:2675
NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t extent() const
Definition: array.h:296