array
C++ library for multi-dimensional arrays
ein_reduce.h
Go to the documentation of this file.
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
19 #ifndef NDARRAY_EIN_REDUCE_H
20 #define NDARRAY_EIN_REDUCE_H
21 
22 #include "array/array.h"
23 
24 namespace nda {
25 
26 namespace internal {
27 
// TODO: Find a way to enable operations with non-op types? e.g. scalars.
// SFINAE helper: well-formed (void) only for types flagged as Einstein
// expression nodes, i.e. T::is_ein_op is exactly std::true_type.
template <class T>
using enable_if_ein_op =
    std::enable_if_t<std::is_same<typename T::is_ein_op, std::true_type>::value>;

// SFINAE helper: well-formed only for assignment expression nodes
// (those produced by operator=, +=, -=, *= on a leaf operand).
template <class T>
using enable_if_ein_assign =
    std::enable_if_t<std::is_same<typename T::is_assign, std::true_type>::value>;
36 
37 // Briefly, the high level goal of the next few classes is to enable construction of
38 // expressions describing Einstein summations or other reductions. This is done using
39 // a small expression template system. Normally, expression templates are troublesome
40 // due to overwhelming the compiler's ability to do CSE and other optimizations. In
41 // the case of Einstein reductions, the expressions will usually be very small...
42 
43 template <class Derived>
44 struct ein_op_base {
45  using is_ein_op = std::true_type;
46  using is_assign = std::false_type;
47 
48  // We need to be able to get the derived type when creating binary operations using
49  // this operation as an operand.
50  const Derived& derived() const { return *static_cast<const Derived*>(this); }
51 
52  auto operator-() const { return make_ein_op_negate(derived()); }
53  template <class T, class = enable_if_ein_op<T>>
54  auto operator+(const T& r) const {
55  return make_ein_op_add(derived(), r);
56  }
57  template <class T, class = enable_if_ein_op<T>>
58  auto operator-(const T& r) const {
59  return make_ein_op_sub(derived(), r);
60  }
61  template <class T, class = enable_if_ein_op<T>>
62  auto operator*(const T& r) const {
63  return make_ein_op_mul(derived(), r);
64  }
65  template <class T, class = enable_if_ein_op<T>>
66  auto operator/(const T& r) const {
67  return make_ein_op_div(derived(), r);
68  }
69 };
70 
71 // A leaf operand of an Einstein reduction expression. The Is... indicate the
72 // dimension of the reduction to use to address this operand.
73 template <class Op, size_t... Is>
74 struct ein_op : public ein_op_base<ein_op<Op, Is...>> {
75  Op op;
76  ein_op(Op op) : op(std::move(op)) {}
77 
78  // The largest dimension used by this operand.
79  static constexpr index_t MaxIndex = sizeof...(Is) == 0 ? -1 : variadic_max(Is...);
80 
81  // auto doesn't work here because it doesn't include the reference type of operator() when we
82  // need it, but it writing it includes it when we can't, e.g. if op(...) doesn't return a
83  // reference.
84  template <class Idx>
85  NDARRAY_INLINE decltype(op(Is...)) operator()(const Idx& i) const {
86  return op(std::get<Is>(i)...);
87  }
88 
89  // Only ein_op has assignment operators.
90  template <class T, class = enable_if_ein_op<T>>
91  auto operator=(const T& r) const {
92  return make_ein_op_assign(*this, r);
93  }
94  template <class T, class = enable_if_ein_op<T>>
95  auto operator+=(const T& r) const {
96  return make_ein_op_add_assign(*this, r);
97  }
98  template <class T, class = enable_if_ein_op<T>>
99  auto operator-=(const T& r) const {
100  return make_ein_op_sub_assign(*this, r);
101  }
102  template <class T, class = enable_if_ein_op<T>>
103  auto operator*=(const T& r) const {
104  return make_ein_op_mul_assign(*this, r);
105  }
106 };
107 
108 // A unary operation on an Einstein operand.
109 template <class Op, class Derived>
110 struct ein_unary_op : public ein_op_base<Derived> {
111  Op op;
112  ein_unary_op(const Op& op) : op(op) {}
113  static constexpr index_t MaxIndex = Op::MaxIndex;
114 };
115 
116 // Unary negate.
117 template <class Op>
118 struct ein_negate_op : public ein_unary_op<Op, ein_negate_op<Op>> {
119  using base = ein_unary_op<Op, ein_negate_op<Op>>;
120  ein_negate_op(const Op& op) : base(op) {}
121  template <class Idx>
122  NDARRAY_INLINE auto operator()(const Idx& i) const {
123  return -base::op(i);
124  }
125 };
126 
127 template <class Op>
128 auto make_ein_op_negate(const Op& op) {
129  return ein_negate_op<Op>(op);
130 }
131 
132 // Cast to a different type.
133 template <class Type, class Op>
134 struct ein_cast_op : public ein_unary_op<Op, ein_cast_op<Type, Op>> {
135  using base = ein_unary_op<Op, ein_cast_op<Type, Op>>;
136  ein_cast_op(const Op& op) : base(op) {}
137  template <class Idx>
138  NDARRAY_INLINE auto operator()(const Idx& i) const {
139  return static_cast<Type>(base::op(i));
140  }
141 };
142 
143 // A binary operation of two operands.
144 template <class OpA, class OpB, class Derived>
145 struct ein_binary_op : public ein_op_base<Derived> {
146  OpA op_a;
147  OpB op_b;
148  ein_binary_op(const OpA& a, const OpB& b) : op_a(a), op_b(b) {}
149  static constexpr index_t MaxIndex = internal::max(OpA::MaxIndex, OpB::MaxIndex);
150 };
151 
// Defines a make_<name>(a, b) factory for the binary expression type <name>.
// The second argument is unused; it is accepted so this helper can be
// invoked uniformly from both of the node-defining macros below.
#define NDARRAY_MAKE_EIN_BINARY_HELPERS(name, op)                                                  \
  template <class OpA, class OpB>                                                                  \
  auto make_##name(const OpA& a, const OpB& b) {                                                   \
    return name<OpA, OpB>(a, b);                                                                   \
  }
157 
// Defines an expression node type applying the infix operator `op`
// elementwise, along with its make_<name> factory. `is_assign_` marks
// whether the operation is an assignment (the root of a reduction).
#define NDARRAY_MAKE_EIN_BINARY_OP(name, op, is_assign_)                                           \
  template <class OpA, class OpB>                                                                  \
  struct name : public ein_binary_op<OpA, OpB, name<OpA, OpB>> {                                   \
    using base = ein_binary_op<OpA, OpB, name>;                                                    \
    name(const OpA& a, const OpB& b) : base(a, b) {}                                               \
    using is_assign = is_assign_;                                                                  \
    template <class Idx>                                                                           \
    NDARRAY_INLINE auto operator()(const Idx& i) const {                                           \
      return base::op_a(i) op base::op_b(i);                                                       \
    }                                                                                              \
  };                                                                                               \
  NDARRAY_MAKE_EIN_BINARY_HELPERS(name, op)
170 
// Defines an expression node type applying the binary function `fn`
// (e.g. min or max) elementwise, along with its make_<name> factory.
// The using-declarations make internal::min/max visible alongside any
// ADL candidates when `fn` is called unqualified.
//
// NOTE: the helper invocation below previously passed the stray token `op`
// (not a parameter of this macro); it only compiled because the helper
// ignores its second argument. Pass `fn` for consistency with
// NDARRAY_MAKE_EIN_BINARY_OP.
#define NDARRAY_MAKE_EIN_BINARY_FN(name, fn)                                                       \
  template <class OpA, class OpB>                                                                  \
  struct name : public ein_binary_op<OpA, OpB, name<OpA, OpB>> {                                   \
    using base = ein_binary_op<OpA, OpB, name>;                                                    \
    name(const OpA& a, const OpB& b) : base(a, b) {}                                               \
    template <class Idx>                                                                           \
    NDARRAY_INLINE auto operator()(const Idx& i) const {                                           \
      using internal::min;                                                                         \
      using internal::max;                                                                         \
      return fn(base::op_a(i), base::op_b(i));                                                     \
    }                                                                                              \
  };                                                                                               \
  NDARRAY_MAKE_EIN_BINARY_HELPERS(name, fn)
184 
185 // Define the expression types for the operations we support.
186 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_add, +, std::false_type);
187 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_sub, -, std::false_type);
188 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_mul, *, std::false_type);
189 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_div, /, std::false_type);
190 NDARRAY_MAKE_EIN_BINARY_FN(ein_op_min, min);
191 NDARRAY_MAKE_EIN_BINARY_FN(ein_op_max, max);
192 
193 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_assign, =, std::true_type);
194 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_add_assign, +=, std::true_type);
195 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_sub_assign, -=, std::true_type);
196 NDARRAY_MAKE_EIN_BINARY_OP(ein_op_mul_assign, *=, std::true_type);
197 
198 #undef NDARRAY_MAKE_EIN_BINARY_FN
199 #undef NDARRAY_MAKE_EIN_BINARY_OP
200 #undef NDARRAY_MAKE_EIN_BINARY_HELPERS
201 
202 } // namespace internal
203 
206 template <class Type, class Op>
207 auto cast(const internal::ein_op_base<Op>& op) {
208  return internal::ein_cast_op<Type, Op>(op.derived());
209 }
210 
212 template <class OpA, class OpB>
213 auto min(const internal::ein_op_base<OpA>& a, const internal::ein_op_base<OpB>& b) {
214  return internal::make_ein_op_min(a.derived(), b.derived());
215 }
216 template <class OpA, class OpB>
217 auto max(const internal::ein_op_base<OpA>& a, const internal::ein_op_base<OpB>& b) {
218  return internal::make_ein_op_max(a.derived(), b.derived());
219 }
220 
221 namespace internal {
222 
223 // Let these be found via ADL too.
224 using nda::cast;
225 using nda::max;
226 using nda::min;
227 
228 // If multiple operands provide the same dim, we need to reconcile them
229 // to one dim. If you follow a compiler or runtime error here, your
230 // Einstein expression tries to address two dimensions that have different
231 // bounds with the same loop variable.
232 // TODO: It would be nice if this error would appear when constructing the
233 // Einstein expression when possible, but that's really hard to do.
234 template <class Dim0, class... Dims,
235  class = std::enable_if_t<!any(not_equal(Dim0::Min, Dims::Min)...)>,
236  class = std::enable_if_t<!any(not_equal(Dim0::Extent, Dims::Extent)...)>>
237 NDARRAY_INLINE const Dim0& reconcile_dim(const Dim0& dim0, const Dims&... dims) {
238  if (dim0.stride() != 0) {
239  // If the first dim is an output dimension, just require the other dims
240  // to be in-bounds. This is a slightly relaxed requirement compared to the
241  // compile-time constraints.
242  assert(all(dims.is_in_range(dim0)...));
243  } else {
244  // If this is a reduction dimension, all of the dims must match. For
245  // example, this is the constraint that checks that the inner dimensions of
246  // a matrix multiplication are compatible.
247  assert(all(dim0.min() == dims.min()...));
248  assert(all(dim0.extent() == dims.extent()...));
249  }
250  return dim0;
251 }
252 // If we have zero dims, the user skipped a dim index, so we need a dummy
253 // loop.
254 NDARRAY_INLINE dim<0, 1, 0> reconcile_dim() { return {}; }
255 
256 template <class... Dims, size_t... Is>
257 NDARRAY_INLINE auto reconcile_dim(const std::tuple<Dims...>& dims, index_sequence<Is...>) {
258  return reconcile_dim(std::get<Is>(dims)...);
259 }
260 template <class... Dims>
261 NDARRAY_INLINE auto reconcile_dim(const std::tuple<Dims...>& dims) {
262  return reconcile_dim(dims, make_index_sequence<sizeof...(Dims)>());
263 }
264 
265 // Get the shape of an ein_reduce operand, or an empty shape if not an array.
266 template <class T, class Shape>
267 NDARRAY_INLINE const auto& dims_of(const array_ref<T, Shape>& op) {
268  return op.shape().dims();
269 }
270 template <class T>
271 NDARRAY_INLINE std::tuple<> dims_of(const T& op) {
272  return std::tuple<>();
273 }
274 
275 // Helper to reinterpret a dim with a new stride.
276 template <index_t NewStride, index_t Min, index_t Extent, index_t Stride>
277 NDARRAY_INLINE auto with_stride(const dim<Min, Extent, Stride>& d) {
278  return dim<Min, Extent, NewStride>(d.min(), d.extent());
279 }
280 template <index_t NewStride, class Dim>
281 NDARRAY_INLINE auto with_stride(const std::tuple<Dim>& maybe_dim) {
282  return std::make_tuple(with_stride<NewStride>(std::get<0>(maybe_dim)));
283 }
284 template <index_t NewStride>
285 NDARRAY_INLINE std::tuple<> with_stride(std::tuple<> maybe_dim) {
286  return maybe_dim;
287 }
288 
// Tag types used to overload gather_dims on the intended use of the
// gathered shape: the inferred result shape, the declared result shape, or
// an operand's (stride-0) shape.
class is_inferred_shape {};
class is_result_shape {};
class is_operand_shape {};
293 
294 // Get a dim from an operand, depending on the intended use of the shape.
295 template <size_t Dim, class Dims, size_t... Is>
296 NDARRAY_INLINE auto gather_dims(is_result_shape, const ein_op<Dims, Is...>& op) {
297  // If this is part of the result, we want to keep its strides.
298  return get_or_empty<index_of<Dim, Is...>()>(dims_of(op.op));
299 }
300 template <size_t Dim, class Dims, size_t... Is>
301 NDARRAY_INLINE auto gather_dims(is_inferred_shape, const ein_op<Dims, Is...>& op) {
302  // For inferred shapes, we want shapes without any constexpr strides, so it can be reshaped.
303  return with_stride<dynamic>(get_or_empty<index_of<Dim, Is...>()>(dims_of(op.op)));
304 }
305 template <size_t Dim, class Dims, size_t... Is>
306 NDARRAY_INLINE auto gather_dims(is_operand_shape, const ein_op<Dims, Is...>& op) {
307  // If this is an operand shape, we want all of its dimensions to be stride 0.
308  return with_stride<0>(get_or_empty<index_of<Dim, Is...>()>(dims_of(op.op)));
309 }
310 
311 template <size_t Dim, class Kind, class Op, class X>
312 NDARRAY_INLINE auto gather_dims(Kind kind, const ein_unary_op<Op, X>& op) {
313  return gather_dims<Dim>(kind, op.op);
314 }
315 template <size_t Dim, class Kind, class OpA, class OpB, class X>
316 NDARRAY_INLINE auto gather_dims(Kind kind, const ein_binary_op<OpA, OpB, X>& op) {
317  return std::tuple_cat(gather_dims<Dim>(kind, op.op_a), gather_dims<Dim>(kind, op.op_b));
318 }
319 
320 template <size_t Dim, class Kind0, class Op0, class Kind1, class Op1>
321 NDARRAY_INLINE auto gather_dims(Kind0 kind0, const Op0& op0, Kind1 kind1, const Op1& op1) {
322  return std::tuple_cat(gather_dims<Dim>(kind0, op0), gather_dims<Dim>(kind1, op1));
323 }
324 template <size_t... Is, class... KindAndOps>
325 NDARRAY_UNIQUE auto make_ein_reduce_shape(
326  index_sequence<Is...>, const KindAndOps&... kind_and_ops) {
327  return make_shape(reconcile_dim(gather_dims<Is>(kind_and_ops...))...);
328 }
329 
330 } // namespace internal
331 
338 template <size_t... Is, class Op, class = internal::enable_if_callable<Op, decltype(Is)...>>
339 auto ein(Op op) {
340  return internal::ein_op<Op, Is...>(std::move(op));
341 }
342 template <size_t... Is, class T, class Shape, class Alloc,
343  class = std::enable_if_t<sizeof...(Is) == Shape::rank()>>
344 auto ein(array<T, Shape, Alloc>& op) {
345  return ein<Is...>(op.ref());
346 }
347 template <size_t... Is, class T, class Shape, class Alloc,
348  class = std::enable_if_t<sizeof...(Is) == Shape::rank()>>
349 auto ein(const array<T, Shape, Alloc>& op) {
350  return ein<Is...>(op.ref());
351 }
352 
358 template <class T>
359 auto ein(T& scalar) {
360  return ein<>(array_ref<T, shape<>>(&scalar, {}));
361 }
362 
364 template <size_t I0, class T>
365 auto ein(T* x, size_t N) {
366  return ein<I0>(make_array_ref(&x[0], shape<dim<0, dynamic, 1>>(static_cast<index_t>(N))));
367 }
368 
370 template <size_t I0, class T, size_t N>
371 auto ein(T (&x)[N]) {
372  return ein<I0>(make_array_ref(&x[0], shape<dim<0, N, 1>>()));
373 }
374 
418 template <class Expr, class = internal::enable_if_ein_assign<Expr>>
419 NDARRAY_INLINE auto ein_reduce(const Expr& expr) {
420  constexpr index_t LoopRank = Expr::MaxIndex + 1;
421 
422  // Gather the dimensions identified by the indices. gather_dims keeps the
423  // first dimension it finds, so we want that to be the result dimension if it
424  // is present. If not, this selects one of the operand dimensions, which are
425  // given stride 0.
426  auto reduction_shape = internal::make_ein_reduce_shape(internal::make_index_sequence<LoopRank>(),
427  internal::is_result_shape(), expr.op_a, internal::is_operand_shape(), expr.op_b);
428 
429  // TODO: Try to compile-time optimize reduction_shape? :)
430  // This is maybe actually somewhat doable, simply moving the for_each_index
431  // call below into a function that could be overloaded based on the type of
432  // the shape would enable different optimizations. This function could be a
433  // member of a template parameter/parameter of this function, enabling the
434  // caller to customize the optimization. However, it's still very difficult
435  // to make useful optimizations without making some assumptions about the
436  // dimensions of the shape.
437 
438  // Perform the reduction.
439  for_each_index_in_order(reduction_shape, expr);
440 
441  // Assume the expr is an assignment, and return the left-hand side.
442  return expr.op_a.op;
443 }
444 
446 template <size_t... ResultIs, class Expr, class = internal::enable_if_ein_op<Expr>>
447 NDARRAY_UNIQUE auto make_ein_reduce_shape(const Expr& expr) {
448  auto result_shape = internal::make_ein_reduce_shape(
449  internal::index_sequence<ResultIs...>(), internal::is_inferred_shape(), expr);
450  return make_compact(result_shape);
451 }
452 
472 // TODO: It would be nice to be able to express reductions other than sums.
473 // TODO: Add an overload with a default ResultIs... = 0, 1, 2, ... This requires
474 // also inferring the rank of the result.
475 template <class T, size_t... ResultIs, class Expr, class Alloc = std::allocator<T>,
476  class = internal::enable_if_ein_op<Expr>>
477 NDARRAY_INLINE auto make_ein_sum(
478  const Expr& expr, const T& init = T(), const Alloc& alloc = Alloc()) {
479  auto result_shape = make_ein_reduce_shape<ResultIs...>(expr);
480  auto result = make_array<T>(result_shape, init, alloc);
481  ein_reduce(ein<ResultIs...>(result) += expr);
482  return result;
483 }
484 
485 } // namespace nda
486 
487 #endif // NDARRAY_EIN_REDUCE_H
NDARRAY_HOST_DEVICE Shape & shape()
Definition: array.h:2105
NDARRAY_INLINE auto ein_reduce(const Expr &expr)
Definition: ein_reduce.h:419
Definition: array.h:1036
NDARRAY_HOST_DEVICE array_ref< T, Shape > make_array_ref(T *base, const Shape &shape)
Definition: array.h:1970
const interval< 0,-1 > all
Definition: array.h:363
auto ein(Op op)
Definition: ein_reduce.h:339
Definition: array.h:1961
Definition: array.h:1963
NDARRAY_INLINE auto make_ein_sum(const Expr &expr, const T &init=T(), const Alloc &alloc=Alloc())
Definition: ein_reduce.h:477
auto cast(const internal::ein_op_base< Op > &op)
Definition: ein_reduce.h:207
Main header for array library.
Definition: absl.h:10
void move(const array_ref< TSrc, ShapeSrc > &src, const array_ref< TDst, ShapeDst > &dst)
Definition: array.h:2804
NDARRAY_HOST_DEVICE auto make_shape(Dims...dims)
Definition: array.h:1040
std::ptrdiff_t index_t
Definition: array.h:87
NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t min() const
Definition: array.h:293
Definition: array.h:231
NDARRAY_HOST_DEVICE auto make_compact(const Shape &s)
Definition: array.h:1620
auto min(const internal::ein_op_base< OpA > &a, const internal::ein_op_base< OpB > &b)
Definition: ein_reduce.h:213
array_ref< T, Shape > ref()
Definition: array.h:2675
NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t extent() const
Definition: array.h:296