array
C++ library for multi-dimensional arrays
array.h
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
19 #ifndef NDARRAY_ARRAY_H
20 #define NDARRAY_ARRAY_H
21 
22 #include <array>
23 // TODO(jiawen): CUDA *should* support assert on device. The need to disable it here might be
24 // due to the fact that we are not depending on the CUDA toolkit.
25 #if defined(__CUDA__)
26 #undef assert
27 #define assert(e)
28 #else
29 #include <cassert>
30 #endif
31 
32 #include <cstdio>
33 #include <limits>
34 #include <memory>
35 #include <tuple>
36 #include <type_traits>
37 
38 // Some things in this header are unbearably slow without optimization if they
39 // don't get inlined. (Check clang before GCC: clang also defines __GNUC__.)
40 #if defined(__clang__)
41 #if defined(__CUDA__)
42 #define NDARRAY_INLINE __forceinline__
43 #else
44 #define NDARRAY_INLINE inline __attribute__((always_inline))
45 #endif
46 #elif defined(__GNUC__)
47 #define NDARRAY_INLINE inline __attribute__((always_inline))
48 #else
49 #define NDARRAY_INLINE inline
50 #endif
51 
52 // Many of the functions in this header are templates that are usually unique
53 // specializations, which are beneficial to inline. The compiler will inline
54 // functions it knows are used only once, but it can't know this unless the
55 // functions have internal linkage.
56 #define NDARRAY_UNIQUE static
57 
58 // Functions attributed with NDARRAY_HOST_DEVICE can run in both host and device code when compiling with CUDA.
59 #if defined(__CUDA__)
60 #define NDARRAY_HOST_DEVICE __device__ __host__
61 #else
62 #define NDARRAY_HOST_DEVICE
63 #endif
64 
65 #if defined(__GNUC__) || defined(__clang__)
66 #define NDARRAY_RESTRICT __restrict__
67 #else
68 #define NDARRAY_RESTRICT
69 #endif
70 
71 #if defined(__CUDA__)
72 #define NDARRAY_PRINT_ERR(...) printf(__VA_ARGS__)
73 #else
74 #define NDARRAY_PRINT_ERR(...) fprintf(stderr, __VA_ARGS__)
75 #endif
76 
77 namespace nda {
78 
79 using size_t = std::size_t;
83 #ifdef NDARRAY_INT_INDICES
84 using index_t = int;
85 #define NDARRAY_INDEX_T_FMT "%d"
86 #else
87 using index_t = std::ptrdiff_t;
88 #define NDARRAY_INDEX_T_FMT "%td"
89 #endif
90 
96 // It would be better to use a more unreasonable value that would never be
97 // used in practice, or find a better way to express this pattern.
98 // (https://github.com/dsharlet/array/issues/9).
99 constexpr index_t dynamic = -9;
100 
103 constexpr index_t unresolved = std::numeric_limits<index_t>::min();
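// [Editor's note, not part of the original header] Illustrative sketch of how these two
// sentinels are used: `dynamic` marks a min/extent/stride that is only known at runtime,
// while `unresolved` marks a stride that has not been chosen yet and is filled in later by
// shape::resolve(). For example:
//
//   nda::dim<0, 3> a;      // min and extent fixed at compile time; stride still unresolved
//   nda::dim<> b(0, 100);  // fully dynamic dim; stride resolved when the shape is resolved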
104 
105 namespace internal {
106 
107 // Workaround CUDA not supporting std::declval.
108 // https://stackoverflow.com/questions/31969644/compilation-error-with-nvcc-and-c11-need-minimal-failing-example
109 template <typename T>
110 NDARRAY_HOST_DEVICE typename std::add_rvalue_reference<T>::type declval() noexcept;
111 
112 NDARRAY_INLINE constexpr index_t abs(index_t x) { return x >= 0 ? x : -x; }
113 
114 NDARRAY_INLINE constexpr index_t is_static(index_t x) { return x != dynamic; }
115 NDARRAY_INLINE constexpr index_t is_dynamic(index_t x) { return x == dynamic; }
116 
117 NDARRAY_INLINE constexpr index_t is_resolved(index_t x) { return x != unresolved; }
118 NDARRAY_INLINE constexpr index_t is_unresolved(index_t x) { return x == unresolved; }
119 
120 constexpr bool is_dynamic(index_t a, index_t b) { return is_dynamic(a) || is_dynamic(b); }
121 
122 // Returns true if a and b are statically not equal, but they may still be
123 // dynamically not equal even if this returns false.
124 constexpr bool not_equal(index_t a, index_t b) { return is_static(a) && is_static(b) && a != b; }
125 
126 template <index_t A, index_t B>
127 using disable_if_not_equal = std::enable_if_t<!not_equal(A, B)>;
128 
129 // Math for (possibly) static values.
130 constexpr index_t static_abs(index_t x) { return is_dynamic(x) ? dynamic : abs(x); }
131 constexpr index_t static_add(index_t a, index_t b) { return is_dynamic(a, b) ? dynamic : a + b; }
132 constexpr index_t static_sub(index_t a, index_t b) { return is_dynamic(a, b) ? dynamic : a - b; }
133 constexpr index_t static_mul(index_t a, index_t b) { return is_dynamic(a, b) ? dynamic : a * b; }
134 constexpr index_t static_min(index_t a, index_t b) {
135  return is_dynamic(a, b) ? dynamic : (a < b ? a : b);
136 }
137 constexpr index_t static_max(index_t a, index_t b) {
138  return is_dynamic(a, b) ? dynamic : (a > b ? a : b);
139 }
140 
141 // A type that mimics a constexpr index_t with value Value, unless Value is
142 // dynamic, then mimics index_t.
143 template <index_t Value>
144 struct constexpr_index {
145 public:
146  // These asserts are really hard to debug
147  // https://github.com/dsharlet/array/issues/26
148  NDARRAY_HOST_DEVICE constexpr_index(index_t value = Value) { assert(value == Value); }
149  NDARRAY_HOST_DEVICE constexpr_index& operator=(index_t value) {
150  assert(value == Value);
151  return *this;
152  }
153  NDARRAY_HOST_DEVICE NDARRAY_INLINE operator index_t() const { return Value; }
154 };
155 
156 template <>
157 struct constexpr_index<dynamic> {
158  index_t value_;
159 
160 public:
161  NDARRAY_HOST_DEVICE constexpr_index(index_t value) : value_(value) {}
162  NDARRAY_HOST_DEVICE constexpr_index& operator=(index_t value) {
163  value_ = value;
164  return *this;
165  }
166  NDARRAY_INLINE NDARRAY_HOST_DEVICE operator index_t() const { return value_; }
167 };
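// [Editor's note, not part of the original header] Rough illustration of the two cases above:
// constexpr_index<5> stores nothing and always converts to 5 (assigning any other value
// asserts), while constexpr_index<dynamic> stores a runtime index_t. This is how interval<>
// and dim<> carry compile-time mins/extents/strides without paying for their storage:
//
//   internal::constexpr_index<5> a;          // empty object; (index_t)a == 5
//   internal::constexpr_index<dynamic> b(7); // holds the runtime value 7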
168 
169 // Defined here to avoid pulling in <algorithm>.
170 template <typename T>
171 constexpr const T& max(const T& a, const T& b) {
172  return (a < b) ? b : a;
173 }
174 
175 template <typename T>
176 constexpr const T& min(const T& a, const T& b) {
177  return (b < a) ? b : a;
178 }
179 
180 } // namespace internal
181 
183 class index_iterator {
184  index_t i_;
185 
186 public:
188  index_iterator(index_t i) : i_(i) {}
189 
191  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t operator*() const { return i_; }
192 
193  NDARRAY_INLINE NDARRAY_HOST_DEVICE bool operator==(const index_iterator& r) const {
194  return i_ == r.i_;
195  }
196  NDARRAY_INLINE NDARRAY_HOST_DEVICE bool operator!=(const index_iterator& r) const {
197  return i_ != r.i_;
198  }
199 
200  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_iterator operator++(int) { return index_iterator(i_++); }
201  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_iterator operator--(int) { return index_iterator(i_--); }
202  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_iterator& operator++() {
203  ++i_;
204  return *this;
205  }
206  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_iterator& operator--() {
207  --i_;
208  return *this;
209  }
210  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_iterator& operator+=(index_t r) {
211  i_ += r;
212  return *this;
213  }
214  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_iterator& operator-=(index_t r) {
215  i_ -= r;
216  return *this;
217  }
218  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_iterator operator+(index_t r) {
219  return index_iterator(i_ + r);
220  }
221  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_iterator operator-(index_t r) {
222  return index_iterator(i_ - r);
223  }
224  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t operator-(const index_iterator& r) {
225  return i_ - r.i_;
226  }
227  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t operator[](index_t n) const { return i_ + n; }
228 };
229 
230 template <index_t Min, index_t Extent, index_t Stride>
231 class dim;
232 
247 template <index_t Min_ = dynamic, index_t Extent_ = dynamic>
248 class interval {
249 protected:
250  internal::constexpr_index<Min_> min_;
251  internal::constexpr_index<Extent_> extent_;
252 
253 public:
254  static constexpr index_t Min = Min_;
255  static constexpr index_t Extent = Extent_;
256  static constexpr index_t Max = internal::static_sub(internal::static_add(Min, Extent), 1);
257 
265  NDARRAY_HOST_DEVICE interval(index_t min, index_t extent) : min_(min), extent_(extent) {}
266  NDARRAY_HOST_DEVICE interval(index_t min)
267  : interval(min, internal::is_static(Extent) ? Extent : 1) {}
268  NDARRAY_HOST_DEVICE interval() : interval(internal::is_static(Min) ? Min : 0) {}
269 
270  NDARRAY_HOST_DEVICE interval(const interval&) = default;
271  NDARRAY_HOST_DEVICE interval(interval&&) = default;
272  NDARRAY_HOST_DEVICE interval& operator=(const interval&) = default;
273  NDARRAY_HOST_DEVICE interval& operator=(interval&&) = default;
274 
278  template <index_t CopyMin, index_t CopyExtent,
279  class = internal::disable_if_not_equal<Min, CopyMin>,
280  class = internal::disable_if_not_equal<Extent, CopyExtent>>
281  NDARRAY_HOST_DEVICE interval(const interval<CopyMin, CopyExtent>& other)
282  : interval(other.min(), other.extent()) {}
283  template <index_t CopyMin, index_t CopyExtent,
284  class = internal::disable_if_not_equal<Min, CopyMin>,
285  class = internal::disable_if_not_equal<Extent, CopyExtent>>
286  NDARRAY_HOST_DEVICE interval& operator=(const interval<CopyMin, CopyExtent>& other) {
287  set_min(other.min());
288  set_extent(other.extent());
289  return *this;
290  }
291 
293  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t min() const { return min_; }
294  NDARRAY_INLINE NDARRAY_HOST_DEVICE void set_min(index_t min) { min_ = min; }
296  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t extent() const { return extent_; }
297  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t size() const { return extent_; }
298  NDARRAY_INLINE NDARRAY_HOST_DEVICE void set_extent(index_t extent) { extent_ = extent; }
299 
301  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t max() const { return min_ + extent_ - 1; }
302  NDARRAY_INLINE NDARRAY_HOST_DEVICE void set_max(index_t max) { set_extent(max - min_ + 1); }
303 
305  NDARRAY_INLINE NDARRAY_HOST_DEVICE bool is_in_range(index_t at) const {
306  return min_ <= at && at <= max();
307  }
310  template <index_t OtherMin, index_t OtherExtent>
311  NDARRAY_INLINE NDARRAY_HOST_DEVICE bool is_in_range(
312  const interval<OtherMin, OtherExtent>& at) const {
313  return min_ <= at.min() && at.max() <= max();
314  }
315  template <index_t OtherMin, index_t OtherExtent, index_t OtherStride>
316  NDARRAY_INLINE NDARRAY_HOST_DEVICE bool is_in_range(
317  const dim<OtherMin, OtherExtent, OtherStride>& at) const {
318  return min_ <= at.min() && at.max() <= max();
319  }
320 
322  NDARRAY_HOST_DEVICE index_iterator begin() const { return index_iterator(min_); }
324  NDARRAY_HOST_DEVICE index_iterator end() const { return index_iterator(max() + 1); }
325 
328  template <index_t OtherMin, index_t OtherExtent>
329  NDARRAY_HOST_DEVICE bool operator==(const interval<OtherMin, OtherExtent>& other) const {
330  return min_ == other.min() && extent_ == other.extent();
331  }
332  template <index_t OtherMin, index_t OtherExtent>
333  NDARRAY_HOST_DEVICE bool operator!=(const interval<OtherMin, OtherExtent>& other) const {
334  return !operator==(other);
335  }
336 };
337 
340 template <index_t Extent>
341 using fixed_interval = interval<dynamic, Extent>;
342 
344 NDARRAY_INLINE NDARRAY_HOST_DEVICE interval<> range(index_t begin, index_t end) {
345  return interval<>(begin, end - begin);
346 }
347 NDARRAY_INLINE NDARRAY_HOST_DEVICE interval<> r(index_t begin, index_t end) {
348  return interval<>(begin, end - begin);
349 }
350 
352 template <index_t Extent>
353 NDARRAY_HOST_DEVICE fixed_interval<Extent> range(index_t begin) {
354  return fixed_interval<Extent>(begin);
355 }
356 template <index_t Extent>
357 NDARRAY_HOST_DEVICE fixed_interval<Extent> r(index_t begin) {
358  return fixed_interval<Extent>(begin);
359 }
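// [Editor's note, not part of the original header] Usage sketch for intervals and the
// range()/r() helpers defined above:
//
//   nda::interval<> x(2, 3);          // min 2, extent 3: the indices 2, 3, 4
//   for (nda::index_t i : x) { ... }  // iterates 2, 3, 4 via index_iterator
//   auto y = nda::range(0, 10);       // half-open [0, 10): min 0, extent 10
//   auto z = nda::r<4>(6);            // fixed_interval<4> with min 6, extent 4 at compile time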
360 
363 const interval<0, -1> all, _;
364 
366 template <index_t Min, index_t Extent>
367 NDARRAY_HOST_DEVICE index_iterator begin(const interval<Min, Extent>& d) {
368  return d.begin();
369 }
370 template <index_t Min, index_t Extent>
371 NDARRAY_HOST_DEVICE index_iterator end(const interval<Min, Extent>& d) {
372  return d.end();
373 }
374 
376 NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t clamp(index_t x, index_t min, index_t max) {
377  return internal::min(internal::max(x, min), max);
378 }
379 
382 template <class Range>
383 NDARRAY_HOST_DEVICE index_t clamp(index_t x, const Range& r) {
384  return clamp(x, r.min(), r.max());
385 }
386 
395 template <index_t Min_ = dynamic, index_t Extent_ = dynamic, index_t Stride_ = dynamic>
396 class dim : protected interval<Min_, Extent_> {
397 public:
398  using base_range = interval<Min_, Extent_>;
399 
400 protected:
401  internal::constexpr_index<Stride_> stride_;
402 
403  using base_range::extent_;
404  using base_range::min_;
405 
406 public:
407  using base_range::Extent;
408  using base_range::Max;
409  using base_range::Min;
410 
411  static constexpr index_t Stride = Stride_;
412  static constexpr index_t DefaultStride = internal::is_static(Stride) ? Stride : unresolved;
413 
422  NDARRAY_HOST_DEVICE dim(index_t min, index_t extent, index_t stride = DefaultStride)
423  : base_range(min, extent), stride_(stride) {}
424  NDARRAY_HOST_DEVICE dim(index_t extent) : dim(internal::is_static(Min) ? Min : 0, extent) {}
425  NDARRAY_HOST_DEVICE dim() : dim(internal::is_static(Extent) ? Extent : 0) {}
426 
427  NDARRAY_HOST_DEVICE dim(const base_range& interval, index_t stride = DefaultStride)
428  : dim(interval.min(), interval.extent(), stride) {}
429  NDARRAY_HOST_DEVICE dim(const dim&) = default;
430  NDARRAY_HOST_DEVICE dim(dim&&) = default;
431  NDARRAY_HOST_DEVICE dim& operator=(const dim&) = default;
432  NDARRAY_HOST_DEVICE dim& operator=(dim&&) = default;
433 
438  template <index_t CopyMin, index_t CopyExtent, index_t CopyStride,
439  class = internal::disable_if_not_equal<Min, CopyMin>,
440  class = internal::disable_if_not_equal<Extent, CopyExtent>,
441  class = internal::disable_if_not_equal<Stride, CopyStride>>
442  NDARRAY_HOST_DEVICE dim(const dim<CopyMin, CopyExtent, CopyStride>& other)
443  : dim(other.min(), other.extent()) {
444  set_stride(other.stride());
445  }
446  template <index_t CopyMin, index_t CopyExtent, index_t CopyStride,
447  class = internal::disable_if_not_equal<Min, CopyMin>,
448  class = internal::disable_if_not_equal<Extent, CopyExtent>,
449  class = internal::disable_if_not_equal<Stride, CopyStride>>
450  NDARRAY_HOST_DEVICE dim& operator=(const dim<CopyMin, CopyExtent, CopyStride>& other) {
451  set_min(other.min());
452  set_extent(other.extent());
453  set_stride(other.stride());
454  return *this;
455  }
456 
457  using base_range::begin;
458  using base_range::end;
459  using base_range::extent;
460  using base_range::size;
461  using base_range::is_in_range;
462  using base_range::max;
463  using base_range::min;
464  using base_range::set_extent;
465  using base_range::set_max;
466  using base_range::set_min;
467 
470  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t stride() const { return stride_; }
471  NDARRAY_INLINE NDARRAY_HOST_DEVICE void set_stride(index_t stride) {
472  if (internal::is_static(Stride_)) {
473  assert(internal::is_unresolved(stride) || stride == Stride_);
474  } else {
475  stride_ = stride;
476  }
477  }
478 
480  NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t flat_offset(index_t at) const {
481  return (at - min_) * stride_;
482  }
483 
486  template <index_t OtherMin, index_t OtherExtent, index_t OtherStride>
487  NDARRAY_HOST_DEVICE bool operator==(const dim<OtherMin, OtherExtent, OtherStride>& other) const {
488  return min_ == other.min() && extent_ == other.extent() && stride_ == other.stride();
489  }
490  template <index_t OtherMin, index_t OtherExtent, index_t OtherStride>
491  NDARRAY_HOST_DEVICE bool operator!=(const dim<OtherMin, OtherExtent, OtherStride>& other) const {
492  return !operator==(other);
493  }
494 };
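// [Editor's note, not part of the original header] Usage sketch: a dim is an interval plus a
// stride, and flat_offset() maps an index within the dim to a flat memory offset:
//
//   nda::dim<> x(0, 640, 1);    // min 0, extent 640, stride 1
//   nda::dim<> y(0, 480, 640);  // rows of a 640-wide, contiguous 2D layout
//   // x.flat_offset(3) + y.flat_offset(2) == 3 * 1 + 2 * 640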
495 
497 template <index_t Extent, index_t Stride = dynamic>
498 using fixed_dim = dim<dynamic, Extent, Stride>;
499 
502 template <index_t Min = dynamic, index_t Extent = dynamic>
503 using dense_dim = dim<Min, Extent, 1>;
504 
507 template <index_t Stride>
508 using strided_dim = dim<dynamic, dynamic, Stride>;
509 
512 template <index_t Min = dynamic, index_t Extent = dynamic>
513 using broadcast_dim = dim<Min, Extent, 0>;
514 
515 namespace internal {
516 
517 // An iterator for a range of intervals.
518 // This is like a random access iterator in that it can move forward in constant time,
519 // but unlike a random access iterator, it cannot be moved in reverse.
520 template <index_t InnerExtent = dynamic>
521 class split_iterator {
522  fixed_interval<InnerExtent> i;
523  index_t outer_max;
524 
525 public:
526  NDARRAY_HOST_DEVICE split_iterator(const fixed_interval<InnerExtent>& i, index_t outer_max)
527  : i(i), outer_max(outer_max) {}
528 
529  NDARRAY_HOST_DEVICE bool operator==(const split_iterator& r) const {
530  return i.min() == r.i.min();
531  }
532  NDARRAY_HOST_DEVICE bool operator!=(const split_iterator& r) const {
533  return i.min() != r.i.min();
534  }
535 
536  NDARRAY_HOST_DEVICE fixed_interval<InnerExtent> operator*() const { return i; }
537  NDARRAY_HOST_DEVICE const fixed_interval<InnerExtent>* operator->() const { return &i; }
538 
539  NDARRAY_HOST_DEVICE split_iterator& operator+=(index_t n) {
540  assert(n >= 0);
541  if (is_static(InnerExtent)) {
542  // When the extent of the inner split is a compile-time constant,
543  // we can't shrink the out of bounds interval. Instead, shift the min,
544  // assuming the outer dimension is bigger than the inner extent.
545  i.set_min(i.min() + InnerExtent * n);
546  // Only shift the min when this straddles the end of the buffer,
547  // so the iterator can advance to the end (one past the max).
548  if (i.min() <= outer_max && i.max() > outer_max) { i.set_min(outer_max - InnerExtent + 1); }
549  } else {
550  // When the extent of the inner split is not a compile-time constant,
551  // we can just modify the extent.
552  i.set_min(i.min() + i.extent() * n);
553  index_t max = min(i.max(), outer_max);
554  i.set_extent(max - i.min() + 1);
555  }
556  return *this;
557  }
558  NDARRAY_HOST_DEVICE split_iterator operator+(index_t n) const {
559  split_iterator<InnerExtent> result(*this);
560  return result += n;
561  }
562  NDARRAY_HOST_DEVICE split_iterator& operator++() {
563  return *this += 1;
564  }
565  NDARRAY_HOST_DEVICE split_iterator operator++(int) {
566  split_iterator<InnerExtent> result(*this);
567  *this += 1;
568  return result;
569  }
570 
571  NDARRAY_HOST_DEVICE index_t operator-(const split_iterator& r) const {
572  return r.i.extent() > 0 ? (i.max() - r.i.min() + r.i.extent() - i.extent()) / r.i.extent() : 0;
573  }
574 
575  NDARRAY_HOST_DEVICE fixed_interval<InnerExtent> operator[](index_t n) const {
576  split_iterator result(*this);
577  result += n;
578  return *result;
579  }
580 };
581 
582 template <index_t InnerExtent = dynamic>
583 class split_result {
584 public:
585  using iterator = split_iterator<InnerExtent>;
586 
587 private:
588  iterator begin_;
589  iterator end_;
590 
591 public:
592  NDARRAY_HOST_DEVICE split_result(iterator begin, iterator end) : begin_(begin), end_(end) {}
593 
594  NDARRAY_HOST_DEVICE iterator begin() const { return begin_; }
595  NDARRAY_HOST_DEVICE iterator end() const { return end_; }
596 
597  NDARRAY_HOST_DEVICE index_t size() const { return end_ - begin_; }
598  NDARRAY_HOST_DEVICE iterator operator[](index_t i) const { return begin_ + i; }
599 };
600 
601 } // namespace internal
602 
613 template <index_t InnerExtent, index_t Min, index_t Extent>
614 NDARRAY_HOST_DEVICE internal::split_result<InnerExtent> split(
615  const interval<Min, Extent>& v) {
616  assert(v.extent() >= InnerExtent);
617  return {{fixed_interval<InnerExtent>(v.min()), v.max()},
618  {fixed_interval<InnerExtent>(v.max() + 1), v.max()}};
619 }
620 template <index_t InnerExtent, index_t Min, index_t Extent, index_t Stride>
621 NDARRAY_HOST_DEVICE internal::split_result<InnerExtent> split(
622  const dim<Min, Extent, Stride>& v) {
623  return split<InnerExtent>(interval<Min, Extent>(v.min(), v.extent()));
624 }
625 
633 // TODO: This probably doesn't need to be templated, but it might help
634 // avoid some conversion messes. dim<Min, Extent> probably can't implicitly
635 // convert to interval<>.
636 template <index_t Min, index_t Extent>
637 NDARRAY_HOST_DEVICE internal::split_result<> split(
638  const interval<Min, Extent>& v, index_t inner_extent) {
639  return {{interval<>(v.min(), internal::min(inner_extent, v.extent())), v.max()},
640  {interval<>(v.max() + 1, 0), v.max()}};
641 }
642 template <index_t Min, index_t Extent, index_t Stride>
643 NDARRAY_HOST_DEVICE internal::split_result<> split(
644  const dim<Min, Extent, Stride>& v, index_t inner_extent) {
645  return split(interval<Min, Extent>(v.min(), v.extent()), inner_extent);
646 }
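// [Editor's note, not part of the original header] Usage sketch for the split() overloads
// above, e.g. for tiling loops:
//
//   for (auto yo : nda::split<4>(nda::range(0, 10))) {
//     // yo is a fixed_interval<4>: [0,3], [4,7], then [6,9]; the last chunk is shifted to
//     // stay in bounds rather than shrunk, so its extent is always 4.
//   }
//   for (auto xo : nda::split(nda::range(0, 10), 4)) {
//     // xo is an interval<>: [0,3], [4,7], [8,9]; the last chunk is shrunk instead.
//   }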
647 
648 namespace internal {
649 
650 using std::index_sequence;
651 using std::make_index_sequence;
652 
653 // Call `fn` with the elements of tuple `args` unwrapped from the tuple.
654 // TODO: When we assume C++17, this can be replaced by std::apply.
655 template <class Fn, class Args, size_t... Is>
656 NDARRAY_INLINE NDARRAY_HOST_DEVICE auto apply(Fn&& fn, const Args& args, index_sequence<Is...>)
657  -> decltype(fn(std::get<Is>(args)...)) {
658  return fn(std::get<Is>(args)...);
659 }
660 template <class Fn, class Args>
661 NDARRAY_INLINE NDARRAY_HOST_DEVICE auto apply(Fn&& fn, const Args& args)
662  -> decltype(internal::apply(fn, args, make_index_sequence<std::tuple_size<Args>::value>())) {
663  return internal::apply(fn, args, make_index_sequence<std::tuple_size<Args>::value>());
664 }
665 
666 template <class Fn, class... Args>
667 using enable_if_callable = decltype(internal::declval<Fn>()(internal::declval<Args>()...));
668 template <class Fn, class Args>
669 using enable_if_applicable =
670  decltype(internal::apply(internal::declval<Fn>(), internal::declval<Args>()));
671 
672 // Some variadic reduction helpers.
673 NDARRAY_INLINE constexpr index_t sum() { return 0; }
674 NDARRAY_INLINE constexpr index_t sum(index_t x0) { return x0; }
675 NDARRAY_INLINE constexpr index_t sum(index_t x0, index_t x1) { return x0 + x1; }
676 NDARRAY_INLINE constexpr index_t sum(index_t x0, index_t x1, index_t x2) { return x0 + x1 + x2; }
677 template <class... Rest>
678 NDARRAY_INLINE constexpr index_t sum(index_t x0, index_t x1, index_t x2, index_t x3, Rest... rest) {
679  return x0 + x1 + x2 + x3 + sum(rest...);
680 }
681 
682 NDARRAY_INLINE constexpr int product() { return 1; }
683 template <class T, class... Rest>
684 NDARRAY_INLINE constexpr T product(T first, Rest... rest) {
685  return first * product(rest...);
686 }
687 
688 NDARRAY_INLINE constexpr index_t variadic_min() { return std::numeric_limits<index_t>::max(); }
689 template <class... Rest>
690 NDARRAY_INLINE constexpr index_t variadic_min(index_t first, Rest... rest) {
691  return min(first, variadic_min(rest...));
692 }
693 
694 NDARRAY_INLINE constexpr index_t variadic_max() { return std::numeric_limits<index_t>::min(); }
695 template <class... Rest>
696 NDARRAY_INLINE constexpr index_t variadic_max(index_t first, Rest... rest) {
697  return max(first, variadic_max(rest...));
698 }
699 
700 // Computes the product of the extents of the dims.
701 template <class Tuple, size_t... Is>
702 NDARRAY_HOST_DEVICE index_t product(const Tuple& t, index_sequence<Is...>) {
703  return product(std::get<Is>(t)...);
704 }
705 
706 // Returns true if all of bools are true.
707 template <class... Bools>
708 constexpr bool all(Bools... bools) {
709  return sum((bools ? 0 : 1)...) == 0;
710 }
711 template <class... Bools>
712 constexpr bool any(Bools... bools) {
713  return sum((bools ? 1 : 0)...) != 0;
714 }
715 
716 // Computes the sum of the offsets of a list of dims and indices.
717 template <class Dims, class Indices, size_t... Is>
718 NDARRAY_HOST_DEVICE index_t flat_offset(
719  const Dims& dims, const Indices& indices, index_sequence<Is...>) {
720  return sum(std::get<Is>(dims).flat_offset(std::get<Is>(indices))...);
721 }
722 
723 // Computes the minimum and maximum flat offsets spanned by a set of dims.
724 template <class Dims, size_t... Is>
725 NDARRAY_HOST_DEVICE index_t flat_min(const Dims& dims, index_sequence<Is...>) {
726  return sum((std::get<Is>(dims).extent() - 1) * min<index_t>(0, std::get<Is>(dims).stride())...);
727 }
728 
729 template <class Dims, size_t... Is>
730 NDARRAY_HOST_DEVICE index_t flat_max(const Dims& dims, index_sequence<Is...>) {
731  return sum((std::get<Is>(dims).extent() - 1) *
732  internal::max<index_t>(0, std::get<Is>(dims).stride())...);
733 }
734 
735 // Make dims with the interval of the first parameter and the stride
736 // of the second parameter.
737 template <index_t DimMin, index_t DimExtent, index_t DimStride>
738 NDARRAY_HOST_DEVICE auto range_with_stride(index_t x, const dim<DimMin, DimExtent, DimStride>& d) {
739  return dim<dynamic, 1, DimStride>(x, 1, d.stride());
740 }
741 template <index_t CropMin, index_t CropExtent, index_t DimMin, index_t DimExtent, index_t Stride>
742 NDARRAY_HOST_DEVICE auto range_with_stride(
743  const interval<CropMin, CropExtent>& x, const dim<DimMin, DimExtent, Stride>& d) {
744  return dim<CropMin, CropExtent, Stride>(x.min(), x.extent(), d.stride());
745 }
746 template <index_t CropMin, index_t CropExtent, index_t CropStride, index_t DimMin,
747  index_t DimExtent, index_t Stride>
748 NDARRAY_HOST_DEVICE auto range_with_stride(
749  const dim<CropMin, CropExtent, CropStride>& x, const dim<DimMin, DimExtent, Stride>& d) {
750  return dim<CropMin, CropExtent, Stride>(x.min(), x.extent(), d.stride());
751 }
752 template <index_t Min, index_t Extent, index_t Stride>
753 NDARRAY_HOST_DEVICE auto range_with_stride(const decltype(_)&, const dim<Min, Extent, Stride>& d) {
754  return d;
755 }
756 
757 template <class Intervals, class Dims, size_t... Is>
758 NDARRAY_HOST_DEVICE auto intervals_with_strides(
759  const Intervals& intervals, const Dims& dims, index_sequence<Is...>) {
760  return std::make_tuple(range_with_stride(std::get<Is>(intervals), std::get<Is>(dims))...);
761 }
762 
763 // Make a tuple of dims corresponding to elements in intervals that are not slices.
764 template <class Dim>
765 NDARRAY_HOST_DEVICE std::tuple<> skip_slices_impl(const Dim& d, index_t) {
766  return std::tuple<>();
767 }
768 template <class Dim, index_t Min, index_t Extent>
769 NDARRAY_HOST_DEVICE std::tuple<Dim> skip_slices_impl(const Dim& d, const interval<Min, Extent>&) {
770  return std::tuple<Dim>(d);
771 }
772 template <class Dim, index_t Min, index_t Extent, index_t Stride>
773 NDARRAY_HOST_DEVICE std::tuple<Dim> skip_slices_impl(
774  const Dim& d, const dim<Min, Extent, Stride>&) {
775  return std::tuple<Dim>(d);
776 }
777 
778 template <class Dims, class Intervals, size_t... Is>
779 NDARRAY_HOST_DEVICE auto skip_slices(
780  const Dims& dims, const Intervals& intervals, index_sequence<Is...>) {
781  return std::tuple_cat(skip_slices_impl(std::get<Is>(dims), std::get<Is>(intervals))...);
782 }
783 
784 // Checks if all indices are in interval of each corresponding dim.
785 template <class Dims, class Indices, size_t... Is>
786 NDARRAY_HOST_DEVICE bool is_in_range(
787  const Dims& dims, const Indices& indices, index_sequence<Is...>) {
788  return all(std::get<Is>(dims).is_in_range(std::get<Is>(indices))...);
789 }
790 
791 // Get the mins of a series of intervals.
792 template <class Dim>
793 NDARRAY_HOST_DEVICE index_t min_of_range(index_t x, const Dim&) {
794  return x;
795 }
796 template <index_t Min, index_t Extent, class Dim>
797 NDARRAY_HOST_DEVICE index_t min_of_range(const interval<Min, Extent>& x, const Dim&) {
798  return x.min();
799 }
800 template <index_t Min, index_t Extent, index_t Stride, class Dim>
801 NDARRAY_HOST_DEVICE index_t min_of_range(const dim<Min, Extent, Stride>& x, const Dim&) {
802  return x.min();
803 }
804 template <class Dim>
805 NDARRAY_HOST_DEVICE index_t min_of_range(const decltype(_)&, const Dim& dim) {
806  return dim.min();
807 }
808 
809 template <class Intervals, class Dims, size_t... Is>
810 NDARRAY_HOST_DEVICE auto mins_of_intervals(
811  const Intervals& intervals, const Dims& dims, index_sequence<Is...>) {
812  return std::make_tuple(min_of_range(std::get<Is>(intervals), std::get<Is>(dims))...);
813 }
814 
815 template <class... Dims, size_t... Is>
816 NDARRAY_HOST_DEVICE auto mins(const std::tuple<Dims...>& dims, index_sequence<Is...>) {
817  return std::make_tuple(std::get<Is>(dims).min()...);
818 }
819 
820 template <class... Dims, size_t... Is>
821 NDARRAY_HOST_DEVICE auto extents(const std::tuple<Dims...>& dims, index_sequence<Is...>) {
822  return std::make_tuple(std::get<Is>(dims).extent()...);
823 }
824 
825 template <class... Dims, size_t... Is>
826 NDARRAY_HOST_DEVICE auto strides(const std::tuple<Dims...>& dims, index_sequence<Is...>) {
827  return std::make_tuple(std::get<Is>(dims).stride()...);
828 }
829 
830 template <class... Dims, size_t... Is>
831 NDARRAY_HOST_DEVICE auto maxs(const std::tuple<Dims...>& dims, index_sequence<Is...>) {
832  return std::make_tuple(std::get<Is>(dims).max()...);
833 }
834 
835 // The following series of functions implements the algorithm for
836 // automatically determining what unknown dynamic strides should be.
837 
838 // A proposed stride is "OK" w.r.t. `dim` if the proposed
839 // stride does not intersect the dim.
840 template <class Dim>
841 NDARRAY_HOST_DEVICE bool is_stride_ok(index_t stride, index_t extent, const Dim& dim) {
842  if (is_unresolved(dim.stride())) {
843  // If the dimension has an unknown stride, it's OK, we're
844  // resolving the current dim first.
845  return true;
846  }
847  if (extent == 1 && abs(stride) == abs(dim.stride()) && dim.extent() > 1) {
848  // If a dimension is extent 1, avoid giving this dimension the same stride
849  // as another dimension with extent greater than 1. This doesn't affect the
850  // results of most programs (because the stride only ever multiplied with
851  // zero), but it makes the strides less objectionable to asserts in some
852  // other libraries that make extra assumptions about images, and may be
853  // easier to understand.
854  return false;
855  }
856  if (dim.extent() * abs(dim.stride()) <= stride) {
857  // The dim is completely inside the proposed stride.
858  return true;
859  }
860  index_t flat_extent = extent * stride;
861  if (abs(dim.stride()) >= flat_extent) {
862  // The dim is completely outside the proposed stride.
863  return true;
864  }
865  return false;
866 }
867 
868 // Replace strides that are not OK with values that cannot be the
869 // smallest stride.
870 template <class... Dims>
871 NDARRAY_HOST_DEVICE index_t filter_stride(index_t stride, index_t extent, const Dims&... dims) {
872  if (all(is_stride_ok(stride, extent, dims)...)) {
873  return stride;
874  } else {
875  return std::numeric_limits<index_t>::max();
876  }
877 }
878 
879 // The candidate stride for some other dimension is the minimum stride it
880 // could have without intersecting this dim.
881 template <class Dim>
882 NDARRAY_HOST_DEVICE index_t candidate_stride(const Dim& dim) {
883  if (is_unresolved(dim.stride())) {
884  return std::numeric_limits<index_t>::max();
885  } else {
886  return max<index_t>(1, abs(dim.stride()) * dim.extent());
887  }
888 }
889 
890 // Find the best stride (the smallest) out of all possible candidate strides.
891 template <class Dims, size_t... Is>
892 NDARRAY_HOST_DEVICE index_t find_stride(index_t extent, const Dims& dims, index_sequence<Is...>) {
893  return variadic_min(filter_stride(1, extent, std::get<Is>(dims)...),
894  filter_stride(candidate_stride(std::get<Is>(dims)), extent, std::get<Is>(dims)...)...);
895 }
896 
897 // Replace unknown dynamic strides for each dimension, starting with the first dimension.
898 template <class AllDims>
899 NDARRAY_HOST_DEVICE void resolve_unknown_strides(AllDims& all_dims) {}
900 template <class AllDims, class Dim0, class... Dims>
901 NDARRAY_HOST_DEVICE void resolve_unknown_strides(AllDims& all_dims, Dim0& dim0, Dims&... dims) {
902  if (is_unresolved(dim0.stride())) {
903  constexpr size_t rank = std::tuple_size<AllDims>::value;
904  dim0.set_stride(find_stride(dim0.extent(), all_dims, make_index_sequence<rank>()));
905  }
906  resolve_unknown_strides(all_dims, dims...);
907 }
908 
909 template <class Dims, size_t... Is>
910 NDARRAY_HOST_DEVICE void resolve_unknown_strides(Dims& dims, index_sequence<Is...>) {
911  resolve_unknown_strides(dims, std::get<Is>(dims)...);
912 }
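// [Editor's note, not part of the original header] Worked example of the algorithm above:
// for three dims with extents {640, 480, 3} and unresolved strides, the first dim gets the
// smallest non-intersecting stride (1), the second gets 640 (the flat extent of the first),
// and the third gets 640 * 480, producing a compact layout with the first declared dim
// innermost.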
913 
914 template <class Dims, size_t... Is>
915 NDARRAY_HOST_DEVICE bool is_resolved(const Dims& dims, index_sequence<Is...>) {
916  return all(is_resolved(std::get<Is>(dims).stride())...);
917 }
918 
919 // A helper to transform an array to a tuple.
920 template <class T, class Tuple, size_t... Is>
921 NDARRAY_HOST_DEVICE std::array<T, sizeof...(Is)> tuple_to_array(
922  const Tuple& t, index_sequence<Is...>) {
923  return {{std::get<Is>(t)...}};
924 }
925 
926 template <class T, class... Ts>
927 NDARRAY_HOST_DEVICE std::array<T, sizeof...(Ts)> tuple_to_array(const std::tuple<Ts...>& t) {
928  return tuple_to_array<T>(t, make_index_sequence<sizeof...(Ts)>());
929 }
930 
931 template <class T, size_t N, size_t... Is>
932 NDARRAY_HOST_DEVICE auto array_to_tuple(const std::array<T, N>& a, index_sequence<Is...>) {
933  return std::make_tuple(a[Is]...);
934 }
935 template <class T, size_t N>
936 NDARRAY_HOST_DEVICE auto array_to_tuple(const std::array<T, N>& a) {
937  return array_to_tuple(a, make_index_sequence<N>());
938 }
939 
940 template <class T, size_t N>
941 using tuple_of_n = decltype(array_to_tuple(internal::declval<std::array<T, N>>()));
942 
943 // A helper to check if a parameter pack is entirely implicitly convertible to
944 // any of the types in Ts, for use with std::enable_if.
945 template <class T, class... Args>
946 struct all_of_any_type : std::false_type {};
947 template <class T>
948 struct all_of_any_type<T> : std::true_type {};
949 template <class... Ts, class Arg, class... Args>
950 struct all_of_any_type<std::tuple<Ts...>, Arg, Args...> {
951  static constexpr bool value = any(std::is_constructible<Ts, Arg>::value...) &&
952  all_of_any_type<std::tuple<Ts...>, Args...>::value;
953 };
954 
955 // Wrapper for checking if a parameter pack is entirely implicitly convertible
956 // to one type T.
957 template <class T, class... Args>
958 using all_of_type = all_of_any_type<std::tuple<T>, Args...>;
959 
960 template <size_t I, class T, class... Us, std::enable_if_t<(I < sizeof...(Us)), int> = 0>
961 NDARRAY_HOST_DEVICE auto convert_dim(const std::tuple<Us...>& u) {
962  return std::get<I>(u);
963 }
964 template <size_t I, class T, class... Us, std::enable_if_t<(I >= sizeof...(Us)), int> = 0>
965 NDARRAY_HOST_DEVICE auto convert_dim(const std::tuple<Us...>& u) {
966  // For dims beyond the rank of U, make a dimension of type T_I with extent 1.
967  return decltype(std::get<I>(internal::declval<T>()))(1);
968 }
969 
970 template <class T, class U, size_t... Is>
971 NDARRAY_HOST_DEVICE T convert_dims(const U& u, internal::index_sequence<Is...>) {
972  return std::make_tuple(convert_dim<Is, T>(u)...);
973 }
974 
975 // Check that the dimensions in src_dims are either copied to the dst shape (not sliced),
976 // or are trivial slices (they are extent 1).
977 template <size_t DstRank, class SrcDims, size_t... Is>
978 NDARRAY_HOST_DEVICE bool is_trivial_slice(
979  const SrcDims& src_dims, internal::index_sequence<Is...>) {
980  return all((Is < DstRank || std::get<Is>(src_dims).extent() == 1)...);
981 }
982 
983 constexpr index_t factorial(index_t x) { return x == 1 ? 1 : x * factorial(x - 1); }
984 
985 // The errors that result from not satisfying this check are probably hell,
986 // but it would be pretty tricky to check that all of [0, Rank) is in `Is...`
987 template <size_t Rank, size_t... Is>
988 using enable_if_permutation = std::enable_if_t<sizeof...(Is) == Rank && all(Is < Rank...) &&
989  product((Is + 2)...) == factorial(Rank + 1)>;
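// [Editor's note, not part of the original header] Example of the permutation check above:
// for Rank 3, Is... = {2, 0, 1} is accepted because every index is < 3 and
// (2+2)*(0+2)*(1+2) == 24 == factorial(4), while a repeated index such as {0, 0, 2} fails
// the product test.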
990 
991 template <class DimDst, class DimSrc>
992 NDARRAY_HOST_DEVICE void assert_dim_compatible(size_t dim_index, const DimSrc& src) {
993  bool compatible = true;
994  if (is_static(DimDst::Min) && src.min() != DimDst::Min) {
995  NDARRAY_PRINT_ERR("Error converting dim %zu: expected static min " NDARRAY_INDEX_T_FMT
996  ", got " NDARRAY_INDEX_T_FMT "\n",
997  dim_index, DimDst::Min, src.min());
998  compatible = false;
999  }
1000  if (is_static(DimDst::Extent) && src.extent() != DimDst::Extent) {
1001  NDARRAY_PRINT_ERR("Error converting dim %zu: expected static extent " NDARRAY_INDEX_T_FMT
1002  ", got " NDARRAY_INDEX_T_FMT "\n",
1003  dim_index, DimDst::Extent, src.extent());
1004  compatible = false;
1005  }
1006  if (is_static(DimDst::Stride) && is_resolved(src.stride()) && src.stride() != DimDst::Stride) {
1007  NDARRAY_PRINT_ERR("Error converting dim %zu: expected static stride " NDARRAY_INDEX_T_FMT
1008  ", got " NDARRAY_INDEX_T_FMT "\n",
1009  dim_index, DimDst::Stride, src.stride());
1010  compatible = false;
1011  }
1012  assert(compatible);
1013  (void)compatible;
1014 }
1015 
1016 template <class DimsDst, class DimsSrc, size_t... Is>
1017 NDARRAY_HOST_DEVICE void assert_dims_compatible(const DimsSrc& src, index_sequence<Is...>) {
1018  // This is ugly; in C++17, we'd use a fold expression over the comma operator (f(), ...).
1019  int unused[] = {(assert_dim_compatible<typename std::tuple_element<Is, DimsDst>::type>(
1020  Is, nda::dim<>(std::get<Is>(src))),
1021  0)...};
1022  (void)unused;
1023 }
1024 
1025 template <class DimsDst, class DimsSrc>
1026 NDARRAY_HOST_DEVICE const DimsSrc& assert_dims_compatible(const DimsSrc& src) {
1027 #ifndef NDEBUG
1028  assert_dims_compatible<DimsDst>(src, make_index_sequence<std::tuple_size<DimsDst>::value>());
1029 #endif
1030  return src;
1031 }
1032 
1033 } // namespace internal
1034 
1035 template <class... Dims>
1036 class shape;
1037 
1039 template <class... Dims>
1040 NDARRAY_HOST_DEVICE auto make_shape(Dims... dims) {
1041  return shape<Dims...>(dims...);
1042 }
1043 
1044 template <class... Dims>
1045 NDARRAY_HOST_DEVICE shape<Dims...> make_shape_from_tuple(const std::tuple<Dims...>& dims) {
1046  return shape<Dims...>(dims);
1047 }
1048 
1053 template <size_t Rank>
1054 using index_of_rank = internal::tuple_of_n<index_t, Rank>;
1055 
1063 template <class... Dims>
1064 class shape {
1065 public:
1067  using dims_type = std::tuple<Dims...>;
1068 
1070  static constexpr size_t rank() { return std::tuple_size<dims_type>::value; }
1071 
1073  static constexpr bool is_scalar() { return rank() == 0; }
1074 
1076  using index_type = index_of_rank<rank()>;
1077 
1078  using size_type = size_t;
1079 
1080  // We use this a lot here. Make an alias for it.
1081  using dim_indices = decltype(internal::make_index_sequence<std::tuple_size<dims_type>::value>());
1082 
1083 private:
1084  dims_type dims_;
1085 
1086  // TODO: This should use std::is_constructible<dims_type, std::tuple<OtherDims...>>
1087  // but it is broken on some compilers (https://github.com/dsharlet/array/issues/20).
1088  template <class... OtherDims>
1089  using enable_if_dims_compatible = std::enable_if_t<sizeof...(OtherDims) == rank()>;
1090 
1091  template <class... Args>
1092  using enable_if_same_rank = std::enable_if_t<(sizeof...(Args) == rank())>;
1093 
1094  template <class... Args>
1095  using enable_if_all_indices =
1096  std::enable_if_t<sizeof...(Args) == rank() && internal::all_of_type<index_t, Args...>::value>;
1097 
1098  template <class... Args>
1099  using enable_if_any_slices =
1100  std::enable_if_t<sizeof...(Args) == rank() &&
1101  internal::all_of_any_type<std::tuple<interval<>, dim<>>, Args...>::value &&
1102  !internal::all_of_type<index_t, Args...>::value>;
1103 
1104  template <class... Args>
1105  using enable_if_any_slices_or_indices =
1106  std::enable_if_t<sizeof...(Args) == rank() &&
1107  internal::all_of_any_type<std::tuple<interval<>, dim<>>, Args...>::value>;
1108 
1109  template <size_t Dim>
1110  using enable_if_dim = std::enable_if_t<(Dim < rank())>;
1111 
1112 public:
1113  NDARRAY_HOST_DEVICE shape() {}
1114  // TODO: This is a bit messy, but necessary to avoid ambiguous default
1115  // constructors when Dims is empty.
1116  template <size_t N = sizeof...(Dims), class = std::enable_if_t<(N > 0)>>
1117  NDARRAY_HOST_DEVICE shape(const Dims&... dims)
1118  : dims_(internal::assert_dims_compatible<dims_type>(std::make_tuple(dims...))) {}
1119  NDARRAY_HOST_DEVICE shape(const shape&) = default;
1120  NDARRAY_HOST_DEVICE shape(shape&&) = default;
1121  NDARRAY_HOST_DEVICE shape& operator=(const shape&) = default;
1122  NDARRAY_HOST_DEVICE shape& operator=(shape&&) = default;
1123 
1127  template <class... OtherDims, class = enable_if_dims_compatible<OtherDims...>>
1128  NDARRAY_HOST_DEVICE shape(const std::tuple<OtherDims...>& other)
1129  : dims_(internal::assert_dims_compatible<dims_type>(other)) {}
1130  template <class... OtherDims, class = enable_if_dims_compatible<OtherDims...>>
1131  NDARRAY_HOST_DEVICE shape(OtherDims... other_dims) : shape(std::make_tuple(other_dims...)) {}
1132  template <class... OtherDims, class = enable_if_dims_compatible<OtherDims...>>
1133  NDARRAY_HOST_DEVICE shape(const shape<OtherDims...>& other)
1134  : dims_(internal::assert_dims_compatible<dims_type>(other.dims())) {}
1135  template <class... OtherDims, class = enable_if_dims_compatible<OtherDims...>>
1136  NDARRAY_HOST_DEVICE shape& operator=(const shape<OtherDims...>& other) {
1137  dims_ = internal::assert_dims_compatible<dims_type>(other.dims());
1138  return *this;
1139  }
1140 
1141  // We cannot have a dims_type constructor because it would be
1142  // ambiguous with the Dims... constructor for 1D shapes.
1143 
1154  NDARRAY_HOST_DEVICE void resolve() { internal::resolve_unknown_strides(dims_, dim_indices()); }
1155 
1157  NDARRAY_HOST_DEVICE bool is_resolved() const {
1158  return internal::is_resolved(dims_, dim_indices());
1159  }
1160 
1162  template <class... Args, class = enable_if_any_slices_or_indices<Args...>>
1163  NDARRAY_HOST_DEVICE bool is_in_range(const std::tuple<Args...>& args) const {
1164  return internal::is_in_range(dims_, args, dim_indices());
1165  }
1166  template <class... Args, class = enable_if_any_slices_or_indices<Args...>>
1167  NDARRAY_HOST_DEVICE bool is_in_range(Args... args) const {
1168  return internal::is_in_range(dims_, std::make_tuple(args...), dim_indices());
1169  }
1170 
1172  NDARRAY_HOST_DEVICE index_t operator[](const index_type& indices) const {
1173  return internal::flat_offset(dims_, indices, dim_indices());
1174  }
1175  template <class... Args, class = enable_if_all_indices<Args...>>
1176  NDARRAY_HOST_DEVICE index_t operator()(Args... indices) const {
1177  return internal::flat_offset(dims_, std::make_tuple(indices...), dim_indices());
1178  }
1179 
1183  template <class... Args, class = enable_if_any_slices<Args...>>
1184  NDARRAY_HOST_DEVICE auto operator[](const std::tuple<Args...>& args) const {
1185  auto new_dims = internal::intervals_with_strides(args, dims_, dim_indices());
1186  auto new_dims_no_slices = internal::skip_slices(new_dims, args, dim_indices());
1187  return make_shape_from_tuple(new_dims_no_slices);
1188  }
1189  template <class... Args, class = enable_if_any_slices<Args...>>
1190  NDARRAY_HOST_DEVICE auto operator()(Args... args) const {
1191  return operator[](std::make_tuple(args...));
1192  }
1193 
1195  template <size_t D, class = enable_if_dim<D>>
1196  NDARRAY_HOST_DEVICE auto& dim() {
1197  return std::get<D>(dims_);
1198  }
1199  template <size_t D, class = enable_if_dim<D>>
1200  NDARRAY_HOST_DEVICE const auto& dim() const {
1201  return std::get<D>(dims_);
1202  }
1203 
1207  NDARRAY_HOST_DEVICE const nda::dim<> dim(size_t d) const {
1208  assert(d < rank());
1209  return internal::tuple_to_array<nda::dim<>>(dims_)[d];
1210  }
1211 
1213  NDARRAY_HOST_DEVICE dims_type& dims() { return dims_; }
1214  NDARRAY_HOST_DEVICE const dims_type& dims() const { return dims_; }
1215 
1216  NDARRAY_HOST_DEVICE index_type min() const { return internal::mins(dims_, dim_indices()); }
1217  NDARRAY_HOST_DEVICE index_type max() const { return internal::maxs(dims_, dim_indices()); }
1218  NDARRAY_HOST_DEVICE index_type extent() const { return internal::extents(dims_, dim_indices()); }
1219  NDARRAY_HOST_DEVICE index_type stride() const { return internal::strides(dims_, dim_indices()); }
1220 
1224  NDARRAY_HOST_DEVICE index_t flat_min() const { return internal::flat_min(dims_, dim_indices()); }
1225  NDARRAY_HOST_DEVICE index_t flat_max() const { return internal::flat_max(dims_, dim_indices()); }
1226  NDARRAY_HOST_DEVICE size_type flat_extent() const {
1227  index_t e = flat_max() - flat_min() + 1;
1228  return e < 0 ? 0 : static_cast<size_type>(e);
1229  }
1230 
1232  NDARRAY_HOST_DEVICE size_type size() const {
1233  index_t s = internal::product(extent(), dim_indices());
1234  return s < 0 ? 0 : static_cast<size_type>(s);
1235  }
1236 
1238  NDARRAY_HOST_DEVICE bool empty() const { return size() == 0; }
1239 
1243  NDARRAY_HOST_DEVICE bool is_compact() const { return flat_extent() <= size(); }
1244 
1249  NDARRAY_HOST_DEVICE bool is_one_to_one() const {
1250  // TODO: https://github.com/dsharlet/array/issues/2
1251  return flat_extent() >= size();
1252  }
1253 
1257  template <typename OtherShape>
1258  NDARRAY_HOST_DEVICE bool is_subset_of(const OtherShape& other, index_t offset) const {
1259  // TODO: https://github.com/dsharlet/array/issues/2
1260  return flat_min() >= other.flat_min() + offset && flat_max() <= other.flat_max() + offset;
1261  }
1262 
1265  NDARRAY_HOST_DEVICE auto& i() { return dim<0>(); }
1266  NDARRAY_HOST_DEVICE const auto& i() const { return dim<0>(); }
1267  NDARRAY_HOST_DEVICE auto& j() { return dim<1>(); }
1268  NDARRAY_HOST_DEVICE const auto& j() const { return dim<1>(); }
1269  NDARRAY_HOST_DEVICE auto& k() { return dim<2>(); }
1270  NDARRAY_HOST_DEVICE const auto& k() const { return dim<2>(); }
1271 
1274  NDARRAY_HOST_DEVICE auto& x() { return dim<0>(); }
1275  NDARRAY_HOST_DEVICE const auto& x() const { return dim<0>(); }
1276  NDARRAY_HOST_DEVICE auto& y() { return dim<1>(); }
1277  NDARRAY_HOST_DEVICE const auto& y() const { return dim<1>(); }
1278  NDARRAY_HOST_DEVICE auto& z() { return dim<2>(); }
1279  NDARRAY_HOST_DEVICE const auto& z() const { return dim<2>(); }
1280  NDARRAY_HOST_DEVICE auto& c() { return dim<2>(); }
1281  NDARRAY_HOST_DEVICE const auto& c() const { return dim<2>(); }
1282  NDARRAY_HOST_DEVICE auto& w() { return dim<3>(); }
1283  NDARRAY_HOST_DEVICE const auto& w() const { return dim<3>(); }
1284 
1287  NDARRAY_HOST_DEVICE index_t width() const { return x().extent(); }
1288  NDARRAY_HOST_DEVICE index_t height() const { return y().extent(); }
1289  NDARRAY_HOST_DEVICE index_t channels() const { return c().extent(); }
1290 
1293  NDARRAY_HOST_DEVICE index_t rows() const { return i().extent(); }
1294  NDARRAY_HOST_DEVICE index_t columns() const { return j().extent(); }
1295 
1298  template <class... OtherDims, class = enable_if_same_rank<OtherDims...>>
1299  NDARRAY_HOST_DEVICE bool operator==(const shape<OtherDims...>& other) const {
1300  return dims_ == other.dims();
1301  }
1302  template <class... OtherDims, class = enable_if_same_rank<OtherDims...>>
1303  NDARRAY_HOST_DEVICE bool operator!=(const shape<OtherDims...>& other) const {
1304  return dims_ != other.dims();
1305  }
1306 };
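// [Editor's note, not part of the original header] Usage sketch: a shape maps an index tuple
// to a flat offset through its dims, e.g.:
//
//   auto s = nda::make_shape(nda::dim<>(0, 640), nda::dim<>(0, 480));
//   s.resolve();                  // picks strides 1 and 640 for the two dims
//   nda::index_t flat = s(3, 2);  // 3 * 1 + 2 * 640
//   s.width();                    // 640, the extent of dim<0>()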
1307 
1308 namespace internal {
1309 
1310 template <size_t... DimIndices, class Shape, size_t... Extras>
1311 NDARRAY_HOST_DEVICE auto transpose_impl(const Shape& shape, index_sequence<Extras...>) {
1312  return make_shape(
1313  shape.template dim<DimIndices>()..., shape.template dim<sizeof...(DimIndices) + Extras>()...);
1314 }
1315 
1316 } // namespace internal
1317 
1330 template <size_t... DimIndices, class... Dims,
1331  class = internal::enable_if_permutation<sizeof...(DimIndices), DimIndices...>>
1332 NDARRAY_HOST_DEVICE auto transpose(const shape<Dims...>& shape) {
1333  return internal::transpose_impl<DimIndices...>(
1334  shape, internal::make_index_sequence<sizeof...(Dims) - sizeof...(DimIndices)>());
1335 }
1336 
1343 template <size_t... DimIndices, class... Dims>
1344 NDARRAY_HOST_DEVICE auto reorder(const shape<Dims...>& shape) {
1345  return make_shape(shape.template dim<DimIndices>()...);
1346 }
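// [Editor's note, not part of the original header] Usage sketch for transpose() and
// reorder(): both permute the dims of a shape, with each dim keeping its stride, so they
// relabel axes without moving data:
//
//   auto s = nda::make_shape(nda::dim<>(0, 640), nda::dim<>(0, 480), nda::dim<>(0, 3));
//   auto t = nda::transpose<1, 0>(s);  // dims reordered to extents {480, 640, 3}
//   auto u = nda::reorder<2, 0>(s);    // rank-2 shape keeping only dims 2 and 0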
1347 
1348 namespace internal {
1349 
1350 template <class Fn, class Idx>
1351 NDARRAY_INLINE NDARRAY_HOST_DEVICE void for_each_index_in_order_impl(Fn&& fn, const Idx& idx) {
1352  fn(idx);
1353 }
1354 
1355 // These overloads, which handle multiple dims at a time, help reduce
1356 // pre-optimization code size.
1357 template <class Fn, class OuterIdx, class Dim0>
1358 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_index_in_order_impl(
1359  Fn&& fn, const OuterIdx& idx, const Dim0& dim0) {
1360  for (index_t i : dim0) {
1361  fn(std::tuple_cat(std::tuple<index_t>(i), idx));
1362  }
1363 }
1364 
1365 template <class Fn, class OuterIdx, class Dim0, class Dim1>
1366 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_index_in_order_impl(
1367  Fn&& fn, const OuterIdx& idx, const Dim0& dim0, const Dim1& dim1) {
1368  for (index_t i : dim0) {
1369  for (index_t j : dim1) {
1370  fn(std::tuple_cat(std::tuple<index_t, index_t>(j, i), idx));
1371  }
1372  }
1373 }
1374 
1375 template <class Fn, class OuterIdx, class Dim0, class... Dims>
1376 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_index_in_order_impl(
1377  Fn&& fn, const OuterIdx& idx, const Dim0& dim0, const Dims&... dims) {
1378  for (index_t i : dim0) {
1379  for_each_index_in_order_impl(fn, std::tuple_cat(std::tuple<index_t>(i), idx), dims...);
1380  }
1381 }
1382 
1383 template <class Fn, class OuterIdx, class Dim0, class Dim1, class... Dims>
1384 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_index_in_order_impl(
1385  Fn&& fn, const OuterIdx& idx, const Dim0& dim0, const Dim1& dim1, const Dims&... dims) {
1386  for (index_t i : dim0) {
1387  for (index_t j : dim1) {
1388  for_each_index_in_order_impl(
1389  fn, std::tuple_cat(std::tuple<index_t, index_t>(j, i), idx), dims...);
1390  }
1391  }
1392 }
1393 
1394 template <class Dims, class Fn, size_t... Is>
1395 NDARRAY_INLINE NDARRAY_HOST_DEVICE void for_each_index_in_order(
1396  Fn&& fn, const Dims& dims, index_sequence<Is...>) {
1397  // We need to reverse the order of the dims so the last dim is
1398  // iterated innermost.
1399  for_each_index_in_order_impl(fn, std::tuple<>(), std::get<sizeof...(Is) - 1 - Is>(dims)...);
1400 }
1401 
1402 template <typename TSrc, typename TDst>
1403 NDARRAY_INLINE NDARRAY_HOST_DEVICE void move_assign(TSrc& src, TDst& dst) {
1404  dst = std::move(src);
1405 }
1406 
1407 template <typename TSrc, typename TDst>
1408 NDARRAY_INLINE NDARRAY_HOST_DEVICE void copy_assign(const TSrc& src, TDst& dst) {
1409  dst = src;
1410 }
1411 
1412 template <size_t D, class Ptr0>
1413 NDARRAY_INLINE NDARRAY_HOST_DEVICE void advance(Ptr0& ptr0) {
1414  std::get<0>(ptr0) += std::get<D>(std::get<1>(ptr0));
1415 }
1416 template <size_t D, class Ptr0, class Ptr1>
1417 NDARRAY_INLINE NDARRAY_HOST_DEVICE void advance(Ptr0& ptr0, Ptr1& ptr1) {
1418  std::get<0>(ptr0) += std::get<D>(std::get<1>(ptr0));
1419  std::get<0>(ptr1) += std::get<D>(std::get<1>(ptr1));
1420 }
1421 // If we ever need other than 1- or 2-way for_each_value, add a variadic
1422 // version of advance.
1423 
1424 template <class Fn, class Ptr0, class... Ptrs>
1425 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_value_in_order_inner_dense(
1426  index_t extent, Fn&& fn, Ptr0 NDARRAY_RESTRICT ptr0, Ptrs NDARRAY_RESTRICT... ptrs) {
1427  Ptr0 end = ptr0 + extent;
1428  while (ptr0 < end) {
1429  fn(*ptr0++, *ptrs++...);
1430  }
1431 }
1432 
1433 template <size_t, class ExtentType, class Fn, class... Ptrs>
1434 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_value_in_order_impl(
1435  std::true_type, const ExtentType& extent, Fn&& fn, Ptrs... ptrs) {
1436  index_t extent_d = std::get<0>(extent);
1437  if (all(std::get<0>(std::get<1>(ptrs)) == 1 ...)) {
1438  for_each_value_in_order_inner_dense(extent_d, fn, std::get<0>(ptrs)...);
1439  } else {
1440  for (index_t i = 0; i < extent_d; i++) {
1441  fn(*std::get<0>(ptrs)...);
1442  advance<0>(ptrs...);
1443  }
1444  }
1445 }
1446 
1447 template <size_t D, class ExtentType, class Fn, class... Ptrs>
1448 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_value_in_order_impl(
1449  std::false_type, const ExtentType& extent, Fn&& fn, Ptrs... ptrs) {
1450  index_t extent_d = std::get<D>(extent);
1451  for (index_t i = 0; i < extent_d; i++) {
1452  using is_inner_loop = std::conditional_t<D == 1, std::true_type, std::false_type>;
1453  for_each_value_in_order_impl<D - 1>(is_inner_loop(), extent, fn, ptrs...);
1454  advance<D>(ptrs...);
1455  }
1456 }
1457 
1458 template <size_t D, class ExtentType, class Fn, class... Ptrs,
1459  std::enable_if_t<(D < std::tuple_size<ExtentType>::value), int> = 0>
1460 NDARRAY_INLINE NDARRAY_HOST_DEVICE void for_each_value_in_order(
1461  const ExtentType& extent, Fn&& fn, Ptrs... ptrs) {
1462  using is_inner_loop = std::conditional_t<D == 0, std::true_type, std::false_type>;
1463  for_each_value_in_order_impl<D>(is_inner_loop(), extent, fn, ptrs...);
1464 }
1465 
1466 // Scalar buffers are a special case. The enable_ifs here (and above) are a workaround for a bug in
1467 // old versions of GCC that causes this overload to be ambiguous.
1468 template <size_t D, class Fn, class... Ptrs, std::enable_if_t<(D == -1), int> = 0>
1469 NDARRAY_INLINE NDARRAY_HOST_DEVICE void for_each_value_in_order(
1470  const std::tuple<>& extent, Fn&& fn, Ptrs... ptrs) {
1471  fn(*std::get<0>(ptrs)...);
1472 }
1473 
1474 template <size_t Rank>
1475 NDARRAY_HOST_DEVICE auto make_default_dense_shape() {
1476  // The inner dimension is a dense_dim, unless the shape is rank 0.
1477  using inner_dim = std::conditional_t<(Rank > 0), std::tuple<dense_dim<>>, std::tuple<>>;
1478  return make_shape_from_tuple(
1479  std::tuple_cat(inner_dim(), tuple_of_n<dim<>, max<size_t>(1, Rank) - 1>()));
1480 }
1481 
1482 template <index_t CurrentStride>
1483 NDARRAY_HOST_DEVICE std::tuple<> make_compact_dims() {
1484  return std::tuple<>();
1485 }
1486 
1487 template <index_t CurrentStride, index_t Min, index_t Extent, index_t Stride, class... Dims>
1488 NDARRAY_HOST_DEVICE auto make_compact_dims(
1489  const dim<Min, Extent, Stride>& dim0, const Dims&... dims) {
1490 
1491  // If we know the extent of this dimension, we can also provide
1492  // a constant stride for the next dimension.
1493  constexpr index_t NextStride = static_mul(CurrentStride, Extent);
1494  // Give this dimension the current stride if we don't have one already,
1495  // and don't give it a runtime stride. If CurrentStride is static, that
1496  // will be the stride. If not, it will be dynamic, and resolved later.
1497  constexpr index_t NewStride = is_static(Stride) ? Stride : CurrentStride;
1498  return std::tuple_cat(std::make_tuple(dim<Min, Extent, NewStride>(dim0.min(), dim0.extent())),
1499  make_compact_dims<NextStride>(dims...));
1500 }
1501 
1502 template <class Dims, size_t... Is>
1503 NDARRAY_HOST_DEVICE auto make_compact_dims(const Dims& dims, index_sequence<Is...>) {
1504  // Currently, the algorithm is:
1505  // - If any dimension has a static stride greater than one, don't give any
1506  // static strides.
1507  // - If all dimensions are dynamic, use 1 as the next stride.
1508  // - If a dimension has static stride 1, use its Extent as the next stride.
1509  // This is very conservative, but the only thing I can figure out how to
1510  // express as constexpr with C++14 that doesn't risk doing dumb things.
1511  constexpr index_t MinStride =
1512  variadic_max(std::tuple_element<Is, Dims>::type::Stride == 1
1513  ? static_abs(std::tuple_element<Is, Dims>::type::Extent)
1514  : dynamic...);
1515  constexpr bool AnyStrideGreaterThanOne = any((std::tuple_element<Is, Dims>::type::Stride > 1)...);
1516  constexpr bool AllDynamic = all(is_dynamic(std::tuple_element<Is, Dims>::type::Stride)...);
1517  constexpr index_t NextStride = AnyStrideGreaterThanOne ? dynamic : (AllDynamic ? 1 : MinStride);
1518  return make_compact_dims<NextStride>(std::get<Is>(dims)...);
1519 }
1520 
1521 template <index_t Min, index_t Extent, index_t Stride, class DimSrc>
1522 NDARRAY_HOST_DEVICE bool is_dim_compatible(const dim<Min, Extent, Stride>&, const DimSrc& src) {
1523  return (is_dynamic(Min) || src.min() == Min) && (is_dynamic(Extent) || src.extent() == Extent) &&
1524  (is_dynamic(Stride) || src.stride() == Stride);
1525 }
1526 
1527 template <class... DimsDst, class ShapeSrc, size_t... Is>
1528 NDARRAY_HOST_DEVICE bool is_shape_compatible(
1529  const shape<DimsDst...>&, const ShapeSrc& src, index_sequence<Is...>) {
1530  return all(is_dim_compatible(DimsDst(), src.template dim<Is>())...);
1531 }
1532 
1533 template <class DimA, class DimB>
1534 NDARRAY_HOST_DEVICE auto clamp_dims(const DimA& a, const DimB& b) {
1535  constexpr index_t Min = static_max(DimA::Min, DimB::Min);
1536  constexpr index_t Max = static_min(DimA::Max, DimB::Max);
1537  constexpr index_t Extent = static_add(static_sub(Max, Min), 1);
1538  index_t min = internal::max(a.min(), b.min());
1539  index_t max = internal::min(a.max(), b.max());
1540  index_t extent = max - min + 1;
1541  return dim<Min, Extent, DimA::Stride>(min, extent, a.stride());
1542 }
1543 
1544 template <class DimsA, class DimsB, size_t... Is>
1545 NDARRAY_HOST_DEVICE auto clamp(const DimsA& a, const DimsB& b, index_sequence<Is...>) {
1546  return make_shape(clamp_dims(std::get<Is>(a), std::get<Is>(b))...);
1547 }
1548 
1549 // Return where the index I appears in Is...
1550 template <size_t I>
1551 constexpr size_t index_of() {
1552  // This constant is just something that would be absurd to use as a tuple
1553  // index, but has headroom so we can add to it without overflow.
1554  return 10000;
1555 }
1556 template <size_t I, size_t I0, size_t... Is>
1557 constexpr size_t index_of() {
1558  return I == I0 ? 0 : 1 + index_of<I, Is...>();
1559 }
1560 
1561 // Similar to std::get, but returns a one-element tuple if I is
1562 // in bounds, or an empty tuple if not.
1563 template <size_t I, class T, std::enable_if_t<(I < std::tuple_size<T>::value), int> = 0>
1564 NDARRAY_INLINE NDARRAY_HOST_DEVICE auto get_or_empty(const T& t) {
1565  return std::make_tuple(std::get<I>(t));
1566 }
1567 template <size_t I, class T, std::enable_if_t<(I >= std::tuple_size<T>::value), int> = 0>
1568 NDARRAY_INLINE NDARRAY_HOST_DEVICE std::tuple<> get_or_empty(const T& t) {
1569  return std::tuple<>();
1570 }
1571 
1572 // Perform the inverse of a shuffle with indices Is...
1573 template <size_t... Is, class T, size_t... Js>
1574 NDARRAY_HOST_DEVICE auto unshuffle(const T& t, index_sequence<Js...>) {
1575  return std::tuple_cat(get_or_empty<index_of<Js, Is...>()>(t)...);
1576 }
1577 template <size_t... Is, class... Ts>
1578 NDARRAY_HOST_DEVICE auto unshuffle(const std::tuple<Ts...>& t) {
1579  return unshuffle<Is...>(t, make_index_sequence<variadic_max(Is...) + 1>());
1580 }
1581 
1582 template <class ShapeDst, class ShapeSrc>
1583 using enable_if_shapes_compatible =
1584  std::enable_if_t<std::is_constructible<ShapeDst, ShapeSrc>::value>;
1585 
1586 template <class ShapeDst, class ShapeSrc>
1587 using enable_if_shapes_explicitly_compatible =
1588  std::enable_if_t<(ShapeDst::rank() <= ShapeSrc::rank())>;
1589 
1590 template <class ShapeDst, class ShapeSrc>
1591 using enable_if_shapes_copy_compatible = std::enable_if_t<(ShapeDst::rank() == ShapeSrc::rank())>;
1592 
1593 template <class Alloc>
1594 using enable_if_allocator = decltype(internal::declval<Alloc>().allocate(0));
1595 
1596 } // namespace internal
1597 
1600 template <size_t Rank>
1601 using shape_of_rank = decltype(make_shape_from_tuple(internal::tuple_of_n<dim<>, Rank>()));
1602 
1605 template <size_t Rank>
1606 using dense_shape = decltype(internal::make_default_dense_shape<Rank>());
1607 
1619 template <class Shape>
1620 NDARRAY_HOST_DEVICE auto make_compact(const Shape& s) {
1621  auto static_compact =
1622  make_shape_from_tuple(internal::make_compact_dims(s.dims(), typename Shape::dim_indices()));
1623  static_compact.resolve();
1624  return static_compact;
1625 }
1626 
1628 template <index_t... Extents>
1629 using fixed_dense_shape =
1630  decltype(make_shape_from_tuple(internal::make_compact_dims<1>(dim<0, Extents>()...)));
1631 
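A usage sketch (illustrative, not part of array.h; assumes the header is included and that dims are constructible from extents, as defined earlier in this file):

  void example_shapes() {
    nda::dense_shape<2> s(10, 20);        // inner dimension has stride 1
    auto compact = nda::make_compact(s);  // remaining strides resolved densely
    nda::fixed_dense_shape<3, 4> s34;     // all extents and strides are static
    (void)compact; (void)s34;
  }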
1634 template <class ShapeDst, class ShapeSrc,
1635  class = internal::enable_if_shapes_compatible<ShapeDst, ShapeSrc>>
1636 NDARRAY_HOST_DEVICE bool is_compatible(const ShapeSrc& src) {
1637  return internal::is_shape_compatible(ShapeDst(), src, typename ShapeSrc::dim_indices());
1638 }
1639 
1645 // TODO: Consider enabling this kind of conversion implicitly. It is hard to
1646 // do without constructor overload ambiguity problems, and I'm also not sure
1647 // it's a good idea.
1648 template <class ShapeDst, class ShapeSrc,
1649  class = internal::enable_if_shapes_explicitly_compatible<ShapeDst, ShapeSrc>>
1650 NDARRAY_HOST_DEVICE ShapeDst convert_shape(const ShapeSrc& src) {
1651  assert(
1652  internal::is_trivial_slice<ShapeDst::rank()>(src.dims(), typename ShapeSrc::dim_indices()));
1653  return internal::convert_dims<typename ShapeDst::dims_type>(
1654  src.dims(), typename ShapeDst::dim_indices());
1655 }
1656 
1659 template <class ShapeDst, class ShapeSrc,
1660  class = internal::enable_if_shapes_explicitly_compatible<ShapeDst, ShapeSrc>>
1661 NDARRAY_HOST_DEVICE bool is_explicitly_compatible(const ShapeSrc& src) {
1662  return internal::is_shape_compatible(ShapeDst(), src, typename ShapeSrc::dim_indices());
1663 }
1664 
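A sketch of how the compatibility check is meant to be used (illustrative, not part of array.h):

  void example_compatibility() {
    nda::dense_shape<2> dense(10, 20);
    nda::shape_of_rank<2> general(dense);  // conversion to fully dynamic dims
    // True when `general` satisfies dense_shape<2>'s static constraints at
    // runtime (here, an inner stride of 1).
    bool ok = nda::is_compatible<nda::dense_shape<2>>(general);
    (void)ok;
  }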
1673 template <class Shape, class Fn,
1674  class = internal::enable_if_callable<Fn, typename Shape::index_type>>
1675 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_index_in_order(const Shape& shape, Fn&& fn) {
1676  internal::for_each_index_in_order(fn, shape.dims(), typename Shape::dim_indices());
1677 }
1678 template <class Shape, class Ptr, class Fn,
1679  class = internal::enable_if_callable<Fn, typename std::remove_pointer<Ptr>::type&>>
1680 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_value_in_order(
1681  const Shape& shape, Ptr base, Fn&& fn) {
1682  if (!base) {
1683  assert(shape.empty());
1684  return;
1685  }
1686  // TODO: This is losing compile-time constant extents and strides info
1687  // (https://github.com/dsharlet/array/issues/1).
1688  auto base_and_stride = std::make_pair(base, shape.stride());
1689  internal::for_each_value_in_order<Shape::rank() - 1>(shape.extent(), fn, base_and_stride);
1690 }
1691 
1700 template <class Shape, class ShapeA, class PtrA, class ShapeB, class PtrB, class Fn,
1701  class = internal::enable_if_callable<Fn, typename std::remove_pointer<PtrA>::type&,
1702  typename std::remove_pointer<PtrB>::type&>>
1703 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_value_in_order(const Shape& shape,
1704  const ShapeA& shape_a, PtrA base_a, const ShapeB& shape_b, PtrB base_b, Fn&& fn) {
1705  if (!base_a || !base_b) {
1706  assert(shape.empty());
1707  return;
1708  }
1709  base_a += shape_a[shape.min()];
1710  base_b += shape_b[shape.min()];
1711  // TODO: This is losing compile-time constant extents and strides info
1712  // (https://github.com/dsharlet/array/issues/1).
1713  auto a = std::make_pair(base_a, shape_a.stride());
1714  auto b = std::make_pair(base_b, shape_b.stride());
1715  internal::for_each_value_in_order<Shape::rank() - 1>(shape.extent(), fn, a, b);
1716 }
1717 
1718 namespace internal {
1719 
1720 NDARRAY_INLINE NDARRAY_HOST_DEVICE bool can_fuse(const dim<>& inner, const dim<>& outer) {
1721  return inner.stride() * inner.extent() == outer.stride();
1722 }
1723 
1724 NDARRAY_INLINE NDARRAY_HOST_DEVICE dim<> fuse(const dim<>& inner, const dim<>& outer) {
1725  assert(can_fuse(inner, outer));
1726  return dim<>(
1727  inner.min() + outer.min() * inner.extent(), inner.extent() * outer.extent(), inner.stride());
1728 }
1729 
1730 struct copy_dims {
1731  dim<> src;
1732  dim<> dst;
1733 };
1734 
1735 inline bool operator<(const dim<>& l, const dim<>& r) { return l.stride() < r.stride(); }
1736 
1737 inline bool operator<(const copy_dims& l, const copy_dims& r) {
1738  return l.dst.stride() < r.dst.stride();
1739 }
1740 
1741 // We need a sort that only needs to deal with very small lists,
1742 // and extra complexity here is costly in code size/compile time.
1743 // This is a rare job for bubble sort!
1744 template <class Iterator>
1745 NDARRAY_HOST_DEVICE void bubble_sort(Iterator begin, Iterator end) {
1746  for (Iterator i = begin; i != end; ++i) {
1747  for (Iterator j = i; j != end; ++j) {
1748  if (*j < *i) { std::swap(*i, *j); }
1749  }
1750  }
1751 }
1752 
1753 // Sort the dims such that strides are increasing from dim 0, and contiguous
1754 // dimensions are fused.
1755 template <class Shape>
1756 NDARRAY_HOST_DEVICE shape_of_rank<Shape::rank()> dynamic_optimize_shape(const Shape& shape) {
1757  auto dims = internal::tuple_to_array<dim<>>(shape.dims());
1758 
1759  // Sort the dims by stride.
1760  bubble_sort(dims.begin(), dims.end());
1761 
1762  // Find dimensions that are contiguous and fuse them.
1763  size_t rank = dims.size();
1764  for (size_t i = 0; i + 1 < rank;) {
1765  if (can_fuse(dims[i], dims[i + 1])) {
1766  dims[i] = fuse(dims[i], dims[i + 1]);
1767  for (size_t j = i + 1; j + 1 < rank; j++) {
1768  dims[j] = dims[j + 1];
1769  }
1770  rank--;
1771  } else {
1772  i++;
1773  }
1774  }
1775 
1776  // Unfortunately, we can't make the rank of the resulting shape smaller. Fill
1777  // the end of the array with size 1 dimensions.
1778  for (size_t i = rank; i < dims.size(); i++) {
1779  dims[i] = dim<>(0, 1, 0);
1780  }
1781 
1782  return shape_of_rank<Shape::rank()>(array_to_tuple(dims));
1783 }
1784 
1785 // Optimize a src and dst shape. The dst shape is made dense, and contiguous
1786 // dimensions are fused.
1787 template <class ShapeSrc, class ShapeDst,
1788  class = enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
1789 NDARRAY_HOST_DEVICE auto dynamic_optimize_copy_shapes(const ShapeSrc& src, const ShapeDst& dst) {
1790  constexpr size_t rank = ShapeSrc::rank();
1791  static_assert(rank == ShapeDst::rank(), "copy shapes must have same rank.");
1792  auto src_dims = internal::tuple_to_array<dim<>>(src.dims());
1793  auto dst_dims = internal::tuple_to_array<dim<>>(dst.dims());
1794 
1795  std::array<copy_dims, rank> dims;
1796  for (size_t i = 0; i < rank; i++) {
1797  dims[i] = {src_dims[i], dst_dims[i]};
1798  }
1799 
1800  // Sort the dims by the dst stride.
1801  bubble_sort(dims.begin(), dims.end());
1802 
1803  // Find dimensions that are contiguous and fuse them.
1804  size_t new_rank = dims.size();
1805  for (size_t i = 0; i + 1 < new_rank;) {
1806  if (dims[i].src.extent() == dims[i].dst.extent() && can_fuse(dims[i].src, dims[i + 1].src) &&
1807  can_fuse(dims[i].dst, dims[i + 1].dst)) {
1808  dims[i].src = fuse(dims[i].src, dims[i + 1].src);
1809  dims[i].dst = fuse(dims[i].dst, dims[i + 1].dst);
1810  for (size_t j = i + 1; j + 1 < new_rank; j++) {
1811  dims[j] = dims[j + 1];
1812  }
1813  new_rank--;
1814  } else {
1815  i++;
1816  }
1817  }
1818 
1819  // Unfortunately, we can't make the rank of the resulting shape dynamic. Fill
1820  // the end of the array with size 1 dimensions.
1821  for (size_t i = new_rank; i < dims.size(); i++) {
1822  dims[i] = {dim<>(0, 1, 0), dim<>(0, 1, 0)};
1823  }
1824 
1825  for (size_t i = 0; i < dims.size(); i++) {
1826  src_dims[i] = dims[i].src;
1827  dst_dims[i] = dims[i].dst;
1828  }
1829 
1830  return std::make_pair(
1831  shape_of_rank<rank>(array_to_tuple(src_dims)), shape_of_rank<rank>(array_to_tuple(dst_dims)));
1832 }
1833 
1834 template <class Shape>
1835 NDARRAY_HOST_DEVICE auto optimize_shape(const Shape& shape) {
1836  // In the general case, dynamically optimize the shape.
1837  return dynamic_optimize_shape(shape);
1838 }
1839 
1840 template <class Dim0>
1841 NDARRAY_HOST_DEVICE auto optimize_shape(const shape<Dim0>& shape) {
1842  // Nothing to do for rank 1 shapes.
1843  return shape;
1844 }
1845 
1846 template <class ShapeSrc, class ShapeDst>
1847 NDARRAY_HOST_DEVICE auto optimize_copy_shapes(const ShapeSrc& src, const ShapeDst& dst) {
1848  return dynamic_optimize_copy_shapes(src, dst);
1849 }
1850 
1851 template <class Dim0Src, class Dim0Dst>
1852 NDARRAY_HOST_DEVICE auto optimize_copy_shapes(
1853  const shape<Dim0Src>& src, const shape<Dim0Dst>& dst) {
1854  // Nothing to do for rank 1 shapes.
1855  return std::make_pair(src, dst);
1856 }
1857 
1858 template <class T>
1859 NDARRAY_HOST_DEVICE T* pointer_add(T* x, index_t offset) {
1860  return x != nullptr ? x + offset : x;
1861 }
1862 
1863 } // namespace internal
1864 
1866 template <class Shape>
1867 class shape_traits {
1868 public:
1869  using shape_type = Shape;
1870 
1873  template <class Fn>
1874  NDARRAY_HOST_DEVICE static void for_each_index(const Shape& shape, Fn&& fn) {
1875  for_each_index_in_order(shape, fn);
1876  }
1877 
1881  template <class Ptr, class Fn>
1882  NDARRAY_HOST_DEVICE static void for_each_value(const Shape& shape, Ptr base, Fn&& fn) {
1883  auto opt_shape = internal::optimize_shape(shape);
1884  for_each_value_in_order(opt_shape, base, fn);
1885  }
1886 };
1887 
1890 template <class ShapeSrc, class ShapeDst>
1891 class copy_shape_traits {
1892 public:
1893  using src_shape_type = ShapeSrc;
1894  using dst_shape_type = ShapeDst;
1895 
1900  template <class Fn, class TSrc, class TDst>
1901  NDARRAY_HOST_DEVICE static void for_each_value(
1902  const ShapeSrc& shape_src, TSrc src, const ShapeDst& shape_dst, TDst dst, Fn&& fn) {
1903  // For this function, we don't care about the order in which the callback is
1904  // called. Optimize the shapes for memory access order.
1905  auto opt_shape = internal::optimize_copy_shapes(shape_src, shape_dst);
1906  const auto& opt_shape_src = opt_shape.first;
1907  const auto& opt_shape_dst = opt_shape.second;
1908 
1909  for_each_value_in_order(opt_shape_dst, opt_shape_src, src, opt_shape_dst, dst, fn);
1910  }
1911 };
1912 
1929 template <size_t... LoopOrder, class Shape, class Fn,
1930  class = internal::enable_if_callable<Fn, typename Shape::index_type>,
1931  std::enable_if_t<(sizeof...(LoopOrder) == 0), int> = 0>
1932 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_index(const Shape& s, Fn&& fn) {
1933  for_each_index_in_order(s, fn);
1934 }
1935 template <size_t... LoopOrder, class Shape, class Fn,
1936  class = internal::enable_if_applicable<Fn, typename Shape::index_type>,
1937  std::enable_if_t<(sizeof...(LoopOrder) == 0), int> = 0>
1938 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_all_indices(const Shape& s, Fn&& fn) {
1939  using index_type = typename Shape::index_type;
1940  for_each_index(s, [fn = std::move(fn)](const index_type& i) { internal::apply(fn, i); });
1941 }
1942 template <size_t... LoopOrder, class Shape, class Fn,
1943  class = internal::enable_if_callable<Fn, index_of_rank<sizeof...(LoopOrder)>>,
1944  std::enable_if_t<(sizeof...(LoopOrder) != 0), int> = 0>
1945 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_each_index(const Shape& s, Fn&& fn) {
1946  using index_type = index_of_rank<sizeof...(LoopOrder)>;
1947  for_each_index_in_order(reorder<LoopOrder...>(s),
1948  [fn = std::move(fn)](const index_type& i) { fn(internal::unshuffle<LoopOrder...>(i)); });
1949 }
1950 template <size_t... LoopOrder, class Shape, class Fn,
1951  class = internal::enable_if_callable<Fn, decltype(LoopOrder)...>,
1952  std::enable_if_t<(sizeof...(LoopOrder) != 0), int> = 0>
1953 NDARRAY_UNIQUE NDARRAY_HOST_DEVICE void for_all_indices(const Shape& s, Fn&& fn) {
1954  using index_type = index_of_rank<sizeof...(LoopOrder)>;
1955  for_each_index_in_order(reorder<LoopOrder...>(s), [fn = std::move(fn)](const index_type& i) {
1956  internal::apply(fn, internal::unshuffle<LoopOrder...>(i));
1957  });
1958 }
1959 
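A usage sketch (illustrative, not part of array.h):

  void example_for_all_indices() {
    nda::shape_of_rank<2> s(3, 4);
    nda::for_all_indices(s, [](nda::index_t i, nda::index_t j) {
      // called once per (i, j) in the shape
    });
    // The same traversal with an explicit loop order over the dimensions:
    nda::for_all_indices<1, 0>(s, [](nda::index_t i, nda::index_t j) {});
  }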
1960 template <class T, class Shape>
1961 class array_ref;
1962 template <class T, class Shape, class Alloc>
1963 class array;
1964 
1965 template <class T, class Shape>
1966 using const_array_ref = array_ref<const T, Shape>;
1967 
1969 template <class T, class Shape>
1970 NDARRAY_HOST_DEVICE array_ref<T, Shape> make_array_ref(T* base, const Shape& shape) {
1971  return {base, shape};
1972 }
1973 
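A usage sketch (illustrative, not part of array.h): wrapping an existing buffer.

  void example_array_ref() {
    float data[100] = {};
    auto ref = nda::make_array_ref(data, nda::dense_shape<2>(10, 10));
    ref(3, 4) = 1.0f;  // with the inner dimension dense, this is data[3 + 4 * 10]
  }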
1974 namespace internal {
1975 
1976 struct no_resolve {};
1977 
1978 template <class T, class Shape>
1979 NDARRAY_HOST_DEVICE array_ref<T, Shape> make_array_ref_no_resolve(T* base, const Shape& shape) {
1980  return {base, shape, no_resolve{}};
1981 }
1982 
1983 template <class T, class Shape, class... Args>
1984 NDARRAY_HOST_DEVICE auto make_array_ref_at(
1985  T base, const Shape& shape, const std::tuple<Args...>& args) {
1986  auto new_shape = shape[args];
1987  auto new_mins = mins_of_intervals(args, shape.dims(), make_index_sequence<sizeof...(Args)>());
1988  auto old_min_offset = shape[new_mins];
1989  return make_array_ref_no_resolve(internal::pointer_add(base, old_min_offset), new_shape);
1990 }
1991 
1992 } // namespace internal
1993 
1997 template <class T, class Shape>
1998 class array_ref {
1999 public:
2001  using value_type = T;
2002  using reference = value_type&;
2003  using const_reference = const value_type&;
2004  using pointer = value_type*;
2006  using shape_type = Shape;
2007  using index_type = typename Shape::index_type;
2008  using shape_traits_type = shape_traits<Shape>;
2009  using size_type = size_t;
2010 
2012  static constexpr size_t rank() { return Shape::rank(); }
2013 
2015  static constexpr bool is_scalar() { return Shape::is_scalar(); }
2016 
2017 private:
2018  template <class OtherShape>
2019  using enable_if_shape_compatible = internal::enable_if_shapes_compatible<Shape, OtherShape>;
2020 
2021  template <class... Args>
2022  using enable_if_all_indices =
2023  std::enable_if_t<sizeof...(Args) == rank() && internal::all_of_type<index_t, Args...>::value>;
2024 
2025  template <class... Args>
2026  using enable_if_any_slices =
2027  std::enable_if_t<sizeof...(Args) == rank() &&
2028  internal::all_of_any_type<std::tuple<interval<>, dim<>>, Args...>::value &&
2029  !internal::all_of_type<index_t, Args...>::value>;
2030 
2031  template <size_t Dim>
2032  using enable_if_dim = std::enable_if_t<(Dim < rank())>;
2033 
2034  pointer base_;
2035  Shape shape_;
2036 
2037 public:
2040  NDARRAY_HOST_DEVICE array_ref(pointer base = nullptr, const Shape& shape = Shape())
2041  : base_(base), shape_(shape) {
2042  shape_.resolve();
2043  }
2044 
2045  NDARRAY_HOST_DEVICE array_ref(pointer base, const Shape& shape, internal::no_resolve)
2046  : base_(base), shape_(shape) {}
2047 
2049  NDARRAY_HOST_DEVICE array_ref(const array_ref& other) = default;
2050  NDARRAY_HOST_DEVICE array_ref(array_ref&& other) = default;
2051  NDARRAY_HOST_DEVICE array_ref& operator=(const array_ref& other) = default;
2052  NDARRAY_HOST_DEVICE array_ref& operator=(array_ref&& other) = default;
2053 
2055  template <class OtherShape, class = enable_if_shape_compatible<OtherShape>>
2056  NDARRAY_HOST_DEVICE array_ref(const array_ref<T, OtherShape>& other)
2057  : array_ref(other.base(), other.shape(), internal::no_resolve{}) {}
2058  template <class OtherShape, class = enable_if_shape_compatible<OtherShape>>
2059  NDARRAY_HOST_DEVICE array_ref& operator=(const array_ref<T, OtherShape>& other) {
2060  base_ = other.base();
2061  shape_ = other.shape();
2062  return *this;
2063  }
2064 
2066  NDARRAY_HOST_DEVICE reference operator[](const index_type& indices) const {
2067  return base_[shape_[indices]];
2068  }
2069  template <class... Args, class = enable_if_all_indices<Args...>>
2070  NDARRAY_HOST_DEVICE reference operator()(Args... indices) const {
2071  return base_[shape_(indices...)];
2072  }
2073 
2078  template <class... Args, class = enable_if_any_slices<Args...>>
2079  NDARRAY_HOST_DEVICE auto operator[](const std::tuple<Args...>& args) const {
2080  return internal::make_array_ref_at(base_, shape_, args);
2081  }
2082  template <class... Args, class = enable_if_any_slices<Args...>>
2083  NDARRAY_HOST_DEVICE auto operator()(Args... args) const {
2084  return internal::make_array_ref_at(base_, shape_, std::make_tuple(args...));
2085  }
2086 
2090  template <class Fn, class = internal::enable_if_callable<Fn, reference>>
2091  NDARRAY_HOST_DEVICE void for_each_value(Fn&& fn) const {
2092  shape_traits_type::for_each_value(shape_, base_, fn);
2093  }
2094 
2096  NDARRAY_HOST_DEVICE pointer base() const { return base_; }
2097 
2100  NDARRAY_HOST_DEVICE pointer data() const {
2101  return internal::pointer_add(base_, shape_.flat_min());
2102  }
2103 
2105  NDARRAY_HOST_DEVICE Shape& shape() { return shape_; }
2106  NDARRAY_HOST_DEVICE const Shape& shape() const { return shape_; }
2107 
2108  template <size_t D, class = enable_if_dim<D>>
2109  NDARRAY_HOST_DEVICE auto& dim() {
2110  return shape_.template dim<D>();
2111  }
2112  template <size_t D, class = enable_if_dim<D>>
2113  NDARRAY_HOST_DEVICE const auto& dim() const {
2114  return shape_.template dim<D>();
2115  }
2116  const nda::dim<> dim(size_t d) const { return shape_.dim(d); }
2117  NDARRAY_HOST_DEVICE size_type size() const { return shape_.size(); }
2118  NDARRAY_HOST_DEVICE bool empty() const { return base() != nullptr ? shape_.empty() : true; }
2119  NDARRAY_HOST_DEVICE bool is_compact() const { return shape_.is_compact(); }
2120 
2123  NDARRAY_HOST_DEVICE auto& i() { return shape_.i(); }
2124  NDARRAY_HOST_DEVICE const auto& i() const { return shape_.i(); }
2125  NDARRAY_HOST_DEVICE auto& j() { return shape_.j(); }
2126  NDARRAY_HOST_DEVICE const auto& j() const { return shape_.j(); }
2127  NDARRAY_HOST_DEVICE auto& k() { return shape_.k(); }
2128  NDARRAY_HOST_DEVICE const auto& k() const { return shape_.k(); }
2129 
2132  NDARRAY_HOST_DEVICE auto& x() { return shape_.x(); }
2133  NDARRAY_HOST_DEVICE const auto& x() const { return shape_.x(); }
2134  NDARRAY_HOST_DEVICE auto& y() { return shape_.y(); }
2135  NDARRAY_HOST_DEVICE const auto& y() const { return shape_.y(); }
2136  NDARRAY_HOST_DEVICE auto& z() { return shape_.z(); }
2137  NDARRAY_HOST_DEVICE const auto& z() const { return shape_.z(); }
2138  NDARRAY_HOST_DEVICE auto& c() { return shape_.c(); }
2139  NDARRAY_HOST_DEVICE const auto& c() const { return shape_.c(); }
2140  NDARRAY_HOST_DEVICE auto& w() { return shape_.w(); }
2141  NDARRAY_HOST_DEVICE const auto& w() const { return shape_.w(); }
2142 
2145  NDARRAY_HOST_DEVICE index_t width() const { return shape_.width(); }
2146  NDARRAY_HOST_DEVICE index_t height() const { return shape_.height(); }
2147  NDARRAY_HOST_DEVICE index_t channels() const { return shape_.channels(); }
2148 
2151  NDARRAY_HOST_DEVICE index_t rows() const { return shape_.rows(); }
2152  NDARRAY_HOST_DEVICE index_t columns() const { return shape_.columns(); }
2153 
2157  // TODO: Maybe this should just check for equality of the shape and pointer,
2158  // and let the free function equal serve this purpose.
2159  NDARRAY_HOST_DEVICE bool operator!=(const array_ref& other) const {
2160  if (shape_ != other.shape_) { return true; }
2161 
2162  // TODO: This currently calls operator!= on all elements of the array_ref,
2163  // even after we find a non-equal element
2164  // (https://github.com/dsharlet/array/issues/4).
2165  bool result = false;
2166  copy_shape_traits<Shape, Shape>::for_each_value(
2167  shape_, base_, other.shape_, other.base_, [&](const_reference a, const_reference b) {
2168  if (a != b) { result = true; }
2169  });
2170  return result;
2171  }
2172  NDARRAY_HOST_DEVICE bool operator==(const array_ref& other) const { return !operator!=(other); }
2173 
2174  NDARRAY_HOST_DEVICE const array_ref<T, Shape>& ref() const { return *this; }
2175 
2177  NDARRAY_HOST_DEVICE const const_array_ref<T, Shape> cref() const {
2178  return const_array_ref<T, Shape>(base_, shape_, internal::no_resolve{});
2179  }
2180  NDARRAY_HOST_DEVICE operator const_array_ref<T, Shape>() const { return cref(); }
2181 
2183  template <std::size_t R = rank(), typename = std::enable_if_t<R == 0>>
2184  NDARRAY_HOST_DEVICE operator reference() const {
2185  return *base_;
2186  }
2187 
2190  NDARRAY_HOST_DEVICE void set_shape(const Shape& new_shape, index_t offset = 0) {
2191  assert(new_shape.is_resolved());
2192  assert(new_shape.is_subset_of(shape_, -offset));
2193  shape_ = new_shape;
2194  base_ = internal::pointer_add(base_, offset);
2195  }
2196 };
2197 
2199 template <class T, size_t Rank>
2200 using array_ref_of_rank = array_ref<T, shape_of_rank<Rank>>;
2201 template <class T, size_t Rank>
2202 using const_array_ref_of_rank = array_ref_of_rank<const T, Rank>;
2203 
2205 template <class T, size_t Rank>
2206 using dense_array_ref = array_ref<T, dense_shape<Rank>>;
2207 template <class T, size_t Rank>
2208 using const_dense_array_ref = dense_array_ref<const T, Rank>;
2209 
2211 template <class T, class Shape, class Alloc = std::allocator<T>>
2212 class array {
2213 public:
2215  using allocator_type = Alloc;
2216  using alloc_traits = std::allocator_traits<Alloc>;
2217 
2219  using value_type = T;
2220  using reference = value_type&;
2221  using const_reference = const value_type&;
2222  using pointer = typename alloc_traits::pointer;
2223  using const_pointer = typename alloc_traits::const_pointer;
2224 
2226  using shape_type = Shape;
2227  using index_type = typename Shape::index_type;
2228  using shape_traits_type = shape_traits<Shape>;
2229  using copy_shape_traits_type = copy_shape_traits<Shape, Shape>;
2230  using size_type = size_t;
2231 
2233  static constexpr size_t rank() { return Shape::rank(); }
2234 
2236  static constexpr bool is_scalar() { return Shape::is_scalar(); }
2237 
2238 private:
2239  template <class... Args>
2240  using enable_if_all_indices =
2241  std::enable_if_t<sizeof...(Args) == rank() && internal::all_of_type<index_t, Args...>::value>;
2242 
2243  template <class... Args>
2244  using enable_if_any_slices =
2245  std::enable_if_t<sizeof...(Args) == rank() &&
2246  internal::all_of_any_type<std::tuple<interval<>, dim<>>, Args...>::value &&
2247  !internal::all_of_type<index_t, Args...>::value>;
2248 
2249  template <size_t Dim>
2250  using enable_if_dim = std::enable_if_t<(Dim < rank())>;
2251 
2252  Alloc alloc_;
2253  pointer buffer_;
2254  size_type buffer_size_;
2255  pointer base_;
2256  Shape shape_;
2257 
2258  // After allocate(), the buffer is allocated but the elements are uninitialized.
2259  void allocate() {
2260  assert(!buffer_);
2261  shape_.resolve();
2262  size_type flat_extent = shape_.flat_extent();
2263  if (flat_extent > 0) {
2264  buffer_size_ = flat_extent;
2265  buffer_ = alloc_traits::allocate(alloc_, buffer_size_);
2266  }
2267  base_ = buffer_ - shape_.flat_min();
2268  }
2269 
2270  // Call the constructor on all of the elements of the array.
2271  void construct() {
2272  assert(base_ || shape_.empty());
2273  for_each_value([&](T& x) { alloc_traits::construct(alloc_, &x); });
2274  }
2275  void construct(const T& init) {
2276  assert(base_ || shape_.empty());
2277  for_each_value([&](T& x) { alloc_traits::construct(alloc_, &x, init); });
2278  }
2279  void copy_construct(const array& other) {
2280  assert(base_ || shape_.empty());
2281  assert(shape_ == other.shape_);
2282  copy_shape_traits_type::for_each_value(other.shape_, other.base_, shape_, base_,
2283  [&](const_reference src, reference dst) { alloc_traits::construct(alloc_, &dst, src); });
2284  }
2285  void move_construct(array& other) {
2286  assert(base_ || shape_.empty());
2287  assert(shape_ == other.shape_);
2288  copy_shape_traits_type::for_each_value(
2289  other.shape_, other.base_, shape_, base_, [&](reference src, reference dst) {
2290  alloc_traits::construct(alloc_, &dst, std::move(src));
2291  });
2292  }
2293 
2294  // Call the destructor on every element.
2295  void destroy() {
2296  assert(base_ || shape_.empty());
2297  for_each_value([&](T& x) { alloc_traits::destroy(alloc_, &x); });
2298  }
2299 
2300  void deallocate() {
2301  if (base_) {
2302  destroy();
2303  base_ = nullptr;
2304  shape_ = Shape();
2305  alloc_traits::deallocate(alloc_, buffer_, buffer_size_);
2306  buffer_ = nullptr;
2307  }
2308  }
2309 
2310  static Alloc get_allocator_for_move(array& other, std::true_type) {
2311  return std::move(other.alloc_);
2312  }
2313  static Alloc get_allocator_for_move(array& other, std::false_type) { return Alloc(); }
2314  static Alloc get_allocator_for_move(array& other) {
2315  return get_allocator_for_move(
2316  other, typename alloc_traits::propagate_on_container_move_assignment());
2317  }
2318 
2319  void swap_except_allocator(array& other) {
2320  using std::swap;
2321  swap(buffer_, other.buffer_);
2322  swap(buffer_size_, other.buffer_size_);
2323  swap(base_, other.base_);
2324  swap(shape_, other.shape_);
2325  }
2326 
2327 public:
2331  array() : array(Shape()) {}
2332  explicit array(const Alloc& alloc) : array(Shape(), alloc) {}
2333 
2336  array(const Shape& shape, const T& value, const Alloc& alloc) : array(alloc) {
2337  assign(shape, value);
2338  }
2339  array(const Shape& shape, const T& value) : array() { assign(shape, value); }
2340 
2343  explicit array(const Shape& shape, const Alloc& alloc)
2344  : alloc_(alloc), buffer_(nullptr), buffer_size_(0), base_(nullptr), shape_(shape) {
2345  allocate();
2346  construct();
2347  }
2348  explicit array(const Shape& shape)
2349  : buffer_(nullptr), buffer_size_(0), base_(nullptr), shape_(shape) {
2350  allocate();
2351  construct();
2352  }
2353 
2356  array(const array& other)
2357  : alloc_(alloc_traits::select_on_container_copy_construction(other.get_allocator())),
2358  buffer_(nullptr), buffer_size_(0), base_(nullptr) {
2359  assign(other);
2360  }
2361 
2364  array(const array& other, const Alloc& alloc) : array(alloc) { assign(other); }
2365 
2371  array(array&& other)
2372  : alloc_(get_allocator_for_move(other)), buffer_(nullptr), buffer_size_(0), base_(nullptr) {
2373  using std::swap;
2374  // If we already took other's allocator, it might be in an undefined state.
2375  // Just assume we can swap in that case.
2376  if (typename alloc_traits::propagate_on_container_move_assignment() ||
2377  alloc_ == other.get_allocator()) {
2378  swap_except_allocator(other);
2379  } else {
2380  shape_ = other.shape_;
2381  allocate();
2382  move_construct(other);
2383  }
2384  }
2385 
2391  array(array&& other, const Alloc& alloc)
2392  : alloc_(alloc), buffer_(nullptr), buffer_size_(0), base_(nullptr) {
2393  using std::swap;
2394  if (alloc_ == other.get_allocator()) {
2395  swap_except_allocator(other);
2396  } else {
2397  shape_ = other.shape_;
2398  allocate();
2399  move_construct(other);
2400  }
2401  }
2402 
2403  ~array() { deallocate(); }
2404 
2405  // Let's choose not to provide array(const array_ref&) constructors. This would
2406  // be a deep copy that may be unintentional; it seems better to require being
2407  // explicit via `make_copy`.
2408 
2413  array& operator=(const array& other) {
2414  if (base_ == other.base_) {
2415  if (base_) {
2416  assert(shape_ == other.shape_);
2417  } else {
2418  shape_ = other.shape_;
2419  assert(shape_.empty());
2420  }
2421  return *this;
2422  }
2423 
2424  if (alloc_traits::propagate_on_container_copy_assignment::value) {
2425  if (alloc_ != other.get_allocator()) { deallocate(); }
2426  alloc_ = other.get_allocator();
2427  }
2428 
2429  assign(other);
2430  return *this;
2431  }
2432 
2437  array& operator=(array&& other) {
2438  using std::swap;
2439  if (base_ == other.base_) {
2440  if (base_) {
2441  assert(shape_ == other.shape_);
2442  } else {
2443  swap(shape_, other.shape_);
2444  assert(shape_.empty());
2445  }
2446  return *this;
2447  }
2448 
2449  if (std::allocator_traits<allocator_type>::propagate_on_container_move_assignment::value) {
2450  swap(alloc_, other.alloc_);
2451  swap_except_allocator(other);
2452  } else if (alloc_ == other.get_allocator()) {
2453  swap_except_allocator(other);
2454  } else {
2455  assign(std::move(other));
2456  }
2457  return *this;
2458  }
2459 
2463  void assign(const array& other) {
2464  if (base_ == other.base_) {
2465  if (base_) {
2466  assert(shape_ == other.shape_);
2467  } else {
2468  shape_ = other.shape_;
2469  assert(shape_.empty());
2470  }
2471  return;
2472  }
2473  if (base_ && shape_ == other.shape_) {
2474  destroy();
2475  } else {
2476  deallocate();
2477  shape_ = other.shape_;
2478  allocate();
2479  }
2480  copy_construct(other);
2481  }
2482  void assign(array&& other) {
2483  if (base_ == other.base_) {
2484  if (base_) {
2485  assert(shape_ == other.shape_);
2486  } else {
2487  shape_ = other.shape_;
2488  assert(shape_.empty());
2489  }
2490  return;
2491  }
2492  if (base_ && shape_ == other.shape_) {
2493  destroy();
2494  } else {
2495  deallocate();
2496  shape_ = other.shape_;
2497  allocate();
2498  }
2499  move_construct(other);
2500  }
2501 
2504  void assign(Shape shape, const T& value) {
2505  shape.resolve();
2506  if (shape_ == shape) {
2507  destroy();
2508  } else {
2509  deallocate();
2510  shape_ = shape;
2511  allocate();
2512  }
2513  construct(value);
2514  }
2515 
2517  const Alloc& get_allocator() const { return alloc_; }
2518 
2520  reference operator[](const index_type& indices) { return base_[shape_[indices]]; }
2521  const_reference operator[](const index_type& indices) const { return base_[shape_[indices]]; }
2522  template <class... Args, class = enable_if_all_indices<Args...>>
2523  reference operator()(Args... indices) {
2524  return base_[shape_(indices...)];
2525  }
2526  template <class... Args, class = enable_if_all_indices<Args...>>
2527  const_reference operator()(Args... indices) const {
2528  return base_[shape_(indices...)];
2529  }
2530 
2534  template <class... Args, class = enable_if_any_slices<Args...>>
2535  auto operator[](const std::tuple<Args...>& args) {
2536  return internal::make_array_ref_at(base_, shape_, args);
2537  }
2538  template <class... Args, class = enable_if_any_slices<Args...>>
2539  auto operator()(Args... args) {
2540  return internal::make_array_ref_at(base_, shape_, std::make_tuple(args...));
2541  }
2542  template <class... Args, class = enable_if_any_slices<Args...>>
2543  auto operator[](const std::tuple<Args...>& args) const {
2544  return internal::make_array_ref_at(base_, shape_, args);
2545  }
2546  template <class... Args, class = enable_if_any_slices<Args...>>
2547  auto operator()(Args... args) const {
2548  return internal::make_array_ref_at(base_, shape_, std::make_tuple(args...));
2549  }
2550 
2553  template <class Fn, class = internal::enable_if_callable<Fn, reference>>
2554  void for_each_value(Fn&& fn) {
2555  shape_traits_type::for_each_value(shape_, base_, fn);
2556  }
2557  template <class Fn, class = internal::enable_if_callable<Fn, const_reference>>
2558  void for_each_value(Fn&& fn) const {
2559  shape_traits_type::for_each_value(shape_, base_, fn);
2560  }
2561 
2563  pointer base() { return base_; }
2564  const_pointer base() const { return base_; }
2565 
2568  pointer data() { return internal::pointer_add(base_, shape_.flat_min()); }
2569  const_pointer data() const { return internal::pointer_add(base_, shape_.flat_min()); }
2570 
2572  const Shape& shape() const { return shape_; }
2573 
2574  template <size_t D, class = enable_if_dim<D>>
2575  const auto& dim() const {
2576  return shape_.template dim<D>();
2577  }
2578  const nda::dim<> dim(size_t d) const { return shape_.dim(d); }
2579  size_type size() const { return shape_.size(); }
2580  bool empty() const { return shape_.empty(); }
2581  bool is_compact() const { return shape_.is_compact(); }
2582 
2587  void clear() {
2588  deallocate();
2589  shape_ = Shape();
2590  allocate();
2591  construct();
2592  }
2593 
2596  void reshape(Shape new_shape) {
2597  new_shape.resolve();
2598  if (shape_ == new_shape) { return; }
2599 
2600  // Allocate an array with the new shape.
2601  array new_array(new_shape);
2602 
2603  // Move the common elements to the new array.
2604  Shape intersection =
2605  internal::clamp(new_shape.dims(), shape_.dims(), typename Shape::dim_indices());
2606  pointer intersection_base =
2607  internal::pointer_add(new_array.base_, new_shape[intersection.min()]);
2608  copy_shape_traits_type::for_each_value(
2609  shape_, base_, intersection, intersection_base, internal::move_assign<T, T>);
2610 
2611  *this = std::move(new_array);
2612  }
2613 
2618  void set_shape(const Shape& new_shape, index_t offset = 0) {
2619  static_assert(std::is_trivial<value_type>::value, "set_shape is broken for non-trivial types.");
2620  assert(new_shape.is_resolved());
2621  assert(new_shape.is_subset_of(shape_, -offset));
2622  shape_ = new_shape;
2623  base_ = internal::pointer_add(base_, offset);
2624  }
2625 
2628  const auto& i() const { return shape_.i(); }
2629  const auto& j() const { return shape_.j(); }
2630  const auto& k() const { return shape_.k(); }
2631 
2634  const auto& x() const { return shape_.x(); }
2635  const auto& y() const { return shape_.y(); }
2636  const auto& z() const { return shape_.z(); }
2637  const auto& c() const { return shape_.c(); }
2638  const auto& w() const { return shape_.w(); }
2639 
2642  index_t width() const { return shape_.width(); }
2643  index_t height() const { return shape_.height(); }
2644  index_t channels() const { return shape_.channels(); }
2645 
2648  index_t rows() const { return shape_.rows(); }
2649  index_t columns() const { return shape_.columns(); }
2650 
2654  bool operator!=(const array& other) const { return cref() != other.cref(); }
2655  bool operator==(const array& other) const { return cref() == other.cref(); }
2656 
2659  void swap(array& other) {
2660  using std::swap;
2661 
2662  if (alloc_traits::propagate_on_container_swap::value) {
2663  swap(alloc_, other.alloc_);
2664  swap_except_allocator(other);
2665  } else {
2666  // TODO: If the shapes are equal, we could swap each element without the
2667  // temporary allocation.
2668  array temp(std::move(other));
2669  other = std::move(*this);
2670  *this = std::move(temp);
2671  }
2672  }
2673 
2675  array_ref<T, Shape> ref() { return array_ref<T, Shape>(base_, shape_, internal::no_resolve{}); }
2676  const_array_ref<T, Shape> cref() const {
2677  return const_array_ref<T, Shape>(base_, shape_, internal::no_resolve{});
2678  }
2679  const_array_ref<T, Shape> ref() const { return cref(); }
2680  operator array_ref<T, Shape>() { return ref(); }
2681  operator const_array_ref<T, Shape>() const { return cref(); }
2682 
2684  template <std::size_t R = rank(), typename = std::enable_if_t<R == 0>>
2685  operator reference() {
2686  return *base_;
2687  }
2688  template <std::size_t R = rank(), typename = std::enable_if_t<R == 0>>
2689  operator const_reference() const {
2690  return *base_;
2691  }
2692 
2693  // We can't shadow T or Alloc here.
2694  template <typename NewShape, typename T2, typename OldShape, typename Alloc2>
2695  friend array<T2, NewShape, Alloc2> move_reinterpret_shape(
2696  array<T2, OldShape, Alloc2>&& from, const NewShape& new_shape, index_t offset);
2697  template <typename NewShape, typename T2, typename OldShape, typename Alloc2>
2698  friend array<T2, NewShape, Alloc2> move_reinterpret_shape(
2699  array<T2, OldShape, Alloc2>&& from, index_t offset);
2700 };
2701 
2703 template <class T, size_t Rank, class Alloc = std::allocator<T>>
2704 using array_of_rank = array<T, shape_of_rank<Rank>, Alloc>;
2705 
2707 template <class T, size_t Rank, class Alloc = std::allocator<T>>
2708 using dense_array = array<T, dense_shape<Rank>, Alloc>;
2709 
2711 template <class T, class Shape, class Alloc = std::allocator<T>,
2712  class = internal::enable_if_allocator<Alloc>>
2713 auto make_array(const Shape& shape, const Alloc& alloc = Alloc()) {
2714  return array<T, Shape, Alloc>(shape, alloc);
2715 }
2716 template <class T, class Shape, class Alloc = std::allocator<T>,
2717  class = internal::enable_if_allocator<Alloc>>
2718 auto make_array(const Shape& shape, const T& value, const Alloc& alloc = Alloc()) {
2719  return array<T, Shape, Alloc>(shape, value, alloc);
2720 }
2721 
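A usage sketch (illustrative, not part of array.h):

  void example_make_array() {
    auto a = nda::make_array<float>(nda::dense_shape<2>(4, 5));   // 4x5 floats
    auto b = nda::make_array<int>(nda::shape_of_rank<1>(10), 3);  // ten ints equal to 3
    (void)a; (void)b;
  }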
2723 template <class T, class Shape, class Alloc>
2724 void swap(array<T, Shape, Alloc>& a, array<T, Shape, Alloc>& b) {
2725  a.swap(b);
2726 }
2727 
2731 template <class TSrc, class TDst, class ShapeSrc, class ShapeDst,
2732  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2733 void copy(const array_ref<TSrc, ShapeSrc>& src, const array_ref<TDst, ShapeDst>& dst) {
2734  if (dst.shape().empty()) { return; }
2735 
2736  assert(src.shape().is_in_range(dst.shape().min()) && src.shape().is_in_range(dst.shape().max()));
2737 
2738  copy_shape_traits<ShapeSrc, ShapeDst>::for_each_value(
2739  src.shape(), src.base(), dst.shape(), dst.base(), internal::copy_assign<TSrc, TDst>);
2740 }
2741 template <class TSrc, class TDst, class ShapeSrc, class ShapeDst, class AllocDst,
2742  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2743 void copy(const array_ref<TSrc, ShapeSrc>& src, array<TDst, ShapeDst, AllocDst>& dst) {
2744  copy(src, dst.ref());
2745 }
2746 template <class TSrc, class TDst, class ShapeSrc, class ShapeDst, class AllocSrc,
2747  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2748 void copy(const array<TSrc, ShapeSrc, AllocSrc>& src, const array_ref<TDst, ShapeDst>& dst) {
2749  copy(src.cref(), dst);
2750 }
2751 template <class TSrc, class TDst, class ShapeSrc, class ShapeDst, class AllocSrc, class AllocDst,
2752  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2753 void copy(const array<TSrc, ShapeSrc, AllocSrc>& src, array<TDst, ShapeDst, AllocDst>& dst) {
2754  copy(src.cref(), dst.ref());
2755 }
2756 
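A usage sketch (illustrative, not part of array.h): copying between arrays whose shapes have the same rank.

  void example_copy() {
    nda::array<int, nda::dense_shape<2>> src(nda::dense_shape<2>(4, 4), 7);
    nda::array<int, nda::shape_of_rank<2>> dst(nda::shape_of_rank<2>(4, 4));
    nda::copy(src, dst);  // every index of dst must be in range of src
  }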
2758 template <class T, class ShapeSrc,
2759  class Alloc = std::allocator<typename std::remove_const<T>::type>>
2760 auto make_copy(const array_ref<T, ShapeSrc>& src, const Alloc& alloc = Alloc()) {
2761  array<typename std::allocator_traits<Alloc>::value_type, ShapeSrc, Alloc> dst(src.shape(), alloc);
2762  copy(src, dst);
2763  return dst;
2764 }
2765 template <class T, class ShapeSrc, class AllocSrc, class AllocDst = AllocSrc,
2766  class = internal::enable_if_allocator<AllocDst>>
2767 auto make_copy(const array<T, ShapeSrc, AllocSrc>& src, const AllocDst& alloc = AllocDst()) {
2768  return make_copy(src.cref(), alloc);
2769 }
2770 
2772 template <class T, class ShapeSrc, class ShapeDst,
2773  class Alloc = std::allocator<typename std::remove_const<T>::type>,
2774  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2775 auto make_copy(
2776  const array_ref<T, ShapeSrc>& src, const ShapeDst& shape, const Alloc& alloc = Alloc()) {
2777  array<typename std::allocator_traits<Alloc>::value_type, ShapeDst, Alloc> dst(shape, alloc);
2778  copy(src, dst);
2779  return dst;
2780 }
2781 template <class T, class ShapeSrc, class ShapeDst, class AllocSrc, class AllocDst = AllocSrc,
2782  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2783 auto make_copy(const array<T, ShapeSrc, AllocSrc>& src, const ShapeDst& shape,
2784  const AllocDst& alloc = AllocDst()) {
2785  return make_copy(src.cref(), shape, alloc);
2786 }
2787 
2790 template <class T, class Shape, class Alloc = std::allocator<typename std::remove_const<T>::type>>
2791 auto make_compact_copy(const array_ref<T, Shape>& src, const Alloc& alloc = Alloc()) {
2792  return make_copy(src, make_compact(src.shape()), alloc);
2793 }
2794 template <class T, class Shape, class AllocSrc, class AllocDst = AllocSrc>
2795 auto make_compact_copy(const array<T, Shape, AllocSrc>& src, const AllocDst& alloc = AllocDst()) {
2796  return make_compact_copy(src.cref(), alloc);
2797 }
2798 
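A usage sketch (illustrative, not part of array.h): making a densely packed copy of a cropped view, assuming interval's (min, extent) constructor.

  void example_make_compact_copy() {
    nda::array<int, nda::dense_shape<2>> img(nda::dense_shape<2>(8, 8), 0);
    auto window = img(nda::interval<>(2, 4), nda::interval<>(2, 4));  // min 2, extent 4 in each dim
    auto packed = nda::make_compact_copy(window);                     // compact 4x4 copy
    (void)packed;
  }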
2802 template <class TSrc, class TDst, class ShapeSrc, class ShapeDst,
2803  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2804 void move(const array_ref<TSrc, ShapeSrc>& src, const array_ref<TDst, ShapeDst>& dst) {
2805  if (dst.shape().empty()) { return; }
2806 
2807  assert(src.shape().is_in_range(dst.shape().min()) && src.shape().is_in_range(dst.shape().max()));
2808 
2809  copy_shape_traits<ShapeSrc, ShapeDst>::for_each_value(
2810  src.shape(), src.base(), dst.shape(), dst.base(), internal::move_assign<TSrc, TDst>);
2811 }
2812 template <class TSrc, class TDst, class ShapeSrc, class ShapeDst, class AllocDst,
2813  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2814 void move(const array_ref<TSrc, ShapeSrc>& src, array<TDst, ShapeDst, AllocDst>& dst) {
2815  move(src, dst.ref());
2816 }
2817 template <class TSrc, class TDst, class ShapeSrc, class ShapeDst, class AllocSrc,
2818  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2819 void move(array<TSrc, ShapeSrc, AllocSrc>& src, const array_ref<TDst, ShapeDst>& dst) {
2820  move(src.ref(), dst);
2821 }
2822 template <class TSrc, class TDst, class ShapeSrc, class ShapeDst, class AllocSrc, class AllocDst,
2823  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2824 void move(array<TSrc, ShapeSrc, AllocSrc>& src, array<TDst, ShapeDst, AllocDst>& dst) {
2825  move(src.ref(), dst.ref());
2826 }
2827 template <class T, class Shape, class Alloc>
2828 void move(array<T, Shape, Alloc>&& src, array<T, Shape, Alloc>& dst) {
2829  dst = std::move(src);
2830 }
2831 
2834 template <class T, class ShapeSrc, class ShapeDst, class Alloc = std::allocator<T>,
2835  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2836 auto make_move(
2837  const array_ref<T, ShapeSrc>& src, const ShapeDst& shape, const Alloc& alloc = Alloc()) {
2838  array<typename std::allocator_traits<Alloc>::value_type, ShapeDst, Alloc> dst(shape, alloc);
2839  move(src, dst);
2840  return dst;
2841 }
2842 template <class T, class ShapeSrc, class ShapeDst, class AllocSrc, class AllocDst = AllocSrc,
2843  class = internal::enable_if_shapes_copy_compatible<ShapeDst, ShapeSrc>>
2844 auto make_move(
2845  array<T, ShapeSrc, AllocSrc>& src, const ShapeDst& shape, const AllocDst& alloc = AllocDst()) {
2846  return make_move(src.ref(), shape, alloc);
2847 }
2848 template <class T, class Shape, class Alloc>
2849 auto make_move(array<T, Shape, Alloc>&& src, const Shape& shape, const Alloc& alloc = Alloc()) {
2850  if (src.shape() == shape && alloc == src.get_allocator()) {
2851  return src;
2852  } else {
2853  return make_move(src.ref(), shape, alloc);
2854  }
2855 }
2856 
2859 template <class T, class Shape, class Alloc = std::allocator<T>>
2860 auto make_compact_move(const array_ref<T, Shape>& src, const Alloc& alloc = Alloc()) {
2861  return make_move(src, make_compact(src.shape()), alloc);
2862 }
2863 template <class T, class Shape, class AllocSrc, class AllocDst = AllocSrc>
2864 auto make_compact_move(array<T, Shape, AllocSrc>& src, const AllocDst& alloc = AllocDst()) {
2865  return make_compact_move(src.ref(), alloc);
2866 }
2867 template <class T, class Shape, class Alloc>
2868 auto make_compact_move(array<T, Shape, Alloc>&& src, const Alloc& alloc = Alloc()) {
2869  return make_move(src, make_compact(src.shape()), alloc);
2870 }
2871 
2873 template <class T, class Shape>
2874 NDARRAY_HOST_DEVICE void fill(const array_ref<T, Shape>& dst, const T& value) {
2875  dst.for_each_value([value](T& i) { i = value; });
2876 }
2877 template <class T, class Shape, class Alloc>
2878 void fill(array<T, Shape, Alloc>& dst, const T& value) {
2879  fill(dst.ref(), value);
2880 }
2881 
2885 template <class T, class Shape, class Generator, class = internal::enable_if_callable<Generator>>
2886 NDARRAY_HOST_DEVICE void generate(const array_ref<T, Shape>& dst, Generator&& g) {
2887  dst.for_each_value([g = std::move(g)](T& i) { i = g(); });
2888 }
2889 template <class T, class Shape, class Alloc, class Generator,
2890  class = internal::enable_if_callable<Generator>>
2891 void generate(array<T, Shape, Alloc>& dst, Generator&& g) {
2892  generate(dst.ref(), g);
2893 }
2894 
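A usage sketch (illustrative, not part of array.h):

  void example_fill_generate() {
    nda::array<float, nda::dense_shape<1>> v(nda::dense_shape<1>(5));
    nda::fill(v, 1.0f);
    int n = 0;
    nda::generate(v, [&n]() { return static_cast<float>(n++); });  // 0, 1, 2, 3, 4
  }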
2898 template <typename T, typename Shape, class Fn>
2899 NDARRAY_HOST_DEVICE void transform_index(const array_ref<T, Shape>& dst, Fn&& fn) {
2900  using index_type = typename Shape::index_type;
2901  for_each_index(
2902  dst.shape(), [&, fn = std::move(fn)](const index_type& idx) { dst[idx] = fn(idx); });
2903 }
2904 template <typename T, typename Shape, class Fn>
2905 void transform_index(array<T, Shape>& dst, Fn&& fn) {
2906  transform_index(dst.ref(), fn);
2907 }
2908 
2912 template <typename T, typename Shape, class Fn>
2913 NDARRAY_HOST_DEVICE void transform_indices(const array_ref<T, Shape>& dst, Fn&& fn) {
2914  using index_type = typename Shape::index_type;
2915  transform_index(
2916  dst, [fn = std::move(fn)](const index_type& idx) { return internal::apply(fn, idx); });
2917 }
2918 template <typename T, typename Shape, class Fn>
2919 void transform_indices(array<T, Shape>& dst, Fn&& fn) {
2920  transform_indices(dst.ref(), fn);
2921 }
2922 
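A usage sketch (illustrative, not part of array.h): writing each element as a function of its indices.

  void example_transform_indices() {
    nda::array<int, nda::dense_shape<2>> m(nda::dense_shape<2>(3, 3));
    nda::transform_indices(m, [](nda::index_t i, nda::index_t j) { return i == j ? 1 : 0; });
  }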
2924 template <class TA, class ShapeA, class TB, class ShapeB>
2925 NDARRAY_HOST_DEVICE bool equal(const array_ref<TA, ShapeA>& a, const array_ref<TB, ShapeB>& b) {
2926  if (a.shape().min() != b.shape().min() || a.shape().extent() != b.shape().extent()) {
2927  return false;
2928  }
2929 
2930  bool result = true;
2931  copy_shape_traits<ShapeA, ShapeB>::for_each_value(
2932  a.shape(), a.base(), b.shape(), b.base(), [&](const TA& a, const TB& b) {
2933  if (a != b) { result = false; }
2934  });
2935  return result;
2936 }
2937 template <class TA, class ShapeA, class TB, class ShapeB, class AllocB>
2938 bool equal(const array_ref<TA, ShapeA>& a, const array<TB, ShapeB, AllocB>& b) {
2939  return equal(a, b.ref());
2940 }
2941 template <class TA, class ShapeA, class AllocA, class TB, class ShapeB>
2942 bool equal(const array<TA, ShapeA, AllocA>& a, const array_ref<TB, ShapeB>& b) {
2943  return equal(a.ref(), b);
2944 }
2945 template <class TA, class ShapeA, class AllocA, class TB, class ShapeB, class AllocB>
2946 bool equal(const array<TA, ShapeA, AllocA>& a, const array<TB, ShapeB, AllocB>& b) {
2947  return equal(a.ref(), b.ref());
2948 }
2949 
2952 template <class NewShape, class T, class OldShape>
2953 NDARRAY_HOST_DEVICE array_ref<T, NewShape> convert_shape(const array_ref<T, OldShape>& a) {
2954  return array_ref<T, NewShape>(a.base(), convert_shape<NewShape>(a.shape()), internal::no_resolve{});
2955 }
2956 template <class NewShape, class T, class OldShape, class Allocator>
2957 array_ref<T, NewShape> convert_shape(array<T, OldShape, Allocator>& a) {
2958  return convert_shape<NewShape>(a.ref());
2959 }
2960 template <class NewShape, class T, class OldShape, class Allocator>
2961 const_array_ref<T, NewShape> convert_shape(const array<T, OldShape, Allocator>& a) {
2962  return convert_shape<NewShape>(a.cref());
2963 }
2964 
2967 template <class U, class T, class Shape, class = std::enable_if_t<sizeof(T) == sizeof(U)>>
2968 NDARRAY_HOST_DEVICE array_ref<U, Shape> reinterpret(const array_ref<T, Shape>& a) {
2969  return array_ref<U, Shape>(reinterpret_cast<U*>(a.base()), a.shape(), internal::no_resolve{});
2970 }
2971 template <class U, class T, class Shape, class Alloc,
2972  class = std::enable_if_t<sizeof(T) == sizeof(U)>>
2973 array_ref<U, Shape> reinterpret(array<T, Shape, Alloc>& a) {
2974  return reinterpret<U>(a.ref());
2975 }
2976 template <class U, class T, class Shape, class Alloc,
2977  class = std::enable_if_t<sizeof(T) == sizeof(U)>>
2978 const_array_ref<U, Shape> reinterpret(const array<T, Shape, Alloc>& a) {
2979  return reinterpret<const U>(a.cref());
2980 }
2981 
2984 template <class U, class T, class Shape>
2985 NDARRAY_HOST_DEVICE array_ref<U, Shape> reinterpret_const(const array_ref<T, Shape>& a) {
2986  return array_ref<U, Shape>(const_cast<U*>(a.base()), a.shape(), internal::no_resolve{});
2987 }
2988 
2991 template <class NewShape, class T, class OldShape>
2992 NDARRAY_HOST_DEVICE array_ref<T, NewShape> reinterpret_shape(
2993  const array_ref<T, OldShape>& a, const NewShape& new_shape, index_t offset = 0) {
2994  array_ref<T, NewShape> result(a.base() + offset, new_shape);
2995  assert(result.shape().is_subset_of(a.shape(), -offset));
2996  return result;
2997 }
2998 template <class NewShape, class T, class OldShape, class Allocator>
2999 array_ref<T, NewShape> reinterpret_shape(
3000  array<T, OldShape, Allocator>& a, const NewShape& new_shape, index_t offset = 0) {
3001  return reinterpret_shape(a.ref(), new_shape, offset);
3002 }
3003 template <class NewShape, class T, class OldShape, class Allocator>
3004 const_array_ref<T, NewShape> reinterpret_shape(
3005  const array<T, OldShape, Allocator>& a, const NewShape& new_shape, index_t offset = 0) {
3006  return reinterpret_shape(a.cref(), new_shape, offset);
3007 }
3008 
3013 template <typename NewShape, typename T, typename OldShape, typename Alloc>
3014 array<T, NewShape, Alloc> move_reinterpret_shape(
3015  array<T, OldShape, Alloc>&& from, const NewShape& new_shape, index_t offset = 0) {
3016  // TODO: Use enable_if to implement this check. It is difficult to do due to
3017  // the friend declaration.
3018  static_assert(
3019  std::is_trivial<T>::value, "move_reinterpret_shape is broken for non-trivial types.");
3020  assert(new_shape.is_subset_of(from.shape(), offset));
3021  array<T, NewShape, Alloc> result(from.get_allocator());
3022  assert(result.alloc_ == from.get_allocator());
3023 
3024  using std::swap;
3025  swap(result.buffer_, from.buffer_);
3026  swap(result.buffer_size_, from.buffer_size_);
3027  swap(result.base_, from.base_);
3028  result.shape_ = new_shape;
3029  from.shape_ = OldShape();
3030  result.base_ += offset;
3031 
3032  return result;
3033 }
3034 template <typename NewShape, typename T, typename OldShape, typename Alloc>
3035 array<T, NewShape, Alloc> move_reinterpret_shape(
3036  array<T, OldShape, Alloc>&& from, index_t offset = 0) {
3037  NewShape new_shape = convert_shape<NewShape>(from.shape());
3038  return move_reinterpret_shape(std::move(from), new_shape, offset);
3039 }
3040 
3044 template <size_t... DimIndices, class T, class OldShape,
3045  class = internal::enable_if_permutation<OldShape::rank(), DimIndices...>>
3046 NDARRAY_HOST_DEVICE auto transpose(const array_ref<T, OldShape>& a) {
3047  return reinterpret_shape(a, transpose<DimIndices...>(a.shape()));
3048 }
3049 template <size_t... DimIndices, class T, class OldShape, class Allocator,
3050  class = internal::enable_if_permutation<OldShape::rank(), DimIndices...>>
3051 auto transpose(array<T, OldShape, Allocator>& a) {
3052  return reinterpret_shape(a, transpose<DimIndices...>(a.shape()));
3053 }
3054 template <size_t... DimIndices, class T, class OldShape, class Allocator,
3055  class = internal::enable_if_permutation<OldShape::rank(), DimIndices...>>
3056 auto transpose(const array<T, OldShape, Allocator>& a) {
3057  return reinterpret_shape(a, transpose<DimIndices...>(a.shape()));
3058 }
3059 template <size_t... DimIndices, class T, class OldShape>
3060 NDARRAY_HOST_DEVICE auto reorder(const array_ref<T, OldShape>& a) {
3061  return reinterpret_shape(a, reorder<DimIndices...>(a.shape()));
3062 }
3063 template <size_t... DimIndices, class T, class OldShape, class Allocator>
3064 auto reorder(array<T, OldShape, Allocator>& a) {
3065  return reinterpret_shape(a, reorder<DimIndices...>(a.shape()));
3066 }
3067 template <size_t... DimIndices, class T, class OldShape, class Allocator>
3068 auto reorder(const array<T, OldShape, Allocator>& a) {
3069  return reinterpret_shape(a, reorder<DimIndices...>(a.shape()));
3070 }
3071 
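A usage sketch (illustrative, not part of array.h): viewing the same data with dimensions swapped.

  void example_transpose() {
    nda::array<int, nda::shape_of_rank<2>> m(nda::shape_of_rank<2>(2, 3));
    auto mt = nda::transpose<1, 0>(m.ref());  // array_ref aliasing m's data
    mt(2, 1) = 5;                             // same element as m(1, 2)
  }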
3076 template <class T, size_t N, size_t Alignment = alignof(T), class BaseAlloc = std::allocator<T>>
3077 class auto_allocator {
3078  alignas(Alignment) char buffer[N * sizeof(T)];
3079  bool allocated;
3080  BaseAlloc alloc;
3081 
3082 public:
3083  using value_type = T;
3084 
3085  using propagate_on_container_copy_assignment = std::false_type;
3086  using propagate_on_container_move_assignment = std::false_type;
3087  using propagate_on_container_swap = std::false_type;
3088 
3089  static auto_allocator select_on_container_copy_construction(const auto_allocator&) {
3090  return auto_allocator();
3091  }
3092 
3093  auto_allocator() : allocated(false) {}
3094  template <class U, size_t U_N, size_t U_A, class U_BaseAlloc>
3095  constexpr auto_allocator(const auto_allocator<U, U_N, U_A, U_BaseAlloc>&) noexcept
3096  : allocated(false) {}
3097  // These constructors/assignment operators are workarounds for a C++
3098  // STL implementation not respecting the propagate typedefs or the
3099  // 'select_on_...' function. (https://github.com/dsharlet/array/issues/7)
3100  auto_allocator(const auto_allocator& copy) noexcept : allocated(false), alloc(copy.alloc) {}
3101  auto_allocator(auto_allocator&& move) noexcept : allocated(false), alloc(std::move(move.alloc)) {}
3102  auto_allocator& operator=(const auto_allocator& copy) {
3103  alloc = copy.alloc;
3104  return *this;
3105  }
3106  auto_allocator& operator=(auto_allocator&& move) {
3107  alloc = std::move(move.alloc);
3108  return *this;
3109  }
3110 
3111  value_type* allocate(size_t n) {
3112  if (!allocated && n <= N) {
3113  allocated = true;
3114  return reinterpret_cast<value_type*>(&buffer[0]);
3115  } else {
3116  return std::allocator_traits<BaseAlloc>::allocate(alloc, n);
3117  }
3118  }
3119  void deallocate(value_type* ptr, size_t n) noexcept {
3120  if (ptr == reinterpret_cast<value_type*>(&buffer[0])) {
3121  assert(allocated);
3122  allocated = false;
3123  } else {
3124  std::allocator_traits<BaseAlloc>::deallocate(alloc, ptr, n);
3125  }
3126  }
3127 
3128  template <class U, size_t U_N, size_t U_A>
3129  friend bool operator==(const auto_allocator& a, const auto_allocator<U, U_N, U_A>& b) {
3130  if (a.allocated || b.allocated) {
3131  return &a.buffer[0] == &b.buffer[0];
3132  } else {
3133  return a.alloc == b.alloc;
3134  }
3135  }
3136 
3137  template <class U, size_t U_N, size_t U_A>
3138  friend bool operator!=(const auto_allocator& a, const auto_allocator<U, U_N, U_A>& b) {
3139  return !(a == b);
3140  }
3141 };
3142 
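A usage sketch (illustrative, not part of array.h): a small array whose storage lives inside the allocator itself, so no heap allocation is made.

  void example_auto_allocator() {
    nda::array<float, nda::fixed_dense_shape<16>, nda::auto_allocator<float, 16>> v;
    v(3) = 2.0f;  // the 16 elements live in the allocator's internal buffer
  }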
3147 template <class BaseAlloc>
3148 class uninitialized_allocator : public BaseAlloc {
3149 public:
3150  using value_type = typename std::allocator_traits<BaseAlloc>::value_type;
3151 
3152  using propagate_on_container_copy_assignment =
3153  typename std::allocator_traits<BaseAlloc>::propagate_on_container_copy_assignment;
3154  using propagate_on_container_move_assignment =
3155  typename std::allocator_traits<BaseAlloc>::propagate_on_container_move_assignment;
3156  using propagate_on_container_swap =
3157  typename std::allocator_traits<BaseAlloc>::propagate_on_container_swap;
3158  static uninitialized_allocator select_on_container_copy_construction(
3159  const uninitialized_allocator& alloc) {
3160  return std::allocator_traits<BaseAlloc>::select_on_container_copy_construction(alloc);
3161  }
3162 
3163  value_type* allocate(size_t n) { return std::allocator_traits<BaseAlloc>::allocate(*this, n); }
3164  void deallocate(value_type* p, size_t n) noexcept {
3165  return std::allocator_traits<BaseAlloc>::deallocate(*this, p, n);
3166  }
3167 
3168  // TODO: Consider adding an enable_if to this to disable it for
3169  // non-trivial value_types.
3170  template <class... Args>
3171  NDARRAY_INLINE void construct(value_type* ptr, Args&&... args) {
3172  // Skip default construction.
3173  if (sizeof...(Args) > 0) {
3174  std::allocator_traits<BaseAlloc>::construct(*this, ptr, std::forward<Args>(args)...);
3175  }
3176  }
3177 
3178  template <class OtherBaseAlloc>
3179  friend bool operator==(
3180  const uninitialized_allocator& a, const uninitialized_allocator<OtherBaseAlloc>& b) {
3181  return static_cast<const BaseAlloc&>(a) == static_cast<const OtherBaseAlloc&>(b);
3182  }
3183  template <class OtherBaseAlloc>
3184  friend bool operator!=(
3185  const uninitialized_allocator& a, const uninitialized_allocator<OtherBaseAlloc>& b) {
3186  return static_cast<const BaseAlloc&>(a) != static_cast<const OtherBaseAlloc&>(b);
3187  }
3188 };
3189 
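A usage sketch (illustrative, not part of array.h): skipping default construction of trivial elements.

  void example_uninitialized() {
    using uninit_alloc = nda::uninitialized_allocator<std::allocator<float>>;
    nda::array<float, nda::dense_shape<1>, uninit_alloc> buf(nda::dense_shape<1>(1024));
    // buf's elements are uninitialized here; write them before reading.
    nda::fill(buf, 0.0f);
  }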
3192 template <class T, class = std::enable_if_t<std::is_trivial<T>::value>>
3193 using uninitialized_std_allocator = uninitialized_allocator<std::allocator<T>>;
3194 
3197 template <class T, size_t N, size_t Alignment = sizeof(T),
3198  class = std::enable_if_t<std::is_trivial<T>::value>>
3199 using uninitialized_auto_allocator = uninitialized_allocator<auto_allocator<T, N, Alignment>>;
3200 
3201 } // namespace nda
3202 
3203 #endif // NDARRAY_ARRAY_H
static constexpr bool is_scalar()
Definition: array.h:1073
static constexpr size_t rank()
Definition: array.h:2012
static constexpr bool is_scalar()
Definition: array.h:2236
Definition: absl.h:10
NDARRAY_HOST_DEVICE auto & i()
Definition: array.h:1265
NDARRAY_HOST_DEVICE auto reorder(const shape< Dims... > &shape)
Definition: array.h:1344
NDARRAY_HOST_DEVICE bool operator==(const shape< OtherDims... > &other) const
Definition: array.h:1299
Definition: array.h:1891
NDARRAY_HOST_DEVICE size_type size() const
Definition: array.h:1232
Definition: array.h:1867
NDARRAY_HOST_DEVICE dim(const dim< CopyMin, CopyExtent, CopyStride > &other)
Definition: array.h:442
NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t clamp(index_t x, index_t min, index_t max)
Definition: array.h:376
void move(const array_ref< TSrc, ShapeSrc > &src, const array_ref< TDst, ShapeDst > &dst)
Definition: array.h:2804
std::tuple< Dims... > dims_type
Definition: array.h:1067
NDARRAY_HOST_DEVICE bool empty() const
Definition: array.h:1238
array_ref< U, Shape > reinterpret_const(const const_array_ref< T, Shape > &a)
Definition: array.h:2985
reference operator[](const index_type &indices)
Definition: array.h:2520
NDARRAY_HOST_DEVICE index_t width() const
Definition: array.h:2145
NDARRAY_HOST_DEVICE auto make_shape(Dims...dims)
Definition: array.h:1040
Alloc allocator_type
Definition: array.h:2215
T value_type
Definition: array.h:2219
T value_type
Definition: array.h:2001
NDARRAY_HOST_DEVICE array_ref(const array_ref< T, OtherShape > &other)
Definition: array.h:2056
NDARRAY_HOST_DEVICE bool is_subset_of(const OtherShape &other, index_t offset) const
Definition: array.h:1258
NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t max() const
Definition: array.h:301
array< T, NewShape, Alloc > move_reinterpret_shape(array< T, OldShape, Alloc > &&from, const NewShape &new_shape, index_t offset=0)
Definition: array.h:3014
array(const Shape &shape, const T &value, const Alloc &alloc)
Definition: array.h:2336
array(array &&other)
Definition: array.h:2371
NDARRAY_HOST_DEVICE index_iterator begin() const
Definition: array.h:322
NDARRAY_HOST_DEVICE auto & i()
Definition: array.h:2123
NDARRAY_HOST_DEVICE interval(index_t min, index_t extent)
Definition: array.h:265
static constexpr bool is_scalar()
Definition: array.h:2015
internal::tuple_of_n< index_t, Rank > index_of_rank
Definition: array.h:1054
NDARRAY_HOST_DEVICE array_ref< T, NewShape > reinterpret_shape(const array_ref< T, OldShape > &a, const NewShape &new_shape, index_t offset=0)
Definition: array.h:2992
auto make_move(const array_ref< T, ShapeSrc > &src, const ShapeDst &shape, const Alloc &alloc=Alloc())
Definition: array.h:2836
NDARRAY_HOST_DEVICE bool operator==(const dim< OtherMin, OtherExtent, OtherStride > &other) const
Definition: array.h:487
NDARRAY_HOST_DEVICE index_iterator begin(const interval< Min, Extent > &d)
Definition: array.h:367
Shape shape_type
Definition: array.h:2006
NDARRAY_HOST_DEVICE auto & x()
Definition: array.h:2132
NDARRAY_HOST_DEVICE bool is_resolved() const
Definition: array.h:1157
NDARRAY_HOST_DEVICE bool is_compact() const
Definition: array.h:1243
auto make_compact_move(const array_ref< T, Shape > &src, const Alloc &alloc=Alloc())
Definition: array.h:2860
void assign(Shape shape, const T &value)
Definition: array.h:2504
NDARRAY_HOST_DEVICE const nda::dim dim(size_t d) const
Definition: array.h:1207
Definition: array.h:183
const auto & x() const
Definition: array.h:2634
const Alloc & get_allocator() const
Definition: array.h:2517
NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t operator*() const
Definition: array.h:191
NDARRAY_HOST_DEVICE auto transpose(const shape< Dims... > &shape)
Definition: array.h:1332
NDARRAY_HOST_DEVICE array_ref< U, Shape > reinterpret(const array_ref< T, Shape > &a)
Definition: array.h:2968
Definition: array.h:3077
std::ptrdiff_t index_t
Definition: array.h:87
NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t min() const
Definition: array.h:293
array & operator=(array &&other)
Definition: array.h:2437
auto operator[](const std::tuple< Args... > &args)
Definition: array.h:2535
NDARRAY_HOST_DEVICE bool equal(const array_ref< TA, ShapeA > &a, const array_ref< TB, ShapeB > &b)
Definition: array.h:2925
Definition: array.h:248
NDARRAY_INLINE NDARRAY_HOST_DEVICE bool is_in_range(const interval< OtherMin, OtherExtent > &at) const
Definition: array.h:311
array & operator=(const array &other)
Definition: array.h:2413
Definition: array.h:231
NDARRAY_HOST_DEVICE bool operator==(const interval< OtherMin, OtherExtent > &other) const
Definition: array.h:329
NDARRAY_HOST_DEVICE dims_type & dims()
Definition: array.h:1213
NDARRAY_HOST_DEVICE void set_shape(const Shape &new_shape, index_t offset=0)
Definition: array.h:2190
bool operator!=(const array &other) const
Definition: array.h:2654
NDARRAY_HOST_DEVICE bool is_in_range(const std::tuple< Args... > &args) const
Definition: array.h:1163
index_t width() const
Definition: array.h:2642
array(array &&other, const Alloc &alloc)
Definition: array.h:2391
NDARRAY_HOST_DEVICE index_t operator[](const index_type &indices) const
Definition: array.h:1172
NDARRAY_HOST_DEVICE auto make_compact(const Shape &s)
Definition: array.h:1620
NDARRAY_HOST_DEVICE index_iterator end() const
Definition: array.h:324
static NDARRAY_HOST_DEVICE void for_each_index(const Shape &shape, Fn &&fn)
Definition: array.h:1874
NDARRAY_HOST_DEVICE index_t rows() const
Definition: array.h:1293
array(const array &other)
Definition: array.h:2356
NDARRAY_HOST_DEVICE void fill(const array_ref< T, Shape > &dst, const T &value)
Definition: array.h:2874
static constexpr size_t rank()
Definition: array.h:1070
NDARRAY_HOST_DEVICE index_t width() const
Definition: array.h:1287
NDARRAY_HOST_DEVICE bool is_explicitly_compatible(const ShapeSrc &src)
Definition: array.h:1661
static constexpr size_t rank()
Definition: array.h:2233
const Shape & shape() const
Definition: array.h:2572
void reshape(Shape new_shape)
Definition: array.h:2596
NDARRAY_HOST_DEVICE dim(index_t min, index_t extent, index_t stride=DefaultStride)
Definition: array.h:422
NDARRAY_HOST_DEVICE array_ref(pointer base=nullptr, const Shape &shape=Shape())
Definition: array.h:2040
array_ref< T, Shape > ref()
Definition: array.h:2675
pointer base()
Definition: array.h:2563
NDARRAY_INLINE NDARRAY_HOST_DEVICE index_t extent() const
Definition: array.h:296
decltype(internal::make_default_dense_shape< Rank >()) dense_shape
Definition: array.h:1606
auto make_compact_copy(const array_ref< T, Shape > &src, const Alloc &alloc=Alloc())
Definition: array.h:2791
NDARRAY_HOST_DEVICE void transform_index(const array_ref< T, Shape > &dst, Fn &&fn)
Definition: array.h:2899
NDARRAY_HOST_DEVICE shape(const std::tuple< OtherDims... > &other)
Definition: array.h:1128