![]() |
CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Template for reading and writing tiles of accumulators to shared memory.
#include <tile_iterator_volta_tensor_op.h>
Classes | |
| struct | Detail |
Public Types | |
| using | WarpShape = WarpShape_ |
| using | InterleavedTileShape = gemm::GemmShape< 32, 32, 4 > |
| using | Element = half_t |
| using | Layout = layout::RowMajor |
| using | TensorRef = TensorRef< Element, Layout > |
| Tensor Reference object. More... | |
| using | TensorCoord = MatrixCoord |
| Logical coordinate in referenced tensor. More... | |
| using | Index = typename TensorRef::Index |
| using | LongIndex = typename TensorRef::LongIndex |
| using | Policy = VoltaTensorOpPolicy< WarpShape, InterleavedTileShape, Element, Layout > |
| using | Shape = MatrixShape< Policy::kRowsPerIteration, WarpShape::kN > |
| Shape of the tile in memory. More... | |
| using | AccessType = typename Policy::AccessType |
| Array type for aligned memory accesses. More... | |
| using | Fragment = typename Policy::Fragment |
| This is the fragment size produced by one access of the iterator. More... | |
| using | AccumulatorTile = typename Policy::AccumulatorTile |
| This is the complete warp-level accumulator tile. More... | |
| using | Padding = MatrixShape< 0, Policy::kElementsPerAccess > |
| Padding quantity. More... | |
Public Member Functions | |
| CUTLASS_HOST_DEVICE | TileIteratorVoltaTensorOp () |
| Default constructor. More... | |
| CUTLASS_DEVICE | TileIteratorVoltaTensorOp (TensorRef const &ref, unsigned lane_id) |
| Constructor from TensorRef. More... | |
| CUTLASS_HOST_DEVICE TileIteratorVoltaTensorOp & | add_pointer_offset (Index pointer_offset) |
| Adds a pointer offset. More... | |
| CUTLASS_HOST_DEVICE TileIteratorVoltaTensorOp & | add_tile_offset (TensorCoord const &tile_offset) |
| Advances in units of whole tiles along the logical coordinate space of the tensor. More... | |
| CUTLASS_HOST_DEVICE TileIteratorVoltaTensorOp & | operator+= (TensorCoord const &tile_offset) |
| CUTLASS_DEVICE void | store_with_pointer_offset (Fragment const &frag, Index pointer_offset) |
| Store. More... | |
| CUTLASS_HOST_DEVICE void | store (Fragment const &frag) |
| Store. More... | |
| CUTLASS_HOST_DEVICE void | load_with_pointer_offset (Fragment const &frag, Index pointer_offset) |
| Load. More... | |
| CUTLASS_HOST_DEVICE void | load (Fragment const &frag) |
| Load. More... | |
Static Public Attributes | |
| static int const | kIterations = Policy::kIterations |
| Number of times this iterator can be incremented. More... | |
| static int const | kElementsPerAccess = Policy::kElementsPerAccess |
| Number of elements per access. More... | |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::AccessType = typename Policy::AccessType |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::AccumulatorTile = typename Policy::AccumulatorTile |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::Element = half_t |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::Fragment = typename Policy::Fragment |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::Index = typename TensorRef::Index |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::InterleavedTileShape = gemm::GemmShape<32, 32, 4> |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::Layout = layout::RowMajor |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::LongIndex = typename TensorRef::LongIndex |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::Padding = MatrixShape< 0, Policy::kElementsPerAccess> |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::Policy = VoltaTensorOpPolicy<WarpShape, InterleavedTileShape, Element, Layout> |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::Shape = MatrixShape< Policy::kRowsPerIteration, WarpShape::kN > |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::TensorCoord = MatrixCoord |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::TensorRef = TensorRef<Element, Layout> |
| using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::WarpShape = WarpShape_ |
|
inline |
|
inline |
|
inline |
Advances in units of whole tiles along the logical coordinate space of the tensor.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
static |
|
static |
1.8.11