Partial specialization for column-major output: it exchanges the problem size and the A and B operands (see UnderlyingOperator).
#include <gemm_batched.h>
|
| using | ElementA = ElementA_ |
| |
| using | LayoutA = LayoutA_ |
| |
| using | TensorRefA = TensorRef< ElementA const, LayoutA > |
| |
| using | ElementB = ElementB_ |
| |
| using | LayoutB = LayoutB_ |
| |
| using | TensorRefB = TensorRef< ElementB const, LayoutB > |
| |
| using | ElementC = ElementC_ |
| |
| using | LayoutC = layout::ColumnMajor |
| |
| using | TensorRefC = TensorRef< ElementC const, LayoutC > |
| |
| using | TensorRefD = TensorRef< ElementC, LayoutC > |
| |
| using | ElementAccumulator = ElementAccumulator_ |
| |
| using | OperatorClass = OperatorClass_ |
| |
| using | ArchTag = ArchTag_ |
| |
| using | ThreadblockShape = ThreadblockShape_ |
| |
| using | WarpShape = WarpShape_ |
| |
| using | InstructionShape = InstructionShape_ |
| |
| using | EpilogueOutputOp = EpilogueOutputOp_ |
| |
| using | ThreadblockSwizzle = ThreadblockSwizzle_ |
| |
| using | UnderlyingOperator = GemmBatched< ElementB, typename layout::LayoutTranspose< LayoutB >::type, ElementA, typename layout::LayoutTranspose< LayoutA >::type, ElementC, layout::RowMajor, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, kAlignmentB, kAlignmentA > |
| |
| using | UnderlyingArguments = typename UnderlyingOperator::Arguments |
| |
| using | GemmKernel = typename UnderlyingOperator::GemmKernel |
| |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ArchTag = ArchTag_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ElementA = ElementA_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ElementAccumulator = ElementAccumulator_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ElementB = ElementB_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ElementC = ElementC_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::EpilogueOutputOp = EpilogueOutputOp_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::GemmKernel = typename UnderlyingOperator::GemmKernel |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::InstructionShape = InstructionShape_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::LayoutA = LayoutA_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::LayoutB = LayoutB_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::LayoutC = layout::ColumnMajor |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::OperatorClass = OperatorClass_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::TensorRefA = TensorRef<ElementA const, LayoutA> |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::TensorRefB = TensorRef<ElementB const, LayoutB> |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::TensorRefC = TensorRef<ElementC const, LayoutC> |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::TensorRefD = TensorRef<ElementC, LayoutC> |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ThreadblockShape = ThreadblockShape_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ThreadblockSwizzle = ThreadblockSwizzle_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::UnderlyingArguments = typename UnderlyingOperator::Arguments |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::UnderlyingOperator = GemmBatched< ElementB, typename layout::LayoutTranspose<LayoutB>::type, ElementA, typename layout::LayoutTranspose<LayoutA>::type, ElementC, layout::RowMajor, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, kAlignmentB, kAlignmentA > |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::WarpShape = WarpShape_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::GemmBatched |
( |
| ) |
|
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| static Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::can_implement |
( |
Arguments const & |
args | ) |
|
|
inline static |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| static size_t cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::get_workspace_size |
( |
Arguments const & |
args | ) |
|
|
inline static |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::initialize |
( |
Arguments const & |
args, |
|
|
void * |
workspace = nullptr, |
|
|
cudaStream_t |
stream = nullptr |
|
) |
| |
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::operator() |
( |
cudaStream_t |
stream = nullptr | ) |
|
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::operator() |
( |
Arguments const & |
args, |
|
|
void * |
workspace = nullptr, |
|
|
cudaStream_t |
stream = nullptr |
|
) |
| |
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::run |
( |
cudaStream_t |
stream = nullptr | ) |
|
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| static UnderlyingArguments cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::to_underlying_arguments |
( |
Arguments const & |
args | ) |
|
|
inline static |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::update |
( |
Arguments const & |
args, |
|
|
void * |
workspace = nullptr |
|
) |
| |
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| int const cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::kAlignmentA = AlignmentA |
|
static |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| int const cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::kAlignmentB = AlignmentB |
|
static |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| int const cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::kAlignmentC = EpilogueOutputOp::kCount |
|
static |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| bool const cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::kSplitKSerial = false |
|
static |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >
| int const cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::kStages = Stages |
|
static |
The documentation for this class was generated from the following file: