@@ -2056,11 +2056,11 @@ class joint_matrix {
2056
2056
const size_t num_elements;
2057
2057
};
2058
2058
2059
- // / Loads 1 8x8 b16 matrix from shared memory to local memory (32-bits per wi)
2059
+ /// Loads 1 8x8 b16 matrix from local memory to private memory (32 bits per work-item)
2060
2060
// / Requires the sub-group size of kernel calling this function to be 32
2061
2061
// / \tparam [in] T The type of result variable
2062
- // / \param [in] addr The address of the matrix in shared memory
2063
- // / \param [in] m The local memory to store the matrix
2062
+ // / \param [in] addr The address of the matrix in local memory
2063
+ // / \param [in] m The private memory to store the matrix
2064
2064
// / \param [in] item The sycl::nd_item index space class
2065
2065
/// \param [in] trans Indicates whether the matrix is to be loaded transposed
2066
2066
/// \param [in] mat The index of the matrix to be loaded
@@ -2112,12 +2112,12 @@ void ldmatrix(uintptr_t addr, T *m, const ItemT &item, bool trans = false,
2112
2112
}
2113
2113
}
2114
2114
2115
- // / Loads 2 8x8 b16 matrix from shared memory to local memory (32-bits per wi)
2115
+ /// Loads 2 8x8 b16 matrices from local memory to private memory (32 bits per work-item)
2116
2116
// / Requires the sub-group size of kernel calling this function to be 32
2117
2117
// / \tparam [in] T The type of result variable
2118
- // / \param [in] addr The address of the matrix in shared memory
2119
- // / \param [in] m1 The local memory to store data of 1st matrix
2120
- // / \param [in] m2 The local memory to store data of 2nd matrix
2118
+ // / \param [in] addr The address of the matrix in local memory
2119
+ // / \param [in] m1 The private memory to store data of 1st matrix
2120
+ // / \param [in] m2 The private memory to store data of 2nd matrix
2121
2121
// / \param [in] item The sycl::nd_item index space class
2122
2122
/// \param [in] trans Indicates whether the matrices are to be loaded transposed
2123
2123
template <typename T, typename ItemT>
@@ -2129,14 +2129,14 @@ void ldmatrix(uintptr_t addr, T *m1, T *m2, const ItemT &item,
2129
2129
ldmatrix (addr, m2, item, trans, 1 );
2130
2130
}
2131
2131
2132
- // / Loads 4 8x8 b16 matrix from shared memory to local memory (32-bits per wi)
2132
+ /// Loads 4 8x8 b16 matrices from local memory to private memory (32 bits per work-item)
2133
2133
// / Requires the sub-group size of kernel calling this function to be 32
2134
2134
// / \tparam [in] T The type of result variable
2135
- // / \param [in] addr The address of the matrix in shared memory
2136
- // / \param [in] m1 The local memory to store data of 1st matrix
2137
- // / \param [in] m2 The local memory to store data of 2nd matrix
2138
- // / \param [in] m3 The local memory to store data of 3rd matrix
2139
- // / \param [in] m4 The local memory to store data of 4th matrix
2135
+ // / \param [in] addr The address of the matrix in local memory
2136
+ // / \param [in] m1 The private memory to store data of 1st matrix
2137
+ // / \param [in] m2 The private memory to store data of 2nd matrix
2138
+ // / \param [in] m3 The private memory to store data of 3rd matrix
2139
+ // / \param [in] m4 The private memory to store data of 4th matrix
2140
2140
// / \param [in] item The sycl::nd_item index space class
2141
2141
/// \param [in] trans Indicates whether the matrices are to be loaded transposed
2142
2142
template <typename T, typename ItemT>
0 commit comments