3 #include "caffe2/core/tensor.h" 4 #include "caffe2/utils/eigen_utils.h" 5 #include "caffe2/utils/math.h" 10 using t_tuple = std::tuple<Tensor, Tensor>;
13 T copy_ctor(
const T& x) {
19 return X.UnsafeSharedInstance();
23 t_tuple copy_ctor(
const t_tuple& X) {
24 return std::make_tuple(copy_ctor(std::get<0>(X)), copy_ctor(std::get<1>(X)));
28 std::pair<t_tuple, t_tuple> copy_ctor(
const std::pair<t_tuple, t_tuple>& X) {
29 return std::make_pair(copy_ctor(X.first), copy_ctor(X.second));
33 std::vector<Tensor> copy_ctor(
const std::vector<Tensor>& X) {
34 std::vector<Tensor> Y(X.size());
35 std::transform(X.begin(), X.end(), Y.begin(), [](
const Tensor& x) {
42 std::vector<t_tuple> copy_ctor(
const std::vector<t_tuple>& X) {
43 std::vector<t_tuple> Y(X.size());
44 std::transform(X.begin(), X.end(), Y.begin(), [](
const t_tuple& x) {
51 std::vector<std::pair<t_tuple, t_tuple>> copy_ctor(
52 const std::vector<std::pair<t_tuple, t_tuple>>& X) {
53 std::vector<std::pair<t_tuple, t_tuple>> Y(X.size());
55 X.begin(), X.end(), Y.begin(), [](
const std::pair<t_tuple, t_tuple>& x) {
63 static std::vector<std::pair<T, T>> pair_vec(
const std::vector<T>& vals) {
67 "Odd number of params or hiddens given to a bidirectional RNN");
68 std::vector<std::pair<T, T>> result;
69 result.reserve(vals.size() / 2);
70 for (int64_t i = 0; i < vals.size(); i += 2) {
71 result.emplace_back(copy_ctor(vals[i]), copy_ctor(vals[i + 1]));
// Flattens (forward, backward) pairs back into one interleaved vector:
// [p0.first, p0.second, p1.first, p1.second, ...]. Consumes `vals`
// (elements are moved out).
template <typename T>
static std::vector<T> unpair_vec(std::vector<std::pair<T, T>>&& vals) {
  std::vector<T> result;
  result.reserve(vals.size() * 2);
  // Range-for replaces the original index loop, which compared a signed
  // int64_t counter against the unsigned vals.size() (-Wsign-compare).
  for (auto& p : vals) {
    result.push_back(std::move(p.first));
    result.push_back(std::move(p.second));
  }
  return result;
}
89 const auto canonical_axis = X.canonical_axis_index(1);
90 const auto M = X.size_to_dim(canonical_axis);
91 const auto K = X.size_from_dim(canonical_axis);
92 const auto canonical_axis_w = W.canonical_axis_index(1);
93 const int N = W.size_to_dim(canonical_axis_w);
94 auto output_size = X.sizes().vec();
95 output_size.resize(canonical_axis + 1);
96 output_size[canonical_axis] = N;
98 math::Gemm<float, CPUContext>(
105 X.template data<float>(),
106 W.template data<float>(),
108 C.template mutable_data<float>(),
115 auto output = matmul(X, W, context);
117 const auto canonical_axis = X.canonical_axis_index(1);
118 const auto M = X.size_to_dim(canonical_axis);
119 const auto canonical_axis_w = W.canonical_axis_index(1);
120 const int N = W.size_to_dim(canonical_axis_w);
121 auto bias_multiplier_ = caffe2::empty({
M}, CPU);
122 math::Set<float, CPUContext>(
123 M, 1, bias_multiplier_.template mutable_data<float>(), context);
124 math::Gemm<float, CPUContext>(
131 bias_multiplier_.template data<float>(),
132 B.template data<float>(),
134 output.template mutable_data<float>(),
141 chunk(
const Tensor& input,
int chunks,
int axis, CPUContext* context) {
142 int canonical_axis = input.canonical_axis_index(axis);
144 canonical_axis, input.dim(),
"Axis not in input ndim range.");
145 const int input_channels = input.dim32(canonical_axis);
147 input_channels % chunks,
149 "input channels should be divisible by the number of chunks.");
150 auto split_size = input_channels / chunks;
151 vector<int64_t> output_dims(input.sizes().vec());
152 int before = 1, after = 1;
153 for (
int i = 0; i < canonical_axis; ++i) {
154 before *= input.dim32(i);
156 for (
int i = canonical_axis + 1; i < input.dim(); ++i) {
157 after *= input.dim32(i);
159 size_t input_offset = 0;
160 std::vector<Tensor> outputs;
161 for (
int i = 0; i < chunks; ++i) {
162 auto axis_dim = split_size;
163 output_dims[canonical_axis] = split_size;
164 Tensor output(output_dims, CPU);
165 math::CopyMatrix<CPUContext>(
169 static_cast<const char*
>(input.raw_data()) + input_offset,
170 input.dim32(canonical_axis) * after,
171 output.raw_mutable_data(input.dtype()),
174 input.dtype().copy());
175 input_offset += axis_dim * after * input.itemsize();
176 outputs.push_back(std::move(output));
181 std::vector<Tensor> unbind(
const Tensor& input,
int axis, CPUContext* context) {
184 auto chunks = chunk(input, input.sizes()[axis], axis, context);
186 std::vector<int64_t> newDims = input.sizes().vec();
187 newDims.erase(newDims.begin() + axis);
190 for (
int i = 0; i < chunks.size(); i++) {
192 chunks[i].sizes()[axis], 1,
"Got an unexpected chunk size");
193 chunks[i].Reshape(newDims);
199 cat(
const std::vector<Tensor>& tensorList,
int axis, CPUContext* context) {
201 auto input_zero = copy_ctor(tensorList.at(0));
202 vector<int64_t> outputDims(input_zero.sizes().vec());
203 CAFFE_ENFORCE(outputDims.size() > 0);
204 for (
int i = 1; i < tensorList.size(); i++) {
205 CAFFE_ENFORCE(input_zero.dtype() == tensorList.at(i).dtype());
206 outputDims[axis] += tensorList.at(i).sizes()[axis];
208 auto output_channels = outputDims[axis];
209 Tensor output(outputDims, CPU);
210 int before = 1, after = 1;
211 for (
int i = 0; i < tensorList.at(0).dim(); ++i) {
215 int dim = input_zero.dim32(i);
222 size_t output_offset = 0;
223 for (
const auto& input : tensorList) {
224 auto axis_dim = input.dim32(axis);
225 math::CopyMatrix<CPUContext>(
231 static_cast<char*
>(output.raw_mutable_data(input_zero.dtype())) +
233 output_channels * after,
235 input_zero.dtype().copy());
236 output_offset += axis_dim * after * input.itemsize();
243 stack(
const std::vector<Tensor>& tensorList,
int axis, CPUContext* context) {
245 std::vector<int64_t> newDims(tensorList[0].sizes().vec());
246 std::vector<Tensor> expandedTensorList;
247 newDims.insert(newDims.begin() + axis, 1);
248 for (
int i = 0; i < tensorList.size(); i++) {
249 expandedTensorList.emplace_back(tensorList[i].Clone());
250 expandedTensorList.at(i).Reshape(newDims);
252 return cat(expandedTensorList, axis, context);
258 EigenVectorArrayMap<float>(Y.template mutable_data<float>(), N) = 1.0 /
260 (-ConstEigenVectorArrayMap<float>(X.template data<float>(), N)).exp());
266 math::Tanh<float, CPUContext>(
268 X.template data<float>(),
269 Y.template mutable_data<float>(),
275 Tensor Z(X.sizes().vec(), CPU);
276 math::Add<float, CPUContext>(
278 X.template data<float>(),
279 Y.template data<float>(),
280 Z.template mutable_data<float>(),
286 Tensor Z(X.sizes().vec(), CPU);
287 math::Mul<float, CPUContext>(
289 X.template data<float>(),
290 Y.template data<float>(),
291 Z.template mutable_data<float>(),
296 Tensor transpose(
const Tensor& X,
int dim0,
int dim1, CPUContext* context) {
298 CAFFE_ENFORCE(ndim > dim0 && ndim > dim1,
"Invalid transpose dimensions");
299 std::vector<int> axes(ndim);
300 std::iota(axes.begin(), axes.end(), 0);
301 std::swap(axes[dim0], axes[dim1]);
302 const std::vector<std::int64_t> X_dims = X.sizes().vec();
303 std::vector<std::int64_t> Y_dims(ndim);
304 for (
int i = 0; i < ndim; ++i) {
305 Y_dims[i] = X_dims[axes[i]];
308 math::Transpose<std::int64_t, float, CPUContext>(
312 X.template data<float>(),
313 Y.template mutable_data<float>(),
// A global dictionary that holds information about what Caffe2 modules
// have been loaded in the current runtime.