// Caffe2 - C++ API
// A deep learning, cross-platform ML framework
// benchmark_helper.h
#pragma once

#include <algorithm>
#include <fstream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>

#include "caffe2/core/blob_serialization.h"
#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/net.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/string_utils.h"
#include "c10/util/string_utils.h"

// NOTE(review): using-declarations at header scope leak into every includer;
// kept unchanged for backward compatibility with the existing .cc files.
using std::map;
using std::shared_ptr;
using std::string;
using std::vector;
33 template <typename ContextType, typename TensorType>
34 void writeTextOutput(
35  TensorType* tensor,
36  const string& output_prefix,
37  const string& name,
38  int index,
39  int num_blobs) {
40  if (index >= num_blobs) {
41  return;
42  }
43  string filename = name;
44  std::replace(filename.begin(), filename.end(), '/', '_');
45  string output_name = output_prefix + "/" + filename + ".txt";
47  caffe2::BlobProto blob_proto;
48 
49  ser.Serialize(
50  *tensor, output_name, blob_proto.mutable_tensor(), 0, tensor->numel());
51  blob_proto.set_name(output_name);
52  blob_proto.set_type("Tensor");
53  CAFFE_ENFORCE(blob_proto.has_tensor());
54  caffe2::TensorProto tensor_proto = blob_proto.tensor();
55  int dims_size = tensor_proto.dims_size();
56  long long elem_dim_size =
57  dims_size > 1 ? tensor_proto.dims(1) : tensor_proto.dims(0);
58  for (int i = 2; i < dims_size; i++) {
59  elem_dim_size *= tensor_proto.dims(i);
60  }
61  std::vector<std::string> lines;
62  std::string dims;
63  for (int i = 0; i < dims_size; i++) {
64  int dim = tensor_proto.dims(i);
65  if (i > 0) {
66  dims += ", ";
67  }
68  dims += c10::to_string(dim);
69  }
70  lines.push_back(dims);
71  std::stringstream line;
72  if (tensor_proto.data_type() == caffe2::TensorProto::FLOAT) {
73  auto start = tensor_proto.float_data().begin();
74  auto end = tensor_proto.float_data().end();
75  copy(start, end, std::ostream_iterator<float>(line, ","));
76  } else if (tensor_proto.data_type() == caffe2::TensorProto::INT32) {
77  auto start = tensor_proto.int32_data().begin();
78  auto end = tensor_proto.int32_data().end();
79  copy(start, end, std::ostream_iterator<int>(line, ","));
80  } else {
81  CAFFE_THROW("Unimplemented Blob type.");
82  }
83  // remove the last ,
84  string str = line.str();
85  if(str.length() != 0) {
86  str.pop_back();
87  }
88  lines.push_back(str);
89 
90  // static casts are workaround for MSVC build
91  auto flags = static_cast<std::ios_base::openmode>(std::ios::out);
92  if (index != 0) {
93  flags |= static_cast<std::ios_base::openmode>(std::ios::app);
94  } else {
95  flags |= static_cast<std::ios_base::openmode>(std::ios::trunc);
96  }
97  std::ofstream output_file(output_name, flags);
98  std::ostream_iterator<std::string> output_iterator(output_file, "\n");
99  std::copy(lines.begin(), lines.end(), output_iterator);
100 }
101 
// The helpers below are implemented in the accompanying .cc file; the
// comments describe intent inferred from names and signatures — confirm
// against the implementations.

// Configures the observers used to collect benchmark statistics.
void observerConfig();
// Returns whether the given backend string selects the CUDA backend
// (presumably also checking availability — confirm in the .cc).
bool backendCudaSet(const string&);
// Stamps the net (and, presumably, its operators) with the device type.
void setDeviceType(caffe2::NetDef*, caffe2::DeviceType&);
// Sets the engine of the net's operators for the named backend.
void setOperatorEngine(caffe2::NetDef*, const string&);
// Loads the comma-separated `input` blobs into `workspace`, either from
// `input_file` or built from `input_dims`/`input_type` descriptions.
// Returns an int status/count — confirm meaning in the .cc.
int loadInput(
    shared_ptr<caffe2::Workspace> workspace,
    const bool run_on_gpu,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    const string& input,
    const string& input_file,
    const string& input_dims,
    const string& input_type);
// Copies the tensors for the given `iteration` from `tensor_protos_map`
// into the workspace's input blobs.
void fillInputBlob(
    shared_ptr<caffe2::Workspace> workspace,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    int iteration);
// Writes the comma-separated `output` blobs under `output_folder`;
// `text_output` presumably routes through writeTextOutput above.
void writeOutput(
    shared_ptr<caffe2::Workspace> workspace,
    const bool run_on_gpu,
    const string& output,
    const string& output_folder,
    const bool text_output,
    const int index,
    const int num_blobs);
// Runs `net_def` for `warmup` + `iter` iterations with the requested
// cache-wiping, per-operator timing, and sleep behavior, writing outputs
// when configured.
void runNetwork(
    shared_ptr<caffe2::Workspace> workspace,
    caffe2::NetDef& net_def,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    const bool wipe_cache,
    const bool run_individual,
    const bool run_on_gpu,
    const bool text_output,
    const int warmup,
    const int iter,
    const int num_blobs,
    const int sleep_before_run,
    const int sleep_between_iteration,
    const int sleep_between_net_and_operator,
    const std::string& output,
    const std::string& output_folder);
// Entry point used by the benchmark binaries: wires the FLAGS_* values
// through the helpers above. Returns a process exit code.
int benchmark(
    int argc,
    char* argv[],
    const string& FLAGS_backend,
    const string& FLAGS_init_net,
    const string& FLAGS_input,
    const string& FLAGS_input_dims,
    const string& FLAGS_input_file,
    const string& FLAGS_input_type,
    int FLAGS_iter,
    const string& FLAGS_net,
    const string& FLAGS_output,
    const string& FLAGS_output_folder,
    bool FLAGS_run_individual,
    int FLAGS_sleep_before_run,
    int FLAGS_sleep_between_iteration,
    int FLAGS_sleep_between_net_and_operator,
    bool FLAGS_text_output,
    int FLAGS_warmup,
    bool FLAGS_wipe_cache);
// TensorSerializer is the serializer for Tensors.
// void Serialize(const void* pointer, TypeMeta typeMeta, const string& name,
//                SerializationAcceptor acceptor) override — serializes a Blob.