Caffe2 - Python API
A deep learning, cross-platform ML framework
sparse_lengths_sum_benchmark.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import numpy as np
import datetime

from caffe2.python import core, workspace

DTYPES = {
    'uint8': np.uint8,
    'uint8_fused': np.uint8,
    'float': np.float32,
    'float16': np.float16,
}

def benchmark_sparse_lengths_sum(
        dtype_str,
        categorical_limit,
        embedding_size,
        average_len,
        batch_size,
        iterations):
    print('Preparing lookup table. ' + str(datetime.datetime.now()))

    # We will use a constant, but non-trivial value so we save initialization
    # time.
    data = np.ones([categorical_limit, embedding_size], dtype=np.float32)
    data *= 17.01

    if dtype_str == 'uint8':
        # Rowwise 8-bit path: the per-row scale and bias live in a separate
        # blob.
        scale_bias = np.random.rand(categorical_limit, 2).astype(np.float32)
        workspace.FeedBlob("scale_bias", scale_bias)
    elif dtype_str == 'uint8_fused':
        # Fused 8-bit path: 8 extra bytes per row stand in for the fused
        # scale and bias (see the quantization sketch after this listing).
        scale_bias = np.random.randint(255, size=(categorical_limit, 8))
        data = np.concatenate([data, scale_bias], axis=1)

    print('Data has shape {} {}'.format(data.shape, datetime.datetime.now()))
    workspace.FeedBlob("X", data.astype(DTYPES[dtype_str]))

    # In order to produce truly random lengths and indices, we will embed a
    # Python operator in the net to generate them.
    def f(_, outputs):
        lengths = np.random.randint(
            int(average_len * 0.75),
            int(average_len * 1.25),
            batch_size).astype(np.int32)
        indices = np.random.randint(
            0, categorical_limit, np.sum(lengths)).astype(np.int64)
        outputs[0].feed(indices)
        outputs[1].feed(lengths)

    net = core.Net("mynet")
    net.Python(f)([], ["indices", "lengths"])
    # Pick the operator variant that matches the table's storage format
    # (a NumPy reference for SparseLengthsSum follows the listing).
    if dtype_str == "uint8":
        net.SparseLengthsSum8BitsRowwise(
            ["X", "indices", "lengths", "scale_bias"], "Y")
    elif dtype_str == "uint8_fused":
        net.SparseLengthsSumFused8BitRowwise(["X", "indices", "lengths"], "Y")
    else:
        net.SparseLengthsSum(["X", "indices", "lengths"], "Y")
    workspace.CreateNet(net)

    # Set the random seed, so that repeated runs keep the same sequence of
    # random indices.
    np.random.seed(1701)

    print('Preparation finished. ' + str(datetime.datetime.now()))

    workspace.BenchmarkNet(net.Name(), 1, iterations, True)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Minimal benchmark for SparseLengthsSum.")
    parser.add_argument(
        '-d', "--dtype", choices=list(DTYPES.keys()), default="float",
        help="The data type for the input lookup table.")
    parser.add_argument(
        '-e', "--embedding-size", type=int, default=6000000,
        help="Lookup table size (number of rows).")
    parser.add_argument(
        "--embedding-dim", type=int, default=128,
        help="Embedding dimension (row width).")
    parser.add_argument(
        "--average_len", type=int, default=27,
        help="Average number of indices per example (default: 27).")
    parser.add_argument(
        "--batch_size", type=int, default=100,
        help="The batch size.")
    parser.add_argument(
        '-i', "--iteration", type=int, default=100000,
        help="The number of iterations.")
    args, extra_args = parser.parse_known_args()
    core.GlobalInit(['python'] + extra_args)
    # Note: --embedding-size gives the row count (categorical_limit) and
    # --embedding-dim the row width (embedding_size) of the lookup table.
    benchmark_sparse_lengths_sum(
        args.dtype,
        args.embedding_size,
        args.embedding_dim,
        args.average_len,
        args.batch_size,
        args.iteration)
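
For reference, SparseLengthsSum gathers rows of X by index and sums them per segment: segment i of the output covers the next lengths[i] entries of indices. A minimal NumPy sketch of the same computation (sparse_lengths_sum_ref is an illustrative helper, not a Caffe2 API):

import numpy as np

def sparse_lengths_sum_ref(X, indices, lengths):
    # Y[i] is the sum of the rows of X selected by segment i of indices.
    Y = np.zeros((len(lengths), X.shape[1]), dtype=X.dtype)
    offset = 0
    for i, n in enumerate(lengths):
        Y[i] = X[indices[offset:offset + n]].sum(axis=0)
        offset += n
    return Y

# Example: Y[0] = X[0] + X[2], Y[1] = X[3].
X = np.arange(12, dtype=np.float32).reshape(4, 3)
print(sparse_lengths_sum_ref(X, np.array([0, 2, 3]), np.array([2, 1])))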
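
The uint8_fused branch above only fakes the 8 extra bytes per row; in the fused rowwise format each uint8 row is followed by its float32 scale and float32 bias (4 + 4 bytes), with values dequantized as x ≈ q * scale + bias, which is why SparseLengthsSumFused8BitRowwise needs no separate scale_bias blob. A sketch of that layout, assuming per-row min/max quantization (quantize_rowwise_fused is illustrative, not a Caffe2 API):

import numpy as np

def quantize_rowwise_fused(W):
    # Quantize each row to uint8 with its own scale/bias (bias = row min),
    # then append the two float32 values to the row as 8 raw bytes.
    W = np.asarray(W, dtype=np.float32)
    bias = W.min(axis=1, keepdims=True)
    scale = (W.max(axis=1, keepdims=True) - bias) / 255.0
    scale[scale == 0] = 1.0  # constant rows: avoid division by zero
    q = np.round((W - bias) / scale).astype(np.uint8)
    tail = np.concatenate([scale, bias], axis=1).view(np.uint8)
    return np.concatenate([q, tail], axis=1)

# Each output row is embedding_dim + 8 bytes wide.
print(quantize_rowwise_fused(np.random.rand(4, 16)).shape)  # (4, 24)

The benchmark itself is driven from the command line, e.g. python sparse_lengths_sum_benchmark.py --dtype uint8_fused -i 1000.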