Caffe2 - Python API
A deep learning, cross platform ML framework
lmdb_create_example.py
1 ## @package lmdb_create_example
2 # Module caffe2.python.examples.lmdb_create_example
3 from __future__ import absolute_import
4 from __future__ import division
5 from __future__ import print_function
6 from __future__ import unicode_literals
7 
8 import argparse
9 import numpy as np
10 
11 import lmdb
12 from caffe2.proto import caffe2_pb2
13 from caffe2.python import workspace, model_helper
14 
15 '''
16 Simple example to create an lmdb database of random image data and labels.
17 This can be used as a skeleton to write your own data import.
18 
19 It also runs a dummy-model with Caffe2 that reads the data and
20 validates that the checksum is the same.
21 '''
22 
23 
def create_db(output_file):
    """Write 128 random image/label records into an LMDB database.

    Every record is a serialized ``caffe2_pb2.TensorProtos`` message that
    holds two tensors: a float image tensor with dims ``(3, 64, 32)`` and an
    int32 label tensor with a single value (``j % 10``). Records are keyed by
    the ASCII-encoded record index.

    Args:
        output_file: Path of the LMDB environment to write to.

    Returns:
        The write-side checksum, i.e. the sum over all records of
        ``np.sum(img_data) * label``.
    """
    print(">>> Write database...")
    LMDB_MAP_SIZE = 1 << 40  # MODIFY
    env = lmdb.open(output_file, map_size=LMDB_MAP_SIZE)

    # Image dimensions are the same for every record, so set them once.
    width = 64
    height = 32

    checksum = 0
    try:
        with env.begin(write=True) as txn:
            for j in range(0, 128):
                # MODIFY: add your own data reader / creator
                label = j % 10

                img_data = np.random.rand(3, width, height)
                # ...

                # Create TensorProtos
                tensor_protos = caffe2_pb2.TensorProtos()
                img_tensor = tensor_protos.protos.add()
                img_tensor.dims.extend(img_data.shape)
                img_tensor.data_type = caffe2_pb2.TensorProto.FLOAT

                # float_data is a flat repeated field, so flatten the image.
                img_tensor.float_data.extend(img_data.ravel())

                label_tensor = tensor_protos.protos.add()
                label_tensor.data_type = caffe2_pb2.TensorProto.INT32
                label_tensor.int32_data.append(label)
                txn.put(
                    '{}'.format(j).encode('ascii'),
                    tensor_protos.SerializeToString()
                )

                checksum += np.sum(img_data) * label
                if (j % 16 == 0):
                    print("Inserted {} rows".format(j))
    finally:
        # Release the environment even if writing fails; leaving it open
        # leaks the handle and keeps the database open in this process
        # while a later reader opens the same path.
        env.close()

    print("Checksum/write: {}".format(int(checksum)))
    return checksum
63 
64 
def read_db_with_caffe2(db_file, expected_checksum, batch_size=32,
                        num_batches=4):
    """Read the LMDB back through a Caffe2 net and verify its checksum.

    Builds a model whose only input op is ``TensorProtosDBInput``, runs it
    ``num_batches`` times, and accumulates ``np.sum(image) * label`` over
    every row of each fetched batch.

    Args:
        db_file: Path of the LMDB database to read.
        expected_checksum: Checksum computed while writing the database.
        batch_size: Number of records fetched per net run.
        num_batches: Number of net runs. ``batch_size * num_batches`` should
            equal the number of records the checksum was computed over
            (the defaults give 32 * 4 = 128).

    Raises:
        AssertionError: If the read checksum differs from
            ``expected_checksum`` by 0.1 or more.
    """
    print(">>> Read database...")
    model = model_helper.ModelHelper(name="lmdbtest")
    data, label = model.TensorProtosDBInput(
        [], ["data", "label"], batch_size=batch_size,
        db=db_file, db_type="lmdb")

    checksum = 0

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    for _ in range(num_batches):
        workspace.RunNet(model.net.Proto().name)

        img_datas = workspace.FetchBlob("data")
        labels = workspace.FetchBlob("label")
        for j in range(batch_size):
            checksum += np.sum(img_datas[j, :]) * labels[j]

    print("Checksum/read: {}".format(int(checksum)))
    # Take the absolute value of the difference *before* comparing; the
    # comparison must not sit inside np.abs(), or a read checksum that is
    # too large would always pass.
    assert np.abs(expected_checksum - checksum) < 0.1, \
        "Read/write checksums dont match"
89 
90 
def main():
    """Parse the command line, create the example LMDB, then read it back."""
    arg_parser = argparse.ArgumentParser(description="Example LMDB creation")
    arg_parser.add_argument(
        "--output_file",
        type=str,
        default=None,
        required=True,
        help="Path to write the database to",
    )
    db_path = arg_parser.parse_args().output_file

    # Write the database, remembering the checksum of what was written.
    write_checksum = create_db(db_path)

    # For testing reading:
    read_db_with_caffe2(db_path, write_checksum)
104 
105 
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()