Caffe2 - Python API
A deep learning, cross-platform ML framework
data_parallel_model_utils.py
1 # Copyright (c) 2016-present, Facebook, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 ##############################################################################
15 
16 ## @package data_parallel_model_utils
17 # Module caffe2.python.data_parallel_model_utils
18 from __future__ import absolute_import
19 from __future__ import division
20 from __future__ import print_function
21 
22 from future.utils import viewitems, viewkeys, viewvalues
23 
24 import logging
25 
26 from caffe2.python import core
27 from caffe2.python.data_parallel_model import stripBlobName
28 
# Module-level logger used by the functions below; its level is set to INFO
# here, at import time.
log = logging.getLogger("data_parallel_model_utils")
log.setLevel(logging.INFO)
31 
32 
def GetActivationBlobs(model):
    '''
    Collect the blobs of model.net that look like activations on the first
    device listed in model._devices.

    A blob is reported when all of the following hold:
      * it is an output of some operator in model.net,
      * its name starts with "<device_prefix>_<first device>/" and does not
        end with "_grad",
      * it is also consumed as an input by some operator,
      * it is not among model.GetParams(''),
      * the blob named "<blob>_grad" is consumed as an input by some operator.

    Returns a list with stripBlobName() applied to each match, in operator
    order. A blob that is written by several operators is listed once per
    writing operator.
    '''
    # Hacky way to get activations, think of a better way
    lead_prefix = "{}_{}/".format(model._device_prefix, model._devices[0])

    # Every blob name that any operator reads.
    consumed = {name for op in model.net.Proto().op for name in op.input}

    param_blobs = set(model.GetParams(''))

    found = []
    for op in model.net.Proto().op:
        for blob in op.output:
            # Only forward blobs living on the first device are candidates.
            if not blob.startswith(lead_prefix) or blob.endswith("_grad"):
                continue
            # Must be read by someone and must not be a parameter.
            if blob not in consumed or blob in param_blobs:
                continue
            # Its gradient must be read somewhere as well.
            if blob + "_grad" in consumed:
                found.append(stripBlobName(blob))
    return found
51 
52 
def _ShiftActivationDevices(model, activations, from_device, to_device):
    '''
    Re-assign operators of model.net that currently run on from_device so
    that those handling the given activations run on to_device instead.

    activations holds blob names without the device prefix; the prefix
    "<device_prefix>_<from_device>/" is prepended here. The operator protos
    of model.net and model.param_init_net are modified in place by rewriting
    device_option.cuda_gpu_id. Nothing is returned.
    '''
    prefix = "{}_{}/".format(model._device_prefix, from_device)
    shifted = {prefix + name for name in activations}
    known = {prefix + name for name in GetActivationBlobs(model)}
    local_ops = [
        op for op in model.net.Proto().op
        if op.device_option.cuda_gpu_id == from_device
    ]

    # Seed the blob -> device table: requested activations go to to_device,
    # every other known activation stays on from_device.
    placement = dict.fromkeys(shifted, to_device)
    for blob in known:
        if blob not in shifted:
            placement[blob] = from_device

    # Assign each blob to a device in a label propagation manner. activations
    # override, and if multiple activations in same op, the output activations
    # determine.
    for op in local_ops:
        touched = list(op.input) + list(op.output)

        target = None
        for blob in touched:
            if blob not in placement:
                continue
            # The first placed blob decides, unless a later activation
            # overrides it (outputs come last, so they win).
            if target is None or blob in known:
                target = placement[blob]
        if target is None:
            target = op.device_option.cuda_gpu_id

        # Propagate the decision to not-yet-placed blobs of this device.
        for blob in touched:
            if blob.startswith(prefix):
                placement.setdefault(blob, target)
        op.device_option.cuda_gpu_id = target

    # Change param_init_net accordingly
    for init_op in model.param_init_net.Proto().op:
        first_out = init_op.output[0]
        if first_out in placement:
            init_op.device_option.cuda_gpu_id = placement[first_out]
83 
84 
def ShiftActivationDevices(model, activations, shifts):
    '''
    Enable simple model parallelism for data_parallel_model models.

    'shifts' is a dictionary from_gpu -> to_gpu, and 'activations' is a list
    of activation blob names without the gpu_x/ prefix (use
    GetActivationBlobs() to obtain them). No gpu may appear both as a source
    and as a target in 'shifts'; this is checked with an assert.

    Operators handling these activations are shifted to the gpu declared in
    'shifts'. Related operators such as gradient operators are moved as well,
    and appropriate copy-ops are inserted.

    This allows shifting memory usage from one gpu to another, enabling bigger
    models to be trained. model.param_init_net and model.net are replaced by
    the nets returned from core.InjectCrossDeviceCopies.
    '''
    targets = set(viewvalues(shifts))
    sources = set(viewkeys(shifts))
    assert targets.isdisjoint(sources)

    for src, dst in viewitems(shifts):
        message = "Shifting {} activations from {} --> {}".format(
            len(activations), src, dst
        )
        log.info(message)
        _ShiftActivationDevices(model, activations, src, dst)

    # The blob -> device map produced for param_init_net is fed into the
    # second call so that both nets are processed with the same placement.
    new_init_net, placement = core.InjectCrossDeviceCopies(
        model.param_init_net
    )
    new_net, _unused = core.InjectCrossDeviceCopies(model.net, placement)
    model.param_init_net = new_init_net
    model.net = new_net