|
def | __init__ (self, grad_clip_method, clip_norm_type='l2_norm', clip_threshold=0.1, use_parameter_norm=False, compute_norm_ratio=False, clip_max=1, clip_min=-1, blobs_to_include=None, blobs_to_exclude=None) |
|
def | modify_net (self, net, init_net=None, grad_map=None, blob_to_device=None, modify_output_record=False) |
|
def | __init__ (self) |
|
def | modify_net (self, net, init_net=None, grad_map=None, blob_to_device=None) |
|
def | __call__ (self, net, init_net=None, grad_map=None, blob_to_device=None, modify_output_record=False) |
|
|
| grad_clip_method |
|
| clip_norm_type |
|
| clip_threshold |
|
| use_parameter_norm |
|
| compute_norm_ratio |
|
| clip_max |
|
| clip_min |
|
| blobs_to_include |
|
| blobs_to_exclude |
|
|
string | L1_NORM = 'l1_norm' |
|
string | L2_NORM = 'l2_norm' |
|
string | BY_NORM = 'by_norm' |
|
string | BY_VALUE = 'by_value' |
|
list | GRAD_CLIP_METHODS = [BY_NORM, BY_VALUE] |
|
list | CLIP_GRADIENT_NORM_TYPES = [L2_NORM, L1_NORM] |
|
Definition at line 16 of file gradient_clipping.py.
def caffe2.python.modeling.gradient_clipping.GradientClipping.__init__ |
( |
|
self, |
|
|
|
grad_clip_method, |
|
|
|
clip_norm_type = 'l2_norm' , |
|
|
|
clip_threshold = 0.1 , |
|
|
|
use_parameter_norm = False , |
|
|
|
compute_norm_ratio = False , |
|
|
|
clip_max = 1 , |
|
|
|
clip_min = -1 , |
|
|
|
blobs_to_include = None , |
|
|
|
blobs_to_exclude = None |
|
) |
| |
Clips gradient to avoid gradient magnitude explosion or vanishing gradient.
Args:
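grad_clip_method: method used to clip the gradients; one of the values in GRAD_CLIP_METHODS ('by_norm' or 'by_value')
clip_norm_type: type of norm used in the norm computation; one of the values in CLIP_GRADIENT_NORM_TYPES ('l2_norm' or 'l1_norm')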
clip_threshold: threshold used to determine whether to clip
use_parameter_norm: a boolean to indicate whether to incorporate
the norm of the parameter
compute_norm_ratio: a boolean to compute the ratio between gradient norm
and parameter norm explicitly for debugging purposes
clip_max: when clipping by_value, any value that is greater than
clip_max will be clipped to clip_max
clip_min: when clipping by_value, any value that is smaller than
clip_min will be clipped to clip_min
blobs_to_include: names of blobs whose gradient is to be clipped. If it is set
to None, the gradients of all params in grad_map will be clipped.
blobs_to_exclude: names of blobs whose gradient is not to be clipped.
Definition at line 30 of file gradient_clipping.py.
The documentation for this class was generated from the following file: