Caffe2 - C++ API
A deep learning, cross platform ML framework
caffe2 Namespace Reference

A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime, and also utility functions to load modules. More...

Namespaces

 enforce_detail
 Rich logging messages.
 

Data Structures

struct  AbsCPUFunctor
 
struct  AbsGradientCPUFunctor
 
struct  AbstractLengthsDef
 
class  AbstractLengthsGradientOp
 
class  AbstractLengthsOp
 Segment reduction op with optional fused embedding lookup. More...
 
class  AbstractLengthsWithMainInputAndForwardOutputGradientOp
 
class  AbstractLengthsWithMainInputGradientOp
 
struct  AbstractReduceBackDef
 
struct  AbstractReduceFrontDef
 
class  AbstractReduceFrontOrBackGradientOp
 
class  AbstractReduceFrontOrBackOp
 Simple non-segmented reduction over the first few dimensions of the tensor. More...
 
struct  AbstractSortedSegmentDef
 
class  AbstractSortedSegmentGradientOp
 
class  AbstractSortedSegmentOp
 Segment reduction op with optional fused embedding lookup. More...
 
struct  AbstractSortedSegmentRangeDef
 
class  AbstractSortedSegmentRangeGradientOp
 
class  AbstractSortedSegmentRangeOp
 Base implementation for segment reduction op that leverages continuity of the data. More...
 
struct  AbstractSparseLengthsDef
 
struct  AbstractSparseSortedSegmentDef
 
struct  AbstractSparseUnsortedSegmentDef
 
struct  AbstractUnsortedSegmentDef
 
class  AbstractUnsortedSegmentGradientOp
 
class  AbstractUnsortedSegmentOp
 Unsorted segment reduction op with optional fused embedding lookup. More...
 
class  AccumulateHistogramOp
 
class  AccumulateInputGradientOp
 
class  AccumulateOp
 
class  AccuracyOp
 
class  AdagradOp
 
class  AdamOp
 
class  AddPaddingOp
 
class  AffineChannelGradientOp
 
class  AffineChannelOp
 
class  AlgorithmsCache
 
class  AliasOp
 Alias op makes the output and the input share the same underlying storage. More...
 
struct  AlignedDeleter
 
struct  AllocAligned
 
class  AlternateLearningRate
 
struct  Analysis
 
class  APMeterOp
 
class  ArgMaxOp
 
class  ArgMinOp
 
class  ArgOpBase
 
class  ArgumentHelper
 A helper class to index into arguments. More...
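 Example (a minimal sketch; def is an OperatorDef proto, and the argument names mirror the ImageInput schema near the end of this page):
     ArgumentHelper helper(def);
     // Falls back to the supplied default when the argument is absent.
     int batch_size = helper.GetSingleArgument<int>("batch_size", 0);
     int crop = helper.GetSingleArgument<int>("crop", -1);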
 
class  AssertOp
 
class  AsyncDAGNet
 
class  AsyncNetBase
 
class  AsyncPollingNet
 
class  AsyncSchedulingNet
 
class  AsyncSimpleNet
 
class  AtomicIterOp
 
class  AveragedLoss
 
class  AveragedLossGradient
 
class  AveragePool
 
class  AvgExportedStat
 
class  BaseInputAccessor
 
class  BaseReducer
 
class  BaseReducerGradient
 
class  BatchBoxCoxOp
 
class  BatchBucketOneHotOp
 
class  BatchDenseToSparseOp
 
class  BatchGatherGradientOp
 
class  BatchGatherOp
 
class  BatchMatMulOp
 
class  BatchOneHotOp
 
class  BatchPermutationGradientOp
 
class  BatchPermutationOp
 
class  BatchSparseToDenseOp
 
class  BatchToSpaceOp
 
class  BBoxTransformOp
 
class  BernoulliJSDGradientOp
 
class  BernoulliJSDOp
 
class  BinaryElementwiseOp
 Performs a binary operation (e.g. More...
 
class  Blob
 Blob is a general container that hosts a typed pointer. More...
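 Example (sketch; TensorCPU as the payload is just one possible type):
     Blob blob;
     // GetMutable<T>() constructs a T in place if the blob is empty or
     // currently holds a different type; afterwards IsType<T>() is true.
     TensorCPU* tensor = blob.GetMutable<TensorCPU>();
     if (blob.IsType<TensorCPU>()) {
       const TensorCPU& readonly = blob.Get<TensorCPU>();  // typed const access
     }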
 
class  BlobDeserializerBase
 BlobDeserializerBase is an abstract class that deserializes a blob from a BlobProto or a TensorProto. More...
 
class  BlobSerializerBase
 BlobSerializerBase is an abstract class that serializes a blob to a string. More...
 
class  BlobsQueue
 
struct  BlobStatGetter
 
struct  BlobStatRegistry
 
class  BlockingCounter
 
class  BooleanMaskOp
 
class  BooleanUnmaskOp
 
class  BoxWithNMSLimitOp
 
class  BRGNCHWCToPackedInt8BGRAStylizerDeprocessOp
 
class  BufferedTokenizer
 
class  Caffe2FlagParser
 
class  Caffe2ModuleTestDynamicDummyOp
 
class  CastOp
 
class  CeilOp
 
class  ChannelBackpropStatsOp
 
class  ChannelShuffleGradientOp
 
class  ChannelShuffleOp
 
class  ChannelStatsOp
 
struct  CharRange
 
class  CheckCounterDoneOp
 
class  CheckpointOp
 
class  ClipGradientOp
 
class  ClipOp
 
class  ClipTensorByScalingOp
 
class  CloseBlobsQueueOp
 
class  CloseRebatchingQueueOp
 
class  Col2ImOp
 
class  CollectAndDistributeFpnRpnProposalsOp
 
class  CommonSubexpressionEliminationTransform
 Common Subexpression Elimination. More...
 
class  ConcatOp
 
class  ConditionalOp
 
class  ConstantFillOp
 
class  ConstantWarmupLearningRate
 
struct  ConvArgs
 
class  Converter
 
class  ConvGradientOp
 
class  ConvOp
 
class  ConvPoolOpBase
 
class  ConvToNNPackTransform
 
class  ConvTransposeGradientOp
 
class  ConvTransposeOp
 
class  ConvTransposeUnpoolBase
 
class  CopyFromGLOp
 
class  CopyFromOpenGLOp
 
class  CopyOnDeviceLikeOp
 
class  CopyOnDeviceLikeOp< CUDAContext, CUDAContext, CUDAContext >
 
class  CopyOp
 
class  CopyToOpenGLOp
 
struct  CosCPUFunctor
 
struct  CosGradientCPUFunctor
 
class  CosineEmbeddingCriterionGradientOp
 
class  CosineEmbeddingCriterionOp
 
class  CosineSimilarityGradientOp
 
class  CosineSimilarityOp
 
class  CountDownOp
 
class  Counter
 
class  CountUpOp
 
struct  CPUAllocator
 
class  CPUContext
 The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement. More...
 
struct  CPUEventWrapper
 
class  CpuId
 Identification of an Intel CPU. More...
 
class  CPUSparseLengthsReductionOp
 
struct  CpuUtilizationReportOp
 
class  CreateBlobsQueueOp
 
class  CreateCounterOp
 
class  CreateDBOp
 
class  CreateMapOp
 
class  CreateRebatchingQueueOp
 
class  CreateScopeOp
 
class  CreateTextFileReaderOp
 
class  CrossEntropyGradientOp
 
class  CrossEntropyOp
 
class  CUDAContext
 
struct  CudaDevicePropWrapper
 
struct  CudaEventWrapper
 
class  CUDARecurrentNetworkExecutor
 
class  CudaRTCFunction
 
class  CudnnConvGradientOp
 
class  CudnnConvOp
 
class  CudnnConvOpBase
 
class  CudnnConvTransposeGradientOp
 
class  CudnnConvTransposeOp
 
class  CudnnConvTransposeOpBase
 
class  cudnnFilterDescWrapper
 
class  CuDNNLRNGradientOp
 
class  CuDNNLRNOp
 
class  CuDNNReluGradientOp
 
class  CuDNNReluOp
 
class  CuDNNSoftmaxGradientOp
 
class  CuDNNSoftmaxOp
 
class  CudnnSpatialBNGradientOp
 
class  CudnnSpatialBNOp
 
class  CuDNNState
 
class  cudnnTensorDescWrapper
 cudnnTensorDescWrapper is the placeholder that wraps around a cudnnTensorDescriptor_t, allowing us to do descriptor change as-needed during runtime. More...
 
class  CuDNNTransposeOp
 
class  cudnnTypeWrapper
 cudnnTypeWrapper is a wrapper class that allows us to refer to the cudnn type in a template function. More...
 
class  cudnnTypeWrapper< double >
 
class  cudnnTypeWrapper< float >
 
class  cudnnTypeWrapper< float16 >
 
struct  CuDNNWorkspace
 CuDNNWorkspace is a wrapper around a raw cuda pointer that holds the cudnn scratch space. More...
 
class  CuDNNWrapper
 CuDNNWrapper is a class that wraps the cudnn handles and cudnn workspaces. More...
 
class  DAGNet
 
class  DAGNetBase
 
class  DBExistsOp
 
class  DecodedFrame
 
struct  DefaultCPUAllocator
 
class  DefaultEngine
 
class  DeformConvGradientOp
 
class  DeformConvOp
 
class  DeformConvOpBase
 
class  DequeueBlobsOp
 
class  DequeueRebatchingQueueOp
 
class  DetailedExportedStat
 
class  DeviceGuard
 
struct  DeviceTypeRegisterer
 
class  DiagonalFillOp
 
struct  DispatchHelper
 
struct  DispatchHelper< FixedValues< FirstVal, Values... >, ExtraArgs... >
 
struct  DispatchHelper< FixedValues<>, ExtraArgs... >
 
class  DivGradientOp
 
class  DoOp
 
class  DotProductGradientOp
 
class  DotProductOp
 
class  DotProductWithPaddingGradientOp
 
class  DotProductWithPaddingOp
 
class  DropoutGradientOp
 
class  DropoutOp
 
class  EigenConvOp
 
struct  EigenPowFunctor
 
class  ElementwiseLinearGradientOp
 
class  ElementwiseLinearOp
 
class  ElementwiseRTCOp
 A GPU operator that can generate limited elementwise operations. More...
 
class  EluGradientOp
 
class  EluOp
 
class  EnforceNotMet
 
class  EnqueueBlobsOp
 
class  EnqueueRebatchingQueueOp
 
class  EnsureDenseOp
 Pass inputs to outputs. More...
 
class  Event
 
struct  EventCreateFunctionRegisterer
 
struct  EventErrorMessageFunctionRegisterer
 
struct  EventFinishFunctionRegisterer
 
struct  EventQueryFunctionRegisterer
 
struct  EventRecordFunctionRegisterer
 
struct  EventResetFunctionRegisterer
 
struct  EventSetFinishedFunctionRegisterer
 
struct  EventWaitFunctionRegisterer
 
class  ExpandDimsOp
 
struct  ExpCPUFunctor
 
class  ExpLearningRate
 
class  ExportedStat
 
struct  ExportedStatValue
 
class  FeedBlobOp
 
class  FileReader
 
class  FileStoreHandler
 
class  FileStoreHandlerCreateOp
 
class  FillerOp
 
class  FindDuplicateElementsOp
 
class  FindOp
 
class  FixedDivisor
 
class  FixedDivisor< int32_t >
 
class  FixedLearningRate
 
struct  FixedType
 
struct  FixedValues
 
class  FlattenOp
 
class  FlattenToVecOp
 
class  FlexibleTopKGradientOp
 
class  FlexibleTopKOp
 
class  Float16ConstantFillOp
 
class  FloatToFused8BitRowwiseQuantizedOp
 
class  FloatToHalfOp
 
class  FloatToRowwiseQuantized8BitsOp
 
class  FloorOp
 
struct  ForEach
 ForEach is a unary functor that forwards each element of the input array into the elementwise Functor provided, and gathers the results of each call into the resulting array. More...
 
class  FP16MomentumSGDUpdateOp
 
class  FP32MomentumSGDUpdateOp
 
class  FreeOp
 
class  FtrlOp
 
struct  FtrlParams
 
class  FullyConnectedDecompGradientOp
 
class  FullyConnectedGradientOp
 
class  FullyConnectedOp
 
class  FullyConnectedOp_SPARSE
 
class  FullyConnectedOpDecomp
 
class  FullyConnectedOpPrune
 
class  FullyConnectedPruneGradientOp
 
class  FunHashGradientOp
 
class  FunHashOp
 
class  Fused8BitRowwiseQuantizedToFloatOp
 
class  GatherByKeyOp
 
class  GatherFused8BitRowwiseOp
 
class  GatherOp
 
class  GatherPaddingOp
 
class  GatherRangesOp
 
class  GatherRangesToDenseOp
 
class  GaussianFillOp
 
class  GenerateProposalsOp
 
struct  GenericTensorImplementation
 
class  GetAbsGradient
 
struct  GetAddPaddingGradient
 
class  GetAffineChannelGradient
 
class  GetAveragedLossGradient
 
class  GetBatchGatherGradient
 
class  GetBatchPermutationGradient
 
class  GetBatchToSpaceGradient
 
class  GetBernoulliJSDGradient
 
class  GetCastGradient
 
class  GetChannelShuffleGradient
 
class  GetCol2ImGradient
 
class  GetColwiseMaxGradient
 
class  GetConcatGradient
 
class  GetConvGradient
 
class  GetConvTransposeGradient
 
class  GetCosGradient
 
class  GetCosineSimilarityGradient
 
class  GetCrossEntropyGradient
 
class  GetDotProductGradient
 
class  GetDotProductWithPaddingGradient
 
class  GetDropoutGradient
 
struct  GetElementwiseLinearGradient
 
class  GetEluGradient
 
class  GetExpGradient
 
class  GetFCDecompGradient
 
class  GetFlattenGradient
 
class  GetFloatToHalfGradient
 
class  GetGroupSpatialSoftmaxGradient
 
class  GetGRUUnitGradient
 
class  GetHalfToFloatGradient
 
class  GetIm2ColGradient
 
class  GetInstanceNormGradient
 
class  GetL1DistanceGradient
 
class  GetLabelCrossEntropyGradient
 
class  GetLeakyReluGradient
 
class  GetLogGradient
 
class  GetLRNGradient
 
class  GetLSTMUnitGradient
 
struct  GetMakeTwoClassGradient
 
class  GetMatMulGradient
 
class  GetMaxGradient
 
class  GetMeanGradient
 
class  GetMinGradient
 
struct  GetNanCheckGradient
 
class  GetNCHW2NHWCGradient
 
struct  GetNegateGradientGradient
 
class  GetNegativeGradient
 
class  GetNHWC2NCHWGradient
 
class  GetNormalizeGradient
 
class  GetPackSegmentsGradient
 
class  GetPadImageGradient
 
class  GetPoolGradient
 
class  GetPReluGradient
 
class  GetPrependDimGradient
 
struct  GetRecurrentGradient
 
struct  GetRecurrentNetworkGradient
 
class  GetReduceBackMaxGradient
 
class  GetReduceBackMeanGradient
 
class  GetReduceBackSumGradient
 
class  GetReduceFrontMaxGradient
 
class  GetReduceFrontMeanGradient
 
class  GetReduceFrontSumGradient
 
class  GetReluGradient
 
struct  GetRemovePaddingGradient
 
class  GetReshapeGradient
 
class  GetResizeNearestGradient
 
class  GetReversePackedSegsGradient
 
class  GetRoIPoolGradient
 
class  GetRowwiseMaxGradient
 
class  GetSampleAsGradient
 
class  GetScaleGradient
 
class  GetSelectSmoothL1LossGradient
 
class  GetSeluGradient
 
class  GetSigmoidCrossEntropyLossGradient
 
struct  GetSigmoidCrossEntropyWithLogitsGradient
 
class  GetSigmoidFocalLossGradient
 
class  GetSigmoidGradient
 
class  GetSinGradient
 
class  GetSmoothL1LossGradient
 
class  GetSoftmaxFocalLossGradient
 
class  GetSoftmaxGradient
 
class  GetSoftplusGradient
 
class  GetSoftsignGradient
 
class  GetSpaceToBatchGradient
 
class  GetSpatialBNGradient
 
class  GetSplitGradient
 
class  GetSqrGradient
 
class  GetSqrtGradient
 
class  GetSquaredL2DistanceGradient
 
class  GetSquareRootDivideGradient
 
class  GetSumElementsGradient
 
class  GetSwishGradient
 
class  GetTanhGradient
 
class  GetTileGradient
 
class  GetTopKGradient
 
class  GetTransposeGradient
 
class  GetUnpackSegmentsGradient
 
class  GetUpsampleNearestGradient
 
struct  GetWeightedSigmoidCrossEntropyWithLogitsGradient
 
struct  GetZeroGradientOpGradient
 
class  GivenTensorFillOp
 
class  GLAveragePoolOp
 
class  GLConcatOp
 
class  GLContext
 
class  GLConvOp
 
class  GLFullyConnectedOp
 
class  GLMaxPoolOp
 
class  GLNet
 
class  GLNormalizePlanarYUVOp
 
class  GLNormPlanarYUVOp
 
class  GLPoolOp
 
class  GLPredictor
 
class  GLReluOp
 
class  GLReshapeOp
 
class  GLResizeNearestOp
 
class  GLSigmoidOp
 
class  GLSoftmaxOp
 
class  GLSpatialBNOp
 
class  GLSumOp
 
class  GLTensor
 
class  GluOp
 
class  GPUFallbackOp
 A templated class to allow one to wrap a CPU operator as a CUDA operator. More...
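 Typical registration, sketched with a hypothetical MyOp: inputs are copied to the host before the wrapped CPU implementation runs, and outputs are copied back to the device afterwards.
     // Serve the CPU implementation under the CUDA device type.
     REGISTER_CUDA_OPERATOR(MyOp, GPUFallbackOp<MyOp<CPUContext>>);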
 
class  GradientMakerBase
 
struct  GradientNotImplementedYet
 A helper class to indicate that the gradient mechanism is not ready. More...
 
struct  GradientOpsMeta
 A struct that holds the gradient operators and related gradient maps. More...
 
struct  GradientWrapper
 
class  GroupSpatialSoftmaxGradientOp
 
class  GroupSpatialSoftmaxOp
 
class  GRUUnitGradientOp
 
class  GRUUnitOp
 
class  HalfToFloatOp
 
class  HasElementsOp
 
class  HasScopeOp
 
class  HillLearningRate
 
class  HSoftmaxGradientOp
 
class  HSoftmaxOp
 
class  HSoftmaxOpBase
 
class  HSoftmaxSearchOp
 
class  HuffmanTreeHierarchyOp
 
class  IfOp
 
class  Im2ColOp
 
class  ImageAllocator
 
class  ImageInputOp
 
struct  Index
 
struct  IndexBase
 
class  IndexCreateOp
 
class  IndexDeserializer
 
class  IndexFreezeOp
 
class  IndexGetOp
 
class  IndexHashOp
 
class  IndexLoadOp
 
class  IndexSerializer
 
class  IndexSizeOp
 
class  IndexStoreOp
 
class  InitRegisterer
 
class  InstanceNormGradientOp
 
class  InstanceNormOp
 
class  InvLearningRate
 
class  IsEmptyOp
 
class  IsMemberOfOp
 
class  IsMemberOfValueHolder
 
class  IterOp
 
class  KeySplitOp
 
class  KeyValueToMapOp
 
class  L1DistanceGradientOp
 
class  L1DistanceOp
 
class  LabelCrossEntropyGradientOp
 
class  LabelCrossEntropyOp
 
class  LambdaRankNdcgGradientOp
 
class  LambdaRankNdcgOp
 
class  LarsOp
 
class  LayerNormGradientOp
 
class  LayerNormOp
 
class  LeakyReluGradientOp
 
class  LeakyReluOp
 
class  LearningRateFunctor
 
class  LearningRateOp
 
class  LengthsGatherOp
 
struct  LengthsOpGetGradient
 
class  LengthsPartitionOp
 
class  LengthsRangeFillOp
 
class  LengthsTileOp
 
class  LengthsTopKGradientOp
 
class  LengthsTopKOp
 
class  LengthsToRangesOp
 
class  LengthsToSegmentIdsOp
 
class  LengthsToShapeOp
 
class  LengthsToWeightsOp
 
class  LinearWarmupLearningRate
 
class  LoadOp
 
class  LocallyConnectedGradientOp
 
class  LocallyConnectedOp
 
struct  LogCPUFunctor
 
class  LoggerVoidify
 
struct  LogitCPUFunctor
 
class  LogitGradientOp
 
class  LogMeanExpRangeReducer
 
class  LogMeanExpRangeReducer< T, CPUContext >
 
struct  LogMeanExpRangeReducerDef
 
class  LogMeanExpRangeReducerGradient
 
class  LogSumExpRangeReducer
 
class  LogSumExpRangeReducer< T, CPUContext >
 
struct  LogSumExpRangeReducerDef
 
class  LogSumExpRangeReducerGradient
 
class  LpNormGradientOp
 
class  LpNormOp
 
class  LpPool
 
class  LRNGradientOp
 
class  LRNOp
 
class  LRNOpBase
 
class  LSTMUnitGradientOp
 
class  LSTMUnitOp
 
struct  MakeAligned
 
class  MakeTwoClassGradientOp
 
class  MakeTwoClassOp
 
class  MapDeserializer
 
class  MapSerializer
 
class  MapToKeyValueOp
 
struct  MapTypeTraits
 
class  MarginRankingCriterionGradientOp
 
class  MarginRankingCriterionOp
 
class  MatMulOp
 
class  MaxGradientOp
 
class  MaxMinOpBase
 
class  MaxOp
 
class  MaxPool
 
class  MaxPoolGradientRTCOp
 
class  MaxPoolRTCOp
 
class  MaxPoolWithIndexGradientOp
 
class  MaxPoolWithIndexOp
 
class  MaxRangeReducer
 
class  MaxRangeReducer< T, CPUContext >
 
struct  MaxRangeReducerDef
 
class  MaxRangeReducerGradient
 
class  MaxReduceDimsGradientOp
 
class  MaxReduceDimsOp
 
class  MaxReducer
 
class  MaxReducer< T, CPUContext >
 
struct  MaxReducerDef
 
class  MaxReducerGradient
 
class  MaxReductionGradientOp
 
class  MaxReductionOp
 
class  MeanGradientOp
 
class  MeanOp
 
class  MeanRangeReducer
 
class  MeanRangeReducer< T, CPUContext >
 
struct  MeanRangeReducerDef
 
class  MeanRangeReducerGradient
 
class  MeanReducer
 
class  MeanReducer< T, CPUContext >
 
struct  MeanReducerDef
 
class  MeanReducerGradient
 
class  MemoryAllocationReporter
 
class  MergeDimOp
 
class  MergeIdListsOp
 
class  MessageLogger
 
class  MinGradientOp
 
class  MinOp
 
class  MKLContext
 The MKL Context, which is largely the same as the CPUContext. More...
 
class  ModOp
 
class  ModuleSchema
 A module schema that can be used to store specific information about different modules. More...
 
class  MomentumSGDOp
 
class  MomentumSGDUpdateOp
 
class  MPICommonWorldWrapper
 A simple wrapper over an MPI common world. More...
 
class  MPIDataTypeWrapper
 
struct  MPSCNNContext
 
class  MSRAFillOp
 
class  MultiClassAccuracyOp
 
class  MutexDeserializer
 
class  MutexSerializer
 
class  NanCheckOp
 
class  NCHW2NHWCOp
 
class  NegateGradientOp
 
struct  NegativeCPUFunctor
 
class  NetBase
 
class  NetObserverReporter
 
class  NetObserverReporterPrint
 
class  NGramFromCategoricalOp
 
class  NHWC2NCHWOp
 
class  NNApi
 
class  NNPACKConvOp
 
class  NoDefaultEngineOp
 A helper class to denote that an op does not have a default engine. More...
 
class  NoGradient
 A helper class to indicate that the operator does not need gradient computation. More...
 
class  NormalizeGradientOp
 
class  NormalizeL1Op
 
class  NormalizeOp
 
struct  NotFunctor
 
class  Observable
 Inherit to make your class observable. More...
 
class  ObserverBase
 Use this to implement an Observer using the Observer Pattern template. More...
 
class  ObserverConfig
 
class  OneHotOp
 
class  ONNXWhileOp
 
class  OpenGLAddOp
 
class  OpenGLConcatOp
 
class  OpenGLConvOp
 
class  OpenGLConvTransposeOp
 
class  OpenGLInstanceNormPReluOp
 
class  OpenGLMulOp
 
class  OpenGLPadImageOp
 
class  OpenGLPReluOp
 
class  OpenGLResizeNearestOp
 
class  OpenGLSigmoidOp
 
class  OpenGLSubOp
 
class  OpenGLTensorToTextureStylizerPreprocessOp
 
class  OpenGLTextureToTensorStylizerDeprocessOp
 
class  OpenGLTextureToTextureStylizerDeprocessOp
 
class  OpenGLTextureToTextureStylizerPreprocessOp
 
class  Operator
 
class  OperatorAttachingNetObserver
 
class  OperatorBase
 
class  OpSchema
 A class to record the schema of an op. More...
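 Schemas are declared with OPERATOR_SCHEMA and a chain of setters, as in the CreateDB and store-handler entries under Functions below; a sketch for a hypothetical op:
     OPERATOR_SCHEMA(MyCopy)
         .NumInputs(1)
         .NumOutputs(1)
         .SetDoc("Copies its single input blob to its single output blob.")
         .Input(0, "X", "input tensor")
         .Output(0, "Y", "output tensor");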
 
class  OpSchemaRegistry
 A registry to hold all the operator schemas. More...
 
struct  OpTask
 Data structure for a scheduled task in the task queue. More...
 
class  PackedInt8BGRANHWCToNCHWCStylizerPreprocessOp
 
class  PackRNNSequenceOpBase
 
class  PackSegmentsOp
 
class  PadEmptySamplesOp
 
class  PadImageGradientOp
 
class  PadImageOp
 
class  PairWiseLossGradientOp
 
class  PairWiseLossOp
 
class  Params
 
class  PartitionOp
 
class  PartitionOpBase
 
class  PatternNetTransform
 PatternNetTransform allows you to create transforms using a simple interface. More...
 
class  PercentileOp
 
class  PerfNetObserver
 
class  PerfOperatorObserver
 
class  PerplexityOp
 
class  PiecewiseLinearTransformOp
 
struct  PinnedCPUAllocator
 An allocator that does the CPU memory allocation with pinned memory. More...
 
struct  PlanExecutionTime
 
class  PolyLearningRate
 
class  PoolGradientOp
 
class  PoolOp
 
class  PowOp
 
class  Predictor
 
class  PrefetchOperator
 
class  PReluGradientOp
 
class  PReluOp
 
class  PrependDimOp
 
class  PrintOp
 
class  ProfileCounter
 
class  ProfileObserver
 
class  ProfileOperatorObserver
 
class  PSRoIPoolGradientOp
 
class  PSRoIPoolOp
 
class  QConvOp
 
struct  QConvState
 
class  QTensor
 
class  QTensorDeserializer
 
class  QTensorSerializer
 
class  QuantDecodeGradientOp
 
class  QuantDecodeOp
 
class  QuantDecompZstdOp
 
class  RangeFillOp
 
class  RangeOp
 
class  RebatchingQueue
 
class  RecurrentBaseOp
 
class  RecurrentGradientOp
 
class  RecurrentNetworkBlobFetcherOp
 
class  RecurrentNetworkExecutorBase
 RecurrentNetworkExecutor is a specialized runtime for recurrent neural networks (RNNs). More...
 
class  RecurrentNetworkGradientOp
 
class  RecurrentNetworkOp
 
class  RecurrentOp
 
class  RecurrentParamAccessOp
 
class  RedisStoreHandler
 
class  RedisStoreHandlerCreateOp
 
class  ReduceMeanOp
 
class  ReduceOpBase
 
class  ReduceSumOp
 
class  ReduceTailSumOp
 
class  Registerer
 
class  Registry
 A template class that allows one to register classes by keys. More...
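 A sketch of the declare/define/register/create cycle (MyThing, FastThing, and the config string are hypothetical; the macros are the same ones used by the registry entries under Functions below):
     // Header: declare a registry of MyThing factories keyed by string.
     CAFFE_DECLARE_REGISTRY(MyThingRegistry, MyThing, const std::string&);
     // One .cc file: define the registry and register a concrete subclass.
     CAFFE_DEFINE_REGISTRY(MyThingRegistry, MyThing, const std::string&);
     CAFFE_REGISTER_CLASS(MyThingRegistry, FastThing, FastThing);
     // Anywhere at runtime: create an instance by key.
     std::unique_ptr<MyThing> t = MyThingRegistry()->Create("FastThing", "cfg");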
 
class  ReluGradientOp
 
class  ReluOp
 
class  RemoveDataBlocksOp
 
class  RemovePaddingOp
 
class  ReplaceNaNOp
 
class  ResetCounterOp
 
class  ReshapeOp
 
class  ResizeLikeOp
 
class  ResizeNearestGradientOp
 
class  ResizeNearestOp
 
class  RetrieveCountOp
 
class  ReversePackedSegsOp
 
class  RMACRegionsOp
 
class  RmsPropOp
 
class  RNNApplyLinkOp
 
class  RNNCapableOperatorObserver
 Inherit to make your class observable. More...
 
struct  RNNNetOperator
 Struct for operator in a timestep and its dependencies. More...
 
class  RoIAlignGradientOp
 
class  RoIAlignOp
 
class  RoIPoolFGradientOp
 
class  RoIPoolFOp
 
class  RoIPoolGradientOp
 
class  RoIPoolOp
 
class  RowMulOp
 
class  Rowwise8BitQuantizedToFloatOp
 
class  RowWiseArgMaxOp
 
class  RowWiseSparseAdagradOp
 
class  RowWiseSparseAdamOp
 
class  RunCountNetObserver
 
class  RunCountOperatorObserver
 
class  SafeDequeueBlobsOp
 
class  SafeEnqueueBlobsOp
 
struct  SameTypeAsInput
 
class  SampleAsGradientOp
 
class  SampleAsOp
 
struct  SampleInterval
 
class  SaveOp
 
class  ScaleOp
 
class  ScatterAssignOp
 Update slices of the tensor in-place by overriding. More...
 
class  ScatterWeightedSumOp
 Update slices of the tensor in-place with weighted sum. More...
 
class  SegmentIdsToLengthsOp
 
class  SegmentIdsToRangesOp
 
class  SegmentOneHotOp
 
struct  SegmentOpGetGradient
 
class  SelectGradientOpBase
 
class  SelectSmoothL1LossGradientOp
 
class  SelectSmoothL1LossOp
 
class  SeluGradientOp
 
class  SeluOp
 
class  SequenceMaskOp
 
class  ShapeOp
 
struct  SigmoidCPUFunctor
 
class  SigmoidCrossEntropyLossGradientOp
 
class  SigmoidCrossEntropyLossOp
 
class  SigmoidCrossEntropyWithLogitsGradientOp
 
class  SigmoidCrossEntropyWithLogitsOp
 
class  SigmoidFocalLossGradientOp
 
class  SigmoidFocalLossOp
 
struct  SigmoidGradientCPUFunctor
 
class  SignalHandler
 
struct  SignCPUFunctor
 
class  SimpleNet
 
class  SimpleQueue
 
struct  SinCPUFunctor
 
class  SingleOpTransform
 Single Op Transform Base class. More...
 
struct  SinGradientCPUFunctor
 
class  SinusoidPositionEncodingOp
 
class  SizeOp
 
class  SkipIndices
 
class  SkipIndices<>
 
class  SliceGradientOp
 
class  SliceOp
 
class  SmartTensorPrinter
 
class  SmoothL1LossGradientOp
 
class  SmoothL1LossOp
 
class  SNPEOp
 
class  SoftmaxFocalLossGradientOp
 
class  SoftmaxFocalLossOp
 
class  SoftmaxGradientOp
 
class  SoftmaxOp
 
class  SoftmaxWithLossGradientOp
 
class  SoftmaxWithLossOp
 
class  SoftplusGradientOp
 
class  SoftplusOp
 
struct  SoftsignCPUFunctor
 
struct  SoftsignGradientCPUFunctor
 
class  SpaceBatchOpBase
 
class  SpaceToBatchOp
 
class  SparseAdagradOp
 
class  SparseAdamOp
 
class  SparseFtrlOp
 
class  SparseFunHashGradientOp
 
class  SparseFunHashOp
 
class  SparseLengths8BitsRowwiseOp
 
class  SparseLengthsFused8BitRowwiseOp
 
class  SparseMatrixReshapeOp
 
class  SparseMomentumSGDUpdateOp
 
class  SparseNormalizeOp
 
class  SparseToDenseMaskBase
 
class  SparseToDenseMaskGradientOp
 
class  SparseToDenseMaskOp
 
class  SparseToDenseOp
 
class  SpatialBNGradientOp
 
class  SpatialBNOp
 
class  SpatialNarrowAsGradient
 
class  SpatialNarrowAsGradientOp
 
class  SpatialNarrowAsOp
 
class  SpatialSoftmaxWithLossGradientOp
 
class  SpatialSoftmaxWithLossOp
 
class  SplitOp
 
struct  SqrCPUFunctor
 
struct  SqrtCPUFunctor
 
class  SquaredL2DistanceGradientOp
 
class  SquaredL2DistanceOp
 
class  SquareRootDivideOp
 
class  SqueezeOp
 
struct  Stat
 
struct  StaticLinkingProtector
 
class  StatRegistry
 Holds a map of atomic counters keyed by name. More...
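 Sketch of exporting the process-wide registry with the ExportedStatList/toMap helpers listed in Typedefs and Functions on this page (the publish() call and the counter name are assumptions):
     ExportedStatList stats;
     StatRegistry::get().publish(stats);      // snapshot all counters
     ExportedStatMap by_name = toMap(stats);  // index the snapshot by name
     int64_t v = by_name["my_counter"];       // hypothetical counter key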
 
class  StatRegistryCreateOp
 
class  StatRegistryExportOp
 
class  StatRegistryUpdateOp
 
class  StatValue
 
class  StdDevExportedStat
 
class  StepLearningRate
 
class  StopGradientOp
 
struct  StopOnSignal
 
class  StoreAddOp
 
class  StoreGetOp
 
class  StoreHandler
 
struct  StoreHandlerTimeoutException
 
class  StoreSetOp
 
class  StoreWaitOp
 
class  StringDeserializer
 StringDeserializer is the deserializer for Strings. More...
 
class  StringJoinOp
 
struct  StringProvider
 
class  StringSerializer
 StringSerializer is the serializer for String. More...
 
class  StumpFuncOp
 
class  SumElementsGradientOp
 
class  SumElementsIntOp
 
class  SumElementsOp
 
class  SummarizeOp
 
class  SumOp
 
class  SumRangeReducer
 
class  SumRangeReducer< T, CPUContext >
 
struct  SumRangeReducerDef
 
class  SumRangeReducerGradient
 
class  SumReduceDimsGradientOp
 
class  SumReduceDimsOp
 
class  SumReduceLikeOp
 
class  SumReducer
 
class  SumReducer< T, CPUContext >
 
struct  SumReducerDef
 
class  SumReducerGradient
 
class  SumSqrElementsOp
 
struct  SwishCPUFunctor
 
class  SwishGradientOp
 
struct  TanhCPUFunctor
 
struct  TanhGradientCPUFunctor
 
struct  Task
 
class  TaskThreadPool
 
class  Tensor
 Tensor is the basic class in Caffe2 that stores a contiguous memory with its shape information. More...
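 Example (sketch):
     TensorCPU tensor(std::vector<TIndex>{2, 3});  // shape {2, 3}
     // Memory is allocated on the first mutable_data<T>() call.
     float* data = tensor.mutable_data<float>();
     data[0] = 1.0f;
     tensor.Resize(3, 2);           // reshape; element count unchanged here
     TIndex numel = tensor.size();  // 6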
 
class  TensorDeserializer
 TensorDeserializer is the deserializer for Tensors. More...
 
class  TensorPrinter
 
class  TensorProtosDBInput
 
class  TensorSerializer
 TensorSerializer is the serializer for Tensors. More...
 
struct  TensorTypes
 
struct  TensorTypes2
 
struct  TextFileReaderInstance
 
class  TextFileReaderReadOp
 
class  ThreadedRecurrentNetworkExecutor
 
class  ThreadLocalCUDAObjects
 A struct to host thread-local cuda objects. More...
 
class  ThreadPool
 
class  ThresholdedReluGradientOp
 
class  ThresholdedReluOp
 
struct  ThrowInTheTowelIfGradientIsCalled
 A helper class to indicate that the operator should have no gradient. More...
 
class  TileGradientOp
 
class  TileOp
 
class  TimeCounter
 
class  TimeObserver
 
class  TimeOperatorObserver
 
class  Timer
 A simple timer object for measuring time. More...
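 Example (sketch):
     Timer timer;                           // starts timing on construction
     // ... timed workload ...
     float elapsed = timer.MilliSeconds();  // time since construction/Start()
     timer.Start();                         // restart for the next measurement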
 
struct  TimerBeginOp
 
struct  TimerEndOp
 
struct  TimerGetAndEndOp
 
struct  TimerGetOp
 
class  TimerInstance
 
struct  Token
 
class  TokenizedString
 
class  Tokenizer
 
class  TopKGradientOp
 
class  TopKOp
 
class  Transform
 The Transform Base Object. More...
 
class  TransposeOp
 
class  TTContractionGradientOp
 
class  TTContractionOp
 
class  TTLinearGradientOp
 
class  TTLinearOp
 
class  TTPadGradientOp
 
class  TTPadOp
 
class  TypeMeta
 TypeMeta is a thin class that allows us to store the type of a container such as a blob, or the data type of a tensor, with a unique run-time id. More...
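 Example (sketch; float is one of the types registered via CAFFE_KNOWN_TYPE under Functions below):
     TypeMeta meta = TypeMeta::Make<float>();
     CaffeTypeId id = meta.id();      // unique run-time id for float
     size_t bytes = meta.itemsize();  // sizeof(float)
     const char* name = meta.name();  // human-readable registered name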
 
struct  TypeNameRegisterer
 
struct  TypeNameTraits
 
struct  TypeNameTraits< int32_t >
 
struct  TypeNameTraits< int64_t >
 
class  UnaryElementwiseWithArgsOp
 
class  UniformFillOp
 
class  UniqueOp
 Deduplicates input indices vector and optionally produces reverse remapping. More...
 
class  UniqueUniformFillOp
 
class  UnpackSegmentsOp
 
class  UnsafeCoalesceOp
 
class  UnsupportedOperatorFeature
 
class  UpsampleNearestGradientOp
 
class  UpsampleNearestOp
 
class  VariableLengthSequencePaddingOp
 
class  VideoDecoder
 
class  VideoInputOp
 
class  VideoIOContext
 
struct  VideoMeta
 
class  WallClockTimeOp
 
class  WeightedMultiSamplingOp
 
class  WeightedSampleDequeueBlobsOp
 
class  WeightedSampleOp
 
class  WeightedSigmoidCrossEntropyWithLogitsGradientOp
 
class  WeightedSigmoidCrossEntropyWithLogitsOp
 
class  WeightedSumGradientOp
 
class  WeightedSumOp
 
class  WeightedSumReducer
 
class  WeightedSumReducer< T, CPUContext >
 
struct  WeightedSumReducerDef
 
class  WeightedSumReducerGradient
 
class  WhereOp
 
class  WhileOp
 
struct  WithDefaultConstructor
 WithDefaultConstructor is a functor that can be used as the functor of an UnaryElementwiseWithArgsOp. More...
 
struct  WithoutBroadcast
 
class  Worker
 
class  WorkersPool
 
class  Workspace
 Workspace is a class that holds all the related objects created during runtime: (1) all blobs, and (2) all instantiated networks. More...
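 Example (sketch; init_net and predict_net are NetDef protos obtained elsewhere):
     Workspace ws;
     ws.CreateBlob("data");           // blobs are owned by the workspace
     ws.RunNetOnce(init_net);         // e.g. fill parameter blobs once
     ws.CreateNet(predict_net);       // instantiate a reusable net
     ws.RunNet(predict_net.name());   // run the instantiated net by name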
 
class  XavierFillOp
 
class  YellowFinOp
 
class  ZeroGradientOp
 
class  ZmqContext
 
class  ZmqMessage
 
class  ZmqSocket
 

Typedefs

using MemoryDeleter = void(*)(void *)
 
typedef int64_t TIndex
 
template<typename Key , typename Value >
using CaffeMap = std::map< Key, Value >
 
typedef Tensor< CUDAContext > TensorCUDA
 
typedef void(* EventCreateFunction) (const DeviceOption &option, Event *)
 
typedef void(* EventRecordFunction) (Event *, const void *, const char *)
 
typedef void(* EventWaitFunction) (const Event *, void *)
 
typedef void(* EventFinishFunction) (const Event *)
 
typedef EventStatus(* EventQueryFunction) (const Event *)
 
typedef const std::string &(* EventErrorMessageFunction) (const Event *)
 
typedef void(* EventSetFinishedFunction) (const Event *, const char *)
 
typedef void(* EventResetFunction) (Event *)
 
typedef ObserverBase< NetBase > NetObserver
 
typedef std::function< std::unique_ptr< NetObserver >(NetBase *)> NetObserverCreator
 
typedef ObserverBase< OperatorBase > OperatorObserver
 
typedef Registry< std::string, std::unique_ptr< OperatorBase >, const OperatorDef &, Workspace * > *(* RegistryFunction) ()
 
using EnginePrefType = std::vector< std::string >
 
using PerOpEnginePrefType = CaffeMap< int, CaffeMap< std::string, EnginePrefType >>
 
using GlobalEnginePrefType = CaffeMap< int, EnginePrefType >
 
typedef std::function< bool(int)> ShouldContinue
 
using ExportedStatList = std::vector< ExportedStatValue >
 Holds names and values of counters exported from a StatRegistry.
 
using ExportedStatMap = std::unordered_map< std::string, int64_t >
 
typedef Tensor< CPUContext > TensorCPU
 
typedef TypeMeta(* TypeCall) (const void *)
 
typedef vector< TIndex >(* TensorInfoCall) (const void *, bool *shares_data, size_t *capacity, DeviceOption *device)
 
typedef intptr_t CaffeTypeId
 
typedef half_float::half half
 
typedef half DataType
 
template<typename T >
using deleted_unique_ptr = std::unique_ptr< T, std::function< void(T *)>>
 
using ParallelFor = std::function< void(size_t, std::function< void(size_t)>)>
 
using NumericTypes = TensorTypes< int32_t, int64_t, float, double >
 
using IntTypes = TensorTypes< int32_t, int64_t >
 
using BoolTypes = TensorTypes< bool >
 
using IntBoolTypes = TensorTypes< int32_t, int64_t, bool >
 
template<typename InputTypes , class Context , class Functor , class OutputType = SameTypeAsInput>
using UnaryElementwiseOp = UnaryElementwiseWithArgsOp< InputTypes, Context, WithDefaultConstructor< Functor >, OutputType >
 UnaryElementwiseOp is a wrapper around UnaryElementwiseWithArgsOp, with the difference that it takes a functor with default constructor, e.g. More...
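 A sketch of such a default-constructible functor, in the style of the *CPUFunctor structs listed above, registered through UnaryElementwiseOp (MySqr is hypothetical):
     struct MySqrCPUFunctor {
       template <typename T>
       void operator()(const int n, const T* x, T* y, CPUContext* /*ctx*/) {
         for (int i = 0; i < n; ++i) {
           y[i] = x[i] * x[i];  // elementwise square
         }
       }
     };
     REGISTER_CPU_OPERATOR(
         MySqr,
         UnaryElementwiseOp<TensorTypes<float>, CPUContext, MySqrCPUFunctor>);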
 
 
using MapType64To64 = MapTypeTraits< int64_t, int64_t >::MapType
 
using MapType64To32 = MapTypeTraits< int64_t, int32_t >::MapType
 
using MapType32To32 = MapTypeTraits< int32_t, int32_t >::MapType
 
using MapType32To64 = MapTypeTraits< int32_t, int64_t >::MapType
 
template<typename ScalarFunctor , typename TypeMap = FixedType<std::string>>
using StringElementwiseOp = UnaryElementwiseWithArgsOp< TensorTypes< std::string >, CPUContext, ForEach< ScalarFunctor >, TypeMap >
 
using RebatchingQueuePtr = std::unique_ptr< RebatchingQueue >
 
template<typename T >
using EArrXt = Eigen::Array< T, Eigen::Dynamic, 1 >
 
using EArrXf = Eigen::ArrayXf
 
using EArrXd = Eigen::ArrayXd
 
using EArrXi = Eigen::ArrayXi
 
using EArrXb = EArrXt< bool >
 
template<typename T >
using EArrXXt = Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic >
 
using EArrXXf = Eigen::ArrayXXf
 
template<typename T >
using ERArrXXt = Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor >
 
using ERArrXXf = ERArrXXt< float >
 
template<typename T >
using EVecXt = Eigen::Matrix< T, Eigen::Dynamic, 1 >
 
using EVecXd = Eigen::VectorXd
 
using EVecXf = Eigen::VectorXf
 
using ERVecXd = Eigen::RowVectorXd
 
using ERVecXf = Eigen::RowVectorXf
 
template<typename T >
using EMatXt = Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >
 
using EMatXd = Eigen::MatrixXd
 
using EMatXf = Eigen::MatrixXf
 
template<typename T >
using ERMatXt = Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor >
 
using ERMatXd = ERMatXt< double >
 
using ERMatXf = ERMatXt< float >
 
template<typename T >
using EigenMatrixMap = Eigen::Map< Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > >
 
template<typename T >
using EigenArrayMap = Eigen::Map< Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic > >
 
template<typename T >
using EigenVectorMap = Eigen::Map< Eigen::Matrix< T, Eigen::Dynamic, 1 > >
 
template<typename T >
using EigenVectorArrayMap = Eigen::Map< Eigen::Array< T, Eigen::Dynamic, 1 > >
 
template<typename T >
using ConstEigenMatrixMap = Eigen::Map< const Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > >
 
template<typename T >
using ConstEigenArrayMap = Eigen::Map< const Eigen::Array< T, Eigen::Dynamic, Eigen::Dynamic > >
 
template<typename T >
using ConstEigenVectorMap = Eigen::Map< const Eigen::Matrix< T, Eigen::Dynamic, 1 > >
 
template<typename T >
using ConstEigenVectorArrayMap = Eigen::Map< const Eigen::Array< T, Eigen::Dynamic, 1 > >
 

Enumerations

enum  CudaMemoryPoolType { NONE = 0, CUB = 1 }
 
enum  EventStatus { EVENT_INITIALIZED = 0, EVENT_SCHEDULED = 1, EVENT_SUCCESS = 2, EVENT_FAILED = 3 }
 
enum  StorageOrder { UNKNOWN = 0, NHWC = 1, NCHW = 2 }
 
enum  { ALGO_FWD = 0, ALGO_WGRAD = 1, ALGO_DGRAD = 2 }
 
enum  PadMode { CONSTANT = 0, REFLECT = 1, EDGE = 2 }
 
enum  QuantDecodeRunTy { RUN_ALWAYS, RUN_ONCE }
 
enum  RecurrentParamOpMode { SET_PARAM, GET_PARAM }
 
enum  FLowAlgType { FarnebackOpticalFlow = 0, DensePyrLKOpticalFlow = 1, BroxOpticalFlow = 2, OpticalFlowDual_TVL1 = 3 }
 
enum  FlowDataType { Flow2C = 0, Flow3C = 1, FlowWithGray = 2, FlowWithRGB = 3 }
 
enum  SpecialFps { SAMPLE_NO_FRAME = 0, SAMPLE_ALL_FRAMES = -1, SAMPLE_TIMESTAMP_ONLY = -2 }
 
enum  VideoResType { USE_WIDTH_HEIGHT = 0, USE_MINIMAL_WIDTH_HEIGHT = 1, ORIGINAL_RES = 2 }
 
enum  DecodeType { DO_TMP_JITTER = 0, DO_UNIFORM_SMP = 1, USE_START_FRM = 2 }
 

Functions

void ConvertToRawDataset (const string &input_db_name, const string &output_db_name)
 
void ReadImage (std::ifstream *file, int *label, char *buffer)
 
void WriteToDB (const string &filename, const int num_items, const int &offset, db::DB *db)
 
void ConvertCIFAR ()
 
void ConvertImageDataset (const string &input_folder, const string &list_filename, const string &output_db_name, const bool)
 
uint32_t swap_endian (uint32_t val)
 
void convert_dataset (const char *image_filename, const char *label_filename, const char *db_path, const int data_limit)
 
void run ()
 
void NoDelete (void *)
 
CPUAllocator * GetCPUAllocator ()
 
void SetCPUAllocator (CPUAllocator *alloc)
 
void swap (Blob &lhs, Blob &rhs)
 
 CAFFE_DEFINE_TYPED_REGISTRY (BlobSerializerRegistry, CaffeTypeId, BlobSerializerBase, std::unique_ptr)
 
 CAFFE_DEFINE_REGISTRY (BlobDeserializerRegistry, BlobDeserializerBase)
 
 CAFFE_DECLARE_TYPED_REGISTRY (BlobSerializerRegistry, CaffeTypeId, BlobSerializerBase, std::unique_ptr)
 
unique_ptr< BlobSerializerBase > CreateSerializer (CaffeTypeId id)
 
 CAFFE_DECLARE_REGISTRY (BlobDeserializerRegistry, BlobDeserializerBase)
 
unique_ptr< BlobDeserializerBase > CreateDeserializer (const string &type)
 
bool HasCudaRuntime ()
 
const std::map< string, string > & GetBuildOptions ()
 
template<typename T , typename... Args>
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique (Args &&...args)
 
template<typename T >
std::enable_if< std::is_array< T >::value, std::unique_ptr< T > >::type make_unique (const size_t n)
 
template<typename T , typename... Args>
std::enable_if< std::extent< T >::value!=0, std::unique_ptr< T > >::type make_unique (Args &&...)=delete
 
template<typename Dst , typename Src >
Dst dynamic_cast_if_rtti (Src ptr)
 
size_t cudnnCompiledVersion ()
 
size_t cudnnRuntimeVersion ()
 
void CheckCuDNNVersions ()
 
cudnnTensorFormat_t GetCudnnTensorFormat (const StorageOrder &order)
 A wrapper function to convert the Caffe storage order to cudnn storage order enum values.
 
int NumCudaDevices ()
 Returns the number of devices.
 
void SetDefaultGPUID (const int deviceid)
 
int GetDefaultGPUID ()
 
int CaffeCudaGetDevice ()
 Gets the current GPU id. More...
 
void CaffeCudaSetDevice (const int id)
 Sets the current GPU id. More...
 
int GetGPUIDForPointer (const void *ptr)
 Gets the GPU id that the current pointer is located at.
 
const cudaDeviceProp & GetDeviceProperty (const int device)
 Gets the device property for the given device. More...
 
void DeviceQuery (const int deviceid)
 Runs a device query function and prints out the results to LOG(INFO).
 
bool GetCudaPeerAccessPattern (vector< vector< bool > > *pattern)
 Return a peer access pattern by returning a matrix (in the format of a nested vector) of boolean values specifying whether peer access is possible. More...
 
bool TensorCoreAvailable ()
 Return the availability of TensorCores for math.
 
const char * cublasGetErrorString (cublasStatus_t error)
 Return a human readable cublas error string.
 
const char * curandGetErrorString (curandStatus_t error)
 Return a human readable curand error string.
 
int CudaVersion ()
 A runtime function to report the cuda version that Caffe2 is built with.
 
bool HasCudaGPU ()
 Check if the current running session has a cuda gpu present. More...
 
int CAFFE_GET_BLOCKS (const int N)
 Compute the number of blocks needed to run N threads.
 
uint32_t RandomNumberSeed ()
 A function to generate a random number seed that is unique on a best-effort basis, using an ever-incrementing seed and the current time.
 
CudaMemoryPoolType GetCudaMemoryPoolType ()
 Gets the current memory pool type used by Caffe2. More...
 
 CAFFE_KNOWN_TYPE (db::DBReader)
 
 CAFFE_KNOWN_TYPE (db::Cursor)
 
void EventCreateCPU (const DeviceOption &option, Event *event)
 
void EventRecordCPU (Event *event, const void *, const char *err_msg)
 
void EventFinishCPU (const Event *event)
 
void EventWaitCPUCPU (const Event *event, void *)
 
EventStatus EventQueryCPU (const Event *event)
 
const std::string & EventErrorMessageCPU (const Event *event)
 
void EventSetFinishedCPU (const Event *event, const char *err_msg)
 
void EventResetCPU (Event *event)
 
 REGISTER_EVENT_CREATE_FUNCTION (CPU, EventCreateCPU)
 
 REGISTER_EVENT_RECORD_FUNCTION (CPU, EventRecordCPU)
 
 REGISTER_EVENT_WAIT_FUNCTION (CPU, CPU, EventWaitCPUCPU)
 
 REGISTER_EVENT_FINISH_FUNCTION (CPU, EventFinishCPU)
 
 REGISTER_EVENT_QUERY_FUNCTION (CPU, EventQueryCPU)
 
 REGISTER_EVENT_ERROR_MESSAGE_FUNCTION (CPU, EventErrorMessageCPU)
 
 REGISTER_EVENT_SET_FINISHED_FUNCTION (CPU, EventSetFinishedCPU)
 
 REGISTER_EVENT_RESET_FUNCTION (CPU, EventResetCPU)
 
bool EventCanScheduleCPU (const Event *, const Event *)
 
void EventCreateCUDA (const DeviceOption &option, Event *event)
 
void EventRecordCUDA (Event *event, const void *context, const char *err_msg)
 
void EventFinishCUDA (const Event *event)
 
void EventWaitCUDACUDA (const Event *event, void *context)
 
void EventWaitCPUCUDA (const Event *event, void *context)
 
void EventWaitCUDACPU (const Event *event, void *context)
 
EventStatus EventQueryCUDA (const Event *event)
 
const std::string & EventErrorMessageCUDA (const Event *event)
 
void EventSetFinishedCUDA (const Event *event, const char *err_msg)
 
void EventResetCUDA (Event *event)
 
 REGISTER_EVENT_CREATE_FUNCTION (CUDA, EventCreateCUDA)
 
 REGISTER_EVENT_RECORD_FUNCTION (CUDA, EventRecordCUDA)
 
 REGISTER_EVENT_WAIT_FUNCTION (CUDA, CUDA, EventWaitCUDACUDA)
 
 REGISTER_EVENT_WAIT_FUNCTION (CPU, CUDA, EventWaitCPUCUDA)
 
 REGISTER_EVENT_WAIT_FUNCTION (CUDA, CPU, EventWaitCUDACPU)
 
 REGISTER_EVENT_FINISH_FUNCTION (CUDA, EventFinishCUDA)
 
 REGISTER_EVENT_QUERY_FUNCTION (CUDA, EventQueryCUDA)
 
 REGISTER_EVENT_ERROR_MESSAGE_FUNCTION (CUDA, EventErrorMessageCUDA)
 
 REGISTER_EVENT_SET_FINISHED_FUNCTION (CUDA, EventSetFinishedCUDA)
 
 REGISTER_EVENT_RESET_FUNCTION (CUDA, EventResetCUDA)
 
 REGISTER_EVENT_WAIT_FUNCTION (MKLDNN, CUDA, EventWaitCPUCUDA)
 
 REGISTER_EVENT_WAIT_FUNCTION (CUDA, MKLDNN, EventWaitCUDACPU)
 
 CAFFE_DEFINE_REGISTRY (Caffe2FlagsRegistry, Caffe2FlagParser, const string &)
 
void SetUsageMessage (const string &str)
 Sets the usage message when a commandline tool is called with "--help".
 
const char * UsageMessage ()
 Returns the usage message for the commandline tool set by SetUsageMessage.
 
bool ParseCaffeCommandLineFlags (int *pargc, char ***pargv)
 Parses the commandline flags. More...
 
bool CommandLineFlagsHasBeenParsed ()
 Checks if the commandline flags have already been parsed.
 
 CAFFE_DECLARE_REGISTRY (Caffe2FlagsRegistry, Caffe2FlagParser, const string &)
 
OperatorDef * AddOp (NetDef *netdef_ptr, string op_type, std::vector< string > inputs, std::vector< string > outputs)
 
bool MatchStrings (string p, string s)
 This allows for the use of * and | to match operator types, engines, or any other property that is represented by strings. More...
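 Sketch of the documented semantics ('|' separates alternatives, '*' is a wildcard; exact corner cases are not specified here):
     MatchStrings("Conv|FC", "FC");         // true: matches one alternative
     MatchStrings("*", "AnyOperatorType");  // true: wildcard matches anything
     MatchStrings("Conv", "ConvGradient");  // false: plain strings compare exactly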
 
bool MatchArguments (const OperatorDef &p_op, const OperatorDef &g_op)
 This ensures that each named arg that exists in the pattern exists in g_op and is equal in value.
 
bool GlobalInit (int *pargc, char ***argv)
 Initialize the global environment of caffe2. More...
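 Typical entry point (sketch):
     int main(int argc, char** argv) {
       // Parses caffe2 commandline flags and runs registered init functions.
       caffe2::GlobalInit(&argc, &argv);
       // ... build workspaces, nets, and operators ...
       return 0;
     }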
 
bool Caffe2CheckIntrinsicsFeatures (int *, char ***)
 
 REGISTER_CAFFE2_INIT_FUNCTION (Caffe2CheckIntrinsicsFeatures,&Caffe2CheckIntrinsicsFeatures,"Check intrinsics compatibility between the CPU feature and the binary.")
 
std::string StripBasename (const std::string &full_path)
 
size_t ReplaceAll (string &s, const char *from, const char *to)
 
void SetStackTraceFetcher (std::function< string(void)> fetcher)
 
void SetOperatorLogger (std::function< void(const OperatorDef &)> tracer)
 
std::function< void(const OperatorDef &)> GetOperatorLogger ()
 
bool InitCaffeLogging (int *argc, char **argv)
 
void ShowLogInfoToStderr ()
 A utility to allow one to show log info to stderr after the program starts. More...
 
constexpr bool IsUsingGoogleLogging ()
 
void MakeStringInternal (std::stringstream &)
 
template<typename T >
void MakeStringInternal (std::stringstream &ss, const T &t)
 
template<typename T , typename... Args>
void MakeStringInternal (std::stringstream &ss, const T &t, const Args &...args)
 
template<typename... Args>
string MakeString (const Args &...args)
 
template<>
string MakeString (const string &str)
 
string MakeString (const char *c_str)
 
template<class Container >
string Join (const string &delimiter, const Container &v)
 
template<class T >
void LogMessageFatal (const char *file, int line, const T &message)
 
template<typename T >
T & CheckNotNullCommon (const char *file, int line, const char *names, T &t)
 
template<typename T >
T * CheckNotNull (const char *file, int line, const char *names, T *t)
 
template<typename T >
T & CheckNotNull (const char *file, int line, const char *names, T &t)
 
template<class First , class Second >
std::ostream & operator<< (std::ostream &out, const std::pair< First, Second > &p)
 
template<class Iter >
void PrintSequence (std::ostream &ss, Iter begin, Iter end)
 
const CaffeMap< string, const ModuleSchema * > & CurrentModules ()
 Current Modules present in the Caffe2 runtime. More...
 
bool HasModule (const string &name)
 Checks whether a module is already present in the current binary.
 
void LoadModule (const string &name, const string &filename="")
 Load a module. More...
 
 CAFFE_DEFINE_REGISTRY (NetRegistry, NetBase, const std::shared_ptr< const NetDef > &, Workspace *)
 
void AddGlobalNetObserverCreator (NetObserverCreator creator)
 
unique_ptr< NetBase > CreateNet (const NetDef &net_def, Workspace *ws)
 Creates a network, accessing / creating blobs in the given workspace. More...
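 Example (sketch; net_def is a NetDef proto whose blobs live in ws):
     Workspace ws;
     std::unique_ptr<NetBase> net = CreateNet(net_def, &ws);
     if (net) {
       net->Run();  // one pass with the executor named in net_def
     }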
 
unique_ptr< NetBase > CreateNet (const std::shared_ptr< const NetDef > &net_def, Workspace *ws)
 
 CAFFE_DECLARE_REGISTRY (NetRegistry, NetBase, const std::shared_ptr< const NetDef > &, Workspace *)
 
 CAFFE_DEFINE_SHARED_REGISTRY (ThreadPoolRegistry, TaskThreadPool, const DeviceOption &)
 
 CAFFE_REGISTER_CREATOR (ThreadPoolRegistry, CPU, AsyncNetCPUThreadPoolCreator)
 
std::shared_ptr< TaskThreadPool > GetAsyncNetCPUThreadPool (int numa_node_id)
 
 CAFFE_DECLARE_SHARED_REGISTRY (ThreadPoolRegistry, TaskThreadPool, const DeviceOption &)
 
 REGISTER_NET (async_dag, AsyncDAGNet)
 
std::shared_ptr< TaskThreadPool > GetAsyncNetGPUThreadPool (int gpu_id)
 
 CAFFE_REGISTER_CREATOR (ThreadPoolRegistry, CUDA, AsyncNetGPUThreadPoolCreator)
 
 REGISTER_NET (async_polling, AsyncPollingNet)
 
 REGISTER_NET (async_scheduling, AsyncSchedulingNet)
 
 REGISTER_NET (dag, DAGNet)
 
 REGISTER_NET (simple, SimpleNet)
 
 REGISTER_NET (async_simple, AsyncSimpleNet)
 
bool IsNUMAEnabled ()
 
void NUMABind (int numa_node_id)
 
int GetNUMANode (const void *ptr)
 
int GetNumNUMANodes ()
 
void NUMAMove (void *ptr, size_t size, int numa_node_id)
 
int GetCurrentNUMANode ()
 
const std::string OpRegistryKey (const std::string &op_type, const std::string &engine)
 
void SetPerOpEnginePref (const PerOpEnginePrefType &per_op_engine_pref)
 
void SetGlobalEnginePref (const GlobalEnginePrefType &global_engine_pref)
 
void SetEnginePref (const PerOpEnginePrefType &per_op_engine_pref, const GlobalEnginePrefType &global_engine_pref)
 
void SetOpEnginePref (const std::string &op_type, const CaffeMap< int, EnginePrefType > &op_pref)
 
unique_ptr< OperatorBase > CreateOperator (const OperatorDef &operator_def, Workspace *ws, int net_position)
 
std::map< int32_t, OperatorRegistry * > * gDeviceTypeRegistry ()
 
 CAFFE_DEFINE_REGISTRY (CPUOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 CAFFE_REGISTER_DEVICE_TYPE (DeviceType::CPU, CPUOperatorRegistry)
 
 CAFFE_DEFINE_REGISTRY (CUDAOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 CAFFE_REGISTER_DEVICE_TYPE (DeviceType::CUDA, CUDAOperatorRegistry)
 
 CAFFE_DEFINE_REGISTRY (GradientRegistry, GradientMakerBase, const OperatorDef &, const vector< GradientWrapper > &)
 
GradientOpsMeta GetGradientForOp (const OperatorDef &def, const vector< GradientWrapper > &g_output)
 Gets the GradientOpsMeta for the given operator def.
 
TensorShape GetTensorShapeOfBlob (const Blob *b)
 
TensorShapes InferBlobShapesAndTypesFromWorkspace (Workspace *ws, const vector< std::unique_ptr< NetDef >> &nets)
 
TensorShapes InferBlobShapesAndTypesFromMap (const CaffeMap< std::string, std::vector< TIndex >> &blob_dimensions, const vector< std::unique_ptr< NetDef >> &nets)
 
std::map< string, std::pair< DeviceOption, DeviceOption > > ValidateTensorDevices (OperatorBase &op, const OperatorDef &op_def)
 
std::set< std::string > GetRegisteredOperators ()
 
 CAFFE2_DEFINE_TENSOR_TYPES_DISPATCHER (TensorTypes, DoRunWithType, DoRunWithOtherType)
 
 CAFFE2_DEFINE_TENSOR_TYPES_DISPATCHER (TensorTypes2, DoRunWithType2, DoRunWithOtherType2)
 
 CAFFE_DECLARE_REGISTRY (CPUOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 CAFFE_DECLARE_REGISTRY (CUDAOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 CAFFE_DECLARE_REGISTRY (GradientRegistry, GradientMakerBase, const OperatorDef &, const vector< GradientWrapper > &)
 
std::ostream & operator<< (std::ostream &out, const OpSchema &schema)
 
template<typename T_I = int>
TensorShape CreateTensorShape (vector< T_I > dims, ::caffe2::TensorProto_DataType dt)
 
vector< TIndex > GetDimsVector (const TensorShape &shape)
 
std::pair< std::vector< DeviceOption >, std::vector< DeviceOption > > InferOpInputOutputDevice (const OperatorDef &op)
 
template<uint64_t OpsPerPoint>
OpSchema::Cost PointwiseCostInference (const OperatorDef &, const vector< TensorShape > &inputs)
 
bool RunPlanOnWorkspace (Workspace *ws, const PlanDef &plan, ShouldContinue shouldContinue)
 
 CAFFE_KNOWN_TYPE (QTensor< CPUContext >)
 
template<typename KeyType >
void PrintOffendingKey (const KeyType &key)
 
template<>
void PrintOffendingKey (const string &key)
 
template<typename F >
detail::ScopeGuardImplDecay< F > MakeGuard (F &&f) noexcept(noexcept(detail::ScopeGuardImplDecay< F >(static_cast< F && >(f))))
 ScopeGuard is a general implementation of the "Initialization is Resource Acquisition" idiom. More...
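 Example (sketch; dismiss() cancels the cleanup and is assumed from the folly-style scope-guard API this mirrors):
     FILE* f = std::fopen("data.bin", "rb");
     auto guard = MakeGuard([&] {
       if (f) std::fclose(f);  // runs on every exit path from this scope
     });
     // ... work that may return early or throw ...
     // guard.dismiss();  // would cancel the cleanup if ownership moves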
 
ExportedStatMap toMap (const ExportedStatList &stats)
 
 CAFFE_KNOWN_TYPE (Tensor< CPUContext >)
 
TypeCall GetTypeCallFunction (CaffeTypeId id)
 
void RegisterTypeCallFunction (CaffeTypeId id, TypeCall c)
 
TensorInfoCall GetTensorInfoFunction (CaffeTypeId id)
 
void RegisterTensorInfoFunction (CaffeTypeId id, TensorInfoCall c)
 
vector< TIndex > ToVectorTIndex (const std::vector< int > &src)
 A utility function to convert vector<int> to vector<TIndex>.
 
TIndex size_from_dim_ (int k, const vector< TIndex > &dims)
 Return product of all dimensions starting from K.
 
TIndex size_to_dim_ (int k, const vector< TIndex > &dims)
 
TIndex size_between_dim_ (int k, int l, const vector< TIndex > &dims)
 
int canonical_axis_index_ (int axis_index, int ndims)
 
template<class Context >
TypeMeta GetTensorType (const void *c)
 
template<class Context >
vector< TIndex > GetTensorInfo (const void *c, bool *shares_data, size_t *capacity, DeviceOption *device)
 
 CAFFE_DEFINE_REGISTRY (TransformRegistry, Transform)
 
unique_ptr< Transform > CreateTransform (string key)
 
NetDef ApplyTransform (const string &key, const NetDef &netdef)
 
double average_net_run_duration (const NetDef &netdef, const NetDef &init_netdef, const int warmup_runs, const int main_runs)
 
NetDef ApplyTransformIfFaster (const string &key, const NetDef &netdef, const NetDef &init_netdef, const int warmup_runs, const int main_runs, const double improvement_threshold)
 
 CAFFE_DECLARE_REGISTRY (TransformRegistry, Transform)
 
std::map< CaffeTypeId, string > & gTypeNames ()
 
std::set< string > & gRegisteredTypeNames ()
 
std::mutex & gCaffe2TypeRegistrationMutex ()
 
string Demangle (const char *name)
 
string GetExceptionString (const std::exception &e)
 
 CAFFE_KNOWN_TYPE (float)
 
 CAFFE_KNOWN_TYPE (int)
 
 CAFFE_KNOWN_TYPE (std::string)
 
 CAFFE_KNOWN_TYPE (bool)
 
 CAFFE_KNOWN_TYPE (uint8_t)
 
 CAFFE_KNOWN_TYPE (int8_t)
 
 CAFFE_KNOWN_TYPE (uint16_t)
 
 CAFFE_KNOWN_TYPE (int16_t)
 
 CAFFE_KNOWN_TYPE (int64_t)
 
 CAFFE_KNOWN_TYPE (float16)
 
 CAFFE_KNOWN_TYPE (double)
 
 CAFFE_KNOWN_TYPE (char)
 
 CAFFE_KNOWN_TYPE (std::unique_ptr< std::mutex >)
 
 CAFFE_KNOWN_TYPE (std::unique_ptr< std::atomic< bool >>)
 
 CAFFE_KNOWN_TYPE (std::vector< int64_t >)
 
 CAFFE_KNOWN_TYPE (std::vector< unsigned long >)
 
 CAFFE_KNOWN_TYPE (bool *)
 
 CAFFE_KNOWN_TYPE (char *)
 
 CAFFE_KNOWN_TYPE (int *)
 
TensorProto::DataType TypeMetaToDataType (const TypeMeta &meta)
 
const TypeMeta & DataTypeToTypeMeta (const TensorProto::DataType &dt)
 
StorageOrder StringToStorageOrder (const string &str)
 
constexpr char NameScopeSeparator ()
 
struct CAFFE2_ALIGNED (2) __f16
 
template<typename T >
bool fp16_type ()
 
template<>
bool fp16_type< float16 > ()
 
std::string GetUniqueName ()
 
 REGISTER_CPU_OPERATOR (CreateDB, CreateDBOp< CPUContext >)
 
 OPERATOR_SCHEMA (CreateDB).NumInputs(0).NumOutputs(1)
 
 NO_GRADIENT (CreateDB)
 
 REGISTER_CUDA_OPERATOR (CreateDB, CreateDBOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (FileStoreHandlerCreate, FileStoreHandlerCreateOp< CPUContext >)
 
 OPERATOR_SCHEMA (FileStoreHandlerCreate).NumInputs(0).NumOutputs(1)
 Creates a unique_ptr<StoreHandler> that uses the filesystem as backing store (typically a filesystem shared between many nodes, such as NFS). This store handler is not built to be fast. Its recommended use is for integration tests and prototypes where extra dependencies are cumbersome. Use an ephemeral path to ensure multiple processes or runs don't interfere.
 Arg "path": base path used by the FileStoreHandler. Arg "prefix": prefix for all keys used by this store. Output(0) "handler": unique_ptr<StoreHandler>.
 
 NO_GRADIENT (FileStoreHandlerCreateOp)
 
 REGISTER_CUDA_OPERATOR (FileStoreHandlerCreate, FileStoreHandlerCreateOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (RedisStoreHandlerCreate, RedisStoreHandlerCreateOp< CPUContext >)
 
 OPERATOR_SCHEMA (RedisStoreHandlerCreate)
 Arg "host": host name of Redis server. Arg "port": port number of Redis server. Arg "prefix": prefix for all keys used by this store.
 
 NO_GRADIENT (RedisStoreHandlerCreateOp)
 
 REGISTER_CUDA_OPERATOR (RedisStoreHandlerCreate, RedisStoreHandlerCreateOp< CUDAContext >)
 
 CAFFE_KNOWN_TYPE (std::unique_ptr< StoreHandler >)
 
 REGISTER_CPU_OPERATOR (StoreSet, StoreSetOp)
 
 OPERATOR_SCHEMA (StoreSet).NumInputs(2).NumOutputs(0)
 Set a blob in a store. The key is the input blob's name and the value is the data in that blob. The key can be overridden by specifying the 'blob_name' argument.
 Arg "blob_name": alternative key for the blob (optional). Input(0) "handler": unique_ptr<StoreHandler>. Input(1) "data": data blob.
 
 REGISTER_CPU_OPERATOR (StoreGet, StoreGetOp)
 
 OPERATOR_SCHEMA (StoreGet).NumInputs(1).NumOutputs(1)
 Get a blob from a store. The key is the output blob's name. The key can be overridden by specifying the 'blob_name' argument.
 Arg "blob_name": alternative key for the blob (optional). Input(0) "handler": unique_ptr<StoreHandler>. Output(0) "data": data blob.
 
 REGISTER_CPU_OPERATOR (StoreAdd, StoreAddOp)
 
 OPERATOR_SCHEMA (StoreAdd)
 Add a value to a remote counter. If the key is not set, the store initializes it to 0 and then performs the add operation. The operation returns the resulting counter value.
 Arg "blob_name": key of the counter (required). Arg "add_value": value that is added (optional, default: 1). Input(0) "handler": unique_ptr<StoreHandler>. Output(0) "value": the current value of the counter.
 
 REGISTER_CPU_OPERATOR (StoreWait, StoreWaitOp)
 
 OPERATOR_SCHEMA (StoreWait).NumInputs(1, 2).NumOutputs(0)
 Wait for the specified blob names to be set. The blob names can be passed either as an input blob with blob names or as an argument.
 Arg "blob_names": names of the blobs to wait for (optional). Input(0) "handler": unique_ptr<StoreHandler>. Input(1) "names": names of the blobs to wait for (optional).
 
 REGISTER_CPU_OPERATOR (FC_Decomp, FullyConnectedOpDecomp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (FCGradient_Decomp, FullyConnectedDecompGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (FC_Decomp).NumInputs(4).NumOutputs(1)
 
 OPERATOR_SCHEMA (FCGradient_Decomp).NumInputs(4).NumOutputs(3, 4)
 
 REGISTER_GRADIENT (FC_Decomp, GetFCDecompGradient)
 
 REGISTER_CUDA_OPERATOR (FC_Decomp, FullyConnectedOpDecomp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (FCGradient_Decomp, FullyConnectedDecompGradientOp< float, CUDAContext >)
 
 REGISTER_CPU_OPERATOR (TTContraction, TTContractionOp< float, CPUContext >)
 
 REGISTER_CUDA_OPERATOR (TTContraction, TTContractionOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (TTContractionGradient, TTContractionGradientOp< float, CUDAContext >)
 
 REGISTER_CPU_OPERATOR (ImageInput, ImageInputOp< CPUContext >)
 
 NumInputs (0, 1).NumOutputs(2, INT_MAX).TensorInferenceFunction([](const OperatorDef &def, const vector< TensorShape > &){vector< TensorShape > out(2);ArgumentHelper helper(def);int batch_size=helper.GetSingleArgument< int >("batch_size", 0);int crop=helper.GetSingleArgument< int >("crop",-1);int color=helper.GetSingleArgument< int >("color", 1);CHECK_GT(crop, 0);out[0]=CreateTensorShape(vector< int >{batch_size, crop, crop, color?3:1}, TensorProto::FLOAT);out[1]=CreateTensorShape(vector< int >{1, batch_size}, TensorProto::INT32);return out;})
 .SetDoc(R"DOC( Imports and processes images from a database. For each run of the operator, batch_size images will be processed. GPUs can optionally be used for part of the processing. The following transformations are applied to the image: - A bounding box is applied to the initial image (optional). - The image is rescaled either up or down (with the scale argument) or just up (with the minsize argument). - The image is randomly cropped (crop size is passed as an argument but the location of the crop is random except if is_test is passed in which case the image is cropped at the center). - The image is normalized; each of its color channels can have separate normalization values. The dimension of the output image will always be crop x crop. )DOC")
 .Arg("batch_size","Number of images to output for each run of the operator. Must be 1 or greater")
 .Arg("color","Number of color channels (1 or 3). Defaults to 1")
 .Arg("color_jitter","Whether or not to do color jitter. Defaults to 0")
 .Arg("img_saturation","Image saturation scale used in color jittering. Defaults to 0.4")
 .Arg("img_brightness","Image brightness scale used in color jittering. Defaults to 0.4")
 .Arg("img_contrast","Image contrast scale used in color jittering. Defaults to 0.4")
 .Arg("color_lighting","Whether or not to do color lighting. Defaults to 0")
 .Arg("color_lighting_std","Std of normal distribution where color lighting scaling factor is sampled. Defaults to 0.1")
 .Arg("scale_jitter_type","Type of scale jittering to apply")
 .Arg("scale","Scale the size of the smallest dimension of the image to this. Scale and minsize are mutually exclusive. Must be larger than crop")
 .Arg("minsize","Scale the size of the smallest dimension of the image to this only if the size is initially smaller. Scale and minsize are mutually exclusive. Must be larger than crop.")
 .Arg("warp","If 1, both dimensions of the image will be set to minsize or scale; otherwise the other dimension is proportionally scaled. Defaults to 0")
 .Arg("crop","Size to crop the image to. Must be provided")
 .Arg("mirror","Whether or not to mirror the image. Defaults to 0")
 .Arg("mean","Mean by which to normalize color channels. Defaults to 0.")
 .Arg("mean_per_channel","Vector of means per color channel (1 or 3 elements). Defaults to mean argument. Channel order BGR")
 .Arg("std","Standard deviation by which to normalize color channels. Defaults to 1.")
 .Arg("std_per_channel","Vector of standard dev. per color channel (1 or 3 elements). Defaults to std argument. Channel order is BGR")
 .Arg("bounding_ymin","Bounding box coordinate. Defaults to -1 (none)")
 .Arg("bounding_xmin","Bounding box coordinate. Defaults to -1 (none)")
 .Arg("bounding_height","Bounding box coordinate. Defaults to -1 (none)")
 .Arg("bounding_width","Bounding box coordinate. Defaults to -1 (none)")
 .Arg("use_caffe_datum","1 if the input is in Caffe format. Defaults to 0")
 .Arg("use_gpu_transform","1 if GPU acceleration should be used. Defaults to 0. Can only be 1 in a CUDAContext")
 .Arg("decode_threads","Number of CPU decode/transform threads. Defaults to 4")
 .Arg("output_type","If gpu_transform, can set to FLOAT or FLOAT16.")
 .Arg("db","Name of the database (if not passed as input)")
 .Arg("db_type","Type of database (if not passed as input). Defaults to leveldb")
 .Arg("output_sizes","The sizes of any outputs besides the data and label (should have a number of elements equal to the number of additional outputs)")
 .Arg("random_scale","[min, max] shortest side desired for image resize. Defaults to [-1, -1] or no random resize desired")
 .Input(0,"reader","The input reader (a db::DBReader)")
 .Output(0,"data","Tensor containing the images")
 .Output(1,"label","Tensor containing the labels")
 .Output(2,"additional outputs","Any outputs after the first 2 will be tensors read from the input TensorProtos")
 
 NO_GRADIENT (ImageInput)
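 
The shape inference above reduces to a few lines of arithmetic; a standalone sketch (no Caffe2 dependency) for reference:

    #include <cassert>
    #include <vector>

    // Mirrors the TensorInferenceFunction above: the data output is
    // {batch_size, crop, crop, color ? 3 : 1} (NHWC) and the label output
    // is {1, batch_size}.
    std::vector<std::vector<int>> ImageInputShapes(int batch_size, int crop, int color) {
      assert(crop > 0);  // the schema enforces CHECK_GT(crop, 0)
      return {{batch_size, crop, crop, color ? 3 : 1}, {1, batch_size}};
    }
    // e.g. ImageInputShapes(32, 224, 1) -> {{32, 224, 224, 3}, {1, 32}}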
 
template<class Context >
bool RandomSizedCropping (cv::Mat *img, const int crop, std::mt19937 *randgen)
 
template<class Context >
void Saturation (float *img, const int img_size, const float alpha_rand, std::mt19937 *randgen)
 
template<class Context >
void Brightness (float *img, const int img_size, const float alpha_rand, std::mt19937 *randgen)
 
template<class Context >
void Contrast (float *img, const int img_size, const float alpha_rand, std::mt19937 *randgen)
 
template<class Context >
void ColorJitter (float *img, const int img_size, const float saturation, const float brightness, const float contrast, std::mt19937 *randgen)
 
template<class Context >
void ColorLighting (float *img, const int img_size, const float alpha_std, const std::vector< std::vector< float >> &eigvecs, const std::vector< float > &eigvals, std::mt19937 *randgen)
 
template<class Context >
void ColorNormalization (float *img, const int img_size, const int channels, const std::vector< float > &mean, const std::vector< float > &std)
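 
A minimal sketch of what per-channel normalization of this form looks like, assuming an interleaved HWC float buffer (the actual memory layout used by the helper is an assumption here):

    #include <vector>

    // Per-channel normalization in the spirit of ColorNormalization above,
    // assuming channels are interleaved (HWC).
    void NormalizeHWC(float* img, int img_size, int channels,
                      const std::vector<float>& mean,
                      const std::vector<float>& std) {
      for (int i = 0; i < img_size; ++i) {
        for (int c = 0; c < channels; ++c) {
          img[i * channels + c] = (img[i * channels + c] - mean[c]) / std[c];
        }
      }
    }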
 
template<class Context >
void TransformImage (const cv::Mat &scaled_img, const int channels, float *image_data, const bool color_jitter, const float saturation, const float brightness, const float contrast, const bool color_lighting, const float color_lighting_std, const std::vector< std::vector< float >> &color_lighting_eigvecs, const std::vector< float > &color_lighting_eigvals, const int crop, const bool mirror, const std::vector< float > &mean, const std::vector< float > &std, std::mt19937 *randgen, std::bernoulli_distribution *mirror_this_image, bool is_test=false)
 
template<class Context >
void CropTransposeImage (const cv::Mat &scaled_img, const int channels, uint8_t *cropped_data, const int crop, const bool mirror, std::mt19937 *randgen, std::bernoulli_distribution *mirror_this_image, bool is_test=false)
 
 REGISTER_CUDA_OPERATOR (ImageInput, ImageInputOp< CUDAContext >)
 
template<typename T_IN , typename T_OUT , class Context >
bool TransformOnGPU (Tensor< Context > &X, Tensor< Context > *Y, Tensor< Context > &mean, Tensor< Context > &std, Context *context)
 
 REGISTER_EVENT_CREATE_FUNCTION (MKLDNN, EventCreateCPU)
 
 REGISTER_EVENT_RECORD_FUNCTION (MKLDNN, EventRecordCPU)
 
 REGISTER_EVENT_WAIT_FUNCTION (MKLDNN, MKLDNN, EventWaitCPUCPU)
 
 REGISTER_EVENT_WAIT_FUNCTION (MKLDNN, CPU, EventWaitCPUCPU)
 
 REGISTER_EVENT_WAIT_FUNCTION (CPU, MKLDNN, EventWaitCPUCPU)
 
 REGISTER_EVENT_FINISH_FUNCTION (MKLDNN, EventFinishCPU)
 
 REGISTER_EVENT_QUERY_FUNCTION (MKLDNN, EventQueryCPU)
 
 REGISTER_EVENT_ERROR_MESSAGE_FUNCTION (MKLDNN, EventErrorMessageCPU)
 
 REGISTER_EVENT_SET_FINISHED_FUNCTION (MKLDNN, EventSetFinishedCPU)
 
 REGISTER_EVENT_RESET_FUNCTION (MKLDNN, EventResetCPU)
 
 CAFFE_DECLARE_REGISTRY (MKLOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 CAFFE_KNOWN_TYPE (GLTensor< GLfloat >)
 
 CAFFE_KNOWN_TYPE (GLTensor< GLhalf >)
 
 CAFFE_KNOWN_TYPE (GLTensor< half >)
 
 CAFFE_KNOWN_TYPE (Tensor< GLContext >)
 
void EventCreateOPENGL (const DeviceOption &, Event *)
 
void EventRecordOPENGL (Event *, const void *, const char *)
 
void EventWaitOPENGLOPENGL (const Event *, void *)
 
void EventFinishOPENGL (const Event *)
 
void EventResetOPENGL (Event *)
 
 REGISTER_EVENT_CREATE_FUNCTION (OPENGL, EventCreateOPENGL)
 
 REGISTER_EVENT_RECORD_FUNCTION (OPENGL, EventRecordOPENGL)
 
 REGISTER_EVENT_WAIT_FUNCTION (OPENGL, OPENGL, EventWaitOPENGLOPENGL)
 
 REGISTER_EVENT_FINISH_FUNCTION (OPENGL, EventFinishOPENGL)
 
 REGISTER_EVENT_RESET_FUNCTION (OPENGL, EventResetOPENGL)
 
template<typename T = half>
void getTensorCPU (const GLTensor< T > &g_, TensorCPU &g)
 
 REGISTER_NET (opengl, GLNet)
 
 CAFFE_DEFINE_REGISTRY (GLOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
 CAFFE_REGISTER_DEVICE_TYPE (DeviceType::OPENGL, GLOperatorRegistry)
 
 CAFFE_DECLARE_REGISTRY (GLOperatorRegistry, OperatorBase, const OperatorDef &, Workspace *)
 
void dumpDefForOpenGL (const NetDef &d)
 
NetDef rewritePredictNetForOpenGL (const NetDef &predictNet, bool runFusion, std::unordered_set< std::string > cpuOps)
 
bool tryConvertToOpenGL (const NetDef &predictNet, NetDef *glPredictNet, bool runFusion, std::unordered_set< std::string > cpuOps)
 
 REGISTER_GL_OPERATOR (Relu, GLReluOp< half >)
 
 REGISTER_GL_OPERATOR (Sigmoid, GLSigmoidOp< DataType >)
 
 REGISTER_GL_OPERATOR (Concat, GLConcatOp< DataType >)
 
 REGISTER_GL_OPERATOR (Conv, GLConvOp< DataType >)
 
 REGISTER_GL_OPERATOR (CopyFromGL, CopyFromGLOp< DataType >)
 
 REGISTER_GL_OPERATOR (Sum, GLSumOp< DataType >)
 
 REGISTER_GL_OPERATOR (Add, GLSumOp< DataType >)
 
 REGISTER_GL_OPERATOR (FC, GLFullyConnectedOp< DataType >)
 
 REGISTER_GL_OPERATOR (NormalizePlanarYUV, GLNormalizePlanarYUVOp< DataType >)
 
 REGISTER_GL_OPERATOR (AveragePool, GLAveragePoolOp< DataType >)
 
 REGISTER_GL_OPERATOR (MaxPool, GLMaxPoolOp< DataType >)
 
 REGISTER_GL_OPERATOR (Reshape, GLReshapeOp< DataType >)
 
 REGISTER_GL_OPERATOR (ResizeNearest, GLResizeNearestOp< DataType >)
 
 REGISTER_GL_OPERATOR (Softmax, GLSoftmaxOp< DataType >)
 
 REGISTER_GL_OPERATOR (SpatialBN, GLSpatialBNOp< DataType >)
 
void benchmarkModel (std::string init_net_pb, std::string predict_net_pb, std::string input_name, std::vector< int > input_dims, std::string net_name="benchmark_net", std::unordered_set< std::string > cpu_ops=std::unordered_set< std::string >({}))
 
template<typename T = float>
void PopulateCPUBlob (Workspace *ws, bool random, std::string name, std::vector< int > dims, int val=1, int dist_shift=0, float variance=1)
 
template<typename T = half>
void compareNetResult (Workspace &ws, NetDef &cpu_net, NetDef &gpu_net, string cpu_blob="ref_Y", string gpu_blob="gpu_Y", double tol=0.01, bool relative=false)
 
template<typename T = half>
void compareNetResult4D (Workspace &ws, NetDef &cpu_net, NetDef &gpu_net, string cpu_blob="ref_Y", string gpu_blob="gpu_Y", double tol=0.05)
 
bool tryConvertToMPSCNN (const NetDef &initNet, const NetDef &predictNet, NetDef *mpscnnPredictNet)
 
NetDef annotateDefWithReadCounts (const NetDef &net)
 
NetDef rewriteForMetal (const NetDef &net)
 
NetDef runMPSCNNFusion (const NetDef &net)
 
void dumpDef (const NetDef &d)
 
void mpscnnRecordExecutionFinish ()
 
MPSCNNContext & getMPSCNNContext ()
 
bool tryConvertToMPSCNNIntermediateCopies (const NetDef &initNet, const NetDef &predictNet, NetDef *mpscnnPredictNet)
 
NetDef setSpecialArgs (const NetDef &def)
 
void testMPSCNN ()
 
void compareModels (const NetDef &initNet, NetDef predictNet)
 
void verifyRewrite (const NetDef &initNet, const NetDef &net, std::vector< int > inputDims)
 
 CAFFE_KNOWN_TYPE (GLImage< float >)
 
 CAFFE_KNOWN_TYPE (GLImage< uint8_t >)
 
 CAFFE_KNOWN_TYPE (GLImageVector< float >)
 
 CAFFE_KNOWN_TYPE (GLImageVector< uint8_t >)
 
template<class T >
void shareInputGLImage (Workspace *ws, const std::string &name, GLImageVector< T > *input)
 
template<class T >
const GLImageVector< T > * extractOutputGLImage (Workspace *ws, const std::string &name)
 
const NetDef create_gl_run_net (const NetDef &init_net, const NetDef &run_net, bool use_texture_input)
 
NetDef rewritePredictNetForOpenGL (const NetDef &predictNet, bool useTextureInput, bool useTiling, bool runFusion)
 
bool tryConvertToOpenGL (const NetDef &initNet, const NetDef &predictNet, NetDef *glPredictNet, bool useTextureInput, bool useTiling, bool runFusion)
 
 REGISTER_CPU_OPERATOR (OpenGLAdd, OpenGLAddOp< float16_t >)
 
 OPERATOR_SCHEMA (OpenGLAdd).NumInputs(2).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLConcat, OpenGLConcatOp< float16_t >)
 
 OPERATOR_SCHEMA (OpenGLConcat).NumInputs(2).NumOutputs(1, 2)
 
 REGISTER_CPU_OPERATOR (OpenGLConv, OpenGLConvOp< float16_t, false, false >)
 
 OPERATOR_SCHEMA (OpenGLConv).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLConvPRelu, OpenGLConvOp< float16_t, true, false >)
 
 OPERATOR_SCHEMA (OpenGLConvPRelu).NumInputs(4).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLConvRelu, OpenGLConvOp< float16_t, false, true >)
 
 OPERATOR_SCHEMA (OpenGLConvRelu).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLConvTranspose, OpenGLConvTransposeOp< float16_t, false, false >)
 
 OPERATOR_SCHEMA (OpenGLConvTranspose).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLConvTransposePRelu, OpenGLConvTransposeOp< float16_t, true, false >)
 
 OPERATOR_SCHEMA (OpenGLConvTransposePRelu).NumInputs(4).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLConvTransposeRelu, OpenGLConvTransposeOp< float16_t, false, true >)
 
 OPERATOR_SCHEMA (OpenGLConvTransposeRelu).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (CopyToOpenGL, CopyToOpenGLOp< float16_t >)
 
 OPERATOR_SCHEMA (CopyToOpenGL).NumInputs(1).NumOutputs(1).AllowInplace({{0, 0}})
 
 REGISTER_CPU_OPERATOR (CopyFromOpenGL, CopyFromOpenGLOp< float16_t >)
 
 OPERATOR_SCHEMA (CopyFromOpenGL).NumInputs(1).NumOutputs(1).AllowInplace({{0, 0}})
 
 REGISTER_CPU_OPERATOR (OpenGLInstanceNorm, OpenGLInstanceNormPReluOp< float16_t, false >)
 
 OPERATOR_SCHEMA (OpenGLInstanceNorm).NumInputs(3).NumOutputs(1, 3).AllowInplace({{0, 0}})
 
 REGISTER_CPU_OPERATOR (OpenGLInstanceNormPRelu, OpenGLInstanceNormPReluOp< float16_t, true >)
 
 OPERATOR_SCHEMA (OpenGLInstanceNormPRelu).NumInputs(3, 4).NumOutputs(1, 3).AllowInplace({{0, 0}})
 
 REGISTER_CPU_OPERATOR (OpenGLMul, OpenGLMulOp< float16_t >)
 
 REGISTER_CPU_OPERATOR (OpenGLNormalizePlanarYUV, GLNormPlanarYUVOp< float16_t >)
 
 OPERATOR_SCHEMA (OpenGLNormalizePlanarYUV).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLPadImage, OpenGLPadImageOp< float16_t >)
 
 OPERATOR_SCHEMA (OpenGLPadImage).NumInputs(1).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLPRelu, OpenGLPReluOp< float16_t, GLPRelu::PRelu >)
 
 IdenticalTypeAndShape ()
 
 REGISTER_CPU_OPERATOR (OpenGLRelu, OpenGLPReluOp< float16_t, GLPRelu::Relu >)
 
 REGISTER_CPU_OPERATOR (OpenGLResizeNearest, OpenGLResizeNearestOp< float16_t >)
 
 OPERATOR_SCHEMA (OpenGLResizeNearest).NumInputs(1).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLSigmoid, OpenGLSigmoidOp< float16_t, Sigmoid >)
 
 REGISTER_CPU_OPERATOR (OpenGLTanh, OpenGLSigmoidOp< float16_t, Tanh >)
 
 REGISTER_CPU_OPERATOR (OpenGLTensorToTextureStylizerPreprocess, OpenGLTensorToTextureStylizerPreprocessOp)
 
 OPERATOR_SCHEMA (OpenGLTensorToTextureStylizerPreprocess).NumInputs(2).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLTextureToTextureStylizerPreprocess, OpenGLTextureToTextureStylizerPreprocessOp< RGBA >)
 
 OPERATOR_SCHEMA (OpenGLTextureToTextureStylizerPreprocess).NumInputs(2).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLTextureToTensorStylizerDeprocess, OpenGLTextureToTensorStylizerDeprocessOp)
 
 OPERATOR_SCHEMA (OpenGLTextureToTensorStylizerDeprocess).NumInputs(2).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLTextureToTextureStylizerDeprocess, OpenGLTextureToTextureStylizerDeprocessOp< RGBA >)
 
 OPERATOR_SCHEMA (OpenGLTextureToTextureStylizerDeprocess).NumInputs(2).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (OpenGLSub, OpenGLSubOp< float16_t >)
 
 OPERATOR_SCHEMA (OpenGLSub).NumInputs(2).NumOutputs(1)
 
void testOpenGL ()
 
void compareModelsForOpenGL (std::string name, const NetDef &initNet, NetDef predictNet, int width, int height, int channel, std::string input_type, std::string input_order)
 
void compareBatchedToTiledModels (std::string name, const NetDef &initNet, NetDef predictNet, int width, int height, int channel, std::string input_type, std::string input_order)
 
int runModelBenchmarks (caffe2::NetDef &init_net, caffe2::NetDef &predict_net, int warm_up_runs, int main_runs, int channel, int height, int width, std::string input_type, std::string input_order, std::string engine, bool run_individual=false, bool use_texture_input=false, bool use_tiling=false, bool run_fusion=true)
 
std::string & gSNPELocation ()
 
 REGISTER_CPU_OPERATOR (SNPE, SNPEOp)
 
void uniformQuantize2b1b (const TensorCPU &X, const std::vector< std::unique_ptr< TensorCPU >> &XQ, float offset, float inter_center_distance)
 
void qconv (const ConvArgs &args, const TensorCPU &X, const TensorCPU &W, const TensorCPU *b, TensorCPU *Y)
 
void qpad_zero (const ConvArgs &args, const TensorCPU &X, TensorCPU *Y)
 
void signQuantize (const TensorCPU &X, TensorCPU *XQ)
 
void filterNormalization11 (const TensorCPU &WQ, TensorCPU *WQN)
 
void filterNormalizationL1 (const TensorCPU &W, TensorCPU *WL1)
 
void qim2col (const ConvArgs &args, const TensorCPU &XQ, const TensorCPU &WQ, TensorCPU *XQcol)
 
std::unique_ptr< QConvState > create2b1bConvState (Workspace *ws, const TensorCPU &W, const TensorCPU *b)
 
void run2b1bConvGeneric (QConvState *state, const ConvArgs &args, const TensorCPU &X, TensorCPU *Y)
 
void run2b1bUnification (QConvState *state, size_t N, size_t C, const float *WQNVdata, const float *YQs0Vdata, const float *YQs1Vdata, size_t YQstride, float *Ydata, size_t Ystride, const float *bias)
 
 REGISTER_CPU_OPERATOR (QConv, QConvOp)
 
size_t divRoundUp (size_t x, size_t d)
 
bool run2b1bConvNeon (QConvState *state, const ConvArgs &args, const TensorCPU &X, TensorCPU *Y)
 
 CAFFE_KNOWN_TYPE (MPICommonWorldWrapper)
 
std::mutex & MPIMutex ()
 
MPI_Comm GlobalMPIComm ()
 Gets the global MPI communicator used by Caffe2. More...
 
void SetGlobalMPIComm (MPI_Comm new_comm)
 Sets the global MPI communicator. More...
 
int MPICommSize (MPI_Comm comm)
 A helper function to return the size of the given communicator.
 
int MPICommRank (MPI_Comm comm)
 A helper function to return the rank of the given communicator.
 
void MPISetupPeers (const int replicas, const string &role, const string &job_path)
 A function used to perform peer setup so one does not need to use mpirun / mpiexec to run the binary. More...
 
void CheckInitializedMPI ()
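 
A hedged usage sketch of these helpers (assuming MPI has been initialized and the declaring Caffe2 header is included):

    #include <cstdio>
    #include <mpi.h>

    // Query this process's place in Caffe2's global MPI communicator.
    void PrintMyRank() {
      MPI_Comm comm = caffe2::GlobalMPIComm();
      const int rank = caffe2::MPICommRank(comm);  // rank of this node
      const int size = caffe2::MPICommSize(comm);  // size of the communicator
      std::printf("rank %d of %d\n", rank, size);
    }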
 
 REGISTER_CPU_OPERATOR (Abs, UnaryElementwiseOp< TensorTypes< float >, CPUContext, AbsCPUFunctor >)
 
 REGISTER_CPU_OPERATOR (AbsGradient, BinaryElementwiseOp< TensorTypes< float >, CPUContext, WithoutBroadcast< AbsGradientCPUFunctor >>)
 
 SetDoc(R"DOC( Calculates the absolute value of the given input tensor, element-wise. )DOC").Input(0,"input","Input tensor").Output(0,"output","The absolute value of the input tensor computed element-wise").InheritOnnxSchema("Abs")
 
 OPERATOR_SCHEMA (AbsGradient).NumInputs(2).NumOutputs(1).IdenticalTypeAndShape()
 
 REGISTER_GRADIENT (Abs, GetAbsGradient)
 
 REGISTER_CPU_OPERATOR (Accumulate, AccumulateOp< float, CPUContext >)
 
 SetDoc(R"DOC( Accumulate operator accumulates the input tensor to the output tensor. If the output tensor already has the right size, we add to it; otherwise, we first initialize the output tensor to all zeros, and then do accumulation. Any further calls to the operator, given that no one else fiddles with the output in the interim, will do simple accumulations. Accumulation is done using Axpby operation as shown: Y = 1*X + gamma*Y where X is the input tensor, Y is the output tensor and gamma is the multiplier argument. )DOC").Arg("gamma","(float, default 1.0) Accumulation multiplier").Input(0,"input","The input tensor that has to be accumulated to the output tensor. If the output size is not the same as input size, the output tensor is first reshaped and initialized to zero, and only then, accumulation is done.").Output(0,"output","Accumulated output tensor")
 
 SHOULD_NOT_DO_GRADIENT (Accumulate)
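 
The Axpby update the doc describes, written out for a flat buffer (a sketch, not the CPUContext math path):

    #include <vector>

    // Y = 1*X + gamma*Y, elementwise, as documented for Accumulate above.
    void Accumulate(const std::vector<float>& X, std::vector<float>& Y, float gamma) {
      for (size_t i = 0; i < X.size(); ++i) {
        Y[i] = X[i] + gamma * Y[i];
      }
    }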
 
 REGISTER_CPU_OPERATOR (Accuracy, AccuracyOp< float, CPUContext >)
 
 NumInputs (2).NumOutputs(1).ScalarType(TensorProto::FLOAT)
 
  SHOULD_NOT_DO_GRADIENT (Accuracy)
 
 REGISTER_CPU_OPERATOR (RowWiseArgMax, RowWiseArgMaxOp< CPUContext >)
 
 SetDoc(R"DOC( Given a 2D (N x D) input tensor, this operator returns a 2D (N x 1) output tensor with the index of the maximum value in each row. If there are duplicate max values in a row, the index of the first occurrence is returned. )DOC").Input(0,"X","2D (N x D) input tensor").Output(0,"Z","2D (N x 1) output tensor")
 
 NO_GRADIENT (RowWiseArgMax)
 
 REGISTER_CPU_OPERATOR (ArgMax, ArgMaxOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (ArgMin, ArgMinOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (Assert, AssertOp< CPUContext >)
 
 SetDoc(R"DOC( Takes in a tensor of bools, ints, or long longs and checks if all values are true when coerced into a boolean. In other words, for non-bool types this asserts that all values in the tensor are non-zero. )DOC").Arg("error_msg","An error message to print when the assert fails.", false)
 
 REGISTER_CPU_OPERATOR (BatchGather, BatchGatherOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (BatchGatherGradient, BatchGatherGradientOp< CPUContext >)
 
output_dims.push_back (data_dims[0])
 
output_dims.insert (output_dims.end(), indices_dims.begin(), indices_dims.end())
 
output_dims.insert (output_dims.end(), data_dims.begin()+2, data_dims.end())
 
 SetDoc (R"DOC( Batch gather operation, first dimension in DATA is the batch size. Given DATA tensor of rank r >= 2, and INDICES tensor of rank q >= 1, gather entries of the outer-most dimension of DATA indexed by INDICES, and concatenate them in an output tensor of rank (q - 1) + (r - 1). Example: DATA = [ [1.0, 1.2, 2.4, 4.5], [2.3, 3.4, 3.6, 2.3], [4.5, 5.7, 1.2, 4.5], ] INDICES = [ [0, 2], ] OUTPUT = [ [1.0, 2.4], [2.3, 3.6], [4.5, 1.2], ] )DOC").Input(0
 
Tensor of rank of any rank q Output (0,"OUTPUT","Tensor of rank (q - 1) + (r - 1).")
 
 OPERATOR_SCHEMA (BatchGatherGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_GRADIENT (BatchGather, GetBatchGatherGradient)
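 
A standalone sketch of the gather described above for the 2D case, reproducing the doc's example:

    #include <vector>

    // For each batch row of DATA, pick the columns named by `indices`.
    // DATA is (batch x r), OUTPUT is (batch x indices.size()).
    std::vector<std::vector<float>> BatchGather2D(
        const std::vector<std::vector<float>>& data,
        const std::vector<int>& indices) {
      std::vector<std::vector<float>> out;
      for (const auto& row : data) {
        std::vector<float> picked;
        for (int idx : indices) picked.push_back(row[idx]);
        out.push_back(picked);
      }
      return out;
    }
    // BatchGather2D({{1.0,1.2,2.4,4.5},{2.3,3.4,3.6,2.3},{4.5,5.7,1.2,4.5}}, {0,2})
    //   -> {{1.0,2.4},{2.3,3.6},{4.5,1.2}}  (matches the doc example)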
 
 REGISTER_CPU_OPERATOR (BatchMatMul, BatchMatMulOp< CPUContext >)
 
vector< TensorShape > TensorInferenceForBatchMatMul (const OperatorDef &def, const vector< TensorShape > &in)
 
OpSchema::Cost CostInferenceForBatchMatMul (const OperatorDef &def, const vector< TensorShape > &in)
 
 SetDoc(R"DOC( Batch Matrix multiplication Yi = Ai * Bi, where A has shape (dim0, dim1, ... M, K), B has shape (dim0, dim1, ... K, N), and Y has shape (dim0, dim1, ... M, N). )DOC")
 
 REGISTER_CPU_OPERATOR (BatchSparseToDense, BatchSparseToDenseOp< float, CPUContext >)
 
 NumInputs (3, 4).NumOutputs(1).SetDoc(R"DOC( Convert sparse matrix representation into dense matrix. A sparse matrix is represented by a `lengths` vector, an `indices` vector, and a `values` vector. Each element in the `lengths` vector (lengths[i]) represents the number of indices in this batch (batch i). Within each batch, `indices` should not contain duplicate numbers. )DOC")
 
 REGISTER_CPU_OPERATOR (BooleanMask, BooleanMaskOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (BooleanMaskLengths, BooleanMaskLengthsOp< CPUContext >)
 
 SetDoc (R"DOC( Given a data tensor and a 1D boolean mask tensor, returns a tensor containing only the elements corresponding to positions where the mask is true. )DOC").Input(0
 
original data tensor Input (1,"mask","A tensor of bools of same shape as `data`.").Output(0
 
original data tensor A tensor of same type as data Output (1,"masked_indices","A tensor for indices.")
 
 SetDoc(R"DOC( Given a tensor of int32 segment lengths and a mask (boolean) tensor, return the segment lengths of a corresponding segmented tensor after BooleanMask is applied. )DOC").Input(0,"lengths","A 1D int32 tensor representing segment lengths.").Input(1,"mask","A 1D bool tensor of values to keep.").Output(0,"masked_lengths","Segment lengths of a masked tensor.")
 
 NO_GRADIENT (BooleanMaskLengths)
 
template<typename Functor >
void MaskWithFunctor (size_t N, size_t M, int B, const float *in, Functor fn, float fill_val, float *out)
 
template<typename Functor >
void RepeatedMaskWithFunctor (size_t N, size_t M, int D, const float *in, Functor fn, float fill_val, float *out)
 
 REGISTER_CPU_OPERATOR (SequenceMask, SequenceMaskOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (BooleanUnmask, BooleanUnmaskOp< CPUContext >)
 
 NumInputs ([](int n){return n > 0 && n % 2 == 0;}).NumOutputs(1).SetDoc(R"DOC( Given a series of masks and values, reconstruct values together according to masks. Note that for every field there must be at least one True in the corresponding masks; if for a field there are multiple True values, we will accept the first one. )DOC").Output(0,"unmasked_data","The final reconstructed unmasked data")
 
 REGISTER_CPU_OPERATOR (Cast, CastOp< CPUContext >)
 
out.push_back (in[0])
 
out[0].set_data_type (cast::GetCastDataType(helper,"to"))
 
 SetDoc (R"DOC( The operator casts the elements of a given input tensor to a data type specified by the 'to' argument and returns an output tensor of the same size in the converted type. The 'to' argument must be one of the data types specified in the 'DataType' enum field in the TensorProto message. If the 'to' argument is not provided or is not one of the enumerated types in DataType, Caffe2 throws an Enforce error. NOTE: Casting to and from strings is not supported yet. )DOC").Arg("to"
 
The data type to which the elements of the input tensor are cast Strictly must be one of the types from DataType enum in TensorProto Input (0,"input","Input tensor to be cast.").Output(0
 
 REGISTER_GRADIENT (Cast, GetCastGradient)
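 
A hedged sketch of a Cast OperatorDef whose 'to' argument carries a DataType enum value from TensorProto, as the schema requires; the blob names are illustrative:

    #include "caffe2/proto/caffe2.pb.h"

    // Cast an int32 blob to float by naming the target DataType in 'to'.
    caffe2::OperatorDef MakeCastToFloat() {
      caffe2::OperatorDef def;
      def.set_type("Cast");
      def.add_input("X_int32");     // illustrative blob name
      def.add_output("X_float");
      auto* to = def.add_argument();
      to->set_name("to");
      to->set_i(caffe2::TensorProto::FLOAT);  // enum value from TensorProto.DataType
      return def;
    }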
 
 REGISTER_CPU_OPERATOR (Ceil, CeilOp< float, CPUContext >)
 
 SetDoc (R"DOC( Ceil takes one input data (Tensor<T>) and produces one output data (Tensor<T>) where the ceil function, y = ceil(x), is applied to the tensor elementwise. Currently supports only float32. )DOC").Input(0
 
ND input tensor Output (0,"Y","ND input tensor")
 
 GRADIENT_NOT_IMPLEMENTED_YET (Ceil)
 
 REGISTER_CPU_OPERATOR (ChannelBackpropStats, ChannelBackpropStatsOp< CPUContext >)
 
 NumInputs (4).NumOutputs(2).SetDoc(R"DOC( Given an input tensor in NCHW format, the gradient for the output of SpatialBN, and the per-channel mean and inverse std var vectors for the input, computes the per-channel bias and scale gradient to be used during the backward pass for subsequent spatial batch normalization gradient calculation. Typically, the results of this op are subsequently reduced over multiple devices to obtain statistics over a larger batch size in cases where the batch size for a single model copy is too low to yield the full benefit of batch normalization. The resulting bias and scale can then be plugged back into SpatialBNGradient to get results over the larger batch size. )DOC").Input(0,"X","The input 4-dimensional tensor of shape NCHW").Input(1,"mean","The mean saved from the forward pass as a 1-dimensional tensor of size C.").Input(2,"inv_std","The saved inverse standard deviation as a 1-dimensional tensor of size C.").Input(3,"output_grad","Gradient for the output layer of SpatialBN, here used as input because we are on the backward pass").Output(0,"scale_grad","Gradient for the scale vector").Output(1,"bias_grad","Gradient for the bias vector")
 
 SHOULD_NOT_DO_GRADIENT (ChannelBackpropStats)
 
 REGISTER_CPU_OPERATOR (ChannelShuffle, ChannelShuffleOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (ChannelShuffleGradient, ChannelShuffleGradientOp< CPUContext >)
 
 REGISTER_GRADIENT (ChannelShuffle, GetChannelShuffleGradient)
 
 REGISTER_CPU_OPERATOR (ChannelStats, ChannelStatsOp< CPUContext >)
 
 SetDoc(R"DOC( Given an input tensor in NCHW format, computes the sum of all elements per channel and the sum of all elements squared per channel. These values can be reduced across multiple batches and used to obtain the mean and variance across the full set of batches. Using the new mean and variance as input to SpatialBN has the effect of changing the batch size over which SpatialBN is applied. )DOC").Input(0,"X","The input 4-dimensional tensor of shape NCHW").Output(0,"sum","The output 1-dimensional tensor of size C containing the sum of elements of X per channel.").Output(1,"sumsq","The output 1-dimensional tensor of size C containing the sum of elements squared per channel.")
 
 SHOULD_NOT_DO_GRADIENT (ChannelStats)
 
 REGISTER_CPU_OPERATOR (Clip, ClipOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (ClipGradient, ClipGradientOp< float, CPUContext >)
 
 Input (0,"kv_handler","Key/value handler for rendezvous (optional).").Output(0,"comm_world","A common world for collective operations.").Arg("size","(int) size of the common world.").Arg("rank","(int) rank of this node in the common world.")
 
 Input (0,"existing_comm_world","Existing common world to clone.").Output(0,"comm_world","A common world for collective operations.")
 
 SetDoc ("Closes all connections managed by a common world.").Input(0
 
 NumInputsOutputs ([](int in, int out){return in >= 2 && out == (in - 1);}).EnforceInplace([](int in, int out){return (in - 1) == out;})
 
 InputsCanCrossDevices ().IdenticalTypeAndShapeOfInput(0).SetDoc(R"DOC( Does a broadcast operation from the root node to every other node. The tensor on each node should have been pre-created with the same shape and data type. )DOC").Input(0,"comm_world","The common world.").Input(1,"X","A tensor to be broadcasted.").Output(0,"X","In place as input 1.").Arg("root","(int, default 0) the root to run broadcast from.")
 
The common world Input (1,"X","A tensor to be reduced.").Output(0
 
The common world The reduced result on not set for other nodes Arg ("root","(int, default 0) the root to run reduce into.")
 
 IdenticalTypeAndShapeOfInput (0).InputsCanCrossDevices().SetDoc(R"DOC( Does an allreduce operation among the nodes. Currently only Sum is supported. )DOC").Input(0,"comm_world","The common world.").Input(1,"X","A tensor to be allreduced.").Output(0,"Y","The allreduced tensor, same on all nodes.")
 
The common world Input (1,"X","A tensor to be reduce-scattered.").Output(0
 
 NumInputs (2, INT_MAX).NumOutputs(1).InputsCanCrossDevices().SetDoc(R"DOC( Does an allgather operation among the nodes. )DOC").Input(0,"comm_world","The common world.").Input(1,"X","A tensor to be allgathered.").Output(0,"Y","The allgathered tensor, same on all nodes.")
 
 NumInputs ({2, 4}).NumOutputs(0).SetDoc(R"DOC( Sends the tensor to another node. )DOC").Input(0,"comm_world","The common world.").Input(1,"X","A tensor to be sent.").Input(2,"dst","An int CPUtensor of size 1 specifying the rank. If given, this overrides the 'dst' argument of the op.").Input(3,"tag","An int CPUtensor of size 1 specifying the tag to send the tensor with. This overrides the 'tag' argument of the op.").Arg("dst","The rank to send the tensor to.").Arg("tag","(int) a tag to send the tensor with.").Arg("raw_buffer","(bool) if set, only send the content and assume that the receiver has already known the tensor's shape and information.")
 
 AllowInplace ({{2, 1},{3, 2}}).SetDoc(R"DOC( Receives the tensor from another node. )DOC").Input(0,"comm_world","The common world.").Input(1,"Y","In-place output. If raw_buffer is specified, Y should have pre-allocated data and type.").Input(2,"src","An int CPUtensor of size 1 specifying the rank. If given, this overrides the 'src' argument of the op.").Input(3,"tag","An int CPUtensor of size 1 specifying the tag to receive the tensor with. This overrides the 'tag' argument of the op.").Output(0,"Y","The received tensor.").Output(1,"src","The sender that sent the message as a CPUTensor of size 1 and of type int.").Output(2,"tag","The tag that the message is sent with as a CPUTensor of size 1 and of type int.").Arg("src","(int) the rank to receive the tensor from.").Arg("tag","(int) a tag to receive the tensor with.").Arg("raw_buffer","(bool) if set, only receive the content and assume that the sender has already known the tensor's shape and information.")
 
 SHOULD_NOT_DO_GRADIENT (CreateCommonWorld)
 
 SHOULD_NOT_DO_GRADIENT (CloneCommonWorld)
 
 SHOULD_NOT_DO_GRADIENT (DestroyCommonWorld)
 
 SHOULD_NOT_DO_GRADIENT (Broadcast)
 
 SHOULD_NOT_DO_GRADIENT (Reduce)
 
 SHOULD_NOT_DO_GRADIENT (Allgather)
 
 SHOULD_NOT_DO_GRADIENT (Allreduce)
 
 SHOULD_NOT_DO_GRADIENT (ReduceScatter)
 
 SHOULD_NOT_DO_GRADIENT (Barrier)
 
 SHOULD_NOT_DO_GRADIENT (SendTensor)
 
 SHOULD_NOT_DO_GRADIENT (ReceiveTensor)
 
 REGISTER_CPU_OPERATOR (CreateCommonWorld, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (CloneCommonWorld, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (DestroyCommonWorld, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (Broadcast, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (Reduce, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (Allgather, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (Allreduce, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (ReduceScatter, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (Barrier, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (SendTensor, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (ReceiveTensor, NoDefaultEngineOp< CPUContext >)
 
 REGISTER_CUDA_OPERATOR (CreateCommonWorld, NoDefaultEngineOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (CloneCommonWorld, NoDefaultEngineOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Broadcast, NoDefaultEngineOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Reduce, NoDefaultEngineOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Allgather, NoDefaultEngineOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Allreduce, NoDefaultEngineOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (SendTensor, NoDefaultEngineOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (ReceiveTensor, NoDefaultEngineOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (Split, SplitOp< CPUContext >)
 
INT_MAX Input (0,"input","The tensor to split").Input(1
 
INT_MAX Optional list of output lengths (see also arg 'split')") .Arg("axis"
 
INT_MAX Optional list of output Which axis to split on Arg ("split","length of each output").Arg("order"
 
INT_MAX Optional list of output Which axis to split on Either NHWC or will split on C defaults to NCHW DeviceInferenceFunction (splitOpDevInfer).SetDoc(R"DOC( Split a tensor into a list of tensors
 
 REGISTER_CPU_OPERATOR (Concat, ConcatOp< CPUContext >)
 
 NumInputs (1, INT_MAX).NumOutputs(2).Arg("axis","Which axis to concat on").Arg("order","Either NHWC or NCHW, will concat on C axis, defaults to NCHW").Arg("add_axis","Pass 1 to add the axis specified in arg 'axis' to all input tensors")
 .TensorInferenceFunction([](const OperatorDef &def, const vector< TensorShape > &in){ArgumentHelper helper(def);const int axis=helper.HasArgument("axis")?helper.GetSingleArgument< int >("axis",-1):GetDimFromOrderString(helper.GetSingleArgument< string >("order","NCHW"));bool add_axis=helper.GetSingleArgument< int >("add_axis", 0)!=0;const int canonical_axis=canonical_axis_index_(axis, in[0].dims_size());CAFFE_ENFORCE_GT(in.size(), 0);vector< int > split_shape(1, in.size());vector< int > out_shape(in[0].dims().begin(), in[0].dims().end());if(add_axis){for(int i=1;i< in.size();++i){CAFFE_ENFORCE_EQ(in[0].dims().size(), in[i].dims().size(),"All inputs of Concat should have same dims when add_axis = 1. ""Got different sizes for inputs 0 and ", i);for(int j=0;j< in[0].dims().size();++j){CAFFE_ENFORCE_EQ(in[0].dims(j), in[i].dims(j),"All inputs of Concat should have same dims when add_axis = 1. ""Got different dims for inputs 0 and ", i,". At dim: ", j);}}out_shape.insert(out_shape.begin()+canonical_axis, in.size());}else{for(int i=1;i< in.size();++i){CAFFE_ENFORCE_EQ(in[0].dims().size(), in[i].dims().size(),"All inputs of Concat should have same dims except ""canonical_axis dim that is equal to ", canonical_axis,"Got different sizes for inputs 0 and ", i);for(int j=0;j< in[0].dims().size();++j){if(j==canonical_axis){continue;}CAFFE_ENFORCE_EQ(in[0].dims(j), in[i].dims(j),"All inputs of Concat should have same dims except ""canonical_axis dim that is equal to ", canonical_axis,"Got different dims for inputs 0 and ", i,". At dim: ", j);}}for(int i=1;i< in.size();++i){out_shape[canonical_axis]+=in[i].dims(canonical_axis);}}if(def.output_size()==1){return vector< TensorShape >{CreateTensorShape(out_shape, in[0].data_type())};}return vector< TensorShape >{CreateTensorShape(out_shape, in[0].data_type()), CreateTensorShape(split_shape, TensorProto::INT32)};})
 .CostInferenceFunction(CostInferenceForConcat).DeviceInferenceFunction(concatOpDevInfer).SetDoc("Concatenate a list of tensors into a single tensor").Output(0,"concat_result","Concatenated tensor").Output(1,"split_info","The dimensions of the inputs.").InheritOnnxSchema("Concat")
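 
The shape arithmetic in the inference function above, reduced to the same-rank case (a standalone sketch):

    #include <vector>

    // Without add_axis, the sizes along the concat axis add up; with
    // add_axis, a new dimension of size num_inputs is inserted at `axis`.
    std::vector<int> ConcatShape(const std::vector<std::vector<int>>& ins,
                                 int axis, bool add_axis) {
      std::vector<int> out = ins[0];
      if (add_axis) {
        out.insert(out.begin() + axis, static_cast<int>(ins.size()));
      } else {
        for (size_t i = 1; i < ins.size(); ++i) out[axis] += ins[i][axis];
      }
      return out;
    }
    // ConcatShape({{2,3},{2,3}}, 1, false) -> {2,6}
    // ConcatShape({{2,3},{2,3}}, 0, true)  -> {2,2,3}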
 
 REGISTER_CPU_OPERATOR (DepthSplit, SplitOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (DepthConcat, ConcatOp< CPUContext >)
 
INT_MAX SetDoc ("Backward compatible operator name for Split.")
 
 REGISTER_GRADIENT (Split, GetSplitGradient)
 
 REGISTER_GRADIENT (DepthSplit, GetSplitGradient)
 
 REGISTER_GRADIENT (Concat, GetConcatGradient)
 
 REGISTER_GRADIENT (DepthConcat, GetConcatGradient)
 
 REGISTER_CUDA_OPERATOR (Split, SplitOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Concat, ConcatOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (DepthSplit, SplitOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (DepthConcat, ConcatOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (Conditional, ConditionalOp< CPUContext >)
 
 NumInputs (3).NumOutputs(1).SetDoc(R"DOC( Given a 1-D tensor of boolean values
 
apply conditional operator along the first dimension of DataT and DataF and return DataO.Note, DataT and DataF must have the exact same shape and type.) DOC") .Input (0,"Condition","Boolean tensor to select DataT or DataF").Input(1
 
apply conditional Data to use when True Input (2,"DataF","Data to use when False").Output(0
 
 NO_GRADIENT (Conditional)
 
 REGISTER_CPU_OPERATOR (ConvGradient, ConvGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (ConvGradient).NumInputs(2, 3).NumOutputs(1, 3)
 
 REGISTER_CPU_OPERATOR (Conv1DGradient, ConvGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (Conv1DGradient).NumInputs(2, 3).NumOutputs(1, 3)
 
 REGISTER_CPU_OPERATOR (Conv2DGradient, ConvGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (Conv2DGradient).NumInputs(2, 3).NumOutputs(1, 3)
 
 REGISTER_CPU_OPERATOR (Conv3DGradient, ConvGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (Conv3DGradient).NumInputs(2, 3).NumOutputs(1, 3)
 
 REGISTER_GRADIENT (Conv, GetConvGradient)
 
 REGISTER_GRADIENT (Conv1D, GetConvGradient)
 
 REGISTER_GRADIENT (Conv2D, GetConvGradient)
 
 REGISTER_GRADIENT (Conv3D, GetConvGradient)
 
std::function< void(OpSchema &)> ConvDocGenerator (const char *dim)
 
 REGISTER_CPU_OPERATOR (Conv, ConvOp< float, CPUContext >)
 
 NumInputs (2, 3).NumOutputs(1).TensorInferenceFunction(ConvPoolOpBase< CPUContext >::TensorInferenceForConv)
 
  REGISTER_CPU_OPERATOR (Conv1D, ConvOp< float, CPUContext >)
 
 NumInputs (2, 3).NumOutputs(1).TensorInferenceFunction(ConvPoolOpBase< CPUContext >::TensorInferenceForConv)
 
  REGISTER_CPU_OPERATOR (Conv2D, ConvOp< float, CPUContext >)
 
 NumInputs (2, 3).NumOutputs(1).CostInferenceFunction(OpSchema::CostInferenceFunctionType(ConvPoolOpBase< CPUContext >::CostInferenceForConv))
 
  REGISTER_CPU_OPERATOR (Conv3D, ConvOp< float, CPUContext >)
 
 REGISTER_CUDNN_OPERATOR (Conv, CudnnConvOp)
 
 REGISTER_CUDNN_OPERATOR (ConvGradient, CudnnConvGradientOp)
 
 REGISTER_CUDNN_OPERATOR (Conv1D, CudnnConvOp)
 
 REGISTER_CUDNN_OPERATOR (Conv1DGradient, CudnnConvGradientOp)
 
 REGISTER_CUDNN_OPERATOR (Conv2D, CudnnConvOp)
 
 REGISTER_CUDNN_OPERATOR (Conv2DGradient, CudnnConvGradientOp)
 
 REGISTER_CUDNN_OPERATOR (Conv3D, CudnnConvOp)
 
 REGISTER_CUDNN_OPERATOR (Conv3DGradient, CudnnConvGradientOp)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (Conv, EIGEN, EigenConvOp< float >)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (Conv1D, EIGEN, EigenConvOp< float >)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (Conv2D, EIGEN, EigenConvOp< float >)
 
 REGISTER_CPU_OPERATOR_WITH_ENGINE (Conv3D, EIGEN, EigenConvOp< float >)
 
 REGISTER_CUDA_OPERATOR (Conv, ConvOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (ConvGradient, ConvGradientOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Conv1D, ConvOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Conv1DGradient, ConvGradientOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Conv2D, ConvOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Conv2DGradient, ConvGradientOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Conv3D, ConvOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Conv3DGradient, ConvGradientOp< float, CUDAContext >)
 
template<>
void createSharedBuffer< CPUContext > (Workspace *ws)
 
template<>
void runWithSharedBuffer (Workspace *ws, std::function< void(Tensor< CPUContext > *buffer)> f)
 
template<typename Context >
void createSharedBuffer (Workspace *ws)
 Creates a mutex and shared buffer in the workspace. More...
 
template<typename Context >
void runWithSharedBuffer (Workspace *ws, std::function< void(Tensor< Context > *buffer)> f)
 Thread-safe, can be invoked from RunOnDevice() to serialize access to shared buffer.
 
template<>
void createSharedBuffer< CUDAContext > (Workspace *ws)
 
template<>
void runWithSharedBuffer (Workspace *ws, std::function< void(Tensor< CUDAContext > *buffer)> f)
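 
A hedged usage sketch of the shared-buffer helpers above (assuming the header declaring createSharedBuffer/runWithSharedBuffer is included): create the mutex and buffer once, then serialize access from operator code:

    #include "caffe2/core/workspace.h"
    #include "caffe2/core/tensor.h"
    // ... plus the header declaring createSharedBuffer / runWithSharedBuffer

    void UseSharedBuffer(caffe2::Workspace* ws) {
      // One-time setup: creates the mutex and shared buffer blobs.
      caffe2::createSharedBuffer<caffe2::CPUContext>(ws);
      // Thread-safe access; the lambda runs with the buffer lock held.
      caffe2::runWithSharedBuffer<caffe2::CPUContext>(
          ws, [](caffe2::Tensor<caffe2::CPUContext>* buffer) {
            buffer->Resize(128);            // illustrative scratch size
            buffer->mutable_data<float>();  // allocate as float
          });
    }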
 
 REGISTER_CPU_OPERATOR (ConvTransposeGradient, ConvTransposeGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (ConvTransposeGradient).NumInputs(3).NumOutputs(1, 3)
 
 REGISTER_GRADIENT (ConvTranspose, GetConvTransposeGradient)
 
 REGISTER_CPU_OPERATOR (ConvTranspose, ConvTransposeOp< float, CPUContext >)
 
 NumInputs (2, 3).NumOutputs(1).SetDoc(R"DOC( The transposed convolution consumes an input vector, the filter blob, and the bias blob, and computes the output. As is expected, the filter is deconvolved with a subset of the image and the bias is added; this is done throughout the image data and the output is computed. As a side note on the implementation, which is why the templated implementation and the header are separate files. )DOC").Input(0,"X","Input data blob from previous layer; has size (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and width. Note that this is for the NCHW usage. On the other hand, the NHWC Op has a different set of dimension constraints.").Input(1,"filter","The filter blob that will be used in the transposed convolution; has size (M x C x kH x kW), where C is the number of channels, and kH and kW are the height and width of the kernel.").Input(2,"bias","The 1D bias blob that is added through the convolution; has size (C). Optional, if not passed, will treat it as all 0.").Output(0,"Y","Output data blob that contains the result of the transposed convolution. The output dimensions are functions of the kernel size, stride, and pad lengths.").InheritOnnxSchema("ConvTranspose")
 
 REGISTER_CUDNN_OPERATOR (ConvTranspose, CudnnConvTransposeOp< float >)
 
 REGISTER_CUDNN_OPERATOR (ConvTransposeGradient, CudnnConvTransposeGradientOp< float >)
 
 REGISTER_CUDA_OPERATOR (ConvTranspose, ConvTransposeOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (ConvTransposeGradient, ConvTransposeGradientOp< float, CUDAContext >)
 
 REGISTER_CPU_OPERATOR (Cos, UnaryElementwiseOp< TensorTypes< float >, CPUContext, CosCPUFunctor >)
 
 REGISTER_CPU_OPERATOR (CosGradient, BinaryElementwiseOp< TensorTypes< float >, CPUContext, WithoutBroadcast< CosGradientCPUFunctor >>)
 
 OPERATOR_SCHEMA (CosGradient).NumInputs(2).NumOutputs(1).IdenticalTypeAndShape()
 
 REGISTER_GRADIENT (Cos, GetCosGradient)
 
 REGISTER_CPU_OPERATOR (CosineEmbeddingCriterion, CosineEmbeddingCriterionOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (CosineEmbeddingCriterionGradient, CosineEmbeddingCriterionGradientOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (CreateCounter, CreateCounterOp< int64_t, CPUContext >)
 
 REGISTER_CPU_OPERATOR (ResetCounter, ResetCounterOp< int64_t, CPUContext >)
 
 REGISTER_CPU_OPERATOR (CountDown, CountDownOp< int64_t, CPUContext >)
 
 REGISTER_CPU_OPERATOR (CheckCounterDone, CheckCounterDoneOp< int64_t, CPUContext >)
 
 REGISTER_CPU_OPERATOR (CountUp, CountUpOp< int64_t, CPUContext >)
 
 REGISTER_CPU_OPERATOR (RetrieveCount, RetrieveCountOp< int64_t, CPUContext >)
 
 SetDoc(R"DOC( Creates a count-down counter with initial value specified by the 'init_count' argument. )DOC").Output(0,"counter","A blob pointing to an instance of a new counter.").Arg("init_count","Initial count for the counter, must be >= 0.")
 
 SetDoc (R"DOC( Resets a count-down counter with initial value specified by the 'init_count' argument. )DOC").Input(0
 
A blob pointing to an instance of a new counter Output (0,"previous_value","(optional) Previous value of the counter.").Arg("init_count"
 
 REGISTER_CUDA_OPERATOR (CreateCounter, CreateCounterOp< int64_t, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (ResetCounter, ResetCounterOp< int64_t, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (CountDown, CountDownOp< int64_t, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (CheckCounterDone, CheckCounterDoneOp< int64_t, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (CountUp, CountUpOp< int64_t, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (RetrieveCount, RetrieveCountOp< int64_t, CUDAContext >)
 
 CAFFE_KNOWN_TYPE (detail::WorkspaceStack)
 
 REGISTER_CPU_OPERATOR (CreateScope, CreateScopeOp< CPUContext >)
 
 SHOULD_NOT_DO_GRADIENT (CreateScope)
 
 OPERATOR_SCHEMA (CreateScope).NumInputs(0).NumOutputs(1).SetDoc(R"DOC( 'CreateScope' operator initializes and outputs empty scope that is used by Do operator to store local blobs )DOC")
 
 REGISTER_CPU_OPERATOR (HasScope, HasScopeOp< CPUContext >)
 
 SHOULD_NOT_DO_GRADIENT (HasScope)
 
 OPERATOR_SCHEMA (HasScope).NumInputs(1).NumOutputs(1).SetDoc(R"DOC( Checks whether scope blob has any saved scopes left )DOC")
 
 REGISTER_CPU_OPERATOR (LabelCrossEntropy, LabelCrossEntropyOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (LabelCrossEntropyGradient, LabelCrossEntropyGradientOp< float, CPUContext >)
 
 SetDoc (R"DOC( Operator computes the cross entropy between the input and the label set. In practice, it is most commonly used at the end of models, after the SoftMax operator and before the AveragedLoss operator. Note that LabelCrossEntropy assumes that the label provided is either a 1D array of size N (batch size), or a 2D array of size N x 1 (batch size). Each entry in the label vector indicates which is the correct class; as such, each entry must be between 0 and D - 1, inclusive, where D is the total number of classes. The formula used is: Y[i] = -log(X[i][j]) where (i, j) is the classifier's prediction of the jth class (the correct one), and i is the batch size. Each log has a lower limit for numerical stability. )DOC").Input(0
 
X is a array of size N x where N is the batch size and D is the number of classes Input (1,"label","Blob containing the labels used to compare the input").Output(0
 
 REGISTER_GRADIENT (LabelCrossEntropy, GetLabelCrossEntropyGradient)
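 
The formula above, Y[i] = -log(X[i][label[i]]), written out with an illustrative stability floor (the actual lower limit used by the op is not shown in this listing):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    std::vector<float> LabelCrossEntropy(
        const std::vector<std::vector<float>>& X,  // N x D softmax output
        const std::vector<int>& label) {           // N correct-class ids
      const float kLogThreshold = 1e-20f;  // illustrative lower limit
      std::vector<float> Y;
      for (size_t i = 0; i < X.size(); ++i) {
        Y.push_back(-std::log(std::max(X[i][label[i]], kLogThreshold)));
      }
      return Y;
    }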
 
 REGISTER_CPU_OPERATOR (MakeTwoClass, MakeTwoClassOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (MakeTwoClassGradient, MakeTwoClassGradientOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (SigmoidCrossEntropyWithLogits, SigmoidCrossEntropyWithLogitsOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (SigmoidCrossEntropyWithLogitsGradient, SigmoidCrossEntropyWithLogitsGradientOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (WeightedSigmoidCrossEntropyWithLogits, WeightedSigmoidCrossEntropyWithLogitsOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (WeightedSigmoidCrossEntropyWithLogitsGradient, WeightedSigmoidCrossEntropyWithLogitsGradientOp< float, CPUContext >)
 
out[0].add_dims (in[0].dims(0))
 
out[0].add_dims (2)
 
 SetDoc (R"DOC( Given a vector of probabilities, this operator transforms this into a 2-column matrix with complimentary probabilities for binary classification. In explicit terms, given the vector X, the output Y is vstack(1 - X, X). )DOC").Input(0
 
Input vector of probabilities Output (0,"Y","2-column matrix with complimentary probabilities of X for ""binary classification")
 
 SetDoc (R"DOC( Given two matrices logits and targets, of same shape, (batch_size, num_classes), computes the sigmoid cross entropy between the two. Returns a tensor of shape (batch_size,) of losses for each example. )DOC").Input(0
 
matrix of logits for each example and class Input (1,"targets","matrix of targets, same shape as logits.").Output(0
 
 SetDoc (R"DOC( Given three matrices: logits, targets, weights, all of the same shape, (batch_size, num_classes), computes the weighted sigmoid cross entropy between logits and targets. Specifically, at each position r,c, this computes weights[r, c] * crossentropy(sigmoid(logits[r, c]), targets[r, c]), and then averages over each row. Returns a tensor of shape (batch_size,) of losses for each example. )DOC").Input(0
 
matrix of logits for each example and class matrix of same shape as logits Output (0,"xentropy","Vector with the total xentropy for each example.")
 
 REGISTER_GRADIENT (MakeTwoClass, GetMakeTwoClassGradient)
 
 REGISTER_GRADIENT (SigmoidCrossEntropyWithLogits, GetSigmoidCrossEntropyWithLogitsGradient)
 
 REGISTER_GRADIENT (WeightedSigmoidCrossEntropyWithLogits, GetWeightedSigmoidCrossEntropyWithLogitsGradient)
 
 REGISTER_CPU_OPERATOR (CrossEntropy, CrossEntropyOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (CrossEntropyGradient, CrossEntropyGradientOp< float, CPUContext >)
 
 SetDoc (R"DOC( Operator computes the cross entropy between the input and the label set. In practice, it is most commonly used at the end of models, after the SoftMax operator and before the AveragedLoss operator. Note that CrossEntropy assumes that the soft labels provided is a 2D array of size N x D (batch size x number of classes). Each entry in the 2D label corresponds to the soft label for the input, where each element represents the correct probability of the class being selected. As such, each element must be between 0 and 1, and all elements in an entry must sum to 1. The formula used is: Y[i] = sum_j (label[i][j] * log(X[i][j])) where (i, j) is the classifier's prediction of the jth class (the correct one), and i is the batch size. Each log has a lower limit for numerical stability. )DOC").Input(0
 
 REGISTER_GRADIENT (CrossEntropy, GetCrossEntropyGradient)
 
 CAFFE_KNOWN_TYPE (std::unique_ptr< dataset_ops::TreeCursor >)
 
 CAFFE_KNOWN_TYPE (dataset_ops::TensorVectorPtr< CPUContext >)
 
 CAFFE_KNOWN_TYPE (dataset_ops::SharedTensorVectorPtr)
 
 OPERATOR_SCHEMA (DeformConvGradient).NumInputs(4).NumOutputs(2, 4)
 
OpSchema::Cost CostInferenceForDotProduct (const OperatorDef &def, const vector< TensorShape > &in)
 
 REGISTER_CPU_OPERATOR (SquaredL2Distance, SquaredL2DistanceOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (SquaredL2DistanceGradient, SquaredL2DistanceGradientOp< float, CPUContext >)
 
 SetDoc (R"DOC( Given two input float tensors X, Y, and produces one output float tensor of the L2 difference between X and Y that is computed as ||(X - Y)^2 / 2||. )DOC").Input(0
 
or input tensor Input (1,"Y","1D or 2D input tensor (must have the same shape as X)").Output(0
 
 OPERATOR_SCHEMA (SquaredL2DistanceGradient).NumInputs(3).NumOutputs(2)
 
 REGISTER_GRADIENT (SquaredL2Distance, GetSquaredL2DistanceGradient)
 
 REGISTER_CPU_OPERATOR (L1Distance, L1DistanceOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (L1DistanceGradient, L1DistanceGradientOp< float, CPUContext >)
 
 SetDoc (R"DOC( Given two input float tensors X, Y, and produces one output float tensor of the L1 difference between X and Y, computed as L1(x,y) = sum over |x-y| )DOC").Input(0
 
 OPERATOR_SCHEMA (L1DistanceGradient).NumInputs(3).NumOutputs(2)
 
 REGISTER_GRADIENT (L1Distance, GetL1DistanceGradient)
 
 REGISTER_CPU_OPERATOR (DotProduct, DotProductOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (DotProductGradient, DotProductGradientOp< float, CPUContext >)
 
 SetDoc (R"DOC( Given two input float tensors X, Y, and produces one output float tensor of the dot product between X and Y. )DOC").Input(0
 
or input tensor output tensor CostInferenceFunction (OpSchema::CostInferenceFunctionType(CostInferenceForDotProduct))
 
 OPERATOR_SCHEMA (DotProductGradient).NumInputs(3).NumOutputs(2)
 
 REGISTER_GRADIENT (DotProduct, GetDotProductGradient)
 
 REGISTER_CPU_OPERATOR (CosineSimilarity, CosineSimilarityOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (CosineSimilarityGradient, CosineSimilarityGradientOp< float, CPUContext >)
 
 SetDoc (R"DOC( Given two input float tensors X, Y, and produces one output float tensor of the cosine similarity between X and Y. )DOC").Input(0
 
 OPERATOR_SCHEMA (CosineSimilarityGradient).NumInputs(3).NumOutputs(2)
 
 REGISTER_GRADIENT (CosineSimilarity, GetCosineSimilarityGradient)
 
 REGISTER_CPU_OPERATOR (DotProductWithPadding, DotProductWithPaddingOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (DotProductWithPaddingGradient, DotProductWithPaddingGradientOp< float, CPUContext >)
 
 SetDoc(R"DOC( Given two input float tensors X, Y with different shapes, produces one output float tensor of the dot product between X and Y. We currently support two kinds of strategies to achieve this: 1) pad the smaller tensor (using pad_value) to the same shape as the other one before doing the normal dot_product; 2) replicate the smaller tensor to the same shape as the other one. Note the first dimension of X, Y must be equal. Only the second dimension of X or Y can be padded. )DOC").Input(0,"X","1D or 2D input tensor").Input(1,"Y","1D or 2D input tensor").Output(0,"Z","1D output tensor").IdenticalTypeAndShapeOfInputDim(0, 0).Arg("pad_value","the padding value for tensors with smaller dimension").Arg("replicate","whether to replicate the smaller tensor or not")
 
 OPERATOR_SCHEMA (DotProductWithPaddingGradient).NumInputs(3).NumOutputs(2)
 
 REGISTER_GRADIENT (DotProductWithPadding, GetDotProductWithPaddingGradient)
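An illustrative sketch of the two strategies described in the schema doc above, for a shorter row x against a longer row y (names and signature are hypothetical, not the Caffe2 kernel):

    #include <cstddef>
    #include <vector>

    float DotWithPadding(const std::vector<float>& x,
                         const std::vector<float>& y,
                         float pad_value,
                         bool replicate) {
      // Assumes x.size() <= y.size(); only the second dimension is padded.
      float dot = 0.0f;
      for (std::size_t i = 0; i < y.size(); ++i) {
        float xi;
        if (i < x.size()) {
          xi = x[i];
        } else if (replicate) {
          xi = x[i % x.size()];  // strategy 2: tile the smaller tensor
        } else {
          xi = pad_value;        // strategy 1: pad with pad_value
        }
        dot += xi * y[i];
      }
      return dot;
    }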
 
 REGISTER_CPU_OPERATOR (Do, DoOp< CPUContext >)
 
INT_MAX SetDoc (R"DOC( 'Do' control operator, executes a subnet in a separate workspace. Last blobs in the input and output lists should be the same blob created with CreateScope op. Arguments 'inner_blobs' and 'outer_blobs_idx' provide a mapping between selected inner blob names and corresponding outer blob indices. )DOC").Arg("net"
 
INT_MAX Subnet with blob bindings Arg ("inner_blobs","List of inner net blob names to bind to outer workspace").Arg("outer_blobs_idx"
 
INT_MAX Subnet with blob bindings Indices of corresponding outer workspace in operator outputs (skipping workspace blobs)") .Arg( "saved_fwd_blobs"
 
INT_MAX Subnet with blob bindings Indices of corresponding outer workspace in List of blobs from the forward Do operator workspace needed" "in backward pass, used in gradient Do operator") .Arg ("reuse_workspace","Whether to reuse workspace or create a new one in a given scope").AllowInplace([](int in
 
 REGISTER_CUDA_OPERATOR (Do, DoOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (Dropout, DropoutOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (DropoutGrad, DropoutGradientOp< float, CPUContext >)
 
 AllowInplace ({{0, 0}}).TensorInferenceFunction([](const OperatorDef& def, const vector<TensorShape>& in) { /* output shape follows the input; when the mask output is requested (if (output_mask)), a Tensor<bool> shape matching the input is added */ }).SetDoc(R"DOC( Dropout takes one input data (Tensor<float>) and produces two Tensor outputs, output (Tensor<float>) and mask (Tensor<bool>). Depending on whether it is in test mode or not, the output Y will either be a random dropout, or a simple copy of the input. Note that our implementation of Dropout does scaling in the training phase, so during testing nothing needs to be done. )DOC").Arg("ratio", "(float, default 0.5) the ratio of random dropout").ArgIsTest("(int) if nonzero, run dropout in test mode where the output is simply Y = X.").Input(0, "data", "The input data as Tensor").Output(0, "output", "The output.").Output(1, "mask", "The output mask. If is_test is nonzero, this output is not filled.").InheritOnnxSchema("Dropout")
 
 REGISTER_GRADIENT (Dropout, GetDropoutGradient)
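A sketch of the train-time scaling the Dropout doc refers to: kept activations are rescaled by 1/(1 - ratio) during training, so the test-mode op is a plain copy. Illustrative only; the helper name and signature are hypothetical:

    #include <cstddef>
    #include <random>
    #include <vector>

    void DropoutTrain(const std::vector<float>& X,
                      float ratio,
                      std::vector<float>* Y,
                      std::vector<bool>* mask,
                      std::mt19937* rng) {
      std::bernoulli_distribution keep(1.0f - ratio);
      const float scale = 1.0f / (1.0f - ratio);
      Y->resize(X.size());
      mask->resize(X.size());
      for (std::size_t i = 0; i < X.size(); ++i) {
        const bool k = keep(*rng);
        (*mask)[i] = k;
        (*Y)[i] = k ? X[i] * scale : 0.0f;  // scale kept units at train time
      }
    }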
 
 EIGEN_FUNCTOR (Add, EIGEN_ADD, NumericTypes, SameTypeAsInput)
 
 EIGEN_FUNCTOR (Div, EIGEN_DIV, NumericTypes, SameTypeAsInput)
 
void ElementWiseDivide (CPUContext &, const int n, float *dXdata, float *dYdata, const float *dZdata, const float *Ydata, const float *Zdata)
 
 REGISTER_CPU_OPERATOR (DivGradient, DivGradientOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (ElementwiseLinear, ElementwiseLinearOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (ElementwiseLinearGradient, ElementwiseLinearGradientOp< float, CPUContext >)
 
 Given inputs X of size (N x D), w of size D, and b of size D, the op computes Y of size (N x D), where Y_{n,d} = X_{n,d} * w_d + b_d
 
 REGISTER_GRADIENT (ElementwiseLinear, GetElementwiseLinearGradient)
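A minimal sketch of the ElementwiseLinear formula above, Y[n][d] = X[n][d] * w[d] + b[d] (illustrative names, not the Caffe2 kernel):

    #include <cstddef>
    #include <vector>

    void ElementwiseLinear(const std::vector<std::vector<float>>& X,
                           const std::vector<float>& w,
                           const std::vector<float>& b,
                           std::vector<std::vector<float>>* Y) {
      Y->assign(X.size(), std::vector<float>(w.size()));
      for (std::size_t n = 0; n < X.size(); ++n) {
        for (std::size_t d = 0; d < w.size(); ++d) {
          (*Y)[n][d] = X[n][d] * w[d] + b[d];  // per-column scale and shift
        }
      }
    }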
 
 EIGEN_FUNCTOR (Mul, EIGEN_MUL, NumericTypes, SameTypeAsInput)
 
 NAIVE_FUNCTOR (LT, NAIVE_LT, NumericTypes, FixedType< bool >)
 
 NAIVE_FUNCTOR (LE, NAIVE_LE, NumericTypes, FixedType< bool >)
 
 NAIVE_FUNCTOR (GT, NAIVE_GT, NumericTypes, FixedType< bool >)
 
 NAIVE_FUNCTOR (GE, NAIVE_GE, NumericTypes, FixedType< bool >)
 
 NAIVE_FUNCTOR (EQ, NAIVE_EQ, IntBoolTypes, FixedType< bool >)
 
 NAIVE_FUNCTOR (And, NAIVE_AND, BoolTypes, FixedType< bool >)
 
 NAIVE_FUNCTOR (Or, NAIVE_OR, BoolTypes, FixedType< bool >)
 
 NAIVE_FUNCTOR (Xor, NAIVE_XOR, BoolTypes, FixedType< bool >)
 
 REGISTER_CPU_OPERATOR (Not, UnaryElementwiseOp< BoolTypes, CPUContext, NotFunctor >)
 
 REGISTER_CPU_OPERATOR (SumReduceLike, SumReduceLikeOp< CPUContext >)
 
template<typename Context >
std::tuple< size_t, size_t, size_t > calculate_broadcast_sizes (const Tensor< Context > &A, const Tensor< Context > &B, int axis)
 
std::function< void(OpSchema &)> MathDocGenerator (const char *name)
 
 CostInferenceFunction (PointwiseCostInference< 1 >).IdenticalTypeAndShapeOfInput(0).FillUsing(MathDocGenerator("addition")).InheritOnnxSchema("Add")
 
 OPERATOR_SCHEMA (DivGradient).NumInputs(3).NumOutputs(2).AllowInplace({{0, 0}})
 
 When broadcasting is enabled, the dimensions of the second input must be a contiguous subset of the dimensions of the first. For example, the following tensor shapes are supported: shape(B) equal to a contiguous suffix of shape(A), or shape(B) = (), i.e. B is a scalar
 
 EIGEN_FUNCTOR (Sub, EIGEN_SUB, NumericTypes, SameTypeAsInput)
 
 REGISTER_CPU_OPERATOR (Sum, SumOp< CPUContext >)
 
 CostInferenceFunction (CostInferenceForSum).InputsCanCrossDevices().IdenticalTypeAndShapeOfInput(0).SetDoc(R"DOC( Element-wise sum of each of the input tensors. The first input tensor can be used in-place as the output tensor, in which case the sum will be done in place and results will be accumulated in input0. All inputs and outputs must have the same shape and data type. )DOC").Input(0, "data_0", "First of the input tensors. Can be inplace.").Output(0, "sum", "Output tensor. Same dimension as inputs.").InheritOnnxSchema("Sum")
 
 REGISTER_CPU_OPERATOR (Elu, EluOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (EluGradient, EluGradientOp< float, CPUContext >)
 
 SetDoc (R"DOC( Elu takes one input data (Tensor<T>) and produces one output data (Tensor<T>) where the function f(x) = alpha * (exp(x) - 1.0) for x < 0, f(x) = x for x >= 0, is applied to the tensor elementwise. )DOC").Input(0, "X", "1D input tensor").Output(0, "Y", "1D output tensor").InheritOnnxSchema("Elu")
 
 SetDoc (R"DOC( EluGradient takes both Y and dY and uses this to update dX according to the chain rule and derivatives of the ELU function. )DOC")
 
 REGISTER_GRADIENT (Elu, GetEluGradient)
 
 REGISTER_CPU_OPERATOR (Exp, UnaryElementwiseOp< TensorTypes< float >, CPUContext, ExpCPUFunctor >)
 
 SetDoc (R"DOC( Calculates the exponential of the given input tensor, element-wise. This operation can be done in an in-place fashion by providing the same input and output blobs. )DOC").Output(0, "output", "The exponential of the input tensor computed element-wise").InheritOnnxSchema("Exp")
 
 REGISTER_GRADIENT (Exp, GetExpGradient)
 
 REGISTER_CPU_OPERATOR (ExpandDims, ExpandDimsOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (Squeeze, SqueezeOp< CPUContext >)
 
 TensorInferenceFunction([](const OperatorDef& def, const vector<TensorShape>& in) {
       ArgumentHelper helper(def);
       auto dims = helper.template GetRepeatedArgument<int>("dims");
       auto originalSize = dims.size();
       CAFFE_ENFORCE(originalSize > 0, "Parameter `dims` must be provided.");
       std::sort(dims.begin(), dims.end());
       dims.erase(std::unique(dims.begin(), dims.end()), dims.end());
       if (dims.size() < originalSize) {
         LOG(WARNING) << "Parameter `dims` has repeated dimensions.";
       }
       CAFFE_ENFORCE(dims.front() >= 0, "Dimension ids must be non-negative.");
       CAFFE_ENFORCE_GE(
           in[0].dims_size() + dims.size(),
           dims.back() + 1,
           "Input needs at least ", (1 + dims.back() - dims.size()), " dimensions given `dims`.");
       vector<TensorShape> out(1);
       int cur_pos = 0;
       int idx = 0;
       for (const auto new_dim : dims) {
         for (int i = cur_pos; i < new_dim; i++) {
           out[0].add_dims(in[0].dims(idx++));
         }
         out[0].add_dims(1);
         cur_pos = new_dim + 1;
       }
       for (; idx < in[0].dims_size(); idx++) {
         out[0].add_dims(in[0].dims(idx));
       }
       out[0].set_data_type(in[0].data_type());
       return out;
     }).SetDoc(R"DOC( Insert single-dimensional entries into the shape of a tensor. Takes one required argument `dims`, a list of dimensions that will be inserted. )DOC")
 
 REGISTER_CUDA_OPERATOR (Squeeze, SqueezeOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (ExpandDims, ExpandDimsOp< CUDAContext >)
 
std::vector< TensorShape > FCShapeInference (const OperatorDef &def, const vector< TensorShape > &in, bool pretransposed_weight)
 
OpSchema::Cost CostInferenceForFC (const OperatorDef &def, const vector< TensorShape > &in)
 
 REGISTER_CPU_OPERATOR (FeedBlob, FeedBlobOp< CPUContext >)
 
 SHOULD_NOT_DO_GRADIENT (FeedBlob)
 
 NumInputs (0, 0).NumOutputs(1, 1).SetDoc(R"DOC( FeedBlobs the content of the blobs. The input and output blobs should be one-to-one inplace. )DOC").Arg("value", "(string) if provided, we will use this string as the value for the provided output tensor")
 
 REGISTER_CPU_OPERATOR (UniformFill, UniformFillOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (UniformIntFill, UniformFillOp< int, CPUContext >)
 
 REGISTER_CPU_OPERATOR (UniqueUniformFill, UniqueUniformFillOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (ConstantFill, ConstantFillOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (DiagonalFill, DiagonalFillOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (GaussianFill, GaussianFillOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (XavierFill, XavierFillOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (MSRAFill, MSRAFillOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (RangeFill, RangeFillOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (LengthsRangeFill, LengthsRangeFillOp< CPUContext >)
 
 TensorInferenceFunction (FillerTensorInference<>).SetDoc(R"DOC( The operator fills the elements of the output tensor with a constant value specified by the 'value' argument. The data type is specified by the 'dtype' argument. The 'dtype' argument must be one of the data types specified in the 'DataType' enum field in the TensorProto message. If the 'dtype' argument is not provided, the data type of 'value' is used. )DOC")
 
template<int VALUE_TYPE = TensorProto_DataType_FLOAT>
std::vector< TensorShape > FillerTensorInference (const OperatorDef &def, const vector< TensorShape > &in)
 
 REGISTER_CUDA_OPERATOR (LengthsRangeFill, GPUFallbackOp< LengthsRangeFillOp< CPUContext >>)
 
 Input (0, "index", "Index (integers)").Input(1, "query", "Needles / query").Output(0, "query_indices", "Indices of the needles in index or 'missing value'").Arg("missing_value", "Placeholder for items that are not found").SetDoc(R"DOC( Finds elements of second input from first input, outputting the last (max) index for each query. If a query is not found, missing_value is inserted. See IndexGet() for a version that modifies the index when values are not found. )DOC")
 
 REGISTER_CPU_OPERATOR (Flatten, FlattenOp< CPUContext >)
 
 // Fragments of the Flatten shape-inference function: dimensions before
 // `axis` multiply into `outer`, the remaining dimensions into `inner`:
 vector<TensorShape> out(1);
 for (auto d : in[0].dims()) { /* multiply d into outer (before axis) or inner (from axis on) */ }
 out[0].set_data_type(in[0].data_type());
 out[0].add_dims(outer);
 out[0].add_dims(inner);
 
 SetDoc (R"DOC( Flattens the input tensor into a 2D matrix. If input tensor has shape (d_0, d_1, ..., d_n) then the output will have shape (d_0 X d_1 ... X d_(axis-1), d_axis X d_(axis+1) ... X d_n). )DOC").Input(0, "data", "A tensor of rank >= axis.").Output(0, "output", "A 2D tensor with the contents of the input tensor, with input dimensions up to axis flattened to the outer dimension of the output and remaining input dimensions flattened into the inner dimension of the output.").Arg("axis", "(Default to 1) Indicate up to which input dimensions (exclusive) should be flattened to the outer dimension of the output").InheritOnnxSchema("Flatten")
 
 REGISTER_GRADIENT (Flatten, GetFlattenGradient)
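A sketch of the shape arithmetic in the Flatten doc above (illustrative helper, not the Caffe2 implementation):

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Dimensions before `axis` multiply into the outer size, the rest
    // into the inner size.
    std::pair<int64_t, int64_t> FlattenShape(const std::vector<int64_t>& dims,
                                             int axis) {
      int64_t outer = 1, inner = 1;
      for (int i = 0; i < static_cast<int>(dims.size()); ++i) {
        (i < axis ? outer : inner) *= dims[i];
      }
      return {outer, inner};  // e.g. dims = {2, 3, 4}, axis = 1 -> {2, 12}
    }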
 
 REGISTER_CPU_OPERATOR (FlexibleTopK, FlexibleTopKOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (FlexibleTopKGradient, FlexibleTopKGradientOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (Floor, FloorOp< float, CPUContext >)
 
 SetDoc (R"DOC( Floor takes one input data (Tensor<T>) and produces one output data (Tensor<T>) where the floor function, y = floor(x), is applied to the tensor elementwise. Currently supports only float32. )DOC").Input(0
 
 GRADIENT_NOT_IMPLEMENTED_YET (Floor)
 
 REGISTER_CPU_OPERATOR (Free, FreeOp< CPUContext >)
 
 SHOULD_NOT_DO_GRADIENT (Free)
 
 NumInputs (1, INT_MAX).NumOutputs(1, INT_MAX).SameNumberOfOutput().EnforceOneToOneInplace().SetDoc(R"DOC( Frees the content of the blobs. The input and output blobs should be one-to-one inplace. )DOC")
 
 REGISTER_CUDA_OPERATOR (Free, FreeOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (FC, FullyConnectedOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (FCGradient, FullyConnectedGradientOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (FCTransposed, FullyConnectedOp< CPUContext, DefaultEngine, false >)
 
 REGISTER_CPU_OPERATOR (FCTransposedGradient, FullyConnectedGradientOp< CPUContext, DefaultEngine, false >)
 
 REGISTER_CUDA_OPERATOR (FC, FullyConnectedOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (FCGradient, FullyConnectedGradientOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (FCTransposed, FullyConnectedOp< CUDAContext, DefaultEngine, false >)
 
 REGISTER_CUDA_OPERATOR (FCTransposedGradient, FullyConnectedGradientOp< CUDAContext, DefaultEngine, false >)
 
 REGISTER_CPU_OPERATOR (FloatToFused8BitRowwiseQuantized, FloatToFused8BitRowwiseQuantizedOp< CPUContext >)
 
 Applies 8-bit row-wise quantization by determining the range (maximum - minimum) and offset (minimum value) of each row in the input matrix, and then scaling each element to an 8-bit number between 0 and 255. To later de-quantize values, the scale (range / 255) and offset (bias) are stored alongside the data. More precisely, the first 4 bytes of each row in the output matrix are a 32-bit float storing the scale, the next 4 bytes store the bias as a 32-bit float, and all remaining bytes in the row encode single quantized values. )DOC").Input(0, "input", "Float32 input data").Output(0, "output", "Fused scale, bias and quantized data")
 
 NO_GRADIENT (FloatToFused8BitRowwiseQuantized)
 
 REGISTER_CPU_OPERATOR (Fused8BitRowwiseQuantizedToFloat, Fused8BitRowwiseQuantizedToFloatOp< CPUContext >)
 
 De-quantizes the result of the FloatToFused8BitRowwiseQuantized operator. The input is expected to encode the scale as a 32-bit float, followed by the bias as a 32-bit float in the next 4 bytes, and the quantized values in the remaining bytes of the row. The output is a matrix containing only the values, but de-quantized. De-quantization is performed by multiplying each value by its row's scale and bias parameters. The de-quantized values will thus not be exactly equal to the un-quantized floating point values. )DOC").Input(0, "scale_bias_quantized_input", "Fused scale, bias and quantized data").Output(0, "float_output", "Float32 data")
 
 NO_GRADIENT (Fused8BitRowwiseQuantizedToFloat)
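An illustrative sketch of the row-wise 8-bit scheme described above, without the fused memory layout: scale = (max - min) / 255, bias = min, q = round((x - bias) / scale), and de-quantization is q * scale + bias. Names are hypothetical:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    void QuantizeRow(const std::vector<float>& row,
                     std::vector<uint8_t>* q, float* scale, float* bias) {
      const auto [mn, mx] = std::minmax_element(row.begin(), row.end());
      *bias = *mn;
      *scale = (*mx - *mn) / 255.0f;
      if (*scale == 0.0f) *scale = 1.0f;  // constant row: avoid divide-by-zero
      q->resize(row.size());
      for (std::size_t i = 0; i < row.size(); ++i) {
        (*q)[i] = static_cast<uint8_t>(std::lround((row[i] - *bias) / *scale));
      }
    }

    float Dequantize(uint8_t q, float scale, float bias) {
      return q * scale + bias;  // not bit-exact vs. the original float
    }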
 
 Performs the same operation as Gather, but operating on 8-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then the scale and offset). DATA needs to have rank 2 and INDICES needs to have rank 1. )DOC").Input(0, "DATA", "uint8 tensor with rank 2 obtained with operator FloatToFused8BitRowwiseQuantized").Input(1, "INDICES", "Integer vector containing indices of the first dimension of DATA for the rows that are being gathered").Output(0, "OUTPUT", "output").TensorInferenceFunction([](const OperatorDef& def, const vector<TensorShape>& in) {
       vector<TensorShape> out(1);
       for (auto d : in[1].dims()) {
         out[0].add_dims(d);
       }
       for (int i = 1; i < in[0].dims_size(); ++i) {
         out[0].add_dims(in[0].dims(i));
       }
       out[0].set_data_type(in[0].data_type());
       return out;
     })
 
 REGISTER_CPU_OPERATOR (GatherFused8BitRowwise, GatherFused8BitRowwiseOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (GivenTensorFill, GivenTensorFillOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (GivenTensorDoubleFill, GivenTensorFillOp< double, CPUContext >)
 
 REGISTER_CPU_OPERATOR (GivenTensorBoolFill, GivenTensorFillOp< bool, CPUContext >)
 
 REGISTER_CPU_OPERATOR (GivenTensorIntFill, GivenTensorFillOp< int, CPUContext >)
 
 REGISTER_CPU_OPERATOR (GivenTensorInt64Fill, GivenTensorFillOp< int64_t, CPUContext >)
 
 REGISTER_CPU_OPERATOR (GivenTensorStringFill, GivenTensorFillOp< std::string, CPUContext >)
 
 NO_GRADIENT (GivenTensorFill)
 
 NO_GRADIENT (GivenTensorDoubleFill)
 
 NO_GRADIENT (GivenTensorBoolFill)
 
 NO_GRADIENT (GivenTensorIntFill)
 
 NO_GRADIENT (GivenTensorInt64Fill)
 
 NO_GRADIENT (GivenTensorStringFill)
 
 TensorInferenceFunction (FillerTensorInference< TensorProto_DataType_DOUBLE >)
 
 TensorInferenceFunction (FillerTensorInference< TensorProto_DataType_BOOL >)
 
 TensorInferenceFunction (FillerTensorInference< TensorProto_DataType_INT32 >)
 
 TensorInferenceFunction (FillerTensorInference< TensorProto_DataType_INT64 >)
 
 TensorInferenceFunction (FillerTensorInference< TensorProto_DataType_STRING >)
 
 REGISTER_CPU_OPERATOR (GRUUnit, GRUUnitOp< float, CPUContext >)
 
 GRUUnit computes the activations of a standard GRU, in a sequence-length aware fashion. Given the (fused) inputs X (TxNxD), the previous hidden state (NxD), and the sequence lengths (N), computes the GRU, avoiding computation if the input is invalid (as in, the value at X[t][n] >= seqLengths[n]). )DOC").Arg("drop_states", "Bool to determine if hidden state is zeroes or passed along for timesteps past the given sequence_length.").Arg("sequence_lengths", "When false, the sequence lengths input is left out, and all following inputs are shifted left by one.").Output(0, "hidden", "The new GRU hidden state calculated by this op.")
 
 REGISTER_CPU_OPERATOR (GRUUnitGradient, GRUUnitGradientOp< float, CPUContext >)
 
 NumInputs (5, 6).NumOutputs(2).Arg("sequence_lengths", "When false, the sequence lengths input is left out, and all following inputs are shifted left by one.")
 
 REGISTER_GRADIENT (GRUUnit, GetGRUUnitGradient)
 
 // Shape-inference fragments for the half/float conversion ops: the output
 // shape follows the input, with the data type set to FLOAT16 for
 // FloatToHalf and FLOAT for HalfToFloat:
 out.push_back(X);
 out[0].set_data_type(TensorProto_DataType_FLOAT16);
 out[0].set_data_type(TensorProto_DataType_FLOAT);
 
 Arg ("value", "The value for the elements of the output tensor.").Arg("shape", "The shape of the output tensor.").Output(0, "output", "Output tensor of constant values specified by 'value'")
 
 REGISTER_GRADIENT (FloatToHalf, GetFloatToHalfGradient)
 
 REGISTER_GRADIENT (HalfToFloat, GetHalfToFloatGradient)
 
 NO_GRADIENT (Float16ConstantFill)
 
std::vector< TensorShape > Float16FillerTensorInference (const OperatorDef &def, const vector< TensorShape > &in)
 
 REGISTER_CPU_OPERATOR (If, IfOp< CPUContext >)
 
INT_MAX SetDoc (R"DOC( 'If' control operator, first input is a scalar boolean blob that stores condition value. Accepts 'then_net' (required) and 'else_net' (optional) arguments for 'then' and 'else' subnets respectively. Subnets are executed in the same workspace as 'If'. )DOC").Arg("then_net"
 
INT_MAX Net executed when condition is true Arg ("else_net","Net executed when condition is false (optional)").Input(0
 
INT_MAX Net executed when condition is true Scalar boolean condition AllowInplace ([](int in, int out) -> bool{return true;})
 
 REGISTER_CUDA_OPERATOR (If, IfOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (Im2Col, Im2ColOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (Col2Im, Col2ImOp< float, CPUContext >)
 
 REGISTER_GRADIENT (Im2Col, GetIm2ColGradient)
 
 REGISTER_GRADIENT (Col2Im, GetCol2ImGradient)
 
 // Fragments of the Im2Col shape-inference function: it switches on the
 // storage order and checks that the dilated kernel fits the input:
 switch (order) { /* NCHW or NHWC */ }
 CAFFE_ENFORCE(H >= dkernel_h);
 CAFFE_ENFORCE(W >= dkernel_w);
 
 Input (0, "X", "4-tensor in NCHW or NHWC.").Output(0, "Y", "4-tensor of column data")
 
 OPERATOR_SCHEMA (Col2Im).NumInputs(2).NumOutputs(1)
 
 REGISTER_CUDA_OPERATOR (Im2Col, Im2ColOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Col2Im, Col2ImOp< float, CUDAContext >)
 
 REGISTER_CPU_OPERATOR (IntIndexCreate, IndexCreateOp< int32_t >)
 
 REGISTER_CPU_OPERATOR (LongIndexCreate, IndexCreateOp< int64_t >)
 
 REGISTER_CPU_OPERATOR (StringIndexCreate, IndexCreateOp< std::string >)
 
 REGISTER_CPU_OPERATOR (IndexGet, IndexGetOp)
 
 REGISTER_CPU_OPERATOR (IndexLoad, IndexLoadOp)
 
 REGISTER_CPU_OPERATOR (IndexStore, IndexStoreOp)
 
 REGISTER_CPU_OPERATOR (IndexFreeze, IndexFreezeOp)
 
 REGISTER_CPU_OPERATOR (IndexSize, IndexSizeOp)
 
 Arg ("max_elements", "Max number of elements, including the zero entry.").Output(0, "handler", "Pointer to an Index instance.")
 
 Arg ("max_elements", "Max number of elements, including the zero entry.").Output(0, "handle", "Pointer to an Index instance.")
 
 SetDoc (R"DOC( Given an index handle and a tensor of keys, return an Int tensor of same shape containing the indices for each of the keys. If the index is frozen, unknown entries are given index 0. Otherwise, new entries are added into the index. If an insert is necessary but max_elements has been reached, fail. )DOC").Input(0, "handle", "Pointer to an Index instance.").Input(1, "keys", "Tensor of keys to be looked up.").Output(0, "indices", "Indices for each of the keys.")
 
 SetDoc (R"DOC( Freezes the given index, disallowing creation of new index entries. Should not be called concurrently with IndexGet. )DOC").Input(0, "handle", "The input handle.").EnforceInplace({{0, 0}})
 
 Input (0, "handle", "Pointer to an Index instance.").Input(1, "items", "1-D tensor with elements starting with index 1.").Output(0, "handle", "The input handle.")
 
 Input (0, "handle", "Pointer to an Index instance.").Output(0, "items", "Scalar int64 tensor with number of entries.")
 
 NO_GRADIENT (IndexGetOp)
 
 NO_GRADIENT (IntIndexCreate)
 
 NO_GRADIENT (LongIndexCreate)
 
 NO_GRADIENT (StringIndexCreate)
 
 SHOULD_NOT_DO_GRADIENT (IndexFreeze)
 
 SHOULD_NOT_DO_GRADIENT (IndexLoad)
 
 SHOULD_NOT_DO_GRADIENT (IndexStore)
 
 SHOULD_NOT_DO_GRADIENT (IndexSize)
 
 CAFFE_KNOWN_TYPE (std::unique_ptr< caffe2::IndexBase >)
 
 REGISTER_BLOB_SERIALIZER ((TypeMeta::Id< std::unique_ptr< caffe2::IndexBase >>()), IndexSerializer)
 
 REGISTER_BLOB_DESERIALIZER (std::unique_ptr< caffe2::IndexBase >, IndexDeserializer)
 
 REGISTER_CPU_OPERATOR (InstanceNormGradient, InstanceNormGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (InstanceNormGradient).NumInputs(4, 6).NumOutputs(3)
 
 REGISTER_GRADIENT (InstanceNorm, GetInstanceNormGradient)
 
 REGISTER_CPU_OPERATOR (InstanceNorm, InstanceNormOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (BernoulliJSD, BernoulliJSDOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (BernoulliJSDGradient, BernoulliJSDGradientOp< float, CPUContext >)
 
 Input (0, "X", "array of probabilities for prediction").Input(1, "T", "array of probabilities for target").Output(0, "L", "array of JSD losses")
 
 OPERATOR_SCHEMA (BernoulliJSDGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_GRADIENT (BernoulliJSD, GetBernoulliJSDGradient)
 
 REGISTER_CPU_OPERATOR (KeySplit, KeySplitOp< int64_t, CPUContext >)
 
 NO_GRADIENT (KeySplitOp)
 
 OPERATOR_SCHEMA (KeySplit).NumInputs(1).NumOutputs(1, INT_MAX)
 
 REGISTER_CPU_OPERATOR (LayerNorm, LayerNormOp< CPUContext >)
 
 OPERATOR_SCHEMA (LayerNormGradient).NumInputs(5).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (LayerNormGradient, LayerNormGradientOp< CPUContext >)
 
 REGISTER_GRADIENT (LayerNorm, GetLayerNormGradient)
 
 // Fragments of the LayerNorm shape-inference function: the statistics
 // outputs (mean, stddev) keep the dimensions before the canonical axis
 // and append a trailing 1:
 std::vector<int> input_dims(input_dims_long.begin(), input_dims_long.end());
 ArgumentHelper helper(def);
 std::vector<int> stat_dims(input_dims.begin(), input_dims.begin() + canonical_axis);
 stat_dims.push_back(1);
 
 SetDoc (R"DOC( Computes layer normalization as described in https://arxiv.org/pdf/1607.06450.pdf. Given an input vector x \in [a_0, a_1, ...,a_{k-1}, a_k, ..., a_{n-1}], this op treats dimensions a_k through a_{n-1} as feature vectors. For each feature vector, the op contains the mean and standard deviation. Then, it returns the normalized values (with respect to the feature vector). Note that this op does not contain the scale an bias terms described in the paper. Simply follow this op with an FC op to add those. Concretely, this op implements: h = \frac{1}{\sigma}(a - \mu) where \mu = \frac{1}{H}\sum_{i=1}^{H} a_i and \sigma = \sqrt{\frac{1}{H}\sum_{i=1}^{H}(a_i - \mu)^2} where H is the number of hidden units (i.e. product of dimensions from 'axis' to the end.) )DOC").Arg("axis"
 
Describes axis of the inputs Defaults to one because the axis most likely describes the batch size Arg ("epsilon","(float) default to 0.001. Small value to be added to the stdev when"" dividing out by that value. This prevents division by zero.").Input(0
 
Describes axis of the inputs Defaults to one because the axis most likely describes the batch size Input tensor which layer normalization will be applied to Output (0,"output","Normalized values").Output(1
 
Describes axis of the inputs Defaults to one because the axis most likely describes the batch size Input tensor which layer normalization will be applied to Mean values for each feature vector Output (2,"stddev","Standard deviations for each feature vector")
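A minimal sketch of the normalization formula above for a single feature vector a: mu = mean(a), sigma = sqrt(mean((a - mu)^2)), h = (a - mu) / (sigma + epsilon), with epsilon added to the stdev as the 'epsilon' arg describes. Scale and bias are intentionally absent, as the doc notes (follow with an FC op to add them). Illustrative only:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    std::vector<float> LayerNorm1D(const std::vector<float>& a, float epsilon) {
      const float H = static_cast<float>(a.size());
      float mu = 0.0f;
      for (float v : a) mu += v;
      mu /= H;
      float var = 0.0f;
      for (float v : a) var += (v - mu) * (v - mu);
      var /= H;
      const float sigma = std::sqrt(var) + epsilon;  // epsilon guards division
      std::vector<float> h(a.size());
      for (std::size_t i = 0; i < a.size(); ++i) h[i] = (a[i] - mu) / sigma;
      return h;
    }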
 
 REGISTER_CPU_OPERATOR (LeakyRelu, LeakyReluOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (LeakyReluGradient, LeakyReluGradientOp< float, CPUContext >)
 
 LeakyRelu takes input data (Tensor<T>) and an argument alpha (coefficient of leakage, default value 0.01), and produces one output data (Tensor<T>) where the function f(x) = alpha * x for x < 0, f(x) = x for x >= 0, is applied to the tensor elementwise
 
 Arg ("alpha", "Coefficient of leakage").InheritOnnxSchema("LeakyRelu")
 
 REGISTER_GRADIENT (LeakyRelu, GetLeakyReluGradient)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSumFused8BitRowwise, SparseLengthsFused8BitRowwiseOp< CPUContext >)
 
 Performs the same operation as SparseLengthsSum, but operating on 8-bit rowwise quantized matrices with fused storage (where each row stores quantized values, and then 4-byte scale and 4-byte bias). )DOC").Input(0, "DATA", "uint8 tensor obtained with operator FloatToFused8BitRowwiseQuantized").Input(1, "INDICES", "Integer vector containing indices of the first dimension of DATA for the slices that are being aggregated").Input(2, "LENGTHS", "Vector with the same sum of elements as the first dimension of DATA").Output(0, "output", "output")
 
 NO_GRADIENT (SparseLengthsSumFused8BitRowwise)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSumFused8BitRowwise, SparseLengthsFused8BitRowwiseOp< CPUContext, true >)
 
 Input (3, "WEIGHTS", "Vector of weights to scale rows of DATA with before reduction").Output(0, "output", "output")
 
 NO_GRADIENT (SparseLengthsWeightedSumFused8BitRowwise)
 
 REGISTER_CPU_OPERATOR (SparseLengthsMeanFused8BitRowwise, SparseLengthsFused8BitRowwiseOp< CPUContext, false, true >)
 
 NO_GRADIENT (SparseLengthsMeanFused8BitRowwise)
 
 REGISTER_CPU_OPERATOR_STR ("SparseLengthsSum", CPUSparseLengthsReductionOp< float, TensorTypes< float, float16 >, 0, 0 >)
 
 REGISTER_CPU_OPERATOR_STR ("SparseLengthsWeightedSum", CPUSparseLengthsReductionOp< float, TensorTypes< float, float16 >, 1, 0 >)
 
 REGISTER_CPU_OPERATOR_STR ("SparseLengthsMean", CPUSparseLengthsReductionOp< float, TensorTypes< float, float16 >, 0, 1 >)
 
 Variation of SparseLengthsWeightedSum, where, for each row, weights are accessed by indices [0..L-1], where L is the length of the given row. This is basically a fused operator of LengthsRangeFill + Gather + SparseWeightedSum. )DOC").Input(0, "DATA", "uint8 tensor obtained with operator FloatToRowwiseQuantized8Bits").Input(1, "SCALARS", "Scalar multipliers for the input slices. Must be a vector with the length matching the length of DATA").Input(2, "INDICES", "Integer vector containing indices of the first dimension of DATA for the slices that are being aggregated").Input(3, "LENGTHS", "Vector with the same sum of elements as the first dimension of DATA")
 
 REGISTER_CPU_OPERATOR_STR ("SparseLengthsPositionalWeightedSum", CPUSparseLengthsReductionOp< float, TensorTypes< float, float16 >, 1, 0, 1 >)
 
 REGISTER_CPU_OPERATOR (Rowwise8BitQuantizedToFloat, Rowwise8BitQuantizedToFloatOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (FloatToRowwiseQuantized8Bits, FloatToRowwiseQuantized8BitsOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (SparseLengthsSum8BitsRowwise, SparseLengths8BitsRowwiseOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedSum8BitsRowwise, SparseLengths8BitsRowwiseOp< CPUContext, 1 >)
 
 REGISTER_CPU_OPERATOR (SparseLengthsMean8BitsRowwise, SparseLengths8BitsRowwiseOp< CPUContext, 0, 1 >)
 
 REGISTER_CPU_OPERATOR (SparseLengthsWeightedMean8BitsRowwise, SparseLengths8BitsRowwiseOp< CPUContext, 1, 1 >)
 
 NumInputs (5).NumOutputs(1).SetDoc(R"DOC( Variation of SparseLengthsWeightedSum operator, where DATA is stored using 8-bit rowwise quantization. )DOC")
 
 Applies 8-bit row-wise quantization: given an input n-dimensional tensor, reshape it into a matrix of size (m_1, m_2 x ... x m_n) and apply row-wise quantization. After this, each row's scale and bias needed for de-quantization are stored in a separate output
 
 NO_GRADIENT (Rowwise8BitQuantizedToFloat)
 
 NO_GRADIENT (FloatToRowwiseQuantized8Bits)
 
 NO_GRADIENT (SparseLengthsSum8BitsRowwise)
 
 NO_GRADIENT (SparseLengthsWeightedSum8BitsRowwise)
 
 NO_GRADIENT (SparseLengthsMean8BitsRowwise)
 
 NO_GRADIENT (SparseLengthsWeightedMean8BitsRowwise)
 
 REGISTER_CPU_OPERATOR (LengthsTile, LengthsTileOp< CPUContext >)
 
 REGISTER_CUDA_OPERATOR (LengthsTile, LengthsTileOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (LengthsTopK, LengthsTopKOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (LengthsTopKGradient, LengthsTopKGradientOp< float, CPUContext >)
 
 Apply TopK to each segment of the input tensor, where segments are defined by their LENGTHS, and concatenate the results in an output tensor of shape (SIZE(LENGTHS), k). In case there are fewer than k values in a segment, the output value will be padded by 0 and the corresponding output indices will be padded by -1. )DOC").Input(0, "DATA", "Tensor of rank 1. First dimension must be equal to the sum of lengths").Input(1, "LENGTHS", "Tensor of int32 lengths of rank 1").Output(0, "TopKValue", "Output top k elements for each segment, with shape=(SIZE(lengths), k)").Output(1, "TopKIndices", "Output indices in DATA corresponding to value in TopKValue").Arg("k", "the number of top values to return for each segment; if the number of values is smaller than k, the values would be padded with 0 and indices would be padded with -1.")
 
 OPERATOR_SCHEMA (LengthsTopKGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_GRADIENT (LengthsTopK, GetLengthsTopKGradient)
 
 REGISTER_CPU_OPERATOR (DBExists, DBExistsOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (Load, LoadOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (Save, SaveOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (Checkpoint, CheckpointOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (Snapshot, CheckpointOp< CPUContext >)
 
 Output (0, "exists", "A scalar bool Tensor.").Arg("absolute_path", "(int, default 0) if set, use the db path directly and do not prepend the current root folder of the workspace.").Arg("db_name", "(string) the path to the db to load.").Arg("db_type", "(string) the type of the db.")
 
 NumInputs (0, INT_MAX).NumOutputs(0, INT_MAX)
 
 SetDoc (R"DOC( The Load operator loads a set of serialized blobs from a db or multiple dbs. It takes [0, infinity) number of inputs and [0, infinity) number of outputs, using the db keys to match the db entries with the outputs. If at least one input is passed, then it is assumed that the input blobs are a set of DBReaders to load from. Otherwise the db or dbs argument is used to load blobs from one single db or multiple dbs respectively. The db_type argument is used to specify the type of the input db/dbs. )DOC").Arg("absolute_path", "(int, default 0) if set, use the db path directly and do not prepend the current root folder of the workspace.").Arg("add_prefix", "(string, default=\"\") blobs will be prefixed with this when loading. Useful for avoiding collisions with blobs existing in the workspace. The output blob names specified to this op should include this prefix.").Arg("strip_prefix", "(string, default=\"\") characters in the provided blob names that match strip_prefix will be removed prior to loading. Useful for removing device scope from blob names.")
 
template<typename... Ts>
string FormatString (const string &pattern, Ts...values)
 
 REGISTER_CUDA_OPERATOR (Load, LoadOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Save, SaveOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (Checkpoint, CheckpointOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (LRN, LRNOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (LRNGradient, LRNGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (LRN).NumInputs(1).NumOutputs(1, 2).InheritOnnxSchema("LRN")
 
 OPERATOR_SCHEMA (LRNGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_GRADIENT (LRN, GetLRNGradient)
 
 REGISTER_CPU_OPERATOR (LC, LocallyConnectedOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (LC1D, LocallyConnectedOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (LC2D, LocallyConnectedOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (LC3D, LocallyConnectedOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (LCGradient, LocallyConnectedGradientOp< float, CPUContext >)
 
 Each forward LC schema is NumInputs(2, 3).NumOutputs(1).TensorInferenceFunction(ConvPoolOpBase< CPUContext >::TensorInferenceForConv)
 
 OPERATOR_SCHEMA (LCGradient).NumInputs(2, 3).NumOutputs(1, 3)
 
 REGISTER_CPU_OPERATOR (LC1DGradient, LocallyConnectedGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (LC1DGradient).NumInputs(2, 3).NumOutputs(1, 3)
 
 REGISTER_CPU_OPERATOR (LC2DGradient, LocallyConnectedGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (LC2DGradient).NumInputs(2, 3).NumOutputs(1, 3)
 
 REGISTER_CPU_OPERATOR (LC3DGradient, LocallyConnectedGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (LC3DGradient).NumInputs(2, 3).NumOutputs(1, 3)
 
 REGISTER_GRADIENT (LC, GetLocallyConnectedGradient)
 
 REGISTER_GRADIENT (LC1D, GetLocallyConnectedGradient)
 
 REGISTER_GRADIENT (LC2D, GetLocallyConnectedGradient)
 
 REGISTER_GRADIENT (LC3D, GetLocallyConnectedGradient)
 
 REGISTER_CUDA_OPERATOR (LC, LocallyConnectedOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (LCGradient, LocallyConnectedGradientOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (LC1D, LocallyConnectedOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (LC1DGradient, LocallyConnectedGradientOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (LC2D, LocallyConnectedOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (LC2DGradient, LocallyConnectedGradientOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (LC3D, LocallyConnectedOp< float, CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (LC3DGradient, LocallyConnectedGradientOp< float, CUDAContext >)
 
 REGISTER_CPU_OPERATOR (Log, UnaryElementwiseOp< TensorTypes< float >, CPUContext, LogCPUFunctor >)
 
element wise This operation can be done in an in place fashion by providing the same input and output blobs DOC The natural log of the input tensor computed element wise InheritOnnxSchema ("Log")
 
 REGISTER_GRADIENT (Log, GetLogGradient)
 
 REGISTER_CPU_OPERATOR (Logit, UnaryElementwiseWithArgsOp< TensorTypes< float >, CPUContext, LogitCPUFunctor >)
 
 REGISTER_CPU_OPERATOR (LogitGradient, LogitGradientOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (AveragedLoss, AveragedLoss< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (AveragedLossGradient, AveragedLossGradient< float, CPUContext >)
 
 OPERATOR_SCHEMA (AveragedLoss).NumInputs(1).NumOutputs(1).ScalarType(TensorProto::FLOAT)
 
 OPERATOR_SCHEMA (AveragedLossGradient).NumInputs(2).NumOutputs(1)
 
 REGISTER_GRADIENT (AveragedLoss, GetAveragedLossGradient)
 
 REGISTER_CPU_OPERATOR (LpPool, PoolOp< float, CPUContext, LpPool >)
 
 REGISTER_CPU_OPERATOR (LpPoolGradient, PoolGradientOp< float, CPUContext, LpPool >)
 
 SetDoc (R"DOC( LpPool consumes an input blob X and applies L-p pooling across the blob according to kernel sizes, stride sizes, and pad lengths defined by the ConvPoolOpBase operator. L-p pooling consists of taking the L-p norm of a subset of the input tensor according to the kernel size and downsampling the data into the output blob Y for further processing. )DOC").Input(0, "X", "Input data tensor from the previous operator; dimensions depend on whether the NCHW or NHWC operators are being used. For example, in the former, the input has size (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. The corresponding permutation of dimensions is used in the latter case.").Output(0, "Y", "Output data tensor from L-p pooling across the input tensor. Dimensions will vary based on various kernel, stride, and pad sizes.")
 
 OPERATOR_SCHEMA (LpPoolGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_GRADIENT (LpPool, GetPoolGradient)
 
 REGISTER_CPU_OPERATOR (LSTMUnit, LSTMUnitOp< CPUContext >)
 
 NumInputs (4, 5).NumOutputs(2).SetDoc(R"DOC( LSTMUnit computes the activations of a standard LSTM (without peephole connections), in a sequence-length aware fashion. Given the previous cell state and the sequence lengths, computes the LSTM, avoiding computation if the input is invalid (as in, the value at X[t][n] >= seqLengths[n]). )DOC").Arg("forget_bias", "Bias term to add in while calculating forget gate")
 
 REGISTER_CPU_OPERATOR (LSTMUnitGradient, LSTMUnitGradientOp< CPUContext >)
 
 NumInputs (8, 9).NumOutputs(3).Arg("sequence_lengths", "When false, the sequence lengths input is left out, and all following inputs are shifted left by one.")
 
 REGISTER_GRADIENT (LSTMUnit, GetLSTMUnitGradient)
 
 CAFFE_KNOWN_TYPE (MapType64To64)
 
 REGISTER_CPU_OPERATOR (MarginRankingCriterion, MarginRankingCriterionOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (MarginRankingCriterionGradient, MarginRankingCriterionGradientOp< CPUContext >)
 
 MarginRankingCriterion takes two input data X1 (Tensor<float>) and X2 (Tensor<float>), and a label Y (Tensor<int>), to produce the loss (Tensor<float>), where the loss function is loss(X1, X2, Y) = max(0, -Y * (X1 - X2) + margin)
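An elementwise sketch of the loss formula above (illustrative names, not the Caffe2 kernel):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // loss[i] = max(0, -Y[i] * (X1[i] - X2[i]) + margin)
    std::vector<float> MarginRankingLoss(const std::vector<float>& X1,
                                         const std::vector<float>& X2,
                                         const std::vector<int>& Y,
                                         float margin) {
      std::vector<float> loss(X1.size());
      for (std::size_t i = 0; i < X1.size(); ++i) {
        loss[i] = std::max(0.0f, -Y[i] * (X1[i] - X2[i]) + margin);
      }
      return loss;
    }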
 
 REGISTER_CPU_OPERATOR (Sqr, UnaryElementwiseOp< TensorTypes< float >, CPUContext, SqrCPUFunctor >)
 
 Input (0, "input", "Input tensor").Output(0, "output", "Squared elements of the input")
 
 REGISTER_GRADIENT (Sqr, GetSqrGradient)
 
 REGISTER_CPU_OPERATOR (Sign, UnaryElementwiseOp< TensorTypes< float >, CPUContext, SignCPUFunctor >)
 
 REGISTER_CPU_OPERATOR (MatMul, MatMulOp< float, CPUContext >)
 
 // Fragments of the MatMul shape-inference function: M and N are taken
 // from A and B (after applying trans_a / trans_b) and form the output:
 if (trans_a) { /* take M from the other dimension of A */ }
 if (trans_b) { /* take N from the other dimension of B */ }
 out[0].add_dims(M);
 out[0].add_dims(N);
 
 SetDoc (R"DOC( Matrix multiplication Y = A * B, where A has size (M x K), B has size (K x N), and Y will have a size (M x N). )DOC").Input(0, "A", "2D matrix of size (M x K)").Input(1, "B", "2D matrix of size (K x N)").Output(0, "Y", "2D matrix of size (M x N)").Arg("axis_a", "Exclusive axis that divides the first and second dimension of matrix A, default to 1").Arg("axis_b", "Exclusive axis that divides the first and second dimension of matrix B, default to 1").Arg("trans_a", "Pass 1 to transpose A before multiplication")
 
 REGISTER_GRADIENT (MatMul, GetMatMulGradient)
 
 REGISTER_CUDA_OPERATOR (MatMul, MatMulOp< float, CUDAContext >)
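A naive reference sketch of Y = A * B with A (M x K) and B (K x N) in row-major flat storage; illustrative only, not the BLAS-backed kernel:

    #include <cstddef>
    #include <vector>

    std::vector<float> MatMulRef(const std::vector<float>& A,
                                 const std::vector<float>& B,
                                 int M, int K, int N) {
      std::vector<float> Y(static_cast<std::size_t>(M) * N, 0.0f);
      for (int m = 0; m < M; ++m)
        for (int k = 0; k < K; ++k)
          for (int n = 0; n < N; ++n)
            Y[m * N + n] += A[m * K + k] * B[k * N + n];
      return Y;
    }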
 
 REGISTER_CPU_OPERATOR (Mean, MeanOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (MeanGradient, MeanGradientOp< CPUContext >)
 
 SetDoc (R"DOC( Element-wise mean of each of the input tensors. The first input tensor can be used in-place as the output tensor, in which case the mean will be done in place and results will be accumulated in input0. All inputs and outputs must have the same shape and data type. )DOC").Input(0
 
 Input (0, "data_0", "First of the input tensors. Can be inplace.").Output(0, "mean", "Output tensor. Same dimension as inputs.")
 
 REGISTER_GRADIENT (Mean, GetMeanGradient)
 
 REGISTER_CPU_OPERATOR (MaxGradient, MaxGradientOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (MinGradient, MinGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (MaxGradient).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX)
 
 OPERATOR_SCHEMA (MinGradient).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX)
 
 REGISTER_GRADIENT (Max, GetMaxGradient)
 
 REGISTER_GRADIENT (Min, GetMinGradient)
 
 REGISTER_CPU_OPERATOR (Max, MaxOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (Min, MinOp< float, CPUContext >)
 
 SetDoc (R"DOC( Element-wise max of each of the input tensors. The first input tensor can be used in-place as the output tensor, in which case the max will be done in place and results will be accumulated in input0. All inputs and outputs must have the same shape and data type. )DOC").Input(0
 
First of the input tensors Can be inplace Output (0,"max","Output tensor. Same dimension as inputs.").InheritOnnxSchema("Max")
 
 SetDoc (R"DOC( Element-wise min of each of the input tensors. The first input tensor can be used in-place as the output tensor, in which case the min will be done in place and results will be accumulated in input0. All inputs and outputs must have the same shape and data type. )DOC").Input(0
 
First of the input tensors Can be inplace Output (0,"min","Output tensor. Same dimension as inputs.").InheritOnnxSchema("Min")
 
 REGISTER_CPU_OPERATOR (MultiClassAccuracy, MultiClassAccuracyOp< float, CPUContext >)
 
 Input (0, "prediction", "2-D float tensor (N, D) of predicted scores of each class for each data point. N is the batch size; D is the number of possible classes/labels.").Input(1, "labels", "1-D int tensor (N,) of labels for each instance.").Output(0, "accuracies", "1-D float tensor (D,) of accuracy for each class. If a class has no instance in the batch, its accuracy score is set to zero.").Output(1, "amounts", "1-D int tensor (D,) of number of instances for each class in the batch.")
 
 SHOULD_NOT_DO_GRADIENT (MultiClassAccuracy)
 
 REGISTER_CPU_OPERATOR (NegateGradient, NegateGradientOp< CPUContext >)
 
 SetDoc (R"DOC( NegagteGradient operator in forward pass simply copies input to the output, and in backward pass, flips the sign of the output gradient )DOC")
 
 REGISTER_GRADIENT (NegateGradient, GetNegateGradientGradient)
 
 REGISTER_CPU_OPERATOR (Negative, UnaryElementwiseOp< TensorTypes< float, double, int, long >, CPUContext, NegativeCPUFunctor >)
 
 Input (0, "X", "1D input tensor").Output(0, "Y", "1D output tensor").InheritOnnxSchema("Neg")
 
 REGISTER_GRADIENT (Negative, GetNegativeGradient)
 
 REGISTER_CPU_OPERATOR (NGramFromCategorical, NGramFromCategoricalOp< float, int64_t, CPUContext >)
 
 NO_GRADIENT (NGramFromCategorical)
 
 OPERATOR_SCHEMA (NGramFromCategorical).NumInputs(1).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (NormalizeL1, NormalizeL1Op< float, CPUContext >)
 
 Arg ("axis", "axis to normalize").SetDoc(R"DOC( Given a matrix, apply L1-normalization along the specified axis. )DOC")
 
 REGISTER_CPU_OPERATOR (Normalize, NormalizeOp< float, CPUContext >)
 
 Arg ("axis", "axis to normalize").SetDoc(R"DOC( Given a matrix, apply L2-normalization along the specified dimension. )DOC").IdenticalTypeAndShape()
 
 REGISTER_CPU_OPERATOR (NormalizeGradient, NormalizeGradientOp< float, CPUContext >)
 
 REGISTER_GRADIENT (Normalize, GetNormalizeGradient)
 
vector< TensorShape > TensorInferenceForBatchOneHot (const OperatorDef &, const vector< TensorShape > &in)
 
OpSchema::Cost CostInferenceForBatchOneHot (const OperatorDef &def, const vector< TensorShape > &in)
 
 REGISTER_CPU_OPERATOR (BatchBucketOneHot, BatchBucketOneHotOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (BatchOneHot, BatchOneHotOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (OneHot, OneHotOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (SegmentOneHot, SegmentOneHotOp)
 
 REGISTER_CPU_OPERATOR (ONNXWhile, ONNXWhileOp< CPUContext >)
 
INT_MAX SetDoc (R"DOC( *** EXPERIMENTAL. This operator is a work-in-progress. No assumption should be made about the stability or correctness of this op. *** Generic Looping construct confirming to the ONNX Loop operator spec. This loop has multiple termination conditions: 1. Trip count. Iteration count specified at runtime. Set by specifying the input M. Optional. Set to empty string to omit. Note that a static trip count (specified at graph construction time) can be specified by passing in a constant node for input M. 2. Loop termination condition. This is an input to the op that determines whether to run the first interation and also a loop-carried dependency for the body graph. The body graph must yield a value for the condition variable, whether this input is provided or not. This table summarizes the operating modes of this operator with equivalent C-style code: Operator inputs defined as (max_trip_count, condition_var). Omitted optional inputs are represented as empty string. Concretely, in this caffe2 op an input is marked as omitted by setting its 'has_{name}' argument to False. input ("", ""): for (int i=0; ; ++i) { cond = ... // Note this value is ignored, but is required in the body } input ("", cond) // Note this is analogous to a while loop bool cond = ...; for (int i=0; cond; ++i) { cond = ...; } input ("", 1) // Note this is analogous to a do-while loop bool cond = true for (int i=0; cond; ++i) { cond = ...; } input (trip_count, "") // Note this is analogous to a for loop int trip_count = ... for (int i=0; i < trip_count; ++i) { cond = ...; // ignored } input (trip_count, cond) int trip_count = ...; bool cond = ...; for (int i=0; i < trip_count && cond; ++i) { cond = ...; } )DOC").Arg("loop_net"
 
INT_MAX Net executed on each iteration Input (0,"condition","Scalar boolean condition").AllowInplace([](int in
 
 REGISTER_CPU_OPERATOR (NHWC2NCHW, NHWC2NCHWOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (NCHW2NHWC, NCHW2NHWCOp< float, CPUContext >)
 
 // Fragments of the NHWC2NCHW shape-inference function: the output dims
 // are the input dims permuted from (N, H, W, C) to (N, C, H, W); the
 // leading dims(0) line did not survive extraction and is assumed here:
 out[0].add_dims(in[0].dims(0));
 out[0].add_dims(in[0].dims(3));
 out[0].add_dims(in[0].dims(1));
 out[0].add_dims(in[0].dims(2));
 
 SetDoc (R"DOC( The operator switches the order of data in a tensor from NHWC - sample index N, height H, width W and channels C - to the NCHW order. )DOC").Input(0, "data", "The input data (Tensor<float>) in the NHWC order.").Output(0, "output", "The output tensor (Tensor<float>) in the NCHW order.")
 
 OPERATOR_SCHEMA (NCHW2NHWC).NumInputs(1).NumOutputs(1).SetDoc(R"DOC( The operator switches the order of data in a tensor from NCHW - sample index N, channels C, height H and width W - to the NHWC order. )DOC").Input(0, "data", "The input data (Tensor<float>) in the NCHW order.").Output(0, "output", "The output tensor (Tensor<float>) in the NHWC order.")
 
 REGISTER_GRADIENT (NHWC2NCHW, GetNHWC2NCHWGradient)
 
 REGISTER_GRADIENT (NCHW2NHWC, GetNCHW2NHWCGradient)
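An index-mapping sketch of the NHWC to NCHW switch described above: element (n, h, w, c) of the input lands at (n, c, h, w) in the output. Illustrative only:

    #include <vector>

    std::vector<float> NHWC2NCHW(const std::vector<float>& X,
                                 int N, int H, int W, int C) {
      std::vector<float> Y(X.size());
      for (int n = 0; n < N; ++n)
        for (int h = 0; h < H; ++h)
          for (int w = 0; w < W; ++w)
            for (int c = 0; c < C; ++c)
              Y[((n * C + c) * H + h) * W + w] =
                  X[((n * H + h) * W + w) * C + c];
      return Y;
    }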
 
 REGISTER_CPU_OPERATOR (PackSegments, PackSegmentsOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (UnpackSegments, UnpackSegmentsOp< CPUContext >)
 
 SetDoc ("Map N dim tensor to N+1 dim based on length blob. Sequences that \ are shorter than the longest sequence are padded with zeros.").Input(0
 
d int long tensor contains the length in each of the output Input (1,"tensor","N dim Tensor.").Output(0
 
d int long tensor contains the length in each of the output N dim Tensor where dim (1) is the max length" "
 
d int long tensor contains the length in each of the output N dim Tensor where dim (0) is the batch size.") .Output( 1
 
d int long tensor contains the length in each of the output N dim Tensor where dim boolean false where packed_tensor is true otherwise Arg ("pad_minf","Padding number in the packed segments. Use true to pad \ -infinity, otherwise pad zeros").Arg("return_presence_mask"
 
d int long tensor contains the length in each of the input Input (1,"tensor","N+1 dim Tensor.").Output(0
 
 REGISTER_GRADIENT (PackSegments, GetPackSegmentsGradient)
 
 REGISTER_GRADIENT (UnpackSegments, GetUnpackSegmentsGradient)
 
PadMode StringToPadMode (const string &mode)
 
 REGISTER_CPU_OPERATOR (PadImage, PadImageOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (PadImageGradient, PadImageGradientOp< float, CPUContext >)
 
 TensorInferenceFunction(ConvPoolOpBase< CPUContext >::PadTensorInference).SetDoc(R"DOC( PadImage pads values around the boundary of an image according to the pad values and stride sizes defined by the ConvPoolOpBase operator. )DOC").Input(0, "X", "Input data tensor from the previous operator; dimensions depend on whether the NCHW or NHWC operators are being used. For example, in the former, the input has size (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. The corresponding permutation of dimensions is used in the latter case.").Output(0, "Y", "Output data tensor from padding the H and W dimensions on the tensor. Dimensions will vary based on various pad and stride sizes.")
 
 OPERATOR_SCHEMA (PadImageGradient).NumInputs(1).NumOutputs(1)
 
 REGISTER_GRADIENT (PadImage, GetPadImageGradient)
 
 REGISTER_CPU_OPERATOR (Percentile, PercentileOp< CPUContext >)
 
 SetDoc (R"DOC( This operator is used to find percentile representations for raw values, given a sample set of raw values labeled with their corresponding percentiles from the same distribution. In particular, this operator takes as input a tensor of floats to find the percentile values for, a 2D tensor of floats where the first column of the tensor represents sampled values and the second column represents the percentile labels, and a tensor of integer lengths. The lengths tensor is used because the operator works on multiple sets of raw values at the same time. For example, for an input: original_values = [[3, 5, 3], [5, 1, 6]], lengths = [2, 1, 1], value_to_pct = [[3, 0.2], [5, 0.5], [1, 0.3], [3, 0.6]]. Our operator expects that each column i of the input tensor is sampled from distribution i. Lengths tells us that the first two elements in value_to_pct are sampled from distribution 1, the next is from distribution 2, and the last is from distribution 3. We expect the output of our operator to give us [[0.2, 1.0, 0.6], [0.5, 0.3, 1.0]]. To calculate the percentile of an element, we check to see if its value is already mapped to a percentile in value_to_pct. If so, we return that value. If not, we linearly interpolate between the two closest values in value_to_pct. If the value is larger than all values in value_to_pct, we return 1. If it's smaller than all the values, we return 0. )DOC").Input(0, "original_values", "Input 2D tensor of floats, representing the original, raw data to calculate percentiles for.").Input(1, "value_to_pct", "Sorted 2D tensor, with 2 columns. Each element in the first column is a float representing the raw value of a sample. Its corresponding element in the next column represents the percentile it maps to.").Input(2, "lengths", "1D tensor, representing the length of each distribution. We expect that the sum of elements of this tensor is equal to the total length of value_to_pct.").Output(0, "percentile_values", "1D tensor of floats holding the computed percentile for each element of the input.")
 
 NO_GRADIENT (Percentile)
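A sketch of the per-distribution lookup described above: exact matches return their mapped percentile, values between two samples are linearly interpolated, and out-of-range values clamp to 0 or 1. `samples` must be sorted by value, as the doc requires; names are illustrative:

    #include <cstddef>
    #include <utility>
    #include <vector>

    float PercentileOf(float v,
                       const std::vector<std::pair<float, float>>& samples) {
      if (v <= samples.front().first)
        return v == samples.front().first ? samples.front().second : 0.0f;
      if (v >= samples.back().first)
        return v == samples.back().first ? samples.back().second : 1.0f;
      for (std::size_t i = 1; i < samples.size(); ++i) {
        if (v <= samples[i].first) {
          const auto& [x0, p0] = samples[i - 1];
          const auto& [x1, p1] = samples[i];
          if (v == x1) return p1;
          return p0 + (p1 - p0) * (v - x0) / (x1 - x0);  // linear interp
        }
      }
      return 1.0f;  // unreachable for sorted input
    }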
 
 REGISTER_CPU_OPERATOR (Perplexity, PerplexityOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (Perplexity).NumInputs(1).NumOutputs(1).SetDoc(R"DOC( Perplexity calculates how well a probability distribution predicts a sample. Perplexity takes a 1-D tensor containing a batch of probabilities. Each value in the tensor belongs to a different sample and represents the probability of the model predicting the true label for that sample. The operator returns a single (float) perplexity value for the batch. )DOC").Input(0, "probabilities", "The input data as Tensor. It contains a batch of true label or target probabilities.").Output(0, "output", "The output - a single (float) perplexity value for the batch.")
 
 SHOULD_NOT_DO_GRADIENT (Perplexity)
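A sketch of batch perplexity from a vector of true-label probabilities, using the standard definition perplexity = exp(-mean(log p)), which is assumed here to match the behavior the doc describes:

    #include <cmath>
    #include <vector>

    float Perplexity(const std::vector<float>& p) {
      double sum_log = 0.0;
      for (float v : p) sum_log += std::log(v);  // accumulate in double
      return static_cast<float>(
          std::exp(-sum_log / static_cast<double>(p.size())));
    }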
 
 REGISTER_CPU_OPERATOR (PiecewiseLinearTransform, PiecewiseLinearTransformOp< float, CPUContext >)
 
 NumInputs (1, 4).NumOutputs(1).SetDoc(R"DOC( PiecewiseLinearTransform takes inputs -- predictions, a 2-D or 1-D tensor (Tensor<float>) of size (batch_size x prediction_dimensions) -- and transforms them with piecewise linear functions defined by bounds, slopes and intercepts. The output tensor has the same shape as the input predictions and contains the predictions transformed by the piecewise linear functions. Each column of predictions has its own piecewise linear transformation function; therefore the size of the piecewise function parameters is pieces x prediction_dimensions, except for binary predictions, where only the positive prediction needs them. Note that in each piece the low bound is excluded while the high bound is included. Also, the piecewise linear function must be continuous. Notes: - If the input is binary predictions (Nx2 or Nx1 tensor), set the binary arg to true so that only one group of piecewise linear functions is needed (see details below). - The transform parameters (bounds, slopes, intercepts) can be passed either through args or through input blobs. - If we have multiple groups of piecewise linear functions, each group has the same number of pieces. - If a prediction is out of the bounds, it is capped to the smallest or largest bound. )DOC").Arg("bounds", "1-D vector of size (prediction_dimensions x (pieces+1)) containing the upper bounds of each piece of linear function. One special case is that the first bound is the lower bound of the whole piecewise function, and we treat it the same as the leftmost function. (bounds, slopes, intercepts) can be passed through either arg or input blobs.").Arg("slopes", "1-D vector of size (prediction_dimensions x pieces) containing the slopes of the linear functions").Arg("intercepts", "1-D vector of size (prediction_dimensions x pieces) containing the intercepts of the linear functions")
 
 REGISTER_CPU_OPERATOR (AveragePoolGradient, PoolGradientOp< float, CPUContext, AveragePool< float >>)
 
 OPERATOR_SCHEMA (AveragePoolGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (AveragePool1DGradient, PoolGradientOp< float, CPUContext, AveragePool< float >>)
 
 OPERATOR_SCHEMA (AveragePool1DGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (AveragePool2DGradient, PoolGradientOp< float, CPUContext, AveragePool< float >>)
 
 OPERATOR_SCHEMA (AveragePool2DGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (AveragePool3DGradient, PoolGradientOp< float, CPUContext, AveragePool< float >>)
 
 OPERATOR_SCHEMA (AveragePool3DGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (MaxPoolGradient, PoolGradientOp< float, CPUContext, MaxPool< float >>)
 
 OPERATOR_SCHEMA (MaxPoolGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (MaxPool1DGradient, PoolGradientOp< float, CPUContext, MaxPool< float >>)
 
 OPERATOR_SCHEMA (MaxPool1DGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (MaxPool2DGradient, PoolGradientOp< float, CPUContext, MaxPool< float >>)
 
 OPERATOR_SCHEMA (MaxPool2DGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (MaxPool3DGradient, PoolGradientOp< float, CPUContext, MaxPool< float >>)
 
 OPERATOR_SCHEMA (MaxPool3DGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_GRADIENT (AveragePool, GetPoolGradient)
 
 REGISTER_GRADIENT (AveragePool1D, GetPoolGradient)
 
 REGISTER_GRADIENT (AveragePool2D, GetPoolGradient)
 
 REGISTER_GRADIENT (AveragePool3D, GetPoolGradient)
 
 REGISTER_GRADIENT (MaxPool, GetPoolGradient)
 
 REGISTER_GRADIENT (MaxPool1D, GetPoolGradient)
 
 REGISTER_GRADIENT (MaxPool2D, GetPoolGradient)
 
 REGISTER_GRADIENT (MaxPool3D, GetPoolGradient)
 
std::function< void(OpSchema &)> AveragePoolDocGenerator (const char *dim)
 
std::function< void(OpSchema &)> MaxPoolDocGenerator (const char *dim)
 
 REGISTER_CPU_OPERATOR (AveragePool, PoolOp< float, CPUContext, AveragePool< float >>)
 
 REGISTER_CPU_OPERATOR (AveragePool1D, PoolOp< float, CPUContext, AveragePool< float >>)
 
 REGISTER_CPU_OPERATOR (AveragePool2D, PoolOp< float, CPUContext, AveragePool< float >>)
 
 REGISTER_CPU_OPERATOR (AveragePool3D, PoolOp< float, CPUContext, AveragePool< float >>)
 
 REGISTER_CPU_OPERATOR (MaxPool, PoolOp< float, CPUContext, MaxPool< float >>)
 
 REGISTER_CPU_OPERATOR (MaxPool1D, PoolOp< float, CPUContext, MaxPool< float >>)
 
 REGISTER_CPU_OPERATOR (MaxPool2D, PoolOp< float, CPUContext, MaxPool< float >>)
 
 REGISTER_CPU_OPERATOR (MaxPool3D, PoolOp< float, CPUContext, MaxPool< float >>)
 
 Each pooling schema is NumInputs(1).NumOutputs(1).TensorInferenceFunction(ConvPoolOpBase< CPUContext >::TensorInferenceForPool)
 
 REGISTER_CPU_OPERATOR (Pow, PowOp< TensorTypes< float >, CPUContext, EigenPowFunctor, SameTypeAsInput >)
 
 NumInputs (1, 2).NumOutputs(1).Arg("exponent", "The exponent of the power function").AllowInplace({{0, 0}, {1, 0}}).IdenticalTypeAndShapeOfInput(0).SetDoc(R"DOC( Pow takes input data (Tensor<T>) and an argument exponent, given either as a scalar argument or as a second input tensor, and produces one output data (Tensor<T>) where the function f(x) = x^exponent is applied to the data tensor elementwise. )DOC")
 
 REGISTER_CPU_OPERATOR (PRelu, PReluOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (PReluGradient, PReluGradientOp< float, CPUContext >)
 
 OPERATOR_SCHEMA (PReluGradient).NumInputs(4).NumOutputs(2).SetDoc(R"DOC( PReluGradient takes both Y and dY and uses this to update dX and dW according to the chain rule and derivatives of the rectified linear function. )DOC")
 
 REGISTER_GRADIENT (PRelu, GetPReluGradient)
 
 REGISTER_CPU_OPERATOR (PrependDim, PrependDimOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (MergeDim, MergeDimOp< CPUContext >)
 
 SetDoc (R"DOC( Reshape the tensor by prepending a dimension of fixed size and dividing the size of the next dimension by that amount. )DOC").Arg("dim_size"
 
Size of the dimension to prepend Input (0,"data","An input tensor.").Output(0
 
 SetDoc (R"DOC( Merge first two dimensions in a single dimension with size dim(0) * dim(1). )DOC").Input(0
 
An input tensor Output (0,"reshaped","Reshaped tensor.")
 
 REGISTER_GRADIENT (PrependDim, GetPrependDimGradient)
 
 REGISTER_CUDA_OPERATOR (PrependDim, PrependDimOp< CUDAContext >)
 
 REGISTER_CUDA_OPERATOR (MergeDim, MergeDimOp< CUDAContext >)
 
 REGISTER_CPU_OPERATOR (QuantDecode, QuantDecodeOp< QuantDecodeRunTy::RUN_ALWAYS >)
 
 REGISTER_CPU_OPERATOR (QuantDecodeGradient, QuantDecodeGradientOp)
 
vector< TIndex > ConvertFromInputIndex (TIndex index, vector< TIndex > &dims)
 
TIndex ConvertToOutputIndex (const vector< int > &axes, const vector< TIndex > &nd_idx, vector< TIndex > &dims)
 
template<typename T >
T Add (T x, T y)
 
template<typename T , class Context >
void ComputeOp (const T *X_data, const TIndex X_size, vector< TIndex > &dims, T *Y_data, vector< int > &axes, int keepdims, T(*binary_op)(T, T))
 
 REGISTER_CPU_OPERATOR (ReduceSum, ReduceSumOp< float, CPUContext >)
 
 SetDoc (R"DOC( Computes the sum of the input tensor's elements along the provided axes. The resulting tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then the resulting tensor has the reduced dimension pruned. )DOC").Arg("axes","A list of integers, along which to reduce.").Arg("keepdims","Keep the reduced dimension(s) or not, default True keeps the reduced dimension(s).").Input(0,"data","An input tensor.").Output(0,"reduced","Reduced output tensor.")
 
 GRADIENT_NOT_IMPLEMENTED_YET (ReduceSum)
 
 REGISTER_CPU_OPERATOR (ReduceMean, ReduceMeanOp< float, CPUContext >)
 
 GRADIENT_NOT_IMPLEMENTED_YET (ReduceMean)
 
 REGISTER_CPU_OPERATOR (ReduceFrontSum, SumReduceDimsOp< CPUContext, true, false >)
 
 REGISTER_CPU_OPERATOR (ReduceFrontSumGradient, SumReduceDimsGradientOp< CPUContext, true, false >)
 
 REGISTER_GRADIENT (ReduceFrontSum, GetReduceFrontSumGradient)
 
 REGISTER_CPU_OPERATOR (ReduceBackSum, SumReduceDimsOp< CPUContext, false, false >)
 
 REGISTER_CPU_OPERATOR (ReduceBackSumGradient, SumReduceDimsGradientOp< CPUContext, false, false >)
 
 REGISTER_GRADIENT (ReduceBackSum, GetReduceBackSumGradient)
 
 Arg ("num_reduce_dim","Number of dimensions to reduce.").SetDoc(R"DOC( Reduces the input tensor along the first dimension of the input tensor by applying 'Sum'. When lengths is given, sum is only computed with subsets of elements correspondingly. )DOC").Input(0,"data","T<D1..., Dn> Input data.").Input(1,"lengths","Num of elements in each sample, should have size D2 x D3 x ... x Dn.").TensorInferenceFunction([](const OperatorDef &def
 
 OPERATOR_SCHEMA (ReduceFrontSumGradient).NumInputs(2, 3).NumOutputs(1)
 
 Arg ("num_reduce_dim","Number of dimensions to reduce.").SetDoc(R"DOC( Reduces the input tensor along the last dimension of the input tensor by applying 'Sum'. When lengths is given, sum is only computed with subsets of elements correspondingly. )DOC").Input(0,"data","T<D1..., Dn> Input data.").Input(1,"lengths","Num of elements in each sample, should have size D1 x D2 x ... x D(n-1).").TensorInferenceFunction([](const OperatorDef &def
 
 OPERATOR_SCHEMA (ReduceBackSumGradient).NumInputs(2, 3).NumOutputs(1)
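Concretely, reducing the "front" with num_reduce_dim = k amounts to viewing the tensor as a 2-D [M, N] array and summing over rows; a minimal sketch without the optional lengths input:

    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <vector>

    // View X as [M, N] where M is the product of the first k dims and N the rest;
    // sum over the M rows, producing one value per trailing position.
    std::vector<float> reduce_front_sum(const std::vector<float>& X,
                                        const std::vector<int64_t>& dims, int k) {
      int64_t M = std::accumulate(dims.begin(), dims.begin() + k, int64_t{1},
                                  std::multiplies<int64_t>());
      int64_t N = static_cast<int64_t>(X.size()) / M;
      std::vector<float> Y(N, 0.f);
      for (int64_t i = 0; i < M; ++i)
        for (int64_t j = 0; j < N; ++j)
          Y[j] += X[i * N + j];
      return Y;
    }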
 
 REGISTER_CPU_OPERATOR (ReduceFrontMean, SumReduceDimsOp< CPUContext, true, true >)
 
 REGISTER_CPU_OPERATOR (ReduceFrontMeanGradient, SumReduceDimsGradientOp< CPUContext, true, true >)
 
 REGISTER_GRADIENT (ReduceFrontMean, GetReduceFrontMeanGradient)
 
 Arg ("num_reduce_dim","Number of dimensions to reduce.").SetDoc(R"DOC( Reduces the input tensor along the first dimension of the input tensor by applying 'Mean'. When lengths is given, mean is only computed with subsets of elements correspondingly. )DOC").Input(0,"data","T<D1..., Dn> Input data.")
 
 OPERATOR_SCHEMA (ReduceFrontMeanGradient).NumInputs(2, 3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (ReduceBackMean, SumReduceDimsOp< CPUContext, false, true >)
 
 REGISTER_CPU_OPERATOR (ReduceBackMeanGradient, SumReduceDimsGradientOp< CPUContext, false, true >)
 
 REGISTER_GRADIENT (ReduceBackMean, GetReduceBackMeanGradient)
 
 Arg ("num_reduce_dim","Number of dimensions to reduce.").SetDoc(R"DOC( Reduces the input tensor along the last dimension of the input tensor by applying 'Mean'. When lengths is given, mean is only computed with subsets of elements correspondingly. )DOC").Input(0,"data","T<D1..., Dn> Input data.")
 
 OPERATOR_SCHEMA (ReduceBackMeanGradient).NumInputs(2, 3).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (ReduceFrontMax, MaxReduceDimsOp< float, CPUContext, true >)
 
 REGISTER_CPU_OPERATOR (ReduceFrontMaxGradient, MaxReduceDimsGradientOp< float, CPUContext, true >)
 
 REGISTER_CPU_OPERATOR (ReduceBackMax, MaxReduceDimsOp< float, CPUContext, false >)
 
 REGISTER_CPU_OPERATOR (ReduceBackMaxGradient, MaxReduceDimsGradientOp< float, CPUContext, false >)
 
 REGISTER_GRADIENT (ReduceFrontMax, GetReduceFrontMaxGradient)
 
 REGISTER_GRADIENT (ReduceBackMax, GetReduceBackMaxGradient)
 
 Arg ("num_reduce_dim","Number of dimensions to reduce.").SetDoc(R"DOC( Reduces the input tensor along the first dimension of the input tensor by applying 'Max'. When lengths is given, max is only computed with subsets of elements correspondingly. )DOC").Input(0,"data","T<D1..., Dn> Input data.").Input(1,"lengths","Num of elements in each sample, should have size D2 x D3 x ... x Dn.").TensorInferenceFunction([](const OperatorDef &def
 
 OPERATOR_SCHEMA (ReduceFrontMaxGradient).NumInputs(3, 4).NumOutputs(1)
 
 Arg ("num_reduce_dim","Number of dimensions to reduce.").SetDoc(R"DOC( Reduces the input tensor along the last dimension of the input tensor by applying 'Max'. When lengths is given, max is only computed with subsets of elements correspondingly. )DOC").Input(0,"data","T<D1..., Dn> Input data.")
 
 OPERATOR_SCHEMA (ReduceBackMaxGradient).NumInputs(3, 4).NumOutputs(1)
 
 REGISTER_CPU_OPERATOR (SumElements, SumElementsOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (SumElementsInt, SumElementsIntOp< int, CPUContext >)
 
 REGISTER_CPU_OPERATOR (SumSqrElements, SumSqrElementsOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (SumElementsGradient, SumElementsGradientOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (RowwiseMax, MaxReductionOp< float, CPUContext, true >)
 
 REGISTER_CPU_OPERATOR (RowwiseMaxGradient, MaxReductionGradientOp< float, CPUContext, true >)
 
 REGISTER_CPU_OPERATOR (ColwiseMaxGradient, MaxReductionGradientOp< float, CPUContext, false >)
 
 REGISTER_CPU_OPERATOR (ColwiseMax, MaxReductionOp< float, CPUContext, false >)
 
 NumInputs(1).NumOutputs(1).ScalarType(TensorProto::FLOAT)
 
 NumInputs(1).NumOutputs(1).ScalarType(TensorProto::INT32)
 
 SHOULD_NOT_DO_GRADIENT (SumElementsInt)
 
 NumInputs(1).NumOutputs(1).ScalarType(TensorProto::FLOAT)
 
 OPERATOR_SCHEMA (SumElementsGradient).NumInputs(2).NumOutputs(1)
 
 REGISTER_GRADIENT (SumElements, GetSumElementsGradient)
 
 Input (0,"X","A tensor of dimensions batch_size x M x N to compute rowwise max.").Output(0,"Y","batch_size x M rowwise-max results matrix.")
 
 OPERATOR_SCHEMA (RowwiseMaxGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_GRADIENT (RowwiseMax, GetRowwiseMaxGradient)
 
 OPERATOR_SCHEMA (ColwiseMaxGradient)
 
 Input (0,"X","A tensor of dimensions batch_size x M x N to compute colwise max.").Output(0,"Y","batch_size x N column-max results matrix.")
 
 OPERATOR_SCHEMA (ColumnMaxGradient).NumInputs(3).NumOutputs(1)
 
 REGISTER_GRADIENT (ColwiseMax, GetColwiseMaxGradient)
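A minimal sketch of the RowwiseMax computation (ColwiseMax is analogous, with the max taken over the M rows instead of the N columns):

    #include <algorithm>
    #include <vector>

    // For each (batch, row), take the max over the N columns.
    std::vector<float> rowwise_max(const std::vector<float>& X,
                                   int batch_size, int M, int N) {
      std::vector<float> Y(static_cast<size_t>(batch_size) * M);
      for (int b = 0; b < batch_size; ++b)
        for (int m = 0; m < M; ++m) {
          const float* row = X.data() + (static_cast<size_t>(b) * M + m) * N;
          Y[static_cast<size_t>(b) * M + m] = *std::max_element(row, row + N);
        }
      return Y;
    }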
 
 REGISTER_CPU_OPERATOR (Relu, ReluOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (ReluGradient, ReluGradientOp< float, CPUContext >)
 
 CostInferenceFunction (CostInferenceForRelu).IdenticalTypeAndShape().SetDoc(R"DOC( Relu takes one input data (Tensor<T>) and produces one output data (Tensor<T>) where the rectified linear function, y = max(0, x), is applied to the tensor elementwise. )DOC").Input(0,"X","1D input tensor").Output(0,"Y","1D output tensor").InheritOnnxSchema("Relu")
 
 SetDoc (R"DOC( ReluGradient takes both Y and dY and uses this to update dX according to the chain rule and derivatives of the rectified linear function. )DOC")
 
 REGISTER_GRADIENT (Relu, GetReluGradient)
 
 REGISTER_GRADIENT (ReluFp16, GetReluGradient)
 
 REGISTER_CPU_OPERATOR (ReplaceNaN, ReplaceNaNOp< CPUContext >)
 
 SHOULD_NOT_DO_GRADIENT (ReplaceNaN)
 
 REGISTER_CPU_OPERATOR (Reshape, ReshapeOp< float, CPUContext >)
 
 out[1].set_data_type (TensorProto::INT64)
 
 out[1].add_dims (in[0].dims_size())
 
 if (!helper.HasArgument("shape"))
 
 CAFFE_ENFORCE_EQ (in.size(), 1, "New shape must not be specified by the input blob and the argument `shape` at the same time.")
 
 for (int i = 0; i < actualNewShape.size(); ++i)
 
 if (unknownIdx != -1)
 
 for (const auto d : actualNewShape)
 
 SetDoc (R"DOC( Reshape the input tensor similar to numpy.reshape. It takes a tensor as input and an optional tensor specifying the new shape. When the second input is absent, an extra argument shape must be specified. It outputs the reshaped tensor as well as the original shape. At most one dimension of the new shape can be -1. In this case, the value is inferred from the size of the tensor and the remaining dimensions. A dimension could also be 0, in which case the actual dimension value is going to be copied from the input tensor. )DOC").Arg("shape","New shape").Input(0,"data","An input tensor.").Input(1,"new_shape","New shape.").Output(0,"reshaped","Reshaped data.").Output(1,"old_shape","Original shape.").InheritOnnxSchema("Reshape")
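A minimal sketch of the documented -1/0 resolution (the enforcement and loop fragments above come from the same inference logic; infer_reshape is a hypothetical name):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Resolve a requested shape against an input shape: 0 copies the input
    // dimension at that position; at most one -1 is inferred from the remaining size.
    std::vector<int64_t> infer_reshape(const std::vector<int64_t>& in,
                                       std::vector<int64_t> shape) {
      int64_t total = 1, known = 1;
      int unknownIdx = -1;
      for (auto d : in) total *= d;
      for (size_t i = 0; i < shape.size(); ++i) {
        if (shape[i] == 0) shape[i] = in[i];  // copy dimension from the input
        if (shape[i] == -1) { assert(unknownIdx == -1); unknownIdx = static_cast<int>(i); }
        else known *= shape[i];
      }
      if (unknownIdx != -1) shape[unknownIdx] = total / known;
      assert(known * (unknownIdx == -1 ? 1 : shape[unknownIdx]) == total);
      return shape;
    }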
 
 REGISTER_GRADIENT (Reshape, GetReshapeGradient)
 
 REGISTER_CUDA_OPERATOR (Reshape, ReshapeOp< float, CUDAContext >)
 
void resizeNearest2x (int batch_size, int num_channels, int input_height, int input_width, const float *input, float *output)
 
 REGISTER_CPU_OPERATOR (ResizeNearest, ResizeNearestOp< float, CPUContext >)
 
 REGISTER_CPU_OPERATOR (ResizeNearestGradient, ResizeNearestGradientOp< float, CPUContext >)
 
 Arg ("width_scale","Scale along width dimension").Arg("height_scale","Scale along height dimension").SetDoc(R"DOC( Resizes the spatial dimensions of the input using nearest neighbor interpolation. The `width_scale` and `height_scale` arguments control the size of the output, which is given by: output_width = floor(input_width * width_scale), output_height = floor(input_height * height_scale). )DOC").Input(0,"X","Input tensor").Output(0,"Y","Output tensor")
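A minimal single-channel sketch of the nearest neighbor resampling (the batched NCHW traversal and the fast resizeNearest2x path are omitted):

    #include <algorithm>
    #include <vector>

    // Each output pixel reads the nearest (floor-scaled) input pixel.
    std::vector<float> resize_nearest(const std::vector<float>& in, int H, int W,
                                      float height_scale, float width_scale) {
      int outH = static_cast<int>(H * height_scale);
      int outW = static_cast<int>(W * width_scale);
      std::vector<float> out(static_cast<size_t>(outH) * outW);
      for (int y = 0; y < outH; ++y) {
        int in_y = std::min(static_cast<int>(y / height_scale), H - 1);
        for (int x = 0; x < outW; ++x) {
          int in_x = std::min(static_cast<int>(x / width_scale), W - 1);
          out[static_cast<size_t>(y) * outW + x] =
              in[static_cast<size_t>(in_y) * W + in_x];
        }
      }
      return out;
    }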
 
 REGISTER_GRADIENT (ResizeNearest, GetResizeNearestGradient)
 
 REGISTER_CPU_OPERATOR (ReversePackedSegs, ReversePackedSegsOp< CPUContext >)
 
 SetDoc (R"DOC( Reverse segments in a 3-D tensor (lengths, segments, embeddings,), leaving paddings unchanged. This operator is used to reverse input of a recurrent neural network to make it a BRNN. )DOC").Input(0,"data","a 3-D (lengths, segments, embeddings,) tensor.").Input(1,"lengths","length of each segment.").Output(0,"reversed data","a (lengths, segments, embeddings,) tensor with each segment reversed and paddings unchanged.")
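A minimal sketch of the reversal for embedding size 1 (each segment is reversed up to its length; padding beyond the length stays in place):

    #include <algorithm>
    #include <vector>

    // data is (max_length, num_segments) row-major; lengths[s] gives the valid
    // prefix of segment s. Reverse each valid prefix, leave padding untouched.
    void reverse_packed_segs(std::vector<float>& data, int max_length,
                             int num_segments, const std::vector<int>& lengths) {
      for (int s = 0; s < num_segments; ++s) {
        for (int i = 0, j = lengths[s] - 1; i < j; ++i, --j) {
          std::swap(data[i * num_segments + s], data[j * num_segments + s]);
        }
      }
    }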
 
 REGISTER_GRADIENT (ReversePackedSegs, GetReversePackedSegsGradient)
 
 REGISTER_CPU_OPERATOR (RMACRegions, RMACRegionsOp< CPUContext >)
 
 REGISTER_CPU_OPERATOR (RecurrentNetworkBlobFetcher, RecurrentNetworkBlobFetcherOp< CPUContext >)
 
 Arg ("prefix","Prefix string to prepend extracted blobs.").Input(0,"ScratchWorkspaceBlob","Name of scratch workspace blob returned by recurrent network.").Output(0,"blob_names","1D tensor of strings containing extracted blob names.")
 
 SHOULD_NOT_DO_GRADIENT (RecurrentNetworkBlobFetcher)
 
 REGISTER_CUDA_OPERATOR (RecurrentNetworkBlobFetcher, RecurrentNetworkBlobFetcherOp< CUDAContext >)
 
template<>
std::unique_ptr< RecurrentNetworkExecutorBase > createRNNExecutor< CPUContext > (const NetDef &step_net_def, std::map< string, string > &recurrent_input_map, std::string timestep_blob, ArgumentHelper rnn_args)
 Implementation of RecurrentNetworkExecutor that uses thread pool for multithreaded execution of RNNs. More...
 
template<class Context >
std::unique_ptr< RecurrentNetworkExecutorBase > createRNNExecutor (const NetDef &step_net_def, std::map< string, string > &recurrent_input_map, std::string timestep_blob, ArgumentHelper rnn_args)
 
template<>
std::unique_ptr< RecurrentNetworkExecutorBase > createRNNExecutor< CUDAContext > (const NetDef &step_net_def, std::map< string, string > &recurrent_input_map, std::string timestep_blob, ArgumentHelper arg_helper)
 
 CAFFE_KNOWN_TYPE (detail::ScratchWorkspaces)
 
 REGISTER_CPU_OPERATOR (RecurrentNetwork, RecurrentNetworkOp< CPUContext >)
 
INT_MAX SetDoc (R"DOC( Run the input network in a recurrent fashion. This can be used to implement fairly general recurrent neural networks (RNNs). The operator proceeds as follows. - First, initialized the states from the input recurrent states - For each timestep T, apply the links (that map offsets from input/output tensors into the inputs/outputs for the `step` network) - Finally, alias the recurrent states to the specified output blobs. This is a fairly special-case meta-operator, and so the implementation is somewhat complex. It trades of generality (and frankly usability) against performance and control (compared to e.g. TF dynamic_rnn, Theano scan, etc). See the usage examples for a flavor of how to use it. )DOC")
 
 REGISTER_CPU_OPERATOR (RecurrentNetworkGradient, RecurrentNetworkGradientOp< CPUContext >)
 
 OPERATOR_SCHEMA (RecurrentNetworkGradient)
 
 REGISTER_CPU_OPERATOR (rnn_internal_accumulate_gradient_input, AccumulateInputGradientOp< CPUContext >)
 
 NumOutputs(1, INT_MAX).EnforceInplace({{2, 0}}).Private().SetDoc(R"DOC( Internal RNN operator. )DOC")
 
 REGISTER_CPU_OPERATOR (rnn_internal_apply_link, RNNApplyLinkOp< CPUContext >)
 
 Private ().SetDoc(R"DOC( Internal RNN operator. )DOC")
 
 REGISTER_GRADIENT (RecurrentNetwork, GetRecurrentNetworkGradient)
 
 REGISTER_CUDNN_OPERATOR (Recurrent, RecurrentOp< float >)
 
 OPERATOR_SCHEMA (Recurrent).NumInputs(4).NumOutputs(5).SetDoc(R"DOC( Recurrent wraps the CuDNN R5 RNN implementation. See the CuDNN R5 documentation for more information. In general, the implementation takes an input (TxNxD) tensor, the hidden state input (NxD), the cell input (NxD), and a weight tensor (effectively an opaque blob, where the size and layout is dictated by CuDNN). The outputs are the output (again, TxNxD), and the final hidden/cell states (NxD). These can be reset (at sequence boundaries across minibatches) by multiplying by zero. The CuDNN arguments (hidden_size, bidirectional, num_layers, rnn_mode, input_mode) are passed directly through to CuDNN. )DOC")
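The reset-by-multiplying-by-zero remark amounts to masking the carried state at sequence boundaries; a minimal sketch for an NxD state tensor (mask is a hypothetical per-sequence 0/1 vector):

    #include <vector>

    // mask[n] is 0 at a sequence boundary (reset the state) and 1 otherwise.
    void reset_states(std::vector<float>& state, int N, int D,
                      const std::vector<float>& mask) {
      for (int n = 0; n < N; ++n)
        for (int d = 0; d < D; ++d)
          state[static_cast<size_t>(n) * D + d] *= mask[n];
    }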
 
 REGISTER_CUDNN_OPERATOR (RecurrentGradient, RecurrentGradientOp< float >)
 
 NumInputs (7).NumOutputs(6).AllowInplace({{4, 5}})
 
 REGISTER_CUDNN_OPERATOR (RecurrentParamSet, RecurrentParamAccessOp< float, SET_PARAM >)
 
 SetDoc ("Set individual parameters of a recurrent net.").Arg("param_type"