Commit adb6fa64 by Ting PAN

Add native ops test

Summary:
This commit tests the execution of native ops and verifies the results.
Several bugs were found and fixed based on these tests.
1 parent df172cc8
Showing with 2594 additions and 3027 deletions
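
For context, a minimal, hypothetical sketch of the kind of check such a native-op test performs; the module path, the add() signature, and the NumPy conversion are assumptions, not the actual test added by this commit:

import unittest

import numpy as np
import dragon

class TestNativeOps(unittest.TestCase):
    """Hypothetical sketch: run an eager op and compare against NumPy."""

    def test_add(self):
        a = np.array([1., 2., 3.], dtype='float32')
        b = np.array([4., 5., 6.], dtype='float32')
        # Assumption: dragon.math.add takes a sequence of two inputs in
        # eager mode and the result converts to a NumPy array.
        y = dragon.math.add([a, b])
        self.assertTrue(np.allclose(np.asarray(y), a + b))

if __name__ == '__main__':
    unittest.main()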
......@@ -9,13 +9,13 @@
#
# ------------------------------------------------------------
"""Implementation for the ``Layer`` C++ class."""
"""The base layer class."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.autograph.tensor import RefTensor
from dragon.core.autograph.tensor import TensorRef
from dragon.core.eager import context as eager_context
from dragon.core.framework import context
......@@ -76,8 +76,8 @@ class Layer(object):
param_name = scoped_name + '/param:{}'.format(len(self._blobs))
# Set the name explicitly.
variable = RefTensor(param_name)
variable_grad = RefTensor(param_name + '_grad')
variable = TensorRef(param_name)
variable_grad = TensorRef(param_name + '_grad')
if filler is not None:
variable._register_as(**filler)
......
......@@ -455,8 +455,8 @@ class InnerProduct(Layer):
param = layer_param.inner_product_param
self.arguments = {
'axis': param.axis,
'num_output': param.num_output,
'transW': not param.transpose,
'out_channels': param.num_output,
'transpose_w': not param.transpose,
}
# Add weights and biases
self.add_blob(filler=self.get_filler(param, 'weight_filler'))
......@@ -522,7 +522,7 @@ class Normalize(Layer):
normalize_param {
across_spatial: false
channel_shared: false
eps: 1e-5
eps: 1e-12
scale_filler: {
type: "constant"
value: 1
......@@ -548,7 +548,7 @@ class Normalize(Layer):
self.add_blob(filler=self.get_filler(param, 'scale_filler'), value=1)
def __call__(self, bottom):
norm_out = [normalization_ops.l2_normalize(bottom, **self.l2norm_arguments)]
norm_out = [normalization_ops.lp_normalize(bottom, **self.l2norm_arguments)]
norm_out += [blob['data'] for blob in self._blobs]
return math_ops.affine(norm_out, **self.affine_arguments)
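
For reference, a small NumPy sketch of the L2 normalization the layer now requests via lp_normalize, assuming the op clamps the norm with eps (the new default 1e-12) rather than adding it under the square root:

import numpy as np

def l2_normalize_ref(x, axis=1, eps=1e-12):
    # x / max(||x||_2, eps) along `axis`; eps matches the new
    # NormalizeParameter default of 1e-12. Whether the op clamps with
    # eps or adds it under the sqrt is an assumption here.
    norm = np.sqrt(np.sum(np.square(x), axis=axis, keepdims=True))
    return x / np.maximum(norm, eps)

x = np.random.rand(2, 8, 4, 4).astype('float32')
y = l2_normalize_ref(x, axis=1)  # across_spatial: false -> per-position channel norm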
......
......@@ -65,7 +65,7 @@ class Convolution(Layer):
super(Convolution, self).__init__(layer_param)
param = layer_param.convolution_param
self.arguments = {
'num_output': param.num_output,
'out_channels': param.num_output,
'kernel_shape': [int(e) for e in param.kernel_size],
'strides': [int(e) for e in param.stride] if len(param.stride) > 0 else [1],
'pads': [int(e) for e in param.pad] if len(param.pad) > 0 else [0],
......@@ -187,7 +187,7 @@ class DepthwiseConv2d(Layer):
super(DepthwiseConv2d, self).__init__(layer_param)
param = layer_param.convolution_param
self.arguments = {
'num_output': param.num_output,
'out_channels': param.num_output,
'kernel_shape': [int(e) for e in param.kernel_size],
'strides': [int(e) for e in param.stride] if len(param.stride) > 0 else [1],
'pads': [int(e) for e in param.pad] if len(param.pad) > 0 else [0],
......
......@@ -9,7 +9,7 @@
#
# ------------------------------------------------------------
"""Implementation for the ``Net`` C++ class."""
"""The base net class."""
from __future__ import absolute_import
from __future__ import division
......@@ -20,8 +20,8 @@ from google.protobuf import text_format
from dragon.core.autograph import def_function
from dragon.core.autograph import grad_impl
from dragon.core.autograph.tensor import RefTensor
from dragon.core.autograph.tensor import Tensor
from dragon.core.autograph.tensor import TensorRef
from dragon.core.framework import workspace
from dragon.core.util import nest
from dragon.vm.caffe import layers as layer_factory
......@@ -84,17 +84,13 @@ class Net(object):
if len(self._net_proto.input) > 0:
shapes = self._net_proto.input_shape
for i, input in enumerate(self._net_proto.input):
for i, input_name in enumerate(self._net_proto.input):
shape = [e for e in shapes[i].dim] if i < len(shapes) else None
if input not in self._blobs:
data = Tensor(input, shape=shape, dtype='float32').placeholder()
self._blobs[input] = {
data = Tensor(input_name, shape, 'float32').placeholder()
self._blobs[input_name] = {
'data': data,
'diff': RefTensor(
data.id + '_grad',
shape=shape,
dtype=data.dtype
),
'diff': TensorRef(data.id + '_grad', shape, data.dtype),
}
for layer in self._net_proto.layer:
......@@ -145,7 +141,7 @@ class Net(object):
for i, blob in enumerate(layer._top):
self._blobs[blob] = {
'data': outputs[i],
'diff': RefTensor(outputs[i].id + '_grad'),
'diff': TensorRef(outputs[i].id + '_grad'),
}
self._net_outputs.add(blob)
......
......@@ -3,29 +3,25 @@ syntax = "proto2";
package caffe;
// Specifies the shape (dimensions) of a Blob.
message BlobShape {
repeated int64 dim = 1 [packed = true];
}
message BlobShape { repeated int64 dim = 1 [ packed = true ]; }
message BlobProto {
optional BlobShape shape = 7;
repeated float data = 5 [packed = true];
repeated float diff = 6 [packed = true];
repeated double double_data = 8 [packed = true];
repeated double double_diff = 9 [packed = true];
repeated float data = 5 [ packed = true ];
repeated float diff = 6 [ packed = true ];
repeated double double_data = 8 [ packed = true ];
repeated double double_diff = 9 [ packed = true ];
// 4D dimensions -- deprecated. Use "shape" instead.
optional int32 num = 1 [default = 0];
optional int32 channels = 2 [default = 0];
optional int32 height = 3 [default = 0];
optional int32 width = 4 [default = 0];
optional int32 num = 1 [ default = 0 ];
optional int32 channels = 2 [ default = 0 ];
optional int32 height = 3 [ default = 0 ];
optional int32 width = 4 [ default = 0 ];
}
// The BlobProtoVector is simply a way to pass multiple blobproto instances
// around.
message BlobProtoVector {
repeated BlobProto blobs = 1;
}
message BlobProtoVector { repeated BlobProto blobs = 1; }
message Datum {
optional int32 channels = 1;
......@@ -37,21 +33,21 @@ message Datum {
// Optionally, the datum could also hold float data.
repeated float float_data = 6;
// If true data contains an encoded image that need to be decoded
optional bool encoded = 7 [default = false];
optional bool encoded = 7 [ default = false ];
repeated int32 labels = 8;
}
message FillerParameter {
// The filler type.
optional string type = 1 [default = 'constant'];
optional float value = 2 [default = 0]; // the value in constant filler
optional float min = 3 [default = 0]; // the min value in uniform filler
optional float max = 4 [default = 1]; // the max value in uniform filler
optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
optional float std = 6 [default = 1]; // the std value in Gaussian filler
optional string type = 1 [ default = 'constant' ];
optional float value = 2 [ default = 0 ]; // the value in constant filler
optional float min = 3 [ default = 0 ]; // the min value in uniform filler
optional float max = 4 [ default = 1 ]; // the max value in uniform filler
optional float mean = 5 [ default = 0 ]; // the mean value in Gaussian filler
optional float std = 6 [ default = 1 ]; // the std value in Gaussian filler
// The expected number of non-zero output weights for a given input in
// Gaussian filler -- the default -1 means don't perform sparsification.
optional int32 sparse = 7 [default = -1];
optional int32 sparse = 7 [ default = -1 ];
// Normalize the filler variance by fan_in, fan_out, or their average.
// Applies to 'xavier' and 'msra' fillers.
enum VarianceNorm {
......@@ -59,7 +55,7 @@ message FillerParameter {
FAN_OUT = 1;
AVERAGE = 2;
}
optional VarianceNorm variance_norm = 8 [default = FAN_IN];
optional VarianceNorm variance_norm = 8 [ default = FAN_IN ];
}
message NetParameter {
......@@ -78,7 +74,7 @@ message NetParameter {
// Whether the network will force every layer to carry out backward operation.
// If set False, then whether to carry out backward is determined
// automatically according to the net structure and learning rates.
optional bool force_backward = 5 [default = false];
optional bool force_backward = 5 [ default = false ];
// The current "state" of the network, including the phase, level, and stage.
// Some layers may be included/excluded depending on this state and the states
// specified in the layers' include and exclude fields.
......@@ -86,11 +82,11 @@ message NetParameter {
// Print debugging information about results while running Net::Forward,
// Net::Backward, and Net::Update.
optional bool debug_info = 7 [default = false];
optional bool debug_info = 7 [ default = false ];
// The layers that make up the net. Each of their configurations, including
// connectivity and behavior, is specified as a LayerParameter.
repeated LayerParameter layer = 100; // ID 100 so layers are printed last.
repeated LayerParameter layer = 100; // ID 100 so layers are printed last.
// DEPRECATED: use 'layer' instead.
repeated V1LayerParameter layers = 2;
......@@ -122,9 +118,9 @@ message SolverParameter {
optional NetParameter net_param = 25;
optional string train_net = 1; // Proto filename for the train net.
repeated string test_net = 2; // Proto filenames for the test nets.
repeated string test_net = 2; // Proto filenames for the test nets.
optional NetParameter train_net_param = 21; // Inline train net params.
repeated NetParameter test_net_param = 22; // Inline test net params.
repeated NetParameter test_net_param = 22; // Inline test net params.
// The states for the train/test nets. Must be unspecified or
// specified once per net.
......@@ -140,11 +136,11 @@ message SolverParameter {
repeated int32 test_iter = 3;
// The number of iterations between two testing phases.
optional int32 test_interval = 4 [default = 0];
optional bool test_compute_loss = 19 [default = false];
optional int32 test_interval = 4 [ default = 0 ];
optional bool test_compute_loss = 19 [ default = false ];
// If true, run an initial test pass before the first iteration,
// ensuring memory availability and printing the starting value of the loss.
optional bool test_initialization = 32 [default = true];
optional bool test_initialization = 32 [ default = true ];
optional float base_lr = 5; // The base learning rate
repeated float stage_lr = 50;
repeated int32 stage_iter = 51;
......@@ -152,10 +148,10 @@ message SolverParameter {
// will be displayed.
optional int32 display = 6;
// Display the loss averaged over the last average_loss iterations
optional int32 average_loss = 33 [default = 1];
optional int32 average_loss = 33 [ default = 1 ];
optional int32 max_iter = 7; // the maximum number of iterations
// accumulate gradients over `iter_size` x `batch_size` instances
optional int32 iter_size = 36 [default = 1];
optional int32 iter_size = 36 [ default = 1 ];
// The learning rate decay policy. The currently implemented learning rate
// policies are as follows:
......@@ -173,13 +169,13 @@ message SolverParameter {
// where base_lr, max_iter, gamma, step, stepvalue and power are defined
// in the solver parameter protocol buffer, and iter is the current iteration.
optional string lr_policy = 8;
optional float gamma = 9; // The parameter to compute the learning rate.
optional float power = 10; // The parameter to compute the learning rate.
optional float gamma = 9; // The parameter to compute the learning rate.
optional float power = 10; // The parameter to compute the learning rate.
optional float momentum = 11; // The momentum value.
optional float weight_decay = 12; // The weight decay.
// regularization types supported: L1 and L2
// controlled by weight_decay
optional string regularization_type = 29 [default = "L2"];
optional string regularization_type = 29 [ default = "L2" ];
// the stepsize for learning rate policy "step"
optional int32 stepsize = 13;
// the stepsize for learning rate policy "multistep"
......@@ -187,49 +183,49 @@ message SolverParameter {
// Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
// whenever their actual L2 norm is larger.
optional float clip_gradients = 35 [default = -1];
optional float clip_gradients = 35 [ default = -1 ];
optional int32 snapshot = 14 [default = 0]; // The snapshot interval
optional string snapshot_prefix = 15; // The prefix for the snapshot.
optional int32 snapshot = 14 [ default = 0 ]; // The snapshot interval
optional string snapshot_prefix = 15; // The prefix for the snapshot.
// whether to snapshot diff in the results or not. Snapshotting diff will help
// debugging but the final protocol buffer size will be much larger.
optional bool snapshot_diff = 16 [default = false];
optional bool snapshot_diff = 16 [ default = false ];
enum SnapshotFormat {
HDF5 = 0;
BINARYPROTO = 1;
}
optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
optional SnapshotFormat snapshot_format = 37 [ default = BINARYPROTO ];
// the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default.
enum SolverMode {
CPU = 0;
GPU = 1;
}
optional SolverMode solver_mode = 17 [default = GPU];
optional SolverMode solver_mode = 17 [ default = GPU ];
// the device_id will that be used in GPU mode. Use device_id = 0 in default.
optional int32 device_id = 18 [default = 0];
optional int32 device_id = 18 [ default = 0 ];
// If non-negative, the seed with which the Solver will initialize the Caffe
// random number generator -- useful for reproducible results. Otherwise,
// (and by default) initialize using a seed derived from the system clock.
optional int64 random_seed = 20 [default = -1];
optional int64 random_seed = 20 [ default = -1 ];
// type of the solver
optional string type = 40 [default = "SGD"];
optional string type = 40 [ default = "SGD" ];
// numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
optional float delta = 31 [default = 1e-8];
optional float delta = 31 [ default = 1e-8 ];
// parameters for the Adam solver
optional float momentum2 = 39 [default = 0.999];
optional float momentum2 = 39 [ default = 0.999 ];
// RMSProp decay value
// MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
optional float rms_decay = 38 [default = 0.99];
optional float rms_decay = 38 [ default = 0.99 ];
// If true, print information about the state of the net that may help with
// debugging learning problems.
optional bool debug_info = 23 [default = false];
optional bool debug_info = 23 [ default = false ];
// If false, don't save a snapshot after training finishes.
optional bool snapshot_after_train = 28 [default = true];
optional bool snapshot_after_train = 28 [ default = true ];
// DEPRECATED: old solver enum types, use string instead
enum SolverType {
......@@ -241,25 +237,26 @@ message SolverParameter {
ADAM = 5;
}
// DEPRECATED: use type instead of solver_type
optional SolverType solver_type = 30 [default = SGD];
optional SolverType solver_type = 30 [ default = SGD ];
}
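
As an aside, a small NumPy sketch of the moving-average recurrence documented for rms_decay above; the parameter step shown is the conventional RMSProp form, not code taken from this repository:

import numpy as np

def rmsprop_step(param, grad, mean_square, lr, rms_decay=0.99, delta=1e-8):
    # MeanSquare(t) = rms_decay * MeanSquare(t-1) + (1 - rms_decay) * grad^2
    mean_square = rms_decay * mean_square + (1. - rms_decay) * np.square(grad)
    # Conventional RMSProp parameter step (assumed, not taken from this file).
    param = param - lr * grad / (np.sqrt(mean_square) + delta)
    return param, mean_square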
// A message that stores the solver snapshots
message SolverState {
optional int32 iter = 1; // The current iteration
optional int32 iter = 1; // The current iteration
optional string learned_net = 2; // The file that stores the learned net.
repeated BlobProto history = 3; // The history for sgd solvers
optional int32 current_step = 4 [default = 0]; // The current step for learning rate
repeated BlobProto history = 3; // The history for sgd solvers
optional int32 current_step = 4
[ default = 0 ]; // The current step for learning rate
}
enum Phase {
TRAIN = 0;
TEST = 1;
TRAIN = 0;
TEST = 1;
}
message NetState {
optional Phase phase = 1 [default = TEST];
optional int32 level = 2 [default = 0];
optional Phase phase = 1 [ default = TEST ];
optional int32 level = 2 [ default = 0 ];
repeated string stage = 3;
}
......@@ -300,24 +297,25 @@ message ParamSpec {
}
// The multiplier on the global learning rate for this parameter.
optional float lr_mult = 3 [default = 1.0];
optional float lr_mult = 3 [ default = 1.0 ];
// The multiplier on the global weight decay for this parameter.
optional float decay_mult = 4 [default = 1.0];
optional float decay_mult = 4 [ default = 1.0 ];
}
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 146 (last added: parameter_param)
// LayerParameter next available layer-specific ID: 146 (last added:
// parameter_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
optional string name = 1; // the layer name
optional string type = 2; // the layer type
repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob
repeated string top = 4; // the name of each top blob
// The mirror stage optimization
optional bool mirror_stage = 150 [default = false];
optional bool mirror_stage = 150 [ default = false ];
// The train / test phase for computation.
optional Phase phase = 10;
......@@ -423,29 +421,29 @@ message TransformationParameter {
// For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried
// out before scaling.
optional float scale = 1 [default = 1];
optional float scale = 1 [ default = 1 ];
// Specify if we want to randomly mirror data.
optional bool mirror = 2 [default = false];
optional bool mirror = 2 [ default = false ];
// Specify if we would like to randomly crop an image.
optional uint32 crop_size = 3 [default = 0];
optional uint32 crop_size = 3 [ default = 0 ];
// mean_file and mean_value cannot be specified at the same time
optional string mean_file = 4;
// if specified can be repeated once (would substract it from all the channels)
// or can be repeated the same number of times as channels
// (would subtract them from the corresponding channel)
// if specified can be repeated once (would substract it from all the
// channels) or can be repeated the same number of times as channels (would
// subtract them from the corresponding channel)
repeated float mean_value = 5;
// Force the decoded image to have 3 color channels.
optional bool force_color = 6 [default = false];
optional bool force_color = 6 [ default = false ];
// Force the decoded image to have 1 color channels.
optional bool force_gray = 7 [default = false];
optional bool force_gray = 7 [ default = false ];
// Distort the color?
optional bool augment_color = 9 [default = false];
optional bool augment_color = 9 [ default = false ];
// Target size.
optional uint32 resize = 10 [default=0];
optional uint32 resize = 10 [ default = 0 ];
// Padding size.
optional uint32 padding = 11 [default = 0];
optional uint32 padding = 11 [ default = 0 ];
// Crop size during scale jittering
optional uint32 random_crop_size = 12 [default = 0];
optional uint32 random_crop_size = 12 [ default = 0 ];
}
// Message that stores parameters shared by loss layers
......@@ -469,7 +467,7 @@ message LossParameter {
// Do not normalize the loss.
NONE = 3;
}
optional NormalizationMode normalization = 3 [default = VALID];
optional NormalizationMode normalization = 3 [ default = VALID ];
// Deprecated. Ignored if normalization is specified. If normalization
// is not specified, then setting this to false will be equivalent to
// normalization = BATCH_SIZE to be consistent with previous behavior.
......@@ -483,14 +481,14 @@ message AccuracyParameter {
// When computing accuracy, count as correct by comparing the true label to
// the top k scoring classes. By default, only compare to the top scoring
// class (i.e. argmax).
optional uint32 top_k = 1 [default = 1];
optional uint32 top_k = 1 [ default = 1 ];
// The "label" axis of the prediction blob, whose argmax corresponds to the
// predicted label -- may be negative to index from the end (e.g., -1 for the
// last axis). For example, if axis == 1 and the predictions are
// (N x C x H x W), the label blob is expected to contain N*H*W ground truth
// labels with integer values in {0, 1, ..., C-1}.
optional int32 axis = 2 [default = 1];
optional int32 axis = 2 [ default = 1 ];
// If specified, ignore instances with the given label.
optional int32 ignore_label = 3;
......@@ -498,8 +496,8 @@ message AccuracyParameter {
message ArgMaxParameter {
// If true produce pairs (argmax, maxval)
optional bool out_max_val = 1 [default = false];
optional uint32 top_k = 2 [default = 1];
optional bool out_max_val = 1 [ default = false ];
optional uint32 top_k = 2 [ default = 1 ];
// The axis along which to maximise -- may be negative to index from the
// end (e.g., -1 for the last axis).
// By default ArgMaxLayer maximizes over the flattened trailing dimensions
......@@ -512,10 +510,10 @@ message ConcatParameter {
// end (e.g., -1 for the last axis). Other axes must have the
// same dimension for all the bottom blobs.
// By default, ConcatLayer concatenates blobs along the "channels" axis (1).
optional int32 axis = 2 [default = 1];
optional int32 axis = 2 [ default = 1 ];
// DEPRECATED: alias for "axis" -- does not support negative indexing.
optional uint32 concat_dim = 1 [default = 1];
optional uint32 concat_dim = 1 [ default = 1 ];
}
message BatchNormParameter {
......@@ -524,10 +522,10 @@ message BatchNormParameter {
// across the batch.
optional bool use_global_stats = 1;
// How much does the moving average decay each iteration?
optional float moving_average_fraction = 2 [default = 0.9];
optional float moving_average_fraction = 2 [ default = 0.9 ];
// Small value to add to the variance estimate so that we don't divide by
// zero.
optional float eps = 3 [default = 1e-5];
optional float eps = 3 [ default = 1e-5 ];
}
message BiasParameter {
......@@ -544,7 +542,7 @@ message BiasParameter {
// (axis == 3 == -1) 60
// Furthermore, bottom[1] may have the empty shape (regardless of the value of
// "axis") -- a scalar bias.
optional int32 axis = 1 [default = 1];
optional int32 axis = 1 [ default = 1 ];
// (num_axes is ignored unless just one bottom is given and the bias is
// a learned parameter of the layer. Otherwise, num_axes is determined by the
......@@ -552,7 +550,7 @@ message BiasParameter {
// The number of axes of the input (bottom[0]) covered by the bias
// parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
// Set num_axes := 0, to add a zero-axis Blob: a scalar.
optional int32 num_axes = 2 [default = 1];
optional int32 num_axes = 2 [ default = 1 ];
// (filler is ignored unless just one bottom is given and the bias is
// a learned parameter of the layer.)
......@@ -564,25 +562,25 @@ message BiasParameter {
message ContrastiveLossParameter {
// margin for dissimilar pair
optional float margin = 1 [default = 1.0];
optional float margin = 1 [ default = 1.0 ];
// The first implementation of this cost did not exactly match the cost of
// Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2.
// legacy_version = false (the default) uses (margin - d)^2 as proposed in the
// Hadsell paper. New models should probably use this version.
// legacy_version = true uses (margin - d^2). This is kept to support /
// reproduce existing models and results
optional bool legacy_version = 2 [default = false];
optional bool legacy_version = 2 [ default = false ];
}
message ConvolutionParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
optional bool bias_term = 2 [default = true]; // whether to have bias terms
optional bool bias_term = 2 [ default = true ]; // whether to have bias terms
// Pad, kernel size, and stride are all given as a single value for equal
// dimensions in all spatial dimensions, or once per spatial dimension.
repeated uint32 pad = 3; // The padding size; defaults to 0
repeated uint32 pad = 3; // The padding size; defaults to 0
repeated uint32 kernel_size = 4; // The kernel size
repeated uint32 stride = 6; // The stride; defaults to 1
repeated uint32 stride = 6; // The stride; defaults to 1
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the
// algorithme à trous from Holschneider et al. 1987.)
......@@ -590,23 +588,23 @@ message ConvolutionParameter {
// For 2D convolution only, the *_h and *_w versions may also be used to
// specify both spatial dimensions.
optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
optional uint32 kernel_h = 11; // The kernel height (2D only)
optional uint32 kernel_w = 12; // The kernel width (2D only)
optional uint32 stride_h = 13; // The stride height (2D only)
optional uint32 stride_w = 14; // The stride width (2D only)
optional uint32 pad_h = 9 [ default = 0 ]; // The padding height (2D only)
optional uint32 pad_w = 10 [ default = 0 ]; // The padding width (2D only)
optional uint32 kernel_h = 11; // The kernel height (2D only)
optional uint32 kernel_w = 12; // The kernel width (2D only)
optional uint32 stride_h = 13; // The stride height (2D only)
optional uint32 stride_w = 14; // The stride width (2D only)
optional uint32 group = 5 [default = 1]; // The group size for group conv
optional uint32 group = 5 [ default = 1 ]; // The group size for group conv
optional FillerParameter weight_filler = 7; // The filler for the weight
optional FillerParameter bias_filler = 8; // The filler for the bias
optional FillerParameter bias_filler = 8; // The filler for the bias
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 15 [default = DEFAULT];
optional Engine engine = 15 [ default = DEFAULT ];
// The axis to interpret as "channels" when performing convolution.
// Preceding dimensions are treated as independent inputs;
......@@ -617,14 +615,14 @@ message ConvolutionParameter {
// With (N, C, D, H, W) inputs, and axis == 1, we perform
// N independent 3D convolutions, sliding (C/g)-channels
// filters across the spatial axes (D, H, W) of the input.
optional int32 axis = 16 [default = 1];
optional int32 axis = 16 [ default = 1 ];
// Whether to force use of the general ND convolution, even if a specific
// implementation for blobs of the appropriate number of spatial dimensions
// is available. (Currently, there is only a 2D-specific convolution
// implementation; for input blobs with num_axes != 2, this option is
// ignored and the ND implementation will be used.)
optional bool force_nd_im2col = 17 [default = false];
optional bool force_nd_im2col = 17 [ default = false ];
}
message CropParameter {
......@@ -641,7 +639,7 @@ message CropParameter {
// Note: standard dimensions are N,C,H,W so the default is a spatial crop,
// and `axis` may be negative to index from the end (e.g., -1 for the last
// axis).
optional int32 axis = 1 [default = 2];
optional int32 axis = 1 [ default = 2 ];
repeated uint32 offset = 2;
}
......@@ -659,40 +657,40 @@ message DataParameter {
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database.
// DEPRECATED. Each solver accesses a different subset of the database.
optional uint32 rand_skip = 7 [default = 0];
optional DB backend = 8 [default = LEVELDB];
optional uint32 rand_skip = 7 [ default = 0 ];
optional DB backend = 8 [ default = LEVELDB ];
// DEPRECATED. See TransformationParameter. For data pre-processing, we can do
// simple scaling and subtracting the data mean, if provided. Note that the
// mean subtraction is always carried out before scaling.
optional float scale = 2 [default = 1];
optional float scale = 2 [ default = 1 ];
optional string mean_file = 3;
// DEPRECATED. See TransformationParameter. Specify if we would like to randomly
// crop an image.
optional uint32 crop_size = 5 [default = 0];
// DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
// data.
optional bool mirror = 6 [default = false];
// DEPRECATED. See TransformationParameter. Specify if we would like to
// randomly crop an image.
optional uint32 crop_size = 5 [ default = 0 ];
// DEPRECATED. See TransformationParameter. Specify if we want to randomly
// mirror data.
optional bool mirror = 6 [ default = false ];
// Force the encoded image to have 3 color channels
optional bool force_encoded_color = 9 [default = false];
optional bool force_encoded_color = 9 [ default = false ];
// Prefetch queue (Number of batches to prefetch to host memory, increase if
// data access bandwidth varies).
optional uint32 prefetch = 10 [default = 5];
optional uint32 prefetch = 10 [ default = 5 ];
// Whether to shuffle the data.
optional bool shuffle = 11 [default = false];
optional bool shuffle = 11 [ default = false ];
// The number of chunks to shuffle.
optional int32 num_chunks = 12 [default = 2048];
optional int32 num_chunks = 12 [ default = 2048 ];
}
message DropoutParameter {
optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
optional bool scale_train = 2 [default = true]; // scale train or test phase
optional float dropout_ratio = 1 [ default = 0.5 ]; // dropout ratio
optional bool scale_train = 2 [ default = true ]; // scale train or test phase
}
// DummyDataLayer fills any number of arbitrarily shaped blobs with random
// (or constant) data generated by "Fillers" (see "message FillerParameter").
message DummyDataParameter {
// This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N
// shape fields, and 0, 1 or N data_fillers.
// This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or
// N shape fields, and 0, 1 or N data_fillers.
//
// If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
// If 1 data_filler is specified, it is applied to all top blobs. If N are
......@@ -713,12 +711,12 @@ message EltwiseParameter {
SUM = 1;
MAX = 2;
}
optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
optional EltwiseOp operation = 1 [ default = SUM ]; // element-wise operation
repeated float coeff = 2; // blob-wise coefficient for SUM operation
// Whether to use an asymptotically slower (for >2 inputs) but stabler method
// of computing the gradient for the PROD operation. (No effect for SUM op.)
optional bool stable_prod_grad = 3 [default = true];
optional bool stable_prod_grad = 3 [ default = true ];
}
// Message that stores parameters used by ELULayer
......@@ -726,7 +724,7 @@ message ELUParameter {
// Described in:
// Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
// Deep Network Learning by Exponential Linear Units (ELUs). arXiv
optional float alpha = 1 [default = 1];
optional float alpha = 1 [ default = 1 ];
}
// Message that stores parameters used by EmbedLayer
......@@ -737,10 +735,9 @@ message EmbedParameter {
// 1 greater than the maximum possible input value.
optional uint32 input_dim = 2;
optional bool bias_term = 3 [default = true]; // Whether to use a bias term
optional FillerParameter weight_filler = 4; // The filler for the weight
optional FillerParameter bias_filler = 5; // The filler for the bias
optional bool bias_term = 3 [ default = true ]; // Whether to use a bias term
optional FillerParameter weight_filler = 4; // The filler for the weight
optional FillerParameter bias_filler = 5; // The filler for the bias
}
// Message that stores parameters used by ExpLayer
......@@ -748,21 +745,21 @@ message ExpParameter {
// ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
// Or if base is set to the default (-1), base is set to e,
// so y = exp(shift + scale * x).
optional float base = 1 [default = -1.0];
optional float scale = 2 [default = 1.0];
optional float shift = 3 [default = 0.0];
optional float base = 1 [ default = -1.0 ];
optional float scale = 2 [ default = 1.0 ];
optional float shift = 3 [ default = 0.0 ];
}
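
A tiny NumPy reference for the ExpLayer formula quoted above (base == -1 selects the natural base e):

import numpy as np

def exp_layer_ref(x, base=-1.0, scale=1.0, shift=0.0):
    # y = base ** (shift + scale * x); base == -1 selects the natural base e.
    inner = shift + scale * np.asarray(x, dtype='float64')
    return np.exp(inner) if base == -1.0 else np.power(base, inner)

exp_layer_ref([0., 1.])           # -> [1., e]
exp_layer_ref([0., 1.], base=2.)  # -> [1., 2.]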
/// Message that stores parameters used by FlattenLayer
message FlattenParameter {
// The first axis to flatten: all preceding axes are retained in the output.
// May be negative to index from the end (e.g., -1 for the last axis).
optional int32 axis = 1 [default = 1];
optional int32 axis = 1 [ default = 1 ];
// The last axis to flatten: all following axes are retained in the output.
// May be negative to index from the end (e.g., the default -1 for the last
// axis).
optional int32 end_axis = 2 [default = -1];
optional int32 end_axis = 2 [ default = -1 ];
}
// Message that stores parameters used by HDF5DataLayer
......@@ -777,12 +774,10 @@ message HDF5DataParameter {
// and the ordering of data within any given HDF5 file is shuffled,
// but data between different files are not interleaved; all of a file's
// data are output (in a random order) before moving onto another file.
optional bool shuffle = 3 [default = false];
optional bool shuffle = 3 [ default = false ];
}
message HDF5OutputParameter {
optional string file_name = 1;
}
message HDF5OutputParameter { optional string file_name = 1; }
message HingeLossParameter {
enum Norm {
......@@ -790,38 +785,38 @@ message HingeLossParameter {
L2 = 2;
}
// Specify the Norm to use L1 or L2
optional Norm norm = 1 [default = L1];
optional Norm norm = 1 [ default = L1 ];
}
message ImageDataParameter {
// Specify the data source.
optional string source = 1;
// Specify the batch size.
optional uint32 batch_size = 4 [default = 1];
optional uint32 batch_size = 4 [ default = 1 ];
// The rand_skip variable is for the data layer to skip a few data points
// to avoid all asynchronous sgd clients to start at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database.
optional uint32 rand_skip = 7 [default = 0];
optional uint32 rand_skip = 7 [ default = 0 ];
// Whether or not ImageLayer should shuffle the list of files at every epoch.
optional bool shuffle = 8 [default = false];
optional bool shuffle = 8 [ default = false ];
// It will also resize images if new_height or new_width are not zero.
optional uint32 new_height = 9 [default = 0];
optional uint32 new_width = 10 [default = 0];
optional uint32 new_height = 9 [ default = 0 ];
optional uint32 new_width = 10 [ default = 0 ];
// Specify if the images are color or gray
optional bool is_color = 11 [default = true];
optional bool is_color = 11 [ default = true ];
// DEPRECATED. See TransformationParameter. For data pre-processing, we can do
// simple scaling and subtracting the data mean, if provided. Note that the
// mean subtraction is always carried out before scaling.
optional float scale = 2 [default = 1];
optional float scale = 2 [ default = 1 ];
optional string mean_file = 3;
// DEPRECATED. See TransformationParameter. Specify if we would like to randomly
// crop an image.
optional uint32 crop_size = 5 [default = 0];
// DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
// data.
optional bool mirror = 6 [default = false];
optional string root_folder = 12 [default = ""];
// DEPRECATED. See TransformationParameter. Specify if we would like to
// randomly crop an image.
optional uint32 crop_size = 5 [ default = 0 ];
// DEPRECATED. See TransformationParameter. Specify if we want to randomly
// mirror data.
optional bool mirror = 6 [ default = false ];
optional string root_folder = 12 [ default = "" ];
}
message InfogainLossParameter {
......@@ -831,19 +826,20 @@ message InfogainLossParameter {
message InnerProductParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
optional bool bias_term = 2 [default = true]; // whether to have bias terms
optional FillerParameter weight_filler = 3; // The filler for the weight
optional FillerParameter bias_filler = 4; // The filler for the bias
optional bool bias_term = 2 [ default = true ]; // whether to have bias terms
optional FillerParameter weight_filler = 3; // The filler for the weight
optional FillerParameter bias_filler = 4; // The filler for the bias
// The first axis to be lumped into a single inner product computation;
// all preceding axes are retained in the output.
// May be negative to index from the end (e.g., -1 for the last axis).
optional int32 axis = 5 [default = 1];
optional int32 axis = 5 [ default = 1 ];
// Specify whether to transpose the weight matrix or not.
// If transpose == true, any operations will be performed on the transpose
// of the weight matrix. The weight matrix itself is not going to be transposed
// but rather the transfer flag of operations will be toggled accordingly.
optional bool transpose = 6 [default = false];
// of the weight matrix. The weight matrix itself is not going to be
// transposed but rather the transfer flag of operations will be toggled
// accordingly.
optional bool transpose = 6 [ default = false ];
}
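
A short NumPy sketch of how the transpose flag changes the matmul, assuming the upstream Caffe weight layout of (num_output, K) when transpose == false; this is also why the Python layer above passes transpose_w = not param.transpose:

import numpy as np

def inner_product_ref(x, w, transpose=False):
    # Caffe convention assumed: with transpose == false, W is stored as
    # (num_output, K) and y = x @ W^T; with transpose == true, W is
    # (K, num_output) and y = x @ W.
    return x.dot(w) if transpose else x.dot(w.T)

x = np.random.rand(4, 16).astype('float32')
w = np.random.rand(8, 16).astype('float32')  # (num_output, K)
y = inner_product_ref(x, w)                  # shape (4, 8)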
message InputParameter {
......@@ -860,28 +856,28 @@ message LogParameter {
// LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
// Or if base is set to the default (-1), base is set to e,
// so y = ln(shift + scale * x) = log_e(shift + scale * x)
optional float base = 1 [default = -1.0];
optional float scale = 2 [default = 1.0];
optional float shift = 3 [default = 0.0];
optional float base = 1 [ default = -1.0 ];
optional float scale = 2 [ default = 1.0 ];
optional float shift = 3 [ default = 0.0 ];
}
// Message that stores parameters used by LRNLayer
message LRNParameter {
optional uint32 local_size = 1 [default = 5];
optional float alpha = 2 [default = 1.];
optional float beta = 3 [default = 0.75];
optional uint32 local_size = 1 [ default = 5 ];
optional float alpha = 2 [ default = 1. ];
optional float beta = 3 [ default = 0.75 ];
enum NormRegion {
ACROSS_CHANNELS = 0;
WITHIN_CHANNEL = 1;
}
optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
optional float k = 5 [default = 1.];
optional NormRegion norm_region = 4 [ default = ACROSS_CHANNELS ];
optional float k = 5 [ default = 1. ];
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 6 [default = DEFAULT];
optional Engine engine = 6 [ default = DEFAULT ];
}
message MemoryDataParameter {
......@@ -893,18 +889,16 @@ message MemoryDataParameter {
message MVNParameter {
// This parameter can be set to false to normalize mean only
optional bool normalize_variance = 1 [default = true];
optional bool normalize_variance = 1 [ default = true ];
// This parameter can be set to true to perform DNN-like MVN
optional bool across_channels = 2 [default = false];
optional bool across_channels = 2 [ default = false ];
// Epsilon for not dividing by zero while normalizing variance
optional float eps = 3 [default = 1e-9];
optional float eps = 3 [ default = 1e-9 ];
}
message ParameterParameter {
optional BlobShape shape = 1;
}
message ParameterParameter { optional BlobShape shape = 1; }
message PoolingParameter {
enum PoolMethod {
......@@ -912,45 +906,45 @@ message PoolingParameter {
AVE = 1;
STOCHASTIC = 2;
}
optional PoolMethod pool = 1 [default = MAX]; // The pooling method
optional PoolMethod pool = 1 [ default = MAX ]; // The pooling method
// Pad, kernel size, and stride are all given as a single value for equal
// dimensions in height and width or as Y, X pairs.
optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
optional uint32 pad_h = 9 [default = 0]; // The padding height
optional uint32 pad_w = 10 [default = 0]; // The padding width
optional uint32 kernel_size = 2; // The kernel size (square)
optional uint32 kernel_h = 5; // The kernel height
optional uint32 kernel_w = 6; // The kernel width
optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
optional uint32 stride_h = 7; // The stride height
optional uint32 stride_w = 8; // The stride width
optional uint32 pad = 4 [ default = 0 ]; // The padding size (equal in Y, X)
optional uint32 pad_h = 9 [ default = 0 ]; // The padding height
optional uint32 pad_w = 10 [ default = 0 ]; // The padding width
optional uint32 kernel_size = 2; // The kernel size (square)
optional uint32 kernel_h = 5; // The kernel height
optional uint32 kernel_w = 6; // The kernel width
optional uint32 stride = 3 [ default = 1 ]; // The stride (equal in Y, X)
optional uint32 stride_h = 7; // The stride height
optional uint32 stride_w = 8; // The stride width
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 11 [default = DEFAULT];
optional Engine engine = 11 [ default = DEFAULT ];
// If global_pooling then it will pool over the size of the bottom by doing
// kernel_h = bottom->height and kernel_w = bottom->width
optional bool global_pooling = 12 [default = false];
optional bool global_pooling = 12 [ default = false ];
}
// Message that stores parameters used by ROIPoolingLayer
message ROIPoolingParameter {
// Pad, kernel size, and stride are all given as a single value for equal
// dimensions in height and width or as Y, X pairs.
optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
optional uint32 pooled_h = 1 [ default = 0 ]; // The pooled output height
optional uint32 pooled_w = 2 [ default = 0 ]; // The pooled output width
// Multiplicative spatial scale factor to translate ROI coords from their
// input scale to the scale used when pooling
optional float spatial_scale = 3 [default = 1];
optional float spatial_scale = 3 [ default = 1 ];
}
message PowerParameter {
// PowerLayer computes outputs y = (shift + scale * x) ^ power.
optional float power = 1 [default = 1.0];
optional float scale = 2 [default = 1.0];
optional float shift = 3 [default = 0.0];
optional float power = 1 [ default = 1.0 ];
optional float scale = 2 [ default = 1.0 ];
optional float shift = 3 [ default = 0.0 ];
}
message PythonParameter {
......@@ -960,11 +954,11 @@ message PythonParameter {
// in Python before calling the `setup()` method. This could be a number,
// string, dictionary in Python dict format, JSON, etc. You may parse this
// string in `setup` method and use it in `forward` and `backward`.
optional string param_str = 3 [default = ''];
// Whether this PythonLayer is shared among worker solvers during data parallelism.
// If true, each worker solver sequentially run forward from this layer.
// This value should be set true if you are using it as a data layer.
optional bool share_in_parallel = 4 [default = false];
optional string param_str = 3 [ default = ''];
// Whether this PythonLayer is shared among worker solvers during data
// parallelism. If true, each worker solver sequentially run forward from this
// layer. This value should be set true if you are using it as a data layer.
optional bool share_in_parallel = 4 [ default = false ];
}
// Message that stores parameters used by ReductionLayer
......@@ -976,7 +970,7 @@ message ReductionParameter {
MEAN = 4;
}
optional ReductionOp operation = 1 [default = SUM]; // reduction operation
optional ReductionOp operation = 1 [ default = SUM ]; // reduction operation
// The first axis to reduce to a scalar -- may be negative to index from the
// end (e.g., -1 for the last axis).
......@@ -991,9 +985,9 @@ message ReductionParameter {
// If axis == 0 (the default), the output Blob always has the empty shape
// (count 1), performing reduction across the entire input --
// often useful for creating new loss functions.
optional int32 axis = 2 [default = 0];
optional int32 axis = 2 [ default = 0 ];
optional float coeff = 3 [default = 1.0]; // coefficient for output
optional float coeff = 3 [ default = 1.0 ]; // coefficient for output
}
// Message that stores parameters used by ReLULayer
......@@ -1003,13 +997,13 @@ message ReLUParameter {
// Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
// improve neural network acoustic models. In ICML Workshop on Deep Learning
// for Audio, Speech, and Language Processing.
optional float negative_slope = 1 [default = 0];
optional float negative_slope = 1 [ default = 0 ];
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 2 [default = DEFAULT];
optional Engine engine = 2 [ default = DEFAULT ];
}
message ReshapeParameter {
......@@ -1072,8 +1066,8 @@ message ReshapeParameter {
// reshape_param { shape { dim: 2 dim: 1 dim: 8 } }
// reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 }
//
optional int32 axis = 2 [default = 0];
optional int32 num_axes = 3 [default = -1];
optional int32 axis = 2 [ default = 0 ];
optional int32 num_axes = 3 [ default = -1 ];
}
message ScaleParameter {
......@@ -1090,7 +1084,7 @@ message ScaleParameter {
// (axis == 3 == -1) 60
// Furthermore, bottom[1] may have the empty shape (regardless of the value of
// "axis") -- a scalar multiplier.
optional int32 axis = 1 [default = 1];
optional int32 axis = 1 [ default = 1 ];
// (num_axes is ignored unless just one bottom is given and the scale is
// a learned parameter of the layer. Otherwise, num_axes is determined by the
......@@ -1098,7 +1092,7 @@ message ScaleParameter {
// The number of axes of the input (bottom[0]) covered by the scale
// parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
// Set num_axes := 0, to multiply with a zero-axis Blob: a scalar.
optional int32 num_axes = 2 [default = 1];
optional int32 num_axes = 2 [ default = 1 ];
// (filler is ignored unless just one bottom is given and the scale is
// a learned parameter of the layer.)
......@@ -1109,7 +1103,7 @@ message ScaleParameter {
// Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
// may be more efficient). Initialized with bias_filler (defaults to 0).
optional bool bias_term = 4 [default = false];
optional bool bias_term = 4 [ default = false ];
optional FillerParameter bias_filler = 5;
}
......@@ -1119,18 +1113,18 @@ message SigmoidParameter {
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 1 [default = DEFAULT];
optional Engine engine = 1 [ default = DEFAULT ];
}
message SliceParameter {
// The axis along which to slice -- may be negative to index from the end
// (e.g., -1 for the last axis).
// By default, SliceLayer concatenates blobs along the "channels" axis (1).
optional int32 axis = 3 [default = 1];
optional int32 axis = 3 [ default = 1 ];
repeated uint32 slice_point = 2;
// DEPRECATED: alias for "axis" -- does not support negative indexing.
optional uint32 slice_dim = 1 [default = 1];
optional uint32 slice_dim = 1 [ default = 1 ];
}
// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
......@@ -1140,12 +1134,12 @@ message SoftmaxParameter {
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 1 [default = DEFAULT];
optional Engine engine = 1 [ default = DEFAULT ];
// The axis along which to perform the softmax -- may be negative to index
// from the end (e.g., -1 for the last axis).
// Any other axes will be evaluated as independent softmaxes.
optional int32 axis = 2 [default = 1];
optional int32 axis = 2 [ default = 1 ];
}
message TanHParameter {
......@@ -1154,13 +1148,13 @@ message TanHParameter {
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 1 [default = DEFAULT];
optional Engine engine = 1 [ default = DEFAULT ];
}
// Message that stores parameters used by TileLayer
message TileParameter {
// The index of the axis to tile.
optional int32 axis = 1 [default = 1];
optional int32 axis = 1 [ default = 1 ];
// The number of copies (tiles) of the blob to output.
optional int32 tiles = 2;
......@@ -1170,7 +1164,7 @@ message TileParameter {
// Message that stores parameters used by ThresholdLayer
message ThresholdParameter {
optional float threshold = 1 [default = 0]; // Strictly positive values
optional float threshold = 1 [ default = 0 ]; // Strictly positive values
}
message WindowDataParameter {
......@@ -1179,31 +1173,31 @@ message WindowDataParameter {
// For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried
// out before scaling.
optional float scale = 2 [default = 1];
optional float scale = 2 [ default = 1 ];
optional string mean_file = 3;
// Specify the batch size.
optional uint32 batch_size = 4;
// Specify if we would like to randomly crop an image.
optional uint32 crop_size = 5 [default = 0];
optional uint32 crop_size = 5 [ default = 0 ];
// Specify if we want to randomly mirror data.
optional bool mirror = 6 [default = false];
optional bool mirror = 6 [ default = false ];
// Foreground (object) overlap threshold
optional float fg_threshold = 7 [default = 0.5];
optional float fg_threshold = 7 [ default = 0.5 ];
// Background (non-object) overlap threshold
optional float bg_threshold = 8 [default = 0.5];
optional float bg_threshold = 8 [ default = 0.5 ];
// Fraction of batch that should be foreground objects
optional float fg_fraction = 9 [default = 0.25];
optional float fg_fraction = 9 [ default = 0.25 ];
// Amount of contextual padding to add around a window
// (used only by the window_data_layer)
optional uint32 context_pad = 10 [default = 0];
optional uint32 context_pad = 10 [ default = 0 ];
// Mode for cropping out a detection window
// warp: cropped window is warped to a fixed size and aspect ratio
// square: the tightest square around the window is cropped
optional string crop_mode = 11 [default = "warp"];
optional string crop_mode = 11 [ default = "warp" ];
// cache_images: will load all images in memory for faster access
optional bool cache_images = 12 [default = false];
optional bool cache_images = 12 [ default = false ];
// append root_folder to locate images
optional string root_folder = 13 [default = ""];
optional string root_folder = 13 [ default = "" ];
}
message SPPParameter {
......@@ -1213,13 +1207,13 @@ message SPPParameter {
STOCHASTIC = 2;
}
optional uint32 pyramid_height = 1;
optional PoolMethod pool = 2 [default = MAX]; // The pooling method
optional PoolMethod pool = 2 [ default = MAX ]; // The pooling method
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 6 [default = DEFAULT];
optional Engine engine = 6 [ default = DEFAULT ];
}
// DEPRECATED: use LayerParameter.
......@@ -1323,40 +1317,40 @@ message V0LayerParameter {
// Parameters to specify layers with inner products.
optional uint32 num_output = 3; // The number of outputs for the layer
optional bool biasterm = 4 [default = true]; // whether to have bias terms
optional FillerParameter weight_filler = 5; // The filler for the weight
optional FillerParameter bias_filler = 6; // The filler for the bias
optional uint32 pad = 7 [default = 0]; // The padding size
optional uint32 kernelsize = 8; // The kernel size
optional uint32 group = 9 [default = 1]; // The group size for group conv
optional uint32 stride = 10 [default = 1]; // The stride
optional bool biasterm = 4 [ default = true ]; // whether to have bias terms
optional FillerParameter weight_filler = 5; // The filler for the weight
optional FillerParameter bias_filler = 6; // The filler for the bias
optional uint32 pad = 7 [ default = 0 ]; // The padding size
optional uint32 kernelsize = 8; // The kernel size
optional uint32 group = 9 [ default = 1 ]; // The group size for group conv
optional uint32 stride = 10 [ default = 1 ]; // The stride
enum PoolMethod {
MAX = 0;
AVE = 1;
STOCHASTIC = 2;
}
optional PoolMethod pool = 11 [default = MAX]; // The pooling method
optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio
optional PoolMethod pool = 11 [ default = MAX ]; // The pooling method
optional float dropout_ratio = 12 [ default = 0.5 ]; // dropout ratio
optional uint32 local_size = 13 [default = 5]; // for local response norm
optional float alpha = 14 [default = 1.]; // for local response norm
optional float beta = 15 [default = 0.75]; // for local response norm
optional float k = 22 [default = 1.];
optional uint32 local_size = 13 [ default = 5 ]; // for local response norm
optional float alpha = 14 [ default = 1. ]; // for local response norm
optional float beta = 15 [ default = 0.75 ]; // for local response norm
optional float k = 22 [ default = 1. ];
// For data layers, specify the data source
optional string source = 16;
// For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried
// out before scaling.
optional float scale = 17 [default = 1];
optional float scale = 17 [ default = 1 ];
optional string meanfile = 18;
// For data layers, specify the batch size.
optional uint32 batchsize = 19;
// For data layers, specify if we would like to randomly crop an image.
optional uint32 cropsize = 20 [default = 0];
optional uint32 cropsize = 20 [ default = 0 ];
// For data layers, specify if we want to randomly mirror data.
optional bool mirror = 21 [default = false];
optional bool mirror = 21 [ default = false ];
// The blobs containing the numeric parameters of the layer
repeated BlobProto blobs = 50;
......@@ -1370,41 +1364,41 @@ message V0LayerParameter {
// to avoid all asynchronous sgd clients to start at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database.
optional uint32 rand_skip = 53 [default = 0];
optional uint32 rand_skip = 53 [ default = 0 ];
// Fields related to detection (det_*)
// foreground (object) overlap threshold
optional float det_fg_threshold = 54 [default = 0.5];
optional float det_fg_threshold = 54 [ default = 0.5 ];
// background (non-object) overlap threshold
optional float det_bg_threshold = 55 [default = 0.5];
optional float det_bg_threshold = 55 [ default = 0.5 ];
// Fraction of batch that should be foreground objects
optional float det_fg_fraction = 56 [default = 0.25];
optional float det_fg_fraction = 56 [ default = 0.25 ];
// optional bool OBSOLETE_can_clobber = 57 [default = true];
// Amount of contextual padding to add around a window
// (used only by the window_data_layer)
optional uint32 det_context_pad = 58 [default = 0];
optional uint32 det_context_pad = 58 [ default = 0 ];
// Mode for cropping out a detection window
// warp: cropped window is warped to a fixed size and aspect ratio
// square: the tightest square around the window is cropped
optional string det_crop_mode = 59 [default = "warp"];
optional string det_crop_mode = 59 [ default = "warp" ];
// For ReshapeLayer, one needs to specify the new dimensions.
optional int32 new_num = 60 [default = 0];
optional int32 new_channels = 61 [default = 0];
optional int32 new_height = 62 [default = 0];
optional int32 new_width = 63 [default = 0];
optional int32 new_num = 60 [ default = 0 ];
optional int32 new_channels = 61 [ default = 0 ];
optional int32 new_height = 62 [ default = 0 ];
optional int32 new_width = 63 [ default = 0 ];
// Whether or not ImageLayer should shuffle the list of files at every epoch.
// It will also resize images if new_height or new_width are not zero.
optional bool shuffle_images = 64 [default = false];
optional bool shuffle_images = 64 [ default = false ];
// For ConcatLayer, one needs to specify the dimension for concatenation, and
// the other dimensions must be the same for all the bottom blobs.
// By default it will concatenate blobs along the channels dimension.
optional uint32 concat_dim = 65 [default = 1];
optional uint32 concat_dim = 65 [ default = 1 ];
optional HDF5OutputParameter hdf5_output_param = 1001;
}
......@@ -1416,14 +1410,14 @@ message PReLUParameter {
// Initial value of a_i. Default is a_i=0.25 for all i.
optional FillerParameter filler = 1;
// Whether or not slope paramters are shared across channels.
optional bool channel_shared = 2 [default = false];
optional bool channel_shared = 2 [ default = false ];
}
message SmoothL1LossParameter {
// SmoothL1Loss(x) =
// 0.5 * (sigma * x) ** 2 -- if x < 1.0 / sigma / sigma
// |x| - 0.5 / sigma / sigma -- otherwise
optional float sigma = 1 [default = 1];
optional float sigma = 1 [ default = 1 ];
}
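
A NumPy reference for the piecewise formula above; note the branch condition is written here with |x|, following the common Fast R-CNN formulation, which the comment leaves implicit:

import numpy as np

def smooth_l1_ref(x, sigma=1.0):
    # 0.5 * (sigma * x)^2   if |x| < 1 / sigma^2
    # |x| - 0.5 / sigma^2   otherwise
    beta = 1.0 / (sigma * sigma)
    ax = np.abs(x)
    return np.where(ax < beta, 0.5 * np.square(sigma * x), ax - 0.5 * beta)

smooth_l1_ref(np.array([-2.0, 0.1, 3.0]))  # -> [1.5, 0.005, 2.5]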
message PermuteParameter {
......@@ -1434,20 +1428,18 @@ message PermuteParameter {
}
message NormalizeParameter {
optional bool across_spatial = 1 [default = true];
optional bool across_spatial = 1 [ default = true ];
// Initial value of scale. Default is 1.0 for all
optional FillerParameter scale_filler = 2;
// Whether or not scale parameters are shared across channels.
optional bool channel_shared = 3 [default = true];
optional bool channel_shared = 3 [ default = true ];
// Epsilon for not dividing by zero while normalizing variance
optional float eps = 4 [default = 1e-5];
optional float eps = 4 [ default = 1e-12 ];
}
message GroupNormParameter {
optional float eps = 1 [default = 1e-5];
optional int32 group = 2 [default = 32];
optional float eps = 1 [ default = 1e-5 ];
optional int32 group = 2 [ default = 32 ];
}
message CastParameter {
optional string dtype = 1;
}
message CastParameter { optional string dtype = 1; }
......@@ -9,7 +9,7 @@
#
# ------------------------------------------------------------
"""Implementation for the ``Solver`` C++ class."""
"""The solver to update parameters."""
from __future__ import absolute_import
from __future__ import division
......@@ -19,9 +19,12 @@ import time
from google.protobuf import text_format
from dragon import updaters
from dragon.core.autograph import def_function
from dragon.core.framework import workspace
from dragon.core.training.adam import Adam
from dragon.core.training.rmsprop import RMSprop
from dragon.core.training.sgd import SGD
from dragon.core.training.sgd import Nesterov
from dragon.vm.caffe.net import Net
from dragon.vm.caffe.proto import caffe_pb2
......@@ -47,10 +50,10 @@ class Solver(object):
if self._param.iter_size > 1:
raise NotImplementedError('GradientAccum is deprecated.')
self._arguments = {
'scale_gradient': 1. / self._param.iter_size,
'clip_gradient': float(self._param.clip_gradients),
'l2_decay': float(self._param.weight_decay)
if str(self._param.regularization_type) == 'L2' else -1.,
'scale': 1. / self._param.iter_size,
'clip_norm': float(self._param.clip_gradients),
'weight_decay': float(self._param.weight_decay)
if str(self._param.regularization_type) == 'L2' else 0,
}
self._optimizer = None
self._net, self._test_nets = None, []
......@@ -415,7 +418,7 @@ class AdamSolver(Solver):
self._arguments['beta1'] = self._param.momentum
self._arguments['beta2'] = self._param.momentum2
self._arguments['eps'] = self._param.delta
self._optimizer = updaters.Adam(**self._arguments)
self._optimizer = Adam(**self._arguments)
class NesterovSolver(Solver):
......@@ -447,7 +450,7 @@ class NesterovSolver(Solver):
super(NesterovSolver, self).__init__(solver_file, is_root)
self._arguments['base_lr'] = self._param.base_lr
self._arguments['momentum'] = self._param.momentum
self._optimizer = updaters.Nesterov(**self._arguments)
self._optimizer = Nesterov(**self._arguments)
class RMSPropSolver(Solver):
......@@ -481,7 +484,7 @@ class RMSPropSolver(Solver):
self._arguments['base_lr'] = self._param.base_lr
self._arguments['decay'] = self._param.rms_decay
self._arguments['eps'] = self._param.delta
self._optimizer = updaters.RMSProp(**self._arguments)
self._optimizer = RMSprop(**self._arguments)
class SGDSolver(Solver):
......@@ -513,4 +516,4 @@ class SGDSolver(Solver):
super(SGDSolver, self).__init__(solver_file, is_root)
self._arguments['base_lr'] = self._param.base_lr
self._arguments['momentum'] = self._param.momentum
self._optimizer = updaters.SGD(**self._arguments)
self._optimizer = SGD(**self._arguments)
......@@ -9,9 +9,6 @@ dragon.math
`abs(...) <math/abs.html>`_
: Compute the absolute value of input.
`accumulate(...) <math/accumulate.html>`_
: Compute the element-wise accumulation from input to output.
`add(...) <math/add.html>`_
: Compute the element-wise addition.
......@@ -24,6 +21,9 @@ dragon.math
`argmin(...) <math/argmin.html>`_
: Compute the indices of minimum elements along the given axis.
`axpby(...) <math/axpby.html>`_
: Compute the element-wise addition from input to output.
`ceil(...) <math/ceil.html>`_
: Compute the smallest integer not less than input.
......@@ -96,9 +96,6 @@ dragon.math
`moments(...) <math/moments.html>`_
: Compute the mean and variance of input along the given axes.
`moving_average(...) <math/moving_average.html>`_
: Compute the moving average of input to output.
`mul(...) <math/mul.html>`_
: Compute the element-wise multiplication.
......@@ -148,11 +145,11 @@ dragon.math
:hidden:
math/abs
math/accumulate
math/add
math/affine
math/argmax
math/argmin
math/axpby
math/ceil
math/clip
math/cos
......@@ -177,7 +174,6 @@ dragon.math
math/min
math/minimum
math/moments
math/moving_average
math/mul
math/negative
math/not_equal
......
accumulate
==========
axpby
=====
.. autofunction:: dragon.math.accumulate
.. autofunction:: dragon.math.axpby
.. raw:: html
......
moving_average
==============
.. autofunction:: dragon.math.moving_average
.. raw:: html
<style>
h1:before {
content: "dragon.math.";
color: #103d3e;
}
</style>
dragon.updaters
===============
dragon.optimizers
=================
.. only:: html
Classes
-------
`class Adam <updaters/Adam.html>`_
: The updater which implements Adam algorithm.
`class Adam <optimizers/Adam.html>`_
: The optimizer to apply the Adam algorithm.
`[Kingma & Ba, 2014] <https://arxiv.org/abs/1412.6980>`_.
`class Nesterov <updaters/Nesterov.html>`_
: The updater which implements NesterovSGD algorithm.
`class Nesterov <optimizers/Nesterov.html>`_
: The optimizer to apply the NesterovSGD algorithm.
`[Sutskever et al., 2013] <http://www.cs.toronto.edu/~hinton/absps/momentum.pdf>`_.
`class RMSProp <updaters/RMSProp.html>`_
: The updater which implements RMSprop algorithm.
`class RMSProp <optimizers/RMSprop.html>`_
: The optimizer to apply the RMSprop algorithm.
`[Hinton et al., 2013] <http://www.cs.utoronto.ca/~bonner/courses/2016s/csc321/lectures/lec6.pdf>`_.
`class SGD <updaters/SGD.html>`_
: The updater which implements MomentumSGD algorithm.
`class SGD <optimizers/SGD.html>`_
: The optimizer to apply the MomentumSGD algorithm.
`[Polyak, 1964] <https://doi.org/10.1016/0041-5553(64)90137-5>`_.
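As a quick orientation for the renamed classes, a hedged usage sketch; the keyword arguments simply mirror what the solver classes in this commit pass ('base_lr', 'momentum', 'beta1', 'beta2', 'eps') and are assumptions about the public signature rather than documented API:

from dragon import optimizers

# Hypothetical construction; argument names are taken from the solver hunks above.
sgd = optimizers.SGD(base_lr=0.01, momentum=0.9)
adam = optimizers.Adam(base_lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8)
# Both expose Optimizer.apply_gradients(...), documented in the method sections below.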
.. toctree::
:hidden:
updaters/Adam
updaters/Nesterov
updaters/RMSProp
updaters/SGD
optimizers/Adam
optimizers/Nesterov
optimizers/Optimizer
optimizers/RMSprop
optimizers/SGD
.. raw:: html
......
Adam
====
.. autoclass:: dragon.updaters.Adam
.. autoclass:: dragon.optimizers.Adam
__init__
--------
.. automethod:: dragon.updaters.Adam.__init__
.. automethod:: dragon.optimizers.Adam.__init__
Methods
-------
apply_gradients
################
.. automethod:: dragon.updaters.Updater.apply_gradients
.. automethod:: dragon.optimizers.Optimizer.apply_gradients
:noindex:
.. raw:: html
<style>
h1:before {
content: "dragon.updaters.";
content: "dragon.optimizers.";
color: #103d3e;
}
</style>
Nesterov
========
.. autoclass:: dragon.updaters.Nesterov
.. autoclass:: dragon.optimizers.Nesterov
__init__
--------
.. automethod:: dragon.updaters.Nesterov.__init__
.. automethod:: dragon.optimizers.Nesterov.__init__
Methods
-------
apply_gradients
################
.. automethod:: dragon.updaters.Updater.apply_gradients
.. automethod:: dragon.optimizers.Optimizer.apply_gradients
:noindex:
.. raw:: html
<style>
h1:before {
content: "dragon.updaters.";
content: "dragon.optimizers.";
color: #103d3e;
}
</style>
Optimizer
=========
.. autoclass:: dragon.optimizers.Optimizer
__init__
--------
.. automethod:: dragon.optimizers.Optimizer.__init__
Methods
-------
apply_gradients
################
.. automethod:: dragon.optimizers.Optimizer.apply_gradients
.. raw:: html
<style>
h1:before {
content: "dragon.optimizers.";
color: #103d3e;
}
</style>
RMSProp
RMSprop
=======
.. autoclass:: dragon.updaters.RMSProp
.. autoclass:: dragon.optimizers.RMSprop
__init__
--------
.. automethod:: dragon.updaters.RMSProp.__init__
.. automethod:: dragon.optimizers.RMSprop.__init__
Methods
-------
apply_gradients
################
.. automethod:: dragon.updaters.Updater.apply_gradients
.. automethod:: dragon.optimizers.Optimizer.apply_gradients
:noindex:
.. raw:: html
<style>
h1:before {
content: "dragon.updaters.";
content: "dragon.optimizers.";
color: #103d3e;
}
</style>
SGD
===
.. autoclass:: dragon.updaters.SGD
.. autoclass:: dragon.optimizers.SGD
__init__
--------
.. automethod:: dragon.updaters.SGD.__init__
.. automethod:: dragon.optimizers.SGD.__init__
Methods
-------
apply_gradients
################
.. automethod:: dragon.updaters.Updater.apply_gradients
.. automethod:: dragon.optimizers.Optimizer.apply_gradients
:noindex:
.. raw:: html
<style>
h1:before {
content: "dragon.updaters.";
content: "dragon.optimizers.";
color: #103d3e;
}
</style>
......@@ -14,15 +14,15 @@ For using it, import as follows:
However, it alone will not help you much if you do not want to learn it.
We have extended it with following programming styles:
To address this, we have designed several diverse programming styles for you:
Dragon
######
*Dragon* takes a very light-weight programming style.
*Dragon* is designed as a light-weight but professional style.
Our goal is to reduce unnecessary structures and interfaces. Therefore,
besides feeding or fetching data, the only remaining task is to design a function.
Native interfaces are encouraged to manipulate the backend engine
to perform the computation flexibly with data feeding or fetching.
This style involves the following components:
......@@ -38,15 +38,15 @@ Dragon
* `dragon.math <dragon/math.html>`_
* `dragon.metrics <dragon/metrics.html>`_
* `dragon.nn <dragon/nn.html>`_
* `dragon.optimizers <dragon/optimizers.html>`_
* `dragon.random <dragon/random.html>`_
* `dragon.updaters <dragon/updaters.html>`_
* `dragon.vision <dragon/vision.html>`_
* `dragon.workspace <dragon/workspace.html>`_
* `dragon.vision <dragon/vision.html>`_
Caffe
#####
*Caffe* is one of the most famous deep learning framework for Computer Vision.
*Caffe* is one of the most famous frameworks for computer vision.
Our work is very different from the official Python wrappers, a.k.a. the *PyCaffe*,
which comes from the exports of *BoostPython*
......@@ -102,7 +102,7 @@ PyTorch
*PyTorch* provides straightforward operations for research prototyping.
To bridge it, our *JIT* traces and dispatches the expressions,
To bridge it, our *JIT* traces and dispatches the operations,
as well as rewriting the *GC* (Garbage Collection) to reuse
memory and operators in turn.
......@@ -168,52 +168,52 @@ Modules
.. only:: html
`Module autograph <dragon/autograph.html>`_
: Public API for ``dragon.autograph`` namespace.
: Native API for ``dragon.autograph`` namespace.
`Module bitwise <dragon/bitwise.html>`_
: Public API for ``dragon.bitwise`` namespace.
: Native API for ``dragon.bitwise`` namespace.
`Module cuda <dragon/cuda.html>`_
: Public API for ``dragon.cuda`` namespace.
: Native API for ``dragon.cuda`` namespace.
`Module distributed <dragon/distributed.html>`_
: Public API for ``dragon.distributed`` namespace.
: Native API for ``dragon.distributed`` namespace.
`Module dlpack <dragon/dlpack.html>`_
: Public API for ``dragon.dlpack`` namespace.
: Native API for ``dragon.dlpack`` namespace.
`Module io <dragon/io.html>`_
: Public API for ``dragon.io`` namespace.
: Native API for ``dragon.io`` namespace.
`Module logging <dragon/logging.html>`_
: Public API for ``dragon.logging`` namespace.
: Native API for ``dragon.logging`` namespace.
`Module losses <dragon/losses.html>`_
: Public API for ``dragon.losses`` namespace.
: Native API for ``dragon.losses`` namespace.
`Module math <dragon/math.html>`_
: Public API for ``dragon.math`` namespace.
: Native API for ``dragon.math`` namespace.
`Module metrics <dragon/metrics.html>`_
: Public API for ``dragon.metrics`` namespace.
: Native API for ``dragon.metrics`` namespace.
`Module nn <dragon/nn.html>`_
: Public API for ``dragon.nn`` namespace.
: Native API for ``dragon.nn`` namespace.
`Module optimizers <dragon/optimizers.html>`_
: Native API for ``dragon.optimizers`` namespace.
`Module random <dragon/random.html>`_
: Public API for ``dragon.random`` namespace.
: Native API for ``dragon.random`` namespace.
`Module updaters <dragon/updaters.html>`_
: Public API for ``dragon.updaters`` namespace.
`Module workspace <dragon/workspace.html>`_
: Native API for ``dragon.workspace`` namespace.
`Module vision <dragon/vision.html>`_
: Public API for ``dragon.vision`` namespace.
: Native API for ``dragon.vision`` namespace.
`Module workspace <dragon/workspace.html>`_
: Public API for ``dragon.workspace`` namespace.
: Native API for ``dragon.workspace`` namespace.
`Module vm.caffe <caffe.html>`_
: Virtual API for ``caffe`` namespace.
......@@ -317,10 +317,10 @@ Modules
dragon/math
dragon/metrics
dragon/nn
dragon/optimizers
dragon/random
dragon/updaters
dragon/vision
dragon/workspace
dragon/vision
caffe
caffe/layers
dali
......
......@@ -30,9 +30,6 @@ vm.torch
`abs(...) <torch/abs.html>`_
: Compute the absolute value of input.
`accumulate(...) <torch/accumulate.html>`_
: Compute the element-wise accumulation from input to output.
`add(...) <torch/add.html>`_
: Compute the element-wise addition.
......@@ -45,6 +42,9 @@ vm.torch
`argmin(...) <torch/argmin.html>`_
: Return the indices of minimum elements along the given axis.
`axpby(...) <torch/axpby.html>`_
: Compute the element-wise addition from input to output.
`bitwise_not(...) <torch/bitwise_not.html>`_
: Compute the element-wise NOT bitwise operation.
......@@ -254,11 +254,11 @@ vm.torch
:hidden:
torch/abs
torch/accumulate
torch/add
torch/arange
torch/argmax
torch/argmin
torch/axpby
torch/bitwise_not
torch/bitwise_xor
torch/cat
......
accumulate
==========
axpby
=====
.. autofunction:: dragon.vm.torch.accumulate
.. autofunction:: dragon.vm.torch.axpby
.. raw:: html
......
......@@ -50,18 +50,18 @@ class CUDAObject {
*/
if (stream) cudaStreamDestroy(stream);
}
for (auto& e : cublas_handles_[i])
if (e) {
CUBLAS_CHECK(cublasDestroy_v2(e));
for (auto& handle : cublas_handles_[i])
if (handle) {
CUBLAS_CHECK(cublasDestroy(handle));
}
#ifdef USE_CUDNN
for (auto& e : cudnn_handles_[i])
if (e) {
CUDNN_CHECK(cudnnDestroy(e));
for (auto& handle : cudnn_handles_[i])
if (handle) {
CUDNN_CHECK(cudnnDestroy(handle));
}
#endif
#ifdef USE_NCCL
for (auto& e : nccl_comms_[i]) {
for (auto& comm : nccl_comms_[i]) {
/*!
* Temporarily disable the comm destroying,
* to avoid an unhandled error.
......@@ -74,17 +74,18 @@ class CUDAObject {
/*! \brief Return the specified cublas handle */
cublasHandle_t cublas_handle(int device_id, int stream_id) {
auto& handles = cublas_handles_[device_id];
if (handles.size() <= (unsigned)stream_id)
if (handles.size() <= (unsigned)stream_id) {
handles.resize(stream_id + 1, nullptr);
}
if (!handles[stream_id]) {
CUDADeviceGuard guard(device_id);
CUBLAS_CHECK(cublasCreate_v2(&handles[stream_id]));
CUBLAS_CHECK(
cublasSetStream_v2(handles[stream_id], stream(device_id, stream_id)));
CUBLAS_CHECK(cublasCreate(&handles[stream_id]));
auto& handle = handles[stream_id];
CUBLAS_CHECK(cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST));
CUBLAS_CHECK(cublasSetStream(handle, stream(device_id, stream_id)));
#if CUDA_VERSION >= 9000
if (TENSOR_CORE_AVAILABLE()) {
CUBLAS_CHECK(
cublasSetMathMode(handles[stream_id], CUBLAS_TENSOR_OP_MATH));
CUBLAS_CHECK(cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH));
}
#endif
}
......@@ -95,13 +96,14 @@ class CUDAObject {
#ifdef USE_CUDNN
cudnnHandle_t cudnn_handle(int device_id, int stream_id) {
auto& handles = cudnn_handles_[device_id];
if (handles.size() <= (unsigned)stream_id)
if (handles.size() <= (unsigned)stream_id) {
handles.resize(stream_id + 1, nullptr);
}
if (!handles[stream_id]) {
CUDADeviceGuard guard(device_id);
CUDNN_CHECK(cudnnCreate(&handles[stream_id]));
CUDNN_CHECK(
cudnnSetStream(handles[stream_id], stream(device_id, stream_id)));
auto& handle = handles[stream_id];
CUDNN_CHECK(cudnnSetStream(handle, stream(device_id, stream_id)));
}
return handles[stream_id];
}
......@@ -144,7 +146,7 @@ class CUDAObject {
if (!streams[stream_id]) {
CUDADeviceGuard guard(device_id);
unsigned int flags =
!stream_id ? cudaStreamDefault : cudaStreamNonBlocking;
stream_id == 0 ? cudaStreamDefault : cudaStreamNonBlocking;
CUDA_CHECK(cudaStreamCreateWithFlags(&streams[stream_id], flags));
}
return streams[stream_id];
......
......@@ -80,7 +80,7 @@ Tensor* OperatorBase::Output(int i, const vec32_t& inputs) {
}
Tensor* OperatorBase::Buffer(const string& name) {
return ws()->CreateTensor(unique_name(name));
return ws()->CreateTensor("/share/buffer/" + handle_ + "/" + name);
}
string OperatorBase::TypeString(const Tensor& tensor, const Set<string>& types)
......
......@@ -133,11 +133,6 @@ class DRAGON_API OperatorBase {
return handle_;
}
/*! \brief Return the unique name in this operator */
const string unique_name(const string& name) const {
return "/mnt/" + handle_ + "/" + name;
}
/*! \brief Return the stored def */
const OperatorDef& def() const {
return def_;
......@@ -268,7 +263,6 @@ OperatorBase* NewOperator(const OperatorDef&, Workspace*);
using OperatorBase::dtype; \
using OperatorBase::data_format; \
using OperatorBase::handle; \
using OperatorBase::unique_name; \
using OperatorBase::def; \
using OperatorBase::ws
......@@ -277,17 +271,18 @@ OperatorBase* NewOperator(const OperatorDef&, Workspace*);
using Operator<Context>::allow_run; \
using Operator<Context>::ctx
#define STORE_INPUT_SPEC(i) \
*(ws()->CreateTensor(unique_name("Input[" + std::to_string(i) + "]")) \
->ReshapeLike(Input(i)) \
#define STORE_INPUT_SPEC(i) \
*(Buffer("X_spec:" + std::to_string(i)) \
->ReshapeLike(Input(i)) \
->set_meta(Input(i).meta()))
#define RESTORE_INPUT_SPEC(i) \
*(ws()->GetTensor(unique_name("Input[" + std::to_string(i) + "]")))
*(ws()->GetTensor( \
"/share/buffer/" + handle() + "/X_spec:" + std::to_string(i)))
/* Dispatchers */
#define XIsType(x, type) x.template IsType<type>()
#define XIsType(X, type) X.template IsType<type>()
template <typename... Types>
struct TensorTypes {};
......
......@@ -53,14 +53,11 @@ __global__ void _EluGrad(
const T* y,
T* dx) {
CUDA_1D_KERNEL_LOOP(i, nthreads) {
dx[i] = dy[i] *
(
#if __CUDA_ARCH__ >= 350
__ldg(y + i) > T(0) ? T(1) : alpha + __ldg(y + i)
dx[i] = dy[i] * (__ldg(y + i) > T(0) ? T(1) : alpha + __ldg(y + i));
#else
y[i] > T(0) ? T(1) : (alpha + y[i])
dx[i] = dy[i] * (y[i] > T(0) ? T(1) : (alpha + y[i]));
#endif
);
}
}
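Both branches of the #if above compute the same ELU backward rule; a scalar reference sketch (using that y = alpha * (exp(x) - 1) for x <= 0, so the local gradient alpha * exp(x) equals alpha + y):

def elu_grad_ref(dy, y, alpha=1.0):
    # Local gradient is 1 in the positive region, alpha + y otherwise.
    return dy * (1.0 if y > 0 else alpha + y)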
......
......@@ -14,28 +14,28 @@ void _Softmax(
const int inner_dim,
const T* x,
T* y) {
int row_ofs, col_ofs, yi;
int row_offset, col_offset, yi;
auto x_stride = axis_dim * inner_dim;
for (int i = 0; i < outer_dim; ++i) {
row_ofs = i * axis_dim * inner_dim;
row_offset = i * axis_dim * inner_dim;
for (int j = 0; j < inner_dim; ++j) {
col_ofs = row_ofs + j;
T val = x[col_ofs];
col_offset = row_offset + j;
T val = x[col_offset];
for (int k = 1; k < axis_dim; ++k) {
yi = col_ofs + k * inner_dim;
yi = col_offset + k * inner_dim;
val = std::max(val, x[yi]);
}
for (int k = 0; k < axis_dim; ++k) {
yi = col_ofs + k * inner_dim;
yi = col_offset + k * inner_dim;
y[yi] = std::exp(x[yi] - val);
}
val = y[col_ofs];
val = y[col_offset];
for (int k = 1; k < axis_dim; ++k) {
yi = col_ofs + k * inner_dim;
yi = col_offset + k * inner_dim;
val += y[yi];
}
for (int k = 0; k < axis_dim; ++k) {
yi = col_ofs + k * inner_dim;
yi = col_offset + k * inner_dim;
y[yi] /= val;
}
}
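The loop above is the standard numerically-stable softmax along the axis dimension: subtract the running maximum, exponentiate, then normalize by the sum. A compact sketch over one (outer, inner) slice:

import math

def softmax_slice(vals):
    m = max(vals)                        # subtract the max for numerical stability
    exps = [math.exp(v - m) for v in vals]
    s = sum(exps)
    return [e / s for e in exps]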
......@@ -60,19 +60,19 @@ void _SoftmaxGrad(
const T* dy,
const T* y,
T* dx) {
int row_ofs, col_ofs, yi;
int row_offset, col_offset, yi;
auto x_stride = axis_dim * inner_dim;
for (int i = 0; i < outer_dim; ++i) {
row_ofs = i * axis_dim * inner_dim;
row_offset = i * axis_dim * inner_dim;
for (int j = 0; j < inner_dim; ++j) {
col_ofs = row_ofs + j;
T val = dy[col_ofs] * y[col_ofs];
col_offset = row_offset + j;
T val = dy[col_offset] * y[col_offset];
for (int k = 1; k < axis_dim; ++k) {
yi = col_ofs + k * inner_dim;
yi = col_offset + k * inner_dim;
val += dy[yi] * y[yi];
}
for (int k = 0; k < axis_dim; ++k) {
yi = col_ofs + k * inner_dim;
yi = col_offset + k * inner_dim;
dx[yi] = (dy[yi] - val) * y[yi];
}
}
......
......@@ -53,11 +53,11 @@ void _CumSumReverse(
CPUContext* ctx) {
const int kStart = axis_dim - 1;
for (int n = 0; n < outer_dim; ++n) {
const int n_ofs = n * axis_dim;
const int n_offset = n * axis_dim;
for (int m = kStart; m >= 0; --m) {
const int nm_ofs = (n_ofs + m) * inner_dim;
const int nm_offset = (n_offset + m) * inner_dim;
for (int k = 0; k < inner_dim; ++k) {
const int i = nm_ofs + k;
const int i = nm_offset + k;
if (m < kStart) {
const int j = i + inner_dim;
y[i] = y[j] + x[exclusive ? j : i];
......
......@@ -25,9 +25,9 @@ void _SetEye(const int n, const int m, const int k, T* y) {
const int n, const int m, const int k, T* y, CPUContext* ctx) { \
math::Set(n* m, cast::to<T>(0.f), y, ctx); \
if (k > 0) { \
_SetEye(n - k, m, k, y); \
if (m - k > 0) _SetEye(m - k, m, k, y); \
} else { \
_SetEye(n + k, m, 0, y - k * m); \
if (n + k > 0) _SetEye(n + k, m, 0, y - k * m); \
} \
}
......
......@@ -20,9 +20,9 @@ __global__ void _SetEye(const int n, const int m, const int k, T* y) {
template <>
__global__ void _SetEye<half>(const int n, const int m, const int k, half* y) {
const half kZero = __float2half(1.f);
const half kOne = __float2half(1.f);
CUDA_1D_KERNEL_LOOP(i, n) {
y[i * m + k + i] = kZero;
y[i * m + k + i] = kOne;
}
}
......@@ -39,26 +39,34 @@ void Eye<float16, CUDAContext>(
CUDAContext* ctx) {
math::Set(n * m, cast::to<float16>(0.f), y, ctx);
if (k > 0) {
_SetEye<<<CUDA_BLOCKS(n - k), CUDA_THREADS, 0, ctx->cuda_stream()>>>(
n - k, m, k, reinterpret_cast<half*>(y));
if (m - k > 0) {
_SetEye<<<CUDA_BLOCKS(m - k), CUDA_THREADS, 0, ctx->cuda_stream()>>>(
m - k, m, k, reinterpret_cast<half*>(y));
}
} else {
_SetEye<<<CUDA_BLOCKS(n + k), CUDA_THREADS, 0, ctx->cuda_stream()>>>(
n + k, m, 0, reinterpret_cast<half*>(y - k * m));
if (n + k > 0) {
_SetEye<<<CUDA_BLOCKS(n + k), CUDA_THREADS, 0, ctx->cuda_stream()>>>(
n + k, m, 0, reinterpret_cast<half*>(y - k * m));
}
}
}
#define DEFINE_KERNEL_LAUNCHER(T) \
template <> \
void Eye<T, CUDAContext>( \
const int n, const int m, const int k, T* y, CUDAContext* ctx) { \
math::Set(n* m, T(0), y, ctx); \
if (k > 0) { \
_SetEye<<<CUDA_BLOCKS(n - k), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
n - k, m, k, y); \
} else { \
_SetEye<<<CUDA_BLOCKS(n + k), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
n + k, m, 0, y - k * m); \
} \
#define DEFINE_KERNEL_LAUNCHER(T) \
template <> \
void Eye<T, CUDAContext>( \
const int n, const int m, const int k, T* y, CUDAContext* ctx) { \
math::Set(n* m, T(0), y, ctx); \
if (k > 0) { \
if (m - k > 0) { \
_SetEye<<<CUDA_BLOCKS(m - k), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
m - k, m, k, y); \
} \
} else { \
if (n + k > 0) { \
_SetEye<<<CUDA_BLOCKS(n + k), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
n + k, m, 0, y - k * m); \
} \
} \
}
DEFINE_KERNEL_LAUNCHER(bool);
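The added m - k > 0 / n + k > 0 guards skip empty launches when the diagonal offset pushes the band completely outside the matrix. A plain-Python reference of the intended result, assuming numpy.eye(n, m, k)-style semantics for the op:

def eye(n, m, k=0):
    """Return an n x m matrix with ones on the k-th diagonal."""
    y = [[0.0] * m for _ in range(n)]
    for i in range(n):
        j = i + k
        if 0 <= j < m:
            y[i][j] = 1.0
    return y

# eye(3, 3, k=4) and eye(3, 3, k=-4) contain no ones at all, which is why the
# kernels above must not launch when the remaining element count is non-positive.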
......
......@@ -35,21 +35,22 @@ void _BroadcastLossGrad<float16>(
} // namespace
#define DEFINE_KERNEL_LAUNCHER(T) \
template <> \
void ReduceLoss<T, CPUContext>( \
const int count, \
const int num_masks, \
const float normalizer, \
const T* x, \
const int* mask, \
T* y, \
CPUContext* ctx) { \
float inv_scale = std::max( \
1e-5F, \
num_masks > 0 ? (float)math::Sum(num_masks, 1.f, mask, ctx) \
: normalizer); \
y[0] = math::Sum(count, 1.f / inv_scale, x, ctx); \
#define DEFINE_KERNEL_LAUNCHER(T) \
template <> \
void ReduceLoss<T, CPUContext>( \
const int count, \
const int num_masks, \
const float normalizer, \
const T* x, \
const int* mask, \
T* y, \
CPUContext* ctx) { \
float inv_scale = std::max( \
1e-5F, \
num_masks > 0 && normalizer < 0.f \
? (float)math::Sum(num_masks, 1.f, mask, ctx) \
: normalizer); \
y[0] = math::Sum(count, 1.f / inv_scale, x, ctx); \
}
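The updated launcher only derives the scale from the mask when no explicit normalizer is given (signalled by a negative value); a minimal sketch of the new normalization rule, with names mirroring the macro arguments:

def reduce_loss(losses, mask=None, normalizer=-1.0):
    # Use the number of valid (non-ignored) elements when normalizer < 0,
    # otherwise trust the caller-provided normalizer.
    if mask is not None and normalizer < 0.0:
        scale = float(sum(mask))
    else:
        scale = normalizer
    scale = max(scale, 1e-5)  # avoid dividing by zero
    return sum(losses) / scale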
#define DEFINE_GRAD_KERNEL_LAUNCHER(T) \
......@@ -64,8 +65,9 @@ void _BroadcastLossGrad<float16>(
CPUContext* ctx) { \
float inv_scale = std::max( \
1e-5F, \
num_masks > 0 ? (float)math::Sum(num_masks, 1.f, mask, ctx) \
: normalizer); \
num_masks > 0 && normalizer < 0.f \
? (float)math::Sum(num_masks, 1.f, mask, ctx) \
: normalizer); \
math::Scale(count, cast::to<float>(dy[0]) / inv_scale, dx, dx, ctx); \
} \
template <> \
......
......@@ -152,15 +152,15 @@ __global__ void _ReduceLossGradWithMask<half>(
template <typename T>
__global__ void _BroadcastLossGrad(
const int nthreads,
const int rows,
const int cols,
const int dim1,
const int dim2,
const T* dy,
T* dx) {
CUDA_1D_KERNEL_LOOP(i, nthreads) {
#if __CUDA_ARCH__ >= 350
dx[i] *= __ldg(dy + (i / rows) * cols + (i % cols));
dx[i] *= __ldg(dy + (i / dim1) * dim2 + (i % dim2));
#else
dx[i] *= dy[(i / rows) * cols + (i % cols)];
dx[i] *= dy[(i / dim1) * dim2 + (i % dim2)];
#endif
}
}
......@@ -168,18 +168,18 @@ __global__ void _BroadcastLossGrad(
template <>
__global__ void _BroadcastLossGrad<half>(
const int nthreads,
const int rows,
const int cols,
const int dim1,
const int dim2,
const half* dy,
half* dx) {
CUDA_1D_KERNEL_LOOP(i, nthreads) {
#if __CUDA_ARCH__ >= 350
dx[i] = __float2half(
__half2float(dx[i]) *
__half2float(__ldg(dy + (i / rows) * cols + (i % cols))));
__half2float(__ldg(dy + (i / dim1) * dim2 + (i % dim2))));
#else
dx[i] = __float2half(
__half2float(dx[i]) * __half2float(dy[(i / rows) * cols + (i % cols)]));
__half2float(dx[i]) * __half2float(dy[(i / dim1) * dim2 + (i % dim2)]));
#endif
}
}
......@@ -197,7 +197,7 @@ void ReduceLoss<float16, CUDAContext>(
const int* mask,
float16* y,
CUDAContext* ctx) {
if (num_masks > 0) {
if (num_masks > 0 && normalizer < 0.f) {
_ReduceLossWithMask<<<1, CUDA_THREADS, 0, ctx->cuda_stream()>>>(
num_masks,
reinterpret_cast<const half*>(x),
......@@ -221,7 +221,7 @@ void ReduceLossGrad<float16, CUDAContext>(
const int* mask,
float16* dx,
CUDAContext* ctx) {
if (num_masks > 0) {
if (num_masks > 0 && normalizer < 0.f) {
_ReduceMask<<<1, CUDA_THREADS, 0, ctx->cuda_stream()>>>(
num_masks, const_cast<int*>(mask));
_ReduceLossGradWithMask<<<
......@@ -254,16 +254,15 @@ void BroadcastLossGrad<float16, CUDAContext>(
const float16* dy,
float16* dx,
CUDAContext* ctx) {
auto rows = outer_dim * axis_dim, cols = inner_dim;
auto nthreads = rows * cols;
auto nthreads = outer_dim * axis_dim * inner_dim;
_BroadcastLossGrad<<<
CUDA_BLOCKS(nthreads),
CUDA_THREADS,
0,
ctx->cuda_stream()>>>(
nthreads,
rows,
cols,
axis_dim * inner_dim,
inner_dim,
reinterpret_cast<const half*>(dy),
reinterpret_cast<half*>(dx));
} // BroadcastLossGrad
......@@ -278,7 +277,7 @@ void BroadcastLossGrad<float16, CUDAContext>(
const int* mask, \
T* y, \
CUDAContext* ctx) { \
if (num_masks > 0) { \
if (num_masks > 0 && normalizer < 0.f) { \
_ReduceLossWithMask<<<1, CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
num_masks, x, mask, y); \
} else { \
......@@ -297,7 +296,7 @@ void BroadcastLossGrad<float16, CUDAContext>(
const int* mask, \
T* dx, \
CUDAContext* ctx) { \
if (num_masks > 0) { \
if (num_masks > 0 && normalizer < 0.f) { \
_ReduceMask<<<1, CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
num_masks, const_cast<int*>(mask)); \
_ReduceLossGradWithMask<<< \
......@@ -322,13 +321,13 @@ void BroadcastLossGrad<float16, CUDAContext>(
const T* dy, \
T* dx, \
CUDAContext* ctx) { \
auto rows = outer_dim * axis_dim, cols = inner_dim; \
auto nthreads = rows * cols; \
auto nthreads = outer_dim * axis_dim * inner_dim; \
_BroadcastLossGrad<<< \
CUDA_BLOCKS(nthreads), \
CUDA_THREADS, \
0, \
ctx->cuda_stream()>>>(nthreads, rows, cols, dy, dx); \
ctx->cuda_stream()>>>( \
nthreads, axis_dim * inner_dim, inner_dim, dy, dx); \
}
DEFINE_KERNEL_LAUNCHER(float);
......
......@@ -7,31 +7,31 @@ namespace dragon {
namespace kernel {
template <>
void MixedPrecL2Decay<float16, CPUContext>(
void MixedPrecL2Penalty<float16, CPUContext>(
const int count,
const float alpha,
const float16* w,
const float16* x,
float* dx,
CPUContext* ctx) {
#ifdef USE_OPENMP
#pragma omp parallel for num_threads(OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
dx[i] += (cast::to<float>(w[i]) * alpha);
dx[i] += (cast::to<float>(x[i]) * alpha);
}
}
template <>
void MixedPrecUpdate<float16, CPUContext>(
const int count,
const float* updates,
float16* w,
const float* dx,
float16* x,
CPUContext* ctx) {
#ifdef USE_OPENMP
#pragma omp parallel for num_threads(OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
w[i] = cast::to<float16>(cast::to<float>(w[i]) - updates[i]);
x[i] = cast::to<float16>(cast::to<float>(x[i]) - dx[i]);
}
}
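Together the two kernels implement the usual mixed-precision pattern: the fp16 weights feed an fp32 gradient buffer (the L2 penalty is accumulated in fp32), and the final update is applied in fp32 before casting back to fp16. A scalar sketch (here dx stands for the already-scaled update produced by the optimizer in between):

import numpy as np

def mixed_prec_step(x_fp16, dx_fp32, alpha):
    # MixedPrecL2Penalty: accumulate the L2 term into the fp32 gradient.
    dx_fp32 = dx_fp32 + np.float32(x_fp16) * alpha
    # MixedPrecUpdate: apply the update in fp32, then cast back to fp16.
    x_fp16 = np.float16(np.float32(x_fp16) - dx_fp32)
    return x_fp16, dx_fp32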
......
......@@ -9,24 +9,19 @@ namespace kernel {
namespace {
__global__ void _MixedPrecL2DecayHalf(
__global__ void _MixedPrecL2Penalty(
const int nthreads,
const float alpha,
const half* w,
const half* x,
float* dx) {
CUDA_1D_KERNEL_LOOP(i, nthreads) {
#if __CUDA_ARCH__ >= 530
dx[i] += __half2float(w[i]) * alpha;
#endif
dx[i] += __half2float(x[i]) * alpha;
}
}
__global__ void
_MixedPrecUpdateHalf(const int nthreads, const float* updates, half* w) {
__global__ void _MixedPrecUpdate(const int nthreads, const float* dx, half* x) {
CUDA_1D_KERNEL_LOOP(i, nthreads) {
#if __CUDA_ARCH__ >= 530
w[i] = __float2half(__half2float(w[i]) - updates[i]);
#endif
x[i] = __float2half(__half2float(x[i]) - dx[i]);
}
}
......@@ -35,30 +30,27 @@ _MixedPrecUpdateHalf(const int nthreads, const float* updates, half* w) {
/* ------------------- Launcher Separator ------------------- */
template <>
void MixedPrecL2Decay<float16, CUDAContext>(
void MixedPrecL2Penalty<float16, CUDAContext>(
const int count,
const float alpha,
const float16* w,
const float16* x,
float* dx,
CUDAContext* ctx) {
_MixedPrecL2DecayHalf<<<
_MixedPrecL2Penalty<<<
CUDA_BLOCKS(count),
CUDA_THREADS,
0,
ctx->cuda_stream()>>>(count, alpha, reinterpret_cast<const half*>(w), dx);
ctx->cuda_stream()>>>(count, alpha, reinterpret_cast<const half*>(x), dx);
}
template <>
void MixedPrecUpdate<float16, CUDAContext>(
const int count,
const float* updates,
float16* w,
const float* dx,
float16* x,
CUDAContext* ctx) {
_MixedPrecUpdateHalf<<<
CUDA_BLOCKS(count),
CUDA_THREADS,
0,
ctx->cuda_stream()>>>(count, updates, reinterpret_cast<half*>(w));
_MixedPrecUpdate<<<CUDA_BLOCKS(count), CUDA_THREADS, 0, ctx->cuda_stream()>>>(
count, dx, reinterpret_cast<half*>(x));
}
} // namespace kernel
......
......@@ -116,15 +116,13 @@ __global__ void _AvgPool2dGradNCHW(
const T* dy,
T* dx) {
CUDA_1D_KERNEL_LOOP(xi, nthreads) {
const int w = xi % W;
const int h = (xi / W) % H;
const int w = xi % W + pad_w;
const int h = (xi / W) % H + pad_h;
const int c = (xi / W / H) % C;
const int n = xi / W / H / C;
const int phstart =
(h + pad_h < kernel_h) ? 0 : (h + pad_h - kernel_h) / stride_h + 1;
const int pwstart =
(w + pad_w < kernel_w) ? 0 : (w + pad_w - kernel_w) / stride_w + 1;
const int phstart = (h < kernel_h) ? 0 : (h - kernel_h) / stride_h + 1;
const int pwstart = (w < kernel_w) ? 0 : (w - kernel_w) / stride_w + 1;
const int phend = min(h / stride_h + 1, out_h);
const int pwend = min(w / stride_w + 1, out_w);
......@@ -164,14 +162,12 @@ __global__ void _AvgPool2dGradNHWC(
T* dx) {
CUDA_1D_KERNEL_LOOP(xi, nthreads) {
const int c = xi % C;
const int w = (xi / C) % W;
const int h = (xi / C / W) % H;
const int w = (xi / C) % W + pad_w;
const int h = (xi / C / W) % H + pad_h;
const int n = xi / C / W / H;
const int phstart =
(h + pad_h < kernel_h) ? 0 : (h + pad_h - kernel_h) / stride_h + 1;
const int pwstart =
(w + pad_w < kernel_w) ? 0 : (w + pad_w - kernel_w) / stride_w + 1;
const int phstart = (h < kernel_h) ? 0 : (h - kernel_h) / stride_h + 1;
const int pwstart = (w < kernel_w) ? 0 : (w - kernel_w) / stride_w + 1;
const int phend = min(h / stride_h + 1, out_h);
const int pwend = min(w / stride_w + 1, out_w);
......
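Folding the padding into h and w up front leaves the pooling-window range computation unchanged; a small sketch of the equivalence for one axis (hypothetical helper, not from the source):

def pool_window_range(h, pad, kernel, stride, out_size):
    """Output rows whose pooling window covers padded input row h."""
    hp = h + pad  # the new code adds the pad once, up front
    start = 0 if hp < kernel else (hp - kernel) // stride + 1
    end = min(hp // stride + 1, out_size)
    return start, end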
......@@ -30,8 +30,8 @@ void _Im2Col2dNCHW(
const T* im,
T* col) {
int ih, iw;
const int im_ofs = H * W;
for (int c = 0; c < C; ++c, im += im_ofs) {
const int im_offset = H * W;
for (int c = 0; c < C; ++c, im += im_offset) {
for (int kh = 0; kh < kernel_h; ++kh) {
for (int kw = 0; kw < kernel_w; ++kw) {
ih = -pad_h + kh * dilation_h;
......@@ -117,8 +117,8 @@ void _Col2Im2dNCHW(
const T* col,
T* im) {
int ih, iw;
const int im_ofs = H * W;
for (int c = 0; c < C; ++c, im += im_ofs) {
const int im_offset = H * W;
for (int c = 0; c < C; ++c, im += im_offset) {
for (int kh = 0; kh < kernel_h; ++kh) {
for (int kw = 0; kw < kernel_w; ++kw) {
ih = -pad_h + kh * dilation_h;
......
......@@ -27,13 +27,13 @@ void _DepthwiseConv2dNCHW(
T* y) {
T sum_val;
int ih, iw, xi, wi;
int yc_ofs, xc_start, yc_start;
int yc_offset, xc_start, yc_start;
int ih_start, yh_start, iw_start;
for (int n = 0; n < N; ++n) {
for (int c = 0; c < C; ++c) {
yc_ofs = n * C + c;
xc_start = yc_ofs * H * W;
yc_start = yc_ofs * out_h;
yc_offset = n * C + c;
xc_start = yc_offset * H * W;
yc_start = yc_offset * out_h;
for (int oh = 0; oh < out_h; ++oh) {
ih_start = oh * stride_h - pad_h;
yh_start = (yc_start + oh) * out_w;
......
......@@ -46,7 +46,7 @@ void _ResizeLinearNCHW(
std::array<int, 4> idx = {0, 0, 0, 0};
std::array<int, 4> dims = {N, C, out_h, out_w};
float h_in, w_in, u, v, t, b, tl, tr, bl, br;
int ti, bi, li, ri, ofs, h_max = H - 1, w_max = W - 1;
int ti, bi, li, ri, offset, h_max = H - 1, w_max = W - 1;
for (int i = 0; i < count; ++i) {
h_in = TransformCoordinate(idx[2], scale_h, align_corners);
w_in = TransformCoordinate(idx[3], scale_w, align_corners);
......@@ -54,11 +54,11 @@ void _ResizeLinearNCHW(
bi = (h_in < h_max) ? std::ceil(h_in) : h_max;
ri = (w_in < w_max) ? std::ceil(w_in) : w_max;
v = h_in - ti, u = w_in - li;
ofs = (idx[0] * C + idx[1]) * H;
tl = (float)x[(ofs + ti) * W + li];
tr = (float)x[(ofs + ti) * W + ri];
bl = (float)x[(ofs + bi) * W + li];
br = (float)x[(ofs + bi) * W + ri];
offset = (idx[0] * C + idx[1]) * H;
tl = (float)x[(offset + ti) * W + li];
tr = (float)x[(offset + ti) * W + ri];
bl = (float)x[(offset + bi) * W + li];
br = (float)x[(offset + bi) * W + ri];
t = tl + (tr - tl) * u;
b = bl + (br - bl) * u;
y[i] = static_cast<T>(t + (b - t) * v);
......@@ -83,7 +83,7 @@ void _ResizeLinearNHWC(
std::array<int, 4> idx = {0, 0, 0, 0};
std::array<int, 4> dims = {N, out_h, out_w, C};
float h_in, w_in, u, v, t, b, tl, tr, bl, br;
int ti, bi, li, ri, ofs, h_max = H - 1, w_max = W - 1;
int ti, bi, li, ri, offset, h_max = H - 1, w_max = W - 1;
for (int i = 0; i < count; ++i) {
h_in = TransformCoordinate(idx[1], scale_h, align_corners);
w_in = TransformCoordinate(idx[2], scale_w, align_corners);
......@@ -91,11 +91,11 @@ void _ResizeLinearNHWC(
bi = (h_in < h_max) ? std::ceil(h_in) : h_max;
ri = (w_in < w_max) ? std::ceil(w_in) : w_max;
v = h_in - ti, u = w_in - li;
ofs = idx[0] * H;
tl = (float)x[((ofs + ti) * W + li) * C + idx[3]];
tr = (float)x[((ofs + ti) * W + ri) * C + idx[3]];
bl = (float)x[((ofs + bi) * W + li) * C + idx[3]];
br = (float)x[((ofs + bi) * W + ri) * C + idx[3]];
offset = idx[0] * H;
tl = (float)x[((offset + ti) * W + li) * C + idx[3]];
tr = (float)x[((offset + ti) * W + ri) * C + idx[3]];
bl = (float)x[((offset + bi) * W + li) * C + idx[3]];
br = (float)x[((offset + bi) * W + ri) * C + idx[3]];
t = tl + (tr - tl) * u;
b = bl + (br - bl) * u;
y[i] = static_cast<T>(t + (b - t) * v);
......@@ -120,7 +120,7 @@ void _ResizeLinearGradNCHW(
std::array<int, 4> idx = {0, 0, 0, 0};
std::array<int, 4> dims = {N, C, out_h, out_w};
float h_in, w_in, u, v, dt, db, tl, tr, bl, br;
int ti, bi, li, ri, ofs, h_max = H - 1, w_max = W - 1;
int ti, bi, li, ri, offset, h_max = H - 1, w_max = W - 1;
for (int i = 0; i < count; ++i) {
h_in = TransformCoordinate(idx[2], scale_h, align_corners);
w_in = TransformCoordinate(idx[3], scale_w, align_corners);
......@@ -128,13 +128,13 @@ void _ResizeLinearGradNCHW(
bi = (h_in < h_max) ? std::ceil(h_in) : h_max;
ri = (w_in < w_max) ? std::ceil(w_in) : w_max;
v = h_in - ti, u = w_in - li;
ofs = (idx[0] * C + idx[1]) * H;
offset = (idx[0] * C + idx[1]) * H;
dt = (1.f - v) * static_cast<float>(dy[i]);
db = v * static_cast<float>(dy[i]);
dx[(ofs + ti) * W + li] += (1.f - u) * dt; // tl
dx[(ofs + ti) * W + ri] += u * dt; // tr
dx[(ofs + bi) * W + li] += (1.f - u) * db; // bl
dx[(ofs + bi) * W + ri] += u * db; // br
dx[(offset + ti) * W + li] += (1.f - u) * dt; // tl
dx[(offset + ti) * W + ri] += u * dt; // tr
dx[(offset + bi) * W + li] += (1.f - u) * db; // bl
dx[(offset + bi) * W + ri] += u * db; // br
utils::math::IncreaseIndexInDims(4, dims.data(), idx.data());
}
}
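All of the resize kernels share the same bilinear formula: sample the four neighbours, interpolate horizontally, then vertically. For reference:

def bilinear(tl, tr, bl, br, u, v):
    """u, v are the fractional offsets from the top-left neighbour."""
    t = tl + (tr - tl) * u   # top edge
    b = bl + (br - bl) * u   # bottom edge
    return t + (b - t) * v   # blend the two rows

The gradient kernels scatter dy back onto the same four neighbours with weights (1-u)(1-v), u(1-v), (1-u)v and uv.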
......@@ -156,7 +156,7 @@ void _ResizeLinearGradNHWC(
std::array<int, 4> idx = {0, 0, 0, 0};
std::array<int, 4> dims = {N, out_h, out_w, C};
float h_in, w_in, u, v, dt, db, tl, tr, bl, br;
int ti, bi, li, ri, ofs, h_max = H - 1, w_max = W - 1;
int ti, bi, li, ri, offset, h_max = H - 1, w_max = W - 1;
for (int i = 0; i < count; ++i) {
h_in = TransformCoordinate(idx[1], scale_h, align_corners);
w_in = TransformCoordinate(idx[2], scale_w, align_corners);
......@@ -164,13 +164,13 @@ void _ResizeLinearGradNHWC(
bi = (h_in < h_max) ? std::ceil(h_in) : h_max;
ri = (w_in < w_max) ? std::ceil(w_in) : w_max;
v = h_in - ti, u = w_in - li;
ofs = idx[0] * H;
offset = idx[0] * H;
dt = (1.f - v) * static_cast<float>(dy[i]);
db = v * static_cast<float>(dy[i]);
dx[((ofs + ti) * W + li) * C + idx[3]] += (1.f - u) * dt; // tl
dx[((ofs + ti) * W + ri) * C + idx[3]] += u * dt; // tr
dx[((ofs + bi) * W + li) * C + idx[3]] += (1.f - u) * db; // bl
dx[((ofs + bi) * W + ri) * C + idx[3]] += u * db; // br
dx[((offset + ti) * W + li) * C + idx[3]] += (1.f - u) * dt; // tl
dx[((offset + ti) * W + ri) * C + idx[3]] += u * dt; // tr
dx[((offset + bi) * W + li) * C + idx[3]] += (1.f - u) * db; // bl
dx[((offset + bi) * W + ri) * C + idx[3]] += u * db; // br
utils::math::IncreaseIndexInDims(4, dims.data(), idx.data());
}
}
......
......@@ -61,17 +61,17 @@ __global__ void _ResizeLinearNCHW(
const int ri = (w_in < W - 1) ? ceilf(w_in) : W - 1;
const float u = w_in - li;
const int ofs = (n * C + c) * H;
const int offset = (n * C + c) * H;
#if __CUDA_ARCH__ >= 350
const float tl = __ldg(x + ((ofs + ti) * W + li));
const float tr = __ldg(x + ((ofs + ti) * W + ri));
const float bl = __ldg(x + ((ofs + bi) * W + li));
const float br = __ldg(x + ((ofs + bi) * W + ri));
const float tl = __ldg(x + ((offset + ti) * W + li));
const float tr = __ldg(x + ((offset + ti) * W + ri));
const float bl = __ldg(x + ((offset + bi) * W + li));
const float br = __ldg(x + ((offset + bi) * W + ri));
#else
const float tl = x[(ofs + ti) * W + li];
const float tr = x[(ofs + ti) * W + ri];
const float bl = x[(ofs + bi) * W + li];
const float br = x[(ofs + bi) * W + ri];
const float tl = x[(offset + ti) * W + li];
const float tr = x[(offset + ti) * W + ri];
const float bl = x[(offset + bi) * W + li];
const float br = x[(offset + bi) * W + ri];
#endif
const float t = tl + (tr - tl) * u;
const float b = bl + (br - bl) * u;
......@@ -109,11 +109,11 @@ __global__ void _ResizeLinearNCHW<half>(
const int ri = (w_in < W - 1) ? ceilf(w_in) : W - 1;
const float u = w_in - li;
const int ofs = (n * C + c) * H;
const float tl = __half2float(__ldg(x + ((ofs + ti) * W + li)));
const float tr = __half2float(__ldg(x + ((ofs + ti) * W + ri)));
const float bl = __half2float(__ldg(x + ((ofs + bi) * W + li)));
const float br = __half2float(__ldg(x + ((ofs + bi) * W + ri)));
const int offset = (n * C + c) * H;
const float tl = __half2float(__ldg(x + ((offset + ti) * W + li)));
const float tr = __half2float(__ldg(x + ((offset + ti) * W + ri)));
const float bl = __half2float(__ldg(x + ((offset + bi) * W + li)));
const float br = __half2float(__ldg(x + ((offset + bi) * W + ri)));
const float t = tl + (tr - tl) * u;
const float b = bl + (br - bl) * u;
......@@ -151,17 +151,17 @@ __global__ void _ResizeLinearNHWC(
const int ri = (w_in < W - 1) ? ceilf(w_in) : W - 1;
const float u = w_in - li;
const int ofs = n * H;
const int offset = n * H;
#if __CUDA_ARCH__ >= 350
const float tl = __ldg(x + (((ofs + ti) * W + li) * C + c));
const float tr = __ldg(x + (((ofs + ti) * W + ri) * C + c));
const float bl = __ldg(x + (((ofs + bi) * W + li) * C + c));
const float br = __ldg(x + (((ofs + bi) * W + ri) * C + c));
const float tl = __ldg(x + (((offset + ti) * W + li) * C + c));
const float tr = __ldg(x + (((offset + ti) * W + ri) * C + c));
const float bl = __ldg(x + (((offset + bi) * W + li) * C + c));
const float br = __ldg(x + (((offset + bi) * W + ri) * C + c));
#else
const float tl = x[((ofs + ti) * W + li) * C + c];
const float tr = x[((ofs + ti) * W + ri) * C + c];
const float bl = x[((ofs + bi) * W + li) * C + c];
const float br = x[((ofs + bi) * W + ri) * C + c];
const float tl = x[((offset + ti) * W + li) * C + c];
const float tr = x[((offset + ti) * W + ri) * C + c];
const float bl = x[((offset + bi) * W + li) * C + c];
const float br = x[((offset + bi) * W + ri) * C + c];
#endif
const float t = tl + (tr - tl) * u;
const float b = bl + (br - bl) * u;
......@@ -199,11 +199,15 @@ __global__ void _ResizeLinearNHWC<half>(
const int ri = (w_in < W - 1) ? ceilf(w_in) : W - 1;
const float u = w_in - li;
const int ofs = n * H;
const float tl = __half2float(__ldg(x + (((ofs + ti) * W + li) * C + c)));
const float tr = __half2float(__ldg(x + (((ofs + ti) * W + ri) * C + c)));
const float bl = __half2float(__ldg(x + (((ofs + bi) * W + li) * C + c)));
const float br = __half2float(__ldg(x + (((ofs + bi) * W + ri) * C + c)));
const int offset = n * H;
const float tl =
__half2float(__ldg(x + (((offset + ti) * W + li) * C + c)));
const float tr =
__half2float(__ldg(x + (((offset + ti) * W + ri) * C + c)));
const float bl =
__half2float(__ldg(x + (((offset + bi) * W + li) * C + c)));
const float br =
__half2float(__ldg(x + (((offset + bi) * W + ri) * C + c)));
const float t = tl + (tr - tl) * u;
const float b = bl + (br - bl) * u;
......@@ -249,11 +253,11 @@ __global__ void _ResizeLinearGradNCHW(
const float db = v * ((float)dy[yi]);
#endif
const int ofs = (n * C + c) * H;
atomicAdd(&dx[(ofs + ti) * W + li], (1.f - u) * dt);
atomicAdd(&dx[(ofs + ti) * W + ri], u * dt);
atomicAdd(&dx[(ofs + bi) * W + li], (1.f - u) * db);
atomicAdd(&dx[(ofs + bi) * W + ri], u * db);
const int offset = (n * C + c) * H;
atomicAdd(&dx[(offset + ti) * W + li], (1.f - u) * dt);
atomicAdd(&dx[(offset + ti) * W + ri], u * dt);
atomicAdd(&dx[(offset + bi) * W + li], (1.f - u) * db);
atomicAdd(&dx[(offset + bi) * W + ri], u * db);
}
}
......@@ -290,11 +294,11 @@ __global__ void _ResizeLinearGradNCHW<half>(
const float dt = (1.f - v) * __half2float(__ldg(dy + yi));
const float db = v * __half2float(__ldg(dy + yi));
const int ofs = (n * C + c) * H;
atomicAdd(&dx[(ofs + ti) * W + li], (1.f - u) * dt);
atomicAdd(&dx[(ofs + ti) * W + ri], u * dt);
atomicAdd(&dx[(ofs + bi) * W + li], (1.f - u) * db);
atomicAdd(&dx[(ofs + bi) * W + ri], u * db);
const int offset = (n * C + c) * H;
atomicAdd(&dx[(offset + ti) * W + li], (1.f - u) * dt);
atomicAdd(&dx[(offset + ti) * W + ri], u * dt);
atomicAdd(&dx[(offset + bi) * W + li], (1.f - u) * db);
atomicAdd(&dx[(offset + bi) * W + ri], u * db);
#endif
}
}
......@@ -336,11 +340,11 @@ __global__ void _ResizeLinearGradNHWC(
const float db = v * ((float)dy[yi]);
#endif
const int ofs = n * H;
atomicAdd(&dx[((ofs + ti) * W + li) * C + c], (1.f - u) * dt);
atomicAdd(&dx[((ofs + ti) * W + ri) * C + c], u * dt);
atomicAdd(&dx[((ofs + bi) * W + li) * C + c], (1.f - u) * db);
atomicAdd(&dx[((ofs + bi) * W + ri) * C + c], u * db);
const int offset = n * H;
atomicAdd(&dx[((offset + ti) * W + li) * C + c], (1.f - u) * dt);
atomicAdd(&dx[((offset + ti) * W + ri) * C + c], u * dt);
atomicAdd(&dx[((offset + bi) * W + li) * C + c], (1.f - u) * db);
atomicAdd(&dx[((offset + bi) * W + ri) * C + c], u * db);
}
}
......
......@@ -11,7 +11,6 @@ template <typename T>
void CuDNNEluOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0, {0});
CuDNNSetTensorDesc<T>(&input_desc_, X.dims());
CUDNN_CHECK(cudnnActivationForward(
ctx()->cudnn_handle(),
act_desc_,
......@@ -33,7 +32,6 @@ template <typename T>
void CuDNNEluGradientOp<Context>::DoRunWithType() {
auto &Y = Input(0), &dY = Input(1), *dX = Output(0);
CuDNNSetTensorDesc<T>(&input_desc_, Y.dims());
CUDNN_CHECK(cudnnActivationBackward(
ctx()->cudnn_handle(),
act_desc_,
......
......@@ -7,7 +7,6 @@ template <class Context>
template <typename T>
void ReluOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0, {0});
if (max_value_ > 0.f) {
kernel::ReluN(
X.count(),
......@@ -34,7 +33,6 @@ template <class Context>
template <typename T>
void ReluGradientOp<Context>::DoRunWithType() {
auto &Y = Input(0), &dY = Input(1), *dX = Output(0);
if (max_value_ > 0.f) {
kernel::ReluNGrad(
Y.count(),
......
......@@ -9,7 +9,6 @@ template <typename T>
void CuDNNReluOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0, {0});
CuDNNSetTensorDesc<T>(&input_desc_, X.dims());
#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward(
ctx()->cudnn_handle(),
......@@ -47,7 +46,6 @@ template <typename T>
void CuDNNReluGradientOp<Context>::DoRunWithType() {
auto &Y = Input(0), &dY = Input(1), *dX = Output(0);
CuDNNSetTensorDesc<T>(&input_desc_, Y.dims());
#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward(
ctx()->cudnn_handle(),
......
......@@ -9,7 +9,6 @@ template <typename T>
void CuDNNSigmoidOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0, {0});
CuDNNSetTensorDesc<T>(&input_desc_, X.dims());
#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward(
ctx()->cudnn_handle(),
......@@ -43,7 +42,6 @@ template <typename T>
void CuDNNSigmoidGradientOp<Context>::DoRunWithType() {
auto &Y = Input(0), &dY = Input(1), *dX = Output(0);
CuDNNSetTensorDesc<T>(&input_desc_, Y.dims());
#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward(
ctx()->cudnn_handle(),
......
......@@ -9,10 +9,8 @@ template <typename T>
void CuDNNSoftmaxOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0, {0});
CANONICALIZE_AXIS_WITH_TENSOR(X);
CuDNNSetTensorDesc<T>(
&input_desc_, {X.count(0, axis), X.dim(axis), X.count(axis + 1)});
CUDNN_CHECK(cudnnSoftmaxForward(
ctx()->cudnn_handle(),
CUDNN_SOFTMAX_ACCURATE,
......@@ -35,10 +33,8 @@ template <typename T>
void CuDNNSoftmaxGradientOp<Context>::DoRunWithType() {
auto &Y = Input(0), &dY = Input(1), *dX = Output(0);
CANONICALIZE_AXIS_WITH_TENSOR(Y);
CuDNNSetTensorDesc<T>(
&input_desc_, {Y.count(0, axis), Y.dim(axis), Y.count(axis + 1)});
CUDNN_CHECK(cudnnSoftmaxBackward(
ctx()->cudnn_handle(),
CUDNN_SOFTMAX_ACCURATE,
......
......@@ -9,7 +9,6 @@ template <typename T>
void CuDNNTanhOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0, {0});
CuDNNSetTensorDesc<T>(&input_desc_, X.dims());
#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward(
ctx()->cudnn_handle(),
......@@ -43,7 +42,6 @@ template <typename T>
void CuDNNTanhGradientOp<Context>::DoRunWithType() {
auto &Y = Input(0), &dY = Input(1), *dX = Output(0);
CuDNNSetTensorDesc<T>(&input_desc_, Y.dims());
#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward(
ctx()->cudnn_handle(),
......
......@@ -64,9 +64,7 @@ void CastOp<Context>::RunOnDevice() {
STORE_INPUT_SPEC(0);
DISPATCH_WITH_TENSOR(Input(0));
} else {
Buffer("X[" + std::to_string(0) + "]")
->ReshapeLike(*Output(0))
->set_meta(Output(0)->meta());
Buffer("X_spec:0")->ReshapeLike(*Output(0))->set_meta(Output(0)->meta());
DISPATCH_WITH_TENSOR((*Output(0)));
};
}
......
......@@ -26,7 +26,9 @@ namespace dragon {
axes_(OpArgs<int64_t>("axes")), \
keep_dims_(OpArg<int64_t>("keep_dims", 0)) {} \
USE_OPERATOR_FUNCTIONS; \
\
void RunOnDevice() override; \
\
template <typename T> \
void DoRunWithType(); \
\
......@@ -41,7 +43,9 @@ namespace dragon {
public: \
SIMPLE_CTOR_DTOR(name##GradientOp); \
USE_OPERATOR_FUNCTIONS; \
\
void RunOnDevice() override; \
\
template <typename T> \
void DoRunWithType(); \
};
......
......@@ -15,9 +15,8 @@ void SoftmaxCrossEntropyOp<Context>::DoRunWithType() {
auto inner_dim = X.count(axis + 1);
auto num_preds = outer_dim * inner_dim;
CHECK_EQ(num_preds, Input(1).count())
CHECK_EQ(X.count(), Input(1).count())
<< "\nNumber of preds must match the number of targets.";
Buffer("prob")->ReshapeLike(X);
auto* loss = ws()->template data<T, Context>({X.count()})[0];
......
......@@ -17,6 +17,8 @@ void SparseSoftmaxCrossEntropyOp<Context>::DoRunWithType() {
CHECK_EQ(num_preds, Input(1).count())
<< "\nNumber of preds must match the number of targets.";
auto* X_prob = Buffer("prob")->ReshapeLike(X);
auto* prob = X_prob->template mutable_data<LogitType, Context>();
auto scratches = ws()->template data<Context>({
num_preds * sizeof(LogitType), // loss
......@@ -25,10 +27,6 @@ void SparseSoftmaxCrossEntropyOp<Context>::DoRunWithType() {
auto* loss = static_cast<LogitType*>(scratches[0]);
auto* mask = static_cast<int*>(scratches[1]);
auto* prob = Buffer("prob")
->ReshapeLike(X)
->template mutable_data<LogitType, Context>();
kernel::Softmax(
outer_dim,
X.dim(axis),
......
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_OPERATORS_MATH_ACCUMULATE_OP_H_
#define DRAGON_OPERATORS_MATH_ACCUMULATE_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class AccumulateOp final : public Operator<Context> {
public:
AccumulateOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
alpha_(OpArg<float>("alpha", 1.f)),
beta_(OpArg<float>("beta", 1.f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType(Tensor* X, Tensor* Y);
protected:
float alpha_, beta_;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_MATH_ACCUMULATE_OP_H_
#include "dragon/core/workspace.h"
#include "dragon/operators/math/accumulate_op.h"
#include "dragon/operators/math/elementwise_ops.h"
#include "dragon/utils/math_functions.h"
namespace dragon {
template <class Context>
template <typename T>
void AccumulateOp<Context>::DoRunWithType(Tensor* X, Tensor* Y) {
void AxpbyOp<Context>::DoRunWithType(Tensor* X, Tensor* Y) {
CHECK_EQ(X->count(), Y->count());
auto* x = X->template data<T, Context>();
auto* y = Y->template mutable_data<T, Context>();
......@@ -26,41 +26,42 @@ void AccumulateOp<Context>::DoRunWithType(Tensor* X, Tensor* Y) {
}
template <class Context>
void AccumulateOp<Context>::RunOnDevice() {
void AxpbyOp<Context>::RunOnDevice() {
for (int i = 0; i < InputSize(); i++) {
Output(i)->ReshapeLike(Input(i));
if (XIsType(Input(i), int8_t)) {
DoRunWithType<int8_t>(&Input(i), Output(i));
} else if (XIsType(Input(i), uint8_t)) {
DoRunWithType<uint8_t>(&Input(i), Output(i));
} else if (XIsType(Input(i), int)) {
DoRunWithType<int>(&Input(i), Output(i));
} else if (XIsType(Input(i), int64_t)) {
DoRunWithType<int64_t>(&Input(i), Output(i));
} else if (XIsType(Input(i), float16)) {
DoRunWithType<float16>(&Input(i), Output(i));
} else if (XIsType(Input(i), float)) {
DoRunWithType<float>(&Input(i), Output(i));
} else if (XIsType(Input(i), double)) {
DoRunWithType<double>(&Input(i), Output(i));
auto &X = Input(i), *Y = Output(i);
Y->ReshapeLike(X);
if (XIsType(X, int8_t)) {
DoRunWithType<int8_t>(&X, Y);
} else if (XIsType(X, uint8_t)) {
DoRunWithType<uint8_t>(&X, Y);
} else if (XIsType(X, int)) {
DoRunWithType<int>(&X, Y);
} else if (XIsType(X, int64_t)) {
DoRunWithType<int64_t>(&X, Y);
} else if (XIsType(X, float16)) {
DoRunWithType<float16>(&X, Y);
} else if (XIsType(X, float)) {
DoRunWithType<float>(&X, Y);
} else if (XIsType(X, double)) {
DoRunWithType<double>(&X, Y);
} else
LOG(FATAL) << TypeString(
Input(i),
X,
{"int8", "uint8", "int32", "int64", "float16", "float32", "float64"});
}
}
DEPLOY_CPU(Accumulate);
DEPLOY_CPU(Axpby);
#ifdef USE_CUDA
DEPLOY_CUDA(Accumulate);
DEPLOY_CUDA(Axpby);
#endif
OPERATOR_SCHEMA(Accumulate)
OPERATOR_SCHEMA(Axpby)
/* X1, ... */
.NumInputs(1, INT_MAX)
/* Y1, ... */
.NumOutputs(1, INT_MAX);
NO_GRADIENT(Accumulate);
NO_GRADIENT(Axpby);
} // namespace dragon
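The renamed op keeps the accumulate semantics; per element it computes alpha * x + beta * y, with the alpha/beta defaults of 1 taken from the old AccumulateOp header above. A reference sketch (assuming the conventional BLAS-style axpby meaning):

def axpby(x, y, alpha=1.0, beta=1.0):
    """Elementwise y = alpha * x + beta * y, matching the op's argument names."""
    return [alpha * xi + beta * yi for xi, yi in zip(x, y)]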
#include "dragon/operators/math/dot_op.h"
#include "dragon/operators/math/elementwise_ops.h"
#include "dragon/utils/math_functions.h"
namespace dragon {
template <class Context>
template <typename T>
void DotOp<Context>::DotImpl() {
CHECK_EQ(Input(0).dim(0), Input(1).dim(0))
<< "\nTensor(" << Input(0).name() << "): " << Input(0).DimString()
<< " can not Dot with Tensor"
<< "(" << Input(1).name() << "): " << Input(1).DimString();
Output(0)->Reshape({});
auto* a = Input(0).template data<T, Context>();
auto* b = Input(1).template data<T, Context>();
auto* y = Output(0)->template mutable_data<T, Context>();
T yHost;
math::Dot(Input(0).count(), a, b, &yHost, ctx());
ctx()->template Copy<T, Context, CPUContext>(1, y, &yHost);
}
template <class Context>
template <typename T>
void DotOp<Context>::GemmImpl() {
K1_ = transA_ ? Input(0).dim(0) : Input(0).dim(-1);
K2_ = transB_ ? Input(1).dim(1) : Input(1).dim(0);
N_ = transB_ ? Input(1).dim(0) : Input(1).dim(1);
M_ = Input(0).count() / K1_;
CHECK_EQ(K1_, K2_) << "\nTensor(" << Input(0).name()
<< "): " << Input(0).DimString()
<< " can not Dot with Tensor"
<< "(" << Input(1).name() << "): " << Input(1).DimString();
auto out_dims = Input(0).dims();
if (!transA_) {
out_dims.pop_back();
} else {
out_dims.erase(out_dims.begin());
}
out_dims.push_back(N_);
Output(0)->Reshape(out_dims);
auto* a = Input(0).template data<T, Context>();
auto* b = Input(1).template data<T, Context>();
auto* y = Output(0)->template mutable_data<T, Context>();
math::Gemm(
transA_ ? CblasTrans : CblasNoTrans,
transB_ ? CblasTrans : CblasNoTrans,
M_,
N_,
K1_,
1.f,
a,
b,
0.f,
y,
ctx());
}
template <class Context>
template <typename T>
void DotOp<Context>::GemvImpl() {
N_ = transA_ ? Input(0).dim(0) : Input(0).dim(-1);
M_ = Input(0).count() / N_;
CHECK_EQ(N_, Input(1).dim(0))
<< "\nTensor(" << Input(0).name() << "): " << Input(0).DimString()
<< " can not Dot with Tensor"
<< "(" << Input(1).name() << "): " << Input(1).DimString();
auto out_dims = Input(0).dims();
if (!transA_) {
out_dims.pop_back();
} else {
out_dims.erase(out_dims.begin());
}
Output(0)->Reshape(out_dims);
auto* a = Input(0).template data<T, Context>();
auto* b = Input(1).template data<T, Context>();
auto* y = Output(0)->template mutable_data<T, Context>();
math::Gemv(
transA_ ? CblasTrans : CblasNoTrans,
transA_ ? N_ : M_,
transA_ ? M_ : N_,
1.f,
a,
b,
0.f,
y,
ctx());
}
template <class Context>
template <typename T>
void DotOp<Context>::DoRunWithType() {
if (Input(0).ndim() == 1 && Input(1).ndim() == 1) {
DotImpl<T>();
} else if (Input(0).ndim() >= 2 && Input(1).ndim() == 2) {
GemmImpl<T>();
} else if (Input(0).ndim() >= 2 && Input(1).ndim() == 1) {
GemvImpl<T>();
auto &A = Input(0), &B = Input(1), *Y = Output(0);
if (A.ndim() == 1 && B.ndim() == 1) {
// Compute vector product
CHECK_EQ(A.count(), B.count())
<< "\nShapes " << A.DimString() << " and " << B.DimString()
<< " not aligned: " << A.count() << " (dim 0) != " << B.count()
<< " (dim 0)";
math::Dot(
A.count(),
A.template data<T, Context>(),
B.template data<T, Context>(),
Y->Reshape({})->template mutable_data<T, Context>(),
ctx());
} else if (A.ndim() == 2 && B.ndim() == 2) {
// Compute matrix multiplication
CHECK_EQ(A.dim(1), B.dim(0))
<< "\nShapes " << A.DimString() << " and " << B.DimString()
<< " not aligned: " << A.dim(1) << " (dim 1) != " << B.dim(0)
<< " (dim 0)";
math::Gemm(
CblasNoTrans,
CblasNoTrans,
A.dim(0),
B.dim(1),
A.dim(1),
1.f,
A.template data<T, Context>(),
B.template data<T, Context>(),
0.f,
Y->Reshape({A.dim(0), B.dim(1)})->template mutable_data<T, Context>(),
ctx());
} else if (A.ndim() == 0 && B.ndim() == 0) {
// Compute elementwise multiplication
math::Mul(
1,
A.template data<T, Context>(),
B.template data<T, Context>(),
Y->Reshape({})->template mutable_data<T, Context>(),
ctx());
} else if (A.ndim() >= 2 && B.ndim() == 1) {
// Compute matrix-vector multiplication
CHECK_EQ(A.dim(-1), B.dim(0))
<< "\nShapes " << A.DimString() << " and " << B.DimString()
<< " not aligned: " << A.dim(-1) << " (dim -1) != " << B.dim(0)
<< " (dim 0)";
vec64_t Y_dims(A.dims().begin(), A.dims().end() - 1);
math::Gemv(
CblasNoTrans,
A.dim(0),
A.dim(-1),
1.f,
A.template data<T, Context>(),
B.template data<T, Context>(),
0.f,
Y->Reshape(Y_dims)->template mutable_data<T, Context>(),
ctx());
} else {
LOG(FATAL) << "\nTensor(" << Input(0).name()
<< "): " << Input(0).DimString() << " can not dot with Tensor"
<< "(" << Input(1).name() << "): " << Input(1).DimString();
LOG(FATAL) << "\nShapes " << A.DimString() << " and " << B.DimString()
<< " not aligned.";
}
}
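The rewritten DotOp dispatches on the ranks of A and B, mirroring numpy.dot-style rules; a sketch of the shape logic implied by the cases above (semantics only):

def dot_output_shape(a_shape, b_shape):
    a_nd, b_nd = len(a_shape), len(b_shape)
    if a_nd == 1 and b_nd == 1:      # vector product -> scalar
        assert a_shape[0] == b_shape[0]
        return ()
    if a_nd == 2 and b_nd == 2:      # matrix multiplication
        assert a_shape[1] == b_shape[0]
        return (a_shape[0], b_shape[1])
    if a_nd == 0 and b_nd == 0:      # scalar * scalar
        return ()
    if a_nd >= 2 and b_nd == 1:      # matrix-vector product
        assert a_shape[-1] == b_shape[0]
        return tuple(a_shape[:-1])
    raise ValueError('Shapes not aligned.')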
......@@ -120,164 +76,119 @@ void DotOp<Context>::RunOnDevice() {
template <class Context>
template <typename T>
void DotGradientOp<Context>::DotImpl() {
CHECK_EQ(Input(0).count(), Input(1).count())
<< "\nTensor(" << Input(0).name() << "): " << Input(0).DimString()
<< " can not Dot with Tensor"
<< "(" << Input(1).name() << "): " << Input(1).DimString();
auto* a = Input(0).template data<T, Context>();
auto* b = Input(1).template data<T, Context>();
auto* dy = Input(-1).template data<T, CPUContext>();
if (Output(0)->has_name()) {
auto* da = Output(0)->template mutable_data<T, Context>();
math::Scale(Output(0)->count(), cast::to<float>(dy[0]), b, da, ctx());
}
if (Output(1)->has_name()) {
auto* db = Output(1)->template mutable_data<T, Context>();
math::Scale(Output(0)->count(), cast::to<float>(dy[0]), a, db, ctx());
}
}
template <class Context>
template <typename T>
void DotGradientOp<Context>::GemmImpl() {
K1_ = transA_ ? Input(0).dim(0) : Input(0).dim(-1);
K2_ = transB_ ? Input(1).dim(1) : Input(1).dim(0);
N_ = transB_ ? Input(1).dim(0) : Input(1).dim(1);
M_ = Input(0).count() / K1_;
CHECK_EQ(K1_, K2_) << "\nTensor(" << Input(0).name()
<< "): " << Input(0).DimString()
<< " can not Dot with Tensor"
<< "(" << Input(1).name() << "): " << Input(1).DimString();
auto* a = Input(0).template data<T, Context>();
auto* b = Input(1).template data<T, Context>();
auto* dy = Input(-1).template data<T, Context>();
if (Output(0)->has_name()) {
auto* da = Output(0)->template mutable_data<T, Context>();
if (transA_) {
void DotGradientOp<Context>::DoRunWithType() {
auto &A = Input(0), &B = Input(1), &dY = Input(2);
auto *dA = Output(0), *dB = Output(1);
if (A.ndim() == 1 && B.ndim() == 1) {
// Gradient of vector product
if (dA->has_name()) {
math::Mul(
dY.ndim(),
dY.dims().data(),
B.ndim(),
B.dims().data(),
dY.template data<T, Context>(),
B.template data<T, Context>(),
dA->ReshapeLike(A)->template mutable_data<T, Context>(),
ctx());
}
if (dB->has_name()) {
math::Mul(
dY.ndim(),
dY.dims().data(),
A.ndim(),
A.dims().data(),
dY.template data<T, Context>(),
A.template data<T, Context>(),
dB->ReshapeLike(B)->template mutable_data<T, Context>(),
ctx());
}
} else if (A.ndim() == 2 && B.ndim() == 2) {
// Gradient of matrix multiplication
if (dA->has_name()) {
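// dA = dY * B^T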
math::Gemm(
transB_ ? CblasTrans : CblasNoTrans,
CblasNoTrans,
CblasTrans,
K1_,
M_,
N_,
A.dim(0),
A.dim(1),
B.dim(1),
1.f,
b,
dy,
dY.template data<T, Context>(),
B.template data<T, Context>(),
0.f,
da,
dA->ReshapeLike(A)->template mutable_data<T, Context>(),
ctx());
} else {
}
if (dB->has_name()) {
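// dB = A^T * dY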
math::Gemm(
CblasTrans,
CblasNoTrans,
transB_ ? CblasNoTrans : CblasTrans,
M_,
K1_,
N_,
A.dim(1),
B.dim(1),
A.dim(0),
1.f,
dy,
b,
A.template data<T, Context>(),
dY.template data<T, Context>(),
0.f,
da,
dB->ReshapeLike(B)->template mutable_data<T, Context>(),
ctx());
}
}
if (Output(1)->has_name()) {
auto* db = Output(1)->template mutable_data<T, Context>();
if (transB_) {
} else if (A.ndim() == 0 && B.ndim() == 0) {
// Gradient of elementwise multiplication
if (dA->has_name()) {
math::Mul(
1,
dY.template data<T, Context>(),
B.template data<T, Context>(),
dA->ReshapeLike(A)->template mutable_data<T, Context>(),
ctx());
}
if (dB->has_name()) {
math::Mul(
1,
dY.template data<T, Context>(),
A.template data<T, Context>(),
dB->ReshapeLike(B)->template mutable_data<T, Context>(),
ctx());
}
} else if (A.ndim() >= 2 && B.ndim() == 1) {
// Gradient of matrix-vector multiplication
if (dA->has_name()) {
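// dA is the outer product: dY (M x 1) * B (1 x N)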
math::Gemm(
CblasTrans,
transA_ ? CblasTrans : CblasNoTrans,
N_,
K1_,
M_,
CblasNoTrans,
CblasNoTrans,
A.dim(0),
A.dim(1),
1,
1.f,
dy,
a,
dY.template data<T, Context>(),
B.template data<T, Context>(),
0.f,
db,
dA->ReshapeLike(A)->template mutable_data<T, Context>(),
ctx());
} else {
math::Gemm(
transA_ ? CblasNoTrans : CblasTrans,
CblasNoTrans,
K1_,
N_,
M_,
}
if (dB->has_name()) {
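// dB = A^T * dY, computed as a transposed GEMV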
math::Gemv(
CblasTrans,
A.dim(0),
A.dim(1),
1.f,
a,
dy,
A.template data<T, Context>(),
dY.template data<T, Context>(),
0.f,
db,
dB->ReshapeLike(B)->template mutable_data<T, Context>(),
ctx());
}
}
}
template <class Context>
template <typename T>
void DotGradientOp<Context>::GemvImpl() {
N_ = transA_ ? Input(0).dim(0) : Input(0).dim(-1);
M_ = Input(0).count() / N_;
CHECK_EQ(N_, Input(1).dim(0))
<< "\nTensor(" << Input(0).name() << "): " << Input(0).DimString()
<< " can not Dot with Tensor"
<< "(" << Input(1).name() << "): " << Input(1).DimString();
auto* a = Input(0).template data<T, Context>();
auto* b = Input(1).template data<T, Context>();
auto* dy = Input(-1).template data<T, Context>();
if (Output(0)->has_name()) {
auto* da = Output(0)->template mutable_data<T, Context>();
math::Gemm(
CblasNoTrans, CblasNoTrans, M_, N_, 1, 1.f, dy, b, 0.f, da, ctx());
}
if (Output(1)->has_name()) {
auto* db = Output(1)->template mutable_data<T, Context>();
math::Gemv(
transA_ ? CblasNoTrans : CblasTrans,
transA_ ? N_ : M_,
transA_ ? M_ : N_,
1.f,
a,
dy,
0.f,
db,
ctx());
}
}
template <class Context>
template <typename T>
void DotGradientOp<Context>::DoRunWithType() {
if (Input(0).ndim() == 1 && Input(1).ndim() == 1) {
DotImpl<T>();
} else if (Input(0).ndim() >= 2 && Input(1).ndim() == 2) {
GemmImpl<T>();
} else if (Input(0).ndim() >= 2 && Input(1).ndim() == 1) {
GemvImpl<T>();
} else {
LOG(FATAL) << "\nTensor(" << Input(0).name()
<< "): " << Input(0).DimString() << " can not Dot with Tensor"
<< "(" << Input(1).name() << "): " << Input(1).DimString();
LOG(FATAL) << "\nShapes " << A.DimString() << " and " << B.DimString()
<< " not aligned.";
}
}
template <class Context>
void DotGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
Output(1)->ReshapeLike(Input(1));
DispatchHelper<FloatingTensorTypes>::Call(this, Input(-1));
DispatchHelper<FloatingTensorTypes>::Call(this, Input(2));
}
DEPLOY_CPU(Dot);
......
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_OPERATORS_MATH_DOT_OP_H_
#define DRAGON_OPERATORS_MATH_DOT_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class DotOp final : public Operator<Context> {
public:
DotOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
transA_(OpArg<bool>("transA", false)),
transB_(OpArg<bool>("transB", false)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DotImpl();
template <typename T>
void GemmImpl();
template <typename T>
void GemvImpl();
template <typename T>
void DoRunWithType();
protected:
int64_t transA_, transB_;
int64_t M_, K1_, K2_, N_;
int64_t M1_, N1_, M2_, N2_;
};
template <class Context>
class DotGradientOp final : public Operator<Context> {
public:
DotGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
transA_(OpArg<bool>("transA", false)),
transB_(OpArg<bool>("transB", false)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DotImpl();
template <typename T>
void GemmImpl();
template <typename T>
void GemvImpl();
template <typename T>
void DoRunWithType();
protected:
int64_t transA_, transB_;
int64_t M_, K1_, K2_, N_;
int64_t M1_, N1_, M2_, N2_;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_MATH_DOT_OP_H_
......@@ -18,7 +18,7 @@
namespace dragon {
#define DECLARE_SIMPLE_UNARY_OP(name) \
#define DECLARE_ELEMENTWISE_OP(name) \
template <class Context> \
class name##Op final : public Operator<Context> { \
public: \
......@@ -31,18 +31,23 @@ namespace dragon {
void DoRunWithType(); \
};
#define DECLARE_SIMPLE_BINARY_OP(name) \
template <class Context> \
class name##Op final : public Operator<Context> { \
public: \
SIMPLE_CTOR_DTOR(name##Op); \
USE_OPERATOR_FUNCTIONS; \
\
void RunOnDevice() override; \
\
template <typename T> \
void DoRunWithType(); \
};
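// Axpby computes Y = alpha * X + beta * Y, following the BLAS-style naming.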
template <class Context>
class AxpbyOp final : public Operator<Context> {
public:
AxpbyOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
alpha_(OpArg<float>("alpha", 1.f)),
beta_(OpArg<float>("beta", 1.f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType(Tensor* X, Tensor* Y);
protected:
float alpha_, beta_;
};
inline vec32_t CheckOutputAliases(
const Tensor& A,
......@@ -64,87 +69,61 @@ inline vec32_t CheckOutputAliases(
return available_aliases;
}
inline void IsBroadcast(
const Tensor& A,
const Tensor& B,
int& rows,
int& cols,
int& kind,
Tensor* Y = nullptr) {
kind = -2;
if (A.count() == B.count()) {
if (Y != nullptr) Y->ReshapeLike(A);
kind = -1;
} else if (B.count() < A.count()) {
if (Y != nullptr) Y->ReshapeLike(A);
if (utils::math::IsRowwiseBroadcast(A.dims(), B.dims(), &rows, &cols)) {
kind = 0;
} else if (utils::math::IsColwiseBroadcast(
A.dims(), B.dims(), &rows, &cols)) {
kind = 1;
}
} else {
if (Y != nullptr) Y->ReshapeLike(B);
if (utils::math::IsRowwiseBroadcast(A.dims(), B.dims(), &rows, &cols)) {
kind = 2;
} else if (utils::math::IsColwiseBroadcast(
A.dims(), B.dims(), &rows, &cols)) {
kind = 3;
}
}
}
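// 'kind' encodes the broadcasting pattern: -1 for equal counts (plain
// elementwise), 0/1 for row-/col-wise broadcasting of B onto A, 2/3 for
// row-/col-wise broadcasting of A onto B, and -2 when no pattern matches.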
// Unary ElementwiseOp
DECLARE_ELEMENTWISE_OP(Abs);
DECLARE_ELEMENTWISE_OP(Ceil);
DECLARE_ELEMENTWISE_OP(Cos);
DECLARE_ELEMENTWISE_OP(Exp);
DECLARE_ELEMENTWISE_OP(Floor);
DECLARE_ELEMENTWISE_OP(IsInf);
DECLARE_ELEMENTWISE_OP(IsNaN);
DECLARE_ELEMENTWISE_OP(Log);
DECLARE_ELEMENTWISE_OP(Neg);
DECLARE_ELEMENTWISE_OP(Invert);
DECLARE_ELEMENTWISE_OP(Reciprocal);
DECLARE_ELEMENTWISE_OP(Round);
DECLARE_ELEMENTWISE_OP(Rsqrt);
DECLARE_ELEMENTWISE_OP(Sign);
DECLARE_ELEMENTWISE_OP(Sin);
DECLARE_ELEMENTWISE_OP(Sqrt);
DECLARE_ELEMENTWISE_OP(Square);
DECLARE_ELEMENTWISE_OP(AbsGradient);
DECLARE_ELEMENTWISE_OP(CosGradient);
DECLARE_ELEMENTWISE_OP(ExpGradient);
DECLARE_ELEMENTWISE_OP(LogGradient);
DECLARE_ELEMENTWISE_OP(NegGradient);
DECLARE_ELEMENTWISE_OP(ReciprocalGradient);
DECLARE_ELEMENTWISE_OP(RsqrtGradient);
DECLARE_ELEMENTWISE_OP(SignGradient);
DECLARE_ELEMENTWISE_OP(SinGradient);
DECLARE_ELEMENTWISE_OP(SqrtGradient);
DECLARE_ELEMENTWISE_OP(SquareGradient);
DECLARE_SIMPLE_UNARY_OP(Abs);
DECLARE_SIMPLE_UNARY_OP(Ceil);
DECLARE_SIMPLE_UNARY_OP(Cos);
DECLARE_SIMPLE_UNARY_OP(Exp);
DECLARE_SIMPLE_UNARY_OP(Floor);
DECLARE_SIMPLE_UNARY_OP(IsInf);
DECLARE_SIMPLE_UNARY_OP(IsNaN);
DECLARE_SIMPLE_UNARY_OP(Log);
DECLARE_SIMPLE_UNARY_OP(Neg);
DECLARE_SIMPLE_UNARY_OP(Invert);
DECLARE_SIMPLE_UNARY_OP(Reciprocal);
DECLARE_SIMPLE_UNARY_OP(Round);
DECLARE_SIMPLE_UNARY_OP(Rsqrt);
DECLARE_SIMPLE_UNARY_OP(Sign);
DECLARE_SIMPLE_UNARY_OP(Sin);
DECLARE_SIMPLE_UNARY_OP(Sqrt);
DECLARE_SIMPLE_UNARY_OP(Square);
DECLARE_SIMPLE_UNARY_OP(AbsGradient);
DECLARE_SIMPLE_UNARY_OP(CosGradient);
DECLARE_SIMPLE_UNARY_OP(ExpGradient);
DECLARE_SIMPLE_UNARY_OP(LogGradient);
DECLARE_SIMPLE_UNARY_OP(NegGradient);
DECLARE_SIMPLE_UNARY_OP(ReciprocalGradient);
DECLARE_SIMPLE_UNARY_OP(RsqrtGradient);
DECLARE_SIMPLE_UNARY_OP(SignGradient);
DECLARE_SIMPLE_UNARY_OP(SinGradient);
DECLARE_SIMPLE_UNARY_OP(SqrtGradient);
DECLARE_SIMPLE_UNARY_OP(SquareGradient);
#undef DECLARE_SIMPLE_UNARY_OP
// Binary ElementwiseOp
DECLARE_ELEMENTWISE_OP(Add);
DECLARE_ELEMENTWISE_OP(Sub);
DECLARE_ELEMENTWISE_OP(Mul);
DECLARE_ELEMENTWISE_OP(Div);
DECLARE_ELEMENTWISE_OP(Pow);
DECLARE_ELEMENTWISE_OP(Dot);
DECLARE_ELEMENTWISE_OP(Minimum);
DECLARE_ELEMENTWISE_OP(Maximum);
DECLARE_ELEMENTWISE_OP(Equal);
DECLARE_ELEMENTWISE_OP(NotEqual);
DECLARE_ELEMENTWISE_OP(Less);
DECLARE_ELEMENTWISE_OP(LessEqual);
DECLARE_ELEMENTWISE_OP(Greater);
DECLARE_ELEMENTWISE_OP(GreaterEqual);
DECLARE_ELEMENTWISE_OP(AddGradient);
DECLARE_ELEMENTWISE_OP(SubGradient);
DECLARE_ELEMENTWISE_OP(MulGradient);
DECLARE_ELEMENTWISE_OP(DivGradient);
DECLARE_ELEMENTWISE_OP(PowGradient);
DECLARE_ELEMENTWISE_OP(DotGradient);
DECLARE_ELEMENTWISE_OP(MinimumGradient);
DECLARE_ELEMENTWISE_OP(MaximumGradient);
DECLARE_SIMPLE_BINARY_OP(Add);
DECLARE_SIMPLE_BINARY_OP(Sub);
DECLARE_SIMPLE_BINARY_OP(Mul);
DECLARE_SIMPLE_BINARY_OP(Div);
DECLARE_SIMPLE_BINARY_OP(Pow);
DECLARE_SIMPLE_BINARY_OP(Minimum);
DECLARE_SIMPLE_BINARY_OP(Maximum);
DECLARE_SIMPLE_BINARY_OP(Equal);
DECLARE_SIMPLE_BINARY_OP(NotEqual);
DECLARE_SIMPLE_BINARY_OP(Less);
DECLARE_SIMPLE_BINARY_OP(LessEqual);
DECLARE_SIMPLE_BINARY_OP(Greater);
DECLARE_SIMPLE_BINARY_OP(GreaterEqual);
DECLARE_SIMPLE_BINARY_OP(AddGradient);
DECLARE_SIMPLE_BINARY_OP(SubGradient);
DECLARE_SIMPLE_BINARY_OP(MulGradient);
DECLARE_SIMPLE_BINARY_OP(DivGradient);
DECLARE_SIMPLE_BINARY_OP(PowGradient);
DECLARE_SIMPLE_BINARY_OP(MinimumGradient);
DECLARE_SIMPLE_BINARY_OP(MaximumGradient);
#undef DECLARE_SIMPLE_BINARY_OP
#undef DECLARE_ELEMENTWISE_OP
} // namespace dragon
......
......@@ -13,14 +13,14 @@ void FullyConnectedOp<Context>::DoRunWithType() {
// Determine the number of output channels
int64_t M = X.count(0, axis), K = X.count(axis), N;
if (num_output_ <= 0) {
if (out_channels_ <= 0) {
// Infer the "N" from the weights shape
N = W.count() / K;
CHECK_GT(N, 0) << "\nFailed to infer the N from "
<< "the weights shape: " << W.DimString();
} else {
// Use a fixed "N" from the argument
N = num_output_;
N = out_channels_;
}
vec64_t Y_dims(axis + 1);
......@@ -82,14 +82,14 @@ void FullyConnectedGradientOp<Context>::DoRunWithType() {
// Determine the number of output channels
int64_t M = X.count(0, axis), K = X.count(axis), N;
if (num_output_ <= 0) {
if (out_channels_ <= 0) {
// Infer the "N" from the weights shape
N = W.count() / K;
CHECK_GT(N, 0) << "\nFailed to infer the N from "
<< "the weights shape: " << W.DimString();
} else {
// Use a fixed "N" from the argument
N = num_output_;
N = out_channels_;
}
if (dX->has_name()) {
......
......@@ -22,7 +22,7 @@ class FullyConnectedOp final : public Operator<Context> {
public:
FullyConnectedOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
num_output_(OpArg<int64_t>("num_output", 0)),
out_channels_(OpArg<int64_t>("out_channels", 0)),
transW_(OpArg<int64_t>("transW", 1)) {}
USE_OPERATOR_FUNCTIONS;
......@@ -32,7 +32,7 @@ class FullyConnectedOp final : public Operator<Context> {
void DoRunWithType();
protected:
int64_t num_output_, transW_;
int64_t out_channels_, transW_;
};
template <class Context>
......@@ -40,7 +40,7 @@ class FullyConnectedGradientOp final : public Operator<Context> {
public:
FullyConnectedGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
num_output_(OpArg<int64_t>("num_output", 0)),
out_channels_(OpArg<int64_t>("out_channels", 0)),
transW_(OpArg<int64_t>("transW", 1)) {}
USE_OPERATOR_FUNCTIONS;
......@@ -50,7 +50,7 @@ class FullyConnectedGradientOp final : public Operator<Context> {
void DoRunWithType();
protected:
int64_t num_output_, transW_;
int64_t out_channels_, transW_;
};
} // namespace dragon
......
......@@ -5,7 +5,7 @@ namespace dragon {
template <class Context>
template <typename T>
void MatmulOp<Context>::DoRunWithType() {
void MatMulOp<Context>::DoRunWithType() {
auto &A = Input(0), &B = Input(1), *Y = Output(0);
CHECK_GE(A.ndim(), 2) << "\nTensor(" << A.name() + ") must be a matrix"
......@@ -51,13 +51,13 @@ void MatmulOp<Context>::DoRunWithType() {
}
template <class Context>
void MatmulOp<Context>::RunOnDevice() {
void MatMulOp<Context>::RunOnDevice() {
DispatchHelper<FloatingTensorTypes>::Call(this, Input(0));
}
template <class Context>
template <typename T>
void MatmulGradientOp<Context>::DoRunWithType() {
void MatMulGradientOp<Context>::DoRunWithType() {
auto &A = Input(0), &B = Input(1), &dY = Input(2);
auto *dA = Output(0), *dB = Output(1);
......@@ -154,32 +154,32 @@ void MatmulGradientOp<Context>::DoRunWithType() {
}
template <class Context>
void MatmulGradientOp<Context>::RunOnDevice() {
void MatMulGradientOp<Context>::RunOnDevice() {
DispatchHelper<FloatingTensorTypes>::Call(this, Input(0));
}
DEPLOY_CPU(Matmul);
DEPLOY_CPU(MatMul);
#ifdef USE_CUDA
DEPLOY_CUDA(Matmul);
DEPLOY_CUDA(MatMul);
#endif
DEPLOY_CPU(MatmulGradient);
DEPLOY_CPU(MatMulGradient);
#ifdef USE_CUDA
DEPLOY_CUDA(MatmulGradient);
DEPLOY_CUDA(MatMulGradient);
#endif
OPERATOR_SCHEMA(Matmul)
OPERATOR_SCHEMA(MatMul)
/* A, B */
.NumInputs(2)
/* Y */
.NumOutputs(1);
OPERATOR_SCHEMA(MatmulGradient)
OPERATOR_SCHEMA(MatMulGradient)
/* A, B, dY */
.NumInputs(3)
/* dA, dB */
.NumOutputs(2);
REGISTER_GRADIENT(Matmul, GenericGradientMaker);
REGISTER_GRADIENT(MatMul, GenericGradientMaker);
} // namespace dragon
......@@ -18,9 +18,9 @@
namespace dragon {
template <class Context>
class MatmulOp final : public Operator<Context> {
class MatMulOp final : public Operator<Context> {
public:
MatmulOp(const OperatorDef& def, Workspace* ws)
MatMulOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
transA_(OpArg<int64_t>("transA", 0)),
transB_(OpArg<int64_t>("transB", 0)) {}
......@@ -36,9 +36,9 @@ class MatmulOp final : public Operator<Context> {
};
template <class Context>
class MatmulGradientOp final : public Operator<Context> {
class MatMulGradientOp final : public Operator<Context> {
public:
MatmulGradientOp(const OperatorDef& def, Workspace* ws)
MatMulGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
transA_(OpArg<int64_t>("transA", 0)),
transB_(OpArg<int64_t>("transB", 0)) {}
......
......@@ -94,8 +94,8 @@ void GroupNormGradientOp<Context>::DoRunWithType() {
template <class Context>
void GroupNormGradientOp<Context>::RunOnDevice() {
DetermineBaseArguments();
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) {
DoRunWithType<float, float>();
} else if (XIsType(Input(0), float16)) {
......
......@@ -42,9 +42,6 @@ class GroupNormOpBase : public Operator<Context> {
// Check the channels and groups
CHECK_EQ(C_ % G_, 0) << "\nThe " << C_ << " channels "
<< "can not be split into " << G_ << " groups.";
if (G_ == C_ && X.ndim() == 2) {
LOG(WARNING) << "The 2d input will output all zeros.";
}
}
protected:
......
#include "dragon/operators/training/adam_update_op.h"
#include "dragon/core/workspace.h"
#include "dragon/operators/training/update_ops.h"
#include "dragon/utils/op_kernels.h"
namespace dragon {
template <class Context>
void AdamUpdateOp<Context>::Compute(Tensor* dX) {
auto* m = ws()->CreateTensor("/mnt/" + slot() + "/m")
->ReshapeLike(*dX)
->template mutable_data<float, Context>();
auto* v = ws()->CreateTensor("/mnt/" + slot() + "/v")
->ReshapeLike(*dX)
->template mutable_data<float, Context>();
void AdamUpdateOp<Context>::ComputeUpdate(Tensor* dX) {
t_++;
auto beta1 = param("beta1");
auto beta2 = param("beta2");
auto beta1 = Parameter("beta1"), beta2 = Parameter("beta2");
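// Fold both Adam bias corrections into a single scale on the learning rate:
// lr_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t)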
auto coef = sqrt(1.f - pow(beta2, t_)) / (1.f - pow(beta1, t_));
kernel::AdamUpdate(
dX->count(),
param("base_lr") * coef * lr_mult(),
Parameter("base_lr") * coef * this->lr_mult_,
beta1,
beta2,
param("eps"),
Parameter("eps"),
dX->template mutable_data<float, Context>(),
m,
v,
Slot("m")->ReshapeLike(*dX)->template mutable_data<float, Context>(),
Slot("v")->ReshapeLike(*dX)->template mutable_data<float, Context>(),
ctx());
}
......
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_OPERATORS_TRAINING_ADAM_UPDATE_OP_H_
#define DRAGON_OPERATORS_TRAINING_ADAM_UPDATE_OP_H_
#include "dragon/operators/training/update_op_base.h"
namespace dragon {
template <class Context>
class AdamUpdateOp final : public UpdateOpBase<Context> {
public:
AdamUpdateOp(const OperatorDef& def, Workspace* ws)
: UpdateOpBase<Context>(def, ws), t_(0) {}
USE_OPERATOR_FUNCTIONS;
USE_PARAM_UPDATE_FUNCTIONS;
void Compute(Tensor* dX) override;
protected:
int t_;
// float lr_, beta1_, beta2_, eps_;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_TRAINING_ADAM_UPDATE_OP_H_
#include "dragon/operators/training/nesterov_update_op.h"
#include "dragon/core/workspace.h"
#include "dragon/operators/training/update_ops.h"
#include "dragon/utils/op_kernels.h"
namespace dragon {
template <class Context>
void NesterovUpdateOp<Context>::Compute(Tensor* dX) {
auto* m = ws()->CreateTensor("/mnt/" + slot() + "/m")
->ReshapeLike(*dX)
->template mutable_data<float, Context>();
void NesterovUpdateOp<Context>::ComputeUpdate(Tensor* dX) {
kernel::NesterovUpdate(
dX->count(),
param("base_lr") * lr_mult(),
param("momentum"),
Parameter("base_lr") * this->lr_mult_,
Parameter("momentum"),
dX->template mutable_data<float, Context>(),
m,
Slot("m")->ReshapeLike(*dX)->template mutable_data<float, Context>(),
ctx());
}
......
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_OPERATORS_TRAINING_NESTEROV_UPDATE_OP_H_
#define DRAGON_OPERATORS_TRAINING_NESTEROV_UPDATE_OP_H_
#include "dragon/operators/training/update_op_base.h"
namespace dragon {
template <class Context>
class NesterovUpdateOp final : public UpdateOpBase<Context> {
public:
NesterovUpdateOp(const OperatorDef& def, Workspace* ws)
: UpdateOpBase<Context>(def, ws) {}
USE_OPERATOR_FUNCTIONS;
USE_PARAM_UPDATE_FUNCTIONS;
void Compute(Tensor* dX) override;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_TRAINING_NESTEROV_UPDATE_OP_H_
#include "dragon/operators/training/rmsprop_update_op.h"
#include "dragon/core/workspace.h"
#include "dragon/operators/training/update_ops.h"
#include "dragon/utils/op_kernels.h"
namespace dragon {
template <class Context>
void RMSPropUpdateOp<Context>::Compute(Tensor* dX) {
auto* m = ws()->CreateTensor("/mnt/" + slot() + "/m")
->ReshapeLike(*dX)
->template mutable_data<float, Context>();
auto* v = ws()->CreateTensor("/mnt/" + slot() + "/v")
->ReshapeLike(*dX)
->template mutable_data<float, Context>();
void RMSpropUpdateOp<Context>::ComputeUpdate(Tensor* dX) {
kernel::RMSPropUpdate(
dX->count(),
param("base_lr") * lr_mult(),
param("momentum"),
param("decay"),
param("eps"),
Parameter("base_lr") * this->lr_mult_,
Parameter("momentum"),
Parameter("decay"),
Parameter("eps"),
dX->template mutable_data<float, Context>(),
m,
v,
Slot("m")->ReshapeLike(*dX)->template mutable_data<float, Context>(),
Slot("v")->ReshapeLike(*dX)->template mutable_data<float, Context>(),
ctx());
}
DEPLOY_CPU(RMSPropUpdate);
DEPLOY_CPU(RMSpropUpdate);
#ifdef USE_CUDA
DEPLOY_CUDA(RMSPropUpdate);
DEPLOY_CUDA(RMSpropUpdate);
#endif
OPERATOR_SCHEMA(RMSPropUpdate)
OPERATOR_SCHEMA(RMSpropUpdate)
/* dX */
.NumInputs(1)
/* X */
.NumOutputs(1);
NO_GRADIENT(RMSPropUpdate);
NO_GRADIENT(RMSpropUpdate);
} // namespace dragon
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_OPERATORS_TRAINING_RMSPROP_UPDATE_OP_H_
#define DRAGON_OPERATORS_TRAINING_RMSPROP_UPDATE_OP_H_
#include "dragon/operators/training/update_op_base.h"
namespace dragon {
template <class Context>
class RMSPropUpdateOp final : public UpdateOpBase<Context> {
public:
RMSPropUpdateOp(const OperatorDef& def, Workspace* ws)
: UpdateOpBase<Context>(def, ws) {}
USE_OPERATOR_FUNCTIONS;
USE_PARAM_UPDATE_FUNCTIONS;
void Compute(Tensor* dX) override;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_TRAINING_RMSPROP_UPDATE_OP_H_
#include "dragon/operators/training/sgd_update_op.h"
#include "dragon/core/workspace.h"
#include "dragon/operators/training/update_ops.h"
#include "dragon/utils/op_kernels.h"
namespace dragon {
template <class Context>
void SGDUpdateOp<Context>::Compute(Tensor* dX) {
auto* m = ws()->CreateTensor("/mnt/" + slot() + "/m")
->ReshapeLike(*dX)
->template mutable_data<float, Context>();
void SGDUpdateOp<Context>::ComputeUpdate(Tensor* dX) {
// Momentum Correction, See arXiv:1706.02677
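// When the learning rate changes, the momentum term is scaled by
// lr_t / lr_{t-1} so the running buffer stays consistent with the new step.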
auto lr = param("base_lr") * lr_mult();
auto lr = Parameter("base_lr") * this->lr_mult_;
if (last_lr_ > 0) correction_ = lr / last_lr_;
last_lr_ = lr; // Record the last value
kernel::SGDUpdate(
dX->count(),
lr,
param("momentum") * correction_,
Parameter("momentum") * correction_,
dX->template mutable_data<float, Context>(),
m,
Slot("m")->ReshapeLike(*dX)->template mutable_data<float, Context>(),
ctx());
}
......
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_OPERATORS_TRAINING_SGD_UPDATE_OP_H_
#define DRAGON_OPERATORS_TRAINING_SGD_UPDATE_OP_H_
#include "dragon/operators/training/update_op_base.h"
namespace dragon {
template <class Context>
class SGDUpdateOp final : public UpdateOpBase<Context> {
public:
SGDUpdateOp(const OperatorDef& def, Workspace* ws)
: UpdateOpBase<Context>(def, ws), last_lr_(-1.f), correction_(1.f) {}
USE_OPERATOR_FUNCTIONS;
USE_PARAM_UPDATE_FUNCTIONS;
void Compute(Tensor* dX) override;
protected:
float last_lr_, correction_;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_TRAINING_SGD_UPDATE_OP_H_
#include "dragon/operators/training/update_op_base.h"
#include "dragon/core/workspace.h"
#include "dragon/utils/cast.h"
#include "dragon/operators/training/update_ops.h"
#include "dragon/utils/math_functions.h"
#include "dragon/utils/op_kernels.h"
namespace dragon {
template <class Context>
float UpdateOpBase<Context>::param(const string& name) const {
return ws()
->GetTensor(slot_ + "/" + name)
->template mutable_data<float, CPUContext>()[0];
Tensor* UpdateOpBase<Context>::Slot(const string& name) {
return Buffer(Output(0)->name() + "/" + name);
}
template <class Context>
float UpdateOpBase<Context>::Parameter(const string& name) const {
auto* P = ws()->GetTensor("/share/hyper/" + handle() + "/" + name);
return P->template mutable_data<float, CPUContext>()[0];
}
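// Slot() returns a per-parameter state tensor (e.g. a momentum buffer) keyed
// by the output name; Parameter() reads a scalar hyper-parameter shared
// through the workspace.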
template <class Context>
template <typename T>
void UpdateOpBase<Context>::Process(Tensor* dX, Tensor* X) {
void UpdateOpBase<Context>::AdjustGradient(Tensor* dX, Tensor* X) {
// Scale
auto scale_factor = param("scale_gradient");
if (scale_factor != 1.f) {
auto scale = Parameter("scale");
if (scale != 1.f) {
auto* dx = dX->template mutable_data<T, Context>();
math::Scale(dX->count(), scale_factor, dx, dx, ctx());
math::Scale(dX->count(), scale, dx, dx, ctx());
}
// Clip
auto clip_thresh = param("clip_gradient");
if (clip_thresh > 0.f) {
T sumsq_grad;
auto clip_norm = Parameter("clip_norm");
if (clip_norm > 0.f) {
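// Clip by the global L2 norm: if ||dX|| exceeds clip_norm,
// rescale dX by clip_norm / ||dX||.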
auto* dx = dX->template mutable_data<T, Context>();
math::Dot(dX->count(), dx, dx, &sumsq_grad, ctx());
auto l2_norm = sqrt(cast::to<float>(sumsq_grad));
if (l2_norm > clip_thresh) {
math::Scale(dX->count(), clip_thresh / l2_norm, dx, dx, ctx());
auto grad_norm = std::sqrt(math::Dot(dX->count(), dx, dx, ctx()));
if (grad_norm > clip_norm) {
math::Scale(dX->count(), clip_norm / grad_norm, dx, dx, ctx());
}
}
// L2 Decay
auto l2_decay = param("l2_decay") * decay_mult_;
if (l2_decay > 0) {
// Penalty
auto weight_decay = Parameter("weight_decay");
if (weight_decay > 0.f) {
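// Apply the L2 penalty: dX += weight_decay * decay_mult * X.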
if (XIsType((*X), float16)) {
kernel::MixedPrecL2Decay(
kernel::MixedPrecL2Penalty(
X->count(),
l2_decay,
weight_decay * decay_mult_,
X->template data<float16, Context>(),
dX->template mutable_data<float, Context>(),
ctx());
} else {
math::Axpy(
X->count(),
l2_decay,
weight_decay * decay_mult_,
X->template data<T, Context>(),
dX->template mutable_data<T, Context>(),
ctx());
......@@ -56,7 +57,7 @@ void UpdateOpBase<Context>::Process(Tensor* dX, Tensor* X) {
template <class Context>
template <typename T>
void UpdateOpBase<Context>::Apply(Tensor* dX, Tensor* X) {
void UpdateOpBase<Context>::ApplyUpdate(Tensor* dX, Tensor* X) {
if (XIsType((*X), float16)) {
kernel::MixedPrecUpdate(
X->count(),
......@@ -64,9 +65,9 @@ void UpdateOpBase<Context>::Apply(Tensor* dX, Tensor* X) {
X->template mutable_data<float16, Context>(),
ctx());
} else {
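// Apply the update in place: X <- X - dX.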
math::Axpy(
math::Sub(
X->count(),
-1.f,
X->template data<T, Context>(),
dX->template data<T, Context>(),
X->template mutable_data<T, Context>(),
ctx());
......@@ -85,19 +86,19 @@ void UpdateOpBase<Context>::RunOnDevice() {
<< "\nGot" << X->DimString() << " and " << dX.DimString();
if (XIsType(dX, float)) {
Process<float>(&dX, X);
Compute(&dX);
Apply<float>(&dX, X);
AdjustGradient<float>(&dX, X);
ComputeUpdate(&dX);
ApplyUpdate<float>(&dX, X);
} else if (XIsType(dX, float16)) {
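// float16 gradients are cast to float32 first; the update pipeline then
// runs in float32 while the parameter itself stays in float16.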
auto* dX_fp32 = ws()->CreateTensor(dX.name() + "/fp32");
auto* dX_cast = ws()->CreateTensor(dX.name() + "[float32]");
kernel::Cast(
dX.count(),
dX.template data<float16, Context>(),
dX_fp32->ReshapeLike(dX)->template mutable_data<float, Context>(),
dX_cast->ReshapeLike(dX)->template mutable_data<float, Context>(),
ctx());
Process<float>(dX_fp32, X);
Compute(dX_fp32);
Apply<float>(dX_fp32, X);
AdjustGradient<float>(dX_cast, X);
ComputeUpdate(dX_cast);
ApplyUpdate<float>(dX_cast, X);
} else {
LOG(FATAL) << TypeString(dX, {"float16", "float32"});
}
......
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_OPERATORS_TRAINING_UPDATE_OP_BASE_H_
#define DRAGON_OPERATORS_TRAINING_UPDATE_OP_BASE_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class UpdateOpBase : public Operator<Context> {
public:
UpdateOpBase(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
lr_mult_(OpArg<float>("lr_mult", 1.f)),
decay_mult_(OpArg<float>("decay_mult", 1.f)),
slot_(OpArg<string>("slot", "")) {
CHECK(!slot_.empty()) << "\nRequired a non-empty slot";
}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
virtual void Compute(Tensor* dX) = 0;
template <typename T>
void Process(Tensor* dX, Tensor* X);
template <typename T>
void Apply(Tensor* dX, Tensor* X);
string slot() {
return slot_ + "/" + Output(0)->name();
}
float param(const string& name) const;
float lr_mult() const {
return lr_mult_;
}
protected:
string slot_;
float lr_mult_, decay_mult_;
};
#define USE_PARAM_UPDATE_FUNCTIONS \
using UpdateOpBase<Context>::slot; \
using UpdateOpBase<Context>::param; \
using UpdateOpBase<Context>::lr_mult
} // namespace dragon
#endif // DRAGON_OPERATORS_TRAINING_UPDATE_OP_BASE_H_
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_OPERATORS_TRAINING_UPDATE_OPS_H_
#define DRAGON_OPERATORS_TRAINING_UPDATE_OPS_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class UpdateOpBase : public Operator<Context> {
public:
UpdateOpBase(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
lr_mult_(OpArg<float>("lr_mult", 1.f)),
decay_mult_(OpArg<float>("decay_mult", 1.f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
virtual void ComputeUpdate(Tensor* dX) = 0;
template <typename T>
void AdjustGradient(Tensor* dX, Tensor* X);
template <typename T>
void ApplyUpdate(Tensor* dX, Tensor* X);
Tensor* Slot(const string& name);
float Parameter(const string& name) const;
protected:
float lr_mult_, decay_mult_;
};
#define USE_PARAM_UPDATE_FUNCTIONS \
using UpdateOpBase<Context>::Slot; \
using UpdateOpBase<Context>::Parameter
template <class Context>
class SGDUpdateOp final : public UpdateOpBase<Context> {
public:
SGDUpdateOp(const OperatorDef& def, Workspace* ws)
: UpdateOpBase<Context>(def, ws), last_lr_(-1.f), correction_(1.f) {}
USE_OPERATOR_FUNCTIONS;
USE_PARAM_UPDATE_FUNCTIONS;
void ComputeUpdate(Tensor* dX) override;
protected:
float last_lr_, correction_;
};
template <class Context>
class NesterovUpdateOp final : public UpdateOpBase<Context> {
public:
NesterovUpdateOp(const OperatorDef& def, Workspace* ws)
: UpdateOpBase<Context>(def, ws) {}
USE_OPERATOR_FUNCTIONS;
USE_PARAM_UPDATE_FUNCTIONS;
void ComputeUpdate(Tensor* dX) override;
};
template <class Context>
class RMSpropUpdateOp final : public UpdateOpBase<Context> {
public:
RMSpropUpdateOp(const OperatorDef& def, Workspace* ws)
: UpdateOpBase<Context>(def, ws) {}
USE_OPERATOR_FUNCTIONS;
USE_PARAM_UPDATE_FUNCTIONS;
void ComputeUpdate(Tensor* dX) override;
};
template <class Context>
class AdamUpdateOp final : public UpdateOpBase<Context> {
public:
AdamUpdateOp(const OperatorDef& def, Workspace* ws)
: UpdateOpBase<Context>(def, ws), t_(0) {}
USE_OPERATOR_FUNCTIONS;
USE_PARAM_UPDATE_FUNCTIONS;
void ComputeUpdate(Tensor* dX) override;
protected:
int t_;
};
#undef USE_PARAM_UPDATE_FUNCTIONS
} // namespace dragon
#endif // DRAGON_OPERATORS_TRAINING_UPDATE_OPS_H_
......@@ -59,9 +59,9 @@ void BiasAddGradientOp<Context>::DoRunWithType() {
dB->Reshape({dY.dim(-1)});
}
math::ReduceSum(
3,
dims.size(),
dims.data(),
2,
axes.size(),
axes.data(),
1.f,
dY.template data<T, Context>(),
......
......@@ -16,7 +16,7 @@ void Conv2dOp<Context>::DoRunWithType() {
auto* y = Y->template mutable_data<T, Context>();
for (int i = 0; i < X.dim(0); ++i) {
Wx(x + i * x_ofs_, w, y + i * y_ofs_);
Wx(x + i * x_offset_, w, y + i * y_offset_);
}
if (HasBias()) {
......@@ -46,7 +46,7 @@ void Conv2dGradientOp<Context>::DoRunWithType() {
auto* w = W.template data<T, Context>();
auto* dx = dX->template mutable_data<T, Context>();
for (int i = 0; i < X.dim(0); ++i) {
Dx(dy + i * y_ofs_, w, dx + i * x_ofs_);
Dx(dy + i * y_offset_, w, dx + i * x_offset_);
}
}
......@@ -55,7 +55,7 @@ void Conv2dGradientOp<Context>::DoRunWithType() {
auto* x = X.template data<T, Context>();
auto* dw = dW->template mutable_data<T, Context>();
for (int i = 0; i < X.dim(0); ++i) {
Dw(dy + i * y_ofs_, x + i * x_ofs_, dw, i > 0);
Dw(dy + i * y_offset_, x + i * x_offset_, dw, i > 0);
}
}
......
......@@ -73,8 +73,8 @@ void CuDNNConv2dOp<Context>::ResetDesc() {
filter_desc_,
CuDNNType<T>::type,
format_,
num_output_ / cudnn_group_,
channels_ / group_,
out_channels_ / cudnn_group_,
in_channels_ / group_,
kshape_[0],
kshape_[1]));
#else
......@@ -82,14 +82,15 @@ void CuDNNConv2dOp<Context>::ResetDesc() {
filter_desc_,
CuDNNType<T>::type,
format_,
num_output_ / cudnn_group_,
channels_ / group_,
out_channels_ / cudnn_group_,
in_channels_ / group_,
kshape_[0],
kshape_[1]));
#endif
// Determine the bias shape
if (HasBias()) {
CuDNNSetBiasDesc<T>(&bias_desc_, X.ndim(), num_output_, data_format());
CuDNNSetBiasDesc<T>(
&bias_desc_, X.ndim(), out_channels_, data_format());
}
}
// Set the conv configuration
......@@ -179,16 +180,16 @@ void CuDNNConv2dOp<Context>::DoRunWithType() {
ctx()->cudnn_handle(),
CuDNNType<T>::one,
input_desc_,
x + x_ofs_ * g,
x + x_offset_ * g,
filter_desc_,
w + w_ofs_ * g,
w + w_offset_ * g,
conv_desc_,
fwd_algo_,
scratch,
cudnn_ws_nbytes_,
CuDNNType<T>::zero,
output_desc_,
y + y_ofs_ * g));
y + y_offset_ * g));
}
if (HasBias()) {
......@@ -217,11 +218,11 @@ void CuDNNConv2dOp<Context>::RunOnDevice() {
ConvOpBase<Context>::Reshape();
if (data_format() == "NCHW") {
x_ofs_ = Input(0).stride(0) / cudnn_group_;
y_ofs_ = Output(0)->stride(0) / cudnn_group_;
x_offset_ = Input(0).stride(0) / cudnn_group_;
y_offset_ = Output(0)->stride(0) / cudnn_group_;
} else if (data_format() == "NHWC") {
x_ofs_ = Input(0).dim(-1) / cudnn_group_;
y_ofs_ = Output(0)->dim(-1) / cudnn_group_;
x_offset_ = Input(0).dim(-1) / cudnn_group_;
y_offset_ = Output(0)->dim(-1) / cudnn_group_;
}
DispatchHelper<FloatingTensorTypes>::Call(this, Input(0));
......@@ -294,8 +295,8 @@ void CuDNNConv2dGradientOp<Context>::ResetDesc() {
filter_desc_,
CuDNNType<T>::type,
format_,
num_output_ / cudnn_group_,
channels_ / group_,
out_channels_ / cudnn_group_,
in_channels_ / group_,
kshape_[0],
kshape_[1]));
#else
......@@ -303,14 +304,15 @@ void CuDNNConv2dGradientOp<Context>::ResetDesc() {
filter_desc_,
CuDNNType<T>::type,
format_,
num_output_ / cudnn_group_,
channels_ / group_,
out_channels_ / cudnn_group_,
in_channels_ / group_,
kshape_[0],
kshape_[1]));
#endif
// Determine the bias shape
if (HasBias()) {
CuDNNSetBiasDesc<T>(&bias_desc_, X.ndim(), num_output_, data_format());
CuDNNSetBiasDesc<T>(
&bias_desc_, X.ndim(), out_channels_, data_format());
}
}
// Set the conv configuration
......@@ -470,16 +472,16 @@ void CuDNNConv2dGradientOp<Context>::DoRunWithType() {
ctx()->cudnn_handle(),
CuDNNType<T>::one,
output_desc_,
x + x_ofs_ * g,
x + x_offset_ * g,
input_desc_,
dy + y_ofs_ * g,
dy + y_offset_ * g,
conv_desc_,
bwd_filter_algo_,
scratch,
cudnn_ws_nbytes_,
CuDNNType<T>::zero,
filter_desc_,
dw + w_ofs_ * g));
dw + w_offset_ * g));
}
}
......@@ -491,16 +493,16 @@ void CuDNNConv2dGradientOp<Context>::DoRunWithType() {
ctx()->cudnn_handle(),
CuDNNType<T>::one,
filter_desc_,
w + w_ofs_ * g,
w + w_offset_ * g,
input_desc_,
dy + y_ofs_ * g,
dy + y_offset_ * g,
conv_desc_,
bwd_data_algo_,
scratch,
cudnn_ws_nbytes_,
CuDNNType<T>::zero,
output_desc_,
dx + x_ofs_ * g));
dx + x_offset_ * g));
}
}
}
......@@ -518,11 +520,11 @@ void CuDNNConv2dGradientOp<Context>::RunOnDevice() {
ConvOpBase<Context>::Reshape(true);
if (data_format() == "NCHW") {
x_ofs_ = Input(0).stride(0) / cudnn_group_;
y_ofs_ = Input(-1).stride(0) / cudnn_group_;
x_offset_ = Input(0).stride(0) / cudnn_group_;
y_offset_ = Input(-1).stride(0) / cudnn_group_;
} else if (data_format() == "NHWC") {
x_ofs_ = Input(0).dim(-1) / cudnn_group_;
y_ofs_ = Input(-1).dim(-1) / cudnn_group_;
x_offset_ = Input(0).dim(-1) / cudnn_group_;
y_offset_ = Input(-1).dim(-1) / cudnn_group_;
}
DispatchHelper<FloatingTensorTypes>::Call(this, Input(-1));
......
......@@ -8,12 +8,7 @@ template <class Context>
template <typename T>
void ConvTranspose2dOp<Context>::DoRunWithType() {
auto &X = Input(0), &W = Input(1), *Y = Output(0);
ConvOpBase<Context>::Reshape();
// Fix the output shape for im2col/col2im
for (int i = 0; i < num_axes_; i++) {
out_shape_[i] = X.dim(axis_ + i);
}
TENSOR_FILL(W, w_shape_);
auto* x = X.template data<T, Context>();
......@@ -21,7 +16,7 @@ void ConvTranspose2dOp<Context>::DoRunWithType() {
auto* y = Y->template mutable_data<T, Context>();
for (int i = 0; i < X.dim(0); ++i) {
Dx(x + i * x_ofs_, w, y + i * y_ofs_);
Dx(x + i * x_offset_, w, y + i * y_offset_);
}
if (HasBias()) {
......@@ -44,19 +39,14 @@ template <typename T>
void ConvTranspose2dGradientOp<Context>::DoRunWithType() {
auto &X = Input(0), &W = Input(1), &dY = Input(2);
auto *dX = Output(0), *dW = Output(1), *dB = Output(2);
ConvOpBase<Context>::Reshape(true);
// Fix the output shape for im2col/col2im
for (int i = 0; i < num_axes_; i++) {
out_shape_[i] = X.dim(axis_ + i);
}
if (dX->has_name()) {
auto* dy = dY.template data<T, Context>();
auto* w = W.template data<T, Context>();
auto* dx = dX->template mutable_data<T, Context>();
for (int i = 0; i < X.dim(0); ++i) {
Wx(dy + i * y_ofs_, w, dx + i * x_ofs_);
Wx(dy + i * y_offset_, w, dx + i * x_offset_);
}
}
......@@ -65,7 +55,7 @@ void ConvTranspose2dGradientOp<Context>::DoRunWithType() {
auto* dy = dY.template data<T, Context>();
auto* dw = dW->template mutable_data<T, Context>();
for (int i = 0; i < X.dim(0); ++i) {
Dw(x + i * x_ofs_, dy + i * y_ofs_, dw, i > 0);
Dw(x + i * x_offset_, dy + i * y_offset_, dw, i > 0);
}
}
......
......@@ -71,8 +71,8 @@ void CuDNNConvTranspose2dOp<Context>::ResetDesc() {
filter_desc_,
CuDNNType<T>::type,
format_,
channels_ / cudnn_group_,
num_output_ / group_,
in_channels_ / cudnn_group_,
out_channels_ / group_,
kshape_[0],
kshape_[1]));
#else
......@@ -80,14 +80,15 @@ void CuDNNConvTranspose2dOp<Context>::ResetDesc() {
filter_desc_,
CuDNNType<T>::type,
format_,
channels_ / cudnn_group_,
num_output_ / group_,
in_channels_ / cudnn_group_,
out_channels_ / group_,
kshape_[0],
kshape_[1]));
#endif
// Determine the bias shape
if (HasBias()) {
CuDNNSetBiasDesc<T>(&bias_desc_, X.ndim(), num_output_, data_format());
CuDNNSetBiasDesc<T>(
&bias_desc_, X.ndim(), out_channels_, data_format());
}
}
// Set the conv configuration
......@@ -180,16 +181,16 @@ void CuDNNConvTranspose2dOp<Context>::DoRunWithType() {
ctx()->cudnn_handle(),
CuDNNType<T>::one,
filter_desc_,
w + w_ofs_ * g,
w + w_offset_ * g,
input_desc_,
x + x_ofs_ * g,
x + x_offset_ * g,
conv_desc_,
fwd_algo_,
scratch,
cudnn_ws_nbytes_,
CuDNNType<T>::zero,
output_desc_,
y + y_ofs_ * g));
y + y_offset_ * g));
}
if (HasBias()) {
......@@ -218,11 +219,11 @@ void CuDNNConvTranspose2dOp<Context>::RunOnDevice() {
ConvOpBase<Context>::Reshape();
if (data_format() == "NCHW") {
x_ofs_ = Input(0).stride(0) / cudnn_group_;
y_ofs_ = Output(0)->stride(0) / cudnn_group_;
x_offset_ = Input(0).stride(0) / cudnn_group_;
y_offset_ = Output(0)->stride(0) / cudnn_group_;
} else if (data_format() == "NHWC") {
x_ofs_ = Input(0).dim(-1) / cudnn_group_;
y_ofs_ = Output(0)->dim(-1) / cudnn_group_;
x_offset_ = Input(0).dim(-1) / cudnn_group_;
y_offset_ = Output(0)->dim(-1) / cudnn_group_;
}
DispatchHelper<FloatingTensorTypes>::Call(this, Input(0));
......@@ -293,8 +294,8 @@ void CuDNNConvTranspose2dGradientOp<Context>::ResetDesc() {
filter_desc_,
CuDNNType<T>::type,
format_,
channels_ / cudnn_group_,
num_output_ / group_,
in_channels_ / cudnn_group_,
out_channels_ / group_,
kshape_[0],
kshape_[1]));
#else
......@@ -302,14 +303,15 @@ void CuDNNConvTranspose2dGradientOp<Context>::ResetDesc() {
filter_desc,
CuDNNType<T>::type,
format_,
channels_ / cudnn_group_,
num_output_ / group_,
in_channels_ / cudnn_group_,
out_channels_ / group_,
kshape_[0],
kshape_[1]));
#endif
// Determine the bias shape
if (HasBias()) {
CuDNNSetBiasDesc<T>(&bias_desc_, X.ndim(), num_output_, data_format());
CuDNNSetBiasDesc<T>(
&bias_desc_, X.ndim(), out_channels_, data_format());
}
}
// Set the conv configuration
......@@ -466,16 +468,16 @@ void CuDNNConvTranspose2dGradientOp<Context>::DoRunWithType() {
ctx()->cudnn_handle(),
CuDNNType<T>::one,
input_desc_,
dy + y_ofs_ * g,
dy + y_offset_ * g,
output_desc_,
x + x_ofs_ * g,
x + x_offset_ * g,
conv_desc_,
bwd_filter_algo_,
scratch,
cudnn_ws_nbytes_,
CuDNNType<T>::zero,
filter_desc_,
dw + w_ofs_ * g));
dw + w_offset_ * g));
}
}
......@@ -487,16 +489,16 @@ void CuDNNConvTranspose2dGradientOp<Context>::DoRunWithType() {
ctx()->cudnn_handle(),
CuDNNType<T>::one,
input_desc_,
dy + y_ofs_ * g,
dy + y_offset_ * g,
filter_desc_,
w + w_ofs_ * g,
w + w_offset_ * g,
conv_desc_,
bwd_data_algo_,
scratch,
cudnn_ws_nbytes_,
CuDNNType<T>::zero,
output_desc_,
dx + x_ofs_ * g));
dx + x_offset_ * g));
}
}
}
......@@ -514,11 +516,11 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunOnDevice() {
ConvOpBase<Context>::Reshape(true);
if (data_format() == "NCHW") {
x_ofs_ = Input(0).stride(0) / cudnn_group_;
y_ofs_ = Input(-1).stride(0) / cudnn_group_;
x_offset_ = Input(0).stride(0) / cudnn_group_;
y_offset_ = Input(-1).stride(0) / cudnn_group_;
} else if (data_format() == "NHWC") {
x_ofs_ = Input(0).dim(-1) / cudnn_group_;
y_ofs_ = Input(-1).dim(-1) / cudnn_group_;
x_offset_ = Input(0).dim(-1) / cudnn_group_;
y_offset_ = Input(-1).dim(-1) / cudnn_group_;
}
DispatchHelper<FloatingTensorTypes>::Call(this, Input(-1));
......
......@@ -10,10 +10,11 @@ namespace dragon {
template <class Context>
void ConvOpBase<Context>::ComputeOutShape() {
auto X_dims = Input(0).dims();
out_shape_.clear();
for (int i = 0; i < num_axes_; i++) {
if (!Transposed()) {
auto idm = x_shape_[axis_ + i];
auto idm = X_dims[axis_ + i];
auto dk = dilation_[i] * (kshape_[i] - 1) + 1;
if (!str::find(padding_, "SAME")) {
// Explicit pads
......@@ -32,7 +33,7 @@ void ConvOpBase<Context>::ComputeOutShape() {
} // SAME_LOWER or SAME
}
} else {
auto idm = x_shape_[axis_ + i];
auto idm = X_dims[axis_ + i];
auto dk = dilation_[i] * (kshape_[i] - 1) + 1;
if (!str::find(padding_, "SAME")) {
// Explicit pads
......@@ -79,13 +80,11 @@ template <class Context>
template <typename T>
void ConvOpBase<Context>::Wx(const T* x, const T* w, T* y, bool skip) {
auto* col = x;
if (!is_1x1_) {
auto* scratch = ws()->template data<T, Context>({col_dim_})[0];
if (!skip) Im2Col(x, scratch);
col = scratch;
}
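// For each group, Y_g = W_g * col_g, where col is the im2col buffer
// (or the input itself for 1x1 kernels).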
for (int g = 0; g < group_; g++) {
if (data_format() == "NCHW") {
math::Gemm(
......@@ -95,10 +94,10 @@ void ConvOpBase<Context>::Wx(const T* x, const T* w, T* y, bool skip) {
conv_out_dim_,
kernel_dim_,
1.f,
w + w_ofs_ * g,
col + col_ofs_ * g,
w + w_offset_ * g,
col + col_offset_ * g,
0.f,
y + output_ofs_ * g,
y + out_offset_ * g,
ctx());
} else if (data_format() == "NHWC") {
math::Gemm(
......@@ -121,10 +120,11 @@ template <class Context>
template <typename T>
void ConvOpBase<Context>::Pb(const T* bias, T* y) {
if (data_format() == "NCHW") {
kernel::BiasAdd(Input(0).dim(0), num_output_, out_dim_, y, bias, y, ctx());
kernel::BiasAdd(
Input(0).dim(0), out_channels_, out_dim_, y, bias, y, ctx());
} else if (data_format() == "NHWC") {
kernel::BiasAdd(
Input(0).dim(0) * out_dim_, num_output_, 1, y, bias, y, ctx());
Input(0).dim(0) * out_dim_, out_channels_, 1, y, bias, y, ctx());
}
}
......@@ -141,10 +141,10 @@ void ConvOpBase<Context>::Dx(const T* dy, const T* w, T* dx) {
conv_out_dim_,
conv_out_channels_ / group_,
1.f,
w + w_ofs_ * g,
dy + output_ofs_ * g,
w + w_offset_ * g,
dy + out_offset_ * g,
0.f,
col + col_ofs_ * g,
col + col_offset_ * g,
ctx());
} else if (data_format() == "NHWC") {
math::Gemm(
......@@ -168,13 +168,11 @@ template <class Context>
template <typename T>
void ConvOpBase<Context>::Dw(const T* dy, const T* x, T* dw, bool accum) {
auto* col = x;
if (!is_1x1_) {
auto* scratch = ws()->template data<T, Context>({col_dim_})[0];
Im2Col(x, scratch);
col = scratch;
}
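// Per group, dW_g accumulates dY_g * col_g^T; 'accum' keeps the
// contribution from previous items in the batch.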
for (int g = 0; g < group_; g++) {
if (data_format() == "NCHW") {
math::Gemm(
......@@ -184,10 +182,10 @@ void ConvOpBase<Context>::Dw(const T* dy, const T* x, T* dw, bool accum) {
kernel_dim_,
conv_out_dim_,
1.f,
dy + output_ofs_ * g,
col + col_ofs_ * g,
dy + out_offset_ * g,
col + col_offset_ * g,
accum ? 1.f : 0.f,
dw + w_ofs_ * g,
dw + w_offset_ * g,
ctx());
} else if (data_format() == "NHWC") {
math::Gemm(
......@@ -211,10 +209,10 @@ template <typename T>
void ConvOpBase<Context>::Db(const T* dy, T* db) {
vec32_t dims, axes;
if (data_format() == "NCHW") {
dims = {(int)Input(0).dim(0), (int)num_output_, (int)out_dim_};
dims = {(int)Input(0).dim(0), (int)out_channels_, (int)out_dim_};
axes = {0, 2};
} else if (data_format() == "NHWC") {
dims = {(int)Input(0).dim(0), (int)out_dim_, (int)num_output_};
dims = {(int)Input(0).dim(0), (int)out_dim_, (int)out_channels_};
axes = {0, 1};
}
math::ReduceSum(3, dims.data(), 2, axes.data(), 1.f, dy, db, ctx());
......@@ -223,16 +221,15 @@ void ConvOpBase<Context>::Db(const T* dy, T* db) {
template <class Context>
void ConvOpBase<Context>::Setup(int num_axes) {
num_axes_ = num_axes;
auto at = [&](const vec64_t& vec, int i) {
return i < vec.size() ? vec[i] : vec[0];
};
auto pads = OpArgs<int64_t>("pads");
auto strides = OpArgs<int64_t>("strides");
auto kshape = OpArgs<int64_t>("kernel_shape");
auto dilations = OpArgs<int64_t>("dilations");
auto at = [&](const vec64_t& vec, int i) {
return i < vec.size() ? vec[i] : vec[0];
};
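// A single scalar given for pads/strides/kernel_shape/dilations
// is broadcast to every spatial axis.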
for (int i = 0; i < num_axes; i++) {
pad_l_.push_back(at(pads, i));
stride_.push_back(at(strides, i));
......@@ -241,8 +238,9 @@ void ConvOpBase<Context>::Setup(int num_axes) {
}
if ((int64_t)pads.size() == (num_axes * 2)) {
for (int i = 0; i < num_axes; i++)
for (int i = 0; i < num_axes; i++) {
pad_r_.push_back(pads[num_axes + i]);
}
} else {
pad_r_.assign(pad_l_.begin(), pad_l_.end());
}
......@@ -264,63 +262,56 @@ void ConvOpBase<Context>::Reshape(bool backward) {
auto* Y_ref = backward ? &Input(-1) : Output(0);
// Determine the in/out channels
channels_ = data_format() == "NCHW" ? X.dim(1) : X.dim(-1);
if (num_output_ <= 0) {
in_channels_ = data_format() == "NCHW" ? X.dim(1) : X.dim(-1);
if (out_channels_ <= 0) {
// Infer the out channels from the weights shape
num_output_ = W.count() / channels_;
for (int i = 0; i < num_axes_; i++)
num_output_ /= kshape_[i];
CHECK_GT(num_output_, 0) << "\nFailed to infer the out channels "
<< "from weights: " << W.DimString();
out_channels_ = W.count() / (in_channels_ / group_);
for (int i = 0; i < num_axes_; i++) {
out_channels_ /= kshape_[i];
}
CHECK_GT(out_channels_, 0) << "\nFailed to infer the out channels "
<< "from weights: " << W.DimString();
}
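// Transposed convolution swaps the roles of the in/out channels
// for the underlying GEMM.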
if (Transposed()) {
conv_out_channels_ = channels_;
conv_in_channels_ = num_output_;
conv_out_channels_ = in_channels_;
conv_in_channels_ = out_channels_;
} else {
conv_out_channels_ = num_output_;
conv_in_channels_ = channels_;
conv_out_channels_ = out_channels_;
conv_in_channels_ = in_channels_;
}
// Determine the weight and bias shape
// The weight shape is stored in NCHW order regardless of the data format,
// so that the fans can be computed correctly.
w_shape_ = {conv_out_channels_, conv_in_channels_ / group_};
for (int i = 0; i < num_axes_; i++)
for (int i = 0; i < num_axes_; i++) {
w_shape_.push_back(kshape_[i]);
b_shape_ = {num_output_};
}
b_shape_ = {out_channels_};
// Determine the Y shape
x_shape_ = X.dims();
// Determine the output shape
ComputeOutShape();
if (backward) {
if (Output(0)->has_name()) Output(0)->ReshapeLike(X);
if (Output(1)->has_name()) Output(1)->ReshapeLike(W);
if (Output(2)->has_name()) Output(2)->Reshape({num_output_});
if (Output(2)->has_name()) Output(2)->Reshape({out_channels_});
} else {
vec64_t Y_dims{X.dim(0)};
if (data_format() == "NCHW") {
y_shape_ = {X.dim(0), num_output_};
for (int i = 0; i < num_axes_; i++)
y_shape_.push_back(out_shape_[i]);
Y_dims.push_back(out_channels_);
for (int i = 0; i < num_axes_; i++) {
Y_dims.push_back(out_shape_[i]);
}
} else if (data_format() == "NHWC") {
y_shape_ = {X.dim(0)};
for (int i = 0; i < num_axes_; i++)
y_shape_.push_back(out_shape_[i]);
y_shape_.push_back(num_output_);
}
Output(0)->Reshape(y_shape_);
}
// Determine the input shape for im2col/col2im
in_shape_.clear();
for (int i = 0; i < num_axes_; i++) {
if (Transposed()) {
in_shape_.push_back(Y_ref->dim(axis_ + i));
} else {
in_shape_.push_back(X.dim(axis_ + i));
for (int i = 0; i < num_axes_; i++) {
Y_dims.push_back(out_shape_[i]);
}
Y_dims.push_back(out_channels_);
}
Output(0)->Reshape(Y_dims);
}
// Determine the out spatial dim
// Determine the output dim
auto end_axis = X.ndim() - 1;
if (data_format() == "NCHW") {
if (Transposed()) {
......@@ -338,25 +329,31 @@ void ConvOpBase<Context>::Reshape(bool backward) {
out_dim_ = Y_ref->count(axis_, end_axis);
}
// Determine the misc
x_ofs_ = X.stride(0);
y_ofs_ = Y_ref->stride(0);
// Compute the miscellaneous offsets and dimensions
x_offset_ = X.stride(0);
y_offset_ = Y_ref->stride(0);
kernel_dim_ = conv_in_channels_ / group_;
for (int i = 0; i < num_axes_; i++)
for (int i = 0; i < num_axes_; i++) {
kernel_dim_ *= kshape_[i];
col_ofs_ = kernel_dim_ * conv_out_dim_;
w_ofs_ = conv_out_channels_ * kernel_dim_ / group_;
output_ofs_ = conv_out_channels_ * conv_out_dim_ / group_;
}
col_offset_ = kernel_dim_ * conv_out_dim_;
w_offset_ = conv_out_channels_ * kernel_dim_ / group_;
out_offset_ = conv_out_channels_ * conv_out_dim_ / group_;
// Determine the workspace size for col buffer
col_dim_ = kernel_dim_ * group_;
// Compute the arguments for im2col/col2im
in_shape_.clear();
for (int i = 0; i < num_axes_; i++) {
if (Transposed()) {
col_dim_ *= x_shape_[axis_ + i];
in_shape_.push_back(Y_ref->dim(axis_ + i));
out_shape_[i] = X.dim(axis_ + i);
} else {
col_dim_ *= out_shape_[i];
in_shape_.push_back(X.dim(axis_ + i));
}
}
col_dim_ = kernel_dim_ * group_;
for (int i = 0; i < num_axes_; i++) {
col_dim_ *= out_shape_[i];
}
}
#define INSTANTIATE_API(Context, T) \
......
......@@ -25,7 +25,7 @@ class ConvOpBase : public Operator<Context> {
ConvOpBase(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
padding_(OpArg<string>("padding", "VALID")),
num_output_(OpArg<int64_t>("num_output", 0)),
out_channels_(OpArg<int64_t>("out_channels", 0)),
group_(OpArg<int64_t>("group", 1)) {
if (data_format() == "NCHW") {
axis_ = 2;
......@@ -42,18 +42,13 @@ class ConvOpBase : public Operator<Context> {
vec64_t kshape_, stride_;
vec64_t pad_l_, pad_r_, dilation_;
vec64_t in_shape_, out_shape_;
vec64_t x_shape_, y_shape_;
vec64_t w_shape_, b_shape_;
vec64_t in_shape_, w_shape_, b_shape_, out_shape_;
string padding_;
int64_t is_1x1_, num_output_, group_;
int64_t group_;
int64_t axis_, num_axes_;
int64_t channels_, out_dim_;
int64_t conv_in_channels_, conv_out_channels_;
int64_t conv_out_dim_, kernel_dim_, col_dim_;
int64_t col_ofs_, output_ofs_;
int64_t w_ofs_, x_ofs_, y_ofs_;
int64_t in_channels_, out_channels_, out_dim_;
int64_t x_offset_, w_offset_, y_offset_;
DECLARE_ARGS_WITH_DESC(int64_t, output_shape);
DECLARE_ARGS_WITH_DESC(int64_t, output_padding);
......@@ -133,37 +128,42 @@ class ConvOpBase : public Operator<Context> {
LOG(FATAL) << "ConvNd has not been implemented.";
}
}
int64_t is_1x1_;
int64_t kernel_dim_, col_dim_;
int64_t col_offset_, out_offset_;
int64_t conv_in_channels_, conv_out_channels_, conv_out_dim_;
};
DEFINE_ARGS_WITH_DESC(int64_t, ConvOpBase, output_shape);
DEFINE_ARGS_WITH_DESC(int64_t, ConvOpBase, output_padding);
#define USE_CONVOLUTION_FUNCTIONS \
using ConvOpBase<Context>::Setup; \
using ConvOpBase<Context>::Reshape; \
using ConvOpBase<Context>::Transposed; \
using ConvOpBase<Context>::HasBias; \
using ConvOpBase<Context>::Wx; \
using ConvOpBase<Context>::Pb; \
using ConvOpBase<Context>::Dx; \
using ConvOpBase<Context>::Dw; \
using ConvOpBase<Context>::Db; \
using ConvOpBase<Context>::kshape_; \
using ConvOpBase<Context>::stride_; \
using ConvOpBase<Context>::pad_l_; \
using ConvOpBase<Context>::pad_r_; \
using ConvOpBase<Context>::dilation_; \
using ConvOpBase<Context>::group_; \
using ConvOpBase<Context>::channels_; \
using ConvOpBase<Context>::num_output_; \
using ConvOpBase<Context>::axis_; \
using ConvOpBase<Context>::num_axes_; \
using ConvOpBase<Context>::x_ofs_; \
using ConvOpBase<Context>::y_ofs_; \
using ConvOpBase<Context>::w_ofs_; \
using ConvOpBase<Context>::w_shape_; \
using ConvOpBase<Context>::b_shape_; \
using ConvOpBase<Context>::in_shape_; \
#define USE_CONVOLUTION_FUNCTIONS \
using ConvOpBase<Context>::Setup; \
using ConvOpBase<Context>::Reshape; \
using ConvOpBase<Context>::Transposed; \
using ConvOpBase<Context>::HasBias; \
using ConvOpBase<Context>::Wx; \
using ConvOpBase<Context>::Pb; \
using ConvOpBase<Context>::Dx; \
using ConvOpBase<Context>::Dw; \
using ConvOpBase<Context>::Db; \
using ConvOpBase<Context>::kshape_; \
using ConvOpBase<Context>::stride_; \
using ConvOpBase<Context>::pad_l_; \
using ConvOpBase<Context>::pad_r_; \
using ConvOpBase<Context>::dilation_; \
using ConvOpBase<Context>::group_; \
using ConvOpBase<Context>::in_channels_; \
using ConvOpBase<Context>::out_channels_; \
using ConvOpBase<Context>::axis_; \
using ConvOpBase<Context>::num_axes_; \
using ConvOpBase<Context>::x_offset_; \
using ConvOpBase<Context>::w_offset_; \
using ConvOpBase<Context>::y_offset_; \
using ConvOpBase<Context>::in_shape_; \
using ConvOpBase<Context>::w_shape_; \
using ConvOpBase<Context>::b_shape_; \
using ConvOpBase<Context>::out_shape_
} // namespace dragon
......
......@@ -10,14 +10,15 @@ template <typename T>
void DepthwiseConv2dOp<Context>::DoRunWithType() {
auto &X = Input(0), &W = Input(1), *Y = Output(0);
group_ = channels_ = data_format() == "NCHW" ? X.dim(1) : X.dim(-1);
CHECK_EQ(channels_, num_output_) << "\nExcepted in/out channels unchanged.";
group_ = data_format() == "NCHW" ? X.dim(1) : X.dim(-1);
ConvOpBase<Context>::Reshape();
CHECK_EQ(in_channels_, out_channels_)
<< "\nExcepted in/out channels to be same.";
TENSOR_FILL(W, w_shape_);
kernel::DepthwiseConv2d(
Input(0).dim(0),
channels_,
in_channels_,
in_shape_[0],
in_shape_[1],
out_shape_[0],
......@@ -54,13 +55,13 @@ void DepthwiseConv2dGradientOp<Context>::DoRunWithType() {
auto &X = Input(0), &W = Input(1), &dY = Input(2);
auto *dX = Output(0), *dW = Output(1), *dB = Output(2);
group_ = channels_ = data_format() == "NCHW" ? X.dim(1) : X.dim(-1);
group_ = data_format() == "NCHW" ? X.dim(1) : X.dim(-1);
ConvOpBase<Context>::Reshape(true);
if (dX->has_name()) {
kernel::DepthwiseConv2dGrad(
X.dim(0),
channels_,
in_channels_,
in_shape_[0],
in_shape_[1],
out_shape_[0],
......@@ -83,7 +84,7 @@ void DepthwiseConv2dGradientOp<Context>::DoRunWithType() {
if (dW->has_name()) {
kernel::DepthwiseConv2dWGrad(
X.dim(0),
channels_,
in_channels_,
in_shape_[0],
in_shape_[1],
out_shape_[0],
......
......@@ -12,14 +12,15 @@ template <typename T>
void CuDNNDepthwiseConv2dOp<Context>::DoRunWithType() {
auto &X = Input(0), &W = Input(1), *Y = Output(0);
group_ = channels_ = data_format() == "NCHW" ? X.dim(1) : X.dim(-1);
CHECK_EQ(channels_, num_output_) << "\nExcepted in/out channels unchanged.";
group_ = data_format() == "NCHW" ? X.dim(1) : X.dim(-1);
ConvOpBase<Context>::Reshape();
CHECK_EQ(in_channels_, out_channels_)
<< "\nExcepted in/out channels to be same.";
TENSOR_FILL(W, w_shape_);
kernel::DepthwiseConv2d(
X.dim(0),
channels_,
in_channels_,
in_shape_[0],
in_shape_[1],
out_shape_[0],
......@@ -40,7 +41,7 @@ void CuDNNDepthwiseConv2dOp<Context>::DoRunWithType() {
if (HasBias()) {
TENSOR_FILL(Input(2), b_shape_);
CuDNNSetBiasDesc<T>(&bias_desc_, 4, num_output_, data_format());
CuDNNSetBiasDesc<T>(&bias_desc_, 4, out_channels_, data_format());
CuDNNSetTensorDesc<T>(&output_desc_, Y->dims(), data_format());
CUDNN_CHECK(cudnnAddTensor(
ctx()->cudnn_handle(),
......@@ -64,13 +65,13 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::DoRunWithType() {
auto &X = Input(0), &W = Input(1), &dY = Input(2);
auto *dX = Output(0), *dW = Output(1), *dB = Output(2);
group_ = channels_ = data_format() == "NCHW" ? X.dim(1) : X.dim(-1);
group_ = data_format() == "NCHW" ? X.dim(1) : X.dim(-1);
ConvOpBase<Context>::Reshape(true);
if (dX->has_name()) {
kernel::DepthwiseConv2dGrad(
X.dim(0),
channels_,
in_channels_,
in_shape_[0],
in_shape_[1],
out_shape_[0],
......@@ -93,7 +94,7 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::DoRunWithType() {
if (dW->has_name()) {
kernel::DepthwiseConv2dWGrad(
X.dim(0),
channels_,
in_channels_,
in_shape_[0],
in_shape_[1],
out_shape_[0],
......@@ -115,7 +116,7 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::DoRunWithType() {
if (dB->has_name()) {
CuDNNSetTensorDesc<T>(&input_desc_, Input(-1).dims(), data_format());
CuDNNSetBiasDesc<T>(&bias_desc_, 4, num_output_, data_format());
CuDNNSetBiasDesc<T>(&bias_desc_, 4, out_channels_, data_format());
CUDNN_CHECK(cudnnConvolutionBackwardBias(
ctx()->cudnn_handle(),
CuDNNType<T>::one,
......
......@@ -50,7 +50,7 @@ void SpaceToDepthOp<Context>::DoRunWithType() {
if (data_format() == "NCHW") {
for (int i = 0; i < num_axes; i++) {
perm.insert(perm.begin() + 1, perm.back());
perm.pop_back(); // CRD mode
perm.pop_back(); // DCR mode
}
}
......
......@@ -10,61 +10,65 @@ package dragon;
// Store the serialized Tensor objects.
message TensorProto {
repeated int32 dims = 1;
enum DataType {
UNDEFINED = 0;
// Basic types.
FLOAT = 1;
INT32 = 2;
BYTE = 3;
STRING = 4;
// Less-commonly used data types.
BOOL = 5;
UINT8 = 6;
INT8 = 7;
UINT16 = 8;
INT16 = 9;
INT64 = 10;
FLOAT16 = 12;
DOUBLE = 13;
}
optional DataType data_type = 2 [default = FLOAT];
// For float.
repeated float float_data = 3 [packed = true];
// For int32, uint8, int8, uint16, int16, bool, and float16
// Note about float16: in storage we will basically convert float16 byte-wise
// to unsigned short and then store them in the int32_data field.
repeated int32 int32_data = 4 [packed = true];
// For bytes.
optional bytes byte_data = 5;
// For strings.
repeated bytes string_data = 6;
// For double.
repeated double double_data = 9 [packed = true];
// For int64.
repeated int64 int64_data = 10 [packed = true];
// Store the raw data, contents are serialized as little-endian.
optional bytes raw_data = 13;
// Optionally, a name for the tensor.
optional string name = 7;
repeated int32 dims = 1;
enum DataType {
UNDEFINED = 0;
// Basic types.
FLOAT = 1;
INT32 = 2;
BYTE = 3;
STRING = 4;
// Less-commonly used data types.
BOOL = 5;
UINT8 = 6;
INT8 = 7;
UINT16 = 8;
INT16 = 9;
INT64 = 10;
FLOAT16 = 12;
DOUBLE = 13;
}
optional DataType data_type = 2 [default = FLOAT];
// For float.
repeated float float_data = 3 [packed = true];
// For int32, uint8, int8, uint16, int16, bool, and float16
// Note about float16: in storage we will basically convert float16 byte-wise
// to unsigned short and then store them in the int32_data field.
repeated int32 int32_data = 4 [packed = true];
// For bytes.
optional bytes byte_data = 5;
// For strings.
repeated bytes string_data = 6;
// For double.
repeated double double_data = 9 [packed = true];
// For int64.
repeated int64 int64_data = 10 [packed = true];
// Store the raw data, contents are serialized as little-endian.
optional bytes raw_data = 13;
// Optionally, a name for the tensor.
optional string name = 7;
}
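A minimal sketch (assuming NumPy) of the float16 convention noted above: values are reinterpreted byte-wise as uint16 and carried in the int32_data field.

```python
import numpy as np

values = np.array([1.0, -2.5, 0.125], dtype=np.float16)
int32_data = values.view(np.uint16).astype(np.int32).tolist()       # what gets serialized
restored = np.array(int32_data, dtype=np.uint16).view(np.float16)   # what a reader recovers
assert (restored == values).all()
```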
// Record the filler of Tensor.
// This structure is kept for backward compatibility
// with caffe1, which relies on implicit initializers.
message TensorFillerProto {
optional string tensor = 1;
optional string type = 2 [default = 'constant'];
optional float value = 3 [default = 0];
optional float low = 4 [default = 0];
optional float high = 5 [default = 1];
optional float mean = 6 [default = 0];
optional float std = 7 [default = 1];
optional float scale = 8 [default = 3];
enum VarianceNorm { FAN_IN = 0; FAN_OUT = 1; FAN_AVG=2; }
optional VarianceNorm variance_norm = 9 [default = FAN_IN];
optional string tensor = 1;
optional string type = 2 [default = 'constant'];
optional float value = 3 [default = 0];
optional float low = 4 [default = 0];
optional float high = 5 [default = 1];
optional float mean = 6 [default = 0];
optional float std = 7 [default = 1];
optional float scale = 8 [default = 3];
enum VarianceNorm {
FAN_IN = 0;
FAN_OUT = 1;
FAN_AVG = 2;
}
optional VarianceNorm variance_norm = 9 [default = FAN_IN];
}
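A short sketch of such a filler in protobuf text format; the generated module path `dragon.proto.dragon_pb2` and the filler values are assumptions for illustration.

```python
from google.protobuf import text_format
from dragon.proto import dragon_pb2  # assumed generated module path

filler = text_format.Parse("""
    tensor: "conv1/param:0"
    type: "xavier"
    variance_norm: FAN_AVG
""", dragon_pb2.TensorFillerProto())
```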
// Store multiple TensorProto objects in one single proto.
......@@ -74,99 +78,99 @@ message TensorProtos {
// DeviceType that Dragon currently supports.
enum DeviceTypeProto {
// The default device.
PROTO_CPU = 0;
// NVIDIA's CUDA Environment.
PROTO_CUDA = 1;
// CAMBRICON's CNML Environment.
PROTO_CNML = 2;
// The default device.
PROTO_CPU = 0;
// NVIDIA's CUDA Environment.
PROTO_CUDA = 1;
// CAMBRICON's CNML Environment.
PROTO_CNML = 2;
}
// Device-specific options.
message DeviceOption {
// The type of device to dispatch executions.
optional DeviceTypeProto device_type = 1 [default = PROTO_CPU];
// The index of this device.
optional int32 device_id = 2 [default = 0];
// The random seed to start the random generator.
optional uint32 random_seed = 3 [default = 3];
// The type of device to dispatch executions.
optional DeviceTypeProto device_type = 1 [default = PROTO_CPU];
// The index of this device.
optional int32 device_id = 2 [default = 0];
// The random seed to start the random generator.
optional uint32 random_seed = 3 [default = 3];
}
// A named argument containing either singular float, integer and string
// values, or repeated float, int and string arrays.
message Argument {
// The name of this argument.
optional string name = 1;
// Store the float32 value.
optional float f = 2;
// Store the bool, int32, int64 value.
optional int64 i = 3;
// Store the string value.
optional bytes s = 4;
// Store the float32 values.
repeated float floats = 7;
// Store the bool, int32, int64 values.
repeated int64 ints = 8;
// Store the string values.
repeated bytes strings = 9;
// The name of this argument.
optional string name = 1;
// Store the float32 value.
optional float f = 2;
// Store the bool, int32, int64 value.
optional int64 i = 3;
// Store the string value.
optional bytes s = 4;
// Store the float32 values.
repeated float floats = 7;
// Store the bool, int32, int64 values.
repeated int64 ints = 8;
// Store the string values.
repeated bytes strings = 9;
}
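A brief sketch of filling a singular vs. a repeated field of `Argument` (again assuming the generated module is importable as `dragon.proto.dragon_pb2`):

```python
from dragon.proto import dragon_pb2  # assumed generated module path

scalar_arg = dragon_pb2.Argument(name='axis', i=1)                     # singular int
repeated_arg = dragon_pb2.Argument(name='kernel_shape', ints=[3, 3])   # repeated ints
```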
// Operator Definition
message OperatorDef {
// The name of inputs.
repeated string input = 1;
// The name of outputs.
repeated string output = 2;
// The optional name of this operator.
optional string name = 3;
// The operator type.
optional string type = 4;
// The arguments.
repeated Argument arg = 5;
// The device option that the operator should run under.
optional DeviceOption device_option = 6;
// The optional unique key for this operator.
// Set it to persist operators in the eager mode.
optional string cache_key = 7;
// The name of inputs.
repeated string input = 1;
// The name of outputs.
repeated string output = 2;
// The optional name of this operator.
optional string name = 3;
// The operator type.
optional string type = 4;
// The arguments.
repeated Argument arg = 5;
// The device option that the operator should run under.
optional DeviceOption device_option = 6;
// The optional unique key for this operator.
// Set it to persist operators in the eager mode.
optional string cache_key = 7;
}
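These fields map onto the helper used later in this commit, `proto_util.make_operator_def`. A brief sketch of building one operator definition; the tensor and op names are illustrative only.

```python
from dragon.core.framework import proto_util

op_def = proto_util.make_operator_def(
    name='Conv_1',                     # hypothetical op name
    op_type='Conv2d',                  # hypothetical op type
    inputs=['data', 'conv1/param:0'],
    outputs=['conv1_out'],
    device_option=proto_util.get_default_device_option(),
)
```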
// Record the gradient information
message GradientProto {
// The derivative target.
optional string cost = 1;
// The target with respect to?
optional string wrt = 2;
// The external gradient
optional string external = 3;
// The derivative target.
optional string cost = 1;
// The target with respect to?
optional string wrt = 2;
// The external gradient
optional string external = 3;
}
// Graph Definition
message GraphDef {
// The graph name.
optional string name = 1;
// The graph name.
optional string name = 1;
// The operators to execute.
repeated OperatorDef op = 2;
// The operators to execute.
repeated OperatorDef op = 2;
// The type of graph.
optional string graph_type = 3;
// The type of graph.
optional string graph_type = 3;
// The device option for this graph.
optional DeviceOption device_option = 5;
// The device option for this graph.
optional DeviceOption device_option = 5;
// The arguments.
repeated Argument arg = 6;
// The arguments.
repeated Argument arg = 6;
// The name of inputs.
repeated string input = 7;
// The name of outputs.
repeated string output = 8;
// The name of inputs.
repeated string input = 7;
// The name of outputs.
repeated string output = 8;
// The gradients information.
repeated GradientProto gradient = 9;
// The gradients information.
repeated GradientProto gradient = 9;
}
......@@ -2,7 +2,6 @@
// WARNING: This file is automatically generated! Please edit onnx.in.proto.
//
// Copyright (c) Facebook Inc. and Microsoft Corporation.
// Licensed under the MIT license.
......@@ -19,11 +18,12 @@ package onnx_dragon;
// 3) Definitions of built-in operators.
//
// This document describes the syntax of models and their computation graphs,
// as well as the standard data types. Together, they are referred to as the ONNX
// Intermediate Representation, or 'IR' for short.
// as well as the standard data types. Together, they are referred to as the
// ONNX Intermediate Representation, or 'IR' for short.
//
// The normative semantic specification of the ONNX IR is found in docs/IR.md.
// Definitions of the built-in neural network operators may be found in docs/Operators.md.
// Definitions of the built-in neural network operators may be found in
// docs/Operators.md.
// Notes
//
......@@ -35,10 +35,11 @@ package onnx_dragon;
// by sharing our working version of ONNX.
//
// Protobuf compatibility
//
// To simplify framework compatibility, ONNX is defined using the subset of protobuf
// that is compatible with both protobuf v2 and v3. This means that we do not use any
// protobuf features that are only available in one of the two versions.
//
// To simplify framework compatibility, ONNX is defined using the subset of
// protobuf that is compatible with both protobuf v2 and v3. This means that we
// do not use any protobuf features that are only available in one of the two
// versions.
//
// Here are the most notable contortions we have to carry out to work around
// these limitations:
......@@ -47,10 +48,10 @@ package onnx_dragon;
// of key-value pairs, where order does not matter and duplicates
// are not allowed.
// Versioning
//
// ONNX versioning is specified in docs/IR.md and elaborated on in docs/Versioning.md
// ONNX versioning is specified in docs/IR.md and elaborated on in
// docs/Versioning.md
//
// To be compatible with both proto2 and proto3, we will use a version number
// that is not defined by the default value but an explicit enum number.
......@@ -60,8 +61,8 @@ enum Version {
_START_VERSION = 0;
// The version field is always serialized and we will use it to store the
// version that the graph is generated from. This helps us set up version
// control.
// For the IR, we are using simple numbers starting with with 0x00000001,
// control.
// For the IR, we are using simple numbers starting with 0x00000001,
// which was the version we published on Oct 10, 2017.
IR_VERSION_2017_10_10 = 0x0000000000000001;
......@@ -80,13 +81,13 @@ enum Version {
// Attributes
//
// A named attribute containing either singular float, integer, string, graph,
// and tensor values, or repeated float, integer, string, graph, and tensor values.
// An AttributeProto MUST contain the name field, and *only one* of the
// and tensor values, or repeated float, integer, string, graph, and tensor
// values. An AttributeProto MUST contain the name field, and *only one* of the
// following content fields, effectively enforcing a C/C++ union equivalent.
message AttributeProto {
// Note: this enum is structurally identical to the OpSchema::AttrType
// enum defined in schema.h. If you rev one, you likely need to rev the other.
// enum defined in schema.h. If you rev one, you likely need to rev the
// other.
enum AttributeType {
UNDEFINED = 0;
FLOAT = 1;
......@@ -103,12 +104,12 @@ message AttributeProto {
}
// The name field MUST be present for this version of the IR.
optional string name = 1; // namespace Attribute
// if ref_attr_name is not empty, ref_attr_name is the attribute name in parent function.
// In this case, this AttributeProto does not contain data, and it's a reference of attribute
// in parent scope.
// NOTE: This should ONLY be used in function (sub-graph). It's invalid to be used in main graph.
optional string name = 1; // namespace Attribute
// if ref_attr_name is not empty, ref_attr_name is the attribute name in
// parent function. In this case, this AttributeProto does not contain data,
// and it's a reference of attribute in parent scope. NOTE: This should ONLY
// be used in function (sub-graph). It's invalid to be used in main graph.
optional string ref_attr_name = 21;
// A human-readable documentation for this attribute. Markdown is allowed.
......@@ -120,16 +121,19 @@ message AttributeProto {
// which value field was in use. For IR_VERSION 0.0.2 or later, this
// field MUST be set and match the f|i|s|t|... field in use. This
// change was made to accommodate proto3 implementations.
optional AttributeType type = 20; // discriminator that indicates which field below is in use
// Exactly ONE of the following fields must be present for this version of the IR
optional float f = 2; // float
optional int64 i = 3; // int
optional bytes s = 4; // UTF-8 string
optional TensorProto t = 5; // tensor value
optional GraphProto g = 6; // graph
optional AttributeType type =
20; // discriminator that indicates which field below is in use
// Exactly ONE of the following fields must be present for this version of the
// IR
optional float f = 2; // float
optional int64 i = 3; // int
optional bytes s = 4; // UTF-8 string
optional TensorProto t = 5; // tensor value
optional GraphProto g = 6; // graph
// Do not use field below, it's deprecated.
// optional ValueProto v = 12; // value - subsumes everything but graph
// optional ValueProto v = 12; // value - subsumes everything but
// graph
repeated float floats = 7; // list of floats
repeated int64 ints = 8; // list of ints
......@@ -142,7 +146,7 @@ message AttributeProto {
// the shape of the value.
message ValueInfoProto {
// This field MUST be present in this version of the IR.
optional string name = 1; // namespace Value
optional string name = 1; // namespace Value
// This field MUST be present in this version of the IR.
optional TypeProto type = 2;
// A human-readable documentation for this value. Markdown is allowed.
......@@ -154,20 +158,20 @@ message ValueInfoProto {
// Computation graphs are made up of a DAG of nodes, which represent what is
// commonly called a "layer" or "pipeline stage" in machine learning frameworks.
//
// For example, it can be a node of type "Conv" that takes in an image, a filter
// For example, it can be a node of type "Conv" that takes in an image, a filter
// tensor and a bias tensor, and produces the convolved output.
message NodeProto {
repeated string input = 1; // namespace Value
repeated string output = 2; // namespace Value
repeated string input = 1; // namespace Value
repeated string output = 2; // namespace Value
// An optional identifier for this node in a graph.
// This field MAY be absent in this version of the IR.
optional string name = 3; // namespace Node
optional string name = 3; // namespace Node
// The symbolic identifier of the Operator to execute.
optional string op_type = 4; // namespace Operator
// The domain of the OperatorSet that specifies the operator named by op_type.
optional string domain = 7; // namespace Domain
optional string domain = 7; // namespace Domain
// Additional named attributes.
repeated AttributeProto attribute = 5;
......@@ -198,21 +202,21 @@ message ModelProto {
repeated OperatorSetIdProto opset_import = 8;
// The name of the framework or tool used to generate this model.
// This field SHOULD be present to indicate which implementation/tool/framework
// emitted the model.
// This field SHOULD be present to indicate which
// implementation/tool/framework emitted the model.
optional string producer_name = 2;
// The version of the framework or tool used to generate this model.
// This field SHOULD be present to indicate which implementation/tool/framework
// emitted the model.
// This field SHOULD be present to indicate which
// implementation/tool/framework emitted the model.
optional string producer_version = 3;
// Domain name of the model.
// We use reverse domain names as name space indicators. For example:
// `com.facebook.fair` or `com.microsoft.cognitiveservices`
//
// Together with `model_version` and GraphProto.name, this forms the unique identity of
// the graph.
// Together with `model_version` and GraphProto.name, this forms the unique
// identity of the graph.
optional string domain = 4;
// The version of the graph encoded. See Version enum below.
......@@ -232,25 +236,25 @@ message ModelProto {
// See https://developers.google.com/protocol-buffers/docs/proto3#maps
message StringStringEntryProto {
optional string key = 1;
optional string value= 2;
optional string value = 2;
};
// Graphs
//
// A graph defines the computational logic of a model and is comprised of a parameterized
// list of nodes that form a directed acyclic graph based on their inputs and outputs.
// This is the equivalent of the "network" or "graph" in many deep learning
// frameworks.
// A graph defines the computational logic of a model and is comprised of a
// parameterized list of nodes that form a directed acyclic graph based on their
// inputs and outputs. This is the equivalent of the "network" or "graph" in
// many deep learning frameworks.
message GraphProto {
// The nodes in the graph, sorted topologically.
repeated NodeProto node = 1;
// The name of the graph.
optional string name = 2; // namespace Graph
optional string name = 2; // namespace Graph
// A list of named tensor values, used to specify constant inputs of the graph.
// Each TensorProto entry must have a distinct name (within the list) that
// also appears in the input list.
// A list of named tensor values, used to specify constant inputs of the
// graph. Each TensorProto entry must have a distinct name (within the list)
// that also appears in the input list.
repeated TensorProto initializer = 5;
// A human-readable documentation for this graph. Markdown is allowed.
......@@ -264,13 +268,10 @@ message GraphProto {
// must be distinct. It is optional for a value to appear in value_info list.
repeated ValueInfoProto value_info = 13;
// DO NOT USE the following fields, they were deprecated from earlier versions.
// repeated string input = 3;
// repeated string output = 4;
// optional int64 ir_version = 6;
// optional int64 producer_version = 7;
// optional string producer_tag = 8;
// optional string domain = 9;
// DO NOT USE the following fields, they were deprecated from earlier
// versions. repeated string input = 3; repeated string output = 4; optional
// int64 ir_version = 6; optional int64 producer_version = 7; optional string
// producer_tag = 8; optional string domain = 9;
}
// Tensors
......@@ -297,8 +298,8 @@ message TensorProto {
DOUBLE = 11;
UINT32 = 12;
UINT64 = 13;
COMPLEX64 = 14; // complex with float32 real and imaginary components
COMPLEX128 = 15; // complex with float64 real and imaginary components
COMPLEX64 = 14; // complex with float32 real and imaginary components
COMPLEX128 = 15; // complex with float64 real and imaginary components
// Non-IEEE floating-point format based on IEEE754 single-precision
// floating-point number truncated to 16 bits.
......@@ -356,7 +357,7 @@ message TensorProto {
repeated int64 int64_data = 7 [packed = true];
// Optionally, a name for the tensor.
optional string name = 8; // namespace Value
optional string name = 8; // namespace Value
// A human-readable documentation for this tensor. Markdown is allowed.
optional string doc_string = 12;
......@@ -368,14 +369,16 @@ message TensorProto {
// When this raw_data field is used to store tensor value, elements MUST
// be stored in as fixed-width, little-endian order.
// Floating-point data types MUST be stored in IEEE 754 format.
// Complex64 elements must be written as two consecutive FLOAT values, real component first.
// Complex128 elements must be written as two consecutive DOUBLE values, real component first.
// Boolean type MUST be written one byte per tensor element (00000001 for true, 00000000 for false).
// Complex64 elements must be written as two consecutive FLOAT values, real
// component first. Complex128 elements must be written as two consecutive
// DOUBLE values, real component first. Boolean type MUST be written one byte
// per tensor element (00000001 for true, 00000000 for false).
//
// Note: the advantage of specific field rather than the raw_data field is
// that in some cases (e.g. int data), protobuf does a better packing via
// variable length storage, and may lead to smaller binary footprint.
// When this field is present, the data_type field MUST NOT be STRING or UNDEFINED
// When this field is present, the data_type field MUST NOT be STRING or
// UNDEFINED
optional bytes raw_data = 9;
// For double
......@@ -384,7 +387,8 @@ message TensorProto {
// and the corresponding imaginary component appearing in the
// subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
// is encoded as [1.0, 2.0, 3.0, 4.0])
// When this field is present, the data_type field MUST be DOUBLE or COMPLEX128
// When this field is present, the data_type field MUST be DOUBLE or
// COMPLEX128
repeated double double_data = 10 [packed = true];
// For uint64 and uint32 values
......@@ -400,12 +404,13 @@ message TensorShapeProto {
message Dimension {
oneof value {
int64 dim_value = 1;
string dim_param = 2; // namespace Shape
string dim_param = 2; // namespace Shape
};
// Standard denotation can optionally be used to denote tensor
// dimensions with standard semantic descriptions to ensure
// that operations are applied to the correct axis of a tensor.
// Refer to https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
// Refer to
// https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
// for pre-defined dimension denotations.
optional string denotation = 3;
};
......@@ -416,7 +421,6 @@ message TensorShapeProto {
//
// The standard ONNX data types.
message TypeProto {
message Tensor {
// This field MUST NOT have the value of UNDEFINED
// This field MUST be present for this version of the IR.
......@@ -424,16 +428,15 @@ message TypeProto {
optional TensorShapeProto shape = 2;
}
oneof value {
// The type of a tensor.
Tensor tensor_type = 1;
}
// An optional denotation can be used to denote the whole
// type with a standard semantic description as to what is
// stored inside. Refer to https://github.com/onnx/onnx/blob/master/docs/TypeDenotation.md#type-denotation-definition
// An optional denotation can be used to denote the whole
// type with a standard semantic description as to what is
// stored inside. Refer to
// https://github.com/onnx/onnx/blob/master/docs/TypeDenotation.md#type-denotation-definition
// for pre-defined type denotations.
optional string denotation = 6;
}
......@@ -445,7 +448,8 @@ message OperatorSetIdProto {
// The domain of the operator set being identified.
// The empty string ("") or absence of this field implies the operator
// set that is defined as part of the ONNX specification.
// This field MUST be present in this version of the IR when referring to any other operator set.
// This field MUST be present in this version of the IR when referring to any
// other operator set.
optional string domain = 1;
// The version of the operator set being identified.
......
......@@ -28,10 +28,10 @@ from dragon._api import losses
from dragon._api import math
from dragon._api import metrics
from dragon._api import nn
from dragon._api import optimizers
from dragon._api import random
from dragon._api import updaters
from dragon._api import vision
from dragon._api import workspace
from dragon._api import vision
# Virtual API
from dragon import vm
......@@ -56,7 +56,7 @@ from dragon.core.framework.context import name_scope
from dragon.core.framework.workspace import get_workspace
from dragon.core.framework.workspace import reset_workspace
from dragon.core.ops import tensorbind_eager as _
from dragon.core.ops import tensorbind_symbolic as _
from dragon.core.ops import tensorbind_symbol as _
from dragon.core.ops.array_ops import arange
from dragon.core.ops.array_ops import broadcast_to
from dragon.core.ops.array_ops import cast
......
......@@ -24,9 +24,9 @@ from dragon.core.ops.array_ops import min
from dragon.core.ops.array_ops import moments
from dragon.core.ops.array_ops import sum
from dragon.core.ops.math_ops import abs
from dragon.core.ops.math_ops import accumulate
from dragon.core.ops.math_ops import add
from dragon.core.ops.math_ops import affine
from dragon.core.ops.math_ops import axpby
from dragon.core.ops.math_ops import ceil
from dragon.core.ops.math_ops import clip
from dragon.core.ops.math_ops import cos
......@@ -45,7 +45,6 @@ from dragon.core.ops.math_ops import log
from dragon.core.ops.math_ops import matmul
from dragon.core.ops.math_ops import maximum
from dragon.core.ops.math_ops import minimum
from dragon.core.ops.math_ops import moving_average
from dragon.core.ops.math_ops import mul
from dragon.core.ops.math_ops import negative
from dragon.core.ops.math_ops import not_equal
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import as _absolute_import
from __future__ import division as _division
from __future__ import print_function as _print_function
from dragon.core.training.adam import Adam
from dragon.core.training.optimizer import Optimizer
from dragon.core.training.rmsprop import RMSprop
from dragon.core.training.sgd import Nesterov
from dragon.core.training.sgd import SGD
__all__ = [_s for _s in dir() if not _s.startswith('_')]
......@@ -29,7 +29,7 @@ from dragon.core.eager import context as eager_context
from dragon.core.eager.tensor import EagerTensor
from dragon.core.framework import context
from dragon.core.framework import workspace
from dragon.core.training import updater
from dragon.core.training import optimizer
from dragon.core.util import decorator
from dragon.core.util import inspect
from dragon.core.util import nest
......@@ -265,7 +265,7 @@ class FunctionGuard(object):
dummies.append(obj)
executables = [function_lib.create_function(inputs, outputs)]
for obj in dummies:
if isinstance(obj, updater.Updater):
if isinstance(obj, optimizer.Optimizer):
executables.append(function_lib.create_function(updater=obj))
self.inputs = inputs
self.outputs = returns
......
......@@ -78,22 +78,22 @@ def add_phase(graph_def, targets):
graph_def.arg.extend([proto_util.make_argument('phase', phase)])
def add_update_ops(graph_def, updater):
def add_update_ops(graph_def, optimizer):
"""Add the update operators for graph."""
if updater is None:
if optimizer is None:
return
grads, update_ops = [], []
extra_arguments = updater._extra_kwargs
extra_arguments['slot'] = updater._slot
extra_arguments = optimizer._extra_kwargs
extra_arguments['handle'] = optimizer._op_handle
# Generate update operators according to the updater.
for e in updater._param_group:
for e in optimizer._param_group:
(param, grad), arguments = e
if workspace.has_tensor(grad):
grads.append(grad)
arguments = dict(arguments, **extra_arguments)
update_ops.append(
proto_util.make_operator_def(
op_type=updater._op_type,
op_type=optimizer._op_type,
inputs=[grad],
outputs=[param],
name=OpDef.get_name(),
......@@ -102,7 +102,7 @@ def add_update_ops(graph_def, updater):
else:
logging.info('Skip to update Tensor({}).'.format(param))
# Insert a reduce op if the process group is found.
process_group = updater._process_group
process_group = optimizer._process_group
if process_group is not None:
update_ops.insert(
0, proto_util.make_operator_def(
......@@ -139,12 +139,15 @@ class Function(object):
# Collect the forward operators.
requires_grad = False
for output in outputs:
for i, output in enumerate(outputs):
op_info.merge_from(output)
op_info.add_target(output.id)
if output._grad is not None and \
output._grad.required():
requires_grad = True
try:
grad_info = output._grad
if grad_info and grad_info.required():
requires_grad = True
except AttributeError:
raise ValueError('Output[%d] is not a symbolic tensor.' % i)
# Handle givens.
if givens is not None:
......@@ -169,23 +172,23 @@ class Function(object):
])
del op_def.input[:len(op_def.input) // 2]
# Sort out the topology of states.
# Sort out the states.
op_defs = sorted(op_info._defs.items(), key=lambda d: d[0])
forward_ops = copy.deepcopy([v for k, v in op_defs])
# Generate the backward operators.
if requires_grad:
input_grads = {}
input_grads, grad_targets = {}, []
for output in outputs:
if hasattr(output, '_grad'):
grad_info = output._grad
if grad_info is not None:
if grad_info.input is not None:
input_grads[output.id] = grad_info.input.id
grad_info = output._grad
if grad_info is not None:
if grad_info.input is not None:
input_grads[output.id] = output._grad.input.id
grad_targets.append(output.id)
forward_ops, gradient_ops, _ = \
grad_maker.GradientMaker.make(
forward_ops=forward_ops,
targets=list(op_info._targets),
targets=grad_targets,
input_grads=input_grads,
)
else:
......
......@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.autograph.tensor import RefTensor
from dragon.core.autograph.tensor import TensorRef
from dragon.core.eager import context
from dragon.core.util import nest
......@@ -120,19 +120,12 @@ def gradients(ys, xs, grad_ys=None):
if grad_ys is not None:
y._grad.set_input(grad_ys[i])
for x in xs:
if not hasattr(x, '_grad') or \
x._grad is None:
if not hasattr(x, '_grad') or x._grad is None:
x._grad = GradientInfo(x)
y._grad.add_wrt(x.id)
x._grad.add_cost(y)
if i == 0:
dxs.append(
RefTensor(
name=x.id + '_grad',
shape=x.shape,
dtype=x.dtype,
)
)
dxs.append(TensorRef(x.id + '_grad', x.shape, x.dtype))
# Return the packed gradients.
return dxs
......@@ -15,7 +15,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.autograph.tensor import RefTensor
from dragon.core.autograph.tensor import TensorRef
from dragon.core.autograph import op_spec
from dragon.core.framework import context
from dragon.core.framework import proto_util
......@@ -76,26 +76,24 @@ class OpDef(object):
outputs = []
name_scope = context.get_name_scope()
for i in range(num_outputs):
outputs.append(RefTensor(
outputs.append(TensorRef(
workspace.get_dummy_name(
name_scope + (name if name else op_type),
suffix=':{}'.format(i),
domain='Tensor'))
)
domain='Tensor')))
else:
outputs = nest.flatten(outputs)
num_outputs = len(outputs)
# Construct Def.
op_idx, op_name = OpDef.get_index_and_name()
op_info._defs[op_idx] = \
proto_util.make_operator_def(
name=op_name,
op_type=op_type,
inputs=[input.id for input in inputs],
outputs=[output.id for output in outputs],
device_option=proto_util.get_default_device_option(),
**kwargs)
op_info._defs[op_idx] = proto_util.make_operator_def(
name=op_name,
op_type=op_type,
inputs=[input.id for input in inputs],
outputs=[output.id for output in outputs],
device_option=proto_util.get_default_device_option(),
**kwargs)
# Blend the op for outputs.
for output in outputs:
......
......@@ -147,7 +147,7 @@ def cast_spec(args, inputs, outputs):
outputs[0].dtype = args['dtype']
try:
outputs[0].shape = inputs[0].shape[:]
except TypeError:
except (TypeError, IndexError):
pass
return outputs
......@@ -192,7 +192,10 @@ def conv_spec(args, inputs, outputs):
out_shape = inputs[0].shape[:]
channel_axis = 1 if args['data_format'] == 'NCHW' else -1
spatial_axis = 2 if args['data_format'] == 'NCHW' else 1
out_shape[channel_axis] = args['num_output']
if 'out_channels' in args:
out_shape[channel_axis] = args['out_channels']
else:
out_shape[channel_axis] = inputs[1].shape[0]
for i in range(len(out_shape) - 2):
input_size = out_shape[i + spatial_axis]
k = args['kernel_shape'][i]
......@@ -219,7 +222,10 @@ def conv_transpose_spec(args, inputs, outputs):
out_shape = inputs[0].shape[:]
channel_axis = 1 if args['data_format'] == 'NCHW' else -1
spatial_axis = 2 if args['data_format'] == 'NCHW' else 1
out_shape[channel_axis] = args['num_output']
if 'out_channels' in args:
out_shape[channel_axis] = args['out_channels']
else:
out_shape[channel_axis] = inputs[1].shape[1]
for i in range(len(out_shape) - 2):
k = args['kernel_shape'][i]
s = args['strides'][i]
......@@ -274,20 +280,16 @@ def depth_to_space_spec(args, inputs, outputs):
@register('Dot')
def dot_spec(args, inputs, outputs):
outputs[0].dtype = inputs[0].dtype
ta, tb = args['transA'], args['transB']
try:
if len(inputs[0].shape) == 1:
a_shape, b_shape = inputs[0].shape[:], inputs[1].shape[:]
if len(a_shape) == 1 and len(b_shape) == 1:
outputs[0].shape = []
return outputs
except TypeError:
pass
try:
if len(inputs[0].shape) >= 2 and len(inputs[1].shape) in (1, 2):
out_shape = inputs[0].shape[1:] if ta else inputs[0].shape[:-1]
if len(inputs[1].shape) == 2:
out_shape.append(inputs[1].shape[0] if tb else inputs[1].shape[1])
outputs[0].shape = out_shape
return outputs
elif len(a_shape) == 2 and len(b_shape) == 2:
outputs[0].shape = [a_shape[0], b_shape[1]]
elif len(a_shape) == 0 and len(b_shape) == 0:
outputs[0].shape = []
elif len(a_shape) >= 2 and len(b_shape) == 1:
outputs[0].shape = a_shape[:-1]
except TypeError:
pass
return outputs
......@@ -298,6 +300,7 @@ def dot_spec(args, inputs, outputs):
'L1Loss',
'L2Loss',
'SigmoidCrossEntropy',
'SigmoidFocalLoss',
'SmoothL1Loss',
])
def eltwise_loss_spec(args, inputs, outputs):
......@@ -426,22 +429,22 @@ def flatten_spec(args, inputs, outputs):
@register('FullyConnected')
def fully_connected_spec(args, inputs, outputs):
outputs[0].dtype = inputs[0].dtype
axis, num_output = args['axis'], args['num_output']
axis, out_channels = args['axis'], args.get('out_channels', None)
while axis < 0:
try:
axis += len(inputs[0].shape)
except TypeError:
return outputs
outputs[0].shape = [None] * (axis + 1)
if num_output is None:
if out_channels is None:
try:
if args['transW']:
num_output = inputs[1].shape[0]
out_channels = inputs[1].shape[0]
else:
num_output = inputs[1].shape[1]
out_channels = inputs[1].shape[1]
except (TypeError, IndexError):
num_output = None
outputs[0].shape[axis] = num_output
out_channels = None
outputs[0].shape[axis] = out_channels
try:
outputs[0].shape[:axis] = inputs[0].shape[:axis]
except TypeError:
......@@ -488,7 +491,7 @@ def index_select_spec(args, inputs, outputs):
return outputs
@register(['IsInf', 'InNaN'])
@register(['IsInf', 'IsNaN'])
def is_spec(args, inputs, outputs):
_ = locals()
outputs[0].dtype = 'bool'
......@@ -507,7 +510,7 @@ def masked_select_spec(args, inputs, outputs):
return outputs
@register('Matmul')
@register('MatMul')
def matmul_spec(args, inputs, outputs):
outputs[0].dtype = inputs[0].dtype
ta, tb = args['transA'], args['transB']
......@@ -758,7 +761,7 @@ def resize_spec(args, inputs, outputs):
@register(['RoiPool', 'RoiAlign'])
def roi_pool_spec(args, inputs, outputs):
outputs[0].dtype = inputs[0].dtype
pool_h, pool_w = args['pool_h'], args['pool_w']
pool_h, pool_w = args['pooled_h'], args['pooled_w']
out_shape = None
try:
out_shape = inputs[0].shape[:]
......@@ -814,7 +817,6 @@ def slice_spec(args, inputs, outputs):
@register([
'NLLLoss',
'SigmoidFocalLoss',
'SoftmaxCrossEntropy',
'SparseSoftmaxCrossEntropy',
])
......
......@@ -420,7 +420,7 @@ class Tensor(types.TensorMetaclass):
The constant contains the value.
"""
return RefTensor('', dtype=dtype)._from_constant(value, name)
return Tensor('', dtype=dtype)._from_constant(value, name)
def _register_as(self, type, **kwargs):
"""Fill self with the specific type of filler."""
......@@ -463,13 +463,12 @@ class Tensor(types.TensorMetaclass):
"""Convert the value to a tensor."""
if not isinstance(value, numpy.ndarray):
value = numpy.array(value, self.dtype if self.dtype else 'float32')
return RefTensor(
return TensorRef(
name=workspace.get_dummy_name(
basename=context.get_name_scope() +
(name if name else 'Const'),
suffix=':0',
domain='Tensor'
),
domain='Tensor'),
shape=list(value.shape),
dtype=str(value.dtype),
).set_value(value)
......@@ -560,8 +559,8 @@ class Tensor(types.TensorMetaclass):
return self.__div__(other)
class RefTensor(object):
"""Create a reference tensor not involved with name scope."""
class TensorRef(object):
"""Create a reference not involved with name scope."""
def __new__(cls, name, shape=None, dtype=None):
tensor = Tensor('', shape=shape, dtype=dtype)
......
......@@ -9,7 +9,7 @@
#
# ------------------------------------------------------------
"""Some useful mappings are defined here."""
"""Constant mappings."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -104,7 +104,7 @@ class Operator(object):
"""Generate the OpDef from attributes."""
attributes = self.attributes()
self._def = proto_util.make_operator_cdef(
name='Generic',
name=attributes.get('name', 'GenericOp'),
cache_key=self._cache_key,
op_type=attributes['op_type'],
device_option=proto_util.get_device_option(
......
......@@ -134,10 +134,14 @@ def make_operator_cdef(
op_def = backend.OperatorDef()
op_def.ParseFrom(
make_operator_def(
op_type, inputs, outputs, name,
cache_key, device_option, arg, **kwargs
).SerializeToString()
)
op_type,
inputs,
outputs,
name,
cache_key,
device_option,
arg,
**kwargs).SerializeToString())
return op_def
......
......@@ -9,12 +9,7 @@
#
# ------------------------------------------------------------
"""Wrappers for the Workspace of C++ backend.
Flexible API is provided to manage the global resources
between the Python threads (quite different from C++).
"""
"""Generic interfaces of current default workspace."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -268,7 +268,7 @@ def leaky_relu(inputs, alpha=0.2, **kwargs):
@OpSchema.num_inputs(1)
def log_softmax(inputs, axis=1, **kwargs):
def log_softmax(inputs, axis=-1, **kwargs):
r"""Apply the composite of logarithm and softmax.
The **LogSoftmax** function is defined as:
......@@ -287,7 +287,7 @@ def log_softmax(inputs, axis=1, **kwargs):
----------
inputs : dragon.Tensor
The input tensor.
axis : int, optional, default=1
axis : int, optional, default=-1
The axis to reduce.
Returns
......@@ -351,7 +351,7 @@ def prelu(inputs, channel_shared=False, data_format='NCHW', **kwargs):
if context.executing_eagerly():
return op_lib \
.instantiate(data_format=data_format) \
.apply([inputs])
.apply(inputs)
else:
return op_lib.blend(**args)
......@@ -373,7 +373,7 @@ def relu(inputs, **kwargs):
Examples:
```python
x = dragon.constant([-1, 0, 1], 'float32')
x = dragon.constant([-1., 0., 1.])
print(dragon.nn.relu(x, inplace=False))
```
......@@ -449,10 +449,10 @@ def selu(inputs, alpha=1.67326, gamma=1.0507, **kwargs):
.. math::
\text{SELU}(x) = \gamma *
\begin{cases}
x, & \text{ if } x \geq 0 \\
\alpha * (e^{x} - 1), & \text{ otherwise }
\end{cases}
\begin{cases}
x, & \text{ if } x \geq 0 \\
\alpha * (e^{x} - 1), & \text{ otherwise }
\end{cases}
Examples:
......@@ -561,9 +561,8 @@ def softmax(inputs, axis=-1, **kwargs):
op_lib = activation_ops_lib.Softmax
if context.executing_eagerly():
return op_lib \
.instantiate(
axis=axis,
).apply([inputs], inplace=inplace)
.instantiate(axis=axis) \
.apply([inputs], inplace=inplace)
else:
return op_lib.blend(**args)
......
......@@ -64,11 +64,14 @@ def arange(start, stop=None, step=1, dtype='int64', **kwargs):
"""
args = parse_args(locals())
args['dtype'] = args['dtype'].lower()
op_lib = array_ops_lib.Arange
if stop is None:
args['slice'] = (start, step)
args['slice'] = (float(start), float(step))
else:
args['slice'] = (start, stop, step)
args['slice'] = (float(start), float(stop), float(step))
args.pop('start')
args.pop('stop')
args.pop('step')
op_lib = array_ops_lib.Arange
trainable = args.pop('trainable') if 'trainable' in args else False
if context.executing_eagerly():
return op_lib.instantiate(
......@@ -269,6 +272,8 @@ def cast(inputs, dtype, **kwargs):
.instantiate(dtype=dtype) \
.apply([inputs], inplace=inplace)
else:
if inputs.dtype == dtype:
return inputs
if inplace:
args['inputs'], args['outputs'] = [], [inputs]
return op_lib.blend(**args)
......@@ -627,16 +632,14 @@ def index_select(inputs, indices, axis=0, **kwargs):
return op_lib.blend(**args)
@OpSchema.num_inputs(1)
def masked_select(inputs, mask, **kwargs):
@OpSchema.num_inputs(2)
def masked_select(inputs, **kwargs):
"""Select the elements where the given mask is **1**.
Parameters
----------
inputs : dragon.Tensor
The input tensor.
mask : dragon.Tensor
The mask, with the same size as ``inputs``.
inputs : Sequence[dragon.Tensor]
The input and mask tensor.
Returns
-------
......@@ -647,9 +650,8 @@ def masked_select(inputs, mask, **kwargs):
args = parse_args(locals())
op_lib = array_ops_lib.MaskedSelect
if context.executing_eagerly():
return op_lib.instantiate().apply([inputs, mask])
return op_lib.instantiate().apply(inputs)
else:
args['inputs'] = [args['inputs'], args.pop('mask')]
return op_lib.blend(**args)
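A usage sketch of the new two-input signature, where the mask is passed as the second element of `inputs`; exposing the op as `dragon.masked_select` is an assumption here.

```python
import dragon

x = dragon.constant([1, 2, 3, 4], 'int64')
mask = dragon.constant([0, 1, 0, 1], 'bool')
y = dragon.masked_select([x, mask])  # keeps the elements where mask is 1
```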
......@@ -1047,7 +1049,7 @@ def pad(inputs, pads, mode='constant', value=0, **kwargs):
.instantiate(
ndim=len(pads_begin),
value=args['value'],
mode=mode,
mode=args['mode'],
).apply([inputs], args['pads'])
else:
return op_lib.blend(**args)
......@@ -1278,7 +1280,9 @@ def split(
size_splits = None
if slice_points is not None:
if len(slice_points) + 1 != num_splits:
raise ValueError('Excepted %d values for <slice_points>.')
raise ValueError(
'Expected %d values for <slice_points>.'
% len(slice_points))
if context.executing_eagerly():
return op_lib \
.instantiate(
......
......@@ -61,38 +61,36 @@ def assign(inputs, starts=None, sizes=None, **kwargs):
@OpSchema.num_inputs(1, 2)
def copy(inputs, **kwargs):
r"""Copy the value to ref.
.. math:: \text{Ref}[:] = \text{Value}[:]
"""Copy the input.
Examples:
```python
# Copy the content from ``x`` to ``xx``
# Copy ``x`` to ``y``
x = dragon.ones(shape=(2, 3))
xx = dragon.zeros(shape=(2, 4))
dragon.copy([xx, x])
y = dragon.zeros(shape=(2, 4))
dragon.copy([x, y])
# Create a new tensor initialized from ``x``
xxx = dragon.copy(x)
# Copy to a new tensor from ``x``
y = dragon.copy(x)
```
Parameters
----------
inputs : Sequence[dragon.Tensor]
The **ref** and **value**.
inputs : Union[dragon.Tensor, Sequence[dragon.Tensor]]
The input tensor.
Returns
-------
dragon.Tensor
The **ref**.
The output tensor.
"""
args = parse_args(locals())
inputs = nest.flatten(inputs)
if len(inputs) == 2:
args['inputs'] = [inputs[1]]
args['outputs'] = [inputs[0]]
args['inputs'] = nest.flatten(inputs)
if len(args['inputs']) == 2:
args['outputs'] = [args['inputs'][1]]
args['inputs'] = [args['inputs'][0]]
else:
args['outputs'] = None
op_lib = control_flow_ops_lib.Copy
......@@ -104,8 +102,8 @@ def copy(inputs, **kwargs):
return op_lib.blend('Copy', **args)
@OpSchema.num_inputs(2)
def masked_assign(inputs, mask, **kwargs):
@OpSchema.num_inputs(3)
def masked_assign(inputs, **kwargs):
r"""Assign the value to ref where mask is **1**.
.. math::
......@@ -118,24 +116,22 @@ def masked_assign(inputs, mask, **kwargs):
Parameters
----------
inputs : Sequence[dragon.Tensor]
The **ref** and **value**.
mask : dragon.Tensor
The mask, with the same size as **ref**.
The **ref**, **value** and **mask** tensor.
Returns
-------
dragon.Tensor
The **ref**.
The **ref** tensor.
"""
args = parse_args(locals())
inputs[1] = ops.scalar_to_tensor(inputs[1], inputs[0].dtype)
op_lib = control_flow_ops_lib.MaskedAssign
if context.executing_eagerly():
return op_lib.instantiate().apply(inputs, mask)
return op_lib.instantiate().apply(inputs)
else:
args.update({
'outputs': [args['inputs'][0]],
'inputs': [args['inputs'][1], mask],
'inputs': args['inputs'][1:],
})
return op_lib.blend(**args)
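A usage sketch of the three-input form, with `ref`, `value` and `mask` packed into `inputs`; the public name `dragon.masked_assign` is an assumption.

```python
import dragon

ref = dragon.zeros(shape=(4,))
value = dragon.ones(shape=(4,))
mask = dragon.constant([0, 1, 0, 1], 'bool')
dragon.masked_assign([ref, value, mask])  # ref takes value where mask is 1
```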
......@@ -47,7 +47,7 @@ class Assign(Operator):
sizes[i], 'int64',
)
def forward(self, ws, inputs, starts, sizes):
def forward(self, inputs, starts, sizes):
return self.dispatch(
[inputs[1]], [inputs[0]],
callback=lambda ws, handle:
......@@ -75,5 +75,5 @@ class MaskedAssign(Operator):
def attributes(self):
return {'op_type': 'MaskedAssign', 'arguments': {}}
def forward(self, inputs, mask):
return self.dispatch([inputs[1], mask], [inputs[0]], no_grad=True)
def forward(self, inputs):
return self.dispatch(inputs[1:], [inputs[0]], no_grad=True)
......@@ -88,9 +88,7 @@ def l1_loss(inputs, reduction='mean', **kwargs):
op_lib = loss_ops_lib.L1Loss
if context.executing_eagerly():
return op_lib \
.instantiate(
reduction=args['reduction'],
).apply(inputs)
.instantiate(reduction=args['reduction']).apply(inputs)
else:
return op_lib.blend(**args)
......
......@@ -46,55 +46,13 @@ def abs(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib.instantiate(op_type='Abs').apply(inputs)
return op_lib.instantiate(op_type='Abs').apply([inputs])
else:
return op_lib.blend('Abs', **args)
@OpSchema.num_inputs(1, 2147483647)
def accumulate(inputs, outputs=None, alpha=1., beta=1., **kwargs):
r"""Compute the element-wise accumulation from input to output.
.. math:: y = \alpha x + \beta y
If ``outputs`` is not provided, **zeros** will be used instead.
Parameters
----------
inputs : Sequence[dragon.Tensor]
The tensor :math:`x`.
outputs : Sequence[dragon.Tensor], optional
The tensor :math:`y`.
alpha : number, optional, default=1.
The value of :math:`\alpha`.
beta : number, optional, default=1.
The value of :math:`\beta`.
Returns
-------
Sequence[dragon.Tensor]
The tensor :math:`y`.
"""
args = parse_args(locals())
args['alpha'], args['beta'] = float(alpha), float(beta)
if types.is_tensor(inputs):
inputs = [inputs]
if outputs is not None and types.is_tensor(outputs):
args['outputs'] = [outputs]
op_lib = math_ops_lib.Accumulate
if context.executing_eagerly():
return op_lib \
.instantiate(
alpha=args['alpha'],
beta=args['beta'],
).apply(inputs, args['outputs'])
else:
return op_lib.blend(**args)
@OpSchema.num_inputs(2)
def add(inputs, **kwargs):
r"""Compute the element-wise addition.
......@@ -123,11 +81,9 @@ def add(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Add') \
.apply(inputs)
return op_lib.instantiate(op_type='Add').apply(inputs)
else:
return op_lib.blend('Add', **args)
......@@ -173,6 +129,48 @@ def affine(inputs, axis=1, num_axes=1, **kwargs):
return op_lib.blend(**args)
@OpSchema.num_inputs(1, 2147483647)
def axpby(inputs, outputs=None, alpha=1., beta=1., **kwargs):
r"""Compute the element-wise addition from input to output.
.. math:: y = \alpha x + \beta y
If ``outputs`` is not provided, **zeros** will be used instead.
Parameters
----------
inputs : Union[dragon.Tensor, Sequence[dragon.Tensor]]
The tensor :math:`x`.
outputs : Union[dragon.Tensor, Sequence[dragon.Tensor]], optional
The tensor :math:`y`.
alpha : number, optional, default=1.
The value of :math:`\alpha`.
beta : number, optional, default=1.
The value of :math:`\beta`.
Returns
-------
Union[dragon.Tensor, Sequence[dragon.Tensor]]
The tensor :math:`y`.
"""
args = parse_args(locals())
args['alpha'], args['beta'] = float(alpha), float(beta)
if types.is_tensor(inputs):
inputs = [inputs]
if outputs is not None and types.is_tensor(outputs):
args['outputs'] = [outputs]
op_lib = math_ops_lib.Axpby
if context.executing_eagerly():
return op_lib \
.instantiate(
alpha=args['alpha'],
beta=args['beta'],
).apply(inputs, args['outputs'])
else:
return op_lib.blend(**args)
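A usage sketch of `axpby`; exposing it under `dragon.math` is assumed from the API imports touched earlier in this commit.

```python
import dragon

x = dragon.constant([1., 2., 3.])
y = dragon.math.axpby(x, alpha=2., beta=1.)  # no ``outputs`` given: y = 2 * x + 0
```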
@OpSchema.num_inputs(2)
def bitwise_and(inputs, **kwargs):
r"""Compute the element-wise AND bitwise operation.
......@@ -285,9 +283,9 @@ def ceil(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib.instantiate(op_type='Ceil').apply(inputs)
return op_lib.instantiate(op_type='Ceil').apply([inputs])
else:
return op_lib.blend('Ceil', **args)
......@@ -324,7 +322,7 @@ def clip(inputs, low=None, high=None, **kwargs):
.instantiate(
low=args['low'],
high=args['high'],
).apply(inputs)
).apply([inputs])
else:
return op_lib.blend(**args)
......@@ -354,11 +352,9 @@ def cos(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Cos') \
.apply(inputs)
return op_lib.instantiate(op_type='Cos').apply([inputs])
else:
return op_lib.blend('Cos', **args)
......@@ -391,56 +387,48 @@ def div(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Div') \
.apply(inputs)
return op_lib.instantiate(op_type='Div').apply(inputs)
else:
return op_lib.blend('Div', **args)
@OpSchema.num_inputs(2)
def dot(inputs, transA=False, transB=False, **kwargs):
def dot(inputs, **kwargs):
r"""Compute the dot product.
.. math:: \text{out} = a \cdot b
If ``rank(a)`` == ``rank(b)`` == 1, computes **Vector Dot**:
If ``rank(a)`` == ``rank(b)`` == 1, compute vector product:
```python
x = dragon.ones((4,))
y = dragon.ones((4,))
print(dragon.math.dot([x, y])) # 4.0
x = dragon.ones((2,))
y = dragon.ones((2,))
print(dragon.math.dot([x, y])) # 2.0
```
If ``rank(a)`` >= 2, ``rank(b)`` == 2, computes **Matrix-Matrix Multiplication**:
If ``rank(a)`` == ``rank(b)`` == 2, compute matrix multiplication:
```python
x = dragon.ones((1, 2, 3))
x = dragon.ones((2, 3))
y = dragon.ones((3, 2))
print(dragon.math.dot([x, y])) # [[[3. 3.], [3. 3.]]]
print(dragon.math.dot([x.reshape((2, 3)), y]).reshape((1, 2, 2))) # Equivalent
print(dragon.math.matmul([x.reshape((2, 3)), y]).reshape((1, 2, 2))) # Equivalent
print(dragon.math.matmul([x, y])) # Equivalent
```
If ``rank(a)`` >= 2, ``rank(b)`` == 1, computes **Matrix-Vector Multiplication**:
If ``rank(a)`` >= 2, ``rank(b)`` == 1, compute matrix-vector multiplication:
```python
x = dragon.ones((1, 2, 3))
x = dragon.ones((2, 3))
y = dragon.ones((3,))
print(dragon.math.dot([x, y])) # [3. 3.]
print(dragon.math.dot([x.reshape((2, 3)), y]).reshape((1, 2))) # Equivalent
```
Parameters
----------
inputs : Sequence[dragon.Tensor]
The tensor :math:`a` and :math:`b`.
transA : bool, optional, default=False
**True** to transpose :math:`a` before computation.
transB : bool, optional, default=False
**True** to transpose :math:`b` before computation.
Returns
-------
......@@ -449,15 +437,11 @@ def dot(inputs, transA=False, transB=False, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Dot
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(
transA=transA,
transB=transB,
).apply(inputs)
return op_lib.instantiate(op_type='Dot').apply(inputs)
else:
return op_lib.blend(**args)
return op_lib.blend('Dot', **args)
@OpSchema.num_inputs(2)
......@@ -489,11 +473,9 @@ def equal(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Equal') \
.apply(inputs)
return op_lib.instantiate(op_type='Equal').apply(inputs)
else:
return op_lib.blend('Equal', **args)
......@@ -523,11 +505,9 @@ def exp(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Exp') \
.apply(inputs)
return op_lib.instantiate(op_type='Exp').apply([inputs])
else:
return op_lib.blend('Exp', **args)
......@@ -557,38 +537,36 @@ def floor(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib.instantiate(op_type='Floor').apply(inputs)
return op_lib.instantiate(op_type='Floor').apply([inputs])
else:
return op_lib.blend('Floor', **args)
@OpSchema.num_inputs(2, 3)
def fully_connected(inputs, num_output=None, axis=1, transW=True, **kwargs):
def fully_connected(inputs, axis=1, transpose_w=True, **kwargs):
r"""Compute the dense matrix multiplication along the given axes.
.. math:: y = Wx + b
The column of input matrix is determined by:
.. math:: \text{Col} = \text{Dim}(\text{Input}, \text{Axis})
.. math:: \text{Col} = \text{DimSince}(\text{Input}, \text{Axis})
Parameters
----------
inputs : Sequence[dragon.Tensor]
The tensor :math:`x`, :math:`W` and :math:`b`.
num_output : int, optional
The optional output dim.
axis : int, optional, default=1
The start axis to compute, can be negative.
transW : bool, optional, default=True
transpose_w : bool, optional, default=True
**True** to transpose :math:`W` before computation.
Returns
-------
dragon.Tensor
The **y**.
The output tensor.
"""
args = parse_args(locals())
......@@ -597,9 +575,11 @@ def fully_connected(inputs, num_output=None, axis=1, transW=True, **kwargs):
return op_lib \
.instantiate(
axis=axis,
transW=transW,
transpose_w=transpose_w,
).apply(inputs)
else:
args.pop('transpose_w')
args['transW'] = transpose_w
return op_lib.blend('FullyConnected', **args)
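A usage sketch of the renamed argument; `dragon.nn.fully_connected` as the public path is an assumption. With `transpose_w=True`, the weight is laid out as (out_channels, in_dim).

```python
import dragon

x = dragon.ones(shape=(8, 32))    # (N, in_dim)
w = dragon.ones(shape=(64, 32))   # (out_channels, in_dim)
b = dragon.ones(shape=(64,))
y = dragon.nn.fully_connected([x, w, b], axis=1, transpose_w=True)  # -> (8, 64)
```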
......@@ -631,12 +611,10 @@ def greater(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
inputs = ops.remove_binary_scalar(inputs)
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Greater') \
.apply(inputs)
return op_lib.instantiate(op_type='Greater').apply(inputs)
else:
return op_lib.blend('Greater', **args)
......@@ -670,11 +648,9 @@ def greater_equal(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='GreaterEqual') \
.apply(inputs)
return op_lib.instantiate(op_type='GreaterEqual').apply(inputs)
else:
return op_lib.blend('GreaterEqual', **args)
......@@ -709,9 +685,9 @@ def invert(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib.instantiate(op_type='Invert').apply(inputs)
return op_lib.instantiate(op_type='Invert').apply([inputs])
else:
return op_lib.blend('Invert', **args)
......@@ -741,11 +717,9 @@ def is_inf(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='IsInf') \
.apply(inputs)
return op_lib.instantiate(op_type='IsInf').apply([inputs])
else:
return op_lib.blend('IsInf', **args)
......@@ -775,11 +749,9 @@ def is_nan(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='IsNaN') \
.apply(inputs)
return op_lib.instantiate(op_type='IsNaN').apply([inputs])
else:
return op_lib.blend('IsNaN', **args)
......@@ -809,11 +781,9 @@ def log(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Log') \
.apply(inputs)
return op_lib.instantiate(op_type='Log').apply([inputs])
else:
return op_lib.blend('Log', **args)
......@@ -847,11 +817,9 @@ def less(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Less') \
.apply(inputs)
return op_lib.instantiate(op_type='Less').apply(inputs)
else:
return op_lib.blend('Less', **args)
......@@ -885,17 +853,15 @@ def less_equal(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='LessEqual') \
.apply(inputs)
return op_lib.instantiate(op_type='LessEqual').apply(inputs)
else:
return op_lib.blend('LessEqual', **args)
@OpSchema.num_inputs(2)
def matmul(inputs, transA=False, transB=False, **kwargs):
def matmul(inputs, transpose_a=False, transpose_b=False, **kwargs):
r"""Compute the matrix multiplication.
.. math:: \text{out} = a \times b
......@@ -920,16 +886,16 @@ def matmul(inputs, transA=False, transB=False, **kwargs):
a = dragon.ones((3, 2), 'float32')
b = dragon.ones((3, 3), 'float32')
print(dragon.math.matmul([a, b])) # ``a`` takes the wrong dimensions
print(dragon.math.matmul([a, b], transA=True)) # Ok
print(dragon.math.matmul([a, b], transpose_a=True)) # Ok
```
Parameters
----------
inputs : Sequence[dragon.Tensor]
The matrix :math:`a` and :math:`b`.
transA : bool, optional, default=False
transpose_a : bool, optional, default=False
**True** to transpose :math:`a` before computation.
transB : bool, optional, default=False
transpose_b : bool, optional, default=False
**True** to transpose :math:`b` before computation.
Returns
......@@ -939,15 +905,17 @@ def matmul(inputs, transA=False, transB=False, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Matmul
op_lib = math_ops_lib.MatMul
if context.executing_eagerly():
return op_lib \
.instantiate(
transA=transA,
transB=transB,
transpose_a=transpose_a,
transpose_b=transpose_b,
).apply(inputs)
else:
return op_lib.blend(**args)
args.pop('transpose_a')
args.pop('transpose_b')
return op_lib.blend(transA=transpose_a, transB=transpose_b, **args)
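For readers tracking the keyword rename, a brief usage sketch mirroring the docstring example above (graph mode still lowers the new keywords to the legacy ``transA``/``transB`` arguments, as the branch above shows):

```python
import dragon

a = dragon.ones((3, 2), 'float32')
b = dragon.ones((3, 3), 'float32')
# ``a`` is transposed to (2, 3) before the product, giving a (2, 3) result.
y = dragon.math.matmul([a, b], transpose_a=True)
```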
@OpSchema.num_inputs(2)
......@@ -969,11 +937,9 @@ def maximum(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Maximum') \
.apply(inputs)
return op_lib.instantiate(op_type='Maximum').apply(inputs)
else:
return op_lib.blend('Maximum', **args)
......@@ -997,37 +963,13 @@ def minimum(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Minimum') \
.apply(inputs)
return op_lib.instantiate(op_type='Minimum').apply(inputs)
else:
return op_lib.blend('Minimum', **args)
@OpSchema.num_inputs(1, 2147483647)
def moving_average(inputs, decay, **kwargs):
r"""Compute the moving average of input to output.
.. math:: y = (1 - decay) * x + decay * y
Parameters
----------
inputs : Sequence[dragon.Tensor]
The **x**.
decay : float, required
The decay factor.
Returns
-------
Sequence[dragon.Tensor]
The **y**.
"""
return accumulate(inputs, 1. - decay, decay, **kwargs)
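Since this helper is removed by the commit, here is a minimal NumPy sketch of the exponential moving average it computed (illustrative only; the new ``Axpby`` operator below covers the same update with ``alpha = 1 - decay`` and ``beta = decay``):

```python
import numpy as np

def moving_average(x, y, decay):
    # y <- (1 - decay) * x + decay * y
    return (1.0 - decay) * x + decay * y

x = np.array([1.0, 2.0, 3.0], dtype='float32')  # new statistic
y = np.zeros(3, dtype='float32')                # running buffer
print(moving_average(x, y, decay=0.9))          # [0.1 0.2 0.3]
```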
@OpSchema.num_inputs(2)
def mul(inputs, **kwargs):
r"""Compute the element-wise multiplication.
......@@ -1056,11 +998,9 @@ def mul(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Mul') \
.apply(inputs)
return op_lib.instantiate(op_type='Mul').apply(inputs)
else:
return op_lib.blend('Mul', **args)
......@@ -1088,11 +1028,9 @@ def negative(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Neg') \
.apply(inputs)
return op_lib.instantiate(op_type='Neg').apply([inputs])
else:
return op_lib.blend('Neg', **args)
......@@ -1126,11 +1064,9 @@ def not_equal(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='NotEqual') \
.apply(inputs)
return op_lib.instantiate(op_type='NotEqual').apply(inputs)
else:
return op_lib.blend('NotEqual', **args)
......@@ -1163,11 +1099,9 @@ def pow(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Pow') \
.apply(inputs)
return op_lib.instantiate(op_type='Pow').apply(inputs)
else:
return op_lib.blend('Pow', **args)
......@@ -1197,11 +1131,9 @@ def reciprocal(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Reciprocal') \
.apply(inputs)
return op_lib.instantiate(op_type='Reciprocal').apply([inputs])
else:
return op_lib.blend('Reciprocal', **args)
......@@ -1231,9 +1163,9 @@ def round(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib.instantiate(op_type='Round').apply(inputs)
return op_lib.instantiate(op_type='Round').apply([inputs])
else:
return op_lib.blend('Round', **args)
......@@ -1263,11 +1195,9 @@ def rsqrt(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Rsqrt') \
.apply(inputs)
return op_lib.instantiate(op_type='Rsqrt').apply([inputs])
else:
return op_lib.blend('Rsqrt', **args)
......@@ -1303,9 +1233,9 @@ def sign(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib.instantiate(op_type='Sign').apply(inputs)
return op_lib.instantiate(op_type='Sign').apply([inputs])
else:
return op_lib.blend('Sign', **args)
......@@ -1335,11 +1265,9 @@ def sin(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Sin') \
.apply(inputs)
return op_lib.instantiate(op_type='Sin').apply([inputs])
else:
return op_lib.blend('Sin', **args)
......@@ -1369,11 +1297,9 @@ def sqrt(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Sqrt') \
.apply(inputs)
return op_lib.instantiate(op_type='Sqrt').apply([inputs])
else:
return op_lib.blend('Sqrt', **args)
......@@ -1403,11 +1329,9 @@ def square(inputs, **kwargs):
"""
args = parse_args(locals())
op_lib = math_ops_lib.Unary
op_lib = math_ops_lib.UnaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Square') \
.apply(inputs)
return op_lib.instantiate(op_type='Square').apply([inputs])
else:
return op_lib.blend('Square', **args)
......@@ -1440,10 +1364,8 @@ def sub(inputs, **kwargs):
"""
args = parse_args(locals())
inputs = ops.remove_binary_scalar(inputs)
op_lib = math_ops_lib.Binary
op_lib = math_ops_lib.BinaryOp
if context.executing_eagerly():
return op_lib \
.instantiate(op_type='Sub') \
.apply(inputs)
return op_lib.instantiate(op_type='Sub').apply(inputs)
else:
return op_lib.blend('Sub', **args)
......@@ -16,49 +16,49 @@ from __future__ import print_function
from dragon.core.framework.ops import Operator
class Accumulate(Operator):
class Affine(Operator):
def __init__(self, key, dev, **kwargs):
super(Accumulate, self).__init__(key, dev, **kwargs)
self.alpha = kwargs.get('alpha', 1.)
self.beta = kwargs.get('beta', 1.)
super(Affine, self).__init__(key, dev, **kwargs)
self.axis = kwargs.get('axis', 1)
self.num_axes = kwargs.get('num_axes', 1)
def attributes(self):
return {
'op_type': 'Accumulate',
'op_type': 'Affine',
'arguments': {
'alpha': self.alpha,
'beta': self.beta,
'axis': self.axis,
'num_axes': self.num_axes,
}
}
def forward(self, inputs, outputs=None):
if outputs is None:
outputs = [self.alloc() for _ in range(len(inputs))]
return self.dispatch(inputs, outputs, no_grad=True)
def forward(self, inputs):
return self.dispatch(inputs, [self.alloc()])
class Affine(Operator):
class Axpby(Operator):
def __init__(self, key, dev, **kwargs):
super(Affine, self).__init__(key, dev, **kwargs)
self.axis = kwargs.get('axis', 1)
self.num_axes = kwargs.get('num_axes', 1)
super(Axpby, self).__init__(key, dev, **kwargs)
self.alpha = kwargs.get('alpha', 1.)
self.beta = kwargs.get('beta', 1.)
def attributes(self):
return {
'op_type': 'Affine',
'op_type': 'Axpby',
'arguments': {
'axis': self.axis,
'num_axes': self.num_axes,
'alpha': self.alpha,
'beta': self.beta,
}
}
def forward(self, inputs):
return self.dispatch(inputs, [self.alloc()])
def forward(self, inputs, outputs=None):
if outputs is None:
outputs = [self.alloc() for _ in range(len(inputs))]
return self.dispatch(inputs, outputs, no_grad=True)
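A framework-free sketch of the update this operator is assumed to perform, following the conventional axpby semantics ``y = alpha * x + beta * y`` applied to each input/output pair (the real op allocates fresh output tensors when none are given; zero buffers stand in for them here):

```python
import numpy as np

def axpby(xs, ys=None, alpha=1.0, beta=1.0):
    if ys is None:
        ys = [np.zeros_like(x) for x in xs]  # stand-in for self.alloc()
    return [alpha * x + beta * y for x, y in zip(xs, ys)]

print(axpby([np.array([1.0, 2.0])], alpha=0.5, beta=2.0))  # [array([0.5, 1. ])]
```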
class Binary(Operator):
class BinaryOp(Operator):
def __init__(self, key, dev, **kwargs):
super(Binary, self).__init__(key, dev, **kwargs)
super(BinaryOp, self).__init__(key, dev, **kwargs)
self.op_type = kwargs.get('op_type', '')
def attributes(self):
......@@ -95,37 +95,18 @@ class Clip(Operator):
return self.dispatch(inputs, [self.alloc()])
class Dot(Operator):
def __init__(self, key, dev, **kwargs):
super(Dot, self).__init__(key, dev, **kwargs)
self.transA = kwargs.get('transA', False)
self.transB = kwargs.get('transB', False)
def attributes(self):
return {
'op_type': 'Dot',
'arguments': {
'transA': self.transA,
'transB': self.transB,
}
}
def forward(self, inputs):
return self.dispatch(inputs, [self.alloc()])
class FullyConnected(Operator):
def __init__(self, key, dev, **kwargs):
super(FullyConnected, self).__init__(key, dev, **kwargs)
self.axis = kwargs.get('axis', 1)
self.transW = kwargs.get('transW', True)
self.transpose_w = kwargs.get('transpose_w', True)
def attributes(self):
return {
'op_type': 'FullyConnected',
'arguments': {
'axis': self.axis,
'transW': self.transW,
'transW': self.transpose_w,
}
}
......@@ -133,18 +114,18 @@ class FullyConnected(Operator):
return self.dispatch(inputs, [self.alloc()])
class Matmul(Operator):
class MatMul(Operator):
def __init__(self, key, dev, **kwargs):
super(Matmul, self).__init__(key, dev, **kwargs)
self.transA = kwargs.get('transA', False)
self.transB = kwargs.get('transB', False)
super(MatMul, self).__init__(key, dev, **kwargs)
self.transpose_a = kwargs.get('transpose_a', False)
self.transpose_b = kwargs.get('transpose_b', False)
def attributes(self):
return {
'op_type': 'Matmul',
'op_type': 'MatMul',
'arguments': {
'transA': self.transA,
'transB': self.transB,
'transA': self.transpose_a,
'transB': self.transpose_b,
}
}
......@@ -152,9 +133,9 @@ class Matmul(Operator):
return self.dispatch(inputs, [self.alloc()])
class Unary(Operator):
class UnaryOp(Operator):
def __init__(self, key, dev, **kwargs):
super(Unary, self).__init__(key, dev, **kwargs)
super(UnaryOp, self).__init__(key, dev, **kwargs)
self.op_type = kwargs.get('op_type', '')
def attributes(self):
......
......@@ -38,10 +38,10 @@ def batch_norm(
.. math::
y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
The moving average of stats are calculated as:
    The running average of statistics is calculated as:
.. math::
x_{moving} \leftarrow momentum * x_{moving} + (1 - momentum) * x_{stat}
x_{\text{running}} = \text{momentum} * x_{\text{running}} + (1 - \text{momentum}) * x_{\text{stat}}
Note that the number of inputs should be **5**, i.e.,
    this operator is implemented as the fused version.
......@@ -56,11 +56,11 @@ def batch_norm(
axis : int, optional, default=-1
The channel axis.
momentum : float, optional, default=0.9
The momentum of moving average.
        The momentum for the running average.
eps : float, optional, default=1e-5
The epsilon.
The value of :math:`\epsilon`.
use_stats : int, optional, default=-1
Whether to use global stats.
Whether to use estimated statistics or not.
Returns
-------
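A NumPy sketch of the documented behaviour (not the fused kernel), assuming a simple ``(N, C)`` input with channels on the last axis:

```python
import numpy as np

momentum, eps = 0.9, 1e-5
x = np.random.randn(8, 3).astype('float32')
gamma, beta = np.ones(3, 'float32'), np.zeros(3, 'float32')
running_mean, running_var = np.zeros(3, 'float32'), np.ones(3, 'float32')

# Normalize with the batch statistics (training mode).
mean, var = x.mean(axis=0), x.var(axis=0)
y = (x - mean) / np.sqrt(var + eps) * gamma + beta

# Running averages updated as in the formula above.
running_mean = momentum * running_mean + (1 - momentum) * mean
running_var = momentum * running_var + (1 - momentum) * var
```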
......@@ -168,7 +168,7 @@ def instance_norm(inputs, axis=-1, eps=1e-5, **kwargs):
@OpSchema.num_inputs(1)
def lp_normalize(inputs, axis=None, p=2, eps=1e-5, reduction='sum', **kwargs):
def lp_normalize(inputs, axis=None, p=2, eps=1e-12, reduction='sum', **kwargs):
r"""Apply the lp normalization.
The **Lp-Normalization** is defined as:
......@@ -200,7 +200,7 @@ def lp_normalize(inputs, axis=None, p=2, eps=1e-5, reduction='sum', **kwargs):
The order of the normalization.
axis : Union[int, Sequence[int]], optional
The axis to compute the norm.
eps : float, optional, default=1e-5
eps : float, optional, default=1e-12
The value of :math:`\epsilon`.
reduction : {'sum', 'mean'}, optional
The reduction method for norm.
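As a rough illustration of the default L2 case, a NumPy sketch under the assumption that ``eps`` floors the reduced squared norm (the exact placement of ``eps`` in the kernel may differ):

```python
import numpy as np

def l2_normalize(x, axis=None, eps=1e-12):
    norm = np.sqrt(np.maximum(np.square(x).sum(axis=axis, keepdims=True), eps))
    return x / norm

print(l2_normalize(np.array([[3.0, 4.0]]), axis=1))  # [[0.6 0.8]]
```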
......@@ -326,9 +326,9 @@ def local_response_norm(
beta=args['beta'],
bias=args['bias'],
data_format=data_format,
).apply(inputs)
).apply([inputs])
else:
return op_lib.blend(**args)
return op_lib.blend('LRN', **args)
@OpSchema.num_inputs(5)
......@@ -349,10 +349,10 @@ def sync_batch_norm(
.. math::
\text{out} = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
The moving average of statistics are calculated as:
    The running average of statistics is calculated as:
.. math::
x_{moving} \leftarrow momentum * x_{moving} + (1 - momentum) * x_{stat}
x_{\text{running}} = \text{momentum} * x_{\text{running}} + (1 - \text{momentum}) * x_{\text{stat}}
Note that the number of inputs should be **5**, i.e.,
    this operator is implemented as the fused version.
......@@ -367,11 +367,11 @@ def sync_batch_norm(
axis : int, optional, default=-1
The channel axis.
momentum : float, optional, default=0.9
The momentum of moving average.
        The momentum for the running average.
eps : float, optional, default=1e-5
The epsilon.
The value of :math:`\epsilon`.
use_stats : int, optional, default=-1
Whether to use global stats.
Whether to use estimated statistics or not.
process_group : ProcessGroup, optional
The group for communication.
......
......@@ -83,7 +83,7 @@ class LpNormalize(Operator):
}
}
def forward(self, inputs):
    def forward(self, inputs):
return self.dispatch(inputs, [self.alloc()])
......@@ -94,6 +94,7 @@ class LocalResponseNorm(Operator):
self.alpha = kwargs.get('alpha', 0.0001)
self.beta = kwargs.get('beta', 0.75)
self.bias = kwargs.get('bias', 1.)
self.data_format = kwargs.get('data_format', 'NCHW')
def attributes(self):
return {
......@@ -103,6 +104,7 @@ class LocalResponseNorm(Operator):
'alpha': self.alpha,
'beta': self.beta,
'bias': self.bias,
'data_format': self.data_format,
}
}
......
......@@ -69,9 +69,7 @@ def astype(self, dtype, inplace=False):
"""
return array_ops_lib.Cast \
.instantiate(
dtype=dtype,
).apply([self], inplace)
.instantiate(dtype=dtype).apply([self], inplace)
def constant(self, value=0):
......@@ -701,7 +699,7 @@ def uniform(self, low=0, high=1):
def _binary_op(a, b, op_type, outputs=None):
"""Apply the general binary operation."""
return math_ops_lib.Binary \
return math_ops_lib.BinaryOp \
.instantiate(op_type=op_type) \
.apply(ops.remove_binary_scalar([a, b]), outputs)
......@@ -710,7 +708,7 @@ def _masked_assign(ref, value, mask):
"""Apply the mask-assign operation."""
value = ops.scalar_to_tensor(value, ref.dtype)
return control_flow_ops_lib.MaskedAssign \
.instantiate().apply([ref, value], mask)
.instantiate().apply([ref, value, mask])
def _masked_select(x, mask):
......@@ -764,22 +762,20 @@ def _section_assign(ref, value, starts, sizes):
"""Apply the section-assign operation."""
value = ops.scalar_to_tensor(value, ref.dtype)
return control_flow_ops_lib.Assign \
.instantiate(
ndim=len(starts) if starts is not None else 0,
).apply([ref, value], starts, sizes)
.instantiate(ndim=len(starts) if starts is not None else 0) \
.apply([ref, value], starts, sizes)
def _section_select(x, starts, sizes):
"""Apply the section-select operation."""
return array_ops_lib.Slice \
.instantiate(
ndim=len(starts),
).apply([x], starts, sizes)
.instantiate(ndim=len(starts)).apply([x], starts, sizes)
def _unary_op(x, op_type):
"""Apply the general unary operation."""
return math_ops_lib.Unary.instantiate(op_type=op_type).apply(x)
return math_ops_lib.UnaryOp \
.instantiate(op_type=op_type).apply([x])
# Aliases
......@@ -801,13 +797,15 @@ EagerTensor.__iadd__ = iadd
EagerTensor.__idiv__ = idiv
EagerTensor.__imul__ = imul
EagerTensor.__isub__ = isub
EagerTensor.__itruediv__ = idiv
EagerTensor.__le__ = le
EagerTensor.__lt__ = lt
EagerTensor.__mul__ = mul
EagerTensor.__neg__ = neg
EagerTensor.__radd__ = radd
EagerTensor.__rdiv__ = rdiv
EagerTensor.__rmul__ = rmul
EagerTensor.__rtruediv__ = rdiv
EagerTensor.__rsub__ = rsub
EagerTensor.__rtruediv__ = rdiv
EagerTensor.__setitem__ = setitem
EagerTensor.__sub__ = sub
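With the newly registered ``__itruediv__``/``__rtruediv__`` aliases, Python 3 true division is routed through the same eager ops as ``__idiv__``/``__rdiv__``; a short sketch, assuming ``dragon.EagerTensor`` accepts NumPy data as it does elsewhere in this commit:

```python
import numpy as np
import dragon

x = dragon.EagerTensor(np.array([2.0, 4.0], 'float32'), copy=True)
x /= 2.0      # dispatched via __itruediv__
y = 1.0 / x   # dispatched via __rtruediv__
```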
......@@ -66,7 +66,9 @@ def astype(self, dtype, inplace=False):
`dragon.cast(...)`_ : Cast the data type of input.
"""
inputs, outputs = ([], [self]) if inplace else ([self], [])
if self.dtype == dtype:
return self
inputs, outputs = ([], [self]) if inplace else ([self], None)
return OpDef.apply('Cast', inputs, outputs, dtype=dtype)
......
......@@ -19,18 +19,18 @@ from dragon.core.framework.ops import Operator
class ParamUpdate(Operator):
def __init__(self, key, dev, **kwargs):
super(ParamUpdate, self).__init__(key, dev, **kwargs)
self.op_type = kwargs.get('op_type', 'ParamUpdate')
self.lr_mult = kwargs.get('lr_mult', 1.)
self.decay_mult = kwargs.get('decay_mult', 1.)
self.slot = kwargs.get('slot', '')
self.op_type = kwargs.get('op_type', '')
self.op_handle = kwargs.get('op_handle', '')
self.lr_mult = kwargs.get('lr_mult', 1)
self.decay_mult = kwargs.get('decay_mult', 1)
def attributes(self):
return {
'name': self.op_handle,
'op_type': self.op_type,
'arguments': {
'lr_mult': self.lr_mult,
'decay_mult': self.decay_mult,
'slot': self.slot,
'lr_mult': float(self.lr_mult),
'decay_mult': float(self.decay_mult),
},
}
......
......@@ -28,7 +28,7 @@ def bias_add(inputs, data_format='NCHW', **kwargs):
Parameters
----------
inputs : Sequence[dragon.Tensor]
The tensor ``input`` and ``bias``.
The ``input`` and ``bias``.
data_format : {'NCHW', 'NHWC'}, optional
The optional data format.
......@@ -53,7 +53,6 @@ def bias_add(inputs, data_format='NCHW', **kwargs):
@OpSchema.num_inputs(2, 3)
def conv2d(
inputs,
num_output=None,
kernel_shape=3,
strides=1,
pads=0,
......@@ -65,24 +64,12 @@ def conv2d(
):
r"""Apply the 2d convolution.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} +
2 * pad - \text{DK}_{size}) / stride + 1
\end{cases}
    Setting ``padding`` to **VALID** will use the value of ``pads``.
Parameters
----------
inputs : Sequence[dragon.Tensor]
The tensor ``x``, ``weight`` and ``bias``.
num_output : int, optional
The optional number of output channels.
kernel_shape : Sequence[int], optional, default=3
The shape of convolution kernel.
strides : Sequence[int], optional, default=1
......@@ -114,9 +101,7 @@ def conv2d(
args[key] = _normalize_pads(args[key], 2)
else:
args[key] = _normalize_tuple(args[key], 2)
op_lib = vision_ops_lib.Conv2d
if context.executing_eagerly():
weight_shape = inputs[1].shape
return op_lib \
......@@ -142,7 +127,6 @@ def conv2d(
@ArgHelper.repeated_desc('output_shape')
def conv2d_transpose(
inputs,
num_output=None,
kernel_shape=3,
strides=1,
pads=0,
......@@ -156,24 +140,12 @@ def conv2d_transpose(
):
r"""Apply the 2d deconvolution.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} - 1) *
stride + \text{DK}_{size} - 2 * pad
\end{cases}
    Setting ``padding`` to **VALID** will use the value of ``pads``.
Parameters
----------
inputs : Sequence[dragon.Tensor]
The tensor ``x``, ``weight`` and ``bias``.
num_output : int, optional
The optional number of output channels.
kernel_shape : Sequence[int], optional, default=3
The shape of convolution kernel.
strides : Sequence[int], optional, default=1
......@@ -200,7 +172,6 @@ def conv2d_transpose(
"""
args = parse_args(locals())
if padding not in ('VALID', 'SAME', 'SAME_UPPER', 'SAME_LOWER'):
raise ValueError('Unsupported padding algorithm: %s' % padding)
if data_format not in ('NCHW', 'NHWC'):
......@@ -212,9 +183,7 @@ def conv2d_transpose(
args[key] = _normalize_pads(args[key], 2)
else:
args[key] = _normalize_tuple(args[key], 2)
op_lib = vision_ops_lib.ConvTranspose2d
if context.executing_eagerly():
weight_shape = inputs[1].shape
return op_lib \
......@@ -240,7 +209,6 @@ def conv2d_transpose(
@OpSchema.num_inputs(2, 3)
def depthwise_conv2d(
inputs,
num_output=None,
kernel_shape=3,
strides=1,
pads=0,
......@@ -252,24 +220,12 @@ def depthwise_conv2d(
r"""Apply the 2d depthwise convolution.
`[Chollet, 2016] <https://arxiv.org/abs/1610.02357>`_.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} +
2 * pad - \text{DK}_{size}) / stride + 1
\end{cases}
    Setting ``padding`` to **VALID** will use the value of ``pads``.
Parameters
----------
inputs : Sequence[dragon.Tensor]
The tensor ``x``, ``weight`` and ``bias``.
num_output : int, optional
The optional number of output channels.
kernel_shape : Sequence[int], optional, default=3
The size(s) of convolution kernel.
strides : Sequence[int], optional, default=1
......@@ -290,7 +246,6 @@ def depthwise_conv2d(
"""
args = parse_args(locals())
if padding not in ('VALID', 'SAME', 'SAME_UPPER', 'SAME_LOWER'):
raise ValueError('Unsupported padding algorithm: %s' % padding)
if data_format not in ('NCHW', 'NHWC'):
......@@ -300,9 +255,7 @@ def depthwise_conv2d(
args[key] = _normalize_pads(args[key], 2)
else:
args[key] = _normalize_tuple(args[key], 2)
op_lib = vision_ops_lib.DepthwiseConv2d
if context.executing_eagerly():
weight_shape = inputs[1].shape
return op_lib \
......@@ -361,7 +314,7 @@ def depth_to_space(inputs, block_size, data_format='NCHW', **kwargs):
.instantiate(
block_size=block_size,
data_format=data_format,
).apply(inputs)
).apply([inputs])
else:
return op_lib.blend(**args)
......@@ -374,19 +327,13 @@ def pool2d(
pads=0,
padding='VALID',
ceil_mode=False,
mode='MAX',
mode='max',
data_format='NCHW',
global_pooling=False,
**kwargs
):
r"""Apply the 2d pooling.
The spatial output dimension is computed as:
.. math::
\text{Dim}_{out} = (\text{Dim}_{in} +
2 * pad - \text{K}_{size}) / stride + 1
    Setting ``padding`` to **VALID** will use the value of ``pads``.
If ``global_pooling`` is **True**, ``strides`` and ``pads`` will be set to **1** and **0**.
......@@ -410,7 +357,7 @@ def pool2d(
data_format : {'NCHW', 'NHWC'}, optional
The optional data format.
global_pooling : bool, optional, default=False
Whether to use global pooling.
        Whether to apply global pooling.
Returns
-------
......@@ -419,7 +366,6 @@ def pool2d(
"""
args = parse_args(locals())
if mode not in ('MAX', 'AVG'):
raise ValueError('Unsupported pooling mode: %s' % mode)
if padding not in ('VALID', 'SAME', 'SAME_UPPER', 'SAME_LOWER'):
......@@ -431,9 +377,7 @@ def pool2d(
args[key] = _normalize_pads(args[key], 2)
else:
args[key] = _normalize_tuple(args[key], 2)
op_lib = vision_ops_lib.Pool2d
if context.executing_eagerly():
return op_lib \
.instantiate(
......@@ -445,7 +389,7 @@ def pool2d(
mode=mode,
data_format=data_format,
global_pooling=global_pooling,
).apply(inputs)
).apply([inputs])
else:
return op_lib.blend(**args)
......@@ -526,7 +470,7 @@ def resize(
num_sizes=len(args['sizes']) if sizes is not None else 0,
num_scales=len(args['scales']) if scales is not None else 0,
data_format=data_format,
).apply(inputs, args['sizes'], args['scales'])
).apply([inputs], args['sizes'], args['scales'])
else:
return op_lib.blend(**args)
......@@ -668,7 +612,7 @@ def space_to_depth(inputs, block_size, data_format='NCHW', **kwargs):
.instantiate(
block_size=block_size,
data_format=data_format,
).apply(inputs)
).apply([inputs])
else:
return op_lib.blend(**args)
......
......@@ -32,7 +32,6 @@ class _ConvNd(Operator):
return {
'op_type': self.__class__.__name__,
'arguments': {
'num_output': self.num_output,
'kernel_shape': self.kernel_shape,
'strides': self.strides,
'pads': self.pads,
......@@ -113,7 +112,6 @@ class ConvTranspose2d(_ConvNd):
return {
'op_type': self.__class__.__name__,
'arguments': {
'num_output': self.num_output,
'kernel_shape': self.kernel_shape,
'strides': self.strides,
'pads': self.pads,
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import unittest
import argparse
import dragon
parser = argparse.ArgumentParser(add_help=False)
TEST_CUDA = dragon.cuda.is_available()
def run_tests(argv=None):
"""Run tests under the current ``__main__``."""
if argv is None:
args, remaining = parser.parse_known_args()
argv = [sys.argv[0]] + remaining
unittest.main(argv=argv)
......@@ -9,17 +9,17 @@
#
# ------------------------------------------------------------
"""Define the Adam updaters."""
"""The Adam optimizers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.training import updater
from dragon.core.training import optimizer
class Adam(updater.Updater):
r"""The updater which implements Adam algorithm.
class Adam(optimizer.Optimizer):
r"""The optimizer to apply Adam algorithm.
`[Kingma & Ba, 2014] <https://arxiv.org/abs/1412.6980>`_.
The **Adam** update is defined as:
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The optimizer to update parameters."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core import distributed
from dragon.core.eager import context
from dragon.core.framework import workspace
from dragon.core.ops import distributed_ops_lib
from dragon.core.ops import training_ops_lib
class Optimizer(object):
"""The base class of optimizers."""
# Store for the global unique handle
_DEFAULT_UNIQUE_HANDLE_INDEX = 0
def __init__(
self,
scale=1,
clip_norm=0,
weight_decay=0,
name=None,
):
"""Create a ``Optimizer``.
Parameters
----------
scale : float, optional, default=1
            The scaling factor applied to the gradients.
        clip_norm : float, optional, default=0
            The maximum L2 norm used to clip the gradients.
        weight_decay : float, optional, default=0
            The L2 penalty factor applied to the weights.
name : str, optional
The optional name for shared slots.
"""
self._defaults = {
'scale': float(scale),
'clip_norm': float(clip_norm),
'weight_decay': float(weight_decay),
}
self._param_group = []
if name:
self._op_handle = name
else:
            Optimizer._DEFAULT_UNIQUE_HANDLE_INDEX += 1
            self._op_handle = 'Optimizer_{}'.format(
                Optimizer._DEFAULT_UNIQUE_HANDLE_INDEX)
self._op_type = self.__class__.__name__ + 'Update'
self._process_group = distributed.get_group()
self._extra_kwargs = {}
def apply_gradients(
self,
values_and_grads,
lr_mult=None,
decay_mult=None,
):
"""Apply the gradients on values.
Parameters
----------
values_and_grads : Sequence[Sequence[dragon.Tensor]]
The values and grads.
lr_mult : number, optional
The multiplier to learning rate.
decay_mult : number, optional
The multiplier to weight decay.
"""
if context.executing_eagerly():
            # Filter out the values whose grads are missing.
values, grads = [], []
for v, g in values_and_grads:
if g is not None:
values.append(v)
grads.append(g)
# Accumulate grads from the current process group.
if self._process_group is not None:
distributed_ops_lib.Collective \
.instantiate(
operation='MEAN',
communication='ALLREDUCE',
group=self._process_group,
).apply(grads)
# Apply the updates.
for v, g in zip(values, grads):
self._run_update(v, g, lr_mult, decay_mult)
else:
# Store for the lazy compilation.
for v, g in values_and_grads:
self._add_update(v, g, lr_mult, decay_mult)
return self
def _init_set_defaults(self, extra=None):
"""Initialize the defaults into current workspace."""
if extra is not None:
self._defaults = dict(self._defaults, **extra)
for k, v in self._defaults.items():
workspace.feed_tensor(
'/share/hyper/%s/%s' % (self._op_handle, k), v,
dtype='float32', enforce_cpu=True,
)
def _add_update(self, param, grad, lr_mult=None, decay_mult=None):
"""Add a symbolic operator for updating."""
        pair = tuple(v.id if hasattr(v, 'id') else v for v in (param, grad))
self._param_group.append(
(pair, {
'lr_mult': float(lr_mult) if lr_mult is not None else 1.,
'decay_mult': float(decay_mult) if decay_mult is not None else 1.,
})
)
def _run_update(self, param, grad, lr_mult=None, decay_mult=None):
"""Run an eager operation for updating."""
return training_ops_lib.ParamUpdate \
.instantiate(
op_type=self._op_type,
op_handle=self._op_handle,
lr_mult=float(lr_mult) if lr_mult is not None else 1.,
decay_mult=float(decay_mult) if decay_mult is not None else 1.,
).apply(grad, param)
def __getattr__(self, item):
defaults = self.__dict__.get('_defaults')
if item in defaults:
return workspace.fetch_tensor(
'/share/hyper/%s/%s' % (self._op_handle, item))
return self.__dict__[item]
def __setattr__(self, key, value):
defaults = self.__dict__.get('_defaults')
if defaults is not None and key in defaults:
workspace.feed_tensor(
'/share/hyper/%s/%s' % (self._op_handle, key), value,
dtype='float32', enforce_cpu=True)
else:
object.__setattr__(self, key, value)
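A minimal, framework-free sketch of the hyper-parameter pattern used above: the defaults live in a shared store keyed by the optimizer handle, and attribute access is routed through that store via ``__getattr__``/``__setattr__`` (illustrative only; the dict stands in for the Dragon workspace):

```python
class TinyOptimizer(object):
    _STORE = {}  # stands in for the shared workspace

    def __init__(self, name, **defaults):
        self.__dict__['_op_handle'] = name
        self.__dict__['_defaults'] = dict(defaults)
        for k, v in defaults.items():
            self._STORE['/share/hyper/%s/%s' % (name, k)] = float(v)

    def __getattr__(self, item):
        if item in self.__dict__.get('_defaults', {}):
            return self._STORE['/share/hyper/%s/%s' % (self._op_handle, item)]
        raise AttributeError(item)

    def __setattr__(self, key, value):
        if key in self.__dict__.get('_defaults', {}):
            self._STORE['/share/hyper/%s/%s' % (self._op_handle, key)] = float(value)
        else:
            object.__setattr__(self, key, value)


opt = TinyOptimizer('Optimizer_1', scale=1, clip_norm=0, weight_decay=0)
opt.clip_norm = 5.0
print(opt.clip_norm)  # 5.0
```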
......@@ -9,17 +9,17 @@
#
# ------------------------------------------------------------
"""Define the RMSprop updater."""
"""The RMSprop optimizers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.training import updater
from dragon.core.training import optimizer
class RMSProp(updater.Updater):
r"""The updater which implements RMSprop algorithm.
class RMSprop(optimizer.Optimizer):
r"""The optimizer to apply RMSprop algorithm.
`[Hinton et.al, 2013] <http://www.cs.utoronto.ca/~bonner/courses/2016s/csc321/lectures/lec6.pdf>`_.
The **RMSprop** update is defined as:
......@@ -43,7 +43,7 @@ class RMSProp(updater.Updater):
eps=1e-8,
**kwargs
):
r"""Create a ``RMSProp`` updater.
r"""Create a ``RMSProp`` optimizer.
Parameters
----------
......@@ -57,7 +57,7 @@ class RMSProp(updater.Updater):
The initial value for :math:`\epsilon`.
"""
super(RMSProp, self).__init__(**kwargs)
super(RMSprop, self).__init__(**kwargs)
self._init_set_defaults({
'base_lr': base_lr,
'momentum': momentum,
......
......@@ -9,17 +9,17 @@
#
# ------------------------------------------------------------
"""Define the SGD updaters."""
"""The SGD optimizers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.training import updater
from dragon.core.training import optimizer
class SGD(updater.Updater):
r"""The updater which implements MomentumSGD algorithm.
class SGD(optimizer.Optimizer):
r"""The optimizer to apply MomentumSGD algorithm.
`[Polyak, 1964] <https://doi.org/10.1016/0041-5553(64)90137-5>`_.
The **MomentumSGD** update is defined as:
......@@ -46,8 +46,8 @@ class SGD(updater.Updater):
})
class Nesterov(updater.Updater):
r"""The updater which implements NesterovSGD algorithm.
class Nesterov(optimizer.Optimizer):
r"""The optimizer to apply NesterovSGD algorithm.
`[Sutskever et.al, 2013] <http://www.cs.toronto.edu/~hinton/absps/momentum.pdf>`_.
The **NesterovSGD** update is defined as:
......@@ -60,7 +60,7 @@ class Nesterov(updater.Updater):
"""
def __init__(self, base_lr=0.01, momentum=0.9, **kwargs):
r"""Create a ``Nesterov`` updater.
r"""Create a ``Nesterov`` optimizer.
Parameters
----------
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import unittest
import argparse
import dragon
from dragon.vm import torch as torch_vm
parser = argparse.ArgumentParser(add_help=False)
TEST_CUDA = dragon.cuda.is_available()
def new_tensor(data, constructor='EagerTensor', execution=None):
if execution is not None:
if execution == 'GRAPH_MODE':
return dragon.Tensor(
shape=data.shape,
dtype=str(data.dtype),
).set_value(data)
else:
return dragon.EagerTensor(data, copy=True)
if constructor == 'EagerTensor':
return dragon.EagerTensor(data, copy=True)
elif constructor == 'Tensor':
return dragon.Tensor(
shape=data.shape,
dtype=str(data.dtype),
).set_value(data)
elif constructor == 'torch.Tensor':
return torch_vm.tensor(data)
else:
raise ValueError('Unknown constructor:', constructor)
def run_tests(argv=None):
"""Run tests under the current ``__main__``."""
if argv is None:
args, remaining = parser.parse_known_args()
argv = [sys.argv[0]] + remaining
unittest.main(argv=argv)
......@@ -14,7 +14,7 @@
#define DRAGON_UTILS_CUDA_DEVICE_H_
#ifdef USE_CUDA
#include <cublas.h>
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <curand.h>
......
......@@ -55,6 +55,10 @@ using ConstEigenVectorArrayMap =
Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1>>;
template <typename T>
using ConstEigenVectorArrayMap2 =
Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>;
template <typename T>
using EigenArrayMap =
Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
......
......@@ -108,11 +108,16 @@ DRAGON_API void Dot<float16, CPUContext>(
CPU_FP16_NOT_SUPPORTED;
}
#define DEFINE_DOT_FUNC(T) \
template <> \
DRAGON_API void Dot<T, CPUContext>( \
int n, const T* a, const T* b, T* y, CPUContext* ctx) { \
*y = ConstEigenVectorMap<T>(a, n).dot(ConstEigenVectorMap<T>(b, n)); \
#define DEFINE_DOT_FUNC(T) \
template <> \
DRAGON_API void Dot<T, CPUContext>( \
int n, const T* a, const T* b, T* y, CPUContext* ctx) { \
*y = ConstEigenVectorMap<T>(a, n).dot(ConstEigenVectorMap<T>(b, n)); \
} \
template <> \
DRAGON_API T Dot<T, CPUContext>( \
int n, const T* a, const T* b, CPUContext* ctx) { \
return ConstEigenVectorMap<T>(a, n).dot(ConstEigenVectorMap<T>(b, n)); \
}
DEFINE_DOT_FUNC(float);
......@@ -121,6 +126,11 @@ DEFINE_DOT_FUNC(double);
#define DEFINE_ASUM_FUNC(T) \
template <> \
DRAGON_API void ASum<T, CPUContext>( \
const int n, const T* x, T* y, CPUContext* ctx) { \
*y = ConstEigenVectorArrayMap<T>(x, n).abs().sum(); \
} \
template <> \
DRAGON_API T ASum<T, CPUContext>(const int n, const T* x, CPUContext* ctx) { \
return ConstEigenVectorArrayMap<T>(x, n).abs().sum(); \
}
......
......@@ -94,13 +94,15 @@ DEFINE_SCALE_FUNC(int64_t);
template <> \
DRAGON_API void Scale<T, CUDAContext>( \
const int n, const float alpha, const T* x, T* y, CUDAContext* ctx) { \
T _alpha_ = (T)alpha; \
if (x != y) { \
CUDA_CHECK(cudaMemcpyAsync( \
y, x, sizeof(T) * n, cudaMemcpyDeviceToDevice, ctx->cuda_stream())); \
} \
if (_alpha_ != T(1)) { \
CUBLAS_CHECK(cublas_func(ctx->cublas_handle(), n, &_alpha_, y, 1)); \
if (alpha != 1.f) { \
T scale = (T)alpha; \
CUBLAS_CHECK(cublasSetPointerMode( \
ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST)); \
CUBLAS_CHECK(cublas_func(ctx->cublas_handle(), n, &scale, y, 1)); \
} \
}
......@@ -120,6 +122,8 @@ DRAGON_API void Scale<float16, CUDAContext>(
ctx->cuda_stream()));
}
if (alpha != 1.f) {
CUBLAS_CHECK(
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST));
CUBLAS_CHECK(cublasScalEx(
ctx->cublas_handle(),
n,
......@@ -132,8 +136,8 @@ DRAGON_API void Scale<float16, CUDAContext>(
}
}
DEFINE_SCALE_FUNC(float, cublasSscal_v2);
DEFINE_SCALE_FUNC(double, cublasDscal_v2);
DEFINE_SCALE_FUNC(float, cublasSscal);
DEFINE_SCALE_FUNC(double, cublasDscal);
#undef DEFINE_SCALE_FUNC
#define DEFINE_COPY_FUNC(T) \
......@@ -170,12 +174,14 @@ DEFINE_AXPY_FUNC(int);
DEFINE_AXPY_FUNC(int64_t);
#undef DEFINE_AXPY_FUNC
#define DEFINE_AXPY_FUNC(T, cublas_func) \
template <> \
DRAGON_API void Axpy<T, CUDAContext>( \
const int n, const float alpha, const T* x, T* y, CUDAContext* ctx) { \
T _alpha_ = (T)alpha; \
CUBLAS_CHECK(cublas_func(ctx->cublas_handle(), n, &_alpha_, x, 1, y, 1)); \
#define DEFINE_AXPY_FUNC(T, cublas_func) \
template <> \
DRAGON_API void Axpy<T, CUDAContext>( \
const int n, const float alpha, const T* x, T* y, CUDAContext* ctx) { \
T scale = (T)alpha; \
CUBLAS_CHECK( \
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST)); \
CUBLAS_CHECK(cublas_func(ctx->cublas_handle(), n, &scale, x, 1, y, 1)); \
}
template <>
......@@ -185,6 +191,8 @@ DRAGON_API void Axpy<float16, CUDAContext>(
const float16* x,
float16* y,
CUDAContext* ctx) {
CUBLAS_CHECK(
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST));
CUBLAS_CHECK(cublasAxpyEx(
ctx->cublas_handle(),
n,
......@@ -199,8 +207,8 @@ DRAGON_API void Axpy<float16, CUDAContext>(
CUDA_R_32F));
}
DEFINE_AXPY_FUNC(float, cublasSaxpy_v2);
DEFINE_AXPY_FUNC(double, cublasDaxpy_v2);
DEFINE_AXPY_FUNC(float, cublasSaxpy);
DEFINE_AXPY_FUNC(double, cublasDaxpy);
#undef DEFINE_AXPY_FUNC
#define DEFINE_AXPBY_FUNC(T) \
......@@ -249,12 +257,22 @@ DEFINE_AXPBY_FUNC(float);
DEFINE_AXPBY_FUNC(double);
#undef DEFINE_AXPBY_FUNC
#define DEFINE_DOT_FUNC(T, cublas_func) \
template <> \
DRAGON_API void Dot<T, CUDAContext>( \
const int n, const T* a, const T* b, T* y, CUDAContext* ctx) { \
CUBLAS_CHECK(cublas_func(ctx->cublas_handle(), n, a, 1, b, 1, y)); \
ctx->FinishDeviceComputation(); \
#define DEFINE_DOT_FUNC(T, cublas_func) \
template <> \
DRAGON_API void Dot<T, CUDAContext>( \
const int n, const T* a, const T* b, T* y, CUDAContext* ctx) { \
CUBLAS_CHECK(cublasSetPointerMode( \
ctx->cublas_handle(), CUBLAS_POINTER_MODE_DEVICE)); \
CUBLAS_CHECK(cublas_func(ctx->cublas_handle(), n, a, 1, b, 1, y)); \
} \
template <> \
DRAGON_API T Dot<T, CUDAContext>( \
const int n, const T* a, const T* b, CUDAContext* ctx) { \
T y_host; \
CUBLAS_CHECK( \
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST)); \
CUBLAS_CHECK(cublas_func(ctx->cublas_handle(), n, a, 1, b, 1, &y_host)); \
return y_host; \
}
template <>
......@@ -264,6 +282,8 @@ DRAGON_API void Dot<float16, CUDAContext>(
const float16* b,
float16* y,
CUDAContext* ctx) {
CUBLAS_CHECK(
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_DEVICE));
CUBLAS_CHECK(cublasDotEx(
ctx->cublas_handle(),
n,
......@@ -276,18 +296,28 @@ DRAGON_API void Dot<float16, CUDAContext>(
y,
CUDA_R_16F,
CUDA_R_32F));
ctx->FinishDeviceComputation();
}
DEFINE_DOT_FUNC(float, cublasSdot_v2);
DEFINE_DOT_FUNC(double, cublasDdot_v2);
DEFINE_DOT_FUNC(float, cublasSdot);
DEFINE_DOT_FUNC(double, cublasDdot);
#undef DEFINE_DOT_FUNC
#define DEFINE_ASUM_FUNC(T, cublas_func) \
template <> \
DRAGON_API T ASum<T, CUDAContext>( \
const int n, const T* x, CUDAContext* ctx) { \
return cublas_func(n, x, 1); \
#define DEFINE_ASUM_FUNC(T, cublas_func) \
template <> \
DRAGON_API void ASum<T, CUDAContext>( \
const int n, const T* x, T* y, CUDAContext* ctx) { \
CUBLAS_CHECK(cublasSetPointerMode( \
ctx->cublas_handle(), CUBLAS_POINTER_MODE_DEVICE)); \
cublas_func(ctx->cublas_handle(), n, x, 1, y); \
} \
template <> \
DRAGON_API T ASum<T, CUDAContext>( \
const int n, const T* x, CUDAContext* ctx) { \
T y_host; \
CUBLAS_CHECK( \
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST)); \
cublas_func(ctx->cublas_handle(), n, x, 1, &y_host); \
return y_host; \
}
DEFINE_ASUM_FUNC(float, cublasSasum);
......@@ -312,7 +342,8 @@ DRAGON_API void Gemv<float16, CUDAContext>(
int k = cuTransA == CUBLAS_OP_N ? M : N;
int LDA = cuTransA == CUBLAS_OP_N ? m : k;
int LDC = m;
const float _alpha_ = alpha, _beta_ = beta;
CUBLAS_CHECK(
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST));
if (math_type == "float32") {
#if CUDA_VERSION >= 9000
if (TENSOR_CORE_AVAILABLE()) {
......@@ -324,14 +355,14 @@ DRAGON_API void Gemv<float16, CUDAContext>(
m,
1,
k,
&_alpha_,
&alpha,
A,
CUDA_R_16F,
LDA,
x,
CUDA_R_16F,
k,
&_beta_,
&beta,
y,
CUDA_R_16F,
LDC,
......@@ -346,14 +377,14 @@ DRAGON_API void Gemv<float16, CUDAContext>(
m,
1,
k,
&_alpha_,
&alpha,
A,
CUDA_R_16F,
LDA,
x,
CUDA_R_16F,
k,
&_beta_,
&beta,
y,
CUDA_R_16F,
LDC));
......@@ -366,21 +397,21 @@ DRAGON_API void Gemv<float16, CUDAContext>(
m,
1,
k,
&_alpha_,
&alpha,
A,
CUDA_R_16F,
LDA,
x,
CUDA_R_16F,
k,
&_beta_,
&beta,
y,
CUDA_R_16F,
LDC));
#endif
} else if (math_type == "float16") {
const half _alpha_ = cast::to<half>(alpha);
const half _beta_ = cast::to<half>(beta);
const half alpha_half = cast::to<half>(alpha);
const half beta_half = cast::to<half>(beta);
#if CUDA_VERSION >= 9000
if (TENSOR_CORE_AVAILABLE()) {
// GEMV + MATH16 + TENSOR-CORE
......@@ -391,14 +422,14 @@ DRAGON_API void Gemv<float16, CUDAContext>(
m,
1,
k,
&_alpha_,
&alpha_half,
A,
CUDA_R_16F,
LDA,
x,
CUDA_R_16F,
k,
&_beta_,
&beta_half,
y,
CUDA_R_16F,
LDC,
......@@ -413,12 +444,12 @@ DRAGON_API void Gemv<float16, CUDAContext>(
m,
1,
k,
&_alpha_,
&alpha_half,
reinterpret_cast<const half*>(A),
LDA,
reinterpret_cast<const half*>(x),
k,
&_beta_,
&beta_half,
reinterpret_cast<half*>(y),
LDC));
}
......@@ -430,12 +461,12 @@ DRAGON_API void Gemv<float16, CUDAContext>(
m,
1,
k,
&_alpha_,
&alpha_half,
reinterpret_cast<const half*>(A),
LDA,
reinterpret_cast<const half*>(x),
k,
&_beta_,
&beta_half,
reinterpret_cast<half*>(y),
LDC));
#endif
......@@ -458,20 +489,10 @@ DRAGON_API void Gemv<float, CUDAContext>(
const string math_type) {
cublasOperation_t cuTransA =
TransA == CblasNoTrans ? CUBLAS_OP_T : CUBLAS_OP_N;
const float _alpha_ = alpha, _beta_ = beta;
CUBLAS_CHECK(cublasSgemv_v2(
ctx->cublas_handle(),
cuTransA,
N,
M,
&_alpha_,
A,
N,
x,
1,
&_beta_,
y,
1));
CUBLAS_CHECK(
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST));
CUBLAS_CHECK(cublasSgemv(
ctx->cublas_handle(), cuTransA, N, M, &alpha, A, N, x, 1, &beta, y, 1));
}
template <>
......@@ -488,18 +509,21 @@ DRAGON_API void Gemv<double, CUDAContext>(
const string math_type) {
cublasOperation_t cuTransA =
TransA == CblasNoTrans ? CUBLAS_OP_T : CUBLAS_OP_N;
const double _alpha_ = alpha, _beta_ = beta;
CUBLAS_CHECK(cublasDgemv_v2(
const auto alpha64 = static_cast<double>(alpha);
const auto beta64 = static_cast<double>(beta);
CUBLAS_CHECK(
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST));
CUBLAS_CHECK(cublasDgemv(
ctx->cublas_handle(),
cuTransA,
N,
M,
&_alpha_,
&alpha64,
A,
N,
x,
1,
&_beta_,
&beta64,
y,
1));
}
......@@ -524,8 +548,9 @@ DRAGON_API void Gemm<float16, CUDAContext>(
TransA == CblasNoTrans ? CUBLAS_OP_N : CUBLAS_OP_T;
cublasOperation_t cuTransB =
TransB == CblasNoTrans ? CUBLAS_OP_N : CUBLAS_OP_T;
CUBLAS_CHECK(
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST));
if (math_type == "float32") {
const float _alpha_ = alpha, _beta_ = beta;
#if CUDA_VERSION >= 9000
if (TENSOR_CORE_AVAILABLE()) {
// GEMM + MATH32 + TENSOR-CORE
......@@ -536,14 +561,14 @@ DRAGON_API void Gemm<float16, CUDAContext>(
N,
M,
K,
&_alpha_,
&alpha,
B,
CUDA_R_16F,
ldb,
A,
CUDA_R_16F,
lda,
&_beta_,
&beta,
C,
CUDA_R_16F,
N,
......@@ -558,14 +583,14 @@ DRAGON_API void Gemm<float16, CUDAContext>(
N,
M,
K,
&_alpha_,
&alpha,
B,
CUDA_R_16F,
ldb,
A,
CUDA_R_16F,
lda,
&_beta_,
&beta,
C,
CUDA_R_16F,
N));
......@@ -578,21 +603,21 @@ DRAGON_API void Gemm<float16, CUDAContext>(
N,
M,
K,
&_alpha_,
&alpha,
B,
CUDA_R_16F,
ldb,
A,
CUDA_R_16F,
lda,
&_beta_,
&beta,
C,
CUDA_R_16F,
N));
#endif
} else if (math_type == "float16") {
const half _alpha_ = cast::to<half>(alpha);
const half _beta_ = cast::to<half>(beta);
const half alpha_half = cast::to<half>(alpha);
const half beta_half = cast::to<half>(beta);
#if CUDA_VERSION >= 9000
if (TENSOR_CORE_AVAILABLE()) {
// GEMM + MATH16 + TENSOR-CORE
......@@ -603,14 +628,14 @@ DRAGON_API void Gemm<float16, CUDAContext>(
N,
M,
K,
&_alpha_,
&alpha_half,
B,
CUDA_R_16F,
ldb,
A,
CUDA_R_16F,
lda,
&_beta_,
&beta_half,
C,
CUDA_R_16F,
N,
......@@ -625,12 +650,12 @@ DRAGON_API void Gemm<float16, CUDAContext>(
N,
M,
K,
&_alpha_,
&alpha_half,
reinterpret_cast<const half*>(B),
ldb,
reinterpret_cast<const half*>(A),
lda,
&_beta_,
&beta_half,
reinterpret_cast<half*>(C),
N));
}
......@@ -642,12 +667,12 @@ DRAGON_API void Gemm<float16, CUDAContext>(
N,
M,
K,
&_alpha_,
&alpha_half,
reinterpret_cast<const half*>(B),
ldb,
reinterpret_cast<const half*>(A),
lda,
&_beta_,
&beta_half,
reinterpret_cast<half*>(C),
N));
#endif
......@@ -676,7 +701,9 @@ DRAGON_API void Gemm<float, CUDAContext>(
TransA == CblasNoTrans ? CUBLAS_OP_N : CUBLAS_OP_T;
cublasOperation_t cuTransB =
TransB == CblasNoTrans ? CUBLAS_OP_N : CUBLAS_OP_T;
CUBLAS_CHECK(cublasSgemm_v2(
CUBLAS_CHECK(
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST));
CUBLAS_CHECK(cublasSgemm(
ctx->cublas_handle(),
cuTransB,
cuTransA,
......@@ -713,20 +740,23 @@ DRAGON_API void Gemm<double, CUDAContext>(
TransA == CblasNoTrans ? CUBLAS_OP_N : CUBLAS_OP_T;
cublasOperation_t cuTransB =
TransB == CblasNoTrans ? CUBLAS_OP_N : CUBLAS_OP_T;
const double _alpha_ = alpha, _beta_ = beta;
CUBLAS_CHECK(cublasDgemm_v2(
const auto alpha64 = static_cast<double>(alpha);
const auto beta64 = static_cast<double>(beta);
CUBLAS_CHECK(
cublasSetPointerMode(ctx->cublas_handle(), CUBLAS_POINTER_MODE_HOST));
CUBLAS_CHECK(cublasDgemm(
ctx->cublas_handle(),
cuTransB,
cuTransA,
N,
M,
K,
&_alpha_,
&alpha64,
B,
ldb,
A,
lda,
&_beta_,
&beta64,
C,
N));
}
......
......@@ -48,6 +48,12 @@ template <typename T, class Context>
DRAGON_API void Dot(const int n, const T* a, const T* b, T* y, Context* ctx);
template <typename T, class Context>
DRAGON_API T Dot(const int n, const T* a, const T* b, Context* ctx);
template <typename T, class Context>
DRAGON_API void ASum(const int n, const T* x, T* y, Context* ctx);
template <typename T, class Context>
DRAGON_API T ASum(const int n, const T* x, Context* ctx);
template <typename T, class Context>
......
......@@ -213,7 +213,7 @@ DEFINE_BROADCAST_2ND_FUNC(Div, double, /);
void _Colwise##name<TIn, true>( \
const int rows, const int cols, const TIn* a, const TIn* b, TOut* y) { \
EigenArrayMap<TOut>(y, cols, rows) = \
ConstEigenVectorArrayMap<TIn>(a, rows).colwise().replicate(cols) \
ConstEigenVectorArrayMap2<TIn>(a, rows).colwise().replicate(cols) \
expr ConstEigenArrayMap<TIn>(b, cols, rows); \
} \
template <> \
......@@ -230,7 +230,7 @@ DEFINE_BROADCAST_2ND_FUNC(Div, double, /);
const int rows, const int cols, const TIn* a, const TIn* b, TOut* y) { \
EigenArrayMap<TOut>(y, cols, rows) = \
ConstEigenArrayMap<TIn>(a, cols, rows) \
expr ConstEigenVectorArrayMap<TIn>(b, rows) \
expr ConstEigenVectorArrayMap2<TIn>(b, rows) \
.colwise() \
.replicate(cols); \
}
......@@ -273,36 +273,36 @@ DEFINE_ROWWISE_COLWISE_BIANRY_FUNC(GreaterEqual, float, bool, >=);
DEFINE_ROWWISE_COLWISE_BIANRY_FUNC(GreaterEqual, double, bool, >=);
#undef DEFINE_ROWWISE_COLWISE_BIANRY_FUNC
#define DEFINE_ROWWISE_COLWISE_BIANRY_FUNC(name, T, func) \
template <> \
void _Rowwise##name<T, true>( \
const int rows, const int cols, const T* a, const T* b, T* y) { \
EigenArrayMap<T>(y, cols, rows) = \
ConstEigenVectorArrayMap<T>(a, cols).rowwise().replicate(rows).func( \
ConstEigenArrayMap<T>(b, cols, rows)); \
} \
template <> \
void _Colwise##name<T, true>( \
const int rows, const int cols, const T* a, const T* b, T* y) { \
EigenArrayMap<T>(y, cols, rows) = \
ConstEigenVectorArrayMap<T>(a, rows).colwise().replicate(cols).func( \
ConstEigenArrayMap<T>(b, cols, rows)); \
} \
template <> \
void _Rowwise##name<T, false>( \
const int rows, const int cols, const T* a, const T* b, T* y) { \
EigenArrayMap<T>(y, cols, rows) = \
ConstEigenArrayMap<T>(a, cols, rows) \
.func(ConstEigenVectorArrayMap<T>(b, cols).rowwise().replicate( \
rows)); \
} \
template <> \
void _Colwise##name<T, false>( \
const int rows, const int cols, const T* a, const T* b, T* y) { \
EigenArrayMap<T>(y, cols, rows) = \
ConstEigenArrayMap<T>(a, cols, rows) \
.func(ConstEigenVectorArrayMap<T>(b, rows).colwise().replicate( \
cols)); \
#define DEFINE_ROWWISE_COLWISE_BIANRY_FUNC(name, T, func) \
template <> \
void _Rowwise##name<T, true>( \
const int rows, const int cols, const T* a, const T* b, T* y) { \
EigenArrayMap<T>(y, cols, rows) = \
ConstEigenVectorArrayMap<T>(a, cols).rowwise().replicate(rows).func( \
ConstEigenArrayMap<T>(b, cols, rows)); \
} \
template <> \
void _Colwise##name<T, true>( \
const int rows, const int cols, const T* a, const T* b, T* y) { \
EigenArrayMap<T>(y, cols, rows) = \
ConstEigenVectorArrayMap2<T>(a, rows).colwise().replicate(cols).func( \
ConstEigenArrayMap<T>(b, cols, rows)); \
} \
template <> \
void _Rowwise##name<T, false>( \
const int rows, const int cols, const T* a, const T* b, T* y) { \
EigenArrayMap<T>(y, cols, rows) = \
ConstEigenArrayMap<T>(a, cols, rows) \
.func(ConstEigenVectorArrayMap<T>(b, cols).rowwise().replicate( \
rows)); \
} \
template <> \
void _Colwise##name<T, false>( \
const int rows, const int cols, const T* a, const T* b, T* y) { \
EigenArrayMap<T>(y, cols, rows) = \
ConstEigenArrayMap<T>(a, cols, rows) \
.func(ConstEigenVectorArrayMap2<T>(b, rows).colwise().replicate( \
cols)); \
}
DEFINE_ROWWISE_COLWISE_BIANRY_FUNC(Pow, float, pow);
......
......@@ -211,7 +211,7 @@ void ComputeBinaryBroadcastStrides(
Y_dims.resize(num_dims);
int64_t A_stride = 1;
int64_t B_stride = 1;
for (int i = 0; i < num_dims; ++i) {
for (int i = num_dims - 1; i >= 0; --i) {
A_broadcast_strides[i] = A_broadcast_dims[i] == 1 ? 0 : A_stride;
B_broadcast_strides[i] = B_broadcast_dims[i] == 1 ? 0 : B_stride;
Y_dims[i] = std::max(A_broadcast_dims[i], B_broadcast_dims[i]);
......
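The loop-direction fix above matters because row-major strides accumulate from the innermost axis outward; a Python sketch of the corrected computation (illustrative, not the C++ helper):

```python
def broadcast_strides(dims):
    # Row-major strides; broadcast (size-1) axes contribute stride 0.
    strides, stride = [0] * len(dims), 1
    for i in range(len(dims) - 1, -1, -1):  # innermost axis first
        strides[i] = 0 if dims[i] == 1 else stride
        stride *= dims[i]
    return strides

print(broadcast_strides([2, 1, 4]))  # [4, 0, 1]
```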
......@@ -1025,18 +1025,18 @@ void SGDUpdate(
T* m,
Context* ctx);
/* training.op_base */
/* training.mixed_prec_update */
template <typename T, class Context>
void MixedPrecL2Decay(
void MixedPrecL2Penalty(
const int count,
const float alpha,
const T* w,
const T* x,
float* dx,
Context* ctx);
template <typename T, class Context>
void MixedPrecUpdate(const int count, const float* updates, T* w, Context* ctx);
void MixedPrecUpdate(const int count, const float* dx, T* x, Context* ctx);
/* vision.bias_add */
......
......@@ -49,7 +49,6 @@ from dragon.vm.tensorflow.core.framework.dtypes import qint32
from dragon.vm.tensorflow.core.framework.dtypes import qint8
from dragon.vm.tensorflow.core.framework.dtypes import quint16
from dragon.vm.tensorflow.core.framework.dtypes import quint8
from dragon.vm.tensorflow.core.framework.dtypes import resource
from dragon.vm.tensorflow.core.framework.dtypes import string
from dragon.vm.tensorflow.core.framework.dtypes import uint16
from dragon.vm.tensorflow.core.framework.dtypes import uint32
......
......@@ -33,7 +33,6 @@ from dragon.vm.tensorflow.core.framework.dtypes import qint32
from dragon.vm.tensorflow.core.framework.dtypes import qint8
from dragon.vm.tensorflow.core.framework.dtypes import quint16
from dragon.vm.tensorflow.core.framework.dtypes import quint8
from dragon.vm.tensorflow.core.framework.dtypes import resource
from dragon.vm.tensorflow.core.framework.dtypes import string
from dragon.vm.tensorflow.core.framework.dtypes import uint16
from dragon.vm.tensorflow.core.framework.dtypes import uint32
......
......@@ -15,7 +15,7 @@ from __future__ import print_function
import numpy
from dragon.core.autograph.tensor import RefTensor
from dragon.core.autograph.tensor import TensorRef
from dragon.core.eager import context as eager_context
from dragon.core.eager.tensor import EagerTensor
from dragon.core.framework import context
......@@ -80,7 +80,7 @@ def constant(value, dtype=None, shape=None, name='Const'):
if eager_context.executing_eagerly():
return EagerTensor(value, name=name + ':0')
else:
return RefTensor(
return TensorRef(
name=workspace.get_dummy_name(name, ':0', 'Tensor'),
shape=list(value.shape),
dtype=str(value.dtype),
......
......@@ -19,7 +19,31 @@ from __future__ import print_function
import numpy as np
from dragon.vm.tensorflow.core.proto import types_pb2
# Predefine the type enumerations
# to avoid to import the tensorflow proto
DT_INVALID = 0
DT_FLOAT = 1
DT_DOUBLE = 2
DT_INT32 = 3
DT_UINT8 = 4
DT_INT16 = 5
DT_INT8 = 6
DT_STRING = 7
DT_COMPLEX64 = 8
DT_INT64 = 9
DT_BOOL = 10
DT_QINT8 = 11
DT_QUINT8 = 12
DT_QINT32 = 13
DT_BFLOAT16 = 14
DT_QINT16 = 15
DT_QUINT16 = 16
DT_UINT16 = 17
DT_COMPLEX128 = 18
DT_HALF = 19
DT_VARIANT = 21
DT_UINT32 = 22
DT_UINT64 = 23
class DType(object):
......@@ -69,8 +93,6 @@ class DType(object):
* ``tf.qint32``: Quantized 32-bit signed integer.
* ``tf.resource``: Handle to a mutable resource.
* ``tf.variant``: Values of arbitrary types.
"""
......@@ -81,13 +103,12 @@ class DType(object):
Parameters
----------
type_enum : DataType
The ``types_pb2.DataType`` value.
The ``DataType`` value.
"""
type_enum = int(type_enum)
if (type_enum not in types_pb2.DataType.values()
or type_enum == types_pb2.DT_INVALID):
raise TypeError('<type_enum> is not a valid types_pb2.DataType.')
if type_enum == DT_INVALID:
raise TypeError('<type_enum> is not a valid DataType.')
self._type_enum = type_enum
@property
......@@ -106,8 +127,7 @@ class DType(object):
@property
def is_numpy_compatible(self):
return (self._type_enum != types_pb2.DT_RESOURCE and
self._type_enum != types_pb2.DT_RESOURCE_REF)
return self._type_enum in _TF_TO_NP
@property
def as_numpy_dtype(self):
......@@ -230,55 +250,53 @@ dtype_range = {np.bool_: (False, True),
np.float64: (-1, 1)}
# Define standard wrappers for the types_pb2.DataType enum.
resource = DType(types_pb2.DT_RESOURCE)
float16 = DType(types_pb2.DT_HALF)
# Define standard wrappers for the DataType enum.
float16 = DType(DT_HALF)
half = float16
float32 = DType(types_pb2.DT_FLOAT)
float64 = DType(types_pb2.DT_DOUBLE)
float32 = DType(DT_FLOAT)
float64 = DType(DT_DOUBLE)
double = float64
int32 = DType(types_pb2.DT_INT32)
uint8 = DType(types_pb2.DT_UINT8)
uint16 = DType(types_pb2.DT_UINT16)
uint64 = DType(types_pb2.DT_UINT32)
uint32 = DType(types_pb2.DT_UINT64)
int16 = DType(types_pb2.DT_INT16)
int8 = DType(types_pb2.DT_INT8)
string = DType(types_pb2.DT_STRING)
complex64 = DType(types_pb2.DT_COMPLEX64)
complex128 = DType(types_pb2.DT_COMPLEX128)
int64 = DType(types_pb2.DT_INT64)
bool = DType(types_pb2.DT_BOOL)
qint8 = DType(types_pb2.DT_QINT8)
quint8 = DType(types_pb2.DT_QUINT8)
qint16 = DType(types_pb2.DT_QINT16)
quint16 = DType(types_pb2.DT_QUINT16)
qint32 = DType(types_pb2.DT_QINT32)
bfloat16 = DType(types_pb2.DT_BFLOAT16)
variant = DType(types_pb2.DT_VARIANT)
# Standard mappings between types_pb2.DataType values and string names.
int32 = DType(DT_INT32)
uint8 = DType(DT_UINT8)
uint16 = DType(DT_UINT16)
uint64 = DType(DT_UINT64)
uint32 = DType(DT_UINT32)
int16 = DType(DT_INT16)
int8 = DType(DT_INT8)
string = DType(DT_STRING)
complex64 = DType(DT_COMPLEX64)
complex128 = DType(DT_COMPLEX128)
int64 = DType(DT_INT64)
bool = DType(DT_BOOL)
qint8 = DType(DT_QINT8)
quint8 = DType(DT_QUINT8)
qint16 = DType(DT_QINT16)
quint16 = DType(DT_QUINT16)
qint32 = DType(DT_QINT32)
bfloat16 = DType(DT_BFLOAT16)
variant = DType(DT_VARIANT)
# Standard mappings between DataType values and string names.
_TYPE_TO_STRING = {
types_pb2.DT_HALF: "float16",
types_pb2.DT_FLOAT: "float32",
types_pb2.DT_DOUBLE: "float64",
types_pb2.DT_INT32: "int32",
types_pb2.DT_UINT8: "uint8",
types_pb2.DT_UINT16: "uint16",
types_pb2.DT_INT16: "int16",
types_pb2.DT_INT8: "int8",
types_pb2.DT_STRING: "string",
types_pb2.DT_COMPLEX64: "complex64",
types_pb2.DT_COMPLEX128: "complex128",
types_pb2.DT_INT64: "int64",
types_pb2.DT_BOOL: "bool",
types_pb2.DT_QINT8: "qint8",
types_pb2.DT_QUINT8: "quint8",
types_pb2.DT_QINT16: "qint16",
types_pb2.DT_QUINT16: "quint16",
types_pb2.DT_QINT32: "qint32",
types_pb2.DT_BFLOAT16: "bfloat16",
types_pb2.DT_RESOURCE: "resource",
DT_HALF: "float16",
DT_FLOAT: "float32",
DT_DOUBLE: "float64",
DT_INT32: "int32",
DT_UINT8: "uint8",
DT_UINT16: "uint16",
DT_INT16: "int16",
DT_INT8: "int8",
DT_STRING: "string",
DT_COMPLEX64: "complex64",
DT_COMPLEX128: "complex128",
DT_INT64: "int64",
DT_BOOL: "bool",
DT_QINT8: "qint8",
DT_QUINT8: "quint8",
DT_QINT16: "qint16",
DT_QUINT16: "quint16",
DT_QINT32: "qint32",
DT_BFLOAT16: "bfloat16",
}
# Numpy representation for quantized dtypes.
......@@ -314,51 +332,50 @@ _NP_TO_TF = {
}
_TF_TO_NP = {
types_pb2.DT_HALF: np.float16,
types_pb2.DT_FLOAT: np.float32,
types_pb2.DT_DOUBLE: np.float64,
types_pb2.DT_INT32: np.int32,
types_pb2.DT_UINT8: np.uint8,
types_pb2.DT_UINT16: np.uint16,
types_pb2.DT_INT16: np.int16,
types_pb2.DT_INT8: np.int8,
types_pb2.DT_STRING: np.object,
types_pb2.DT_COMPLEX64: np.complex64,
types_pb2.DT_COMPLEX128: np.complex128,
types_pb2.DT_INT64: np.int64,
types_pb2.DT_BOOL: np.bool,
types_pb2.DT_QINT8: _np_qint8,
types_pb2.DT_QUINT8: _np_quint8,
types_pb2.DT_QINT16: _np_qint16,
types_pb2.DT_QUINT16: _np_quint16,
types_pb2.DT_QINT32: _np_qint32,
types_pb2.DT_BFLOAT16: np.uint16,
DT_HALF: np.float16,
DT_FLOAT: np.float32,
DT_DOUBLE: np.float64,
DT_INT32: np.int32,
DT_UINT8: np.uint8,
DT_UINT16: np.uint16,
DT_INT16: np.int16,
DT_INT8: np.int8,
DT_STRING: np.object,
DT_COMPLEX64: np.complex64,
DT_COMPLEX128: np.complex128,
DT_INT64: np.int64,
DT_BOOL: np.bool,
DT_QINT8: _np_qint8,
DT_QUINT8: _np_quint8,
DT_QINT16: _np_qint16,
DT_QUINT16: _np_quint16,
DT_QINT32: _np_qint32,
DT_BFLOAT16: np.uint16,
}
_INTERN_TABLE = {
types_pb2.DT_HALF: float16,
types_pb2.DT_FLOAT: float32,
types_pb2.DT_DOUBLE: float64,
types_pb2.DT_INT32: int32,
types_pb2.DT_UINT8: uint8,
types_pb2.DT_UINT16: uint16,
types_pb2.DT_UINT32: uint32,
types_pb2.DT_UINT64: uint64,
types_pb2.DT_INT16: int16,
types_pb2.DT_INT8: int8,
types_pb2.DT_STRING: string,
types_pb2.DT_COMPLEX64: complex64,
types_pb2.DT_COMPLEX128: complex128,
types_pb2.DT_INT64: int64,
types_pb2.DT_BOOL: bool,
types_pb2.DT_QINT8: qint8,
types_pb2.DT_QUINT8: quint8,
types_pb2.DT_QINT16: qint16,
types_pb2.DT_QUINT16: quint16,
types_pb2.DT_QINT32: qint32,
types_pb2.DT_BFLOAT16: bfloat16,
types_pb2.DT_RESOURCE: resource,
types_pb2.DT_VARIANT: variant,
DT_HALF: float16,
DT_FLOAT: float32,
DT_DOUBLE: float64,
DT_INT32: int32,
DT_UINT8: uint8,
DT_UINT16: uint16,
DT_UINT32: uint32,
DT_UINT64: uint64,
DT_INT16: int16,
DT_INT8: int8,
DT_STRING: string,
DT_COMPLEX64: complex64,
DT_COMPLEX128: complex128,
DT_INT64: int64,
DT_BOOL: bool,
DT_QINT8: qint8,
DT_QUINT8: quint8,
DT_QINT16: qint16,
DT_QUINT16: quint16,
DT_QINT32: qint32,
DT_BFLOAT16: bfloat16,
DT_VARIANT: variant,
}
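As a quick sanity check of the renamed wrappers, a minimal sketch is shown below. It assumes the module is importable under the path used elsewhere in this commit and that ``as_numpy_dtype`` resolves through ``_TF_TO_NP``; neither assumption is verified by this hunk.
import numpy as np
from dragon.vm.tensorflow.core.framework import dtypes
# The module-level wrappers are interned singletons keyed by the DataType enum.
assert dtypes.half is dtypes.float16                  # alias defined above
assert dtypes.float32.as_numpy_dtype == np.float32    # expected lookup via _TF_TO_NP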
_STRING_TO_TF = {
......
......@@ -62,7 +62,6 @@ class Adam(optimizer.Optimizer):
"""
super(Adam, self).__init__(name, **kwargs)
self._op_type = 'AdamUpdate'
self._set_hyper('learning_rate', kwargs.get('lr', learning_rate), 'base_lr')
self._set_hyper('beta_1', beta_1, 'beta1')
self._set_hyper('beta_2', beta_2, 'beta2')
......
......@@ -21,7 +21,7 @@ from dragon.core.eager import context as eager_context
from dragon.core.framework import context
from dragon.core.framework import types
from dragon.core.framework import workspace
from dragon.core.training import updater
from dragon.core.training import optimizer as optimizer_v1
from dragon.core.util import six
from dragon.vm.tensorflow.core.framework import dtypes
from dragon.vm.tensorflow.core.keras import initializers
......@@ -29,7 +29,7 @@ from dragon.vm.tensorflow.core.keras.utils import generic_utils
from dragon.vm.tensorflow.core.ops import variables
class Optimizer(updater.Updater):
class Optimizer(optimizer_v1.Optimizer):
"""The base class for optimizers."""
BASE_WEIGHT_DECAY = 0.0001
......@@ -46,9 +46,9 @@ class Optimizer(updater.Updater):
self._init_set_name(name)
super(Optimizer, self).__init__(
name=self._name,
l2_decay=self.BASE_WEIGHT_DECAY,
weight_decay=self.BASE_WEIGHT_DECAY,
)
allowed_kwargs = {'clipnorm', 'clipvalue', 'lr', 'decay'}
allowed_kwargs = {'scale', 'clipnorm', 'lr'}
for k in kwargs:
if k not in allowed_kwargs:
raise TypeError('Unexpected keyword argument:', str(k))
......@@ -61,11 +61,12 @@ class Optimizer(updater.Updater):
self._iterations = 0
# Register the common hyper parameters.
if 'scale' in kwargs:
self._defaults['scale'] = kwargs.pop('scale')
if 'clipnorm' in kwargs:
self._defaults['clip_gradient'] = kwargs.pop('clipnorm')
self._defaults['clip_norm'] = kwargs.pop('clipnorm')
for k, v in self._defaults.items():
self._set_hyper(k, v, k)
self._hypers_created = False
@property
......@@ -196,7 +197,7 @@ class Optimizer(updater.Updater):
else:
self._hyper[name] = value
if alias and name not in self._alias:
self._alias[name] = self._slot + '/' + alias
self._alias[name] = '/share/hyper/%s/%s' % (self._op_handle, alias)
def __getattr__(self, item):
if item == 'lr':
......
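The alias change above, together with the torch-side template later in this commit, puts every shared hyper-parameter under a single workspace prefix. A small illustration of the resulting tensor name (the handle value is hypothetical):
op_handle, alias = 'Optimizer_1', 'base_lr'            # hypothetical handle and alias
print('/share/hyper/%s/%s' % (op_handle, alias))       # -> /share/hyper/Optimizer_1/base_lr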
......@@ -59,7 +59,6 @@ class RMSprop(optimizer.Optimizer):
"""
super(RMSprop, self).__init__(name, **kwargs)
self._op_type = 'RMSPropUpdate'
self._set_hyper('learning_rate', kwargs.get('lr', learning_rate), 'base_lr')
self._set_hyper('rho', rho, 'decay')
self._set_hyper('momentum', momentum, 'momentum')
......
......@@ -19,7 +19,7 @@ from __future__ import print_function
import numpy
from dragon.core.autograph.tensor import RefTensor
from dragon.core.autograph.tensor import TensorRef
from dragon.core.framework import context
from dragon.core.framework import workspace
from dragon.core.ops import array_ops
......@@ -477,7 +477,7 @@ def placeholder(dtype=None, shape=None, name=None):
"""
# Construct a tensor from the explicit name
return RefTensor(
return TensorRef(
workspace.get_dummy_name(
context.get_name_scope() + name
if name else 'Placeholder',
......
......@@ -716,8 +716,8 @@ def matmul(
"""
return math_ops.matmul(
[a, b],
transA=transpose_a,
transB=transpose_b,
transpose_a=transpose_a,
transpose_b=transpose_b,
name=name,
)
......
......@@ -18,7 +18,6 @@ import functools
from dragon.core.framework import types
from dragon.core.ops import activation_ops
from dragon.core.ops import loss_ops
from dragon.core.ops import math_ops
from dragon.core.ops import normalization_ops
from dragon.core.ops import vision_ops
from dragon.core.util import nest
......@@ -205,7 +204,6 @@ def convolution(
return getattr(vision_ops, '{}{}d'.format(
kwargs.get('conv_type', 'conv'), num_spatial_dims))(
[input, filters],
num_output=filters.shape[0],
kernel_shape=filters.shape[2:],
strides=strides[start_axis:start_axis + num_spatial_dims],
dilations=dilations[start_axis:start_axis + num_spatial_dims],
......@@ -288,7 +286,6 @@ def conv_transpose(
return getattr(vision_ops, 'conv{}d_transpose'.format(num_spatial_dims))(
[input, filters],
num_output=filters.shape[1],
kernel_shape=filters.shape[2:],
strides=strides[start_axis:start_axis + num_spatial_dims],
dilations=dilations[start_axis:start_axis + num_spatial_dims],
......@@ -897,29 +894,6 @@ def sparse_softmax_cross_entropy_with_logits(
)
def xw_plus_b(x, weights, biases, name=None):
if weights.shape is None:
raise ValueError('weights must have a valid shape.')
else:
if len(weights.shape) != 2:
raise ValueError('weights must be a 2D Tensor')
if biases.shape is None:
raise ValueError('biases must have a valid shape.')
else:
if len(biases.shape) != 1:
raise ValueError('biases must be a 1D Tensor')
if weights.shape[1] != biases.shape[0]:
raise ValueError('the shapes of weights and biases are incompatible.')
return math_ops.fully_connected(
[x, weights, biases],
num_output=weights.shape[1],
transW=False,
name=name,
)
def _normalize_spatial_args(
name,
values,
......
syntax = "proto2";
package tensorflow;
message GPUOptions {
// A value between 0 and 1 that indicates what fraction of the
// available GPU memory to pre-allocate for each process. 1 means
// to pre-allocate all of the GPU memory, 0.5 means the process
// allocates ~50% of the available GPU memory.
optional double per_process_gpu_memory_fraction = 1;
// The type of GPU allocation strategy to use.
//
// Allowed values:
// "": The empty string (default) uses a system-chosen default
// which may change over time.
//
// "BFC": A "Best-fit with coalescing" algorithm, simplified from a
// version of dlmalloc.
optional string allocator_type = 2;
// Delay deletion of up to this many bytes to reduce the number of
// interactions with gpu driver code. If 0, the system chooses
// a reasonable default (several MBs).
optional int64 deferred_deletion_bytes = 3;
// If true, the allocator does not pre-allocate the entire specified
// GPU memory region, instead starting small and growing as needed.
optional bool allow_growth = 4;
// A comma-separated list of GPU ids that determines the 'visible'
// to 'virtual' mapping of GPU devices. For example, if TensorFlow
// can see 8 GPU devices in the process, and one wanted to map
// visible GPU devices 5 and 3 as "/device:GPU:0", and "/device:GPU:1", then one
// would specify this field as "5,3". This field is similar in
// spirit to the CUDA_VISIBLE_DEVICES environment variable, except
// it applies to the visible GPU devices in the process.
//
// NOTE: The GPU driver provides the process with the visible GPUs
// in an order which is not guaranteed to have any correlation to
// the *physical* GPU id in the machine. This field is used for
// remapping "visible" to "virtual", which means this operates only
// after the process starts. Users are required to use vendor
// specific mechanisms (e.g., CUDA_VISIBLE_DEVICES) to control the
// physical to visible device mapping prior to invoking TensorFlow.
optional string visible_device_list = 5;
// In the event polling loop sleep this many microseconds between
// PollEvents calls, when the queue is not empty. If value is not
// set or set to 0, gets set to a non-zero default.
optional int32 polling_active_delay_usecs = 6;
// In the event polling loop sleep this many milliseconds between
// PollEvents calls, when the queue is empty. If value is not
// set or set to 0, gets set to a non-zero default.
optional int32 polling_inactive_delay_msecs = 7;
// Force all tensors to be gpu_compatible. On a GPU-enabled TensorFlow,
// enabling this option forces all CPU tensors to be allocated with Cuda
// pinned memory. Normally, TensorFlow will infer which tensors should be
// allocated as the pinned memory. But in case where the inference is
// incomplete, this option can significantly speed up the cross-device memory
// copy performance as long as it fits the memory.
// Note that this option is not something that should be
// enabled by default for unknown or very large models, since all Cuda pinned
// memory is unpageable, having too much pinned memory might negatively impact
// the overall host system performance.
optional bool force_gpu_compatible = 8;
}
message GraphOptions {
// If true, use control flow to schedule the activation of Recv nodes.
// (Currently ignored.)
optional bool enable_recv_scheduling = 2;
// Options controlling how graph is optimized.
// OptimizerOptions optimizer_options = 3;
// The number of steps to run before returning a cost model detailing
// the memory usage and performance of each node of the graph. 0 means
// no cost model.
optional int64 build_cost_model = 4;
// The number of steps to skip before collecting statistics for the
// cost model.
optional int64 build_cost_model_after = 9;
// Annotate each Node with Op output shape data, to the extent it can
// be statically inferred.
optional bool infer_shapes = 5;
// Only place the subgraphs that are run, rather than the entire graph.
//
// This is useful for interactive graph building, where one might
// produce graphs that cannot be placed during the debugging
// process. In particular, it allows the client to continue work in
// a session after adding a node to a graph whose placement
// constraints are unsatisfiable.
optional bool place_pruned_graph = 6;
// If true, transfer float values between processes as bfloat16.
optional bool enable_bfloat16_sendrecv = 7;
// If > 0, record a timeline every this many steps.
// EXPERIMENTAL: This currently has no effect in MasterSession.
optional int32 timeline_step = 8;
// Options that control the type and amount of graph rewriting.
// Not currently configurable via the public Python API (i.e. there is no API
// stability guarantee if you import RewriterConfig explicitly).
// RewriterConfig rewrite_options = 10;
}
message ConfigProto {
// Map from device type name (e.g., "CPU" or "GPU" ) to maximum
// number of devices of that type to use. If a particular device
// type is not found in the map, the system picks an appropriate
// number.
// map<string, int32> device_count = 1;
// The execution of an individual op (for some op types) can be
// parallelized on a pool of intra_op_parallelism_threads.
// 0 means the system picks an appropriate number.
optional int32 intra_op_parallelism_threads = 2;
// Nodes that perform blocking operations are enqueued on a pool of
// inter_op_parallelism_threads available in each process.
//
// 0 means the system picks an appropriate number.
//
// Note that the first Session created in the process sets the
// number of threads for all future sessions unless use_per_session_threads is
// true or session_inter_op_thread_pool is configured.
optional int32 inter_op_parallelism_threads = 5;
// If true, use a new set of threads for this session rather than the global
// pool of threads. Only supported by direct sessions.
//
// If false, use the global threads created by the first session, or the
// per-session thread pools configured by session_inter_op_thread_pool.
//
// This option is deprecated. The same effect can be achieved by setting
// session_inter_op_thread_pool to have one element, whose num_threads equals
// inter_op_parallelism_threads.
optional bool use_per_session_threads = 9;
// This option is experimental - it may be replaced with a different mechanism
// in the future.
//
// Configures session thread pools. If this is configured, then RunOptions for
// a Run call can select the thread pool to use.
//
// The intended use is for when some session invocations need to run in a
// background pool limited to a small number of threads:
// - For example, a session may be configured to have one large pool (for
// regular compute) and one small pool (for periodic, low priority work);
// using the small pool is currently the mechanism for limiting the inter-op
// parallelism of the low priority work. Note that it does not limit the
// parallelism of work spawned by a single op kernel implementation.
// - Using this setting is normally not needed in training, but may help some
// serving use cases.
// - It is also generally recommended to set the global_name field of this
// proto, to avoid creating multiple large pools. It is typically better to
// run the non-low-priority work, even across sessions, in a single large
// pool.
// repeated ThreadPoolOptionProto session_inter_op_thread_pool = 12;
// Assignment of Nodes to Devices is recomputed every placement_period
// steps until the system warms up (at which point the recomputation
// typically slows down automatically).
optional int32 placement_period = 3;
// When any filters are present sessions will ignore all devices which do not
// match the filters. Each filter can be partially specified, e.g. "/job:ps"
// "/job:worker/replica:3", etc.
repeated string device_filters = 4;
// Options that apply to all GPUs.
optional GPUOptions gpu_options = 6;
// Whether soft placement is allowed. If allow_soft_placement is true,
// an op will be placed on CPU if
// 1. there's no GPU implementation for the OP
// or
// 2. no GPU devices are known or registered
// or
// 3. need to co-locate with reftype input(s) which are from CPU.
optional bool allow_soft_placement = 7;
// Whether device placements should be logged.
optional bool log_device_placement = 8;
// Options that apply to all graphs.
optional GraphOptions graph_options = 10;
// Global timeout for all blocking operations in this session. If non-zero,
// and not overridden on a per-operation basis, this value will be used as the
// deadline for all blocking operations.
optional int64 operation_timeout_in_ms = 11;
// Options that apply when this session uses the distributed runtime.
// RPCOptions rpc_options = 13;
// Optional list of all workers to use in this session.
// ClusterDef cluster_def = 14;
// Next: 15
}
\ No newline at end of file
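For reference, a hedged sketch of how this config is typically populated from Python. The generated-module path below is an assumption (the compiled proto's package layout is not shown in this diff); the field names come directly from the messages above.
from dragon.vm.tensorflow.core.protobuf import config_pb2  # hypothetical generated-module path

config = config_pb2.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5   # pre-allocate ~50% of GPU memory
config.gpu_options.allow_growth = True                      # start small and grow on demand
config.allow_soft_placement = True                           # fall back to CPU when no GPU kernel exists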
syntax = "proto2";
package tensorflow;
enum DataType {
// Not a legal value for DataType. Used to indicate a DataType field
// has not been set.
DT_INVALID = 0;
// Data types that all computation devices are expected to be
// capable to support.
DT_FLOAT = 1;
DT_DOUBLE = 2;
DT_INT32 = 3;
DT_UINT8 = 4;
DT_INT16 = 5;
DT_INT8 = 6;
DT_STRING = 7;
DT_COMPLEX64 = 8; // Single-precision complex
DT_INT64 = 9;
DT_BOOL = 10;
DT_QINT8 = 11; // Quantized int8
DT_QUINT8 = 12; // Quantized uint8
DT_QINT32 = 13; // Quantized int32
DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops.
DT_QINT16 = 15; // Quantized int16
DT_QUINT16 = 16; // Quantized uint16
DT_UINT16 = 17;
DT_COMPLEX128 = 18; // Double-precision complex
DT_HALF = 19;
DT_RESOURCE = 20;
DT_VARIANT = 21; // Arbitrary C++ data types
DT_UINT32 = 22;
DT_UINT64 = 23;
// Do not use! These are only for parameters. Every enum above
// should have a corresponding value below (verified by types_test).
DT_FLOAT_REF = 101;
DT_DOUBLE_REF = 102;
DT_INT32_REF = 103;
DT_UINT8_REF = 104;
DT_INT16_REF = 105;
DT_INT8_REF = 106;
DT_STRING_REF = 107;
DT_COMPLEX64_REF = 108;
DT_INT64_REF = 109;
DT_BOOL_REF = 110;
DT_QINT8_REF = 111;
DT_QUINT8_REF = 112;
DT_QINT32_REF = 113;
DT_BFLOAT16_REF = 114;
DT_QINT16_REF = 115;
DT_QUINT16_REF = 116;
DT_UINT16_REF = 117;
DT_COMPLEX128_REF = 118;
DT_HALF_REF = 119;
DT_RESOURCE_REF = 120;
DT_VARIANT_REF = 121;
DT_UINT32_REF = 122;
DT_UINT64_REF = 123;
}
\ No newline at end of file
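One detail that makes the enum easier to audit: every reference variant is its base value offset by 100, as the listed values show. A trivial illustration (values copied from the enum above):
DT_FLOAT, DT_FLOAT_REF = 1, 101
DT_VARIANT, DT_VARIANT_REF = 21, 121
assert DT_FLOAT_REF - DT_FLOAT == DT_VARIANT_REF - DT_VARIANT == 100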
......@@ -122,7 +122,7 @@ class Layer(module.Module):
"""
self._built = True
def forward(self, inputs, **kwargs):
def forward(self, inputs):
"""Method to define the forward operations.
Parameters
......
......@@ -85,10 +85,8 @@ class Conv2d(layer.Layer):
self.W_init = W_init
self.b_init = b_init
self.in_channels = in_channels
self.W = None
self.b = None
if self.in_channels:
self.build(None)
self._built = True
......@@ -116,7 +114,6 @@ class Conv2d(layer.Layer):
self.in_channels = inputs_shape[-1]
else:
self.in_channels = inputs_shape[1]
# Fake shape with ``channels_first`` format,
# to indicate the backend to compute fans correctly.
filter_shape = [self.n_filter, self.in_channels] + self.filter_size
......@@ -135,10 +132,8 @@ class Conv2d(layer.Layer):
def forward(self, inputs, **kwargs):
data_format = conv_utils.convert_data_format(self.data_format)
padding, pads = conv_utils.normalize_2d_args('padding', self.padding)
outputs = vision_ops.conv2d(
[inputs, self.W] + ([self.b] if self.b_init else []),
num_output=self.n_filter,
kernel_shape=self.filter_size,
strides=self.strides,
pads=pads,
......@@ -148,5 +143,4 @@ class Conv2d(layer.Layer):
)
if self.act:
outputs = self.act(outputs)
return outputs
......@@ -106,11 +106,7 @@ class Dense(layer.Layer):
def forward(self, inputs):
outputs = math_ops.fully_connected(
[inputs, self.W] + ([self.b] if self.b_init else []),
num_output=self.n_units,
axis=1,
transW=True,
)
[inputs, self.W] + ([self.b] if self.b_init else []), axis=1)
if self.act:
outputs = self.act(outputs)
return outputs
This diff could not be displayed because it is too large.
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import subprocess
import argparse
TESTS_AND_SOURCES = [
('dragon/core/test_ops', 'dragon.core.ops'),
]
TESTS = [t[0] for t in TESTS_AND_SOURCES]
SOURCES = [t[1] for t in TESTS_AND_SOURCES]
def parse_args():
parser = argparse.ArgumentParser(
description='Run the unittests',
epilog='where TESTS is any of: {}'.format(', '.join(TESTS)))
parser.add_argument(
'-v',
'--verbose',
action='store_true',
help='print verbose information')
parser.add_argument(
'-q',
'--quiet',
action='store_true',
help='print error information only')
parser.add_argument(
'-c',
'--coverage',
action='store_true',
help='run coverage for unittests')
return parser.parse_args()
def get_base_command(args):
"""Return the base running command."""
if args.coverage:
executable = ['coverage', 'run', '--parallel-mode']
else:
executable = [sys.executable]
return executable
def main():
"""The main procedure."""
args = parse_args()
base_command = get_base_command(args)
for i, test in enumerate(TESTS):
command = base_command[:]
if args.coverage:
if SOURCES[i]:
command.extend(['--source', SOURCES[i]])
command.append(test + '.py')
if args.verbose:
command.append('--verbose')
elif args.quiet:
command.append('--quiet')
subprocess.call(' '.join(command), shell=True)
if args.coverage:
subprocess.call(['coverage', 'combine'])
subprocess.call(['coverage', 'html'])
if __name__ == '__main__':
main()
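As a usage note: assuming this runner is saved as run_tests.py (the filename is not shown in the diff), invoking it with --coverage expands each entry to roughly "coverage run --parallel-mode --source dragon.core.ops dragon/core/test_ops.py", and then runs "coverage combine" and "coverage html" to merge and report the results.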
......@@ -80,8 +80,8 @@ from dragon.vm.torch.ops.init.functional import uniform
from dragon.vm.torch.ops.init.functional import zeros
from dragon.vm.torch.ops.init.functional import zeros_like
from dragon.vm.torch.ops.math.functional import abs
from dragon.vm.torch.ops.math.functional import accumulate
from dragon.vm.torch.ops.math.functional import add
from dragon.vm.torch.ops.math.functional import axpby
from dragon.vm.torch.ops.math.functional import bitwise_not
from dragon.vm.torch.ops.math.functional import bitwise_xor
from dragon.vm.torch.ops.math.functional import ceil
......
......@@ -98,7 +98,7 @@ class Function(object):
"""Generate the OpDef from attributes."""
attributes = self.attributes()
self._def = proto_util.make_operator_cdef(
name='Generic',
name=attributes.get('name', 'GenericOp'),
cache_key=self._cache_key,
op_type=attributes['op_type'],
device_option=proto_util.get_device_option(
......
......@@ -46,7 +46,6 @@ class _ConvNd(function.Function):
return {
'op_type': self.__class__.__name__,
'arguments': {
'num_output': self.num_output,
'kernel_shape': self.kernel_shape,
'strides': self.strides,
'pads': self.pads,
......
......@@ -16,15 +16,15 @@ from __future__ import print_function
from dragon.vm.torch.autograd import function
class Accumulate(function.Function):
class Axpby(function.Function):
def __init__(self, key, dev, **kwargs):
super(Accumulate, self).__init__(key, dev, **kwargs)
super(Axpby, self).__init__(key, dev, **kwargs)
self.alpha = kwargs.get('alpha', 1.)
self.beta = kwargs.get('beta', 1.)
def attributes(self):
return {
'op_type': 'Accumulate',
'op_type': 'Axpby',
'arguments': {
'alpha': self.alpha,
'beta': self.beta,
......@@ -36,9 +36,9 @@ class Accumulate(function.Function):
return self.dispatch([input], [out], no_grad=True)
class Binary(function.Function):
class BinaryFunc(function.Function):
def __init__(self, key, dev, **kwargs):
super(Binary, self).__init__(key, dev, **kwargs)
super(BinaryFunc, self).__init__(key, dev, **kwargs)
self.op_type = kwargs.get('op_type', '')
def attributes(self):
......@@ -73,9 +73,9 @@ class Clip(function.Function):
return self.dispatch([input], [out])
class Unary(function.Function):
class UnaryFunc(function.Function):
def __init__(self, key, dev, **kwargs):
super(Unary, self).__init__(key, dev, **kwargs)
super(UnaryFunc, self).__init__(key, dev, **kwargs)
self.op_type = kwargs.get('op_type', '')
def attributes(self):
......@@ -86,18 +86,18 @@ class Unary(function.Function):
return self.dispatch([input], [out])
class MM(function.Function):
class MatMul(function.Function):
def __init__(self, key, dev, **kwargs):
super(MM, self).__init__(key, dev, **kwargs)
self.transA = kwargs.get('transA', False)
self.transB = kwargs.get('transB', False)
super(MatMul, self).__init__(key, dev, **kwargs)
self.transpose_a = kwargs.get('transpose_a', False)
self.transpose_b = kwargs.get('transpose_b', False)
def attributes(self):
return {
'op_type': 'Matmul',
'op_type': 'MatMul',
'arguments': {
'transA': self.transA,
'transB': self.transB,
'transA': self.transpose_a,
'transB': self.transpose_b,
},
}
......
......@@ -44,8 +44,8 @@ def abs(input, out=None):
return _unary_func(input, 'Abs', out)
def accumulate(input, alpha=1., beta=1., out=None):
r"""Compute the element-wise accumulation from input to output.
def axpby(input, alpha=1., beta=1., out=None):
r"""Compute the element-wise addition from input to output.
.. math:: \text{out} = \alpha * \text{input} + \beta * \text{out}
......@@ -66,7 +66,7 @@ def accumulate(input, alpha=1., beta=1., out=None):
The output tensor.
"""
return _functions.Accumulate \
return _functions.Axpby \
.instantiate(
input.device,
alpha=alpha,
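A hedged numpy illustration of the semantics above (not the vm API itself): axpby scales the input by alpha, scales the existing output by beta, and sums them element-wise.
import numpy as np

x = np.full((2, 3), 1.)
y = np.full((2, 3), 1.)
y = 2. * x + 3. * y   # what axpby(x, alpha=2., beta=3., out=y) is expected to leave in y
print(y)              # every element equals 5.0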
......@@ -555,7 +555,7 @@ def maximum(input, other, out=None):
"""
input, other = utils \
.remove_binary_scalar(input, other)
return _functions.Binary \
return _functions.BinaryFunc \
.instantiate(
input.device,
op_type='Maximum',
......@@ -584,28 +584,28 @@ def minimum(input, other, out=None):
"""
input, other = utils \
.remove_binary_scalar(input, other)
return _functions.Binary \
return _functions.BinaryFunc \
.instantiate(
input.device,
op_type='Minimum',
).apply(input, other, out)
def mm(input, mat2, transA=False, transB=False, out=None):
def mm(input, mat2, transpose_a=False, transpose_b=False, out=None):
r"""Compute matrix-matrix multiplication.
.. math:: \text{out} = AB
.. math:: \text{out} = a \times b
Parameters
----------
input : dragon.vm.torch.Tensor
The matrix :math:`A`.
The matrix :math:`a`.
mat2 : dragon.vm.torch.Tensor
The matrix :math:`B`.
transA : bool, optional, default=False
**True** to transpose :math:`A` before computation.
transB : bool, optional, default=False
**True** to transpose :math:`B` before computation.
The matrix :math:`b`.
transpose_a : bool, optional, default=False
**True** to transpose :math:`a` before computation.
transpose_b : bool, optional, default=False
**True** to transpose :math:`b` before computation.
out : dragon.vm.torch.Tensor, optional
The optional output.
......@@ -615,11 +615,11 @@ def mm(input, mat2, transA=False, transB=False, out=None):
The output tensor.
"""
return _functions.MM \
return _functions.MatMul \
.instantiate(
utils.unify_devices([input, mat2]),
transA=transA,
transB=transB,
transpose_a=transpose_a,
transpose_b=transpose_b,
).apply(input, mat2, out)
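A hedged numpy illustration of the renamed flags (shapes only; the vm tensors are not constructed here): transpose_a transposes the first operand before the product, transpose_b the second.
import numpy as np

a = np.random.rand(5, 2)
b = np.random.rand(5, 3)
out = a.T @ b          # the result mm(a, b, transpose_a=True) is expected to match
print(out.shape)       # (2, 3)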
......@@ -922,17 +922,13 @@ def sub(input, value, out=None):
def _binary_func(input, value, op_type='', out=None):
"""Generic binary function."""
input, value = utils.remove_binary_scalar(input, value)
return _functions.Binary \
.instantiate(
input.device,
op_type=op_type,
).apply(input, value, out)
return _functions.BinaryFunc \
.instantiate(input.device, op_type=op_type) \
.apply(input, value, out)
def _unary_func(input, op_type='', out=None):
"""Generic unary function."""
return _functions.Unary \
.instantiate(
input.device,
op_type=op_type,
).apply(input, out)
return _functions.UnaryFunc \
.instantiate(input.device, op_type=op_type) \
.apply(input, out)
......@@ -19,18 +19,18 @@ from dragon.vm.torch.autograd import function
class ParamUpdate(function.Function):
def __init__(self, key, dev, **kwargs):
super(ParamUpdate, self).__init__(key, dev, **kwargs)
self.slot = kwargs.get('slot', '')
self.lr_mult = kwargs.get('lr_mult', 1.)
self.decay_mult = kwargs.get('decay_mult', 1.)
self.op_type = kwargs.get('op_type', 'Update')
self.op_type = kwargs.get('op_type', '')
self.op_handle = kwargs.get('op_handle', '')
self.lr_mult = kwargs.get('lr_mult', 1)
self.decay_mult = kwargs.get('decay_mult', 1)
def attributes(self):
return {
'name': self.op_handle,
'op_type': self.op_type,
'arguments': {
'lr_mult': self.lr_mult,
'decay_mult': self.decay_mult,
'slot': self.slot,
'lr_mult': float(self.lr_mult),
'decay_mult': float(self.decay_mult),
},
}
......@@ -49,11 +49,8 @@ class GradAccumulate(function.Function):
def attributes(self):
return {
'op_type': 'Accumulate',
'arguments': {
'alpha': 1.,
'beta': 1.,
},
'op_type': 'Axpby',
'arguments': {'alpha': 1., 'beta': 1.},
}
def forward(self, grads):
......
......@@ -23,25 +23,23 @@ def grad_accumulate(grads):
if len(grads) == 0:
return
return _functions.GradAccumulate \
.instantiate(
grads[0].device,
).apply(grads)
.instantiate(grads[0].device).apply(grads)
def param_update(
param,
grad,
op_type,
slot,
lr_mult=1.,
decay_mult=1.,
op_handle,
lr_mult=1,
decay_mult=1,
):
"""Apply the param update."""
return _functions.ParamUpdate \
.instantiate(
param.device,
op_type=op_type,
slot=slot,
op_handle=op_handle,
lr_mult=lr_mult,
decay_mult=decay_mult,
).apply(param, grad)
......@@ -40,13 +40,12 @@ class Adam(Optimizer):
self,
params,
lr=1e-3,
beta1=0.9,
beta2=0.999,
betas=(0.9, 0.999),
eps=1e-8,
weight_decay=0,
amsgrad=False,
scale_gradient=1.,
clip_gradient=-1.,
scale=1,
clip_norm=0,
):
r"""Create an ``Adam`` optimizer.
......@@ -56,50 +55,47 @@ class Adam(Optimizer):
The parameters to optimize.
lr : float, required
The initial value for :math:`\text{lr}`.
beta1 : float, optional, default=0.9
The initial value for :math:`\beta_{1}`.
beta2 : float, optional, default=0.999
The initial value for :math:`\beta_{2}`.
betas : Tuple[float, float], optional, default=(0.9, 0.999)
The initial value for :math:`\beta_{1}` and :math:`\beta_{2}`.
eps : float, optional, default=1e-8
The initial value of :math:`\epsilon`.
weight_decay : float, optional, default=-1.
The factor of L2 penalty.
The initial value for :math:`\epsilon`.
weight_decay : float, optional, default=0
The L2 penalty factor to weight.
amsgrad : bool, optional, default=False
**True** to switch to **AMSGrad** optimizer.
scale_gradient : float, optional, default=1.
The factor to scale gradients.
clip_gradient : float, optional, default=-1.
The norm thresh to clip gradients.
scale : float, optional, default=1
The scaling factor to gradient.
clip_norm : float, optional, default=0
The maximum L2 norm to clip gradient.
"""
if not 0. <= lr:
raise ValueError("Invalid learning rate: {}".format(lr))
if not 0. <= eps:
raise ValueError("Invalid epsilon value: {}".format(eps))
if not 0. <= beta1 < 1.:
raise ValueError("Invalid beta parameter at index 0: {}".format(beta1))
if not 0. <= beta2 < 1.:
raise ValueError("Invalid beta parameter at index 1: {}".format(beta2))
if not 0. <= betas[0] < 1.:
raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
if not 0. <= betas[1] < 1.:
raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
if amsgrad:
raise NotImplementedError()
defaults = dict(
lr=lr,
beta1=beta1,
beta2=beta2,
beta1=betas[0],
beta2=betas[1],
eps=eps,
weight_decay=weight_decay,
amsgrad=amsgrad,
scale_gradient=scale_gradient,
clip_gradient=clip_gradient,
scale=scale,
clip_norm=clip_norm,
weight_decay=weight_decay,
)
super(Adam, self).__init__(params, defaults)
self._update_op_type = 'AdamUpdate'
self._shared_args = {
'lr': 'base_lr',
'beta1': 'beta1',
'beta2': 'beta2',
'eps': 'eps',
'weight_decay': 'l2_decay',
'clip_gradient': 'clip_gradient',
'scale_gradient': 'scale_gradient',
'scale': 'scale',
'clip_norm': 'clip_norm',
'weight_decay': 'weight_decay',
}
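A hedged migration sketch for callers of this optimizer; the argument names are taken from the old and new signatures above, the values are illustrative only, and ``params`` stands in for any iterable of trainable tensors.
# old: Adam(params, lr=1e-3, beta1=0.9, beta2=0.999,
#           scale_gradient=1., clip_gradient=5.)
# new: Adam(params, lr=1e-3, betas=(0.9, 0.999),
#           scale=1, clip_norm=5.)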
......@@ -43,8 +43,8 @@ class Optimizer(object):
"""
# Store the global unique slot index.
_DEFAULT_UNIQUE_SLOT_ID = 0
# Store for the global unique handle
_DEFAULT_UNIQUE_HANDLE_INDEX = 0
def __init__(self, params, defaults):
"""Create a ``Optimizer``.
......@@ -69,7 +69,7 @@ class Optimizer(object):
param_groups = [{'params': param_groups}]
for param_group in param_groups:
self.add_param_group(param_group)
self._update_op_type = None
self._op_type = self.__class__.__name__ + 'Update'
self._process_group = distributed.get_group()
self._shared_args = {}
......@@ -113,8 +113,8 @@ class Optimizer(object):
# A group inherits the defaults while using ``multiplier``
param_group2 = {
'params': [],
'lr_mult': 1.,
'decay_mult': 1.,
'lr_mult': 1,
'decay_mult': 1,
}
```
......@@ -124,25 +124,21 @@ class Optimizer(object):
The param group to add.
"""
assert isinstance(param_group, dict), "Param group must be a dict."
if not isinstance(param_group, dict):
raise TypeError('Param group must be a dict.')
params = param_group['params']
if isinstance(params, Tensor):
param_group['params'] = [params]
elif isinstance(params, set):
raise TypeError(
'Optimizer parameters need to be organized in ordered collections,'
'\nbut the ordering of tensors in sets will change between runs.'
'\nPlease use a list instead.'
)
elif isinstance(params, (set, dict)):
raise TypeError('Parameters should be organized in a sequence.')
else:
param_group['params'] = list(params)
for param in param_group['params']:
if not param.requires_grad:
raise ValueError(
"Optimizing a Parameter that "
"Optimizing a parameter that "
"doesn't require gradients."
)
......@@ -155,17 +151,18 @@ class Optimizer(object):
else:
param_group.setdefault(name, default)
if 'slot' not in param_group:
Optimizer._DEFAULT_UNIQUE_SLOT_ID += 1
param_group['slot'] = 'Optimizer/Slot:{}'.format(
Optimizer._DEFAULT_UNIQUE_SLOT_ID)
if 'name' not in param_group:
Optimizer._DEFAULT_UNIQUE_HANDLE_INDEX += 1
param_group['name'] = 'Optimizer_{}'.format(
Optimizer._DEFAULT_UNIQUE_HANDLE_INDEX)
param_set = set()
for group in self.param_groups:
param_set.update(set(group['params']))
if not param_set.isdisjoint(set(param_group['params'])):
raise ValueError("Some parameters appear in more than one parameter group")
raise ValueError('Some parameters appear in '
'more than one parameter group.')
self.param_groups.append(param_group)
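A hedged usage sketch of the stricter checks above; ``w1``/``w2`` stand in for trainable tensors and ``SGD`` for any concrete subclass.
# opt = SGD([w1], lr=0.1)
# opt.add_param_group({'params': [w2], 'lr_mult': 0.1})  # ok: inherits the other defaults
# opt.add_param_group({'params': {w1, w2}})              # TypeError: must be a sequence
# opt.add_param_group({'params': [w1]})                  # ValueError: already in a group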
......@@ -224,7 +221,7 @@ class Optimizer(object):
def _init_set_defaults(self, group):
"""Initialize the defaults into current workspace."""
template = group['slot'] + '/{}'
template = '/share/hyper/%s/{}' % group['name']
for k, v in group.items():
if k in self._shared_args:
workspace.feed_tensor(
......@@ -256,10 +253,10 @@ class Optimizer(object):
for p, g in zip(params, grads):
training_funcs.param_update(
p, g,
slot=group['slot'],
op_type=self._update_op_type,
lr_mult=group.get('lr_mult', 1.),
decay_mult=group.get('decay_mult', 1.),
op_type=self._op_type,
op_handle=group['name'],
lr_mult=group.get('lr_mult', 1),
decay_mult=group.get('decay_mult', 1),
)
@staticmethod
......
......@@ -46,8 +46,8 @@ class RMSprop(Optimizer):
weight_decay=0,
momentum=0,
centered=False,
scale_gradient=1.,
clip_gradient=-1.,
scale=1,
clip_norm=0,
):
r"""Create a ``RMSprop`` optimizer.
......@@ -61,14 +61,14 @@ class RMSprop(Optimizer):
The initial value for :math:`\alpha`.
eps : float, optional, default=1e-7
The initial value for :math:`\epsilon`.
weight_decay : float, optional, default=-1.
The factor of L2 penalty.
weight_decay : float, optional, default=0
The L2 penalty factor to weight.
momentum : float, optional, default=0
The initial value for :math:`\text{momentum}`.
scale_gradient : float, optional, default=1.
The factor to scale gradients.
clip_gradient : float, optional, default=-1.
The norm thresh to clip gradients.
scale : float, optional, default=1
The scaling factor to gradient.
clip_norm : float, optional, default=0
The maximum L2 norm to clip gradient.
"""
if not 0. <= lr:
......@@ -85,18 +85,17 @@ class RMSprop(Optimizer):
alpha=alpha,
eps=eps,
centered=centered,
scale=scale,
clip_norm=clip_norm,
weight_decay=weight_decay,
scale_gradient=scale_gradient,
clip_gradient=clip_gradient,
)
super(RMSprop, self).__init__(params, defaults)
self._update_op_type = 'RMSPropUpdate'
self._shared_args = {
'lr': 'base_lr',
'momentum': 'momentum',
'alpha': 'decay',
'eps': 'eps',
'weight_decay': 'l2_decay',
'clip_gradient': 'clip_gradient',
'scale_gradient': 'scale_gradient',
'scale': 'scale',
'clip_norm': 'clip_norm',
'weight_decay': 'weight_decay',
}
......@@ -67,10 +67,10 @@ class SGD(Optimizer):
lr=required,
momentum=0,
dampening=0,
weight_decay=-1.,
weight_decay=0,
nesterov=False,
scale_gradient=1.,
clip_gradient=-1.,
scale=1,
clip_norm=0,
):
r"""Create a ``SGD`` optimizer.
......@@ -84,37 +84,37 @@ class SGD(Optimizer):
The initial value for :math:`\text{momentum}`.
dampening : float, optional, default=0
The dampening for :math:`\text{momentum}`.
weight_decay : float, optional, default=-1.
The factor of L2 penalty.
weight_decay : float, optional, default=0
The L2 penalty factor to weight.
nesterov : bool, optional, default=False
**True** to switch to **NesterovSGD** optimizer.
scale_gradient : float, optional, default=1.
The factor to scale gradients.
clip_gradient : float, optional, default=-1.
The norm thresh to clip gradients.
scale : float, optional, default=1
The scaling factor to gradient.
clip_norm : float, optional, default=0
The maximum L2 norm to clip gradient.
"""
if lr is not required and lr < 0.:
raise ValueError("Invalid learning rate: {}".format(lr))
raise ValueError('Invalid learning rate: {}'.format(lr))
if momentum < 0.:
raise ValueError("Invalid momentum value: {}".format(momentum))
raise ValueError('Invalid momentum value: {}'.format(momentum))
defaults = dict(
lr=lr,
momentum=momentum,
dampening=dampening,
weight_decay=weight_decay,
nesterov=nesterov,
scale_gradient=scale_gradient,
clip_gradient=clip_gradient,
scale=scale,
clip_norm=clip_norm,
weight_decay=weight_decay,
)
if nesterov and (momentum <= 0. or dampening != 0.):
raise ValueError("Nesterov momentum requires a momentum and zero dampening.")
raise ValueError('Nesterov momentum requires a momentum and zero dampening.')
super(SGD, self).__init__(params, defaults)
self._update_op_type = 'NesterovUpdate' if nesterov else 'SGDUpdate'
self._op_type = ('Nesterov' if nesterov else 'SGD') + 'Update'
self._shared_args = {
'lr': 'base_lr',
'momentum': 'momentum',
'weight_decay': 'l2_decay',
'clip_gradient': 'clip_gradient',
'scale_gradient': 'scale_gradient',
'scale': 'scale',
'clip_norm': 'clip_norm',
'weight_decay': 'weight_decay',
}
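As with Adam above, a hedged migration sketch for the renamed SGD arguments (values illustrative; ``params`` is any iterable of trainable tensors).
# old: SGD(params, lr=0.1, momentum=0.9, weight_decay=-1.,
#          scale_gradient=1., clip_gradient=-1.)
# new: SGD(params, lr=0.1, momentum=0.9, weight_decay=0,
#          scale=1, clip_norm=0)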