Commit ddb76e7b by Ting PAN

add memonger for Dragon

1 parent d64a3943
Showing with 1800 additions and 982 deletions
......@@ -42,6 +42,7 @@ class Graph final : public GraphBase {
GraphDef Prune(const GraphDef& graph_def);
GraphDef Share(const GraphDef& graph_def);
GraphDef MakeUpdate(const GraphDef& graph_def);
void RecomputingAware(const GraphDef& graph_def, Workspace* ws);
inline Workspace* ws() const { return ws_; }
......
......@@ -80,30 +80,35 @@ class Operator : public OperatorBase {
allow_run_ = true;
allow_run_ &= _MPICheck();
allow_run_ &= (!(OutputSize() == 1 && output(0)->name() == "ignore"));
allow_share_grads_ = (!op_def.debug_mode());
allow_share_grads_ &= op_def.share_grads();
allow_share_grads_ &= (type().find("Gradient") != string::npos);
}
virtual void Run() final {
if (!allow_run_) return;
MakeResource();
ctx_.SwitchToDevice();
if (!op_def_.debug_mode()) ShareBeforeRun();
MemorySwitch();
RunOnDevice();
if (!op_def_.debug_mode()) ClearAfterRun();
ctx_.FinishDeviceCompution();
CleanResource();
}
virtual void ElimateCorruption();
virtual void ShareGradient();
virtual void MakeResource();
virtual void CleanResource();
void MemorySwitch() {
for (int i = 0; i < InputSize(); i++)
if (input(i).name() != "ignore")
input(i).SwitchToDevice();
if (input(i).name() != "ignore") input(i).SwitchToDevice();
for (int i = 0; i < OutputSize(); i++)
if (output(i)->name() != "ignore")
output(i)->SwitchToDevice();
if (output(i)->name() != "ignore") output(i)->SwitchToDevice();
}
virtual void ShareBeforeRun() { /*** share tensors here if necessary ***/ }
virtual void RunOnDevice() = 0;
virtual void ClearAfterRun() { /*** clear tensors here if necessary ***/ }
inline Context& ctx() { return ctx_; }
inline string anchor() { return GetSingleArg("anchor", name()); }
......@@ -111,7 +116,7 @@ class Operator : public OperatorBase {
protected:
Context ctx_;
bool allow_run_;
bool allow_run_, allow_share_grads_;
private:
bool _MPICheck() {
......@@ -169,6 +174,9 @@ DECLARE_REGISTRY(CUDNNOperatorRegistry, OperatorBase, const OperatorDef&, Worksp
} \
}
#define DISABLE_SHARE_GRADIENT \
this->allow_share_grads_ = false
#define INSTANTIATE_OPERATOR(name, context) \
template class name##Op<context>;
......
......@@ -30,7 +30,7 @@ class Tensor {
CHECK_GT(d, 0);
new_size *= d;
}
if (size_ != new_size &&
if (size_ != new_size && own_mem_ &&
capacity_ < TIndex(new_size * meta_.itemsize())) {
memory_.reset();
capacity_ = 0;
......@@ -38,9 +38,7 @@ class Tensor {
size_ = new_size;
}
void ReshapeLike(const Tensor& other) {
Reshape(other.dims_);
}
void ReshapeLike(const Tensor& other) { Reshape(other.dims_); }
inline const string& name() const { return name_; }
......@@ -92,63 +90,86 @@ class Tensor {
return ss.str();
}
MixedMemory::State memory_state() const { return memory_->state(); }
MixedMemory* memory() const { return memory_.get(); }
void SwitchToDevice() { if(memory_) memory_->SwitchToDevice(); }
inline bool is_corrupted() const { return is_corrupted_; }
inline void Corrupt() { is_corrupted_ = true; }
MixedMemory* memory() const { return own_mem_ ? memory_.get() : ex_memory_; }
MixedMemory::State memory_state() const {
MixedMemory* mem = memory();
CHECK(mem) << "memory access before allocating.";
return memory()->state();
}
void SwitchToDevice() {
MixedMemory* mem = own_mem_ ? memory_.get() : ex_memory_;
if (mem) mem->SwitchToDevice();
}
const TypeMeta& meta() const { return meta_; }
void SetMeta(const TypeMeta& meta) { meta_ = meta; }
template <typename T> inline bool IsType() { return meta_.Match<T>(); }
template <class Context>
const void* raw_data() const {
CHECK(memory_.get()) << "memory access before allocating.";
if (TypeMeta::Id<Context>() == TypeMeta::Id<CPUContext>())
return memory_->cpu_data();
else if (TypeMeta::Id<Context>() == TypeMeta::Id<CUDAContext>())
return memory_->cuda_data();
else LOG(FATAL) << "unknown memory type access. only CPU or CUDA are supported.";
return nullptr;
void mutable_data_ptr(void** data_ptr) {
MixedMemory* mem = memory();
if (!mem) {
*data_ptr = nullptr;
} else {
if (TypeMeta::Id<Context>() == TypeMeta::Id<CPUContext>()) {
*data_ptr = mem->mutable_cpu_data();
} else if (TypeMeta::Id<Context>() == TypeMeta::Id<CUDAContext>()) {
*data_ptr = mem->mutable_cuda_data();
} else {
LOG(FATAL) << "unknown memory type access. only CPU or CUDA are supported.";
}
}
template <typename T, class Context>
const T* data() const {
return static_cast<const T*>(raw_data<Context>());
}
template <class Context>
void active_data_ptr(void** data_ptr) {
if (!memory_) {
*data_ptr = nullptr;
} else {
const void* const_data_ptr() const {
MixedMemory* mem = memory();
CHECK(mem) << "memory access before allocating.";
if (TypeMeta::Id<Context>() == TypeMeta::Id<CPUContext>()) {
*data_ptr = memory_->mutable_cpu_data();
return mem->cpu_data();
} else if (TypeMeta::Id<Context>() == TypeMeta::Id<CUDAContext>()) {
*data_ptr = memory_->mutable_cuda_data();
}
return mem->cuda_data();
} else {
LOG(FATAL) << "unknown memory type access. only CPU or CUDA are supported.";
return nullptr;
}
}
template <class Context>
void* raw_mutable_data(const TypeMeta& meta) {
void* data_ptr;
active_data_ptr<Context>(&data_ptr);
if (own_mem_) {
mutable_data_ptr<Context>(&data_ptr);
if (meta_ == meta && data_ptr) {
return data_ptr;
} else {
meta_ = meta; // copy-assign the meta
CHECK_GT(size_, 0); // must specify a valid size
meta_ = meta;
CHECK_GT(size_, 0);
memory_.reset(new MixedMemory(meta, size_* meta_.itemsize()));
// malloc
if (TypeMeta::Id<Context>() == TypeMeta::Id<CPUContext>())
data_ptr = memory_->mutable_cpu_data();
else if (TypeMeta::Id<Context>() == TypeMeta::Id<CUDAContext>())
data_ptr = memory_->mutable_cuda_data();
// init for each structed element if necessary
mutable_data_ptr<Context>(&data_ptr); // malloc
if (meta.ctor()) meta_.ctor()(data_ptr, size_);
}
capacity_ = size_ * meta_.itemsize();
return data_ptr;
} else {
meta_ = meta;
CHECK_GT(size_, 0);
TIndex ex_capacity_ = ex_memory_->nbytes();
if (ex_capacity_ >= TIndex(size_ * meta.itemsize())) {
mutable_data_ptr<Context>(&data_ptr);
} else {
delete ex_memory_;
ex_memory_ = new MixedMemory(meta, size_* meta_.itemsize());
mutable_data_ptr<Context>(&data_ptr); // malloc
if (meta.ctor()) meta_.ctor()(data_ptr, size_);
capacity_ = size_ * meta.itemsize();
}
return data_ptr;
}
}
template <class Context>
......@@ -159,22 +180,30 @@ class Tensor {
return raw_mutable_data<Context>(meta_);
}
template <class Context>
const void* raw_data() const { return const_data_ptr<Context>(); }
template <typename T, class Context>
T* mutable_data() {
void* data_ptr;
active_data_ptr<Context>(&data_ptr);
mutable_data_ptr<Context>(&data_ptr);
if (data_ptr && meta_ == TypeMeta::Make<T>()) return static_cast<T*>(data_ptr);
return static_cast<T*>(raw_mutable_data<Context>(TypeMeta::Make<T>()));
}
void Share(const Tensor& other) {
template <typename T, class Context>
const T* data() const {
return static_cast<const T*>(raw_data<Context>());
}
inline void Share(const Tensor& other) {
CHECK_EQ(size_, other.size_);
memory_ = other.memory_;
meta_ = other.meta_;
capacity_ = other.capacity_;
}
void Replace(const Tensor& other) {
inline void Replace(const Tensor& other) {
memory_ = other.memory_;
meta_ = other.meta_;
capacity_ = other.capacity_;
......@@ -182,23 +211,27 @@ class Tensor {
dims_ = other.dims_;
}
void Reset() {
inline void Move(MixedMemory* mem) {
if (mem != nullptr) ex_memory_ = mem;
else ex_memory_ = new MixedMemory(TypeMeta::Make<float>(), 4);
own_mem_ = false;
}
inline void Reset() {
size_ = capacity_ = 0;
meta_ = TypeMeta();
dims_.clear();
memory_.reset();
}
void Release() {
memory_.reset();
}
private:
vector<TIndex> dims_;
TIndex size_ = 0, capacity_ = 0;
TypeMeta meta_;
string name_;
shared_ptr<MixedMemory> memory_;
MixedMemory* ex_memory_ = nullptr;
bool is_corrupted_ = false, own_mem_ = true;
};
} // namespace dragon
......
......@@ -13,23 +13,28 @@
namespace dragon {
#define WORKSPACE_MIN_BUFFER_SIZE 3
#define WORKSPACE_MAX_BUFFER_SIZE 3
#define WORKSPACE_COMMON_BUFFER_SIZE 2
#define WORKSPACE_GRAD_BUFFER_SIZE 1
#define WORKSPACE_MAX_CORRUPTED_SIZE 2
class Workspace{
public:
typedef Map<string, unique_ptr<Tensor> > TensorMap;
typedef Map<string, stack<string> > BufferMap;
typedef Map<string, unique_ptr<mutex> > LockMap;
typedef Map<string, unique_ptr<GraphBase> > GraphMap;
typedef Map<string, TensorFiller> FillerMap;
typedef Map<string, string> RenameMap;
typedef Map<string, vector<OperatorBase*> > RecomputeMap;
Workspace(): root_folder_(".") { init(); }
Workspace(string root_folder) : root_folder_(root_folder) { init(); }
~Workspace();
void init() {
CreateTensor("ignore");
for (int i = 0; i < WORKSPACE_MIN_BUFFER_SIZE; i++) CreateBuffer();
CreateBuffer("Common", WORKSPACE_COMMON_BUFFER_SIZE);
CreateBuffer("Grad", WORKSPACE_GRAD_BUFFER_SIZE);
}
/******************** Tensor ********************/
......@@ -101,33 +106,39 @@ class Workspace{
/******************** Buffer ********************/
inline Tensor* CreateBuffer() {
int buffer_idx = 1;
string name;
while (1) {
name = "_t_buffer_" + dragon_cast<string, int>(buffer_idx++);
if (!HasTensor(name)) break;
inline void CreateBuffer(string category, int num) {
CHECK(!buffer_map_.count(category));
buffer_map_[category] = stack<string>();
for (int i = 1; i <= num; i++) {
string name = "_t_" + category + "_buffer_" + dragon_cast<string, int>(i);
buffer_map_[category].push(name);
CreateTensor(name);
}
buffer_stack_.push(name);
return CreateTensor(name);
}
inline Tensor* GetBuffer() {
if (!buffer_stack_.empty()) {
string name = buffer_stack_.top();
buffer_stack_.pop();
inline Tensor* GetBuffer(string category = "Common") {
if (!buffer_map_[category].empty()) {
string name = buffer_map_[category].top();
buffer_map_[category].pop();
return GetTensor(name);
}
LOG(FATAL) << "buffers are not enough, add more if necessary.";
LOG(FATAL) << "buffers of [" << category << "] "
<< "are not enough, add more if necessary.";
return nullptr;
}
inline void ReleaseBuffer(Tensor* tensor, bool force_release=false) {
inline void ReleaseBuffer(Tensor* tensor,
string category = "Common",
bool enforce = false) {
static Map<string, int> limits = {
{ "Common", WORKSPACE_COMMON_BUFFER_SIZE },
{ "Grad", WORKSPACE_GRAD_BUFFER_SIZE }};
if (buffer_map_[category].size() >= limits[category] || enforce) {
// release directly
if (buffer_stack_.size() >= WORKSPACE_MAX_BUFFER_SIZE || force_release) {
ReleaseTensor(tensor->name());
} else { // recover as an available buffer
buffer_stack_.push(tensor->name());
} else {
// recover as an available buffer
buffer_map_[category].push(tensor->name());
}
}
......@@ -158,14 +169,30 @@ class Workspace{
rename_map_[old_tensor] = new_tensor;
}
inline void AddRecompute(const string& tensor, OperatorBase* op) {
if (!recompute_map_.count(tensor)) {
recompute_map_[tensor] = vector<OperatorBase*>();
}
recompute_map_[tensor].push_back(op);
}
inline vector<OperatorBase*> GetRecompute(const string& tensor) {
if (recompute_map_.count(tensor)) {
return recompute_map_[tensor];
} else {
return vector<OperatorBase*>();
}
}
private:
TensorMap tensor_map_;
BufferMap buffer_map_;
LockMap lock_map_;
GraphMap graph_map_;
FillerMap filler_map_;
RenameMap rename_map_;
RecomputeMap recompute_map_;
string root_folder_;
stack<string> buffer_stack_;
};
} // namespace dragon
......
......@@ -43,10 +43,11 @@ class DropoutGradientOp final : public Operator<Context> {
threshold = static_cast<unsigned int>(UINT_MAX * prob);
if (use_scale) scale = 1.0 / (1.0 - prob);
else scale = 1.0;
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
void ClearAfterRun() override;
void CleanResource() override;
template <typename T> void RunWithType();
protected:
......
......@@ -30,7 +30,9 @@ class ReluGradientOp : public Operator<Context> {
public:
ReluGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
slope(OperatorBase::GetSingleArg<float>("slope", 0.0)) {}
slope(OperatorBase::GetSingleArg<float>("slope", 0.0)) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -23,7 +23,10 @@ class SigmoidOp final : public Operator<Context> {
template <class Context>
class SigmoidGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(SigmoidGradientOp);
SigmoidGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -32,7 +32,9 @@ class SoftmaxGradientOp final : public Operator<Context> {
public:
SoftmaxGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)) {}
axis(OperatorBase::GetSingleArg<int>("axis", 1)) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -23,7 +23,10 @@ class TanhOp final : public Operator<Context> {
template <class Context>
class TanhGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(TanhGradientOp);
TanhGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -29,9 +29,8 @@ class AddGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(AddGradientOp);
void ShareBeforeRun() override;
void ShareGradient() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void EltwiseRunWithType();
template <typename T> void BroadcastRunWithType(int type);
......
......@@ -35,9 +35,7 @@ class BiasAddGradientOp final : public Operator<Context> {
: Operator<Context>(op_def, ws),
data_format(OperatorBase::GetSingleArg<string>("data_format", "NCHW")) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void NCHWRunWithType();
template <typename T> void NHWCRunWithType();
......
......@@ -33,9 +33,7 @@ class ClipGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(ClipGradientOp);
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -29,9 +29,8 @@ class DivGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(DivGradientOp);
void ShareBeforeRun() override;
void ShareGradient() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void EltwiseRunWithType();
template <typename T> void BroadcastRunWithType(int type);
......
......@@ -37,6 +37,7 @@ class DotGradientOp final : public Operator<Context> {
transA(OperatorBase::GetSingleArg<bool>("TransA", false)),
transB(OperatorBase::GetSingleArg<bool>("TransB", false)) {}
void ShareGradient() override;
void RunOnDevice() override;
template <typename T> void DotRunWithType();
template <typename T> void GemmRunWithType();
......
......@@ -48,9 +48,8 @@ class EltwiseGradientOp final : public Operator<Context> {
} else coeffs.resize(InputSize(), float(1));
}
void ShareBeforeRun() override;
void ShareGradient() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void SumRunWithType();
template <typename T> void ProdRunWithType();
......
......@@ -25,9 +25,7 @@ class ExpGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(ExpGradientOp);
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
};
......
......@@ -33,9 +33,7 @@ class GramMatrixGradientOp final : public Operator<Context> {
: Operator<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -39,9 +39,7 @@ class InnerProductGradientOp final : public Operator<Context> {
num_output(OperatorBase::GetSingleArg<int>("num_output", 0)),
transW(OperatorBase::GetSingleArg<bool>("TransW", true)) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -25,9 +25,7 @@ class LogGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(LogGradientOp);
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
};
......
......@@ -36,9 +36,8 @@ class MatmulGradientOp final : public Operator<Context> {
transA(OperatorBase::GetSingleArg<bool>("TransA", false)),
transB(OperatorBase::GetSingleArg<bool>("TransB", false)) {}
void ShareBeforeRun() override;
void ShareGradient() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -29,9 +29,8 @@ class MulGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(MulGradientOp);
void ShareBeforeRun() override;
void ShareGradient() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void EltwiseRunWithType();
template <typename T> void BroadcastRunWithType(int type);
......
......@@ -40,9 +40,7 @@ class PowGradientOp final : public Operator<Context> {
power_scale = power * scale;
}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -35,9 +35,7 @@ class ScaleGradientOp final : public Operator<Context> {
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
num_axes(OperatorBase::GetSingleArg<int>("num_axes", -1)) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void BiasRunWithType();
template <typename T> void ScaleRunWithType();
template <typename T> void RunWithType();
......
......@@ -25,9 +25,7 @@ class SquareGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(SquareGradientOp);
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
};
......
......@@ -29,9 +29,8 @@ class SubGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(SubGradientOp);
void ShareBeforeRun() override;
void ShareGradient() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void EltwiseRunWithType();
template <typename T> void BroadcastRunWithType(int type);
......
......@@ -34,9 +34,7 @@ class AtGradientOp final : public Operator<Context> {
axis(OperatorBase::GetSingleArg<int>("axis", 0)),
acc_grad(OperatorBase::GetSingleArg<bool>("acc_gradient", false)) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -36,9 +36,8 @@ class ConcatGradientOp : public Operator<Context> {
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
nin(OperatorBase::GetSingleArg<int>("num_input", 1)) {}
void ShareBeforeRun() override;
void ShareGradient() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -57,9 +57,7 @@ class CropGradientOp final : public Operator<Context > {
}
void ComputeOutputShape();
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
template <typename T> void RecursiveRunWithType(vector<TIndex> idxs,
const vector<TIndex>& offsets,
......
......@@ -27,7 +27,10 @@ class ExpandDimsOp final : public Operator<Context> {
template <class Context>
class ExpandDimsGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(ExpandDimsGradientOp);
ExpandDimsGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
};
......
......@@ -28,7 +28,10 @@ class FlattenOp final : public Operator<Context> {
template <class Context>
class FlattenGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(FlattenGradientOp);
FlattenGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
};
......
......@@ -43,7 +43,9 @@ template <class Context>
class TemplateGradientOp : public TemplateOp<Context> {
public:
TemplateGradientOp(const OperatorDef& op_def, Workspace* ws)
: TemplateOp<Context>(op_def, ws) {}
: TemplateOp<Context>(op_def, ws) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
};
......
......@@ -39,9 +39,7 @@ class ReduceGradientOp final : public Operator<Context> {
axis(OperatorBase::GetSingleArg<int>("axis", -1)),
operation(OperatorBase::GetSingleArg<string>("operation", "NONE")) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void SumRunWithType();
template <typename T> void MeanRunWithType();
......
......@@ -30,7 +30,10 @@ class ReshapeOp final : public Operator<Context> {
template <class Context>
class ReshapeGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(ReshapeGradientOp);
ReshapeGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
};
......
......@@ -61,6 +61,8 @@ class ScanGradientOp final: public Operator<Context> {
// handle GI(x)
for (int i = 0; i < forward_inputs.size(); i++)
terms[forward_inputs[i] + "_grad"] = output(i)->name();
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
......
......@@ -35,7 +35,9 @@ class SliceGradientOp final : public Operator<Context> {
SliceGradientOp(const OperatorDef& op_def, Workspace* ws):
Operator<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
nout(OperatorBase::GetSingleArg<int>("num_output", 1)) {}
nout(OperatorBase::GetSingleArg<int>("num_output", 1)) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -43,9 +43,7 @@ class TileGradientOp : public Operator<Context> {
process_axes.push_back({ i, multiples[i] });
}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template<typename T> void TileRunWithType();
protected:
......
......@@ -33,9 +33,7 @@ class TransposeGradientOp final : public Operator<Context> {
TransposeGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -16,14 +16,12 @@ class L1LossOp : public Operator<Context> {
public:
L1LossOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
coeff(OperatorBase::GetSingleArg<float>("coeff", 1.0)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "BATCH_SIZE")) {}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float coeff;
Tensor* diff;
string normalization;
};
......@@ -33,14 +31,13 @@ class L1LossGradientOp final : public Operator<Context> {
public:
L1LossGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
coeff(OperatorBase::GetSingleArg<float>("coeff", 1.0)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "BATCH_SIZE")) {}
void ShareGradient() override;
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float coeff;
Tensor* diff;
string normalization;
};
......
......@@ -16,14 +16,12 @@ class L2LossOp : public Operator<Context> {
public:
L2LossOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
coeff(OperatorBase::GetSingleArg<float>("coeff", 1.0)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "BATCH_SIZE")) {}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float coeff;
Tensor* diff;
string normalization;
};
......@@ -33,14 +31,13 @@ class L2LossGradientOp final : public Operator<Context> {
public:
L2LossGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
coeff(OperatorBase::GetSingleArg<float>("coeff", 1.0)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "BATCH_SIZE")) {}
void ShareGradient() override;
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float coeff;
Tensor* diff;
string normalization;
};
......
......@@ -4,19 +4,20 @@
// Written by Ting Pan
// --------------------------------------------------------
#ifndef DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_LOSS_OP_H_
#define DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_LOSS_OP_H_
#ifndef DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_OP_H_
#define DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class SigmoidCrossEntropyLossOp final : public Operator<Context> {
class SigmoidCrossEntropyOp final : public Operator<Context> {
public:
SigmoidCrossEntropyLossOp(const OperatorDef& op_def, Workspace* ws)
SigmoidCrossEntropyOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) {}
void RunOnDevice() override;
template <typename T> void RunWithType();
......@@ -27,9 +28,9 @@ class SigmoidCrossEntropyLossOp final : public Operator<Context> {
};
template <class Context>
class SigmoidCrossEntropyLossGradientOp final : public Operator<Context> {
class SigmoidCrossEntropyGradientOp final : public Operator<Context> {
public:
SigmoidCrossEntropyLossGradientOp(const OperatorDef& op_def, Workspace* ws)
SigmoidCrossEntropyGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) {}
......@@ -43,4 +44,4 @@ class SigmoidCrossEntropyLossGradientOp final : public Operator<Context> {
} // namespace dragon
#endif // DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_LOSS_OP_H_
\ No newline at end of file
#endif // DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_OP_H_
\ No newline at end of file
......@@ -4,17 +4,17 @@
// Written by Ting Pan
// --------------------------------------------------------
#ifndef DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_LOSS_OP_H_
#define DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_LOSS_OP_H_
#ifndef DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_OP_H_
#define DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class SoftmaxCrossEntropyLossOp final : public Operator<Context> {
class SoftmaxCrossEntropyOp final : public Operator<Context> {
public:
SoftmaxCrossEntropyLossOp(const OperatorDef& op_def, Workspace* ws)
SoftmaxCrossEntropyOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) {
......@@ -39,9 +39,9 @@ class SoftmaxCrossEntropyLossOp final : public Operator<Context> {
};
template <class Context>
class SoftmaxCrossEntropyLossGradientOp final : public Operator<Context> {
class SoftmaxCrossEntropyGradientOp final : public Operator<Context> {
public:
SoftmaxCrossEntropyLossGradientOp(const OperatorDef& op_def, Workspace* ws)
SoftmaxCrossEntropyGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) {}
......@@ -57,4 +57,4 @@ class SoftmaxCrossEntropyLossGradientOp final : public Operator<Context> {
} // namespace dragon
#endif // DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_LOSS_OP_H_
\ No newline at end of file
#endif // DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_OP_H_
\ No newline at end of file
......@@ -4,17 +4,17 @@
// Written by Ting Pan
// --------------------------------------------------------
#ifndef DRAGON_OPERATORS_LOSS_SOFTMAX_LOSS_OP_H_
#define DRAGON_OPERATORS_LOSS_SOFTMAX_LOSS_OP_H_
#ifndef DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_CROSS_ENTROPY_OP_H_
#define DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_CROSS_ENTROPY_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class SoftmaxLossOp final : public Operator<Context> {
class SparseSoftmaxCrossEntropyOp : public Operator<Context> {
public:
SoftmaxLossOp(const OperatorDef& op_def, Workspace* ws)
SparseSoftmaxCrossEntropyOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "VALID")) {
......@@ -45,9 +45,9 @@ class SoftmaxLossOp final : public Operator<Context> {
};
template <class Context>
class SoftmaxLossGradientOp final : public Operator<Context> {
class SparseSoftmaxCrossEntropyGradientOp : public Operator<Context> {
public:
SoftmaxLossGradientOp(const OperatorDef& op_def, Workspace* ws)
SparseSoftmaxCrossEntropyGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "VALID")) {
......@@ -71,4 +71,4 @@ class SoftmaxLossGradientOp final : public Operator<Context> {
} // namespace dragon
#endif // DRAGON_OPERATORS_LOSS_SOFTMAX_LOSS_OP_H_
\ No newline at end of file
#endif // DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_CROSS_ENTROPY_OP_H_
\ No newline at end of file
// --------------------------------------------------------
// Dragon
// Copyright(c) 2017 SeetaTech
// Written by Ting Pan
// --------------------------------------------------------
#ifndef DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_FOCAL_LOSS_OP_H_
#define DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_FOCAL_LOSS_OP_H_
#include "operators/loss/sparse_softmax_cross_entropy_op.h"
namespace dragon {
template <class Context>
class SparseSoftmaxFocalLossOp final : public SparseSoftmaxCrossEntropyOp<Context> {
public:
SparseSoftmaxFocalLossOp(const OperatorDef& op_def, Workspace* ws)
: SparseSoftmaxCrossEntropyOp<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "VALID")),
alpha(OperatorBase::GetSingleArg<float>("alpha", 1.0)),
gamma(OperatorBase::GetSingleArg<float>("gamma", 2.0)),
use_pseudo_metric(OperatorBase::GetSingleArg<bool>("use_pseudo_metric", true)) {
if (alpha == 1.0) use_pseudo_metric = false;
}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float alpha, gamma;
bool use_pseudo_metric;
TIndex axis, outer_dim, inner_dim;
Tensor* scale;
string normalization;
};
template <class Context>
class SparseSoftmaxFocalLossGradientOp final : public SparseSoftmaxCrossEntropyGradientOp<Context> {
public:
SparseSoftmaxFocalLossGradientOp(const OperatorDef& op_def, Workspace* ws)
: SparseSoftmaxCrossEntropyGradientOp<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "VALID")),
gamma(OperatorBase::GetSingleArg<float>("gamma", 2.0)),
eps(OperatorBase::GetSingleArg<float>("eps", float(1e-10))) {}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float gamma, eps;
TIndex axis, outer_dim, inner_dim;
Tensor* scale;
string normalization;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_FOCAL_LOSS_OP_H_
\ No newline at end of file
......@@ -27,7 +27,9 @@ template <class Context>
class MPIBroadcastGradientOp final : public ModelMPIBase<Context> {
public:
MPIBroadcastGradientOp(const OperatorDef& op_def, Workspace* ws)
: ModelMPIBase<Context>(op_def, ws) {}
: ModelMPIBase<Context>(op_def, ws) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -27,7 +27,9 @@ template <class Context>
class MPIGatherGradientOp final : public ModelMPIBase<Context> {
public:
MPIGatherGradientOp(const OperatorDef& op_def, Workspace *ws)
: ModelMPIBase<Context>(op_def, ws) {}
: ModelMPIBase<Context>(op_def, ws) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -30,7 +30,7 @@ class BatchNormOp : public Operator<Context> {
Tensor* num_multiplier, *spatial_multiplier, *stddev, *var;
TIndex num, channels, spatial_dim, nbychans;
int use_stats;
bool use_global_stats, inplace;
bool use_global_stats, inplace, is_recomputing;
};
template <class Context>
......@@ -40,9 +40,7 @@ class BatchNormGradientOp final : public Operator<Context> {
: Operator<Context>(op_def, ws),
use_stats(OperatorBase::GetSingleArg<int>("use_stats", -1)) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......@@ -68,7 +66,7 @@ class BNOp : public Operator<Context> {
protected:
float momentum, eps;
int use_stats;
bool use_global_stats;
bool use_global_stats, is_recomputing;
};
template <class Context>
......@@ -79,9 +77,8 @@ class BNGradientOp : public Operator<Context> {
eps(OperatorBase::GetSingleArg<float>("eps", float(1e-3))),
use_stats(OperatorBase::GetSingleArg<int>("use_stats", -1)) { }
void ShareBeforeRun() override;
void ShareGradient() override;
void RunOnDevice() override { NOT_IMPLEMENTED; }
void ClearAfterRun() override;
template <typename T> void RunWithType() { NOT_IMPLEMENTED; }
protected:
......@@ -115,7 +112,7 @@ class CuDNNBNOp final : public BNOp<Context> {
cudnnTensorDescriptor_t input_desc, output_desc, bn_desc;
TIndex num, channels, spatial_dim;
Tensor* mean, *var;
bool use_global_stats;
bool use_global_stats, is_recomputing;
};
template <class Context>
......
......@@ -36,7 +36,7 @@ class BatchRenormOp : public Operator<Context> {
Tensor* stddev, *r, *var, *x_norm;
TIndex num, channels, spatial_dim, nbychans;
int use_stats;
bool use_global_stats, inplace;
bool use_global_stats, inplace, is_recomputing;
};
template <class Context>
......@@ -46,9 +46,7 @@ class BatchRenormGradientOp final : public Operator<Context> {
: Operator<Context>(op_def, ws),
use_stats(OperatorBase::GetSingleArg<int>("use_stats", -1)) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -36,9 +36,7 @@ class InstanceNormGradientOp final : public Operator<Context> {
InstanceNormGradientOp(const OperatorDef& op_def, Workspace *ws)
: Operator<Context>(op_def, ws) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -40,9 +40,7 @@ class L2NormGradientOp final : public Operator<Context> {
axis(OperatorBase::GetSingleArg<int>("axis", 0)),
num_axes(OperatorBase::GetSingleArg<int>("num_axes", -1)) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
......
......@@ -30,7 +30,10 @@ class LSTMUnitOp : public Operator<Context> {
template <class Context>
class LSTMUnitGradientOp : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(LSTMUnitGradientOp);
LSTMUnitGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {
this->allow_share_grads_ = false;
}
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -4,23 +4,14 @@
// Written by Ting Pan
// --------------------------------------------------------
#ifndef DRAGON_OPERATORS_COMMON_UTILS_OP_H_
#define DRAGON_OPERATORS_COMMON_UTILS_OP_H_
#ifndef DRAGON_OPERATORS_UTILS_ACCURACY_OP_H_
#define DRAGON_OPERATORS_UTILS_ACCURACY_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class CopyOp final: public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(CopyOp);
void RunOnDevice() override;
template <typename T> void RunWithType();
};
template <class Context>
class AccuracyOp final: public Operator<Context> {
public:
AccuracyOp(const OperatorDef& op_def, Workspace* ws)
......@@ -42,22 +33,6 @@ class AccuracyOp final: public Operator<Context> {
Tensor ignore_labels;
};
template <class Context>
class OneHotOp final : public Operator < Context > {
public:
OneHotOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
depth(OperatorBase::GetSingleArg<int>("depth", -1)),
on_value(OperatorBase::GetSingleArg<int>("on_value", 1)),
off_value(OperatorBase::GetSingleArg<int>("off_value", 0)) {}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
TIndex depth, on_value, off_value;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_COMMON_UTILS_OP_H_
\ No newline at end of file
#endif // DRAGON_OPERATORS_UTILS_ACCURACY_OP_H_
\ No newline at end of file
// --------------------------------------------------------
// Dragon
// Copyright(c) 2017 SeetaTech
// Written by Ting Pan
// --------------------------------------------------------
#ifndef DRAGON_OPERATORS_UTILS_COPY_OP_H_
#define DRAGON_OPERATORS_UTILS_COPY_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class CopyOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(CopyOp);
void RunOnDevice() override;
template <typename T> void RunWithType();
};
} // namespace dragon
#endif // DRAGON_OPERATORS_UTILS_COPY_OP_H_
\ No newline at end of file
......@@ -19,6 +19,7 @@ class GradientGenerateOp final: public Operator<Context> {
defaults(OperatorBase::GetRepeatedArg<float>("defaults")) {
CHECK_EQ(InputSize(), OutputSize());
CHECK_EQ(defaults.size(), OutputSize());
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
......@@ -35,6 +36,7 @@ class GradientGatherOp final : public Operator<Context> {
: Operator<Context>(op_def, ws) {
for (int i = 0; i < InputSize(); i++)
if (input(i).name() != "ignore") indices.push_back(i);
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
......@@ -47,7 +49,11 @@ class GradientGatherOp final : public Operator<Context> {
template <class Context>
class StopGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(StopGradientOp);
StopGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override;
};
......
// --------------------------------------------------------
// Dragon
// Copyright(c) 2017 SeetaTech
// Written by Ting Pan
// --------------------------------------------------------
#ifndef DRAGON_OPERATORS_UTILS_ONE_HOT_OP_H_
#define DRAGON_OPERATORS_UTILS_ONE_HOT_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class OneHotOp final : public Operator < Context > {
public:
OneHotOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
depth(OperatorBase::GetSingleArg<int>("depth", -1)),
on_value(OperatorBase::GetSingleArg<int>("on_value", 1)),
off_value(OperatorBase::GetSingleArg<int>("off_value", 0)) {}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
TIndex depth, on_value, off_value;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_UTILS_ONE_HOT_OP_H_
\ No newline at end of file
......@@ -30,9 +30,7 @@ class ConvGradientOp : public ConvOp<Context> {
ConvGradientOp(const OperatorDef& def, Workspace* ws)
: ConvOp<Context>(def, ws) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
};
......
......@@ -31,9 +31,7 @@ class DeConvGradientOp : public DeConvOp<Context> {
DeConvGradientOp(const OperatorDef& def, Workspace* ws) :
DeConvOp<Context>(def, ws) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
};
......
......@@ -15,21 +15,21 @@ template <class Context>
class DenseConcatOp final : public ConcatOp<Context> {
public:
DenseConcatOp(const OperatorDef& op_def, Workspace* ws)
: ConcatOp<Context>(op_def, ws) { }
void RunOnDevice() override;
: ConcatOp<Context>(op_def, ws) {}
};
template <class Context>
class DenseConcatGradientOp : public ConcatGradientOp<Context> {
public:
public:
DenseConcatGradientOp(const OperatorDef& op_def, Workspace* ws)
: ConcatGradientOp<Context>(op_def, ws) {}
: ConcatGradientOp<Context>(op_def, ws),
growth_rate(OperatorBase::GetSingleArg<int>("growth_rate", 0)) {}
void ElimateCorruption() override;
template <typename T> void RestoreX1();
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
protected:
TIndex growth_rate;
};
......
......@@ -35,9 +35,7 @@ class NNResizeGradientOp : public Operator<Context> {
NNResizeGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType();
};
......
......@@ -69,9 +69,7 @@ class PoolingGradientOp: public Operator<Context> {
}
void Reshape();
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void MaxRunWithType();
template <typename T> void AvgRunWithType();
......
......@@ -44,9 +44,8 @@ class ROIAlignGradientOp : public Operator<Context> {
CHECK_GT(pool_w, 0) << "\npool_w must > 0";
}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
void CleanResource() override;
template <typename T> void RunWithType();
protected:
......
......@@ -41,9 +41,8 @@ class ROIPoolingGradientOp final : public Operator<Context> {
pool_w(OperatorBase::GetSingleArg<int>("pool_w", 0)),
spatial_scale(OperatorBase::GetSingleArg<float>("spatial_scale", 1.0)) {}
void ShareBeforeRun() override;
void RunOnDevice() override;
void ClearAfterRun() override;
void CleanResource() override;
template <typename T> void RunWithType();
protected:
......
......@@ -286,21 +286,12 @@ void TransposeGrad(const int count,
const T* dy,
T* dx);
/******************** common.utils ********************/
template <typename T, class Context>
void OneHot(const int count,
const int depth,
const int on_value,
const T* x,
T* y);
/******************** loss.l1_loss ********************/
template <typename T, class Context>
void AbsGrad(const int count, const T* dy, T* dx);
/******************** loss.sigmoid_cross_entropy_loss ********************/
/******************** loss.sigmoid_cross_entropy ********************/
template <typename T, class Context>
void SigmoidCrossEntropy(const int count, const T* x, const T* target, T* loss);
......@@ -313,12 +304,12 @@ void SmoothL1(const int count, const float sigma2, const T* x, T* y);
template <typename T, class Context>
void SmoothL1Grad(const int count, const float sigma2, const T* dy, T* dx);
/******************** loss.softmax_cross_entropy_loss ********************/
/******************** loss.softmax_cross_entropy ********************/
template <typename T, class Context>
void SoftmaxCrossEntropy(const int count, const T* prob, const T* target, T* loss);
/******************** loss.softmax_loss ********************/
/******************** loss.sparse_softmax_cross_entropy ********************/
template <typename T, class Context>
void SparseSoftmaxCrossEntropy(const int count,
......@@ -332,12 +323,42 @@ void SparseSoftmaxCrossEntropy(const int count,
Tensor* ignore);
template <typename T, class Context>
void SoftmaxLossGrad(const int count,
void SparseSoftmaxCrossEntropyGrad(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const T* prob,
const T* labels,
T* valid,
Tensor* ignore,
T* dXdata);
/******************** loss.sparse_softmax_focal_loss ********************/
template <typename T, class Context>
void SparseSoftmaxFocalLoss(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float alpha,
const float gamma,
const T* prob,
const T* labels,
T* scale,
T* loss,
T* valid,
Tensor* ignore);
template <typename T, class Context>
void SparseSoftmaxFocalLossGrad(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float gamma,
const float eps,
const T* scale,
const T* prob,
const T* labels,
T* valid,
Tensor* ignore,
T* dXdata);
......@@ -422,6 +443,15 @@ void MemoryData(const int count,
const Tx* x,
Ty* y);
/******************** utils.one_hot ********************/
template <typename T, class Context>
void OneHot(const int count,
const int depth,
const int on_value,
const T* x,
T* y);
/******************** vision.conv ********************/
template <typename T, class Context>
......
......@@ -20,7 +20,11 @@ option['device'] = 'CPU'
option['gpu_id'] = 0
option['use_cudnn'] = False
option['random_seed'] = 3
option['debug_mode'] = True
# if True, disable Dragon-Memonger
option['debug_mode'] = False
option['share_grads'] = False # set by Dragon-Memonger
option['allow_mirrow_stage'] = True # default
def EnableCPU():
global option
......@@ -32,8 +36,8 @@ def EnableCUDA(gpu_id=0, use_cudnn=True):
option['gpu_id'] = gpu_id
option['use_cudnn'] = use_cudnn
# TODO(Pan): please do not use @setter
# TODO(Pan): seems that it can't change the global value
# TODO(PhyscalX): please do not use @setter
# TODO(PhyscalX): seems that it can't change the global value
def SetRandomSeed(seed):
global option
......
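These options feed the allow_share_grads_ gate added earlier in this commit: a gradient operator shares its buffers only when share_grads is on and debug_mode is off. A minimal sketch of toggling the flags through dragon.config; the exact user-facing workflow is an assumption, and share_grads is normally flipped via dragon.memonger.share_grads():

from dragon.config import option

option['debug_mode'] = False           # memory optimization is disabled entirely in debug mode
option['share_grads'] = True           # let *Gradient ops reuse the shared 'Grad' buffers
option['allow_mirrow_stage'] = True    # presumably permits ops marked with mirrow_stage to be recomputed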
......@@ -176,6 +176,6 @@ def Restore(filename, format=0):
FeedTensor(key, ndarray)
elif format is 1:
# TODO(pan): caffemodel can't save the tensor name
# TODO(pan): we simply use 'Scope + LayerName + @paramX'
# TODO(PhyscalX): caffemodel can't save the tensor name
# TODO(PhyscalX): we simply use 'Scope + LayerName + @paramX'
RestoreCC(filename, '', format)
\ No newline at end of file
# --------------------------------------------------------
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
def share_grads(enabled=True):
from dragon.config import option
option['share_grads'] = enabled
def drop(op_func, *args, **kwargs):
kwargs['mirrow_stage'] = True
return op_func(*args, **kwargs)
\ No newline at end of file
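A minimal usage sketch of the two helpers above, assuming the module is importable as dragon.memonger, that x is an existing Tensor, and that ops.Relu (like the other op wrappers) forwards extra keyword arguments:

import dragon.ops as ops
import dragon.memonger as memonger

memonger.share_grads(True)        # sets option['share_grads'] so gradient ops may share buffers
y = memonger.drop(ops.Relu, x)    # equivalent to ops.Relu(x, mirrow_stage=True)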
......@@ -4,12 +4,12 @@
# Written by Ting Pan
# --------------------------------------------------------
from __future__ import print_function
import numpy as np
import dragon.core.workspace as ws
import dragon.ops as ops
import dragon.vm.theano as theano
from multiprocessing import Process, Queue
from dragon.config import logger
""" How to custom a RunOp in Dragon """
......@@ -32,7 +32,7 @@ class Fetcher(Process):
self.daemon = True
def cleanup():
logger.info('Terminating Fetcher......')
print('Terminating Fetcher......')
self.terminate()
self.join()
......@@ -104,4 +104,4 @@ if __name__ == '__main__':
foo()
# fetch
logger.info('y \n-------------- \n', y.get_value(), '\n')
\ No newline at end of file
print('y \n-------------- \n', y.get_value(), '\n')
\ No newline at end of file
......@@ -4,13 +4,13 @@
# Written by Ting Pan
# --------------------------------------------------------
from __future__ import print_function
import numpy as np
import dragon.core.workspace as ws
import dragon.ops as ops
from dragon.core.tensor import Tensor
import dragon.vm.theano.tensor as T
import dragon.vm.theano as theano
from dragon.config import logger
""" How to custom a TemplateOp in Dragon """
......@@ -91,14 +91,14 @@ if __name__ == '__main__':
foo = theano.function(outputs=y)
# feed
ws.FeedTensor(x1, np.ones((5, 3)))
ws.FeedTensor(x2, np.ones((5, 3)) * 5.0)
ws.FeedTensor(x1, np.ones((5, 3), dtype=np.float32))
ws.FeedTensor(x2, np.ones((5, 3), dtype=np.float32) * 5.0)
# run
foo()
# fetch
logger.info('y \n-------------- \n', y.get_value(), '\n')
logger.info('dx1 \n-------------- \n', dx1.get_value(), '\n')
logger.info('dx2 \n-------------- \n', dx2.get_value(), '\n')
print('y \n-------------- \n', y.get_value(), '\n')
print('dx1 \n-------------- \n', dx1.get_value(), '\n')
print('dx2 \n-------------- \n', dx2.get_value(), '\n')
......@@ -7,7 +7,7 @@
from dragon.core.tensor import Tensor
import numpy as np
def SoftmaxLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs):
def SparseSoftmaxCrossEntropy(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs):
"""
:param inputs: a list of Tensors containing [input, label]
:param axis: an int specifying the axis along which softmax is computed
......@@ -17,12 +17,12 @@ def SoftmaxLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwarg
"""
if not isinstance(inputs, list) or len(inputs) is not 2:
raise RuntimeError('SoftmaxLoss Operator accepts a list of 2 Tensors')
raise RuntimeError('SparseSoftmaxCrossEntropy Operator accepts a list of 2 Tensors')
args = locals(); kwargs = args['kwargs']
del args['kwargs']; kwargs = dict(args, **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SoftmaxLoss', **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SparseSoftmaxCrossEntropy', **kwargs)
if inputs[0].shape is not None:
if normalization != 'UNIT': output.shape = [1]
......@@ -35,7 +35,7 @@ def SoftmaxLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwarg
return output
def SigmoidCrossEntropyLoss(inputs, normalization='FULL', **kwargs):
def SigmoidCrossEntropy(inputs, normalization='FULL', **kwargs):
"""
:param inputs: a list of Tensors containing [input, label]
:param normalization: a str of (UNIT, FULL, BATCH_SIZE, NONE)
......@@ -43,12 +43,12 @@ def SigmoidCrossEntropyLoss(inputs, normalization='FULL', **kwargs):
"""
if not isinstance(inputs, list) or len(inputs) is not 2:
raise RuntimeError('SigmoidCrossEntropyLoss Operator accepts a list of 2 Tensors')
raise RuntimeError('SigmoidCrossEntropy Operator accepts a list of 2 Tensors')
args = locals(); kwargs = args['kwargs']
del args['kwargs']; kwargs = dict(args, **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SigmoidCrossEntropyLoss', **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SigmoidCrossEntropy', **kwargs)
if inputs[0].shape is not None:
if normalization != 'UNIT': output.shape = [1]
......@@ -57,7 +57,7 @@ def SigmoidCrossEntropyLoss(inputs, normalization='FULL', **kwargs):
return output
def SoftmaxCrossEntropyLoss(inputs, axis=1, normalization='FULL', **kwargs):
def SoftmaxCrossEntropy(inputs, axis=1, normalization='FULL', **kwargs):
"""
:param inputs: a list of Tensors containing [input, label]
:param normalization: a str of (UNIT, FULL, BATCH_SIZE, NONE)
......@@ -65,12 +65,12 @@ def SoftmaxCrossEntropyLoss(inputs, axis=1, normalization='FULL', **kwargs):
"""
if not isinstance(inputs, list) or len(inputs) is not 2:
raise RuntimeError('SoftmaxCrossEntropyLoss Operator accepts a list of 2 Tensors')
raise RuntimeError('SoftmaxCrossEntropy Operator accepts a list of 2 Tensors')
args = locals(); kwargs = args['kwargs']
del args['kwargs']; kwargs = dict(args, **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SoftmaxCrossEntropyLoss', **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SoftmaxCrossEntropy', **kwargs)
if inputs[0].shape is not None:
if normalization != 'UNIT': output.shape = [1]
......@@ -96,7 +96,7 @@ def SmoothL1Loss(inputs, sigma=1.0, **kwargs):
return output
def L1Loss(inputs, normalization='BATCH_SIZE', coeff=1.0, **kwargs):
def L1Loss(inputs, normalization='BATCH_SIZE', **kwargs):
if not isinstance(inputs, list) or len(inputs) < 2:
raise RuntimeError('L1Loss Operator accepts a list of at least 2 Tensors')
......@@ -109,7 +109,7 @@ def L1Loss(inputs, normalization='BATCH_SIZE', coeff=1.0, **kwargs):
return output
def L2Loss(inputs, normalization='BATCH_SIZE', coeff=1.0, **kwargs):
def L2Loss(inputs, normalization='BATCH_SIZE', **kwargs):
if not isinstance(inputs, list) or len(inputs) < 2:
raise RuntimeError('L2Loss Operator accepts a list of at least 2 Tensors')
......@@ -120,3 +120,35 @@ def L2Loss(inputs, normalization='BATCH_SIZE', coeff=1.0, **kwargs):
output = Tensor.CreateOperator(nout=1, op_type='L2Loss', **kwargs)
if inputs[0].shape is not None: output.shape = [1]
return output
def SparseSoftmaxFocalLoss(inputs, axis=1, normalization='VALID', ignore_labels=(),
alpha=0.25, gamma=2.0, eps=1e-10, use_pseudo_metric=True, **kwargs):
"""
:param inputs: a list of Tensors containing [input, label]
:param axis: an int specifying the axis along which softmax is computed
:param normalization: a str of (UNIT, FULL, VALID, BATCH_SIZE, NONE)
:param ignore_labels: a list of ints containing the labels to ignore
:param alpha: a float, the balancing factor of the focal loss
:param gamma: a float, the focusing factor of the focal loss
:param eps: a float, a small constant for numerical stability
:return: a Tensor of loss with the shape (1,)
"""
if not isinstance(inputs, list) or len(inputs) is not 2:
raise RuntimeError('SparseSoftmaxFocalLoss Operator accepts a list of 2 Tensors')
args = locals(); kwargs = args['kwargs']
del args['kwargs']; kwargs = dict(args, **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SparseSoftmaxFocalLoss', **kwargs)
if inputs[0].shape is not None:
if normalization != 'UNIT': output.shape = [1]
elif all(dim is not None for dim in inputs[0].shape):
outer_dim = int(np.prod(inputs[0].shape[0 : axis]))
inner_dim = int(np.prod(inputs[0].shape[axis + 1 :]))
output.shape = [outer_dim * inner_dim]
else: output.shape = [None]
return output
\ No newline at end of file
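A short call sketch for SparseSoftmaxFocalLoss as defined above; pred and label stand for pre-existing Tensors (e.g. network scores and integer labels), and the argument values are illustrative:

import dragon.ops as ops

loss = ops.SparseSoftmaxFocalLoss([pred, label], axis=1,
                                  normalization='VALID',
                                  ignore_labels=(255,),
                                  alpha=0.25, gamma=2.0)
# loss.shape == [1] for any normalization other than 'UNIT'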
......@@ -197,7 +197,7 @@ def BiasAdd(inputs, data_format='NCHW', **kwargs):
return output
def DenseConcat(inputs, axis=1, **kwargs):
def DenseConcat(inputs, growth_rate, axis=1, **kwargs):
if not isinstance(inputs, list) or len(inputs) != 2:
raise RuntimeError('DenseConcat Operator accepts 2 Tensors as inputs')
......@@ -207,6 +207,7 @@ def DenseConcat(inputs, axis=1, **kwargs):
kwargs['num_input'] = len(inputs)
output = Tensor.CreateOperator(nout=1, op_type='DenseConcat', **kwargs)
if all(input.shape is not None for input in inputs):
if all(input.shape[axis] is not None for input in inputs):
output.shape = inputs[0].shape[:]
......
......@@ -52,12 +52,13 @@ Softmax = act.Softmax
Dropout = act.Dropout
# loss
SoftmaxLoss = loss.SoftmaxLoss
SigmoidCrossEntropyLoss = loss.SigmoidCrossEntropyLoss
SoftmaxCrossEntropyLoss = loss.SoftmaxCrossEntropyLoss
SparseSoftmaxCrossEntropy = loss.SparseSoftmaxCrossEntropy
SigmoidCrossEntropy = loss.SigmoidCrossEntropy
SoftmaxCrossEntropy = loss.SoftmaxCrossEntropy
SmoothL1Loss = loss.SmoothL1Loss
L1Loss = loss.L1Loss
L2Loss = loss.L2Loss
SparseSoftmaxFocalLoss = loss.SparseSoftmaxFocalLoss
# arithmetic
Add = math.Add
......
......@@ -50,6 +50,7 @@ message OperatorDef {
repeated Argument arg= 5;
optional DeviceOption device_option = 6;
optional bool debug_mode = 7 [default = false];
optional bool share_grads = 8 [default = false];
}
message GradientTarget {
......@@ -65,7 +66,6 @@ message UpdateTarget {
repeated Argument arg = 4;
}
// simply copy from caffe1
message TensorFiller {
optional string tensor = 1;
optional string type = 2 [default = 'constant'];
......@@ -89,4 +89,5 @@ message GraphDef {
repeated GradientTarget g_target = 8;
repeated UpdateTarget u_target = 9;
optional bool debug_mode = 10 [default = false];
optional bool share_grads = 11 [default = false];
}
\ No newline at end of file
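For completeness, a sketch of setting the new share_grads fields through the generated protobuf classes; dragon_pb2 is a hypothetical name for the module built from dragon.proto:

import dragon_pb2 as pb  # hypothetical import path for the generated bindings

op = pb.OperatorDef(type='ReluGradient', share_grads=True)  # new optional field 8, default false
graph = pb.GraphDef(debug_mode=False, share_grads=True)     # new optional field 11, default false
graph.op.extend([op])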
......@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='dragon.proto',
package='',
serialized_pb=_b('\n\x0c\x64ragon.proto\"\xf7\x01\n\x0bTensorProto\x12\x0c\n\x04\x64ims\x18\x01 \x03(\x05\x12/\n\tdata_type\x18\x02 \x01(\x0e\x32\x15.TensorProto.DataType:\x05\x46LOAT\x12\x16\n\nfloat_data\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x16\n\nint32_data\x18\x04 \x03(\x05\x42\x02\x10\x01\x12\x11\n\tbyte_data\x18\x05 \x01(\x0c\x12\x13\n\x0bstring_data\x18\x06 \x03(\x0c\x12\x0c\n\x04name\x18\x07 \x01(\t\"C\n\x08\x44\x61taType\x12\t\n\x05\x46LOAT\x10\x01\x12\t\n\x05INT32\x10\x02\x12\x08\n\x04\x42YTE\x10\x03\x12\n\n\x06STRING\x10\x04\x12\x0b\n\x07\x46LOAT16\x10\x0c\",\n\x0cTensorProtos\x12\x1c\n\x06protos\x18\x01 \x03(\x0b\x32\x0c.TensorProto\"\x80\x01\n\x08\x41rgument\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\t\n\x01\x66\x18\x02 \x01(\x02\x12\t\n\x01i\x18\x03 \x01(\x05\x12\x0b\n\x03i64\x18\t \x01(\x03\x12\t\n\x01s\x18\x04 \x01(\t\x12\t\n\x01\x62\x18\x08 \x01(\x08\x12\x0e\n\x06\x66loats\x18\x05 \x03(\x02\x12\x0c\n\x04ints\x18\x06 \x03(\x05\x12\x0f\n\x07strings\x18\x07 \x03(\t\"p\n\x0c\x44\x65viceOption\x12%\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\x0b.DeviceType:\x03\x43PU\x12\x11\n\x06gpu_id\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0brandom_seed\x18\x03 \x01(\r:\x01\x33\x12\x0e\n\x06\x65ngine\x18\x04 \x01(\t\"\xa1\x01\n\x0bOperatorDef\x12\r\n\x05input\x18\x01 \x03(\t\x12\x0e\n\x06output\x18\x02 \x03(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0c\n\x04type\x18\x04 \x01(\t\x12\x16\n\x03\x61rg\x18\x05 \x03(\x0b\x32\t.Argument\x12$\n\rdevice_option\x18\x06 \x01(\x0b\x32\r.DeviceOption\x12\x19\n\ndebug_mode\x18\x07 \x01(\x08:\x05\x66\x61lse\"=\n\x0eGradientTarget\x12\x0c\n\x04\x63ost\x18\x01 \x01(\t\x12\x0b\n\x03wrt\x18\x02 \x01(\t\x12\x10\n\x08\x65xternal\x18\x03 \x01(\t\"R\n\x0cUpdateTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06tensor\x18\x03 \x03(\t\x12\x16\n\x03\x61rg\x18\x04 \x03(\x0b\x32\t.Argument\"\x8d\x02\n\x0cTensorFiller\x12\x0e\n\x06tensor\x18\x01 \x01(\t\x12\x16\n\x04type\x18\x02 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03low\x18\x04 \x01(\x02:\x01\x30\x12\x0f\n\x04high\x18\x05 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x06 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x07 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x08 \x01(\x02:\x01\x33\x12\x39\n\rvariance_norm\x18\t \x01(\x0e\x32\x1a.TensorFiller.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"\xf3\x01\n\x08GraphDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x02op\x18\x02 \x03(\x0b\x32\x0c.OperatorDef\x12\x12\n\ngraph_type\x18\x03 \x01(\t\x12$\n\rdevice_option\x18\x05 \x01(\x0b\x32\r.DeviceOption\x12\x16\n\x03\x61rg\x18\x06 \x03(\x0b\x32\t.Argument\x12\x0e\n\x06target\x18\x07 \x03(\t\x12!\n\x08g_target\x18\x08 \x03(\x0b\x32\x0f.GradientTarget\x12\x1f\n\x08u_target\x18\t \x03(\x0b\x32\r.UpdateTarget\x12\x19\n\ndebug_mode\x18\n \x01(\x08:\x05\x66\x61lse*+\n\nDeviceType\x12\x07\n\x03\x43PU\x10\x00\x12\x08\n\x04\x43UDA\x10\x01\x12\n\n\x06OPENCL\x10\x02')
serialized_pb=_b('\n\x0c\x64ragon.proto\"\xf7\x01\n\x0bTensorProto\x12\x0c\n\x04\x64ims\x18\x01 \x03(\x05\x12/\n\tdata_type\x18\x02 \x01(\x0e\x32\x15.TensorProto.DataType:\x05\x46LOAT\x12\x16\n\nfloat_data\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x16\n\nint32_data\x18\x04 \x03(\x05\x42\x02\x10\x01\x12\x11\n\tbyte_data\x18\x05 \x01(\x0c\x12\x13\n\x0bstring_data\x18\x06 \x03(\x0c\x12\x0c\n\x04name\x18\x07 \x01(\t\"C\n\x08\x44\x61taType\x12\t\n\x05\x46LOAT\x10\x01\x12\t\n\x05INT32\x10\x02\x12\x08\n\x04\x42YTE\x10\x03\x12\n\n\x06STRING\x10\x04\x12\x0b\n\x07\x46LOAT16\x10\x0c\",\n\x0cTensorProtos\x12\x1c\n\x06protos\x18\x01 \x03(\x0b\x32\x0c.TensorProto\"\x80\x01\n\x08\x41rgument\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\t\n\x01\x66\x18\x02 \x01(\x02\x12\t\n\x01i\x18\x03 \x01(\x05\x12\x0b\n\x03i64\x18\t \x01(\x03\x12\t\n\x01s\x18\x04 \x01(\t\x12\t\n\x01\x62\x18\x08 \x01(\x08\x12\x0e\n\x06\x66loats\x18\x05 \x03(\x02\x12\x0c\n\x04ints\x18\x06 \x03(\x05\x12\x0f\n\x07strings\x18\x07 \x03(\t\"p\n\x0c\x44\x65viceOption\x12%\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\x0b.DeviceType:\x03\x43PU\x12\x11\n\x06gpu_id\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0brandom_seed\x18\x03 \x01(\r:\x01\x33\x12\x0e\n\x06\x65ngine\x18\x04 \x01(\t\"\xbd\x01\n\x0bOperatorDef\x12\r\n\x05input\x18\x01 \x03(\t\x12\x0e\n\x06output\x18\x02 \x03(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0c\n\x04type\x18\x04 \x01(\t\x12\x16\n\x03\x61rg\x18\x05 \x03(\x0b\x32\t.Argument\x12$\n\rdevice_option\x18\x06 \x01(\x0b\x32\r.DeviceOption\x12\x19\n\ndebug_mode\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x1a\n\x0bshare_grads\x18\x08 \x01(\x08:\x05\x66\x61lse\"=\n\x0eGradientTarget\x12\x0c\n\x04\x63ost\x18\x01 \x01(\t\x12\x0b\n\x03wrt\x18\x02 \x01(\t\x12\x10\n\x08\x65xternal\x18\x03 \x01(\t\"R\n\x0cUpdateTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06tensor\x18\x03 \x03(\t\x12\x16\n\x03\x61rg\x18\x04 \x03(\x0b\x32\t.Argument\"\x8d\x02\n\x0cTensorFiller\x12\x0e\n\x06tensor\x18\x01 \x01(\t\x12\x16\n\x04type\x18\x02 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03low\x18\x04 \x01(\x02:\x01\x30\x12\x0f\n\x04high\x18\x05 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x06 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x07 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x08 \x01(\x02:\x01\x33\x12\x39\n\rvariance_norm\x18\t \x01(\x0e\x32\x1a.TensorFiller.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"\x8f\x02\n\x08GraphDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x02op\x18\x02 \x03(\x0b\x32\x0c.OperatorDef\x12\x12\n\ngraph_type\x18\x03 \x01(\t\x12$\n\rdevice_option\x18\x05 \x01(\x0b\x32\r.DeviceOption\x12\x16\n\x03\x61rg\x18\x06 \x03(\x0b\x32\t.Argument\x12\x0e\n\x06target\x18\x07 \x03(\t\x12!\n\x08g_target\x18\x08 \x03(\x0b\x32\x0f.GradientTarget\x12\x1f\n\x08u_target\x18\t \x03(\x0b\x32\r.UpdateTarget\x12\x19\n\ndebug_mode\x18\n \x01(\x08:\x05\x66\x61lse\x12\x1a\n\x0bshare_grads\x18\x0b \x01(\x08:\x05\x66\x61lse*+\n\nDeviceType\x12\x07\n\x03\x43PU\x10\x00\x12\x08\n\x04\x43UDA\x10\x01\x12\n\n\x06OPENCL\x10\x02')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
......@@ -44,8 +44,8 @@ _DEVICETYPE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=1386,
serialized_end=1429,
serialized_start=1442,
serialized_end=1485,
)
_sym_db.RegisterEnumDescriptor(_DEVICETYPE)
......@@ -110,8 +110,8 @@ _TENSORFILLER_VARIANCENORM = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=1086,
serialized_end=1138,
serialized_start=1114,
serialized_end=1166,
)
_sym_db.RegisterEnumDescriptor(_TENSORFILLER_VARIANCENORM)
......@@ -412,6 +412,13 @@ _OPERATORDEF = _descriptor.Descriptor(
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='share_grads', full_name='OperatorDef.share_grads', index=7,
number=8, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
......@@ -424,7 +431,7 @@ _OPERATORDEF = _descriptor.Descriptor(
oneofs=[
],
serialized_start=558,
serialized_end=719,
serialized_end=747,
)
......@@ -467,8 +474,8 @@ _GRADIENTTARGET = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=721,
serialized_end=782,
serialized_start=749,
serialized_end=810,
)
......@@ -518,8 +525,8 @@ _UPDATETARGET = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=784,
serialized_end=866,
serialized_start=812,
serialized_end=894,
)
......@@ -605,8 +612,8 @@ _TENSORFILLER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=869,
serialized_end=1138,
serialized_start=897,
serialized_end=1166,
)
......@@ -680,6 +687,13 @@ _GRAPHDEF = _descriptor.Descriptor(
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='share_grads', full_name='GraphDef.share_grads', index=9,
number=11, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
......@@ -691,8 +705,8 @@ _GRAPHDEF = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=1141,
serialized_end=1384,
serialized_start=1169,
serialized_end=1440,
)
_TENSORPROTO.fields_by_name['data_type'].enum_type = _TENSORPROTO_DATATYPE
......
......@@ -11,7 +11,7 @@ from .vision import ConvolutionLayer, DeconvolutionLayer, PoolingLayer, \
from .neuron import ReLULayer, DropoutLayer, TanhLayer, PowerLayer
from .loss import SoftmaxWithLossLayer, SigmoidCrossEntropyLossLayer, \
L2LossLayer, SmoothL1LossLayer
L2LossLayer, SmoothL1LossLayer, SoftmaxWithFocalLossLayer
from .mpi import MPIBroadcastLayer, MPIGatherLayer
......
......@@ -93,8 +93,9 @@ class ConcatLayer(Layer):
class DenseConcatLayer(Layer):
def __init__(self, LayerParameter):
super(DenseConcatLayer, self).__init__(LayerParameter)
param = LayerParameter.concat_param
self._param = {'axis': param.axis}
param = LayerParameter.dense_concat_param
self._param = {'axis': param.axis,
'growth_rate': param.growth_rate}
def Setup(self, bottom):
super(DenseConcatLayer, self).Setup(bottom)
......@@ -268,7 +269,7 @@ class BNLayer(Layer):
if scale_param.HasField('filler'):
self.Fill(scale, scale_param, 'filler')
else: scale.Constant(value=1.0)
else: scale.Uniform(low=0.0, high=1.0)
self.Fill(bias, scale_param, 'bias_filler')
self.norm_blobs = [{'data': mean, 'diff': None},
{'data': var, 'diff': None}]
......
......@@ -19,14 +19,17 @@ class Layer(object):
self._name = LayerParameter.name
self._blobs = []
self._param = {}
self._mpi_param = {}
self._common_param = {}
for include in LayerParameter.include:
mpi_rank = [int(rank) for rank in include.mpi_rank]
if len(mpi_rank) > 0: self._mpi_param['mpi_rank'] = mpi_rank
if len(mpi_rank) > 0: self._common_param['mpi_rank'] = mpi_rank
if LayerParameter.HasField('mirrow_stage'):
self._common_param['mirrow_stage'] = LayerParameter.mirrow_stage
def Setup(self, bottom):
self._param = dict(self._param, **self._mpi_param)
self._param = dict(self._param, **self._common_param)
def Fill(self, tensor, param, filler):
""" wrapper for caffe filler """
......
......@@ -24,7 +24,7 @@ class SoftmaxWithLossLayer(Layer):
def Setup(self, bottom):
super(SoftmaxWithLossLayer, self).Setup(bottom)
return ops.SoftmaxLoss(bottom, **self._param)
return ops.SparseSoftmaxCrossEntropy(bottom, **self._param)
class SigmoidCrossEntropyLossLayer(Layer):
......@@ -40,7 +40,7 @@ class SigmoidCrossEntropyLossLayer(Layer):
def Setup(self, bottom):
super(SigmoidCrossEntropyLossLayer, self).Setup(bottom)
return ops.SigmoidCrossEntropyLoss(bottom, **self._param)
return ops.SigmoidCrossEntropy(bottom, **self._param)
class L2LossLayer(Layer):
......@@ -64,3 +64,27 @@ class SmoothL1LossLayer(Layer):
def Setup(self, bottom):
super(SmoothL1LossLayer, self).Setup(bottom)
return ops.SmoothL1Loss(bottom, **self._param)
class SoftmaxWithFocalLossLayer(Layer):
def __init__(self, LayerParameter):
super(SoftmaxWithFocalLossLayer, self).__init__(LayerParameter)
param = LayerParameter.loss_param
softmax_param = LayerParameter.softmax_param
focal_loss_param = LayerParameter.focal_loss_param
norm_mode = {0: 'FULL', 1: 'VALID', 2: 'BATCH_SIZE', 3: 'NONE'}
normalization = 'VALID'
if param.HasField('normalize'):
if not param.normalize: normalization='BATCH_SIZE'
else: normalization = norm_mode[param.normalization]
self._param = {'axis': softmax_param.axis,
'normalization': normalization,
'ignore_labels': [param.ignore_label] if param.HasField('ignore_label') else [],
'alpha': float(focal_loss_param.alpha),
'gamma': float(focal_loss_param.gamma),
'eps': float(focal_loss_param.eps),
'use_pseudo_metric': focal_loss_param.use_pseudo_metric}
def Setup(self, bottom):
super(SoftmaxWithFocalLossLayer, self).Setup(bottom)
return ops.SparseSoftmaxFocalLoss(bottom, **self._param)
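The new layer only forwards its parameters to ops.SparseSoftmaxFocalLoss; the kernel itself is not shown in this diff. For orientation, a minimal NumPy sketch of the standard focal-loss weighting these parameters describe (defaults follow the FocalLossParameter message added below; this is an illustration only and ignores ignore_labels, the normalization modes, and use_pseudo_metric):

import numpy as np

def sparse_softmax_focal_loss(prob, labels, alpha=1.0, gamma=0.25, eps=1e-10):
    # prob: (N, C) softmax probabilities, labels: (N,) integer class ids
    p_t = np.clip(prob[np.arange(len(labels)), labels], eps, 1.0)
    # the focal modulation alpha * (1 - p_t)^gamma down-weights well-classified samples
    return np.mean(alpha * np.power(1.0 - p_t, gamma) * -np.log(p_t))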
......@@ -20,7 +20,6 @@ def make_if_not_exist(path):
os.makedirs(path)
def UnpackVariable(var, num):
assert len > 0
if type(var) is list and len(var) == num:
return var
else:
......@@ -277,7 +276,7 @@ def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
dilation = 1
kernel_size = 3
pad = int((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) / 2
pad = int(int((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) / 2)
net.conv5_1 = L.Convolution(net[name], num_output=512, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
net.relu5_1 = L.ReLU(net.conv5_1, in_place=True)
net.conv5_2 = L.Convolution(net.relu5_1, num_output=512, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
......@@ -319,7 +318,7 @@ def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
else:
kernel_size = 7
num_output = 4096
pad = int((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) / 2
pad = int(int((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) / 2)
net.fc6 = L.Convolution(net[name], num_output=num_output, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
net.relu6 = L.ReLU(net.fc6, in_place=True)
......
......@@ -318,6 +318,9 @@ message LayerParameter {
repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob
// The mirrow stage optimization
optional bool mirrow_stage = 162 [default = false];
// The train / test phase for computation.
optional Phase phase = 10;
......@@ -418,6 +421,8 @@ message LayerParameter {
optional ExpandDimsParameter expand_dims_param = 159;
optional ProposalParameter proposal_param = 160;
optional BatchRenormParameter batch_renorm_param = 161;
optional DenseConcatParameter dense_concat_param = 163;
optional FocalLossParameter focal_loss_param = 164;
}
// Message that stores parameters used to apply transformation
......@@ -1494,3 +1499,16 @@ message BatchRenormParameter {
optional float d_max = 5 [default = 5.0];
optional float t_delta = 6 [default = 1.0];
}
message DenseConcatParameter {
optional int32 axis = 1 [default = 1];
optional int32 growth_rate = 2 [default = 0];
}
message FocalLossParameter {
optional float alpha = 1 [default = 1.0];
optional float gamma = 2 [default = 0.25];
optional float eps = 3 [default = 1e-10];
optional bool use_pseudo_metric = 4 [default = true];
}
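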
......@@ -119,7 +119,7 @@ def bias_add(value, bias, data_format='NCHW', name=None):
def sigmoid_cross_entropy_with_logits(logits, targets, name=None):
return ops.SigmoidCrossEntropyLoss([logits, targets], normalization='UNIT', name=None)
return ops.SigmoidCrossEntropy([logits, targets], normalization='UNIT', name=None)
def softmax_cross_entropy_with_logits(_sentinel=None,
......@@ -131,13 +131,13 @@ def softmax_cross_entropy_with_logits(_sentinel=None,
'with named arguments (labels=..., logits=..., ...)')
if dim == -1: dim = 1
return ops.SoftmaxCrossEntropyLoss([logits, labels], axis=dim, normalization='UNIT', name=name)
return ops.SoftmaxCrossEntropy([logits, labels], axis=dim, normalization='UNIT', name=name)
def sparse_softmax_cross_entropy_with_logits(logits, labels, dim=-1, name=None):
if dim == -1: dim = 1
return ops.SoftmaxLoss([logits, labels], axis=dim, normalization='UNIT', name=name)
return ops.SparseSoftmaxCrossEntropy([logits, labels], axis=dim, normalization='UNIT', name=name)
def l2_loss(t, name=None):
......
......@@ -77,10 +77,11 @@ def GraphDef_Update(graph_def, updater):
u_target.arg.add().CopyFrom(MakeArgument(k, v))
graph_def.u_target.extend([u_target])
def GraphDef_Debug(graph_def):
""" generate debug mode for CC Graph """
def GraphDef_Opt(graph_def):
""" generate opt options for CC Graph """
from dragon.config import option
graph_def.debug_mode = option['debug_mode']
graph_def.share_grads = option['share_grads']
def GraphDef_Device(graph_def):
""" generate deivce info for CC Graph """
......@@ -155,13 +156,13 @@ def function(inputs=[], outputs=[], swaps=None, updater=None):
if len(outputs) > 0:
GraphDef_Device(graph_def)
GraphDef_Debug(graph_def)
GraphDef_Opt(graph_def)
GraphDef_Grad(graph_def, outputs)
GraphDef_Phase(graph_def, outputs)
elif updater is not None:
GraphDef_Device(graph_def)
GraphDef_Debug(graph_def)
GraphDef_Opt(graph_def)
GraphDef_Update(graph_def, updater)
# call c api to create graph
......
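How these graph-level switches are meant to be toggled from user code is not shown in this diff. A plausible sketch, assuming the option dictionary imported by GraphDef_Opt above is writable before a function/graph is compiled (the key names are taken from that code; treating the dict as the public knob is an assumption):

from dragon.config import option

# hypothetical user-side toggles; GraphDef_Opt copies them into every new GraphDef
option['debug_mode'] = False    # keep tensor-sharing optimizations enabled
option['share_grads'] = True    # let gradient ops borrow buffers from the shared "Grad" pool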
......@@ -257,6 +257,7 @@ GraphDef Graph::MakeUpdate(const GraphDef& graph_def) {
bool Graph::Create(const GraphDef& graph_def, Workspace* ws) {
bool has_device_option = graph_def.has_device_option();
bool has_debug_mode = graph_def.has_debug_mode();
bool has_share_grads = graph_def.has_share_grads();
for (const OperatorDef& plain_op_def: graph_def.op()) {
OperatorDef op_def(plain_op_def);
LOG(DEBUG) << "Create Operator " << plain_op_def.name()
......@@ -270,12 +271,83 @@ bool Graph::Create(const GraphDef& graph_def, Workspace* ws) {
if (!op_def.has_debug_mode() && has_debug_mode)
op_def.set_debug_mode(graph_def.debug_mode());
// inherit share_grads if necessary
if (!op_def.has_share_grads() && has_share_grads)
op_def.set_share_grads(graph_def.share_grads());
OperatorBase* op = CreateOperator(op_def, ws);
ops_.push_back(op);
}
return true;
}
void Graph::RecomputingAware(const GraphDef& graph_def, Workspace* ws) {
GraphDef fake_graph(graph_def);
Map<string, vector<OperatorBase*> > fake_recompute_map;
Map<string, string> rename_map;
Map<string, Set<string> > hash_map;
Map<string, int> multi_use_count;
// check mirrow stage
for (int i = 0; i < ops_.size(); i++) {
if (ops_[i]->type().find("Gradient") != string::npos) continue;
bool mirrow_stage = ops_[i]->GetSingleArg<bool>("mirrow_stage", false);
for (auto& u : graph_def.op(i).input()) {
bool inplace_flag = false;
for (auto& v : graph_def.op(i).output()) if (u == v) inplace_flag = true;
mirrow_stage &= (!inplace_flag);
if (!inplace_flag) multi_use_count[u]++;
}
if (mirrow_stage) {
// TODO(PhyscalX): we currently assume input(0)->output(0) is a forced in-place pair
OperatorDef* op = fake_graph.mutable_op(i);
if (rename_map.count(op->input(0)))
*op->mutable_input(0) = rename_map[op->input(0)];
rename_map[op->output(0)] = op->input(0);
*op->mutable_output(0) = op->input(0);
ops_[i]->input(0).Corrupt(); // mark a flag
}
}
// sub-graph aware
for (int i = 0; i < ops_.size(); i++) {
if (ops_[i]->type().find("Gradient") != string::npos) continue;
OperatorDef fake_op = fake_graph.op(i);
OperatorDef op = graph_def.op(i);
for (int j = 0; j < op.output_size(); j++) {
string v = op.output(j);
string fake_v = fake_op.output(j);
if (!fake_recompute_map.count(fake_v))
fake_recompute_map[fake_v] = vector<OperatorBase*>();
if (v != fake_v) {
if (multi_use_count[fake_v] >= 2)
fake_recompute_map[fake_v] = ws->GetRecompute(fake_v);
}
fake_recompute_map[fake_v].push_back(ops_[i]);
for (int k = 0; k < fake_recompute_map[fake_v].size(); k++) {
if (!hash_map.count(v)) hash_map[v] = Set<string>();
string op_name = fake_recompute_map[fake_v][k]->name();
if (!hash_map[v].count(op_name)) {
ws->AddRecompute(v, fake_recompute_map[fake_v][k]);
hash_map[v].insert(op_name);
}
}
}
}
// prepare resources
Tensor* head = ws->CreateTensor("_t_mirrow_stage_head");
head->Reshape(vector<TIndex>(1, WORKSPACE_MAX_CORRUPTED_SIZE));
Tensor* recompute_flag = ws->CreateTensor("_t_global_recompute_flag");
recompute_flag->Reshape(vector<TIndex>(1, 1));
recompute_flag->mutable_data<bool, CPUContext>()[0] = false;
for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) {
string name = "_t_mirrow_stage_buffer_" + dragon_cast<string, int>(i);
Tensor* buffer = ws->CreateTensor(name);
head->mutable_data<string, CPUContext>()[i] = "";
}
}
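A rough, simplified Python sketch of the renaming bookkeeping above (ignoring gradient ops, the multi-use counts, and the sub-graph pass): each mirrow-stage op writes output(0) over input(0)'s storage, and the workspace records which ops must be re-run to rebuild that tensor on demand. Field names here mirror the OperatorDef fields used above; the function is an illustration, not Dragon code.

def build_recompute_plan(ops):
    """ops: [{'name', 'type', 'inputs', 'outputs', 'mirrow_stage'}, ...]"""
    rename, recompute = {}, {}
    for op in ops:
        if 'Gradient' in op['type'] or not op.get('mirrow_stage'): continue
        x, y = op['inputs'][0], op['outputs'][0]
        x = rename.get(x, x)                   # follow earlier renames
        rename[y] = x                          # y now aliases x's storage
        recompute[y] = recompute.get(x, []) + [op['name']]
    return rename, recompute

recompute[t] plays the role of ws->GetRecompute(t) consumed by ElimateCorruption below.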
Graph::Graph(const GraphDef& graph_def, Workspace* ws)
: GraphBase(graph_def, ws) {
GraphDef optimized_graph;
......@@ -297,6 +369,9 @@ Graph::Graph(const GraphDef& graph_def, Workspace* ws)
// create
Create(optimized_graph, ws);
// recomputing-aware
RecomputingAware(optimized_graph, ws);
}
bool Graph::Run(const string& include, const string& exclude) {
......
......@@ -59,6 +59,91 @@ Gradient MakeGradientForOp(const OperatorDef& def, const vector<string>& g_outpu
return grad;
}
template <class Context>
void Operator<Context>::ElimateCorruption() {
Set<string> all_heads;
queue<int> safe_heads;
Tensor* head = ws()->GetTensor("_t_mirrow_stage_head");
string* head_data = head->mutable_data<string, CPUContext>();
for (int i = 0; i < head->count(); i++) all_heads.insert(head_data[i]);
// sub-graph run
for (int i = 0; i < InputSize(); i++) {
if (input(i).is_corrupted()) {
if (all_heads.count(input(i).name())) continue;
LOG(DEBUG) << "Tensor(" << input(i).name() << ") is corrupted, recompute... ";
Tensor* recompute_flag = ws()->GetTensor("_t_global_recompute_flag");
vector<OperatorBase*> list = ws()->GetRecompute(input(i).name());
recompute_flag->mutable_data<bool, CPUContext>()[0] = true;
for (int j = 0; j < list.size(); j++) list[j]->Run();
recompute_flag->mutable_data<bool, CPUContext>()[0] = false;
}
}
// check available head
all_heads.clear();
for (int i = 0; i < head->count(); i++) {
bool safe = true;
for (int j = 0; j < InputSize(); j++)
if (head_data[i] == input(j).name()) safe = false;
if (safe) safe_heads.push(i);
all_heads.insert(head_data[i]);
}
// pre-process
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->is_corrupted()) {
bool inplace_flag = false;
for (int j = 0; j < InputSize(); j++)
if (output(i)->name() == input(j).name()) inplace_flag = true;
if (inplace_flag || all_heads.count(output(i)->name())) continue; // no need to claim a new buffer
CHECK(!safe_heads.empty())
<< "\nat most (" << safe_heads.size() << " [safe] / "
<< all_heads.size() << " [total] can be used for corrupted output in "
<< "(" << name() << ", " << type() << "), "
<< "\nadd WORKSPACE_MAX_CORRUPTED_SIZE for more powerful mirrow stage ?";
int idx = safe_heads.front();
safe_heads.pop();
Tensor* buffer = ws()->GetTensor("_t_mirrow_stage_buffer_" + dragon_cast<string, int>(idx));
output(i)->Move(buffer->memory());
head_data[idx] = output(i)->name();
}
}
}
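The head tensor is effectively a small name table: slot i of _t_mirrow_stage_head records which corrupted tensor currently owns _t_mirrow_stage_buffer_i. A toy sketch of that slot assignment (illustration only; the real code first re-runs the recorded ops to rebuild corrupted inputs, and also skips in-place outputs):

def claim_buffer(head, input_names, output_name):
    """head: list of tensor names, one per preallocated buffer slot."""
    if output_name in head:             # output already owns a slot
        return head.index(output_name)
    for i, owner in enumerate(head):
        if owner not in input_names:    # a slot none of our inputs still needs
            head[i] = output_name
            return i
    raise RuntimeError('increase WORKSPACE_MAX_CORRUPTED_SIZE')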
template <class Context>
void Operator<Context>::ShareGradient() {
// TODO(PhyscalX): we preset input(-1)->output(0) to share
if (output(0)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(0)->Replace(*dX);
}
}
template <class Context>
void Operator<Context>::MakeResource() {
ElimateCorruption();
if (allow_share_grads_) ShareGradient();
}
template <class Context>
void Operator<Context>::CleanResource() {
// post-process for mirrow stage
Map<string, int> head_to_idx;
Tensor* head = ws()->GetTensor("_t_mirrow_stage_head");
string* head_data = head->mutable_data<string, CPUContext>();
for (int i = 0; i < head->count(); i++) head_to_idx[head_data[i]] = i;
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->is_corrupted() && head_to_idx.count(output(i)->name())) {
string used = "_t_mirrow_stage_buffer_" + dragon_cast<string, int>(head_to_idx[output(i)->name()]);
Tensor* buffer = ws()->GetTensor(used);
if (output(i)->memory() != buffer->memory()) buffer->Move(output(i)->memory());
}
}
if (allow_share_grads_) {
// TODO(PhyscalX): we preset input(-1)->output(0) to share
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY, "Grad");
}
}
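Taken together, MakeResource/CleanResource implement a simple gradient-buffer pool: before a gradient op runs, its output(0) is redirected into a workspace buffer drawn from the "Grad" pool, and after it runs the incoming gradient input(-1) is returned to that pool for the next op. A toy pool with the same get/release shape (not the Workspace API, just an illustration of the reuse pattern):

class GradBufferPool(object):
    """Toy stand-in for ws()->GetBuffer("Grad") / ws()->ReleaseBuffer(t, "Grad")."""
    def __init__(self):
        self.free = []
    def get(self, nbytes):
        # reuse a released buffer when one exists and is large enough
        buf = self.free.pop() if self.free else bytearray(nbytes)
        return buf if len(buf) >= nbytes else bytearray(nbytes)
    def release(self, buf):
        self.free.append(buf)   # the next gradient op can reuse this storage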
DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase,const OperatorDef&, Workspace*);
DEFINE_REGISTRY(CUDAOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*);
DEFINE_REGISTRY(CUDNNOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*);
......@@ -94,4 +179,13 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(int, ints)
INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings)
#undef INSTANTIATE_GET_REPEATED_ARGUMENT
template void Operator<CPUContext>::ElimateCorruption();
template void Operator<CUDAContext>::ElimateCorruption();
template void Operator<CPUContext>::ShareGradient();
template void Operator<CUDAContext>::ShareGradient();
template void Operator<CPUContext>::MakeResource();
template void Operator<CUDAContext>::MakeResource();
template void Operator<CPUContext>::CleanResource();
template void Operator<CUDAContext>::CleanResource();
} // namespace dragon
\ No newline at end of file
......@@ -13,4 +13,14 @@ GraphBase* Workspace::CreateGraph(const GraphDef& graph_def) {
return graph_map_[graph_def.name()].get();
}
Workspace::~Workspace() {
for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) {
string name = "_t_mirrow_stage_buffer_" + dragon_cast<string, int>(i);
if (HasTensor(name)) {
MixedMemory* mem = GetTensor(name)->memory();
if (mem != nullptr) delete mem;
}
}
}
} // namespace dragon
\ No newline at end of file
......@@ -70,8 +70,9 @@ void DropoutGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void DropoutGradientOp<Context>::ClearAfterRun() {
ws()->ReleaseBuffer(mask, true);
void DropoutGradientOp<Context>::CleanResource() {
Operator<Context>::CleanResource();
ws()->ReleaseBuffer(mask, "Common", true);
}
DEPLOY_CPU(DropoutGradient);
......@@ -81,7 +82,7 @@ DEPLOY_CUDA(DropoutGradient);
OPERATOR_SCHEMA(DropoutGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 } });
class GetDropoutGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetDropoutGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......@@ -92,4 +93,3 @@ public:
REGISTER_GRADIENT(Dropout, GetDropoutGradient);
} // namespace dragon
\ No newline at end of file
......@@ -48,7 +48,7 @@ DEPLOY_CUDA(ReluGradient);
OPERATOR_SCHEMA(ReluGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 }});
class GetReluGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetReluGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -48,7 +48,7 @@ DEPLOY_CUDA(SigmoidGradient);
OPERATOR_SCHEMA(SigmoidGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 } });
class GetSigmoidGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetSigmoidGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -71,7 +71,7 @@ DEPLOY_CUDA(SoftmaxGradient);
OPERATOR_SCHEMA(SoftmaxGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 } });
class GetSoftmaxGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetSoftmaxGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -48,7 +48,7 @@ DEPLOY_CUDA(TanhGradient);
OPERATOR_SCHEMA(TanhGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 } });
class GetTanhGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetTanhGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......@@ -59,4 +59,3 @@ public:
REGISTER_GRADIENT(Tanh, GetTanhGradient);
} // namespace dragon
\ No newline at end of file
......@@ -160,22 +160,16 @@ void AddGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void AddGradientOp<Context>::ShareBeforeRun() {
void AddGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(i)->Replace(*dX);
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void AddGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(AddGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(AddGradient);
......@@ -183,7 +177,7 @@ DEPLOY_CUDA(AddGradient);
OPERATOR_SCHEMA(AddGradient).NumInputs(2).NumOutputs(2);
class GetAddGradient : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetAddGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -95,18 +95,6 @@ void BiasAddGradientOp<Context>::RunOnDevice() {
}
}
template <class Context>
void BiasAddGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void BiasAddGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(BiasAddGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(BiasAddGradient);
......@@ -114,7 +102,7 @@ DEPLOY_CUDA(BiasAddGradient);
OPERATOR_SCHEMA(BiasAddGradient).NumInputs(3).NumOutputs(2);
class GetBiasAddGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetBiasAddGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -45,18 +45,6 @@ void ClipGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void ClipGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void ClipGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ClipGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ClipGradient);
......@@ -64,7 +52,7 @@ DEPLOY_CUDA(ClipGradient);
OPERATOR_SCHEMA(ClipGradient).NumInputs(2).NumOutputs(1);
class GetClipGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetClipGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -191,22 +191,16 @@ void DivGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void DivGradientOp<Context>::ShareBeforeRun() {
void DivGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(i)->Replace(*dX);
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void DivGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(DivGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(DivGradient);
......@@ -214,7 +208,7 @@ DEPLOY_CUDA(DivGradient);
OPERATOR_SCHEMA(DivGradient).NumInputs(3).NumOutputs(2);
class GetDivGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetDivGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
#include "operators/arithmetic/dot_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h"
namespace dragon {
......@@ -169,6 +170,17 @@ void DotGradientOp<Context>::RunOnDevice() {
}
}
template <class Context>
void DotGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
DEPLOY_CPU(DotGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(DotGradient);
......@@ -176,7 +188,7 @@ DEPLOY_CUDA(DotGradient);
OPERATOR_SCHEMA(DotGradient).NumInputs(3).NumOutputs(2);
class GetDotGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetDotGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -118,22 +118,16 @@ void EltwiseGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void EltwiseGradientOp<Context>::ShareBeforeRun() {
void EltwiseGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(i)->Replace(*dX);
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void EltwiseGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(EltwiseGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(EltwiseGradient);
......@@ -141,7 +135,7 @@ DEPLOY_CUDA(EltwiseGradient);
OPERATOR_SCHEMA(EltwiseGradient).NumInputs(3, INT_MAX).NumOutputs(2, INT_MAX);
class GetEltwiseGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetEltwiseGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs, outputs;
......
......@@ -41,19 +41,6 @@ void ExpGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void ExpGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void ExpGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ExpGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ExpGradient);
......@@ -61,7 +48,7 @@ DEPLOY_CUDA(ExpGradient);
OPERATOR_SCHEMA(ExpGradient).NumInputs(2).NumOutputs(1);
class GetExpGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetExpGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -61,18 +61,6 @@ void GramMatrixGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void GramMatrixGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void GramMatrixGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(GramMatrixGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(GramMatrixGradient);
......@@ -80,7 +68,7 @@ DEPLOY_CUDA(GramMatrixGradient);
OPERATOR_SCHEMA(GramMatrixGradient).NumInputs(2).NumOutputs(1);
class GetGramMatrixGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetGramMatrixGradient);
vector<OperatorDef> MakeDefs() override{
return SingleDef(def.type() + "Gradient", "",
......
......@@ -120,20 +120,6 @@ void InnerProductGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void InnerProductGradientOp<Context>::ShareBeforeRun() {
if (output(0)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
}
template <class Context>
void InnerProductGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(InnerProductGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(InnerProductGradient);
......@@ -141,7 +127,7 @@ DEPLOY_CUDA(InnerProductGradient);
OPERATOR_SCHEMA(InnerProductGradient).NumInputs(3).NumOutputs(3);
class GetInnerProductGradient : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetInnerProductGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -41,18 +41,6 @@ void LogGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void LogGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void LogGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(LogGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(LogGradient);
......@@ -60,7 +48,7 @@ DEPLOY_CUDA(LogGradient);
OPERATOR_SCHEMA(LogGradient).NumInputs(2).NumOutputs(1);
class GetLogGradient : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetLogGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -110,22 +110,16 @@ void MatmulGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void MatmulGradientOp<Context>::ShareBeforeRun() {
void MatmulGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(i)->Replace(*dX);
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void MatmulGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(MatmulGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(MatmulGradient);
......@@ -133,7 +127,7 @@ DEPLOY_CUDA(MatmulGradient);
OPERATOR_SCHEMA(MatmulGradient).NumInputs(3).NumOutputs(2);
class GetMatmulGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetMatmulGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -179,22 +179,16 @@ void MulGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void MulGradientOp<Context>::ShareBeforeRun() {
void MulGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(i)->Replace(*dX);
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void MulGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(MulGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(MulGradient);
......@@ -202,7 +196,7 @@ DEPLOY_CUDA(MulGradient);
OPERATOR_SCHEMA(MulGradient).NumInputs(3).NumOutputs(2);
class GetMulGradient : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetMulGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -80,18 +80,6 @@ void PowGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void PowGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void PowGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(PowGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(PowGradient);
......@@ -99,7 +87,7 @@ DEPLOY_CUDA(PowGradient);
OPERATOR_SCHEMA(PowGradient).NumInputs(3).NumOutputs(1);
class GetPowGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetPowGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -137,18 +137,6 @@ void ScaleGradientOp<Context>::RunOnDevice() {
}
}
template <class Context>
void ScaleGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void ScaleGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ScaleGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ScaleGradient);
......@@ -156,7 +144,7 @@ DEPLOY_CUDA(ScaleGradient);
OPERATOR_SCHEMA(ScaleGradient).NumInputs(3).NumOutputs(3);
class GetScaleGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetScaleGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -42,18 +42,6 @@ void SquareGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void SquareGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void SquareGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(SquareGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(SquareGradient);
......@@ -61,7 +49,7 @@ DEPLOY_CUDA(SquareGradient);
OPERATOR_SCHEMA(SquareGradient).NumInputs(2).NumOutputs(1);
class GetSquareGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetSquareGradient);
vector<OperatorDef> MakeDefs() override{
return SingleDef(def.type() + "Gradient", "",
......
......@@ -160,22 +160,16 @@ void SubGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void SubGradientOp<Context>::ShareBeforeRun() {
void SubGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(i)->Replace(*dX);
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void SubGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(SubGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(SubGradient);
......@@ -183,7 +177,7 @@ DEPLOY_CUDA(SubGradient);
OPERATOR_SCHEMA(SubGradient).NumInputs(3).NumOutputs(2);
class GetSubGradient : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetSubGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -57,18 +57,6 @@ void AtGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void AtGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void AtGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(AtGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(AtGradient);
......@@ -76,7 +64,7 @@ DEPLOY_CUDA(AtGradient);
OPERATOR_SCHEMA(AtGradient).NumInputs(3).NumOutputs(1);
class GetAtGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetAtGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -101,22 +101,16 @@ void ConcatGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void ConcatGradientOp<Context>::ShareBeforeRun() {
void ConcatGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(i)->Replace(*dX);
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void ConcatGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ConcatGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ConcatGradient);
......@@ -124,7 +118,7 @@ DEPLOY_CUDA(ConcatGradient);
OPERATOR_SCHEMA(ConcatGradient).NumInputs(2, INT_MAX).NumOutputs(1, INT_MAX);
class GetConcatGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetConcatGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs, outputs;
......
......@@ -139,18 +139,6 @@ void CropGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void CropGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void CropGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(CropGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(CropGradient);
......@@ -158,7 +146,7 @@ DEPLOY_CUDA(CropGradient);
OPERATOR_SCHEMA(CropGradient).NumInputs(2).NumOutputs(1);
class GetCropGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetCropGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -30,7 +30,7 @@ DEPLOY_CUDA(ExpandDimsGradient);
OPERATOR_SCHEMA(ExpandDimsGradient).NumInputs(2).NumOutputs(1);
class GetExpandDimsGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetExpandDimsGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -39,7 +39,7 @@ DEPLOY_CUDA(FlattenGradient);
OPERATOR_SCHEMA(FlattenGradient).NumInputs(2).NumOutputs(1);
class GetFlattenGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetFlattenGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -91,7 +91,7 @@ DEPLOY_CUDA(TemplateGradient);
OPERATOR_SCHEMA(TemplateGradient);
class GetTemplateGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetTemplateGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs, outputs;
......@@ -104,5 +104,3 @@ public:
REGISTER_GRADIENT(Template, GetTemplateGradient);
} // namespace dragon
\ No newline at end of file
......@@ -107,18 +107,6 @@ void ReduceGradientOp<Context>::RunOnDevice() {
}
}
template <class Context>
void ReduceGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void ReduceGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ReduceGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ReduceGradient);
......@@ -126,7 +114,7 @@ DEPLOY_CUDA(ReduceGradient);
OPERATOR_SCHEMA(ReduceGradient).NumInputs(2).NumOutputs(1);
class GetReduceGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetReduceGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -67,7 +67,7 @@ DEPLOY_CUDA(ReshapeGradient);
OPERATOR_SCHEMA(ReshapeGradient).NumInputs(2).NumOutputs(1);
class GetReshapeGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetReshapeGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......@@ -78,4 +78,3 @@ public:
REGISTER_GRADIENT(Reshape, GetReshapeGradient);
} // namespace dragon
\ No newline at end of file
......@@ -214,7 +214,7 @@ DEPLOY_CUDA(ScanGradient);
OPERATOR_SCHEMA(ScanGradient).NumInputs(2, INT_MAX).NumOutputs(1, INT_MAX);
class GetScanGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetScanGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs, outputs;
......
......@@ -81,7 +81,7 @@ DEPLOY_CUDA(SliceGradient);
OPERATOR_SCHEMA(SliceGradient).NumInputs(2, INT_MAX).NumOutputs(1);
class GetSliceGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetSliceGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs(1, I(0));
......
......@@ -114,18 +114,6 @@ void TileGradientOp<Context>::RunOnDevice() {
ws()->ReleaseBuffer(dest);
}
template <class Context>
void TileGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void TileGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(TileGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(TileGradient);
......@@ -133,7 +121,7 @@ DEPLOY_CUDA(TileGradient);
OPERATOR_SCHEMA(TileGradient).NumInputs(1).NumOutputs(1);
class GetTileGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetTileGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -79,18 +79,6 @@ void TransposeGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void TransposeGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void TransposeGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(TransposeGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(TransposeGradient);
......@@ -98,7 +86,7 @@ DEPLOY_CUDA(TransposeGradient);
OPERATOR_SCHEMA(TransposeGradient).NumInputs(2).NumOutputs(1);
class GetTransposeGradient final : public GradientMakerBase{
public:
public:
GRADIENT_MAKER_CTOR(GetTransposeGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -18,8 +18,8 @@ void L1LossOp<Context>::RunWithType() {
auto* Wdata = input(2).template data<T, Context>();
math::Mul<T, Context>(diff->count(), Wdata, diff_data, diff_data);
}
T abs_val = math::ASum<T, Context>(diff->count(), diff_data);
Ydata[0] = coeff * abs_val;
Ydata[0] = math::ASum<T, Context>(diff->count(), diff_data);
T normalizer;
if (normalization == "BATCH_SIZE") normalizer = input(0).dim(0);
else if (normalization == "FULL") normalizer = input(0).count();
......@@ -46,9 +46,11 @@ OPERATOR_SCHEMA(L1Loss).NumInputs(2, 3).NumOutputs(1);
template <class Context> template <typename T>
void L1LossGradientOp<Context>::RunWithType() {
auto* dYdata = diff->template mutable_data<T, Context>();
kernel::AbsGrad<T, Context>(diff->count(), dYdata, dYdata);
T alpha = coeff, normalizer;
auto* diff_data = diff->template mutable_data<T, Context>();
auto* dYdata = input(-1).template data<T, CPUContext>();
kernel::AbsGrad<T, Context>(diff->count(), diff_data, diff_data);
T alpha = dYdata[0], normalizer;
if (normalization == "BATCH_SIZE") normalizer = input(0).dim(0);
else if (normalization == "FULL") normalizer = input(0).count();
else if (normalization == "NONE") normalizer = 1;
......@@ -59,7 +61,7 @@ void L1LossGradientOp<Context>::RunWithType() {
auto* dXdata = output(i)->template mutable_data<T, Context>();
const T sign = (i == 0) ? 1 : -1;
alpha *= sign;
math::Axpby<T, Context>(output(i)->count(), alpha, dYdata, 0, dXdata);
math::Axpby<T, Context>(output(i)->count(), alpha, diff_data, 0, dXdata);
}
}
......@@ -71,6 +73,17 @@ void L1LossGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void L1LossGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
DEPLOY_CPU(L1LossGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(L1LossGradient);
......@@ -78,7 +91,7 @@ DEPLOY_CUDA(L1LossGradient);
OPERATOR_SCHEMA(L1LossGradient).NumInputs(3).NumOutputs(2);
class GetL1LossGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetL1LossGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -16,8 +16,8 @@ void L2LossOp<Context>::RunWithType() {
auto* Wdata = input(2).template data<T, Context>();
math::Mul<T, Context>(diff->count(), Wdata, diff_data, diff_data);
}
T dot = math::Dot<T, Context>(diff->count(), diff_data, diff_data);
Ydata[0] = T(0.5) * coeff * dot;
Ydata[0] = T(0.5) * math::Dot<T, Context>(diff->count(), diff_data, diff_data);
T normalizer;
if (normalization == "BATCH_SIZE") normalizer = input(0).dim(0);
else if (normalization == "FULL") normalizer = input(0).count();
......@@ -44,8 +44,10 @@ OPERATOR_SCHEMA(L2Loss).NumInputs(2, 3).NumOutputs(1);
template <class Context> template <typename T>
void L2LossGradientOp<Context>::RunWithType() {
auto* dYdata = diff->template mutable_data<T, Context>();
T alpha = coeff, normalizer;
auto* diff_data = diff->template mutable_data<T, Context>();
auto* dYdata = input(-1).template data<T, CPUContext>();
T alpha = dYdata[0], normalizer;
if (normalization == "BATCH_SIZE") normalizer = input(0).dim(0);
else if (normalization == "FULL") normalizer = input(0).count();
else if (normalization == "NONE") normalizer = 1;
......@@ -56,7 +58,7 @@ void L2LossGradientOp<Context>::RunWithType() {
auto* dXdata = output(i)->template mutable_data<T, Context>();
const T sign = (i == 0) ? 1 : -1;
alpha *= sign;
math::Axpby<T, Context>(output(i)->count(), alpha, dYdata, 0, dXdata);
math::Axpby<T, Context>(output(i)->count(), alpha, diff_data, 0, dXdata);
}
}
......@@ -68,6 +70,17 @@ void L2LossGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void L2LossGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
DEPLOY_CPU(L2LossGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(L2LossGradient);
......@@ -75,9 +88,9 @@ DEPLOY_CUDA(L2LossGradient);
OPERATOR_SCHEMA(L2LossGradient).NumInputs(3).NumOutputs(2);
class GetL2LossGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetL2LossGradient);
vector<OperatorDef> MakeDefs() override{
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
vector<string> {I(0), I(1), GO(0)},
vector<string> {GI(0), GI(1)});
......@@ -86,5 +99,3 @@ public:
REGISTER_GRADIENT(L2Loss, GetL2LossGradient);
} // namespace dragon
\ No newline at end of file
#include "operators/loss/sigmoid_cross_entropy_loss_op.h"
#include "operators/loss/sigmoid_cross_entropy_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h"
#include "utils/op_kernel.h"
......@@ -6,7 +6,7 @@
namespace dragon {
template <class Context> template <typename T>
void SigmoidCrossEntropyLossOp<Context>::RunWithType() {
void SigmoidCrossEntropyOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>();
auto* Pdata = prob->template mutable_data<T, Context>();
kernel::Sigmoid<T, Context>(prob->count(), Xdata, Pdata);
......@@ -32,7 +32,7 @@ void SigmoidCrossEntropyLossOp<Context>::RunWithType() {
}
template <class Context>
void SigmoidCrossEntropyLossOp<Context>::RunOnDevice() {
void SigmoidCrossEntropyOp<Context>::RunOnDevice() {
CHECK_EQ(input(0).count(), input(1).count())
<< "\nnumber of predictions must match the number of labels.";
prob = ws()->CreateTensor("_t_" + anchor() + "_sigmoid_prob");
......@@ -43,14 +43,14 @@ void SigmoidCrossEntropyLossOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(SigmoidCrossEntropyLoss);
DEPLOY_CPU(SigmoidCrossEntropy);
#ifdef WITH_CUDA
DEPLOY_CUDA(SigmoidCrossEntropyLoss);
DEPLOY_CUDA(SigmoidCrossEntropy);
#endif
OPERATOR_SCHEMA(SigmoidCrossEntropyLoss).NumInputs(2).NumOutputs(1);
OPERATOR_SCHEMA(SigmoidCrossEntropy).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void SigmoidCrossEntropyLossGradientOp<Context>::RunWithType() {
void SigmoidCrossEntropyGradientOp<Context>::RunWithType() {
auto* Pdata = prob->template data<T, Context>();
auto* Tdata = input(1).template data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>();
......@@ -72,7 +72,7 @@ void SigmoidCrossEntropyLossGradientOp<Context>::RunWithType() {
}
template <class Context>
void SigmoidCrossEntropyLossGradientOp<Context>::RunOnDevice() {
void SigmoidCrossEntropyGradientOp<Context>::RunOnDevice() {
prob = ws()->GetTensor("_t_" + anchor() + "_sigmoid_prob");
output(0)->ReshapeLike(input(0));
......@@ -80,21 +80,21 @@ void SigmoidCrossEntropyLossGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(SigmoidCrossEntropyLossGradient);
DEPLOY_CPU(SigmoidCrossEntropyGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(SigmoidCrossEntropyLossGradient);
DEPLOY_CUDA(SigmoidCrossEntropyGradient);
#endif
OPERATOR_SCHEMA(SigmoidCrossEntropyLossGradient).NumInputs(3).NumOutputs(1);
OPERATOR_SCHEMA(SigmoidCrossEntropyGradient).NumInputs(3).NumOutputs(1);
class GetSigmoidCrossEntropyLossGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSigmoidCrossEntropyLossGradient);
class GetSigmoidCrossEntropyGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSigmoidCrossEntropyGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
vector<string> {I(0), I(1), GO(0)},
vector<string> {GI(0)});
}
};
REGISTER_GRADIENT(SigmoidCrossEntropyLoss, GetSigmoidCrossEntropyLossGradient);
REGISTER_GRADIENT(SigmoidCrossEntropy, GetSigmoidCrossEntropyGradient);
} // namespace dragon
\ No newline at end of file
......@@ -52,17 +52,18 @@ OPERATOR_SCHEMA(SmoothL1Loss).NumInputs(2, 4).NumOutputs(1);
template <class Context> template <typename T>
void SmoothL1LossGradientOp<Context>::RunWithType() {
auto* dYdata = diff->template mutable_data<T, Context>();
auto* diff_data = diff->template mutable_data<T, Context>();
auto* dYdata = input(-1).template data<T, CPUContext>();
kernel::SmoothL1Grad<T, Context>(diff->count(), sigma2, dYdata, dYdata);
kernel::SmoothL1Grad<T, Context>(diff->count(), sigma2, diff_data, diff_data);
for (int i = 0; i < 2; i++) {
if (output(i)->name() == "ignore") continue;
output(i)->ReshapeLike(input(i));
auto* dXdata = output(i)->template mutable_data<T, Context>();
const T sign = (i == 0) ? 1 : -1;
const T coeff = sign / input(i).dim(0);
math::Axpby<T, Context>(output(i)->count(), coeff, dYdata, 0, dXdata);
const T coeff = sign / input(i).dim(0) * dYdata[0];
math::Axpby<T, Context>(output(i)->count(), coeff, diff_data, 0, dXdata);
if (InputSize() > 3) {
auto* inside_w_data = input(2).template data<T, Context>();
math::Mul<T, Context>(output(i)->count(), inside_w_data, dXdata, dXdata);
......@@ -89,7 +90,7 @@ DEPLOY_CUDA(SmoothL1LossGradient);
OPERATOR_SCHEMA(SmoothL1LossGradient).NumInputs(3, 5).NumOutputs(2);
class GetSmoothL1LossGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetSmoothL1LossGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs;
......
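For reference, the element-wise gradient that the SmoothL1Grad kernel call above computes follows the usual Fast R-CNN definition (sigma2 = sigma^2), and the new code additionally scales it by the incoming scalar gradient dYdata[0] and the batch size. A NumPy sketch as an illustration rather than the kernel itself:

import numpy as np

def smooth_l1_grad(diff, sigma2):
    # d/dx smoothL1(x): sigma2 * x inside the quadratic region, sign(x) outside it
    return np.where(np.abs(diff) < 1.0 / sigma2, sigma2 * diff, np.sign(diff))

# dX = sign * dY / batch_size * smooth_l1_grad(diff, sigma2), matching the Axpby call above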
#include "operators/activation/softmax_op.h"
#include "operators/loss/softmax_cross_entropy_loss_op.h"
#include "operators/loss/softmax_cross_entropy_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h"
#include "utils/op_kernel.h"
......@@ -8,7 +8,7 @@
namespace dragon {
template <class Context> template <typename T>
void SoftmaxCrossEntropyLossOp<Context>::RunWithType() {
void SoftmaxCrossEntropyOp<Context>::RunWithType() {
auto* Pdata = prob->template data<T, Context>();
auto* Tdata = input(1).template data<T, Context>();
auto* Ldata = losses.template mutable_data<T, Context>();
......@@ -36,7 +36,7 @@ void SoftmaxCrossEntropyLossOp<Context>::RunWithType() {
}
template <class Context>
void SoftmaxCrossEntropyLossOp<Context>::RunOnDevice() {
void SoftmaxCrossEntropyOp<Context>::RunOnDevice() {
outer_dim = input(0).count(0, axis);
inner_dim = input(0).count(axis + 1);
CHECK_EQ(input(0).count(), input(1).count())
......@@ -49,14 +49,14 @@ void SoftmaxCrossEntropyLossOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(SoftmaxCrossEntropyLoss);
DEPLOY_CPU(SoftmaxCrossEntropy);
#ifdef WITH_CUDA
DEPLOY_CUDA(SoftmaxCrossEntropyLoss);
DEPLOY_CUDA(SoftmaxCrossEntropy);
#endif
OPERATOR_SCHEMA(SoftmaxCrossEntropyLoss).NumInputs(2).NumOutputs(1);
OPERATOR_SCHEMA(SoftmaxCrossEntropy).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void SoftmaxCrossEntropyLossGradientOp<Context>::RunWithType() {
void SoftmaxCrossEntropyGradientOp<Context>::RunWithType() {
auto* Tdata = input(1).template data<T, Context>();
auto* Pdata = prob->template mutable_data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>();
......@@ -75,7 +75,6 @@ void SoftmaxCrossEntropyLossGradientOp<Context>::RunWithType() {
return;
}
// normalize
T normalizer;
if (normalization == "BATCH_SIZE") normalizer = outer_dim;
else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
......@@ -85,7 +84,7 @@ void SoftmaxCrossEntropyLossGradientOp<Context>::RunWithType() {
}
template <class Context>
void SoftmaxCrossEntropyLossGradientOp<Context>::RunOnDevice() {
void SoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() {
prob = ws()->GetTensor("_t_" + anchor() + "_softmax_prob");
outer_dim = prob->count(0, axis);
inner_dim = prob->count(axis + 1);
......@@ -95,21 +94,21 @@ void SoftmaxCrossEntropyLossGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(SoftmaxCrossEntropyLossGradient);
DEPLOY_CPU(SoftmaxCrossEntropyGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(SoftmaxCrossEntropyLossGradient);
DEPLOY_CUDA(SoftmaxCrossEntropyGradient);
#endif
OPERATOR_SCHEMA(SoftmaxCrossEntropyLossGradient).NumInputs(3).NumOutputs(1);
OPERATOR_SCHEMA(SoftmaxCrossEntropyGradient).NumInputs(3).NumOutputs(1);
class GetSoftmaxCrossEntropyLossGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSoftmaxCrossEntropyLossGradient);
class GetSoftmaxCrossEntropyGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSoftmaxCrossEntropyGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
vector<string> {I(0), I(1), GO(0)},
vector<string> {GI(0)});
}
};
REGISTER_GRADIENT(SoftmaxCrossEntropyLoss, GetSoftmaxCrossEntropyLossGradient);
REGISTER_GRADIENT(SoftmaxCrossEntropy, GetSoftmaxCrossEntropyGradient);
} // namespace dragon
\ No newline at end of file
#include "operators/activation/softmax_op.h"
#include "operators/loss/softmax_loss_op.h"
#include "operators/loss/sparse_softmax_cross_entropy_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h"
#include "utils/op_kernel.h"
......@@ -8,13 +8,21 @@
namespace dragon {
template <class Context> template <typename T>
void SoftmaxLossOp<Context>::RunWithType() {
void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() {
auto* prob_data = prob->template data<T, Context>();
auto* label_data = input(1).template data<T, Context>();
auto* loss_data = losses.template mutable_data<T, Context>();
auto* valid_data = valid.template mutable_data<T, Context>();
kernel::SparseSoftmaxCrossEntropy<T, Context>(input(0).count(), input(0).dim(axis),
outer_dim, inner_dim, prob_data, label_data, loss_data, valid_data, &ignore);
kernel::SparseSoftmaxCrossEntropy<T, Context>(input(0).count(),
input(0).dim(axis),
outer_dim,
inner_dim,
prob_data,
label_data,
loss_data,
valid_data,
&ignore);
if (normalization == "UNIT") {
output(0)->ReshapeLike(losses);
......@@ -35,7 +43,7 @@ void SoftmaxLossOp<Context>::RunWithType() {
}
template <class Context>
void SoftmaxLossOp<Context>::RunOnDevice() {
void SparseSoftmaxCrossEntropyOp<Context>::RunOnDevice() {
outer_dim = input(0).count(0, axis);
inner_dim = input(0).count(axis + 1);
CHECK_EQ(outer_dim * inner_dim, input(1).count())
......@@ -49,21 +57,29 @@ void SoftmaxLossOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(SoftmaxLoss);
DEPLOY_CPU(SparseSoftmaxCrossEntropy);
#ifdef WITH_CUDA
DEPLOY_CUDA(SoftmaxLoss);
DEPLOY_CUDA(SparseSoftmaxCrossEntropy);
#endif
OPERATOR_SCHEMA(SoftmaxLoss).NumInputs(2).NumOutputs(1);
OPERATOR_SCHEMA(SparseSoftmaxCrossEntropy).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void SoftmaxLossGradientOp<Context>::RunWithType() {
void SparseSoftmaxCrossEntropyGradientOp<Context>::RunWithType() {
auto* label_data = input(1).template data<T, Context>();
auto* prob_data = prob->template mutable_data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>();
auto* valid_data = valid.template mutable_data<T, Context>();
ctx().template Copy<T, Context, Context>(prob->count(), dXdata, prob_data);
kernel::SoftmaxLossGrad<T, Context>(output(0)->count(), output(0)->dim(axis),
outer_dim, inner_dim, label_data, prob_data, valid_data, &ignore, dXdata);
kernel::SparseSoftmaxCrossEntropyGrad<T, Context>(output(0)->count(),
output(0)->dim(axis),
outer_dim,
inner_dim,
prob_data,
label_data,
valid_data,
&ignore,
dXdata);
if (normalization == "UNIT") {
auto* dYdata = input(-1).template data<T, Context>();
......@@ -83,7 +99,7 @@ void SoftmaxLossGradientOp<Context>::RunWithType() {
}
template <class Context>
void SoftmaxLossGradientOp<Context>::RunOnDevice() {
void SparseSoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() {
prob = ws()->GetTensor("_t_" + anchor() + "_softmax_prob");
outer_dim = prob->count(0, axis);
inner_dim = prob->count(axis + 1);
......@@ -94,21 +110,21 @@ void SoftmaxLossGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(SoftmaxLossGradient);
DEPLOY_CPU(SparseSoftmaxCrossEntropyGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(SoftmaxLossGradient);
DEPLOY_CUDA(SparseSoftmaxCrossEntropyGradient);
#endif
OPERATOR_SCHEMA(SoftmaxLossGradient).NumInputs(3).NumOutputs(1);
OPERATOR_SCHEMA(SparseSoftmaxCrossEntropyGradient).NumInputs(3).NumOutputs(1);
class GetSoftmaxLossGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSoftmaxLossGradient);
vector<OperatorDef> MakeDefs() override{
class GetSparseSoftmaxCrossEntropyGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSparseSoftmaxCrossEntropyGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
vector<string> {I(0), I(1), GO(0)},
vector<string> {GI(0)});
}
};
REGISTER_GRADIENT(SoftmaxLoss, GetSoftmaxLossGradient);
REGISTER_GRADIENT(SparseSoftmaxCrossEntropy, GetSparseSoftmaxCrossEntropyGradient);
} // namespace dragon
\ No newline at end of file
#include "operators/activation/softmax_op.h"
#include "operators/loss/sparse_softmax_focal_loss_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h"
#include "utils/op_kernel.h"
#include "utils/proto_utils.h"
namespace dragon {
template <class Context> template <typename T>
void SparseSoftmaxFocalLossOp<Context>::RunWithType() {
auto* prob_data = this->prob->template data<T, Context>();
auto* label_data = input(1).template data<T, Context>();
auto* loss_data = this->losses.template mutable_data<T, Context>();
auto* valid_data = this->valid.template mutable_data<T, Context>();
auto* scale_data = scale->template mutable_data<T, Context>();
kernel::SparseSoftmaxFocalLoss<T, Context>(input(0).count(),
input(0).dim(axis),
outer_dim,
inner_dim,
alpha,
gamma,
prob_data,
label_data,
scale_data,
loss_data,
valid_data,
&this->ignore);
if (normalization == "UNIT") {
if (use_pseudo_metric) {
math::MulScalar<T, Context>(this->losses.count(),
1.0 / alpha,
loss_data);
}
output(0)->ReshapeLike(this->losses);
output(0)->Share(this->losses);
return;
}
T normalizer;
if (normalization == "VALID")
normalizer = math::ASum<T, Context>(this->valid.count(), valid_data);
else if (normalization == "BATCH_SIZE") normalizer = outer_dim;
else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1;
T loss = math::ASum<T, Context>(this->losses.count(), loss_data);
loss = use_pseudo_metric ? loss / alpha : loss;
output(0)->Reshape(vector<TIndex>(1, 1));
auto* Ydata = output(0)->template mutable_data<T, CPUContext>();
Ydata[0] = loss / normalizer;
}
template <class Context>
void SparseSoftmaxFocalLossOp<Context>::RunOnDevice() {
outer_dim = input(0).count(0, axis);
inner_dim = input(0).count(axis + 1);
CHECK_EQ(outer_dim * inner_dim, input(1).count())
<< "\nnumber of predictions must match the number of labels.";
this->valid.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
this->losses.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
this->softmax_op->Run();
this->prob = ws()->GetTensor("_t_" + anchor() + "_softmax_prob");
scale = ws()->CreateTensor("_t_" + anchor() + "_focal_scale");
scale->ReshapeLike(*this->prob);
if (input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(SparseSoftmaxFocalLoss);
#ifdef WITH_CUDA
DEPLOY_CUDA(SparseSoftmaxFocalLoss);
#endif
OPERATOR_SCHEMA(SparseSoftmaxFocalLoss).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() {
auto* label_data = input(1).template data<T, Context>();
auto* prob_data = this->prob->template mutable_data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>();
auto* valid_data = this->valid.template mutable_data<T, Context>();
auto* scale_data = scale->template mutable_data<T, Context>();
kernel::SparseSoftmaxFocalLossGrad<T, Context>(output(0)->count(),
output(0)->dim(axis),
outer_dim,
inner_dim,
gamma,
eps,
scale_data,
prob_data,
label_data,
valid_data,
&this->ignore,
dXdata);
if (normalization == "UNIT") {
auto* dYdata = input(-1).template data<T, Context>();
kernel::SumGrad<T, Context>(input(0).count() / input(0).dim(axis),
input(0).dim(axis),
inner_dim,
1.0,
dYdata,
prob_data);
math::Mul<T, Context>(output(0)->count(), prob_data, dXdata, dXdata);
return;
}
T normalizer;
if (normalization == "VALID") normalizer = math::ASum<T, Context>(this->valid.count(), valid_data);
else if (normalization == "BATCH_SIZE") normalizer = outer_dim;
else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1;
auto* dYdata = input(-1).template data<T, CPUContext>();
math::Scal<T, Context>(output(0)->count(), dYdata[0] / normalizer, dXdata);
}
template <class Context>
void SparseSoftmaxFocalLossGradientOp<Context>::RunOnDevice() {
this->prob = ws()->GetTensor("_t_" + anchor() + "_softmax_prob");
scale = ws()->GetTensor("_t_" + anchor() + "_focal_scale");
outer_dim = this->prob->count(0, axis);
inner_dim = this->prob->count(axis + 1);
output(0)->ReshapeLike(input(0));
this->valid.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
if (input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(SparseSoftmaxFocalLossGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(SparseSoftmaxFocalLossGradient);
#endif
OPERATOR_SCHEMA(SparseSoftmaxFocalLossGradient).NumInputs(3).NumOutputs(1);
class GetSparseSoftmaxFocalLossGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSparseSoftmaxFocalLossGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
vector<string> {I(0), I(1), GO(0)},
vector<string> {GI(0)});
}
};
REGISTER_GRADIENT(SparseSoftmaxFocalLoss, GetSparseSoftmaxFocalLossGradient);
} // namespace dragon
\ No newline at end of file
......@@ -104,7 +104,7 @@ DEPLOY_CUDA(MPIBroadcastGradient);
OPERATOR_SCHEMA(MPIBroadcastGradient).NumInputs(1).NumOutputs(1);
class GetMPIBroadcastGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetMPIBroadcastGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -86,7 +86,7 @@ DEPLOY_CUDA(MPIGatherGradient);
OPERATOR_SCHEMA(MPIGatherGradient).NumInputs(2, INT_MAX).NumOutputs(1);
class GetMPIGatherGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetMPIGatherGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs(1, I(0));
......
......@@ -63,7 +63,7 @@ void BatchNormOp<Context>::RunWithType() {
1.0,
Ydata);
if (!use_global_stats) {
if (!use_global_stats && !is_recomputing) {
// Var(X) = E((X - EX) ^ 2)
math::Square<T, Context>(output(0)->count(), Ydata, Std_data);
math::Gemv<T, Context>(CblasNoTrans, nbychans, spatial_dim,
......@@ -120,9 +120,12 @@ void BatchNormOp<Context>::RunOnDevice() {
if (use_stats == -1) use_global_stats = phase() == "TEST" ? true : false;
else use_global_stats = use_stats == 1 ? true : false;
is_recomputing = ws()->GetTensor("_t_global_recompute_flag")
->template data<bool, CPUContext>()[0];
// if inplace is enabled, Act/Exp/Pow/Norm ops must not precede this op during training
if (inplace) output(0)->Share(input(0));
if (input(0).template IsType<float>()) RunWithType<float>();
else if (input(0).template IsType<float16>()) RunWithType<float16>();
else LOG(FATAL) << "unsupported input types.";
......@@ -248,18 +251,6 @@ void BatchNormGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void BatchNormGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void BatchNormGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(BatchNormGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(BatchNormGradient);
......@@ -267,7 +258,7 @@ DEPLOY_CUDA(BatchNormGradient);
OPERATOR_SCHEMA(BatchNormGradient).NumInputs(3).NumOutputs(1);
class GetBatchNormGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetBatchNormGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -72,7 +72,7 @@ void BatchRenormOp<Context>::RunWithType() {
1.0,
Ydata);
if (!use_global_stats) {
if (!use_global_stats && !is_recomputing) {
// Var(X) = E((X - EX) ^ 2)
math::Pow<T, Context>(stddev->count(), 2, Ydata, Std_data);
math::Gemv<T, Context>(CblasNoTrans, nbychans, spatial_dim,
......@@ -97,7 +97,7 @@ void BatchRenormOp<Context>::RunWithType() {
math::AddScalar<T, Context>(mean.count(), eps, tVar_data);
math::Pow<T, Context>(mean.count(), 0.5, tVar_data, tVar_data);
if (!use_global_stats) {
if (!use_global_stats && !is_recomputing) {
// normalize history var
math::AddScalar<T, Context>(mean.count(), eps, thVar_data);
math::Pow<T, Context>(mean.count(), 0.5, thVar_data, thVar_data);
......@@ -183,6 +183,8 @@ void BatchRenormOp<Context>::RunOnDevice() {
if (use_stats == -1) use_global_stats = phase() == "TEST" ? true : false;
else use_global_stats = use_stats == 1 ? true : false;
is_recomputing = ws()->GetTensor("_t_global_recompute_flag")
->template data<bool, CPUContext>()[0];
// if inplace is enabled, Act/Exp/Pow/Norm ops must not precede this op during training
if (inplace) output(0)->Share(input(0));
......@@ -314,7 +316,7 @@ void BatchRenormGradientOp<Context>::RunWithType() {
// release buffer
ws()->ReleaseBuffer(stddev);
ws()->ReleaseBuffer(x_norm, true);
ws()->ReleaseBuffer(x_norm, "Common", true);
}
template <class Context>
......@@ -336,18 +338,6 @@ void BatchRenormGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void BatchRenormGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void BatchRenormGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(BatchRenormGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(BatchRenormGradient);
......@@ -355,7 +345,7 @@ DEPLOY_CUDA(BatchRenormGradient);
OPERATOR_SCHEMA(BatchRenormGradient).NumInputs(3).NumOutputs(1);
class GetBatchRenormGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetBatchRenormGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -54,7 +54,7 @@ void CuDNNBNOp<Context>::SpatialRunWithType() {
bn_desc,
Sdata,
Bdata,
1.0 - this->momentum,
is_recomputing ? 0.0 : 1.0 - this->momentum,
hMean_data,
hVar_data,
this->eps,
......@@ -110,7 +110,7 @@ void CuDNNBNOp<Context>::PerActivationRunWithType() {
bn_desc,
Sdata,
Bdata,
1.0 - this->momentum,
is_recomputing ? 0.0 : 1.0 - this->momentum,
hMean_data,
hVar_data,
this->eps,
......@@ -133,6 +133,8 @@ void CuDNNBNOp<Context>::RunOnDevice() {
if (this->use_stats == -1) use_global_stats = phase() == "TEST" ? true : false;
else use_global_stats = this->use_stats == 1 ? true : false;
is_recomputing = ws()->GetTensor("_t_global_recompute_flag")
->template data<bool, CPUContext>()[0];
if (input(0).template IsType<float>()) {
if (input(0).ndim() == 4) SpatialRunWithType<float>();
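Note on the recompute flag used in the hunks above (an illustrative sketch, not part of the commit): cuDNN folds new batch statistics into the running ones with an exponential-average factor, so passing 0 whenever "_t_global_recompute_flag" is set keeps a recomputation pass from polluting the history that the original forward pass already wrote.
// Illustrative only: the moving-average rule applied per training call.
//   running = (1 - factor) * running + factor * batch_stat
// factor = 1 - momentum  -> normal training update
// factor = 0             -> recompute pass, history left untouched
static float UpdateRunningStat(float running, float batch_stat, float factor) {
    return (1.0f - factor) * running + factor * batch_stat;
}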
......@@ -344,28 +346,22 @@ void CuDNNBNGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void BNGradientOp<Context>::ShareBeforeRun() {
void BNGradientOp<Context>::ShareGradient() {
if (use_global_stats) {
if (output(0)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
Tensor* dX = ws()->GetBuffer("Grad");
output(0)->Replace(*dX);
}
} else {
if (output(0)->name() != "ignore" ||
output(1)->name() != "ignore" ||
output(2)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
Tensor* dX = ws()->GetBuffer("Grad");
output(0)->Replace(*dX);
}
}
}
template <class Context>
void BNGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(BNGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(BNGradient);
......@@ -374,7 +370,7 @@ OPERATOR_SCHEMA(BNGradient).NumInputs(5).NumOutputs(3);
DEPLOY_CUDNN(BNGradient);
class GetBNGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetBNGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -155,18 +155,6 @@ void InstanceNormGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void InstanceNormGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void InstanceNormGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(InstanceNormGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(InstanceNormGradient);
......@@ -174,7 +162,7 @@ DEPLOY_CUDA(InstanceNormGradient);
OPERATOR_SCHEMA(InstanceNormGradient).NumInputs(3).NumOutputs(1);
class GetInstanceNormGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetInstanceNormGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -180,18 +180,6 @@ void L2NormGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void L2NormGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void L2NormGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(L2NormGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(L2NormGradient);
......@@ -199,7 +187,7 @@ DEPLOY_CUDA(L2NormGradient);
OPERATOR_SCHEMA(L2NormGradient).NumInputs(2).NumOutputs(1);
class GetL2NormGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetL2NormGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -85,7 +85,7 @@ DEPLOY_CUDA(LSTMUnitGradient);
OPERATOR_SCHEMA(LSTMUnitGradient).NumInputs(5).NumOutputs(2);
class GetLSTMUnitGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetLSTMUnitGradient);
vector<OperatorDef> MakeDefs() override{
return SingleDef(def.type() + "Gradient", "",
......
#include <algorithm>
#include "operators/common/utils_op.h"
#include "operators/utils/accuracy_op.h"
#include "utils/math_functions.h"
#include "utils/op_kernel.h"
namespace dragon {
template <class Context> template <typename T>
void CopyOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>();
auto* Ydata = output(0)->template mutable_data<T, Context>();
ctx().template Copy<T, Context, Context>(output(0)->count(), Ydata, Xdata);
}
template <class Context>
void CopyOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0));
if (input(0).template IsType<float>()) RunWithType<float>();
else if (input(0).template IsType<float16>()) RunWithType<float16>();
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(Copy);
#ifdef WITH_CUDA
DEPLOY_CUDA(Copy);
#endif
OPERATOR_SCHEMA(Copy).NumInputs(1).NumOutputs(1);
NO_GRADIENT(Copy);
template <class Context> template <typename T>
void AccuracyOp<Context>::RunWithType() {
if (OutputSize() > 1) {
......@@ -94,35 +70,4 @@ OPERATOR_SCHEMA(Accuracy).NumInputs(2).NumOutputs(1, 2);
NO_GRADIENT(Accuracy);
template <class Context> template <typename T>
void OneHotOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>();
auto* Ydata = output(0)->template mutable_data<T, Context>();
math::Set<T, Context>(output(0)->count(),
dragon_cast<T, float>(float(off_value)),
Ydata);
kernel::OneHot<T, Context>(input(0).count(), depth, on_value, Xdata, Ydata);
}
template <class Context>
void OneHotOp<Context>::RunOnDevice() {
vector<TIndex> dims = input(0).dims();
dims.push_back(depth);
output(0)->Reshape(dims);
if (input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(OneHot);
#ifdef WITH_CUDA
DEPLOY_CUDA(OneHot);
#endif
OPERATOR_SCHEMA(OneHot).NumInputs(1).NumOutputs(1);
NO_GRADIENT(OneHot);
} // namespace dragon
\ No newline at end of file
#include "operators/utils/copy_op.h"
namespace dragon {
template <class Context> template <typename T>
void CopyOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>();
auto* Ydata = output(0)->template mutable_data<T, Context>();
ctx().template Copy<T, Context, Context>(output(0)->count(), Ydata, Xdata);
}
template <class Context>
void CopyOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0));
if (input(0).template IsType<float>()) RunWithType<float>();
else if (input(0).template IsType<float16>()) RunWithType<float16>();
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(Copy);
#ifdef WITH_CUDA
DEPLOY_CUDA(Copy);
#endif
OPERATOR_SCHEMA(Copy).NumInputs(1).NumOutputs(1);
NO_GRADIENT(Copy);
} // namespace dragon
\ No newline at end of file
#include "operators/utils/one_hot_op.h"
#include "utils/math_functions.h"
#include "utils/op_kernel.h"
namespace dragon {
template <class Context> template <typename T>
void OneHotOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>();
auto* Ydata = output(0)->template mutable_data<T, Context>();
math::Set<T, Context>(output(0)->count(),
dragon_cast<T, float>(float(off_value)),
Ydata);
kernel::OneHot<T, Context>(input(0).count(), depth, on_value, Xdata, Ydata);
}
template <class Context>
void OneHotOp<Context>::RunOnDevice() {
vector<TIndex> dims = input(0).dims();
dims.push_back(depth);
output(0)->Reshape(dims);
if (input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(OneHot);
#ifdef WITH_CUDA
DEPLOY_CUDA(OneHot);
#endif
OPERATOR_SCHEMA(OneHot).NumInputs(1).NumOutputs(1);
NO_GRADIENT(OneHot);
} // namespace dragon
\ No newline at end of file
......@@ -96,20 +96,6 @@ void ConvGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void ConvGradientOp<Context>::ShareBeforeRun() {
if (output(0)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
}
template <class Context>
void ConvGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ConvGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ConvGradient);
......@@ -117,7 +103,7 @@ DEPLOY_CUDA(ConvGradient);
OPERATOR_SCHEMA(ConvGradient).NumInputs(3).NumOutputs(3);
class GetConvGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetConvGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -97,20 +97,6 @@ void DeConvGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void DeConvGradientOp<Context>::ShareBeforeRun() {
if (output(0)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
}
template <class Context>
void DeConvGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(DeConvGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(DeConvGradient);
......@@ -118,7 +104,7 @@ DEPLOY_CUDA(DeConvGradient);
OPERATOR_SCHEMA(DeConvGradient).NumInputs(3).NumOutputs(3);
class GetDeConvGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetDeConvGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -4,12 +4,6 @@
namespace dragon {
template <class Context>
void DenseConcatOp<Context>::RunOnDevice() {
ConcatOp<Context>::RunOnDevice();
input(0).Release(); // keep shape, just release mem
}
DEPLOY_CPU(DenseConcat);
#ifdef WITH_CUDA
DEPLOY_CUDA(DenseConcat);
......@@ -17,16 +11,18 @@ DEPLOY_CUDA(DenseConcat);
OPERATOR_SCHEMA(DenseConcat).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void DenseConcatGradientOp<Context>::RunWithType() {
// restore X1 from Y
auto* Ydata = input(-2).template data<T, Context>();
auto* Xdata = input(0).template mutable_data<T, Context>();
this->x_concat_dim = input(0).dim(this->axis);
TIndex count = input(0).count();
void DenseConcatGradientOp<Context>::RestoreX1() {
CHECK_GT(growth_rate, 0) << "invalid growth rate, please preset it.";
this->concat_dims = input(-1).dims();
this->y_concat_dim = this->concat_dims[this->axis];
this->outer_dim = input(-1).count(0, this->axis);
this->inner_dim = input(-1).count(this->axis + 1);
this->concat_dims[this->axis] -= growth_rate;
input(0).Reshape(this->concat_dims);
this->x_concat_dim = input(0).dim(this->axis);
TIndex count = input(0).count();
auto* Ydata = input(-2).template data<T, Context>();
auto* Xdata = input(0).template mutable_data<T, Context>();
kernel::ConcatGrad<T, Context>(count,
this->outer_dim,
this->inner_dim,
......@@ -39,26 +35,65 @@ void DenseConcatGradientOp<Context>::RunWithType() {
}
template <class Context>
void DenseConcatGradientOp<Context>::RunOnDevice() {
if (input(0).template IsType<float>()) RunWithType<float>();
else if (input(0).template IsType<float16>()) RunWithType<float16>();
else LOG(FATAL) << "unsupported input types.";
void DenseConcatGradientOp<Context>::ElimateCorruption() {
Set<string> all_heads;
queue<int> safe_heads;
Tensor* head = ws()->GetTensor("_t_mirrow_stage_head");
string* head_data = head->mutable_data<string, CPUContext>();
for (int i = 0; i < head->count(); i++) all_heads.insert(head_data[i]);
ConcatGradientOp<Context>::RunOnDevice();
}
// sub-graph run
if (input(0).is_corrupted() && !all_heads.count(input(0).name())) {
// pre-process
LOG(DEBUG) << "Tensor(" << input(0).name() << ") is corrupted, recompute... ";
for (int i = 0; i < head->count(); i++) {
bool safe = true;
for (int j = 0; j < InputSize(); j++)
if (head_data[i] == input(j).name()) safe = false;
if (safe) safe_heads.push(i);
}
int idx = safe_heads.front();
safe_heads.pop();
Tensor* buffer = ws()->GetTensor("_t_mirrow_stage_buffer_" + dragon_cast<string, int>(idx));
input(0).Move(buffer->memory());
head_data[idx] = input(0).name();
if (input(-2).template IsType<float>()) RestoreX1<float>();
else if (input(-2).template IsType<float16>()) RestoreX1<float16>();
else LOG(FATAL) << "unsupported input types.";
// post-process
if (input(0).memory() != buffer->memory()) buffer->Move(input(0).memory());
}
template <class Context>
void DenseConcatGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
// check available head
while (!safe_heads.empty()) safe_heads.pop();
all_heads.clear();
for (int i = 0; i < head->count(); i++) {
bool safe = true;
for (int j = 0; j < InputSize(); j++)
if (head_data[i] == input(j).name()) safe = false;
if (safe) safe_heads.push(i);
all_heads.insert(head_data[i]);
}
template <class Context>
void DenseConcatGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
Tensor* Y = &input(-2);
ws()->ReleaseBuffer(dY);
ws()->ReleaseBuffer(Y, true);
// pre-process
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->is_corrupted()) {
bool inplace_flag = false;
for (int j = 0; j < InputSize(); j++)
if (output(i)->name() == input(j).name()) inplace_flag = true;
if (inplace_flag || all_heads.count(output(i)->name())) continue; // no fresh buffer needed
CHECK(!safe_heads.empty())
<< "\nat most " << safe_heads.size() << " [safe] / "
<< all_heads.size() << " [total] buffers can be used for corrupted outputs in "
<< "(" << name() << ", " << type() << "), "
<< "\nadd WORKSPACE_MAX_CORRUPTED_SIZE for a more powerful mirror stage?";
int idx = safe_heads.front();
safe_heads.pop();
Tensor* buffer = ws()->GetTensor("_t_mirrow_stage_buffer_" + dragon_cast<string, int>(idx));
output(i)->Move(buffer->memory());
head_data[idx] = output(i)->name();
}
}
}
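The head-table logic above is easier to follow with a stripped-down model (illustrative only; the helper below and its names are not part of the commit): every shared mirror-stage buffer has a head entry naming the corrupted tensor currently living in it, and a slot may be reassigned only if its occupant is not one of the running op's inputs.
#include <string>
#include <vector>

// Return the index of a buffer slot that is safe to hand to a corrupted tensor,
// or -1 if every slot is still occupied by one of this op's inputs.
static int PickSafeSlot(const std::vector<std::string>& heads,
                        const std::vector<std::string>& op_inputs) {
    for (int i = 0; i < (int)heads.size(); i++) {
        bool safe = true;
        for (const auto& in : op_inputs)
            if (heads[i] == in) { safe = false; break; }
        if (safe) return i;   // reassigning this slot cannot clobber a live input
    }
    return -1;                // no free slot: WORKSPACE_MAX_CORRUPTED_SIZE is too small
}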
DEPLOY_CPU(DenseConcatGradient);
......@@ -68,7 +103,7 @@ DEPLOY_CUDA(DenseConcatGradient);
OPERATOR_SCHEMA(DenseConcatGradient).NumInputs(4).NumOutputs(2);
class GetDenseConcatGradient : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetDenseConcatGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -247,7 +247,7 @@ DEPLOY_CUDA(LRNGradient);
OPERATOR_SCHEMA(LRNGradient).NumInputs(3).NumOutputs(1);
class GetLRNGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetLRNGradient);
vector<OperatorDef> MakeDefs() override{
return SingleDef(def.type() + "Gradient", "",
......
......@@ -54,18 +54,6 @@ void NNResizeGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void NNResizeGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void NNResizeGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(NNResizeGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(NNResizeGradient);
......@@ -73,7 +61,7 @@ DEPLOY_CUDA(NNResizeGradient);
OPERATOR_SCHEMA(NNResizeGradient).NumInputs(2).NumOutputs(1);
class GetNNResizeGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetNNResizeGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -151,18 +151,6 @@ void PoolingGradientOp<Context>::RunOnDevice() {
}
}
template <class Context>
void PoolingGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void PoolingGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(PoolingGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(PoolingGradient);
......@@ -170,7 +158,7 @@ DEPLOY_CUDA(PoolingGradient);
OPERATOR_SCHEMA(PoolingGradient).NumInputs(3).NumOutputs(1);
class GetPoolingGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetPoolingGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -57,17 +57,10 @@ void ROIAlignGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void ROIAlignGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void ROIAlignGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
ws()->ReleaseBuffer(mask_h);
ws()->ReleaseBuffer(mask_w);
void ROIAlignGradientOp<Context>::CleanResource() {
Operator<Context>::CleanResource();
ws()->ReleaseBuffer(mask_h, "Common", true);
ws()->ReleaseBuffer(mask_w, "Common", true);
}
DEPLOY_CPU(ROIAlignGradient);
......@@ -77,7 +70,7 @@ DEPLOY_CUDA(ROIAlignGradient);
OPERATOR_SCHEMA(ROIAlignGradient).NumInputs(3).NumOutputs(1);
class GetROIAlignGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetROIAlignGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -55,16 +55,9 @@ void ROIPoolingGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void ROIPoolingGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void ROIPoolingGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
ws()->ReleaseBuffer(mask);
void ROIPoolingGradientOp<Context>::CleanResource() {
Operator<Context>::CleanResource();
ws()->ReleaseBuffer(mask, "Common", true);
}
DEPLOY_CPU(ROIPoolingGradient);
......@@ -74,7 +67,7 @@ DEPLOY_CUDA(ROIPoolingGradient);
OPERATOR_SCHEMA(ROIPoolingGradient).NumInputs(3).NumOutputs(1);
class GetROIPoolingGradient final : public GradientMakerBase {
public:
public:
GRADIENT_MAKER_CTOR(GetROIPoolingGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
......
......@@ -22,5 +22,3 @@ message LayerParameter {
optional string name = 1; // the layer name
repeated BlobProto blobs = 7;
}
\ No newline at end of file
......@@ -137,7 +137,7 @@ void protobuf_AssignDesc_dragon_2eproto() {
::google::protobuf::MessageFactory::generated_factory(),
sizeof(DeviceOption));
OperatorDef_descriptor_ = file->message_type(4);
static const int OperatorDef_offsets_[7] = {
static const int OperatorDef_offsets_[8] = {
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, input_),
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, output_),
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, name_),
......@@ -145,6 +145,7 @@ void protobuf_AssignDesc_dragon_2eproto() {
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, arg_),
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, device_option_),
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, debug_mode_),
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, share_grads_),
};
OperatorDef_reflection_ =
new ::google::protobuf::internal::GeneratedMessageReflection(
......@@ -217,7 +218,7 @@ void protobuf_AssignDesc_dragon_2eproto() {
sizeof(TensorFiller));
TensorFiller_VarianceNorm_descriptor_ = TensorFiller_descriptor_->enum_type(0);
GraphDef_descriptor_ = file->message_type(8);
static const int GraphDef_offsets_[9] = {
static const int GraphDef_offsets_[10] = {
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, name_),
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, op_),
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, graph_type_),
......@@ -227,6 +228,7 @@ void protobuf_AssignDesc_dragon_2eproto() {
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, g_target_),
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, u_target_),
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, debug_mode_),
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, share_grads_),
};
GraphDef_reflection_ =
new ::google::protobuf::internal::GeneratedMessageReflection(
......@@ -316,29 +318,31 @@ void protobuf_AddDesc_dragon_2eproto() {
"oats\030\005 \003(\002\022\014\n\004ints\030\006 \003(\005\022\017\n\007strings\030\007 \003("
"\t\"p\n\014DeviceOption\022%\n\013device_type\030\001 \001(\0162\013"
".DeviceType:\003CPU\022\021\n\006gpu_id\030\002 \001(\005:\0010\022\026\n\013r"
"andom_seed\030\003 \001(\r:\0013\022\016\n\006engine\030\004 \001(\t\"\241\001\n\013"
"andom_seed\030\003 \001(\r:\0013\022\016\n\006engine\030\004 \001(\t\"\275\001\n\013"
"OperatorDef\022\r\n\005input\030\001 \003(\t\022\016\n\006output\030\002 \003"
"(\t\022\014\n\004name\030\003 \001(\t\022\014\n\004type\030\004 \001(\t\022\026\n\003arg\030\005 "
"\003(\0132\t.Argument\022$\n\rdevice_option\030\006 \001(\0132\r."
"DeviceOption\022\031\n\ndebug_mode\030\007 \001(\010:\005false\""
"=\n\016GradientTarget\022\014\n\004cost\030\001 \001(\t\022\013\n\003wrt\030\002"
" \001(\t\022\020\n\010external\030\003 \001(\t\"R\n\014UpdateTarget\022\014"
"\n\004name\030\001 \001(\t\022\014\n\004type\030\002 \001(\t\022\016\n\006tensor\030\003 \003"
"(\t\022\026\n\003arg\030\004 \003(\0132\t.Argument\"\215\002\n\014TensorFil"
"ler\022\016\n\006tensor\030\001 \001(\t\022\026\n\004type\030\002 \001(\t:\010const"
"ant\022\020\n\005value\030\003 \001(\002:\0010\022\016\n\003low\030\004 \001(\002:\0010\022\017\n"
"\004high\030\005 \001(\002:\0011\022\017\n\004mean\030\006 \001(\002:\0010\022\016\n\003std\030\007"
" \001(\002:\0011\022\020\n\005scale\030\010 \001(\002:\0013\0229\n\rvariance_no"
"rm\030\t \001(\0162\032.TensorFiller.VarianceNorm:\006FA"
"N_IN\"4\n\014VarianceNorm\022\n\n\006FAN_IN\020\000\022\013\n\007FAN_"
"OUT\020\001\022\013\n\007FAN_AVG\020\002\"\363\001\n\010GraphDef\022\014\n\004name\030"
"\001 \001(\t\022\030\n\002op\030\002 \003(\0132\014.OperatorDef\022\022\n\ngraph"
"_type\030\003 \001(\t\022$\n\rdevice_option\030\005 \001(\0132\r.Dev"
"iceOption\022\026\n\003arg\030\006 \003(\0132\t.Argument\022\016\n\006tar"
"get\030\007 \003(\t\022!\n\010g_target\030\010 \003(\0132\017.GradientTa"
"rget\022\037\n\010u_target\030\t \003(\0132\r.UpdateTarget\022\031\n"
"\ndebug_mode\030\n \001(\010:\005false*+\n\nDeviceType\022\007"
"\n\003CPU\020\000\022\010\n\004CUDA\020\001\022\n\n\006OPENCL\020\002", 1429);
"DeviceOption\022\031\n\ndebug_mode\030\007 \001(\010:\005false\022"
"\032\n\013share_grads\030\010 \001(\010:\005false\"=\n\016GradientT"
"arget\022\014\n\004cost\030\001 \001(\t\022\013\n\003wrt\030\002 \001(\t\022\020\n\010exte"
"rnal\030\003 \001(\t\"R\n\014UpdateTarget\022\014\n\004name\030\001 \001(\t"
"\022\014\n\004type\030\002 \001(\t\022\016\n\006tensor\030\003 \003(\t\022\026\n\003arg\030\004 "
"\003(\0132\t.Argument\"\215\002\n\014TensorFiller\022\016\n\006tenso"
"r\030\001 \001(\t\022\026\n\004type\030\002 \001(\t:\010constant\022\020\n\005value"
"\030\003 \001(\002:\0010\022\016\n\003low\030\004 \001(\002:\0010\022\017\n\004high\030\005 \001(\002:"
"\0011\022\017\n\004mean\030\006 \001(\002:\0010\022\016\n\003std\030\007 \001(\002:\0011\022\020\n\005s"
"cale\030\010 \001(\002:\0013\0229\n\rvariance_norm\030\t \001(\0162\032.T"
"ensorFiller.VarianceNorm:\006FAN_IN\"4\n\014Vari"
"anceNorm\022\n\n\006FAN_IN\020\000\022\013\n\007FAN_OUT\020\001\022\013\n\007FAN"
"_AVG\020\002\"\217\002\n\010GraphDef\022\014\n\004name\030\001 \001(\t\022\030\n\002op\030"
"\002 \003(\0132\014.OperatorDef\022\022\n\ngraph_type\030\003 \001(\t\022"
"$\n\rdevice_option\030\005 \001(\0132\r.DeviceOption\022\026\n"
"\003arg\030\006 \003(\0132\t.Argument\022\016\n\006target\030\007 \003(\t\022!\n"
"\010g_target\030\010 \003(\0132\017.GradientTarget\022\037\n\010u_ta"
"rget\030\t \003(\0132\r.UpdateTarget\022\031\n\ndebug_mode\030"
"\n \001(\010:\005false\022\032\n\013share_grads\030\013 \001(\010:\005false"
"*+\n\nDeviceType\022\007\n\003CPU\020\000\022\010\n\004CUDA\020\001\022\n\n\006OPE"
"NCL\020\002", 1485);
::google::protobuf::MessageFactory::InternalRegisterGeneratedFile(
"dragon.proto", &protobuf_RegisterTypes);
TensorProto::default_instance_ = new TensorProto();
......@@ -2171,6 +2175,7 @@ const int OperatorDef::kTypeFieldNumber;
const int OperatorDef::kArgFieldNumber;
const int OperatorDef::kDeviceOptionFieldNumber;
const int OperatorDef::kDebugModeFieldNumber;
const int OperatorDef::kShareGradsFieldNumber;
#endif // !_MSC_VER
OperatorDef::OperatorDef()
......@@ -2197,6 +2202,7 @@ void OperatorDef::SharedCtor() {
type_ = const_cast< ::std::string*>(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
device_option_ = NULL;
debug_mode_ = false;
share_grads_ = false;
::memset(_has_bits_, 0, sizeof(_has_bits_));
}
......@@ -2239,7 +2245,18 @@ OperatorDef* OperatorDef::New() const {
}
void OperatorDef::Clear() {
if (_has_bits_[0 / 32] & 108) {
#define OFFSET_OF_FIELD_(f) (reinterpret_cast<char*>( \
&reinterpret_cast<OperatorDef*>(16)->f) - \
reinterpret_cast<char*>(16))
#define ZR_(first, last) do { \
size_t f = OFFSET_OF_FIELD_(first); \
size_t n = OFFSET_OF_FIELD_(last) - f + sizeof(last); \
::memset(&first, 0, n); \
} while (0)
if (_has_bits_[0 / 32] & 236) {
ZR_(debug_mode_, share_grads_);
if (has_name()) {
if (name_ != &::google::protobuf::internal::GetEmptyStringAlreadyInited()) {
name_->clear();
......@@ -2253,8 +2270,11 @@ void OperatorDef::Clear() {
if (has_device_option()) {
if (device_option_ != NULL) device_option_->::DeviceOption::Clear();
}
debug_mode_ = false;
}
#undef OFFSET_OF_FIELD_
#undef ZR_
input_.Clear();
output_.Clear();
arg_.Clear();
......@@ -2382,6 +2402,21 @@ bool OperatorDef::MergePartialFromCodedStream(
} else {
goto handle_unusual;
}
if (input->ExpectTag(64)) goto parse_share_grads;
break;
}
// optional bool share_grads = 8 [default = false];
case 8: {
if (tag == 64) {
parse_share_grads:
DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>(
input, &share_grads_)));
set_has_share_grads();
} else {
goto handle_unusual;
}
if (input->ExpectAtEnd()) goto success;
break;
}
......@@ -2468,6 +2503,11 @@ void OperatorDef::SerializeWithCachedSizes(
::google::protobuf::internal::WireFormatLite::WriteBool(7, this->debug_mode(), output);
}
// optional bool share_grads = 8 [default = false];
if (has_share_grads()) {
::google::protobuf::internal::WireFormatLite::WriteBool(8, this->share_grads(), output);
}
if (!unknown_fields().empty()) {
::google::protobuf::internal::WireFormat::SerializeUnknownFields(
unknown_fields(), output);
......@@ -2539,6 +2579,11 @@ void OperatorDef::SerializeWithCachedSizes(
target = ::google::protobuf::internal::WireFormatLite::WriteBoolToArray(7, this->debug_mode(), target);
}
// optional bool share_grads = 8 [default = false];
if (has_share_grads()) {
target = ::google::protobuf::internal::WireFormatLite::WriteBoolToArray(8, this->share_grads(), target);
}
if (!unknown_fields().empty()) {
target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray(
unknown_fields(), target);
......@@ -2577,6 +2622,11 @@ int OperatorDef::ByteSize() const {
total_size += 1 + 1;
}
// optional bool share_grads = 8 [default = false];
if (has_share_grads()) {
total_size += 1 + 1;
}
}
// repeated string input = 1;
total_size += 1 * this->input_size();
......@@ -2641,6 +2691,9 @@ void OperatorDef::MergeFrom(const OperatorDef& from) {
if (from.has_debug_mode()) {
set_debug_mode(from.debug_mode());
}
if (from.has_share_grads()) {
set_share_grads(from.share_grads());
}
}
mutable_unknown_fields()->MergeFrom(from.unknown_fields());
}
......@@ -2671,6 +2724,7 @@ void OperatorDef::Swap(OperatorDef* other) {
arg_.Swap(&other->arg_);
std::swap(device_option_, other->device_option_);
std::swap(debug_mode_, other->debug_mode_);
std::swap(share_grads_, other->share_grads_);
std::swap(_has_bits_[0], other->_has_bits_[0]);
_unknown_fields_.Swap(&other->_unknown_fields_);
std::swap(_cached_size_, other->_cached_size_);
......@@ -4066,6 +4120,7 @@ const int GraphDef::kTargetFieldNumber;
const int GraphDef::kGTargetFieldNumber;
const int GraphDef::kUTargetFieldNumber;
const int GraphDef::kDebugModeFieldNumber;
const int GraphDef::kShareGradsFieldNumber;
#endif // !_MSC_VER
GraphDef::GraphDef()
......@@ -4092,6 +4147,7 @@ void GraphDef::SharedCtor() {
graph_type_ = const_cast< ::std::string*>(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
device_option_ = NULL;
debug_mode_ = false;
share_grads_ = false;
::memset(_has_bits_, 0, sizeof(_has_bits_));
}
......@@ -4134,6 +4190,16 @@ GraphDef* GraphDef::New() const {
}
void GraphDef::Clear() {
#define OFFSET_OF_FIELD_(f) (reinterpret_cast<char*>( \
&reinterpret_cast<GraphDef*>(16)->f) - \
reinterpret_cast<char*>(16))
#define ZR_(first, last) do { \
size_t f = OFFSET_OF_FIELD_(first); \
size_t n = OFFSET_OF_FIELD_(last) - f + sizeof(last); \
::memset(&first, 0, n); \
} while (0)
if (_has_bits_[0 / 32] & 13) {
if (has_name()) {
if (name_ != &::google::protobuf::internal::GetEmptyStringAlreadyInited()) {
......@@ -4149,7 +4215,11 @@ void GraphDef::Clear() {
if (device_option_ != NULL) device_option_->::DeviceOption::Clear();
}
}
debug_mode_ = false;
ZR_(debug_mode_, share_grads_);
#undef OFFSET_OF_FIELD_
#undef ZR_
op_.Clear();
arg_.Clear();
target_.Clear();
......@@ -4301,6 +4371,21 @@ bool GraphDef::MergePartialFromCodedStream(
} else {
goto handle_unusual;
}
if (input->ExpectTag(88)) goto parse_share_grads;
break;
}
// optional bool share_grads = 11 [default = false];
case 11: {
if (tag == 88) {
parse_share_grads:
DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>(
input, &share_grads_)));
set_has_share_grads();
} else {
goto handle_unusual;
}
if (input->ExpectAtEnd()) goto success;
break;
}
......@@ -4395,6 +4480,11 @@ void GraphDef::SerializeWithCachedSizes(
::google::protobuf::internal::WireFormatLite::WriteBool(10, this->debug_mode(), output);
}
// optional bool share_grads = 11 [default = false];
if (has_share_grads()) {
::google::protobuf::internal::WireFormatLite::WriteBool(11, this->share_grads(), output);
}
if (!unknown_fields().empty()) {
::google::protobuf::internal::WireFormat::SerializeUnknownFields(
unknown_fields(), output);
......@@ -4477,6 +4567,11 @@ void GraphDef::SerializeWithCachedSizes(
target = ::google::protobuf::internal::WireFormatLite::WriteBoolToArray(10, this->debug_mode(), target);
}
// optional bool share_grads = 11 [default = false];
if (has_share_grads()) {
target = ::google::protobuf::internal::WireFormatLite::WriteBoolToArray(11, this->share_grads(), target);
}
if (!unknown_fields().empty()) {
target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray(
unknown_fields(), target);
......@@ -4517,6 +4612,11 @@ int GraphDef::ByteSize() const {
total_size += 1 + 1;
}
// optional bool share_grads = 11 [default = false];
if (has_share_grads()) {
total_size += 1 + 1;
}
}
// repeated .OperatorDef op = 2;
total_size += 1 * this->op_size();
......@@ -4602,6 +4702,9 @@ void GraphDef::MergeFrom(const GraphDef& from) {
if (from.has_debug_mode()) {
set_debug_mode(from.debug_mode());
}
if (from.has_share_grads()) {
set_share_grads(from.share_grads());
}
}
mutable_unknown_fields()->MergeFrom(from.unknown_fields());
}
......@@ -4634,6 +4737,7 @@ void GraphDef::Swap(GraphDef* other) {
g_target_.Swap(&other->g_target_);
u_target_.Swap(&other->u_target_);
std::swap(debug_mode_, other->debug_mode_);
std::swap(share_grads_, other->share_grads_);
std::swap(_has_bits_[0], other->_has_bits_[0]);
_unknown_fields_.Swap(&other->_unknown_fields_);
std::swap(_cached_size_, other->_cached_size_);
......
......@@ -815,6 +815,13 @@ class OperatorDef : public ::google::protobuf::Message {
inline bool debug_mode() const;
inline void set_debug_mode(bool value);
// optional bool share_grads = 8 [default = false];
inline bool has_share_grads() const;
inline void clear_share_grads();
static const int kShareGradsFieldNumber = 8;
inline bool share_grads() const;
inline void set_share_grads(bool value);
// @@protoc_insertion_point(class_scope:OperatorDef)
private:
inline void set_has_name();
......@@ -825,6 +832,8 @@ class OperatorDef : public ::google::protobuf::Message {
inline void clear_has_device_option();
inline void set_has_debug_mode();
inline void clear_has_debug_mode();
inline void set_has_share_grads();
inline void clear_has_share_grads();
::google::protobuf::UnknownFieldSet _unknown_fields_;
......@@ -837,6 +846,7 @@ class OperatorDef : public ::google::protobuf::Message {
::google::protobuf::RepeatedPtrField< ::Argument > arg_;
::DeviceOption* device_option_;
bool debug_mode_;
bool share_grads_;
friend void protobuf_AddDesc_dragon_2eproto();
friend void protobuf_AssignDesc_dragon_2eproto();
friend void protobuf_ShutdownFile_dragon_2eproto();
......@@ -1441,6 +1451,13 @@ class GraphDef : public ::google::protobuf::Message {
inline bool debug_mode() const;
inline void set_debug_mode(bool value);
// optional bool share_grads = 11 [default = false];
inline bool has_share_grads() const;
inline void clear_share_grads();
static const int kShareGradsFieldNumber = 11;
inline bool share_grads() const;
inline void set_share_grads(bool value);
// @@protoc_insertion_point(class_scope:GraphDef)
private:
inline void set_has_name();
......@@ -1451,6 +1468,8 @@ class GraphDef : public ::google::protobuf::Message {
inline void clear_has_device_option();
inline void set_has_debug_mode();
inline void clear_has_debug_mode();
inline void set_has_share_grads();
inline void clear_has_share_grads();
::google::protobuf::UnknownFieldSet _unknown_fields_;
......@@ -1465,6 +1484,7 @@ class GraphDef : public ::google::protobuf::Message {
::google::protobuf::RepeatedPtrField< ::GradientTarget > g_target_;
::google::protobuf::RepeatedPtrField< ::UpdateTarget > u_target_;
bool debug_mode_;
bool share_grads_;
friend void protobuf_AddDesc_dragon_2eproto();
friend void protobuf_AssignDesc_dragon_2eproto();
friend void protobuf_ShutdownFile_dragon_2eproto();
......@@ -2712,6 +2732,30 @@ inline void OperatorDef::set_debug_mode(bool value) {
// @@protoc_insertion_point(field_set:OperatorDef.debug_mode)
}
// optional bool share_grads = 8 [default = false];
inline bool OperatorDef::has_share_grads() const {
return (_has_bits_[0] & 0x00000080u) != 0;
}
inline void OperatorDef::set_has_share_grads() {
_has_bits_[0] |= 0x00000080u;
}
inline void OperatorDef::clear_has_share_grads() {
_has_bits_[0] &= ~0x00000080u;
}
inline void OperatorDef::clear_share_grads() {
share_grads_ = false;
clear_has_share_grads();
}
inline bool OperatorDef::share_grads() const {
// @@protoc_insertion_point(field_get:OperatorDef.share_grads)
return share_grads_;
}
inline void OperatorDef::set_share_grads(bool value) {
set_has_share_grads();
share_grads_ = value;
// @@protoc_insertion_point(field_set:OperatorDef.share_grads)
}
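A minimal usage sketch of the new field (assumed usage, not part of the commit); the accessors follow the standard proto2 pattern generated above:
OperatorDef op_def;
op_def.set_share_grads(true);          // opt this op into gradient-buffer sharing
bool enabled = op_def.share_grads();   // -> true
op_def.clear_share_grads();            // back to the default (false)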
// -------------------------------------------------------------------
// GradientTarget
......@@ -3904,6 +3948,30 @@ inline void GraphDef::set_debug_mode(bool value) {
// @@protoc_insertion_point(field_set:GraphDef.debug_mode)
}
// optional bool share_grads = 11 [default = false];
inline bool GraphDef::has_share_grads() const {
return (_has_bits_[0] & 0x00000200u) != 0;
}
inline void GraphDef::set_has_share_grads() {
_has_bits_[0] |= 0x00000200u;
}
inline void GraphDef::clear_has_share_grads() {
_has_bits_[0] &= ~0x00000200u;
}
inline void GraphDef::clear_share_grads() {
share_grads_ = false;
clear_has_share_grads();
}
inline bool GraphDef::share_grads() const {
// @@protoc_insertion_point(field_get:GraphDef.share_grads)
return share_grads_;
}
inline void GraphDef::set_share_grads(bool value) {
set_has_share_grads();
share_grads_ = value;
// @@protoc_insertion_point(field_set:GraphDef.share_grads)
}
// @@protoc_insertion_point(namespace_scope)
......
......@@ -50,6 +50,7 @@ message OperatorDef {
repeated Argument arg = 5;
optional DeviceOption device_option = 6;
optional bool debug_mode = 7 [default = false];
optional bool share_grads = 8 [default = false];
}
message GradientTarget {
......@@ -65,7 +66,6 @@ message UpdateTarget {
repeated Argument arg = 4;
}
// simply copy from caffe1
message TensorFiller {
optional string tensor = 1;
optional string type = 2 [default = 'constant'];
......@@ -89,4 +89,5 @@ message GraphDef {
repeated GradientTarget g_target = 8;
repeated UpdateTarget u_target = 9;
optional bool debug_mode = 10 [default = false];
optional bool share_grads = 11 [default = false];
}
\ No newline at end of file
......@@ -650,19 +650,6 @@ template <> void TransposeGrad<float16, CPUContext>(const int count,
}
}
/******************** common.utils ********************/
template <> void OneHot<float, CPUContext>(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
for (int i = 0; i < count; ++i) {
const int val = x[i];
y[i * depth + val] = on_value;
}
}
/******************** loss.l1_loss ********************/
template<> void AbsGrad<float, CPUContext>(const int count, const float* dy, float* dx) {
......@@ -673,7 +660,7 @@ template<> void AbsGrad<float, CPUContext>(const int count, const float* dy, flo
}
}
/******************** loss.sigmoid_cross_entropy_loss ********************/
/******************** loss.sigmoid_cross_entropy ********************/
template <> void SigmoidCrossEntropy<float, CPUContext>(const int count,
const float* x,
......@@ -712,7 +699,7 @@ template<> void SmoothL1Grad<float, CPUContext>(const int count,
}
}
/******************** loss.softmax_cross_entropy_loss ********************/
/******************** loss.softmax_cross_entropy ********************/
template <> void SoftmaxCrossEntropy<float, CPUContext>(const int count,
const float* prob,
......@@ -723,7 +710,7 @@ template <> void SoftmaxCrossEntropy<float, CPUContext>(const int count,
}
}
/******************** loss.softmax_loss ********************/
/******************** loss.sparse_softmax_cross_entropy ********************/
template <> void SparseSoftmaxCrossEntropy<float, CPUContext>(const int count,
const int classes,
......@@ -735,8 +722,7 @@ template <> void SparseSoftmaxCrossEntropy<float, CPUContext>(const int count,
float* valid,
Tensor* ignore) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CPUContext>() :
nullptr;
ignore->data<int, CPUContext>() : nullptr;
const int dim = count / outer_dim;
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
......@@ -751,27 +737,25 @@ template <> void SparseSoftmaxCrossEntropy<float, CPUContext>(const int count,
}
if (k == ignore->count()) {
float labeled_prob = prob[i * dim + label * inner_dim + j];
labeled_prob = std::max(labeled_prob, FLT_MIN);
loss[idx] = log(labeled_prob);
loss[idx] = -std::log(std::max(labeled_prob, FLT_MIN));
valid[idx] = 1;
}
}
}
}
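For reference (a reading of the kernel above, not part of the commit): the sign and clamp change the stored value from \log p to the usual cross entropy, i.e. for position i with label t_i,
L_i = -\log\big(\max(p_{i,t_i}, \mathrm{FLT\_MIN})\big),
with positions whose label appears in the ignore list contributing zero loss and zero validity.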
template<> void SoftmaxLossGrad<float, CPUContext>(const int count,
template<> void SparseSoftmaxCrossEntropyGrad<float, CPUContext>(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float* labels,
const float* prob,
const float* labels,
float* valid,
Tensor* ignore,
float* dXdata) {
int dim = count / outer_dim;
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CPUContext>() :
nullptr;
ignore->data <int, CPUContext>() : nullptr;
valid[0] = 0;
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
......@@ -790,6 +774,93 @@ template<> void SoftmaxLossGrad<float, CPUContext>(const int count,
}
}
/******************** loss.sparse_softmax_focal_loss ********************/
template <> void SparseSoftmaxFocalLoss<float, CPUContext>(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float alpha,
const float gamma,
const float* prob,
const float* labels,
float* scale,
float* loss,
float* valid,
Tensor* ignore) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CPUContext>() : nullptr;
const int dim = count / outer_dim;
for (int i = 0; i < count; ++i) {
scale[i] = alpha * std::pow((1.0f - prob[i]), gamma);
}
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
const int idx = i * inner_dim + j;
const int label = labels[idx];
int k;
for (k = 0; k < ignore->count(); ++k) {
if (label == ignores[k]) {
loss[idx] = valid[idx] = 0;
break;
}
}
if (k == ignore->count()) {
const int t_ = i * dim + label * inner_dim + j;
float labeled_prob = prob[t_];
loss[idx] = -scale[t_] * std::log(std::max(labeled_prob, FLT_MIN));
valid[idx] = 1;
}
}
}
}
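For reference (a reading of the kernel above, not part of the commit): writing p_{i,c} for the softmax probability of class c at position i and t_i for its label,
\mathrm{scale}_{i,c} = \alpha\,(1 - p_{i,c})^{\gamma}, \qquad
L_i = -\,\mathrm{scale}_{i,t_i}\,\log\big(\max(p_{i,t_i}, \mathrm{FLT\_MIN})\big),
with ignored labels contributing L_i = 0 and valid_i = 0; the per-class scale is cached so the gradient kernel can reuse it.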
template<> void SparseSoftmaxFocalLossGrad<float, CPUContext>(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float gamma,
const float eps,
const float* scale,
const float* prob,
const float* labels,
float* valid,
Tensor* ignore,
float* dXdata) {
int dim = count / outer_dim;
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CPUContext>() : nullptr;
valid[0] = 0;
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
const int label = labels[i * inner_dim + j];
int k;
for (k = 0; k < ignore->count(); ++k)
if (label == ignores[k]) break;
if (k != ignore->count()) {
for (int c = 0; c < classes; ++c)
dXdata[i * dim + c * inner_dim + j] = 0;
} else {
const int t_ = i * dim + label * inner_dim + j;
float grad = -gamma * (scale[t_] / std::max((1.0f - prob[t_]), eps))
* std::log(std::max(prob[t_], FLT_MIN))
* prob[t_] + scale[t_];
for (int c = 0; c < classes; ++c) {
const int i_ = i * dim + c * inner_dim + j;
if (c == label) {
dXdata[i_] = grad * (prob[t_] - 1);
} else {
dXdata[i_] = grad * prob[i_];
}
}
valid[0]++;
}
}
}
}
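For reference (a reading of the loop above, not part of the commit): with t the label of a position, p the softmax probabilities, s_t = scale at the target entry = \alpha(1 - p_t)^{\gamma}, and z the softmax logits, the per-position factor is
g = s_t - \gamma\, s_t\, p_t \,\frac{\log\big(\max(p_t, \mathrm{FLT\_MIN})\big)}{\max(1 - p_t, \mathrm{eps})},
and the value written to dXdata is \partial L/\partial z_c = g\,(p_c - [c = t]), which matches the analytic derivative of the focal loss L = -\alpha(1 - p_t)^{\gamma}\log p_t with respect to the logits; ignored labels receive a zero gradient and valid[0] counts the contributing positions.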
/******************** recurrent.lstm_unit ********************/
template <> void LSTMUnit<float, CPUContext>(const int count,
......@@ -1013,6 +1084,19 @@ template <> void MemoryData<uint8_t, float16, CPUContext>(const int count,
LOG(FATAL) << "unsupport float16 with CPU";
}
/******************** utils.one_hot ********************/
template <> void OneHot<float, CPUContext>(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
for (int i = 0; i < count; ++i) {
const int val = x[i];
y[i * depth + val] = on_value;
}
}
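A worked example of the kernel above (illustrative only): with count = 3, x = {2, 0, 1}, depth = 3, on_value = 1, and y pre-filled with off_value = 0 (as OneHotOp does via math::Set), the result is
// y = [ 0 0 1 ]
//     [ 1 0 0 ]
//     [ 0 1 0 ]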
/******************** vision.conv ********************/
template <> void Im2Col<float, CPUContext>(const int channels,
......
......@@ -1204,34 +1204,6 @@ template <> void TransposeGrad<float16, CUDAContext>(const int count,
CUDA_POST_KERNEL_CHECK;
}
/******************** common.utils ********************/
template <typename T>
__global__ void _OneHot(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
CUDA_KERNEL_LOOP(idx, count) {
const int val = x[idx];
y[idx * depth + val] = on_value;
}
}
template <> void OneHot<float, CUDAContext>(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
_OneHot<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
depth,
on_value,
x,
y);
CUDA_POST_KERNEL_CHECK;
}
/******************** loss.l1_loss ********************/
template <typename T>
......@@ -1248,7 +1220,7 @@ template<> void AbsGrad<float, CUDAContext>(const int count, const float* dy, fl
CUDA_POST_KERNEL_CHECK;
}
/******************** loss.sigmoid_cross_entropy_loss ********************/
/******************** loss.sigmoid_cross_entropy ********************/
template <typename T>
__global__ void _SigmoidCrossEntropy(const int count,
......@@ -1311,7 +1283,7 @@ template<> void SmoothL1Grad<float, CUDAContext>(const int count,
CUDA_POST_KERNEL_CHECK;
}
/******************** loss.softmax_cross_entropy_loss ********************/
/******************** loss.softmax_cross_entropy ********************/
template <typename T>
__global__ void _SoftmaxCrossEntropy(const int count,
......@@ -1334,7 +1306,7 @@ template <> void SoftmaxCrossEntropy<float, CUDAContext>(const int count,
CUDA_POST_KERNEL_CHECK;
}
/******************** loss.softmax_loss ********************/
/******************** loss.sparse_softmax_cross_entropy ********************/
template <typename T>
__global__ void _SparseSoftmaxCrossEntropy(const int count,
......@@ -1375,8 +1347,7 @@ template <> void SparseSoftmaxCrossEntropy<float, CUDAContext>(const int count,
float* valid,
Tensor* ignore) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CUDAContext>() :
nullptr;
ignore->data<int, CUDAContext>() : nullptr;
const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxCrossEntropy<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds,
prob,
......@@ -1391,7 +1362,7 @@ template <> void SparseSoftmaxCrossEntropy<float, CUDAContext>(const int count,
}
template <typename T>
__global__ void _SoftmaxLossGrad(const int count,
__global__ void _SparseSoftmaxCrossEntropyGrad(const int count,
const T* prob,
const T* labels,
T* dx,
......@@ -1418,12 +1389,12 @@ __global__ void _SoftmaxLossGrad(const int count,
}
}
template<> void SoftmaxLossGrad<float, CUDAContext>(const int count,
template<> void SparseSoftmaxCrossEntropyGrad<float, CUDAContext>(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float* labels,
const float* prob,
const float* labels,
float* valid,
Tensor* ignore,
float* dXdata) {
......@@ -1431,7 +1402,155 @@ template<> void SoftmaxLossGrad<float, CUDAContext>(const int count,
ignore->data <int, CUDAContext >() :
nullptr;
const int num_preds = outer_dim * inner_dim;
_SoftmaxLossGrad<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds,
_SparseSoftmaxCrossEntropyGrad<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds,
prob,
labels,
dXdata,
classes,
inner_dim,
ignores,
ignore->count(),
valid);
CUDA_POST_KERNEL_CHECK;
}
/******************** loss.sparse_softmax_focal_loss ********************/
template <typename T>
__global__ void _FocalScale(const int count,
const float alpha,
const float gamma,
const T* prob,
T* scale) {
CUDA_KERNEL_LOOP(idx, count) {
scale[idx] = alpha * std::pow((1.0f - prob[idx]), gamma);
}
}
template <typename T>
__global__ void _SparseSoftmaxFocalLoss(const int count,
const T* scale,
const T* prob,
const T* labels,
T* loss,
const int classes,
const int inner_dim,
const int* ignores,
const int ignore_num,
T* valid) {
CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim;
const int i_idx = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx];
int k;
for (k = 0; k < ignore_num; k++) {
if (label == ignores[k]) {
loss[idx] = valid[idx] = 0;
break;
}
}
if (k == ignore_num) {
const int t_ = (o_idx * classes + label) * inner_dim + i_idx;
loss[idx] = -scale[t_] * std::log(max(prob[t_], FLT_MIN));
valid[idx] = 1;
}
}
}
template <> void SparseSoftmaxFocalLoss<float, CUDAContext>(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float alpha,
const float gamma,
const float* prob,
const float* labels,
float* scale,
float* loss,
float* valid,
Tensor* ignore) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CUDAContext>() : nullptr;
const int num_preds = outer_dim * inner_dim;
_FocalScale<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
alpha,
gamma,
prob,
scale);
_SparseSoftmaxFocalLoss<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds,
scale,
prob,
labels,
loss,
classes,
inner_dim,
ignores,
ignore->count(),
valid);
CUDA_POST_KERNEL_CHECK;
}
template <typename T>
__global__ void _SparseSoftmaxFocalLossGrad(const int count,
const float gamma,
const float eps,
const T* scale,
const T* prob,
const T* labels,
T* dx,
const int classes,
const int inner_dim,
const int* ignores,
const int ignore_num,
T* valid) {
CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim;
const int i_idx = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx];
int k;
for (k = 0; k < ignore_num; k++)
if (label == ignores[k]) break;
if (k != ignore_num) {
for (int c = 0; c < classes; c++)
dx[(o_idx * classes + c) * inner_dim + i_idx] = 0;
valid[idx] = 0;
} else {
const int t_ = (o_idx * classes + label) * inner_dim + i_idx;
T grad = -gamma * (scale[t_] / max((1.0f - prob[t_]), eps))
* std::log(max(prob[t_], FLT_MIN))
* prob[t_] + scale[t_];
for (int c = 0; c < classes; c++) {
const int i_ = (o_idx * classes + c) * inner_dim + i_idx;
if (c == label) {
dx[i_] = grad * (prob[t_] - 1);
} else {
dx[i_] = grad * prob[i_];
}
}
valid[idx] = 1;
}
}
}
template<> void SparseSoftmaxFocalLossGrad<float, CUDAContext>(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float gamma,
const float eps,
const float* scale,
const float* prob,
const float* labels,
float* valid,
Tensor* ignore,
float* dXdata) {
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CUDAContext >() : nullptr;
const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxFocalLossGrad<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds,
gamma,
eps,
scale,
prob,
labels,
dXdata,
......@@ -1847,6 +1966,34 @@ template <> void MemoryData<uint8_t, float16, CUDAContext>(const int count,
CUDA_POST_KERNEL_CHECK;
}
/******************** utils.one_hot ********************/
template <typename T>
__global__ void _OneHot(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
CUDA_KERNEL_LOOP(idx, count) {
const int val = x[idx];
y[idx * depth + val] = on_value;
}
}
template <> void OneHot<float, CUDAContext>(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
_OneHot<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
depth,
on_value,
x,
y);
CUDA_POST_KERNEL_CHECK;
}
/******************** vision.conv ********************/
template<typename T>
......
......@@ -164,19 +164,38 @@ dragon.config.EnableCPU()
dragon.config.EnableCUDA(device_id, use_cudnn=True)
```
### Automatic Memory Optimization(AMC)
### Memonger
Dragon is an extremely memory-efficient framework.
It supports dropping intermediate results (the mirror stage) during the forward phase and sharing gradients during the backward phase,
using roughly 25% and 50% of the memory of Caffe and TensorFlow respectively.
To use it, just:
 
```Python
import dragon.config
dragon.config.SetDebugMode(False)
import dragon.memonger as opt
```
- ShareGrads
```Python
opt.share_grads()
```
This option makes all gradients share a global tensor, which makes debugging intractable.
- Drop
```Python
import dragon.ops as ops
y = opt.drop(ops.Relu, x)
```
This yields a solution that uses roughly 50% of the memory at the cost of about 15% slower training.
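Putting the two together in a single script (a minimal sketch; it assumes a Tensor `x` produced by an upstream layer, as in the snippet above):

```Python
import dragon.config
import dragon.memonger as opt
import dragon.ops as ops

# memonger assumes debug mode is off (see the setup snippet above)
dragon.config.SetDebugMode(False)

# let all gradients reuse one global tensor during the backward phase
opt.share_grads()

# drop the mirror stage of a cheap activation; its output is recomputed
# during the backward phase instead of being kept in memory
# (`x` stands for a Tensor produced by an upstream layer)
y = opt.drop(ops.Relu, x)
```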
### Scope
As a graph-based framework, Dragon supports various scopes.
- NameScope
```Python
......