Commit ddb76e7b by Ting PAN

add memonger for Dragon

1 parent d64a3943
Showing with 1800 additions and 982 deletions
@@ -42,6 +42,7 @@ class Graph final : public GraphBase {
     GraphDef Prune(const GraphDef& graph_def);
     GraphDef Share(const GraphDef& graph_def);
     GraphDef MakeUpdate(const GraphDef& graph_def);
+    void RecomputingAware(const GraphDef& graph_def, Workspace* ws);

     inline Workspace* ws() const { return ws_; }
...
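The new RecomputingAware pass is the graph-side hook of the memonger: after a graph is optimized, it records which operators can rebuild each tensor whose storage will be shared away (see the Workspace recompute map further down). A sketch of a plausible call site, assuming the optimization passes run when the graph is created; illustrative only, not the committed implementation:

    // hypothetical wiring inside graph construction
    GraphDef opt_def = Prune(graph_def);   // drop ops that reach no target
    opt_def = Share(opt_def);              // reuse buffers where legal
    RecomputingAware(opt_def, ws());       // register recompute chains per tensor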
@@ -80,30 +80,35 @@ class Operator : public OperatorBase {
         allow_run_ = true;
         allow_run_ &= _MPICheck();
         allow_run_ &= (!(OutputSize() == 1 && output(0)->name() == "ignore"));
+        allow_share_grads_ = (!op_def.debug_mode());
+        allow_share_grads_ &= op_def.share_grads();
+        allow_share_grads_ &= (type().find("Gradient") != string::npos);
     }

     virtual void Run() final {
         if (!allow_run_) return;
+        MakeResource();
         ctx_.SwitchToDevice();
-        if (!op_def_.debug_mode()) ShareBeforeRun();
         MemorySwitch();
         RunOnDevice();
-        if (!op_def_.debug_mode()) ClearAfterRun();
         ctx_.FinishDeviceCompution();
+        CleanResource();
     }

+    virtual void ElimateCorruption();
+    virtual void ShareGradient();
+    virtual void MakeResource();
+    virtual void CleanResource();
+
     void MemorySwitch() {
         for (int i = 0; i < InputSize(); i++)
-            if (input(i).name() != "ignore")
-                input(i).SwitchToDevice();
+            if (input(i).name() != "ignore") input(i).SwitchToDevice();
         for (int i = 0; i < OutputSize(); i++)
-            if (output(i)->name() != "ignore")
-                output(i)->SwitchToDevice();
+            if (output(i)->name() != "ignore") output(i)->SwitchToDevice();
     }

-    virtual void ShareBeforeRun() { /*** share tensors here if necessary ***/ }
     virtual void RunOnDevice() = 0;
-    virtual void ClearAfterRun() { /*** clear tensors here if necessary ***/ }

     inline Context& ctx() { return ctx_; }
     inline string anchor() { return GetSingleArg("anchor", name()); }
@@ -111,7 +116,7 @@ class Operator : public OperatorBase {
 protected:
     Context ctx_;
-    bool allow_run_;
+    bool allow_run_, allow_share_grads_;

 private:
     bool _MPICheck() {
@@ -169,6 +174,9 @@ DECLARE_REGISTRY(CUDNNOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*);
         } \
     }

+#define DISABLE_SHARE_GRADIENT \
+    this->allow_share_grads_ = false
+
 #define INSTANTIATE_OPERATOR(name, context) \
     template class name##Op<context>;
...
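Taken together, the Operator changes replace the old ShareBeforeRun()/ClearAfterRun() hooks with a MakeResource()/CleanResource() pair that Run() always invokes, plus a ShareGradient() hook gated by allow_share_grads_. A sketch of what a gradient op looks like under the new lifecycle; MyGradientOp is a made-up name and only the declarations mirror this commit:

    template <class Context>
    class MyGradientOp final : public Operator<Context> {
     public:
        MyGradientOp(const OperatorDef& op_def, Workspace* ws)
            : Operator<Context>(op_def, ws) {
            // ops for which buffer reuse is unsafe opt out instead of
            // overriding ShareGradient():
            // DISABLE_SHARE_GRADIENT;
        }
        void ShareGradient() override;  // borrow a pooled "Grad" buffer
        void CleanResource() override;  // hand it back; Run() calls this last
        void RunOnDevice() override;
        template <typename T> void RunWithType();
    };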
@@ -30,7 +30,7 @@ class Tensor {
             CHECK_GT(d, 0);
             new_size *= d;
         }
-        if (size_ != new_size &&
+        if (size_ != new_size && own_mem_ &&
             capacity_ < TIndex(new_size * meta_.itemsize())) {
             memory_.reset();
             capacity_ = 0;
@@ -38,9 +38,7 @@ class Tensor {
         size_ = new_size;
     }

-    void ReshapeLike(const Tensor& other) {
-        Reshape(other.dims_);
-    }
+    void ReshapeLike(const Tensor& other) { Reshape(other.dims_); }

     inline const string& name() const { return name_; }
@@ -92,63 +90,86 @@ class Tensor {
         return ss.str();
     }

-    MixedMemory::State memory_state() const { return memory_->state(); }
-    MixedMemory* memory() const { return memory_.get(); }
-    void SwitchToDevice() { if (memory_) memory_->SwitchToDevice(); }
+    inline bool is_corrupted() const { return is_corrupted_; }
+    inline void Corrupt() { is_corrupted_ = true; }
+
+    MixedMemory* memory() const { return own_mem_ ? memory_.get() : ex_memory_; }
+    MixedMemory::State memory_state() const {
+        MixedMemory* mem = memory();
+        CHECK(mem) << "memory access before allocating.";
+        return memory()->state();
+    }
+    void SwitchToDevice() {
+        MixedMemory* mem = own_mem_ ? memory_.get() : ex_memory_;
+        if (mem) mem->SwitchToDevice();
+    }

     const TypeMeta& meta() const { return meta_; }
     void SetMeta(const TypeMeta& meta) { meta_ = meta; }
     template <typename T> inline bool IsType() { return meta_.Match<T>(); }

     template <class Context>
-    const void* raw_data() const {
-        CHECK(memory_.get()) << "memory access before allocating.";
-        if (TypeMeta::Id<Context>() == TypeMeta::Id<CPUContext>())
-            return memory_->cpu_data();
-        else if (TypeMeta::Id<Context>() == TypeMeta::Id<CUDAContext>())
-            return memory_->cuda_data();
-        else LOG(FATAL) << "unknown memory type access. only CPU or CUDA are supported.";
-        return nullptr;
-    }
-
-    template <typename T, class Context>
-    const T* data() const {
-        return static_cast<const T*>(raw_data<Context>());
-    }
-
-    template <class Context>
-    void active_data_ptr(void** data_ptr) {
-        if (!memory_) {
-            *data_ptr = nullptr;
-        } else {
-            if (TypeMeta::Id<Context>() == TypeMeta::Id<CPUContext>()) {
-                *data_ptr = memory_->mutable_cpu_data();
-            } else if (TypeMeta::Id<Context>() == TypeMeta::Id<CUDAContext>()) {
-                *data_ptr = memory_->mutable_cuda_data();
-            }
-        }
-    }
+    void mutable_data_ptr(void** data_ptr) {
+        MixedMemory* mem = memory();
+        if (!mem) {
+            *data_ptr = nullptr;
+        } else {
+            if (TypeMeta::Id<Context>() == TypeMeta::Id<CPUContext>()) {
+                *data_ptr = mem->mutable_cpu_data();
+            } else if (TypeMeta::Id<Context>() == TypeMeta::Id<CUDAContext>()) {
+                *data_ptr = mem->mutable_cuda_data();
+            } else {
+                LOG(FATAL) << "unknown memory type access. only CPU or CUDA are supported.";
+            }
+        }
+    }
+
+    template <class Context>
+    const void* const_data_ptr() const {
+        MixedMemory* mem = memory();
+        CHECK(mem) << "memory access before allocating.";
+        if (TypeMeta::Id<Context>() == TypeMeta::Id<CPUContext>()) {
+            return mem->cpu_data();
+        } else if (TypeMeta::Id<Context>() == TypeMeta::Id<CUDAContext>()) {
+            return mem->cuda_data();
+        } else {
+            LOG(FATAL) << "unknown memory type access. only CPU or CUDA are supported.";
+            return nullptr;
+        }
+    }
     template <class Context>
     void* raw_mutable_data(const TypeMeta& meta) {
         void* data_ptr;
-        active_data_ptr<Context>(&data_ptr);
-        if (meta_ == meta && data_ptr) {
-            return data_ptr;
-        } else {
-            meta_ = meta;         // copy-assign the meta
-            CHECK_GT(size_, 0);   // must specify a valid size
-            memory_.reset(new MixedMemory(meta, size_ * meta_.itemsize()));
-            // malloc
-            if (TypeMeta::Id<Context>() == TypeMeta::Id<CPUContext>())
-                data_ptr = memory_->mutable_cpu_data();
-            else if (TypeMeta::Id<Context>() == TypeMeta::Id<CUDAContext>())
-                data_ptr = memory_->mutable_cuda_data();
-            // init for each structured element if necessary
-            if (meta.ctor()) meta_.ctor()(data_ptr, size_);
-        }
-        capacity_ = size_ * meta_.itemsize();
-        return data_ptr;
+        if (own_mem_) {
+            mutable_data_ptr<Context>(&data_ptr);
+            if (meta_ == meta && data_ptr) {
+                return data_ptr;
+            } else {
+                meta_ = meta;
+                CHECK_GT(size_, 0);
+                memory_.reset(new MixedMemory(meta, size_ * meta_.itemsize()));
+                mutable_data_ptr<Context>(&data_ptr);    // malloc
+                if (meta.ctor()) meta_.ctor()(data_ptr, size_);
+            }
+            capacity_ = size_ * meta_.itemsize();
+            return data_ptr;
+        } else {
+            meta_ = meta;
+            CHECK_GT(size_, 0);
+            TIndex ex_capacity_ = ex_memory_->nbytes();
+            if (ex_capacity_ >= TIndex(size_ * meta.itemsize())) {
+                mutable_data_ptr<Context>(&data_ptr);
+            } else {
+                delete ex_memory_;
+                ex_memory_ = new MixedMemory(meta, size_ * meta_.itemsize());
+                mutable_data_ptr<Context>(&data_ptr);    // malloc
+                if (meta.ctor()) meta_.ctor()(data_ptr, size_);
+                capacity_ = size_ * meta.itemsize();
+            }
+            return data_ptr;
+        }
     }
     template <class Context>
@@ -159,22 +180,30 @@ class Tensor {
         return raw_mutable_data<Context>(meta_);
     }

+    template <class Context>
+    const void* raw_data() const { return const_data_ptr<Context>(); }
+
     template <typename T, class Context>
     T* mutable_data() {
         void* data_ptr;
-        active_data_ptr<Context>(&data_ptr);
+        mutable_data_ptr<Context>(&data_ptr);
         if (data_ptr && meta_ == TypeMeta::Make<T>()) return static_cast<T*>(data_ptr);
         return static_cast<T*>(raw_mutable_data<Context>(TypeMeta::Make<T>()));
     }

-    void Share(const Tensor& other) {
+    template <typename T, class Context>
+    const T* data() const {
+        return static_cast<const T*>(raw_data<Context>());
+    }
+
+    inline void Share(const Tensor& other) {
         CHECK_EQ(size_, other.size_);
         memory_ = other.memory_;
         meta_ = other.meta_;
         capacity_ = other.capacity_;
     }

-    void Replace(const Tensor& other) {
+    inline void Replace(const Tensor& other) {
         memory_ = other.memory_;
         meta_ = other.meta_;
         capacity_ = other.capacity_;
@@ -182,23 +211,27 @@ class Tensor {
         dims_ = other.dims_;
     }
+    inline void Move(MixedMemory* mem) {
+        if (mem != nullptr) ex_memory_ = mem;
+        else ex_memory_ = new MixedMemory(TypeMeta::Make<float>(), 4);
+        own_mem_ = false;
+    }
+
-    void Reset() {
+    inline void Reset() {
         size_ = capacity_ = 0;
         meta_ = TypeMeta();
         dims_.clear();
         memory_.reset();
     }

-    void Release() {
-        memory_.reset();
-    }
 private:
     vector<TIndex> dims_;
     TIndex size_ = 0, capacity_ = 0;
     TypeMeta meta_;
     string name_;
     shared_ptr<MixedMemory> memory_;
+    MixedMemory* ex_memory_ = nullptr;
+    bool is_corrupted_ = false, own_mem_ = true;
 };

 } // namespace dragon
...
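The ex_memory_/own_mem_ pair is what lets the memonger alias several tensors onto one physical buffer: Move() rebinds a tensor to memory it does not own, and Corrupt() flags a tensor whose bytes have been handed to someone else, so the graph knows to recompute it. A small sketch of the intended use, with illustrative names and a given Workspace* ws:

    // two tensors backed by the same external MixedMemory
    MixedMemory* shared = new MixedMemory(TypeMeta::Make<float>(), 1024 * sizeof(float));
    Tensor* a = ws->CreateTensor("conv1/out");   // names are illustrative
    Tensor* b = ws->CreateTensor("conv2/out");
    a->Move(shared);   // a no longer owns its storage
    b->Move(shared);   // b reuses the same bytes
    b->Corrupt();      // writes through a clobber b: mark b for recomputing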
@@ -13,23 +13,28 @@
 namespace dragon {

-#define WORKSPACE_MIN_BUFFER_SIZE 3
-#define WORKSPACE_MAX_BUFFER_SIZE 3
+#define WORKSPACE_COMMON_BUFFER_SIZE 2
+#define WORKSPACE_GRAD_BUFFER_SIZE 1
+#define WORKSPACE_MAX_CORRUPTED_SIZE 2

 class Workspace {
 public:
     typedef Map<string, unique_ptr<Tensor> > TensorMap;
+    typedef Map<string, stack<string> > BufferMap;
     typedef Map<string, unique_ptr<mutex> > LockMap;
     typedef Map<string, unique_ptr<GraphBase> > GraphMap;
     typedef Map<string, TensorFiller> FillerMap;
     typedef Map<string, string> RenameMap;
+    typedef Map<string, vector<OperatorBase*> > RecomputeMap;

     Workspace() : root_folder_(".") { init(); }
     Workspace(string root_folder) : root_folder_(root_folder) { init(); }
+    ~Workspace();

     void init() {
         CreateTensor("ignore");
-        for (int i = 0; i < WORKSPACE_MIN_BUFFER_SIZE; i++) CreateBuffer();
+        CreateBuffer("Common", WORKSPACE_COMMON_BUFFER_SIZE);
+        CreateBuffer("Grad", WORKSPACE_GRAD_BUFFER_SIZE);
     }

     /******************** Tensor ********************/
@@ -101,33 +106,39 @@ class Workspace {
     /******************** Buffer ********************/
-    inline Tensor* CreateBuffer() {
-        int buffer_idx = 1;
-        string name;
-        while (1) {
-            name = "_t_buffer_" + dragon_cast<string, int>(buffer_idx++);
-            if (!HasTensor(name)) break;
-        }
-        buffer_stack_.push(name);
-        return CreateTensor(name);
-    }
+    inline void CreateBuffer(string category, int num) {
+        CHECK(!buffer_map_.count(category));
+        buffer_map_[category] = stack<string>();
+        for (int i = 1; i <= num; i++) {
+            string name = "_t_" + category + "_buffer_" + dragon_cast<string, int>(i);
+            buffer_map_[category].push(name);
+            CreateTensor(name);
+        }
+    }

-    inline Tensor* GetBuffer() {
-        if (!buffer_stack_.empty()) {
-            string name = buffer_stack_.top();
-            buffer_stack_.pop();
+    inline Tensor* GetBuffer(string category = "Common") {
+        if (!buffer_map_[category].empty()) {
+            string name = buffer_map_[category].top();
+            buffer_map_[category].pop();
             return GetTensor(name);
         }
-        LOG(FATAL) << "buffers are not enough, add more if necessary.";
+        LOG(FATAL) << "buffers of [" << category << "] "
+                   << "are not enough, add more if necessary.";
         return nullptr;
     }
-    inline void ReleaseBuffer(Tensor* tensor, bool force_release = false) {
-        // release directly
-        if (buffer_stack_.size() >= WORKSPACE_MAX_BUFFER_SIZE || force_release) {
+    inline void ReleaseBuffer(Tensor* tensor,
+                              string category = "Common",
+                              bool enforce = false) {
+        static Map<string, int> limits = {
+            { "Common", WORKSPACE_COMMON_BUFFER_SIZE },
+            { "Grad", WORKSPACE_GRAD_BUFFER_SIZE }};
+        if (buffer_map_[category].size() >= limits[category] || enforce) {
+            // release directly
             ReleaseTensor(tensor->name());
-        } else { // recover as an available buffer
-            buffer_stack_.push(tensor->name());
+        } else {
+            // recover as an available buffer
+            buffer_map_[category].push(tensor->name());
         }
     }
@@ -158,14 +169,30 @@ class Workspace {
         rename_map_[old_tensor] = new_tensor;
     }

+    inline void AddRecompute(const string& tensor, OperatorBase* op) {
+        if (!recompute_map_.count(tensor)) {
+            recompute_map_[tensor] = vector<OperatorBase*>();
+        }
+        recompute_map_[tensor].push_back(op);
+    }
+
+    inline vector<OperatorBase*> GetRecompute(const string& tensor) {
+        if (recompute_map_.count(tensor)) {
+            return recompute_map_[tensor];
+        } else {
+            return vector<OperatorBase*>();
+        }
+    }
 private:
     TensorMap tensor_map_;
+    BufferMap buffer_map_;
     LockMap lock_map_;
     GraphMap graph_map_;
     FillerMap filler_map_;
     RenameMap rename_map_;
+    RecomputeMap recompute_map_;
     string root_folder_;
-    stack<string> buffer_stack_;
 };

 } // namespace dragon
...
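Buffers are now pooled per category ("Common" scratch space vs. a single shared "Grad" buffer), and the recompute map records, per tensor, the operators able to rebuild it. A sketch of the intended call pattern using only the API declared above; the tensor name and the loop around Run() are assumptions:

    Tensor* scratch = ws->GetBuffer();        // pops from the "Common" pool
    Tensor* dX = ws->GetBuffer("Grad");       // pops from the "Grad" pool
    /* ... use them ... */
    ws->ReleaseBuffer(scratch);               // recovered for later reuse
    ws->ReleaseBuffer(dX, "Grad", true);      // enforce: free the memory now

    // restoring a corrupted tensor by re-running its producers
    for (OperatorBase* op : ws->GetRecompute("conv1/out"))
        op->Run();    // assumes OperatorBase exposes Run()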
@@ -43,10 +43,11 @@ class DropoutGradientOp final : public Operator<Context> {
         threshold = static_cast<unsigned int>(UINT_MAX * prob);
         if (use_scale) scale = 1.0 / (1.0 - prob);
         else scale = 1.0;
+        DISABLE_SHARE_GRADIENT;
     }

     void RunOnDevice() override;
-    void ClearAfterRun() override;
+    void CleanResource() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -30,7 +30,9 @@ class ReluGradientOp : public Operator<Context> {
 public:
     ReluGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          slope(OperatorBase::GetSingleArg<float>("slope", 0.0)) {}
+          slope(OperatorBase::GetSingleArg<float>("slope", 0.0)) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
     template <typename T> void RunWithType();
...
@@ -23,7 +23,10 @@ class SigmoidOp final : public Operator<Context> {
 template <class Context>
 class SigmoidGradientOp final : public Operator<Context> {
 public:
-    USE_SIMPLE_CTOR_DTOR(SigmoidGradientOp);
+    SigmoidGradientOp(const OperatorDef& op_def, Workspace* ws)
+        : Operator<Context>(op_def, ws) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
     template <typename T> void RunWithType();
...
@@ -32,7 +32,9 @@ class SoftmaxGradientOp final : public Operator<Context> {
 public:
     SoftmaxGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          axis(OperatorBase::GetSingleArg<int>("axis", 1)) {}
+          axis(OperatorBase::GetSingleArg<int>("axis", 1)) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
     template <typename T> void RunWithType();
...
@@ -23,7 +23,10 @@ class TanhOp final : public Operator<Context> {
 template <class Context>
 class TanhGradientOp final : public Operator<Context> {
 public:
-    USE_SIMPLE_CTOR_DTOR(TanhGradientOp);
+    TanhGradientOp(const OperatorDef& op_def, Workspace* ws)
+        : Operator<Context>(op_def, ws) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
     template <typename T> void RunWithType();
...
@@ -29,9 +29,8 @@ class AddGradientOp final : public Operator<Context> {
 public:
     USE_SIMPLE_CTOR_DTOR(AddGradientOp);

-    void ShareBeforeRun() override;
+    void ShareGradient() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void EltwiseRunWithType();
     template <typename T> void BroadcastRunWithType(int type);
...
@@ -35,9 +35,7 @@ class BiasAddGradientOp final : public Operator<Context> {
         : Operator<Context>(op_def, ws),
           data_format(OperatorBase::GetSingleArg<string>("data_format", "NCHW")) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void NCHWRunWithType();
     template <typename T> void NHWCRunWithType();
...
@@ -33,9 +33,7 @@ class ClipGradientOp final : public Operator<Context> {
 public:
     USE_SIMPLE_CTOR_DTOR(ClipGradientOp);

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -29,9 +29,8 @@ class DivGradientOp final : public Operator<Context> {
 public:
     USE_SIMPLE_CTOR_DTOR(DivGradientOp);

-    void ShareBeforeRun() override;
+    void ShareGradient() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void EltwiseRunWithType();
     template <typename T> void BroadcastRunWithType(int type);
...
@@ -37,6 +37,7 @@ class DotGradientOp final : public Operator<Context> {
           transA(OperatorBase::GetSingleArg<bool>("TransA", false)),
           transB(OperatorBase::GetSingleArg<bool>("TransB", false)) {}

+    void ShareGradient() override;
     void RunOnDevice() override;
     template <typename T> void DotRunWithType();
     template <typename T> void GemmRunWithType();
...
@@ -48,9 +48,8 @@ class EltwiseGradientOp final : public Operator<Context> {
         } else coeffs.resize(InputSize(), float(1));
     }

-    void ShareBeforeRun() override;
+    void ShareGradient() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void SumRunWithType();
     template <typename T> void ProdRunWithType();
...
@@ -25,9 +25,7 @@ class ExpGradientOp final : public Operator<Context> {
 public:
     USE_SIMPLE_CTOR_DTOR(ExpGradientOp);

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();
 };
...
@@ -33,9 +33,7 @@ class GramMatrixGradientOp final : public Operator<Context> {
         : Operator<Context>(op_def, ws),
           axis(OperatorBase::GetSingleArg<int>("axis", 1)) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -39,9 +39,7 @@ class InnerProductGradientOp final : public Operator<Context> {
           num_output(OperatorBase::GetSingleArg<int>("num_output", 0)),
           transW(OperatorBase::GetSingleArg<bool>("TransW", true)) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -25,9 +25,7 @@ class LogGradientOp final : public Operator<Context> {
 public:
     USE_SIMPLE_CTOR_DTOR(LogGradientOp);

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();
 };
...
@@ -36,9 +36,8 @@ class MatmulGradientOp final : public Operator<Context> {
           transA(OperatorBase::GetSingleArg<bool>("TransA", false)),
           transB(OperatorBase::GetSingleArg<bool>("TransB", false)) {}

-    void ShareBeforeRun() override;
+    void ShareGradient() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -29,9 +29,8 @@ class MulGradientOp final : public Operator<Context> {
 public:
     USE_SIMPLE_CTOR_DTOR(MulGradientOp);

-    void ShareBeforeRun() override;
+    void ShareGradient() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void EltwiseRunWithType();
     template <typename T> void BroadcastRunWithType(int type);
...
@@ -40,9 +40,7 @@ class PowGradientOp final : public Operator<Context> {
         power_scale = power * scale;
     }

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -35,9 +35,7 @@ class ScaleGradientOp final : public Operator<Context> {
           axis(OperatorBase::GetSingleArg<int>("axis", 1)),
           num_axes(OperatorBase::GetSingleArg<int>("num_axes", -1)) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void BiasRunWithType();
     template <typename T> void ScaleRunWithType();
     template <typename T> void RunWithType();
...
@@ -25,9 +25,7 @@ class SquareGradientOp final : public Operator<Context> {
 public:
     USE_SIMPLE_CTOR_DTOR(SquareGradientOp);

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();
 };
...
@@ -29,9 +29,8 @@ class SubGradientOp final : public Operator<Context> {
 public:
     USE_SIMPLE_CTOR_DTOR(SubGradientOp);

-    void ShareBeforeRun() override;
+    void ShareGradient() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void EltwiseRunWithType();
     template <typename T> void BroadcastRunWithType(int type);
...
@@ -34,9 +34,7 @@ class AtGradientOp final : public Operator<Context> {
           axis(OperatorBase::GetSingleArg<int>("axis", 0)),
           acc_grad(OperatorBase::GetSingleArg<bool>("acc_gradient", false)) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -36,9 +36,8 @@ class ConcatGradientOp : public Operator<Context> {
           axis(OperatorBase::GetSingleArg<int>("axis", 1)),
           nin(OperatorBase::GetSingleArg<int>("num_input", 1)) {}

-    void ShareBeforeRun() override;
+    void ShareGradient() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -57,9 +57,7 @@ class CropGradientOp final : public Operator<Context> {
     }

     void ComputeOutputShape();
-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();
     template <typename T> void RecursiveRunWithType(vector<TIndex> idxs,
                                                     const vector<TIndex>& offsets,
...
@@ -27,7 +27,10 @@ class ExpandDimsOp final : public Operator<Context> {
 template <class Context>
 class ExpandDimsGradientOp final : public Operator<Context> {
 public:
-    USE_SIMPLE_CTOR_DTOR(ExpandDimsGradientOp);
+    ExpandDimsGradientOp(const OperatorDef& op_def, Workspace* ws)
+        : Operator<Context>(op_def, ws) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
 };
...
@@ -28,7 +28,10 @@ class FlattenOp final : public Operator<Context> {
 template <class Context>
 class FlattenGradientOp final : public Operator<Context> {
 public:
-    USE_SIMPLE_CTOR_DTOR(FlattenGradientOp);
+    FlattenGradientOp(const OperatorDef& op_def, Workspace* ws)
+        : Operator<Context>(op_def, ws) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
 };
...
@@ -43,7 +43,9 @@ template <class Context>
 class TemplateGradientOp : public TemplateOp<Context> {
 public:
     TemplateGradientOp(const OperatorDef& op_def, Workspace* ws)
-        : TemplateOp<Context>(op_def, ws) {}
+        : TemplateOp<Context>(op_def, ws) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
 };
...
@@ -39,9 +39,7 @@ class ReduceGradientOp final : public Operator<Context> {
           axis(OperatorBase::GetSingleArg<int>("axis", -1)),
           operation(OperatorBase::GetSingleArg<string>("operation", "NONE")) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void SumRunWithType();
     template <typename T> void MeanRunWithType();
...
@@ -30,7 +30,10 @@ class ReshapeOp final : public Operator<Context> {
 template <class Context>
 class ReshapeGradientOp final : public Operator<Context> {
 public:
-    USE_SIMPLE_CTOR_DTOR(ReshapeGradientOp);
+    ReshapeGradientOp(const OperatorDef& op_def, Workspace* ws)
+        : Operator<Context>(op_def, ws) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
 };
...
@@ -61,6 +61,8 @@ class ScanGradientOp final : public Operator<Context> {
         // handle GI(x)
         for (int i = 0; i < forward_inputs.size(); i++)
             terms[forward_inputs[i] + "_grad"] = output(i)->name();
+
+        DISABLE_SHARE_GRADIENT;
     }

     void RunOnDevice() override;
...
@@ -35,7 +35,9 @@ class SliceGradientOp final : public Operator<Context> {
     SliceGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
           axis(OperatorBase::GetSingleArg<int>("axis", 1)),
-          nout(OperatorBase::GetSingleArg<int>("num_output", 1)) {}
+          nout(OperatorBase::GetSingleArg<int>("num_output", 1)) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
     template <typename T> void RunWithType();
...
@@ -43,9 +43,7 @@ class TileGradientOp : public Operator<Context> {
         process_axes.push_back({ i, multiples[i] });
     }

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void TileRunWithType();

 protected:
...
@@ -33,9 +33,7 @@ class TransposeGradientOp final : public Operator<Context> {
     TransposeGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -16,14 +16,12 @@ class L1LossOp : public Operator<Context> {
 public:
     L1LossOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          coeff(OperatorBase::GetSingleArg<float>("coeff", 1.0)),
           normalization(OperatorBase::GetSingleArg<string>("normalization", "BATCH_SIZE")) {}

     void RunOnDevice() override;
     template <typename T> void RunWithType();

 protected:
-    float coeff;
     Tensor* diff;
     string normalization;
 };
@@ -33,14 +31,13 @@ class L1LossGradientOp final : public Operator<Context> {
 public:
     L1LossGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          coeff(OperatorBase::GetSingleArg<float>("coeff", 1.0)),
           normalization(OperatorBase::GetSingleArg<string>("normalization", "BATCH_SIZE")) {}

+    void ShareGradient() override;
     void RunOnDevice() override;
     template <typename T> void RunWithType();

 protected:
-    float coeff;
     Tensor* diff;
     string normalization;
 };
...
@@ -16,14 +16,12 @@ class L2LossOp : public Operator<Context> {
 public:
     L2LossOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          coeff(OperatorBase::GetSingleArg<float>("coeff", 1.0)),
           normalization(OperatorBase::GetSingleArg<string>("normalization", "BATCH_SIZE")) {}

     void RunOnDevice() override;
     template <typename T> void RunWithType();

 protected:
-    float coeff;
     Tensor* diff;
     string normalization;
 };
@@ -33,14 +31,13 @@ class L2LossGradientOp final : public Operator<Context> {
 public:
     L2LossGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          coeff(OperatorBase::GetSingleArg<float>("coeff", 1.0)),
           normalization(OperatorBase::GetSingleArg<string>("normalization", "BATCH_SIZE")) {}

+    void ShareGradient() override;
     void RunOnDevice() override;
     template <typename T> void RunWithType();

 protected:
-    float coeff;
     Tensor* diff;
     string normalization;
 };
...
@@ -4,19 +4,20 @@
 // Written by Ting Pan
 // --------------------------------------------------------

-#ifndef DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_LOSS_OP_H_
-#define DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_LOSS_OP_H_
+#ifndef DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_OP_H_
+#define DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_OP_H_

 #include "core/operator.h"

 namespace dragon {

 template <class Context>
-class SigmoidCrossEntropyLossOp final : public Operator<Context> {
+class SigmoidCrossEntropyOp final : public Operator<Context> {
 public:
-    SigmoidCrossEntropyLossOp(const OperatorDef& op_def, Workspace* ws)
+    SigmoidCrossEntropyOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
           normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) {}

     void RunOnDevice() override;
     template <typename T> void RunWithType();
@@ -27,9 +28,9 @@ class SigmoidCrossEntropyLossOp final : public Operator<Context> {
 };

 template <class Context>
-class SigmoidCrossEntropyLossGradientOp final : public Operator<Context> {
+class SigmoidCrossEntropyGradientOp final : public Operator<Context> {
 public:
-    SigmoidCrossEntropyLossGradientOp(const OperatorDef& op_def, Workspace* ws)
+    SigmoidCrossEntropyGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
           normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) {}
@@ -43,4 +44,4 @@ class SigmoidCrossEntropyLossGradientOp final : public Operator<Context> {
 } // namespace dragon

-#endif // DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_LOSS_OP_H_
+#endif // DRAGON_OPERATORS_LOSS_SIGMOID_CROSS_ENTROPY_OP_H_
\ No newline at end of file
@@ -4,17 +4,17 @@
 // Written by Ting Pan
 // --------------------------------------------------------

-#ifndef DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_LOSS_OP_H_
-#define DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_LOSS_OP_H_
+#ifndef DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_OP_H_
+#define DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_OP_H_

 #include "core/operator.h"

 namespace dragon {

 template <class Context>
-class SoftmaxCrossEntropyLossOp final : public Operator<Context> {
+class SoftmaxCrossEntropyOp final : public Operator<Context> {
 public:
-    SoftmaxCrossEntropyLossOp(const OperatorDef& op_def, Workspace* ws)
+    SoftmaxCrossEntropyOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
           axis(OperatorBase::GetSingleArg<int>("axis", 1)),
           normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) {
@@ -39,9 +39,9 @@ class SoftmaxCrossEntropyLossOp final : public Operator<Context> {
 };

 template <class Context>
-class SoftmaxCrossEntropyLossGradientOp final : public Operator<Context> {
+class SoftmaxCrossEntropyGradientOp final : public Operator<Context> {
 public:
-    SoftmaxCrossEntropyLossGradientOp(const OperatorDef& op_def, Workspace* ws)
+    SoftmaxCrossEntropyGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
           axis(OperatorBase::GetSingleArg<int>("axis", 1)),
           normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) {}
@@ -57,4 +57,4 @@ class SoftmaxCrossEntropyLossGradientOp final : public Operator<Context> {
 } // namespace dragon

-#endif // DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_LOSS_OP_H_
+#endif // DRAGON_OPERATORS_LOSS_SOFTMAX_CROSS_ENTROPY_OP_H_
\ No newline at end of file
@@ -4,17 +4,17 @@
 // Written by Ting Pan
 // --------------------------------------------------------

-#ifndef DRAGON_OPERATORS_LOSS_SOFTMAX_LOSS_OP_H_
-#define DRAGON_OPERATORS_LOSS_SOFTMAX_LOSS_OP_H_
+#ifndef DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_CROSS_ENTROPY_OP_H_
+#define DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_CROSS_ENTROPY_OP_H_

 #include "core/operator.h"

 namespace dragon {

 template <class Context>
-class SoftmaxLossOp final : public Operator<Context> {
+class SparseSoftmaxCrossEntropyOp : public Operator<Context> {
 public:
-    SoftmaxLossOp(const OperatorDef& op_def, Workspace* ws)
+    SparseSoftmaxCrossEntropyOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
           axis(OperatorBase::GetSingleArg<int>("axis", 1)),
           normalization(OperatorBase::GetSingleArg<string>("normalization", "VALID")) {
@@ -45,9 +45,9 @@ class SoftmaxLossOp final : public Operator<Context> {
 };

 template <class Context>
-class SoftmaxLossGradientOp final : public Operator<Context> {
+class SparseSoftmaxCrossEntropyGradientOp : public Operator<Context> {
 public:
-    SoftmaxLossGradientOp(const OperatorDef& op_def, Workspace* ws)
+    SparseSoftmaxCrossEntropyGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
           axis(OperatorBase::GetSingleArg<int>("axis", 1)),
           normalization(OperatorBase::GetSingleArg<string>("normalization", "VALID")) {
@@ -71,4 +71,4 @@ class SoftmaxLossGradientOp final : public Operator<Context> {
 } // namespace dragon

-#endif // DRAGON_OPERATORS_LOSS_SOFTMAX_LOSS_OP_H_
+#endif // DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_CROSS_ENTROPY_OP_H_
\ No newline at end of file
// --------------------------------------------------------
// Dragon
// Copyright(c) 2017 SeetaTech
// Written by Ting Pan
// --------------------------------------------------------
#ifndef DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_FOCAL_LOSS_OP_H_
#define DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_FOCAL_LOSS_OP_H_
#include "operators/loss/sparse_softmax_cross_entropy_op.h"
namespace dragon {
template <class Context>
class SparseSoftmaxFocalLossOp final : public SparseSoftmaxCrossEntropyOp<Context> {
public:
SparseSoftmaxFocalLossOp(const OperatorDef& op_def, Workspace* ws)
: SparseSoftmaxCrossEntropyOp<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "VALID")),
alpha(OperatorBase::GetSingleArg<float>("alpha", 1.0)),
gamma(OperatorBase::GetSingleArg<float>("gamma", 2.0)),
use_pseudo_metric(OperatorBase::GetSingleArg<bool>("use_pseudo_metric", true)) {
if (alpha == 1.0) use_pseudo_metric = false;
}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float alpha, gamma;
bool use_pseudo_metric;
TIndex axis, outer_dim, inner_dim;
Tensor* scale;
string normalization;
};
template <class Context>
class SparseSoftmaxFocalLossGradientOp final : public SparseSoftmaxCrossEntropyGradientOp<Context> {
public:
SparseSoftmaxFocalLossGradientOp(const OperatorDef& op_def, Workspace* ws)
: SparseSoftmaxCrossEntropyGradientOp<Context>(op_def, ws),
axis(OperatorBase::GetSingleArg<int>("axis", 1)),
normalization(OperatorBase::GetSingleArg<string>("normalization", "VALID")),
gamma(OperatorBase::GetSingleArg<float>("gamma", 2.0)),
eps(OperatorBase::GetSingleArg<float>("eps", float(1e-10))) {}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float gamma, eps;
TIndex axis, outer_dim, inner_dim;
Tensor* scale;
string normalization;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_LOSS_SPARSE_SOFTMAX_FOCAL_LOSS_OP_H_
\ No newline at end of file
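For reference, the alpha/gamma arguments above follow the standard focal loss (Lin et al., 2017), which this commit does not restate:

    FL(p_t) = -\alpha \, (1 - p_t)^{\gamma} \, \log(p_t)

The defaults match \gamma = 2.0 and \alpha = 1.0; at \gamma = 0 the modulating factor vanishes and the loss reduces to the (alpha-weighted) cross entropy, which is why the op derives from SparseSoftmaxCrossEntropyOp.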
@@ -27,7 +27,9 @@ template <class Context>
 class MPIBroadcastGradientOp final : public ModelMPIBase<Context> {
 public:
     MPIBroadcastGradientOp(const OperatorDef& op_def, Workspace* ws)
-        : ModelMPIBase<Context>(op_def, ws) {}
+        : ModelMPIBase<Context>(op_def, ws) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
     template <typename T> void RunWithType();
...
@@ -27,7 +27,9 @@ template <class Context>
 class MPIGatherGradientOp final : public ModelMPIBase<Context> {
 public:
     MPIGatherGradientOp(const OperatorDef& op_def, Workspace* ws)
-        : ModelMPIBase<Context>(op_def, ws) {}
+        : ModelMPIBase<Context>(op_def, ws) {
+        DISABLE_SHARE_GRADIENT;
+    }

     void RunOnDevice() override;
     template <typename T> void RunWithType();
...
@@ -30,7 +30,7 @@ class BatchNormOp : public Operator<Context> {
     Tensor* num_multiplier, *spatial_multiplier, *stddev, *var;
     TIndex num, channels, spatial_dim, nbychans;
     int use_stats;
-    bool use_global_stats, inplace;
+    bool use_global_stats, inplace, is_recomputing;
 };

 template <class Context>
@@ -40,9 +40,7 @@ class BatchNormGradientOp final : public Operator<Context> {
         : Operator<Context>(op_def, ws),
           use_stats(OperatorBase::GetSingleArg<int>("use_stats", -1)) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
@@ -68,7 +66,7 @@ class BNOp : public Operator<Context> {
 protected:
     float momentum, eps;
     int use_stats;
-    bool use_global_stats;
+    bool use_global_stats, is_recomputing;
 };

 template <class Context>
@@ -79,9 +77,8 @@ class BNGradientOp : public Operator<Context> {
           eps(OperatorBase::GetSingleArg<float>("eps", float(1e-3))),
           use_stats(OperatorBase::GetSingleArg<int>("use_stats", -1)) {}

-    void ShareBeforeRun() override;
+    void ShareGradient() override;
     void RunOnDevice() override { NOT_IMPLEMENTED; }
-    void ClearAfterRun() override;
     template <typename T> void RunWithType() { NOT_IMPLEMENTED; }

 protected:
@@ -115,7 +112,7 @@ class CuDNNBNOp final : public BNOp<Context> {
     cudnnTensorDescriptor_t input_desc, output_desc, bn_desc;
     TIndex num, channels, spatial_dim;
     Tensor* mean, *var;
-    bool use_global_stats;
+    bool use_global_stats, is_recomputing;
 };

 template <class Context>
...
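The is_recomputing flag threaded through the BatchNorm/BN variants exists because a memonger-triggered re-run of the forward pass must not fold the batch statistics into the running averages a second time. An illustrative fragment of how such a flag is typically consumed (not the committed body):

    // inside a BN-style RunWithType(); the flag is assumed to be set
    // from workspace state when the op is re-run to restore an output
    if (!use_global_stats && !is_recomputing) {
        // first (real) pass: update running mean/var with `momentum`
    }
    // recomputing pass: reuse the statistics already accumulated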
@@ -36,7 +36,7 @@ class BatchRenormOp : public Operator<Context> {
     Tensor* stddev, *r, *var, *x_norm;
     TIndex num, channels, spatial_dim, nbychans;
     int use_stats;
-    bool use_global_stats, inplace;
+    bool use_global_stats, inplace, is_recomputing;
 };

 template <class Context>
@@ -46,9 +46,7 @@ class BatchRenormGradientOp final : public Operator<Context> {
         : Operator<Context>(op_def, ws),
           use_stats(OperatorBase::GetSingleArg<int>("use_stats", -1)) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -36,9 +36,7 @@ class InstanceNormGradientOp final : public Operator<Context> {
     InstanceNormGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -40,9 +40,7 @@ class L2NormGradientOp final : public Operator<Context> {
           axis(OperatorBase::GetSingleArg<int>("axis", 0)),
           num_axes(OperatorBase::GetSingleArg<int>("num_axes", -1)) {}

-    void ShareBeforeRun() override;
     void RunOnDevice() override;
-    void ClearAfterRun() override;
     template <typename T> void RunWithType();

 protected:
...
@@ -30,7 +30,10 @@ class LSTMUnitOp : public Operator<Context> {
 template <class Context>
 class LSTMUnitGradientOp : public Operator<Context> {
 public:
-    USE_SIMPLE_CTOR_DTOR(LSTMUnitGradientOp);
+    LSTMUnitGradientOp(const OperatorDef& op_def, Workspace* ws)
+        : Operator<Context>(op_def, ws) {
+        this->allow_share_grads_ = false;
+    }

     void RunOnDevice() override;
     template <typename T> void RunWithType();
...
@@ -4,23 +4,14 @@
 // Written by Ting Pan
 // --------------------------------------------------------

-#ifndef DRAGON_OPERATORS_COMMON_UTILS_OP_H_
-#define DRAGON_OPERATORS_COMMON_UTILS_OP_H_
+#ifndef DRAGON_OPERATORS_UTILS_ACCURACY_OP_H_
+#define DRAGON_OPERATORS_UTILS_ACCURACY_OP_H_

 #include "core/operator.h"

 namespace dragon {

 template <class Context>
-class CopyOp final : public Operator<Context> {
-public:
-    USE_SIMPLE_CTOR_DTOR(CopyOp);
-    void RunOnDevice() override;
-    template <typename T> void RunWithType();
-};
-
-template <class Context>
 class AccuracyOp final : public Operator<Context> {
 public:
     AccuracyOp(const OperatorDef& op_def, Workspace* ws)
@@ -42,22 +33,6 @@ class AccuracyOp final : public Operator<Context> {
     Tensor ignore_labels;
 };

-template <class Context>
-class OneHotOp final : public Operator<Context> {
-public:
-    OneHotOp(const OperatorDef& op_def, Workspace* ws)
-        : Operator<Context>(op_def, ws),
-          depth(OperatorBase::GetSingleArg<int>("depth", -1)),
-          on_value(OperatorBase::GetSingleArg<int>("on_value", 1)),
-          off_value(OperatorBase::GetSingleArg<int>("off_value", 0)) {}
-    void RunOnDevice() override;
-    template <typename T> void RunWithType();
-
-protected:
-    TIndex depth, on_value, off_value;
-};
-
 } // namespace dragon

-#endif // DRAGON_OPERATORS_COMMON_UTILS_OP_H_
+#endif // DRAGON_OPERATORS_UTILS_ACCURACY_OP_H_
\ No newline at end of file
// --------------------------------------------------------
// Dragon
// Copyright(c) 2017 SeetaTech
// Written by Ting Pan
// --------------------------------------------------------
#ifndef DRAGON_OPERATORS_UTILS_COPY_OP_H_
#define DRAGON_OPERATORS_UTILS_COPY_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class CopyOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(CopyOp);
void RunOnDevice() override;
template <typename T> void RunWithType();
};
} // namespace dragon
#endif // DRAGON_OPERATORS_UTILS_COPY_OP_H_
\ No newline at end of file
...@@ -19,6 +19,7 @@ class GradientGenerateOp final: public Operator<Context> { ...@@ -19,6 +19,7 @@ class GradientGenerateOp final: public Operator<Context> {
defaults(OperatorBase::GetRepeatedArg<float>("defaults")) { defaults(OperatorBase::GetRepeatedArg<float>("defaults")) {
CHECK_EQ(InputSize(), OutputSize()); CHECK_EQ(InputSize(), OutputSize());
CHECK_EQ(defaults.size(), OutputSize()); CHECK_EQ(defaults.size(), OutputSize());
DISABLE_SHARE_GRADIENT;
} }
void RunOnDevice() override; void RunOnDevice() override;
...@@ -35,6 +36,7 @@ class GradientGatherOp final : public Operator<Context> { ...@@ -35,6 +36,7 @@ class GradientGatherOp final : public Operator<Context> {
: Operator<Context>(op_def, ws) { : Operator<Context>(op_def, ws) {
for (int i = 0; i < InputSize(); i++) for (int i = 0; i < InputSize(); i++)
if (input(i).name() != "ignore") indices.push_back(i); if (input(i).name() != "ignore") indices.push_back(i);
DISABLE_SHARE_GRADIENT;
} }
void RunOnDevice() override; void RunOnDevice() override;
...@@ -47,7 +49,11 @@ class GradientGatherOp final : public Operator<Context> { ...@@ -47,7 +49,11 @@ class GradientGatherOp final : public Operator<Context> {
template <class Context> template <class Context>
class StopGradientOp final : public Operator<Context> { class StopGradientOp final : public Operator<Context> {
public: public:
USE_SIMPLE_CTOR_DTOR(StopGradientOp); StopGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {
DISABLE_SHARE_GRADIENT;
}
void RunOnDevice() override; void RunOnDevice() override;
}; };
......
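The three operators above are exactly the ones that must opt out of gradient sharing: GradientGenerate and GradientGather keep several gradients alive at once, and StopGradient produces no real gradient at all. A minimal Python sketch of the hazard, using a toy pool whose names are illustrative and not Dragon's API:

```python
# Toy gradient-buffer pool; a sketch of the hazard, not Dragon's implementation.
class GradPool(object):
    def __init__(self):
        self.free, self.count = [], 0

    def get(self):
        if self.free:
            return self.free.pop()
        self.count += 1
        return 'grad_buffer_%d' % self.count

    def release(self, buf):
        self.free.append(buf)

pool = GradPool()
dy = pool.get()
dx = pool.get()      # a normal *Gradient op: write dX, then retire dY
pool.release(dy)

# A gather-style op sums N input gradients into one output; releasing any
# of them before the sum completes would let the next op overwrite live
# data, so such operators must keep sharing disabled.
```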
// --------------------------------------------------------
// Dragon
// Copyright(c) 2017 SeetaTech
// Written by Ting Pan
// --------------------------------------------------------
#ifndef DRAGON_OPERATORS_UTILS_ONE_HOT_OP_H_
#define DRAGON_OPERATORS_UTILS_ONE_HOT_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class OneHotOp final : public Operator<Context> {
public:
OneHotOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
depth(OperatorBase::GetSingleArg<int>("depth", -1)),
on_value(OperatorBase::GetSingleArg<int>("on_value", 1)),
off_value(OperatorBase::GetSingleArg<int>("off_value", 0)) {}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
TIndex depth, on_value, off_value;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_UTILS_ONE_HOT_OP_H_
\ No newline at end of file
...@@ -30,9 +30,7 @@ class ConvGradientOp : public ConvOp<Context> { ...@@ -30,9 +30,7 @@ class ConvGradientOp : public ConvOp<Context> {
ConvGradientOp(const OperatorDef& def, Workspace* ws) ConvGradientOp(const OperatorDef& def, Workspace* ws)
: ConvOp<Context>(def, ws) {} : ConvOp<Context>(def, ws) {}
void ShareBeforeRun() override;
void RunOnDevice() override; void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType(); template <typename T> void RunWithType();
}; };
......
...@@ -31,9 +31,7 @@ class DeConvGradientOp : public DeConvOp<Context> { ...@@ -31,9 +31,7 @@ class DeConvGradientOp : public DeConvOp<Context> {
DeConvGradientOp(const OperatorDef& def, Workspace* ws) : DeConvGradientOp(const OperatorDef& def, Workspace* ws) :
DeConvOp<Context>(def, ws) {} DeConvOp<Context>(def, ws) {}
void ShareBeforeRun() override;
void RunOnDevice() override; void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType(); template <typename T> void RunWithType();
}; };
......
...@@ -15,21 +15,21 @@ template <class Context> ...@@ -15,21 +15,21 @@ template <class Context>
class DenseConcatOp final : public ConcatOp<Context> { class DenseConcatOp final : public ConcatOp<Context> {
public: public:
DenseConcatOp(const OperatorDef& op_def, Workspace* ws) DenseConcatOp(const OperatorDef& op_def, Workspace* ws)
: ConcatOp<Context>(op_def, ws) { } : ConcatOp<Context>(op_def, ws) {}
void RunOnDevice() override;
}; };
template <class Context> template <class Context>
class DenseConcatGradientOp : public ConcatGradientOp<Context> { class DenseConcatGradientOp : public ConcatGradientOp<Context> {
public: public:
DenseConcatGradientOp(const OperatorDef& op_def, Workspace* ws) DenseConcatGradientOp(const OperatorDef& op_def, Workspace* ws)
: ConcatGradientOp<Context>(op_def, ws) {} : ConcatGradientOp<Context>(op_def, ws),
growth_rate(OperatorBase::GetSingleArg<int>("growth_rate", 0)) {}
void ElimateCorruption() override;
template <typename T> void RestoreX1();
void ShareBeforeRun() override; protected:
void RunOnDevice() override; TIndex growth_rate;
void ClearAfterRun() override;
template <typename T> void RunWithType();
}; };
......
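DenseConcatGradientOp now carries growth_rate so that the dropped input X1 can be restored from the concatenated output: with Y = concat(X1, X2) along `axis` and X2 contributing `growth_rate` channels, X1 is simply the leading slice of Y. A NumPy sketch of what RestoreX1 can do under that assumption:

```python
# Sketch of the DenseNet trick the growth_rate argument enables, assuming
# Y = concat(X1, X2) along `axis` where X2 has `growth_rate` channels.
import numpy as np

def restore_x1(y, growth_rate, axis=1):
    """Slice the first (C - growth_rate) channels of y back out as x1."""
    keep = y.shape[axis] - growth_rate
    index = [slice(None)] * y.ndim
    index[axis] = slice(0, keep)
    return y[tuple(index)]

x1 = np.random.randn(2, 64, 8, 8).astype('float32')
x2 = np.random.randn(2, 32, 8, 8).astype('float32')   # growth_rate = 32
y = np.concatenate([x1, x2], axis=1)
assert np.array_equal(restore_x1(y, growth_rate=32), x1)
```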
...@@ -35,9 +35,7 @@ class NNResizeGradientOp : public Operator<Context> { ...@@ -35,9 +35,7 @@ class NNResizeGradientOp : public Operator<Context> {
NNResizeGradientOp(const OperatorDef& op_def, Workspace* ws) NNResizeGradientOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws) {} : Operator<Context>(op_def, ws) {}
void ShareBeforeRun() override;
void RunOnDevice() override; void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void RunWithType(); template <typename T> void RunWithType();
}; };
......
...@@ -69,9 +69,7 @@ class PoolingGradientOp: public Operator<Context> { ...@@ -69,9 +69,7 @@ class PoolingGradientOp: public Operator<Context> {
} }
void Reshape(); void Reshape();
void ShareBeforeRun() override;
void RunOnDevice() override; void RunOnDevice() override;
void ClearAfterRun() override;
template <typename T> void MaxRunWithType(); template <typename T> void MaxRunWithType();
template <typename T> void AvgRunWithType(); template <typename T> void AvgRunWithType();
......
...@@ -44,9 +44,8 @@ class ROIAlignGradientOp : public Operator<Context> { ...@@ -44,9 +44,8 @@ class ROIAlignGradientOp : public Operator<Context> {
CHECK_GT(pool_w, 0) << "\npool_w must be > 0"; CHECK_GT(pool_w, 0) << "\npool_w must be > 0";
} }
void ShareBeforeRun() override;
void RunOnDevice() override; void RunOnDevice() override;
void ClearAfterRun() override; void CleanResource() override;
template <typename T> void RunWithType(); template <typename T> void RunWithType();
protected: protected:
......
...@@ -41,9 +41,8 @@ class ROIPoolingGradientOp final : public Operator<Context> { ...@@ -41,9 +41,8 @@ class ROIPoolingGradientOp final : public Operator<Context> {
pool_w(OperatorBase::GetSingleArg<int>("pool_w", 0)), pool_w(OperatorBase::GetSingleArg<int>("pool_w", 0)),
spatial_scale(OperatorBase::GetSingleArg<float>("spatial_scale", 1.0)) {} spatial_scale(OperatorBase::GetSingleArg<float>("spatial_scale", 1.0)) {}
void ShareBeforeRun() override;
void RunOnDevice() override; void RunOnDevice() override;
void ClearAfterRun() override; void CleanResource() override;
template <typename T> void RunWithType(); template <typename T> void RunWithType();
protected: protected:
......
...@@ -286,21 +286,12 @@ void TransposeGrad(const int count, ...@@ -286,21 +286,12 @@ void TransposeGrad(const int count,
const T* dy, const T* dy,
T* dx); T* dx);
/******************** common.utils ********************/
template <typename T, class Context>
void OneHot(const int count,
const int depth,
const int on_value,
const T* x,
T* y);
/******************** loss.l1_loss ********************/ /******************** loss.l1_loss ********************/
template <typename T, class Context> template <typename T, class Context>
void AbsGrad(const int count, const T* dy, T* dx); void AbsGrad(const int count, const T* dy, T* dx);
/******************** loss.sigmoid_cross_entropy_loss ********************/ /******************** loss.sigmoid_cross_entropy ********************/
template <typename T, class Context> template <typename T, class Context>
void SigmoidCrossEntropy(const int count, const T* x, const T* target, T* loss); void SigmoidCrossEntropy(const int count, const T* x, const T* target, T* loss);
...@@ -313,12 +304,12 @@ void SmoothL1(const int count, const float sigma2, const T* x, T* y); ...@@ -313,12 +304,12 @@ void SmoothL1(const int count, const float sigma2, const T* x, T* y);
template <typename T, class Context> template <typename T, class Context>
void SmoothL1Grad(const int count, const float sigma2, const T* dy, T* dx); void SmoothL1Grad(const int count, const float sigma2, const T* dy, T* dx);
/******************** loss.softmax_cross_entropy_loss ********************/ /******************** loss.softmax_cross_entropy ********************/
template <typename T, class Context> template <typename T, class Context>
void SoftmaxCrossEntropy(const int count, const T* prob, const T* target, T* loss); void SoftmaxCrossEntropy(const int count, const T* prob, const T* target, T* loss);
/******************** loss.softmax_loss ********************/ /******************** loss.sparse_softmax_cross_entropy ********************/
template <typename T, class Context> template <typename T, class Context>
void SparseSoftmaxCrossEntropy(const int count, void SparseSoftmaxCrossEntropy(const int count,
...@@ -332,12 +323,42 @@ void SparseSoftmaxCrossEntropy(const int count, ...@@ -332,12 +323,42 @@ void SparseSoftmaxCrossEntropy(const int count,
Tensor* ignore); Tensor* ignore);
template <typename T, class Context> template <typename T, class Context>
void SoftmaxLossGrad(const int count, void SparseSoftmaxCrossEntropyGrad(const int count,
const int classes, const int classes,
const int outer_dim, const int outer_dim,
const int inner_dim, const int inner_dim,
const T* prob,
const T* labels, const T* labels,
T* valid,
Tensor* ignore,
T* dXdata);
/******************** loss.sparse_softmax_focal_loss ********************/
template <typename T, class Context>
void SparseSoftmaxFocalLoss(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float alpha,
const float gamma,
const T* prob, const T* prob,
const T* labels,
T* scale,
T* loss,
T* valid,
Tensor* ignore);
template <typename T, class Context>
void SparseSoftmaxFocalLossGrad(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float gamma,
const float eps,
const T* scale,
const T* prob,
const T* labels,
T* valid, T* valid,
Tensor* ignore, Tensor* ignore,
T* dXdata); T* dXdata);
...@@ -422,6 +443,15 @@ void MemoryData(const int count, ...@@ -422,6 +443,15 @@ void MemoryData(const int count,
const Tx* x, const Tx* x,
Ty* y); Ty* y);
/******************** utils.one_hot ********************/
template <typename T, class Context>
void OneHot(const int count,
const int depth,
const int on_value,
const T* x,
T* y);
/******************** vision.conv ********************/ /******************** vision.conv ********************/
template <typename T, class Context> template <typename T, class Context>
......
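The relocated utils.one_hot kernel scatters on_value at the positions indexed by x; a NumPy sketch of that contract (off_value is assumed to be pre-filled by the caller, here folded into the helper for completeness):

```python
import numpy as np

# NumPy sketch of the OneHot kernel's contract (illustrative only).
def one_hot(x, depth, on_value=1, off_value=0):
    y = np.full((x.size, depth), off_value, dtype=np.int64)
    y[np.arange(x.size), x] = on_value    # scatter on_value at the labels
    return y

print(one_hot(np.array([0, 2, 1]), depth=3))
# [[1 0 0]
#  [0 0 1]
#  [0 1 0]]
```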
...@@ -20,7 +20,11 @@ option['device'] = 'CPU' ...@@ -20,7 +20,11 @@ option['device'] = 'CPU'
option['gpu_id'] = 0 option['gpu_id'] = 0
option['use_cudnn'] = False option['use_cudnn'] = False
option['random_seed'] = 3 option['random_seed'] = 3
option['debug_mode'] = True
# if True, disable Dragon-Memonger
option['debug_mode'] = False
option['share_grads'] = False # set by Dragon-Memonger
option['allow_mirrow_stage'] = True # default
def EnableCPU(): def EnableCPU():
global option global option
...@@ -32,8 +36,8 @@ def EnableCUDA(gpu_id=0, use_cudnn=True): ...@@ -32,8 +36,8 @@ def EnableCUDA(gpu_id=0, use_cudnn=True):
option['gpu_id'] = gpu_id option['gpu_id'] = gpu_id
option['use_cudnn'] = use_cudnn option['use_cudnn'] = use_cudnn
# TODO(Pan): please not use @setter # TODO(PhyscalX): please do not use @setter
# TODO(Pan): seems that it can't change the global value # TODO(PhyscalX): it seems that it can't change the global value
def SetRandomSeed(seed): def SetRandomSeed(seed):
global option global option
......
...@@ -176,6 +176,6 @@ def Restore(filename, format=0): ...@@ -176,6 +176,6 @@ def Restore(filename, format=0):
FeedTensor(key, ndarray) FeedTensor(key, ndarray)
elif format == 1: elif format == 1:
# TODO(pan): caffemodel can't save the tensor name # TODO(PhyscalX): caffemodel can't save the tensor name
# TODO(pan): we simply use 'Scope + LayerName + @paramX' # TODO(PhyscalX): we simply use 'Scope + LayerName + @paramX'
RestoreCC(filename, '', format) RestoreCC(filename, '', format)
\ No newline at end of file
# --------------------------------------------------------
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
def share_grads(enabled=True):
from dragon.config import option
option['share_grads'] = enabled
def drop(op_func, *args, **kwargs):
kwargs['mirrow_stage'] = True
return op_func(*args, **kwargs)
\ No newline at end of file
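A possible way to drive the two knobs this module exposes; the import path follows this commit's layout and the network snippet is hypothetical:

```python
import dragon.memonger as memonger
import dragon.ops as ops

# Let gradient operators reuse a pooled buffer for their output gradients.
memonger.share_grads(True)

# Wrap an op so its output may be overwritten (mirrow stage) and
# recomputed on demand; `x` stands for some upstream Tensor.
# y = memonger.drop(ops.Relu, x)
```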
...@@ -4,12 +4,12 @@ ...@@ -4,12 +4,12 @@
# Written by Ting Pan # Written by Ting Pan
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import print_function
import numpy as np import numpy as np
import dragon.core.workspace as ws import dragon.core.workspace as ws
import dragon.ops as ops import dragon.ops as ops
import dragon.vm.theano as theano import dragon.vm.theano as theano
from multiprocessing import Process, Queue from multiprocessing import Process, Queue
from dragon.config import logger
""" How to custom a RunOp in Dragon """ """ How to custom a RunOp in Dragon """
...@@ -32,7 +32,7 @@ class Fetcher(Process): ...@@ -32,7 +32,7 @@ class Fetcher(Process):
self.daemon = True self.daemon = True
def cleanup(): def cleanup():
logger.info('Terminating Fetcher......') print('Terminating Fetcher......')
self.terminate() self.terminate()
self.join() self.join()
...@@ -104,4 +104,4 @@ if __name__ == '__main__': ...@@ -104,4 +104,4 @@ if __name__ == '__main__':
foo() foo()
# fetch # fetch
logger.info('y \n-------------- \n', y.get_value(), '\n') print('y \n-------------- \n', y.get_value(), '\n')
\ No newline at end of file \ No newline at end of file
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
# Written by Ting Pan # Written by Ting Pan
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import print_function
import numpy as np import numpy as np
import dragon.core.workspace as ws import dragon.core.workspace as ws
import dragon.ops as ops import dragon.ops as ops
from dragon.core.tensor import Tensor from dragon.core.tensor import Tensor
import dragon.vm.theano.tensor as T import dragon.vm.theano.tensor as T
import dragon.vm.theano as theano import dragon.vm.theano as theano
from dragon.config import logger
""" How to custom a TemplateOp in Dragon """ """ How to custom a TemplateOp in Dragon """
...@@ -91,14 +91,14 @@ if __name__ == '__main__': ...@@ -91,14 +91,14 @@ if __name__ == '__main__':
foo = theano.function(outputs=y) foo = theano.function(outputs=y)
# feed # feed
ws.FeedTensor(x1, np.ones((5, 3))) ws.FeedTensor(x1, np.ones((5, 3), dtype=np.float32))
ws.FeedTensor(x2, np.ones((5, 3)) * 5.0) ws.FeedTensor(x2, np.ones((5, 3), dtype=np.float32) * 5.0)
# run # run
foo() foo()
# fetch # fetch
logger.info('y \n-------------- \n', y.get_value(), '\n') print('y \n-------------- \n', y.get_value(), '\n')
logger.info('dx1 \n-------------- \n', dx1.get_value(), '\n') print('dx1 \n-------------- \n', dx1.get_value(), '\n')
logger.info('dx2 \n-------------- \n', dx2.get_value(), '\n') print('dx2 \n-------------- \n', dx2.get_value(), '\n')
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
from dragon.core.tensor import Tensor from dragon.core.tensor import Tensor
import numpy as np import numpy as np
def SoftmaxLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs): def SparseSoftmaxCrossEntropy(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs):
""" """
:param inputs: a list of Tensors containing [input, label] :param inputs: a list of Tensors containing [input, label]
:param axis: an int specifying the axis along which to compute softmax :param axis: an int specifying the axis along which to compute softmax
...@@ -17,12 +17,12 @@ def SoftmaxLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwarg ...@@ -17,12 +17,12 @@ def SoftmaxLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwarg
""" """
if not isinstance(inputs, list) or len(inputs) != 2: if not isinstance(inputs, list) or len(inputs) != 2:
raise RuntimeError('SoftmaxLoss Operator accepts a list of 2 Tensors') raise RuntimeError('SparseSoftmaxCrossEntropy Operator accepts a list of 2 Tensors')
args = locals(); kwargs = args['kwargs'] args = locals(); kwargs = args['kwargs']
del args['kwargs']; kwargs = dict(args, **kwargs) del args['kwargs']; kwargs = dict(args, **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SoftmaxLoss', **kwargs) output = Tensor.CreateOperator(nout=1, op_type='SparseSoftmaxCrossEntropy', **kwargs)
if inputs[0].shape is not None: if inputs[0].shape is not None:
if normalization != 'UNIT': output.shape = [1] if normalization != 'UNIT': output.shape = [1]
...@@ -35,7 +35,7 @@ def SoftmaxLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwarg ...@@ -35,7 +35,7 @@ def SoftmaxLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwarg
return output return output
def SigmoidCrossEntropyLoss(inputs, normalization='FULL', **kwargs): def SigmoidCrossEntropy(inputs, normalization='FULL', **kwargs):
""" """
:param inputs: a list of Tensors containing [input, label] :param inputs: a list of Tensors containing [input, label]
:param normalization: a str of (UNIT, FULL, BATCH_SIZE, NONE) :param normalization: a str of (UNIT, FULL, BATCH_SIZE, NONE)
...@@ -43,12 +43,12 @@ def SigmoidCrossEntropyLoss(inputs, normalization='FULL', **kwargs): ...@@ -43,12 +43,12 @@ def SigmoidCrossEntropyLoss(inputs, normalization='FULL', **kwargs):
""" """
if not isinstance(inputs, list) or len(inputs) != 2: if not isinstance(inputs, list) or len(inputs) != 2:
raise RuntimeError('SigmoidCrossEntropyLoss Operator accepts a list of 2 Tensors') raise RuntimeError('SigmoidCrossEntropy Operator accepts a list of 2 Tensors')
args = locals(); kwargs = args['kwargs'] args = locals(); kwargs = args['kwargs']
del args['kwargs']; kwargs = dict(args, **kwargs) del args['kwargs']; kwargs = dict(args, **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SigmoidCrossEntropyLoss', **kwargs) output = Tensor.CreateOperator(nout=1, op_type='SigmoidCrossEntropy', **kwargs)
if inputs[0].shape is not None: if inputs[0].shape is not None:
if normalization != 'UNIT': output.shape = [1] if normalization != 'UNIT': output.shape = [1]
...@@ -57,7 +57,7 @@ def SigmoidCrossEntropyLoss(inputs, normalization='FULL', **kwargs): ...@@ -57,7 +57,7 @@ def SigmoidCrossEntropyLoss(inputs, normalization='FULL', **kwargs):
return output return output
def SoftmaxCrossEntropyLoss(inputs, axis=1, normalization='FULL', **kwargs): def SoftmaxCrossEntropy(inputs, axis=1, normalization='FULL', **kwargs):
""" """
:param inputs: a list of Tensors containing [input, label] :param inputs: a list of Tensors containing [input, label]
:param normalization: a str of (UNIT, FULL, BATCH_SIZE, NONE) :param normalization: a str of (UNIT, FULL, BATCH_SIZE, NONE)
...@@ -65,12 +65,12 @@ def SoftmaxCrossEntropyLoss(inputs, axis=1, normalization='FULL', **kwargs): ...@@ -65,12 +65,12 @@ def SoftmaxCrossEntropyLoss(inputs, axis=1, normalization='FULL', **kwargs):
""" """
if not isinstance(inputs, list) or len(inputs) != 2: if not isinstance(inputs, list) or len(inputs) != 2:
raise RuntimeError('SoftmaxCrossEntropyLoss Operator accepts a list of 2 Tensors') raise RuntimeError('SoftmaxCrossEntropy Operator accepts a list of 2 Tensors')
args = locals(); kwargs = args['kwargs'] args = locals(); kwargs = args['kwargs']
del args['kwargs']; kwargs = dict(args, **kwargs) del args['kwargs']; kwargs = dict(args, **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SoftmaxCrossEntropyLoss', **kwargs) output = Tensor.CreateOperator(nout=1, op_type='SoftmaxCrossEntropy', **kwargs)
if inputs[0].shape is not None: if inputs[0].shape is not None:
if normalization != 'UNIT': output.shape = [1] if normalization != 'UNIT': output.shape = [1]
...@@ -96,7 +96,7 @@ def SmoothL1Loss(inputs, sigma=1.0, **kwargs): ...@@ -96,7 +96,7 @@ def SmoothL1Loss(inputs, sigma=1.0, **kwargs):
return output return output
def L1Loss(inputs, normalization='BATCH_SIZE', coeff=1.0, **kwargs): def L1Loss(inputs, normalization='BATCH_SIZE', **kwargs):
if not isinstance(inputs, list) or len(inputs) < 2: if not isinstance(inputs, list) or len(inputs) < 2:
raise RuntimeError('L1Loss Operator accepts a list of at least 2 Tensors') raise RuntimeError('L1Loss Operator accepts a list of at least 2 Tensors')
...@@ -109,7 +109,7 @@ def L1Loss(inputs, normalization='BATCH_SIZE', coeff=1.0, **kwargs): ...@@ -109,7 +109,7 @@ def L1Loss(inputs, normalization='BATCH_SIZE', coeff=1.0, **kwargs):
return output return output
def L2Loss(inputs, normalization='BATCH_SIZE', coeff=1.0, **kwargs): def L2Loss(inputs, normalization='BATCH_SIZE', **kwargs):
if not isinstance(inputs, list) or len(inputs) < 2: if not isinstance(inputs, list) or len(inputs) < 2:
raise RuntimeError('L2Loss Operator accepts a list of at least 2 Tensors') raise RuntimeError('L2Loss Operator accepts a list of at least 2 Tensors')
...@@ -120,3 +120,35 @@ def L2Loss(inputs, normalization='BATCH_SIZE', coeff=1.0, **kwargs): ...@@ -120,3 +120,35 @@ def L2Loss(inputs, normalization='BATCH_SIZE', coeff=1.0, **kwargs):
output = Tensor.CreateOperator(nout=1, op_type='L2Loss', **kwargs) output = Tensor.CreateOperator(nout=1, op_type='L2Loss', **kwargs)
if inputs[0].shape is not None: output.shape = [1] if inputs[0].shape is not None: output.shape = [1]
return output return output
def SparseSoftmaxFocalLoss(inputs, axis=1, normalization='VALID', ignore_labels=(),
alpha=0.25, gamma=2.0, eps=1e-10, use_pseudo_metric=True, **kwargs):
"""
:param inputs: a list of Tensors containing [input, label]
:param axis: an int specifying the axis along which to compute softmax
:param normalization: a str of (UNIT, FULL, VALID, BATCH_SIZE, NONE)
:param ignore_labels: a list of ints containing the labels to ignore
:param alpha: a float of the alpha value
:param gamma: a float of the gamma value
:param eps: a float of the eps value
:return: a Tensor of loss with the shape (1,)
"""
if not isinstance(inputs, list) or len(inputs) != 2:
raise RuntimeError('SparseSoftmaxFocalLoss Operator accepts a list of 2 Tensors')
args = locals(); kwargs = args['kwargs']
del args['kwargs']; kwargs = dict(args, **kwargs)
output = Tensor.CreateOperator(nout=1, op_type='SparseSoftmaxFocalLoss', **kwargs)
if inputs[0].shape is not None:
if normalization != 'UNIT': output.shape = [1]
elif all(dim is not None for dim in inputs[0].shape):
outer_dim = int(np.prod(inputs[0].shape[0 : axis]))
inner_dim = int(np.prod(inputs[0].shape[axis + 1 :]))
output.shape = [outer_dim * inner_dim]
else: output.shape = [None]
return output
\ No newline at end of file
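For reference, the loss these kernels compute is FL(p_t) = -alpha * (1 - p_t)^gamma * log(p_t), with p_t the softmax probability of the true class (Lin et al.). A standalone NumPy sketch whose defaults mirror the wrapper above:

```python
# Reference sketch of sparse softmax focal loss, independent of Dragon.
import numpy as np

def sparse_softmax_focal_loss(logits, labels, alpha=0.25, gamma=2.0, eps=1e-10):
    probs = np.exp(logits - logits.max(axis=1, keepdims=True))
    probs /= probs.sum(axis=1, keepdims=True)
    p_t = probs[np.arange(len(labels)), labels]        # prob of the true class
    scale = alpha * np.power(1.0 - p_t, gamma)         # the 'scale' buffer above
    return -scale * np.log(np.maximum(p_t, eps))       # per-example loss

logits = np.array([[2.0, 0.5, -1.0], [0.1, 0.2, 3.0]])
print(sparse_softmax_focal_loss(logits, np.array([0, 2])))
```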
...@@ -197,7 +197,7 @@ def BiasAdd(inputs, data_format='NCHW', **kwargs): ...@@ -197,7 +197,7 @@ def BiasAdd(inputs, data_format='NCHW', **kwargs):
return output return output
def DenseConcat(inputs, axis=1, **kwargs): def DenseConcat(inputs, growth_rate, axis=1, **kwargs):
if not isinstance(inputs, list) or len(inputs) != 2: if not isinstance(inputs, list) or len(inputs) != 2:
raise RuntimeError('DenseConcat Operator accepts 2 Tensors as inputs') raise RuntimeError('DenseConcat Operator accepts 2 Tensors as inputs')
...@@ -207,6 +207,7 @@ def DenseConcat(inputs, axis=1, **kwargs): ...@@ -207,6 +207,7 @@ def DenseConcat(inputs, axis=1, **kwargs):
kwargs['num_input'] = len(inputs) kwargs['num_input'] = len(inputs)
output = Tensor.CreateOperator(nout=1, op_type='DenseConcat', **kwargs) output = Tensor.CreateOperator(nout=1, op_type='DenseConcat', **kwargs)
if all(input.shape is not None for input in inputs): if all(input.shape is not None for input in inputs):
if all(input.shape[axis] is not None for input in inputs): if all(input.shape[axis] is not None for input in inputs):
output.shape = inputs[0].shape[:] output.shape = inputs[0].shape[:]
......
...@@ -52,12 +52,13 @@ Softmax = act.Softmax ...@@ -52,12 +52,13 @@ Softmax = act.Softmax
Dropout = act.Dropout Dropout = act.Dropout
# loss # loss
SoftmaxLoss = loss.SoftmaxLoss SparseSoftmaxCrossEntropy = loss.SparseSoftmaxCrossEntropy
SigmoidCrossEntropyLoss = loss.SigmoidCrossEntropyLoss SigmoidCrossEntropy = loss.SigmoidCrossEntropy
SoftmaxCrossEntropyLoss = loss.SoftmaxCrossEntropyLoss SoftmaxCrossEntropy = loss.SoftmaxCrossEntropy
SmoothL1Loss = loss.SmoothL1Loss SmoothL1Loss = loss.SmoothL1Loss
L1Loss = loss.L1Loss L1Loss = loss.L1Loss
L2Loss = loss.L2Loss L2Loss = loss.L2Loss
SparseSoftmaxFocalLoss = loss.SparseSoftmaxFocalLoss
# arithmetic # arithmetic
Add = math.Add Add = math.Add
......
...@@ -50,6 +50,7 @@ message OperatorDef { ...@@ -50,6 +50,7 @@ message OperatorDef {
repeated Argument arg= 5; repeated Argument arg= 5;
optional DeviceOption device_option = 6; optional DeviceOption device_option = 6;
optional bool debug_mode = 7 [default = false]; optional bool debug_mode = 7 [default = false];
optional bool share_grads = 8 [default = false];
} }
message GradientTarget { message GradientTarget {
...@@ -65,7 +66,6 @@ message UpdateTarget { ...@@ -65,7 +66,6 @@ message UpdateTarget {
repeated Argument arg = 4; repeated Argument arg = 4;
} }
// simply copy from caffe1
message TensorFiller { message TensorFiller {
optional string tensor = 1; optional string tensor = 1;
optional string type = 2 [default = 'constant']; optional string type = 2 [default = 'constant'];
...@@ -89,4 +89,5 @@ message GraphDef { ...@@ -89,4 +89,5 @@ message GraphDef {
repeated GradientTarget g_target = 8; repeated GradientTarget g_target = 8;
repeated UpdateTarget u_target = 9; repeated UpdateTarget u_target = 9;
optional bool debug_mode = 10 [default = false]; optional bool debug_mode = 10 [default = false];
optional bool share_grads = 11 [default = false];
} }
\ No newline at end of file
...@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default() ...@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor( DESCRIPTOR = _descriptor.FileDescriptor(
name='dragon.proto', name='dragon.proto',
package='', package='',
serialized_pb=_b('\n\x0c\x64ragon.proto\"\xf7\x01\n\x0bTensorProto\x12\x0c\n\x04\x64ims\x18\x01 \x03(\x05\x12/\n\tdata_type\x18\x02 \x01(\x0e\x32\x15.TensorProto.DataType:\x05\x46LOAT\x12\x16\n\nfloat_data\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x16\n\nint32_data\x18\x04 \x03(\x05\x42\x02\x10\x01\x12\x11\n\tbyte_data\x18\x05 \x01(\x0c\x12\x13\n\x0bstring_data\x18\x06 \x03(\x0c\x12\x0c\n\x04name\x18\x07 \x01(\t\"C\n\x08\x44\x61taType\x12\t\n\x05\x46LOAT\x10\x01\x12\t\n\x05INT32\x10\x02\x12\x08\n\x04\x42YTE\x10\x03\x12\n\n\x06STRING\x10\x04\x12\x0b\n\x07\x46LOAT16\x10\x0c\",\n\x0cTensorProtos\x12\x1c\n\x06protos\x18\x01 \x03(\x0b\x32\x0c.TensorProto\"\x80\x01\n\x08\x41rgument\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\t\n\x01\x66\x18\x02 \x01(\x02\x12\t\n\x01i\x18\x03 \x01(\x05\x12\x0b\n\x03i64\x18\t \x01(\x03\x12\t\n\x01s\x18\x04 \x01(\t\x12\t\n\x01\x62\x18\x08 \x01(\x08\x12\x0e\n\x06\x66loats\x18\x05 \x03(\x02\x12\x0c\n\x04ints\x18\x06 \x03(\x05\x12\x0f\n\x07strings\x18\x07 \x03(\t\"p\n\x0c\x44\x65viceOption\x12%\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\x0b.DeviceType:\x03\x43PU\x12\x11\n\x06gpu_id\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0brandom_seed\x18\x03 \x01(\r:\x01\x33\x12\x0e\n\x06\x65ngine\x18\x04 \x01(\t\"\xa1\x01\n\x0bOperatorDef\x12\r\n\x05input\x18\x01 \x03(\t\x12\x0e\n\x06output\x18\x02 \x03(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0c\n\x04type\x18\x04 \x01(\t\x12\x16\n\x03\x61rg\x18\x05 \x03(\x0b\x32\t.Argument\x12$\n\rdevice_option\x18\x06 \x01(\x0b\x32\r.DeviceOption\x12\x19\n\ndebug_mode\x18\x07 \x01(\x08:\x05\x66\x61lse\"=\n\x0eGradientTarget\x12\x0c\n\x04\x63ost\x18\x01 \x01(\t\x12\x0b\n\x03wrt\x18\x02 \x01(\t\x12\x10\n\x08\x65xternal\x18\x03 \x01(\t\"R\n\x0cUpdateTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06tensor\x18\x03 \x03(\t\x12\x16\n\x03\x61rg\x18\x04 \x03(\x0b\x32\t.Argument\"\x8d\x02\n\x0cTensorFiller\x12\x0e\n\x06tensor\x18\x01 \x01(\t\x12\x16\n\x04type\x18\x02 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03low\x18\x04 \x01(\x02:\x01\x30\x12\x0f\n\x04high\x18\x05 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x06 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x07 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x08 \x01(\x02:\x01\x33\x12\x39\n\rvariance_norm\x18\t \x01(\x0e\x32\x1a.TensorFiller.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"\xf3\x01\n\x08GraphDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x02op\x18\x02 \x03(\x0b\x32\x0c.OperatorDef\x12\x12\n\ngraph_type\x18\x03 \x01(\t\x12$\n\rdevice_option\x18\x05 \x01(\x0b\x32\r.DeviceOption\x12\x16\n\x03\x61rg\x18\x06 \x03(\x0b\x32\t.Argument\x12\x0e\n\x06target\x18\x07 \x03(\t\x12!\n\x08g_target\x18\x08 \x03(\x0b\x32\x0f.GradientTarget\x12\x1f\n\x08u_target\x18\t \x03(\x0b\x32\r.UpdateTarget\x12\x19\n\ndebug_mode\x18\n \x01(\x08:\x05\x66\x61lse*+\n\nDeviceType\x12\x07\n\x03\x43PU\x10\x00\x12\x08\n\x04\x43UDA\x10\x01\x12\n\n\x06OPENCL\x10\x02') serialized_pb=_b('\n\x0c\x64ragon.proto\"\xf7\x01\n\x0bTensorProto\x12\x0c\n\x04\x64ims\x18\x01 \x03(\x05\x12/\n\tdata_type\x18\x02 \x01(\x0e\x32\x15.TensorProto.DataType:\x05\x46LOAT\x12\x16\n\nfloat_data\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x16\n\nint32_data\x18\x04 \x03(\x05\x42\x02\x10\x01\x12\x11\n\tbyte_data\x18\x05 \x01(\x0c\x12\x13\n\x0bstring_data\x18\x06 \x03(\x0c\x12\x0c\n\x04name\x18\x07 
\x01(\t\"C\n\x08\x44\x61taType\x12\t\n\x05\x46LOAT\x10\x01\x12\t\n\x05INT32\x10\x02\x12\x08\n\x04\x42YTE\x10\x03\x12\n\n\x06STRING\x10\x04\x12\x0b\n\x07\x46LOAT16\x10\x0c\",\n\x0cTensorProtos\x12\x1c\n\x06protos\x18\x01 \x03(\x0b\x32\x0c.TensorProto\"\x80\x01\n\x08\x41rgument\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\t\n\x01\x66\x18\x02 \x01(\x02\x12\t\n\x01i\x18\x03 \x01(\x05\x12\x0b\n\x03i64\x18\t \x01(\x03\x12\t\n\x01s\x18\x04 \x01(\t\x12\t\n\x01\x62\x18\x08 \x01(\x08\x12\x0e\n\x06\x66loats\x18\x05 \x03(\x02\x12\x0c\n\x04ints\x18\x06 \x03(\x05\x12\x0f\n\x07strings\x18\x07 \x03(\t\"p\n\x0c\x44\x65viceOption\x12%\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\x0b.DeviceType:\x03\x43PU\x12\x11\n\x06gpu_id\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0brandom_seed\x18\x03 \x01(\r:\x01\x33\x12\x0e\n\x06\x65ngine\x18\x04 \x01(\t\"\xbd\x01\n\x0bOperatorDef\x12\r\n\x05input\x18\x01 \x03(\t\x12\x0e\n\x06output\x18\x02 \x03(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0c\n\x04type\x18\x04 \x01(\t\x12\x16\n\x03\x61rg\x18\x05 \x03(\x0b\x32\t.Argument\x12$\n\rdevice_option\x18\x06 \x01(\x0b\x32\r.DeviceOption\x12\x19\n\ndebug_mode\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x1a\n\x0bshare_grads\x18\x08 \x01(\x08:\x05\x66\x61lse\"=\n\x0eGradientTarget\x12\x0c\n\x04\x63ost\x18\x01 \x01(\t\x12\x0b\n\x03wrt\x18\x02 \x01(\t\x12\x10\n\x08\x65xternal\x18\x03 \x01(\t\"R\n\x0cUpdateTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06tensor\x18\x03 \x03(\t\x12\x16\n\x03\x61rg\x18\x04 \x03(\x0b\x32\t.Argument\"\x8d\x02\n\x0cTensorFiller\x12\x0e\n\x06tensor\x18\x01 \x01(\t\x12\x16\n\x04type\x18\x02 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03low\x18\x04 \x01(\x02:\x01\x30\x12\x0f\n\x04high\x18\x05 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x06 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x07 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x08 \x01(\x02:\x01\x33\x12\x39\n\rvariance_norm\x18\t \x01(\x0e\x32\x1a.TensorFiller.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"\x8f\x02\n\x08GraphDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x02op\x18\x02 \x03(\x0b\x32\x0c.OperatorDef\x12\x12\n\ngraph_type\x18\x03 \x01(\t\x12$\n\rdevice_option\x18\x05 \x01(\x0b\x32\r.DeviceOption\x12\x16\n\x03\x61rg\x18\x06 \x03(\x0b\x32\t.Argument\x12\x0e\n\x06target\x18\x07 \x03(\t\x12!\n\x08g_target\x18\x08 \x03(\x0b\x32\x0f.GradientTarget\x12\x1f\n\x08u_target\x18\t \x03(\x0b\x32\r.UpdateTarget\x12\x19\n\ndebug_mode\x18\n \x01(\x08:\x05\x66\x61lse\x12\x1a\n\x0bshare_grads\x18\x0b \x01(\x08:\x05\x66\x61lse*+\n\nDeviceType\x12\x07\n\x03\x43PU\x10\x00\x12\x08\n\x04\x43UDA\x10\x01\x12\n\n\x06OPENCL\x10\x02')
) )
_sym_db.RegisterFileDescriptor(DESCRIPTOR) _sym_db.RegisterFileDescriptor(DESCRIPTOR)
...@@ -44,8 +44,8 @@ _DEVICETYPE = _descriptor.EnumDescriptor( ...@@ -44,8 +44,8 @@ _DEVICETYPE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=1386, serialized_start=1442,
serialized_end=1429, serialized_end=1485,
) )
_sym_db.RegisterEnumDescriptor(_DEVICETYPE) _sym_db.RegisterEnumDescriptor(_DEVICETYPE)
...@@ -110,8 +110,8 @@ _TENSORFILLER_VARIANCENORM = _descriptor.EnumDescriptor( ...@@ -110,8 +110,8 @@ _TENSORFILLER_VARIANCENORM = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=1086, serialized_start=1114,
serialized_end=1138, serialized_end=1166,
) )
_sym_db.RegisterEnumDescriptor(_TENSORFILLER_VARIANCENORM) _sym_db.RegisterEnumDescriptor(_TENSORFILLER_VARIANCENORM)
...@@ -412,6 +412,13 @@ _OPERATORDEF = _descriptor.Descriptor( ...@@ -412,6 +412,13 @@ _OPERATORDEF = _descriptor.Descriptor(
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor(
name='share_grads', full_name='OperatorDef.share_grads', index=7,
number=8, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
], ],
extensions=[ extensions=[
], ],
...@@ -424,7 +431,7 @@ _OPERATORDEF = _descriptor.Descriptor( ...@@ -424,7 +431,7 @@ _OPERATORDEF = _descriptor.Descriptor(
oneofs=[ oneofs=[
], ],
serialized_start=558, serialized_start=558,
serialized_end=719, serialized_end=747,
) )
...@@ -467,8 +474,8 @@ _GRADIENTTARGET = _descriptor.Descriptor( ...@@ -467,8 +474,8 @@ _GRADIENTTARGET = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=721, serialized_start=749,
serialized_end=782, serialized_end=810,
) )
...@@ -518,8 +525,8 @@ _UPDATETARGET = _descriptor.Descriptor( ...@@ -518,8 +525,8 @@ _UPDATETARGET = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=784, serialized_start=812,
serialized_end=866, serialized_end=894,
) )
...@@ -605,8 +612,8 @@ _TENSORFILLER = _descriptor.Descriptor( ...@@ -605,8 +612,8 @@ _TENSORFILLER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=869, serialized_start=897,
serialized_end=1138, serialized_end=1166,
) )
...@@ -680,6 +687,13 @@ _GRAPHDEF = _descriptor.Descriptor( ...@@ -680,6 +687,13 @@ _GRAPHDEF = _descriptor.Descriptor(
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor(
name='share_grads', full_name='GraphDef.share_grads', index=9,
number=11, type=8, cpp_type=7, label=1,
has_default_value=True, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
], ],
extensions=[ extensions=[
], ],
...@@ -691,8 +705,8 @@ _GRAPHDEF = _descriptor.Descriptor( ...@@ -691,8 +705,8 @@ _GRAPHDEF = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=1141, serialized_start=1169,
serialized_end=1384, serialized_end=1440,
) )
_TENSORPROTO.fields_by_name['data_type'].enum_type = _TENSORPROTO_DATATYPE _TENSORPROTO.fields_by_name['data_type'].enum_type = _TENSORPROTO_DATATYPE
......
...@@ -11,7 +11,7 @@ from .vision import ConvolutionLayer, DeconvolutionLayer, PoolingLayer, \ ...@@ -11,7 +11,7 @@ from .vision import ConvolutionLayer, DeconvolutionLayer, PoolingLayer, \
from .neuron import ReLULayer, DropoutLayer, TanhLayer, PowerLayer from .neuron import ReLULayer, DropoutLayer, TanhLayer, PowerLayer
from .loss import SoftmaxWithLossLayer, SigmoidCrossEntropyLossLayer, \ from .loss import SoftmaxWithLossLayer, SigmoidCrossEntropyLossLayer, \
L2LossLayer, SmoothL1LossLayer L2LossLayer, SmoothL1LossLayer, SoftmaxWithFocalLossLayer
from .mpi import MPIBroadcastLayer, MPIGatherLayer from .mpi import MPIBroadcastLayer, MPIGatherLayer
......
...@@ -93,8 +93,9 @@ class ConcatLayer(Layer): ...@@ -93,8 +93,9 @@ class ConcatLayer(Layer):
class DenseConcatLayer(Layer): class DenseConcatLayer(Layer):
def __init__(self, LayerParameter): def __init__(self, LayerParameter):
super(DenseConcatLayer, self).__init__(LayerParameter) super(DenseConcatLayer, self).__init__(LayerParameter)
param = LayerParameter.concat_param param = LayerParameter.dense_concat_param
self._param = {'axis': param.axis} self._param = {'axis': param.axis,
'growth_rate': param.growth_rate}
def Setup(self, bottom): def Setup(self, bottom):
super(DenseConcatLayer, self).Setup(bottom) super(DenseConcatLayer, self).Setup(bottom)
...@@ -268,7 +269,7 @@ class BNLayer(Layer): ...@@ -268,7 +269,7 @@ class BNLayer(Layer):
if scale_param.HasField('filler'): if scale_param.HasField('filler'):
self.Fill(scale, scale_param, 'filler') self.Fill(scale, scale_param, 'filler')
else: scale.Constant(value=1.0) else: scale.Uniform(low=0.0, high=1.0)
self.Fill(bias, scale_param, 'bias_filler') self.Fill(bias, scale_param, 'bias_filler')
self.norm_blobs = [{'data': mean, 'diff': None}, self.norm_blobs = [{'data': mean, 'diff': None},
{'data': var, 'diff': None}] {'data': var, 'diff': None}]
......
...@@ -19,14 +19,17 @@ class Layer(object): ...@@ -19,14 +19,17 @@ class Layer(object):
self._name = LayerParameter.name self._name = LayerParameter.name
self._blobs = [] self._blobs = []
self._param = {} self._param = {}
self._mpi_param = {} self._common_param = {}
for include in LayerParameter.include: for include in LayerParameter.include:
mpi_rank = [int(rank) for rank in include.mpi_rank] mpi_rank = [int(rank) for rank in include.mpi_rank]
if len(mpi_rank) > 0: self._mpi_param['mpi_rank'] = mpi_rank if len(mpi_rank) > 0: self._common_param['mpi_rank'] = mpi_rank
if LayerParameter.HasField('mirrow_stage'):
self._common_param['mirrow_stage'] = LayerParameter.mirrow_stage
def Setup(self, bottom): def Setup(self, bottom):
self._param = dict(self._param, **self._mpi_param) self._param = dict(self._param, **self._common_param)
def Fill(self, tensor, param, filler): def Fill(self, tensor, param, filler):
""" wrapper for caffe filler """ """ wrapper for caffe filler """
......
...@@ -24,7 +24,7 @@ class SoftmaxWithLossLayer(Layer): ...@@ -24,7 +24,7 @@ class SoftmaxWithLossLayer(Layer):
def Setup(self, bottom): def Setup(self, bottom):
super(SoftmaxWithLossLayer, self).Setup(bottom) super(SoftmaxWithLossLayer, self).Setup(bottom)
return ops.SoftmaxLoss(bottom, **self._param) return ops.SparseSoftmaxCrossEntropy(bottom, **self._param)
class SigmoidCrossEntropyLossLayer(Layer): class SigmoidCrossEntropyLossLayer(Layer):
...@@ -40,7 +40,7 @@ class SigmoidCrossEntropyLossLayer(Layer): ...@@ -40,7 +40,7 @@ class SigmoidCrossEntropyLossLayer(Layer):
def Setup(self, bottom): def Setup(self, bottom):
super(SigmoidCrossEntropyLossLayer, self).Setup(bottom) super(SigmoidCrossEntropyLossLayer, self).Setup(bottom)
return ops.SigmoidCrossEntropyLoss(bottom, **self._param) return ops.SigmoidCrossEntropy(bottom, **self._param)
class L2LossLayer(Layer): class L2LossLayer(Layer):
...@@ -64,3 +64,27 @@ class SmoothL1LossLayer(Layer): ...@@ -64,3 +64,27 @@ class SmoothL1LossLayer(Layer):
def Setup(self, bottom): def Setup(self, bottom):
super(SmoothL1LossLayer, self).Setup(bottom) super(SmoothL1LossLayer, self).Setup(bottom)
return ops.SmoothL1Loss(bottom, **self._param) return ops.SmoothL1Loss(bottom, **self._param)
class SoftmaxWithFocalLossLayer(Layer):
def __init__(self, LayerParameter):
super(SoftmaxWithFocalLossLayer, self).__init__(LayerParameter)
param = LayerParameter.loss_param
softmax_param = LayerParameter.softmax_param
focal_loss_param = LayerParameter.focal_loss_param
norm_mode = {0: 'FULL', 1: 'VALID', 2: 'BATCH_SIZE', 3: 'NONE'}
normalization = 'VALID'
if param.HasField('normalize'):
if not param.normalize: normalization='BATCH_SIZE'
else: normalization = norm_mode[param.normalization]
self._param = {'axis': softmax_param.axis,
'normalization': normalization,
'ignore_labels': [param.ignore_label] if param.HasField('ignore_label') else [],
'alpha': float(focal_loss_param.alpha),
'gamma': float(focal_loss_param.gamma),
'eps': float(focal_loss_param.eps),
'use_pseudo_metric': focal_loss_param.use_pseudo_metric}
def Setup(self, bottom):
super(SoftmaxWithFocalLossLayer, self).Setup(bottom)
return ops.SparseSoftmaxFocalLoss(bottom, **self._param)
...@@ -20,7 +20,6 @@ def make_if_not_exist(path): ...@@ -20,7 +20,6 @@ def make_if_not_exist(path):
os.makedirs(path) os.makedirs(path)
def UnpackVariable(var, num): def UnpackVariable(var, num):
assert len > 0
if type(var) is list and len(var) == num: if type(var) is list and len(var) == num:
return var return var
else: else:
...@@ -277,7 +276,7 @@ def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False, ...@@ -277,7 +276,7 @@ def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
dilation = 1 dilation = 1
kernel_size = 3 kernel_size = 3
pad = int((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) / 2 pad = int(int((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) / 2)
net.conv5_1 = L.Convolution(net[name], num_output=512, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs) net.conv5_1 = L.Convolution(net[name], num_output=512, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
net.relu5_1 = L.ReLU(net.conv5_1, in_place=True) net.relu5_1 = L.ReLU(net.conv5_1, in_place=True)
net.conv5_2 = L.Convolution(net.relu5_1, num_output=512, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs) net.conv5_2 = L.Convolution(net.relu5_1, num_output=512, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
...@@ -319,7 +318,7 @@ def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False, ...@@ -319,7 +318,7 @@ def VGGNetBody(net, from_layer, need_fc=True, fully_conv=False, reduced=False,
else: else:
kernel_size = 7 kernel_size = 7
num_output = 4096 num_output = 4096
pad = int((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) / 2 pad = int(int((kernel_size + (dilation - 1) * (kernel_size - 1)) - 1) / 2)
net.fc6 = L.Convolution(net[name], num_output=num_output, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs) net.fc6 = L.Convolution(net[name], num_output=num_output, pad=pad, kernel_size=kernel_size, dilation=dilation, **kwargs)
net.relu6 = L.ReLU(net.fc6, in_place=True) net.relu6 = L.ReLU(net.fc6, in_place=True)
......
...@@ -318,6 +318,9 @@ message LayerParameter { ...@@ -318,6 +318,9 @@ message LayerParameter {
repeated string bottom = 3; // the name of each bottom blob repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob repeated string top = 4; // the name of each top blob
// The mirrow stage optimization
optional bool mirrow_stage = 162 [default = false];
// The train / test phase for computation. // The train / test phase for computation.
optional Phase phase = 10; optional Phase phase = 10;
...@@ -418,6 +421,8 @@ message LayerParameter { ...@@ -418,6 +421,8 @@ message LayerParameter {
optional ExpandDimsParameter expand_dims_param = 159; optional ExpandDimsParameter expand_dims_param = 159;
optional ProposalParameter proposal_param = 160; optional ProposalParameter proposal_param = 160;
optional BatchRenormParameter batch_renorm_param = 161; optional BatchRenormParameter batch_renorm_param = 161;
optional DenseConcatParameter dense_concat_param = 163;
optional FocalLossParameter focal_loss_param = 164;
} }
// Message that stores parameters used to apply transformation // Message that stores parameters used to apply transformation
...@@ -1494,3 +1499,16 @@ message BatchRenormParameter { ...@@ -1494,3 +1499,16 @@ message BatchRenormParameter {
optional float d_max = 5 [default = 5.0]; optional float d_max = 5 [default = 5.0];
optional float t_delta = 6 [default = 1.0]; optional float t_delta = 6 [default = 1.0];
} }
message DenseConcatParameter {
optional int32 axis = 1 [default = 1];
optional int32 growth_rate = 2 [default = 0];
}
message FocalLossParameter {
optional float alpha = 1 [default = 1.0];
optional float gamma = 2 [default = 0.25];
optional float eps = 3 [default = 1e-10];
optional bool use_pseudo_metric = 4 [default = true];
}
This diff could not be displayed because it is too large.
...@@ -119,7 +119,7 @@ def bias_add(value, bias, data_format='NCHW', name=None): ...@@ -119,7 +119,7 @@ def bias_add(value, bias, data_format='NCHW', name=None):
def sigmoid_cross_entropy_with_logits(logits, targets, name=None): def sigmoid_cross_entropy_with_logits(logits, targets, name=None):
return ops.SigmoidCrossEntropyLoss([logits, targets], normalization='UNIT', name=None) return ops.SigmoidCrossEntropy([logits, targets], normalization='UNIT', name=name)
def softmax_cross_entropy_with_logits(_sentinel=None, def softmax_cross_entropy_with_logits(_sentinel=None,
...@@ -131,13 +131,13 @@ def softmax_cross_entropy_with_logits(_sentinel=None, ...@@ -131,13 +131,13 @@ def softmax_cross_entropy_with_logits(_sentinel=None,
'with named arguments (labels=..., logits=..., ...)') 'with named arguments (labels=..., logits=..., ...)')
if dim == -1: dim = 1 if dim == -1: dim = 1
return ops.SoftmaxCrossEntropyLoss([logits, labels], axis=dim, normalization='UNIT', name=name) return ops.SoftmaxCrossEntropy([logits, labels], axis=dim, normalization='UNIT', name=name)
def sparse_softmax_cross_entropy_with_logits(logits, labels, dim=-1, name=None): def sparse_softmax_cross_entropy_with_logits(logits, labels, dim=-1, name=None):
if dim == -1: dim = 1 if dim == -1: dim = 1
return ops.SoftmaxLoss([logits, labels], axis=dim, normalization='UNIT', name=name) return ops.SparseSoftmaxCrossEntropy([logits, labels], axis=dim, normalization='UNIT', name=name)
def l2_loss(t, name=None): def l2_loss(t, name=None):
......
...@@ -77,10 +77,11 @@ def GraphDef_Update(graph_def, updater): ...@@ -77,10 +77,11 @@ def GraphDef_Update(graph_def, updater):
u_target.arg.add().CopyFrom(MakeArgument(k, v)) u_target.arg.add().CopyFrom(MakeArgument(k, v))
graph_def.u_target.extend([u_target]) graph_def.u_target.extend([u_target])
def GraphDef_Debug(graph_def): def GraphDef_Opt(graph_def):
""" generate debug mode for CC Graph """ """ generate opt options for CC Graph """
from dragon.config import option from dragon.config import option
graph_def.debug_mode = option['debug_mode'] graph_def.debug_mode = option['debug_mode']
graph_def.share_grads = option['share_grads']
def GraphDef_Device(graph_def): def GraphDef_Device(graph_def):
""" generate deivce info for CC Graph """ """ generate deivce info for CC Graph """
...@@ -155,13 +156,13 @@ def function(inputs=[], outputs=[], swaps=None, updater=None): ...@@ -155,13 +156,13 @@ def function(inputs=[], outputs=[], swaps=None, updater=None):
if len(outputs) > 0: if len(outputs) > 0:
GraphDef_Device(graph_def) GraphDef_Device(graph_def)
GraphDef_Debug(graph_def) GraphDef_Opt(graph_def)
GraphDef_Grad(graph_def, outputs) GraphDef_Grad(graph_def, outputs)
GraphDef_Phase(graph_def, outputs) GraphDef_Phase(graph_def, outputs)
elif updater is not None: elif updater is not None:
GraphDef_Device(graph_def) GraphDef_Device(graph_def)
GraphDef_Debug(graph_def) GraphDef_Opt(graph_def)
GraphDef_Update(graph_def, updater) GraphDef_Update(graph_def, updater)
# call c api to create graph # call c api to create graph
......
...@@ -257,6 +257,7 @@ GraphDef Graph::MakeUpdate(const GraphDef& graph_def) { ...@@ -257,6 +257,7 @@ GraphDef Graph::MakeUpdate(const GraphDef& graph_def) {
bool Graph::Create(const GraphDef& graph_def, Workspace* ws) { bool Graph::Create(const GraphDef& graph_def, Workspace* ws) {
bool has_device_option = graph_def.has_device_option(); bool has_device_option = graph_def.has_device_option();
bool has_debug_mode = graph_def.has_debug_mode(); bool has_debug_mode = graph_def.has_debug_mode();
bool has_share_grads = graph_def.has_share_grads();
for (const OperatorDef& plain_op_def: graph_def.op()) { for (const OperatorDef& plain_op_def: graph_def.op()) {
OperatorDef op_def(plain_op_def); OperatorDef op_def(plain_op_def);
LOG(DEBUG) << "Create Operator " << plain_op_def.name() LOG(DEBUG) << "Create Operator " << plain_op_def.name()
...@@ -270,12 +271,83 @@ bool Graph::Create(const GraphDef& graph_def, Workspace* ws) { ...@@ -270,12 +271,83 @@ bool Graph::Create(const GraphDef& graph_def, Workspace* ws) {
if (!op_def.has_debug_mode() && has_debug_mode) if (!op_def.has_debug_mode() && has_debug_mode)
op_def.set_debug_mode(graph_def.debug_mode()); op_def.set_debug_mode(graph_def.debug_mode());
// inherit share_grads if necessary
if (!op_def.has_share_grads() && has_share_grads)
op_def.set_share_grads(graph_def.share_grads());
OperatorBase* op = CreateOperator(op_def, ws); OperatorBase* op = CreateOperator(op_def, ws);
ops_.push_back(op); ops_.push_back(op);
} }
return true; return true;
} }
void Graph::RecomputingAware(const GraphDef& graph_def, Workspace* ws) {
GraphDef fake_graph(graph_def);
Map<string, vector<OperatorBase*> > fake_recompute_map;
Map<string, string> rename_map;
Map<string, Set<string> > hash_map;
Map<string, int> multi_use_count;
// check mirrow stage
for (int i = 0; i < ops_.size(); i++) {
if (ops_[i]->type().find("Gradient") != string::npos) continue;
bool mirrow_stage = ops_[i]->GetSingleArg<bool>("mirrow_stage", false);
for (auto& u : graph_def.op(i).input()) {
bool inplace_flag = false;
for (auto& v : graph_def.op(i).output()) if (u == v) inplace_flag = true;
mirrow_stage &= (!inplace_flag);
if (!inplace_flag) multi_use_count[u]++;
}
if (mirrow_stage) {
// TODO(PhyscalX): we currently assume input(0)->output(0) as a forced in-place pair
OperatorDef* op = fake_graph.mutable_op(i);
if (rename_map.count(op->input(0)))
*op->mutable_input(0) = rename_map[op->input(0)];
rename_map[op->output(0)] = op->input(0);
*op->mutable_output(0) = op->input(0);
ops_[i]->input(0).Corrupt(); // mark the tensor as corrupted
}
}
// sub-graph aware
for (int i = 0; i < ops_.size(); i++) {
if (ops_[i]->type().find("Gradient") != string::npos) continue;
OperatorDef fake_op = fake_graph.op(i);
OperatorDef op = graph_def.op(i);
for (int j = 0; j < op.output_size(); j++) {
string v = op.output(j);
string fake_v = fake_op.output(j);
if (!fake_recompute_map.count(fake_v))
fake_recompute_map[fake_v] = vector<OperatorBase*>();
if (v != fake_v) {
if (multi_use_count[fake_v] >= 2)
fake_recompute_map[fake_v] = ws->GetRecompute(fake_v);
}
fake_recompute_map[fake_v].push_back(ops_[i]);
for (int k = 0; k < fake_recompute_map[fake_v].size(); k++) {
if (!hash_map.count(v)) hash_map[v] = Set<string>();
string op_name = fake_recompute_map[fake_v][k]->name();
if (!hash_map[v].count(op_name)) {
ws->AddRecompute(v, fake_recompute_map[fake_v][k]);
hash_map[v].insert(op_name);
}
}
}
}
// prepare resources
Tensor* head = ws->CreateTensor("_t_mirrow_stage_head");
head->Reshape(vector<TIndex>(1, WORKSPACE_MAX_CORRUPTED_SIZE));
Tensor* recompute_flag = ws->CreateTensor("_t_global_recompute_flag");
recompute_flag->Reshape(vector<TIndex>(1, 1));
recompute_flag->mutable_data<bool, CPUContext>()[0] = false;
for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) {
string name = "_t_mirrow_stage_buffer_" + dragon_cast<string, int>(i);
Tensor* buffer = ws->CreateTensor(name); // pre-register the buffer tensor in the workspace
head->mutable_data<string, CPUContext>()[i] = "";
}
}
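In short: each op flagged with mirrow_stage is rewritten to run in place on its input's buffer, chains of such rewrites are collapsed through rename_map, and every tensor records the operator list able to rebuild its value. A much-simplified Python model of that bookkeeping (plain dicts, not the C++ graph):

```python
# Simplified model of the recomputing-aware pass (illustrative only).
ops = [
    {'name': 'conv1', 'inputs': ['data'], 'outputs': ['c1'], 'mirrow_stage': False},
    {'name': 'relu1', 'inputs': ['c1'],   'outputs': ['r1'], 'mirrow_stage': True},
    {'name': 'conv2', 'inputs': ['r1'],   'outputs': ['c2'], 'mirrow_stage': False},
]

rename_map = {}   # tensor -> buffer it was collapsed onto
chain = {}        # buffer -> producer ops accumulated so far
recompute = {}    # tensor -> ops to replay to rebuild its value
for op in ops:
    out = op['outputs'][0]
    buf = out
    if op['mirrow_stage']:
        src = op['inputs'][0]
        buf = rename_map.get(src, src)   # chase chained renames
        rename_map[out] = buf            # `out` now lives in `buf`
    chain.setdefault(buf, []).append(op['name'])
    recompute[out] = list(chain[buf])    # replay the whole chain

print(rename_map)   # {'r1': 'c1'}
print(recompute)    # {'c1': ['conv1'], 'r1': ['conv1', 'relu1'], 'c2': ['conv2']}
```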
Graph::Graph(const GraphDef& graph_def, Workspace* ws) Graph::Graph(const GraphDef& graph_def, Workspace* ws)
: GraphBase(graph_def, ws) { : GraphBase(graph_def, ws) {
GraphDef optimized_graph; GraphDef optimized_graph;
...@@ -297,6 +369,9 @@ Graph::Graph(const GraphDef& graph_def, Workspace* ws) ...@@ -297,6 +369,9 @@ Graph::Graph(const GraphDef& graph_def, Workspace* ws)
// create // create
Create(optimized_graph, ws); Create(optimized_graph, ws);
// recomputing-aware
RecomputingAware(optimized_graph, ws);
} }
bool Graph::Run(const string& include, const string& exclude) { bool Graph::Run(const string& include, const string& exclude) {
......
...@@ -59,6 +59,91 @@ Gradient MakeGradientForOp(const OperatorDef& def, const vector<string>& g_outpu ...@@ -59,6 +59,91 @@ Gradient MakeGradientForOp(const OperatorDef& def, const vector<string>& g_outpu
return grad; return grad;
} }
template <class Context>
void Operator<Context>::ElimateCorruption() {
Set<string> all_heads;
queue<int> safe_heads;
Tensor* head = ws()->GetTensor("_t_mirrow_stage_head");
string* head_data = head->mutable_data<string, CPUContext>();
for (int i = 0; i < head->count(); i++) all_heads.insert(head_data[i]);
// sub-graph run
for (int i = 0; i < InputSize(); i++) {
if (input(i).is_corrupted()) {
if (all_heads.count(input(i).name())) continue;
LOG(DEBUG) << "Tensor(" << input(i).name() << ") is corrupted, recompute... ";
Tensor* recompute_flag = ws()->GetTensor("_t_global_recompute_flag");
vector<OperatorBase*> list = ws()->GetRecompute(input(i).name());
recompute_flag->mutable_data<bool, CPUContext>()[0] = true;
for (int j = 0; j < list.size(); j++) list[j]->Run();
recompute_flag->mutable_data<bool, CPUContext>()[0] = false;
}
}
// check available head
all_heads.clear();
for (int i = 0; i < head->count(); i++) {
bool safe = true;
for (int j = 0; j < InputSize(); j++)
if (head_data[i] == input(j).name()) safe = false;
if (safe) safe_heads.push(i);
all_heads.insert(head_data[i]);
}
// pre-process
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->is_corrupted()) {
bool inplace_flag = false;
for (int j = 0; j < InputSize(); j++)
if (output(i)->name() == input(j).name()) inplace_flag = true;
if (inplace_flag || all_heads.count(output(i)->name())) continue; // no new buffer needed
CHECK(!safe_heads.empty())
<< "\nat most (" << safe_heads.size() << " [safe] / "
<< all_heads.size() << " [total]) buffers can be used for corrupted outputs in "
<< "(" << name() << ", " << type() << "), "
<< "\nincrease WORKSPACE_MAX_CORRUPTED_SIZE for a more powerful mirrow stage?";
int idx = safe_heads.front();
safe_heads.pop();
Tensor* buffer = ws()->GetTensor("_t_mirrow_stage_buffer_" + dragon_cast<string, int>(idx));
output(i)->Move(buffer->memory());
head_data[idx] = output(i)->name();
}
}
}
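At run time this splits into two halves: replay the recorded producer chain for any corrupted input (guarded by the global recompute flag), then move each corrupted output onto a free mirrow-stage head buffer. A toy version of the replay half, under the same assumptions as the sketch above (`recompute` built as shown there, `runners` mapping op names to callables):

```python
# Toy recompute-on-demand driver (illustrative; not Dragon's API).
def eliminate_corruption(op, corrupted, recompute, runners, flag):
    for name in op['inputs']:
        if name in corrupted:
            flag[0] = True                    # '_t_global_recompute_flag'
            for producer in recompute[name]:
                runners[producer]()           # replay the sub-graph
            flag[0] = False
            corrupted.discard(name)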
template <class Context>
void Operator<Context>::ShareGradient() {
// TODO(PhyscalX): we preset input(-1)->output(0) to share
if (output(0)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(0)->Replace(*dX);
}
}
template <class Context>
void Operator<Context>::MakeResource() {
ElimateCorruption();
if (allow_share_grads_) ShareGradient();
}
template <class Context>
void Operator<Context>::CleanResource() {
// post-process for mirrow stage
Map<string, int> head_to_idx;
Tensor* head = ws()->GetTensor("_t_mirrow_stage_head");
string* head_data = head->mutable_data<string, CPUContext>();
for (int i = 0; i < head->count(); i++) head_to_idx[head_data[i]] = i;
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->is_corrupted() && head_to_idx.count(output(i)->name())) {
string used = "_t_mirrow_stage_buffer_" + dragon_cast<string, int>(head_to_idx[output(i)->name()]);
Tensor* buffer = ws()->GetTensor(used);
if (output(i)->memory() != buffer->memory()) buffer->Move(output(i)->memory());
}
}
if (allow_share_grads_) {
// TODO(PhyscalX): we preset input(-1)->output(0) to share
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY, "Grad");
}
}
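Taken together, MakeResource and CleanResource give each *Gradient op a simple buffer lifecycle: adopt a pooled "Grad" buffer for output(0), run, then retire the buffer that held the consumed dY = input(-1). A self-contained sketch of why a whole backward chain then needs only two gradient buffers:

```python
# Sketch of the shared-gradient lifecycle across a backward chain.
class Pool(object):
    def __init__(self):
        self.free, self.made = [], 0

    def get(self):
        if self.free:
            return self.free.pop()
        self.made += 1
        return {}    # a fresh buffer (dict as a stand-in for memory)

    def release(self, buf):
        self.free.append(buf)

pool = Pool()
dy = pool.get()                  # gradient flowing in from the loss
for i in range(5):               # five chained *Gradient ops
    dx = pool.get()              # MakeResource: output(0) adopts a pooled buffer
    dx['value'] = i              # RunOnDevice stand-in
    pool.release(dy)             # CleanResource: the consumed dY returns to the pool
    dy = dx

print(pool.made)                 # 2 -- the chain ping-pongs between two buffers
```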
DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase,const OperatorDef&, Workspace*); DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase,const OperatorDef&, Workspace*);
DEFINE_REGISTRY(CUDAOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*); DEFINE_REGISTRY(CUDAOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*);
DEFINE_REGISTRY(CUDNNOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*); DEFINE_REGISTRY(CUDNNOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*);
...@@ -94,4 +179,13 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(int, ints) ...@@ -94,4 +179,13 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(int, ints)
INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings) INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings)
#undef INSTANTIATE_GET_REPEATED_ARGUMENT #undef INSTANTIATE_GET_REPEATED_ARGUMENT
template void Operator<CPUContext>::ElimateCorruption();
template void Operator<CUDAContext>::ElimateCorruption();
template void Operator<CPUContext>::ShareGradient();
template void Operator<CUDAContext>::ShareGradient();
template void Operator<CPUContext>::MakeResource();
template void Operator<CUDAContext>::MakeResource();
template void Operator<CPUContext>::CleanResource();
template void Operator<CUDAContext>::CleanResource();
} // namespace dragon
\ No newline at end of file
...@@ -13,4 +13,14 @@ GraphBase* Workspace::CreateGraph(const GraphDef& graph_def) {
return graph_map_[graph_def.name()].get();
}
Workspace::~Workspace() {
for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) {
string name = "_t_mirrow_stage_buffer_" + dragon_cast<string, int>(i);
if (HasTensor(name)) {
MixedMemory* mem = GetTensor(name)->memory();
if (mem != nullptr) delete mem;
}
}
}
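The slot names freed here follow the same scheme the staging code builds them with; dragon_cast<string, int> plays the role of std::to_string. A trivial stand-in for the naming loop (the bound is a placeholder for WORKSPACE_MAX_CORRUPTED_SIZE):

#include <iostream>
#include <string>

int main() {
    const int kMaxCorruptedSize = 2;  // placeholder for WORKSPACE_MAX_CORRUPTED_SIZE
    for (int i = 0; i < kMaxCorruptedSize; i++)
        std::cout << "_t_mirrow_stage_buffer_" + std::to_string(i) << "\n";
    return 0;
}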
} // namespace dragon
\ No newline at end of file
...@@ -70,8 +70,9 @@ void DropoutGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void DropoutGradientOp<Context>::CleanResource() {
Operator<Context>::CleanResource();
ws()->ReleaseBuffer(mask, "Common", true);
}
DEPLOY_CPU(DropoutGradient);
...@@ -81,7 +82,7 @@ DEPLOY_CUDA(DropoutGradient);
OPERATOR_SCHEMA(DropoutGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 } });
class GetDropoutGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetDropoutGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...@@ -92,4 +93,3 @@ public:
REGISTER_GRADIENT(Dropout, GetDropoutGradient);
} // namespace dragon
\ No newline at end of file
...@@ -48,7 +48,7 @@ DEPLOY_CUDA(ReluGradient);
OPERATOR_SCHEMA(ReluGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 }});
class GetReluGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetReluGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -48,7 +48,7 @@ DEPLOY_CUDA(SigmoidGradient);
OPERATOR_SCHEMA(SigmoidGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 } });
class GetSigmoidGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSigmoidGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -71,7 +71,7 @@ DEPLOY_CUDA(SoftmaxGradient);
OPERATOR_SCHEMA(SoftmaxGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 } });
class GetSoftmaxGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSoftmaxGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -48,7 +48,7 @@ DEPLOY_CUDA(TanhGradient);
OPERATOR_SCHEMA(TanhGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 } });
class GetTanhGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetTanhGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...@@ -59,4 +59,3 @@ public:
REGISTER_GRADIENT(Tanh, GetTanhGradient);
} // namespace dragon
\ No newline at end of file
...@@ -160,22 +160,16 @@ void AddGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void AddGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void AddGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(AddGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(AddGradient);
...@@ -183,7 +177,7 @@ DEPLOY_CUDA(AddGradient);
OPERATOR_SCHEMA(AddGradient).NumInputs(2).NumOutputs(2);
class GetAddGradient : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetAddGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -95,18 +95,6 @@ void BiasAddGradientOp<Context>::RunOnDevice() {
}
}
template <class Context>
void BiasAddGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void BiasAddGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(BiasAddGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(BiasAddGradient);
...@@ -114,7 +102,7 @@ DEPLOY_CUDA(BiasAddGradient);
OPERATOR_SCHEMA(BiasAddGradient).NumInputs(3).NumOutputs(2);
class GetBiasAddGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetBiasAddGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -45,18 +45,6 @@ void ClipGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void ClipGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void ClipGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ClipGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ClipGradient);
...@@ -64,7 +52,7 @@ DEPLOY_CUDA(ClipGradient);
OPERATOR_SCHEMA(ClipGradient).NumInputs(2).NumOutputs(1);
class GetClipGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetClipGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -191,22 +191,16 @@ void DivGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void DivGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void DivGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(DivGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(DivGradient);
...@@ -214,7 +208,7 @@ DEPLOY_CUDA(DivGradient);
OPERATOR_SCHEMA(DivGradient).NumInputs(3).NumOutputs(2);
class GetDivGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetDivGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
#include "operators/arithmetic/dot_op.h" #include "operators/arithmetic/dot_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h" #include "utils/math_functions.h"
namespace dragon { namespace dragon {
...@@ -169,6 +170,17 @@ void DotGradientOp<Context>::RunOnDevice() {
}
}
template <class Context>
void DotGradientOp<Context>::ShareGradient() {
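// NOTE: only the first non-ignored output can alias the shared "Grad" buffer, hence the break below.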
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
DEPLOY_CPU(DotGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(DotGradient);
...@@ -176,7 +188,7 @@ DEPLOY_CUDA(DotGradient);
OPERATOR_SCHEMA(DotGradient).NumInputs(3).NumOutputs(2);
class GetDotGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetDotGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -118,22 +118,16 @@ void EltwiseGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void EltwiseGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void EltwiseGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(EltwiseGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(EltwiseGradient);
...@@ -141,7 +135,7 @@ DEPLOY_CUDA(EltwiseGradient);
OPERATOR_SCHEMA(EltwiseGradient).NumInputs(3, INT_MAX).NumOutputs(2, INT_MAX);
class GetEltwiseGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetEltwiseGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs, outputs;
...
...@@ -41,19 +41,6 @@ void ExpGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void ExpGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void ExpGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ExpGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ExpGradient);
...@@ -61,7 +48,7 @@ DEPLOY_CUDA(ExpGradient);
OPERATOR_SCHEMA(ExpGradient).NumInputs(2).NumOutputs(1);
class GetExpGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetExpGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -61,18 +61,6 @@ void GramMatrixGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void GramMatrixGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void GramMatrixGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(GramMatrixGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(GramMatrixGradient);
...@@ -80,7 +68,7 @@ DEPLOY_CUDA(GramMatrixGradient);
OPERATOR_SCHEMA(GramMatrixGradient).NumInputs(2).NumOutputs(1);
class GetGramMatrixGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetGramMatrixGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -120,20 +120,6 @@ void InnerProductGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void InnerProductGradientOp<Context>::ShareBeforeRun() {
if (output(0)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
}
template <class Context>
void InnerProductGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(InnerProductGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(InnerProductGradient);
...@@ -141,7 +127,7 @@ DEPLOY_CUDA(InnerProductGradient);
OPERATOR_SCHEMA(InnerProductGradient).NumInputs(3).NumOutputs(3);
class GetInnerProductGradient : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetInnerProductGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -41,18 +41,6 @@ void LogGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void LogGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void LogGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(LogGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(LogGradient);
...@@ -60,7 +48,7 @@ DEPLOY_CUDA(LogGradient);
OPERATOR_SCHEMA(LogGradient).NumInputs(2).NumOutputs(1);
class GetLogGradient : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetLogGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -110,22 +110,16 @@ void MatmulGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void MatmulGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void MatmulGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(MatmulGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(MatmulGradient);
...@@ -133,7 +127,7 @@ DEPLOY_CUDA(MatmulGradient);
OPERATOR_SCHEMA(MatmulGradient).NumInputs(3).NumOutputs(2);
class GetMatmulGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetMatmulGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -179,22 +179,16 @@ void MulGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void MulGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void MulGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(MulGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(MulGradient);
...@@ -202,7 +196,7 @@ DEPLOY_CUDA(MulGradient);
OPERATOR_SCHEMA(MulGradient).NumInputs(3).NumOutputs(2);
class GetMulGradient : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetMulGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -80,18 +80,6 @@ void PowGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void PowGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void PowGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(PowGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(PowGradient);
...@@ -99,7 +87,7 @@ DEPLOY_CUDA(PowGradient);
OPERATOR_SCHEMA(PowGradient).NumInputs(3).NumOutputs(1);
class GetPowGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetPowGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -137,18 +137,6 @@ void ScaleGradientOp<Context>::RunOnDevice() {
}
}
template <class Context>
void ScaleGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void ScaleGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ScaleGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ScaleGradient);
...@@ -156,7 +144,7 @@ DEPLOY_CUDA(ScaleGradient);
OPERATOR_SCHEMA(ScaleGradient).NumInputs(3).NumOutputs(3);
class GetScaleGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetScaleGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -42,18 +42,6 @@ void SquareGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void SquareGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void SquareGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(SquareGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(SquareGradient);
...@@ -61,7 +49,7 @@ DEPLOY_CUDA(SquareGradient);
OPERATOR_SCHEMA(SquareGradient).NumInputs(2).NumOutputs(1);
class GetSquareGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSquareGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -160,22 +160,16 @@ void SubGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void SubGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void SubGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(SubGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(SubGradient);
...@@ -183,7 +177,7 @@ DEPLOY_CUDA(SubGradient);
OPERATOR_SCHEMA(SubGradient).NumInputs(3).NumOutputs(2);
class GetSubGradient : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSubGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -57,18 +57,6 @@ void AtGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void AtGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void AtGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(AtGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(AtGradient);
...@@ -76,7 +64,7 @@ DEPLOY_CUDA(AtGradient);
OPERATOR_SCHEMA(AtGradient).NumInputs(3).NumOutputs(1);
class GetAtGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetAtGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -101,22 +101,16 @@ void ConcatGradientOp<Context>::RunOnDevice() {
}
template <class Context>
void ConcatGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
template <class Context>
void ConcatGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ConcatGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ConcatGradient);
...@@ -124,7 +118,7 @@ DEPLOY_CUDA(ConcatGradient);
OPERATOR_SCHEMA(ConcatGradient).NumInputs(2, INT_MAX).NumOutputs(1, INT_MAX);
class GetConcatGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetConcatGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs, outputs;
...
...@@ -139,18 +139,6 @@ void CropGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void CropGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void CropGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(CropGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(CropGradient);
...@@ -158,7 +146,7 @@ DEPLOY_CUDA(CropGradient);
OPERATOR_SCHEMA(CropGradient).NumInputs(2).NumOutputs(1);
class GetCropGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetCropGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -30,7 +30,7 @@ DEPLOY_CUDA(ExpandDimsGradient);
OPERATOR_SCHEMA(ExpandDimsGradient).NumInputs(2).NumOutputs(1);
class GetExpandDimsGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetExpandDimsGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -39,7 +39,7 @@ DEPLOY_CUDA(FlattenGradient);
OPERATOR_SCHEMA(FlattenGradient).NumInputs(2).NumOutputs(1);
class GetFlattenGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetFlattenGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -91,7 +91,7 @@ DEPLOY_CUDA(TemplateGradient);
OPERATOR_SCHEMA(TemplateGradient);
class GetTemplateGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetTemplateGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs, outputs;
...@@ -104,5 +104,3 @@ public:
REGISTER_GRADIENT(Template, GetTemplateGradient);
} // namespace dragon
\ No newline at end of file
...@@ -107,18 +107,6 @@ void ReduceGradientOp<Context>::RunOnDevice() {
}
}
template <class Context>
void ReduceGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void ReduceGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(ReduceGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ReduceGradient);
...@@ -126,7 +114,7 @@ DEPLOY_CUDA(ReduceGradient);
OPERATOR_SCHEMA(ReduceGradient).NumInputs(2).NumOutputs(1);
class GetReduceGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetReduceGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -67,7 +67,7 @@ DEPLOY_CUDA(ReshapeGradient);
OPERATOR_SCHEMA(ReshapeGradient).NumInputs(2).NumOutputs(1);
class GetReshapeGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetReshapeGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...@@ -78,4 +78,3 @@ public:
REGISTER_GRADIENT(Reshape, GetReshapeGradient);
} // namespace dragon
\ No newline at end of file
...@@ -214,7 +214,7 @@ DEPLOY_CUDA(ScanGradient);
OPERATOR_SCHEMA(ScanGradient).NumInputs(2, INT_MAX).NumOutputs(1, INT_MAX);
class GetScanGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetScanGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs, outputs;
...
...@@ -81,7 +81,7 @@ DEPLOY_CUDA(SliceGradient);
OPERATOR_SCHEMA(SliceGradient).NumInputs(2, INT_MAX).NumOutputs(1);
class GetSliceGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSliceGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs(1, I(0));
...
...@@ -114,18 +114,6 @@ void TileGradientOp<Context>::RunOnDevice() {
ws()->ReleaseBuffer(dest);
}
template <class Context>
void TileGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void TileGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(TileGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(TileGradient);
...@@ -133,7 +121,7 @@ DEPLOY_CUDA(TileGradient);
OPERATOR_SCHEMA(TileGradient).NumInputs(1).NumOutputs(1);
class GetTileGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetTileGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -79,18 +79,6 @@ void TransposeGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void TransposeGradientOp<Context>::ShareBeforeRun() {
Tensor* dX = ws()->GetBuffer();
if (dX != nullptr) output(0)->Replace(*dX);
}
template <class Context>
void TransposeGradientOp<Context>::ClearAfterRun() {
Tensor* dY = &input(-1);
ws()->ReleaseBuffer(dY);
}
DEPLOY_CPU(TransposeGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(TransposeGradient);
...@@ -98,7 +86,7 @@ DEPLOY_CUDA(TransposeGradient);
OPERATOR_SCHEMA(TransposeGradient).NumInputs(2).NumOutputs(1);
class GetTransposeGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetTransposeGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -18,8 +18,8 @@ void L1LossOp<Context>::RunWithType() {
auto* Wdata = input(2).template data<T, Context>();
math::Mul<T, Context>(diff->count(), Wdata, diff_data, diff_data);
}
Ydata[0] = math::ASum<T, Context>(diff->count(), diff_data);
T normalizer;
if (normalization == "BATCH_SIZE") normalizer = input(0).dim(0);
else if (normalization == "FULL") normalizer = input(0).count();
...@@ -46,9 +46,11 @@ OPERATOR_SCHEMA(L1Loss).NumInputs(2, 3).NumOutputs(1);
template <class Context> template <typename T>
void L1LossGradientOp<Context>::RunWithType() {
auto* diff_data = diff->template mutable_data<T, Context>();
auto* dYdata = input(-1).template data<T, CPUContext>();
kernel::AbsGrad<T, Context>(diff->count(), diff_data, diff_data);
T alpha = dYdata[0], normalizer;
if (normalization == "BATCH_SIZE") normalizer = input(0).dim(0);
else if (normalization == "FULL") normalizer = input(0).count();
else if (normalization == "NONE") normalizer = 1;
...@@ -59,7 +61,7 @@ void L1LossGradientOp<Context>::RunWithType() {
auto* dXdata = output(i)->template mutable_data<T, Context>();
const T sign = (i == 0) ? 1 : -1;
alpha *= sign;
math::Axpby<T, Context>(output(i)->count(), alpha, diff_data, 0, dXdata);
}
}
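The gradient op no longer bakes in a fixed coeff: it reads the incoming scalar gradient dY from input(-1) on the CPU and scales by it, so the loss composes correctly with downstream scaling. A hedged reconstruction of the math, writing d for the (optionally weighted) difference and N for the normalizer; the division by N sits in the elided lines:

\ell = \frac{1}{N} \sum_i |d_i|, \qquad
\frac{\partial L}{\partial x_{0,i}} = +\frac{dY}{N}\,\operatorname{sign}(d_i), \qquad
\frac{\partial L}{\partial x_{1,i}} = -\frac{dY}{N}\,\operatorname{sign}(d_i)

kernel::AbsGrad computes sign(d) in place and math::Axpby applies the signed scale alpha.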
...@@ -71,6 +73,17 @@ void L1LossGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void L1LossGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
DEPLOY_CPU(L1LossGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(L1LossGradient);
...@@ -78,7 +91,7 @@ DEPLOY_CUDA(L1LossGradient);
OPERATOR_SCHEMA(L1LossGradient).NumInputs(3).NumOutputs(2);
class GetL1LossGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetL1LossGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
...
...@@ -16,8 +16,8 @@ void L2LossOp<Context>::RunWithType() {
auto* Wdata = input(2).template data<T, Context>();
math::Mul<T, Context>(diff->count(), Wdata, diff_data, diff_data);
}
Ydata[0] = T(0.5) * math::Dot<T, Context>(diff->count(), diff_data, diff_data);
T normalizer;
if (normalization == "BATCH_SIZE") normalizer = input(0).dim(0);
else if (normalization == "FULL") normalizer = input(0).count();
...@@ -44,8 +44,10 @@ OPERATOR_SCHEMA(L2Loss).NumInputs(2, 3).NumOutputs(1);
template <class Context> template <typename T>
void L2LossGradientOp<Context>::RunWithType() {
auto* diff_data = diff->template mutable_data<T, Context>();
auto* dYdata = input(-1).template data<T, CPUContext>();
T alpha = dYdata[0], normalizer;
if (normalization == "BATCH_SIZE") normalizer = input(0).dim(0);
else if (normalization == "FULL") normalizer = input(0).count();
else if (normalization == "NONE") normalizer = 1;
...@@ -56,7 +58,7 @@ void L2LossGradientOp<Context>::RunWithType() {
auto* dXdata = output(i)->template mutable_data<T, Context>();
const T sign = (i == 0) ? 1 : -1;
alpha *= sign;
math::Axpby<T, Context>(output(i)->count(), alpha, diff_data, 0, dXdata);
}
}
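L2Loss follows the same scheme; with the 0.5 folded into the forward value, the backward pass is linear in d (again a hedged reconstruction, assuming alpha is divided by the normalizer in the elided lines):

\ell = \frac{1}{2N} \sum_i d_i^2, \qquad
\frac{\partial L}{\partial x_{0,i}} = +\frac{dY}{N}\, d_i, \qquad
\frac{\partial L}{\partial x_{1,i}} = -\frac{dY}{N}\, d_i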
...@@ -68,6 +70,17 @@ void L2LossGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types.";
}
template <class Context>
void L2LossGradientOp<Context>::ShareGradient() {
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() != "ignore") {
Tensor* dX = ws()->GetBuffer("Grad");
output(i)->Replace(*dX);
break;
}
}
}
DEPLOY_CPU(L2LossGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(L2LossGradient);
...@@ -75,9 +88,9 @@ DEPLOY_CUDA(L2LossGradient);
OPERATOR_SCHEMA(L2LossGradient).NumInputs(3).NumOutputs(2);
class GetL2LossGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetL2LossGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
vector<string> {I(0), I(1), GO(0)},
vector<string> {GI(0), GI(1)});
...@@ -86,5 +99,3 @@ public:
REGISTER_GRADIENT(L2Loss, GetL2LossGradient);
} // namespace dragon
\ No newline at end of file
#include "operators/loss/sigmoid_cross_entropy_loss_op.h" #include "operators/loss/sigmoid_cross_entropy_op.h"
#include "core/workspace.h" #include "core/workspace.h"
#include "utils/math_functions.h" #include "utils/math_functions.h"
#include "utils/op_kernel.h" #include "utils/op_kernel.h"
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void SigmoidCrossEntropyLossOp<Context>::RunWithType() { void SigmoidCrossEntropyOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>(); auto* Xdata = input(0).template data<T, Context>();
auto* Pdata = prob->template mutable_data<T, Context>(); auto* Pdata = prob->template mutable_data<T, Context>();
kernel::Sigmoid<T, Context>(prob->count(), Xdata, Pdata); kernel::Sigmoid<T, Context>(prob->count(), Xdata, Pdata);
...@@ -32,7 +32,7 @@ void SigmoidCrossEntropyLossOp<Context>::RunWithType() { ...@@ -32,7 +32,7 @@ void SigmoidCrossEntropyLossOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void SigmoidCrossEntropyLossOp<Context>::RunOnDevice() { void SigmoidCrossEntropyOp<Context>::RunOnDevice() {
CHECK_EQ(input(0).count(), input(1).count()) CHECK_EQ(input(0).count(), input(1).count())
<< "\nnumber of predictions must match the number of labels."; << "\nnumber of predictions must match the number of labels.";
prob = ws()->CreateTensor("_t_" + anchor() + "_sigmoid_prob"); prob = ws()->CreateTensor("_t_" + anchor() + "_sigmoid_prob");
...@@ -43,14 +43,14 @@ void SigmoidCrossEntropyLossOp<Context>::RunOnDevice() { ...@@ -43,14 +43,14 @@ void SigmoidCrossEntropyLossOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types."; else LOG(FATAL) << "unsupported input types.";
} }
DEPLOY_CPU(SigmoidCrossEntropyLoss); DEPLOY_CPU(SigmoidCrossEntropy);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(SigmoidCrossEntropyLoss); DEPLOY_CUDA(SigmoidCrossEntropy);
#endif #endif
OPERATOR_SCHEMA(SigmoidCrossEntropyLoss).NumInputs(2).NumOutputs(1); OPERATOR_SCHEMA(SigmoidCrossEntropy).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T> template <class Context> template <typename T>
void SigmoidCrossEntropyLossGradientOp<Context>::RunWithType() { void SigmoidCrossEntropyGradientOp<Context>::RunWithType() {
auto* Pdata = prob->template data<T, Context>(); auto* Pdata = prob->template data<T, Context>();
auto* Tdata = input(1).template data<T, Context>(); auto* Tdata = input(1).template data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>(); auto* dXdata = output(0)->template mutable_data<T, Context>();
...@@ -72,7 +72,7 @@ void SigmoidCrossEntropyLossGradientOp<Context>::RunWithType() { ...@@ -72,7 +72,7 @@ void SigmoidCrossEntropyLossGradientOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void SigmoidCrossEntropyLossGradientOp<Context>::RunOnDevice() { void SigmoidCrossEntropyGradientOp<Context>::RunOnDevice() {
prob = ws()->GetTensor("_t_" + anchor() + "_sigmoid_prob"); prob = ws()->GetTensor("_t_" + anchor() + "_sigmoid_prob");
output(0)->ReshapeLike(input(0)); output(0)->ReshapeLike(input(0));
...@@ -80,21 +80,21 @@ void SigmoidCrossEntropyLossGradientOp<Context>::RunOnDevice() { ...@@ -80,21 +80,21 @@ void SigmoidCrossEntropyLossGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types."; else LOG(FATAL) << "unsupported input types.";
} }
DEPLOY_CPU(SigmoidCrossEntropyLossGradient); DEPLOY_CPU(SigmoidCrossEntropyGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(SigmoidCrossEntropyLossGradient); DEPLOY_CUDA(SigmoidCrossEntropyGradient);
#endif #endif
OPERATOR_SCHEMA(SigmoidCrossEntropyLossGradient).NumInputs(3).NumOutputs(1); OPERATOR_SCHEMA(SigmoidCrossEntropyGradient).NumInputs(3).NumOutputs(1);
class GetSigmoidCrossEntropyLossGradient final : public GradientMakerBase { class GetSigmoidCrossEntropyGradient final : public GradientMakerBase {
public: public:
GRADIENT_MAKER_CTOR(GetSigmoidCrossEntropyLossGradient); GRADIENT_MAKER_CTOR(GetSigmoidCrossEntropyGradient);
vector<OperatorDef> MakeDefs() override { vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "", return SingleDef(def.type() + "Gradient", "",
vector<string> {I(0), I(1), GO(0)}, vector<string> {I(0), I(1), GO(0)},
vector<string> {GI(0)}); vector<string> {GI(0)});
} }
}; };
REGISTER_GRADIENT(SigmoidCrossEntropyLoss, GetSigmoidCrossEntropyLossGradient); REGISTER_GRADIENT(SigmoidCrossEntropy, GetSigmoidCrossEntropyGradient);
} // namespace dragon } // namespace dragon
\ No newline at end of file
...@@ -52,17 +52,18 @@ OPERATOR_SCHEMA(SmoothL1Loss).NumInputs(2, 4).NumOutputs(1);
template <class Context> template <typename T>
void SmoothL1LossGradientOp<Context>::RunWithType() {
auto* diff_data = diff->template mutable_data<T, Context>();
auto* dYdata = input(-1).template data<T, CPUContext>();
kernel::SmoothL1Grad<T, Context>(diff->count(), sigma2, diff_data, diff_data);
for (int i = 0; i < 2; i++) {
if (output(i)->name() == "ignore") continue;
output(i)->ReshapeLike(input(i));
auto* dXdata = output(i)->template mutable_data<T, Context>();
const T sign = (i == 0) ? 1 : -1;
const T coeff = sign / input(i).dim(0) * dYdata[0];
math::Axpby<T, Context>(output(i)->count(), coeff, diff_data, 0, dXdata);
if (InputSize() > 3) {
auto* inside_w_data = input(2).template data<T, Context>();
math::Mul<T, Context>(output(i)->count(), inside_w_data, dXdata, dXdata);
...@@ -89,7 +90,7 @@ DEPLOY_CUDA(SmoothL1LossGradient);
OPERATOR_SCHEMA(SmoothL1LossGradient).NumInputs(3, 5).NumOutputs(2);
class GetSmoothL1LossGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSmoothL1LossGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs;
...
#include "operators/activation/softmax_op.h" #include "operators/activation/softmax_op.h"
#include "operators/loss/softmax_cross_entropy_loss_op.h" #include "operators/loss/softmax_cross_entropy_op.h"
#include "core/workspace.h" #include "core/workspace.h"
#include "utils/math_functions.h" #include "utils/math_functions.h"
#include "utils/op_kernel.h" #include "utils/op_kernel.h"
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void SoftmaxCrossEntropyLossOp<Context>::RunWithType() { void SoftmaxCrossEntropyOp<Context>::RunWithType() {
auto* Pdata = prob->template data<T, Context>(); auto* Pdata = prob->template data<T, Context>();
auto* Tdata = input(1).template data<T, Context>(); auto* Tdata = input(1).template data<T, Context>();
auto* Ldata = losses.template mutable_data<T, Context>(); auto* Ldata = losses.template mutable_data<T, Context>();
...@@ -36,7 +36,7 @@ void SoftmaxCrossEntropyLossOp<Context>::RunWithType() { ...@@ -36,7 +36,7 @@ void SoftmaxCrossEntropyLossOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void SoftmaxCrossEntropyLossOp<Context>::RunOnDevice() { void SoftmaxCrossEntropyOp<Context>::RunOnDevice() {
outer_dim = input(0).count(0, axis); outer_dim = input(0).count(0, axis);
inner_dim = input(0).count(axis + 1); inner_dim = input(0).count(axis + 1);
CHECK_EQ(input(0).count(), input(1).count()) CHECK_EQ(input(0).count(), input(1).count())
...@@ -49,14 +49,14 @@ void SoftmaxCrossEntropyLossOp<Context>::RunOnDevice() { ...@@ -49,14 +49,14 @@ void SoftmaxCrossEntropyLossOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types."; else LOG(FATAL) << "unsupported input types.";
} }
DEPLOY_CPU(SoftmaxCrossEntropyLoss); DEPLOY_CPU(SoftmaxCrossEntropy);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(SoftmaxCrossEntropyLoss); DEPLOY_CUDA(SoftmaxCrossEntropy);
#endif #endif
OPERATOR_SCHEMA(SoftmaxCrossEntropyLoss).NumInputs(2).NumOutputs(1); OPERATOR_SCHEMA(SoftmaxCrossEntropy).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T> template <class Context> template <typename T>
void SoftmaxCrossEntropyLossGradientOp<Context>::RunWithType() { void SoftmaxCrossEntropyGradientOp<Context>::RunWithType() {
auto* Tdata = input(1).template data<T, Context>(); auto* Tdata = input(1).template data<T, Context>();
auto* Pdata = prob->template mutable_data<T, Context>(); auto* Pdata = prob->template mutable_data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>(); auto* dXdata = output(0)->template mutable_data<T, Context>();
...@@ -75,7 +75,6 @@ void SoftmaxCrossEntropyLossGradientOp<Context>::RunWithType() { ...@@ -75,7 +75,6 @@ void SoftmaxCrossEntropyLossGradientOp<Context>::RunWithType() {
return; return;
} }
// normalize
T normalizer; T normalizer;
if (normalization == "BATCH_SIZE") normalizer = outer_dim; if (normalization == "BATCH_SIZE") normalizer = outer_dim;
else if (normalization == "FULL") normalizer = outer_dim * inner_dim; else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
...@@ -85,7 +84,7 @@ void SoftmaxCrossEntropyLossGradientOp<Context>::RunWithType() { ...@@ -85,7 +84,7 @@ void SoftmaxCrossEntropyLossGradientOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void SoftmaxCrossEntropyLossGradientOp<Context>::RunOnDevice() { void SoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() {
prob = ws()->GetTensor("_t_" + anchor() + "_softmax_prob"); prob = ws()->GetTensor("_t_" + anchor() + "_softmax_prob");
outer_dim = prob->count(0, axis); outer_dim = prob->count(0, axis);
inner_dim = prob->count(axis + 1); inner_dim = prob->count(axis + 1);
...@@ -95,21 +94,21 @@ void SoftmaxCrossEntropyLossGradientOp<Context>::RunOnDevice() { ...@@ -95,21 +94,21 @@ void SoftmaxCrossEntropyLossGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types."; else LOG(FATAL) << "unsupported input types.";
} }
DEPLOY_CPU(SoftmaxCrossEntropyLossGradient); DEPLOY_CPU(SoftmaxCrossEntropyGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(SoftmaxCrossEntropyLossGradient); DEPLOY_CUDA(SoftmaxCrossEntropyGradient);
#endif #endif
OPERATOR_SCHEMA(SoftmaxCrossEntropyLossGradient).NumInputs(3).NumOutputs(1); OPERATOR_SCHEMA(SoftmaxCrossEntropyGradient).NumInputs(3).NumOutputs(1);
class GetSoftmaxCrossEntropyLossGradient final : public GradientMakerBase { class GetSoftmaxCrossEntropyGradient final : public GradientMakerBase {
public: public:
GRADIENT_MAKER_CTOR(GetSoftmaxCrossEntropyLossGradient); GRADIENT_MAKER_CTOR(GetSoftmaxCrossEntropyGradient);
vector<OperatorDef> MakeDefs() override { vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "", return SingleDef(def.type() + "Gradient", "",
vector<string> {I(0), I(1), GO(0)}, vector<string> {I(0), I(1), GO(0)},
vector<string> {GI(0)}); vector<string> {GI(0)});
} }
}; };
REGISTER_GRADIENT(SoftmaxCrossEntropyLoss, GetSoftmaxCrossEntropyLossGradient); REGISTER_GRADIENT(SoftmaxCrossEntropy, GetSoftmaxCrossEntropyGradient);
} // namespace dragon } // namespace dragon
\ No newline at end of file
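Side note on the normalization argument used throughout these loss operators: "UNIT" skips reduction and returns the per-element losses, while the remaining modes divide the summed loss by a fixed count. A minimal standalone sketch of that selection (the helper name is hypothetical; "VALID" only applies to the sparse variants below, which track an ignore-aware valid count):

    #include <string>

    // Hypothetical helper mirroring the normalizer selection in the ops above.
    template <typename T>
    T SelectNormalizer(const std::string& normalization, T valid_count,
                       long long outer_dim, long long inner_dim) {
        if (normalization == "VALID")      return valid_count;               // # non-ignored labels
        if (normalization == "BATCH_SIZE") return static_cast<T>(outer_dim); // per-sample average
        if (normalization == "FULL")       return static_cast<T>(outer_dim * inner_dim);
        return static_cast<T>(1);                                            // "NONE": raw sum
    }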
#include "operators/activation/softmax_op.h" #include "operators/activation/softmax_op.h"
#include "operators/loss/softmax_loss_op.h" #include "operators/loss/sparse_softmax_cross_entropy_op.h"
#include "core/workspace.h" #include "core/workspace.h"
#include "utils/math_functions.h" #include "utils/math_functions.h"
#include "utils/op_kernel.h" #include "utils/op_kernel.h"
...@@ -8,13 +8,21 @@ ...@@ -8,13 +8,21 @@
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void SoftmaxLossOp<Context>::RunWithType() { void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() {
auto* prob_data = prob->template data<T, Context>(); auto* prob_data = prob->template data<T, Context>();
auto* label_data = input(1).template data<T, Context>(); auto* label_data = input(1).template data<T, Context>();
auto* loss_data = losses.template mutable_data<T, Context>(); auto* loss_data = losses.template mutable_data<T, Context>();
auto* valid_data = valid.template mutable_data<T, Context>(); auto* valid_data = valid.template mutable_data<T, Context>();
kernel::SparseSoftmaxCrossEntropy<T, Context>(input(0).count(), input(0).dim(axis),
outer_dim, inner_dim, prob_data, label_data, loss_data, valid_data, &ignore); kernel::SparseSoftmaxCrossEntropy<T, Context>(input(0).count(),
input(0).dim(axis),
outer_dim,
inner_dim,
prob_data,
label_data,
loss_data,
valid_data,
&ignore);
if (normalization == "UNIT") { if (normalization == "UNIT") {
output(0)->ReshapeLike(losses); output(0)->ReshapeLike(losses);
...@@ -35,7 +43,7 @@ void SoftmaxLossOp<Context>::RunWithType() { ...@@ -35,7 +43,7 @@ void SoftmaxLossOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void SoftmaxLossOp<Context>::RunOnDevice() { void SparseSoftmaxCrossEntropyOp<Context>::RunOnDevice() {
outer_dim = input(0).count(0, axis); outer_dim = input(0).count(0, axis);
inner_dim = input(0).count(axis + 1); inner_dim = input(0).count(axis + 1);
CHECK_EQ(outer_dim * inner_dim, input(1).count()) CHECK_EQ(outer_dim * inner_dim, input(1).count())
...@@ -49,21 +57,29 @@ void SoftmaxLossOp<Context>::RunOnDevice() { ...@@ -49,21 +57,29 @@ void SoftmaxLossOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types."; else LOG(FATAL) << "unsupported input types.";
} }
DEPLOY_CPU(SoftmaxLoss); DEPLOY_CPU(SparseSoftmaxCrossEntropy);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(SoftmaxLoss); DEPLOY_CUDA(SparseSoftmaxCrossEntropy);
#endif #endif
OPERATOR_SCHEMA(SoftmaxLoss).NumInputs(2).NumOutputs(1); OPERATOR_SCHEMA(SparseSoftmaxCrossEntropy).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T> template <class Context> template <typename T>
void SoftmaxLossGradientOp<Context>::RunWithType() { void SparseSoftmaxCrossEntropyGradientOp<Context>::RunWithType() {
auto* label_data = input(1).template data<T, Context>(); auto* label_data = input(1).template data<T, Context>();
auto* prob_data = prob->template mutable_data<T, Context>(); auto* prob_data = prob->template mutable_data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>(); auto* dXdata = output(0)->template mutable_data<T, Context>();
auto* valid_data = valid.template mutable_data<T, Context>(); auto* valid_data = valid.template mutable_data<T, Context>();
ctx().template Copy<T, Context, Context>(prob->count(), dXdata, prob_data); ctx().template Copy<T, Context, Context>(prob->count(), dXdata, prob_data);
kernel::SoftmaxLossGrad<T, Context>(output(0)->count(), output(0)->dim(axis),
outer_dim, inner_dim, label_data, prob_data, valid_data, &ignore, dXdata); kernel::SparseSoftmaxCrossEntropyGrad<T, Context>(output(0)->count(),
output(0)->dim(axis),
outer_dim,
inner_dim,
prob_data,
label_data,
valid_data,
&ignore,
dXdata);
if (normalization == "UNIT") { if (normalization == "UNIT") {
auto* dYdata = input(-1).template data<T, Context>(); auto* dYdata = input(-1).template data<T, Context>();
...@@ -83,7 +99,7 @@ void SoftmaxLossGradientOp<Context>::RunWithType() { ...@@ -83,7 +99,7 @@ void SoftmaxLossGradientOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void SoftmaxLossGradientOp<Context>::RunOnDevice() { void SparseSoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() {
prob = ws()->GetTensor("_t_" + anchor() + "_softmax_prob"); prob = ws()->GetTensor("_t_" + anchor() + "_softmax_prob");
outer_dim = prob->count(0, axis); outer_dim = prob->count(0, axis);
inner_dim = prob->count(axis + 1); inner_dim = prob->count(axis + 1);
...@@ -94,21 +110,21 @@ void SoftmaxLossGradientOp<Context>::RunOnDevice() { ...@@ -94,21 +110,21 @@ void SoftmaxLossGradientOp<Context>::RunOnDevice() {
else LOG(FATAL) << "unsupported input types."; else LOG(FATAL) << "unsupported input types.";
} }
DEPLOY_CPU(SoftmaxLossGradient); DEPLOY_CPU(SparseSoftmaxCrossEntropyGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(SoftmaxLossGradient); DEPLOY_CUDA(SparseSoftmaxCrossEntropyGradient);
#endif #endif
OPERATOR_SCHEMA(SoftmaxLossGradient).NumInputs(3).NumOutputs(1); OPERATOR_SCHEMA(SparseSoftmaxCrossEntropyGradient).NumInputs(3).NumOutputs(1);
class GetSoftmaxLossGradient final : public GradientMakerBase { class GetSparseSoftmaxCrossEntropyGradient final : public GradientMakerBase {
public: public:
GRADIENT_MAKER_CTOR(GetSoftmaxLossGradient); GRADIENT_MAKER_CTOR(GetSparseSoftmaxCrossEntropyGradient);
vector<OperatorDef> MakeDefs() override{ vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "", return SingleDef(def.type() + "Gradient", "",
vector<string> {I(0), I(1), GO(0)}, vector<string> {I(0), I(1), GO(0)},
vector<string> {GI(0)}); vector<string> {GI(0)});
} }
}; };
REGISTER_GRADIENT(SoftmaxLoss, GetSoftmaxLossGradient); REGISTER_GRADIENT(SparseSoftmaxCrossEntropy, GetSparseSoftmaxCrossEntropyGradient);
} // namespace dragon } // namespace dragon
\ No newline at end of file
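Worth noting for both operators above: the forward pass caches its softmax output in the workspace under an anchor-derived key, and the gradient op fetches that tensor back instead of recomputing it. A fragment sketching the convention (workspace calls as used above; not a standalone program):

    // Forward op: publish the intermediate under a key derived from anchor().
    Tensor* prob = ws()->CreateTensor("_t_" + anchor() + "_softmax_prob");

    // Gradient op: resolve the same key; the paired forward/backward defs must
    // agree on the anchor argument for this lookup to find the cached tensor.
    Tensor* cached = ws()->GetTensor("_t_" + anchor() + "_softmax_prob");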
#include "operators/activation/softmax_op.h"
#include "operators/loss/sparse_softmax_focal_loss_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h"
#include "utils/op_kernel.h"
#include "utils/proto_utils.h"
namespace dragon {
template <class Context> template <typename T>
void SparseSoftmaxFocalLossOp<Context>::RunWithType() {
auto* prob_data = this->prob->template data<T, Context>();
auto* label_data = input(1).template data<T, Context>();
auto* loss_data = this->losses.template mutable_data<T, Context>();
auto* valid_data = this->valid.template mutable_data<T, Context>();
auto* scale_data = scale->template mutable_data<T, Context>();
kernel::SparseSoftmaxFocalLoss<T, Context>(input(0).count(),
input(0).dim(axis),
outer_dim,
inner_dim,
alpha,
gamma,
prob_data,
label_data,
scale_data,
loss_data,
valid_data,
&this->ignore);
if (normalization == "UNIT") {
if (use_pseudo_metric) {
math::MulScalar<T, Context>(this->losses.count(),
1.0 / alpha,
loss_data);
}
output(0)->ReshapeLike(this->losses);
output(0)->Share(this->losses);
return;
}
T normalizer;
if (normalization == "VALID")
normalizer = math::ASum<T, Context>(this->valid.count(), valid_data);
else if (normalization == "BATCH_SIZE") normalizer = outer_dim;
else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1;
T loss = math::ASum<T, Context>(this->losses.count(), loss_data);
loss = use_pseudo_metric ? loss / alpha : loss;
output(0)->Reshape(vector<TIndex>(1, 1));
auto* Ydata = output(0)->template mutable_data<T, CPUContext>();
Ydata[0] = loss / normalizer;
}
template <class Context>
void SparseSoftmaxFocalLossOp<Context>::RunOnDevice() {
outer_dim = input(0).count(0, axis);
inner_dim = input(0).count(axis + 1);
CHECK_EQ(outer_dim * inner_dim, input(1).count())
<< "\nnumber of predictions must match the number of labels.";
this->valid.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
this->losses.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
this->softmax_op->Run();
this->prob = ws()->GetTensor("_t_" + anchor() + "_softmax_prob");
scale = ws()->CreateTensor("_t_" + anchor() + "_focal_scale");
scale->ReshapeLike(*this->prob);
if (input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(SparseSoftmaxFocalLoss);
#ifdef WITH_CUDA
DEPLOY_CUDA(SparseSoftmaxFocalLoss);
#endif
OPERATOR_SCHEMA(SparseSoftmaxFocalLoss).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() {
auto* label_data = input(1).template data<T, Context>();
auto* prob_data = this->prob->template mutable_data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>();
auto* valid_data = this->valid.template mutable_data<T, Context>();
auto* scale_data = scale->template mutable_data<T, Context>();
kernel::SparseSoftmaxFocalLossGrad<T, Context>(output(0)->count(),
output(0)->dim(axis),
outer_dim,
inner_dim,
gamma,
eps,
scale_data,
prob_data,
label_data,
valid_data,
&this->ignore,
dXdata);
if (normalization == "UNIT") {
auto* dYdata = input(-1).template data<T, Context>();
kernel::SumGrad<T, Context>(input(0).count() / input(0).dim(axis),
input(0).dim(axis),
inner_dim,
1.0,
dYdata,
prob_data);
math::Mul<T, Context>(output(0)->count(), prob_data, dXdata, dXdata);
return;
}
T normalizer;
if (normalization == "VALID") normalizer = math::ASum<T, Context>(this->valid.count(), valid_data);
else if (normalization == "BATCH_SIZE") normalizer = outer_dim;
else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1;
auto* dYdata = input(-1).template data<T, CPUContext>();
math::Scal<T, Context>(output(0)->count(), dYdata[0] / normalizer, dXdata);
}
template <class Context>
void SparseSoftmaxFocalLossGradientOp<Context>::RunOnDevice() {
this->prob = ws()->GetTensor("_t_" + anchor() + "_softmax_prob");
scale = ws()->GetTensor("_t_" + anchor() + "_focal_scale");
outer_dim = this->prob->count(0, axis);
inner_dim = this->prob->count(axis + 1);
output(0)->ReshapeLike(input(0));
this->valid.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
if (input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(SparseSoftmaxFocalLossGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(SparseSoftmaxFocalLossGradient);
#endif
OPERATOR_SCHEMA(SparseSoftmaxFocalLossGradient).NumInputs(3).NumOutputs(1);
class GetSparseSoftmaxFocalLossGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetSparseSoftmaxFocalLossGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
vector<string> {I(0), I(1), GO(0)},
vector<string> {GI(0)});
}
};
REGISTER_GRADIENT(SparseSoftmaxFocalLoss, GetSparseSoftmaxFocalLossGradient);
} // namespace dragon
\ No newline at end of file
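For context, the per-element scale computed by this new operator corresponds to the focal loss of Lin et al., "Focal Loss for Dense Object Detection"; with p_t the predicted probability of the true class:

    FL(p_t) = -alpha * (1 - p_t)^gamma * log(p_t)

Setting gamma = 0 and alpha = 1 recovers plain cross-entropy, which is presumably why the use_pseudo_metric path divides the reported loss by alpha: it keeps the logged value on a scale comparable to the unweighted loss.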
@@ -104,7 +104,7 @@ DEPLOY_CUDA(MPIBroadcastGradient);
 OPERATOR_SCHEMA(MPIBroadcastGradient).NumInputs(1).NumOutputs(1);
 class GetMPIBroadcastGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetMPIBroadcastGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
@@ -86,7 +86,7 @@ DEPLOY_CUDA(MPIGatherGradient);
 OPERATOR_SCHEMA(MPIGatherGradient).NumInputs(2, INT_MAX).NumOutputs(1);
 class GetMPIGatherGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetMPIGatherGradient);
     vector<OperatorDef> MakeDefs() override {
         vector<string> inputs(1, I(0));
...
@@ -63,7 +63,7 @@ void BatchNormOp<Context>::RunWithType() {
                            1.0,
                            Ydata);
-    if (!use_global_stats) {
+    if (!use_global_stats && !is_recomputing) {
         // Var(X) = E((X - EX) ^ 2)
         math::Square<T, Context>(output(0)->count(), Ydata, Std_data);
         math::Gemv<T, Context>(CblasNoTrans, nbychans, spatial_dim,
@@ -120,9 +120,12 @@ void BatchNormOp<Context>::RunOnDevice() {
     if (use_stats == -1) use_global_stats = phase() == "TEST" ? true : false;
     else use_global_stats = use_stats == 1 ? true : false;
+    is_recomputing = ws()->GetTensor("_t_global_recompute_flag")
+                         ->template data<bool, CPUContext>()[0];
     // if true, Act/Exp/Pow/Norm Ops can not exist before when train
     if (inplace) output(0)->Share(input(0));
     if (input(0).template IsType<float>()) RunWithType<float>();
     else if (input(0).template IsType<float16>()) RunWithType<float16>();
     else LOG(FATAL) << "unsupported input types.";
@@ -248,18 +251,6 @@ void BatchNormGradientOp<Context>::RunOnDevice() {
     else LOG(FATAL) << "unsupported input types.";
 }
-template <class Context>
-void BatchNormGradientOp<Context>::ShareBeforeRun() {
-    Tensor* dX = ws()->GetBuffer();
-    if (dX != nullptr) output(0)->Replace(*dX);
-}
-template <class Context>
-void BatchNormGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-}
 DEPLOY_CPU(BatchNormGradient);
 #ifdef WITH_CUDA
 DEPLOY_CUDA(BatchNormGradient);
@@ -267,7 +258,7 @@ DEPLOY_CUDA(BatchNormGradient);
 OPERATOR_SCHEMA(BatchNormGradient).NumInputs(3).NumOutputs(1);
 class GetBatchNormGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetBatchNormGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
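The new is_recomputing guard is what makes BatchNorm safe to re-execute: when a memory-saving scheduler replays the forward pass to regenerate freed activations, the op must normalize exactly as before but must not advance its running mean/variance a second time. A hedged sketch of the protocol implied by the flag (the driver loop here is hypothetical; the flag tensor name is the one read above):

    // Hypothetical recompute driver: raise the workspace-wide flag, replay the
    // forward ops whose outputs were freed, then lower the flag again.
    Tensor* flag = ws->GetTensor("_t_global_recompute_flag");
    flag->mutable_data<bool, CPUContext>()[0] = true;
    for (OperatorBase* op : ops_to_replay) op->Run();  // stat updates are skipped
    flag->mutable_data<bool, CPUContext>()[0] = false;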
@@ -72,7 +72,7 @@ void BatchRenormOp<Context>::RunWithType() {
                            1.0,
                            Ydata);
-    if (!use_global_stats) {
+    if (!use_global_stats && !is_recomputing) {
         // Var(X) = E((X - EX) ^ 2)
         math::Pow<T, Context>(stddev->count(), 2, Ydata, Std_data);
         math::Gemv<T, Context>(CblasNoTrans, nbychans, spatial_dim,
@@ -97,7 +97,7 @@ void BatchRenormOp<Context>::RunWithType() {
     math::AddScalar<T, Context>(mean.count(), eps, tVar_data);
     math::Pow<T, Context>(mean.count(), 0.5, tVar_data, tVar_data);
-    if (!use_global_stats) {
+    if (!use_global_stats && !is_recomputing) {
         // normalize history var
         math::AddScalar<T, Context>(mean.count(), eps, thVar_data);
         math::Pow<T, Context>(mean.count(), 0.5, thVar_data, thVar_data);
@@ -183,6 +183,8 @@ void BatchRenormOp<Context>::RunOnDevice() {
     if (use_stats == -1) use_global_stats = phase() == "TEST" ? true : false;
     else use_global_stats = use_stats == 1 ? true : false;
+    is_recomputing = ws()->GetTensor("_t_global_recompute_flag")
+                         ->template data<bool, CPUContext>()[0];
     // if true, Act/Exp/Pow/Norm Ops can not exist before when train
     if (inplace) output(0)->Share(input(0));
@@ -314,7 +316,7 @@ void BatchRenormGradientOp<Context>::RunWithType() {
     // release buffer
     ws()->ReleaseBuffer(stddev);
-    ws()->ReleaseBuffer(x_norm, true);
+    ws()->ReleaseBuffer(x_norm, "Common", true);
 }
 template <class Context>
@@ -336,18 +338,6 @@ void BatchRenormGradientOp<Context>::RunOnDevice() {
     else LOG(FATAL) << "unsupported input types.";
 }
-template <class Context>
-void BatchRenormGradientOp<Context>::ShareBeforeRun() {
-    Tensor* dX = ws()->GetBuffer();
-    if (dX != nullptr) output(0)->Replace(*dX);
-}
-template <class Context>
-void BatchRenormGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-}
 DEPLOY_CPU(BatchRenormGradient);
 #ifdef WITH_CUDA
 DEPLOY_CUDA(BatchRenormGradient);
@@ -355,7 +345,7 @@ DEPLOY_CUDA(BatchRenormGradient);
 OPERATOR_SCHEMA(BatchRenormGradient).NumInputs(3).NumOutputs(1);
 class GetBatchRenormGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetBatchRenormGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
@@ -54,7 +54,7 @@ void CuDNNBNOp<Context>::SpatialRunWithType() {
                          bn_desc,
                          Sdata,
                          Bdata,
-                         1.0 - this->momentum,
+                         is_recomputing ? 0.0 : 1.0 - this->momentum,
                          hMean_data,
                          hVar_data,
                          this->eps,
@@ -110,7 +110,7 @@ void CuDNNBNOp<Context>::PerActivationRunWithType() {
                          bn_desc,
                          Sdata,
                          Bdata,
-                         1.0 - this->momentum,
+                         is_recomputing ? 0.0 : 1.0 - this->momentum,
                          hMean_data,
                          hVar_data,
                          this->eps,
@@ -133,6 +133,8 @@ void CuDNNBNOp<Context>::RunOnDevice() {
     if (this->use_stats == -1) use_global_stats = phase() == "TEST" ? true : false;
     else use_global_stats = this->use_stats == 1 ? true : false;
+    is_recomputing = ws()->GetTensor("_t_global_recompute_flag")
+                         ->template data<bool, CPUContext>()[0];
     if (input(0).template IsType<float>()) {
         if (input(0).ndim() == 4) SpatialRunWithType<float>();
@@ -344,28 +346,22 @@ void CuDNNBNGradientOp<Context>::RunOnDevice() {
 }
 template <class Context>
-void BNGradientOp<Context>::ShareBeforeRun() {
+void BNGradientOp<Context>::ShareGradient() {
     if (use_global_stats) {
         if (output(0)->name() != "ignore") {
-            Tensor* dX = ws()->GetBuffer();
-            if (dX != nullptr) output(0)->Replace(*dX);
+            Tensor* dX = ws()->GetBuffer("Grad");
+            output(0)->Replace(*dX);
         }
     } else {
         if (output(0)->name() != "ignore" ||
             output(1)->name() != "ignore" ||
             output(2)->name() != "ignore") {
-            Tensor* dX = ws()->GetBuffer();
-            if (dX != nullptr) output(0)->Replace(*dX);
+            Tensor* dX = ws()->GetBuffer("Grad");
+            output(0)->Replace(*dX);
         }
     }
 }
-template <class Context>
-void BNGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-}
 DEPLOY_CPU(BNGradient);
 #ifdef WITH_CUDA
 DEPLOY_CUDA(BNGradient);
@@ -374,7 +370,7 @@ OPERATOR_SCHEMA(BNGradient).NumInputs(5).NumOutputs(3);
 DEPLOY_CUDNN(BNGradient);
 class GetBNGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetBNGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
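The cuDNN path expresses the same guard through the exponential-average factor: cuDNN updates the running statistics as

    mu_running <- (1 - f) * mu_running + f * mu_batch    (and likewise for the variance)

so passing f = 0 while recomputing leaves the history untouched, whereas f = 1 - momentum is the normal training update; the batch statistics themselves are still produced for normalization either way.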
@@ -155,18 +155,6 @@ void InstanceNormGradientOp<Context>::RunOnDevice() {
     else LOG(FATAL) << "unsupported input types.";
 }
-template <class Context>
-void InstanceNormGradientOp<Context>::ShareBeforeRun() {
-    Tensor* dX = ws()->GetBuffer();
-    if (dX != nullptr) output(0)->Replace(*dX);
-}
-template <class Context>
-void InstanceNormGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-}
 DEPLOY_CPU(InstanceNormGradient);
 #ifdef WITH_CUDA
 DEPLOY_CUDA(InstanceNormGradient);
@@ -174,7 +162,7 @@ DEPLOY_CUDA(InstanceNormGradient);
 OPERATOR_SCHEMA(InstanceNormGradient).NumInputs(3).NumOutputs(1);
 class GetInstanceNormGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetInstanceNormGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
@@ -180,18 +180,6 @@ void L2NormGradientOp<Context>::RunOnDevice() {
     else LOG(FATAL) << "unsupported input types.";
 }
-template <class Context>
-void L2NormGradientOp<Context>::ShareBeforeRun() {
-    Tensor* dX = ws()->GetBuffer();
-    if (dX != nullptr) output(0)->Replace(*dX);
-}
-template <class Context>
-void L2NormGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-}
 DEPLOY_CPU(L2NormGradient);
 #ifdef WITH_CUDA
 DEPLOY_CUDA(L2NormGradient);
@@ -199,7 +187,7 @@ DEPLOY_CUDA(L2NormGradient);
 OPERATOR_SCHEMA(L2NormGradient).NumInputs(2).NumOutputs(1);
 class GetL2NormGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetL2NormGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
@@ -85,7 +85,7 @@ DEPLOY_CUDA(LSTMUnitGradient);
 OPERATOR_SCHEMA(LSTMUnitGradient).NumInputs(5).NumOutputs(2);
 class GetLSTMUnitGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetLSTMUnitGradient);
     vector<OperatorDef> MakeDefs() override{
         return SingleDef(def.type() + "Gradient", "",
...
 #include <algorithm>
-#include "operators/common/utils_op.h"
+#include "operators/utils/accuracy_op.h"
 #include "utils/math_functions.h"
-#include "utils/op_kernel.h"
 namespace dragon {
-template <class Context> template <typename T>
-void CopyOp<Context>::RunWithType() {
-    auto* Xdata = input(0).template data<T, Context>();
-    auto* Ydata = output(0)->template mutable_data<T, Context>();
-    ctx().template Copy<T, Context, Context>(output(0)->count(), Ydata, Xdata);
-}
-template <class Context>
-void CopyOp<Context>::RunOnDevice() {
-    output(0)->ReshapeLike(input(0));
-    if (input(0).template IsType<float>()) RunWithType<float>();
-    else if (input(0).template IsType<float16>()) RunWithType<float16>();
-    else LOG(FATAL) << "unsupported input types.";
-}
-DEPLOY_CPU(Copy);
-#ifdef WITH_CUDA
-DEPLOY_CUDA(Copy);
-#endif
-OPERATOR_SCHEMA(Copy).NumInputs(1).NumOutputs(1);
-NO_GRADIENT(Copy);
 template <class Context> template <typename T>
 void AccuracyOp<Context>::RunWithType() {
     if (OutputSize() > 1) {
@@ -94,35 +70,4 @@ OPERATOR_SCHEMA(Accuracy).NumInputs(2).NumOutputs(1, 2);
 NO_GRADIENT(Accuracy);
-template <class Context> template <typename T>
-void OneHotOp<Context>::RunWithType() {
-    auto* Xdata = input(0).template data<T, Context>();
-    auto* Ydata = output(0)->template mutable_data<T, Context>();
-    math::Set<T, Context>(output(0)->count(),
-                          dragon_cast<T, float>(float(off_value)),
-                          Ydata);
-    kernel::OneHot<T, Context>(input(0).count(), depth, on_value, Xdata, Ydata);
-}
-template <class Context>
-void OneHotOp<Context>::RunOnDevice() {
-    vector<TIndex> dims = input(0).dims();
-    dims.push_back(depth);
-    output(0)->Reshape(dims);
-    if (input(0).template IsType<float>()) RunWithType<float>();
-    else LOG(FATAL) << "unsupported input types.";
-}
-DEPLOY_CPU(OneHot);
-#ifdef WITH_CUDA
-DEPLOY_CUDA(OneHot);
-#endif
-OPERATOR_SCHEMA(OneHot).NumInputs(1).NumOutputs(1);
-NO_GRADIENT(OneHot);
 } // namespace dragon
\ No newline at end of file
#include "operators/utils/copy_op.h"
namespace dragon {
template <class Context> template <typename T>
void CopyOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>();
auto* Ydata = output(0)->template mutable_data<T, Context>();
ctx().template Copy<T, Context, Context>(output(0)->count(), Ydata, Xdata);
}
template <class Context>
void CopyOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0));
if (input(0).template IsType<float>()) RunWithType<float>();
else if (input(0).template IsType<float16>()) RunWithType<float16>();
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(Copy);
#ifdef WITH_CUDA
DEPLOY_CUDA(Copy);
#endif
OPERATOR_SCHEMA(Copy).NumInputs(1).NumOutputs(1);
NO_GRADIENT(Copy);
} // namespace dragon
\ No newline at end of file
#include "operators/utils/one_hot_op.h"
#include "utils/math_functions.h"
#include "utils/op_kernel.h"
namespace dragon {
template <class Context> template <typename T>
void OneHotOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>();
auto* Ydata = output(0)->template mutable_data<T, Context>();
math::Set<T, Context>(output(0)->count(),
dragon_cast<T, float>(float(off_value)),
Ydata);
kernel::OneHot<T, Context>(input(0).count(), depth, on_value, Xdata, Ydata);
}
template <class Context>
void OneHotOp<Context>::RunOnDevice() {
vector<TIndex> dims = input(0).dims();
dims.push_back(depth);
output(0)->Reshape(dims);
if (input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "unsupported input types.";
}
DEPLOY_CPU(OneHot);
#ifdef WITH_CUDA
DEPLOY_CUDA(OneHot);
#endif
OPERATOR_SCHEMA(OneHot).NumInputs(1).NumOutputs(1);
NO_GRADIENT(OneHot);
} // namespace dragon
\ No newline at end of file
@@ -96,20 +96,6 @@ void ConvGradientOp<Context>::RunOnDevice() {
     else LOG(FATAL) << "unsupported input types.";
 }
-template <class Context>
-void ConvGradientOp<Context>::ShareBeforeRun() {
-    if (output(0)->name() != "ignore") {
-        Tensor* dX = ws()->GetBuffer();
-        if (dX != nullptr) output(0)->Replace(*dX);
-    }
-}
-template <class Context>
-void ConvGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-}
 DEPLOY_CPU(ConvGradient);
 #ifdef WITH_CUDA
 DEPLOY_CUDA(ConvGradient);
@@ -117,7 +103,7 @@ DEPLOY_CUDA(ConvGradient);
 OPERATOR_SCHEMA(ConvGradient).NumInputs(3).NumOutputs(3);
 class GetConvGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetConvGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
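The ShareBeforeRun/ClearAfterRun boilerplate deleted here (and from the other gradient ops in this commit) moves into the shared MakeResource/CleanResource path gated by allow_share_grads_. The default implementation lives in operator.cc and is not shown in this excerpt; the following is a sketch inferred from the BNGradientOp::ShareGradient override above, with the buffer-pool signatures treated as assumptions:

    // Sketch only: what a default ShareGradient/CleanResource pair would do.
    template <class Context>
    void Operator<Context>::ShareGradient() {
        if (output(0)->name() != "ignore") {
            Tensor* dX = ws()->GetBuffer("Grad");  // borrow a pooled gradient buffer
            output(0)->Replace(*dX);               // dX is written into the pool slot
        }
    }
    template <class Context>
    void Operator<Context>::CleanResource() {
        ws()->ReleaseBuffer(&input(-1), "Grad");   // assumed signature: return dY to the pool
    }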
@@ -97,20 +97,6 @@ void DeConvGradientOp<Context>::RunOnDevice() {
     else LOG(FATAL) << "unsupported input types.";
 }
-template <class Context>
-void DeConvGradientOp<Context>::ShareBeforeRun() {
-    if (output(0)->name() != "ignore") {
-        Tensor* dX = ws()->GetBuffer();
-        if (dX != nullptr) output(0)->Replace(*dX);
-    }
-}
-template <class Context>
-void DeConvGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-}
 DEPLOY_CPU(DeConvGradient);
 #ifdef WITH_CUDA
 DEPLOY_CUDA(DeConvGradient);
@@ -118,7 +104,7 @@ DEPLOY_CUDA(DeConvGradient);
 OPERATOR_SCHEMA(DeConvGradient).NumInputs(3).NumOutputs(3);
 class GetDeConvGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetDeConvGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
@@ -4,12 +4,6 @@
 namespace dragon {
-template <class Context>
-void DenseConcatOp<Context>::RunOnDevice() {
-    ConcatOp<Context>::RunOnDevice();
-    input(0).Release();    // keep shape, just release mem
-}
 DEPLOY_CPU(DenseConcat);
 #ifdef WITH_CUDA
 DEPLOY_CUDA(DenseConcat);
@@ -17,16 +11,18 @@ DEPLOY_CUDA(DenseConcat);
 OPERATOR_SCHEMA(DenseConcat).NumInputs(2).NumOutputs(1);
 template <class Context> template <typename T>
-void DenseConcatGradientOp<Context>::RunWithType() {
-    // restore X1 from Y
-    auto* Ydata = input(-2).template data<T, Context>();
-    auto* Xdata = input(0).template mutable_data<T, Context>();
-    this->x_concat_dim = input(0).dim(this->axis);
-    TIndex count = input(0).count();
+void DenseConcatGradientOp<Context>::RestoreX1() {
+    CHECK_GT(growth_rate, 0) << "invalid growth rate, please preset it.";
     this->concat_dims = input(-1).dims();
     this->y_concat_dim = this->concat_dims[this->axis];
     this->outer_dim = input(-1).count(0, this->axis);
     this->inner_dim = input(-1).count(this->axis + 1);
+    this->concat_dims[this->axis] -= growth_rate;
+    input(0).Reshape(this->concat_dims);
+    this->x_concat_dim = input(0).dim(this->axis);
+    TIndex count = input(0).count();
+    auto* Ydata = input(-2).template data<T, Context>();
+    auto* Xdata = input(0).template mutable_data<T, Context>();
     kernel::ConcatGrad<T, Context>(count,
                                    this->outer_dim,
                                    this->inner_dim,
@@ -39,26 +35,65 @@ void DenseConcatGradientOp<Context>::RunWithType() {
 }
 template <class Context>
-void DenseConcatGradientOp<Context>::RunOnDevice() {
-    if (input(0).template IsType<float>()) RunWithType<float>();
-    else if (input(0).template IsType<float16>()) RunWithType<float16>();
-    else LOG(FATAL) << "unsupported input types.";
-    ConcatGradientOp<Context>::RunOnDevice();
-}
-template <class Context>
-void DenseConcatGradientOp<Context>::ShareBeforeRun() {
-    Tensor* dX = ws()->GetBuffer();
-    if (dX != nullptr) output(0)->Replace(*dX);
-}
-template <class Context>
-void DenseConcatGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    Tensor* Y = &input(-2);
-    ws()->ReleaseBuffer(dY);
-    ws()->ReleaseBuffer(Y, true);
+void DenseConcatGradientOp<Context>::ElimateCorruption() {
+    Set<string> all_heads;
+    queue<int> safe_heads;
+    Tensor* head = ws()->GetTensor("_t_mirrow_stage_head");
+    string* head_data = head->mutable_data<string, CPUContext>();
+    for (int i = 0; i < head->count(); i++) all_heads.insert(head_data[i]);
+    // sub-graph run
+    if (input(0).is_corrupted() && !all_heads.count(input(0).name())) {
+        // pre-process
+        LOG(DEBUG) << "Tensor(" << input(0).name() << ") is corrupted, recompute... ";
+        for (int i = 0; i < head->count(); i++) {
+            bool safe = true;
+            for (int j = 0; j < InputSize(); j++)
+                if (head_data[i] == input(j).name()) safe = false;
+            if (safe) safe_heads.push(i);
+        }
+        int idx = safe_heads.front();
+        safe_heads.pop();
+        Tensor* buffer = ws()->GetTensor("_t_mirrow_stage_buffer_" + dragon_cast<string, int>(idx));
+        input(0).Move(buffer->memory());
+        head_data[idx] = input(0).name();
+        if (input(-2).template IsType<float>()) RestoreX1<float>();
+        else if (input(-2).template IsType<float16>()) RestoreX1<float16>();
+        else LOG(FATAL) << "unsupported input types.";
+        // post-process
+        if (input(0).memory() != buffer->memory()) buffer->Move(input(0).memory());
+    }
+    // check available head
+    while (!safe_heads.empty()) safe_heads.pop();
+    all_heads.clear();
+    for (int i = 0; i < head->count(); i++) {
+        bool safe = true;
+        for (int j = 0; j < InputSize(); j++)
+            if (head_data[i] == input(j).name()) safe = false;
+        if (safe) safe_heads.push(i);
+        all_heads.insert(head_data[i]);
+    }
+    // pre-process
+    for (int i = 0; i < OutputSize(); i++) {
+        if (output(i)->is_corrupted()) {
+            bool inplace_flag = false;
+            for (int j = 0; j < InputSize(); j++)
+                if (output(i)->name() == input(j).name()) inplace_flag = true;
+            if (inplace_flag || all_heads.count(output(i)->name())) continue;    // skip to use new buffer
+            CHECK(!safe_heads.empty())
+                << "\nat most (" << safe_heads.size() << " [safe] / "
+                << all_heads.size() << " [total] can be used for corrupted output in "
+                << "(" << name() << ", " << type() << "), "
+                << "\nadd WORKSPACE_MAX_CORRUPTED_SIZE for more powerful mirrow stage ?";
+            int idx = safe_heads.front();
+            safe_heads.pop();
+            Tensor* buffer = ws()->GetTensor("_t_mirrow_stage_buffer_" + dragon_cast<string, int>(idx));
+            output(i)->Move(buffer->memory());
+            head_data[idx] = output(i)->name();
+        }
+    }
 }
 DEPLOY_CPU(DenseConcatGradient);
@@ -68,7 +103,7 @@ DEPLOY_CUDA(DenseConcatGradient);
 OPERATOR_SCHEMA(DenseConcatGradient).NumInputs(4).NumOutputs(2);
 class GetDenseConcatGradient : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetDenseConcatGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
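The bookkeeping above maintains one invariant: head_data[i] names the tensor whose bytes currently occupy the pooled buffer "_t_mirrow_stage_buffer_<i>", and a slot is "safe" only if it backs none of this op's inputs (otherwise rebinding would clobber data still needed). Rebinding a tensor t to a slot is then just a memory move plus a head update, as in this fragment lifted from the loops above:

    int idx = safe_heads.front(); safe_heads.pop();    // claim a safe slot
    Tensor* buffer = ws()->GetTensor("_t_mirrow_stage_buffer_" + dragon_cast<string, int>(idx));
    t->Move(buffer->memory());                         // t's bytes now live in slot idx
    head_data[idx] = t->name();                        // record the new owner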
@@ -247,7 +247,7 @@ DEPLOY_CUDA(LRNGradient);
 OPERATOR_SCHEMA(LRNGradient).NumInputs(3).NumOutputs(1);
 class GetLRNGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetLRNGradient);
     vector<OperatorDef> MakeDefs() override{
         return SingleDef(def.type() + "Gradient", "",
...
@@ -54,18 +54,6 @@ void NNResizeGradientOp<Context>::RunOnDevice() {
     else LOG(FATAL) << "unsupported input types.";
 }
-template <class Context>
-void NNResizeGradientOp<Context>::ShareBeforeRun() {
-    Tensor* dX = ws()->GetBuffer();
-    if (dX != nullptr) output(0)->Replace(*dX);
-}
-template <class Context>
-void NNResizeGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-}
 DEPLOY_CPU(NNResizeGradient);
 #ifdef WITH_CUDA
 DEPLOY_CUDA(NNResizeGradient);
@@ -73,7 +61,7 @@ DEPLOY_CUDA(NNResizeGradient);
 OPERATOR_SCHEMA(NNResizeGradient).NumInputs(2).NumOutputs(1);
 class GetNNResizeGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetNNResizeGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
@@ -151,18 +151,6 @@ void PoolingGradientOp<Context>::RunOnDevice() {
     }
 }
-template <class Context>
-void PoolingGradientOp<Context>::ShareBeforeRun() {
-    Tensor* dX = ws()->GetBuffer();
-    if (dX != nullptr) output(0)->Replace(*dX);
-}
-template <class Context>
-void PoolingGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-}
 DEPLOY_CPU(PoolingGradient);
 #ifdef WITH_CUDA
 DEPLOY_CUDA(PoolingGradient);
@@ -170,7 +158,7 @@ DEPLOY_CUDA(PoolingGradient);
 OPERATOR_SCHEMA(PoolingGradient).NumInputs(3).NumOutputs(1);
 class GetPoolingGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetPoolingGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
@@ -57,17 +57,10 @@ void ROIAlignGradientOp<Context>::RunOnDevice() {
 }
 template <class Context>
-void ROIAlignGradientOp<Context>::ShareBeforeRun() {
-    Tensor* dX = ws()->GetBuffer();
-    if (dX != nullptr) output(0)->Replace(*dX);
-}
-template <class Context>
-void ROIAlignGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-    ws()->ReleaseBuffer(mask_h);
-    ws()->ReleaseBuffer(mask_w);
+void ROIAlignGradientOp<Context>::CleanResource() {
+    Operator<Context>::CleanResource();
+    ws()->ReleaseBuffer(mask_h, "Common", true);
+    ws()->ReleaseBuffer(mask_w, "Common", true);
 }
 DEPLOY_CPU(ROIAlignGradient);
@@ -77,7 +70,7 @@ DEPLOY_CUDA(ROIAlignGradient);
 OPERATOR_SCHEMA(ROIAlignGradient).NumInputs(3).NumOutputs(1);
 class GetROIAlignGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetROIAlignGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
@@ -55,16 +55,9 @@ void ROIPoolingGradientOp<Context>::RunOnDevice() {
 }
 template <class Context>
-void ROIPoolingGradientOp<Context>::ShareBeforeRun() {
-    Tensor* dX = ws()->GetBuffer();
-    if (dX != nullptr) output(0)->Replace(*dX);
-}
-template <class Context>
-void ROIPoolingGradientOp<Context>::ClearAfterRun() {
-    Tensor* dY = &input(-1);
-    ws()->ReleaseBuffer(dY);
-    ws()->ReleaseBuffer(mask);
+void ROIPoolingGradientOp<Context>::CleanResource() {
+    Operator<Context>::CleanResource();
+    ws()->ReleaseBuffer(mask, "Common", true);
 }
 DEPLOY_CPU(ROIPoolingGradient);
@@ -74,7 +67,7 @@ DEPLOY_CUDA(ROIPoolingGradient);
 OPERATOR_SCHEMA(ROIPoolingGradient).NumInputs(3).NumOutputs(1);
 class GetROIPoolingGradient final : public GradientMakerBase {
  public:
     GRADIENT_MAKER_CTOR(GetROIPoolingGradient);
     vector<OperatorDef> MakeDefs() override {
         return SingleDef(def.type() + "Gradient", "",
...
@@ -22,5 +22,3 @@ message LayerParameter {
 optional string name = 1; // the layer name
 repeated BlobProto blobs = 7;
 }
@@ -137,7 +137,7 @@ void protobuf_AssignDesc_dragon_2eproto() {
     ::google::protobuf::MessageFactory::generated_factory(),
     sizeof(DeviceOption));
   OperatorDef_descriptor_ = file->message_type(4);
-  static const int OperatorDef_offsets_[7] = {
+  static const int OperatorDef_offsets_[8] = {
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, input_),
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, output_),
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, name_),
@@ -145,6 +145,7 @@ void protobuf_AssignDesc_dragon_2eproto() {
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, arg_),
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, device_option_),
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, debug_mode_),
+    GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(OperatorDef, share_grads_),
   };
   OperatorDef_reflection_ =
     new ::google::protobuf::internal::GeneratedMessageReflection(
@@ -217,7 +218,7 @@ void protobuf_AssignDesc_dragon_2eproto() {
     sizeof(TensorFiller));
   TensorFiller_VarianceNorm_descriptor_ = TensorFiller_descriptor_->enum_type(0);
   GraphDef_descriptor_ = file->message_type(8);
-  static const int GraphDef_offsets_[9] = {
+  static const int GraphDef_offsets_[10] = {
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, name_),
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, op_),
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, graph_type_),
@@ -227,6 +228,7 @@ void protobuf_AssignDesc_dragon_2eproto() {
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, g_target_),
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, u_target_),
     GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, debug_mode_),
+    GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(GraphDef, share_grads_),
   };
   GraphDef_reflection_ =
     new ::google::protobuf::internal::GeneratedMessageReflection(
@@ -316,29 +318,31 @@ void protobuf_AddDesc_dragon_2eproto() {
     "oats\030\005 \003(\002\022\014\n\004ints\030\006 \003(\005\022\017\n\007strings\030\007 \003("
     "\t\"p\n\014DeviceOption\022%\n\013device_type\030\001 \001(\0162\013"
     ".DeviceType:\003CPU\022\021\n\006gpu_id\030\002 \001(\005:\0010\022\026\n\013r"
-    "andom_seed\030\003 \001(\r:\0013\022\016\n\006engine\030\004 \001(\t\"\241\001\n\013"
+    "andom_seed\030\003 \001(\r:\0013\022\016\n\006engine\030\004 \001(\t\"\275\001\n\013"
     "OperatorDef\022\r\n\005input\030\001 \003(\t\022\016\n\006output\030\002 \003"
     "(\t\022\014\n\004name\030\003 \001(\t\022\014\n\004type\030\004 \001(\t\022\026\n\003arg\030\005 "
     "\003(\0132\t.Argument\022$\n\rdevice_option\030\006 \001(\0132\r."
-    "DeviceOption\022\031\n\ndebug_mode\030\007 \001(\010:\005false\""
-    "=\n\016GradientTarget\022\014\n\004cost\030\001 \001(\t\022\013\n\003wrt\030\002"
-    " \001(\t\022\020\n\010external\030\003 \001(\t\"R\n\014UpdateTarget\022\014"
-    "\n\004name\030\001 \001(\t\022\014\n\004type\030\002 \001(\t\022\016\n\006tensor\030\003 \003"
-    "(\t\022\026\n\003arg\030\004 \003(\0132\t.Argument\"\215\002\n\014TensorFil"
-    "ler\022\016\n\006tensor\030\001 \001(\t\022\026\n\004type\030\002 \001(\t:\010const"
-    "ant\022\020\n\005value\030\003 \001(\002:\0010\022\016\n\003low\030\004 \001(\002:\0010\022\017\n"
-    "\004high\030\005 \001(\002:\0011\022\017\n\004mean\030\006 \001(\002:\0010\022\016\n\003std\030\007"
-    " \001(\002:\0011\022\020\n\005scale\030\010 \001(\002:\0013\0229\n\rvariance_no"
-    "rm\030\t \001(\0162\032.TensorFiller.VarianceNorm:\006FA"
-    "N_IN\"4\n\014VarianceNorm\022\n\n\006FAN_IN\020\000\022\013\n\007FAN_"
-    "OUT\020\001\022\013\n\007FAN_AVG\020\002\"\363\001\n\010GraphDef\022\014\n\004name\030"
-    "\001 \001(\t\022\030\n\002op\030\002 \003(\0132\014.OperatorDef\022\022\n\ngraph"
-    "_type\030\003 \001(\t\022$\n\rdevice_option\030\005 \001(\0132\r.Dev"
-    "iceOption\022\026\n\003arg\030\006 \003(\0132\t.Argument\022\016\n\006tar"
-    "get\030\007 \003(\t\022!\n\010g_target\030\010 \003(\0132\017.GradientTa"
-    "rget\022\037\n\010u_target\030\t \003(\0132\r.UpdateTarget\022\031\n"
-    "\ndebug_mode\030\n \001(\010:\005false*+\n\nDeviceType\022\007"
-    "\n\003CPU\020\000\022\010\n\004CUDA\020\001\022\n\n\006OPENCL\020\002", 1429);
+    "DeviceOption\022\031\n\ndebug_mode\030\007 \001(\010:\005false\022"
+    "\032\n\013share_grads\030\010 \001(\010:\005false\"=\n\016GradientT"
+    "arget\022\014\n\004cost\030\001 \001(\t\022\013\n\003wrt\030\002 \001(\t\022\020\n\010exte"
+    "rnal\030\003 \001(\t\"R\n\014UpdateTarget\022\014\n\004name\030\001 \001(\t"
+    "\022\014\n\004type\030\002 \001(\t\022\016\n\006tensor\030\003 \003(\t\022\026\n\003arg\030\004 "
+    "\003(\0132\t.Argument\"\215\002\n\014TensorFiller\022\016\n\006tenso"
+    "r\030\001 \001(\t\022\026\n\004type\030\002 \001(\t:\010constant\022\020\n\005value"
+    "\030\003 \001(\002:\0010\022\016\n\003low\030\004 \001(\002:\0010\022\017\n\004high\030\005 \001(\002:"
+    "\0011\022\017\n\004mean\030\006 \001(\002:\0010\022\016\n\003std\030\007 \001(\002:\0011\022\020\n\005s"
+    "cale\030\010 \001(\002:\0013\0229\n\rvariance_norm\030\t \001(\0162\032.T"
+    "ensorFiller.VarianceNorm:\006FAN_IN\"4\n\014Vari"
+    "anceNorm\022\n\n\006FAN_IN\020\000\022\013\n\007FAN_OUT\020\001\022\013\n\007FAN"
+    "_AVG\020\002\"\217\002\n\010GraphDef\022\014\n\004name\030\001 \001(\t\022\030\n\002op\030"
+    "\002 \003(\0132\014.OperatorDef\022\022\n\ngraph_type\030\003 \001(\t\022"
+    "$\n\rdevice_option\030\005 \001(\0132\r.DeviceOption\022\026\n"
+    "\003arg\030\006 \003(\0132\t.Argument\022\016\n\006target\030\007 \003(\t\022!\n"
+    "\010g_target\030\010 \003(\0132\017.GradientTarget\022\037\n\010u_ta"
+    "rget\030\t \003(\0132\r.UpdateTarget\022\031\n\ndebug_mode\030"
+    "\n \001(\010:\005false\022\032\n\013share_grads\030\013 \001(\010:\005false"
+    "*+\n\nDeviceType\022\007\n\003CPU\020\000\022\010\n\004CUDA\020\001\022\n\n\006OPE"
+    "NCL\020\002", 1485);
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedFile(
     "dragon.proto", &protobuf_RegisterTypes);
   TensorProto::default_instance_ = new TensorProto();
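Decoding the updated descriptor string, the dragon.proto change behind this regenerated file adds one optional flag to each message; the field numbers and defaults below are read directly from the encoded bytes above (\030\010 = field 8, \030\013 = field 11):

    message OperatorDef {
      // ... fields 1-7 unchanged ...
      optional bool share_grads = 8 [default = false];
    }
    message GraphDef {
      // ... fields 1-10 unchanged ...
      optional bool share_grads = 11 [default = false];
    }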
@@ -2171,6 +2175,7 @@ const int OperatorDef::kTypeFieldNumber;
 const int OperatorDef::kArgFieldNumber;
 const int OperatorDef::kDeviceOptionFieldNumber;
 const int OperatorDef::kDebugModeFieldNumber;
+const int OperatorDef::kShareGradsFieldNumber;
 #endif // !_MSC_VER
 OperatorDef::OperatorDef()
@@ -2197,6 +2202,7 @@ void OperatorDef::SharedCtor() {
   type_ = const_cast< ::std::string*>(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
   device_option_ = NULL;
   debug_mode_ = false;
+  share_grads_ = false;
   ::memset(_has_bits_, 0, sizeof(_has_bits_));
 }
@@ -2239,7 +2245,18 @@ OperatorDef* OperatorDef::New() const {
 }
 void OperatorDef::Clear() {
-  if (_has_bits_[0 / 32] & 108) {
+#define OFFSET_OF_FIELD_(f) (reinterpret_cast<char*>( \
+  &reinterpret_cast<OperatorDef*>(16)->f) - \
+  reinterpret_cast<char*>(16))
+#define ZR_(first, last) do { \
+    size_t f = OFFSET_OF_FIELD_(first); \
+    size_t n = OFFSET_OF_FIELD_(last) - f + sizeof(last); \
+    ::memset(&first, 0, n); \
+  } while (0)
+  if (_has_bits_[0 / 32] & 236) {
+    ZR_(debug_mode_, share_grads_);
     if (has_name()) {
       if (name_ != &::google::protobuf::internal::GetEmptyStringAlreadyInited()) {
         name_->clear();
@@ -2253,8 +2270,11 @@ void OperatorDef::Clear() {
     if (has_device_option()) {
       if (device_option_ != NULL) device_option_->::DeviceOption::Clear();
     }
-    debug_mode_ = false;
   }
+#undef OFFSET_OF_FIELD_
+#undef ZR_
   input_.Clear();
   output_.Clear();
   arg_.Clear();
@@ -2382,6 +2402,21 @@ bool OperatorDef::MergePartialFromCodedStream(
         } else {
           goto handle_unusual;
         }
+        if (input->ExpectTag(64)) goto parse_share_grads;
+        break;
+      }
+      // optional bool share_grads = 8 [default = false];
+      case 8: {
+        if (tag == 64) {
+         parse_share_grads:
+          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
+                   bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>(
+                 input, &share_grads_)));
+          set_has_share_grads();
+        } else {
+          goto handle_unusual;
+        }
         if (input->ExpectAtEnd()) goto success;
         break;
       }
@@ -2468,6 +2503,11 @@ void OperatorDef::SerializeWithCachedSizes(
     ::google::protobuf::internal::WireFormatLite::WriteBool(7, this->debug_mode(), output);
   }
+  // optional bool share_grads = 8 [default = false];
+  if (has_share_grads()) {
+    ::google::protobuf::internal::WireFormatLite::WriteBool(8, this->share_grads(), output);
+  }
   if (!unknown_fields().empty()) {
     ::google::protobuf::internal::WireFormat::SerializeUnknownFields(
         unknown_fields(), output);
@@ -2539,6 +2579,11 @@ void OperatorDef::SerializeWithCachedSizes(
     target = ::google::protobuf::internal::WireFormatLite::WriteBoolToArray(7, this->debug_mode(), target);
   }
+  // optional bool share_grads = 8 [default = false];
+  if (has_share_grads()) {
+    target = ::google::protobuf::internal::WireFormatLite::WriteBoolToArray(8, this->share_grads(), target);
+  }
   if (!unknown_fields().empty()) {
     target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray(
         unknown_fields(), target);
...@@ -2577,6 +2622,11 @@ int OperatorDef::ByteSize() const { ...@@ -2577,6 +2622,11 @@ int OperatorDef::ByteSize() const {
total_size += 1 + 1; total_size += 1 + 1;
} }
// optional bool share_grads = 8 [default = false];
if (has_share_grads()) {
total_size += 1 + 1;
}
} }
// repeated string input = 1; // repeated string input = 1;
total_size += 1 * this->input_size(); total_size += 1 * this->input_size();
...@@ -2641,6 +2691,9 @@ void OperatorDef::MergeFrom(const OperatorDef& from) { ...@@ -2641,6 +2691,9 @@ void OperatorDef::MergeFrom(const OperatorDef& from) {
if (from.has_debug_mode()) { if (from.has_debug_mode()) {
set_debug_mode(from.debug_mode()); set_debug_mode(from.debug_mode());
} }
if (from.has_share_grads()) {
set_share_grads(from.share_grads());
}
} }
mutable_unknown_fields()->MergeFrom(from.unknown_fields()); mutable_unknown_fields()->MergeFrom(from.unknown_fields());
} }
...@@ -2671,6 +2724,7 @@ void OperatorDef::Swap(OperatorDef* other) { ...@@ -2671,6 +2724,7 @@ void OperatorDef::Swap(OperatorDef* other) {
arg_.Swap(&other->arg_); arg_.Swap(&other->arg_);
std::swap(device_option_, other->device_option_); std::swap(device_option_, other->device_option_);
std::swap(debug_mode_, other->debug_mode_); std::swap(debug_mode_, other->debug_mode_);
std::swap(share_grads_, other->share_grads_);
std::swap(_has_bits_[0], other->_has_bits_[0]); std::swap(_has_bits_[0], other->_has_bits_[0]);
_unknown_fields_.Swap(&other->_unknown_fields_); _unknown_fields_.Swap(&other->_unknown_fields_);
std::swap(_cached_size_, other->_cached_size_); std::swap(_cached_size_, other->_cached_size_);
...@@ -4066,6 +4120,7 @@ const int GraphDef::kTargetFieldNumber; ...@@ -4066,6 +4120,7 @@ const int GraphDef::kTargetFieldNumber;
const int GraphDef::kGTargetFieldNumber; const int GraphDef::kGTargetFieldNumber;
const int GraphDef::kUTargetFieldNumber; const int GraphDef::kUTargetFieldNumber;
const int GraphDef::kDebugModeFieldNumber; const int GraphDef::kDebugModeFieldNumber;
const int GraphDef::kShareGradsFieldNumber;
#endif // !_MSC_VER #endif // !_MSC_VER
GraphDef::GraphDef() GraphDef::GraphDef()
...@@ -4092,6 +4147,7 @@ void GraphDef::SharedCtor() { ...@@ -4092,6 +4147,7 @@ void GraphDef::SharedCtor() {
graph_type_ = const_cast< ::std::string*>(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); graph_type_ = const_cast< ::std::string*>(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
device_option_ = NULL; device_option_ = NULL;
debug_mode_ = false; debug_mode_ = false;
share_grads_ = false;
::memset(_has_bits_, 0, sizeof(_has_bits_)); ::memset(_has_bits_, 0, sizeof(_has_bits_));
} }
...@@ -4134,6 +4190,16 @@ GraphDef* GraphDef::New() const { ...@@ -4134,6 +4190,16 @@ GraphDef* GraphDef::New() const {
} }
void GraphDef::Clear() { void GraphDef::Clear() {
#define OFFSET_OF_FIELD_(f) (reinterpret_cast<char*>( \
&reinterpret_cast<GraphDef*>(16)->f) - \
reinterpret_cast<char*>(16))
#define ZR_(first, last) do { \
size_t f = OFFSET_OF_FIELD_(first); \
size_t n = OFFSET_OF_FIELD_(last) - f + sizeof(last); \
::memset(&first, 0, n); \
} while (0)
if (_has_bits_[0 / 32] & 13) { if (_has_bits_[0 / 32] & 13) {
if (has_name()) { if (has_name()) {
if (name_ != &::google::protobuf::internal::GetEmptyStringAlreadyInited()) { if (name_ != &::google::protobuf::internal::GetEmptyStringAlreadyInited()) {
...@@ -4149,7 +4215,11 @@ void GraphDef::Clear() { ...@@ -4149,7 +4215,11 @@ void GraphDef::Clear() {
if (device_option_ != NULL) device_option_->::DeviceOption::Clear(); if (device_option_ != NULL) device_option_->::DeviceOption::Clear();
} }
} }
debug_mode_ = false; ZR_(debug_mode_, share_grads_);
#undef OFFSET_OF_FIELD_
#undef ZR_
op_.Clear(); op_.Clear();
arg_.Clear(); arg_.Clear();
target_.Clear(); target_.Clear();
...@@ -4301,6 +4371,21 @@ bool GraphDef::MergePartialFromCodedStream( ...@@ -4301,6 +4371,21 @@ bool GraphDef::MergePartialFromCodedStream(
} else { } else {
goto handle_unusual; goto handle_unusual;
} }
if (input->ExpectTag(88)) goto parse_share_grads;
break;
}
// optional bool share_grads = 11 [default = false];
case 11: {
if (tag == 88) {
parse_share_grads:
DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>(
input, &share_grads_)));
set_has_share_grads();
} else {
goto handle_unusual;
}
if (input->ExpectAtEnd()) goto success; if (input->ExpectAtEnd()) goto success;
break; break;
} }
...@@ -4395,6 +4480,11 @@ void GraphDef::SerializeWithCachedSizes( ...@@ -4395,6 +4480,11 @@ void GraphDef::SerializeWithCachedSizes(
::google::protobuf::internal::WireFormatLite::WriteBool(10, this->debug_mode(), output); ::google::protobuf::internal::WireFormatLite::WriteBool(10, this->debug_mode(), output);
} }
// optional bool share_grads = 11 [default = false];
if (has_share_grads()) {
::google::protobuf::internal::WireFormatLite::WriteBool(11, this->share_grads(), output);
}
if (!unknown_fields().empty()) { if (!unknown_fields().empty()) {
::google::protobuf::internal::WireFormat::SerializeUnknownFields( ::google::protobuf::internal::WireFormat::SerializeUnknownFields(
unknown_fields(), output); unknown_fields(), output);
...@@ -4477,6 +4567,11 @@ void GraphDef::SerializeWithCachedSizes( ...@@ -4477,6 +4567,11 @@ void GraphDef::SerializeWithCachedSizes(
target = ::google::protobuf::internal::WireFormatLite::WriteBoolToArray(10, this->debug_mode(), target); target = ::google::protobuf::internal::WireFormatLite::WriteBoolToArray(10, this->debug_mode(), target);
} }
// optional bool share_grads = 11 [default = false];
if (has_share_grads()) {
target = ::google::protobuf::internal::WireFormatLite::WriteBoolToArray(11, this->share_grads(), target);
}
if (!unknown_fields().empty()) { if (!unknown_fields().empty()) {
target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray( target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray(
unknown_fields(), target); unknown_fields(), target);
...@@ -4517,6 +4612,11 @@ int GraphDef::ByteSize() const { ...@@ -4517,6 +4612,11 @@ int GraphDef::ByteSize() const {
total_size += 1 + 1; total_size += 1 + 1;
} }
// optional bool share_grads = 11 [default = false];
if (has_share_grads()) {
total_size += 1 + 1;
}
} }
// repeated .OperatorDef op = 2; // repeated .OperatorDef op = 2;
total_size += 1 * this->op_size(); total_size += 1 * this->op_size();
...@@ -4602,6 +4702,9 @@ void GraphDef::MergeFrom(const GraphDef& from) { ...@@ -4602,6 +4702,9 @@ void GraphDef::MergeFrom(const GraphDef& from) {
if (from.has_debug_mode()) { if (from.has_debug_mode()) {
set_debug_mode(from.debug_mode()); set_debug_mode(from.debug_mode());
} }
if (from.has_share_grads()) {
set_share_grads(from.share_grads());
}
} }
mutable_unknown_fields()->MergeFrom(from.unknown_fields()); mutable_unknown_fields()->MergeFrom(from.unknown_fields());
} }
...@@ -4634,6 +4737,7 @@ void GraphDef::Swap(GraphDef* other) { ...@@ -4634,6 +4737,7 @@ void GraphDef::Swap(GraphDef* other) {
g_target_.Swap(&other->g_target_); g_target_.Swap(&other->g_target_);
u_target_.Swap(&other->u_target_); u_target_.Swap(&other->u_target_);
std::swap(debug_mode_, other->debug_mode_); std::swap(debug_mode_, other->debug_mode_);
std::swap(share_grads_, other->share_grads_);
std::swap(_has_bits_[0], other->_has_bits_[0]); std::swap(_has_bits_[0], other->_has_bits_[0]);
_unknown_fields_.Swap(&other->_unknown_fields_); _unknown_fields_.Swap(&other->_unknown_fields_);
std::swap(_cached_size_, other->_cached_size_); std::swap(_cached_size_, other->_cached_size_);
......
...@@ -815,6 +815,13 @@ class OperatorDef : public ::google::protobuf::Message { ...@@ -815,6 +815,13 @@ class OperatorDef : public ::google::protobuf::Message {
inline bool debug_mode() const; inline bool debug_mode() const;
inline void set_debug_mode(bool value); inline void set_debug_mode(bool value);
// optional bool share_grads = 8 [default = false];
inline bool has_share_grads() const;
inline void clear_share_grads();
static const int kShareGradsFieldNumber = 8;
inline bool share_grads() const;
inline void set_share_grads(bool value);
// @@protoc_insertion_point(class_scope:OperatorDef) // @@protoc_insertion_point(class_scope:OperatorDef)
private: private:
inline void set_has_name(); inline void set_has_name();
...@@ -825,6 +832,8 @@ class OperatorDef : public ::google::protobuf::Message { ...@@ -825,6 +832,8 @@ class OperatorDef : public ::google::protobuf::Message {
inline void clear_has_device_option(); inline void clear_has_device_option();
inline void set_has_debug_mode(); inline void set_has_debug_mode();
inline void clear_has_debug_mode(); inline void clear_has_debug_mode();
inline void set_has_share_grads();
inline void clear_has_share_grads();
::google::protobuf::UnknownFieldSet _unknown_fields_; ::google::protobuf::UnknownFieldSet _unknown_fields_;
...@@ -837,6 +846,7 @@ class OperatorDef : public ::google::protobuf::Message { ...@@ -837,6 +846,7 @@ class OperatorDef : public ::google::protobuf::Message {
::google::protobuf::RepeatedPtrField< ::Argument > arg_; ::google::protobuf::RepeatedPtrField< ::Argument > arg_;
::DeviceOption* device_option_; ::DeviceOption* device_option_;
bool debug_mode_; bool debug_mode_;
bool share_grads_;
friend void protobuf_AddDesc_dragon_2eproto(); friend void protobuf_AddDesc_dragon_2eproto();
friend void protobuf_AssignDesc_dragon_2eproto(); friend void protobuf_AssignDesc_dragon_2eproto();
friend void protobuf_ShutdownFile_dragon_2eproto(); friend void protobuf_ShutdownFile_dragon_2eproto();
...@@ -1441,6 +1451,13 @@ class GraphDef : public ::google::protobuf::Message { ...@@ -1441,6 +1451,13 @@ class GraphDef : public ::google::protobuf::Message {
inline bool debug_mode() const; inline bool debug_mode() const;
inline void set_debug_mode(bool value); inline void set_debug_mode(bool value);
// optional bool share_grads = 11 [default = false];
inline bool has_share_grads() const;
inline void clear_share_grads();
static const int kShareGradsFieldNumber = 11;
inline bool share_grads() const;
inline void set_share_grads(bool value);
// @@protoc_insertion_point(class_scope:GraphDef) // @@protoc_insertion_point(class_scope:GraphDef)
private: private:
inline void set_has_name(); inline void set_has_name();
...@@ -1451,6 +1468,8 @@ class GraphDef : public ::google::protobuf::Message { ...@@ -1451,6 +1468,8 @@ class GraphDef : public ::google::protobuf::Message {
inline void clear_has_device_option(); inline void clear_has_device_option();
inline void set_has_debug_mode(); inline void set_has_debug_mode();
inline void clear_has_debug_mode(); inline void clear_has_debug_mode();
inline void set_has_share_grads();
inline void clear_has_share_grads();
::google::protobuf::UnknownFieldSet _unknown_fields_; ::google::protobuf::UnknownFieldSet _unknown_fields_;
...@@ -1465,6 +1484,7 @@ class GraphDef : public ::google::protobuf::Message { ...@@ -1465,6 +1484,7 @@ class GraphDef : public ::google::protobuf::Message {
::google::protobuf::RepeatedPtrField< ::GradientTarget > g_target_; ::google::protobuf::RepeatedPtrField< ::GradientTarget > g_target_;
::google::protobuf::RepeatedPtrField< ::UpdateTarget > u_target_; ::google::protobuf::RepeatedPtrField< ::UpdateTarget > u_target_;
bool debug_mode_; bool debug_mode_;
bool share_grads_;
friend void protobuf_AddDesc_dragon_2eproto(); friend void protobuf_AddDesc_dragon_2eproto();
friend void protobuf_AssignDesc_dragon_2eproto(); friend void protobuf_AssignDesc_dragon_2eproto();
friend void protobuf_ShutdownFile_dragon_2eproto(); friend void protobuf_ShutdownFile_dragon_2eproto();
...@@ -2712,6 +2732,30 @@ inline void OperatorDef::set_debug_mode(bool value) { ...@@ -2712,6 +2732,30 @@ inline void OperatorDef::set_debug_mode(bool value) {
// @@protoc_insertion_point(field_set:OperatorDef.debug_mode) // @@protoc_insertion_point(field_set:OperatorDef.debug_mode)
} }
// optional bool share_grads = 8 [default = false];
inline bool OperatorDef::has_share_grads() const {
return (_has_bits_[0] & 0x00000080u) != 0;
}
inline void OperatorDef::set_has_share_grads() {
_has_bits_[0] |= 0x00000080u;
}
inline void OperatorDef::clear_has_share_grads() {
_has_bits_[0] &= ~0x00000080u;
}
inline void OperatorDef::clear_share_grads() {
share_grads_ = false;
clear_has_share_grads();
}
inline bool OperatorDef::share_grads() const {
// @@protoc_insertion_point(field_get:OperatorDef.share_grads)
return share_grads_;
}
inline void OperatorDef::set_share_grads(bool value) {
set_has_share_grads();
share_grads_ = value;
// @@protoc_insertion_point(field_set:OperatorDef.share_grads)
}
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// GradientTarget // GradientTarget
...@@ -3904,6 +3948,30 @@ inline void GraphDef::set_debug_mode(bool value) { ...@@ -3904,6 +3948,30 @@ inline void GraphDef::set_debug_mode(bool value) {
// @@protoc_insertion_point(field_set:GraphDef.debug_mode) // @@protoc_insertion_point(field_set:GraphDef.debug_mode)
} }
// optional bool share_grads = 11 [default = false];
inline bool GraphDef::has_share_grads() const {
return (_has_bits_[0] & 0x00000200u) != 0;
}
inline void GraphDef::set_has_share_grads() {
_has_bits_[0] |= 0x00000200u;
}
inline void GraphDef::clear_has_share_grads() {
_has_bits_[0] &= ~0x00000200u;
}
inline void GraphDef::clear_share_grads() {
share_grads_ = false;
clear_has_share_grads();
}
inline bool GraphDef::share_grads() const {
// @@protoc_insertion_point(field_get:GraphDef.share_grads)
return share_grads_;
}
inline void GraphDef::set_share_grads(bool value) {
set_has_share_grads();
share_grads_ = value;
// @@protoc_insertion_point(field_set:GraphDef.share_grads)
}
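A minimal sketch of driving the new flag through the generated accessors above (the graph-construction context is assumed, not part of this commit):

```cpp
#include <string>
#include "dragon.pb.h"  // the generated header shown above

// Turn on gradient sharing graph-wide, and per-op for gradient operators.
void EnableGradSharing(GraphDef* graph) {
    graph->set_share_grads(true);                        // field 11
    for (int i = 0; i < graph->op_size(); ++i) {
        OperatorDef* op = graph->mutable_op(i);
        if (op->type().find("Gradient") != std::string::npos)
            op->set_share_grads(true);                   // field 8
    }
}
```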
// @@protoc_insertion_point(namespace_scope) // @@protoc_insertion_point(namespace_scope)
......
...@@ -50,6 +50,7 @@ message OperatorDef { ...@@ -50,6 +50,7 @@ message OperatorDef {
repeated Argument arg= 5; repeated Argument arg= 5;
optional DeviceOption device_option = 6; optional DeviceOption device_option = 6;
optional bool debug_mode = 7 [default = false]; optional bool debug_mode = 7 [default = false];
optional bool share_grads = 8 [default = false];
} }
message GradientTarget { message GradientTarget {
...@@ -65,7 +66,6 @@ message UpdateTarget { ...@@ -65,7 +66,6 @@ message UpdateTarget {
repeated Argument arg = 4; repeated Argument arg = 4;
} }
// simply copy from caffe1
message TensorFiller { message TensorFiller {
optional string tensor = 1; optional string tensor = 1;
optional string type = 2 [default = 'constant']; optional string type = 2 [default = 'constant'];
...@@ -89,4 +89,5 @@ message GraphDef { ...@@ -89,4 +89,5 @@ message GraphDef {
repeated GradientTarget g_target = 8; repeated GradientTarget g_target = 8;
repeated UpdateTarget u_target = 9; repeated UpdateTarget u_target = 9;
optional bool debug_mode = 10 [default = false]; optional bool debug_mode = 10 [default = false];
optional bool share_grads = 11 [default = false];
} }
\ No newline at end of file
...@@ -650,19 +650,6 @@ template <> void TransposeGrad<float16, CPUContext>(const int count, ...@@ -650,19 +650,6 @@ template <> void TransposeGrad<float16, CPUContext>(const int count,
} }
} }
/******************** common.utils ********************/
template <> void OneHot<float, CPUContext>(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
for (int i = 0; i < count; ++i) {
const int val = x[i];
y[i * depth + val] = on_value;
}
}
/******************** loss.l1_loss ********************/ /******************** loss.l1_loss ********************/
template<> void AbsGrad<float, CPUContext>(const int count, const float* dy, float* dx) { template<> void AbsGrad<float, CPUContext>(const int count, const float* dy, float* dx) {
...@@ -673,7 +660,7 @@ template<> void AbsGrad<float, CPUContext>(const int count, const float* dy, flo ...@@ -673,7 +660,7 @@ template<> void AbsGrad<float, CPUContext>(const int count, const float* dy, flo
} }
} }
/******************** loss.sigmoid_cross_entropy_loss ********************/ /******************** loss.sigmoid_cross_entropy ********************/
template <> void SigmoidCrossEntropy<float, CPUContext>(const int count, template <> void SigmoidCrossEntropy<float, CPUContext>(const int count,
const float* x, const float* x,
...@@ -712,7 +699,7 @@ template<> void SmoothL1Grad<float, CPUContext>(const int count, ...@@ -712,7 +699,7 @@ template<> void SmoothL1Grad<float, CPUContext>(const int count,
} }
} }
/******************** loss.softmax_cross_entropy_loss ********************/ /******************** loss.softmax_cross_entropy ********************/
template <> void SoftmaxCrossEntropy<float, CPUContext>(const int count, template <> void SoftmaxCrossEntropy<float, CPUContext>(const int count,
const float* prob, const float* prob,
...@@ -723,7 +710,7 @@ template <> void SoftmaxCrossEntropy<float, CPUContext>(const int count, ...@@ -723,7 +710,7 @@ template <> void SoftmaxCrossEntropy<float, CPUContext>(const int count,
} }
} }
/******************** loss.softmax_loss ********************/ /******************** loss.sparse_softmax_cross_entropy ********************/
template <> void SparseSoftmaxCrossEntropy<float, CPUContext>(const int count, template <> void SparseSoftmaxCrossEntropy<float, CPUContext>(const int count,
const int classes, const int classes,
...@@ -735,8 +722,7 @@ template <> void SparseSoftmaxCrossEntropy<float, CPUContext>(const int count, ...@@ -735,8 +722,7 @@ template <> void SparseSoftmaxCrossEntropy<float, CPUContext>(const int count,
float* valid, float* valid,
Tensor* ignore) { Tensor* ignore) {
const int* ignores = ignore->count() > 0 ? const int* ignores = ignore->count() > 0 ?
ignore->data<int, CPUContext>() : ignore->data<int, CPUContext>() : nullptr;
nullptr;
const int dim = count / outer_dim; const int dim = count / outer_dim;
for (int i = 0; i < outer_dim; ++i) { for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) { for (int j = 0; j < inner_dim; ++j) {
...@@ -751,27 +737,25 @@ template <> void SparseSoftmaxCrossEntropy<float, CPUContext>(const int count, ...@@ -751,27 +737,25 @@ template <> void SparseSoftmaxCrossEntropy<float, CPUContext>(const int count,
} }
if (k == ignore->count()) { if (k == ignore->count()) {
float labeled_prob = prob[i * dim + label * inner_dim + j]; float labeled_prob = prob[i * dim + label * inner_dim + j];
labeled_prob = std::max(labeled_prob, FLT_MIN); loss[idx] = -std::log(std::max(labeled_prob, FLT_MIN));
loss[idx] = log(labeled_prob);
valid[idx] = 1; valid[idx] = 1;
} }
} }
} }
} }
template<> void SoftmaxLossGrad<float, CPUContext>(const int count, template<> void SparseSoftmaxCrossEntropyGrad<float, CPUContext>(const int count,
const int classes, const int classes,
const int outer_dim, const int outer_dim,
const int inner_dim, const int inner_dim,
const float* labels,
const float* prob, const float* prob,
const float* labels,
float* valid, float* valid,
Tensor* ignore, Tensor* ignore,
float* dXdata) { float* dXdata) {
int dim = count / outer_dim; int dim = count / outer_dim;
const int* ignores = ignore->count() > 0 ? const int* ignores = ignore->count() > 0 ?
ignore->data <int, CPUContext>() : ignore->data <int, CPUContext>() : nullptr;
nullptr;
valid[0] = 0; valid[0] = 0;
for (int i = 0; i < outer_dim; ++i) { for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) { for (int j = 0; j < inner_dim; ++j) {
...@@ -790,6 +774,93 @@ template<> void SoftmaxLossGrad<float, CPUContext>(const int count, ...@@ -790,6 +774,93 @@ template<> void SoftmaxLossGrad<float, CPUContext>(const int count,
} }
} }
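The body of the renamed gradient kernel is elided by this hunk, but judging from its inputs it presumably computes the textbook softmax cross-entropy derivative with respect to the logits $z$, zeroing positions whose label is in the ignore list and counting the rest into `valid`:

$$\frac{\partial \ell}{\partial z_c} \;=\; p_c \;-\; \mathbf{1}[c = t]$$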
/******************** loss.sparse_softmax_focal_loss ********************/
template <> void SparseSoftmaxFocalLoss<float, CPUContext>(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float alpha,
const float gamma,
const float* prob,
const float* labels,
float* scale,
float* loss,
float* valid,
Tensor* ignore) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CPUContext>() : nullptr;
const int dim = count / outer_dim;
for (int i = 0; i < count; ++i) {
scale[i] = alpha * std::pow((1.0f - prob[i]), gamma);
}
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
const int idx = i * inner_dim + j;
const int label = labels[idx];
int k;
for (k = 0; k < ignore->count(); ++k) {
if (label == ignores[k]) {
loss[idx] = valid[idx] = 0;
break;
}
}
if (k == ignore->count()) {
const int t_ = i * dim + label * inner_dim + j;
float labeled_prob = prob[t_];
loss[idx] = -scale[t_] * std::log(std::max(labeled_prob, FLT_MIN));
valid[idx] = 1;
}
}
}
}
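In math form, the kernel above is the focal-loss weighting of softmax cross-entropy: with $p$ the softmax probabilities and $t$ the labeled class at each position,

$$s_i = \alpha\,(1 - p_i)^{\gamma}, \qquad \ell = -\,s_t\,\log\big(\max(p_t,\ \mathrm{FLT\_MIN})\big),$$

and both the loss and the valid flag are zeroed when the label is ignored; setting $\gamma = 0,\ \alpha = 1$ recovers plain cross-entropy.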
template<> void SparseSoftmaxFocalLossGrad<float, CPUContext>(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float gamma,
const float eps,
const float* scale,
const float* prob,
const float* labels,
float* valid,
Tensor* ignore,
float* dXdata) {
int dim = count / outer_dim;
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CPUContext>() : nullptr;
valid[0] = 0;
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
const int label = labels[i * inner_dim + j];
int k;
for (k = 0; k < ignore->count(); ++k)
if (label == ignores[k]) break;
if (k != ignore->count()) {
for (int c = 0; c < classes; ++c)
dXdata[i * dim + c * inner_dim + j] = 0;
} else {
const int t_ = i * dim + label * inner_dim + j;
float grad = -gamma * (scale[t_] / std::max((1.0f - prob[t_]), eps))
* std::log(std::max(prob[t_], FLT_MIN))
* prob[t_] + scale[t_];
for (int c = 0; c < classes; ++c) {
const int i_ = i * dim + c * inner_dim + j;
if (c == label) {
dXdata[i_] = grad * (prob[t_] - 1);
} else {
dXdata[i_] = grad * prob[i_];
}
}
valid[0]++;
}
}
}
}
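The backward kernel folds the derivative of the focal weight into one scalar and reuses the usual softmax gradient pattern; in the code's notation,

$$g \;=\; -\,\gamma\,\frac{s_t}{\max(1 - p_t,\ \varepsilon)}\,\log\big(\max(p_t,\ \mathrm{FLT\_MIN})\big)\,p_t \;+\; s_t, \qquad \frac{\partial \ell}{\partial z_c} \;=\; g\,\big(p_c - \mathbf{1}[c = t]\big),$$

so $\gamma = 0$ gives $g = s_t = \alpha$ and degenerates to the scaled cross-entropy gradient, while positions with ignored labels receive an all-zero gradient column.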
/******************** recurrent.lstm_uint ********************/ /******************** recurrent.lstm_uint ********************/
template <> void LSTMUnit<float, CPUContext>(const int count, template <> void LSTMUnit<float, CPUContext>(const int count,
...@@ -1013,6 +1084,19 @@ template <> void MemoryData<uint8_t, float16, CPUContext>(const int count, ...@@ -1013,6 +1084,19 @@ template <> void MemoryData<uint8_t, float16, CPUContext>(const int count,
LOG(FATAL) << "unsupport float16 with CPU"; LOG(FATAL) << "unsupport float16 with CPU";
} }
/******************** utils.one_hot ********************/
template <> void OneHot<float, CPUContext>(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
for (int i = 0; i < count; ++i) {
const int val = x[i];
y[i * depth + val] = on_value;
}
}
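A minimal usage sketch of the relocated CPU kernel (assuming, as the write-only loop implies, that the caller pre-fills `y` with the off value):

```cpp
#include <vector>

void OneHotExample() {
    std::vector<float> x = {2.f, 0.f, 1.f};  // class indices stored as float
    std::vector<float> y(3 * 4, 0.f);        // count=3 rows, depth=4, pre-filled off value
    OneHot<float, CPUContext>(3, /* depth = */ 4, /* on_value = */ 1,
                              x.data(), y.data());
    // y rows become {0,0,1,0}, {1,0,0,0}, {0,1,0,0}
}
```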
/******************** vision.conv ********************/ /******************** vision.conv ********************/
template <> void Im2Col<float, CPUContext>(const int channels, template <> void Im2Col<float, CPUContext>(const int channels,
......
...@@ -1204,34 +1204,6 @@ template <> void TransposeGrad<float16, CUDAContext>(const int count, ...@@ -1204,34 +1204,6 @@ template <> void TransposeGrad<float16, CUDAContext>(const int count,
CUDA_POST_KERNEL_CHECK; CUDA_POST_KERNEL_CHECK;
} }
/******************** common.utils ********************/
template <typename T>
__global__ void _OneHot(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
CUDA_KERNEL_LOOP(idx, count) {
const int val = x[idx];
y[idx * depth + val] = on_value;
}
}
template <> void OneHot<float, CUDAContext>(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
_OneHot<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
depth,
on_value,
x,
y);
CUDA_POST_KERNEL_CHECK;
}
/******************** loss.l1_loss ********************/ /******************** loss.l1_loss ********************/
template <typename T> template <typename T>
...@@ -1248,7 +1220,7 @@ template<> void AbsGrad<float, CUDAContext>(const int count, const float* dy, fl ...@@ -1248,7 +1220,7 @@ template<> void AbsGrad<float, CUDAContext>(const int count, const float* dy, fl
CUDA_POST_KERNEL_CHECK; CUDA_POST_KERNEL_CHECK;
} }
/******************** loss.sigmoid_cross_entropy_loss ********************/ /******************** loss.sigmoid_cross_entropy ********************/
template <typename T> template <typename T>
__global__ void _SigmoidCrossEntropy(const int count, __global__ void _SigmoidCrossEntropy(const int count,
...@@ -1311,7 +1283,7 @@ template<> void SmoothL1Grad<float, CUDAContext>(const int count, ...@@ -1311,7 +1283,7 @@ template<> void SmoothL1Grad<float, CUDAContext>(const int count,
CUDA_POST_KERNEL_CHECK; CUDA_POST_KERNEL_CHECK;
} }
/******************** loss.softmax_cross_entropy_loss ********************/ /******************** loss.softmax_cross_entropy ********************/
template <typename T> template <typename T>
__global__ void _SoftmaxCrossEntropy(const int count, __global__ void _SoftmaxCrossEntropy(const int count,
...@@ -1334,7 +1306,7 @@ template <> void SoftmaxCrossEntropy<float, CUDAContext>(const int count, ...@@ -1334,7 +1306,7 @@ template <> void SoftmaxCrossEntropy<float, CUDAContext>(const int count,
CUDA_POST_KERNEL_CHECK; CUDA_POST_KERNEL_CHECK;
} }
/******************** loss.softmax_loss ********************/ /******************** loss.sparse_softmax_cross_entropy ********************/
template <typename T> template <typename T>
__global__ void _SparseSoftmaxCrossEntropy(const int count, __global__ void _SparseSoftmaxCrossEntropy(const int count,
...@@ -1375,8 +1347,7 @@ template <> void SparseSoftmaxCrossEntropy<float, CUDAContext>(const int count, ...@@ -1375,8 +1347,7 @@ template <> void SparseSoftmaxCrossEntropy<float, CUDAContext>(const int count,
float* valid, float* valid,
Tensor* ignore) { Tensor* ignore) {
const int* ignores = ignore->count() > 0 ? const int* ignores = ignore->count() > 0 ?
ignore->data<int, CUDAContext>() : ignore->data<int, CUDAContext>() : nullptr;
nullptr;
const int num_preds = outer_dim * inner_dim; const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxCrossEntropy<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds, _SparseSoftmaxCrossEntropy<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds,
prob, prob,
...@@ -1391,7 +1362,7 @@ template <> void SparseSoftmaxCrossEntropy<float, CUDAContext>(const int count, ...@@ -1391,7 +1362,7 @@ template <> void SparseSoftmaxCrossEntropy<float, CUDAContext>(const int count,
} }
template <typename T> template <typename T>
__global__ void _SoftmaxLossGrad(const int count, __global__ void _SparseSoftmaxCrossEntropyGrad(const int count,
const T* prob, const T* prob,
const T* labels, const T* labels,
T* dx, T* dx,
...@@ -1418,12 +1389,12 @@ __global__ void _SoftmaxLossGrad(const int count, ...@@ -1418,12 +1389,12 @@ __global__ void _SoftmaxLossGrad(const int count,
} }
} }
template<> void SoftmaxLossGrad<float, CUDAContext>(const int count, template<> void SparseSoftmaxCrossEntropyGrad<float, CUDAContext>(const int count,
const int classes, const int classes,
const int outer_dim, const int outer_dim,
const int inner_dim, const int inner_dim,
const float* labels,
const float* prob, const float* prob,
const float* labels,
float* valid, float* valid,
Tensor* ignore, Tensor* ignore,
float* dXdata) { float* dXdata) {
...@@ -1431,7 +1402,155 @@ template<> void SoftmaxLossGrad<float, CUDAContext>(const int count, ...@@ -1431,7 +1402,155 @@ template<> void SoftmaxLossGrad<float, CUDAContext>(const int count,
ignore->data <int, CUDAContext >() : ignore->data <int, CUDAContext >() :
nullptr; nullptr;
const int num_preds = outer_dim * inner_dim; const int num_preds = outer_dim * inner_dim;
_SoftmaxLossGrad<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds, _SparseSoftmaxCrossEntropyGrad<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds,
prob,
labels,
dXdata,
classes,
inner_dim,
ignores,
ignore->count(),
valid);
CUDA_POST_KERNEL_CHECK;
}
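All of these launchers share the `GET_BLOCKS(n)` / `CUDA_NUM_THREADS` pattern. The helpers are not shown in this diff, but they are presumably the usual Caffe-style ceil-division pair (the constant here is an assumption):

```cpp
// Assumed launch helpers, in the Caffe style (not part of this commit).
const int CUDA_NUM_THREADS = 1024;
inline int GET_BLOCKS(const int n) {
    // one thread per element, rounded up to whole blocks
    return (n + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}
```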
/******************** loss.sparse_softmax_focal_loss ********************/
template <typename T>
__global__ void _FocalScale(const int count,
const float alpha,
const float gamma,
const T* prob,
T* scale) {
CUDA_KERNEL_LOOP(idx, count) {
scale[idx] = alpha * std::pow((1.0f - prob[idx]), gamma);
}
}
template <typename T>
__global__ void _SparseSoftmaxFocalLoss(const int count,
const T* scale,
const T* prob,
const T* labels,
T* loss,
const int classes,
const int inner_dim,
const int* ignores,
const int ignore_num,
T* valid) {
CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim;
const int i_idx = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx];
int k;
for (k = 0; k < ignore_num; k++) {
if (label == ignores[k]) {
loss[idx] = valid[idx] = 0;
break;
}
}
if (k == ignore_num) {
const int t_ = (o_idx * classes + label) * inner_dim + i_idx;
loss[idx] = -scale[t_] * std::log(max(prob[t_], FLT_MIN));
valid[idx] = 1;
}
}
}
template <> void SparseSoftmaxFocalLoss<float, CUDAContext>(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float alpha,
const float gamma,
const float* prob,
const float* labels,
float* scale,
float* loss,
float* valid,
Tensor* ignore) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CUDAContext>() : nullptr;
const int num_preds = outer_dim * inner_dim;
_FocalScale<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
alpha,
gamma,
prob,
scale);
_SparseSoftmaxFocalLoss<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds,
scale,
prob,
labels,
loss,
classes,
inner_dim,
ignores,
ignore->count(),
valid);
CUDA_POST_KERNEL_CHECK;
}
template <typename T>
__global__ void _SparseSoftmaxFocalLossGrad(const int count,
const float gamma,
const float eps,
const T* scale,
const T* prob,
const T* labels,
T* dx,
const int classes,
const int inner_dim,
const int* ignores,
const int ignore_num,
T* valid) {
CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim;
const int i_idx = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx];
int k;
for (k = 0; k < ignore_num; k++)
if (label == ignores[k]) break;
if (k != ignore_num) {
for (int c = 0; c < classes; c++)
dx[(o_idx * classes + c) * inner_dim + i_idx] = 0;
valid[idx] = 0;
} else {
const int t_ = (o_idx * classes + label) * inner_dim + i_idx;
T grad = -gamma * (scale[t_] / max((1.0f - prob[t_]), eps))
* std::log(max(prob[t_], FLT_MIN))
* prob[t_] + scale[t_];
for (int c = 0; c < classes; c++) {
const int i_ = (o_idx * classes + c) * inner_dim + i_idx;
if (c == label) {
dx[i_] = grad * (prob[t_] - 1);
} else {
dx[i_] = grad * prob[i_];
}
}
valid[idx] = 1;
}
}
}
template<> void SparseSoftmaxFocalLossGrad<float, CUDAContext>(const int count,
const int classes,
const int outer_dim,
const int inner_dim,
const float gamma,
const float eps,
const float* scale,
const float* prob,
const float* labels,
float* valid,
Tensor* ignore,
float* dXdata) {
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CUDAContext >() : nullptr;
const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxFocalLossGrad<float> << <GET_BLOCKS(num_preds), CUDA_NUM_THREADS >> >(num_preds,
gamma,
eps,
scale,
prob, prob,
labels, labels,
dXdata, dXdata,
...@@ -1847,6 +1966,34 @@ template <> void MemoryData<uint8_t, float16, CUDAContext>(const int count, ...@@ -1847,6 +1966,34 @@ template <> void MemoryData<uint8_t, float16, CUDAContext>(const int count,
CUDA_POST_KERNEL_CHECK; CUDA_POST_KERNEL_CHECK;
} }
/******************** utils.one_hot ********************/
template <typename T>
__global__ void _OneHot(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
CUDA_KERNEL_LOOP(idx, count) {
const int val = x[idx];
y[idx * depth + val] = on_value;
}
}
template <> void OneHot<float, CUDAContext>(const int count,
const int depth,
const int on_value,
const float* x,
float* y) {
_OneHot<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
depth,
on_value,
x,
y);
CUDA_POST_KERNEL_CHECK;
}
/******************** vision.conv ********************/ /******************** vision.conv ********************/
template<typename T> template<typename T>
......
...@@ -164,19 +164,38 @@ dragon.config.EnableCPU() ...@@ -164,19 +164,38 @@ dragon.config.EnableCPU()
dragon.config.EnableCUDA(device_id, use_cudnn=True) dragon.config.EnableCUDA(device_id, use_cudnn=True)
``` ```
### Automatic Memory Optimization(AMC) ### Memonger
Dragon is an extremely memory-efficient framework.
It can drop intermediate results (the mirror stage) during the forward phase and share gradients during the backward phase,
using roughly 25% and 50% of the memory consumed by Caffe and TensorFlow, respectively.
To use it, just:
 
```Shell ```Shell
import dragon.config import dragon.memonger as opt
dragon.config.SetDebugMode(False) ```
- ShareGrads
```Shell
opt.share_grads()
``` ```
This option makes all gradients share one global tensor (which makes debugging intractable). - Drop
```Shell
import dragon.ops as ops
y = opt.drop(ops.Relu, x)
```
which yields roughly 50% of the original memory usage at the cost of about 15% slower training.
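Both options compose; a typical combined use might look like this (`conv1` is a hypothetical tensor, for illustration only):

```Shell
import dragon.memonger as opt
import dragon.ops as ops

opt.share_grads()              # all gradients reuse one global buffer
h = opt.drop(ops.Relu, conv1)  # activation dropped in forward, recomputed for backward
```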
### Scope ### Scope
As a graph-based framework, Dragon supports various scopes.
- NameScope - NameScope
```Shell ```Shell
......