Commit 40e94d24 by Ting PAN

Export Workspace for PyModule

1 parent b35f9320
Showing with 4107 additions and 3744 deletions
------------------------------------------------------------------------ ------------------------------------------------------------------------
The list of most significant changes made over time in Dragon. The list of most significant changes made over time in Dragon.
Dragon 0.3.0.0 (20190309) Dragon 0.3.0.0 (20190402)
DRAGON_VERSION == 3000 DRAGON_VERSION == 3000
Changes (w.r.t. Dragon 0.2.2.13): Changes (w.r.t. Dragon 0.2.2.13):
...@@ -36,6 +36,8 @@ Preview Features: ...@@ -36,6 +36,8 @@ Preview Features:
- The behavior of accumulating gradients has been canceled. - The behavior of accumulating gradients has been canceled.
- The Python module now takes charge of the ``Workspace``.
Bugs fixed: Bugs fixed:
......
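A minimal sketch of the Workspace-centric Python API this entry refers to, based on the pybind11 bindings added later in this commit; the module name follows `PYBIND11_MODULE(libdragon, m)`, but the exact import path and any wrapping done in `dragon.core.workspace` are assumptions here.

```python
# Sketch only: drives the raw libdragon bindings added in this commit.
# Assumes the extension module is importable as `libdragon`.
import numpy as np
import libdragon

ws = libdragon.Workspace('demo')                    # an isolated workspace
ws.FeedTensor('x', np.ones((2, 3), 'float32'), '')  # '' -> default DeviceOption
print(ws.tensors)                                   # names of the stored tensors
print(ws.FetchTensor('x'))                          # copied back as a numpy array
```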
...@@ -22,17 +22,15 @@ class GraphBase { ...@@ -22,17 +22,15 @@ class GraphBase {
public: public:
/*! \brief Default constructor */ /*! \brief Default constructor */
GraphBase( GraphBase(
const GraphDef& meta_graph, const GraphDef& def,
Workspace* ws); Workspace* ws);
/*! \brief Default destructor */ /*! \brief Default destructor */
virtual ~GraphBase() {} virtual ~GraphBase() {}
GraphDef BuildUpdateOps(const GraphDef& input_def);
/*! \brief Create a graph from the optimized def */ /*! \brief Create a graph from the optimized def */
virtual bool Create( virtual bool Create(
const GraphDef& optimized_graph, const GraphDef& def,
Workspace* ws) = 0; Workspace* ws) = 0;
/*! \brief Run the graph once synchronously */ /*! \brief Run the graph once synchronously */
...@@ -58,14 +56,14 @@ class GraphBase { ...@@ -58,14 +56,14 @@ class GraphBase {
class Graph : public GraphBase { class Graph : public GraphBase {
public: public:
/*! \brief Default constructor */ /*! \brief Default constructor */
Graph(const GraphDef& meta_graph, Workspace* ws); Graph(const GraphDef& def, Workspace* ws);
/*! \brief Default destructor */ /*! \brief Default destructor */
virtual ~Graph() { for (auto* op : ops_) delete op; } virtual ~Graph() { for (auto* op : ops_) delete op; }
/*! \brief Create a graph from the optimized def */ /*! \brief Create a graph from the optimized def */
bool Create( bool Create(
const GraphDef& optimized_graph, const GraphDef& def,
Workspace* ws) override; Workspace* ws) override;
/*! \brief Run the graph once synchronously */ /*! \brief Run the graph once synchronously */
......
...@@ -31,7 +31,7 @@ class GraphGradientMaker { ...@@ -31,7 +31,7 @@ class GraphGradientMaker {
const GraphDef& forward_def, const GraphDef& forward_def,
GraphDef& backward_def); GraphDef& backward_def);
void Share(GraphDef& graph); GraphDef Share(const GraphDef& input_def);
void SetTerms(const Map<string, string>& terms) { terms_ = terms; } void SetTerms(const Map<string, string>& terms) { terms_ = terms; }
void SetOperatorPrefix(const string& prefix) { op_prefix_ = prefix; } void SetOperatorPrefix(const string& prefix) { op_prefix_ = prefix; }
......
...@@ -42,9 +42,9 @@ class Tensor { ...@@ -42,9 +42,9 @@ class Tensor {
d = dims[i]; strides_[i] = (int64_t)new_size; d = dims[i]; strides_[i] = (int64_t)new_size;
CHECK_GE(d, 0); CHECK_GE(d, 0);
if (d > 0) new_size *= d; if (d > 0) new_size *= d;
} if (own_mem_) { }
if (size_ != new_size && if (own_mem_) {
capacity_ < new_size * meta_.itemsize()) { if (capacity_ < new_size * meta_.itemsize()) {
memory_.reset(); memory_.reset();
capacity_ = 0; capacity_ = 0;
} }
......
...@@ -29,23 +29,28 @@ class Workspace { ...@@ -29,23 +29,28 @@ class Workspace {
typedef Map<string, unique_ptr<OperatorBase> > OperatorMap; typedef Map<string, unique_ptr<OperatorBase> > OperatorMap;
typedef Map<string, unique_ptr<GraphBase> > GraphMap; typedef Map<string, unique_ptr<GraphBase> > GraphMap;
typedef Map<string, Workspace*> WorkspaceMap;
/*! \brief Constructor */ /*! \brief Constructor */
Workspace(const string& name) : name_(name) { InitWorkspace(); } Workspace(const string& name) : name_(name) { Initialize(); }
/*! \brief Return the name of this workspace */ /*! \brief Return the name of this workspace */
const string& name() { return name_; } const string& name() { return name_; }
/*! \brief Create some internal tensors */ /*! \brief Return the names of the stored tensors */
void InitWorkspace(); vector<string> tensors() const;
/*! \brief Return the names of the stored graphs */
vector<string> graphs() const;
/*! \brief Move an external workspace into this workspace */ /*! \brief Create some internal tensors */
Workspace* Move(Workspace* ws); void Initialize();
/*! \brief Destroy all the tensors */ /*! \brief Destroy all the tensors */
void Clear(); void Clear();
/*! \brief Merge from an external workspace */
void MergeFrom(Workspace* ws);
/*! \brief Query the real name of specified tensor */ /*! \brief Query the real name of specified tensor */
string GetTensorName(const string& name) const; string GetTensorName(const string& name) const;
...@@ -66,14 +71,11 @@ class Workspace { ...@@ -66,14 +71,11 @@ class Workspace {
/*! \brief Reset the specified tensor */ /*! \brief Reset the specified tensor */
void ResetTensor(const string& name); void ResetTensor(const string& name);
/*! \brief Return all the stored tensor names */
vector<string> GetTensors() const;
/* \brief Whether the specified filler is in this workspace */ /* \brief Whether the specified filler is in this workspace */
bool HasFiller(const string& name, bool use_remote = true) const; bool HasFiller(const string& name, bool use_remote = true) const;
/*! \brief Create the specified filler */ /*! \brief Create the specified filler */
void CreateFiller(const TensorFillerProto filler); void CreateFiller(const TensorFillerProto& filler);
/*! \brief Return the specified filler */ /*! \brief Return the specified filler */
const TensorFillerProto* GetFiller(const string& name) const; const TensorFillerProto* GetFiller(const string& name) const;
...@@ -82,27 +84,26 @@ class Workspace { ...@@ -82,27 +84,26 @@ class Workspace {
template <class Context> template <class Context>
vector<void*> caches(const vector<size_t>& segments) { vector<void*> caches(const vector<size_t>& segments) {
int64_t nbytes = 0; int64_t nbytes = 0;
vector<void*> ret(segments.size());
for (auto& segment : segments) nbytes += (int64_t)segment; for (auto& segment : segments) nbytes += (int64_t)segment;
Tensor* cache_t = CreateTensor("/share/cache"); auto* T = CreateTensor("/share/cache")->Reshape({ nbytes });
cache_t->Reshape({ nbytes }); ret[0] = T->template mutable_data<uint8_t, Context>();
vector<void*> Bcaches(segments.size());
Bcaches[0] = cache_t->template mutable_data<uint8_t, Context>();
for (int i = 1; i < segments.size(); i++) for (int i = 1; i < segments.size(); i++)
Bcaches[i] = (uint8_t*)Bcaches[i - 1] + segments[i - 1]; ret[i] = (uint8_t*)ret[i - 1] + segments[i - 1];
return Bcaches; return ret;
} }
/*! \brief Create temporary cache segments with the specified type */ /*! \brief Create temporary cache segments with the specified type */
template <typename T, class Context> template <typename T, class Context>
vector<T*> caches(const vector<int64_t>& segments) { vector<T*> caches(const vector<int64_t>& segments) {
vector<size_t> Tsegments; vector<size_t> segments_in_byte;
for (auto& segment : segments) vector<T*> ret(segments.size());
Tsegments.emplace_back(segment * sizeof(T)); for (const auto& e : segments)
vector<void*> Bcaches = caches<Context>(Tsegments); segments_in_byte.emplace_back(e * sizeof(T));
vector<T*> Tcaches(segments.size()); auto ret_in_byte = caches<Context>(segments_in_byte);
for (int i = 0; i < segments.size(); i++) for (int i = 0; i < segments.size(); i++)
Tcaches[i] = (T*)Bcaches[i]; ret[i] = (T*)ret_in_byte[i];
return Tcaches; return ret;
} }
/*! \brief Create an operator in this workspace */ /*! \brief Create an operator in this workspace */
...@@ -124,9 +125,6 @@ class Workspace { ...@@ -124,9 +125,6 @@ class Workspace {
const string& exclude, const string& exclude,
int stream_id = 0); int stream_id = 0);
/*! \brief Return all the stored graph names */
vector<string> GetGraphs() const;
/* \brief Set an alias for the tensor */ /* \brief Set an alias for the tensor */
bool SetTensorAlias(const string& name, const string& alias); bool SetTensorAlias(const string& name, const string& alias);
...@@ -160,7 +158,7 @@ class Workspace { ...@@ -160,7 +158,7 @@ class Workspace {
GraphMap graph_map_; GraphMap graph_map_;
/*! \brief Store the remote workspaces */ /*! \brief Store the remote workspaces */
WorkspaceMap workspace_map_; vector<Workspace*> remote_workspaces_;
}; };
} // namespace dragon } // namespace dragon
......
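To illustrate the renamed `MergeFrom()` (formerly `Move()`) and the new `tensors()`/`graphs()` accessors, a rough Python-side sketch using the bindings added later in this diff; whether `HasTensor()` consults the merged remote workspaces by default is an assumption drawn from the new `remote_workspaces_` member and the `use_remote` default on `HasFiller()`.

```python
# Sketch only: merging one workspace into another via the new binding.
import numpy as np
import libdragon

main, sub = libdragon.Workspace('main'), libdragon.Workspace('sub')
sub.FeedTensor('sub/x', np.zeros((4,), 'float32'), '')

main.MergeFrom(sub)               # 'sub' is recorded as a remote workspace of 'main'
print(main.HasTensor('sub/x'))    # expected True if remote lookup is on by default
print(main.tensors, main.graphs)  # names stored in (or visible to) this workspace
```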
...@@ -40,8 +40,11 @@ class GradientGatherOp final : public Operator<Context> { ...@@ -40,8 +40,11 @@ class GradientGatherOp final : public Operator<Context> {
public: public:
GradientGatherOp(const OperatorDef& def, Workspace* ws) GradientGatherOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws) { : Operator<Context>(def, ws) {
for (int i = 0; i < InputSize(); i++) for (int i = 0; i < InputSize(); i++) {
if (Input(i).name() != "NULL") indices.push_back(i); if (Input(i).name() != "NULL") {
indices.push_back(i);
}
}
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -53,6 +56,16 @@ class GradientGatherOp final : public Operator<Context> { ...@@ -53,6 +56,16 @@ class GradientGatherOp final : public Operator<Context> {
}; };
template <class Context> template <class Context>
class GradientAddOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(GradientAddOp);
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
};
template <class Context>
class StopGradientOp final : public Operator<Context> { class StopGradientOp final : public Operator<Context> {
public: public:
USE_SIMPLE_CTOR_DTOR(StopGradientOp); USE_SIMPLE_CTOR_DTOR(StopGradientOp);
......
...@@ -1033,7 +1033,6 @@ void MixedPrecisionUpdate( ...@@ -1033,7 +1033,6 @@ void MixedPrecisionUpdate(
template <typename T, class Context> template <typename T, class Context>
void BiasAdd( void BiasAdd(
const int count,
const int outer_dim, const int outer_dim,
const int dim, const int dim,
const int inner_dim, const int inner_dim,
......
...@@ -38,7 +38,7 @@ Workspace* ResetWorkspace(const std::string& name) { ...@@ -38,7 +38,7 @@ Workspace* ResetWorkspace(const std::string& name) {
g_workspaces[name].reset(new Workspace(name)); g_workspaces[name].reset(new Workspace(name));
for (auto& sub_workspace : sub_workspaces[name]) { for (auto& sub_workspace : sub_workspaces[name]) {
if (g_workspaces.count(sub_workspace) > 0) if (g_workspaces.count(sub_workspace) > 0)
g_workspaces[name]->Move( g_workspaces[name]->MergeFrom(
g_workspaces[sub_workspace].get()); g_workspaces[sub_workspace].get());
} }
return g_workspaces[name].get(); return g_workspaces[name].get();
...@@ -55,10 +55,10 @@ void MoveWorkspace( ...@@ -55,10 +55,10 @@ void MoveWorkspace(
std::unique_lock<std::mutex> lock(g_mutex); std::unique_lock<std::mutex> lock(g_mutex);
CHECK(src) << "\nGiven source workspace is invalid."; CHECK(src) << "\nGiven source workspace is invalid.";
CHECK(dst) << "\nGiven destination workspace is invalid."; CHECK(dst) << "\nGiven destination workspace is invalid.";
dst->Move(src); dst->MergeFrom(src);
sub_workspaces[dst->name()].push_back(src->name()); sub_workspaces[dst->name()].push_back(src->name());
LOG(INFO) << "Move the Workspace(" << src->name() << ") " LOG(INFO) << "Move the Workspace(" << src->name() << ") "
<< "into the Workspace(" << dst->name() << ")."; << "into the Workspace(" << dst->name() << ").";
} }
void DestroyWorkspace(const std::string& name) { void DestroyWorkspace(const std::string& name) {
......
...@@ -36,29 +36,6 @@ void AddGradientMethods(pybind11::module& m) { ...@@ -36,29 +36,6 @@ void AddGradientMethods(pybind11::module& m) {
vector<pybind11::bytes>, vector<string>, vector<float> vector<pybind11::bytes>, vector<string>, vector<float>
>(grad_ops, grad.g_inputs, grad.defaults); >(grad_ops, grad.g_inputs, grad.defaults);
}); });
m.def("FlowGradients", [](
const vector<OperatorDef*>& forward_ops,
const vector<string>& targets,
const vector<string>& input_grads,
const vector<string>& ignore_grads,
const bool is_sharing,
const bool verbose) {
// Make => Optimize => Run
GraphDef backward_ops;
GraphGradientMaker maker;
for (auto& grad : input_grads) maker.AddExternalGrad(grad);
for (auto& grad : ignore_grads) maker.AddIgnoreGrad(grad);
maker.Make(forward_ops, targets, backward_ops);
if (is_sharing) maker.Share(backward_ops);
pybind11::gil_scoped_release g;
for (auto& op : backward_ops.op()) {
if (op.type().empty()) continue;
if (verbose) std::cout << op.DebugString() << std::endl;
if (op.has_uid()) ws()->RunOperator(op);
else ws()->RunOperatorOnce(op);
}
});
} }
} // namespace python } // namespace python
......
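The `FlowGradients` binding removed above is superseded by a `Backward` method on the exported `Workspace` (defined later in this diff). A hedged sketch of the call shape; `forward_ops` is assumed to be a list of C++ `OperatorDef` handles, e.g. built with `MakeCXXOperatorDef`, and the helper name is hypothetical.

```python
def run_backward(ws, forward_ops, targets):
    """Sketch: mirror the old FlowGradients flow through Workspace.Backward."""
    ws.Backward(
        forward_ops,   # forward OperatorDef handles
        targets,       # names of the tensors to differentiate
        [],            # externally provided input gradients
        [],            # gradients to ignore
        True,          # is_sharing: let the maker share gradient buffers
        False,         # verbose: print every backward OperatorDef
    )
```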
...@@ -16,15 +16,17 @@ ...@@ -16,15 +16,17 @@
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "py_types.h" #include "py_types.h"
#include "core/common.h" #include "core/common.h"
#include "core/registry.h"
#include "core/context.h" #include "core/context.h"
#include "core/context_cuda.h"
#include "core/operator.h" #include "core/operator.h"
#include "core/operator_gradient.h" #include "core/registry.h"
#include "core/graph_gradient.h"
#include "core/workspace.h" #include "core/workspace.h"
#include "core/context_cuda.h"
#include "core/graph_gradient.h"
#include "core/operator_gradient.h"
#include "utils/caffemodel.h" #include "utils/caffemodel.h"
#include "onnx/onnx_backend.h"
#include <pybind11/stl.h> #include <pybind11/stl.h>
#include <pybind11/pybind11.h> #include <pybind11/pybind11.h>
...@@ -136,8 +138,6 @@ class NumpyFeeder : public TensorFeederBase { ...@@ -136,8 +138,6 @@ class NumpyFeeder : public TensorFeederBase {
} }
}; };
Workspace* ws();
} // namespace python } // namespace python
} // namespace dragon } // namespace dragon
......
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_PYTHON_PY_GRAPH_H_
#define DRAGON_PYTHON_PY_GRAPH_H_
#include "py_dragon.h"
namespace dragon {
namespace python {
void AddGraphMethods(pybind11::module& m) {
/*! \brief Create a graph from the serialized def */
m.def("CreateGraph", [](
const string& serialized,
const bool verbose) {
GraphDef graph_def;
if (!graph_def.ParseFromString(serialized))
LOG(FATAL) << "Failed to parse the GraphDef.";
auto* graph = ws()->CreateGraph(graph_def);
if (verbose) {
// It is not a good design to print the debug string
auto* graph_tensor = ws()->CreateTensor(
"/graph_def/optimized/" + graph->name());
if (graph_tensor->count() > 0) {
auto* data = graph_tensor->mutable_data<string, CPUContext>();
std::cout << data[0] << std::endl;
}
}
// The returned graph name may differ from the def;
// a unique dummy name is generated when creating the graph
return graph->name();
});
/*! \brief Run an existing graph */
m.def("RunGraph", [](
const string& name,
const string& include,
const string& exclude) {
pybind11::gil_scoped_release g;
ws()->RunGraph(name, include, exclude);
});
/*! \brief List all of the existing graphs */
m.def("Graphs", []() { ws()->GetGraphs(); });
}
} // namespace python
} // namespace dragon
#endif // DRAGON_PYTHON_PY_GRAPH_H_
\ No newline at end of file
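With `py_graph.h` removed, graph creation and execution move onto the exported `Workspace` class (see the `CreateGraph`/`RunGraph` bindings later in this diff). A small sketch of the equivalent calls; the helper name is hypothetical.

```python
def run_serialized_graph(ws, graph_def_bytes):
    """Sketch: graph_def_bytes is a serialized GraphDef proto."""
    # CreateGraph may return a dummy name that differs from the def
    name = ws.CreateGraph(graph_def_bytes, False)  # verbose=False
    ws.RunGraph(name, '', '')                      # empty include/exclude rules
    return name
```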
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_PYTHON_PY_IO_H_
#define DRAGON_PYTHON_PY_IO_H_
#include "py_dragon.h"
namespace dragon {
namespace python {
void AddIOMethods(pybind11::module& m) {
m.def("Snapshot", [](
const string& filename,
vector<string>& names,
const int format) {
vector<Tensor*> tensors;
switch (format) {
case 0: // Pickle
LOG(FATAL) << "Format depends on Pickle. "
"Can't be used in C++.";
break;
case 1: // CaffeModel
for (const auto& e : names)
tensors.emplace_back(ws()->GetTensor(e));
SavaCaffeModel(filename, tensors);
break;
default:
LOG(FATAL) << "Unknown format, code: " << format;
}
});
m.def("Restore", [](
const string& filename,
const int format) {
switch (format) {
case 0: // Pickle
LOG(FATAL) << "Format depends on Pickle. "
"Can't be used in C++.";
break;
case 1: // CaffeModel
LoadCaffeModel(filename, ws());
break;
default:
LOG(FATAL) << "Unknown format, code: " << format;
}
});
}
} // namespace python
} // namespace dragon
#endif // DRAGON_PYTHON_PY_IO_H_
\ No newline at end of file
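Likewise, `py_io.h` is folded into the `Workspace` binding as `Snapshot`/`Restore` (shown later in this diff). A hedged usage sketch; format code 1 selects the CaffeModel path, while 0 (Pickle) is rejected on the C++ side.

```python
def save_and_reload(ws, filename, tensor_names):
    """Sketch: round-trip a few tensors through the CaffeModel serializer."""
    ws.Snapshot(filename, tensor_names, 1)  # 1 -> CaffeModel, 0 -> Pickle (fatal in C++)
    ws.Restore(filename, 1)
```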
#include "py_graph.h"
#include "py_autograd.h" #include "py_autograd.h"
#include "py_operator.h" #include "py_operator.h"
#include "py_tensor.h" #include "py_tensor.h"
#include "py_cuda.h" #include "py_cuda.h"
#include "py_mpi.h" #include "py_mpi.h"
#include "py_io.h"
#include "py_onnx.h"
#include "py_config.h" #include "py_config.h"
#include "py_proto.h" #include "py_proto.h"
...@@ -16,13 +13,6 @@ namespace python { ...@@ -16,13 +13,6 @@ namespace python {
DEFINE_TYPED_REGISTRY(TensorFetcherRegistry, TypeId, TensorFetcherBase); DEFINE_TYPED_REGISTRY(TensorFetcherRegistry, TypeId, TensorFetcherBase);
DEFINE_TYPED_REGISTRY(TensorFeederRegistry, TypeId, TensorFeederBase); DEFINE_TYPED_REGISTRY(TensorFeederRegistry, TypeId, TensorFeederBase);
Map<string, unique_ptr < Workspace > > g_workspaces;
Map<string, vector<string> > sub_workspaces;
Workspace* g_workspace;
string g_current_workspace;
Workspace* ws() { return g_workspace; }
TypeId CTypeToFetcher(TypeId type) { TypeId CTypeToFetcher(TypeId type) {
static Map<TypeId,TypeId> c_type_map { static Map<TypeId,TypeId> c_type_map {
{ TypeMeta::Id<bool>(), TypeMeta::Id<NumpyFetcher>() }, { TypeMeta::Id<bool>(), TypeMeta::Id<NumpyFetcher>() },
...@@ -41,152 +31,389 @@ REGISTER_TENSOR_FETCHER(TypeMeta::Id<NumpyFetcher>(), NumpyFetcher); ...@@ -41,152 +31,389 @@ REGISTER_TENSOR_FETCHER(TypeMeta::Id<NumpyFetcher>(), NumpyFetcher);
REGISTER_TENSOR_FETCHER(TypeMeta::Id<StringFetcher>(), StringFetcher); REGISTER_TENSOR_FETCHER(TypeMeta::Id<StringFetcher>(), StringFetcher);
REGISTER_TENSOR_FEEDER(TypeMeta::Id<NumpyFeeder>(), NumpyFeeder); REGISTER_TENSOR_FEEDER(TypeMeta::Id<NumpyFeeder>(), NumpyFeeder);
void SwitchWorkspace( void OnImportModule() { []() { import_array1(); }(); }
const string& name,
const bool create_if_missing = true) {
if (g_workspaces.count(name)) {
g_current_workspace = name;
g_workspace = g_workspaces[name].get();
} else if (create_if_missing) {
unique_ptr<Workspace> new_workspace(new Workspace(name));
g_workspace = new_workspace.get();
g_workspaces[name] = std::move(new_workspace);
sub_workspaces[name] = vector<string>();
g_current_workspace = name;
} else {
LOG(FATAL) << "Workspace of the given name does not exist."
"\nAnd, it is not allowed to create. (Try to alllow?)";
}
}
void OnImportModule() {
[]() { import_array1(); }();
static bool initialized = false;
if (initialized) return;
SwitchWorkspace("default", true);
g_current_workspace = "default";
initialized = true;
}
PYBIND11_MODULE(libdragon, m) { PYBIND11_MODULE(libdragon, m) {
/*! \brief Export the Workspace class */
pybind11::class_<Workspace>(m, "Workspace")
.def(pybind11::init<const string&>())
/*! \brief Return the name of this workspace */
.def_property_readonly("name", &Workspace::name)
/*! \brief Return the names of the stored tensors */
.def_property_readonly("tensors", &Workspace::tensors)
/*! \brief Return the names of the stored graphs */
.def_property_readonly("graphs", &Workspace::graphs)
/*! \brief Destroy all the tensors */
.def("Clear", &Workspace::Clear)
/*! \brief Merge an external workspace into self */
.def("MergeFrom", &Workspace::MergeFrom)
/*! \brief Return a unique dummy name */
.def("GetDummyName", &Workspace::GetDummyName)
/*! \brief Return the unique name of given tensor */
.def("GetTensorName", &Workspace::GetTensorName)
/*! \brief Reset a tensor with the given name */
.def("ResetTensor", &Workspace::ResetTensor)
/*! \brief Indicate whether the given tensor exists */
.def("HasTensor", [](
Workspace* self,
const string& name) {
return self->HasTensor(name);
})
/*! \brief Create a tensor with the given name */
.def("CreateTensor", [](
Workspace* self,
const string& name) {
self->CreateTensor(name);
})
/*! \brief Create a tensor from the specified filler */
.def("CreateFiller", [](
Workspace* self,
const string& serialized) {
TensorFillerProto filler_proto;
if (!filler_proto.ParseFromString(serialized))
LOG(FATAL) << "Failed to parse the TensorFiller.";
self->CreateFiller(filler_proto);
self->CreateTensor(filler_proto.tensor());
})
/*! \brief Create a tensor with the given shape */
.def("TensorFromShape", [](
Workspace* self,
const string& name,
const vector<int64_t>& shape,
const string& dtype) {
const TypeMeta& meta = TypeStringToMeta(dtype);
CHECK(meta.id() != 0)
<< "\nUnsupported data type: " + dtype + ".";
Tensor* tensor = self->CreateTensor(name);
tensor->Reshape(shape);
tensor->raw_mutable_data<CPUContext>(meta);
})
/*! \brief Create a tensor with the given array */
.def("TensorFromArray", [](
Workspace* self,
const string& name,
pybind11::object object) {
PyArrayObject* array = PyArray_GETCONTIGUOUS(
reinterpret_cast<PyArrayObject*>(object.ptr()));
const TypeMeta& meta = TypeNPYToMeta(PyArray_TYPE(array));
if (meta.id() == 0) LOG(FATAL) << "Unsupported data type.";
Tensor* tensor = self->CreateTensor(name);
tensor->SetMeta(meta);
int ndim = PyArray_NDIM(array);
npy_intp* npy_dims = PyArray_DIMS(array);
vector<int64_t> dims;
for (int i = 0; i < ndim; i++) dims.push_back(npy_dims[i]);
tensor->Reshape(dims);
auto* data = static_cast<void*>(PyArray_DATA(array));
if (!tensor->has_memory()) {
MixedMemory* memory(new MixedMemory());
memory->set_cpu_data(data, tensor->nbytes());
tensor->set_memory(memory);
} else {
if (tensor->DECREFPyArray) tensor->DECREFPyArray();
tensor->memory()->set_cpu_data(data, tensor->nbytes());
}
// Following the approach of PyTorch,
// we bind the DECREF to the Tensor, so that
// ResetTensor() or ResetWorkspace() can trigger it
tensor->DECREFPyArray = [array]()->void { Py_XDECREF(array); };
})
/*! \brief Create a tensor copied from an existing one */
.def("TensorFromTensor", [](
Workspace* self,
const string& name,
const string& other,
const string& dev1,
const string& dev2) {
DeviceOption dst_ctx, src_ctx;
dst_ctx.ParseFromString(dev1);
src_ctx.ParseFromString(dev2);
Tensor* srcT = self->GetTensor(other);
Tensor* dstT = self->CreateTensor(name);
dstT->ReshapeLike(*srcT);
const TypeMeta& meta = srcT->meta();
if (dst_ctx.device_type() == PROTO_CUDA) {
if (src_ctx.device_type() == PROTO_CUDA) {
// CUDA <- CUDA
CUDAContext::MemcpyEx<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CUDAContext>(meta),
srcT->raw_data<CUDAContext>(),
src_ctx.device_id());
} else {
// CUDA <- CPU
CUDAContext::MemcpyEx<CUDAContext, CPUContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CUDAContext>(meta),
srcT->raw_data<CPUContext>(),
dst_ctx.device_id());
}
} else {
if (src_ctx.device_type() == PROTO_CUDA) {
// CPU <- CUDA
CUDAContext::MemcpyEx<CPUContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CPUContext>(meta),
srcT->raw_data<CUDAContext>(),
src_ctx.device_id());
} else {
// CPU <- CPU
CPUContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CPUContext>(meta),
srcT->raw_data<CPUContext>());
}
}
})
/*! \brief Return an array zero-copied from an existing tensor */
.def("TensorToArray", [](
Workspace* self,
const string& name,
const bool readonly) {
Tensor* tensor = self->GetTensor(name);
CHECK_GT(tensor->count(), 0);
vector<npy_intp> dims;
for (const auto dim : tensor->dims()) dims.push_back(dim);
int npy_type = TypeMetaToNPY(tensor->meta());
if (npy_type == -1) {
LOG(FATAL) << "Tensor(" + tensor->name() + ") "
"with dtype." + TypeMetaToString(tensor->meta()) +
" is not supported by numpy.";
}
auto* data = readonly ?
const_cast<void*>(tensor->raw_data<CPUContext>()) :
tensor->raw_mutable_data<CPUContext>();
PyObject* array = PyArray_SimpleNewFromData(
tensor->ndim(), dims.data(), npy_type, data);
return pybind11::reinterpret_steal<pybind11::object>(array);
})
/*! \brief Return the CXX Tensor reference */
.def("GetTensor", [](
Workspace* self,
const string& name) {
return self->GetTensor(name);
}, pybind11::return_value_policy::reference_internal)
/*! \brief Return the filler type of a tensor */
.def("GetFillerType", [](
Workspace* self,
const string& name) {
return self->GetFiller(name)->type();
})
/* \brief Set an alias for the tensor */
.def("SetTensorAlias", [](
Workspace* self,
const string& name,
const string& alias) {
CHECK(self->HasTensor(name))
<< "\nTensor(" + name << ") has not been "
<< "registered in the current workspace.";
self->SetTensorAlias(name, alias);
})
/*! \brief Copy the array data to tensor */
.def("FeedTensor", [](
Workspace* self,
const string& name,
pybind11::object value,
const string& ctx) {
DeviceOption dev;
if (!ctx.empty()) {
CHECK(dev.ParseFromString(ctx))
<< "\nFailed to parse the DeviceOption.";
}
Tensor* tensor = self->CreateTensor(name);
unique_ptr<TensorFeederBase> feeder(
TensorFeederRegistry()->Create(
TypeMeta::Id<NumpyFeeder>()));
feeder->Feed(dev, reinterpret_cast
<PyArrayObject*>(value.ptr()), tensor);
})
/*! \brief Copy the tensor data to the array */
.def("FetchTensor", [](
Workspace* self,
const string& name) {
CHECK(self->HasTensor(name))
<< "\nTensor(" + name + ") does not exist.\n"
<< "Have you registered it?";
Tensor* tensor = self->GetTensor(name);
TypeId type_id = CTypeToFetcher(tensor->meta().id());
CHECK(type_id != 0)
<< "\nTensor(" << tensor->name()
<< ") is not initialized or has been reset.";
unique_ptr<TensorFetcherBase> fetcher(CreateFetcher(type_id));
if (fetcher.get()) {
// Copy the tensor data to a numpy object
return fetcher->Fetch(*tensor);
} else {
LOG(FATAL) << name << " is not a C++ native type.";
return pybind11::object();
}
})
/*! \brief Run an operator from the def reference */
.def("RunOperator", [](
Workspace* self,
OperatorDef* def,
const bool verbose) {
pybind11::gil_scoped_release g;
if (verbose) {
// It is not a good design to print the debug string
std::cout << def->DebugString() << std::endl;
}
self->RunOperator(*def);
})
/*! \brief Run an operator from the serialized def */
.def("RunOperator", [](
Workspace* self,
const string& serialized,
const bool verbose) {
OperatorDef def;
CHECK(def.ParseFromString(serialized));
pybind11::gil_scoped_release g;
if (verbose) {
// It is not a good design to print the debug string
std::cout << def.DebugString() << std::endl;
}
self->RunOperatorOnce(def);
})
/*! \brief Create a graph from the serialized def */
.def("CreateGraph", [](
Workspace* self,
const string& serialized,
const bool verbose) {
GraphDef graph_def;
CHECK(graph_def.ParseFromString(serialized))
<< "\nFailed to parse the GraphDef.";
auto* graph = self->CreateGraph(graph_def);
if (verbose) {
// It is not a good design to print the debug string
auto* T = self->CreateTensor(
"/graph_def/optimized/" + graph->name());
if (T->count() > 0) {
auto* data = T->mutable_data<string, CPUContext>();
std::cout << data[0] << std::endl;
}
}
// The returned graph name may differ from the def;
// a unique dummy name is generated when creating the graph
return graph->name();
})
/*! \brief Run an existing graph */
.def("RunGraph", [](
Workspace* self,
const string& name,
const string& include,
const string& exclude) {
pybind11::gil_scoped_release g;
self->RunGraph(name, include, exclude);
})
.def("Backward", [](
Workspace* self,
const vector<OperatorDef*>& forward_ops,
const vector<string>& targets,
const vector<string>& input_grads,
const vector<string>& ignore_grads,
const bool is_sharing,
const bool verbose) {
// Make => Optimize => Run
GraphDef backward_ops;
GraphGradientMaker maker;
for (auto& e : input_grads) maker.AddExternalGrad(e);
for (auto& e : ignore_grads) maker.AddIgnoreGrad(e);
maker.Make(forward_ops, targets, backward_ops);
pybind11::gil_scoped_release g;
if (is_sharing) backward_ops = maker.Share(backward_ops);
for (auto& op : backward_ops.op()) {
if (verbose) std::cout << op.DebugString() << std::endl;
if (op.has_uid()) self->RunOperator(op);
else self->RunOperatorOnce(op);
}
})
/*! \brief Switch to the specific workspace */ /*! \brief Serialize tensors into a binary file */
m.def("SwitchWorkspace", &SwitchWorkspace); .def("Snapshot", [](
Workspace* self,
/*! \brief Return the current active workspace */ const string& filename,
m.def("CurrentWorkspace", []() { const vector<string>& tensors,
return g_current_workspace; const int format) {
}); vector<Tensor*> refs;
switch (format) {
/*! \brief List all of the existing workspace */ case 0: // Pickle
m.def("Workspaces", []() -> vector<string> { LOG(FATAL) << "Format depends on Pickle. "
vector<string> names; "Can't be used in C++.";
for (auto const& it : g_workspaces) break;
names.emplace_back(it.first); case 1: // CaffeModel
return names; for (const auto& e : tensors)
}); refs.emplace_back(self->GetTensor(e));
SavaCaffeModel(filename, refs);
/*! \brief Move the source workspace into the target */ break;
m.def("MoveWorkspace", []( default:
const string& target, LOG(FATAL) << "Unknown format, code: " << format;
const string& source) {
CHECK(g_workspaces.count(source))
<< "\nSource Workspace(" << source << ") does not exist.";
CHECK(g_workspaces.count(target))
<< "\nTarget Workspace(" << target << ") does not exist.";
g_workspaces[target]->Move(g_workspaces[source].get());
sub_workspaces[target].push_back(source);
LOG(INFO) << "Move the Workspace(" << source << ") "
<< "into the Workspace(" << target << ").";
});
/*! \brief Reset the specific workspace */
m.def("ResetWorkspace", [](const string& name) {
string target_workspace = g_current_workspace;
if (!name.empty()) target_workspace = name;
CHECK(g_workspaces.count(target_workspace))
<< "\nWorkspace(" << target_workspace
<< ") does not exist, can not be reset.";
LOG(INFO) << "Reset the Workspace(" << target_workspace << ")";
g_workspaces[target_workspace].reset(new Workspace(target_workspace));
g_workspace = g_workspaces[target_workspace].get();
for (auto& sub_workspace : sub_workspaces[target_workspace]) {
if (g_workspaces.count(sub_workspace) > 0)
g_workspace->Move(g_workspaces[sub_workspace].get());
}
});
/*! \brief Release the memory of tensors */
m.def("ClearWorkspace", [](const string& name) {
string target_workspace = g_current_workspace;
if (!name.empty()) target_workspace = name;
CHECK(g_workspaces.count(target_workspace))
<< "\nWorkspace(" << target_workspace
<< ") does not exist, can not be reset.";
LOG(INFO) << "Clear the Workspace(" << target_workspace << ")";
g_workspaces[target_workspace]->Clear();
});
/*! \brief Copy the array data to the tensor */
m.def("FeedTensor", [](
const string& name,
pybind11::object value,
const string& device_option) {
DeviceOption dev;
if (!device_option.empty()) {
if (!dev.ParseFromString(device_option)) {
LOG(FATAL) << "Failed to parse the DeviceOption.";
} }
} })
Tensor* tensor = g_workspace->CreateTensor(name);
unique_ptr<TensorFeederBase> feeder(TensorFeederRegistry() /*! \brief Load tensors from a binary file */
->Create(TypeMeta::Id<NumpyFeeder>())); .def("Restore", [](
feeder->Feed(dev, reinterpret_cast< Workspace* self,
PyArrayObject*>(value.ptr()), tensor); const string& filename,
}); const int format) {
switch (format) {
/*! \brief Copy the tensor data to the array */ case 0: // Pickle
m.def("FetchTensor", [](const string& name) { LOG(FATAL) << "Format depends on Pickle. "
if (!g_workspace->HasTensor(name)) "Can't be used in C++.";
LOG(FATAL) << "Tensor(" + name + ") " break;
"does not exist. Have you registered it?"; case 1: // CaffeModel
Tensor* tensor = g_workspace->GetTensor(name); LoadCaffeModel(filename, self);
TypeId type_id = CTypeToFetcher(tensor->meta().id()); break;
CHECK(type_id != 0) default:
<< "\nTensor(" << tensor->name() LOG(FATAL) << "Unknown format, code: " << format;
<< ") is not initialized or has been reset."; }
unique_ptr<TensorFetcherBase> fetcher(CreateFetcher(type_id)); })
if (fetcher.get()) {
// Copy the tensor data to a numpy object /*! \brief Load tensors and graph from a ONNX model */
return fetcher->Fetch(*tensor); .def("ImportONNXModel", [](
} else { Workspace* self,
LOG(FATAL) << name << " is not a C++ native type."; const string& model_path) {
return pybind11::object(); GraphDef init_graph, pred_graph;
} onnx::ONNXBackend onnx_backend;
}); onnx_backend.Prepare(model_path, &init_graph, &pred_graph);
// Serializing to Python is intractable
/*! \brief Return a unique dummy name */ // We should apply the initializer immediately
m.def("GetDummyName", []( self->RunGraph(self->CreateGraph(init_graph)->name(), "", "");
const string& basename, return pybind11::bytes(pred_graph.SerializeAsString());
const string& suffix, });
const string& domain,
const bool zero_based) {
return ws()->GetDummyName(
basename, suffix, domain, zero_based);
});
AddIOMethods(m);
AddMPIMethods(m); AddMPIMethods(m);
AddCUDAMethods(m); AddCUDAMethods(m);
AddProtoMethods(m); AddProtoMethods(m);
AddGraphMethods(m);
AddTensorMethods(m); AddTensorMethods(m);
AddConfigMethods(m); AddConfigMethods(m);
AddGradientMethods(m); AddGradientMethods(m);
AddOperatorMethods(m); AddOperatorMethods(m);
OnImportModule(); OnImportModule();
m.def("OnModuleExit", []() { g_workspaces.clear(); });
} }
} // namespace python } // namespace python
......
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_PYTHON_PY_ONNX_H_
#define DRAGON_PYTHON_PY_ONNX_H_
#include "onnx/onnx_backend.h"
#include "py_dragon.h"
namespace dragon {
namespace python {
void AddONNXMethods(pybind11::module& m) {
m.def("ImportONNXModel", [](
const string& model_path) {
GraphDef init_graph, pred_graph;
onnx::ONNXBackend onnx_backend;
onnx_backend.Prepare(model_path, &init_graph, &pred_graph);
// Serializing to Python is intractable
// We should apply the initializer immediately
ws()->CreateGraph(init_graph);
ws()->RunGraph(init_graph.name(), "", "");
return pybind11::bytes(pred_graph.SerializeAsString());
});
}
} // namespace python
} // namespace dragon
#endif // DRAGON_PYTHON_PY_ONNX_H_
\ No newline at end of file
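`py_onnx.h` is also folded into the `Workspace` binding: `ImportONNXModel` runs the initializer graph immediately and returns the serialized predict graph, which can then be created and run like any other graph. A hedged sketch; the helper name is hypothetical.

```python
def import_onnx(ws, model_path):
    """Sketch: load an ONNX model into a workspace and return its graph name."""
    pred_graph_bytes = ws.ImportONNXModel(model_path)  # initializers are applied here
    return ws.CreateGraph(pred_graph_bytes, False)     # verbose=False
```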
...@@ -20,36 +20,14 @@ namespace dragon { ...@@ -20,36 +20,14 @@ namespace dragon {
namespace python { namespace python {
void AddOperatorMethods(pybind11::module& m) { void AddOperatorMethods(pybind11::module& m) {
/*! \brief Return all the registered operators */ /*! \brief Return the registered operators */
m.def("RegisteredOperators", []() { return CPUOperatorRegistry()->keys(); }); m.def("RegisteredOperators", []() {
return CPUOperatorRegistry()->keys();
/*! \brief Return all the operators without gradients */
m.def("NoGradientOperators", []() { return NoGradientRegistry()->keys(); });
/*! \brief Run a operator from the def reference */
m.def("RunOperator", [](
OperatorDef* def,
const bool verbose) {
pybind11::gil_scoped_release g;
if (verbose) {
// It is not a good design to print the debug string
std::cout << def->DebugString() << std::endl;
}
ws()->RunOperator(*def);
}); });
/*! \brief Run a operator from the serialized def */ /*! \brief Return the non-gradient operators */
m.def("RunOperator", []( m.def("NoGradientOperators", []() {
const string& serialized, return NoGradientRegistry()->keys();
const bool verbose) {
OperatorDef def;
CHECK(def.ParseFromString(serialized));
pybind11::gil_scoped_release g;
if (verbose) {
// It is not a good design to print the debug string
std::cout << def.DebugString() << std::endl;
}
ws()->RunOperatorOnce(def);
}); });
} }
......
...@@ -22,208 +22,51 @@ namespace python { ...@@ -22,208 +22,51 @@ namespace python {
void AddTensorMethods(pybind11::module& m) { void AddTensorMethods(pybind11::module& m) {
/*! \brief Export the Tensor class */ /*! \brief Export the Tensor class */
pybind11::class_<Tensor>(m, "Tensor") pybind11::class_<Tensor>(m, "Tensor")
/*! \brief Return the number of dimensions */
.def_property_readonly("ndim", &Tensor::ndim) .def_property_readonly("ndim", &Tensor::ndim)
/*! \brief Return all the dimensions */
.def_property_readonly("dims", &Tensor::dims) .def_property_readonly("dims", &Tensor::dims)
/*! \brief Return the total number of elements */
.def_property_readonly("size", &Tensor::size) .def_property_readonly("size", &Tensor::size)
/*! \brief Return the data type */
.def_property_readonly("dtype", [](Tensor* self) { .def_property_readonly("dtype", [](Tensor* self) {
return TypeMetaToString(self->meta()); return TypeMetaToString(self->meta());
}).def_property_readonly("device", [](Tensor* self) { })
/*! \brief Return the device information */
.def_property_readonly("device", [](Tensor* self) {
if (self->has_memory()) { if (self->has_memory()) {
Map<string, string> mem_info = self->memory()->info(); auto mem_info = self->memory()->info();
return std::tuple<string, int>( return std::tuple<string, int>(
mem_info["device_type"], atoi( mem_info["device_type"], atoi(
mem_info["device_id"].c_str())); mem_info["device_id"].c_str()));
} else { } else {
return std::tuple<string, int>("Unknown", 0); return std::tuple<string, int>("Unknown", 0);
} }
}).def("ToCPU", [](Tensor* self) { })
CHECK(self->has_memory()) << "\nTensor(" << self->name()
<< ") does not initialize or had been reset."; /*! \brief Switch the memory to the cpu context */
.def("ToCPU", [](Tensor* self) {
CHECK(self->has_memory())
<< "\nTensor(" << self->name() << ") "
<< "is not initialized or has been reset.";
self->memory()->ToCPU(); self->memory()->ToCPU();
}).def("ToCUDA", [](Tensor* self, const int device_id) { })
/*! \brief Switch the memory to the cuda context */
.def("ToCUDA", [](Tensor* self, int device_id) {
#ifdef WITH_CUDA #ifdef WITH_CUDA
CHECK(self->has_memory()) << "\nTensor(" << self->name() CHECK(self->has_memory())
<< ") does not initialize or had been reset."; << "\nTensor(" << self->name() << ") "
<< "is not initialized or has been reset.";
self->memory()->SwitchToCUDADevice(device_id); self->memory()->SwitchToCUDADevice(device_id);
#else #else
CUDA_NOT_COMPILED; CUDA_NOT_COMPILED;
#endif #endif
}); });
/*! \brief List all the existing tensors */
m.def("Tensors", []() { return ws()->GetTensors(); });
/*! \brief Indicate whether the given tensor is existing */
m.def("HasTensor", [](
const string& name) -> bool {
return ws()->HasTensor(name);
});
/*! \brief Return the unique name of given tensor */
m.def("GetTensorName", [](
const string& name) -> string {
return ws()->GetTensorName(name);
});
/*! \brief Create a tensor with the given name */
m.def("CreateTensor", [](
const string& name) -> void {
ws()->CreateTensor(name);
});
/*! \brief Create a tensor with the given name */
m.def("ResetTensor", [](
const string& name) -> void {
ws()->ResetTensor(name);
});
/*! \brief Create a tensor with the given shape */
m.def("TensorFromShape", [](
const string& name,
const vector<int64_t>& shape,
const string& dtype) {
const TypeMeta& meta = TypeStringToMeta(dtype);
if (meta.id() == 0) {
LOG(FATAL) << "Unsupported data type: " + dtype + ".";
}
Tensor* tensor = ws()->CreateTensor(name);
if (meta.id() != tensor->meta().id() && tensor->meta().id() != 0)
LOG(WARNING) << "Set Tensor(" << tensor->name() << ")"
<< " with different data type from original one.";
tensor->Reshape(shape);
tensor->raw_mutable_data<CPUContext>(meta);
});
/*! \brief Create a tensor with the given array */
m.def("TensorFromPyArray", [](
const string& name,
pybind11::object py_array) {
PyArrayObject* array = PyArray_GETCONTIGUOUS(
reinterpret_cast<PyArrayObject*>(py_array.ptr()));
const TypeMeta& meta = TypeNPYToMeta(PyArray_TYPE(array));
if (meta.id() == 0) LOG(FATAL) << "Unsupported data type.";
Tensor* tensor = ws()->CreateTensor(name);
tensor->SetMeta(meta);
int ndim = PyArray_NDIM(array);
npy_intp* npy_dims = PyArray_DIMS(array);
vector<int64_t> dims;
for (int i = 0; i < ndim; i++) dims.push_back(npy_dims[i]);
tensor->Reshape(dims);
auto* data = static_cast<void*>(PyArray_DATA(array));
if (!tensor->has_memory()) {
MixedMemory* memory(new MixedMemory());
memory->set_cpu_data(data, tensor->nbytes());
tensor->set_memory(memory);
} else {
if (tensor->DECREFPyArray) tensor->DECREFPyArray();
tensor->memory()->set_cpu_data(data, tensor->nbytes());
}
// Follow the codes of PyTorch
// Here we bind the DECREF to Tensor
// ResetTensor() or ResetWorkspace() can trigger it
tensor->DECREFPyArray = [array]()->void { Py_XDECREF(array); };
});
/*! \brief Create a tensor copied from an existing one */
m.def("TensorFromTensor", [](
const string& name,
const string& other,
const string& dev1,
const string& dev2) {
DeviceOption dst_ctx, src_ctx;
dst_ctx.ParseFromString(dev1);
src_ctx.ParseFromString(dev2);
Tensor* srcT = ws()->GetTensor(other);
Tensor* dstT = ws()->CreateTensor(name);
dstT->ReshapeLike(*srcT);
const TypeMeta& meta = srcT->meta();
if (dst_ctx.device_type() == PROTO_CUDA) {
if (src_ctx.device_type() == PROTO_CUDA) {
// CUDA <- CUDA
CUDAContext::MemcpyEx<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CUDAContext>(meta),
srcT->raw_data<CUDAContext>(),
src_ctx.device_id());
} else {
// CUDA <- CPU
CUDAContext::MemcpyEx<CUDAContext, CPUContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CUDAContext>(meta),
srcT->raw_data<CPUContext>(),
dst_ctx.device_id());
}
} else {
if (src_ctx.device_type() == PROTO_CUDA) {
// CPU <- CUDA
CUDAContext::MemcpyEx<CPUContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CPUContext>(meta),
srcT->raw_data<CUDAContext>(),
src_ctx.device_id());
} else {
// CPU <- CPU
CPUContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CPUContext>(meta),
srcT->raw_data<CPUContext>());
}
}
});
/*! \brief Return a array zero-copied from an existing tensor */
m.def("TensorToPyArray", [](
const string& name,
const bool readonly) {
Tensor* tensor = ws()->GetTensor(name);
CHECK_GT(tensor->count(), 0);
vector<npy_intp> dims;
for (const auto dim : tensor->dims()) dims.push_back(dim);
int npy_type = TypeMetaToNPY(tensor->meta());
if (npy_type == -1) {
LOG(FATAL) << "Tensor(" + tensor->name() + ") "
"with dtype." + TypeMetaToString(tensor->meta()) +
" is not supported by numpy.";
}
auto* data = readonly ?
const_cast<void*>(tensor->raw_data<CPUContext>()) :
tensor->raw_mutable_data<CPUContext>();
PyObject* array = PyArray_SimpleNewFromData(
tensor->ndim(), dims.data(), npy_type, data);
return pybind11::reinterpret_steal<pybind11::object>(array);
});
/*! \brief Create a tensor from the specified filler */
m.def("CreateFiller", [](
const string& serialized) {
TensorFillerProto filler_proto;
if (!filler_proto.ParseFromString(serialized))
LOG(FATAL) << "Failed to parse the TensorFiller.";
ws()->CreateFiller(filler_proto);
ws()->CreateTensor(filler_proto.tensor());
});
/*! \brief Return the filler type of a tensor */
m.def("GetFillerType", [](const string& name) {
return ws()->GetFiller(name)->type();
});
/* \brief Set an alias for the tensor */
m.def("SetTensorAlias", [](
const string& name,
const string& alias) {
if (!ws()->HasTensor(name)) {
LOG(FATAL) << "Tensor(" + name << ") has not "
"been registered in the current workspace.";
}
ws()->SetTensorAlias(name, alias);
});
/*! \brief Return the CXX Tensor reference */
m.def("GetTensor", [](
const string& name) {
return ws()->GetTensor(name);
}, pybind11::return_value_policy::reference_internal);
} }
} // namespace python } // namespace python
......
...@@ -22,6 +22,9 @@ import dragon.config as config ...@@ -22,6 +22,9 @@ import dragon.config as config
# Core # Core
from dragon.core.tensor import Tensor from dragon.core.tensor import Tensor
import dragon.core.workspace as workspace import dragon.core.workspace as workspace
from dragon.core.workspace import Workspace
from dragon.core.workspace import get_default_workspace
from dragon.core.workspace import reset_default_workspace
import dragon.core.tensor_utils as tensor_utils import dragon.core.tensor_utils as tensor_utils
import dragon.core.mpi as mpi import dragon.core.mpi as mpi
import dragon.core.cuda as cuda import dragon.core.cuda as cuda
...@@ -41,7 +44,6 @@ from dragon.vm.theano.tensor import grad as grad ...@@ -41,7 +44,6 @@ from dragon.vm.theano.tensor import grad as grad
from dragon.core.scope import name_scope, get_default_name_scope from dragon.core.scope import name_scope, get_default_name_scope
from dragon.core.scope import phase_scope, get_default_phase from dragon.core.scope import phase_scope, get_default_phase
from dragon.core.scope import device_scope, get_default_device from dragon.core.scope import device_scope, get_default_device
from dragon.core.scope import WorkspaceScope as ws_scope
# Version # Version
from dragon.version import version from dragon.version import version
......
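The package now re-exports `Workspace`, `get_default_workspace` and `reset_default_workspace` at the top level. A rough sketch of how they might be used; the exact semantics of the default-workspace helpers are an assumption based on their names.

```python
# Sketch only: top-level workspace helpers exported by this commit.
import dragon

default_ws = dragon.get_default_workspace()  # workspace used when none is specified
scratch = dragon.Workspace('scratch')        # an independent, explicitly managed workspace
dragon.reset_default_workspace()             # presumably re-creates the default workspace
```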
...@@ -15,8 +15,9 @@ from __future__ import absolute_import ...@@ -15,8 +15,9 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.import_c_api as C from dragon import import_c_api as _C
import dragon.core.logging as logging from dragon.core import logging as _logging
option = {} option = {}
...@@ -290,12 +291,12 @@ def SetLoggingLevel(level): ...@@ -290,12 +291,12 @@ def SetLoggingLevel(level):
The default level is *INFO*. The default level is *INFO*.
""" """
C.SetLoggingLevel(level) _C.SetLoggingLevel(level)
logging.set_verbosity({ _logging.set_verbosity({
'DEBUG': logging.DEBUG, 'DEBUG': _logging.DEBUG,
'INFO': logging.INFO, 'INFO': _logging.INFO,
'WARNING': logging.WARN, 'WARNING': _logging.WARN,
'ERROR': logging.ERROR, 'ERROR': _logging.ERROR,
'FATAL': logging.FATAL, 'FATAL': _logging.FATAL,
}[level] }[level]
) )
\ No newline at end of file
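For reference, a one-line usage sketch of the logging helper shown above; the accepted level strings are exactly the keys of the mapping.

```python
import dragon.config as config

config.SetLoggingLevel('WARNING')  # one of DEBUG, INFO, WARNING, ERROR, FATAL
```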
...@@ -15,7 +15,7 @@ from __future__ import absolute_import ...@@ -15,7 +15,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.import_c_api as _C from dragon import import_c_api as _C
def IsCUDADriverSufficient(): def IsCUDADriverSufficient():
......
...@@ -30,11 +30,10 @@ from __future__ import print_function ...@@ -30,11 +30,10 @@ from __future__ import print_function
from collections import defaultdict from collections import defaultdict
import dragon.proto.dragon_pb2 as pb from dragon import import_c_api as _C
import dragon.import_c_api as C from dragon.core import helper as _helper
from dragon.proto import dragon_pb2 as _proto_def
from dragon.core.helper import OperatorHelper from dragon.core import proto_utils as _proto_utils
from dragon.core.proto_utils import MakeOperatorDef
class GraphGradientMaker(object): class GraphGradientMaker(object):
...@@ -62,16 +61,22 @@ class GraphGradientMaker(object): ...@@ -62,16 +61,22 @@ class GraphGradientMaker(object):
The OpDef, outputs and defaults of ``BackwardOp``. The OpDef, outputs and defaults of ``BackwardOp``.
""" """
g_ops, g_inputs, defaults = C.CreateGradientDefs( g_ops, g_inputs, defaults = _C.CreateGradientDefs(
forward_op.SerializeToString(), g_outputs) forward_op.SerializeToString(), g_outputs)
for idx, g_op in enumerate(g_ops): for idx, g_op in enumerate(g_ops):
new_def = pb.OperatorDef() new_def = _proto_def.OperatorDef()
new_def.ParseFromString(g_op) new_def.ParseFromString(g_op)
g_ops[idx] = new_def g_ops[idx] = new_def
return g_ops, g_inputs, defaults return g_ops, g_inputs, defaults
@classmethod @classmethod
def CheckGrad(cls, forward_op, inputs_to_grads, blacklist, targets): def CheckGrad(
cls,
forward_op,
inputs_to_grads,
blacklist,
targets,
):
"""Check if missing Grads. If True, skip this Op. """Check if missing Grads. If True, skip this Op.
Parameters Parameters
...@@ -91,7 +96,7 @@ class GraphGradientMaker(object): ...@@ -91,7 +96,7 @@ class GraphGradientMaker(object):
The result of checking and generated filling grads. The result of checking and generated filling grads.
""" """
if forward_op.type in C.NO_GRADIENT_OPERATORS: if forward_op.type in _C.NO_GRADIENT_OPERATORS:
for input in forward_op.input: blacklist.add(input) for input in forward_op.input: blacklist.add(input)
return True, None return True, None
...@@ -114,7 +119,13 @@ class GraphGradientMaker(object): ...@@ -114,7 +119,13 @@ class GraphGradientMaker(object):
return False, gen_grads return False, gen_grads
@classmethod @classmethod
def Make(cls, forward_ops, targets, input_grads=None, auto_names=True): def Make(
cls,
forward_ops,
targets,
input_grads=None,
auto_names=True,
):
"""Make ``BackwardOps`` based on ``ForwardOps``. """Make ``BackwardOps`` based on ``ForwardOps``.
Parameters Parameters
...@@ -149,7 +160,7 @@ class GraphGradientMaker(object): ...@@ -149,7 +160,7 @@ class GraphGradientMaker(object):
# PLAY for the forward # PLAY for the forward
for forward_op in forward_ops: for forward_op in forward_ops:
if forward_op.type in C.NO_GRADIENT_OPERATORS: continue if forward_op.type in _C.NO_GRADIENT_OPERATORS: continue
outputs = [o for o in forward_op.output] outputs = [o for o in forward_op.output]
for input in forward_op.input: for input in forward_op.input:
if input not in outputs: if input not in outputs:
...@@ -176,14 +187,17 @@ class GraphGradientMaker(object): ...@@ -176,14 +187,17 @@ class GraphGradientMaker(object):
op_inputs.append(item[0]) op_inputs.append(item[0])
op_outputs.append(item[0] + '_grad') op_outputs.append(item[0] + '_grad')
values.append(defaults[item[1]]) values.append(defaults[item[1]])
gen_op = MakeOperatorDef('GradientGenerate', op_inputs, op_outputs, defaults=values) gen_op = _proto_utils.MakeOperatorDef(
gen_op.name = OperatorHelper.get_name() if auto_names else 'runtime' 'GradientGenerate', op_inputs, op_outputs, defaults=values)
gen_op.name = _helper.OperatorHelper. \
get_name() if auto_names else 'runtime'
if forward_op.HasField('device_option'): if forward_op.HasField('device_option'):
gen_op.device_option.CopyFrom(forward_op.device_option) gen_op.device_option.CopyFrom(forward_op.device_option)
backward_ops.append(gen_op) backward_ops.append(gen_op)
# GradientOp # GradientOp
for g_op in g_ops: for g_op in g_ops:
g_op.name = OperatorHelper.get_name() if auto_names else 'runtime' g_op.name = _helper.OperatorHelper. \
get_name() if auto_names else 'runtime'
backward_ops.append(g_op) backward_ops.append(g_op)
# Split & Gather grads for multi-used input # Split & Gather grads for multi-used input
...@@ -208,10 +222,12 @@ class GraphGradientMaker(object): ...@@ -208,10 +222,12 @@ class GraphGradientMaker(object):
for idx in range(grads_count[g_output]): for idx in range(grads_count[g_output]):
if '%s_autosplit_%d' % (g_output, idx) in all_split_grads: if '%s_autosplit_%d' % (g_output, idx) in all_split_grads:
split_inputs.append('%s_autosplit_%d' % (g_output, idx)) split_inputs.append('%s_autosplit_%d' % (g_output, idx))
gather_op = MakeOperatorDef('GradientGather', split_inputs, [g_output]) gather_op = _proto_utils.MakeOperatorDef(
'GradientGather', split_inputs, [g_output])
if g_op.HasField('device_option'): if g_op.HasField('device_option'):
gather_op.device_option.CopyFrom(g_op.device_option) gather_op.device_option.CopyFrom(g_op.device_option)
gather_op.name = OperatorHelper.get_name() if auto_names else 'runtime' gather_op.name = _helper.OperatorHelper. \
get_name() if auto_names else 'runtime'
backward_ops.append(gather_op) backward_ops.append(gather_op)
g_op.output[g_output_idx] = split_name g_op.output[g_output_idx] = split_name
......
...@@ -17,7 +17,8 @@ from __future__ import print_function ...@@ -17,7 +17,8 @@ from __future__ import print_function
import math import math
import numpy import numpy
import dragon
from dragon.core import workspace as _workspace
class OperatorHelper(object): class OperatorHelper(object):
...@@ -39,11 +40,11 @@ class OperatorHelper(object): ...@@ -39,11 +40,11 @@ class OperatorHelper(object):
@classmethod @classmethod
def get_index_and_name(cls, prefix='Op'): def get_index_and_name(cls, prefix='Op'):
name = dragon.workspace.GetDummyName(prefix, domain='Operator') name = _workspace.GetDummyName(prefix, domain='Operator')
try: try:
_, op_idx = name.split('_') _, op_idx = name.split('_')
except: except:
name = dragon.workspace.GetDummyName(prefix, domain='Operator') name = _workspace.GetDummyName(prefix, domain='Operator')
_, op_idx = name.split('_') _, op_idx = name.split('_')
return int(op_idx), name return int(op_idx), name
......
...@@ -15,7 +15,7 @@ from __future__ import absolute_import ...@@ -15,7 +15,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.import_c_api as _C from dragon import import_c_api as _C
_GLOBAL_MPI_IS_INIT = False _GLOBAL_MPI_IS_INIT = False
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
"""Define some helpful protobuf makers here.""" """Define some helpful protocol buffer makers here."""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -17,28 +17,28 @@ from __future__ import print_function ...@@ -17,28 +17,28 @@ from __future__ import print_function
import sys import sys
import copy import copy
import numpy as np import numpy
from google.protobuf.message import Message
import dragon.config as cfg from dragon import config as _cfg
import dragon.import_c_api as _C from dragon import import_c_api as _C
from dragon.proto import dragon_pb2 as pb from dragon.core import scope as _scope
from dragon.core.scope import get_default_device from dragon.proto import dragon_pb2 as _proto_def
from google.protobuf.message import Message as _Message
if sys.version_info >= (3,0): if sys.version_info >= (3,0):
def MakeArgument(key, value): def MakeArgument(key, value):
argument = pb.Argument() argument = _proto_def.Argument()
argument.name = key argument.name = key
if type(value) is float: argument.f = value if type(value) is float: argument.f = value
elif type(value) in (bool, int, np.int64) : argument.i = value elif type(value) in (bool, int, numpy.int64) : argument.i = value
elif type(value) is bytes: argument.s = value elif type(value) is bytes: argument.s = value
elif type(value) is str: argument.s = str.encode(value) elif type(value) is str: argument.s = str.encode(value)
elif isinstance(value, Message): argument.s = value.SerializeToString() elif isinstance(value, _Message): argument.s = value.SerializeToString()
elif all(type(v) is float for v in value): argument.floats.extend(value) elif all(type(v) is float for v in value): argument.floats.extend(value)
elif all(type(v) is int for v in value): argument.ints.extend(value) elif all(type(v) is int for v in value): argument.ints.extend(value)
elif all(type(v) is str for v in value): argument.strings.extend([str.encode(v) for v in value]) elif all(type(v) is str for v in value): argument.strings.extend([str.encode(v) for v in value])
elif all(isinstance(v, Message) for v in value): elif all(isinstance(v, _Message) for v in value):
argument.strings.extend([v.SerializeToString() for v in value]) argument.strings.extend([v.SerializeToString() for v in value])
else: else:
raise ValueError( raise ValueError(
...@@ -47,20 +47,20 @@ if sys.version_info >= (3,0): ...@@ -47,20 +47,20 @@ if sys.version_info >= (3,0):
return argument return argument
else: else:
def MakeArgument(key, value): def MakeArgument(key, value):
argument = pb.Argument() argument = _proto_def.Argument()
argument.name = key argument.name = key
if type(value) is float: argument.f = value if type(value) is float: argument.f = value
elif type(value) in (bool, int, long, np.int64) : argument.i = value elif type(value) in (bool, int, long, numpy.int64) : argument.i = value
elif type(value) is str: argument.s = value elif type(value) is str: argument.s = value
elif type(value) is unicode: argument.s = str(value) elif type(value) is unicode: argument.s = str(value)
elif isinstance(value, Message): argument.s = value.SerializeToString() elif isinstance(value, _Message): argument.s = value.SerializeToString()
elif all(type(v) is float for v in value): argument.floats.extend(value) elif all(type(v) is float for v in value): argument.floats.extend(value)
elif all(type(v) is int for v in value): argument.ints.extend(value) elif all(type(v) is int for v in value): argument.ints.extend(value)
elif all(type(v) is long for v in value): argument.ints.extend(value) elif all(type(v) is long for v in value): argument.ints.extend(value)
elif all(type(v) is str for v in value): argument.strings.extend(value) elif all(type(v) is str for v in value): argument.strings.extend(value)
elif all(type(v) is unicode for v in value): elif all(type(v) is unicode for v in value):
argument.strings.extend([str(v) for v in value]) argument.strings.extend([str(v) for v in value])
elif all(isinstance(v, Message) for v in value): elif all(isinstance(v, _Message) for v in value):
argument.strings.extend([v.SerializeToString() for v in value]) argument.strings.extend([v.SerializeToString() for v in value])
else: else:
raise ValueError( raise ValueError(
...@@ -70,10 +70,16 @@ else: ...@@ -70,10 +70,16 @@ else:
def MakeOperatorDef( def MakeOperatorDef(
op_type, inputs=(), outputs=(), op_type,
name='', uid=None, device_option=None, inputs=(),
arg=None, **kwargs): outputs=(),
operator = pb.OperatorDef() name='',
uid=None,
device_option=None,
arg=None,
**kwargs
):
operator = _proto_def.OperatorDef()
operator.type = op_type operator.type = op_type
operator.name = name operator.name = name
operator.input.extend([str(tensor) for tensor in inputs]) operator.input.extend([str(tensor) for tensor in inputs])
...@@ -92,9 +98,15 @@ def MakeOperatorDef( ...@@ -92,9 +98,15 @@ def MakeOperatorDef(
def MakeCXXOperatorDef( def MakeCXXOperatorDef(
op_type, inputs=(), outputs=(), op_type,
name='', uid=None, device_option=None, inputs=(),
arg=None, **kwargs): outputs=(),
name='',
uid=None,
device_option=None,
arg=None,
**kwargs
):
c_def = _C.OperatorDef() c_def = _C.OperatorDef()
py_def = MakeOperatorDef( py_def = MakeOperatorDef(
op_type, inputs, outputs, name, uid, op_type, inputs, outputs, name, uid,
...@@ -104,7 +116,7 @@ def MakeCXXOperatorDef( ...@@ -104,7 +116,7 @@ def MakeCXXOperatorDef(
def MakeDeviceOption(device_type, device_id, rng_seed=None): def MakeDeviceOption(device_type, device_id, rng_seed=None):
option = pb.DeviceOption() option = _proto_def.DeviceOption()
option.device_type = device_type option.device_type = device_type
option.device_id = device_id option.device_id = device_id
if rng_seed is not None: option.random_seed = rng_seed if rng_seed is not None: option.random_seed = rng_seed
...@@ -133,7 +145,7 @@ def GetDeviceOption(device_type, device_id=0, rng_seed=None): ...@@ -133,7 +145,7 @@ def GetDeviceOption(device_type, device_id=0, rng_seed=None):
def GetDefaultDeviceOption(): def GetDefaultDeviceOption():
device_info = get_default_device() device_info = _scope.get_default_device()
if device_info is not None: if device_info is not None:
return GetDeviceOption( return GetDeviceOption(
device_info['device_type'], device_info['device_type'],
...@@ -142,10 +154,10 @@ def GetDefaultDeviceOption(): ...@@ -142,10 +154,10 @@ def GetDefaultDeviceOption():
def GetGlobalDeviceOption(): def GetGlobalDeviceOption():
option = cfg.GetGlobalOptions() options = _cfg.GetGlobalOptions()
return GetDeviceOption( return GetDeviceOption(
option['device'], options['device'],
option['device_id']) options['device_id'])
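For orientation, here is a minimal sketch of how the makers above compose. This is hedged: the 'Relu' op type and the tensor names 'x'/'y' are illustrative only, and the keyword arguments are assumed to be routed into Argument messages via MakeArgument as shown in this hunk.

>>> from dragon.core import proto_utils as _proto_utils
>>> dev = _proto_utils.GetDeviceOption('cpu')
>>> op = _proto_utils.MakeOperatorDef(
...     'Relu', inputs=['x'], outputs=['y'], name='Op_0', device_option=dev)
>>> print(op.type, list(op.input), list(op.output))   # indicative: Relu ['x'] ['y']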
# Fix the python stdout # Fix the python stdout
...@@ -159,6 +171,5 @@ class Unbuffered(object): ...@@ -159,6 +171,5 @@ class Unbuffered(object):
return getattr(self.stream, attr) return getattr(self.stream, attr)
# Clear the stdout buffer for mpi(C++ && Python) # Clear the stdout buffer for mpi
import sys
sys.stdout = Unbuffered(sys.stdout) sys.stdout = Unbuffered(sys.stdout)
\ No newline at end of file
...@@ -13,92 +13,7 @@ from __future__ import absolute_import ...@@ -13,92 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import threading from dragon.core import tls as _tls
import dragon.import_c_api as _C
from contextlib import contextmanager
__all__ = [
'name_scope',
'phase_scope',
'device_scope',
'get_default_phase',
'get_default_device',
'get_default_name_scope',
'WorkspaceScope',
]
class _ThreadLocalStack(threading.local):
def __init__(self):
super(_ThreadLocalStack, self).__init__()
self._enforce_nesting = True
self.stack = []
def get_default(self):
return self.stack[-1] if len(self.stack) >= 1 else None
def is_cleared(self):
return not self.stack
@property
def enforce_nesting(self):
return self._enforce_nesting
@enforce_nesting.setter
def enforce_nesting(self, value):
self._enforce_nesting = value
@contextmanager
def get_controller(self, default):
"""A context manager for manipulating a default stack."""
self.stack.append(default)
try:
yield default
finally:
# stack may be empty if reset() was called
if self.stack:
if self._enforce_nesting:
if self.stack[-1] is not default:
raise AssertionError(
"Nesting violated for default stack of %s objects" %
type(default))
self.stack.pop()
else:
self.stack.remove(default)
class WorkspaceScope(object):
"""WorkspaceScope is a auxiliary to assign the specific workspace.
Examples
--------
>>> import dragon as dg
>>> with WorkspaceScope('session1'): pass
>>> with dg.ws_scope('session2'): pass
"""
def __init__(self, ws_name):
assert isinstance(ws_name, type('str')), \
'WorkspaceScope takes in a string as its argument.'
assert ws_name != '', \
'The workspace name should not be empty.'
self.ws = ws_name
self.prev = 'default'
def __enter__(self):
self.prev = _C.CurrentWorkspace()
_C.SwitchWorkspace(self.ws, True)
def __exit__(self, type, value, traceback):
_C.SwitchWorkspace(self.prev, True)
_GLOBAL_TENSOR_STACK = _ThreadLocalStack()
_GLOBAL_PHASE_STACK = _ThreadLocalStack()
_GLOBAL_DEVICE_STACK = _ThreadLocalStack()
_PREDEFINED_SCOPE_SEPARATOR = '/'
def name_scope(name): def name_scope(name):
...@@ -140,7 +55,7 @@ def device_scope(device_type, device_id=0): ...@@ -140,7 +55,7 @@ def device_scope(device_type, device_id=0):
""" """
device_type, device_id, device_type.lower(), device_id device_type, device_id, device_type.lower(), device_id
assert device_type in ['cpu', 'gpu', 'cuda', 'cnml'] assert device_type in ('cpu', 'gpu', 'cuda', 'cnml')
# Default names # Default names
if device_type == 'gpu': device_type = 'cuda' if device_type == 'gpu': device_type = 'cuda'
return _GLOBAL_DEVICE_STACK.get_controller({ return _GLOBAL_DEVICE_STACK.get_controller({
...@@ -212,4 +127,10 @@ def get_default_device(): ...@@ -212,4 +127,10 @@ def get_default_device():
The device dict. The device dict.
""" """
return _GLOBAL_DEVICE_STACK.get_default() return _GLOBAL_DEVICE_STACK.get_default()
\ No newline at end of file
_GLOBAL_TENSOR_STACK = _tls.Stack()
_GLOBAL_PHASE_STACK = _tls.Stack()
_GLOBAL_DEVICE_STACK = _tls.Stack()
_PREDEFINED_SCOPE_SEPARATOR = '/'
\ No newline at end of file
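A short sketch of how the rewritten scope stacks are meant to be used. Hedged: only device_scope and get_default_device are visible in this hunk, and the printed dict is indicative of the mapping shown above ('gpu' is normalized to 'cuda').

>>> from dragon.core import scope as _scope
>>> with _scope.device_scope('gpu', device_id=0):
...     print(_scope.get_default_device())   # e.g. {'device_type': 'cuda', 'device_id': 0}
>>> print(_scope.get_default_device())       # None outside the context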
...@@ -22,14 +22,13 @@ from __future__ import absolute_import ...@@ -22,14 +22,13 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy
import dragon.core.workspace as ws from dragon.core import scope as _scope
import dragon.proto.dragon_pb2 as pb from dragon.core import helper as _helper
from dragon.core import workspace as _workspace
from dragon.core.proto_utils import MakeOperatorDef, GetDefaultDeviceOption from dragon.proto import dragon_pb2 as _proto_def
from dragon.core.scope import get_default_name_scope from dragon.core import proto_utils as _proto_utils
from dragon.core.helper import OperatorHelper, GradientHelper
class Tensor(object): class Tensor(object):
...@@ -59,7 +58,7 @@ class Tensor(object): ...@@ -59,7 +58,7 @@ class Tensor(object):
""" """
self.name, self.shape, self.dtype = name, shape, dtype self.name, self.shape, self.dtype = name, shape, dtype
self.gradient = GradientHelper(self) self.gradient = _helper.GradientHelper(self)
############################################## ##############################################
# # # #
...@@ -258,8 +257,8 @@ class Tensor(object): ...@@ -258,8 +257,8 @@ class Tensor(object):
@name.setter @name.setter
def name(self, value): def name(self, value):
if value != '': if value != '':
self._name = ws.GetDummyName( self._name = _workspace.GetDummyName(
get_default_name_scope() + value _scope.get_default_name_scope() + value
if value else 'Tensor', domain='Tensor') if value else 'Tensor', domain='Tensor')
else: else:
# Set it manually for same cases # Set it manually for same cases
...@@ -506,15 +505,15 @@ class Tensor(object): ...@@ -506,15 +505,15 @@ class Tensor(object):
existing_outputs=[self], starts=starts, sizes=sizes) existing_outputs=[self], starts=starts, sizes=sizes)
def _from_constants(self, value): def _from_constants(self, value):
if not isinstance(value, np.ndarray): if not isinstance(value, numpy.ndarray):
try: try:
value = np.array(value, dtype=self.dtype value = numpy.array(value, dtype=self.dtype
if self.dtype else 'float32') if self.dtype else 'float32')
except: except:
raise TypeError( raise TypeError(
'Can not convert the value to Tensor or numpy array.') 'Can not convert the value to Tensor or numpy array.')
ref_tensor = Tensor.Ref( ref_tensor = Tensor.Ref(
name=ws.GetDummyName('Constant', name=_workspace.GetDummyName('Constant',
domain='Tensor', zero_based=False), domain='Tensor', zero_based=False),
shape=list(value.shape), dtype=str(value.dtype)) shape=list(value.shape), dtype=str(value.dtype))
ref_tensor.set_value(value) ref_tensor.set_value(value)
...@@ -798,14 +797,16 @@ class Tensor(object): ...@@ -798,14 +797,16 @@ class Tensor(object):
Returns Returns
------- -------
None Tensor
The self.
See Also See Also
-------- --------
`workspace.FeedTensor(*args, **kwargs)`_ - How to feed a Tensor. `workspace.FeedTensor(*args, **kwargs)`_ - How to feed a Tensor.
""" """
ws.FeedTensor(self, new_value) _workspace.FeedTensor(self, new_value)
return self
def get_value(self): def get_value(self):
"""Fetch the values from C++ backend. [**Theano Style**] """Fetch the values from C++ backend. [**Theano Style**]
...@@ -820,7 +821,7 @@ class Tensor(object): ...@@ -820,7 +821,7 @@ class Tensor(object):
`workspace.FetchTensor(*args, **kwargs)`_ - How to fetch a Tensor. `workspace.FetchTensor(*args, **kwargs)`_ - How to fetch a Tensor.
""" """
return ws.FetchTensor(self) return _workspace.FetchTensor(self)
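The two Theano-style accessors above simply delegate to the workspace; a minimal round trip might look like this (hedged: the tensor name 'x' and the values are illustrative).

>>> import numpy
>>> from dragon.core.tensor import Tensor
>>> x = Tensor.Ref('x', shape=[3], dtype='float32')
>>> print(x.set_value(numpy.array([1., 2., 3.], 'float32')).get_value())   # [1. 2. 3.]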
def copy(self): def copy(self):
"""Return a Tensor with same content. [**Theano Style**] """Return a Tensor with same content. [**Theano Style**]
...@@ -835,7 +836,7 @@ class Tensor(object): ...@@ -835,7 +836,7 @@ class Tensor(object):
`ops.Copy(*args, **kwargs)`_ - How to copy A to B. `ops.Copy(*args, **kwargs)`_ - How to copy A to B.
""" """
new_tensor = Tensor(self.name + '_copy') new_tensor = Tensor.Ref(self.name + '_copy')
arguments = {'inputs': self, 'existing_outputs': new_tensor} arguments = {'inputs': self, 'existing_outputs': new_tensor}
return self.CreateOperator('Copy', **arguments) return self.CreateOperator('Copy', **arguments)
...@@ -906,7 +907,7 @@ class Tensor(object): ...@@ -906,7 +907,7 @@ class Tensor(object):
if self.shape is not None: if self.shape is not None:
output.shape = input_shape[:] output.shape = input_shape[:]
output.shape.insert(axis, np.long(1)) output.shape.insert(axis, 1)
return output return output
...@@ -924,17 +925,8 @@ class Tensor(object): ...@@ -924,17 +925,8 @@ class Tensor(object):
TensorShape TensorShape
The shape description. The shape description.
Examples
--------
>>> a = Tensor(shape=[1, 2, 3, 4])
>>> print a.get_shape()
>>> TensorShape([Dimension(1), Dimension(2), Dimension(3), Dimension(4)])
>>> print a.get_shape().as_list()
>>> [1, 2, 3, 4]
""" """
raise NotImplementedError('Implemented in <vm.tensorflow.framework.tensor_shape>') raise NotImplementedError('')
def eval(self, feed_dict=None): def eval(self, feed_dict=None):
"""Run and return the computing results of this tensor. """Run and return the computing results of this tensor.
...@@ -950,7 +942,7 @@ class Tensor(object): ...@@ -950,7 +942,7 @@ class Tensor(object):
The values of this tensor in the backend. The values of this tensor in the backend.
""" """
raise NotImplementedError('Try "import dragon.vm.tensorflow" to load this dynamic methods.') raise NotImplementedError('')
############################################ ############################################
# # # #
...@@ -984,26 +976,32 @@ class Tensor(object): ...@@ -984,26 +976,32 @@ class Tensor(object):
return ref_tensor return ref_tensor
@classmethod @classmethod
def CreateOperator(cls, op_type, inputs, def CreateOperator(
num_outputs=1, existing_outputs=None, cls,
extra_inputs=None, name=None, **kwargs): op_type,
inputs,
num_outputs=1,
existing_outputs=None,
extra_inputs=None,
name=None,
**kwargs
):
"""Construct a new Tensor with specific operator descriptor. """Construct a new Tensor with specific operator descriptor.
Parameters Parameters
---------- ----------
inputs : list of Tensor or Tensor
The inputs for this operator.
op_type : str op_type : str
The operator type. The type of operator.
num_outputs : int, optional inputs : sequence of Tensor
The inputs for this operator.
num_outputs : int, optional, default=1
The number of outputs to return. The number of outputs to return.
Discarded if ``existing_outputs`` is not None.
existing_outputs : sequence of Tensor, optional existing_outputs : sequence of Tensor, optional
The existing outputs for this operator. The existing outputs for this operator.
extra_inputs : sequence of Tensor, optional extra_inputs : sequence of Tensor, optional
The inputs that should be attached to solving targets, e.g. dynamic shape. The inputs that should be attached to solving targets.
name : str, optional name : str, optional
The optional name to use. ``Op_xxx`` will be used automatically if it is None. The optional name.
Returns Returns
------- -------
...@@ -1049,10 +1047,10 @@ class Tensor(object): ...@@ -1049,10 +1047,10 @@ class Tensor(object):
# 2. Generate outputs # 2. Generate outputs
outputs = [] outputs = []
if existing_outputs is None: if existing_outputs is None:
name_scope = get_default_name_scope() name_scope = _scope.get_default_name_scope()
for idx in range(num_outputs): for idx in range(num_outputs):
outputs.append(Tensor.Ref( outputs.append(Tensor.Ref(
ws.GetDummyName(name_scope + _workspace.GetDummyName(name_scope +
(name if name else op_type), (name if name else op_type),
suffix=':{}'.format(idx), suffix=':{}'.format(idx),
domain='Tensor'))) domain='Tensor')))
...@@ -1066,11 +1064,10 @@ class Tensor(object): ...@@ -1066,11 +1064,10 @@ class Tensor(object):
# 3. Construct OperatorDef # 3. Construct OperatorDef
inputs_name = [input.name for input in inputs] inputs_name = [input.name for input in inputs]
outputs_name = [output.name for output in outputs] outputs_name = [output.name for output in outputs]
op_idx, op_name = OperatorHelper.get_index_and_name() op_idx, op_name = _helper.OperatorHelper.get_index_and_name()
device_option = _proto_utils.GetDefaultDeviceOption()
device_option = GetDefaultDeviceOption()
op_def = MakeOperatorDef(op_type, op_def = _proto_utils.MakeOperatorDef(op_type,
inputs_name, outputs_name, op_name, inputs_name, outputs_name, op_name,
device_option=device_option, **kwargs) device_option=device_option, **kwargs)
...@@ -1089,7 +1086,7 @@ class Tensor(object): ...@@ -1089,7 +1086,7 @@ class Tensor(object):
output.extra_targets.add(input.name) output.extra_targets.add(input.name)
# 5. Refine the shape and data type # 5. Refine the shape and data type
outputs = OperatorHelper.apply(op_type, outputs = _helper.OperatorHelper.apply(op_type,
arguments=kwargs, inputs=inputs, outputs=outputs) arguments=kwargs, inputs=inputs, outputs=outputs)
# 6. Returns # 6. Returns
...@@ -1097,42 +1094,6 @@ class Tensor(object): ...@@ -1097,42 +1094,6 @@ class Tensor(object):
elif num_outputs == 1: return outputs[0] elif num_outputs == 1: return outputs[0]
else: return None else: return None
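To make the six steps above concrete, a minimal symbolic call follows. Hedged: 'Add' is used only as an example operator type, and the returned Tensor carries the recorded expression rather than computed values.

>>> from dragon.core.tensor import Tensor
>>> a = Tensor.Ref('a', shape=[2, 3], dtype='float32')
>>> b = Tensor.Ref('b', shape=[2, 3], dtype='float32')
>>> y = Tensor.CreateOperator('Add', inputs=[a, b])   # num_outputs=1 -> a single output Tensor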
@classmethod
def Convert(cls, value, dtype='float32'):
"""Convert the given value to a tensor.
Parameters
----------
value : number or Tensor
The value to convert.
dtype : str, optional, default='float32'
The data type of the tensor.
Returns
-------
Tensor
The tensor converted with given value.
"""
if isinstance(value, Tensor): return value
else:
if not isinstance(value, np.ndarray):
try:
if dtype:
value = np.array(value, dtype=dtype)
else:
value = np.array(value)
except:
raise TypeError('{} value can not be '
'converted to Tensor.'.format(
type(value).__name__))
ref_tensor = Tensor.Ref(
name=ws.GetDummyName('Constant',
domain='Tensor', zero_based=False),
shape=list(value.shape), dtype=str(value.dtype))
ref_tensor.set_value(value)
return ref_tensor
def Fill(self, type, **kwargs): def Fill(self, type, **kwargs):
"""Fill self with the specific type of filler. """Fill self with the specific type of filler.
...@@ -1147,11 +1108,12 @@ class Tensor(object): ...@@ -1147,11 +1108,12 @@ class Tensor(object):
Self, with filler registered implicitly in the backend. Self, with filler registered implicitly in the backend.
""" """
filler = pb.TensorFillerProto() filler = _proto_def.TensorFillerProto()
filler.tensor = self.name filler.tensor = self.name
filler.type = type.lower() filler.type = type.lower()
if filler.type in ['placeholder', 'variable']: pass if filler.type in ['placeholder', 'variable']:
pass
elif filler.type == 'constant': elif filler.type == 'constant':
filler.value = kwargs['value'] if 'value' in kwargs else 0 filler.value = kwargs['value'] if 'value' in kwargs else 0
elif filler.type in ['normal', 'gaussian']: elif filler.type in ['normal', 'gaussian']:
...@@ -1180,39 +1142,5 @@ class Tensor(object): ...@@ -1180,39 +1142,5 @@ class Tensor(object):
else: else:
raise ValueError('Unknown filler type: {}'.format(filler.type)) raise ValueError('Unknown filler type: {}'.format(filler.type))
ws.CreateFiller(filler) _workspace.CreateFiller(filler)
return self return self
\ No newline at end of file
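A hedged example of registering a filler through the method above (the tensor name and the constant value are illustrative; the accepted kwargs follow the branches shown in this hunk).

>>> from dragon.core.tensor import Tensor
>>> w = Tensor.Ref('w', shape=[3, 3], dtype='float32')
>>> w = w.Fill('constant', value=1)   # registers a constant filler in the backend, returns self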
def debug_expressions(self):
"""Return the internal expressions for displaying.
Returns
-------
str
The internal expressions.
"""
external_inputs = set()
outputs = set()
ordered_exprs = sorted(self.expressions.items(), key=lambda d: d[0])
buffer0 = '-------------------Expressions-------------------\n'
buffer1 = ''; buffer2 = 'Inputs: ['
for k, v in ordered_exprs:
buffer1 = buffer1 + '>>> ' + str(k).zfill(3) + '. ('
for input in v.input:
if input not in outputs:
external_inputs.add(input)
buffer1 = buffer1 + input + ', '
buffer1 = buffer1 + 'None, ' if len(v.input) == 0 else buffer1
buffer1 = buffer1[0:-2] + ') -> ' + v.type + ' -> ('
for output in v.output:
outputs.add(output)
buffer1 = buffer1 + output + ', '
buffer1 = buffer1[0:-2] + ') \n'
buffer1 = buffer1 + 'Target: ' + self._name + '\n'
for ex_input in external_inputs:
buffer2 = buffer2 + ex_input + ', '
buffer2 = buffer2 + ']\n'
return buffer0 + buffer2 + buffer1 + buffer0
\ No newline at end of file
...@@ -16,10 +16,10 @@ from __future__ import division ...@@ -16,10 +16,10 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy import numpy
import dragon
from dragon.core.tensor import Tensor from dragon.core import workspace as _workspace
from dragon.core.proto_utils import GetDeviceOption from dragon.core import proto_utils as _proto_utils
from dragon.core.tensor import Tensor as _Tensor
def FromShape(shape, dtype='float32', name=None): def FromShape(shape, dtype='float32', name=None):
...@@ -47,9 +47,8 @@ def FromShape(shape, dtype='float32', name=None): ...@@ -47,9 +47,8 @@ def FromShape(shape, dtype='float32', name=None):
tensor.shape = list(shape) tensor.shape = list(shape)
if not isinstance(shape, (tuple, list)): if not isinstance(shape, (tuple, list)):
raise TypeError('The shape should be a tuple or list.') raise TypeError('The shape should be a tuple or list.')
dragon.C.TensorFromShape( _get_workspace().TensorFromShape(
_stringify_tensor(tensor), _stringify_tensor(tensor), list(shape), dtype)
list(shape), dtype)
return tensor return tensor
...@@ -70,7 +69,8 @@ def SetShape(tensor, shape, dtype='float32'): ...@@ -70,7 +69,8 @@ def SetShape(tensor, shape, dtype='float32'):
None None
""" """
dragon.C.TensorFromShape(_stringify_tensor(tensor), shape, dtype) _get_workspace().TensorFromShape(
_stringify_tensor(tensor), shape, dtype)
def FromTensor(src, src_ctx=None, name=None, ctx=None): def FromTensor(src, src_ctx=None, name=None, ctx=None):
...@@ -97,15 +97,17 @@ def FromTensor(src, src_ctx=None, name=None, ctx=None): ...@@ -97,15 +97,17 @@ def FromTensor(src, src_ctx=None, name=None, ctx=None):
""" """
tensor = _try_get_tensor(name) tensor = _try_get_tensor(name)
if src_ctx is None: src_ctx = GetDeviceOption('cpu') if src_ctx is None: src_ctx = _proto_utils.GetDeviceOption('cpu')
if ctx is None: ctx = GetDeviceOption('cpu') if ctx is None: ctx = _proto_utils.GetDeviceOption('cpu')
dragon.C.TensorFromTensor( _get_workspace().TensorFromTensor(
_stringify_tensor(tensor), _stringify_tensor(src), _stringify_tensor(tensor),
_stringify_proto(ctx), _stringify_proto(src_ctx)) _stringify_tensor(src),
_stringify_proto(ctx),
_stringify_proto(src_ctx))
return tensor return tensor
def FromPyArray(array, name=None): def FromArray(array, name=None):
"""Create a Tensor from a existing Array. """Create a Tensor from a existing Array.
Note that memory of Tensor are ``zero-copied``. Note that memory of Tensor are ``zero-copied``.
...@@ -128,12 +130,13 @@ def FromPyArray(array, name=None): ...@@ -128,12 +130,13 @@ def FromPyArray(array, name=None):
""" """
tensor = _try_get_tensor(name) tensor = _try_get_tensor(name)
if not isinstance(array, numpy.ndarray): if not isinstance(array, numpy.ndarray):
raise TypeError('The given nd-array should be numpy.ndarray.') raise TypeError('Expected a numpy.ndarray.')
dragon.C.TensorFromPyArray(_stringify_tensor(tensor), array) _get_workspace().TensorFromArray(
_stringify_tensor(tensor), array)
return tensor return tensor
def SetPyArray(tensor, array): def SetArray(tensor, array):
"""Set a Tensor from a existing Array. """Set a Tensor from a existing Array.
Note that memory of Tensor are ``zero-copied``. Note that memory of Tensor are ``zero-copied``.
...@@ -149,15 +152,12 @@ def SetPyArray(tensor, array): ...@@ -149,15 +152,12 @@ def SetPyArray(tensor, array):
------- -------
None None
References
----------
The wrapper of ``TensorFromPyArrayCC``.
""" """
dragon.C.TensorFromPyArray(_stringify_tensor(tensor), array) _get_workspace().TensorFromArray(
_stringify_tensor(tensor), array)
def ToPyArray(tensor, readonly=False): def ToArray(tensor, readonly=False):
"""Create a Array from a existing Tensor. """Create a Array from a existing Tensor.
Note that memory of Array are *zero-copied*. Note that memory of Array are *zero-copied*.
...@@ -175,7 +175,8 @@ def ToPyArray(tensor, readonly=False): ...@@ -175,7 +175,8 @@ def ToPyArray(tensor, readonly=False):
The array sharing the memory with the original tensor. The array sharing the memory with the original tensor.
""" """
return dragon.C.TensorToPyArray(_stringify_tensor(tensor), readonly) return _get_workspace().TensorToArray(
_stringify_tensor(tensor), readonly)
def GetStorage(tensor): def GetStorage(tensor):
...@@ -193,8 +194,8 @@ def GetStorage(tensor): ...@@ -193,8 +194,8 @@ def GetStorage(tensor):
""" """
tensor = _stringify_tensor(tensor) tensor = _stringify_tensor(tensor)
if not dragon.workspace.HasTensor(tensor): return None if not _get_workspace().HasTensor(tensor): return None
return dragon.C.GetTensor(tensor) return _get_workspace().GetTensor(tensor)
def _stringify_proto(obj): def _stringify_proto(obj):
...@@ -210,5 +211,10 @@ def _stringify_tensor(obj): ...@@ -210,5 +211,10 @@ def _stringify_tensor(obj):
def _try_get_tensor(name=None): def _try_get_tensor(name=None):
"""Try to create or get a tensor""" """Try to create or get a tensor"""
if name is None or name == '': return Tensor() if name is None or name == '': return _Tensor()
else: return Tensor.Ref(name) else: return _Tensor.Ref(name)
\ No newline at end of file
def _get_workspace():
"""Get the current default workspace."""
return _workspace.get_default_workspace()
\ No newline at end of file
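A brief sketch of the shape/array helpers above. Hedged: the module path dragon.core.tensor_utils is an assumption (the file name is not shown in this hunk), and the tensor name 't' is illustrative.

>>> import numpy
>>> from dragon.core import tensor_utils   # assumed module path for this file
>>> t = tensor_utils.FromShape((2, 3), dtype='float32', name='t')
>>> arr = tensor_utils.ToArray(t)   # zero-copy view of the backend storage
>>> arr.fill(1)                     # writes through to the tensor memory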
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define the common thread local structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import threading
import contextlib
class Constant(threading.local):
def __init__(self, **attrs):
super(Constant, self).__init__()
self.__dict__.update(attrs)
class Stack(threading.local):
def __init__(self):
super(Stack, self).__init__()
self._enforce_nesting = True
self.stack = []
def get_default(self):
return self.stack[-1] if len(self.stack) >= 1 else None
def reset(self):
self.stack = []
def is_cleared(self):
return not self.stack
@property
def enforce_nesting(self):
return self._enforce_nesting
@enforce_nesting.setter
def enforce_nesting(self, value):
self._enforce_nesting = value
@contextlib.contextmanager
def get_controller(self, default):
"""A context manager for manipulating a default stack."""
self.stack.append(default)
try:
yield default
finally:
# stack may be empty if reset() was called
if self.stack:
if self._enforce_nesting:
if self.stack[-1] is not default:
raise AssertionError(
"Nesting violated for default stack of %s objects" %
type(default))
self.stack.pop()
else:
self.stack.remove(default)
\ No newline at end of file
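The get_controller pattern above is the backbone of every "default" scope introduced in this commit; a minimal stand-alone use (the pushed value 'session-1' is purely illustrative):

>>> from dragon.core import tls as _tls
>>> stack = _tls.Stack()
>>> with stack.get_controller('session-1'):
...     print(stack.get_default())   # session-1
>>> print(stack.get_default())       # None, the entry is popped on exit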
...@@ -9,14 +9,10 @@ ...@@ -9,14 +9,10 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
"""A Wrapper for the C++ backend Workspace. """Wrappers for the Workspace of C++ backend.
Note that a default workspace is switched globally, Flexible API is provided to manage the global resources
so these C++ calls are safe and deterministic. between the Python threads (quite different from C++).
See the documentation to learn how to switch between workspaces:
<http://dragon.seetatech.com/api/python/contents/core/workspace.html>
""" """
...@@ -25,112 +21,219 @@ from __future__ import division ...@@ -25,112 +21,219 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
import re
import numpy import numpy
import threading import contextlib
import six.moves.cPickle as pickle import six.moves.cPickle as pickle
from collections import defaultdict, deque
import dragon.import_c_api as _C from dragon import config as _cfg
import dragon.core.logging as logging from dragon import import_c_api as _C
import dragon.proto.dragon_pb2 as pb from dragon.core import tls as _tls
from dragon.core import mpi as _mpi
from dragon.core import logging as _logging
from dragon.core import mapping as _mapping
from dragon.proto import dragon_pb2 as _proto_def
from dragon.core import proto_utils as _proto_utils
from dragon.config import GetGlobalOptions
from dragon.core import mpi, mapping, proto_utils
class TensorPool(object):
"""We apply the TensorPool to manage the reused tensors.
def CurrentWorkspace(): Tensors with the same scope in the pool will be reused by turns,
"""Return the current active workspace. which speeds up the whole system by reducing the unnecessary deconstructing.
Returns Heuristically, we have used 5 pools with different scopes:
-------
str
The workspace name.
""" * scope(Leaf): A Pool to reuse leaf tensors.
return _C.CurrentWorkspace()
* scope(NumPy): A pool to reuse leaf tensors from numpy.
def SwitchWorkspace(workspace_name, create_if_missing=True): * scope(Join): A pool to reuse RT (runtime) tensors required by forward-backward.
"""Switch to the specific workspace.
Parameters * scope(Detach): A pool to reuse RT (runtime) tensors required by forward only.
----------
workspace_name : str
The name of the specific workspace.
create_if_missing : boolean
Whether to create the specific workspace if it does not exist.
Returns * scope(Reference): A pool to reuse reshaped tensors (sharing contents).
-------
None
""" """
if workspace_name == '': def __init__(self):
raise ValueError('The workspace name should not be empty.') # deque provide much higher performance than Queue
_C.SwitchWorkspace(workspace_name, create_if_missing) self._scope2keys = defaultdict(deque)
def get(self, scope='${DETACH}'):
try:
return self._scope2keys[scope].popleft()
except:
self._scope2keys[scope].append(
GetDummyName(
'${POOL}/%s/Tensor' % scope,
domain='Tensor', zero_based=False))
return self._scope2keys[scope].popleft()
def put(self, name):
if '${POOL}' in name:
scope, _ = name[8:].split('/')
self._scope2keys[scope].append(name)
return True
else: return False
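A hedged sketch of the pooling round trip (the generated name is indicative; it depends on GetDummyName and the current workspace). The operator_pool on the workspace recycles handles in the same way, keyed by operator type.

>>> from dragon.core import workspace as _workspace
>>> pool = _workspace.get_default_workspace().tensor_pool
>>> name = pool.get('${DETACH}')   # e.g. '${POOL}/${DETACH}/Tensor_1'
>>> pool.put(name)                 # returned to the '${DETACH}' queue for reuse
True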
def MoveWorkspace(target_ws, source_ws):
"""Move the source workspace into the target workspace.
Parameters class OperatorPool(object):
---------- """Operators whose gradients is required will hold a resource handle,
target_ws : str which is also called ``Anchor`` in the backend.
The name of the target workspace.
source_ws : str
The name of the source workspace.
Returns We apply this pool to collect the handles according to the type of operator,
------- as the memory size of the temporary resources varies greatly.
None
The resource handle is released automatically after the gradient flow.
""" """
if target_ws == '' or source_ws == '': def __init__(self):
raise ValueError('The target or source name can not be empty.') # deque provides much higher performance than Queue
_C.MoveWorkspace(target_ws, source_ws) self._type2keys = defaultdict(deque)
def get(self, op_type):
try:
return self._type2keys[op_type].popleft()
except:
self._type2keys[op_type].append(
GetDummyName(
'${POOL}/%s' % op_type,
domain='Operator', zero_based=False))
return self._type2keys[op_type].popleft()
def ResetWorkspace(workspace_name=''): def put(self, op_name):
"""Reset the specific workspace. op_type, _ = op_name[8:].split('_')
self._type2keys[op_type].append(op_name)
Remove all resources of given workspace.
If workspace name is empty, the current workspace will be modified. class Workspace(_C.Workspace):
"""A wrapper for the C implemented workspace.
Parameters This class is a fusion of *Workspace*, *Pool* and *tf.Graph*.
----------
workspace_name : str
The name of the specific workspace.
Returns We find that they work in a similar way though they are named differently.
-------
None
""" """
_C.ResetWorkspace(workspace_name) def __init__(self, name=''):
super(Workspace, self).__init__(name)
self._ref_objects = []
self._collections = {}
self.tensor_pool = TensorPool()
self.operator_pool = OperatorPool()
def get_collection_ref(self, name):
coll_list = self._collections.get(name, None)
if coll_list is None:
coll_list = []
self._collections[name] = coll_list
return coll_list
def get_collection(self, name, scope=None):
coll_list = self._collections.get(name, None)
if coll_list is None:
return []
if scope is None:
return list(coll_list)
else:
filter_coll_list = []
regex = re.compile(scope)
for item in coll_list:
if hasattr(item, "name") and regex.match(item.name):
filter_coll_list.append(item)
return filter_coll_list
def add_to_collection(self, name, value):
if name not in self._collections:
self._collections[name] = [value]
else:
self._collections[name].append(value)
def add_to_collections(self, names, value):
for name in names:
self.add_to_collection(name, value)
def merge_from(self, other):
"""Merge a external workspace into ``self``.
The ``other`` will not be reset until ``self`` is reset.
Carefulness should be taken to associate with the workspaces.
Parameters
----------
other : Workspace
The given external workspace.
Returns
-------
Workspace
The ``self``.
"""
self.MergeFrom(other)
self._ref_objects.append(other)
return self
def as_default(self):
"""Switch ``self`` as the default workspace.
Call this method with the *with* keyword.
Once *with* is exited, the previous default will be restored.
Returns
-------
Workspace
The ``self``.
"""
return _GLOBAL_DEFAULT_WORKSPACE_STACK.get_controller(self)
def clear(self):
"""Remove all the tensors.
Optionally call this method to release the memory.
Returns
-------
None
"""
self.Clear()
def get_default_workspace():
"""Return the current default workspace.
Returns
-------
Workspace
The default workspace.
def ClearWorkspace(workspace_name=''): """
"""Clear the specific workspace. return _GLOBAL_DEFAULT_WORKSPACE_STACK.get_default()
You may need to clear the workspace when sharing grads.
If workspace name is empty, the current workspace will be modified. def reset_default_workspace():
"""Reset the global default workspace.
Parameters Do not call this method to reset any instances.
----------
workspace_name : str
The name of the specific workspace.
Returns Returns
------- -------
None None
""" """
_C.ClearWorkspace(workspace_name) if not _GLOBAL_DEFAULT_WORKSPACE_STACK.is_cleared():
raise AssertionError(
"Do not use reset_default_workspace() to clear "
"nested workspaces.\nIf you need a cleared workspace, "
"exit the nesting and create a new workspace.")
_GLOBAL_DEFAULT_WORKSPACE_STACK.reset()
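Putting the pieces above together, a hedged usage sketch of the new thread-local default (the workspace name 'my-session' and the tensor name 'x' are illustrative):

>>> from dragon.core import workspace as _workspace
>>> ws = _workspace.Workspace('my-session')
>>> with ws.as_default():
...     _workspace.FeedTensor('x', [1, 2, 3])        # lands in ``ws``
...     print(_workspace.HasTensor('x'))             # True
>>> print(_workspace.get_default_workspace() is ws)  # False, back to the global default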
def CreateGraph(graph_def): def CreateGraph(graph_def):
"""Create the graph in the VM backend. """Create the graph in current workspace.
Parameters Parameters
---------- ----------
...@@ -143,17 +246,16 @@ def CreateGraph(graph_def): ...@@ -143,17 +246,16 @@ def CreateGraph(graph_def):
The graph name to run. The graph name to run.
""" """
option = GetGlobalOptions()
LogMetaGraph(graph_def) LogMetaGraph(graph_def)
ExportMetaGraph(graph_def) ExportMetaGraph(graph_def)
return _C.CreateGraph( options = _cfg.GetGlobalOptions()
return get_default_workspace().CreateGraph(
_stringify_proto(graph_def), _stringify_proto(graph_def),
option['log_optimized_graph'], options['log_optimized_graph'])
)
def RunOperator(op_def, verbose=False): def RunOperator(op_def, verbose=False):
"""Run the operator in the VM backend. """Run the operator.
Parameters Parameters
---------- ----------
...@@ -167,9 +269,9 @@ def RunOperator(op_def, verbose=False): ...@@ -167,9 +269,9 @@ def RunOperator(op_def, verbose=False):
None None
""" """
if isinstance(op_def, pb.OperatorDef): if isinstance(op_def, _proto_def.OperatorDef):
op_def = op_def.SerializeToString() op_def = op_def.SerializeToString()
_C.RunOperator(op_def, verbose) get_default_workspace().RunOperator(op_def, verbose)
def HasTensor(tensor): def HasTensor(tensor):
...@@ -186,7 +288,8 @@ def HasTensor(tensor): ...@@ -186,7 +288,8 @@ def HasTensor(tensor):
The query result. The query result.
""" """
return _C.HasTensor(_stringify_tensor(tensor)) tensor = _stringify_tensor(tensor)
return get_default_workspace().HasTensor(tensor)
def CreateTensor(tensor): def CreateTensor(tensor):
...@@ -202,7 +305,8 @@ def CreateTensor(tensor): ...@@ -202,7 +305,8 @@ def CreateTensor(tensor):
None None
""" """
return _C.CreateTensor(_stringify_tensor(tensor)) tensor = _stringify_tensor(tensor)
get_default_workspace().CreateTensor(tensor)
def CreateFiller(filler_def): def CreateFiller(filler_def):
...@@ -225,7 +329,7 @@ def CreateFiller(filler_def): ...@@ -225,7 +329,7 @@ def CreateFiller(filler_def):
""" """
filler_def = filler_def if isinstance(filler_def, str) \ filler_def = filler_def if isinstance(filler_def, str) \
else filler_def.SerializePartialToString() else filler_def.SerializePartialToString()
_C.CreateFiller(filler_def) get_default_workspace().CreateFiller(filler_def)
def GetFillerType(tensor): def GetFillerType(tensor):
...@@ -246,7 +350,8 @@ def GetFillerType(tensor): ...@@ -246,7 +350,8 @@ def GetFillerType(tensor):
The filler type. The filler type.
""" """
return _C.GetFillerType(_stringify_tensor(tensor)) tensor = _stringify_tensor(tensor)
return get_default_workspace().GetFillerType(tensor)
def GetTensorName(tensor): def GetTensorName(tensor):
...@@ -267,7 +372,8 @@ def GetTensorName(tensor): ...@@ -267,7 +372,8 @@ def GetTensorName(tensor):
The query result may be different from the one used in the frontend. The query result may be different from the one used in the frontend.
""" """
return _C.GetTensorName(_stringify_tensor(tensor)) tensor = _stringify_tensor(tensor)
return get_default_workspace().GetTensorName(tensor)
def SetTensorAlias(tensor, alias): def SetTensorAlias(tensor, alias):
...@@ -285,7 +391,8 @@ def SetTensorAlias(tensor, alias): ...@@ -285,7 +391,8 @@ def SetTensorAlias(tensor, alias):
None None
""" """
return _C.SetTensorAlias(_stringify_tensor(tensor), alias) tensor = _stringify_tensor(tensor)
get_default_workspace().SetTensorAlias(tensor, alias)
def FetchTensor(tensor): def FetchTensor(tensor):
...@@ -302,10 +409,16 @@ def FetchTensor(tensor): ...@@ -302,10 +409,16 @@ def FetchTensor(tensor):
The values copied from the backend. The values copied from the backend.
""" """
return _C.FetchTensor(_stringify_tensor(tensor)) tensor = _stringify_tensor(tensor)
return get_default_workspace().FetchTensor(tensor)
def FeedTensor(tensor, array, force_cpu=False, dtype=None): def FeedTensor(
tensor,
array,
force_cpu=False,
dtype=None,
):
"""Feed the values to the given tensor. """Feed the values to the given tensor.
Parameters Parameters
...@@ -314,10 +427,10 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None): ...@@ -314,10 +427,10 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None):
The tensor to feed. The tensor to feed.
array : number, list, tuple, or numpy.ndarray array : number, list, tuple, or numpy.ndarray
The values to feed. The values to feed.
force_cpu : boolean force_cpu : boolean, optional, default=False
Whether force to feed to cpu context. Whether force to feed to cpu context.
dtype : str dtype : str, optional
The data type. If ``None``, ``float32`` will be used instead. The optional data type.
Returns Returns
------- -------
...@@ -340,36 +453,29 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None): ...@@ -340,36 +453,29 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None):
""" """
name = tensor.name if hasattr(tensor, 'name') else str(tensor) name = tensor.name if hasattr(tensor, 'name') else str(tensor)
if force_cpu is True: if force_cpu is True:
dev = proto_utils.GetDeviceOption('cpu') dev = _proto_utils.GetDeviceOption('cpu')
else: else:
dev = proto_utils.GetDefaultDeviceOption() dev = _proto_utils.GetDefaultDeviceOption()
if dev is None: dev = proto_utils.GetGlobalDeviceOption() if dev is None: dev = _proto_utils.GetGlobalDeviceOption()
if not isinstance(array, numpy.ndarray): if not isinstance(array, numpy.ndarray):
auto_data_type = numpy.float32 if dtype is None else dtype dtype = 'float32' if dtype is None else dtype
else: else:
auto_data_type = array.dtype if dtype is None else dtype dtype = array.dtype if dtype is None else dtype
if hasattr(tensor, 'dtype') and tensor.dtype is not None: if hasattr(tensor, 'dtype') and tensor.dtype is not None:
if tensor.dtype not in mapping.TENSOR_TYPE_TO_NP_TYPE: if tensor.dtype not in _mapping.TENSOR_TYPE_TO_NP_TYPE:
raise TypeError('Unsupported data type: {}'.format(tensor.dtype)) raise TypeError('Unsupported data type: {}'.format(tensor.dtype))
preset_data_type = mapping.TENSOR_TYPE_TO_NP_TYPE[tensor.dtype] dtype = _mapping.TENSOR_TYPE_TO_NP_TYPE[tensor.dtype]
if dtype is not None:
if dtype != preset_data_type:
raise TypeError(
'The preset data type is {}, but force to {}'.
format(preset_data_type, dtype))
auto_data_type = preset_data_type
nd_array = numpy.array(array, dtype=auto_data_type, copy=False) dev = _stringify_proto(dev)
_C.FeedTensor(name, nd_array, _stringify_proto(dev)) array = numpy.array(array, dtype=dtype, copy=False)
get_default_workspace().FeedTensor(name, array, dev)
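A brief round trip through the feed path above and the FetchTensor shown earlier (hedged: 'x' is an arbitrary tensor name; dtype and force_cpu behave as documented in this hunk):

>>> import numpy
>>> from dragon.core import workspace as _workspace
>>> _workspace.FeedTensor('x', [[1, 2], [3, 4]], force_cpu=True, dtype='int32')
>>> print(_workspace.FetchTensor('x').dtype, _workspace.FetchTensor('x').shape)   # int32 (2, 2)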
def ResetTensor(tensor): def ResetTensor(tensor):
"""Reset the memory of given tensor. """Reset the memory of given tensor.
Note that the tensor will not be ``DELETE`` for the workspace.
Parameters Parameters
---------- ----------
tensor : Tensor or str tensor : Tensor or str
...@@ -380,12 +486,16 @@ def ResetTensor(tensor): ...@@ -380,12 +486,16 @@ def ResetTensor(tensor):
None None
""" """
return _C.ResetTensor(_stringify_tensor(tensor)) tensor = _stringify_tensor(tensor)
return get_default_workspace().ResetTensor(tensor)
def RunGraph( def RunGraph(
graph_name, inputs=(), outputs=[], graph_name,
stage=None, return_outputs=True, inputs=(),
outputs=[],
stage=None,
return_outputs=True,
): ):
"""Run the specific graph. """Run the specific graph.
...@@ -424,7 +534,8 @@ def RunGraph( ...@@ -424,7 +534,8 @@ def RunGraph(
# Run the graph according to the specified include/exclude rule # Run the graph according to the specified include/exclude rule
runtime_stage = stage if stage else 'default' runtime_stage = stage if stage else 'default'
rule = _PREDEFINED_GRAPH_RUNTIME_STAGES[runtime_stage] rule = _PREDEFINED_GRAPH_RUNTIME_STAGES[runtime_stage]
_C.RunGraph(str(graph_name), str(rule['include']), str(rule['exclude'])) get_default_workspace().RunGraph(
graph_name, rule['include'], rule['exclude'])
# Try to return the outputs # Try to return the outputs
# Force to return may lead to asserts if outputs are not computed # Force to return may lead to asserts if outputs are not computed
...@@ -434,18 +545,23 @@ def RunGraph( ...@@ -434,18 +545,23 @@ def RunGraph(
else: return [outputs[i].get_value() for i in range(len(outputs))] else: return [outputs[i].get_value() for i in range(len(outputs))]
def FlowGradients(inputs, targets, input_grads=None, ignored_grads=None): def Backward(
forward_ops,
targets,
input_grads=None,
ignored_grads=None,
):
"""Compute the gradients of given input flows. """Compute the gradients of given input flows.
Parameters Parameters
---------- ----------
input_flow : sequence of OperatorDef forward_ops : sequence of OperatorDef
The referring flows to generate gradient flows. The forward ops to generate gradients from.
targets : sequence or str targets : sequence or str
The solving targets, generate grads automatically. The solving targets.
input_grads : sequence of str or None input_grads : sequence of str, optional
The input grads. The external input grads.
ignored_grads : sequence of str or None ignored_grads : sequence of str, optional
The grads that are explicitly ignored. The grads that are explicitly ignored.
Returns Returns
...@@ -453,17 +569,17 @@ def FlowGradients(inputs, targets, input_grads=None, ignored_grads=None): ...@@ -453,17 +569,17 @@ def FlowGradients(inputs, targets, input_grads=None, ignored_grads=None):
None None
""" """
option = GetGlobalOptions() options = _cfg.GetGlobalOptions()
required_logging = True \ required_logging = True \
if (option['log_optimized_graph'] or if (options['log_optimized_graph'] or
option['log_meta_graph']) else False options['log_meta_graph']) else False
_C.FlowGradients( get_default_workspace().Backward(
inputs, targets, forward_ops, targets,
input_grads if input_grads else [], input_grads if input_grads else [],
ignored_grads if ignored_grads else [], ignored_grads if ignored_grads else [],
option['share_grads'], required_logging) options['share_grads'], required_logging)
def LogMetaGraph(graph_def): def LogMetaGraph(graph_def):
...@@ -479,8 +595,8 @@ def LogMetaGraph(graph_def): ...@@ -479,8 +595,8 @@ def LogMetaGraph(graph_def):
None None
""" """
option = GetGlobalOptions() options = _cfg.GetGlobalOptions()
if option['log_meta_graph']: print(graph_def) if options['log_meta_graph']: print(graph_def)
def ExportMetaGraph(graph_def): def ExportMetaGraph(graph_def):
...@@ -498,28 +614,34 @@ def ExportMetaGraph(graph_def): ...@@ -498,28 +614,34 @@ def ExportMetaGraph(graph_def):
None None
""" """
option = GetGlobalOptions() options = _cfg.GetGlobalOptions()
if option['export_meta_graph']: if options['export_meta_graph']:
if not os.path.exists(option['export_meta_graph']): if not os.path.exists(options['export_meta_graph']):
try: try:
os.makedirs(option['export_meta_graph']) os.makedirs(options['export_meta_graph'])
except Exception: except Exception:
raise ValueError('The given prefix is invalid.') raise ValueError('The given prefix is invalid.')
path = os.path.join( path = os.path.join(
option['export_meta_graph'], options['export_meta_graph'],
graph_def.name + '.metatxt') graph_def.name + '.metatxt')
with open(path, 'w') as f: f.write(str(graph_def)) with open(path, 'w') as f: f.write(str(graph_def))
logging.info('Export meta graph into: {}'.format(path)) _logging.info('Export meta graph into: {}'.format(path))
def Snapshot( def Snapshot(
tensors, filename, tensors,
prefix='', suffix='.bin', filename,
format='default', prefix='',
suffix='.bin',
format='pickle',
): ):
"""Snapshot tensors into a binary file. """Serialize tensors into a binary file.
The filename is formatted as:
``prefix`` + ``filename`` + ``suffix``
Parameters Parameters
---------- ----------
...@@ -527,11 +649,11 @@ def Snapshot( ...@@ -527,11 +649,11 @@ def Snapshot(
The tensors to be written. The tensors to be written.
filename : str filename : str
The name of this binary file. The name of this binary file.
prefix : str prefix : str, optional, default=''
The prefix of this binary file. The prefix of this binary file.
suffix : str suffix : str, optional, default='.bin'
The suffix of this binary file. The suffix of this binary file.
format : str format : {'pickle', 'caffe'}, optional
The format of this binary file. The format of this binary file.
Returns Returns
...@@ -540,72 +662,66 @@ def Snapshot( ...@@ -540,72 +662,66 @@ def Snapshot(
Notes Notes
----- -----
The full file path will be: ``prefix`` + ``filename`` + ``suffix``.
Available formats: ['default', 'caffe'].
""" """
file_path = prefix + filename + suffix file_path = prefix + filename + suffix
if mpi.Is_Init(): if _mpi.Is_Init():
if not mpi.AllowSnapshot(): return if not _mpi.AllowSnapshot(): return
file_path = file_path + '.rank.{}'.format(mpi.Rank()) file_path = file_path + '.rank.{}'.format(_mpi.Rank())
dir = os.path.split(file_path)[0] dir = os.path.split(file_path)[0]
if len(dir) > 0 and not os.path.exists(dir): os.makedirs(dir) if len(dir) > 0 and not os.path.exists(dir): os.makedirs(dir)
if format == 'default': if format == 'pickle':
state_dict = {} state_dict = {}
for tensor in tensors: for tensor in tensors:
state_dict[tensor.name] = FetchTensor(tensor) state_dict[tensor.name] = FetchTensor(tensor)
with open(file_path, 'wb') as f: with open(file_path, 'wb') as f:
pickle.dump(state_dict, f, pickle.HIGHEST_PROTOCOL) pickle.dump(state_dict, f, pickle.HIGHEST_PROTOCOL)
logging.info('Snapshot Model@: ' + file_path) _logging.info('Snapshot Model@: ' + file_path)
logging.info('Model Format: Pickle') _logging.info('Model Format: Pickle')
elif format is 'caffe': elif format == 'caffe':
names = [tensor.name for tensor in tensors] names = [tensor.name for tensor in tensors]
_C.Snapshot(file_path, names, 1) get_default_workspace().Snapshot(file_path, names, 1)
else: raise TypeError('Unknown binary format: {}'.format(format)) else:
raise TypeError('Unknown binary format: ' + format)
def Restore(binary_file, format='default'): def Restore(binary_file, format='pickle'):
"""Restore tensors from a binary file. """Restore tensors from a binary file.
Parameters Parameters
---------- ----------
binary_file : str binary_file : str
The path of binary file. The path of binary file.
format : str format : {'pickle', 'caffe'}, optional
The format of this binary file. The format of this binary file.
Returns Returns
------- -------
None None
Notes
-----
Available formats: ['default', 'caffe'].
""" """
assert os.path.exists(binary_file), \ assert os.path.exists(binary_file), \
'Binary file({}) does not exist.'.format(binary_file) 'Binary file({}) does not exist.'.format(binary_file)
if format == 'default': if format == 'pickle':
try: try:
state_dict = pickle.load(open(binary_file, 'rb')) state_dict = pickle.load(open(binary_file, 'rb'))
except UnicodeDecodeError: except UnicodeDecodeError:
state_dict = pickle.load(open(binary_file, 'rb'), encoding='iso-8859-1') state_dict = pickle.load(
logging.info('Restore From Model@: ' + binary_file) open(binary_file, 'rb'), encoding='iso-8859-1')
logging.info('Model Format: Pickle') _logging.info('Restore From Model@: ' + binary_file)
_logging.info('Model Format: Pickle')
for k, v in state_dict.items(): for k, v in state_dict.items():
if HasTensor(k): if HasTensor(k):
FeedTensor(k, v) FeedTensor(k, v)
logging.info('[Info]: Tensor({}) is restored.'.format(k)) _logging.info('Tensor({}) is restored.'.format(k))
elif format == 'caffe': elif format == 'caffe':
# Caffe models can't save the tensor name get_default_workspace().Restore(binary_file, 1)
# We simply use "layer_name/param:X"
_C.Restore(binary_file, 1)
else: else:
raise TypeError('Unknown binary format: {}'.format(format)) raise TypeError('Unknown binary format: ' + format)
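A hedged sketch of a save/load cycle with the renamed 'pickle' format (the paths, the tensor name 'w', and its values are illustrative only):

>>> import numpy
>>> from dragon.core.tensor import Tensor
>>> from dragon.core import workspace as _workspace
>>> w = Tensor.Ref('w', shape=[2, 2], dtype='float32').set_value(numpy.zeros((2, 2), 'float32'))
>>> _workspace.Snapshot([w], filename='iter_100', prefix='checkpoints/')   # -> checkpoints/iter_100.bin
>>> _workspace.Restore('checkpoints/iter_100.bin', format='pickle')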
def GetDummyName(basename, suffix='', domain='', zero_based=True): def GetDummyName(basename, suffix='', domain='', zero_based=True):
...@@ -633,7 +749,8 @@ def GetDummyName(basename, suffix='', domain='', zero_based=True): ...@@ -633,7 +749,8 @@ def GetDummyName(basename, suffix='', domain='', zero_based=True):
The unique dummy name. The unique dummy name.
""" """
return _C.GetDummyName(basename, suffix, domain, zero_based) return get_default_workspace().GetDummyName(
basename, suffix, domain, zero_based)
def _stringify_proto(obj): def _stringify_proto(obj):
...@@ -647,8 +764,38 @@ def _stringify_tensor(obj): ...@@ -647,8 +764,38 @@ def _stringify_tensor(obj):
else: return str(obj) else: return str(obj)
# Define a global lock to lock the current workspace class _DefaultWorkspaceStack(_tls.Stack):
_GLOBAL_WORKSPACE_LOCK = threading.Lock() """A thread-local stack of objects for
providing an implicit default workspace."""
def __init__(self):
super(_DefaultWorkspaceStack, self).__init__()
self._global_default_workspace = None
def get_default(self):
"""Override that returns a global default if the stack is empty."""
ret = super(_DefaultWorkspaceStack, self).get_default()
if ret is None: ret = self._get_default_workspace()
return ret
def _get_default_workspace(self):
if self._global_default_workspace is None:
self._global_default_workspace = Workspace()
return self._global_default_workspace
def reset(self):
super(_DefaultWorkspaceStack, self).reset()
self._global_default_workspace = None
@contextlib.contextmanager
def get_controller(self, default):
with super(_DefaultWorkspaceStack, self) \
.get_controller(default) as g:
yield g
# Define a global stack to store the workspaces of current thread
_GLOBAL_DEFAULT_WORKSPACE_STACK = _DefaultWorkspaceStack()
# Define some useful runtime stages # Define some useful runtime stages
_PREDEFINED_GRAPH_RUNTIME_STAGES = { _PREDEFINED_GRAPH_RUNTIME_STAGES = {
......
...@@ -23,7 +23,6 @@ from __future__ import print_function ...@@ -23,7 +23,6 @@ from __future__ import print_function
import sys import sys
import logging as _logging import logging as _logging
import atexit
try: try:
from dragon.libdragon import * from dragon.libdragon import *
...@@ -32,9 +31,5 @@ except ImportError as e: ...@@ -32,9 +31,5 @@ except ImportError as e:
'Cannot import dragon. Error: {0}'.format(str(e))) 'Cannot import dragon. Error: {0}'.format(str(e)))
sys.exit(1) sys.exit(1)
REGISTERED_OPERATORS = set(s for s in RegisteredOperators()) REGISTERED_OPERATORS = set(s for s in RegisteredOperators())
NO_GRADIENT_OPERATORS = set(s for s in NoGradientOperators()) NO_GRADIENT_OPERATORS = set(s for s in NoGradientOperators())
\ No newline at end of file
atexit.register(OnModuleExit)
\ No newline at end of file
...@@ -15,6 +15,8 @@ from __future__ import absolute_import ...@@ -15,6 +15,8 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon import config as _cfg
def ShareGrads(enabled=True): def ShareGrads(enabled=True):
"""Enable gradients sharing globally. """Enable gradients sharing globally.
...@@ -34,8 +36,8 @@ def ShareGrads(enabled=True): ...@@ -34,8 +36,8 @@ def ShareGrads(enabled=True):
>>> opt.ShareGrads() >>> opt.ShareGrads()
""" """
from dragon.config import option options = _cfg.GetGlobalOptions()
option['share_grads'] = enabled options['share_grads'] = enabled
def IsGradsShared(): def IsGradsShared():
...@@ -47,8 +49,8 @@ def IsGradsShared(): ...@@ -47,8 +49,8 @@ def IsGradsShared():
``True`` if sharing grads else ``False``. ``True`` if sharing grads else ``False``.
""" """
from dragon.config import option options = _cfg.GetGlobalOptions()
return option['share_grads'] return options['share_grads']
def Drop(op_func, *args, **kwargs): def Drop(op_func, *args, **kwargs):
......
...@@ -13,8 +13,8 @@ from __future__ import absolute_import ...@@ -13,8 +13,8 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon.utils import vision as _vision
import dragon.utils.vision from dragon.core import workspace as _workspace
class MiniBatchOp(object): class MiniBatchOp(object):
...@@ -36,7 +36,7 @@ class MiniBatchOp(object): ...@@ -36,7 +36,7 @@ class MiniBatchOp(object):
""" """
kwargs = eval(self.param_str) kwargs = eval(self.param_str)
self._data_batch = dragon.utils.vision.DataBatch(**kwargs) self._data_batch = _vision.DataBatch(**kwargs)
def run(self, inputs, outputs): def run(self, inputs, outputs):
"""Run method, i.e., forward pass. """Run method, i.e., forward pass.
...@@ -55,4 +55,4 @@ class MiniBatchOp(object): ...@@ -55,4 +55,4 @@ class MiniBatchOp(object):
""" """
blobs = self._data_batch.get() blobs = self._data_batch.get()
for idx, blob in enumerate(blobs): for idx, blob in enumerate(blobs):
dragon.workspace.FeedTensor(outputs[idx], blob) _workspace.FeedTensor(outputs[idx], blob)
\ No newline at end of file \ No newline at end of file
...@@ -15,149 +15,149 @@ from __future__ import absolute_import ...@@ -15,149 +15,149 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from .operators import initializer as init_ops from .operators import initializer as _init_ops
from .operators import vision as vision_ops from .operators import vision as _vision_ops
from .operators import loss as loss_ops from .operators import loss as _loss_ops
from .operators import data as data_ops from .operators import data as _data_ops
from .operators import activation as active_ops from .operators import activation as _active_ops
from .operators import arithmetic as math_ops from .operators import arithmetic as _math_ops
from .operators import control_flow as control_flow_ops from .operators import control_flow as _control_flow_ops
from .operators import misc as misc_ops from .operators import misc as _misc_ops
from .operators import mpi as mpi_ops from .operators import mpi as _mpi_ops
from .operators import array as array_ops from .operators import array as _array_ops
from .operators import norm as norm_ops from .operators import norm as _norm_ops
from .operators import recurrent as recurrent_ops from .operators import recurrent as _recurrent_ops
from .operators import contrib as contrib_ops from .operators import contrib as _contrib_ops
# Data # Data
LMDBData = data_ops.LMDBData LMDBData = _data_ops.LMDBData
ImageData = data_ops.ImageData ImageData = _data_ops.ImageData
# Initializer # Initializer
Fill = init_ops.Fill Fill = _init_ops.Fill
RandomUniform = init_ops.RandomUniform RandomUniform = _init_ops.RandomUniform
RandomNormal = init_ops.RandomNormal RandomNormal = _init_ops.RandomNormal
TruncatedNormal = init_ops.TruncatedNormal TruncatedNormal = _init_ops.TruncatedNormal
GlorotUniform = init_ops.GlorotUniform GlorotUniform = _init_ops.GlorotUniform
GlorotNormal = init_ops.GlorotNormal GlorotNormal = _init_ops.GlorotNormal
# Vision # Vision
Conv2d = vision_ops.Conv2d Conv2d = _vision_ops.Conv2d
DepthwiseConv2d = vision_ops.DepthwiseConv2d DepthwiseConv2d = _vision_ops.DepthwiseConv2d
ConvTranspose2d = DeConv2d = Conv2dTranspose = vision_ops.ConvTranspose2d ConvTranspose2d = DeConv2d = Conv2dTranspose = _vision_ops.ConvTranspose2d
Pool2d = vision_ops.Pool2d Pool2d = _vision_ops.Pool2d
ROIPool = vision_ops.ROIPool ROIPool = _vision_ops.ROIPool
ROIAlign = vision_ops.ROIAlign ROIAlign = _vision_ops.ROIAlign
LRN = vision_ops.LRN LRN = _vision_ops.LRN
NNResize = vision_ops.NNResize NNResize = _vision_ops.NNResize
BilinearResize = vision_ops.BilinearResize BilinearResize = _vision_ops.BilinearResize
BiasAdd = vision_ops.BiasAdd BiasAdd = _vision_ops.BiasAdd
DropBlock2d = vision_ops.DropBlock2d DropBlock2d = _vision_ops.DropBlock2d
# Recurrent # Recurrent
LSTMCell = recurrent_ops.LSTMCell LSTMCell = _recurrent_ops.LSTMCell
RNN = recurrent_ops.RNN RNN = _recurrent_ops.RNN
LSTM = recurrent_ops.LSTM LSTM = _recurrent_ops.LSTM
GRU = recurrent_ops.GRU GRU = _recurrent_ops.GRU
# Activation # Activation
Sigmoid = active_ops.Sigmoid Sigmoid = _active_ops.Sigmoid
Tanh = active_ops.Tanh Tanh = _active_ops.Tanh
Relu = active_ops.Relu Relu = _active_ops.Relu
LRelu = active_ops.LRelu LRelu = _active_ops.LRelu
PRelu = active_ops.PRelu PRelu = _active_ops.PRelu
Elu = active_ops.Elu Elu = _active_ops.Elu
SElu = active_ops.SElu SElu = _active_ops.SElu
Softmax = active_ops.Softmax Softmax = _active_ops.Softmax
Dropout = active_ops.Dropout Dropout = _active_ops.Dropout
# Loss # Loss
NLLLoss = loss_ops.NLLLoss NLLLoss = _loss_ops.NLLLoss
SparseSoftmaxCrossEntropy = loss_ops.SparseSoftmaxCrossEntropy SparseSoftmaxCrossEntropy = _loss_ops.SparseSoftmaxCrossEntropy
SigmoidCrossEntropy = loss_ops.SigmoidCrossEntropy SigmoidCrossEntropy = _loss_ops.SigmoidCrossEntropy
SoftmaxCrossEntropy = loss_ops.SoftmaxCrossEntropy SoftmaxCrossEntropy = _loss_ops.SoftmaxCrossEntropy
SmoothL1Loss = loss_ops.SmoothL1Loss SmoothL1Loss = _loss_ops.SmoothL1Loss
L1Loss = loss_ops.L1Loss L1Loss = _loss_ops.L1Loss
L2Loss = loss_ops.L2Loss L2Loss = _loss_ops.L2Loss
SigmoidFocalLoss = loss_ops.SigmoidFocalLoss SigmoidFocalLoss = _loss_ops.SigmoidFocalLoss
SoftmaxFocalLoss = loss_ops.SoftmaxFocalLoss SoftmaxFocalLoss = _loss_ops.SoftmaxFocalLoss
CTCLoss = loss_ops.CTCLoss CTCLoss = _loss_ops.CTCLoss
# Arithmetic # Arithmetic
Add = math_ops.Add Add = _math_ops.Add
Sub = math_ops.Sub Sub = _math_ops.Sub
Mul = math_ops.Mul Mul = _math_ops.Mul
Div = math_ops.Div Div = _math_ops.Div
Maximum = math_ops.Maximum Maximum = _math_ops.Maximum
Minimum = math_ops.Minimum Minimum = _math_ops.Minimum
Moments = math_ops.Moments Moments = _math_ops.Moments
Clip = math_ops.Clip Clip = _math_ops.Clip
Matmul = math_ops.Matmul Matmul = _math_ops.Matmul
Pow = math_ops.Pow Pow = _math_ops.Pow
Dot = math_ops.Dot Dot = _math_ops.Dot
Log = math_ops.Log Log = _math_ops.Log
Exp = math_ops.Exp Exp = _math_ops.Exp
Square = math_ops.Square Square = _math_ops.Square
Sqrt = math_ops.Sqrt Sqrt = _math_ops.Sqrt
FullyConnected = math_ops.FullyConnected FullyConnected = _math_ops.FullyConnected
Eltwise = math_ops.Eltwise Eltwise = _math_ops.Eltwise
Affine = math_ops.Affine Affine = _math_ops.Affine
GramMatrix = math_ops.GramMatrix GramMatrix = _math_ops.GramMatrix
Accumulate = math_ops.Accumulate Accumulate = _math_ops.Accumulate
MovingAverage = math_ops.MovingAverage MovingAverage = _math_ops.MovingAverage
# Normalization # Normalization
BatchNorm = norm_ops.BatchNorm BatchNorm = _norm_ops.BatchNorm
GroupNorm = norm_ops.GroupNorm GroupNorm = _norm_ops.GroupNorm
LayerNorm = norm_ops.LayerNorm LayerNorm = _norm_ops.LayerNorm
InstanceNorm = norm_ops.InstanceNorm InstanceNorm = _norm_ops.InstanceNorm
L2Norm = norm_ops.L2Norm L2Norm = _norm_ops.L2Norm
# NDArray # NDArray
Gather = array_ops.Gather Gather = _array_ops.Gather
Crop = array_ops.Crop Crop = _array_ops.Crop
Reduce = array_ops.Reduce Reduce = _array_ops.Reduce
Sum = array_ops.Sum Sum = _array_ops.Sum
Mean = array_ops.Mean Mean = _array_ops.Mean
Max = array_ops.Max Max = _array_ops.Max
ArgMax = array_ops.ArgMax ArgMax = _array_ops.ArgMax
Min = array_ops.Min Min = _array_ops.Min
ArgMin = array_ops.ArgMin ArgMin = _array_ops.ArgMin
Slice = array_ops.Slice Slice = _array_ops.Slice
Stack = array_ops.Stack Stack = _array_ops.Stack
Concat = array_ops.Concat Concat = _array_ops.Concat
Transpose = array_ops.Transpose Transpose = _array_ops.Transpose
Repeat = array_ops.Repeat Repeat = _array_ops.Repeat
Tile = array_ops.Tile Tile = _array_ops.Tile
Pad = array_ops.Pad Pad = _array_ops.Pad
OneHot = array_ops.OneHot OneHot = _array_ops.OneHot
Flatten = array_ops.Flatten Flatten = _array_ops.Flatten
Reshape = array_ops.Reshape Reshape = _array_ops.Reshape
ExpandDims = array_ops.ExpandDims ExpandDims = _array_ops.ExpandDims
Squeeze = array_ops.Squeeze Squeeze = _array_ops.Squeeze
Shape = array_ops.Shape Shape = _array_ops.Shape
Arange = array_ops.Arange Arange = _array_ops.Arange
Multinomial = array_ops.Multinomial Multinomial = _array_ops.Multinomial
# Control Flow # Control Flow
Copy = control_flow_ops.Copy Copy = _control_flow_ops.Copy
Assign = control_flow_ops.Assign Assign = _control_flow_ops.Assign
Equal = control_flow_ops.Equal Equal = _control_flow_ops.Equal
Less = control_flow_ops.Less Less = _control_flow_ops.Less
LessEqual = control_flow_ops.LessEqual LessEqual = _control_flow_ops.LessEqual
Greater = control_flow_ops.Greater Greater = _control_flow_ops.Greater
GreaterEqual = control_flow_ops.GreaterEqual GreaterEqual = _control_flow_ops.GreaterEqual
# Misc # Misc
Cast = AsType = misc_ops.Cast Cast = AsType = _misc_ops.Cast
Run = misc_ops.Run Run = _misc_ops.Run
Template = misc_ops.Template Template = _misc_ops.Template
Accuracy = misc_ops.Accuracy Accuracy = _misc_ops.Accuracy
StopGradient = misc_ops.StopGradient StopGradient = _misc_ops.StopGradient
# MPI # MPI
MPIBroadcast = mpi_ops.MPIBroadcast MPIBroadcast = _mpi_ops.MPIBroadcast
MPIGather = mpi_ops.MPIGather MPIGather = _mpi_ops.MPIGather
# Contrib # Contrib
Proposal = contrib_ops.Proposal # R-CNN Proposal = _contrib_ops.Proposal # R-CNN
\ No newline at end of file \ No newline at end of file
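The rename above only prefixes the implementation modules with an underscore; the public operator aliases are unchanged. A quick sanity sketch (operator names taken from the alias list above):

    from dragon import ops as _ops

    # The aliases defined in this file keep their public names.
    print(_ops.Conv2d, _ops.Relu, _ops.Softmax, _ops.FullyConnected)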
...@@ -145,18 +145,6 @@ message GradientProto { ...@@ -145,18 +145,6 @@ message GradientProto {
optional string external = 3; optional string external = 3;
} }
// Record the updater information
message UpdaterProto {
// The operator name to use.
optional string name = 1;
// The operator type.
optional string type = 2;
// The tensor to update.
repeated string tensor = 3;
// The arguments.
repeated Argument arg = 4;
}
// Graph Definition // Graph Definition
message GraphDef { message GraphDef {
// The graph name. // The graph name.
...@@ -181,6 +169,4 @@ message GraphDef { ...@@ -181,6 +169,4 @@ message GraphDef {
// The gradients information. // The gradients information.
repeated GradientProto gradient = 9; repeated GradientProto gradient = 9;
// The updaters information.
repeated UpdaterProto updater = 10;
} }
\ No newline at end of file
...@@ -22,8 +22,8 @@ from __future__ import print_function ...@@ -22,8 +22,8 @@ from __future__ import print_function
import pprint import pprint
from dragon.core import workspace from dragon.core import workspace as _workspace
from dragon.core.tensor import Tensor from dragon.core.tensor import Tensor as _Tensor
class BaseUpdater(object): class BaseUpdater(object):
...@@ -32,12 +32,14 @@ class BaseUpdater(object): ...@@ -32,12 +32,14 @@ class BaseUpdater(object):
# Store the global unique slot index # Store the global unique slot index
_DEFAULT_UNIQUE_SLOT_ID = 0 _DEFAULT_UNIQUE_SLOT_ID = 0
def __init__(self, def __init__(
scale_gradient=1.0, self,
clip_gradient=-1.0, scale_gradient=1.0,
l2_decay=-1.0, clip_gradient=-1.0,
slot=None, l2_decay=-1.0,
verbose=True): slot=None,
verbose=True,
):
"""Construct a Updater to optimize the objectives. """Construct a Updater to optimize the objectives.
Parameters Parameters
...@@ -84,7 +86,7 @@ class BaseUpdater(object): ...@@ -84,7 +86,7 @@ class BaseUpdater(object):
None None
""" """
pair = (tensor.name if isinstance(tensor, Tensor) \ pair = (tensor.name if isinstance(tensor, _Tensor) \
else tensor for tensor in pair) else tensor for tensor in pair)
self._param_group.append((pair, self._param_group.append((pair,
{'lr_mult': lr_mult, 'decay_mult': decay_mult})) {'lr_mult': lr_mult, 'decay_mult': decay_mult}))
...@@ -93,7 +95,8 @@ class BaseUpdater(object): ...@@ -93,7 +95,8 @@ class BaseUpdater(object):
defaults = self.__dict__.get('_defaults') defaults = self.__dict__.get('_defaults')
if item in defaults: if item in defaults:
if self._registered: if self._registered:
return workspace.FetchTensor(self._slot + '/' + item) return _workspace.FetchTensor(
self._slot + '/' + item)
else: return defaults[item] else: return defaults[item]
return self.__dict__[item] return self.__dict__[item]
...@@ -101,8 +104,9 @@ class BaseUpdater(object): ...@@ -101,8 +104,9 @@ class BaseUpdater(object):
defaults = self.__dict__.get('_defaults') defaults = self.__dict__.get('_defaults')
if defaults is not None and key in defaults: if defaults is not None and key in defaults:
if self._registered: if self._registered:
workspace.FeedTensor(self._slot + '/' + key, value, _workspace.FeedTensor(
dtype='float32', force_cpu=True) self._slot + '/' + key, value,
dtype='float32', force_cpu=True)
else: else:
self._defaults[key] = value self._defaults[key] = value
else: else:
...@@ -111,8 +115,9 @@ class BaseUpdater(object): ...@@ -111,8 +115,9 @@ class BaseUpdater(object):
def register_in_workspace(self): def register_in_workspace(self):
if not self._registered: if not self._registered:
for k, v in self._defaults.items(): for k, v in self._defaults.items():
workspace.FeedTensor(self._slot + "/" + k, v, _workspace.FeedTensor(
dtype='float32', force_cpu=True) self._slot + "/" + k, v,
dtype='float32', force_cpu=True)
self._registered = True self._registered = True
if self._verbose: if self._verbose:
print('---------------------------------------------------------') print('---------------------------------------------------------')
...@@ -206,8 +211,14 @@ class AdamUpdater(BaseUpdater): ...@@ -206,8 +211,14 @@ class AdamUpdater(BaseUpdater):
Introduced by `[Kingma & Ba, 2014] <https://arxiv.org/abs/1412.6980>`_. Introduced by `[Kingma & Ba, 2014] <https://arxiv.org/abs/1412.6980>`_.
""" """
def __init__(self, base_lr=0.01, beta1=0.9, def __init__(
beta2=0.999, eps=1e-8, **kwargs): self,
base_lr=0.01,
beta1=0.9,
beta2=0.999,
eps=1e-8,
**kwargs
):
"""Construct a Adam Updater to optimize the objectives. """Construct a Adam Updater to optimize the objectives.
Parameters Parameters
...@@ -222,7 +233,7 @@ class AdamUpdater(BaseUpdater): ...@@ -222,7 +233,7 @@ class AdamUpdater(BaseUpdater):
The eps. The eps.
""" """
super(AdamUpdater, self).__init__(**kwargs ) super(AdamUpdater, self).__init__(**kwargs)
self._defaults = dict({ self._defaults = dict({
'base_lr': base_lr, 'base_lr': base_lr,
'beta1': beta1, 'beta1': beta1,
......
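Sketch of the slot convention used by BaseUpdater above: once registered, each hyper-parameter lives in the workspace under '<slot>/<name>' and is read or written through the workspace rather than the Python object (the slot name and value below are hypothetical; the FeedTensor/FetchTensor signatures are the ones shown in this diff):

    from dragon.core import workspace as _workspace

    slot = 'Optimizer/Update'  # hypothetical slot name
    _workspace.FeedTensor(slot + '/base_lr', 0.01, dtype='float32', force_cpu=True)
    print(_workspace.FetchTensor(slot + '/base_lr'))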
...@@ -13,11 +13,11 @@ from __future__ import absolute_import ...@@ -13,11 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy
from multiprocessing import Process import multiprocessing
class BlobFetcher(Process): class BlobFetcher(multiprocessing.Process):
"""BlobFetcher is deployed to queue blobs from `DataTransformer`_. """BlobFetcher is deployed to queue blobs from `DataTransformer`_.
It supports forming *NHWC* image blobs and *1d* label blobs. It supports forming *NHWC* image blobs and *1d* label blobs.
...@@ -37,10 +37,9 @@ class BlobFetcher(Process): ...@@ -37,10 +37,9 @@ class BlobFetcher(Process):
""" """
super(BlobFetcher, self).__init__() super(BlobFetcher, self).__init__()
self._batch_size = kwargs.get('batch_size', 100) self._batch_size = kwargs.get('batch_size', 128)
self._partition = kwargs.get('partition', False) self._partition = kwargs.get('partition', False)
if self._partition: if self._partition: self._batch_size /= kwargs['group_size']
self._batch_size = self._batch_size // kwargs['group_size']
self.Q_in = self.Q_out = None self.Q_in = self.Q_out = None
self.daemon = True self.daemon = True
...@@ -54,9 +53,9 @@ class BlobFetcher(Process): ...@@ -54,9 +53,9 @@ class BlobFetcher(Process):
""" """
im, labels = self.Q_in.get() im, labels = self.Q_in.get()
im_blob = np.zeros(shape=([self._batch_size] + list(im.shape)), dtype=np.uint8) im_blob = numpy.zeros(shape=([self._batch_size] + list(im.shape)), dtype='uint8')
label_blob = np.zeros((self._batch_size, len(labels)), dtype=np.int64) label_blob = numpy.zeros((self._batch_size, len(labels)), dtype='int64')
for ix in range(0, self._batch_size): for ix in range(self._batch_size):
im_blob[ix, :, :, :], label_blob[ix, :] = im, labels im_blob[ix, :, :, :], label_blob[ix, :] = im, labels
if ix != self._batch_size - 1: im, labels = self.Q_in.get() if ix != self._batch_size - 1: im, labels = self.Q_in.get()
return im_blob, label_blob return im_blob, label_blob
......
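A standalone sketch of the blob assembly loop above, assuming a list of (image, labels) samples instead of the input queue: images are stacked into one NHWC uint8 blob and labels into an int64 matrix.

    import numpy

    def stack_batch(samples):
        # Use the first sample to size the output blobs.
        im, labels = samples[0]
        im_blob = numpy.zeros([len(samples)] + list(im.shape), dtype='uint8')
        label_blob = numpy.zeros((len(samples), len(labels)), dtype='int64')
        for ix, (im, labels) in enumerate(samples):
            im_blob[ix], label_blob[ix] = im, labels
        return im_blob, label_blob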
...@@ -14,11 +14,10 @@ from __future__ import division ...@@ -14,11 +14,10 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import time import time
import pprint import multiprocessing
from multiprocessing import Queue
import dragon.core.mpi as mpi from dragon.core import mpi as _mpi
import dragon.core.logging as logging from dragon.core import logging as _logging
from .data_reader import DataReader from .data_reader import DataReader
from .data_transformer import DataTransformer from .data_transformer import DataTransformer
...@@ -77,10 +76,11 @@ class DataBatch(object): ...@@ -77,10 +76,11 @@ class DataBatch(object):
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
# Init mpi # Init mpi
global_rank = 0; local_rank = 0; group_size = 1 global_rank = 0; local_rank = 0; group_size = 1
if mpi.Is_Init(): if _mpi.Is_Init() and kwargs.get(
idx, group = mpi.AllowParallel() 'phase', 'TRAIN') == 'TRAIN':
if idx != -1: # DataParallel rank, group = _mpi.AllowParallel()
global_rank = mpi.Rank() if rank != -1: # DataParallel
global_rank = _mpi.Rank()
group_size = len(group) group_size = len(group)
for i, node in enumerate(group): for i, node in enumerate(group):
if global_rank == node: local_rank = i if global_rank == node: local_rank = i
...@@ -105,7 +105,7 @@ class DataBatch(object): ...@@ -105,7 +105,7 @@ class DataBatch(object):
self._num_transformers += 1 self._num_transformers += 1
# Add 1 transformer for random crop # Add 1 transformer for random crop
if kwargs.get('crop_size', 0) > 0 and \ if kwargs.get('crop_size', 0) > 0 and \
kwargs.get('phase', 'TEST') == 'TRAIN': kwargs.get('phase', 'TRAIN') == 'TRAIN':
self._num_transformers += 1 self._num_transformers += 1
self._num_transformers = min(self._num_transformers, self._max_transformers) self._num_transformers = min(self._num_transformers, self._max_transformers)
...@@ -115,9 +115,12 @@ class DataBatch(object): ...@@ -115,9 +115,12 @@ class DataBatch(object):
self._batch_size = int(self._batch_size / kwargs['group_size']) self._batch_size = int(self._batch_size / kwargs['group_size'])
# Init queues # Init queues
self.Q_level_1 = Queue(self._prefetch * self._num_readers * self._batch_size) self.Q_level_1 = multiprocessing.Queue(
self.Q_level_2 = Queue(self._prefetch * self._num_readers * self._batch_size) self._prefetch * self._num_readers * self._batch_size)
self.Q_level_3 = Queue(self._prefetch * self._num_readers) self.Q_level_2 = multiprocessing.Queue(
self._prefetch * self._num_readers * self._batch_size)
self.Q_level_3 = multiprocessing.Queue(
self._prefetch * self._num_readers)
# Init readers # Init readers
self._readers = [] self._readers = []
...@@ -167,11 +170,11 @@ class DataBatch(object): ...@@ -167,11 +170,11 @@ class DataBatch(object):
process.terminate() process.terminate()
process.join() process.join()
terminate(self._fetchers) terminate(self._fetchers)
if local_rank == 0: logging.info('Terminating BlobFetcher ......') if local_rank == 0: _logging.info('Terminate BlobFetcher.')
terminate(self._transformers) terminate(self._transformers)
if local_rank == 0: logging.info('Terminating DataTransformer ......') if local_rank == 0: _logging.info('Terminate DataTransformer.')
terminate(self._readers) terminate(self._readers)
if local_rank == 0: logging.info('Terminating DataReader......') if local_rank == 0: _logging.info('Terminate DataReader.')
import atexit import atexit
atexit.register(cleanup) atexit.register(cleanup)
......
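For reference, a hedged construction of the pipeline above (keyword names are drawn from the kwargs read in this diff where they appear; 'source' and all values are illustrative):

    from dragon.utils import vision as _vision

    batch = _vision.DataBatch(
        source='/data/train_lmdb',  # illustrative path; key assumed
        batch_size=128,
        phase='TRAIN',
        crop_size=224,
    )
    blobs = batch.get()  # (im_blob, label_blob) as produced by BlobFetcher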
...@@ -14,15 +14,14 @@ from __future__ import division ...@@ -14,15 +14,14 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import math import math
import numpy as np import numpy
import numpy.random as npr import multiprocessing
from multiprocessing import Process
import dragon.config as config from dragon import config as _cfg
from dragon.tools.db import LMDB from dragon.tools import db as _db
class DataReader(Process): class DataReader(multiprocessing.Process):
"""DataReader is deployed to queue encoded str from `LMDB`_. """DataReader is deployed to queue encoded str from `LMDB`_.
It supports adaptively partitioning and shuffling records over all distributed nodes. It supports adaptively partitioning and shuffling records over all distributed nodes.
...@@ -55,7 +54,7 @@ class DataReader(Process): ...@@ -55,7 +54,7 @@ class DataReader(Process):
self._part_idx, self._num_parts = 0, 1 self._part_idx, self._num_parts = 0, 1
self._cur_idx, self._cur_chunk_idx = 0, 0 self._cur_idx, self._cur_chunk_idx = 0, 0
self._random_seed = config.GetRandomSeed() self._random_seed = _cfg.GetRandomSeed()
self.Q_out = None self.Q_out = None
self.daemon = True self.daemon = True
...@@ -106,7 +105,9 @@ class DataReader(Process): ...@@ -106,7 +105,9 @@ class DataReader(Process):
""" """
if self._multiple_nodes or self._use_shuffle: if self._multiple_nodes or self._use_shuffle:
if self._use_shuffle: self._perm = npr.permutation(self._num_shuffle_parts) if self._use_shuffle:
self._perm = numpy.random.permutation(
self._num_shuffle_parts)
self._cur_chunk_idx = 0 self._cur_chunk_idx = 0
self._start_idx = int(self._part_idx * self._num_shuffle_parts + self._perm[self._cur_chunk_idx]) self._start_idx = int(self._part_idx * self._num_shuffle_parts + self._perm[self._cur_chunk_idx])
self._start_idx = int(self._start_idx * self._chunk_size) self._start_idx = int(self._start_idx * self._chunk_size)
...@@ -158,23 +159,23 @@ class DataReader(Process): ...@@ -158,23 +159,23 @@ class DataReader(Process):
""" """
# fix seed # fix seed
npr.seed(self._random_seed) numpy.random.seed(self._random_seed)
# init db # init db
self._db = LMDB() self._db = _db.LMDB()
self._db.open(self._source) self._db.open(self._source)
self._zfill = self._db.zfill() self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries() self._num_entries = self._db.num_entries()
self._epoch_size = int(self._num_entries/ self._num_parts + 1) self._epoch_size = int(self._num_entries / self._num_parts + 1)
if self._use_shuffle: if self._use_shuffle:
if self._chunk_size == 1: if self._chunk_size == 1:
# Each chunk has at most 1 record [For Fully Shuffle] # Each chunk has at most 1 record (Naive Shuffle)
self._chunk_size, self._num_shuffle_parts = \ self._chunk_size, self._num_shuffle_parts = \
1, int(self._num_entries / self._num_parts) + 1 1, int(self._num_entries / self._num_parts) + 1
else: else:
if self._use_shuffle and self._chunk_size == -1: if self._use_shuffle and self._chunk_size == -1:
# Search an optimal chunk size by chunks [For Chunk Shuffle] # Search an optimal chunk size by chunks (Chunk Shuffle)
max_chunk_size = self._db._total_size / ((self._num_chunks * (1 << 20))) max_chunk_size = self._db._total_size / ((self._num_chunks * (1 << 20)))
min_chunk_size = 1 min_chunk_size = 1
while min_chunk_size * 2 < max_chunk_size: min_chunk_size *= 2 while min_chunk_size * 2 < max_chunk_size: min_chunk_size *= 2
...@@ -184,17 +185,17 @@ class DataReader(Process): ...@@ -184,17 +185,17 @@ class DataReader(Process):
self._chunk_size = int(self._num_entries / self._num_shuffle_parts / self._num_parts + 1) self._chunk_size = int(self._num_entries / self._num_shuffle_parts / self._num_parts + 1)
limit = (self._num_parts - 0.5) * self._num_shuffle_parts * self._chunk_size limit = (self._num_parts - 0.5) * self._num_shuffle_parts * self._chunk_size
if self._num_entries <= limit: if self._num_entries <= limit:
# Roll back to fully shuffle # Roll back to naive shuffle
self._chunk_size, self._num_shuffle_parts = \ self._chunk_size, self._num_shuffle_parts = \
1, int(self._num_entries / self._num_parts) + 1 1, int(self._num_entries / self._num_parts) + 1
else: else:
# Each chunk has at most K records [For Multiple Nodes] # Each chunk has at most K records
# Note that if ``shuffle`` and ``multiple_nodes`` are all ``False``, # Note that if ``shuffle`` and ``multiple_nodes`` are all *False*,
# ``chunk_size`` and ``num_shuffle_parts`` are meaningless # ``chunk_size`` and ``num_shuffle_parts`` are meaningless
self._chunk_size = int(self._num_entries / self._num_parts) + 1 self._chunk_size = int(self._num_entries / self._num_parts) + 1
self._num_shuffle_parts = 1 self._num_shuffle_parts = 1
self._perm = np.arange(self._num_shuffle_parts) self._perm = numpy.arange(self._num_shuffle_parts)
# Init env # Init env
self.reset() self.reset()
......
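The chunk-size search above can be read in isolation as the following heuristic (a sketch mirroring the loop in this hunk; total_size is the LMDB size in bytes and num_chunks is the target chunk count):

    def search_chunk_size(total_size, num_chunks):
        # Grow a power-of-two chunk size until doubling it would exceed
        # the per-chunk budget derived from the database size.
        max_chunk_size = total_size / (num_chunks * (1 << 20))
        min_chunk_size = 1
        while min_chunk_size * 2 < max_chunk_size:
            min_chunk_size *= 2
        return min_chunk_size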
...@@ -13,12 +13,11 @@ from __future__ import absolute_import ...@@ -13,12 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy
import numpy.random as npr import multiprocessing
from multiprocessing import Process
import dragon.config as config from dragon import config as _cfg
import dragon.vm.caffe.proto.caffe_pb2 as pb from dragon.vm.caffe.proto import caffe_pb2 as _proto_def
try: try:
import cv2 import cv2
...@@ -31,7 +30,7 @@ except ImportError as e: ...@@ -31,7 +30,7 @@ except ImportError as e:
print("Failed to import PIL. \nIt's OK if disabling color augmentation.".format(str(e))) print("Failed to import PIL. \nIt's OK if disabling color augmentation.".format(str(e)))
class DataTransformer(Process): class DataTransformer(multiprocessing.Process):
"""DataTransformer is deployed to queue transformed images from `DataReader`_. """DataTransformer is deployed to queue transformed images from `DataReader`_.
Nearly all common image augmentation methods are supported. Nearly all common image augmentation methods are supported.
...@@ -72,7 +71,7 @@ class DataTransformer(Process): ...@@ -72,7 +71,7 @@ class DataTransformer(Process):
self._max_random_scale = kwargs.get('max_random_scale', 1.0) self._max_random_scale = kwargs.get('max_random_scale', 1.0)
self._force_color = kwargs.get('force_color', False) self._force_color = kwargs.get('force_color', False)
self._phase = kwargs.get('phase', 'TRAIN') self._phase = kwargs.get('phase', 'TRAIN')
self._random_seed = config.GetRandomSeed() self._random_seed = _cfg.GetRandomSeed()
self.Q_in = self.Q_out = None self.Q_in = self.Q_out = None
self.daemon = True self.daemon = True
...@@ -91,16 +90,16 @@ class DataTransformer(Process): ...@@ -91,16 +90,16 @@ class DataTransformer(Process):
""" """
# decode # decode
datum = pb.Datum() datum = _proto_def.Datum()
datum.ParseFromString(serialized) datum.ParseFromString(serialized)
im = np.fromstring(datum.data, np.uint8) im = numpy.fromstring(datum.data, numpy.uint8)
if datum.encoded is True: if datum.encoded is True:
im = cv2.imdecode(im, -1) im = cv2.imdecode(im, -1)
else: else:
im = im.reshape((datum.height, datum.width, datum.channels)) im = im.reshape((datum.height, datum.width, datum.channels))
# Random scale # Random scale
random_scale = npr.uniform() * ( random_scale = numpy.random.uniform() * (
self._max_random_scale - self._min_random_scale) \ self._max_random_scale - self._min_random_scale) \
+ self._min_random_scale + self._min_random_scale
if random_scale != 1.0: if random_scale != 1.0:
...@@ -109,7 +108,7 @@ class DataTransformer(Process): ...@@ -109,7 +108,7 @@ class DataTransformer(Process):
# Padding # Padding
if self._padding > 0: if self._padding > 0:
pad_img = np.empty(( pad_img = numpy.empty((
im.shape[0] + 2 * self._padding, im.shape[0] + 2 * self._padding,
im.shape[1] + 2 * self._padding, im.shape[2]), dtype=im.dtype) im.shape[1] + 2 * self._padding, im.shape[2]), dtype=im.dtype)
pad_img.fill(self._fill_value) pad_img.fill(self._fill_value)
...@@ -120,8 +119,8 @@ class DataTransformer(Process): ...@@ -120,8 +119,8 @@ class DataTransformer(Process):
# Random crop # Random crop
if self._crop_size > 0: if self._crop_size > 0:
if self._phase == 'TRAIN': if self._phase == 'TRAIN':
h_off = npr.randint(im.shape[0] - self._crop_size + 1) h_off = numpy.random.randint(im.shape[0] - self._crop_size + 1)
w_off = npr.randint(im.shape[1] - self._crop_size + 1) w_off = numpy.random.randint(im.shape[1] - self._crop_size + 1)
else: else:
h_off = int((im.shape[0] - self._crop_size) / 2) h_off = int((im.shape[0] - self._crop_size) / 2)
w_off = int((im.shape[1] - self._crop_size) / 2) w_off = int((im.shape[1] - self._crop_size) / 2)
...@@ -130,28 +129,28 @@ class DataTransformer(Process): ...@@ -130,28 +129,28 @@ class DataTransformer(Process):
# Random mirror # Random mirror
if self._mirror: if self._mirror:
if npr.randint(0, 2) > 0: if numpy.random.randint(0, 2) > 0:
im = im[:, ::-1, :] im = im[:, ::-1, :]
# Gray Transformation # Gray Transformation
if self._force_color: if self._force_color:
if im.shape[2] == 1: if im.shape[2] == 1:
# duplicate to 3 channels # duplicate to 3 channels
im = np.concatenate([im, im, im], axis=2) im = numpy.concatenate([im, im, im], axis=2)
# Color Augmentation # Color Augmentation
if self._color_aug: if self._color_aug:
im = PIL.Image.fromarray(im) im = PIL.Image.fromarray(im)
delta_brightness = npr.uniform(-0.4, 0.4) + 1.0 delta_brightness = numpy.random.uniform(-0.4, 0.4) + 1.0
delta_contrast = npr.uniform(-0.4, 0.4) + 1.0 delta_contrast = numpy.random.uniform(-0.4, 0.4) + 1.0
delta_saturation = npr.uniform(-0.4, 0.4) + 1.0 delta_saturation = numpy.random.uniform(-0.4, 0.4) + 1.0
im = PIL.ImageEnhance.Brightness(im) im = PIL.ImageEnhance.Brightness(im)
im = im.enhance(delta_brightness) im = im.enhance(delta_brightness)
im = PIL.ImageEnhance.Contrast(im) im = PIL.ImageEnhance.Contrast(im)
im = im.enhance(delta_contrast) im = im.enhance(delta_contrast)
im = PIL.ImageEnhance.Color(im) im = PIL.ImageEnhance.Color(im)
im = im.enhance(delta_saturation) im = im.enhance(delta_saturation)
im = np.array(im) im = numpy.array(im)
# Extract Labels # Extract Labels
labels = [] labels = []
...@@ -169,7 +168,7 @@ class DataTransformer(Process): ...@@ -169,7 +168,7 @@ class DataTransformer(Process):
""" """
# Fix the random seed # Fix the random seed
npr.seed(self._random_seed) numpy.random.seed(self._random_seed)
# Run! # Run!
while True: while True:
......
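A minimal sketch of the crop policy above, pulled out as a standalone function: random offsets in the TRAIN phase, a center crop otherwise.

    import numpy

    def crop(im, crop_size, phase='TRAIN'):
        if phase == 'TRAIN':
            h_off = numpy.random.randint(im.shape[0] - crop_size + 1)
            w_off = numpy.random.randint(im.shape[1] - crop_size + 1)
        else:
            h_off = (im.shape[0] - crop_size) // 2
            w_off = (im.shape[1] - crop_size) // 2
        return im[h_off:h_off + crop_size, w_off:w_off + crop_size]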
...@@ -16,8 +16,8 @@ import shutil ...@@ -16,8 +16,8 @@ import shutil
import argparse import argparse
import cv2 import cv2
from dragon.tools.db import LMDB from dragon.tools import db as _db
from dragon.vm.caffe.proto import caffe_pb2 from dragon.vm.caffe.proto import caffe_pb2 as _proto_def
def resize_image(im, resize): def resize_image(im, resize):
...@@ -37,11 +37,10 @@ def resize_image(im, resize): ...@@ -37,11 +37,10 @@ def resize_image(im, resize):
""" """
if im.shape[0] > im.shape[1]: if im.shape[0] > im.shape[1]:
newsize = (resize, im.shape[0] * resize / im.shape[1]) new_size = (resize, im.shape[0] * resize // im.shape[1])
else: else:
newsize = (im.shape[1] * resize / im.shape[0], resize) new_size = (im.shape[1] * resize // im.shape[0], resize)
im = cv2.resize(im, newsize) return cv2.resize(im, new_size, interpolation=cv2.INTER_LINEAR)
return im
def make_db(args): def make_db(args):
...@@ -72,7 +71,7 @@ def make_db(args): ...@@ -72,7 +71,7 @@ def make_db(args):
print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime())) print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000) db = _db.LMDB(max_commit=10000)
db.open(args.database, mode='w') db.open(args.database, mode='w')
total_line = sum(1 for line in open(args.list)) total_line = sum(1 for line in open(args.list))
...@@ -106,7 +105,7 @@ def make_db(args): ...@@ -106,7 +105,7 @@ def make_db(args):
img = resize_image(img, args.resize) img = resize_image(img, args.resize)
result, imgencode = cv2.imencode('.jpg', img, encode_param) result, imgencode = cv2.imencode('.jpg', img, encode_param)
datum = caffe_pb2.Datum() datum = _proto_def.Datum()
datum.height, datum.width, datum.channels = img.shape datum.height, datum.width, datum.channels = img.shape
datum.label = int(label) datum.label = int(label)
datum.encoded = True datum.encoded = True
......
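Usage sketch for the resize_image helper above (the image path is illustrative): the shorter side is scaled to `resize` and the longer side keeps the aspect ratio.

    import cv2

    img = cv2.imread('example.jpg')      # illustrative path
    img = resize_image(img, resize=256)  # helper defined above
    print(img.shape)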
...@@ -15,7 +15,8 @@ from __future__ import absolute_import ...@@ -15,7 +15,8 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon.core import scope as _scope
from dragon.core.tensor import Tensor as _Tensor
class Layer(object): class Layer(object):
...@@ -74,12 +75,12 @@ class Layer(object): ...@@ -74,12 +75,12 @@ class Layer(object):
# Note that a non-empty tensor scope will make it # Note that a non-empty tensor scope will make it
# impossible to load/save caffe models. You should use # impossible to load/save caffe models. You should use
# a new workspace instead of the terrible name scope # a new workspace instead of the terrible name scope
scoped_name = dragon.get_default_name_scope() + self._name scoped_name = _scope.get_default_name_scope() + self._name
param_name = scoped_name + '/param:{}'.format(len(self._blobs)) param_name = scoped_name + '/param:{}'.format(len(self._blobs))
# Set the name explicitly # Set the name explicitly
variable = dragon.Tensor.Ref(param_name) variable = _Tensor.Ref(param_name)
variable_grad = dragon.Tensor.Ref(param_name + '_grad') variable_grad = _Tensor.Ref(param_name + '_grad')
if filler is not None: if filler is not None:
variable.Fill(**filler) variable.Fill(**filler)
......
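Sketch of the parameter-naming rule used in AddBlob above: each blob is referenced as '<scope><layer>/param:<index>', with its gradient suffixed by '_grad' (the layer name below is hypothetical; get_default_name_scope and Tensor.Ref are the calls shown in this diff):

    from dragon.core import scope as _scope
    from dragon.core.tensor import Tensor as _Tensor

    scoped_name = _scope.get_default_name_scope() + 'conv1'  # hypothetical layer name
    weight = _Tensor.Ref(scoped_name + '/param:0')
    weight_grad = _Tensor.Ref(scoped_name + '/param:0_grad')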
...@@ -15,11 +15,11 @@ from __future__ import absolute_import ...@@ -15,11 +15,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from ..layer import Layer from ..layer import Layer as _Layer
class InnerProductLayer(Layer): class InnerProductLayer(_Layer):
"""The implementation of ``InnerProductLayer``. """The implementation of ``InnerProductLayer``.
Parameters Parameters
...@@ -28,9 +28,9 @@ class InnerProductLayer(Layer): ...@@ -28,9 +28,9 @@ class InnerProductLayer(Layer):
The output dim. Refer `InnerProductParameter.num_output`_. The output dim. Refer `InnerProductParameter.num_output`_.
bias_term : boolean bias_term : boolean
Whether to use bias. Refer `InnerProductParameter.bias_term`_. Whether to use bias. Refer `InnerProductParameter.bias_term`_.
weight_filler : caffe_pb2.FillerParameter weight_filler : FillerParameter
The filler of weight. Refer `InnerProductParameter.weight_filler`_. The filler of weight. Refer `InnerProductParameter.weight_filler`_.
bias_filler : caffe_pb2.FillerParameter bias_filler : FillerParameter
The filler of bias. Refer `InnerProductParameter.bias_filler`_. The filler of bias. Refer `InnerProductParameter.bias_filler`_.
axis : int axis : int
The start axis to calculate. Refer `InnerProductParameter.axis`_. The start axis to calculate. Refer `InnerProductParameter.axis`_.
...@@ -53,10 +53,10 @@ class InnerProductLayer(Layer): ...@@ -53,10 +53,10 @@ class InnerProductLayer(Layer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return dragon.ops.FullyConnected(inputs, **self.arguments) return _ops.FullyConnected(inputs, **self.arguments)
class AccuracyLayer(Layer): class AccuracyLayer(_Layer):
"""The implementation of ``AccuracyLayer``. """The implementation of ``AccuracyLayer``.
Parameters Parameters
...@@ -79,10 +79,10 @@ class AccuracyLayer(Layer): ...@@ -79,10 +79,10 @@ class AccuracyLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Accuracy(bottom, **self.arguments) return _ops.Accuracy(bottom, **self.arguments)
class PythonLayer(Layer): class PythonLayer(_Layer):
"""The implementation of ``PythonLayer``. """The implementation of ``PythonLayer``.
Parameters Parameters
...@@ -106,10 +106,10 @@ class PythonLayer(Layer): ...@@ -106,10 +106,10 @@ class PythonLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Run(bottom, **self.arguments) return _ops.Run(bottom, **self.arguments)
class EltwiseLayer(Layer): class EltwiseLayer(_Layer):
"""The implementation of ``EltwiseLayer``. """The implementation of ``EltwiseLayer``.
Parameters Parameters
...@@ -130,20 +130,20 @@ class EltwiseLayer(Layer): ...@@ -130,20 +130,20 @@ class EltwiseLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Eltwise(bottom, **self.arguments) return _ops.Eltwise(bottom, **self.arguments)
class AddLayer(Layer): class AddLayer(_Layer):
"""The extended implementation of ``EltwiseLayer``.""" """The extended implementation of ``EltwiseLayer``."""
def __init__(self, LayerParameter): def __init__(self, LayerParameter):
super(AddLayer, self).__init__(LayerParameter) super(AddLayer, self).__init__(LayerParameter)
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Add(bottom, **self.arguments) return _ops.Add(bottom, **self.arguments)
class ConcatLayer(Layer): class ConcatLayer(_Layer):
"""The implementation of ``ConcatLayer``. """The implementation of ``ConcatLayer``.
Parameters Parameters
...@@ -157,10 +157,10 @@ class ConcatLayer(Layer): ...@@ -157,10 +157,10 @@ class ConcatLayer(Layer):
self.arguments = {'axis': LayerParameter.concat_param.axis} self.arguments = {'axis': LayerParameter.concat_param.axis}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Concat(bottom, **self.arguments) return _ops.Concat(bottom, **self.arguments)
class SliceLayer(Layer): class SliceLayer(_Layer):
"""The implementation of ``SliceLayer``. """The implementation of ``SliceLayer``.
Parameters Parameters
...@@ -181,17 +181,17 @@ class SliceLayer(Layer): ...@@ -181,17 +181,17 @@ class SliceLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Slice(bottom, **self.arguments) return _ops.Slice(bottom, **self.arguments)
class CropLayer(Layer): class CropLayer(_Layer):
"""The implementation of ``CropLayer``. """The implementation of ``CropLayer``.
Parameters Parameters
---------- ----------
axis : int axis : int
The start axis. Refer `CropParameter.axis`_. The start axis. Refer `CropParameter.axis`_.
offset : list of int offset : sequence of int
The offsets. Refer `CropParameter.offset`_. The offsets. Refer `CropParameter.offset`_.
""" """
...@@ -208,15 +208,15 @@ class CropLayer(Layer): ...@@ -208,15 +208,15 @@ class CropLayer(Layer):
raise ValueError('Expected two bottom blobs.') raise ValueError('Expected two bottom blobs.')
self.arguments['shape_like'] = bottom[1] self.arguments['shape_like'] = bottom[1]
self.arguments['starts'] = self.arguments['sizes'] = None self.arguments['starts'] = self.arguments['sizes'] = None
return dragon.ops.Crop(bottom[0], **self.arguments) return _ops.Crop(bottom[0], **self.arguments)
class ReshapeLayer(Layer): class ReshapeLayer(_Layer):
"""The implementation of ``ReshapeLayer``. """The implementation of ``ReshapeLayer``.
Parameters Parameters
---------- ----------
shape : list of int shape : sequence of int
The output shape. Refer `ReshapeParameter.shape`_. The output shape. Refer `ReshapeParameter.shape`_.
""" """
...@@ -226,15 +226,15 @@ class ReshapeLayer(Layer): ...@@ -226,15 +226,15 @@ class ReshapeLayer(Layer):
in LayerParameter.reshape_param.shape.dim]} in LayerParameter.reshape_param.shape.dim]}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Reshape(bottom, **self.arguments) return _ops.Reshape(bottom, **self.arguments)
class PermuteLayer(Layer): class PermuteLayer(_Layer):
"""The implementation of ``PermuteLayer``. """The implementation of ``PermuteLayer``.
Parameters Parameters
---------- ----------
order : list of int order : sequence of int
The permutation. Refer `PermuteParameter.order`_. The permutation. Refer `PermuteParameter.order`_.
""" """
...@@ -244,10 +244,10 @@ class PermuteLayer(Layer): ...@@ -244,10 +244,10 @@ class PermuteLayer(Layer):
in LayerParameter.permute_param.order]} in LayerParameter.permute_param.order]}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Transpose(bottom, **self.arguments) return _ops.Transpose(bottom, **self.arguments)
class FlattenLayer(Layer): class FlattenLayer(_Layer):
"""The implementation of ``FlattenLayer``. """The implementation of ``FlattenLayer``.
Parameters Parameters
...@@ -266,10 +266,10 @@ class FlattenLayer(Layer): ...@@ -266,10 +266,10 @@ class FlattenLayer(Layer):
self.arguments = {'axis': axis, 'num_axes': num_axes} self.arguments = {'axis': axis, 'num_axes': num_axes}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Flatten(bottom, **self.arguments) return _ops.Flatten(bottom, **self.arguments)
class GatherLayer(Layer): class GatherLayer(_Layer):
"""The extended implementation of ``GatherOp``. """The extended implementation of ``GatherOp``.
Parameters Parameters
...@@ -285,10 +285,10 @@ class GatherLayer(Layer): ...@@ -285,10 +285,10 @@ class GatherLayer(Layer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
if not isinstance(bottom, (tuple, list)) or len(bottom) != 2: if not isinstance(bottom, (tuple, list)) or len(bottom) != 2:
raise ValueError('Expected two bottom blobs.') raise ValueError('Expected two bottom blobs.')
return dragon.ops.Gather(bottom[0], indices=bottom[1], **self.arguments) return _ops.Gather(bottom[0], indices=bottom[1], **self.arguments)
class SoftmaxLayer(Layer): class SoftmaxLayer(_Layer):
"""The implementation of ``SoftmaxLayer``. """The implementation of ``SoftmaxLayer``.
Parameters Parameters
...@@ -302,10 +302,10 @@ class SoftmaxLayer(Layer): ...@@ -302,10 +302,10 @@ class SoftmaxLayer(Layer):
self.arguments = {'axis': LayerParameter.softmax_param.axis} self.arguments = {'axis': LayerParameter.softmax_param.axis}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Softmax(bottom, **self.arguments) return _ops.Softmax(bottom, **self.arguments)
class ArgMaxLayer(Layer): class ArgMaxLayer(_Layer):
"""The implementation of ``ArgMaxLayer``. """The implementation of ``ArgMaxLayer``.
Parameters Parameters
...@@ -326,10 +326,10 @@ class ArgMaxLayer(Layer): ...@@ -326,10 +326,10 @@ class ArgMaxLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.ArgMax(bottom, **self.arguments) return _ops.ArgMax(bottom, **self.arguments)
class BatchNormLayer(Layer): class BatchNormLayer(_Layer):
"""The implementation of ``BatchNormLayer``. """The implementation of ``BatchNormLayer``.
Parameters Parameters
...@@ -359,10 +359,10 @@ class BatchNormLayer(Layer): ...@@ -359,10 +359,10 @@ class BatchNormLayer(Layer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return dragon.ops.BatchNorm(inputs, **self.arguments) return _ops.BatchNorm(inputs, **self.arguments)
class GroupNormLayer(Layer): class GroupNormLayer(_Layer):
"""The implementation of ``GroupNormLayer``. """The implementation of ``GroupNormLayer``.
Parameters Parameters
...@@ -386,10 +386,10 @@ class GroupNormLayer(Layer): ...@@ -386,10 +386,10 @@ class GroupNormLayer(Layer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return dragon.ops.GroupNorm(inputs, **self.arguments) return _ops.GroupNorm(inputs, **self.arguments)
class InstanceNormLayer(Layer): class InstanceNormLayer(_Layer):
"""The implementation of ``InstanceNormLayer``. """The implementation of ``InstanceNormLayer``.
Introduced by `[Ulyanov et.al, 2016] <https://arxiv.org/abs/1607.08022>`_ Introduced by `[Ulyanov et.al, 2016] <https://arxiv.org/abs/1607.08022>`_
...@@ -405,10 +405,10 @@ class InstanceNormLayer(Layer): ...@@ -405,10 +405,10 @@ class InstanceNormLayer(Layer):
self.arguments = {'eps': LayerParameter.instance_norm_param.eps, 'axis': 1} self.arguments = {'eps': LayerParameter.instance_norm_param.eps, 'axis': 1}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.InstanceNorm(bottom, **self.arguments) return _ops.InstanceNorm(bottom, **self.arguments)
class ScaleLayer(Layer): class ScaleLayer(_Layer):
"""The implementation of ``ScaleLayer``. """The implementation of ``ScaleLayer``.
Parameters Parameters
...@@ -439,10 +439,10 @@ class ScaleLayer(Layer): ...@@ -439,10 +439,10 @@ class ScaleLayer(Layer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
inputs = [bottom]+ [blob['data'] for blob in self._blobs] inputs = [bottom]+ [blob['data'] for blob in self._blobs]
return dragon.ops.Affine(inputs, **self.arguments) return _ops.Affine(inputs, **self.arguments)
class BNLayer(Layer): class BNLayer(_Layer):
"""The implementation of ``BNLayer``. """The implementation of ``BNLayer``.
Parameters Parameters
...@@ -477,10 +477,10 @@ class BNLayer(Layer): ...@@ -477,10 +477,10 @@ class BNLayer(Layer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return dragon.ops.BatchNorm(inputs, **self.arguments) return _ops.BatchNorm(inputs, **self.arguments)
class GNLayer(Layer): class GNLayer(_Layer):
"""The implementation of ``GNLayer``. """The implementation of ``GNLayer``.
Parameters Parameters
...@@ -509,10 +509,10 @@ class GNLayer(Layer): ...@@ -509,10 +509,10 @@ class GNLayer(Layer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return dragon.ops.GroupNorm(inputs, **self.arguments) return _ops.GroupNorm(inputs, **self.arguments)
class NormalizeLayer(Layer): class NormalizeLayer(_Layer):
"""The implementation of ``NormalizeLayer``. """The implementation of ``NormalizeLayer``.
Parameters Parameters
...@@ -542,13 +542,13 @@ class NormalizeLayer(Layer): ...@@ -542,13 +542,13 @@ class NormalizeLayer(Layer):
self.AddBlob(filler=self.GetFiller(param, 'scale_filler'), value=1) # scale self.AddBlob(filler=self.GetFiller(param, 'scale_filler'), value=1) # scale
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
norm_out = [dragon.ops.L2Norm(bottom, **self.l2norm_arguments)] norm_out = [_ops.L2Norm(bottom, **self.l2norm_arguments)]
return dragon.ops.Affine( return _ops.Affine(
norm_out + [blob['data'] for blob in self._blobs], norm_out + [blob['data'] for blob in self._blobs],
**self.affine_arguments) **self.affine_arguments)
class TileLayer(Layer): class TileLayer(_Layer):
"""The extended implementation of ``TileLayer``. """The extended implementation of ``TileLayer``.
Parameters Parameters
...@@ -565,10 +565,10 @@ class TileLayer(Layer): ...@@ -565,10 +565,10 @@ class TileLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Tile(bottom, **self.arguments) return _ops.Tile(bottom, **self.arguments)
class ReductionLayer(Layer): class ReductionLayer(_Layer):
"""The extended implementation of ``ReductionLayer``. """The extended implementation of ``ReductionLayer``.
Parameters Parameters
...@@ -591,10 +591,10 @@ class ReductionLayer(Layer): ...@@ -591,10 +591,10 @@ class ReductionLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Reduce(bottom, **self.arguments) return _ops.Reduce(bottom, **self.arguments)
class ExpandDimsLayer(Layer): class ExpandDimsLayer(_Layer):
"""The implementation of ``ExpandDimsLayer``. """The implementation of ``ExpandDimsLayer``.
Parameters Parameters
...@@ -608,29 +608,29 @@ class ExpandDimsLayer(Layer): ...@@ -608,29 +608,29 @@ class ExpandDimsLayer(Layer):
self.arguments = {'axis': LayerParameter.expand_dims_param.axis} self.arguments = {'axis': LayerParameter.expand_dims_param.axis}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.ExpandDims(bottom, **self.arguments) return _ops.ExpandDims(bottom, **self.arguments)
class StopGradientLayer(Layer): class StopGradientLayer(_Layer):
"""The implementation of ``StopGradientLayer``.""" """The implementation of ``StopGradientLayer``."""
def __init__(self, LayerParameter): def __init__(self, LayerParameter):
super(StopGradientLayer, self).__init__(LayerParameter) super(StopGradientLayer, self).__init__(LayerParameter)
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.StopGradient(bottom, **self.arguments) return _ops.StopGradient(bottom, **self.arguments)
class ProposalLayer(Layer): class ProposalLayer(_Layer):
"""The implementation of ``ProposalLayer``. """The implementation of ``ProposalLayer``.
Parameters Parameters
---------- ----------
stride : list of int stride : sequence of int
The stride of anchors. Refer ``ProposalParameter.stride``. The stride of anchors. Refer ``ProposalParameter.stride``.
scale : list of float scale : sequence of float
The scales of anchors. Refer `ProposalParameter.scale`_. The scales of anchors. Refer `ProposalParameter.scale`_.
ratio : list of float ratio : sequence of float
The ratios of anchors. Refer `ProposalParameter.ratio`_. The ratios of anchors. Refer `ProposalParameter.ratio`_.
pre_nms_top_n : int pre_nms_top_n : int
The num of anchors before nms. Refer `ProposalParameter.pre_nms_topn`_. The num of anchors before nms. Refer `ProposalParameter.pre_nms_topn`_.
...@@ -668,10 +668,10 @@ class ProposalLayer(Layer): ...@@ -668,10 +668,10 @@ class ProposalLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Proposal(bottom, **self.arguments) return _ops.Proposal(bottom, **self.arguments)
class CastLayer(Layer): class CastLayer(_Layer):
"""The implementation of ``CastLayer``. """The implementation of ``CastLayer``.
Parameters Parameters
...@@ -686,4 +686,4 @@ class CastLayer(Layer): ...@@ -686,4 +686,4 @@ class CastLayer(Layer):
self.arguments = {'dtype': param.dtype.lower()} self.arguments = {'dtype': param.dtype.lower()}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Cast(bottom, **self.arguments) return _ops.Cast(bottom, **self.arguments)
\ No newline at end of file \ No newline at end of file
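The layers above all follow one pattern: collect arguments in __init__ and emit a dragon op in LayerSetup. A hypothetical extra layer written in that style (the absolute import path of Layer is assumed; _ops.Square is one of the aliases exported earlier in this diff):

    from dragon import ops as _ops
    from dragon.vm.caffe.layers.layer import Layer as _Layer  # import path assumed

    class SquareLayer(_Layer):
        """A hypothetical layer that squares its input."""

        def __init__(self, LayerParameter):
            super(SquareLayer, self).__init__(LayerParameter)
            self.arguments = {}

        def LayerSetup(self, bottom):
            return _ops.Square(bottom, **self.arguments)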
...@@ -15,13 +15,12 @@ from __future__ import absolute_import ...@@ -15,13 +15,12 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from ..layer import Layer from ..layer import Layer as _Layer
class DataLayer(Layer): class DataLayer(_Layer):
""" """The implementation of ``DataLayer``.
The implementation of ``DataLayer``.
Different from ``Caffe``, we force the use of the `LMDB`_ backend. Different from ``Caffe``, we force the use of the `LMDB`_ backend.
...@@ -33,7 +32,7 @@ class DataLayer(Layer): ...@@ -33,7 +32,7 @@ class DataLayer(Layer):
The prefetch count. Refer `DataParameter.prefetch`_. The prefetch count. Refer `DataParameter.prefetch`_.
batch_size : int batch_size : int
The size of a mini-batch. Refer `DataParameter.batch_size`_. The size of a mini-batch. Refer `DataParameter.batch_size`_.
phase : caffe_pb2.Phase phase : Phase
The phase of layer. Refer `LayerParameter.phase`_. The phase of layer. Refer `LayerParameter.phase`_.
mirror : boolean mirror : boolean
Whether to randomly mirror. Refer `TransformationParameter.mirror`_. Whether to randomly mirror. Refer `TransformationParameter.mirror`_.
...@@ -49,9 +48,9 @@ class DataLayer(Layer): ...@@ -49,9 +48,9 @@ class DataLayer(Layer):
The min scale of the images. Extension of `TransformationParameter`_. The min scale of the images. Extension of `TransformationParameter`_.
max_random_scale : float max_random_scale : float
The max scale of the images. Extension of `TransformationParameter`_. The max scale of the images. Extension of `TransformationParameter`_.
dtype : caffe_pb2.MemoryDataParameter.DataType dtype : MemoryDataParameter.DataType
The output data type. ``FLOAT32`` or ``FLOAT16``. The output data type. *FLOAT32* or *FLOAT16*.
mean_value : list of float mean_value : sequence of float
The mean of each channel. Refer `TransformationParameter.mean_value`_. The mean of each channel. Refer `TransformationParameter.mean_value`_.
scale : float scale : float
The scaling factor. Refer `TransformationParameter.scale`_. The scaling factor. Refer `TransformationParameter.scale`_.
...@@ -93,20 +92,20 @@ class DataLayer(Layer): ...@@ -93,20 +92,20 @@ class DataLayer(Layer):
[1. / transform_param.scale] * 3 [1. / transform_param.scale] * 3
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
data, label = dragon.ops.LMDBData(**self.arguments) data, label = _ops.LMDBData(**self.arguments)
return dragon.ops.ImageData(data, **self.arguments), label return _ops.ImageData(data, **self.arguments), label
class MemoryDataLayer(Layer): class MemoryDataLayer(_Layer):
"""The implementation of ``MemoryDataLayer``. """The implementation of ``MemoryDataLayer``.
We extend it with ``FP16`` and ``NHWC => NCHW``. We extend it with ``FP16`` and ``NHWC => NCHW``.
Parameters Parameters
---------- ----------
dtype : caffe_pb2.MemoryDataParameter.DataType dtype : MemoryDataParameter.DataType
The output data type. ``FLOAT32`` or ``FLOAT16``. The output data type. ``FLOAT32`` or ``FLOAT16``.
mean_value : list of float mean_value : sequence of float
The mean of each channel. Refer `TransformationParameter.mean_value`_. The mean of each channel. Refer `TransformationParameter.mean_value`_.
scale : float scale : float
The scaling factor. Refer `TransformationParameter.scale`_. The scaling factor. Refer `TransformationParameter.scale`_.
...@@ -131,4 +130,4 @@ class MemoryDataLayer(Layer): ...@@ -131,4 +130,4 @@ class MemoryDataLayer(Layer):
[1. / transform_param.scale] * 3 [1. / transform_param.scale] * 3
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.ImageData(bottom, **self.arguments) return _ops.ImageData(bottom, **self.arguments)
\ No newline at end of file \ No newline at end of file
...@@ -15,11 +15,11 @@ from __future__ import absolute_import ...@@ -15,11 +15,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from ..layer import Layer from ..layer import Layer as _Layer
class SoftmaxWithLossLayer(Layer): class SoftmaxWithLossLayer(_Layer):
"""The implementation of ``SoftmaxWithLossLayer``. """The implementation of ``SoftmaxWithLossLayer``.
Parameters Parameters
...@@ -52,12 +52,12 @@ class SoftmaxWithLossLayer(Layer): ...@@ -52,12 +52,12 @@ class SoftmaxWithLossLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
loss = dragon.ops.SparseSoftmaxCrossEntropy(bottom, **self.arguments) loss = _ops.SparseSoftmaxCrossEntropy(bottom, **self.arguments)
if self._loss_weight is not None: loss *= self._loss_weight if self._loss_weight is not None: loss *= self._loss_weight
return loss return loss
class SigmoidCrossEntropyLossLayer(Layer): class SigmoidCrossEntropyLossLayer(_Layer):
"""The implementation of ``SigmoidCrossEntropyLossLayer``. """The implementation of ``SigmoidCrossEntropyLossLayer``.
Parameters Parameters
...@@ -79,12 +79,12 @@ class SigmoidCrossEntropyLossLayer(Layer): ...@@ -79,12 +79,12 @@ class SigmoidCrossEntropyLossLayer(Layer):
self.arguments = {'normalization': normalization} self.arguments = {'normalization': normalization}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
loss = dragon.ops.SigmoidCrossEntropy(bottom, **self.arguments) loss = _ops.SigmoidCrossEntropy(bottom, **self.arguments)
if self._loss_weight is not None: loss *= self._loss_weight if self._loss_weight is not None: loss *= self._loss_weight
return loss return loss
class L2LossLayer(Layer): class L2LossLayer(_Layer):
"""The implementation of ``L2LossLayer``. """The implementation of ``L2LossLayer``.
Parameters Parameters
...@@ -106,12 +106,12 @@ class L2LossLayer(Layer): ...@@ -106,12 +106,12 @@ class L2LossLayer(Layer):
self.arguments = {'normalization': normalization} self.arguments = {'normalization': normalization}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
loss = dragon.ops.L2Loss(bottom, **self.arguments) loss = _ops.L2Loss(bottom, **self.arguments)
if self._loss_weight is not None: loss *= self._loss_weight if self._loss_weight is not None: loss *= self._loss_weight
return loss return loss
class SmoothL1LossLayer(Layer): class SmoothL1LossLayer(_Layer):
"""The implementation of ``SmoothL1LossLayer``. """The implementation of ``SmoothL1LossLayer``.
Parameters Parameters
...@@ -140,12 +140,12 @@ class SmoothL1LossLayer(Layer): ...@@ -140,12 +140,12 @@ class SmoothL1LossLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
loss = dragon.ops.SmoothL1Loss(bottom, **self.arguments) loss = _ops.SmoothL1Loss(bottom, **self.arguments)
if self._loss_weight is not None: loss *= self._loss_weight if self._loss_weight is not None: loss *= self._loss_weight
return loss return loss
class SigmoidWithFocalLossLayer(Layer): class SigmoidWithFocalLossLayer(_Layer):
"""The implementation of ``SigmoidWithFocalLossLayer``. """The implementation of ``SigmoidWithFocalLossLayer``.
Parameters Parameters
...@@ -183,12 +183,12 @@ class SigmoidWithFocalLossLayer(Layer): ...@@ -183,12 +183,12 @@ class SigmoidWithFocalLossLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
loss = dragon.ops.SigmoidFocalLoss(bottom, **self.arguments) loss = _ops.SigmoidFocalLoss(bottom, **self.arguments)
if self._loss_weight is not None: loss *= self._loss_weight if self._loss_weight is not None: loss *= self._loss_weight
return loss return loss
class SoftmaxWithFocalLossLayer(Layer): class SoftmaxWithFocalLossLayer(_Layer):
"""The implementation of ``SoftmaxWithFocalLossLayer``. """The implementation of ``SoftmaxWithFocalLossLayer``.
Parameters Parameters
...@@ -227,6 +227,6 @@ class SoftmaxWithFocalLossLayer(Layer): ...@@ -227,6 +227,6 @@ class SoftmaxWithFocalLossLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
loss = dragon.ops.SoftmaxFocalLoss(bottom, **self.arguments) loss = _ops.SoftmaxFocalLoss(bottom, **self.arguments)
if self._loss_weight is not None: loss *= self._loss_weight if self._loss_weight is not None: loss *= self._loss_weight
return loss return loss
\ No newline at end of file
...@@ -15,11 +15,11 @@ from __future__ import absolute_import ...@@ -15,11 +15,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from ..layer import Layer from ..layer import Layer as _Layer
class MPIBroadcastLayer(Layer): class MPIBroadcastLayer(_Layer):
"""The implementation of ``MPIBroadcastLayer``. """The implementation of ``MPIBroadcastLayer``.
Parameters Parameters
...@@ -33,10 +33,10 @@ class MPIBroadcastLayer(Layer): ...@@ -33,10 +33,10 @@ class MPIBroadcastLayer(Layer):
self.arguments = {'root': LayerParameter.mpi_param.root} self.arguments = {'root': LayerParameter.mpi_param.root}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.MPIBroadcast(bottom, **self.arguments) return _ops.MPIBroadcast(bottom, **self.arguments)
class MPIGatherLayer(Layer): class MPIGatherLayer(_Layer):
"""The implementation of ``MPIGatherLayer``. """The implementation of ``MPIGatherLayer``.
Parameters Parameters
...@@ -53,4 +53,4 @@ class MPIGatherLayer(Layer): ...@@ -53,4 +53,4 @@ class MPIGatherLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.MPIGather(bottom, **self.arguments) return _ops.MPIGather(bottom, **self.arguments)
\ No newline at end of file \ No newline at end of file
...@@ -15,11 +15,11 @@ from __future__ import absolute_import ...@@ -15,11 +15,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from ..layer import Layer from ..layer import Layer as _Layer
class ReLULayer(Layer): class ReLULayer(_Layer):
"""The implementation of ``ReLULayer``. """The implementation of ``ReLULayer``.
Parameters Parameters
...@@ -35,10 +35,10 @@ class ReLULayer(Layer): ...@@ -35,10 +35,10 @@ class ReLULayer(Layer):
self.arguments = {'slope': param.negative_slope} self.arguments = {'slope': param.negative_slope}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Relu(bottom, **self.arguments) return _ops.Relu(bottom, **self.arguments)
class PReLULayer(Layer): class PReLULayer(_Layer):
"""The implementation of ``PReLULayer``. """The implementation of ``PReLULayer``.
Parameters Parameters
...@@ -61,10 +61,10 @@ class PReLULayer(Layer): ...@@ -61,10 +61,10 @@ class PReLULayer(Layer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return dragon.ops.PRelu(inputs, **self.arguments) return _ops.PRelu(inputs, **self.arguments)
class ELULayer(Layer): class ELULayer(_Layer):
"""The implementation of ``ELULayer``. """The implementation of ``ELULayer``.
Parameters Parameters
...@@ -78,40 +78,40 @@ class ELULayer(Layer): ...@@ -78,40 +78,40 @@ class ELULayer(Layer):
self.arguments = {'alpha': float(LayerParameter.elu_param.alpha)} self.arguments = {'alpha': float(LayerParameter.elu_param.alpha)}
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Elu(bottom, **self.arguments) return _ops.Elu(bottom, **self.arguments)
class SELULayer(Layer): class SELULayer(_Layer):
"""The implementation of ``SELULayer``.""" """The implementation of ``SELULayer``."""
def __init__(self, LayerParameter): def __init__(self, LayerParameter):
super(SELULayer, self).__init__(LayerParameter) super(SELULayer, self).__init__(LayerParameter)
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.SElu(bottom, **self.arguments) return _ops.SElu(bottom, **self.arguments)
class SigmoidLayer(Layer): class SigmoidLayer(_Layer):
"""The implementation of ``SigmoidLayer``.""" """The implementation of ``SigmoidLayer``."""
def __init__(self, LayerParameter): def __init__(self, LayerParameter):
super(SigmoidLayer, self).__init__(LayerParameter) super(SigmoidLayer, self).__init__(LayerParameter)
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Sigmoid(bottom, **self.arguments) return _ops.Sigmoid(bottom, **self.arguments)
class TanHLayer(Layer): class TanHLayer(_Layer):
"""The implementation of ``TanHLayer``.""" """The implementation of ``TanHLayer``."""
def __init__(self, LayerParameter): def __init__(self, LayerParameter):
super(TanHLayer, self).__init__(LayerParameter) super(TanHLayer, self).__init__(LayerParameter)
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Tanh(bottom, **self.arguments) return _ops.Tanh(bottom, **self.arguments)
class DropoutLayer(Layer): class DropoutLayer(_Layer):
"""The implementation of ``DropoutLayer``. """The implementation of ``DropoutLayer``.
Parameters Parameters
...@@ -132,10 +132,10 @@ class DropoutLayer(Layer): ...@@ -132,10 +132,10 @@ class DropoutLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Dropout(bottom, **self.arguments) return _ops.Dropout(bottom, **self.arguments)
class PowerLayer(Layer): class PowerLayer(_Layer):
"""The implementation of ``PowerLayer``. """The implementation of ``PowerLayer``.
Parameters Parameters
...@@ -158,4 +158,4 @@ class PowerLayer(Layer): ...@@ -158,4 +158,4 @@ class PowerLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Pow(bottom, **self.arguments) return _ops.Pow(bottom, **self.arguments)
\ No newline at end of file \ No newline at end of file
...@@ -15,11 +15,11 @@ from __future__ import absolute_import ...@@ -15,11 +15,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from ..layer import Layer from ..layer import Layer as _Layer
class ConvolutionLayer(Layer): class ConvolutionLayer(_Layer):
"""The implementation of ``ConvolutionLayer``. """The implementation of ``ConvolutionLayer``.
Parameters Parameters
...@@ -28,19 +28,19 @@ class ConvolutionLayer(Layer): ...@@ -28,19 +28,19 @@ class ConvolutionLayer(Layer):
The output channels. Refer `ConvolutionParameter.num_output`_. The output channels. Refer `ConvolutionParameter.num_output`_.
bias_term : boolean bias_term : boolean
Whether to use bias. Refer `ConvolutionParameter.bias_term`_. Whether to use bias. Refer `ConvolutionParameter.bias_term`_.
pad : list of int pad : sequence of int
The zero padding size(s). Refer `ConvolutionParameter.pad`_. The zero padding size(s). Refer `ConvolutionParameter.pad`_.
kernel_size : list of int kernel_size : sequence of int
The kernel size(s). Refer `ConvolutionParameter.kernel_size`_. The kernel size(s). Refer `ConvolutionParameter.kernel_size`_.
stride : list of int stride : sequence of int
The stride(s). Refer `ConvolutionParameter.stride`_. The stride(s). Refer `ConvolutionParameter.stride`_.
dilation : list of int dilation : sequence of int
The dilation(s). Refer `ConvolutionParameter.dilation`_. The dilation(s). Refer `ConvolutionParameter.dilation`_.
group : int group : int
The group size. Refer `ConvolutionParameter.group`_. The group size. Refer `ConvolutionParameter.group`_.
weight_filler : FillerParameter weight_filler : FillerParameter
The filler of weights. Refer `ConvolutionParameter.weight_filler`_. The filler of weights. Refer `ConvolutionParameter.weight_filler`_.
bias_filler : FillerParameters bias_filler : FillerParameter
The filler of bias. Refer `ConvolutionParameter.bias_filler`_. The filler of bias. Refer `ConvolutionParameter.bias_filler`_.
""" """
...@@ -76,10 +76,10 @@ class ConvolutionLayer(Layer): ...@@ -76,10 +76,10 @@ class ConvolutionLayer(Layer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return dragon.ops.Conv2d(inputs, **self.arguments) return _ops.Conv2d(inputs, **self.arguments)
class DepthwiseConvolutionLayer(Layer): class DepthwiseConvolutionLayer(_Layer):
"""The implementation of ``DepthwiseConvolutionLayer``. """The implementation of ``DepthwiseConvolutionLayer``.
Parameters Parameters
...@@ -88,15 +88,15 @@ class DepthwiseConvolutionLayer(Layer): ...@@ -88,15 +88,15 @@ class DepthwiseConvolutionLayer(Layer):
The output channels. Refer `ConvolutionParameter.num_output`_. The output channels. Refer `ConvolutionParameter.num_output`_.
bias_term : boolean bias_term : boolean
Whether to use bias. Refer `ConvolutionParameter.bias_term`_. Whether to use bias. Refer `ConvolutionParameter.bias_term`_.
pad : list of int pad : sequence of int
The zero padding size(s). Refer `ConvolutionParameter.pad`_. The zero padding size(s). Refer `ConvolutionParameter.pad`_.
kernel_size : list of int kernel_size : sequence of int
The kernel size(s). Refer `ConvolutionParameter.kernel_size`_. The kernel size(s). Refer `ConvolutionParameter.kernel_size`_.
stride : list of int stride : sequence of int
The stride(s). Refer `ConvolutionParameter.stride`_. The stride(s). Refer `ConvolutionParameter.stride`_.
weight_filler : FillerParameter weight_filler : FillerParameter
The filler of weights. Refer `ConvolutionParameter.weight_filler`_. The filler of weights. Refer `ConvolutionParameter.weight_filler`_.
bias_filler : FillerParameters bias_filler : FillerParameter
The filler of bias. Refer `ConvolutionParameter.bias_filler`_. The filler of bias. Refer `ConvolutionParameter.bias_filler`_.
""" """
...@@ -130,7 +130,7 @@ class DepthwiseConvolutionLayer(Layer): ...@@ -130,7 +130,7 @@ class DepthwiseConvolutionLayer(Layer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return dragon.ops.DepthwiseConv2d(inputs, **self.arguments) return _ops.DepthwiseConv2d(inputs, **self.arguments)
class DeconvolutionLayer(ConvolutionLayer): class DeconvolutionLayer(ConvolutionLayer):
...@@ -142,19 +142,19 @@ class DeconvolutionLayer(ConvolutionLayer): ...@@ -142,19 +142,19 @@ class DeconvolutionLayer(ConvolutionLayer):
The output channels. Refer `ConvolutionParameter.num_output`_. The output channels. Refer `ConvolutionParameter.num_output`_.
bias_term : boolean bias_term : boolean
Whether to use bias. Refer `ConvolutionParameter.bias_term`_. Whether to use bias. Refer `ConvolutionParameter.bias_term`_.
pad : list of int pad : sequence of int
The zero padding size(s). Refer `ConvolutionParameter.pad`_. The zero padding size(s). Refer `ConvolutionParameter.pad`_.
kernel_size : list of int kernel_size : sequence of int
The kernel size(s). Refer `ConvolutionParameter.kernel_size`_. The kernel size(s). Refer `ConvolutionParameter.kernel_size`_.
stride : list of int stride : sequence of int
The stride(s). Refer `ConvolutionParameter.stride`_. The stride(s). Refer `ConvolutionParameter.stride`_.
dilation : list of int dilation : sequence of int
The dilation(s). Refer `ConvolutionParameter.dilation`_. The dilation(s). Refer `ConvolutionParameter.dilation`_.
group : int group : int
The group size. Refer `ConvolutionParameter.group`_. The group size. Refer `ConvolutionParameter.group`_.
weight_filler : FillerParameter weight_filler : FillerParameter
The filler of weights. Refer `ConvolutionParameter.weight_filler`_. The filler of weights. Refer `ConvolutionParameter.weight_filler`_.
bias_filler : FillerParameters bias_filler : FillerParameter
The filler of bias. Refer `ConvolutionParameter.bias_filler`_. The filler of bias. Refer `ConvolutionParameter.bias_filler`_.
""" """
...@@ -163,29 +163,29 @@ class DeconvolutionLayer(ConvolutionLayer): ...@@ -163,29 +163,29 @@ class DeconvolutionLayer(ConvolutionLayer):
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return dragon.ops.ConvTranspose2d(inputs, **self.arguments) return _ops.ConvTranspose2d(inputs, **self.arguments)
class PoolingLayer(Layer): class PoolingLayer(_Layer):
"""The implementation of ``PoolingLayer``. """The implementation of ``PoolingLayer``.
Parameters Parameters
---------- ----------
pool : PoolMethod pool : PoolMethod
The method. Refer `PoolingParameter.pool`_. The method. Refer `PoolingParameter.pool`_.
pad : list of int pad : sequence of int
The zero padding size(s). Refer `PoolingParameter.pad`_. The zero padding size(s). Refer `PoolingParameter.pad`_.
pad_h : int pad_h : int
The padding size of height. Refer `PoolingParameter.pad_h`_. The padding size of height. Refer `PoolingParameter.pad_h`_.
pad_w : int pad_w : int
The padding size of width. Refer `PoolingParameter.pad_w`_. The padding size of width. Refer `PoolingParameter.pad_w`_.
kernel_size : list of int kernel_size : sequence of int
The kernel size(s). Refer `PoolingParameter.kernel_size`_. The kernel size(s). Refer `PoolingParameter.kernel_size`_.
kernel_h : int kernel_h : int
The kernel size of height. Refer `PoolingParameter.kernel_h`_. The kernel size of height. Refer `PoolingParameter.kernel_h`_.
kernel_w : int kernel_w : int
The kernel size of width. Refer `PoolingParameter.kernel_w`_. The kernel size of width. Refer `PoolingParameter.kernel_w`_.
stride : list of int stride : sequence of int
The strides. Refer `PoolingParameter.stride`_. The strides. Refer `PoolingParameter.stride`_.
stride_h : int stride_h : int
The stride of height. Refer `PoolingParameter.stride_h`_. The stride of height. Refer `PoolingParameter.stride_h`_.
...@@ -212,10 +212,10 @@ class PoolingLayer(Layer): ...@@ -212,10 +212,10 @@ class PoolingLayer(Layer):
else: self.arguments['strides'] = [param.stride_h, param.stride_w] else: self.arguments['strides'] = [param.stride_h, param.stride_w]
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.Pool2d(bottom, **self.arguments) return _ops.Pool2d(bottom, **self.arguments)
class ROIPoolingLayer(Layer): class ROIPoolingLayer(_Layer):
"""The implementation of ``ROIPoolingLayer``. """The implementation of ``ROIPoolingLayer``.
Parameters Parameters
...@@ -238,10 +238,10 @@ class ROIPoolingLayer(Layer): ...@@ -238,10 +238,10 @@ class ROIPoolingLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.ROIPool(bottom, **self.arguments) return _ops.ROIPool(bottom, **self.arguments)
class ROIAlignLayer(Layer): class ROIAlignLayer(_Layer):
"""The implementation of ``ROIAlignLayer``. """The implementation of ``ROIAlignLayer``.
Parameters Parameters
...@@ -264,10 +264,10 @@ class ROIAlignLayer(Layer): ...@@ -264,10 +264,10 @@ class ROIAlignLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.ROIAlign(bottom, **self.arguments) return _ops.ROIAlign(bottom, **self.arguments)
class LRNLayer(Layer): class LRNLayer(_Layer):
"""The implementation of ``LRNLayer``. """The implementation of ``LRNLayer``.
Parameters Parameters
...@@ -296,15 +296,15 @@ class LRNLayer(Layer): ...@@ -296,15 +296,15 @@ class LRNLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.LRN(bottom, **self.arguments) return _ops.LRN(bottom, **self.arguments)
class NNResizeLayer(Layer): class NNResizeLayer(_Layer):
"""The implementation of ``NNResizeLayer``. """The implementation of ``NNResizeLayer``.
Parameters Parameters
---------- ----------
shape : caffe_pb2.BlobShape shape : BlobShape
The output shape. Refer `ResizeParameter.shape`_. The output shape. Refer `ResizeParameter.shape`_.
fx : float fx : float
The scale factor of height. Refer `ResizeParameter.fx`_. The scale factor of height. Refer `ResizeParameter.fx`_.
...@@ -330,15 +330,15 @@ class NNResizeLayer(Layer): ...@@ -330,15 +330,15 @@ class NNResizeLayer(Layer):
raise ValueError('The second bottom should be provided to determine the shape.') raise ValueError('The second bottom should be provided to determine the shape.')
self.arguments['shape_like'] = bottom[1] self.arguments['shape_like'] = bottom[1]
bottom = bottom[0] bottom = bottom[0]
return dragon.ops.NNResize(bottom, **self.arguments) return _ops.NNResize(bottom, **self.arguments)
class BilinearResizeLayer(Layer): class BilinearResizeLayer(_Layer):
"""The implementation of ``BilinearResizeLayer``. """The implementation of ``BilinearResizeLayer``.
Parameters Parameters
---------- ----------
shape : caffe_pb2.BlobShape shape : BlobShape
The output shape. Refer `ResizeParameter.shape`_. The output shape. Refer `ResizeParameter.shape`_.
fx : float fx : float
The scale factor of height. Refer `ResizeParameter.fx`_. The scale factor of height. Refer `ResizeParameter.fx`_.
...@@ -364,10 +364,10 @@ class BilinearResizeLayer(Layer): ...@@ -364,10 +364,10 @@ class BilinearResizeLayer(Layer):
raise ValueError('The second bottom should be provided to determine the shape.') raise ValueError('The second bottom should be provided to determine the shape.')
self.arguments['shape_like'] = bottom[1] self.arguments['shape_like'] = bottom[1]
bottom = bottom[0] bottom = bottom[0]
return dragon.ops.BilinearResize(bottom, **self.arguments) return _ops.BilinearResize(bottom, **self.arguments)
class DropBlockLayer(Layer): class DropBlockLayer(_Layer):
"""The implementation of ``DropBlock2dLayer``. """The implementation of ``DropBlock2dLayer``.
Parameters Parameters
...@@ -394,4 +394,4 @@ class DropBlockLayer(Layer): ...@@ -394,4 +394,4 @@ class DropBlockLayer(Layer):
} }
def LayerSetup(self, bottom): def LayerSetup(self, bottom):
return dragon.ops.DropBlock2d(bottom, **self.arguments) return _ops.DropBlock2d(bottom, **self.arguments)
\ No newline at end of file \ No newline at end of file
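``NNResizeLayer`` and ``BilinearResizeLayer`` share the same dispatch on their bottoms: when a second bottom is given it only supplies the target shape through ``shape_like``, and the first bottom is the one actually resized. A standalone sketch of that dispatch; the helper name and the ``isinstance`` guard are assumptions, since the hunks above elide the original check:

    from dragon import ops as _ops

    def _setup_resize(resize_op, bottom, arguments):
        # When two bottoms are given, the second one only determines
        # the output shape (``shape_like``); otherwise resize directly.
        if isinstance(bottom, (list, tuple)):
            if len(bottom) < 2:
                raise ValueError(
                    'The second bottom should be provided to determine the shape.')
            arguments['shape_like'] = bottom[1]
            bottom = bottom[0]
        return resize_op(bottom, **arguments)

    # e.g. _setup_resize(_ops.NNResize, [small, reference], {}) resizes
    # ``small`` to the spatial shape of ``reference``.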
...@@ -15,10 +15,10 @@ from __future__ import absolute_import ...@@ -15,10 +15,10 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import config as _cfg
_GLOBAL_ROOT_CAFFE_SOLVER = True _GLOBAL_CAFFE_ROOT_SOLVER = True
def set_mode_cpu(): def set_mode_cpu():
...@@ -33,7 +33,7 @@ def set_mode_cpu(): ...@@ -33,7 +33,7 @@ def set_mode_cpu():
The implementation of `set_mode_cpu(_caffe.cpp, L51)`_. The implementation of `set_mode_cpu(_caffe.cpp, L51)`_.
""" """
dragon.config.EnableCPU() _cfg.EnableCPU()
def set_mode_gpu(): def set_mode_gpu():
...@@ -48,7 +48,7 @@ def set_mode_gpu(): ...@@ -48,7 +48,7 @@ def set_mode_gpu():
The implementation of `set_mode_gpu(_caffe.cpp, L52)`_. The implementation of `set_mode_gpu(_caffe.cpp, L52)`_.
""" """
dragon.config.EnableCUDA() _cfg.EnableCUDA()
def set_device(device): def set_device(device):
...@@ -63,7 +63,7 @@ def set_device(device): ...@@ -63,7 +63,7 @@ def set_device(device):
The implementation of `SetDevice(common.cpp, L65)`_. The implementation of `SetDevice(common.cpp, L65)`_.
""" """
dragon.config.SetGPU(device) _cfg.SetGPU(device)
def set_random_seed(seed): def set_random_seed(seed):
...@@ -83,7 +83,7 @@ def set_random_seed(seed): ...@@ -83,7 +83,7 @@ def set_random_seed(seed):
The implementation of `set_random_seed(_caffe.cpp, L71)`_. The implementation of `set_random_seed(_caffe.cpp, L71)`_.
""" """
dragon.config.SetRandomSeed(seed) _cfg.SetRandomSeed(seed)
def root_solver(): def root_solver():
...@@ -99,7 +99,7 @@ def root_solver(): ...@@ -99,7 +99,7 @@ def root_solver():
The implementation of `root_solver(common.hpp, L164)`_. The implementation of `root_solver(common.hpp, L164)`_.
""" """
return _GLOBAL_ROOT_CAFFE_SOLVER return _GLOBAL_CAFFE_ROOT_SOLVER
def set_root_solver(val): def set_root_solver(val):
...@@ -115,5 +115,5 @@ def set_root_solver(val): ...@@ -115,5 +115,5 @@ def set_root_solver(val):
The implementation of `set_root_solver(common.hpp, L165)`_. The implementation of `set_root_solver(common.hpp, L165)`_.
""" """
global _GLOBAL_ROOT_CAFFE_SOLVER global _GLOBAL_CAFFE_ROOT_SOLVER
_GLOBAL_ROOT_CAFFE_SOLVER = val _GLOBAL_CAFFE_ROOT_SOLVER = val
\ No newline at end of file \ No newline at end of file
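These wrappers only re-route the pycaffe-style globals to ``dragon.config``. A usage sketch; the device id and seed are arbitrary:

    from dragon.vm.caffe import misc as caffe_misc

    caffe_misc.set_mode_gpu()       # -> dragon.config.EnableCUDA()
    caffe_misc.set_device(0)        # -> dragon.config.SetGPU(0)
    caffe_misc.set_random_seed(3)   # -> dragon.config.SetRandomSeed(3)

    # The root-solver flag is a plain module-level boolean.
    caffe_misc.set_root_solver(True)
    assert caffe_misc.root_solver()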
...@@ -15,12 +15,16 @@ from __future__ import absolute_import ...@@ -15,12 +15,16 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon
from collections import OrderedDict from collections import OrderedDict
from google.protobuf.text_format import Parse as parse_text_proto from google.protobuf.text_format import Parse as _parse_text_proto
from dragon.vm.caffe import layers as layer_factory
from dragon.vm.caffe.proto import caffe_pb2 as pb from dragon.core.tensor import Tensor as _Tensor
from dragon.core import workspace as _workspace
from dragon.vm.theano.gradient import grad as _Grad
from dragon.vm.theano.compile.function import function as _Function
from dragon.vm.caffe import layers as _layer_factory
from dragon.vm.caffe.proto import caffe_pb2 as _proto_def
class Blob(object): class Blob(object):
...@@ -89,8 +93,8 @@ class Net(object): ...@@ -89,8 +93,8 @@ class Net(object):
The implementation of `Net_Init(_caffe.cpp, L109)`_. The implementation of `Net_Init(_caffe.cpp, L109)`_.
""" """
self._net = pb.NetParameter() self._net = _proto_def.NetParameter()
parse_text_proto(open(proto_txt,'r').read(), self._net) _parse_text_proto(open(proto_txt,'r').read(), self._net)
self._phase = phase self._phase = phase
self._layers = [] self._layers = []
self._inputs_to_tensors = {} self._inputs_to_tensors = {}
...@@ -100,16 +104,17 @@ class Net(object): ...@@ -100,16 +104,17 @@ class Net(object):
if len(self._net.input) > 0: if len(self._net.input) > 0:
for input in self._net.input: for input in self._net.input:
if not input in self._blobs: if not input in self._blobs:
variable = dragon.Tensor(input).Variable() variable = _Tensor(input).Variable()
self._blobs[input] = { self._blobs[input] = {
'data': variable, 'data': variable,
'diff': dragon.Tensor.Ref(variable.name + '_grad'), 'diff': _Tensor.Ref(variable.name + '_grad'),
} }
self._inputs_to_tensors[input] = self._blobs[input]['data'] self._inputs_to_tensors[input] = self._blobs[input]['data']
for layer in self._net.layer: for layer in self._net.layer:
if not self.FilterLayer(layer): continue if not self.FilterLayer(layer): continue
self._layers.append(getattr(layer_factory, layer.type + 'Layer')(layer)) self._layers.append(getattr(
_layer_factory, layer.type + 'Layer')(layer))
self.Setup() self.Setup()
...@@ -199,7 +204,7 @@ class Net(object): ...@@ -199,7 +204,7 @@ class Net(object):
for idx, top in enumerate(layer._top): for idx, top in enumerate(layer._top):
self._blobs[top] = { self._blobs[top] = {
'data': outputs[idx], 'data': outputs[idx],
'diff': dragon.Tensor.Ref(outputs[idx].name + '_grad'), 'diff': _Tensor.Ref(outputs[idx].name + '_grad'),
} }
self._net_outputs.add(top) self._net_outputs.add(top)
...@@ -271,14 +276,14 @@ class Net(object): ...@@ -271,14 +276,14 @@ class Net(object):
for loss in self.losses: for loss in self.losses:
for var in self.trainable_variables: for var in self.trainable_variables:
dragon.grad(loss, var) _Grad(loss, var)
self._function = dragon.function( self._function = _Function(
outputs=[self.blobs[key].data outputs=[self.blobs[key].data
for key in self.outputs]) for key in self.outputs])
if hasattr(self, '_model'): if hasattr(self, '_model'):
dragon.workspace.Restore(self._model, format='caffe') _workspace.Restore(self._model, format='caffe')
return self._function return self._function
...@@ -299,7 +304,7 @@ class Net(object): ...@@ -299,7 +304,7 @@ class Net(object):
The implementation of `CopyTrainedLayersFromBinaryProto(net.cpp, L780)`_. The implementation of `CopyTrainedLayersFromBinaryProto(net.cpp, L780)`_.
""" """
dragon.workspace.Restore(model, format='caffe') _workspace.Restore(model, format='caffe')
def forward(self, **kwargs): def forward(self, **kwargs):
"""Forward pass. [**PyCaffe Style**] """Forward pass. [**PyCaffe Style**]
...@@ -322,11 +327,11 @@ class Net(object): ...@@ -322,11 +327,11 @@ class Net(object):
def GetOutputs(net, net_outputs): def GetOutputs(net, net_outputs):
ret = {} ret = {}
for output in net_outputs: for output in net_outputs:
ret[output] = dragon.workspace.FetchTensor(net.blobs[output].data) ret[output] = net.blobs[output].data.get_value()
return ret return ret
for name, blob in kwargs.items(): for name, blob in kwargs.items():
dragon.workspace.FeedTensor(self._inputs_to_tensors[name], blob) _workspace.FeedTensor(self._inputs_to_tensors[name], blob)
self.function()(return_outputs=False, stage='forward') self.function()(return_outputs=False, stage='forward')
...@@ -347,7 +352,7 @@ class Net(object): ...@@ -347,7 +352,7 @@ class Net(object):
""" """
for name, blob in kwargs.items(): for name, blob in kwargs.items():
dragon.workspace.FeedTensor(self._inputs_to_tensors[name], blob) _workspace.FeedTensor(self._inputs_to_tensors[name], blob)
self.function()(return_outputs=False, stage='forward') self.function()(return_outputs=False, stage='forward')
def backward(self, **kwargs): def backward(self, **kwargs):
...@@ -368,7 +373,7 @@ class Net(object): ...@@ -368,7 +373,7 @@ class Net(object):
""" """
for name, blob in kwargs.items(): for name, blob in kwargs.items():
dragon.workspace.FeedTensor(self.blobs[name].diff, blob) _workspace.FeedTensor(self.blobs[name].diff, blob)
self.function()(return_outputs=False, stage='backward') self.function()(return_outputs=False, stage='backward')
def save(self, filename): def save(self, filename):
...@@ -399,7 +404,7 @@ class Net(object): ...@@ -399,7 +404,7 @@ class Net(object):
if param.data.name not in keys: if param.data.name not in keys:
tensors.append(param.data) tensors.append(param.data)
keys.add(param.data.name) keys.add(param.data.name)
dragon.workspace.Snapshot(tensors, filename, suffix='', format='caffe') _workspace.Snapshot(tensors, filename, suffix='', format='caffe')
@property @property
def blobs(self): def blobs(self):
......
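The refactored ``Net`` keeps the pycaffe calling convention: inputs are fed by keyword, the compiled function runs the forward stage, and values are read back from the blobs. A usage sketch; the prototxt path, model file, and blob names are placeholders, ``copy_from`` is the pycaffe name implied by the ``CopyTrainedLayersFromBinaryProto`` docstring above, and the dict returned by ``forward`` is assumed from the ``GetOutputs`` helper:

    import numpy

    from dragon.vm.caffe.net import Net

    net = Net('deploy.prototxt', 'TEST')       # placeholder prototxt
    net.copy_from('pretrained.caffemodel')     # caffe-format weights

    # Feed the named inputs, run the forward stage, collect the outputs.
    outputs = net.forward(data=numpy.zeros((1, 3, 224, 224), 'float32'))

    # Blobs stay accessible in the pycaffe style.
    prob = net.blobs['prob'].data.get_value()

    net.save('exported')                       # snapshot the parameters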
...@@ -16,12 +16,16 @@ from __future__ import division ...@@ -16,12 +16,16 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import time import time
import dragon
from google.protobuf.text_format import Parse as parse_text_proto
from dragon.vm.caffe.misc import root_solver from dragon import updaters as _updaters
from dragon.vm.caffe.net import Net from dragon.core import mpi as _mpi
from dragon.vm.caffe.proto import caffe_pb2 as pb from dragon.core import workspace as _workspace
from google.protobuf.text_format import Parse as _parse_text_proto
from dragon.vm.caffe.net import Net as _Net
from dragon.vm.caffe.proto import caffe_pb2 as _proto_def
from dragon.vm.caffe.misc import root_solver as _root_solver
from dragon.vm.theano.compile.function import function as _Function
class Solver(object): class Solver(object):
...@@ -48,8 +52,8 @@ class Solver(object): ...@@ -48,8 +52,8 @@ class Solver(object):
>>> solver = Solver('solver.prototxt') >>> solver = Solver('solver.prototxt')
""" """
self._param = pb.SolverParameter() self._param = _proto_def.SolverParameter()
parse_text_proto(open(proto_txt, 'r').read(), self._param) _parse_text_proto(open(proto_txt, 'r').read(), self._param)
if self._param.iter_size > 1: if self._param.iter_size > 1:
raise NotImplementedError('Gradient accumulation is deprecated.') raise NotImplementedError('Gradient accumulation is deprecated.')
self._net = None self._net = None
...@@ -75,12 +79,12 @@ class Solver(object): ...@@ -75,12 +79,12 @@ class Solver(object):
""" """
if self._param.HasField('net'): if self._param.HasField('net'):
self._net = Net(self._param.net, "TRAIN") self._net = _Net(self._param.net, "TRAIN")
if self._param.HasField('train_net'): if self._param.HasField('train_net'):
if self._net is not None: if self._net is not None:
raise RuntimeError('net and train_net can not both be specified.') raise RuntimeError('net and train_net can not both be specified.')
self._net = Net(self._param.train_net, "TRAIN") self._net = _Net(self._param.train_net, "TRAIN")
def InitTestNets(self): def InitTestNets(self):
"""Initialize the test nets. """Initialize the test nets.
...@@ -94,10 +98,10 @@ class Solver(object): ...@@ -94,10 +98,10 @@ class Solver(object):
The implementation of `InitTestNets(solver.cpp, L104)`_. The implementation of `InitTestNets(solver.cpp, L104)`_.
""" """
if dragon.mpi.Is_Init(): if _mpi.Is_Init():
idx, group = dragon.mpi.AllowParallel() rank, group = _mpi.AllowParallel()
# Only the root in a parallel group can test # Only the root in a parallel group can test
if idx != -1 and dragon.mpi.Rank() != group[0]: return if rank != -1 and _mpi.Rank() != group[0]: return
num_test_net = len(self._param.test_iter) num_test_net = len(self._param.test_iter)
if num_test_net > 0: if num_test_net > 0:
...@@ -106,12 +110,12 @@ class Solver(object): ...@@ -106,12 +110,12 @@ class Solver(object):
if len(self._param.test_net) > 0: if len(self._param.test_net) > 0:
for test_net in self._param.test_net: for test_net in self._param.test_net:
self._test_nets.append(Net(test_net, "TEST")) self._test_nets.append(_Net(test_net, "TEST"))
num_test_net -= len(self._param.test_net) num_test_net -= len(self._param.test_net)
# Consider generic_net # Consider generic_net
if num_test_net > 0: if num_test_net > 0:
self._test_nets.append(Net(self._param.net, "TEST")) self._test_nets.append(_Net(self._param.net, "TEST"))
def BuildNets(self): def BuildNets(self):
"""Build the nets. """Build the nets.
...@@ -164,7 +168,7 @@ class Solver(object): ...@@ -164,7 +168,7 @@ class Solver(object):
blob.decay_multiplier) blob.decay_multiplier)
# Compile # Compile
self.update = dragon.function(updater=self.optimizer) self.update = _Function(updater=self.optimizer)
def GetLearningRate(self): def GetLearningRate(self):
"""Get learning rate based on the preset policy. """Get learning rate based on the preset policy.
...@@ -244,7 +248,7 @@ class Solver(object): ...@@ -244,7 +248,7 @@ class Solver(object):
for iter in range(test_iter): for iter in range(test_iter):
self.tests[test_idx](return_outputs=False) self.tests[test_idx](return_outputs=False)
if not root_solver(): continue if not _root_solver(): continue
if iter == 0: if iter == 0:
for key in net.outputs: for key in net.outputs:
values = net.blobs[key].data.get_value().flatten() values = net.blobs[key].data.get_value().flatten()
...@@ -259,7 +263,7 @@ class Solver(object): ...@@ -259,7 +263,7 @@ class Solver(object):
test_score[i] += value test_score[i] += value
i += 1 i += 1
if not root_solver(): return if not _root_solver(): return
print('Iteration {}, Test net #{}'.format(self.iter, test_idx)) print('Iteration {}, Test net #{}'.format(self.iter, test_idx))
for idx, score in enumerate(test_score): for idx, score in enumerate(test_score):
...@@ -299,12 +303,12 @@ class Solver(object): ...@@ -299,12 +303,12 @@ class Solver(object):
loss = 0.0 loss = 0.0
for i in range(self._param.iter_size): for i in range(self._param.iter_size):
self.train(return_outputs=False) self.train(return_outputs=False)
if root_solver(): if _root_solver():
for e in self.net.losses: for e in self.net.losses:
values = e.get_value().flatten() values = e.get_value().flatten()
for v in values: loss += v for v in values: loss += v
if root_solver(): if _root_solver():
loss /= self._param.iter_size loss /= self._param.iter_size
if len(loss_vec) < self._param.average_loss: if len(loss_vec) < self._param.average_loss:
loss_vec.append(loss) loss_vec.append(loss)
...@@ -319,7 +323,7 @@ class Solver(object): ...@@ -319,7 +323,7 @@ class Solver(object):
self.update() self.update()
# Display # Display
if root_solver() and self._param.display: if _root_solver() and self._param.display:
if self.iter % self._param.display == 0: if self.iter % self._param.display == 0:
base_lr = self.optimizer.base_lr base_lr = self.optimizer.base_lr
print('Iteration %d, lr = %s, loss = %f, time = %.2fs' % \ print('Iteration %d, lr = %s, loss = %f, time = %.2fs' % \
...@@ -410,7 +414,7 @@ class Solver(object): ...@@ -410,7 +414,7 @@ class Solver(object):
""" """
tensors = [blob.data for blob in self._layer_blobs] tensors = [blob.data for blob in self._layer_blobs]
filename = "_iter_" + str(self.iter) filename = "_iter_" + str(self.iter)
dragon.workspace.Snapshot(tensors, filename, _workspace.Snapshot(tensors, filename,
prefix=self._param.snapshot_prefix, prefix=self._param.snapshot_prefix,
suffix='.caffemodel', format='caffe') suffix='.caffemodel', format='caffe')
...@@ -492,7 +496,7 @@ class SGDSolver(Solver): ...@@ -492,7 +496,7 @@ class SGDSolver(Solver):
""" """
def __init__(self, proto_txt): def __init__(self, proto_txt):
super(SGDSolver, self).__init__(proto_txt=proto_txt) super(SGDSolver, self).__init__(proto_txt=proto_txt)
self.optimizer = dragon.updaters.SGDUpdater(**self._optimizer_arguments) self.optimizer = _updaters.SGDUpdater(**self._optimizer_arguments)
self.BuildOptimizer() self.BuildOptimizer()
def ParseOptimizerArguments(self): def ParseOptimizerArguments(self):
...@@ -514,7 +518,7 @@ class NesterovSolver(Solver): ...@@ -514,7 +518,7 @@ class NesterovSolver(Solver):
""" """
def __init__(self, proto_txt): def __init__(self, proto_txt):
super(NesterovSolver, self).__init__(proto_txt=proto_txt) super(NesterovSolver, self).__init__(proto_txt=proto_txt)
self.optimizer = dragon.updaters.NesterovUpdater(**self._optimizer_arguments) self.optimizer = _updaters.NesterovUpdater(**self._optimizer_arguments)
self.BuildOptimizer() self.BuildOptimizer()
def ParseOptimizerArguments(self): def ParseOptimizerArguments(self):
...@@ -538,7 +542,7 @@ class RMSPropSolver(Solver): ...@@ -538,7 +542,7 @@ class RMSPropSolver(Solver):
""" """
def __init__(self, proto_txt): def __init__(self, proto_txt):
super(RMSPropSolver, self).__init__(proto_txt=proto_txt) super(RMSPropSolver, self).__init__(proto_txt=proto_txt)
self.optimizer = dragon.updaters.RMSPropUpdater(**self._optimizer_arguments) self.optimizer = _updaters.RMSPropUpdater(**self._optimizer_arguments)
self.BuildOptimizer() self.BuildOptimizer()
def ParseOptimizerArguments(self): def ParseOptimizerArguments(self):
...@@ -565,7 +569,7 @@ class AdamSolver(Solver): ...@@ -565,7 +569,7 @@ class AdamSolver(Solver):
""" """
def __init__(self, proto_txt): def __init__(self, proto_txt):
super(AdamSolver, self).__init__(proto_txt=proto_txt) super(AdamSolver, self).__init__(proto_txt=proto_txt)
self.optimizer = dragon.updaters.AdamUpdater(**self._optimizer_arguments) self.optimizer = _updaters.AdamUpdater(**self._optimizer_arguments)
self.BuildOptimizer() self.BuildOptimizer()
def ParseOptimizerArguments(self): def ParseOptimizerArguments(self):
......
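A usage sketch of the solver classes; 'solver.prototxt' is a placeholder, and ``step()`` is assumed to be the usual pycaffe-style entry point that drives the train/update/test loop shown in the hunks above:

    from dragon.vm.caffe.solver import SGDSolver

    solver = SGDSolver('solver.prototxt')   # parses SolverParameter, builds the nets

    # The training net and its blobs stay accessible through the solver.
    train_net = solver.net

    # Assumed entry point: each iteration runs the compiled forward
    # function, applies the updater, and logs/tests at the configured steps.
    solver.step(1000)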
...@@ -17,17 +17,18 @@ from __future__ import absolute_import ...@@ -17,17 +17,18 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy
import itertools import itertools
import numpy as np
from collections import defaultdict from collections import defaultdict
from onnx import (checker, mapping, numpy_helper, GraphProto, OperatorSetIdProto) from onnx import checker, mapping, numpy_helper, GraphProto, OperatorSetIdProto
from onnx.helper import make_tensor_value_info, make_model, printable_graph from onnx.helper import make_tensor_value_info, make_model, printable_graph
from dragon.vm.onnx.helper import \ from dragon.core import workspace as _workspace
(extract_initializer, extract_leaf_tensors, from dragon.vm.onnx.helper import native_run_graph
native_run_graph, fetch_initializer,) from dragon.vm.onnx.helper import fetch_initializer
from dragon.vm.onnx.helper import extract_initializer
from dragon.vm.onnx.helper import extract_leaf_tensors
from dragon.vm.onnx.nodes.factory import get_nodes_def from dragon.vm.onnx.nodes.factory import get_nodes_def
...@@ -104,15 +105,22 @@ class DragonFrontend(object): ...@@ -104,15 +105,22 @@ class DragonFrontend(object):
if run_native_graph and not enforce_no_running: if run_native_graph and not enforce_no_running:
inputs = {} inputs = {}
for name, (elem_type, shape) in value_info.items(): for name, (elem_type, shape) in value_info.items():
inputs[name] = np.random.randn(*shape).astype( inputs[name] = numpy.random.randn(*shape).astype(
mapping.TENSOR_TYPE_TO_NP_TYPE[elem_type]) mapping.TENSOR_TYPE_TO_NP_TYPE[elem_type])
ws, outputs, initializer = native_run_graph( ws, outputs, initializer = native_run_graph(
graph_def, inputs, initializer, init_func) graph_def, inputs, initializer, init_func)
for name in graph_def.output:
output = outputs[name]
elem_type = mapping.NP_TYPE_TO_TENSOR_TYPE[output.dtype]
shape = output.shape
value_info[name] = (elem_type, shape)
if enforce_no_running: if enforce_no_running:
# In some cases (e.g. PyTorch), the graph has already been run # In some cases (e.g. PyTorch), the graph has already been run
# and the outputs are already in ``value_info`` # and the outputs are already in ``value_info``
import dragon.core.workspace as ws ws = _workspace.get_default_workspace()
initializer = fetch_initializer(initializer) initializer = fetch_initializer(initializer)
# Prepare to make the graph # Prepare to make the graph
......
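The new hunk back-fills ``value_info`` for every graph output by mapping the fetched numpy array onto its ONNX element type. That mapping comes straight from ``onnx.mapping``; a minimal sketch of the step in isolation:

    import numpy
    from onnx import mapping

    def value_info_from_array(array):
        # Mirrors the back-fill above: (elem_type, shape) for one output.
        return mapping.NP_TYPE_TO_TENSOR_TYPE[array.dtype], array.shape

    print(value_info_from_array(numpy.zeros((1, 3), 'float32')))
    # -> (1, (1, 3)), since TensorProto.FLOAT == 1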
...@@ -21,8 +21,8 @@ import sys ...@@ -21,8 +21,8 @@ import sys
from onnx.backend.base import namedtupledict from onnx.backend.base import namedtupledict
from onnx import numpy_helper from onnx import numpy_helper
import dragon as dg from dragon.core import workspace as _workspace
from dragon.vm.onnx.workspace import Workspace from dragon.core.tensor import Tensor as _Tensor
INITIALIZER_TAG = { INITIALIZER_TAG = {
...@@ -65,7 +65,7 @@ def fetch_initializer(initializer): ...@@ -65,7 +65,7 @@ def fetch_initializer(initializer):
# Fetch the initializer # Fetch the initializer
return [ return [
numpy_helper.from_array( numpy_helper.from_array(
dg.workspace.FetchTensor(name), name=name) _workspace.FetchTensor(name), name=name)
for name in initializer for name in initializer
] ]
...@@ -87,32 +87,32 @@ def native_run_graph(graph_def, inputs, initializer, init_func=None): ...@@ -87,32 +87,32 @@ def native_run_graph(graph_def, inputs, initializer, init_func=None):
graph_def.arg[i].i = 0 graph_def.arg[i].i = 0
# Create an anonymous workspace # Create an anonymous workspace
ws = Workspace() ws = _workspace.Workspace()
with dg.ws_scope(ws.name): with ws.as_default():
# Register all the initializers before feeding them # Register all the initializers before feeding them
for name in initializer: for name in initializer:
dg.Tensor(name=name).Variable() _Tensor(name=name).Variable()
# Feed the given values if necessary # Feed the given values if necessary
if init_func: init_func() if init_func: init_func()
# Feed the external inputs # Feed the external inputs
for name, blob in inputs.items(): for name, blob in inputs.items():
dg.workspace.FeedTensor(name, blob) _workspace.FeedTensor(name, blob)
# Create and Run the graph # Create and Run the graph
graph_name = dg.workspace.CreateGraph(graph_def) graph_name = _workspace.CreateGraph(graph_def)
dg.workspace.RunGraph(graph_name, return_outputs=False) _workspace.RunGraph(graph_name, return_outputs=False)
# Fetch the outputs # Fetch the outputs
output_names = graph_def.output output_names = graph_def.output
output_values = [dg.workspace.FetchTensor(name) for name in output_names] output_values = [_workspace.FetchTensor(name) for name in output_names]
# Fetch the initializer # Fetch the initializer
initializer = [ initializer = [
numpy_helper.from_array( numpy_helper.from_array(
dg.workspace.FetchTensor(name), name=name) _workspace.FetchTensor(name), name=name)
for name in initializer for name in initializer
] ]
......
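``native_run_graph`` now relies on the per-object workspace API instead of the uuid-named ``ws_scope`` shim: a scratch ``Workspace`` is activated with ``as_default()``, tensors are fed, the graph is created and run, and the outputs are fetched inside that scope. A condensed sketch of the same pattern; ``graph_def`` stands for any Dragon ``GraphDef`` whose external inputs are listed in ``inputs``:

    from dragon.core import workspace as _workspace

    def run_in_scratch_workspace(graph_def, inputs):
        """Condensed sketch of the pattern used by ``native_run_graph``."""
        ws = _workspace.Workspace()                  # anonymous, isolated workspace
        with ws.as_default():
            for name, blob in inputs.items():
                _workspace.FeedTensor(name, blob)    # feed the external inputs
            graph_name = _workspace.CreateGraph(graph_def)
            _workspace.RunGraph(graph_name, return_outputs=False)
            # Fetch the outputs declared by the graph
            return {name: _workspace.FetchTensor(name)
                    for name in graph_def.output}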
...@@ -16,12 +16,12 @@ from __future__ import division ...@@ -16,12 +16,12 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
import numpy as np import numpy
from onnx import mapping
from google.protobuf.text_format import Parse as parse_text_proto
import dragon.proto.dragon_pb2 as pb from onnx import mapping as _mapping
import dragon.import_c_api as C from dragon.core import workspace as _workspace
from dragon.proto import dragon_pb2 as _proto_def
from google.protobuf.text_format import Parse as _parse_text_proto
from dragon.vm.theano.compile.function import Function from dragon.vm.theano.compile.function import Function
from dragon.vm.onnx.frontend import graph_def_to_onnx_model from dragon.vm.onnx.frontend import graph_def_to_onnx_model
...@@ -119,8 +119,8 @@ def export_from_graph_text( ...@@ -119,8 +119,8 @@ def export_from_graph_text(
""" """
with open(text_file, 'r') as rf: with open(text_file, 'r') as rf:
graph_def = pb.GraphDef() graph_def = _proto_def.GraphDef()
parse_text_proto(rf.read(), graph_def) _parse_text_proto(rf.read(), graph_def)
export_from_graph_def( export_from_graph_def(
graph_def=graph_def, graph_def=graph_def,
...@@ -148,8 +148,10 @@ def import_to_graph_def(model_path): ...@@ -148,8 +148,10 @@ def import_to_graph_def(model_path):
""" """
if not os.path.exists(model_path): if not os.path.exists(model_path):
raise ValueError('Given model({}) does not exist.'.format(model_path)) raise ValueError('Given model({}) does not exist.'.format(model_path))
graph_def = pb.GraphDef() graph_def = _proto_def.GraphDef()
serialized_proto = C.ImportONNXModel(model_path) serialized_proto = _workspace \
.get_default_workspace() \
.ImportONNXModel(model_path)
graph_def.ParseFromString(serialized_proto) graph_def.ParseFromString(serialized_proto)
return graph_def return graph_def
...@@ -238,4 +240,4 @@ def surgery_on_graph_def( ...@@ -238,4 +240,4 @@ def surgery_on_graph_def(
def make_value_info(shape, dtype='float32'): def make_value_info(shape, dtype='float32'):
return mapping.NP_TYPE_TO_TENSOR_TYPE[np.dtype(dtype)], shape return _mapping.NP_TYPE_TO_TENSOR_TYPE[numpy.dtype(dtype)], shape
\ No newline at end of file \ No newline at end of file
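For completeness, the text-proto parsing that ``export_from_graph_text`` builds on, shown in isolation; 'graph.pbtxt' is a placeholder path, and unlike the hunk above the sketch closes the file explicitly:

    from google.protobuf.text_format import Parse as _parse_text_proto
    from dragon.proto import dragon_pb2 as _proto_def

    graph_def = _proto_def.GraphDef()
    with open('graph.pbtxt', 'r') as rf:
        _parse_text_proto(rf.read(), graph_def)

    # The parsed GraphDef can then be handed to export_from_graph_def(...)
    # or serialized back with graph_def.SerializeToString().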
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/pytorch/pytorch/blob/master/caffe2/python/onnx/workspace.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import uuid
import dragon as dg
class Workspace(object):
def __init__(self):
self.name = 'onnx/' + str(uuid.uuid4())
def __getattr__(self, attr):
def f(*args, **kwargs):
with dg.ws_scope(self.name, ):
return getattr(dg.workspace, attr)(*args, **kwargs)
return f
def __del__(self):
self.ResetWorkspace(self.name)
\ No newline at end of file
...@@ -16,45 +16,42 @@ from __future__ import print_function ...@@ -16,45 +16,42 @@ from __future__ import print_function
import warnings import warnings
from collections import defaultdict from collections import defaultdict
import dragon from dragon.core import workspace as _workspace
from dragon.core.tensor import Tensor as _Tensor
from dragon.vm.theano.compile import function as _Function
from dragon.vm.tensorflow.protobuf import config_pb2 from dragon.vm.tensorflow.protobuf import config_pb2
from dragon.vm.tensorflow.training.optimizer import Optimizer from dragon.vm.tensorflow.training.optimizer import Optimizer
from dragon.vm.tensorflow.ops.variables import VariablesInitializer from dragon.vm.tensorflow.ops.variables import VariablesInitializer
from dragon.vm.tensorflow.framework import ops from dragon.vm.tensorflow.framework import ops
_GLOBAL_DATA_FLOW_KEYS = defaultdict(dict)
class _DataFlow(object): class _DataFlow(object):
"""DataFlow takes a group of expressions and """DataFlow takes a group of expressions and
the specified output tensors. the specified output tensors.
We store the flows that requiring the same output names, We store the flows that requiring the same output names,
i.e., those flows can be reused and should not to create a new graph. i.e., those flows can be reused and should not be created again.
""" """
def __init__(self, functions): def __init__(self, functions):
self.functions = functions self.functions = functions
def run(self, feed_dict=None): def run(self, feed_dict=None):
for i, function in enumerate(self.functions): for i, func in enumerate(self.functions):
if i == 0 and feed_dict is not None: if i == 0 and feed_dict is not None:
for tensor, value in feed_dict.items(): for tensor, value in feed_dict.items():
dragon.workspace.FeedTensor(tensor, value) _workspace.FeedTensor(tensor, value)
function(return_outputs=False) func(return_outputs=False)
@classmethod @classmethod
def try_get(cls, workspace, flow_key): def try_get(cls, graph_id, flow_key):
global _GLOBAL_DATA_FLOW_KEYS if flow_key in _GLOBAL_DATA_FLOWS[graph_id]:
if flow_key in _GLOBAL_DATA_FLOW_KEYS[workspace]: return _GLOBAL_DATA_FLOWS[graph_id][flow_key]
return _GLOBAL_DATA_FLOW_KEYS[workspace][flow_key]
@classmethod @classmethod
def try_add(cls, workspace, flow_key, flow): def try_add(cls, graph_id, flow_key, flow):
global _GLOBAL_DATA_FLOW_KEYS global _GLOBAL_DATA_FLOWS
_GLOBAL_DATA_FLOW_KEYS[workspace][flow_key] = flow _GLOBAL_DATA_FLOWS[graph_id][flow_key] = flow
class BaseSession(object): class BaseSession(object):
...@@ -115,7 +112,7 @@ class BaseSession(object): ...@@ -115,7 +112,7 @@ class BaseSession(object):
for e in fetches: for e in fetches:
if isinstance(e, Optimizer): optimizers.append(e) if isinstance(e, Optimizer): optimizers.append(e)
elif isinstance(e, VariablesInitializer): tensors.extend(e.var_list) elif isinstance(e, VariablesInitializer): tensors.extend(e.var_list)
elif isinstance(e, dragon.Tensor): tensors.append(e) elif isinstance(e, _Tensor): tensors.append(e)
# Find minimum solving targets # Find minimum solving targets
targets = set() targets = set()
...@@ -124,45 +121,45 @@ class BaseSession(object): ...@@ -124,45 +121,45 @@ class BaseSession(object):
for t in optimizer._targets: targets.add(t) for t in optimizer._targets: targets.add(t)
targets = list(targets) targets = list(targets)
gen_flow_key = tuple(e.name for e in targets) flow_key = tuple(e.name for e in targets)
# Does this data flow already exist? # Does this data flow already exist?
data_flow = _DataFlow.try_get( flow = _DataFlow.try_get(id(self._graph), flow_key)
self._graph._workspace, gen_flow_key)
# Run by feeding # Run by feeding
if feed_dict is not None: if feed_dict is not None:
# Check the feed dict # Check the feed dict
for key, value in feed_dict.items(): for key, value in feed_dict.items():
if not isinstance(key, dragon.Tensor): if not isinstance(key, _Tensor):
raise TypeError('The key of feed_dict key should be a Tensor.') raise TypeError('The key of ``feed_dict`` should be a Tensor.')
if key.shape is not None: if key.shape is not None:
# Align the number of dimensions # Align the number of dimensions
if len(key.shape) != len(value.shape): if len(key.shape) != len(value.shape):
raise RuntimeError( raise RuntimeError(
'The Tensor({}) was limited to {} dimensions, \ 'The Tensor({}) was limited to {} dimensions, '\
while feed a value with {} dimensions.' 'while feeding a value with {} dimensions.'
.format(key.name, len(key.shape), len(value.shape))) .format(key.name, len(key.shape), len(value.shape)))
# Verify each dimension # Verify each dimension
for i in range(len(key.shape)): for i in range(len(key.shape)):
if key.shape[i] is None: continue if key.shape[i] is None: continue
if key.shape[i] != value.shape[i]: if key.shape[i] != value.shape[i]:
raise RuntimeError( raise RuntimeError(
'The shape of Tensor({}) was limited as ('.format(key.name) + 'The shape of Tensor({}) was limited as ('.format(key.name) +
','.join([str(dim) for dim in key.shape]) + '), ' + ','.join([str(dim) for dim in key.shape]) + '), ' +
'while feed a value with (' + 'while feeding a value with (' +
','.join([str(dim) for dim in value.shape]) + ').')
# Create a new data flow if necessary # Create a new data flow if necessary
if data_flow is None: if flow is None:
functions = [dragon.function(outputs=targets)] functions = [_Function(outputs=targets)]
for optimizer in optimizers: for optimizer in optimizers:
functions.append(dragon.function( functions.append(_Function(
updater=optimizer.updater)) updater=optimizer.updater))
data_flow = _DataFlow(functions) flow = _DataFlow(functions)
_DataFlow.try_add(self.graph._workspace, gen_flow_key, data_flow) _DataFlow.try_add(id(self._graph), flow_key, flow)
# Run this data flow # Run this data flow
data_flow.run(feed_dict) flow.run(feed_dict)
# Fetch after running # Fetch after running
returns = [] returns = []
...@@ -233,4 +230,9 @@ class InteractiveSession(BaseSession): ...@@ -233,4 +230,9 @@ class InteractiveSession(BaseSession):
@staticmethod @staticmethod
def reset(target, containers=None, config=None): def reset(target, containers=None, config=None):
pass pass
\ No newline at end of file
# Store the flows for different graphs
# ThreadLocal is not necessary
_GLOBAL_DATA_FLOWS = defaultdict(dict)
\ No newline at end of file
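The session now caches compiled flows per Python graph object: the key is the tuple of target tensor names, and the outer dict is indexed by ``id(self._graph)``. A toy sketch of that keying scheme with stand-in objects:

    from collections import defaultdict

    # Same keying scheme as _GLOBAL_DATA_FLOWS above, with toy stand-ins.
    _FLOWS = defaultdict(dict)

    class ToyGraph(object):
        pass

    def get_or_create_flow(graph, targets, build):
        flow_key = tuple(targets)                  # names of the solving targets
        flow = _FLOWS[id(graph)].get(flow_key)     # one cache per graph object
        if flow is None:
            flow = build()                         # compile once, reuse afterwards
            _FLOWS[id(graph)][flow_key] = flow
        return flow

    g = ToyGraph()
    f1 = get_or_create_flow(g, ['loss'], lambda: object())
    f2 = get_or_create_flow(g, ['loss'], lambda: object())
    assert f1 is f2    # the second lookup reuses the compiled flow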
...@@ -13,8 +13,11 @@ from dragon.vm.tensorflow.framework import ops ...@@ -13,8 +13,11 @@ from dragon.vm.tensorflow.framework import ops
from dragon.vm.tensorflow.ops import var_scope as variable_scope from dragon.vm.tensorflow.ops import var_scope as variable_scope
def get_variables(scope=None, suffix=None, def get_variables(
collection=ops.GraphKeys.GLOBAL_VARIABLES): scope=None,
suffix=None,
collection=ops.GraphKeys.GLOBAL_VARIABLES,
):
if isinstance(scope, variable_scope.VariableScope): if isinstance(scope, variable_scope.VariableScope):
scope = scope.name scope = scope.name
if suffix is not None: if suffix is not None:
......
...@@ -19,25 +19,31 @@ from __future__ import print_function ...@@ -19,25 +19,31 @@ from __future__ import print_function
import math import math
from dragon.vm.tensorflow.framework import dtypes
from dragon.vm.tensorflow.ops import random_ops from dragon.vm.tensorflow.ops import random_ops
from dragon.vm.tensorflow.framework import dtypes
__all__ = ['xavier_initializer', def xavier_initializer(
'xavier_initializer_conv2d', uniform=True,
'variance_scaling_initializer'] seed=None,
dtype=dtypes.float32,
):
def xavier_initializer(uniform=True, seed=None, dtype=dtypes.float32): return variance_scaling_initializer(
return variance_scaling_initializer(factor=1.0, mode='FAN_AVG', factor=1.0,
uniform=uniform, seed=seed, dtype=dtype) mode='FAN_AVG',
uniform=uniform,
seed=seed,
xavier_initializer_conv2d = xavier_initializer dtype=dtype,
)
def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False, def variance_scaling_initializer(
seed=None, dtype=dtypes.float32): factor=2.0,
mode='FAN_IN',
uniform=False,
seed=None,
dtype=dtypes.float32,
):
if not dtype.is_floating: if not dtype.is_floating:
raise TypeError('Cannot create initializer for non-floating point type.') raise TypeError('Cannot create initializer for non-floating point type.')
if mode not in ['FAN_IN', 'FAN_OUT', 'FAN_AVG']: if mode not in ['FAN_IN', 'FAN_OUT', 'FAN_AVG']:
...@@ -79,3 +85,7 @@ def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False, ...@@ -79,3 +85,7 @@ def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False,
seed=seed) seed=seed)
return _initializer return _initializer
# Alias
xavier_initializer_conv2d = xavier_initializer
\ No newline at end of file
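``xavier_initializer`` is now spelled as ``variance_scaling_initializer`` with ``factor=1.0, mode='FAN_AVG', uniform=True``. Assuming the usual TensorFlow variance-scaling rule (a uniform limit of ``sqrt(3 * factor / n)`` with ``n`` the averaged fan), which the elided body is expected to follow, the resulting Xavier limit works out as below; this arithmetic is an assumption about that body, not a quote of it:

    import math

    def xavier_uniform_limit(fan_in, fan_out, factor=1.0):
        # Assumed FAN_AVG/uniform rule: limit = sqrt(3 * factor / n),
        # with n = (fan_in + fan_out) / 2.
        n = (fan_in + fan_out) / 2.0
        return math.sqrt(3.0 * factor / n)

    # A 3x3 conv mapping 64 -> 128 channels: fan_in = 3*3*64, fan_out = 3*3*128.
    print(round(xavier_uniform_limit(576, 1152), 4))   # ~0.0589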
...@@ -17,20 +17,14 @@ from __future__ import absolute_import ...@@ -17,20 +17,14 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from collections import defaultdict from dragon.vm.tensorflow.framework import ops
import dragon.ops as op_lib
import dragon.vm.tensorflow.framework.ops as ops
from dragon.vm.tensorflow.contrib.layers import initializers from dragon.vm.tensorflow.contrib.layers import initializers
from dragon.vm.tensorflow.ops import init_ops from dragon.vm.tensorflow.ops import init_ops
from dragon.vm.tensorflow.ops import nn from dragon.vm.tensorflow.ops import nn
from dragon.vm.tensorflow.ops import var_scope as vs from dragon.vm.tensorflow.ops import var_scope as vs
from dragon.vm.tensorflow.layers import layers from dragon.vm.tensorflow.layers import layers
from dragon.ops import Flatten as _FlattenOp
__all__ = ['flatten']
_LAYERS_UID_DICT = defaultdict(int)
DATA_FORMAT_NCHW = 'NCHW' DATA_FORMAT_NCHW = 'NCHW'
DATA_FORMAT_NHWC = 'NHWC' DATA_FORMAT_NHWC = 'NHWC'
...@@ -38,72 +32,72 @@ DATA_FORMAT_NCDHW = 'NCDHW' ...@@ -38,72 +32,72 @@ DATA_FORMAT_NCDHW = 'NCDHW'
DATA_FORMAT_NDHWC = 'NDHWC' DATA_FORMAT_NDHWC = 'NDHWC'
def _default_scope(scope, key, indicator): def avg_pool2d(
if scope is None: inputs,
return indicator kernel_size,
# global _LAYERS_UID_DICT stride=2,
# _LAYERS_UID_DICT[key] += 1 padding='VALID',
# return '{}{}'.format(indicator, _LAYERS_UID_DICT[key]) data_format=DATA_FORMAT_NHWC,
else: outputs_collections=None,
return scope scope=None,
):
def avg_pool2d(inputs,
kernel_size,
stride=2,
padding='VALID',
data_format=DATA_FORMAT_NHWC,
outputs_collections=None,
scope=None):
if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
raise ValueError('data_format has to be either NCHW or NHWC.') raise ValueError('data_format has to be either NCHW or NHWC.')
df = ('channels_first' if data_format and data_format.startswith('NC') df = ('channels_first' if data_format and data_format.startswith('NC')
else 'channels_last') else 'channels_last')
return layers.average_pooling2d(inputs=inputs, return layers.average_pooling2d(
pool_size=kernel_size, inputs=inputs,
strides=stride, pool_size=kernel_size,
padding=padding, strides=stride,
data_format=df) padding=padding,
data_format=df,
)
def max_pool2d(inputs,
kernel_size,
stride=2, def max_pool2d(
padding='VALID', inputs,
data_format=DATA_FORMAT_NHWC, kernel_size,
outputs_collections=None, stride=2,
scope=None): padding='VALID',
data_format=DATA_FORMAT_NHWC,
outputs_collections=None,
scope=None,
):
if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
raise ValueError('data_format has to be either NCHW or NHWC.') raise ValueError('data_format has to be either NCHW or NHWC.')
df = ('channels_first' if data_format and data_format.startswith('NC') df = ('channels_first' if data_format and data_format.startswith('NC')
else 'channels_last') else 'channels_last')
return layers.max_pooling2d(inputs=inputs, return layers.max_pooling2d(
pool_size=kernel_size, inputs=inputs,
strides=stride, pool_size=kernel_size,
padding=padding, strides=stride,
data_format=df) padding=padding,
data_format=df,
)
def convolution(inputs,
num_outputs,
kernel_size, def convolution(
stride=1, inputs,
padding='SAME', num_outputs,
data_format=None, kernel_size,
rate=1, stride=1,
activation_fn=nn.relu, padding='SAME',
normalizer_fn=None, data_format=None,
normalizer_params=None, rate=1,
weights_initializer=initializers.xavier_initializer(), activation_fn=nn.relu,
weights_regularizer=None, normalizer_fn=None,
biases_initializer=init_ops.zeros_initializer(), normalizer_params=None,
biases_regularizer=None, weights_initializer=initializers.xavier_initializer(),
reuse=None, weights_regularizer=None,
variables_collections=None, biases_initializer=init_ops.zeros_initializer(),
outputs_collections=None, biases_regularizer=None,
trainable=True, reuse=None,
scope=None): variables_collections=None,
scope = _default_scope(scope, 'CONVOLUTION', 'Conv') outputs_collections=None,
trainable=True,
scope=None,
):
scope = _default_scope(scope, 'Conv')
if data_format not in [None, 'NHWC', 'NCHW']: if data_format not in [None, 'NHWC', 'NCHW']:
raise ValueError('Invalid data_format: %r' % (data_format,)) raise ValueError('Invalid data_format: %r' % (data_format,))
data_format = 'channels_first' if data_format == 'NCHW' else 'channels_last' data_format = 'channels_first' if data_format == 'NCHW' else 'channels_last'
...@@ -126,7 +120,8 @@ def convolution(inputs, ...@@ -126,7 +120,8 @@ def convolution(inputs,
bias_regularizer=biases_regularizer, bias_regularizer=biases_regularizer,
activity_regularizer=None, activity_regularizer=None,
trainable=trainable, trainable=trainable,
reuse=reuse) reuse=reuse,
)
# Simple alias. # Simple alias.
...@@ -134,21 +129,23 @@ convolution2d = convolution ...@@ -134,21 +129,23 @@ convolution2d = convolution
conv2d = convolution2d conv2d = convolution2d
def fully_connected(inputs, def fully_connected(
num_outputs, inputs,
activation_fn=nn.relu, num_outputs,
normalizer_fn=None, activation_fn=nn.relu,
normalizer_params=None, normalizer_fn=None,
weights_initializer=initializers.xavier_initializer(), normalizer_params=None,
weights_regularizer=None, weights_initializer=initializers.xavier_initializer(),
biases_initializer=init_ops.zeros_initializer(), weights_regularizer=None,
biases_regularizer=None, biases_initializer=init_ops.zeros_initializer(),
reuse=None, biases_regularizer=None,
variables_collections=None, reuse=None,
outputs_collections=None, variables_collections=None,
trainable=True, outputs_collections=None,
scope=None): trainable=True,
scope = _default_scope(scope, 'FULLY_CONNECTED', 'fully_connected') scope=None,
):
scope = _default_scope(scope, 'fully_connected')
with vs.variable_scope(scope, reuse=reuse) as sc: with vs.variable_scope(scope, reuse=reuse) as sc:
return layers.dense( return layers.dense(
inputs=inputs, inputs=inputs,
...@@ -160,32 +157,35 @@ def fully_connected(inputs, ...@@ -160,32 +157,35 @@ def fully_connected(inputs,
bias_regularizer=biases_regularizer, bias_regularizer=biases_regularizer,
activity_regularizer=None, activity_regularizer=None,
trainable=trainable, trainable=trainable,
reuse=reuse) reuse=reuse,
)
def batch_norm(inputs,
decay=0.999, def batch_norm(
center=True, inputs,
scale=False, decay=0.999,
epsilon=0.001, center=True,
activation_fn=None, scale=False,
param_initializers=None, epsilon=0.001,
param_regularizers=None, activation_fn=None,
updates_collections=ops.GraphKeys.UPDATE_OPS, param_initializers=None,
is_training=True, param_regularizers=None,
reuse=None, updates_collections=ops.GraphKeys.UPDATE_OPS,
variables_collections=None, is_training=True,
outputs_collections=None, reuse=None,
trainable=True, variables_collections=None,
batch_weights=None, outputs_collections=None,
fused=False, trainable=True,
data_format=DATA_FORMAT_NHWC, batch_weights=None,
zero_debias_moving_mean=False, fused=False,
scope=None, data_format=DATA_FORMAT_NHWC,
renorm=False, zero_debias_moving_mean=False,
renorm_clipping=None, scope=None,
renorm_decay=0.99): renorm=False,
scope = _default_scope(scope, 'BATCH_NORM', 'BatchNorm') renorm_clipping=None,
renorm_decay=0.99,
):
scope = _default_scope(scope, 'BatchNorm')
if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
raise ValueError('data_format has to be either NCHW or NHWC.') raise ValueError('data_format has to be either NCHW or NHWC.')
axis = 1 if data_format == DATA_FORMAT_NCHW else -1 axis = 1 if data_format == DATA_FORMAT_NCHW else -1
...@@ -193,10 +193,14 @@ def batch_norm(inputs, ...@@ -193,10 +193,14 @@ def batch_norm(inputs,
with vs.variable_scope(scope, reuse=reuse) as sc: with vs.variable_scope(scope, reuse=reuse) as sc:
if not param_initializers: if not param_initializers:
param_initializers = {} param_initializers = {}
beta_initializer = param_initializers.get('beta', init_ops.zeros_initializer()) beta_initializer = param_initializers.get(
gamma_initializer = param_initializers.get('gamma', init_ops.ones_initializer()) 'beta', init_ops.zeros_initializer())
moving_mean_initializer = param_initializers.get('moving_mean', init_ops.zeros_initializer()) gamma_initializer = param_initializers.get(
moving_variance_initializer = param_initializers.get('moving_variance', init_ops.ones_initializer()) 'gamma', init_ops.ones_initializer())
moving_mean_initializer = param_initializers.get(
'moving_mean', init_ops.zeros_initializer())
moving_variance_initializer = param_initializers.get(
'moving_variance', init_ops.ones_initializer())
if not param_regularizers: if not param_regularizers:
param_regularizers = {} param_regularizers = {}
...@@ -222,11 +226,19 @@ def batch_norm(inputs, ...@@ -222,11 +226,19 @@ def batch_norm(inputs,
renorm_clipping=renorm_clipping, renorm_clipping=renorm_clipping,
renorm_momentum=renorm_decay, renorm_momentum=renorm_decay,
fused=fused, fused=fused,
training=is_training) training=is_training,
)
def flatten(
inputs,
outputs_collections=None,
scope=None,
):
return _FlattenOp(inputs, axis=0, keep_axes=2)
def flatten(inputs,
outputs_collections=None,
scope=None):
return op_lib.Flatten(inputs, axis=0, keep_axes=2)
def _default_scope(scope, indicator):
"""Return the default scope."""
if scope is None: return indicator
else: return scope
\ No newline at end of file
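For orientation, here is a minimal usage sketch of the slim-style wrappers refactored above. The hunk does not show the module path, so the `contrib.layers` import below is an assumption, as is re-using the `placeholder` helper from the `array_ops` hunk later in this diff; treat the snippet as illustrative only.

    import dragon.vm.tensorflow as tf
    # Assumed module path for the wrappers above (not shown in this hunk).
    from dragon.vm.tensorflow.contrib import layers as contrib_layers

    x = tf.placeholder(tf.float32, shape=[None, 3, 32, 32], name='data')
    # An 'NC...' data_format is resolved to 'channels_first' internally.
    h = contrib_layers.convolution(x, num_outputs=16, kernel_size=3, data_format='NCHW')
    h = contrib_layers.flatten(h)
    logits = contrib_layers.fully_connected(h, num_outputs=10, activation_fn=None)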
...@@ -13,60 +13,68 @@ from __future__ import absolute_import ...@@ -13,60 +13,68 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon import numpy
import numpy as np
from dragon.core import scope as _scope
from dragon.core import workspace as _workspace
from dragon.core.tensor import Tensor as _Tensor
def constant(value, dtype=None, shape=None, name=None, verify_shape=False):
def constant(
value,
dtype=None,
shape=None,
name=None,
verify_shape=False,
):
if dtype is not None: if dtype is not None:
if isinstance(value, np.ndarray): if isinstance(value, numpy.ndarray):
feed = value.astype(dtype.as_numpy_dtype) value = value.astype(dtype.as_numpy_dtype)
elif isinstance(value, list):
feed = np.array(value, dtype.as_numpy_dtype)
else: else:
feed = np.array([value], dtype.as_numpy_dtype) value = numpy.array(value, dtype.as_numpy_dtype)
else: else:
if isinstance(value, np.ndarray): feed = value if not isinstance(value, numpy.ndarray):
else: value = numpy.array(value)
feed = np.array(value)
# Discard the default float64 # Discard the default float64
if feed.dtype == np.float64: if value.dtype == numpy.float64:
feed = feed.astype(np.float32) value = value.astype(numpy.float32)
# Determine the shape # Determine the shape
if shape is not None: if shape is not None:
if feed.size == 1: if value.size == 1:
# Case 1: Broadcast with scalar value # Case 1: Broadcast with scalar value
c = feed.flatten()[0] scalar = value.flatten()[0]
feed = np.zeros(shape, feed.dtype) value = numpy.empty(shape, value.dtype)
feed.fill(c) value.fill(scalar)
else: else:
# Case 2: Reshape directly # Case 2: Reshape directly
if verify_shape: if verify_shape:
if shape is not None: if shape is not None:
if len(shape) != len(value.shape): if len(shape) != len(value.shape):
raise RuntimeError( raise RuntimeError(
'The constant was limited to {} dimensions, \ 'The constant was limited to {} dimensions, ' \
while feed a value with {} dimensions.'. 'while feeding a value with {} dimensions.'
format(len(shape), len(value.shape))) .format(len(shape), len(value.shape)))
for i in range(len(shape)): for i in range(len(shape)):
if shape[i] is None: continue if shape[i] is None: continue
if shape[i] != value.shape[i]: if shape[i] != value.shape[i]:
raise RuntimeError( raise RuntimeError(
'The shape of constant was limited as (' + 'The shape of constant was limited as (' +
','.join([str(dim) for dim in shape]) + '), ' + ','.join([str(dim) for dim in shape]) + '), ' +
'while feed a value with (' + ','.join([str(dim) for dim in value.shape]) + ').') 'while feeding a value with (' +
feed = feed.reshape(shape) ','.join([str(dim) for dim in value.shape]) + ').')
value = value.reshape(shape)
defined_name = dragon.workspace.GetDummyName( # Get an available name
dragon.get_default_name_scope() + defined_name = \
(name if name else 'Const'), _workspace.GetDummyName(
suffix=':0', domain='Tensor') basename=_scope.get_default_name_scope() +
(name if name else 'Const'),
suffix=':0', domain='Tensor')
# Feed into the workspace # Feed into the workspace
tensor = dragon.Tensor.Ref( return _Tensor.Ref(
name=defined_name, name=defined_name,
shape=list(feed.shape), shape=list(value.shape),
dtype=str(feed.dtype)) dtype=str(value.dtype)
tensor.set_value(feed) ).set_value(value)
return tensor \ No newline at end of file
\ No newline at end of file
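A short sketch of the rewritten `constant` helper above. It assumes `dragon.vm.tensorflow` re-exports `constant` at the package root the way stock TensorFlow does (the framework `__init__` below pulls in `constant_op.*`); tensor names and shapes are illustrative.

    import numpy
    import dragon.vm.tensorflow as tf

    # Case 1 above: a scalar broadcast (filled) into the requested shape.
    a = tf.constant(1.0, dtype=tf.float32, shape=[2, 3], name='filled')

    # Case 2 above: an array reshaped, with the shape verified first.
    b = tf.constant(
        numpy.arange(6).reshape(2, 3),
        dtype=tf.int32, shape=[2, 3], verify_shape=True)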
...@@ -13,9 +13,10 @@ from __future__ import absolute_import ...@@ -13,9 +13,10 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.vm.tensorflow.framework.ops import Graph # The Graph (i.e., the Workspace)
from dragon.core.workspace import Workspace as Graph
# Utilities used when building a Graph. # Utilities used when building a Graph
from dragon.vm.tensorflow.framework.ops import device from dragon.vm.tensorflow.framework.ops import device
from dragon.vm.tensorflow.framework.ops import name_scope from dragon.vm.tensorflow.framework.ops import name_scope
from dragon.vm.tensorflow.framework.ops import get_default_graph from dragon.vm.tensorflow.framework.ops import get_default_graph
...@@ -27,5 +28,6 @@ from dragon.vm.tensorflow.framework.ops import GraphKeys ...@@ -27,5 +28,6 @@ from dragon.vm.tensorflow.framework.ops import GraphKeys
from dragon.vm.tensorflow.framework.constant_op import * from dragon.vm.tensorflow.framework.constant_op import *
from dragon.vm.tensorflow.framework.dtypes import * from dragon.vm.tensorflow.framework.dtypes import *
# Utilities used to represent a Tensor
from dragon.vm.tensorflow.framework.tensor_shape import Dimension from dragon.vm.tensorflow.framework.tensor_shape import Dimension
from dragon.vm.tensorflow.framework.tensor_shape import TensorShape from dragon.vm.tensorflow.framework.tensor_shape import TensorShape
\ No newline at end of file
...@@ -17,16 +17,20 @@ from __future__ import absolute_import ...@@ -17,16 +17,20 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import re from dragon.core import tls as _tls
import uuid from dragon.core import scope as _scope
import threading from dragon.core import workspace as _workspace
import dragon from dragon.core.tensor import Tensor as _Tensor
from dragon.vm.tensorflow.framework import constant_op from dragon.vm.tensorflow.framework import constant_op
from dragon.vm.tensorflow.util import tf_contextlib
def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None): def convert_to_tensor(
value,
dtype=None,
name=None,
preferred_dtype=None,
):
"""Converts the given value to a Tensor. """Converts the given value to a Tensor.
Parameters Parameters
...@@ -46,73 +50,10 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None): ...@@ -46,73 +50,10 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None):
The output tensor. The output tensor.
""" """
if isinstance(value, dragon.Tensor): return value if isinstance(value, _Tensor): return value
return constant_op.constant(value, dtype=dtype, name=name) return constant_op.constant(value, dtype=dtype, name=name)
class Graph(object):
"""A wrapper to connect ``Function`` to ``Workspace``.
Note that official TensorFlow trace the expressions explicitly
in this class, while we have done in the virtual stack.
Besides, organizing a ``Flow``, i.e., expressions with specified
outputs should also be done here.
"""
def __init__(self):
self._collections = {}
self._workspace = 'tf/graph/' + str(uuid.uuid4())
def get_collection_ref(self, name):
coll_list = self._collections.get(name, None)
if coll_list is None:
coll_list = []
self._collections[name] = coll_list
return coll_list
def get_collection(self, name, scope=None):
coll_list = self._collections.get(name, None)
if coll_list is None:
return []
if scope is None:
return list(coll_list)
else:
filter_coll_list = []
regex = re.compile(scope)
for item in coll_list:
if hasattr(item, "name") and regex.match(item.name):
filter_coll_list.append(item)
return filter_coll_list
def add_to_collection(self, name, value):
if name not in self._collections:
self._collections[name] = [value]
else:
self._collections[name].append(value)
def add_to_collections(self, names, value):
for name in names:
self.add_to_collection(name, value)
def device(self, device_name_or_function):
if not isinstance(device_name_or_function, str):
raise TypeError('The device function should be a str.')
device_and_id = device_name_or_function.split('/')[1]
device, id = device_and_id.split(':')
if device not in ['cpu', 'gpu']:
raise ValueError('The device should either be cpu or gpu.')
try:
id = int(id)
except Exception as e:
raise ValueError('The device id should be a integer.')
return dragon.device_scope(device, device_id=id)
def as_default(self):
return _default_graph_stack.get_controller(self)
class GraphKeys(object): class GraphKeys(object):
GLOBAL_VARIABLES = "variables" GLOBAL_VARIABLES = "variables"
# Key to collect local variables that are local to the machine and are not # Key to collect local variables that are local to the machine and are not
...@@ -202,112 +143,15 @@ def add_to_collections(names, value): ...@@ -202,112 +143,15 @@ def add_to_collections(names, value):
def name_scope(name, default_name=None, values=None): def name_scope(name, default_name=None, values=None):
name = default_name if name is None else name name = default_name if name is None else name
name = '' if name is None else name name = '' if name is None else name
return dragon.name_scope(name) return _scope.name_scope(name)
##############################################
# #
# Default Stack #
# #
##############################################
class _DefaultStack(threading.local):
"""A thread-local stack of objects for providing implicit defaults."""
def __init__(self):
super(_DefaultStack, self).__init__()
self._enforce_nesting = True
self.stack = []
def get_default(self):
return self.stack[-1] if len(self.stack) >= 1 else None
def reset(self):
self.stack = []
def is_cleared(self):
return not self.stack
@property
def enforce_nesting(self):
return self._enforce_nesting
@enforce_nesting.setter
def enforce_nesting(self, value):
self._enforce_nesting = value
@tf_contextlib.contextmanager
def get_controller(self, default):
"""A context manager for manipulating a default stack."""
self.stack.append(default)
try:
yield default
finally:
# stack may be empty if reset() was called
if self.stack:
if self._enforce_nesting:
if self.stack[-1] is not default:
raise AssertionError(
"Nesting violated for default stack of %s objects" %
type(default))
self.stack.pop()
else:
self.stack.remove(default)
class _DefaultGraphStack(_DefaultStack):
"""A thread-local stack of objects for providing an implicit default graph."""
def __init__(self):
super(_DefaultGraphStack, self).__init__()
self._global_default_graph = None
def get_default(self):
"""Override that returns a global default if the stack is empty."""
ret = super(_DefaultGraphStack, self).get_default()
if ret is None:
ret = self._GetGlobalDefaultGraph()
return ret
def _GetGlobalDefaultGraph(self):
if self._global_default_graph is None:
# TODO(mrry): Perhaps log that the default graph is being used, or set
# provide some other feedback to prevent confusion when a mixture of
# the global default graph and an explicit graph are combined in the
# same process.
self._global_default_graph = Graph()
# Rewritten the random workspace name
self._global_default_graph._workspace = 'default'
return self._global_default_graph
def reset(self):
super(_DefaultGraphStack, self).reset()
# We should call dragon api to reset the workspace
dragon.workspace.ResetWorkspace(self._global_default_graph._workspace)
self._global_default_graph = None
@tf_contextlib.contextmanager
def get_controller(self, default):
with super(_DefaultGraphStack, self).get_controller(default) as g:
with dragon.ws_scope(g._workspace):
yield g
_default_graph_stack = _DefaultGraphStack()
_default_session_stack = _DefaultStack()
def get_default_graph(): def get_default_graph():
return _default_graph_stack.get_default() return _workspace.get_default_workspace()
def reset_default_graph(): def reset_default_graph():
if not _default_graph_stack.is_cleared(): _workspace.reset_default_workspace()
raise AssertionError("Do not use tf.reset_default_graph() to clear "
"nested graphs. If you need a cleared graph, "
"exit the nesting and create a new graph.")
_default_graph_stack.reset()
def default_session(session): def default_session(session):
...@@ -319,7 +163,17 @@ def get_default_session(): ...@@ -319,7 +163,17 @@ def get_default_session():
def device(device_name_or_function): def device(device_name_or_function):
return get_default_graph().device(device_name_or_function) if not isinstance(device_name_or_function, str):
raise TypeError('The device function should be a str.')
device_and_id = device_name_or_function.split('/')[1]
device, id = device_and_id.split(':')
if device not in ['cpu', 'gpu']:
raise ValueError('The device should either be cpu or gpu.')
try:
id = int(id)
except Exception as _:
raise ValueError('The device id should be an integer.')
return _scope.device_scope(device, device_id=id)
def _eval_using_default_session(tensors, feed_dict, session=None): def _eval_using_default_session(tensors, feed_dict, session=None):
...@@ -333,6 +187,10 @@ def _eval_using_default_session(tensors, feed_dict, session=None): ...@@ -333,6 +187,10 @@ def _eval_using_default_session(tensors, feed_dict, session=None):
return session.run(tensors, feed_dict) return session.run(tensors, feed_dict)
_default_session_stack = _tls.Stack()
# The Monkey Patching
# Require "import dragon.vm.tensorflow" # Require "import dragon.vm.tensorflow"
dragon.Tensor.eval = lambda self, feed_dict=None, session=None : \ _Tensor.eval = lambda self, feed_dict=None, session=None : \
_eval_using_default_session(self, feed_dict, session) _eval_using_default_session(self, feed_dict, session)
\ No newline at end of file
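With the old `Graph` wrapper removed, the default-graph helpers above now delegate to the workspace module, and `device()` parses the TF-style device string directly. A minimal sketch, assuming these symbols are re-exported at the `dragon.vm.tensorflow` root as in stock TensorFlow and that `name_scope`/`device` behave as context managers like their TF counterparts:

    import dragon.vm.tensorflow as tf

    ws = tf.get_default_graph()       # now the default dragon Workspace
    with tf.name_scope('block1'):
        with tf.device('/gpu:0'):     # parsed into _scope.device_scope('gpu', device_id=0)
            x = tf.constant([1., 2., 3.])

    tf.reset_default_graph()          # delegates to workspace.reset_default_workspace()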
...@@ -13,7 +13,7 @@ from __future__ import absolute_import ...@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.core.tensor import Tensor from dragon.core.tensor import Tensor as _Tensor
class Dimension(object): class Dimension(object):
...@@ -114,4 +114,5 @@ def get_shape(self): ...@@ -114,4 +114,5 @@ def get_shape(self):
return TensorShape(self.shape) return TensorShape(self.shape)
Tensor.get_shape = get_shape # The Monkey Patching
\ No newline at end of file _Tensor.get_shape = get_shape
\ No newline at end of file
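The monkey patch above gives every Dragon `Tensor` a TF-style `get_shape()` once `dragon.vm.tensorflow` has been imported. A small sketch, assuming the `TensorShape` class here mirrors TensorFlow's `ndims`/`as_list()` interface:

    import dragon.vm.tensorflow as tf

    x = tf.placeholder(tf.float32, shape=[None, 3, 224, 224])
    s = x.get_shape()                 # a TensorShape built from x.shape
    print(s.ndims, s.as_list())       # e.g. 4, [None, 3, 224, 224]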
...@@ -28,7 +28,13 @@ from dragon.vm.tensorflow.util import nest ...@@ -28,7 +28,13 @@ from dragon.vm.tensorflow.util import nest
class Layer(object): class Layer(object):
def __init__(self, trainable=True, name=None, dtype=dtypes.float32, **kwargs): def __init__(
self,
trainable=True,
name=None,
dtype=dtypes.float32,
**kwargs
):
allowed_kwargs = {'_scope', '_reuse'} allowed_kwargs = {'_scope', '_reuse'}
for kwarg in kwargs: for kwarg in kwargs:
if kwarg not in allowed_kwargs: if kwarg not in allowed_kwargs:
...@@ -79,13 +85,15 @@ class Layer(object): ...@@ -79,13 +85,15 @@ class Layer(object):
_add_elements_to_collection(self.updates, ops.GraphKeys.UPDATE_OPS) _add_elements_to_collection(self.updates, ops.GraphKeys.UPDATE_OPS)
return outputs return outputs
def add_variable(self, def add_variable(
name, self,
shape, name,
dtype=None, shape,
trainable=True, dtype=None,
initializer=None, trainable=True,
regularizer=None): initializer=None,
regularizer=None,
):
if dtype is None: dtype = self.dtype if dtype is None: dtype = self.dtype
variable = vs.get_variable( variable = vs.get_variable(
name, name,
...@@ -93,7 +101,8 @@ class Layer(object): ...@@ -93,7 +101,8 @@ class Layer(object):
initializer=initializer, initializer=initializer,
regularizer=regularizer, regularizer=regularizer,
dtype=dtypes.as_dtype(dtype), dtype=dtypes.as_dtype(dtype),
trainable=trainable and self.trainable) trainable=trainable and self.trainable,
)
if trainable: if trainable:
self._trainable_weights.append(variable) self._trainable_weights.append(variable)
else: else:
...@@ -105,9 +114,14 @@ class Layer(object): ...@@ -105,9 +114,14 @@ class Layer(object):
class InputSpec(object): class InputSpec(object):
def __init__(self, def __init__(
dtype=None, shape=None, ndim=None, self,
max_ndim=None, min_ndim=None, axes=None dtype=None,
shape=None,
ndim=None,
max_ndim=None,
min_ndim=None,
axes=None,
): ):
self.dtype = dtype self.dtype = dtype
self.shape = shape self.shape = shape
...@@ -125,9 +139,6 @@ def _to_snake_case(name): ...@@ -125,9 +139,6 @@ def _to_snake_case(name):
return 'private' + insecure return 'private' + insecure
PER_GRAPH_LAYER_NAME_UIDS = weakref.WeakKeyDictionary()
def _unique_layer_name(name): def _unique_layer_name(name):
global PER_GRAPH_LAYER_NAME_UIDS global PER_GRAPH_LAYER_NAME_UIDS
graph = ops.get_default_graph() graph = ops.get_default_graph()
...@@ -153,3 +164,6 @@ def _add_elements_to_collection(elements, collection_list): ...@@ -153,3 +164,6 @@ def _add_elements_to_collection(elements, collection_list):
for element in elements: for element in elements:
if element not in collection_set: if element not in collection_set:
collection.append(element) collection.append(element)
PER_GRAPH_LAYER_NAME_UIDS = weakref.WeakKeyDictionary()
\ No newline at end of file
...@@ -20,24 +20,26 @@ from dragon.vm.tensorflow.ops import nn ...@@ -20,24 +20,26 @@ from dragon.vm.tensorflow.ops import nn
class _Conv(base.Layer): class _Conv(base.Layer):
def __init__(self, def __init__(
rank, self,
filters, rank,
kernel_size, filters,
strides=1, kernel_size,
padding='valid', strides=1,
data_format='channels_last', padding='valid',
dilation_rate=1, data_format='channels_last',
activation=None, dilation_rate=1,
use_bias=True, activation=None,
kernel_initializer=None, use_bias=True,
bias_initializer=init_ops.zeros_initializer(), kernel_initializer=None,
kernel_regularizer=None, bias_initializer=init_ops.zeros_initializer(),
bias_regularizer=None, kernel_regularizer=None,
activity_regularizer=None, bias_regularizer=None,
trainable=True, activity_regularizer=None,
name=None, trainable=True,
**kwargs): name=None,
**kwargs
):
super(_Conv, self).__init__(trainable=trainable, name=name, **kwargs) super(_Conv, self).__init__(trainable=trainable, name=name, **kwargs)
self.rank = rank self.rank = rank
self.filters = filters self.filters = filters
...@@ -82,7 +84,8 @@ class _Conv(base.Layer): ...@@ -82,7 +84,8 @@ class _Conv(base.Layer):
shape=kernel_shape, shape=kernel_shape,
initializer=self.kernel_initializer, initializer=self.kernel_initializer,
regularizer=self.kernel_regularizer, regularizer=self.kernel_regularizer,
dtype=self.dtype) dtype=self.dtype,
)
if self.use_bias: if self.use_bias:
self.bias = self.add_variable( self.bias = self.add_variable(
...@@ -90,7 +93,8 @@ class _Conv(base.Layer): ...@@ -90,7 +93,8 @@ class _Conv(base.Layer):
shape=(self.filters,), shape=(self.filters,),
initializer=self.bias_initializer, initializer=self.bias_initializer,
regularizer=self.bias_regularizer, regularizer=self.bias_regularizer,
dtype=self.dtype) dtype=self.dtype,
)
else: else:
self.bias = None self.bias = None
...@@ -108,10 +112,15 @@ class _Conv(base.Layer): ...@@ -108,10 +112,15 @@ class _Conv(base.Layer):
dilation_rate=self.dilation_rate, dilation_rate=self.dilation_rate,
strides=self.strides, strides=self.strides,
padding=self.padding.upper(), padding=self.padding.upper(),
data_format=tf_data_format) data_format=tf_data_format,
)
if self.bias is not None: if self.bias is not None:
outputs = nn.bias_add(outputs, self.bias, data_format=tf_data_format) outputs = nn.bias_add(
outputs,
self.bias,
data_format=tf_data_format,
)
if self.activation is not None: if self.activation is not None:
return self.activation(outputs) return self.activation(outputs)
...@@ -119,22 +128,25 @@ class _Conv(base.Layer): ...@@ -119,22 +128,25 @@ class _Conv(base.Layer):
class Conv2D(_Conv): class Conv2D(_Conv):
def __init__(self, filters, def __init__(
kernel_size, self,
strides=(1, 1), filters,
padding='valid', kernel_size,
data_format='channels_last', strides=(1, 1),
dilation_rate=(1, 1), padding='valid',
activation=None, data_format='channels_last',
use_bias=True, dilation_rate=(1, 1),
kernel_initializer=None, activation=None,
bias_initializer=init_ops.zeros_initializer(), use_bias=True,
kernel_regularizer=None, kernel_initializer=None,
bias_regularizer=None, bias_initializer=init_ops.zeros_initializer(),
activity_regularizer=None, kernel_regularizer=None,
trainable=True, bias_regularizer=None,
name=None, activity_regularizer=None,
**kwargs): trainable=True,
name=None,
**kwargs
):
super(Conv2D, self).__init__( super(Conv2D, self).__init__(
rank=2, rank=2,
filters=filters, filters=filters,
...@@ -154,23 +166,25 @@ class Conv2D(_Conv): ...@@ -154,23 +166,25 @@ class Conv2D(_Conv):
name=name, **kwargs) name=name, **kwargs)
def conv2d(inputs, def conv2d(
filters, inputs,
kernel_size, filters,
strides=(1, 1), kernel_size,
padding='valid', strides=(1, 1),
data_format='channels_last', padding='valid',
dilation_rate=(1, 1), data_format='channels_last',
activation=None, dilation_rate=(1, 1),
use_bias=True, activation=None,
kernel_initializer=None, use_bias=True,
bias_initializer=init_ops.zeros_initializer(), kernel_initializer=None,
kernel_regularizer=None, bias_initializer=init_ops.zeros_initializer(),
bias_regularizer=None, kernel_regularizer=None,
activity_regularizer=None, bias_regularizer=None,
trainable=True, activity_regularizer=None,
name=None, trainable=True,
reuse=None): name=None,
reuse=None,
):
return Conv2D( return Conv2D(
filters=filters, filters=filters,
kernel_size=kernel_size, kernel_size=kernel_size,
...@@ -188,4 +202,5 @@ def conv2d(inputs, ...@@ -188,4 +202,5 @@ def conv2d(inputs,
trainable=trainable, trainable=trainable,
name=name, name=name,
_reuse=reuse, _reuse=reuse,
_scope=name).apply(inputs) _scope=name,
\ No newline at end of file ).apply(inputs)
\ No newline at end of file
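The functional `conv2d` above just builds a `Conv2D` layer and calls `.apply(inputs)`, so the one-liner form is usually all a caller needs. A sketch, assuming the package root exposes `layers` and `nn` as stock TensorFlow does; shapes and names are illustrative:

    import dragon.vm.tensorflow as tf

    x = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
    y = tf.layers.conv2d(
        x,
        filters=64,
        kernel_size=3,
        strides=(1, 1),
        padding='same',               # upper-cased to 'SAME' before reaching nn.conv2d
        activation=tf.nn.relu,
        name='conv1',
    )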
...@@ -24,18 +24,20 @@ from dragon.vm.tensorflow.ops import standard_ops ...@@ -24,18 +24,20 @@ from dragon.vm.tensorflow.ops import standard_ops
class Dense(base.Layer): class Dense(base.Layer):
def __init__(self, def __init__(
units, self,
activation=None, units,
use_bias=True, activation=None,
kernel_initializer=None, use_bias=True,
bias_initializer=init_ops.zeros_initializer(), kernel_initializer=None,
kernel_regularizer=None, bias_initializer=init_ops.zeros_initializer(),
bias_regularizer=None, kernel_regularizer=None,
activity_regularizer=None, bias_regularizer=None,
trainable=True, activity_regularizer=None,
name=None, trainable=True,
**kwargs): name=None,
**kwargs
):
super(Dense, self).__init__(trainable=trainable, name=name, **kwargs) super(Dense, self).__init__(trainable=trainable, name=name, **kwargs)
self.units = units self.units = units
self.activation = activation self.activation = activation
...@@ -61,7 +63,8 @@ class Dense(base.Layer): ...@@ -61,7 +63,8 @@ class Dense(base.Layer):
shape=[input_shape[-1].value, self.units], shape=[input_shape[-1].value, self.units],
initializer=self.kernel_initializer, initializer=self.kernel_initializer,
regularizer=self.kernel_regularizer, regularizer=self.kernel_regularizer,
dtype=self.dtype) dtype=self.dtype,
)
if self.use_bias: if self.use_bias:
self.bias = self.add_variable( self.bias = self.add_variable(
...@@ -69,7 +72,8 @@ class Dense(base.Layer): ...@@ -69,7 +72,8 @@ class Dense(base.Layer):
shape=[self.units,], shape=[self.units,],
initializer=self.bias_initializer, initializer=self.bias_initializer,
regularizer=self.bias_regularizer, regularizer=self.bias_regularizer,
dtype=self.dtype) dtype=self.dtype,
)
else: else:
self.bias = None self.bias = None
self.built = True self.built = True
...@@ -83,18 +87,20 @@ class Dense(base.Layer): ...@@ -83,18 +87,20 @@ class Dense(base.Layer):
return outputs return outputs
def dense(inputs, def dense(
units, inputs,
activation=None, units,
use_bias=True, activation=None,
kernel_initializer=None, use_bias=True,
bias_initializer=init_ops.zeros_initializer(), kernel_initializer=None,
kernel_regularizer=None, bias_initializer=init_ops.zeros_initializer(),
bias_regularizer=None, kernel_regularizer=None,
activity_regularizer=None, bias_regularizer=None,
trainable=True, activity_regularizer=None,
name=None, trainable=True,
reuse=None): name=None,
reuse=None,
):
return Dense( return Dense(
units, units,
activation=activation, activation=activation,
...@@ -107,4 +113,5 @@ def dense(inputs, ...@@ -107,4 +113,5 @@ def dense(inputs,
trainable=trainable, trainable=trainable,
name=name, name=name,
_scope=name, _scope=name,
_reuse=reuse).apply(inputs) _reuse=reuse,
\ No newline at end of file ).apply(inputs)
\ No newline at end of file
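Likewise for `dense`, which wraps the `Dense` layer above; a short sketch under the same top-level re-export assumption:

    import dragon.vm.tensorflow as tf

    x = tf.placeholder(tf.float32, shape=[None, 256])
    h = tf.layers.dense(x, units=128, activation=tf.nn.relu, name='fc1')
    logits = tf.layers.dense(h, units=10, name='fc2')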
...@@ -13,20 +13,20 @@ from __future__ import absolute_import ...@@ -13,20 +13,20 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.vm.tensorflow.layers.convolutional import ( from .convolutional import (
conv2d, Conv2D, conv2d, Conv2D,
) )
from dragon.vm.tensorflow.layers.core import ( from .core import (
dense, Dense, dense, Dense,
) )
from dragon.vm.tensorflow.layers.normalization import ( from .normalization import (
batch_normalization, BatchNormalization, batch_normalization, BatchNormalization,
batch_norm, BatchNorm, batch_norm, BatchNorm,
) )
from dragon.vm.tensorflow.layers.pooling import ( from .pooling import (
average_pooling2d, AveragePooling2D, average_pooling2d, AveragePooling2D,
max_pooling2d, MaxPooling2D, max_pooling2d, MaxPooling2D,
) )
...@@ -17,7 +17,7 @@ from __future__ import absolute_import ...@@ -17,7 +17,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon.ops import BatchNorm as _BatchNormOp
from dragon.vm.tensorflow.framework import tensor_shape from dragon.vm.tensorflow.framework import tensor_shape
from dragon.vm.tensorflow.layers import base from dragon.vm.tensorflow.layers import base
...@@ -25,26 +25,29 @@ from dragon.vm.tensorflow.ops import init_ops ...@@ -25,26 +25,29 @@ from dragon.vm.tensorflow.ops import init_ops
class BatchNormalization(base.Layer): class BatchNormalization(base.Layer):
def __init__(self, def __init__(
axis=-1, self,
momentum=0.99, axis=-1,
epsilon=1e-3, momentum=0.99,
center=True, epsilon=1e-3,
scale=True, center=True,
beta_initializer=init_ops.zeros_initializer(), scale=True,
gamma_initializer=init_ops.ones_initializer(), beta_initializer=init_ops.zeros_initializer(),
moving_mean_initializer=init_ops.zeros_initializer(), gamma_initializer=init_ops.ones_initializer(),
moving_variance_initializer=init_ops.ones_initializer(), moving_mean_initializer=init_ops.zeros_initializer(),
beta_regularizer=None, moving_variance_initializer=init_ops.ones_initializer(),
gamma_regularizer=None, beta_regularizer=None,
renorm=False, gamma_regularizer=None,
renorm_clipping=None, renorm=False,
renorm_momentum=0.99, renorm_clipping=None,
fused=None, renorm_momentum=0.99,
trainable=True, fused=None,
name=None, trainable=True,
**kwargs): name=None,
super(BatchNormalization, self).__init__(trainable=trainable, name=name, **kwargs) **kwargs
):
super(BatchNormalization, self).__init__(
trainable=trainable, name=name, **kwargs)
self.axis = axis self.axis = axis
self.momentum = momentum self.momentum = momentum
self.epsilon = epsilon self.epsilon = epsilon
...@@ -92,33 +95,37 @@ class BatchNormalization(base.Layer): ...@@ -92,33 +95,37 @@ class BatchNormalization(base.Layer):
name='moving_mean', name='moving_mean',
shape=(param_dim.value,), shape=(param_dim.value,),
initializer=self.moving_mean_initializer, initializer=self.moving_mean_initializer,
trainable=False) trainable=False,
)
self.moving_variance = self.add_variable( self.moving_variance = self.add_variable(
name='moving_variance', name='moving_variance',
shape=(param_dim.value,), shape=(param_dim.value,),
initializer=self.moving_variance_initializer, initializer=self.moving_variance_initializer,
trainable=False) trainable=False,
)
self.gamma = self.add_variable( self.gamma = self.add_variable(
name='gamma', name='gamma',
shape=(param_dim.value,), shape=(param_dim.value,),
initializer=self.gamma_initializer, initializer=self.gamma_initializer,
regularizer=self.gamma_regularizer, regularizer=self.gamma_regularizer,
trainable=self.scale) trainable=self.scale,
)
self.beta = self.add_variable( self.beta = self.add_variable(
name='beta', name='beta',
shape=(param_dim.value,), shape=(param_dim.value,),
initializer=self.beta_initializer, initializer=self.beta_initializer,
regularizer=self.beta_regularizer, regularizer=self.beta_regularizer,
trainable=self.center) trainable=self.center,
)
self.built = True self.built = True
def call(self, inputs, training=False, *args, **kwargs): def call(self, inputs, training=False, *args, **kwargs):
use_stats = 0 if training else 1 use_stats = 0 if training else 1
return dragon.ops.BatchNorm([ return _BatchNormOp([
inputs, inputs,
self.moving_mean, self.moving_mean,
self.moving_variance, self.moving_variance,
...@@ -127,7 +134,8 @@ class BatchNormalization(base.Layer): ...@@ -127,7 +134,8 @@ class BatchNormalization(base.Layer):
axis=self.axis, axis=self.axis,
momentum=self.momentum, momentum=self.momentum,
eps=self.epsilon, eps=self.epsilon,
use_stats=use_stats) use_stats=use_stats,
)
def batch_normalization( def batch_normalization(
...@@ -170,7 +178,8 @@ def batch_normalization( ...@@ -170,7 +178,8 @@ def batch_normalization(
trainable=trainable, trainable=trainable,
name=name, name=name,
_reuse=reuse, _reuse=reuse,
_scope=name).apply(inputs, training=training) _scope=name,
).apply(inputs, training=training)
# Aliases # Aliases
......
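`BatchNormalization.call` above forwards to Dragon's `BatchNorm` op with `use_stats` toggled by the `training` flag. A usage sketch, again assuming the stock `tf.layers` entry point:

    import dragon.vm.tensorflow as tf

    x = tf.placeholder(tf.float32, shape=[None, 64, 32, 32])
    # axis=1 for NCHW feature maps; training=True selects the batch statistics.
    y = tf.layers.batch_normalization(
        x, axis=1, momentum=0.99, epsilon=1e-3, training=True)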
...@@ -22,9 +22,16 @@ from dragon.vm.tensorflow.layers import base, utils ...@@ -22,9 +22,16 @@ from dragon.vm.tensorflow.layers import base, utils
class _Pooling2D(base.Layer): class _Pooling2D(base.Layer):
def __init__(self, pool_function, pool_size, strides, def __init__(
padding='valid', data_format='channels_last', self,
name=None, **kwargs): pool_function,
pool_size,
strides,
padding='valid',
data_format='channels_last',
name=None,
**kwargs
):
super(_Pooling2D, self).__init__(name=name, **kwargs) super(_Pooling2D, self).__init__(name=name, **kwargs)
self.pool_function = pool_function self.pool_function = pool_function
self.pool_size = utils.normalize_tuple(pool_size, 2, 'pool_size') self.pool_size = utils.normalize_tuple(pool_size, 2, 'pool_size')
...@@ -40,19 +47,25 @@ class _Pooling2D(base.Layer): ...@@ -40,19 +47,25 @@ class _Pooling2D(base.Layer):
else: else:
pool_shape = (1, 1) + self.pool_size pool_shape = (1, 1) + self.pool_size
strides = (1, 1) + self.strides strides = (1, 1) + self.strides
outputs = self.pool_function( return self.pool_function(
inputs, inputs,
ksize=pool_shape, ksize=pool_shape,
strides=strides, strides=strides,
padding=self.padding.upper(), padding=self.padding.upper(),
data_format=utils.convert_data_format(self.data_format, 4)) data_format=utils.convert_data_format(self.data_format, 4),
return outputs )
class MaxPooling2D(_Pooling2D): class MaxPooling2D(_Pooling2D):
def __init__(self, pool_size, strides, def __init__(
padding='valid', data_format='channels_last', self,
name=None, **kwargs): pool_size,
strides,
padding='valid',
data_format='channels_last',
name=None,
**kwargs
):
super(MaxPooling2D, self).__init__( super(MaxPooling2D, self).__init__(
nn.max_pool, nn.max_pool,
pool_size=pool_size, pool_size=pool_size,
...@@ -63,9 +76,15 @@ class MaxPooling2D(_Pooling2D): ...@@ -63,9 +76,15 @@ class MaxPooling2D(_Pooling2D):
class AveragePooling2D(_Pooling2D): class AveragePooling2D(_Pooling2D):
def __init__(self, pool_size, strides, def __init__(
padding='valid', data_format='channels_last', self,
name=None, **kwargs): pool_size,
strides,
padding='valid',
data_format='channels_last',
name=None,
**kwargs
):
super(AveragePooling2D, self).__init__( super(AveragePooling2D, self).__init__(
nn.avg_pool, nn.avg_pool,
pool_size=pool_size, pool_size=pool_size,
...@@ -76,22 +95,34 @@ class AveragePooling2D(_Pooling2D): ...@@ -76,22 +95,34 @@ class AveragePooling2D(_Pooling2D):
def max_pooling2d( def max_pooling2d(
inputs, pool_size, strides, padding='valid', inputs,
data_format='channels_last', name=None): pool_size,
strides,
padding='valid',
data_format='channels_last',
name=None,
):
return MaxPooling2D( return MaxPooling2D(
pool_size=pool_size, pool_size=pool_size,
strides=strides, strides=strides,
padding=padding, padding=padding,
data_format=data_format, data_format=data_format,
name=name).apply(inputs) name=name,
).apply(inputs)
def average_pooling2d( def average_pooling2d(
inputs, pool_size, strides, padding='valid', inputs,
data_format='channels_last', name=None): pool_size,
strides,
padding='valid',
data_format='channels_last',
name=None,
):
return AveragePooling2D( return AveragePooling2D(
pool_size=pool_size, pool_size=pool_size,
strides=strides, strides=strides,
padding=padding, padding=padding,
data_format=data_format, data_format=data_format,
name=name).apply(inputs) name=name,
\ No newline at end of file ).apply(inputs)
\ No newline at end of file
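The pooling wrappers above follow the same pattern (build the layer, then `.apply(inputs)`); a sketch under the same assumptions:

    import dragon.vm.tensorflow as tf

    x = tf.placeholder(tf.float32, shape=[None, 56, 56, 64])
    p1 = tf.layers.max_pooling2d(x, pool_size=2, strides=2, padding='same')
    p2 = tf.layers.average_pooling2d(x, pool_size=(2, 2), strides=(2, 2))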
...@@ -51,7 +51,7 @@ def normalize_tuple(value, n, name): ...@@ -51,7 +51,7 @@ def normalize_tuple(value, n, name):
except ValueError: except ValueError:
raise ValueError('The `' + name + '` argument must be a tuple of ' + raise ValueError('The `' + name + '` argument must be a tuple of ' +
str(n) + ' integers. Received: ' + str(value) + ' ' str(n) + ' integers. Received: ' + str(value) + ' '
'including element ' + str(single_value) + ' of type' + 'including element ' + str(single_value) + ' of type' +
' ' + str(type(single_value))) ' ' + str(type(single_value)))
return value_tuple return value_tuple
......
...@@ -13,8 +13,10 @@ from __future__ import absolute_import ...@@ -13,8 +13,10 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from dragon.core import scope as _scope
from dragon.core import workspace as _workspace
from dragon.core.tensor import Tensor as _Tensor
from dragon.vm.tensorflow.framework import dtypes from dragon.vm.tensorflow.framework import dtypes
...@@ -23,19 +25,19 @@ def expand_dims(input, axis=None, name=None, dim=None): ...@@ -23,19 +25,19 @@ def expand_dims(input, axis=None, name=None, dim=None):
if axis is not None: if axis is not None:
raise ValueError("cannot specify both 'axis' and 'dim'.") raise ValueError("cannot specify both 'axis' and 'dim'.")
axis = dim axis = dim
return dragon.ops.ExpandDims(input, axis=axis, name=name) return _ops.ExpandDims(input, axis=axis, name=name)
def shape(input, name=None, out_type=dtypes.float32): def shape(input, name=None, out_type=dtypes.int64):
return dragon.ops.Shape(input, name=name) return _ops.Shape(input, name=name)
def zeros(shape, dtype=dtypes.float32, name=None): def zeros(shape, dtype=dtypes.float32, name=None):
return dragon.ops.Fill(shape, value=0.0, dtype=dtype.name, name=name) return _ops.Fill(shape, value=0.0, dtype=dtype.name, name=name)
def ones(shape, dtype=dtypes.float32, name=None): def ones(shape, dtype=dtypes.float32, name=None):
return dragon.ops.Fill(shape, value=1.0, dtype=dtype.name, name=name) return _ops.Fill(shape, value=1.0, dtype=dtype.name, name=name)
def placeholder(dtype, shape=None, name=None): def placeholder(dtype, shape=None, name=None):
...@@ -45,29 +47,41 @@ def placeholder(dtype, shape=None, name=None): ...@@ -45,29 +47,41 @@ def placeholder(dtype, shape=None, name=None):
raise TypeError('The dtype should be a valid tensorflow data type.') raise TypeError('The dtype should be a valid tensorflow data type.')
# Construct a tensor from the explicit name # Construct a tensor from the explicit name
return dragon.Tensor.Ref( return _Tensor.Ref(
dragon.workspace.GetDummyName( _workspace.GetDummyName(
dragon.get_default_name_scope() + name _scope.get_default_name_scope() + name
if name else 'Placeholder', if name else 'Placeholder',
suffix=':0', domain='Tensor'), suffix=':0', domain='Tensor'),
dtype=dtype.name, shape=shape).Placeholder() dtype=dtype.name, shape=shape).Placeholder()
def concat(values, axis, name=None): def concat(values, axis, name=None):
return dragon.ops.Concat(values, axis=axis, name=name) return _ops.Concat(values, axis=axis, name=name)
def transpose(a, perm=None, name=None): def transpose(a, perm=None, name=None):
return dragon.ops.Transpose(a, perm=perm, name=name) return _ops.Transpose(a, perm=perm, name=name)
def tile(input, multiples, name=None): def tile(input, multiples, name=None):
return dragon.ops.Tile(input, multiples=multiples, name=name) return _ops.Tile(input, multiples=multiples, name=name)
def pad(tensor, paddings, mode="CONSTANT", name=None, constant_values=0): def pad(
return dragon.ops.Pad(tensor, paddings, mode=mode, name=name, value=constant_values) tensor,
paddings,
mode="CONSTANT",
name=None,
constant_values=0,
):
return _ops.Pad(
tensor,
paddings,
mode=mode,
name=name,
value=constant_values,
)
def reshape(tensor, shape, name=None): def reshape(tensor, shape, name=None):
return dragon.ops.Reshape(tensor, shape=shape, name=name) return _ops.Reshape(tensor, shape=shape, name=name)
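Each helper in this file maps one TF array op onto its Dragon counterpart. A short sketch, assuming these helpers are re-exported at the package root as in stock TensorFlow:

    import dragon.vm.tensorflow as tf

    x = tf.placeholder(tf.float32, shape=[4, 3], name='x')
    y = tf.expand_dims(x, axis=0)                 # Dragon ExpandDims -> [1, 4, 3]
    y = tf.reshape(y, shape=[2, 6])               # Dragon Reshape
    z = tf.concat([x, tf.ones([4, 3])], axis=1)   # Dragon Concat -> [4, 6]
    t = tf.transpose(z, perm=[1, 0])              # Dragon Transpose -> [6, 4]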
...@@ -13,8 +13,17 @@ from __future__ import absolute_import ...@@ -13,8 +13,17 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
def equal(a, b, name=None): def equal(a, b, name=None):
return dragon.ops.Equal([a, b], name=name) return _ops.Equal([a, b], name=name)
\ No newline at end of file
def greater(a, b, name=None):
return _ops.Greater([a, b], name=name)
def less(a, b, name=None):
return _ops.Less([a, b], name=name)
...@@ -13,7 +13,7 @@ from __future__ import absolute_import ...@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon.vm.theano.gradient import grad as _Grad
def gradients(ys, xs, **kwargs): def gradients(ys, xs, **kwargs):
...@@ -34,5 +34,5 @@ def gradients(ys, xs, **kwargs): ...@@ -34,5 +34,5 @@ def gradients(ys, xs, **kwargs):
""" """
dxs = [] dxs = []
if not isinstance(ys, list): ys = [ys] if not isinstance(ys, list): ys = [ys]
for y in ys: dxs.append(dragon.grad(y, xs)) for y in ys: dxs.append(_Grad(y, xs))
if len(dxs) == 1: return dxs[0] if len(dxs) == 1: return dxs[0]
\ No newline at end of file
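`gradients` above simply maps each output through the Theano-style `grad`, returning a single result when `ys` is a single tensor. A sketch under the usual top-level re-export assumption:

    import dragon.vm.tensorflow as tf

    x = tf.placeholder(tf.float32, shape=[4], name='x')
    y = tf.reduce_sum(tf.square(x))
    dx = tf.gradients(y, [x])                # grad(y, [x]) for the single output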
...@@ -13,8 +13,7 @@ from __future__ import absolute_import ...@@ -13,8 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from dragon.vm.tensorflow.framework import dtypes from dragon.vm.tensorflow.framework import dtypes
...@@ -59,7 +58,7 @@ class Zeros(Initializer): ...@@ -59,7 +58,7 @@ class Zeros(Initializer):
def __call__(self, shape, dtype=None, **kwargs): def __call__(self, shape, dtype=None, **kwargs):
if dtype is None: dtype = self.dtype if dtype is None: dtype = self.dtype
return dragon.ops.Fill(shape, value=0, dtype=dtype.name) return _ops.Fill(shape, value=0, dtype=dtype.name)
class Ones(Initializer): class Ones(Initializer):
...@@ -83,7 +82,7 @@ class Ones(Initializer): ...@@ -83,7 +82,7 @@ class Ones(Initializer):
def __call__(self, shape, dtype=None, **kwargs): def __call__(self, shape, dtype=None, **kwargs):
if dtype is None: dtype = self.dtype if dtype is None: dtype = self.dtype
return dragon.ops.Fill(shape, value=1, dtype=dtype.name) return _ops.Fill(shape, value=1, dtype=dtype.name)
class Constant(Initializer): class Constant(Initializer):
...@@ -93,7 +92,7 @@ class Constant(Initializer): ...@@ -93,7 +92,7 @@ class Constant(Initializer):
def __call__(self, shape, dtype=None, **kwargs): def __call__(self, shape, dtype=None, **kwargs):
if dtype is None: dtype = self.dtype if dtype is None: dtype = self.dtype
return dragon.ops.Fill(shape, value=self.value, dtype=dtype.name) return _ops.Fill(shape, value=self.value, dtype=dtype.name)
class RandomUniform(Initializer): class RandomUniform(Initializer):
...@@ -104,8 +103,12 @@ class RandomUniform(Initializer): ...@@ -104,8 +103,12 @@ class RandomUniform(Initializer):
def __call__(self, shape, dtype=None, **kwargs): def __call__(self, shape, dtype=None, **kwargs):
if dtype is None: dtype = self.dtype if dtype is None: dtype = self.dtype
return dragon.ops.RandomUniform( return _ops.RandomUniform(
shape, self.minval, self.maxval, dtype=dtype.name) shape=shape,
low=self.minval,
high=self.maxval,
dtype=dtype.name,
)
class RandomNormal(Initializer): class RandomNormal(Initializer):
...@@ -117,8 +120,12 @@ class RandomNormal(Initializer): ...@@ -117,8 +120,12 @@ class RandomNormal(Initializer):
def __call__(self, shape, dtype=None, **kwargs): def __call__(self, shape, dtype=None, **kwargs):
if dtype is None: dtype = self.dtype if dtype is None: dtype = self.dtype
return dragon.ops.RandomNormal( return _ops.RandomNormal(
shape, self.mean, self.stddev, dtype=dtype.name) shape=shape,
mean=self.mean,
std=self.stddev,
dtype=dtype.name,
)
class TruncatedNormal(Initializer): class TruncatedNormal(Initializer):
...@@ -130,15 +137,21 @@ class TruncatedNormal(Initializer): ...@@ -130,15 +137,21 @@ class TruncatedNormal(Initializer):
def __call__(self, shape, dtype=None, **kwargs): def __call__(self, shape, dtype=None, **kwargs):
if dtype is None: dtype = self.dtype if dtype is None: dtype = self.dtype
return dragon.ops.TruncatedNormal( return _ops.TruncatedNormal(
shape, self.mean, self.stddev, dtype=dtype.name) shape=shape,
mean=self.mean,
std=self.stddev,
dtype=dtype.name,
)
class VarianceScaling(Initializer): class VarianceScaling(Initializer):
def __init__(self, def __init__(
scale=1.0, mode="fan_in", self,
distribution="normal", scale=1.0,
dtype=dtypes.float32 mode="fan_in",
distribution="normal",
dtype=dtypes.float32,
): ):
if scale <= 0.: if scale <= 0.:
raise ValueError("`scale` must be positive float.") raise ValueError("`scale` must be positive float.")
...@@ -159,27 +172,44 @@ class VarianceScaling(Initializer): ...@@ -159,27 +172,44 @@ class VarianceScaling(Initializer):
def __call__(self, shape, dtype=None, **kwargs): def __call__(self, shape, dtype=None, **kwargs):
if dtype is None: dtype = self.dtype if dtype is None: dtype = self.dtype
if self.distribution == "normal": if self.distribution == "normal":
return dragon.ops.GlorotNormal(shape=shape, scale=self.scale * 2., return _ops.GlorotNormal(
mode=self.mode, dtype=dtype.name) shape=shape,
scale=self.scale * 2.,
mode=self.mode,
dtype=dtype.name,
)
else: else:
return dragon.ops.GlorotUniform(shape=shape, scale=self.scale * 3., return _ops.GlorotUniform(
mode=self.mode, dtype=dtype.name) shape=shape,
scale=self.scale * 3.,
mode=self.mode,
dtype=dtype.name,
)
def glorot_uniform_initializer(dtype=dtypes.float32):
return variance_scaling_initializer(
scale=1.0,
mode='fan_avg',
distribution='uniform',
dtype=dtype,
)
def glorot_normal_initializer(dtype=dtypes.float32):
return variance_scaling_initializer(
scale=1.0,
mode='fan_avg',
distribution='normal',
dtype=dtype,
)
# Aliases
zeros_initializer = Zeros zeros_initializer = Zeros
ones_initializer = Ones ones_initializer = Ones
constant_initializer = Constant constant_initializer = Constant
random_uniform_initializer = RandomUniform random_uniform_initializer = RandomUniform
random_normal_initializer = RandomNormal random_normal_initializer = RandomNormal
truncated_normal_initializer = TruncatedNormal truncated_normal_initializer = TruncatedNormal
variance_scaling_initializer = VarianceScaling variance_scaling_initializer = VarianceScaling
\ No newline at end of file
def glorot_uniform_initializer(dtype=dtypes.float32):
return variance_scaling_initializer(scale=1.0,
mode='fan_avg', distribution='uniform', dtype=dtype)
def glorot_normal_initializer(dtype=dtypes.float32):
return variance_scaling_initializer(scale=1.0,
mode='fan_avg', distribution='normal', dtype=dtype)
\ No newline at end of file
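The initializers above are callables mapping a shape (and optional dtype) to a Dragon filler op; the `glorot_*` helpers are thin presets over `VarianceScaling`. A small sketch using module paths that already appear elsewhere in this diff:

    from dragon.vm.tensorflow.framework import dtypes
    from dragon.vm.tensorflow.ops import init_ops

    init = init_ops.glorot_uniform_initializer()       # VarianceScaling(fan_avg, uniform)
    w = init(shape=[64, 128], dtype=dtypes.float32)     # -> GlorotUniform op
    b = init_ops.zeros_initializer()(shape=[128])       # -> Fill(value=0)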
...@@ -17,8 +17,7 @@ from __future__ import absolute_import ...@@ -17,8 +17,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from dragon.vm.tensorflow.framework import ops from dragon.vm.tensorflow.framework import ops
...@@ -34,8 +33,8 @@ class Reduction(object): ...@@ -34,8 +33,8 @@ class Reduction(object):
* `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero * `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero
weights. DEPRECATED. weights. DEPRECATED.
* `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`. * `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`.
"""
"""
NONE = "none" NONE = "none"
SUM = "weighted_sum" SUM = "weighted_sum"
SUM_OVER_BATCH_SIZE = "weighted_sum_over_batch_size" SUM_OVER_BATCH_SIZE = "weighted_sum_over_batch_size"
...@@ -51,7 +50,8 @@ class Reduction(object): ...@@ -51,7 +50,8 @@ class Reduction(object):
cls.MEAN, cls.MEAN,
cls.SUM_OVER_BATCH_SIZE, cls.SUM_OVER_BATCH_SIZE,
cls.SUM_OVER_NONZERO_WEIGHTS, cls.SUM_OVER_NONZERO_WEIGHTS,
cls.SUM_BY_NONZERO_WEIGHTS) cls.SUM_BY_NONZERO_WEIGHTS,
)
@classmethod @classmethod
def validate(cls, key): def validate(cls, key):
...@@ -59,24 +59,58 @@ class Reduction(object): ...@@ -59,24 +59,58 @@ class Reduction(object):
raise ValueError("Invalid Reduction Key %s." % key) raise ValueError("Invalid Reduction Key %s." % key)
def sparse_softmax_cross_entropy( def softmax_cross_entropy(
labels, logits, weights=1.0, scope=None, onehot_labels,
loss_collection=ops.GraphKeys.LOSSES, logits,
reduction=Reduction.SUM_BY_NONZERO_WEIGHTS): weights=1.,
if labels is None: raise ValueError("labels must not be None.") label_smoothing=0,
scope=None,
loss_collection=ops.GraphKeys.LOSSES,
reduction=Reduction.SUM_BY_NONZERO_WEIGHTS,
):
if onehot_labels is None: raise ValueError("onehot_labels must not be None.")
if logits is None: raise ValueError("logits must not be None.") if logits is None: raise ValueError("logits must not be None.")
normalization = None normalization = None
if reduction == Reduction.NONE: normalization = 'UNIT' if reduction == Reduction.NONE: normalization = 'UNIT'
elif reduction == Reduction.MEAN: normalization = 'FULL' elif reduction == Reduction.MEAN: normalization = 'FULL'
elif reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or \ elif reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or \
reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS: reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS:
normalization = 'VALID' normalization = 'NONE'
elif reduction == Reduction.SUM_OVER_BATCH_SIZE: elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
normalization = 'BATCH_SIZE' normalization = 'BATCH_SIZE'
loss = _ops.SoftmaxCrossEntropy(
loss = dragon.ops.SparseSoftmaxCrossEntropy([logits, labels], [logits, onehot_labels],
normalization=normalization, name=scope) normalization=normalization,
name=scope,
)
if weights != 1.0: loss = weights * loss if weights != 1.0: loss = weights * loss
ops.add_to_collection(loss_collection, loss) ops.add_to_collection(loss_collection, loss)
return loss return loss
def sparse_softmax_cross_entropy(
labels,
logits,
weights=1.,
scope=None,
loss_collection=ops.GraphKeys.LOSSES,
reduction=Reduction.SUM_BY_NONZERO_WEIGHTS,
):
if labels is None: raise ValueError("labels must not be None.")
if logits is None: raise ValueError("logits must not be None.")
normalization = None
if reduction == Reduction.NONE: normalization = 'UNIT'
elif reduction == Reduction.MEAN: normalization = 'FULL'
elif reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or \
reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS:
normalization = 'NONE'
elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
normalization = 'BATCH_SIZE'
loss = _ops.SparseSoftmaxCrossEntropy(
[logits, labels],
normalization=normalization,
name=scope,
)
if weights != 1.0: loss = weights * loss
ops.add_to_collection(loss_collection, loss)
return loss
\ No newline at end of file
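Both loss wrappers above translate the `reduction` key into the op-level `normalization` argument and register the result in the LOSSES collection. A sketch, assuming the module is surfaced as `tf.losses` the way stock TensorFlow does (its path is not shown in this hunk):

    import dragon.vm.tensorflow as tf

    logits = tf.placeholder(tf.float32, shape=[None, 10])
    labels = tf.placeholder(tf.int64, shape=[None])
    # Sparse integer labels; the default reduction maps to normalization='NONE'.
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)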
...@@ -13,31 +13,7 @@ from __future__ import absolute_import ...@@ -13,31 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
__all__ = [
'argmax',
'argmin',
'matmul',
'add',
'subtract',
'multiply',
'divide',
'sub',
'mul',
'div',
'cast',
'log',
'exp',
'square',
'sqrt',
'reduce_sum',
'reduce_mean',
'sigmoid',
'tanh',
'add_n',
]
def argmax(input, axis=None, name=None, dimension=None): def argmax(input, axis=None, name=None, dimension=None):
...@@ -46,7 +22,7 @@ def argmax(input, axis=None, name=None, dimension=None): ...@@ -46,7 +22,7 @@ def argmax(input, axis=None, name=None, dimension=None):
raise ValueError("cannot specify both 'axis' and 'dimension'.") raise ValueError("cannot specify both 'axis' and 'dimension'.")
axis = dimension axis = dimension
elif axis is None: axis = 0 elif axis is None: axis = 0
return dragon.ops.ArgMax(input, axis=axis, name=name) return _ops.ArgMax(input, axis=axis, name=name)
def argmin(input, axis=None, name=None, dimension=None): def argmin(input, axis=None, name=None, dimension=None):
...@@ -55,31 +31,38 @@ def argmin(input, axis=None, name=None, dimension=None): ...@@ -55,31 +31,38 @@ def argmin(input, axis=None, name=None, dimension=None):
raise ValueError("cannot specify both 'axis' and 'dimension'.") raise ValueError("cannot specify both 'axis' and 'dimension'.")
axis = dimension axis = dimension
elif axis is None: axis = 0 elif axis is None: axis = 0
return dragon.ops.ArgMin(input, axis=axis, name=name) return _ops.ArgMin(input, axis=axis, name=name)
def matmul(a, def matmul(
b, a,
transpose_a=False, b,
transpose_b=False, transpose_a=False,
name=None): transpose_b=False,
return dragon.ops.Matmul([a, b], transA=transpose_a, transB=transpose_b, name=name) name=None,
):
return _ops.Matmul(
[a, b],
transA=transpose_a,
transB=transpose_b,
name=name,
)
def add(x, y, name=None): def add(x, y, name=None):
return dragon.ops.Add([x, y], name=name) return _ops.Add([x, y], name=name)
def subtract(x, y, name=None): def subtract(x, y, name=None):
return dragon.ops.Sub([x, y], name=name) return _ops.Sub([x, y], name=name)
def multiply(x, y, name=None): def multiply(x, y, name=None):
return dragon.ops.Mul([x, y], name=name) return _ops.Mul([x, y], name=name)
def divide(x, y, name=None): def divide(x, y, name=None):
return dragon.ops.Div([x, y], name=name) return _ops.Div([x, y], name=name)
def mul(x, y, name=None): def mul(x, y, name=None):
...@@ -95,27 +78,27 @@ def div(x, y, name=None): ...@@ -95,27 +78,27 @@ def div(x, y, name=None):
def cast(x, dtype, name=None): def cast(x, dtype, name=None):
return dragon.ops.Cast(x, dtype=dtype, name=name) return _ops.Cast(x, dtype=dtype, name=name)
def log(x, name=None): def log(x, name=None):
return dragon.ops.Log(x, name=name) return _ops.Log(x, name=name)
def exp(x, name=None): def exp(x, name=None):
return dragon.ops.Exp(x, name=name) return _ops.Exp(x, name=name)
def square(x, name=None): def square(x, name=None):
return dragon.ops.Square(x, name=name) return _ops.Square(x, name=name)
def sqrt(x, name=None): def sqrt(x, name=None):
return dragon.ops.Pow(x, power=0.5, name=name) return _ops.Pow(x, power=0.5, name=name)
def pow(x, power, name=None): def pow(x, power, name=None):
return dragon.ops.Pow(x, power=power, name=name) return _ops.Pow(x, power=power, name=name)
def reduce_sum( def reduce_sum(
...@@ -123,13 +106,19 @@ def reduce_sum( ...@@ -123,13 +106,19 @@ def reduce_sum(
axis=None, axis=None,
keep_dims=False, keep_dims=False,
name=None, name=None,
reduction_indices=None reduction_indices=None,
): ):
if reduction_indices is not None: if reduction_indices is not None:
if axis is not None: if axis is not None:
raise ValueError("cannot specify both 'axis' and 'reduction_indices'.") raise ValueError(
"Cannot specify both 'axis' and 'reduction_indices'.")
axis = reduction_indices axis = reduction_indices
return dragon.ops.Sum(input_tensor, axes=axis, keep_dims=keep_dims, nama=name) return _ops.Sum(
input_tensor,
axes=axis,
keep_dims=keep_dims,
name=name,
)
def reduce_mean( def reduce_mean(
...@@ -137,22 +126,28 @@ def reduce_mean( ...@@ -137,22 +126,28 @@ def reduce_mean(
axis=None, axis=None,
keep_dims=False, keep_dims=False,
name=None, name=None,
reduction_indices=None reduction_indices=None,
): ):
if reduction_indices is not None: if reduction_indices is not None:
if axis is not None: if axis is not None:
raise ValueError("cannot specify both 'axis' and 'reduction_indices'.") raise ValueError(
"cannot specify both 'axis' and 'reduction_indices'.")
axis = reduction_indices axis = reduction_indices
return dragon.ops.Mean(input_tensor, axes=axis, keep_dims=keep_dims, nama=name) return _ops.Mean(
input_tensor,
axes=axis,
keep_dims=keep_dims,
name=name,
)
def sigmoid(x, name=None): def sigmoid(x, name=None):
return dragon.ops.Sigmoid(x, name=name) return _ops.Sigmoid(x, name=name)
def tanh(x, name=None): def tanh(x, name=None):
return dragon.ops.Tanh(x, name=name) return _ops.Tanh(x, name=name)
def add_n(inputs, name=None): def add_n(inputs, name=None):
return dragon.ops.Eltwise(inputs, operation='SUM', name=name) return _ops.Eltwise(inputs, operation='SUM', name=name)
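These math helpers are one-to-one renames onto Dragon ops (the old `nama=name` typo in the `reduce_*` calls is also fixed here). A sketch under the top-level re-export assumption:

    import dragon.vm.tensorflow as tf

    a = tf.placeholder(tf.float32, shape=[4, 8])
    b = tf.placeholder(tf.float32, shape=[8, 2])
    c = tf.matmul(a, b)                       # Dragon Matmul -> [4, 2]
    m = tf.reduce_mean(tf.square(c), axis=1)  # Dragon Square + Mean -> [4]
    s = tf.add_n([m, tf.ones([4])])           # Dragon Eltwise(SUM)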
...@@ -13,34 +13,48 @@ from __future__ import absolute_import ...@@ -13,34 +13,48 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
def batch_normalization(x, def batch_normalization(
mean, x,
variance, mean,
offset, variance,
scale, offset,
decay=0.9, scale,
variance_epsilon=1e-3, decay=0.9,
name=None): variance_epsilon=1e-3,
raise NotImplementedError('Deprecated. Use tf.layer.batch_normalization.') name=None,
):
raise NotImplementedError(
def batch_norm_with_global_normalization(t, 'Deprecated. Use tf.layer.batch_normalization.')
m,
v,
beta, def batch_norm_with_global_normalization(
gamma, t,
decay=0.9, m,
variance_epsilon=1e-3, v,
scale_after_normalization=True, beta,
name=None): gamma,
raise NotImplementedError('Deprecated. Use tf.layer.batch_normalization.') decay=0.9,
variance_epsilon=1e-3,
scale_after_normalization=True,
def l2_normalize(x, name=None,
dim, ):
epsilon=1e-12, raise NotImplementedError(
name=None): 'Deprecated. Use tf.layer.batch_normalization.')
return dragon.ops.L2Norm(x, axis=dim, num_axes=1, eps=epsilon, name=name)
def l2_normalize(
x,
dim,
epsilon=1e-12,
name=None,
):
return _ops.L2Norm(
x,
axis=dim,
num_axes=1,
eps=epsilon,
name=name,
)
...@@ -13,31 +13,18 @@ from __future__ import absolute_import ...@@ -13,31 +13,18 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from dragon.core.tensor import Tensor
__all__ = [
'convolution',
'relu',
'softmax',
'conv2d',
'conv2d_transpose',
'avg_pool',
'max_pool',
'xw_plus_b',
'bias_add',
'dropout',
'sigmoid_cross_entropy_with_logits',
'softmax_cross_entropy_with_logits',
'sparse_softmax_cross_entropy_with_logits',
'l2_loss',
]
def convolution( def convolution(
input, filter, padding, strides=None, input,
dilation_rate=None, name=None, data_format=None): filter,
padding,
strides=None,
dilation_rate=None,
name=None,
data_format=None,
):
num_total_dims = filter.get_shape().ndims num_total_dims = filter.get_shape().ndims
if num_total_dims is None: if num_total_dims is None:
num_total_dims = input.get_shape().ndims num_total_dims = input.get_shape().ndims
...@@ -71,24 +58,37 @@ def convolution( ...@@ -71,24 +58,37 @@ def convolution(
if num_spatial_dims == 2: if num_spatial_dims == 2:
return conv2d( return conv2d(
input, filter, strides, padding, input,
dilation_rate, data_format, name) filter,
strides,
padding,
dilation_rate,
data_format,
name,
)
else: else:
raise NotImplementedError( raise NotImplementedError(
'conv{}d is not implemented.'.format(num_spatial_dims)) 'conv{}d is not implemented.'.format(num_spatial_dims))
def relu(features, name=None): def relu(features, name=None):
return dragon.ops.Relu(features, name=name) return _ops.Relu(features, name=name)
def softmax(logits, dim=-1, name=None): def softmax(logits, dim=-1, name=None):
return dragon.ops.Softmax(logits, axis=dim, name=name) return _ops.Softmax(logits, axis=dim, name=name)
def conv2d( def conv2d(
input, filter, strides, padding, input,
dilation_rate=None, data_format='NHWC', name=None, **kwargs): filter,
strides,
padding,
dilation_rate=None,
data_format='NHWC',
name=None,
**kwargs
):
"""Compute 2D convolution according to the given 4D ``input`` and ``filter``. """Compute 2D convolution according to the given 4D ``input`` and ``filter``.
For **NHWC** format, the filter should have shape ``[filter_height, filter_width, in_channels, out_channels]``. For **NHWC** format, the filter should have shape ``[filter_height, filter_width, in_channels, out_channels]``.
...@@ -130,30 +130,41 @@ def conv2d( ...@@ -130,30 +130,41 @@ def conv2d(
raise ValueError('dilation_rate must be a list with length 4.') raise ValueError('dilation_rate must be a list with length 4.')
if data_format == 'NHWC': if data_format == 'NHWC':
return dragon.ops.Conv2d([input, filter], return _ops.Conv2d(
[input, filter],
num_output=filter.shape[3], num_output=filter.shape[3],
kernel_shape=filter.shape[0:2], kernel_shape=filter.shape[0:2],
strides=strides[1:3], strides=strides[1:3],
dilations=dilation_rate[1:3] if dilation_rate is not None else 1, dilations=dilation_rate[1:3] if dilation_rate is not None else 1,
padding=padding, padding=padding,
data_format=data_format, data_format=data_format,
name=name) name=name,
)
elif data_format == 'NCHW': elif data_format == 'NCHW':
return dragon.ops.Conv2d([input, filter], return _ops.Conv2d(
[input, filter],
num_output=filter.shape[0], num_output=filter.shape[0],
kernel_shape=filter.shape[2:4], kernel_shape=filter.shape[2:4],
strides=strides[2:4], strides=strides[2:4],
dilations=dilation_rate[2:4] if dilation_rate is not None else 1, dilations=dilation_rate[2:4] if dilation_rate is not None else 1,
padding=padding, padding=padding,
data_format=data_format, data_format=data_format,
name=name) name=name,
)
else: else:
raise ValueError('Unknown data format: {}'.format(data_format)) raise ValueError('Unknown data format: ' + data_format)
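A minimal usage sketch for the wrapper above, assuming the virtual namespace is importable as ``dragon.vm.tensorflow`` and that these helpers are exposed under the usual ``tf.nn`` / ``tf.get_variable`` names (assumptions, not confirmed by this hunk):

import dragon.vm.tensorflow as tf  # assumed alias

# NHWC input and a [filter_height, filter_width, in_channels, out_channels] filter,
# matching the layout documented above.
x = tf.get_variable('x', shape=[8, 32, 32, 3])
w = tf.get_variable('w', shape=[3, 3, 3, 64])
y = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME', data_format='NHWC')
# -> num_output = w.shape[3] = 64, kernel_shape = w.shape[0:2] = [3, 3]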
def conv2d_transpose( def conv2d_transpose(
value, filter, output_shape, strides, value,
padding='SAME', data_format='NHWC', name=None, **kwargs): filter,
output_shape,
strides,
padding='SAME',
data_format='NHWC',
name=None,
**kwargs
):
"""Compute 2D deconvolution according to the given 4D ``input`` and ``filter``. """Compute 2D deconvolution according to the given 4D ``input`` and ``filter``.
For **NHWC** format, the filter should have shape ``[filter_height, filter_width, out_channels, in_channels]``. For **NHWC** format, the filter should have shape ``[filter_height, filter_width, out_channels, in_channels]``.
...@@ -199,28 +210,39 @@ def conv2d_transpose( ...@@ -199,28 +210,39 @@ def conv2d_transpose(
raise ValueError('output_shape should be a list with length 4.') raise ValueError('output_shape should be a list with length 4.')
if data_format == 'NHWC': if data_format == 'NHWC':
return dragon.ops.ConvTranspose2d([value, filter], return _ops.ConvTranspose2d(
[value, filter],
num_output=filter.shape[2], num_output=filter.shape[2],
kernel_shape=filter.shape[0:2], kernel_shape=filter.shape[0:2],
strides=strides[1:3], strides=strides[1:3],
padding=padding, padding=padding,
data_format=data_format, data_format=data_format,
output_shape=output_shape, output_shape=output_shape,
name=name) name=name,
)
elif data_format == 'NCHW': elif data_format == 'NCHW':
return dragon.ops.Conv2dTranspose([value, filter], return _ops.Conv2dTranspose(
[value, filter],
num_output=filter.shape[1], num_output=filter.shape[1],
kernel_shape=filter.shape[2:4], kernel_shape=filter.shape[2:4],
strides=strides[2:4], strides=strides[2:4],
padding=padding, padding=padding,
data_format=data_format, data_format=data_format,
output_shape=output_shape, output_shape=output_shape,
name=name) name=name,
)
else: else:
raise ValueError('Unknown data format: {}'.format(data_format)) raise ValueError('Unknown data format: ' + data_format)
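A companion sketch for the transposed convolution above; note the reversed NHWC filter layout (``out_channels`` before ``in_channels``) and the explicit ``output_shape`` (same assumed ``tf`` alias):

import dragon.vm.tensorflow as tf  # assumed alias

x = tf.get_variable('x', shape=[8, 16, 16, 64])   # NHWC input
w = tf.get_variable('w', shape=[3, 3, 32, 64])    # [fh, fw, out_channels, in_channels]
y = tf.nn.conv2d_transpose(x, w, output_shape=[8, 32, 32, 32],
                           strides=[1, 2, 2, 1], padding='SAME',
                           data_format='NHWC')    # num_output = w.shape[2] = 32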
def avg_pool(value, ksize, strides, padding, data_format='NHWC', name=None): def avg_pool(
value,
ksize,
strides,
padding,
data_format='NHWC',
name=None,
):
"""Perform avg pooling on spatial axes. """Perform avg pooling on spatial axes.
Parameters Parameters
...@@ -252,31 +274,40 @@ def avg_pool(value, ksize, strides, padding, data_format='NHWC', name=None): ...@@ -252,31 +274,40 @@ def avg_pool(value, ksize, strides, padding, data_format='NHWC', name=None):
if data_format == 'NHWC': if data_format == 'NHWC':
if ksize[0] != 1 or ksize[3] != 1 or strides[0] != 1 or strides[3] != 1: if ksize[0] != 1 or ksize[3] != 1 or strides[0] != 1 or strides[3] != 1:
raise ValueError('The pooling can only be performed on spatial axes.') raise ValueError('The pooling can only be performed on spatial axes.')
return dragon.ops.Pool2d( return _ops.Pool2d(
value, value,
kernel_shape=[ksize[1], ksize[2]], kernel_shape=[ksize[1], ksize[2]],
strides=[strides[1], strides[2]], strides=[strides[1], strides[2]],
padding=padding, padding=padding,
data_format=data_format, data_format=data_format,
mode='AVG', mode='AVG',
name=name) name=name,
)
if data_format == 'NCHW': if data_format == 'NCHW':
if ksize[0] != 1 or ksize[1] != 1 or strides[0] != 1 or strides[1] != 1: if ksize[0] != 1 or ksize[1] != 1 or strides[0] != 1 or strides[1] != 1:
raise ValueError('The pooling can only be performed on spatial axes.') raise ValueError('The pooling can only be performed on spatial axes.')
return dragon.ops.Pool2d( return _ops.Pool2d(
value, value,
kernel_shape=[ksize[2], ksize[3]], kernel_shape=[ksize[2], ksize[3]],
strides=[strides[2], strides[3]], strides=[strides[2], strides[3]],
padding=padding, padding=padding,
data_format=data_format, data_format=data_format,
mode='AVG', mode='AVG',
name=name) name=name,
)
else: else:
raise NotImplementedError( raise NotImplementedError(
'Pool{}d has not been implemented yet.'.format(len(ksize) - 2)) 'Pool{}d has not been implemented yet.'.format(len(ksize) - 2))
def max_pool(value, ksize, strides, padding, data_format='NHWC', name=None): def max_pool(
value,
ksize,
strides,
padding,
data_format='NHWC',
name=None,
):
"""Perform max pooling on spatial axes. """Perform max pooling on spatial axes.
Parameters Parameters
...@@ -308,25 +339,27 @@ def max_pool(value, ksize, strides, padding, data_format='NHWC', name=None): ...@@ -308,25 +339,27 @@ def max_pool(value, ksize, strides, padding, data_format='NHWC', name=None):
if data_format == 'NHWC': if data_format == 'NHWC':
if ksize[0] != 1 or ksize[3] != 1 or strides[0] != 1 or strides[3] != 1: if ksize[0] != 1 or ksize[3] != 1 or strides[0] != 1 or strides[3] != 1:
raise ValueError('The pooling can only be performed on spatial axes.') raise ValueError('The pooling can only be performed on spatial axes.')
return dragon.ops.Pool2d( return _ops.Pool2d(
value, value,
kernel_shape=[ksize[1], ksize[2]], kernel_shape=[ksize[1], ksize[2]],
strides=[strides[1], strides[2]], strides=[strides[1], strides[2]],
padding=padding, padding=padding,
data_format=data_format, data_format=data_format,
mode='MAX', mode='MAX',
name=name) name=name,
)
if data_format == 'NCHW': if data_format == 'NCHW':
if ksize[0] != 1 or ksize[1] != 1 or strides[0] != 1 or strides[1] != 1: if ksize[0] != 1 or ksize[1] != 1 or strides[0] != 1 or strides[1] != 1:
raise ValueError('The pooling can only be performed on spatial axes.') raise ValueError('The pooling can only be performed on spatial axes.')
return dragon.ops.Pool2d( return _ops.Pool2d(
value, value,
kernel_shape=[ksize[2], ksize[3]], kernel_shape=[ksize[2], ksize[3]],
strides=[strides[2], strides[3]], strides=[strides[2], strides[3]],
padding=padding, padding=padding,
data_format=data_format, data_format=data_format,
mode='MAX', mode='MAX',
name=name) name=name,
)
else: else:
raise NotImplementedError( raise NotImplementedError(
'Pool{}d has not been implemented yet.'.format(len(ksize) - 2)) 'Pool{}d has not been implemented yet.'.format(len(ksize) - 2))
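A usage sketch for the two pooling wrappers above; ``ksize`` and ``strides`` carry one entry per dimension, so the batch and channel entries must stay 1, exactly as the checks enforce (assumed ``tf`` alias):

import dragon.vm.tensorflow as tf  # assumed alias

x = tf.get_variable('feat', shape=[8, 32, 32, 64])   # NHWC
y_avg = tf.nn.avg_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                       padding='SAME', data_format='NHWC')
y_max = tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                       padding='SAME', data_format='NHWC')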
...@@ -347,30 +380,63 @@ def xw_plus_b(x, weights, biases, name=None): ...@@ -347,30 +380,63 @@ def xw_plus_b(x, weights, biases, name=None):
if weights.shape[1] != biases.shape[0]: if weights.shape[1] != biases.shape[0]:
raise ValueError('The shapes of weights and biases are incompatible.') raise ValueError('The shapes of weights and biases are incompatible.')
return dragon.ops.FullyConnected([x, weights, biases], num_output=weights.shape[1], transW=False, name=name) return _ops.FullyConnected(
[x, weights, biases],
num_output=weights.shape[1],
transW=False,
name=name,
)
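A short sketch of the fully-connected helper above: ``weights`` must be ``[in_features, out_features]`` and ``biases`` ``[out_features]``, which is what the shape check enforces (assumed ``tf`` alias):

import dragon.vm.tensorflow as tf  # assumed alias

x = tf.get_variable('x', shape=[8, 128])
w = tf.get_variable('w', shape=[128, 10])
b = tf.get_variable('b', shape=[10])
logits = tf.nn.xw_plus_b(x, w, b)   # matmul(x, w) + b via FullyConnected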
def bias_add(value, bias, data_format='NHWC', name=None): def bias_add(value, bias, data_format='NHWC', name=None):
return dragon.ops.BiasAdd([value, bias], data_format=data_format, name=name) return _ops.BiasAdd(
[value, bias],
data_format=data_format,
name=name,
)
def sigmoid_cross_entropy_with_logits(logits, targets, name=None): def sigmoid_cross_entropy_with_logits(logits, targets, name=None):
return dragon.ops.SigmoidCrossEntropy([logits, targets], normalization='UNIT', name=name) return _ops.SigmoidCrossEntropy(
[logits, targets],
normalization='UNIT',
def softmax_cross_entropy_with_logits(_sentinel=None, labels=None, logits=None, dim=-1, name=None): name=name,
return dragon.ops.SoftmaxCrossEntropy([logits, labels], )
axis=dim, normalization='UNIT', name=name)
def softmax_cross_entropy_with_logits(
def sparse_softmax_cross_entropy_with_logits(_sentinel=None, labels=None, logits=None, dim=-1, name=None): _sentinel=None,
return dragon.ops.SparseSoftmaxCrossEntropy([logits, labels], labels=None,
axis=dim, normalization='UNIT', name=name) logits=None,
dim=-1,
name=None,
):
return _ops.SoftmaxCrossEntropy(
[logits, labels],
axis=dim,
normalization='UNIT',
name=name,
)
def sparse_softmax_cross_entropy_with_logits(
_sentinel=None,
labels=None,
logits=None,
dim=-1,
name=None,
):
return _ops.SparseSoftmaxCrossEntropy(
[logits, labels],
axis=dim,
normalization='UNIT',
name=name,
)
def l2_loss(t, name=None): def l2_loss(t, name=None):
return dragon.ops.L2Loss(t, normalization='NONE', name=name) return _ops.L2Loss(t, normalization='NONE', name=name)
def dropout(x, keep_prob, name=None): def dropout(x, keep_prob, name=None):
return dragon.ops.Dropout(x, 1 - keep_prob, name=name) return _ops.Dropout(x, 1. - keep_prob, name=name)
...@@ -13,34 +13,38 @@ from __future__ import absolute_import ...@@ -13,34 +13,38 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import ops as _ops
from dragon.vm.tensorflow.framework import dtypes from dragon.vm.tensorflow.framework import dtypes
__all__ = [
'random_normal',
'truncated_normal',
'random_uniform'
]
def random_normal( def random_normal(
shape, mean=0.0, stddev=1.0, shape,
dtype=dtypes.float32, mean=0.0,
seed=None, name=None): stddev=1.0,
return dragon.ops.RandomNormal(shape, mean, stddev, name=name) dtype=dtypes.float32,
seed=None,
name=None,
):
return _ops.RandomNormal(shape, mean, stddev, name=name)
def truncated_normal( def truncated_normal(
shape, mean=0.0, stddev=1.0, shape,
dtype=dtypes.float32, mean=0.0,
seed=None, name=None): stddev=1.0,
return dragon.ops.TruncatedNormal(shape, mean, stddev, name=name) dtype=dtypes.float32,
seed=None,
name=None,
):
return _ops.TruncatedNormal(shape, mean, stddev, name=name)
def random_uniform( def random_uniform(
shape, minval=0, maxval=None, shape,
dtype=dtypes.float32, minval=0,
seed=None, name=None): maxval=None,
return dragon.ops.RandomUniform(shape, minval, maxval, name=name) dtype=dtypes.float32,
\ No newline at end of file seed=None,
name=None,
):
return _ops.RandomUniform(shape, minval, maxval, name=name)
\ No newline at end of file
...@@ -13,12 +13,13 @@ from __future__ import absolute_import ...@@ -13,12 +13,13 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon
import threading import threading
from dragon.core import tls as _tls
from dragon.core import scope as _scope
from dragon.vm.tensorflow.framework import dtypes, ops from dragon.vm.tensorflow.framework import dtypes, ops
from dragon.vm.tensorflow.ops.variables import Variable from dragon.vm.tensorflow.ops.variables import Variable
from dragon.vm.tensorflow.framework.ops import _DefaultStack
from dragon.vm.tensorflow.ops import init_ops from dragon.vm.tensorflow.ops import init_ops
...@@ -87,7 +88,8 @@ class VariableScope(object): ...@@ -87,7 +88,8 @@ class VariableScope(object):
""" """
return self._vars return self._vars
def get_variable(self, def get_variable(
self,
name, name,
shape=None, shape=None,
dtype=None, dtype=None,
...@@ -95,7 +97,8 @@ class VariableScope(object): ...@@ -95,7 +97,8 @@ class VariableScope(object):
regularizer=None, regularizer=None,
trainable=True, trainable=True,
collections=None, collections=None,
validate_shape=True): validate_shape=True,
):
excepted_name = self.name + name excepted_name = self.name + name
if excepted_name not in self._vars: if excepted_name not in self._vars:
# Create a new variable # Create a new variable
...@@ -112,7 +115,8 @@ class VariableScope(object): ...@@ -112,7 +115,8 @@ class VariableScope(object):
collections=collections, collections=collections,
validate_shape=validate_shape, validate_shape=validate_shape,
name_from_variable_scope=excepted_name, name_from_variable_scope=excepted_name,
dtype=dtype) dtype=dtype,
)
self._vars[excepted_name] = variable self._vars[excepted_name] = variable
return variable return variable
else: else:
...@@ -132,7 +136,12 @@ class VariableScope(object): ...@@ -132,7 +136,12 @@ class VariableScope(object):
get_variable_scope_store().close() get_variable_scope_store().close()
self._name_scope_ctx.__exit__(type, value, traceback) self._name_scope_ctx.__exit__(type, value, traceback)
def _get_default_initializer(self, name, shape=None, dtype=dtypes.float32): def _get_default_initializer(
self,
name,
shape=None,
dtype=dtypes.float32,
):
# Defaults: float32 # Defaults: float32
if dtype is None: if dtype is None:
dtype = dtypes.float32 dtype = dtypes.float32
...@@ -162,32 +171,35 @@ def variable_scope(name_or_scope, reuse=None, **kwargs): ...@@ -162,32 +171,35 @@ def variable_scope(name_or_scope, reuse=None, **kwargs):
prefix = name_or_scope + '/' if name_or_scope != '' else '' prefix = name_or_scope + '/' if name_or_scope != '' else ''
vs_store = get_variable_scope_store() vs_store = get_variable_scope_store()
vs_name = vs_store.current_scope.name + prefix vs_name = vs_store.current_scope.name + prefix
original_name_scope = dragon.get_default_name_scope() + prefix original_name_scope = _scope.get_default_name_scope() + prefix
vs = VariableScope(reuse, name=vs_name, name_scope=original_name_scope) vs = VariableScope(reuse, name=vs_name, name_scope=original_name_scope)
# Store the ctx manager instead of returning # Store the ctx manager instead of returning
# As we should return a VariableScope # As we should return a VariableScope
vs._name_scope_ctx = dragon.name_scope(name_or_scope) vs._name_scope_ctx = _scope.name_scope(name_or_scope)
return vs return vs
def get_variable(name, def get_variable(
shape=None, name,
dtype=None, shape=None,
initializer=None, dtype=None,
regularizer=None, initializer=None,
trainable=True, regularizer=None,
collections=None, trainable=True,
validate_shape=True, collections=None,
**kwargs): validate_shape=True,
**kwargs
):
return get_variable_scope().get_variable( return get_variable_scope().get_variable(
name, shape=shape, dtype=dtype, name,
initializer=initializer, regularizer=regularizer, shape=shape,
trainable=trainable, collections=collections, dtype=dtype,
validate_shape=validate_shape) initializer=initializer,
regularizer=regularizer,
trainable=trainable,
_GLOBAL_VARIABLE_SCOPE_STORE_KEY = ("__varscope",) collections=collections,
_GLOBAL_VARIABLE_SCOPE_STACK = _DefaultStack() validate_shape=validate_shape,
)
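A brief sketch of how the scoping above composes: each ``variable_scope`` appends ``name + '/'`` to the store's current prefix, and ``get_variable`` registers the variable under that prefixed name (assumed ``tf`` alias):

import dragon.vm.tensorflow as tf  # assumed alias

with tf.variable_scope('model'):
    with tf.variable_scope('conv1'):
        w = tf.get_variable('w', shape=[3, 3, 3, 16])   # stored as 'model/conv1/w'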
class _VariableScopeStore(threading.local): class _VariableScopeStore(threading.local):
...@@ -220,3 +232,7 @@ def get_variable_scope_store(): ...@@ -220,3 +232,7 @@ def get_variable_scope_store():
def get_variable_scope(): def get_variable_scope():
"""Returns the current variable scope.""" """Returns the current variable scope."""
return get_variable_scope_store().current_scope return get_variable_scope_store().current_scope
_GLOBAL_VARIABLE_SCOPE_STORE_KEY = ("__varscope",)
_GLOBAL_VARIABLE_SCOPE_STACK = _tls.Stack()
\ No newline at end of file
...@@ -14,24 +14,30 @@ from __future__ import division ...@@ -14,24 +14,30 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import copy import copy
import dragon
from dragon.core import scope as _scope
from dragon.core import workspace as _workspace
from dragon.core.tensor import Tensor as _Tensor
from dragon.vm.theano.compile import function as _Function
from dragon.vm.tensorflow.framework import ops, constant_op from dragon.vm.tensorflow.framework import ops, constant_op
from dragon.vm.tensorflow.util.deprecation import deprecated from dragon.vm.tensorflow.util.deprecation import deprecated
class Variable(dragon.Tensor): class Variable(_Tensor):
"""Construct a Variable.""" """Construct a Variable."""
def __init__(self, def __init__(
initial_value=None, self,
trainable=True, initial_value=None,
collections=None, trainable=True,
validate_shape=True, collections=None,
name=None, validate_shape=True,
dtype=None, name=None,
regularizer=None, dtype=None,
**kwargs): regularizer=None,
**kwargs
):
super(Variable, self).__init__() super(Variable, self).__init__()
if initial_value is None: if initial_value is None:
...@@ -50,35 +56,39 @@ class Variable(dragon.Tensor): ...@@ -50,35 +56,39 @@ class Variable(dragon.Tensor):
if name is not None: if name is not None:
# Get a known name from the name scope # Get a known name from the name scope
defined_name = dragon.get_default_name_scope() + name defined_name = _scope.get_default_name_scope() + name
else: else:
if 'name_from_variable_scope' in kwargs: if 'name_from_variable_scope' in kwargs:
# Has a name from the variable scope # Has a name from the variable scope
defined_name = kwargs['name_from_variable_scope'] defined_name = kwargs['name_from_variable_scope']
else: else:
# Get a auto name from the name scope # Get a auto name from the name scope
defined_name = dragon.get_default_name_scope() + 'Variable' defined_name = _scope.get_default_name_scope() + 'Variable'
# Set the name explicitly # Set the name explicitly
self.set_name(dragon.workspace.GetDummyName( self.set_name(_workspace.GetDummyName(
defined_name, suffix=':0', domain='Tensor')) defined_name, suffix=':0', domain='Tensor'))
# Initializer # Initializer
if isinstance(initial_value, dragon.Tensor) and \ if isinstance(initial_value, _Tensor) and \
len(initial_value.expressions) == 1: len(initial_value.expressions) == 1:
# From a initializing ops # From a initializing ops
self.shape, self.dtype = initial_value.shape[:], initial_value.dtype self.shape, self.dtype = \
initial_value.shape[:], \
initial_value.dtype
init_expr = copy.deepcopy(initial_value.expressions) init_expr = copy.deepcopy(initial_value.expressions)
for k, v in init_expr.items(): for k, v in init_expr.items():
init_expr[k].output[0] = self.name init_expr[k].output[0] = self.name
self.__init_expr__ = init_expr self.__init_expr__ = init_expr
else: else:
# From a const tensor # From a const tensor
if not isinstance(initial_value, dragon.Tensor): if not isinstance(initial_value, _Tensor):
initial_value = constant_op.constant( initial_value = constant_op.constant(
initial_value, name=name, dtype=dtype) initial_value, name=name, dtype=dtype)
self.set_value(initial_value.get_value()) self.set_value(initial_value.get_value())
self.shape, self.dtype = initial_value.shape, initial_value.dtype self.shape, self.dtype = \
initial_value.shape, \
initial_value.dtype
# Regularizer # Regularizer
self.__regularizer__ = regularizer self.__regularizer__ = regularizer
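A sketch of the two initialization paths handled above: an initializer op whose output gets redirected to this Variable's name, versus a constant value that is converted and fed immediately (assumed ``tf`` alias):

import numpy
import dragon.vm.tensorflow as tf  # assumed alias

v1 = tf.Variable(tf.random_normal([2, 3]), name='v1')         # from an initializing op
v2 = tf.Variable(numpy.zeros((2, 3), 'float32'), name='v2')   # from a constant value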
...@@ -121,8 +131,9 @@ class VariablesInitializer(object): ...@@ -121,8 +131,9 @@ class VariablesInitializer(object):
def run(self): def run(self):
if not hasattr(self, '_init_func'): if not hasattr(self, '_init_func'):
self._init_func = dragon.function(outputs=self.var_list) \ self._init_func = _Function(
if len(self.var_list) > 0 else None outputs=self.var_list) \
if len(self.var_list) > 0 else None
if self._init_func: self._init_func() if self._init_func: self._init_func()
......
...@@ -14,9 +14,10 @@ from __future__ import division ...@@ -14,9 +14,10 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import math import math
import dragon import numpy
import numpy as np
from dragon.ops import Run as _RunOp
from dragon.core import workspace as _workspace
from dragon.vm.tensorflow.framework import ops from dragon.vm.tensorflow.framework import ops
...@@ -25,11 +26,11 @@ class _DecayBase(object): ...@@ -25,11 +26,11 @@ class _DecayBase(object):
self.param_str = '' self.param_str = ''
def set(self, tensor, value, dtype=None): def set(self, tensor, value, dtype=None):
dragon.workspace.FeedTensor(tensor, _workspace.FeedTensor(tensor,
value, dtype=dtype, force_cpu=True) value, dtype=dtype, force_cpu=True)
def get(self, tensor): def get(self, tensor):
return dragon.workspace.FetchTensor(tensor) return _workspace.FetchTensor(tensor)
class _PiecewiseConstant(_DecayBase): class _PiecewiseConstant(_DecayBase):
...@@ -120,8 +121,9 @@ class _CosineDecayRestarts(_DecayBase): ...@@ -120,8 +121,9 @@ class _CosineDecayRestarts(_DecayBase):
def run(self, inputs, outputs): def run(self, inputs, outputs):
gs = self.get(inputs[0]) gs = self.get(inputs[0])
global_step = min(gs - self.last_steps, self.decay_steps) global_step = gs - self.last_steps
cosine_decay = 0.5 * (1 + math.cos(math.pi * global_step / self.decay_steps)) cosine_decay = 0.5 * (1. + math.cos(
math.pi * global_step / self.decay_steps))
decayed = (1. - self.alpha) * cosine_decay + self.alpha decayed = (1. - self.alpha) * cosine_decay + self.alpha
new_lr = self.learning_rate * decayed new_lr = self.learning_rate * decayed
# Restarts # Restarts
...@@ -132,94 +134,122 @@ class _CosineDecayRestarts(_DecayBase): ...@@ -132,94 +134,122 @@ class _CosineDecayRestarts(_DecayBase):
self.set(outputs[0], new_lr, dtype='float32') self.set(outputs[0], new_lr, dtype='float32')
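A worked instance of the schedule computed above, in plain Python: halfway through a cycle the cosine factor is 0.5, so with ``alpha=0`` the learning rate is halved.

import math

learning_rate, alpha, decay_steps, global_step = 0.1, 0., 100, 50
cosine_decay = 0.5 * (1. + math.cos(math.pi * global_step / decay_steps))   # = 0.5
decayed = (1. - alpha) * cosine_decay + alpha                               # = 0.5
new_lr = learning_rate * decayed                                            # = 0.05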
def piecewise_constant(x, boundaries, values, name=None): def piecewise_constant(
x,
boundaries,
values,
name=None,
):
if len(values) != len(boundaries) + 1: if len(values) != len(boundaries) + 1:
raise ValueError('Excepted {} values, got {}.'.format( raise ValueError('Excepted {} values, got {}.'.format(
len(boundaries) + 1, len(values))) len(boundaries) + 1, len(values)))
lr = dragon.ops.Run([ops.convert_to_tensor(x)], lr = _RunOp(
module=__name__, inputs=[ops.convert_to_tensor(x)],
op='_PiecewiseConstant', module=__name__,
param_str=str({ op='_PiecewiseConstant',
'boundaries': boundaries, param_str=str({
'values': values}), 'boundaries': boundaries,
name=name) 'values': values,
lr.set_value(np.array(values[0], dtype=np.float32)) }),
name=name,
)
lr.set_value(numpy.array(values[0], dtype='float32'))
return lr return lr
def exponential_decay(learning_rate, def exponential_decay(
global_step, learning_rate,
decay_steps, global_step,
decay_rate, decay_steps,
staircase=False, decay_rate,
name=None): staircase=False,
lr = dragon.ops.Run([ops.convert_to_tensor(global_step)], name=None,
module=__name__, ):
op='_ExponentialDecay', lr = _RunOp(
param_str=str({ inputs=[ops.convert_to_tensor(global_step)],
'learning_rate': learning_rate, module=__name__,
'decay_steps': decay_steps, op='_ExponentialDecay',
'decay_rate': decay_rate, param_str=str({
'staircase': staircase}), 'learning_rate': learning_rate,
name=name) 'decay_steps': decay_steps,
lr.set_value(np.array(learning_rate, dtype=np.float32)) 'decay_rate': decay_rate,
'staircase': staircase,
}),
name=name,
)
lr.set_value(numpy.array(learning_rate, dtype='float32'))
return lr return lr
def natural_exp_decay(learning_rate, def natural_exp_decay(
global_step, learning_rate,
decay_steps, global_step,
decay_rate, decay_steps,
staircase=False, decay_rate,
name=None): staircase=False,
lr = dragon.ops.Run([ops.convert_to_tensor(global_step)], name=None,
module=__name__, ):
op='_NaturalExpDecay', lr = _RunOp(
param_str=str({ inputs=[ops.convert_to_tensor(global_step)],
'learning_rate': learning_rate, module=__name__,
'decay_steps': decay_steps, op='_NaturalExpDecay',
'decay_rate': decay_rate, param_str=str({
'staircase': staircase}), 'learning_rate': learning_rate,
name=name) 'decay_steps': decay_steps,
lr.set_value(np.array(learning_rate, dtype=np.float32)) 'decay_rate': decay_rate,
'staircase': staircase,
}),
name=name,
)
lr.set_value(numpy.array(learning_rate, dtype='float32'))
return lr return lr
def cosine_decay(learning_rate, def cosine_decay(
global_step, learning_rate,
decay_steps, global_step,
alpha=0.0, decay_steps,
name=None): alpha=0.0,
lr = dragon.ops.Run([ops.convert_to_tensor(global_step)], name=None,
module=__name__, ):
op='_CosineDecay', lr = _RunOp(
param_str=str({ inputs=[ops.convert_to_tensor(global_step)],
'learning_rate': learning_rate, module=__name__,
'decay_steps': decay_steps, op='_CosineDecay',
'alpha': alpha}), param_str=str({
name=name) 'learning_rate': learning_rate,
lr.set_value(np.array(learning_rate, dtype=np.float32)) 'decay_steps': decay_steps,
'alpha': alpha,
}),
name=name,
)
lr.set_value(numpy.array(learning_rate, dtype='float32'))
return lr return lr
def cosine_decay_restarts(learning_rate, def cosine_decay_restarts(
global_step, learning_rate,
first_decay_steps, global_step,
t_mul=2.0, first_decay_steps,
m_mul=1.0, t_mul=2.0,
alpha=0.0, m_mul=1.0,
name=None): alpha=0.0,
lr = dragon.ops.Run([ops.convert_to_tensor(global_step)], name=None,
module=__name__, ):
op='_CosineDecayRestarts', lr = _RunOp(
param_str=str({ inputs=[ops.convert_to_tensor(global_step)],
'learning_rate': learning_rate, module=__name__,
'first_decay_steps': first_decay_steps, op='_CosineDecayRestarts',
't_mul': t_mul, param_str=str({
'm_mul': m_mul, 'learning_rate': learning_rate,
'alpha': alpha}), 'first_decay_steps': first_decay_steps,
name=name) 't_mul': t_mul,
lr.set_value(np.array(learning_rate, dtype=np.float32)) 'm_mul': m_mul,
'alpha': alpha
}),
name=name,
)
lr.set_value(numpy.array(learning_rate, dtype='float32'))
return lr return lr
......
...@@ -13,10 +13,13 @@ from __future__ import absolute_import ...@@ -13,10 +13,13 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon from dragon import updaters as _updaters
from dragon.core import workspace as _workspace
from dragon.core.tensor import Tensor as _Tensor
from dragon.vm.tensorflow.framework import ops from dragon.vm.tensorflow.framework import ops
from dragon.vm.tensorflow.ops import variables from dragon.vm.tensorflow.ops import variables
from dragon.vm.tensorflow.ops.gradients_impl import gradients
class Optimizer(object): class Optimizer(object):
...@@ -34,16 +37,16 @@ class Optimizer(object): ...@@ -34,16 +37,16 @@ class Optimizer(object):
self.updater = self.train = self.update = None self.updater = self.train = self.update = None
def _set_dynamic_lr(self, learning_rate): def _set_dynamic_lr(self, learning_rate):
if isinstance(learning_rate, dragon.Tensor): if isinstance(learning_rate, _Tensor):
self._targets.append(learning_rate) self._targets.append(learning_rate)
internal_lr = self.updater._slot + '/base_lr' internal_lr = self.updater._slot + '/base_lr'
dragon.workspace.SetTensorAlias(learning_rate.name, internal_lr) _workspace.SetTensorAlias(learning_rate, internal_lr)
self.updater.base_lr = float(learning_rate.get_value()) self.updater.base_lr = float(learning_rate.get_value())
def _inc_global_step(self): def _inc_global_step(self):
if self._global_step is not None: if self._global_step is not None:
gs = self._global_step.get_value() v = self._global_step.get_value() + 1
self._global_step.set_value((gs + 1).astype(gs.dtype)) _workspace.FeedTensor(self._global_step, v, True)
def get_name(self): def get_name(self):
return self._name return self._name
...@@ -57,7 +60,7 @@ class Optimizer(object): ...@@ -57,7 +60,7 @@ class Optimizer(object):
if var_list is None: if var_list is None:
var_list = variables.trainable_variables() + \ var_list = variables.trainable_variables() + \
ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES) ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
grads = dragon.grad(loss, var_list) grads = gradients(loss, var_list)
grads_and_vars = list(zip(grads, var_list)) grads_and_vars = list(zip(grads, var_list))
return grads_and_vars return grads_and_vars
...@@ -95,34 +98,66 @@ class Optimizer(object): ...@@ -95,34 +98,66 @@ class Optimizer(object):
class GradientDescentOptimizer(Optimizer): class GradientDescentOptimizer(Optimizer):
def __init__(self, learning_rate, use_locking=False, name='GradientDescent'): def __init__(
self,
learning_rate,
use_locking=False,
name='GradientDescent',
):
super(GradientDescentOptimizer, self).__init__(use_locking, name) super(GradientDescentOptimizer, self).__init__(use_locking, name)
self.updater = dragon.updaters.SGDUpdater(learning_rate, 0.0) self.updater = _updaters.SGDUpdater(learning_rate, 0.)
self._set_dynamic_lr(learning_rate) self._set_dynamic_lr(learning_rate)
class MomentumOptimizer(Optimizer): class MomentumOptimizer(Optimizer):
def __init__(self, learning_rate, momentum, def __init__(
use_locking=False, name='Momentum', use_nesterov=False): self,
learning_rate,
momentum,
use_locking=False,
name='Momentum',
use_nesterov=False,
):
super(MomentumOptimizer, self).__init__(use_locking, name) super(MomentumOptimizer, self).__init__(use_locking, name)
if not use_nesterov: if not use_nesterov:
self.updater = dragon.updaters.SGDUpdater(learning_rate, momentum) self.updater = _updaters.SGDUpdater(learning_rate, momentum)
else: else:
self.updater = dragon.updaters.NesterovUpdater(learning_rate, momentum) self.updater = _updaters.NesterovUpdater(learning_rate, momentum)
self._set_dynamic_lr(learning_rate) self._set_dynamic_lr(learning_rate)
class AdamOptimizer(Optimizer): class AdamOptimizer(Optimizer):
def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, def __init__(
use_locking=False, name='Adam'): self,
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-8,
use_locking=False,
name='Adam',
):
super(AdamOptimizer, self).__init__(use_locking, name) super(AdamOptimizer, self).__init__(use_locking, name)
self.updater = dragon.updaters.AdamUpdater(learning_rate, beta1, beta2, epsilon) self.updater = _updaters.AdamUpdater(
learning_rate, beta1, beta2, epsilon)
self._set_dynamic_lr(learning_rate) self._set_dynamic_lr(learning_rate)
class RMSPropOptimizer(Optimizer): class RMSPropOptimizer(Optimizer):
def __init__(self, learning_rate, decay=0.9, momentum=0.0, epsilon=1e-10, def __init__(
use_locking=False, centered=False, name='RMSProp'): self,
learning_rate,
decay=0.9,
momentum=0.0,
epsilon=1e-10,
use_locking=False,
centered=False,
name='RMSProp',
):
super(RMSPropOptimizer, self).__init__(use_locking, name) super(RMSPropOptimizer, self).__init__(use_locking, name)
self.updater = dragon.updaters.RMSPropUpdater(learning_rate, decay, epsilon) if momentum > 0.:
self.updater = _updaters.AdamUpdater(
learning_rate, momentum, decay, epsilon)
else:
self.updater = _updaters.RMSPropUpdater(
learning_rate, decay, epsilon)
self._set_dynamic_lr(learning_rate) self._set_dynamic_lr(learning_rate)
\ No newline at end of file
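A usage sketch tying the decay schedules to an optimizer: a Tensor learning rate is accepted because ``_set_dynamic_lr`` aliases it to ``<slot>/base_lr`` in the workspace (assumed ``tf`` alias and ``tf.train`` exposure):

import dragon.vm.tensorflow as tf  # assumed alias

global_step = tf.Variable(0, trainable=False, name='global_step')
lr = tf.train.exponential_decay(0.1, global_step, decay_steps=1000,
                                decay_rate=0.96, staircase=True)
opt = tf.train.MomentumOptimizer(lr, momentum=0.9)
# grads_and_vars = opt.compute_gradients(loss)   # 'loss' is a Tensor defined elsewhere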
...@@ -13,14 +13,14 @@ from __future__ import absolute_import ...@@ -13,14 +13,14 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.vm.tensorflow.training.optimizer import ( from .optimizer import (
GradientDescentOptimizer, GradientDescentOptimizer,
MomentumOptimizer, MomentumOptimizer,
RMSPropOptimizer, RMSPropOptimizer,
AdamOptimizer, AdamOptimizer,
) )
from dragon.vm.tensorflow.training.learning_rate_decay import ( from .learning_rate_decay import (
piecewise_constant, piecewise_constant,
piecewise_constant_decay, piecewise_constant_decay,
exponential_decay, exponential_decay,
......
...@@ -9,9 +9,12 @@ ...@@ -9,9 +9,12 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from .compile import ( from __future__ import absolute_import
function, from __future__ import division
scan, from __future__ import print_function
shared)
from .configdefaults import config from dragon.vm.theano import tensor
\ No newline at end of file from dragon.vm.theano.compile import scan
from dragon.vm.theano.compile import shared
from dragon.vm.theano.compile import function
from dragon.vm.theano.configdefaults import config
...@@ -9,24 +9,27 @@ ...@@ -9,24 +9,27 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
import copy import copy
import numpy as np
import dragon.core.mpi as mpi
import dragon.core.workspace as ws
import dragon.core.logging as logging
import dragon.proto.dragon_pb2 as pb
from dragon.core.proto_utils import MakeArgument from dragon import config as _cfg
from dragon.core.helper import OperatorHelper from dragon.core.tensor import Tensor as _Tensor
from dragon.core.gradient_maker import GraphGradientMaker from dragon.core import mpi as _mpi
from dragon.core.scope import get_default_phase from dragon.core import scope as _scope
from dragon.core.tensor import Tensor from dragon.core import helper as _helper
from dragon.core import logging as _logging
from dragon.core import workspace as _workspace
from dragon.proto import dragon_pb2 as _proto_def
from dragon.core import proto_utils as _proto_utils
from dragon.core import gradient_maker as _gradient_maker
def GraphDef_Grad(graph_def, targets): def _inject_gradients(graph_def, targets):
"""Inject the gradient targets into GraphDef. """Inject the gradients into GraphDef.
Parameters Parameters
---------- ----------
...@@ -44,18 +47,18 @@ def GraphDef_Grad(graph_def, targets): ...@@ -44,18 +47,18 @@ def GraphDef_Grad(graph_def, targets):
`T.grad(*args, **kwargs)`_ - How to generate gradient targets. `T.grad(*args, **kwargs)`_ - How to generate gradient targets.
""" """
all_pairs = set() gradients = set()
for target in targets: for target in targets:
all_pairs.update(target.gradient.make_pairs()) gradients.update(target.gradient.make_pairs())
for pair in all_pairs: for (cost, wrt) in gradients:
gradient = pb.GradientProto() gradient = _proto_def.GradientProto()
gradient.cost, gradient.wrt = str(pair[0]), str(pair[1]) gradient.cost, gradient.wrt = str(cost), str(wrt)
graph_def.gradient.extend([gradient]) graph_def.gradient.extend([gradient])
def GraphDef_Phase(graph_def, targets): def _inject_phase(graph_def, targets):
"""Inject the phase into GraphDef. """Inject the phase info into GraphDef.
If existing gradients, we assume it should be ``TRAIN``, and vice versa. If existing gradients, we assume it should be ``TRAIN``, and vice versa.
...@@ -71,18 +74,20 @@ def GraphDef_Phase(graph_def, targets): ...@@ -71,18 +74,20 @@ def GraphDef_Phase(graph_def, targets):
None None
""" """
phase = get_default_phase() phase = _scope.get_default_phase()
if phase is None: if phase is None:
phase = 'TEST' phase = 'TEST'
for target in targets: for target in targets:
if target.gradient.required(): if target.gradient.required():
phase = 'TRAIN' phase = 'TRAIN'
break break
graph_def.arg.extend([MakeArgument('phase', phase)]) graph_def.arg.extend([
_proto_utils.MakeArgument(
'phase', phase)])
def GraphDef_Update(graph_def, updater): def _inject_update_ops(graph_def, updater):
"""Inject the update targets into GraphDef. """Inject the update ops GraphDef.
The ``updater`` should generate update targets before. The ``updater`` should generate update targets before.
...@@ -99,43 +104,61 @@ def GraphDef_Update(graph_def, updater): ...@@ -99,43 +104,61 @@ def GraphDef_Update(graph_def, updater):
""" """
if updater is None: return if updater is None: return
updater.register_in_workspace()
grads, update_ops = [], []
extra_arguments = updater._extra_kwargs extra_arguments = updater._extra_kwargs
extra_arguments['slot'] = updater._slot extra_arguments['slot'] = updater._slot
parallel_arguments = {}
updater.register_in_workspace()
# Check data parallel if necessary
if mpi.Is_Init():
idx, group = mpi.AllowParallel()
if idx != -1:
parallel_arguments['parallel_mode'] = mpi.GetParallelMode()
parallel_arguments['comm'], parallel_arguments['group'] \
= mpi.CreateGroup(root=group[0], incl=group)
parallel_arguments['root'] = group[0]
for k, v in parallel_arguments.items():
graph_def.arg.add().CopyFrom(MakeArgument(k, v))
# Build update ops according to the updater
for e in updater._param_group: for e in updater._param_group:
pair, arguments = e (param, grad), arguments = e
kwargs = dict(arguments, **extra_arguments) if _workspace.HasTensor(grad):
u_target = pb.UpdaterProto() grads.append(grad)
u_target.type = updater.type() arguments = dict(arguments, **extra_arguments)
u_target.name = OperatorHelper.get_name() update_ops.append(
u_target.tensor.extend(pair) _proto_utils.
for k, v in kwargs.items(): MakeOperatorDef(
u_target.arg.add().CopyFrom(MakeArgument(k, v)) op_type=updater.type(),
graph_def.updater.extend([u_target]) inputs=[grad],
outputs=[param],
name=_helper.OperatorHelper.get_name(),
**arguments
)
)
else:
_logging.info('Skipping update of Tensor({}).'.format(param))
def GraphDef_Opt(graph_def): # Check data parallel if necessary
"""Inject the optimization options into GraphDef. if _mpi.Is_Init():
(rank, group), arguments = _mpi.AllowParallel(), {}
if rank != -1:
arguments['parallel_mode'] = _mpi.GetParallelMode()
arguments['root'], (arguments['comm'], arguments['group']) \
= group[0], _mpi.CreateGroup(root=group[0], incl=group)
update_ops.insert(
0, _proto_utils.
MakeOperatorDef(
op_type='CollectiveUpdate',
inputs=grads,
outputs=grads,
name=_helper.OperatorHelper.get_name(),
**arguments
)
)
graph_def.op.extend(update_ops)
def _inject_optimization(graph_def, opt_level=None):
"""Inject the optimization info into GraphDef.
Parameters Parameters
---------- ----------
graph_def : GraphDef graph_def : GraphDef
The definition of graph. The definition of graph.
opt_level : int, optional
The optimization level.
Returns Returns
------- -------
...@@ -148,15 +171,19 @@ def GraphDef_Opt(graph_def): ...@@ -148,15 +171,19 @@ def GraphDef_Opt(graph_def):
`memonger.share_grads(*args, **kwargs)`_ - How to enable gradient sharing. `memonger.share_grads(*args, **kwargs)`_ - How to enable gradient sharing.
""" """
from dragon.config import option options = _cfg.GetGlobalOptions()
OX = option['graph_optimization_level'] if opt_level is None:
if not option['share_grads'] and OX >= 3: OX = 2 opt_level = options['graph_optimization_level']
graph_def.arg.add().CopyFrom(MakeArgument('optimization_level', OX)) if not options['share_grads'] and \
graph_def.graph_type = option['graph_type'] opt_level >= 3: opt_level = 2
graph_def.arg.add().CopyFrom(
_proto_utils.MakeArgument(
'optimization_level', opt_level))
graph_def.graph_type = options['graph_type']
def GraphDef_Device(graph_def): def _inject_device(graph_def):
"""Inject the device option into GraphDef. """Inject the device info into GraphDef.
Parameters Parameters
---------- ----------
...@@ -176,13 +203,13 @@ def GraphDef_Device(graph_def): ...@@ -176,13 +203,13 @@ def GraphDef_Device(graph_def):
`config.SetRandomSeed(*args, **kwargs)`_ - How to set random seed. `config.SetRandomSeed(*args, **kwargs)`_ - How to set random seed.
""" """
from dragon.config import option options = _cfg.GetGlobalOptions()
if option['device'] is not 'None': if options['device'] != 'none':
supports = {'cpu': 0, 'cuda': 1, 'cnml': 2} supports = {'cpu': 0, 'cuda': 1, 'cnml': 2}
device_option = pb.DeviceOption() device_option = _proto_def.DeviceOption()
device_option.device_type = supports[option['device']] device_option.device_type = supports[options['device']]
device_option.device_id = option['device_id'] device_option.device_id = options['device_id']
device_option.random_seed = option['random_seed'] device_option.random_seed = options['random_seed']
graph_def.device_option.CopyFrom(device_option) graph_def.device_option.CopyFrom(device_option)
...@@ -194,7 +221,7 @@ class Function(object): ...@@ -194,7 +221,7 @@ class Function(object):
""" """
def __init__(self, name=None): def __init__(self, name=None):
self.callback = None self.callback = None
self.meta_graph = pb.GraphDef() self.meta_graph = _proto_def.GraphDef()
self.meta_graph.name = name if name else 'Graph' self.meta_graph.name = name if name else 'Graph'
self.graph_name = None # Determined after creating self.graph_name = None # Determined after creating
...@@ -237,7 +264,7 @@ class Function(object): ...@@ -237,7 +264,7 @@ class Function(object):
external_input_expressions = {} external_input_expressions = {}
# Extract new ops # Extract new ops
for old_tensor, new_tensor in givens.items(): for old_tensor, new_tensor in givens.items():
if isinstance(new_tensor, Tensor): if isinstance(new_tensor, _Tensor):
name_dict[old_tensor.name] = new_tensor.name name_dict[old_tensor.name] = new_tensor.name
external_input_expressions.update(new_tensor.expressions) external_input_expressions.update(new_tensor.expressions)
else: else:
...@@ -259,7 +286,8 @@ class Function(object): ...@@ -259,7 +286,8 @@ class Function(object):
targets = [output.name for output in outputs] targets = [output.name for output in outputs]
targets.extend(all_extra_targets) targets.extend(all_extra_targets)
forward_ops, grad_ops, _ = \ forward_ops, grad_ops, _ = \
GraphGradientMaker.Make(forward_ops, targets) _gradient_maker.GraphGradientMaker \
.Make(forward_ops, targets)
else: else:
grad_ops = [] grad_ops = []
...@@ -276,26 +304,29 @@ class Function(object): ...@@ -276,26 +304,29 @@ class Function(object):
self.inputs, self.outputs = inputs, outputs self.inputs, self.outputs = inputs, outputs
# Write Misc # Inject arguments based on global options
if len(outputs) > 0: if len(outputs) > 0:
GraphDef_Device(meta_graph) _inject_device(meta_graph)
GraphDef_Opt(meta_graph) _inject_optimization(meta_graph)
GraphDef_Grad(meta_graph, outputs) _inject_gradients(meta_graph, outputs)
GraphDef_Phase(meta_graph, outputs) _inject_phase(meta_graph, outputs)
elif updater is not None: elif updater is not None:
GraphDef_Device(meta_graph) _inject_device(meta_graph)
GraphDef_Opt(meta_graph) _inject_optimization(meta_graph, opt_level=0)
GraphDef_Update(meta_graph, updater) _inject_update_ops(meta_graph, updater)
# Call c api to create graph # Call c api to create graph
self.graph_name = ws.CreateGraph(meta_graph) self.graph_name = _workspace.CreateGraph(meta_graph)
# Bind a lambda callback to run this graph # Bind a lambda callback to run this graph
self.callback = lambda *args, **kwargs: \ self.callback = lambda *args, **kwargs: \
ws.RunGraph(self.graph_name, (inputs, args), outputs, **kwargs) _workspace.RunGraph(
graph_name=self.graph_name,
inputs=(inputs, args),
outputs=outputs, **kwargs)
# Self return # Return self
return self return self
def export_to(self, name=None, export_dir='./'): def export_to(self, name=None, export_dir='./'):
...@@ -320,7 +351,7 @@ class Function(object): ...@@ -320,7 +351,7 @@ class Function(object):
meta_graph_copy.name = self.meta_graph.name if name is None else name meta_graph_copy.name = self.meta_graph.name if name is None else name
file = os.path.join(export_dir, meta_graph_copy.name + '.metatxt') file = os.path.join(export_dir, meta_graph_copy.name + '.metatxt')
with open(file, 'w') as f: f.write(str(meta_graph_copy)) with open(file, 'w') as f: f.write(str(meta_graph_copy))
logging.info('Export meta graph into: {}'.format(file)) _logging.info('Export meta graph into: {}'.format(file))
def import_from(self, graph_def, explicit_inputs=False): def import_from(self, graph_def, explicit_inputs=False):
"""Import the defined function from a graph def. """Import the defined function from a graph def.
...@@ -342,25 +373,28 @@ class Function(object): ...@@ -342,25 +373,28 @@ class Function(object):
The self. The self.
""" """
self.inputs = [Tensor(name=input).Variable() for input in graph_def.input] self.inputs = [_Tensor(input).Variable() for input in graph_def.input]
self.outputs = [Tensor(name=output) for output in graph_def.output] self.outputs = [_Tensor(output) for output in graph_def.output]
GraphDef_Device(graph_def) _inject_device(graph_def)
GraphDef_Opt(graph_def) _inject_optimization(graph_def)
GraphDef_Phase(graph_def, self.outputs) _inject_phase(graph_def, self.outputs)
# Store for future development # Store for future development
self.meta_graph = graph_def self.meta_graph = graph_def
# Call c api to create graph # Call c api to create graph
self.graph_name = ws.CreateGraph(graph_def) self.graph_name = _workspace.CreateGraph(graph_def)
# Bind a lambda callback to run this graph # Bind a lambda callback to run this graph
callback_inputs = self.inputs if explicit_inputs else [] callback_inputs = self.inputs if explicit_inputs else []
self.callback = lambda *args, **kwargs: \ self.callback = lambda *args, **kwargs: \
ws.RunGraph(self.graph_name, (callback_inputs, args), self.outputs, **kwargs) _workspace.RunGraph(
self.graph_name,
(callback_inputs, args),
self.outputs, **kwargs)
# Self return # Return self
return self return self
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
...@@ -396,16 +430,17 @@ def function(inputs=None, outputs=None, givens=None, updater=None): ...@@ -396,16 +430,17 @@ def function(inputs=None, outputs=None, givens=None, updater=None):
Examples Examples
-------- --------
>>> x = Tensor('x', dtype='float32').Variable() >>> import numpy, dragon
>>> x = dragon.Tensor('x', dtype='float32').Variable()
>>> y = x * 2 >>> y = x * 2
>>> f = function(outputs=y) >>> f = function(outputs=y)
>>> x.set_value(np.ones((2, 3))) >>> x.set_value(numpy.ones((2, 3)))
>>> print(f()) >>> print(f())
>>> [[ 2. 2. 2.] >>> [[ 2. 2. 2.]
[ 2. 2. 2.]] [ 2. 2. 2.]]
>>> f = function(inputs=x, outputs=y) >>> f = function(inputs=x, outputs=y)
>>> print(f(np.ones((2, 3)))) >>> print(f(numpy.ones((2, 3))))
>>> [[ 2. 2. 2.] >>> [[ 2. 2. 2.]
[ 2. 2. 2.]] [ 2. 2. 2.]]
......
...@@ -9,8 +9,11 @@ ...@@ -9,8 +9,11 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import numpy as np from __future__ import absolute_import
import dragon as dg from __future__ import division
from __future__ import print_function
from dragon.core.tensor import Tensor as _Tensor
def shared(value, name=None, **kwargs): def shared(value, name=None, **kwargs):
...@@ -18,10 +21,10 @@ def shared(value, name=None, **kwargs): ...@@ -18,10 +21,10 @@ def shared(value, name=None, **kwargs):
Parameters Parameters
---------- ----------
value : number, list or numpy.ndarray value : number, sequence or numpy.ndarray
The numerical values. The numerical values.
name : str name : str, optional
The name of tensor. The optional tensor name.
Returns Returns
------- -------
...@@ -29,8 +32,4 @@ def shared(value, name=None, **kwargs): ...@@ -29,8 +32,4 @@ def shared(value, name=None, **kwargs):
The initialized tensor. The initialized tensor.
""" """
if not isinstance(value, (int, float, list, np.ndarray)): return _Tensor(name).set_value(value)
raise TypeError("Unsupported type of value: {}".format(type(value))) \ No newline at end of file
tensor = dg.Tensor(name).Variable()
dg.workspace.FeedTensor(tensor, value)
return tensor
\ No newline at end of file
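A minimal sketch of the rewritten ``shared``: the value is fed straight into the workspace-backed tensor and can be read back with ``get_value`` (assuming the package is importable as ``dragon.vm.theano``):

import numpy
from dragon.vm import theano  # assumed alias

w = theano.shared(numpy.ones((2, 3), 'float32'), name='w')
print(w.get_value())   # a (2, 3) array of ones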
...@@ -9,6 +9,11 @@ ...@@ -9,6 +9,11 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class TheanoConfig(object): class TheanoConfig(object):
floatX = 'float32' floatX = 'float32'
......
...@@ -9,7 +9,12 @@ ...@@ -9,7 +9,12 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import dragon as dg from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.tensor import Tensor as _Tensor
from dragon.ops import StopGradient as _StopGradientOp
def grad(cost, wrt, **kwargs): def grad(cost, wrt, **kwargs):
...@@ -44,7 +49,7 @@ def grad(cost, wrt, **kwargs): ...@@ -44,7 +49,7 @@ def grad(cost, wrt, **kwargs):
for w in wrt: for w in wrt:
cost.gradient.add_wrt(w.name) cost.gradient.add_wrt(w.name)
w.gradient.add_cost(cost) w.gradient.add_cost(cost)
grads.append(dg.Tensor.Ref( grads.append(_Tensor.Ref(
name=w.name + '_grad', name=w.name + '_grad',
shape=w.shape, dtype=w.dtype)) shape=w.shape, dtype=w.dtype))
if len(grads) == 1: return grads[0] if len(grads) == 1: return grads[0]
...@@ -67,4 +72,4 @@ def disconnected_grad(x): ...@@ -67,4 +72,4 @@ def disconnected_grad(x):
The identity of input. The identity of input.
""" """
return dg.ops.StopGradient(x) return _StopGradientOp(x)
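A sketch of the usual theano-style workflow built on the helpers above, combined with ``function`` from the compile module (assumed import paths):

import numpy
from dragon.vm import theano                      # assumed alias
from dragon.vm.theano import tensor as T
from dragon.core.tensor import Tensor

x = Tensor('x', dtype='float32').Variable()
y = x * 2
dx = T.grad(y, [x])                               # a reference to 'x_grad'
f = theano.function(inputs=x, outputs=[y, dx])
# f(numpy.ones((2, 3))) returns y and the gradient of y w.r.t. x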
...@@ -9,9 +9,11 @@ ...@@ -9,9 +9,11 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from .basic import * from __future__ import absolute_import
from .extra_ops import * from __future__ import division
from __future__ import print_function
from . import nnet from dragon.vm.theano.tensor.basic import *
from dragon.vm.theano.tensor.extra_ops import *
from ..gradient import grad, disconnected_grad from dragon.vm.theano.tensor import nnet
\ No newline at end of file from dragon.vm.theano.gradient import grad, disconnected_grad
\ No newline at end of file
...@@ -9,21 +9,13 @@ ...@@ -9,21 +9,13 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import numpy as np from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.tensor import Tensor from dragon import ops as _ops
import dragon.ops as ops from dragon.core.tensor import Tensor as _Tensor
from dragon.vm.theano.configdefaults import config as _cfg
from ..configdefaults import config
_DATA_TYPES = {
'int32': np.int32,
'int64': np.int64,
'uint8': np.uint8,
'float16': np.float16,
'float32': np.float32,
'float64': np.float64,
}
def scalar(name=None, dtype=None): def scalar(name=None, dtype=None):
...@@ -44,8 +36,8 @@ def scalar(name=None, dtype=None): ...@@ -44,8 +36,8 @@ def scalar(name=None, dtype=None):
The scalar variable. The scalar variable.
""" """
if dtype is None: dtype = config.floatX if dtype is None: dtype = _cfg.floatX
return Tensor(name=name, dtype=dtype) return _Tensor(name=name, dtype=dtype)
def iscalar(name=None): def iscalar(name=None):
...@@ -65,43 +57,10 @@ def iscalar(name=None): ...@@ -65,43 +57,10 @@ def iscalar(name=None):
return scalar(name, 'int32') return scalar(name, 'int32')
def constant(x, name=None, shape=None, dtype=None):
"""Initialize a tensor with constant value.
If dtype is ``None``, use ``config.floatX``.
Parameters
----------
x : basic numerical type
The constant value.
name : str, optional
The name of Tensor.
shape : sequence of int, optional
The shape of Tensor.
dtype : str, optional
The data type of Tensor.
Returns
-------
Tensor
The initialized tensor.
"""
if dtype is None: dtype = config.floatX
else:
if dtype not in _DATA_TYPES.keys():
raise TypeError("Unsupported data type: {}".format(dtype))
if shape is None: shape = ()
np_value = x * np.ones(shape, dtype=_DATA_TYPES[dtype])
output = Tensor(name=name, shape=shape, dtype=dtype)
output.set_value(np_value)
return output
def zeros(shape, dtype=None): def zeros(shape, dtype=None):
"""Initialize a tensor with zeros. """Initialize a tensor with zeros.
If dtype is ``None``, use ``config.floatX``. If dtype is *None*, use *config.floatX*.
Parameters Parameters
---------- ----------
...@@ -116,14 +75,8 @@ def zeros(shape, dtype=None): ...@@ -116,14 +75,8 @@ def zeros(shape, dtype=None):
The initialized tensor. The initialized tensor.
""" """
if dtype is None: dtype = config.floatX if dtype is None: dtype = _cfg.floatX
else: return _ops.Fill(shape=shape, value=0, dtype=dtype)
if dtype not in _DATA_TYPES.keys():
raise TypeError("Unsupported data type: {}".format(dtype))
np_value = np.zeros(shape, dtype=_DATA_TYPES[dtype])
output = Tensor(shape=shape, dtype=dtype)
output.set_value(np_value)
return output
def zeros_like(model, dtype=None, **kwargs): def zeros_like(model, dtype=None, **kwargs):
...@@ -131,13 +84,13 @@ def zeros_like(model, dtype=None, **kwargs): ...@@ -131,13 +84,13 @@ def zeros_like(model, dtype=None, **kwargs):
The values can be accessed only after the graph has run. The values can be accessed only after the graph has run.
If dtype is ``None``, use ``config.floatX``. If dtype is *None*, use *config.floatX*.
Parameters Parameters
---------- ----------
model : Tensor model : Tensor
The tensor whose shape will be referenced. The tensor whose shape will be referenced.
dtype : str dtype : str, optional
The data type of Tensor. The data type of Tensor.
Returns Returns
...@@ -146,16 +99,13 @@ def zeros_like(model, dtype=None, **kwargs): ...@@ -146,16 +99,13 @@ def zeros_like(model, dtype=None, **kwargs):
The initialized tensor. The initialized tensor.
""" """
if dtype is None: dtype = config.floatX return zeros(shape=model.shape, dtype=dtype)
else:
raise TypeError("Unsupported data type: {}".format(dtype))
return ops.Fill(shape=ops.Shape(model), value=0)
def ones(shape, dtype=None): def ones(shape, dtype=None):
"""Initialize a tensor with ones. """Initialize a tensor with ones.
If dtype is ``None``, use ``config.floatX``. If dtype is *None*, use *config.floatX*.
Parameters Parameters
---------- ----------
...@@ -170,14 +120,8 @@ def ones(shape, dtype=None): ...@@ -170,14 +120,8 @@ def ones(shape, dtype=None):
The initialized tensor. The initialized tensor.
""" """
if dtype is None: dtype = config.floatX if dtype is None: dtype = _cfg.floatX
else: return _ops.Fill(shape=shape, value=1, dtype=dtype)
if dtype not in _DATA_TYPES.keys():
raise TypeError("Unsupported data type: {}".format(dtype))
np_value = np.ones(shape, dtype=_DATA_TYPES[dtype])
output = Tensor(shape=shape, dtype=dtype)
output.set_value(np_value)
return output
def ones_like(model, dtype=None, **kwargs): def ones_like(model, dtype=None, **kwargs):
...@@ -185,7 +129,7 @@ def ones_like(model, dtype=None, **kwargs): ...@@ -185,7 +129,7 @@ def ones_like(model, dtype=None, **kwargs):
The values can be accessed only after the graph runs. The values can be accessed only after the graph runs.
If dtype is ``None``, use ``config.floatX``. If dtype is *None*, use *config.floatX*.
Parameters Parameters
---------- ----------
...@@ -200,16 +144,13 @@ def ones_like(model, dtype=None, **kwargs): ...@@ -200,16 +144,13 @@ def ones_like(model, dtype=None, **kwargs):
The initialized tensor. The initialized tensor.
""" """
if dtype is None: dtype = config.floatX return ones(shape=model.shape, dtype=dtype)
else:
raise TypeError("Unsupported data type: {}".format(dtype))
return ops.Fill(shape=ops.Shape(model), value=1)
def cast(x, dtype): def cast(x, dtype):
"""Cast input to the tensor of specific data type. """Cast input to the tensor of specific data type.
If dtype is ``None``, use ``config.floatX``. If dtype is *None*, use *config.floatX*.
Parameters Parameters
---------- ----------
...@@ -224,8 +165,8 @@ def cast(x, dtype): ...@@ -224,8 +165,8 @@ def cast(x, dtype):
The output tensor. The output tensor.
""" """
if dtype is None: dtype = config.floatX if dtype is None: dtype = _cfg.floatX
raise NotImplementedError() return x.astype(dtype)
def dot(a, b): def dot(a, b):
...@@ -246,7 +187,7 @@ def dot(a, b): ...@@ -246,7 +187,7 @@ def dot(a, b):
The output tensor. The output tensor.
""" """
return ops.Dot([a, b]) return _ops.Dot([a, b])
def batched_tensordot(x, y, axes=2): def batched_tensordot(x, y, axes=2):
...@@ -269,7 +210,7 @@ def transpose(x, axes=None): ...@@ -269,7 +210,7 @@ def transpose(x, axes=None):
The output tensor. The output tensor.
""" """
return ops.Transpose(x, perm=axes) return _ops.Transpose(x, perm=axes)
def max(x, axis=None, keepdims=False): def max(x, axis=None, keepdims=False):
...@@ -291,7 +232,7 @@ def max(x, axis=None, keepdims=False): ...@@ -291,7 +232,7 @@ def max(x, axis=None, keepdims=False):
""" """
if axis is None: axis = -1 if axis is None: axis = -1
return ops.Max(x, axis=axis, keep_dims=keepdims) return _ops.Max(x, axis=axis, keep_dims=keepdims)
def min(x, axis=None, keepdims=False): def min(x, axis=None, keepdims=False):
...@@ -313,7 +254,7 @@ def min(x, axis=None, keepdims=False): ...@@ -313,7 +254,7 @@ def min(x, axis=None, keepdims=False):
""" """
if axis is None: axis = -1 if axis is None: axis = -1
return ops.Min(x, axis=axis, keep_dims=keepdims) return _ops.Min(x, axis=axis, keep_dims=keepdims)
def sum(input, axis=None, keepdims=False, **kwargs): def sum(input, axis=None, keepdims=False, **kwargs):
...@@ -335,7 +276,7 @@ def sum(input, axis=None, keepdims=False, **kwargs): ...@@ -335,7 +276,7 @@ def sum(input, axis=None, keepdims=False, **kwargs):
""" """
if axis is None: axis = -1 if axis is None: axis = -1
return ops.Sum(input, axis=axis, keep_dims=keepdims) return _ops.Sum(input, axis=axis, keep_dims=keepdims)
def mean(input, axis=None, keepdims=False, **kwargs): def mean(input, axis=None, keepdims=False, **kwargs):
...@@ -357,7 +298,7 @@ def mean(input, axis=None, keepdims=False, **kwargs): ...@@ -357,7 +298,7 @@ def mean(input, axis=None, keepdims=False, **kwargs):
""" """
if axis is None: axis = -1 if axis is None: axis = -1
return ops.Mean(input, axis=axis, keep_dims=keepdims) return _ops.Mean(input, axis=axis, keep_dims=keepdims)
def prod(input, axis=None, keepdims=False, **kwargs): def prod(input, axis=None, keepdims=False, **kwargs):
...@@ -401,7 +342,7 @@ def argmax(x, axis=None, keepdims=False): ...@@ -401,7 +342,7 @@ def argmax(x, axis=None, keepdims=False):
""" """
if axis is None: axis = -1 if axis is None: axis = -1
return ops.ArgMax(x, axis=axis, keep_dims=keepdims) return _ops.ArgMax(x, axis=axis, keep_dims=keepdims)
def argmin(x, axis=None, keepdims=False): def argmin(x, axis=None, keepdims=False):
...@@ -423,7 +364,7 @@ def argmin(x, axis=None, keepdims=False): ...@@ -423,7 +364,7 @@ def argmin(x, axis=None, keepdims=False):
""" """
if axis is None: axis = -1 if axis is None: axis = -1
return ops.ArgMin(x, axis=axis, keep_dims=keepdims) return _ops.ArgMin(x, axis=axis, keep_dims=keepdims)
def square(a): def square(a):
...@@ -440,7 +381,7 @@ def square(a): ...@@ -440,7 +381,7 @@ def square(a):
The square result. The square result.
""" """
return ops.Square(a) return _ops.Square(a)
def sqrt(a): def sqrt(a):
...@@ -457,7 +398,7 @@ def sqrt(a): ...@@ -457,7 +398,7 @@ def sqrt(a):
The sqrt result. The sqrt result.
""" """
return ops.Sqrt(a) return _ops.Sqrt(a)
def pow(a, power): def pow(a, power):
...@@ -474,7 +415,7 @@ def pow(a, power): ...@@ -474,7 +415,7 @@ def pow(a, power):
The pow result. The pow result.
""" """
return ops.Pow(a, power) return _ops.Pow(a, power)
def exp(a): def exp(a):
...@@ -491,7 +432,7 @@ def exp(a): ...@@ -491,7 +432,7 @@ def exp(a):
The exponential result. The exponential result.
""" """
return ops.Exp(a) return _ops.Exp(a)
def log(a): def log(a):
...@@ -508,7 +449,7 @@ def log(a): ...@@ -508,7 +449,7 @@ def log(a):
The logarithm result. The logarithm result.
""" """
return ops.Log(a) return _ops.Log(a)
def clip(x, min=None, max=None): def clip(x, min=None, max=None):
...@@ -529,7 +470,7 @@ def clip(x, min=None, max=None): ...@@ -529,7 +470,7 @@ def clip(x, min=None, max=None):
The clip result. The clip result.
""" """
return ops.Clip(x, low=min, high=max) return _ops.Clip(x, low=min, high=max)
def join(axis, *tensors_list): def join(axis, *tensors_list):
...@@ -548,7 +489,7 @@ def join(axis, *tensors_list): ...@@ -548,7 +489,7 @@ def join(axis, *tensors_list):
The output tensor. The output tensor.
""" """
return ops.Concat(list(tensors_list), axis=axis) return _ops.Concat(list(tensors_list), axis=axis)
def stack(*tensors, **kwargs): def stack(*tensors, **kwargs):
...@@ -573,7 +514,7 @@ def stack(*tensors, **kwargs): ...@@ -573,7 +514,7 @@ def stack(*tensors, **kwargs):
""" """
if not 'axis' in kwargs: axis = 0 if not 'axis' in kwargs: axis = 0
else: axis = kwargs['axis'] else: axis = kwargs['axis']
return ops.Stack(list(tensors), axis=axis) return _ops.Stack(list(tensors), axis=axis)
def concatenate(tensor_list, axis=0): def concatenate(tensor_list, axis=0):
...@@ -594,7 +535,7 @@ def concatenate(tensor_list, axis=0): ...@@ -594,7 +535,7 @@ def concatenate(tensor_list, axis=0):
The output tensor. The output tensor.
""" """
return ops.Concat(tensor_list, axis=axis) return _ops.Concat(tensor_list, axis=axis)
def reshape(x, newshape, **kwargs): def reshape(x, newshape, **kwargs):
...@@ -613,7 +554,7 @@ def reshape(x, newshape, **kwargs): ...@@ -613,7 +554,7 @@ def reshape(x, newshape, **kwargs):
The output tensor. The output tensor.
""" """
return ops.Reshape(x, shape=newshape) return _ops.Reshape(x, shape=newshape)
def flatten(x, outdim=1): def flatten(x, outdim=1):
...@@ -632,7 +573,7 @@ def flatten(x, outdim=1): ...@@ -632,7 +573,7 @@ def flatten(x, outdim=1):
The output tensor. The output tensor.
""" """
return ops.Flatten(x, keep_axes=outdim) return _ops.Flatten(x, keep_axes=outdim)
def repeat(x, repeats, axis=None): def repeat(x, repeats, axis=None):
...@@ -654,7 +595,7 @@ def repeat(x, repeats, axis=None): ...@@ -654,7 +595,7 @@ def repeat(x, repeats, axis=None):
""" """
if axis is None: axis = -1 if axis is None: axis = -1
return ops.Repeat(x, axis=axis, repeats=repeats) return _ops.Repeat(x, axis=axis, repeats=repeats)
def tile(x, reps, **kwargs): def tile(x, reps, **kwargs):
...@@ -673,7 +614,7 @@ def tile(x, reps, **kwargs): ...@@ -673,7 +614,7 @@ def tile(x, reps, **kwargs):
The output tensor. The output tensor.
""" """
return ops.Tile(x, multiples=reps) return _ops.Tile(x, multiples=reps)
def arange(start, stop=None, step=1, dtype=None): def arange(start, stop=None, step=1, dtype=None):
...@@ -698,4 +639,4 @@ def arange(start, stop=None, step=1, dtype=None): ...@@ -698,4 +639,4 @@ def arange(start, stop=None, step=1, dtype=None):
The vector. The vector.
""" """
return ops.Arange(start=start, stop=stop, step=1, dtype=dtype.upper()) return _ops.Arange(start=start, stop=stop, step=step, dtype=dtype)
\ No newline at end of file \ No newline at end of file
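The rewritten zeros/ones above defer to a symbolic Fill instead of eagerly materializing numpy arrays, while still falling back to the configured floatX when no dtype is given. A minimal standalone sketch of that dtype-defaulting pattern, using numpy and illustrative names rather than the Dragon API:

import numpy as np

class _Config(object):
    floatX = 'float32'   # the default data type when none is given

_cfg = _Config()

def fill(shape, value, dtype=None):
    """Return a constant array, defaulting the dtype to the configured floatX."""
    if dtype is None:
        dtype = _cfg.floatX
    return np.full(shape, value, dtype=dtype)

print(fill((2, 3), 0).dtype)            # float32, taken from floatX
print(fill((2, 3), 1, 'int64').dtype)   # int64, explicit override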
...@@ -9,8 +9,11 @@ ...@@ -9,8 +9,11 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from dragon.core.tensor import Tensor from __future__ import absolute_import
import dragon.ops as ops from __future__ import division
from __future__ import print_function
from dragon import ops as _ops
def cumsum(x, axis=None): def cumsum(x, axis=None):
...@@ -20,8 +23,8 @@ def cumsum(x, axis=None): ...@@ -20,8 +23,8 @@ def cumsum(x, axis=None):
---------- ----------
x : Tensor x : Tensor
The input tensor. The input tensor.
axis : int axis : int, optional
The axis to sum. Default is ``None`` (Along all axes). The axis to sum.
""" """
raise NotImplementedError() raise NotImplementedError()
...@@ -34,8 +37,8 @@ def cumprod(x, axis=None): ...@@ -34,8 +37,8 @@ def cumprod(x, axis=None):
---------- ----------
x : Tensor x : Tensor
The input tensor. The input tensor.
axis : int axis : int, optional
The axis to multiply. Default is ``None`` (Along all axes). The axis to multiply.
""" """
raise NotImplementedError() raise NotImplementedError()
...@@ -59,5 +62,5 @@ def to_one_hot(y, nb_class, **kwargs): ...@@ -59,5 +62,5 @@ def to_one_hot(y, nb_class, **kwargs):
The one hot matrix. The one hot matrix.
""" """
flat_y = ops.Flatten(y, keep_axes=1) flat_y = _ops.Flatten(y, keep_axes=1)
return ops.OneHot(flat_y, depth=nb_class) return _ops.OneHot(flat_y, depth=nb_class)
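to_one_hot above is composed from Flatten(keep_axes=1) followed by OneHot(depth=nb_class). A standalone numpy sketch of the same composition, illustrative only and not the Dragon operators:

import numpy as np

def to_one_hot(y, nb_class):
    flat_y = np.asarray(y, dtype='int64').reshape(-1)     # Flatten(keep_axes=1)
    one_hot = np.zeros((flat_y.size, nb_class), 'float32')
    one_hot[np.arange(flat_y.size), flat_y] = 1.          # OneHot(depth=nb_class)
    return one_hot

print(to_one_hot([[1], [0], [2]], nb_class=3))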
...@@ -9,8 +9,12 @@ ...@@ -9,8 +9,12 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from dragon.core.tensor import Tensor from __future__ import absolute_import
import dragon.ops as ops from __future__ import division
from __future__ import print_function
from dragon import ops as _ops
from dragon.core.tensor import Tensor as _Tensor
def batch_normalization(inputs, gamma, beta, mean, var, **kwargs): def batch_normalization(inputs, gamma, beta, mean, var, **kwargs):
...@@ -35,7 +39,7 @@ def batch_normalization(inputs, gamma, beta, mean, var, **kwargs): ...@@ -35,7 +39,7 @@ def batch_normalization(inputs, gamma, beta, mean, var, **kwargs):
The output tensor. The output tensor.
""" """
return ops.BatchNorm([inputs, mean, var, gamma, beta]) return _ops.BatchNorm([inputs, mean, var, gamma, beta])
def relu(x, alpha=0): def relu(x, alpha=0):
...@@ -54,8 +58,8 @@ def relu(x, alpha=0): ...@@ -54,8 +58,8 @@ def relu(x, alpha=0):
The output tensor. The output tensor.
""" """
if alpha == 0: return ops.Relu(x) if alpha == 0: return _ops.Relu(x)
else: return ops.LRelu(x, slope=alpha) else: return _ops.LRelu(x, slope=alpha)
def softmax(c): def softmax(c):
...@@ -74,7 +78,7 @@ def softmax(c): ...@@ -74,7 +78,7 @@ def softmax(c):
The output tensor. The output tensor.
""" """
return ops.Softmax(c, axis=1) return _ops.Softmax(c, axis=1)
def categorical_crossentropy(coding_dist, true_dist, axis=1): def categorical_crossentropy(coding_dist, true_dist, axis=1):
...@@ -95,7 +99,7 @@ def categorical_crossentropy(coding_dist, true_dist, axis=1): ...@@ -95,7 +99,7 @@ def categorical_crossentropy(coding_dist, true_dist, axis=1):
The categorical cross-entropy. The categorical cross-entropy.
""" """
return -ops.Sum(true_dist * ops.Log(coding_dist), axis=axis) return -_ops.Sum(true_dist * _ops.Log(coding_dist), axis=axis)
def sigmoid(x): def sigmoid(x):
...@@ -112,7 +116,7 @@ def sigmoid(x): ...@@ -112,7 +116,7 @@ def sigmoid(x):
The output tensor. The output tensor.
""" """
return ops.Sigmoid(x) return _ops.Sigmoid(x)
def tanh(x): def tanh(x):
...@@ -129,7 +133,7 @@ def tanh(x): ...@@ -129,7 +133,7 @@ def tanh(x):
The output tensor. The output tensor.
""" """
return ops.Tanh(x) return _ops.Tanh(x)
def binary_crossentropy(output, target): def binary_crossentropy(output, target):
...@@ -148,7 +152,7 @@ def binary_crossentropy(output, target): ...@@ -148,7 +152,7 @@ def binary_crossentropy(output, target):
The binary cross-entropy. The binary cross-entropy.
""" """
return -(target * ops.Log(output) + (1.0 - target) * ops.Log(1.0 - output)) return -(target * _ops.Log(output) + (1. - target) * _ops.Log(1. - output))
......
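The two cross-entropy helpers above are composed from Sum and Log rather than a dedicated loss operator. A standalone numpy sketch of the same expressions, illustrative only:

import numpy as np

def categorical_crossentropy(coding_dist, true_dist, axis=1):
    return -np.sum(true_dist * np.log(coding_dist), axis=axis)

def binary_crossentropy(output, target):
    return -(target * np.log(output) + (1. - target) * np.log(1. - output))

p = np.array([[0.7, 0.2, 0.1]])
t = np.array([[1., 0., 0.]])
print(categorical_crossentropy(p, t))                    # ~[0.357]
print(binary_crossentropy(np.array(0.9), np.array(1.)))  # ~0.105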
...@@ -9,5 +9,11 @@ ...@@ -9,5 +9,11 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from .variable import Variable from __future__ import absolute_import
from .grad_mode import no_grad, enable_grad, set_grad_enabled from __future__ import division
\ No newline at end of file from __future__ import print_function
from dragon.vm.torch.autograd.variable import Variable
from dragon.vm.torch.autograd.grad_mode import no_grad
from dragon.vm.torch.autograd.grad_mode import enable_grad
from dragon.vm.torch.autograd.grad_mode import set_grad_enabled
\ No newline at end of file
...@@ -17,16 +17,13 @@ from __future__ import absolute_import ...@@ -17,16 +17,13 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.core import tls as _tls
__all__ = [
'is_grad_enabled',
'no_grad',
'enable_grad',
'set_grad_enabled',
]
def _set_grad_enabled(enabled=True):
grad_option = {'enable_grad': True} """Set the status of grad option."""
global _GLOBAL_GRAD_OPTION
_GLOBAL_GRAD_OPTION.enabled = enabled
def is_grad_enabled(): def is_grad_enabled():
...@@ -38,14 +35,7 @@ def is_grad_enabled(): ...@@ -38,14 +35,7 @@ def is_grad_enabled():
``True`` if enabling auto-grad. ``True`` if enabling auto-grad.
""" """
global grad_option return _GLOBAL_GRAD_OPTION.enabled
return grad_option['enable_grad']
def _set_grad_enabled(enabled=True):
global grad_option
grad_option['enable_grad'] = enabled
class no_grad(object): class no_grad(object):
...@@ -96,4 +86,7 @@ class set_grad_enabled(object): ...@@ -96,4 +86,7 @@ class set_grad_enabled(object):
def __exit__(self, *args): def __exit__(self, *args):
_set_grad_enabled(self.prev) _set_grad_enabled(self.prev)
return False return False
\ No newline at end of file
_GLOBAL_GRAD_OPTION = _tls.Constant(enabled=True)
\ No newline at end of file
...@@ -15,11 +15,12 @@ from __future__ import print_function ...@@ -15,11 +15,12 @@ from __future__ import print_function
import warnings import warnings
import dragon.core.tensor_utils as tensor_utils from dragon.core import tensor_utils as _tensor_utils
import dragon.core.workspace as ws from dragon.core.workspace import Backward as _backward_impl
from dragon.vm.torch.tensor import Tensor from dragon.vm.torch.c_api import _get_tensor_pool
from dragon.vm.torch.pool import TensorPool, OperatorPool from dragon.vm.torch.c_api import _get_operator_pool
from dragon.vm.torch.tensor import Tensor as _Tensor
def Variable(tensor, requires_grad=False, volatile=False): def Variable(tensor, requires_grad=False, volatile=False):
...@@ -44,32 +45,32 @@ def backward(self, gradient=None): ...@@ -44,32 +45,32 @@ def backward(self, gradient=None):
raise RuntimeError('This variable does not require grads.' raise RuntimeError('This variable does not require grads.'
'\nCan not backward from this variable.') '\nCan not backward from this variable.')
# 1. Expressions -> Forward-Ops # 1) expressions -> forward_ops
# We should sort out the topology of these operators before using them # We should sort out the topology before using it
all_expressions = sorted(self.__jit_recorder__.ops.items(), key=lambda d: d[0]) all_expressions = sorted(self.__jit_recorder__.ops.items(), key=lambda d: d[0])
forward_ops = [v for k, v in all_expressions] forward_ops = [v for k, v in all_expressions]
# 2. Forward-Ops + Targets + InputGrads + IgnoredGrads -> Backward-Ops # 2) forward_ops + targets + input_grads + ignored_grads -> backward_ops
targets = [self.name]; input_grads = [] targets, input_grads = [self.name], []
ignored_grads = list(self._ignored_grads) if self._ignored_grads else [] ignored_grads = list(self._ignored_grads) if self._ignored_grads else []
if gradient is not None: if gradient is not None:
if not isinstance(gradient, Tensor): if not isinstance(gradient, _Tensor):
raise TypeError('gradients can be either Tensors, Variables or None,' raise TypeError('gradients can be either Tensors, Variables or None,'
' but got {}'.format(type(gradient))) ' but got {}'.format(type(gradient)))
tensor_utils.FromPyArray(gradient.cpu().numpy(), self.name + '_grad') _tensor_utils.FromArray(gradient.numpy(True), self.name + '_grad')
input_grads.append(self.name + '_grad') input_grads.append(self.name + '_grad')
# 3. Flow or Flow or Flow # 3. Dispatch the backward ops
ws.FlowGradients(forward_ops, targets, input_grads, ignored_grads) _backward_impl(forward_ops, targets, input_grads, ignored_grads)
# 4. Release resources # 4. Release resources
# We should release both the operator handles and tensors # We should release both the operator handles and tensors
for forward_op in forward_ops: for forward_op in forward_ops:
OperatorPool.put(forward_op.name) _get_operator_pool().put(forward_op.name)
for output in forward_op.output: for output in forward_op.output:
if output not in forward_op.input: if output not in forward_op.input:
TensorPool.put(output) _get_tensor_pool().put(output)
Tensor.backward = backward _Tensor.backward = backward
Tensor.volatile = volatile _Tensor.volatile = volatile
\ No newline at end of file \ No newline at end of file
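backward above follows four steps: sort the recorded expressions by uid into forward ops, collect the targets/input grads/ignored grads, dispatch the backward pass, then return pooled operator handles and intermediate outputs. A plain-Python sketch of that bookkeeping with stubbed ops, for illustration only:

from collections import namedtuple

Op = namedtuple('Op', 'name type input output')

# Two expressions recorded out of order, keyed by the uid assigned at record time.
recorded = {2: Op('op/2', 'Add', ['a', 'b'], ['c']),
            1: Op('op/1', 'Mul', ['x', 'y'], ['a'])}

# 1) expressions -> forward ops, sorted by uid
forward_ops = [op for _, op in sorted(recorded.items())]

# 2) targets + input grads + ignored grads for the backward pass
targets, input_grads, ignored_grads = ['c'], [], []

# 3) dispatch (the real code hands these to the workspace backward)
print('backward from', targets, 'through', [op.type for op in forward_ops],
      'input grads', input_grads, 'ignored', ignored_grads)

# 4) release resources: operator handles and any output that is not an input
released = [op.name for op in forward_ops]
released += [out for op in forward_ops for out in op.output if out not in op.input]
print('released:', released)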
...@@ -17,7 +17,9 @@ import copy ...@@ -17,7 +17,9 @@ import copy
import numpy import numpy
import importlib import importlib
from dragon.core import mapping, tensor_utils from dragon.core import mapping as _mapping
from dragon.core import workspace as _workspace
from dragon.core import tensor_utils as _tensor_utils
class Size(tuple): class Size(tuple):
...@@ -65,10 +67,10 @@ def from_numpy(data): ...@@ -65,10 +67,10 @@ def from_numpy(data):
""" """
if not isinstance(data, numpy.ndarray): if not isinstance(data, numpy.ndarray):
raise TypeError('The data should be a numpy.ndarray.') raise TypeError('The data should be a numpy.ndarray.')
if str(data.dtype) not in mapping.TENSOR_TYPE_TO_TORCH_TENSOR: if str(data.dtype) not in _mapping.TENSOR_TYPE_TO_TORCH_TENSOR:
raise ValueError('Unsupported type({}) to torch tensor.'.format(data.dtype)) raise ValueError('Unsupported type({}) to torch tensor.'.format(data.dtype))
module = importlib.import_module('dragon.vm.torch.tensor') module = importlib.import_module('dragon.vm.torch.tensor')
return getattr(module, mapping.TENSOR_TYPE_TO_TORCH_TENSOR[str(data.dtype)])(data) return getattr(module, _mapping.TENSOR_TYPE_TO_TORCH_TENSOR[str(data.dtype)])(data)
def from_dragon(tensor, own_storage=False): def from_dragon(tensor, own_storage=False):
...@@ -89,10 +91,20 @@ def from_dragon(tensor, own_storage=False): ...@@ -89,10 +91,20 @@ def from_dragon(tensor, own_storage=False):
The torch tensor. The torch tensor.
""" """
storage = tensor_utils.GetStorage(tensor) storage = _tensor_utils.GetStorage(tensor)
if storage is None: return None if storage is None: return None
module = importlib.import_module('dragon.vm.torch.tensor') module = importlib.import_module('dragon.vm.torch.tensor')
T = getattr(module, mapping.TENSOR_TYPE_TO_TORCH_TENSOR[storage.dtype])() T = getattr(module, _mapping.TENSOR_TYPE_TO_TORCH_TENSOR[storage.dtype])()
T._storage, T._own_storage, T._tensor = storage, own_storage, tensor T._storage, T._own_storage, T._tensor = storage, own_storage, tensor
T._device = device(*storage.device) T._device = device(*storage.device)
return T return T
\ No newline at end of file
def _get_tensor_pool():
"""Return the tensor pool of current workspace."""
return _workspace.get_default_workspace().tensor_pool
def _get_operator_pool():
"""Return the operator pool of current workspace."""
return _workspace.get_default_workspace().operator_pool
\ No newline at end of file
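from_numpy above dispatches on the numpy dtype string through TENSOR_TYPE_TO_TORCH_TENSOR to pick a tensor class. A small standalone sketch of the same dispatch, with illustrative classes and mapping in place of the real ones:

import numpy

class FloatTensor(object):
    def __init__(self, array): self.array, self.dtype = array, 'float32'

class LongTensor(object):
    def __init__(self, array): self.array, self.dtype = array, 'int64'

# Illustrative stand-in for TENSOR_TYPE_TO_TORCH_TENSOR
TYPE_TO_TENSOR = {'float32': FloatTensor, 'int64': LongTensor}

def from_numpy(data):
    if not isinstance(data, numpy.ndarray):
        raise TypeError('The data should be a numpy.ndarray.')
    key = str(data.dtype)
    if key not in TYPE_TO_TENSOR:
        raise ValueError('Unsupported type({}) to torch tensor.'.format(data.dtype))
    return TYPE_TO_TENSOR[key](data)

print(type(from_numpy(numpy.zeros(3, 'float32'))).__name__)   # FloatTensor
print(type(from_numpy(numpy.zeros(3, 'int64'))).__name__)     # LongTensor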
...@@ -28,21 +28,23 @@ from __future__ import division ...@@ -28,21 +28,23 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import six import six
import dragon as dg from dragon import import_c_api as _C
import dragon.import_c_api as C from dragon.config import option as _options
from dragon.config import option from dragon.core import workspace as _workspace
from .c_api import device as _Device
from .jit import JITRecorder, is_jit_enforced
from .autograd.grad_mode import is_grad_enabled
from .tensor import _RuntimeTensor
from .pool import TensorPool
from dragon.vm.torch.c_api import _get_tensor_pool
from dragon.vm.torch.c_api import device as _Device
from dragon.vm.torch.jit import JITRecorder, is_jit_enforced
from dragon.vm.torch.autograd.grad_mode import is_grad_enabled
from dragon.vm.torch.tensor import _RuntimeTensor
def RunOperator( def RunOperator(
inputs, outputs, meta, inputs,
auto_grad=True, outputs,
callback_on_run=None): meta,
auto_grad=True,
callback_on_run=None,
):
if not isinstance(inputs, list): inputs = [inputs] if not isinstance(inputs, list): inputs = [inputs]
if not isinstance(outputs, list): outputs = [outputs] if not isinstance(outputs, list): outputs = [outputs]
if len(outputs) == 0: if len(outputs) == 0:
...@@ -67,14 +69,15 @@ def RunOperator( ...@@ -67,14 +69,15 @@ def RunOperator(
else: else:
# Legacy mode, a torch tensor is expected # Legacy mode, a torch tensor is expected
if isinstance(output, _Device): if isinstance(output, _Device):
name = TensorPool.get('${JOIN}' if requires_grad else '${DETACH}') name = _get_tensor_pool().get(
'${JOIN}' if requires_grad else '${DETACH}')
outputs[ix] = _RuntimeTensor(name, device=output) outputs[ix] = _RuntimeTensor(name, device=output)
outputs_name.append(outputs[ix].name) outputs_name.append(outputs[ix].name)
# Key + Inputs + Outputs => Op # Key + Inputs + Outputs => Op
op_name = 'runtime' op_name = 'runtime'
persistent_key, meta_op = meta persistent_key, meta_op = meta
op = C.OperatorDef(); op.CopyFrom(meta_op) op = _C.OperatorDef(); op.CopyFrom(meta_op)
op.input, op.output = inputs_name, outputs_name op.input, op.output = inputs_name, outputs_name
# Auto-Grad # Auto-Grad
...@@ -106,9 +109,9 @@ def RunOperator( ...@@ -106,9 +109,9 @@ def RunOperator(
if callback_on_run: callback_on_run(op_name) if callback_on_run: callback_on_run(op_name)
# Run # Run
dg.workspace.RunOperator(op, _workspace.RunOperator(op,
verbose=option['log_optimized_graph'] or verbose=_options['log_optimized_graph'] or
option['log_meta_graph']) _options['log_meta_graph'])
# Returns # Returns
if len(outputs) > 1: return outputs if len(outputs) > 1: return outputs
......
...@@ -15,10 +15,8 @@ from __future__ import absolute_import ...@@ -15,10 +15,8 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.vm.torch.pool import OperatorPool from dragon.core import tls as _tls
from dragon.vm.torch.c_api import _get_operator_pool
_ENFORCE_JIT_TRACER = False
def _Incrementer(): def _Incrementer():
...@@ -38,7 +36,7 @@ class JITRecorder(object): ...@@ -38,7 +36,7 @@ class JITRecorder(object):
def append(self, op): def append(self, op):
uid = next(self.UID_GENERATOR) uid = next(self.UID_GENERATOR)
op_name = OperatorPool.get(op.type) op_name = _get_operator_pool().get(op.type)
self.ops[uid] = op self.ops[uid] = op
self.ops[uid].name = op_name self.ops[uid].name = op_name
return op_name return op_name
...@@ -70,6 +68,11 @@ class JITRecorder(object): ...@@ -70,6 +68,11 @@ class JITRecorder(object):
return buffer0 + buffer2 + buffer1 + buffer0 return buffer0 + buffer2 + buffer1 + buffer0
def is_jit_enforced():
"""Whether jit tracer is enforced."""
return _GLOBAL_ENFORCE_JIT_TRACER.enabled
class enforce_jit(object): class enforce_jit(object):
"""Context-manager that enforce the jit tracer.""" """Context-manager that enforce the jit tracer."""
...@@ -77,13 +80,12 @@ class enforce_jit(object): ...@@ -77,13 +80,12 @@ class enforce_jit(object):
self.prev = is_jit_enforced() self.prev = is_jit_enforced()
def __enter__(self): def __enter__(self):
global _ENFORCE_JIT_TRACER global _GLOBAL_ENFORCE_JIT_TRACER
_ENFORCE_JIT_TRACER = True _GLOBAL_ENFORCE_JIT_TRACER.enabled = True
def __exit__(self, *args): def __exit__(self, *args):
global _ENFORCE_JIT_TRACER global _GLOBAL_ENFORCE_JIT_TRACER
_ENFORCE_JIT_TRACER = self.prev _GLOBAL_ENFORCE_JIT_TRACER.enabled = self.prev
def is_jit_enforced(): _GLOBAL_ENFORCE_JIT_TRACER = _tls.Constant(enabled=False)
return _ENFORCE_JIT_TRACER \ No newline at end of file
\ No newline at end of file
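A short usage sketch of the enforced tracer, assuming Dragon is installed and exposes the module path shown in this diff; the flag is restored on exit just as the context manager above does:

from dragon.vm.torch.jit import enforce_jit, is_jit_enforced

print(is_jit_enforced())      # False: the thread-local default above
with enforce_jit():
    print(is_jit_enforced())  # True while the context is active
print(is_jit_enforced())      # restored to the previous value on exit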
...@@ -24,10 +24,12 @@ import dragon ...@@ -24,10 +24,12 @@ import dragon
import warnings import warnings
from collections import OrderedDict from collections import OrderedDict
from dragon.core import proto_utils, logging from dragon.core import scope as _scope
from dragon.core.scope import get_default_name_scope from dragon.core import logging as _logging
from dragon.core import proto_utils as _proto_utils
from dragon.core import tensor_utils as _tensor_utils
from dragon.vm.torch.c_api import device as Device from dragon.vm.torch.c_api import device as _Device
from dragon.vm.torch.tensor import Tensor, Parameter from dragon.vm.torch.tensor import Tensor, Parameter
from dragon.vm.torch.execution import RunOperator from dragon.vm.torch.execution import RunOperator
from dragon.vm.torch.environ import add_submodule, get_module_name from dragon.vm.torch.environ import add_submodule, get_module_name
...@@ -38,7 +40,7 @@ class Module(object): ...@@ -38,7 +40,7 @@ class Module(object):
self._modules = OrderedDict() self._modules = OrderedDict()
self._parameters = OrderedDict() self._parameters = OrderedDict()
self._buffers = OrderedDict() self._buffers = OrderedDict()
self._device = Device() self._device = _Device()
self._module_key = None self._module_key = None
self._module_def = None self._module_def = None
self.training = True self.training = True
...@@ -107,7 +109,7 @@ class Module(object): ...@@ -107,7 +109,7 @@ class Module(object):
return destination return destination
def load_state_dict(self, state_dict, strict=True, verbose=True): def load_state_dict(self, state_dict, strict=True, verbose=True):
if verbose: logging.info('Load the state dict.') if verbose: _logging.info('Load the state dict.')
unexpected = [] unexpected = []
own_state = self.state_dict() own_state = self.state_dict()
for name, param in state_dict.items(): for name, param in state_dict.items():
...@@ -122,12 +124,12 @@ class Module(object): ...@@ -122,12 +124,12 @@ class Module(object):
if isinstance(param, Tensor): if isinstance(param, Tensor):
own_state[name].copy_(param) own_state[name].copy_(param)
elif isinstance(param, numpy.ndarray): elif isinstance(param, numpy.ndarray):
dragon.tensor_utils.SetPyArray(own_state[name], param) _tensor_utils.SetArray(own_state[name], param)
else: else:
raise ValueError('Expected the type of source state to be either ' raise ValueError('Expected the type of source state to be either '
'dragon.vm.torch.Tensor or numpy.ndarray, got {}.'.format(type(param))) 'dragon.vm.torch.Tensor or numpy.ndarray, got {}.'.format(type(param)))
if verbose: if verbose:
logging.info('Tensor({}) loaded, Size: ({})'.format(name, _logging.info('Tensor({}) loaded, Size: ({})'.format(name,
', '.join([str(d) for d in param_shape]))) ', '.join([str(d) for d in param_shape])))
else: else:
unexpected.append(name) unexpected.append(name)
...@@ -192,7 +194,7 @@ class Module(object): ...@@ -192,7 +194,7 @@ class Module(object):
raise NotImplementedError('The base module can not be called.') raise NotImplementedError('The base module can not be called.')
def name_scope(self, remove_separator=True): def name_scope(self, remove_separator=True):
scope = get_default_name_scope() scope = _scope.get_default_name_scope()
if remove_separator and \ if remove_separator and \
len(scope) > 0 and \ len(scope) > 0 and \
scope[-1] == '/': scope[-1] == '/':
...@@ -268,7 +270,7 @@ class Module(object): ...@@ -268,7 +270,7 @@ class Module(object):
return self return self
def cpu(self): def cpu(self):
self._device = Device() self._device = _Device()
# Remove key and op to re-create a one with new device # Remove key and op to re-create a one with new device
self._module_key = self._module_def = None self._module_key = self._module_def = None
return self._apply(lambda t: t.cpu(), return self._apply(lambda t: t.cpu(),
...@@ -276,7 +278,7 @@ class Module(object): ...@@ -276,7 +278,7 @@ class Module(object):
def cuda(self, device=None): def cuda(self, device=None):
if device is None: device = dragon.config.GetGPU() if device is None: device = dragon.config.GetGPU()
self._device = Device('cuda', device) self._device = _Device('cuda', device)
# Remove key and op to re-create a one with new device # Remove key and op to re-create a one with new device
self._module_key = self._module_def = None self._module_key = self._module_def = None
return self._apply(lambda t: t.cuda(device), return self._apply(lambda t: t.cuda(device),
...@@ -309,11 +311,11 @@ class Module(object): ...@@ -309,11 +311,11 @@ class Module(object):
def _gen_module_def(self): def _gen_module_def(self):
self._module_def = \ self._module_def = \
proto_utils.MakeCXXOperatorDef( _proto_utils.MakeCXXOperatorDef(
name='runtime', name='runtime',
uid=self.module_key, uid=self.module_key,
op_type=self.op_meta['op_type'], op_type=self.op_meta['op_type'],
device_option=proto_utils. device_option=_proto_utils.
GetDeviceOption( GetDeviceOption(
self._device.type, self._device.type,
self._device.index), self._device.index),
......
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import warnings import warnings
......
...@@ -19,6 +19,7 @@ from __future__ import print_function ...@@ -19,6 +19,7 @@ from __future__ import print_function
import math import math
import warnings import warnings
from dragon.vm.torch.autograd.grad_mode import no_grad from dragon.vm.torch.autograd.grad_mode import no_grad
......
...@@ -13,10 +13,10 @@ from __future__ import absolute_import ...@@ -13,10 +13,10 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy
import dragon as dg
from dragon.core import proto_utils from dragon.core import proto_utils as _proto_utils
from dragon.core import workspace as _workspace
from dragon.vm.torch.module import Module from dragon.vm.torch.module import Module
...@@ -25,9 +25,10 @@ class BaseModule(Module): ...@@ -25,9 +25,10 @@ class BaseModule(Module):
super(BaseModule, self).__init__() super(BaseModule, self).__init__()
self._module_key = key self._module_key = key
self._device = dev self._device = dev
self._args_dev = proto_utils.\ self._args_dev = _proto_utils.\
GetDeviceOption('cpu').SerializeToString() GetDeviceOption('cpu').SerializeToString()
def set_argument_i64(self, name, value): def set_argument_i64(self, name, value):
dg.C.FeedTensor(name, np.array( _workspace.get_default_workspace()\
value, dtype=np.int64), self._args_dev) .FeedTensor(name, numpy.array(
\ No newline at end of file value, dtype=numpy.int64), self._args_dev)
\ No newline at end of file
...@@ -13,10 +13,10 @@ from __future__ import absolute_import ...@@ -13,10 +13,10 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy
import dragon as dg
from dragon.vm.torch.tensor import * from dragon.core import workspace as _workspace
from dragon.vm.torch.tensor import Tensor as _Tensor
from dragon.vm.torch.c_api import device as _Device from dragon.vm.torch.c_api import device as _Device
...@@ -49,8 +49,8 @@ def WrapScalar(scalar, dtype, device): ...@@ -49,8 +49,8 @@ def WrapScalar(scalar, dtype, device):
if 'float' in dtype: scalar = float(scalar) if 'float' in dtype: scalar = float(scalar)
if 'int' in dtype: scalar = int(scalar) if 'int' in dtype: scalar = int(scalar)
name = '/share/scalar/{}/{}'.format(dtype, str(scalar)) name = '/share/scalar/{}/{}'.format(dtype, str(scalar))
if not dg.workspace.HasTensor(name): if not _workspace.HasTensor(name):
dg.workspace.FeedTensor(name, np.array(scalar, dtype=dtype)) _workspace.FeedTensor(name, numpy.array(scalar, dtype=dtype))
t = Tensor(name=name, dtype=dtype, device=device, own_storage=False) t = _Tensor(name=name, dtype=dtype, device=device, own_storage=False)
t.requires_grad = False t.requires_grad = False
return t return t
\ No newline at end of file
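WrapScalar above feeds each scalar once under a name keyed by dtype and value, so repeated wraps of the same constant share a single tensor. A plain-Python sketch of that caching scheme, with a dict standing in for the workspace:

import numpy

_cache = {}   # a dict stands in for the shared workspace

def wrap_scalar(scalar, dtype):
    if 'float' in dtype: scalar = float(scalar)
    if 'int' in dtype: scalar = int(scalar)
    name = '/share/scalar/{}/{}'.format(dtype, str(scalar))
    if name not in _cache:                    # feed once, reuse afterwards
        _cache[name] = numpy.array(scalar, dtype=dtype)
    return name

print(wrap_scalar(2, 'float32'))     # /share/scalar/float32/2.0
print(wrap_scalar(2.0, 'float32'))   # same key, so the same cached constant
print(len(_cache))                   # 1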
...@@ -9,7 +9,11 @@ ...@@ -9,7 +9,11 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from .adam import Adam from __future__ import absolute_import
from .sgd import SGD from __future__ import division
from .rmsprop import RMSprop from __future__ import print_function
from .optimizer import Optimizer
\ No newline at end of file from dragon.vm.torch.optim.adam import Adam
from dragon.vm.torch.optim.sgd import SGD
from dragon.vm.torch.optim.rmsprop import RMSprop
from dragon.vm.torch.optim.optimizer import Optimizer
\ No newline at end of file
...@@ -21,21 +21,38 @@ from dragon.vm.torch.optim.optimizer import Optimizer ...@@ -21,21 +21,38 @@ from dragon.vm.torch.optim.optimizer import Optimizer
class Adam(Optimizer): class Adam(Optimizer):
def __init__(self, params, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, def __init__(
weight_decay=0, amsgrad=False, scale_gradient=1.0, clip_gradient=-1.0): self,
if not 0.0 <= lr: params,
lr=1e-3,
beta1=0.9,
beta2=0.999,
eps=1e-8,
weight_decay=0,
amsgrad=False,
scale_gradient=1.,
clip_gradient=-1.,
):
if not 0. <= lr:
raise ValueError("Invalid learning rate: {}".format(lr)) raise ValueError("Invalid learning rate: {}".format(lr))
if not 0.0 <= eps: if not 0. <= eps:
raise ValueError("Invalid epsilon value: {}".format(eps)) raise ValueError("Invalid epsilon value: {}".format(eps))
if not 0.0 <= beta1 < 1.0: if not 0. <= beta1 < 1.:
raise ValueError("Invalid beta parameter at index 0: {}".format(beta1)) raise ValueError("Invalid beta parameter at index 0: {}".format(beta1))
if not 0.0 <= beta2 < 1.0: if not 0. <= beta2 < 1.:
raise ValueError("Invalid beta parameter at index 1: {}".format(beta2)) raise ValueError("Invalid beta parameter at index 1: {}".format(beta2))
if amsgrad: if amsgrad:
raise NotImplementedError() raise NotImplementedError()
defaults = dict(lr=lr, beta1=beta1, beta2=beta2, eps=eps, defaults = dict(
weight_decay=weight_decay, amsgrad=amsgrad, lr=lr,
scale_gradient=scale_gradient, clip_gradient=clip_gradient) beta1=beta1,
beta2=beta2,
eps=eps,
weight_decay=weight_decay,
amsgrad=amsgrad,
scale_gradient=scale_gradient,
clip_gradient=clip_gradient,
)
super(Adam, self).__init__(params, defaults) super(Adam, self).__init__(params, defaults)
self._update_type = 'AdamUpdate' self._update_type = 'AdamUpdate'
self._mutable_parameters = { self._mutable_parameters = {
......
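The constructor above validates lr, eps, beta1 and beta2 and maps them onto the 'AdamUpdate' backend op. A standalone numpy sketch of the textbook Adam step those hyperparameters drive; this is the generic formulation, not a guarantee of Dragon's exact kernel:

import numpy as np

def adam_step(param, grad, m, v, t, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    m = beta1 * m + (1. - beta1) * grad          # first moment
    v = beta2 * v + (1. - beta2) * grad ** 2     # second moment
    m_hat = m / (1. - beta1 ** t)                # bias correction
    v_hat = v / (1. - beta2 ** t)
    return param - lr * m_hat / (np.sqrt(v_hat) + eps), m, v

w, m, v = np.array([1.0]), np.zeros(1), np.zeros(1)
for t in range(1, 4):
    w, m, v = adam_step(w, np.array([0.5]), m, v, t)
print(w)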
...@@ -17,14 +17,14 @@ from __future__ import absolute_import ...@@ -17,14 +17,14 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon
from collections import defaultdict from collections import defaultdict
from dragon.vm.torch.tensor import Tensor from dragon.core import mpi as _mpi
from dragon.core import workspace as _workspace
from dragon.vm.torch.ops.builtin import ( from dragon.vm.torch.tensor import Tensor as _Tensor
_accumulate, _allreduce, _update, from dragon.vm.torch.ops.builtin import _update
) from dragon.vm.torch.ops.builtin import _allreduce
from dragon.vm.torch.ops.builtin import _accumulate
# A simple parameter flag # A simple parameter flag
...@@ -37,7 +37,7 @@ class Optimizer(object): ...@@ -37,7 +37,7 @@ class Optimizer(object):
def __init__(self, params, defaults): def __init__(self, params, defaults):
self.defaults = defaults self.defaults = defaults
if isinstance(params, Tensor): if isinstance(params, _Tensor):
raise TypeError("params argument given to the optimizer should be " raise TypeError("params argument given to the optimizer should be "
"an iterable of Variables or dicts, but got " + "an iterable of Variables or dicts, but got " +
str(type(params))) str(type(params)))
...@@ -52,9 +52,9 @@ class Optimizer(object): ...@@ -52,9 +52,9 @@ class Optimizer(object):
self.add_param_group(param_group) self.add_param_group(param_group)
self._update_type = None self._update_type = None
self._allow_parallel = False self._allow_parallel = False
if dragon.mpi.Is_Init(): if _mpi.Is_Init():
local_rank, _ = dragon.mpi.AllowParallel() rank, _ = _mpi.AllowParallel()
if local_rank != -1: self._allow_parallel = True if rank != -1: self._allow_parallel = True
self._mutable_parameters = {} self._mutable_parameters = {}
def __repr__(self): def __repr__(self):
...@@ -72,7 +72,7 @@ class Optimizer(object): ...@@ -72,7 +72,7 @@ class Optimizer(object):
template = group['slot'] + '/{}' template = group['slot'] + '/{}'
for k, v in group.items(): for k, v in group.items():
if k in self._mutable_parameters: if k in self._mutable_parameters:
dragon.workspace.FeedTensor( _workspace.FeedTensor(
template.format(self._mutable_parameters[k]), template.format(self._mutable_parameters[k]),
v, dtype='float32', force_cpu=True) v, dtype='float32', force_cpu=True)
...@@ -80,8 +80,8 @@ class Optimizer(object): ...@@ -80,8 +80,8 @@ class Optimizer(object):
grad_name = param.name + ( grad_name = param.name + (
'_grad[acc]' if accumulating '_grad[acc]' if accumulating
else '_grad') else '_grad')
if dragon.workspace.HasTensor(grad_name): if _workspace.HasTensor(grad_name):
return Tensor( return _Tensor(
name=grad_name, name=grad_name,
own_storage=False, own_storage=False,
device=param.device) device=param.device)
...@@ -172,7 +172,7 @@ class Optimizer(object): ...@@ -172,7 +172,7 @@ class Optimizer(object):
params = param_group['params'] params = param_group['params']
if isinstance(params, Tensor): if isinstance(params, _Tensor):
param_group['params'] = [params] param_group['params'] = [params]
elif isinstance(params, set): elif isinstance(params, set):
raise TypeError('Optimizer parameters need to be organized in ordered collections,' raise TypeError('Optimizer parameters need to be organized in ordered collections,'
......
...@@ -21,27 +21,55 @@ from dragon.vm.torch.optim.optimizer import Optimizer ...@@ -21,27 +21,55 @@ from dragon.vm.torch.optim.optimizer import Optimizer
class RMSprop(Optimizer): class RMSprop(Optimizer):
def __init__(self, params, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, def __init__(
momentum=0, centered=False, scale_gradient=1.0, clip_gradient=-1.0): self,
if not 0.0 <= lr: params,
lr=1e-2,
alpha=0.99,
eps=1e-8,
weight_decay=0,
momentum=0,
centered=False,
scale_gradient=1.,
clip_gradient=-1.,
):
if not 0. <= lr:
raise ValueError("Invalid learning rate: {}".format(lr)) raise ValueError("Invalid learning rate: {}".format(lr))
if not 0.0 <= eps: if not 0. <= eps:
raise ValueError("Invalid epsilon value: {}".format(eps)) raise ValueError("Invalid epsilon value: {}".format(eps))
if momentum != 0: if momentum < 0.:
raise NotImplementedError() raise ValueError("Invalid momentum value: {}".format(momentum))
if not 0.0 <= alpha: if not 0. <= alpha:
raise ValueError("Invalid alpha value: {}".format(alpha)) raise ValueError("Invalid alpha value: {}".format(alpha))
defaults = dict(
defaults = dict(lr=lr, momentum=momentum, alpha=alpha, eps=eps, lr=lr,
centered=centered, weight_decay=weight_decay, momentum=momentum,
scale_gradient=scale_gradient, clip_gradient=clip_gradient) alpha=alpha,
eps=eps,
centered=centered,
weight_decay=weight_decay,
scale_gradient=scale_gradient,
clip_gradient=clip_gradient,
)
super(RMSprop, self).__init__(params, defaults) super(RMSprop, self).__init__(params, defaults)
self._update_type = 'RMSPropUpdate' if momentum != 0.:
self._mutable_parameters = { self._update_type = 'AdamUpdate'
'lr': 'base_lr', self._mutable_parameters = {
'alpha': 'decay', 'lr': 'base_lr',
'eps': 'eps', 'momentum': 'beta1',
'weight_decay': 'l2_decay', 'alpha': 'beta2',
'clip_gradient': 'clip_gradient', 'eps': 'eps',
'scale_gradient': 'scale_gradient', 'weight_decay': 'l2_decay',
} 'clip_gradient': 'clip_gradient',
\ No newline at end of file 'scale_gradient': 'scale_gradient',
}
else:
self._update_type = 'RMSPropUpdate'
self._mutable_parameters = {
'lr': 'base_lr',
'alpha': 'decay',
'eps': 'eps',
'weight_decay': 'l2_decay',
'clip_gradient': 'clip_gradient',
'scale_gradient': 'scale_gradient',
}
\ No newline at end of file
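When momentum is zero the optimizer above maps onto 'RMSPropUpdate'; with momentum it is routed through 'AdamUpdate', re-labelling momentum/alpha as beta1/beta2. A standalone numpy sketch of the plain (momentum-free) RMSprop step for reference; a textbook formulation, not Dragon's exact kernel:

import numpy as np

def rmsprop_step(param, grad, ms, lr=1e-2, alpha=0.99, eps=1e-8):
    ms = alpha * ms + (1. - alpha) * grad ** 2   # running mean of squared grads
    return param - lr * grad / (np.sqrt(ms) + eps), ms

w, ms = np.array([1.0]), np.zeros(1)
for _ in range(3):
    w, ms = rmsprop_step(w, np.array([0.5]), ms)
print(w)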
...@@ -21,17 +21,32 @@ from dragon.vm.torch.optim.optimizer import Optimizer, required ...@@ -21,17 +21,32 @@ from dragon.vm.torch.optim.optimizer import Optimizer, required
class SGD(Optimizer): class SGD(Optimizer):
def __init__(self, params, lr=required, momentum=0, dampening=0, def __init__(
weight_decay=-1.0, nesterov=False, scale_gradient=1.0, clip_gradient=-1.0): self,
if lr is not required and lr < 0.0: params,
lr=required,
momentum=0,
dampening=0,
weight_decay=-1.,
nesterov=False,
scale_gradient=1.,
clip_gradient=-1.,
):
if lr is not required and lr < 0.:
raise ValueError("Invalid learning rate: {}".format(lr)) raise ValueError("Invalid learning rate: {}".format(lr))
if momentum < 0.0: if momentum < 0.:
raise ValueError("Invalid momentum value: {}".format(momentum)) raise ValueError("Invalid momentum value: {}".format(momentum))
defaults = dict(lr=lr, momentum=momentum, dampening=dampening, defaults = dict(
weight_decay=weight_decay, nesterov=nesterov, lr=lr,
scale_gradient=scale_gradient, clip_gradient=clip_gradient) momentum=momentum,
if nesterov and (momentum <= 0 or dampening != 0): dampening=dampening,
raise ValueError("Nesterov momentum requires a momentum and zero dampening") weight_decay=weight_decay,
nesterov=nesterov,
scale_gradient=scale_gradient,
clip_gradient=clip_gradient,
)
if nesterov and (momentum <= 0. or dampening != 0.):
raise ValueError("Nesterov momentum requires a momentum and zero dampening.")
super(SGD, self).__init__(params, defaults) super(SGD, self).__init__(params, defaults)
self._update_type = 'NesterovUpdate' if nesterov else 'SGDUpdate' self._update_type = 'NesterovUpdate' if nesterov else 'SGDUpdate'
self._mutable_parameters = { self._mutable_parameters = {
......
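The constructor above selects 'NesterovUpdate' or 'SGDUpdate' and rejects Nesterov unless momentum is positive and dampening is zero. A standalone numpy sketch of the classical and Nesterov momentum steps; textbook formulations, not Dragon's exact kernels:

import numpy as np

def sgd_momentum_step(param, grad, buf, lr=0.1, momentum=0.9, nesterov=False):
    buf = momentum * buf + grad                        # velocity buffer
    step = grad + momentum * buf if nesterov else buf  # look-ahead for Nesterov
    return param - lr * step, buf

w, buf = np.array([1.0]), np.zeros(1)
w, buf = sgd_momentum_step(w, np.array([0.5]), buf, nesterov=True)
print(w)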
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Implement some resource pools based on the dummy name. """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon
from collections import defaultdict, deque
class _TensorPool(object):
"""We apply the TensorPool to manage the reused tensors.
Tensors with the same scope in the pool will be reused by turns,
which speeds up the whole system by reducing the unnecessary deconstructing.
Heuristically, we have used 5 pools with different scopes:
* scope(Leaf): A Pool to reuse leaf tensors.
* scope(NumPy): A pool to reuse leaf tensors from numpy.
* scope(Join): A pool to reuse RT(runtime) tensors required by forward-backward.
* scope(Detach): A pool to reuse RT(runtime) tensors required by forward only.
* scope(Reference): A pool to reuse reshaped tensors(sharing contents).
"""
def __init__(self):
# deque provides much higher performance than Queue
self._scope2keys = defaultdict(deque)
def get(self, scope='${DETACH}'):
try:
return self._scope2keys[scope].popleft()
except IndexError:
self._scope2keys[scope].append(
dragon.workspace.GetDummyName(
'${POOL}/%s/Tensor' % scope,
domain='Tensor', zero_based=False))
return self._scope2keys[scope].popleft()
def put(self, name):
if '${POOL}' in name:
scope, _ = name[8:].split('/')
self._scope2keys[scope].append(name)
return True
else: return False
class _OperatorPool(object):
"""Operators whose gradients is required will hold a resource handle,
which is also called ``Anchor`` in the backend.
We apply this pool to collect the handles according to the type of operator,
as the mem size of temporal resources varies greatly.
The resource handle will be released after the gradient flow automatically.
"""
def __init__(self):
# deque provides much higher performance than Queue
self._type2keys = defaultdict(deque)
def get(self, op_type):
try:
return self._type2keys[op_type].popleft()
except IndexError:
self._type2keys[op_type].append(
dragon.workspace.GetDummyName(
'${POOL}/%s' % op_type,
domain='Operator', zero_based=False))
return self._type2keys[op_type].popleft()
def put(self, op_name):
op_type, _ = op_name[8:].split('_')
self._type2keys[op_type].append(op_name)
# Define the global pools
TensorPool = _TensorPool()
OperatorPool = _OperatorPool()
\ No newline at end of file
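Both pools above recycle dummy names per scope or per operator type: get pops a free name or mints a new one, put parses the scope back out of the name and returns it to the free list. A standalone sketch of that scheme, with a local counter standing in for GetDummyName:

from collections import defaultdict, deque

class NamePool(object):
    """get() pops a free name or mints a new one; put() recycles a name."""
    def __init__(self):
        self._scope2keys = defaultdict(deque)
        self._counter = defaultdict(int)
    def get(self, scope='Detach'):
        if self._scope2keys[scope]:
            return self._scope2keys[scope].popleft()
        self._counter[scope] += 1
        return '${POOL}/%s/Tensor_%d' % (scope, self._counter[scope])
    def put(self, name):
        if '${POOL}' in name:
            scope = name[8:].split('/')[0]   # recover the scope from the name
            self._scope2keys[scope].append(name)
            return True
        return False

pool = NamePool()
a = pool.get('Leaf')           # ${POOL}/Leaf/Tensor_1
pool.put(a)                    # recycle it
print(pool.get('Leaf') == a)   # True: the name is reused, not re-created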
...@@ -18,7 +18,7 @@ from __future__ import division ...@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import os, sys, io import os, sys, io
from dragon.core.tensor_utils import ToPyArray from dragon.core.tensor_utils import ToArray as _to_array
if sys.version_info[0] == 2: if sys.version_info[0] == 2:
import cPickle as pickle import cPickle as pickle
...@@ -67,7 +67,7 @@ def _save_dict(obj): ...@@ -67,7 +67,7 @@ def _save_dict(obj):
py_dict = type(obj)() py_dict = type(obj)()
for k, v in obj.items(): for k, v in obj.items():
if isinstance(v, dict): py_dict[k] = _save_dict(v) if isinstance(v, dict): py_dict[k] = _save_dict(v)
elif hasattr(v, 'name'): py_dict[k] = ToPyArray(v, True) elif hasattr(v, 'name'): py_dict[k] = _to_array(v, True)
else: py_dict[k] = v else: py_dict[k] = v
return py_dict return py_dict
...@@ -79,7 +79,7 @@ def _save(obj, f, pickle_module, pickle_protocol): ...@@ -79,7 +79,7 @@ def _save(obj, f, pickle_module, pickle_protocol):
py_dict = type(obj)() py_dict = type(obj)()
for k, v in obj.items(): for k, v in obj.items():
if isinstance(v, dict): py_dict[k] = _save_dict(v) if isinstance(v, dict): py_dict[k] = _save_dict(v)
elif hasattr(v, 'name'): py_dict[k] = ToPyArray(v, True) elif hasattr(v, 'name'): py_dict[k] = _to_array(v, True)
else: py_dict[k] = v else: py_dict[k] = v
pickle_module.dump(py_dict, f, pickle_protocol) pickle_module.dump(py_dict, f, pickle_protocol)
......
...@@ -15,12 +15,18 @@ from __future__ import print_function ...@@ -15,12 +15,18 @@ from __future__ import print_function
import six import six
import numpy import numpy
import dragon
from dragon.core import mapping, tensor_utils, proto_utils from dragon import config as _cfg
from dragon.vm.torch.pool import TensorPool from dragon.core import mapping as _mapping
from dragon.vm.torch.c_api import Size, from_dragon from dragon.core.tensor import Tensor as _Tensor
from dragon.core import proto_utils as _proto_utils
from dragon.core import tensor_utils as _tensor_utils
from dragon import get_default_workspace as _workspace
from dragon.vm.torch.c_api import Size as _Size
from dragon.vm.torch.c_api import device as _Device from dragon.vm.torch.c_api import device as _Device
from dragon.vm.torch.c_api import _get_tensor_pool
from dragon.vm.torch.c_api import from_dragon as _from_dragon
class Tensor(object): class Tensor(object):
...@@ -44,7 +50,7 @@ class Tensor(object): ...@@ -44,7 +50,7 @@ class Tensor(object):
if len(args) == 0: if len(args) == 0:
# + empty tensor, not leaf # + empty tensor, not leaf
if self._tensor is not None: if self._tensor is not None:
dragon.C.CreateTensor(self._tensor) _workspace().CreateTensor(self._tensor)
elif len(args) == 1: elif len(args) == 1:
if isinstance(args[0], (list, tuple)): if isinstance(args[0], (list, tuple)):
# + torch.Tensor(sequence) # + torch.Tensor(sequence)
...@@ -65,23 +71,23 @@ class Tensor(object): ...@@ -65,23 +71,23 @@ class Tensor(object):
self._init_from_shape(args, kwargs.get('dtype', 'float32')) self._init_from_shape(args, kwargs.get('dtype', 'float32'))
# Store the reference of backend # Store the reference of backend
self._storage = dragon.C.GetTensor(self.name) \ self._storage = _workspace().GetTensor(
if self.name is not None else None self.name) if self.name is not None else None
def _init_from_numpy(self, array): def _init_from_numpy(self, array):
self._static_shape = Size(array.shape) self._static_shape = _Size(array.shape)
# We use the scope of ``numpy`` instead of ``leaf`` # We use the scope of ``numpy`` instead of ``leaf``
# As it is costly to switch memory between ``copy`` and ``zero-copy`` # As it is costly to switch memory between ``copy`` and ``zero-copy``
self._tensor = tensor_utils.FromPyArray( self._tensor = _tensor_utils.FromArray(
array, TensorPool.get('${NUMPY}')) array, _get_tensor_pool().get('${NUMPY}'))
self._ignored_grads = {self.name + '_grad'} \ self._ignored_grads = {self.name + '_grad'} \
if not self._requires_grad else None if not self._requires_grad else None
def _init_from_shape(self, shape, dtype): def _init_from_shape(self, shape, dtype):
if isinstance(shape, six.integer_types): shape = [shape] if isinstance(shape, six.integer_types): shape = [shape]
self._static_shape = Size(shape) self._static_shape = _Size(shape)
self._tensor = tensor_utils.FromShape( self._tensor = _tensor_utils.FromShape(
shape, dtype, TensorPool.get('${LEAF}')) shape, dtype, _get_tensor_pool().get('${LEAF}'))
self._ignored_grads = {self.name + '_grad'} \ self._ignored_grads = {self.name + '_grad'} \
if not self._requires_grad else None if not self._requires_grad else None
...@@ -137,7 +143,7 @@ class Tensor(object): ...@@ -137,7 +143,7 @@ class Tensor(object):
The self. The self.
""" """
if device is None: device = dragon.config.GetGPU() if device is None: device = _cfg.GetGPU()
self._storage.ToCUDA(device) self._storage.ToCUDA(device)
self._device.type, self._device.index = 'cuda', device self._device.type, self._device.index = 'cuda', device
return self return self
...@@ -156,7 +162,7 @@ class Tensor(object): ...@@ -156,7 +162,7 @@ class Tensor(object):
The numpy array. The numpy array.
""" """
return tensor_utils.ToPyArray(self._tensor, readonly) return _tensor_utils.ToArray(self._tensor, readonly)
def dragon(self): def dragon(self):
"""Create a dragon tensor sharing this tensor. """Create a dragon tensor sharing this tensor.
...@@ -168,7 +174,7 @@ class Tensor(object): ...@@ -168,7 +174,7 @@ class Tensor(object):
""" """
if isinstance(self._tensor, str): if isinstance(self._tensor, str):
return dragon.Tensor.Ref(self._tensor, return _Tensor.Ref(self._tensor,
shape=self.shape, dtype=self.dtype) shape=self.shape, dtype=self.dtype)
else: return self._tensor else: return self._tensor
...@@ -453,8 +459,8 @@ class Tensor(object): ...@@ -453,8 +459,8 @@ class Tensor(object):
The float value. The float value.
""" """
if self.numel() == 1: return float(str(self.data.squeeze())) if self.numel() == 1: return float(self.numpy(readonly=True))
raise TypeError('Only size-1 arrays can be converted to Python scalars') raise TypeError('Only size-1 arrays can be converted to Python scalars.')
def __int__(self): def __int__(self):
"""Return a int Python scalar of size-1 tensor. """Return a int Python scalar of size-1 tensor.
...@@ -473,7 +479,7 @@ class Tensor(object): ...@@ -473,7 +479,7 @@ class Tensor(object):
# Always reuse the leaf variables or # Always reuse the leaf variables or
# tensors that do not require grad # tensors that do not require grad
# PyGC will detect them automatically # PyGC will detect them automatically
TensorPool.put(self.name) _get_tensor_pool().put(self.name)
def _process_indices(self, item): def _process_indices(self, item):
if not isinstance(item, (slice, tuple)): if not isinstance(item, (slice, tuple)):
...@@ -570,7 +576,7 @@ class Tensor(object): ...@@ -570,7 +576,7 @@ class Tensor(object):
The size. The size.
""" """
s = Size(self._storage.dims) s = _Size(self._storage.dims)
return s[axis] if axis is not None else s return s[axis] if axis is not None else s
@property @property
...@@ -851,10 +857,10 @@ class Tensor(object): ...@@ -851,10 +857,10 @@ class Tensor(object):
""" """
# Copy memory # Copy memory
tensor_utils.FromTensor( _tensor_utils.FromTensor(
src, proto_utils.GetDeviceOption( src, _proto_utils.GetDeviceOption(
src.device.type, src.device.index), src.device.type, src.device.index),
self.name, proto_utils.GetDeviceOption( self.name, _proto_utils.GetDeviceOption(
self.device.type, self.device.index)) self.device.type, self.device.index))
# Transfer the static shape if necessary # Transfer the static shape if necessary
self._static_shape = src.size() \ self._static_shape = src.size() \
...@@ -1484,7 +1490,7 @@ class Tensor(object): ...@@ -1484,7 +1490,7 @@ class Tensor(object):
@property @property
def grad(self): def grad(self):
g = from_dragon(self.name + '_grad', False) g = _from_dragon(self.name + '_grad', False)
if g: g._static_shape = self.shape if g: g._static_shape = self.shape
return g return g
...@@ -1512,7 +1518,7 @@ class Tensor(object): ...@@ -1512,7 +1518,7 @@ class Tensor(object):
############################################## ##############################################
def _type2str(self): def _type2str(self):
return mapping.TENSOR_TYPE_TO_TORCH_TENSOR[self.dtype] return _mapping.TENSOR_TYPE_TO_TORCH_TENSOR[self.dtype]
def CharTensor(*args, **kwargs): def CharTensor(*args, **kwargs):
...@@ -1556,7 +1562,7 @@ def _LeafTensor(shape, dtype='float32', device=_Device(), requires_grad=False): ...@@ -1556,7 +1562,7 @@ def _LeafTensor(shape, dtype='float32', device=_Device(), requires_grad=False):
Commonly used to create leaf variables, i.e., the parameters or placeholders. Commonly used to create leaf variables, i.e., the parameters or placeholders.
""" """
constructor = globals()[mapping.TENSOR_TYPE_TO_TORCH_TENSOR[dtype]] constructor = globals()[_mapping.TENSOR_TYPE_TO_TORCH_TENSOR[dtype]]
return constructor(*shape, device=device, requires_grad=requires_grad) return constructor(*shape, device=device, requires_grad=requires_grad)
...@@ -1567,7 +1573,7 @@ def _RuntimeTensor(name, dtype='float32', device=_Device()): ...@@ -1567,7 +1573,7 @@ def _RuntimeTensor(name, dtype='float32', device=_Device()):
i.e., the shape is computed by the backend automatically. i.e., the shape is computed by the backend automatically.
""" """
constructor = globals()[mapping.TENSOR_TYPE_TO_TORCH_TENSOR[dtype]] constructor = globals()[_mapping.TENSOR_TYPE_TO_TORCH_TENSOR[dtype]]
return constructor(name=name, device=device) return constructor(name=name, device=device)
...@@ -1578,8 +1584,8 @@ def _ReferenceTensor(src): ...@@ -1578,8 +1584,8 @@ def _ReferenceTensor(src):
i.e., view, squeeze, and unsqueeze. i.e., view, squeeze, and unsqueeze.
""" """
constructor = globals()[mapping.TENSOR_TYPE_TO_TORCH_TENSOR[src.dtype]] constructor = globals()[_mapping.TENSOR_TYPE_TO_TORCH_TENSOR[src.dtype]]
T = constructor(name=TensorPool.get('${REFERENCE}'), device=src.device) T = constructor(name=_get_tensor_pool().get('${REFERENCE}'), device=src.device)
T._ref_objects.append(src) T._ref_objects.append(src)
return T return T
......
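__float__ above now converts through numpy(readonly=True) and still accepts only size-1 tensors. A standalone numpy sketch of that conversion rule:

import numpy as np

def to_float(array):
    if array.size != 1:
        raise TypeError('Only size-1 arrays can be converted to Python scalars.')
    return float(array.item())   # .item() extracts the single element

print(to_float(np.array([[3.5]])))   # 3.5
try:
    to_float(np.zeros(2))
except TypeError as e:
    print(e)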
...@@ -7,9 +7,9 @@ namespace dragon { ...@@ -7,9 +7,9 @@ namespace dragon {
/*! Default constructor of <GraphBase> */ /*! Default constructor of <GraphBase> */
GraphBase::GraphBase(const GraphDef& meta_graph, Workspace* ws) GraphBase::GraphBase(const GraphDef& def, Workspace* ws)
: name_(meta_graph.name()), ws_(ws) { : name_(def.name()), ws_(ws) {
for (auto arg : meta_graph.arg()) { for (auto arg : def.arg()) {
CHECK_GT(arg.name().size(), 0); CHECK_GT(arg.name().size(), 0);
CHECK_EQ(args_.count(arg.name()), 0); CHECK_EQ(args_.count(arg.name()), 0);
args_[arg.name()] = arg; args_[arg.name()] = arg;
...@@ -18,7 +18,7 @@ GraphBase::GraphBase(const GraphDef& meta_graph, Workspace* ws) ...@@ -18,7 +18,7 @@ GraphBase::GraphBase(const GraphDef& meta_graph, Workspace* ws)
Set<string> known_tensors; Set<string> known_tensors;
// Topo-check for a graph // Topo-check for a graph
for (const auto& op : meta_graph.op()) { for (const auto& op : def.op()) {
// Check inputs // Check inputs
for (const auto& in : op.input()) for (const auto& in : op.input())
CHECK(known_tensors.count(in) || ws_->HasTensor(in)) CHECK(known_tensors.count(in) || ws_->HasTensor(in))
...@@ -30,7 +30,7 @@ GraphBase::GraphBase(const GraphDef& meta_graph, Workspace* ws) ...@@ -30,7 +30,7 @@ GraphBase::GraphBase(const GraphDef& meta_graph, Workspace* ws)
// Check for all solving targets // Check for all solving targets
Set<string> objective_targets; Set<string> objective_targets;
for (const auto& target : meta_graph.output()) { for (const auto& target : def.output()) {
CHECK(known_tensors.count(target) || CHECK(known_tensors.count(target) ||
ws_->HasTensor(target)) ws_->HasTensor(target))
<< "\nTarget: " << target << "\nTarget: " << target
...@@ -39,7 +39,7 @@ GraphBase::GraphBase(const GraphDef& meta_graph, Workspace* ws) ...@@ -39,7 +39,7 @@ GraphBase::GraphBase(const GraphDef& meta_graph, Workspace* ws)
} }
// Check for all gradients // Check for all gradients
for (const auto& gradient : meta_graph.gradient()) { for (const auto& gradient : def.gradient()) {
const auto& cost = gradient.cost(); const auto& cost = gradient.cost();
const auto& wrt = gradient.wrt(); const auto& wrt = gradient.wrt();
CHECK(known_tensors.count(cost) || ws_->HasTensor(cost)) CHECK(known_tensors.count(cost) || ws_->HasTensor(cost))
...@@ -55,91 +55,23 @@ GraphBase::GraphBase(const GraphDef& meta_graph, Workspace* ws) ...@@ -55,91 +55,23 @@ GraphBase::GraphBase(const GraphDef& meta_graph, Workspace* ws)
} }
} }
/*! Build the update operators from the def */
GraphDef GraphBase::BuildUpdateOps(const GraphDef& input_def) {
OperatorDef collective_op;
collective_op.set_type("CollectiveUpdate");
// Generate Update Ops
vector<OperatorDef> update_ops;
for (const auto& updater : input_def.updater()) {
vector<string> missing_tensors;
for (const auto& tensor : updater.tensor()) {
if (!ws_->HasTensor(tensor)) {
LOG(INFO) << "Missing Tensor: " << tensor;
missing_tensors.push_back(tensor);
}
}
if (missing_tensors.size() == 0) {
vector<Argument> args;
for (const auto& arg : updater.arg()) args.push_back(arg);
OperatorDef op_def = MakeOperatorDef(updater.type(),
updater.name(),
vector<string>({ updater.tensor(1) }), // dX
vector<string>({ updater.tensor(0) })); // X
collective_op.add_input(updater.tensor(1));
collective_op.add_output(updater.tensor(1));
op_def.mutable_arg()->CopyFrom(updater.arg());
update_ops.push_back(op_def);
} else {
LOG(INFO) << "Missing tensors. Skip the update to Tensor("
<< updater.tensor(0) << ")";
}
}
// Generate Collective Ops if necessary
vector<OperatorDef> collective_ops;
if (args_.count("parallel_mode")) {
if (args_["parallel_mode"].s() == "MPI" ||
args_["parallel_mode"].s() == "NCCL") {
OperatorDef op_def;
op_def.CopyFrom(collective_op);
Argument collective_mode;
collective_mode.set_name("mode");
collective_mode.set_s(
args_["parallel_mode"].s() + "_ALLREDUCE");
op_def.add_arg()->CopyFrom(collective_mode);
if (args_.count("comm") &&
args_.count("group") &&
args_.count("root")) {
op_def.add_arg()->CopyFrom(args_["comm"]);
op_def.add_arg()->CopyFrom(args_["group"]);
op_def.add_arg()->CopyFrom(args_["root"]);
} else {
LOG(FATAL) << "MPI was not initialized.";
}
collective_ops.push_back(op_def);
}
}
// Generate graph
GraphDef update_graph(input_def);
update_graph.clear_updater();
for (const auto& op : collective_ops) update_graph.add_op()->CopyFrom(op);
for (const auto& op : update_ops) update_graph.add_op()->CopyFrom(op);
return update_graph;
}
/*! Create a graph from the optimized def */ /*! Create a graph from the optimized def */
bool Graph::Create( bool Graph::Create(const GraphDef& def, Workspace* ws) {
const GraphDef& optimized_graph, bool has_device_option = def.has_device_option();
Workspace* ws) { for (int i = 0; i < def.op_size(); i++) {
bool has_device_option = optimized_graph.has_device_option(); OperatorDef op_def(def.op(i));
for (int i = 0; i < optimized_graph.op_size(); i++) {
OperatorDef op_def(optimized_graph.op(i));
LOG(DEBUG) << "Create Operator " << op_def.name() LOG(DEBUG) << "Create Operator " << op_def.name()
<< ": " << op_def.type(); << ": " << op_def.type();
// Inherit device option if necessary // Inherit device option if necessary
if (!op_def.has_device_option() && has_device_option) if (!op_def.has_device_option() && has_device_option)
op_def.mutable_device_option()->CopyFrom( op_def.mutable_device_option()
optimized_graph.device_option()); ->CopyFrom(def.device_option());
// For the static graph, mark ops as recomputing-aware // For the static graph, mark ops as recomputing-aware
Argument arg; arg.set_name("allow_recomputing"); Argument arg; arg.set_name("allow_recomputing");
arg.set_i(1); op_def.add_arg()->CopyFrom(arg); arg.set_i(1); op_def.add_arg()->CopyFrom(arg);
// For the last operator, enforce the synchronization // For the last operator, enforce the synchronization
if (i == optimized_graph.op_size() - 1) { if (i == def.op_size() - 1) {
arg.set_name("do_sync"); arg.set_name("do_sync");
arg.set_i(1); op_def.add_arg()->CopyFrom(arg); arg.set_i(1); op_def.add_arg()->CopyFrom(arg);
} }
...@@ -151,53 +83,43 @@ bool Graph::Create( ...@@ -151,53 +83,43 @@ bool Graph::Create(
/*! Default constructor of <Graph> */ /*! Default constructor of <Graph> */
Graph::Graph(const GraphDef& meta_graph, Workspace* ws) Graph::Graph(const GraphDef& def, Workspace* ws)
: GraphBase(meta_graph, ws) { : GraphBase(def, ws) {
GraphDef optimized_graph; // Apply the optimizations
GraphDef opt_def = def;
GraphOptimizer graph_optim(ws);
GraphGradientMaker gradient_maker;
Map< string, vector<int> > subgraph_indices; Map< string, vector<int> > subgraph_indices;
if (meta_graph.updater_size() > 0) { int opt = 3; // defaults: O3
/*! if (this->args_.count("optimization_level"))
* Check if existing any updaters. opt = this->args_["optimization_level"].i();
* if (opt >= 1) opt_def = graph_optim.PruneNodes(def);
* Note that the graph with update ops is not a dag, if (opt >= 2) opt_def = graph_optim.AddInplace(opt_def);
* we should handle them independently. if (opt >= 3) {
*/ if (this->args_["phase"].s() == "TRAIN") {
optimized_graph = this->BuildUpdateOps(meta_graph); opt_def = graph_optim.MirrorStage(
} else { opt_def, subgraph_indices);
int OX = 3; // defaults: O3 opt_def = gradient_maker.Share(opt_def);
if (this->args_.count("optimization_level")) } else {
OX = this->args_["optimization_level"].i(); opt_def = graph_optim.SimulateGC(opt_def);
optimized_graph = meta_graph;
GraphOptimizer optimizer(ws);
GraphGradientMaker gradient_maker;
if (OX >= 1) optimized_graph = optimizer.PruneNodes(meta_graph);
if (OX >= 2) optimized_graph = optimizer.AddInplace(optimized_graph);
if (OX >= 3) {
if (this->args_["phase"].s() == "TRAIN") {
optimized_graph = optimizer.MirrorStage(
optimized_graph, subgraph_indices);
gradient_maker.Share(optimized_graph);
} else {
optimized_graph = optimizer.SimulateGC(optimized_graph);
}
} }
} }
// Try to store the final graph as a tensor for visualization // Try to store the final graph as a tensor for visualization
bool could_be_serialized = true; bool could_be_serialized = true;
for (auto& op : optimized_graph.op()) for (auto& op : opt_def.op())
if (op.type() == "GivenTensorFill") if (op.type() == "GivenTensorFill")
could_be_serialized = false; could_be_serialized = false;
if (could_be_serialized) { if (could_be_serialized) {
auto* T = ws_->CreateTensor( auto* T = ws_->CreateTensor(
"/graph_def/optimized/" + "/graph_def/optimized/" +
meta_graph.name())->Reshape({ 1 }); opt_def.name())->Reshape({ 1 });
T->mutable_data<string, CPUContext>()[0] T->mutable_data<string, CPUContext>()[0]
= optimized_graph.DebugString(); = opt_def.DebugString();
} }
// Create // Create
Create(optimized_graph, ws); Create(opt_def, ws);
// Recomputing-aware // Recomputing-aware
if (subgraph_indices.size() > 0) { if (subgraph_indices.size() > 0) {
......
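
Note: the rewritten constructor above dispatches the passes cumulatively on "optimization_level" (default O3): O1 prunes unreachable nodes, O2 adds in-place buffers, and O3 either mirrors stages and shares gradient buffers (TRAIN) or simulates garbage collection otherwise. A minimal standalone sketch of that cumulative dispatch, with hypothetical placeholder pass functions instead of Dragon's GraphOptimizer/GraphGradientMaker:

#include <iostream>
#include <string>
#include <vector>

struct GraphIR { std::vector<std::string> passes; };  // stand-in for GraphDef

// Hypothetical passes: each consumes a graph description and returns a new one.
GraphIR PruneNodes(GraphIR g)  { g.passes.push_back("prune");   return g; }
GraphIR AddInplace(GraphIR g)  { g.passes.push_back("inplace"); return g; }
GraphIR MirrorStage(GraphIR g) { g.passes.push_back("mirror");  return g; }
GraphIR ShareGrads(GraphIR g)  { g.passes.push_back("share");   return g; }
GraphIR SimulateGC(GraphIR g)  { g.passes.push_back("gc");      return g; }

GraphIR Optimize(GraphIR g, int level, bool training) {
    // Passes are cumulative: O2 implies O1, O3 implies O2.
    if (level >= 1) g = PruneNodes(std::move(g));
    if (level >= 2) g = AddInplace(std::move(g));
    if (level >= 3) {
        if (training) { g = MirrorStage(std::move(g)); g = ShareGrads(std::move(g)); }
        else          { g = SimulateGC(std::move(g)); }
    }
    return g;
}

int main() {
    GraphIR g = Optimize(GraphIR{}, /*level=*/3, /*training=*/true);
    for (const auto& p : g.passes) std::cout << p << " ";  // prune inplace mirror share
    std::cout << "\n";
}
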
...@@ -168,7 +168,7 @@ void GraphGradientMaker::Make( ...@@ -168,7 +168,7 @@ void GraphGradientMaker::Make(
OperatorDef generate_op = MakeOperatorDef( OperatorDef generate_op = MakeOperatorDef(
"GradientGenerate", GetOperatorName(), "GradientGenerate", GetOperatorName(),
op_inputs, op_outputs, op_inputs, op_outputs,
vector<Argument>(1, arg_defaults)); vector<Argument>({ arg_defaults }));
if (op.has_device_option()) if (op.has_device_option())
generate_op.mutable_device_option() generate_op.mutable_device_option()
->CopyFrom(op.device_option()); ->CopyFrom(op.device_option());
...@@ -211,25 +211,65 @@ void GraphGradientMaker::Make( ...@@ -211,25 +211,65 @@ void GraphGradientMaker::Make(
} \ } \
*op->mutable_output(ix) = temp_grad;} *op->mutable_output(ix) = temp_grad;}
void GraphGradientMaker::Share(GraphDef& graph) { GraphDef GraphGradientMaker::Share(const GraphDef& input_def) {
Set<int> invalid_ops; Set<int> invalid_ops;
Map<string, int> ref_count; Map<string, int> ref_count;
Map< string, pair<int, string> > ssa_map;
// Count the refs for detecting leaf nodes // Count the refs for detecting leaf nodes
for (int i = 0; i < graph.op_size(); ++i) { for (int i = 0; i < input_def.op_size(); ++i) {
const OperatorDef& op = graph.op(i); const OperatorDef& op = input_def.op(i);
// Ignore the non-gradient ops // Ignore the non-gradient ops
if (op.type().find("Gradient") == string::npos) continue; if (op.type().find("Gradient") == string::npos) continue;
if (op.type() == "GradientGather" && if (op.type() == "GradientGather") {
ignore_grads_.count(op.output(0))) { invalid_ops.insert(i);
for (auto& input : op.input()) if (ignore_grads_.count(op.output(0))) {
ignore_grads_.insert(input); for (const auto& input : op.input())
invalid_ops.insert(i); continue; ignore_grads_.insert(input);
continue;
} else {
string head;
for (const auto& input : op.input()) {
if (input != "NULL") {
if (head.empty()) head = input;
ssa_map[input] = { i, head };
}
}
}
} }
for (auto& input : op.input()) for (const auto& input : op.input())
if (input.find("grad") != string::npos) if (input.find("grad") != string::npos)
ref_count[input] += 1; ref_count[input] += 1;
} }
// Decompose the GradientGather ops into SSA form
GraphDef output_def(input_def); output_def.clear_op();
for (int i = 0; i < input_def.op_size(); ++i) {
if (invalid_ops.count(i)) continue;
const OperatorDef& op = input_def.op(i);
output_def.add_op()->CopyFrom(op);
if (op.type().find("Gradient") == string::npos) continue;
for (const auto& output : op.output()) {
const auto& find_iter = ssa_map.find(output);
if (find_iter != ssa_map.end()) {
const OperatorDef& gather_op =
input_def.op(find_iter->second.first);
OperatorDef acc_op(gather_op);
acc_op.clear_input();
if (output != find_iter->second.second) {
acc_op.set_type("GradientAdd");
// Fake an in-place input to avoid allocating a new buffer
acc_op.add_input(gather_op.output(0));
const auto& ref_iter = ref_count.find(
gather_op.output(0));
if (ref_iter != ref_count.end())
ref_iter->second++;
}
acc_op.add_input(output);
output_def.add_op()->CopyFrom(acc_op);
}
}
}
// Prepare the Gradients Pool // Prepare the Gradients Pool
int temporary_idx = 0; int temporary_idx = 0;
Map<string, string> temporary_grads; Map<string, string> temporary_grads;
...@@ -240,7 +280,7 @@ void GraphGradientMaker::Share(GraphDef& graph) { ...@@ -240,7 +280,7 @@ void GraphGradientMaker::Share(GraphDef& graph) {
std::to_string(temporary_idx++); std::to_string(temporary_idx++);
} else { } else {
/*! /*!
* *LIFO* is more memory efficent than *FIFO* usually, * LIFO is usually more memory-efficient than FIFO,
* Because the larger gradients will bring out later. * because the larger gradients are produced later.
* *
* Memory distribution turns out to be uniform, * Memory distribution turns out to be uniform,
...@@ -252,12 +292,10 @@ void GraphGradientMaker::Share(GraphDef& graph) { ...@@ -252,12 +292,10 @@ void GraphGradientMaker::Share(GraphDef& graph) {
} }
}; };
for (int i = 0; i < graph.op_size(); ++i) { for (int i = 0; i < output_def.op_size(); ++i) {
OperatorDef* op = graph.mutable_op(i); OperatorDef* op = output_def.mutable_op(i);
// Ignore the non-gradient ops // Ignore the non-gradient ops
if (op->type().find("Gradient") == string::npos) continue; if (op->type().find("Gradient") == string::npos) continue;
// Ignore the invalid ops
if (invalid_ops.count(i)) { op->mutable_type()->clear(); continue; }
// GC to store the grads that have finished lifecycle // GC to store the grads that have finished lifecycle
vector<string> GC; vector<string> GC;
// Inplace-aware // Inplace-aware
...@@ -284,9 +322,12 @@ void GraphGradientMaker::Share(GraphDef& graph) { ...@@ -284,9 +322,12 @@ void GraphGradientMaker::Share(GraphDef& graph) {
// Determine the scanning order // Determine the scanning order
bool left = true; bool left = true;
static Set<string> ROrderOps = { static Set<string> ROrderOps = {
"ConcatGradient", "StackGradient", "RAddGradient",
"RAddGradient", "RSubGradient", "RSubGradient",
"RMulGradient", "RDivGradient", "RMulGradient",
"RDivGradient",
"StackGradient",
"ConcatGradient",
}; };
if (ROrderOps.count(op->type())) left = false; if (ROrderOps.count(op->type())) left = false;
// Check output grads, left order // Check output grads, left order
...@@ -296,6 +337,7 @@ void GraphGradientMaker::Share(GraphDef& graph) { ...@@ -296,6 +337,7 @@ void GraphGradientMaker::Share(GraphDef& graph) {
// Update the pool from GC // Update the pool from GC
for (auto& e : GC) grads_pool.emplace_back(e); for (auto& e : GC) grads_pool.emplace_back(e);
} }
return output_def;
} }
} // namespace dragon } // namespace dragon
\ No newline at end of file
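
Note: Share() now returns a rewritten GraphDef and draws temporary gradient buffers from a pool; the comment above argues that handing back the most recently released buffer (LIFO) reuses the large, late-produced gradients better than FIFO would. A small standalone sketch of that reuse policy; the GradBufferPool name and the "/share/buffer/grad:N" naming are made up for illustration:

#include <iostream>
#include <string>
#include <vector>

class GradBufferPool {
 public:
    // Return a free buffer name, or create a new one if the pool is empty.
    std::string Get() {
        if (pool_.empty())
            return "/share/buffer/grad:" + std::to_string(next_idx_++);
        std::string name = pool_.back();  // LIFO: reuse the most recently released buffer
        pool_.pop_back();
        return name;
    }
    // Release a buffer once its gradient has finished its lifecycle.
    void Put(const std::string& name) { pool_.push_back(name); }

 private:
    std::vector<std::string> pool_;
    int next_idx_ = 0;
};

int main() {
    GradBufferPool pool;
    std::string a = pool.Get();       // "/share/buffer/grad:0"
    std::string b = pool.Get();       // "/share/buffer/grad:1"
    pool.Put(b);
    pool.Put(a);                      // "a" is released last ...
    std::cout << pool.Get() << "\n";  // ... so it is reused first: "/share/buffer/grad:0"
}
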
...@@ -16,7 +16,7 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) { ...@@ -16,7 +16,7 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) {
const OperatorDef& op = input_def.op(i); const OperatorDef& op = input_def.op(i);
for (const auto& v : op.output()) { for (const auto& v : op.output()) {
vector<string> sp_u; vector<string> sp_u;
if (!op.input_size()) sp_u.resize(op.output_size(), ""); if (!op.input_size()) sp_u.resize(op.output_size());
else sp_u.assign(op.input().begin(), op.input().end()); else sp_u.assign(op.input().begin(), op.input().end());
for (const auto& u : sp_u) { for (const auto& u : sp_u) {
if (u == "NULL") continue; if (u == "NULL") continue;
...@@ -55,7 +55,7 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) { ...@@ -55,7 +55,7 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) {
// Remove the tensors that cannot be produced (redundant) // Remove the tensors that cannot be produced (redundant)
Set<string> outputs; Set<string> outputs;
// Check for fed tensors // Check for fed tensors
for (const auto& e : ws_->GetTensors()) outputs.insert(e); for (const auto& e : ws_->tensors()) outputs.insert(e);
// Note that we use map to keep topo-order // Note that we use map to keep topo-order
map<int, OperatorDef> final_sequence; map<int, OperatorDef> final_sequence;
...@@ -114,7 +114,7 @@ GraphDef GraphOptimizer::AddInplace(const GraphDef& input_def) { ...@@ -114,7 +114,7 @@ GraphDef GraphOptimizer::AddInplace(const GraphDef& input_def) {
const OperatorDef& op = input_def.op(i); const OperatorDef& op = input_def.op(i);
for (const auto& v : op.output()) { for (const auto& v : op.output()) {
vector<string> sp_u; vector<string> sp_u;
if (!op.input_size()) sp_u.resize(op.output_size(), ""); if (!op.input_size()) sp_u.resize(op.output_size());
else sp_u.assign(op.input().begin(), op.input().end()); else sp_u.assign(op.input().begin(), op.input().end());
for (const auto& u : sp_u) { for (const auto& u : sp_u) {
if (u == "NULL") continue; if (u == "NULL") continue;
...@@ -224,7 +224,6 @@ GraphDef GraphOptimizer::MirrorStage( ...@@ -224,7 +224,6 @@ GraphDef GraphOptimizer::MirrorStage(
} }
CHECK(!v2_name.empty()) << "\nNot enough buffers for outputs."; CHECK(!v2_name.empty()) << "\nNot enough buffers for outputs.";
ws_->CreateTensor(v2_name)->set_version(0); ws_->CreateTensor(v2_name)->set_version(0);
if (!versions.count(v2_name)) versions[v2_name] = 0;
version_name = "/ver:" + std::to_string(versions[v2_name]++); version_name = "/ver:" + std::to_string(versions[v2_name]++);
*op_v2->mutable_output(j) = rename_map[op.output(j)] = *op_v2->mutable_output(j) = rename_map[op.output(j)] =
v2_name + version_name; v2_name + version_name;
...@@ -248,8 +247,6 @@ GraphDef GraphOptimizer::MirrorStage( ...@@ -248,8 +247,6 @@ GraphDef GraphOptimizer::MirrorStage(
set<int> minimum_ops = {i}; set<int> minimum_ops = {i};
for (int j = 0; j < input_op.input_size(); ++j) { for (int j = 0; j < input_op.input_size(); ++j) {
if (input_op.input(j) != output_op.input(j)) { if (input_op.input(j) != output_op.input(j)) {
if (!fake_op_indices.count(input_op.input(j)))
fake_op_indices[input_op.input(j)] = set<int>();
for (auto idx : fake_op_indices[input_op.input(j)]) for (auto idx : fake_op_indices[input_op.input(j)])
minimum_ops.insert(idx); minimum_ops.insert(idx);
} }
...@@ -262,7 +259,6 @@ GraphDef GraphOptimizer::MirrorStage( ...@@ -262,7 +259,6 @@ GraphDef GraphOptimizer::MirrorStage(
// Bind to the renamed tensors // Bind to the renamed tensors
for (const auto& it : rename_map) { for (const auto& it : rename_map) {
op_indices[it.second] = vector<int>();
for (auto op_idx : fake_op_indices[it.first]) for (auto op_idx : fake_op_indices[it.first])
op_indices[it.second].push_back(op_idx); op_indices[it.second].push_back(op_idx);
} }
......
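
Note: several defensive initializations were dropped in this file (for example `if (!versions.count(v2_name)) versions[v2_name] = 0;` and the explicit empty-set/vector insertions) because std::map::operator[] already value-initializes a missing entry, so incrementing or appending through it on a fresh key is safe. A tiny sketch of that guarantee:

#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
    std::map<std::string, int> versions;
    // operator[] inserts a value-initialized int (0) for a missing key,
    // so the "if (!versions.count(...)) versions[...] = 0;" guard is redundant.
    int v = versions["/share/buffer/symbol:0"]++;
    assert(v == 0 && versions["/share/buffer/symbol:0"] == 1);

    std::map<std::string, std::vector<int>> op_indices;
    // Likewise, a missing key maps to an empty (default-constructed) vector.
    op_indices["renamed"].push_back(7);
    assert(op_indices["renamed"].size() == 1);
}
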
...@@ -6,7 +6,7 @@ namespace dragon { ...@@ -6,7 +6,7 @@ namespace dragon {
/*! Create some internal tensors */ /*! Create some internal tensors */
void Workspace::InitWorkspace() { void Workspace::Initialize() {
CreateTensor("NULL"); CreateTensor("NULL");
Tensor* recomputing_flag = CreateTensor( Tensor* recomputing_flag = CreateTensor(
"/opt/recomputing_flag")->Reshape({ 1 }); "/opt/recomputing_flag")->Reshape({ 1 });
...@@ -14,21 +14,18 @@ void Workspace::InitWorkspace() { ...@@ -14,21 +14,18 @@ void Workspace::InitWorkspace() {
<bool, CPUContext>()[0] = false; <bool, CPUContext>()[0] = false;
} }
/*! Move a external workspace into this workspace */ /*! Destroy all the tensors */
Workspace* Workspace::Move(Workspace* ws) { void Workspace::Clear() {
CHECK(ws) << "The given Workspace is invalid."; // Remove and Initialize again
if (workspace_map_.count(ws->name())) tensor_map_.clear(); Initialize();
return workspace_map_[ws->name()];
return workspace_map_[ws->name()] = ws;
} }
/*! Destory all the tensors */ /*! Merge from an external workspace */
void Workspace::Clear() { void Workspace::MergeFrom(Workspace* ws) {
// Clear tensors, then re-initialization CHECK(ws) << "\nThe given Workspace is invalid.";
for (auto& kv : tensor_map_) kv.second->Reset(); remote_workspaces_.emplace_back(ws);
InitWorkspace();
} }
/*! Query the real name of specified tensor */ /*! Query the real name of specified tensor */
...@@ -53,9 +50,9 @@ Tensor* Workspace::TryGetTensor( ...@@ -53,9 +50,9 @@ Tensor* Workspace::TryGetTensor(
if (use_remote) { if (use_remote) {
// Search the remote workspaces // Search the remote workspaces
for (auto& it : workspace_map_) { for (auto* ws : remote_workspaces_) {
if (it.second->HasTensor(query)) if (ws->HasTensor(query))
return it.second->GetTensor(query); return ws->GetTensor(query);
} }
} }
return nullptr; return nullptr;
...@@ -66,7 +63,8 @@ Tensor* Workspace::TryGetTensor( ...@@ -66,7 +63,8 @@ Tensor* Workspace::TryGetTensor(
Tensor* Workspace::CreateTensor(const string& name) { Tensor* Workspace::CreateTensor(const string& name) {
Tensor* tensor = TryGetTensor(name); Tensor* tensor = TryGetTensor(name);
if (!tensor) { if (!tensor) {
tensor_map_[name] = unique_ptr<Tensor>(new Tensor(name)); tensor_map_[name] = unique_ptr
<Tensor>(new Tensor(name));
return tensor_map_[name].get(); return tensor_map_[name].get();
} }
return tensor; return tensor;
...@@ -78,8 +76,8 @@ Tensor* Workspace::GetTensor( ...@@ -78,8 +76,8 @@ Tensor* Workspace::GetTensor(
const string& name, const string& name,
bool use_remote) const { bool use_remote) const {
Tensor* tensor = TryGetTensor(name, use_remote); Tensor* tensor = TryGetTensor(name, use_remote);
CHECK(tensor) << "\nTensor(" << name << ") does not exist " CHECK(tensor) << "\nTensor(" << name << ") does not "
<< "in current workspace or sub-workspace."; << "exist in current workspace.";
return tensor; return tensor;
} }
...@@ -88,22 +86,23 @@ Tensor* Workspace::GetTensor( ...@@ -88,22 +86,23 @@ Tensor* Workspace::GetTensor(
void Workspace::ResetTensor(const string& name) { void Workspace::ResetTensor(const string& name) {
Tensor* tensor = TryGetTensor(name, false); Tensor* tensor = TryGetTensor(name, false);
CHECK(tensor) << "\nTensor(" << name << ") does not " CHECK(tensor) << "\nTensor(" << name << ") does not "
<< "belong to current workspace, could not be reset."; << "belong to current workspace.";
tensor->Reset(); tensor->Reset();
} }
/*! Return all the stored tensor names */ /*! Return the names of stored tensors */
vector<string> Workspace::GetTensors() const { vector<string> Workspace::tensors() const {
vector<string> locals; vector<string> locals;
// Search the local workspace // Search the local workspace
for (const auto& it : tensor_map_) for (const auto& it : tensor_map_)
locals.push_back(it.first); locals.push_back(it.first);
// Search the remote workspaces // Search the remote workspaces
for (const auto& it : workspace_map_) { for (auto* ws : remote_workspaces_) {
vector<string> remotes = it.second->GetTensors(); vector<string> remotes = ws->tensors();
locals.insert(locals.end(), remotes.begin(), remotes.end()); locals.insert(locals.end(),
remotes.begin(), remotes.end());
} }
return locals; return locals;
} }
...@@ -118,14 +117,14 @@ bool Workspace::HasFiller( ...@@ -118,14 +117,14 @@ bool Workspace::HasFiller(
if (!use_remote) return result; if (!use_remote) return result;
// Search the remote workspaces // Search the remote workspaces
for (auto& it : workspace_map_) for (auto* ws : remote_workspaces_)
result |= it.second->HasFiller(name); result |= ws->HasFiller(name);
return result; return result;
} }
/*! Create the specified filler */ /*! Create the specified filler */
void Workspace::CreateFiller( void Workspace::CreateFiller(
const TensorFillerProto filler) { const TensorFillerProto& filler) {
CHECK_GT(filler.tensor().size(), 0) CHECK_GT(filler.tensor().size(), 0)
<< "\nTensor with an empty name can not be filled."; << "\nTensor with an empty name can not be filled.";
if (HasFiller(filler.tensor())) return; if (HasFiller(filler.tensor())) return;
...@@ -141,9 +140,9 @@ const TensorFillerProto* Workspace::GetFiller( ...@@ -141,9 +140,9 @@ const TensorFillerProto* Workspace::GetFiller(
if (it != tensor_filler_map_.end()) return &it->second; if (it != tensor_filler_map_.end()) return &it->second;
// Search the remote workspaces // Search the remote workspaces
for (const auto& it : workspace_map_) { for (auto* ws : remote_workspaces_) {
if (it.second->HasFiller(name)) if (ws->HasFiller(name))
return it.second->GetFiller(name); return ws->GetFiller(name);
} }
return nullptr; return nullptr;
} }
...@@ -153,7 +152,6 @@ const TensorFillerProto* Workspace::GetFiller( ...@@ -153,7 +152,6 @@ const TensorFillerProto* Workspace::GetFiller(
OperatorBase* Workspace::CreateOperator(const OperatorDef& def) { OperatorBase* Workspace::CreateOperator(const OperatorDef& def) {
const auto& it = operator_map_.find(def.uid()); const auto& it = operator_map_.find(def.uid());
if (it == operator_map_.end()) { if (it == operator_map_.end()) {
for (auto& input : def.input()) CreateTensor(input);
auto* new_op = NewOperator(def, this); auto* new_op = NewOperator(def, this);
operator_map_[def.uid()] = unique_ptr< operator_map_[def.uid()] = unique_ptr<
OperatorBase>(new_op); return new_op; OperatorBase>(new_op); return new_op;
...@@ -209,9 +207,9 @@ void Workspace::RunGraph( ...@@ -209,9 +207,9 @@ void Workspace::RunGraph(
graph_map_[graph_name]->Run(include, exclude, stream_id); graph_map_[graph_name]->Run(include, exclude, stream_id);
} }
/*! Return all the stored graph names */ /*! Return the names of stored graphs */
vector<string> Workspace::GetGraphs() const { vector<string> Workspace::graphs() const {
vector<string> names; vector<string> names;
for (const auto& it : graph_map_) { for (const auto& it : graph_map_) {
names.push_back(it.first); names.push_back(it.first);
...@@ -237,17 +235,20 @@ string Workspace::GetDummyName( ...@@ -237,17 +235,20 @@ string Workspace::GetDummyName(
const string& suffix, const string& suffix,
const string& domain, const string& domain,
const bool zero_based) { const bool zero_based) {
string required_name = base_name + suffix; string accepted_name; int64_t index;
if (dummy_name_map_.count(domain) == 0) { const auto required_name = base_name + suffix;
dummy_name_map_[domain] = Map<string, int64_t>(); auto& dmap = dummy_name_map_[domain];
while (1) {
index = dmap[required_name]++;
accepted_name = index ? base_name + "_" +
std::to_string(index) + suffix :
zero_based ? required_name :
base_name + "_" + std::to_string(
dmap[required_name]++) + suffix;
if (remote_workspaces_.empty()) break;
if (!HasTensor(accepted_name)) break;
} }
auto& map_this_domain = dummy_name_map_[domain]; return accepted_name;
int64_t index = map_this_domain[required_name]++;
return index ? base_name + "_" +
std::to_string(index) + suffix :
zero_based ? required_name :
base_name + "_" + std::to_string(
map_this_domain[required_name]++) + suffix;
} }
} // namespace dragon } // namespace dragon
\ No newline at end of file
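
Note: GetDummyName above keeps a per-domain counter map and, once remote workspaces may be merged in, keeps bumping the counter until the candidate no longer collides with an existing tensor. A standalone sketch of that scheme; the DummyNamer class and the plain set standing in for the tensor lookup are illustrative only:

#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>

class DummyNamer {
 public:
    explicit DummyNamer(std::set<std::string> existing)
        : existing_(std::move(existing)) {}

    std::string Get(const std::string& base, const std::string& suffix,
                    const std::string& domain, bool zero_based) {
        auto& dmap = dummy_name_map_[domain];
        const std::string required = base + suffix;
        while (true) {
            int64_t index = dmap[required]++;
            std::string accepted =
                index ? base + "_" + std::to_string(index) + suffix
                      : (zero_based ? required
                                    : base + "_" + std::to_string(dmap[required]++) + suffix);
            // Retry only if the candidate is already taken.
            if (!existing_.count(accepted)) return accepted;
        }
    }

 private:
    std::set<std::string> existing_;
    std::map<std::string, std::map<std::string, int64_t>> dummy_name_map_;
};

int main() {
    DummyNamer namer({"Conv", "Conv_1"});
    std::cout << namer.Get("Conv", "", "op", true) << "\n";  // "Conv" and "Conv_1" taken -> "Conv_2"
    std::cout << namer.Get("Conv", "", "op", true) << "\n";  // counter keeps advancing -> "Conv_3"
}
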
...@@ -117,19 +117,21 @@ template<> void PReluWGrad<float, CPUContext>( ...@@ -117,19 +117,21 @@ template<> void PReluWGrad<float, CPUContext>(
} }
} }
if (channel_shared) { if (channel_shared) {
math::Dot<float, CPUContext>(channels * dim, math::Dot(channels * dim,
bcast_dw, multiplier, dw, ctx); bcast_dw, multiplier, dw, ctx);
} else { } else {
if (data_format == "NCHW") { if (data_format == "NCHW") {
math::Gemv<float, CPUContext>( math::Gemv(
CblasNoTrans, channels, dim, CblasNoTrans,
1.f, bcast_dw, multiplier, channels, dim,
0.f, dw, ctx); 1.f, bcast_dw, multiplier,
0.f, dw, ctx);
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
math::Gemv<float, CPUContext>( math::Gemv(
CblasTrans, dim, channels, CblasTrans,
1.f, bcast_dw, multiplier, dim, channels,
0.f, dw, ctx); 1.f, bcast_dw, multiplier,
0.f, dw, ctx);
} else LOG(FATAL) << "Unknown data format: " << data_format; } else LOG(FATAL) << "Unknown data format: " << data_format;
} }
} }
......
...@@ -204,19 +204,21 @@ template<> void PReluWGrad<float, CUDAContext>( ...@@ -204,19 +204,21 @@ template<> void PReluWGrad<float, CUDAContext>(
0, ctx->cuda_stream() >> > 0, ctx->cuda_stream() >> >
(cdim, rows, row_offset, dy, x, bcast_dw); (cdim, rows, row_offset, dy, x, bcast_dw);
if (channel_shared) { if (channel_shared) {
math::Dot<float, CUDAContext>(channels * dim, math::Dot(channels * dim,
bcast_dw, multiplier, dw, ctx); bcast_dw, multiplier, dw, ctx);
} else { } else {
if (data_format == "NCHW") { if (data_format == "NCHW") {
math::Gemv<float, CUDAContext>( math::Gemv(
CblasNoTrans, channels, dim, CblasNoTrans,
1.f, bcast_dw, multiplier, channels, dim,
0.f, dw, ctx); 1.f, bcast_dw, multiplier,
0.f, dw, ctx);
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
math::Gemv<float, CUDAContext>( math::Gemv<float, CUDAContext>(
CblasTrans, dim, channels, CblasTrans,
1.f, bcast_dw, multiplier, dim, channels,
0.f, dw, ctx); 1.f, bcast_dw, multiplier,
0.f, dw, ctx);
} else LOG(FATAL) << "Unknown data format: " << data_format; } else LOG(FATAL) << "Unknown data format: " << data_format;
} }
} }
......
...@@ -28,17 +28,20 @@ template<> void Softmax<float, CPUContext>( ...@@ -28,17 +28,20 @@ template<> void Softmax<float, CPUContext>(
scale[k], x[i * dim + j * inner_dim + k] scale[k], x[i * dim + j * inner_dim + k]
); );
} }
math::Gemm<float, CPUContext>( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
classes, inner_dim, 1, CblasNoTrans,
-1.f, sum_multiplier, scale, 1.f, y, ctx); classes, inner_dim, 1,
math::Exp<float, CPUContext>(dim, y, y, ctx); -1.f, sum_multiplier, scale,
math::Gemv<float, CPUContext>( 1.f, y, ctx);
CblasTrans, classes, inner_dim, math::Exp(dim, y, y, ctx);
1.f, y, sum_multiplier, math::Gemv(
0.f, scale, ctx); CblasTrans,
classes, inner_dim,
1.f, y, sum_multiplier,
0.f, scale, ctx);
for (int j = 0; j < classes; ++j) { for (int j = 0; j < classes; ++j) {
math::Div<float, CPUContext>(inner_dim, y, scale, y, ctx); math::Div(inner_dim, y, scale, y, ctx);
y += inner_dim; y += inner_dim;
} }
} }
......
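
Note: the Softmax kernel expresses "subtract the per-position max, exponentiate, normalize by the per-position sum" through Gemm/Exp/Gemv/Div calls on a [classes x inner_dim] slice. A plain-loop reference sketch of the same computation, without the BLAS formulation (names are illustrative):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Softmax over the "classes" axis of a [classes x inner_dim] slice,
// matching the max-subtract / exp / normalize steps of the kernel.
void SoftmaxSlice(int classes, int inner_dim, const float* x, float* y) {
    for (int k = 0; k < inner_dim; ++k) {
        float max_v = x[k];
        for (int j = 1; j < classes; ++j)
            max_v = std::max(max_v, x[j * inner_dim + k]);
        float sum = 0.f;
        for (int j = 0; j < classes; ++j) {
            y[j * inner_dim + k] = std::exp(x[j * inner_dim + k] - max_v);
            sum += y[j * inner_dim + k];
        }
        for (int j = 0; j < classes; ++j) y[j * inner_dim + k] /= sum;
    }
}

int main() {
    const int classes = 3, inner_dim = 1;
    std::vector<float> x = {1.f, 2.f, 3.f}, y(3);
    SoftmaxSlice(classes, inner_dim, x.data(), y.data());
    for (float v : y) std::printf("%.4f ", v);  // ~0.0900 0.2447 0.6652
    std::printf("\n");
}
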
...@@ -8,7 +8,6 @@ namespace kernel { ...@@ -8,7 +8,6 @@ namespace kernel {
/*! BiasAdd <T = float32, Device = CPU> */ /*! BiasAdd <T = float32, Device = CPU> */
template<> void BiasAdd<float, CPUContext>( template<> void BiasAdd<float, CPUContext>(
const int count,
const int outer_dim, const int outer_dim,
const int dim, const int dim,
const int inner_dim, const int inner_dim,
......
...@@ -11,38 +11,37 @@ namespace kernel { ...@@ -11,38 +11,37 @@ namespace kernel {
template <typename T> template <typename T>
__global__ void _BiasAdd_NCHW( __global__ void _BiasAdd_NCHW(
const int count, const int nthreads,
const int dim, const int dim,
const int inner_dim, const int inner_dim,
const T* bias, const T* bias,
T* y) { T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) { CUDA_1D_KERNEL_LOOP(i, nthreads) {
#if __CUDA_ARCH__ >= 350 #if __CUDA_ARCH__ >= 350
y[idx] += __ldg(bias + ((idx / inner_dim) % dim)); y[i] += __ldg(bias + ((i / inner_dim) % dim));
#else #else
y[idx] += bias[(idx / inner_dim) % dim]; y[i] += bias[(i / inner_dim) % dim];
#endif #endif
} }
} }
template <typename T> template <typename T>
__global__ void _BiasAdd_NHWC( __global__ void _BiasAdd_NHWC(
const int count, const int nthreads,
const int dim, const int dim,
const int inner_dim, const int inner_dim,
const T* bias, const T* bias,
T* y) { T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) { CUDA_1D_KERNEL_LOOP(i, nthreads) {
#if __CUDA_ARCH__ >= 350 #if __CUDA_ARCH__ >= 350
y[idx] += __ldg(bias + (idx % dim)); y[i] += __ldg(bias + (i % dim));
#else #else
y[idx] += bias[idx % dim]; y[i] += bias[i % dim];
#endif #endif
} }
} }
template<> void BiasAdd<float, CUDAContext>( template<> void BiasAdd<float, CUDAContext>(
const int count,
const int outer_dim, const int outer_dim,
const int dim, const int dim,
const int inner_dim, const int inner_dim,
...@@ -51,16 +50,17 @@ template<> void BiasAdd<float, CUDAContext>( ...@@ -51,16 +50,17 @@ template<> void BiasAdd<float, CUDAContext>(
const float* bias_multiplier, const float* bias_multiplier,
float* y, float* y,
CUDAContext* ctx) { CUDAContext* ctx) {
auto nthreads = outer_dim * dim * inner_dim;
if (data_format == "NCHW") { if (data_format == "NCHW") {
_BiasAdd_NCHW<float> _BiasAdd_NCHW<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(nthreads), CUDA_THREADS,
0, ctx->cuda_stream() >> > 0, ctx->cuda_stream() >> >
(count, dim, inner_dim, bias, y); (nthreads, dim, inner_dim, bias, y);
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
_BiasAdd_NHWC<float> _BiasAdd_NHWC<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(nthreads), CUDA_THREADS,
0, ctx->cuda_stream() >> > 0, ctx->cuda_stream() >> >
(count, dim, inner_dim, bias, y); (nthreads, dim, inner_dim, bias, y);
} else LOG(FATAL) << "Unknown data format: " << data_format; } else LOG(FATAL) << "Unknown data format: " << data_format;
} }
......
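
Note: the renamed kernels index the bias as bias[(i / inner_dim) % dim] for NCHW and bias[i % dim] for NHWC, with i running over the outer_dim * dim * inner_dim flattened elements. A CPU sketch of the same indexing, handy for checking the two layouts (BiasAddRef is a made-up name):

#include <cassert>
#include <string>
#include <vector>

// Mirror of the flattened-index bias lookup used by the CUDA kernels.
void BiasAddRef(int outer_dim, int dim, int inner_dim,
                const std::string& data_format,
                const float* bias, float* y) {
    const int nthreads = outer_dim * dim * inner_dim;
    for (int i = 0; i < nthreads; ++i) {
        if (data_format == "NCHW") y[i] += bias[(i / inner_dim) % dim];
        else /* "NHWC" */          y[i] += bias[i % dim];
    }
}

int main() {
    // 1 image, 2 channels, 3 spatial positions.
    std::vector<float> bias = {10.f, 20.f};
    std::vector<float> y_nchw(6, 0.f), y_nhwc(6, 0.f);
    BiasAddRef(1, 2, 3, "NCHW", bias.data(), y_nchw.data());  // 10 10 10 20 20 20
    BiasAddRef(1, 2, 3, "NHWC", bias.data(), y_nhwc.data());  // 10 20 10 20 10 20
    assert(y_nchw[2] == 10.f && y_nchw[3] == 20.f);
    assert(y_nhwc[0] == 10.f && y_nhwc[1] == 20.f);
}
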
...@@ -51,9 +51,9 @@ void CuDNNDropoutOp<Context>::RunWithType() { ...@@ -51,9 +51,9 @@ void CuDNNDropoutOp<Context>::RunWithType() {
auto* Rdata = mask->template mutable_data<uint8_t, Context>(); auto* Rdata = mask->template mutable_data<uint8_t, Context>();
CUDNN_CHECK(cudnnDropoutForward( CUDNN_CHECK(cudnnDropoutForward(
ctx()->cudnn_handle(), dropout_desc, ctx()->cudnn_handle(), dropout_desc,
input_desc, Xdata, input_desc, Xdata,
input_desc, Ydata, input_desc, Ydata,
Rdata, reserve_space_size)); Rdata, reserve_space_size));
} else LOG(FATAL) << "Incorrect Op phase: " << phase(); } else LOG(FATAL) << "Incorrect Op phase: " << phase();
} }
...@@ -102,9 +102,9 @@ void CuDNNDropoutGradientOp<Context>::RunWithType() { ...@@ -102,9 +102,9 @@ void CuDNNDropoutGradientOp<Context>::RunWithType() {
auto* Rdata = mask->template mutable_data<uint8_t, Context>(); auto* Rdata = mask->template mutable_data<uint8_t, Context>();
CUDNN_CHECK(cudnnDropoutBackward( CUDNN_CHECK(cudnnDropoutBackward(
ctx()->cudnn_handle(), dropout_desc, ctx()->cudnn_handle(), dropout_desc,
input_desc, dYdata, input_desc, dYdata,
input_desc, dXdata, input_desc, dXdata,
Rdata, reserve_space_size)); Rdata, reserve_space_size));
} else LOG(FATAL) << "Incorrect Op phase: " << phase(); } else LOG(FATAL) << "Incorrect Op phase: " << phase();
} }
......
...@@ -15,8 +15,8 @@ void CuDNNEluOp<Context>::RunWithType() { ...@@ -15,8 +15,8 @@ void CuDNNEluOp<Context>::RunWithType() {
CUDNN_CHECK(cudnnActivationForward( CUDNN_CHECK(cudnnActivationForward(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Xdata, CUDNNType<T>::one, input_desc, Xdata,
CUDNNType<T>::zero, output_desc, Ydata)); CUDNNType<T>::zero, output_desc, Ydata));
} }
template <class Context> template <class Context>
...@@ -40,9 +40,9 @@ void CuDNNEluGradientOp<Context>::RunWithType() { ...@@ -40,9 +40,9 @@ void CuDNNEluGradientOp<Context>::RunWithType() {
CUDNN_CHECK(cudnnActivationBackward( CUDNN_CHECK(cudnnActivationBackward(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Ydata, CUDNNType<T>::one, input_desc, Ydata,
input_desc, dYdata, output_desc, Ydata, input_desc, dYdata, output_desc, Ydata,
CUDNNType<T>::zero, output_desc, dXdata)); CUDNNType<T>::zero, output_desc, dXdata));
} }
template <class Context> template <class Context>
......
...@@ -14,13 +14,13 @@ void CuDNNReluOp<Context>::RunWithType() { ...@@ -14,13 +14,13 @@ void CuDNNReluOp<Context>::RunWithType() {
#if CUDNN_VERSION_MIN(5, 0, 0) #if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward( CUDNN_CHECK(cudnnActivationForward(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Xdata, CUDNNType<T>::one, input_desc, Xdata,
CUDNNType<T>::zero, output_desc, Ydata)); CUDNNType<T>::zero, output_desc, Ydata));
#else #else
CUDNN_CHECK(cudnnActivationForward_v4( CUDNN_CHECK(cudnnActivationForward_v4(
ctx.cudnn_handle(), act_desc, ctx.cudnn_handle(), act_desc,
CUDNNType<Dtype>::one, input_desc, Xdata, CUDNNType<Dtype>::one, input_desc, Xdata,
CUDNNType<Dtype>::zero, output_desc, Ydata)); CUDNNType<Dtype>::zero, output_desc, Ydata));
#endif #endif
} }
...@@ -48,15 +48,15 @@ void CuDNNReluGradientOp<Context>::RunWithType() { ...@@ -48,15 +48,15 @@ void CuDNNReluGradientOp<Context>::RunWithType() {
#if CUDNN_VERSION_MIN(5, 0, 0) #if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward( CUDNN_CHECK(cudnnActivationBackward(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Ydata, CUDNNType<T>::one, input_desc, Ydata,
input_desc, dYdata, output_desc, Ydata, input_desc, dYdata, output_desc, Ydata,
CUDNNType<T>::zero, output_desc, dXdata)); CUDNNType<T>::zero, output_desc, dXdata));
#else #else
CUDNN_CHECK(cudnnActivationBackward_v4( CUDNN_CHECK(cudnnActivationBackward_v4(
cudnn_handle(), act_desc, cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Ydata, CUDNNType<T>::one, input_desc, Ydata,
input_desc, dYdata, output_desc, Ydata, input_desc, dYdata, output_desc, Ydata,
CUDNNType<T>::zero, output_desc, dXdata)); CUDNNType<T>::zero, output_desc, dXdata));
#endif #endif
} }
......
...@@ -14,13 +14,13 @@ void CuDNNSigmoidOp<Context>::RunWithType() { ...@@ -14,13 +14,13 @@ void CuDNNSigmoidOp<Context>::RunWithType() {
#if CUDNN_VERSION_MIN(5, 0, 0) #if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward( CUDNN_CHECK(cudnnActivationForward(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Xdata, CUDNNType<T>::one, input_desc, Xdata,
CUDNNType<T>::zero, output_desc, Ydata)); CUDNNType<T>::zero, output_desc, Ydata));
#else #else
CUDNN_CHECK(cudnnActivationForward_v4( CUDNN_CHECK(cudnnActivationForward_v4(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<Dtype>::one, input_desc, Xdata, CUDNNType<Dtype>::one, input_desc, Xdata,
CUDNNType<Dtype>::zero, output_desc, Ydata)); CUDNNType<Dtype>::zero, output_desc, Ydata));
#endif #endif
} }
...@@ -46,15 +46,15 @@ void CuDNNSigmoidGradientOp<Context>::RunWithType() { ...@@ -46,15 +46,15 @@ void CuDNNSigmoidGradientOp<Context>::RunWithType() {
#if CUDNN_VERSION_MIN(5, 0, 0) #if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward( CUDNN_CHECK(cudnnActivationBackward(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Ydata, CUDNNType<T>::one, input_desc, Ydata,
input_desc, dYdata, output_desc, Ydata, input_desc, dYdata, output_desc, Ydata,
CUDNNType<T>::zero, output_desc, dXdata)); CUDNNType<T>::zero, output_desc, dXdata));
#else #else
CUDNN_CHECK(cudnnActivationBackward_v4( CUDNN_CHECK(cudnnActivationBackward_v4(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Ydata, CUDNNType<T>::one, input_desc, Ydata,
input_desc, dYdata, output_desc, Ydata, input_desc, dYdata, output_desc, Ydata,
CUDNNType<T>::zero, output_desc, dXdata)); CUDNNType<T>::zero, output_desc, dXdata));
#endif #endif
} }
......
...@@ -21,10 +21,12 @@ void CuDNNSoftmaxOp<Context>::RunWithType() { ...@@ -21,10 +21,12 @@ void CuDNNSoftmaxOp<Context>::RunWithType() {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnSoftmaxForward(ctx()->cudnn_handle(), CUDNN_CHECK(cudnnSoftmaxForward(
CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, ctx()->cudnn_handle(),
CUDNNType<T>::one, input_desc, Xdata, CUDNN_SOFTMAX_ACCURATE,
CUDNNType<T>::zero, output_desc, Ydata)); CUDNN_SOFTMAX_MODE_CHANNEL,
CUDNNType<T>::one, input_desc, Xdata,
CUDNNType<T>::zero, output_desc, Ydata));
} }
template <class Context> template <class Context>
...@@ -52,10 +54,12 @@ void CuDNNSoftmaxGradientOp<Context>::RunWithType() { ...@@ -52,10 +54,12 @@ void CuDNNSoftmaxGradientOp<Context>::RunWithType() {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
auto* Ydata = Input(0).template data<T, Context>(); auto* Ydata = Input(0).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnSoftmaxBackward(ctx()->cudnn_handle(), CUDNN_CHECK(cudnnSoftmaxBackward(
CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, ctx()->cudnn_handle(),
CUDNNType<T>::one, input_desc, Ydata, input_desc, dYdata, CUDNN_SOFTMAX_ACCURATE,
CUDNNType<T>::zero, output_desc, dXdata)); CUDNN_SOFTMAX_MODE_CHANNEL,
CUDNNType<T>::one, input_desc, Ydata, input_desc, dYdata,
CUDNNType<T>::zero, output_desc, dXdata));
} }
template <class Context> template <class Context>
......
...@@ -14,13 +14,13 @@ void CuDNNTanhOp<Context>::RunWithType() { ...@@ -14,13 +14,13 @@ void CuDNNTanhOp<Context>::RunWithType() {
#if CUDNN_VERSION_MIN(5, 0, 0) #if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward( CUDNN_CHECK(cudnnActivationForward(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Xdata, CUDNNType<T>::one, input_desc, Xdata,
CUDNNType<T>::zero, output_desc, Ydata)); CUDNNType<T>::zero, output_desc, Ydata));
#else #else
CUDNN_CHECK(cudnnActivationForward_v4( CUDNN_CHECK(cudnnActivationForward_v4(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<Dtype>::one, input_desc, Xdata, CUDNNType<Dtype>::one, input_desc, Xdata,
CUDNNType<Dtype>::zero, output_desc, Ydata)); CUDNNType<Dtype>::zero, output_desc, Ydata));
#endif #endif
} }
...@@ -46,15 +46,15 @@ void CuDNNTanhGradientOp<Context>::RunWithType() { ...@@ -46,15 +46,15 @@ void CuDNNTanhGradientOp<Context>::RunWithType() {
#if CUDNN_VERSION_MIN(5, 0, 0) #if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward( CUDNN_CHECK(cudnnActivationBackward(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Ydata, CUDNNType<T>::one, input_desc, Ydata,
input_desc, dYdata, output_desc, Ydata, input_desc, dYdata, output_desc, Ydata,
CUDNNType<T>::zero, output_desc, dXdata)); CUDNNType<T>::zero, output_desc, dXdata));
#else #else
CUDNN_CHECK(cudnnActivationBackward_v4( CUDNN_CHECK(cudnnActivationBackward_v4(
ctx()->cudnn_handle(), act_desc, ctx()->cudnn_handle(), act_desc,
CUDNNType<T>::one, input_desc, Ydata, CUDNNType<T>::one, input_desc, Ydata,
input_desc, dYdata, output_desc, Ydata, input_desc, dYdata, output_desc, Ydata,
CUDNNType<T>::zero, output_desc, dXdata)); CUDNNType<T>::zero, output_desc, dXdata));
#endif #endif
} }
......
...@@ -107,16 +107,18 @@ void AffineGradientOp<Context>::ComputeScaleGradient( ...@@ -107,16 +107,18 @@ void AffineGradientOp<Context>::ComputeScaleGradient(
dA : ws()->template caches<T, Context>( dA : ws()->template caches<T, Context>(
{ outer_dim * scale_dim })[0]; { outer_dim * scale_dim })[0];
math::Gemv( math::Gemv(
CblasNoTrans, outer_dim * scale_dim, inner_dim, CblasNoTrans,
1.f, dYxX, multiplier, outer_dim * scale_dim, inner_dim,
0.f, SRes_data, ctx()); 1.f, dYxX, multiplier,
0.f, SRes_data, ctx());
} }
// Reduce outer dimensions // Reduce outer dimensions
if (outer_dim != 1) { if (outer_dim != 1) {
math::Gemv( math::Gemv(
CblasTrans, outer_dim, scale_dim, CblasTrans,
1.f, SRes_data, multiplier, outer_dim, scale_dim,
0.f, dA, ctx()); 1.f, SRes_data, multiplier,
0.f, dA, ctx());
} }
} }
......
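
Note: ComputeScaleGradient reduces the elementwise product dY * X down to the scale gradient in two Gemv passes against an all-ones multiplier: first over the inner dimension, then (when outer_dim != 1) over the outer dimension. A plain-loop sketch of the same staged reduction, with illustrative names:

#include <cstdio>
#include <vector>

// dA[c] = sum over o, k of dYxX[o, c, k], computed as two staged reductions
// (inner dimension first, then outer), mirroring the Gemv formulation.
void ScaleGradRef(int outer_dim, int scale_dim, int inner_dim,
                  const float* dYxX, float* dA) {
    std::vector<float> partial(outer_dim * scale_dim, 0.f);
    for (int o = 0; o < outer_dim; ++o)              // stage 1: reduce inner_dim
        for (int c = 0; c < scale_dim; ++c)
            for (int k = 0; k < inner_dim; ++k)
                partial[o * scale_dim + c] += dYxX[(o * scale_dim + c) * inner_dim + k];
    for (int c = 0; c < scale_dim; ++c) {            // stage 2: reduce outer_dim
        dA[c] = 0.f;
        for (int o = 0; o < outer_dim; ++o) dA[c] += partial[o * scale_dim + c];
    }
}

int main() {
    // outer_dim = 2, scale_dim = 2, inner_dim = 2; dYxX filled with 1..8.
    std::vector<float> dYxX = {1, 2, 3, 4, 5, 6, 7, 8}, dA(2);
    ScaleGradRef(2, 2, 2, dYxX.data(), dA.data());
    std::printf("%g %g\n", dA[0], dA[1]);  // 1+2+5+6 = 14, 3+4+7+8 = 22
}
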
...@@ -49,12 +49,12 @@ void CuDNNAffineOp<Context>::RunWithType() { ...@@ -49,12 +49,12 @@ void CuDNNAffineOp<Context>::RunWithType() {
// Y = alpha * X // Y = alpha * X
CUDNN_CHECK(cudnnSetOpTensorDescriptor( CUDNN_CHECK(cudnnSetOpTensorDescriptor(
mul_desc, CUDNN_OP_TENSOR_MUL, mul_desc, CUDNN_OP_TENSOR_MUL,
CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN)); CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN));
CUDNN_CHECK(cudnnOpTensor( CUDNN_CHECK(cudnnOpTensor(
ctx()->cudnn_handle(), mul_desc, ctx()->cudnn_handle(), mul_desc,
CUDNNType<DT>::one, input_desc, Xdata, CUDNNType<DT>::one, input_desc, Xdata,
CUDNNType<DT>::one, param_desc, Adata, CUDNNType<DT>::one, param_desc, Adata,
CUDNNType<DT>::zero, input_desc, Ydata)); CUDNNType<DT>::zero, input_desc, Ydata));
// Y += beta // Y += beta
if (InputSize() > 2) { if (InputSize() > 2) {
...@@ -62,12 +62,12 @@ void CuDNNAffineOp<Context>::RunWithType() { ...@@ -62,12 +62,12 @@ void CuDNNAffineOp<Context>::RunWithType() {
auto* Bdata = Input(2).template data<DT, Context>(); auto* Bdata = Input(2).template data<DT, Context>();
CUDNN_CHECK(cudnnSetOpTensorDescriptor( CUDNN_CHECK(cudnnSetOpTensorDescriptor(
add_desc, CUDNN_OP_TENSOR_ADD, add_desc, CUDNN_OP_TENSOR_ADD,
CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN)); CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN));
CUDNN_CHECK(cudnnOpTensor( CUDNN_CHECK(cudnnOpTensor(
ctx()->cudnn_handle(), add_desc, ctx()->cudnn_handle(), add_desc,
CUDNNType<DT>::one, input_desc, Ydata, CUDNNType<DT>::one, input_desc, Ydata,
CUDNNType<DT>::one, param_desc, Bdata, CUDNNType<DT>::one, param_desc, Bdata,
CUDNNType<DT>::zero, input_desc, Ydata)); CUDNNType<DT>::zero, input_desc, Ydata));
} }
} }
...@@ -98,7 +98,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() { ...@@ -98,7 +98,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() {
CUDNN_CHECK(cudnnSetOpTensorDescriptor( CUDNN_CHECK(cudnnSetOpTensorDescriptor(
mul_desc, CUDNN_OP_TENSOR_MUL, mul_desc, CUDNN_OP_TENSOR_MUL,
CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN)); CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN));
// dA = X * dY // dA = X * dY
if (Output(1)->name() != "NULL") { if (Output(1)->name() != "NULL") {
...@@ -139,9 +139,9 @@ void CuDNNAffineGradientOp<Context>::RunWithType() { ...@@ -139,9 +139,9 @@ void CuDNNAffineGradientOp<Context>::RunWithType() {
if (Output(0)->name() != "NULL") { if (Output(0)->name() != "NULL") {
CUDNN_CHECK(cudnnOpTensor( CUDNN_CHECK(cudnnOpTensor(
ctx()->cudnn_handle(), mul_desc, ctx()->cudnn_handle(), mul_desc,
CUDNNType<DT>::one, input_desc, dYdata, CUDNNType<DT>::one, input_desc, dYdata,
CUDNNType<DT>::one, param_desc, Adata, CUDNNType<DT>::one, param_desc, Adata,
CUDNNType<DT>::zero, input_desc, dXdata)); CUDNNType<DT>::zero, input_desc, dXdata));
} }
} }
...@@ -152,8 +152,9 @@ void CuDNNAffineGradientOp<Context>::ComputeScaleGradient( ...@@ -152,8 +152,9 @@ void CuDNNAffineGradientOp<Context>::ComputeScaleGradient(
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
CUDNN_CHECK(cudnnSetReduceTensorDescriptor( CUDNN_CHECK(cudnnSetReduceTensorDescriptor(
reduce_desc, CUDNN_REDUCE_TENSOR_ADD, reduce_desc, CUDNN_REDUCE_TENSOR_ADD,
CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN, CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN,
CUDNN_REDUCE_TENSOR_NO_INDICES, CUDNN_32BIT_INDICES)); CUDNN_REDUCE_TENSOR_NO_INDICES,
CUDNN_32BIT_INDICES));
size_t workspace_size = 0; size_t workspace_size = 0;
CUDNN_CHECK(cudnnGetReductionWorkspaceSize( CUDNN_CHECK(cudnnGetReductionWorkspaceSize(
ctx()->cudnn_handle(), reduce_desc, ctx()->cudnn_handle(), reduce_desc,
...@@ -161,9 +162,9 @@ void CuDNNAffineGradientOp<Context>::ComputeScaleGradient( ...@@ -161,9 +162,9 @@ void CuDNNAffineGradientOp<Context>::ComputeScaleGradient(
auto* WSdata = ws()->template caches<Context>({ workspace_size })[0]; auto* WSdata = ws()->template caches<Context>({ workspace_size })[0];
CUDNN_CHECK(cudnnReduceTensor( CUDNN_CHECK(cudnnReduceTensor(
ctx()->cudnn_handle(), reduce_desc, ctx()->cudnn_handle(), reduce_desc,
nullptr, 0, WSdata, workspace_size, nullptr, 0, WSdata, workspace_size,
CUDNNType<DT>::one, input_desc, dYxX, CUDNNType<DT>::one, input_desc, dYxX,
CUDNNType<DT>::zero, param_desc, dA)); CUDNNType<DT>::zero, param_desc, dA));
#endif #endif
} }
...@@ -181,16 +182,18 @@ void CuDNNAffineGradientOp<Context>::ComputeScaleGradient_v2( ...@@ -181,16 +182,18 @@ void CuDNNAffineGradientOp<Context>::ComputeScaleGradient_v2(
dA : ws()->template caches<T, Context>( dA : ws()->template caches<T, Context>(
{ outer_dim * scale_dim })[0]; { outer_dim * scale_dim })[0];
math::Gemv( math::Gemv(
CblasNoTrans, outer_dim * scale_dim, inner_dim, CblasNoTrans,
1.f, dYxX, multiplier, outer_dim * scale_dim, inner_dim,
0.f, SRes_data, ctx()); 1.f, dYxX, multiplier,
0.f, SRes_data, ctx());
} }
// Reduce outer dimensions // Reduce outer dimensions
if (outer_dim != 1) { if (outer_dim != 1) {
math::Gemv( math::Gemv(
CblasTrans, outer_dim, scale_dim, CblasTrans,
1.f, SRes_data, multiplier, outer_dim, scale_dim,
0.f, dA, ctx()); 1.f, SRes_data, multiplier,
0.f, dA, ctx());
} }
} }
......
...@@ -65,8 +65,10 @@ void DotOp<Context>::GemvRunWithType() { ...@@ -65,8 +65,10 @@ void DotOp<Context>::GemvRunWithType() {
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
math::Gemv( math::Gemv(
transA ? CblasTrans : CblasNoTrans, M1, N1, transA ? CblasTrans : CblasNoTrans,
1.f, X1data, X2data, 0.f, Ydata, ctx()); M1, N1,
1.f, X1data, X2data,
0.f, Ydata, ctx());
} }
template <class Context> template <class Context>
...@@ -149,16 +151,18 @@ void DotGradientOp<Context>::GemmRunWithType() { ...@@ -149,16 +151,18 @@ void DotGradientOp<Context>::GemmRunWithType() {
auto* dX1data = Output(0)->template mutable_data<T, Context>(); auto* dX1data = Output(0)->template mutable_data<T, Context>();
if (transA) { if (transA) {
math::Gemm( math::Gemm(
transB ? CblasTrans : CblasNoTrans, CblasTrans, transB ? CblasTrans : CblasNoTrans,
K1, M, N, CblasTrans,
1.f, X2data, dYdata, K1, M, N,
0.f, dX1data, ctx()); 1.f, X2data, dYdata,
0.f, dX1data, ctx());
} else { } else {
math::Gemm( math::Gemm(
CblasNoTrans, transB ? CblasNoTrans : CblasTrans, CblasNoTrans,
M, K1, N, transB ? CblasNoTrans : CblasTrans,
1.f, dYdata, X2data, M, K1, N,
0.f, dX1data, ctx()); 1.f, dYdata, X2data,
0.f, dX1data, ctx());
} }
} }
...@@ -166,16 +170,18 @@ void DotGradientOp<Context>::GemmRunWithType() { ...@@ -166,16 +170,18 @@ void DotGradientOp<Context>::GemmRunWithType() {
auto* dX2data = Output(1)->template mutable_data<T, Context>(); auto* dX2data = Output(1)->template mutable_data<T, Context>();
if (transB) { if (transB) {
math::Gemm( math::Gemm(
CblasTrans, transA ? CblasTrans : CblasNoTrans, CblasTrans,
N, K1, M, transA ? CblasTrans : CblasNoTrans,
1.f, dYdata, X1data, N, K1, M,
0.f, dX2data, ctx()); 1.f, dYdata, X1data,
0.f, dX2data, ctx());
} else { } else {
math::Gemm( math::Gemm(
transA ? CblasNoTrans : CblasTrans, CblasNoTrans, transA ? CblasNoTrans : CblasTrans,
K1, N, M, CblasNoTrans,
1.f, X1data, dYdata, K1, N, M,
0.f, dX2data, ctx()); 1.f, X1data, dYdata,
0.f, dX2data, ctx());
} }
} }
} }
...@@ -197,16 +203,17 @@ void DotGradientOp<Context>::GemvRunWithType() { ...@@ -197,16 +203,17 @@ void DotGradientOp<Context>::GemvRunWithType() {
auto* dX2data = Output(1)->template mutable_data<T, Context>(); auto* dX2data = Output(1)->template mutable_data<T, Context>();
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
M, N, 1, CblasNoTrans,
1.f, dYdata, X2data, M, N, 1,
0.f, dX1data, ctx()); 1.f, dYdata, X2data,
0.f, dX1data, ctx());
math::Gemv( math::Gemv(
transA ? CblasNoTrans : CblasTrans, transA ? CblasNoTrans : CblasTrans,
M1, N1, M1, N1,
1.f, X1data, dYdata, 1.f, X1data, dYdata,
0.f, dX2data, ctx()); 0.f, dX2data, ctx());
} }
template <class Context> template <class Context>
......
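
Note: the Gemm calls in DotGradientOp implement the usual matrix-product gradients: for Y = X1 * X2, dX1 = dY * X2^T and dX2 = X1^T * dY, with the transposes folded in when transA/transB are set. A tiny 2x2 check with plain loops standing in for math::Gemm:

#include <array>
#include <cstdio>

using Mat2 = std::array<float, 4>;  // row-major 2x2

Mat2 MatMul(const Mat2& a, const Mat2& b, bool ta, bool tb) {
    // Read element (i, j), optionally through a transpose.
    auto at = [](const Mat2& m, int i, int j, bool t) { return t ? m[j * 2 + i] : m[i * 2 + j]; };
    Mat2 c{};
    for (int i = 0; i < 2; ++i)
        for (int j = 0; j < 2; ++j)
            for (int k = 0; k < 2; ++k)
                c[i * 2 + j] += at(a, i, k, ta) * at(b, k, j, tb);
    return c;
}

int main() {
    Mat2 x1 = {1, 2, 3, 4}, x2 = {5, 6, 7, 8}, dy = {1, 0, 0, 1};  // dY = I
    Mat2 dx1 = MatMul(dy, x2, false, true);   // dX1 = dY * X2^T
    Mat2 dx2 = MatMul(x1, dy, true, false);   // dX2 = X1^T * dY
    std::printf("dX1 = %g %g %g %g\n", dx1[0], dx1[1], dx1[2], dx1[3]);  // 5 7 6 8 (= X2^T, since dY = I)
    std::printf("dX2 = %g %g %g %g\n", dx2[0], dx2[1], dx2[2], dx2[3]);  // 1 3 2 4 (= X1^T)
}
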
...@@ -28,19 +28,21 @@ void FullyConnectedOp<Context>::TransRunWithType() { ...@@ -28,19 +28,21 @@ void FullyConnectedOp<Context>::TransRunWithType() {
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
math::Gemm( math::Gemm(
CblasNoTrans, CblasTrans, CblasNoTrans,
M, N, K, CblasTrans,
1.f, Xdata, Wdata, M, N, K,
0.f, Ydata, ctx()); 1.f, Xdata, Wdata,
0.f, Ydata, ctx());
if (InputSize() > 2) { if (InputSize() > 2) {
DECLARE_MULTIPLIER(multiplier, M); DECLARE_MULTIPLIER(multiplier, M);
auto* Bdata = Input(2).template data<T, Context>(); auto* Bdata = Input(2).template data<T, Context>();
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
M, N, 1, CblasNoTrans,
1.f, multiplier, Bdata, M, N, 1,
1.f, Ydata, ctx()); 1.f, multiplier, Bdata,
1.f, Ydata, ctx());
} }
} }
...@@ -61,19 +63,21 @@ void FullyConnectedOp<Context>::NoTransRunWithType() { ...@@ -61,19 +63,21 @@ void FullyConnectedOp<Context>::NoTransRunWithType() {
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
M, N, K, CblasNoTrans,
1.f, Xdata, Wdata, M, N, K,
0.f, Ydata, ctx()); 1.f, Xdata, Wdata,
0.f, Ydata, ctx());
if (InputSize() > 2) { if (InputSize() > 2) {
DECLARE_MULTIPLIER(multiplier, M); DECLARE_MULTIPLIER(multiplier, M);
auto* Bdata = Input(2).template data<T, Context>(); auto* Bdata = Input(2).template data<T, Context>();
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
M, N, 1, CblasNoTrans,
1.f, multiplier, Bdata, M, N, 1,
1.f, Ydata, ctx()); 1.f, multiplier, Bdata,
1.f, Ydata, ctx());
} }
} }
...@@ -127,16 +131,18 @@ void FullyConnectedGradientOp<Context>::RunWithType() { ...@@ -127,16 +131,18 @@ void FullyConnectedGradientOp<Context>::RunWithType() {
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
if (transW) { if (transW) {
math::Gemm( math::Gemm(
CblasTrans, CblasNoTrans, CblasTrans,
N, K, M, CblasNoTrans,
1.f, dYdata, Xdata, N, K, M,
0.f, dWdata, ctx()); 1.f, dYdata, Xdata,
0.f, dWdata, ctx());
} else { } else {
math::Gemm( math::Gemm(
CblasTrans, CblasNoTrans, CblasTrans,
K, N, M, CblasNoTrans,
1.f, Xdata, dYdata, K, N, M,
0.f, dWdata, ctx()); 1.f, Xdata, dYdata,
0.f, dWdata, ctx());
} }
} }
...@@ -145,9 +151,10 @@ void FullyConnectedGradientOp<Context>::RunWithType() { ...@@ -145,9 +151,10 @@ void FullyConnectedGradientOp<Context>::RunWithType() {
Output(2)->Reshape({ N }); Output(2)->Reshape({ N });
auto* dBdata = Output(2)->template mutable_data<T, Context>(); auto* dBdata = Output(2)->template mutable_data<T, Context>();
math::Gemv( math::Gemv(
CblasTrans, M, N, CblasTrans,
1.f, dYdata, multiplier, M, N,
0.f, dBdata, ctx()); 1.f, dYdata, multiplier,
0.f, dBdata, ctx());
} }
if (Output(0)->name() != "NULL") { if (Output(0)->name() != "NULL") {
...@@ -155,16 +162,18 @@ void FullyConnectedGradientOp<Context>::RunWithType() { ...@@ -155,16 +162,18 @@ void FullyConnectedGradientOp<Context>::RunWithType() {
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
if (transW) { if (transW) {
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
M, K, N, CblasNoTrans,
1.f, dYdata, Wdata, M, K, N,
0.f, dXdata, ctx()); 1.f, dYdata, Wdata,
0.f, dXdata, ctx());
} else { } else {
math::Gemm( math::Gemm(
CblasNoTrans, CblasTrans, CblasNoTrans,
M, K, N, CblasTrans,
1.f, dYdata, Wdata, M, K, N,
0.f, dXdata, ctx()); 1.f, dYdata, Wdata,
0.f, dXdata, ctx());
} }
} }
} }
......
...@@ -9,10 +9,11 @@ void GramMatrixOp<Context>::RunWithType() { ...@@ -9,10 +9,11 @@ void GramMatrixOp<Context>::RunWithType() {
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
for (int i = 0; i < outer_dim; i++) { for (int i = 0; i < outer_dim; i++) {
math::Gemm( math::Gemm(
CblasNoTrans, CblasTrans, CblasNoTrans,
dim, dim, inner_dim, CblasTrans,
1.f, Xdata, Xdata, dim, dim, inner_dim,
0.f, Ydata, ctx()); 1.f, Xdata, Xdata,
0.f, Ydata, ctx());
Xdata += x_offset; Xdata += x_offset;
Ydata += y_offset; Ydata += y_offset;
} }
...@@ -44,10 +45,11 @@ void GramMatrixGradientOp<Context>::RunWithType() { ...@@ -44,10 +45,11 @@ void GramMatrixGradientOp<Context>::RunWithType() {
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
for (int i = 0; i < outer_dim; i++) { for (int i = 0; i < outer_dim; i++) {
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
dim, inner_dim, dim, CblasNoTrans,
2.f, dYdata, Xdata, dim, inner_dim, dim,
0.f, dXdata, ctx()); 2.f, dYdata, Xdata,
0.f, dXdata, ctx());
dYdata += y_offset; dYdata += y_offset;
dXdata += x_offset; dXdata += x_offset;
} }
......
...@@ -33,6 +33,7 @@ void MultinomialOp<Context>::RunWithType() { ...@@ -33,6 +33,7 @@ void MultinomialOp<Context>::RunWithType() {
double running_total, r; double running_total, r;
int idx = 0, num_classes = Input(0).dim(axis); int idx = 0, num_classes = Input(0).dim(axis);
auto* rng = ctx()->rand_generator(); auto* rng = ctx()->rand_generator();
for (int i = 0; i < outer_dim; ++i) { for (int i = 0; i < outer_dim; ++i) {
...@@ -47,7 +48,9 @@ void MultinomialOp<Context>::RunWithType() { ...@@ -47,7 +48,9 @@ void MultinomialOp<Context>::RunWithType() {
r = dist(*rng); r = dist(*rng);
auto found_iter = std::upper_bound( auto found_iter = std::upper_bound(
Sdata, Sdata + num_classes, r); Sdata, Sdata + num_classes, r);
Ydata[idx++] = std::distance(Sdata, found_iter); Ydata[idx++] = std::min(
(int)std::distance(Sdata,
found_iter), num_classes - 1);
} }
Xdata += num_classes; Xdata += num_classes;
} }
......
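
Note: the clamp added here guards the sampled index: r is drawn against the running cumulative sum, and floating-point accumulation (or r landing exactly on the final entry) can make std::upper_bound return the end iterator, which would index one past the last class. A standalone sketch of that sampling step, with an illustrative CDF in place of the accumulated Sdata:

#include <algorithm>
#include <cstdio>
#include <vector>

// Sample a class index from a cumulative distribution, clamping to the last
// class in case r lands at (or just past) the final cumulative sum.
int SampleIndex(const std::vector<double>& cdf, double r) {
    auto it = std::upper_bound(cdf.begin(), cdf.end(), r);
    int idx = static_cast<int>(std::distance(cdf.begin(), it));
    return std::min(idx, static_cast<int>(cdf.size()) - 1);
}

int main() {
    std::vector<double> cdf = {0.2, 0.5, 1.0};     // classes 0..2
    std::printf("%d\n", SampleIndex(cdf, 0.10));   // 0
    std::printf("%d\n", SampleIndex(cdf, 0.75));   // 2
    std::printf("%d\n", SampleIndex(cdf, 1.0));    // would be 3 without the clamp -> 2
}
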
...@@ -10,9 +10,9 @@ void GradientGenerateOp<Context>::RunWithType() { ...@@ -10,9 +10,9 @@ void GradientGenerateOp<Context>::RunWithType() {
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "NULL") continue; if (Output(i)->name() == "NULL") continue;
Output(i)->ReshapeLike(Input(i)); Output(i)->ReshapeLike(Input(i));
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto v = cast::to<T>(defaults[i]);
math::Set(Output(0)->count(), auto* Ydata = Output(0)->template mutable_data<T, Context>();
cast::to<T>(defaults[i]), dXdata, ctx()); math::Set(Output(0)->count(), v, Ydata, ctx());
} }
} }
...@@ -40,30 +40,29 @@ OPERATOR_SCHEMA(GradientGenerate); ...@@ -40,30 +40,29 @@ OPERATOR_SCHEMA(GradientGenerate);
template <class Context> template <typename T> template <class Context> template <typename T>
void GradientGatherOp<Context>::RunWithType() { void GradientGatherOp<Context>::RunWithType() {
auto* dXdata = Output(0)->template mutable_data<T, Context>();
int64_t count = Output(0)->count(); int64_t count = Output(0)->count();
auto* Y = Output(0)->template mutable_data<T, Context>();
if (indices.size() == 1) { if (indices.size() == 1) {
auto* dYdata = Input(indices[0]).template data<T, Context>(); auto* X = Input(indices[0]).template data<T, Context>();
ctx()->template Copy<T, Context, Context>(count, dXdata, dYdata); ctx()->template Copy<T, Context, Context>(count, Y, X);
} else if(indices.size() == 2) { } else if(indices.size() == 2) {
CHECK_EQ(count, Input(indices[1]).count()); CHECK_EQ(count, Input(indices[1]).count());
auto* dY1data = Input(indices[0]).template data<T, Context>(); auto* X1 = Input(indices[0]).template data<T, Context>();
auto* dY2data = Input(indices[1]).template data<T, Context>(); auto* X2 = Input(indices[1]).template data<T, Context>();
math::Add(count, dY1data, dY2data, dXdata, ctx()); math::Add(count, X1, X2, Y, ctx());
} else { } else {
size_t dy_idx = 1; size_t index = 1;
auto* dYdata = Input(indices[0]).template data<T, Context>(); auto* X = Input(indices[0]).template data<T, Context>();
ctx()->template Copy<T, Context, Context>(count, dXdata, dYdata); ctx()->template Copy<T, Context, Context>(count, Y, X);
while (dy_idx < indices.size()) { while (index < indices.size()) {
if (indices.size() - dy_idx >= 2) { if (indices.size() - index >= 2) {
auto* dY1data = Input(indices[dy_idx]).template data<T, Context>(); auto* X1 = Input(indices[index]).template data<T, Context>();
auto* dY2data = Input(indices[dy_idx + 1]).template data<T, Context>(); auto* X2 = Input(indices[index + 1]).template data<T, Context>();
kernel::GradientTwoSum(count, dY1data, dY2data, dXdata, ctx()); kernel::GradientTwoSum(count, X1, X2, Y, ctx());
dy_idx += 2; index += 2;
} else { } else {
dYdata = Input(indices[dy_idx]).template data<T, Context>(); X = Input(indices[index]).template data<T, Context>();
math::Add(count, dXdata, dYdata, dXdata, ctx()); math::Add(count, Y, X, Y, ctx()); break;
dy_idx += 1;
} }
} }
} }
...@@ -92,7 +91,39 @@ DEPLOY_CPU(GradientGather); ...@@ -92,7 +91,39 @@ DEPLOY_CPU(GradientGather);
DEPLOY_CUDA(GradientGather); DEPLOY_CUDA(GradientGather);
#endif #endif
OPERATOR_SCHEMA(GradientGather).NumOutputs(1); OPERATOR_SCHEMA(GradientGather).NumOutputs(1);
NO_GRADIENT(GradientGather);
template <class Context> template <typename T>
void GradientAddOp<Context>::RunWithType() {
auto* X = Input(1).template data<T, Context>();
auto* Y = Output(0)->template mutable_data<T, Context>();
math::Add(Output(0)->count(), Y, X, Y, ctx());
}
template <class Context>
void GradientAddOp<Context>::RunOnDevice() {
CHECK_EQ(Input(0).name(), Output(0)->name())
<< "\nRequires X(0) == Y(0).";
if (XIsType(Input(0), int8_t)) RunWithType<int8_t>();
else if (XIsType(Input(0), uint8_t)) RunWithType<uint8_t>();
else if (XIsType(Input(0), int)) RunWithType<int>();
else if (XIsType(Input(0), int64_t)) RunWithType<int64_t>();
else if (XIsType(Input(0), float16)) RunWithType<float16>();
else if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), double)) RunWithType<double>();
else LOG(FATAL) << DTypeHelper(Input(0), {
"int8", "uint8", "int32", "int64",
"float16", "float32", "float64",
});
}
DEPLOY_CPU(GradientAdd);
#ifdef WITH_CUDA
DEPLOY_CUDA(GradientAdd);
#endif
OPERATOR_SCHEMA(GradientAdd)
.NumInputs(2).NumOutputs(1)
.Inplace({ { 0, 0 } });
template <class Context> template <class Context>
void StopGradientOp<Context>::RunOnDevice() { void StopGradientOp<Context>::RunOnDevice() {
......
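The reworked GradientGather initializes the output from the first gradient and then folds the remaining inputs in pairs, with a single math::Add (followed by a break) mopping up an odd leftover. A plain C++ sketch of that accumulation order over flat buffers; kernel::GradientTwoSum is modeled here as Y += X1 + X2 in one pass, which is an assumption about its semantics:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Accumulate grads[0..n-1] into Y, two inputs at a time after the
    // initial copy.
    void GatherGradients(const std::vector<std::vector<float>>& grads,
                         std::vector<float>& Y) {
        const std::size_t count = Y.size();
        std::copy(grads[0].begin(), grads[0].end(), Y.begin());  // Y = X0
        std::size_t index = 1;
        while (index < grads.size()) {
            if (grads.size() - index >= 2) {
                // Fused pair: Y += X1 + X2 in one pass over the buffers.
                for (std::size_t i = 0; i < count; ++i)
                    Y[i] += grads[index][i] + grads[index + 1][i];
                index += 2;
            } else {
                // Odd leftover: Y += X, then we are done.
                for (std::size_t i = 0; i < count; ++i)
                    Y[i] += grads[index][i];
                break;
            }
        }
    }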
...@@ -37,19 +37,20 @@ void L2NormOp<Context>::RunWithType() { ...@@ -37,19 +37,20 @@ void L2NormOp<Context>::RunWithType() {
math::Square(buffer.count(), Xdata, Bdata, ctx()); math::Square(buffer.count(), Xdata, Bdata, ctx());
// Compute T1 = \sum_{i} x_{i,j}^{2} // Compute T1 = \sum_{i} x_{i,j}^{2}
math::Gemv( math::Gemv(
CblasTrans, dim, inner_dim, CblasTrans,
mode == "MEAN" ? 1.f / dim : 1.f, Bdata, Dmult, dim, inner_dim,
1.f, Ndata, ctx()); mode == "MEAN" ? 1.f / dim : 1.f, Bdata, Dmult,
1.f, Ndata, ctx());
// Compute T2 = \sqrt{T1} // Compute T2 = \sqrt{T1}
math::Sqrt(inner_dim, Ndata, Ndata, ctx()); math::Sqrt(inner_dim, Ndata, Ndata, ctx());
// Compute T3 = x / [(T2)]_{dim} // Compute T3 = x / [(T2)]_{dim}
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
dim, inner_dim, 1, CblasNoTrans,
1.f, Dmult, Ndata, dim, inner_dim, 1,
0.f, Bdata, ctx()); 1.f, Dmult, Ndata,
math::Div(buffer.count(), 0.f, Bdata, ctx());
Xdata, Bdata, Ydata, ctx()); math::Div(buffer.count(), Xdata, Bdata, Ydata, ctx());
Ndata += inner_dim; Ndata += inner_dim;
Xdata += buffer.count(); Xdata += buffer.count();
Ydata += buffer.count(); Ydata += buffer.count();
...@@ -101,31 +102,35 @@ void L2NormGradientOp<Context>::RunWithType() { ...@@ -101,31 +102,35 @@ void L2NormGradientOp<Context>::RunWithType() {
// Compute \sum_{i} x_{i, j}dy_{i, j} // Compute \sum_{i} x_{i, j}dy_{i, j}
math::Mul(buffer.count(), Xdata, dYdata, Bdata, ctx()); math::Mul(buffer.count(), Xdata, dYdata, Bdata, ctx());
math::Gemv( math::Gemv(
CblasTrans, dim, inner_dim, CblasTrans,
mode == "MEAN" ? 1.f / dim : 1.f, Bdata, Dmult, dim, inner_dim,
0.f, BInnerdata, ctx()); mode == "MEAN" ? 1.f / dim : 1.f, Bdata, Dmult,
0.f, BInnerdata, ctx());
// Compute T1 = x[(\sum_{i} x_{i, j}dy_{i, j})]_{dim} // Compute T1 = x[(\sum_{i} x_{i, j}dy_{i, j})]_{dim}
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
dim, inner_dim, 1, CblasNoTrans,
1.f, Dmult, BInnerdata, dim, inner_dim, 1,
0.f, Bdata, ctx()); 1.f, Dmult, BInnerdata,
0.f, Bdata, ctx());
math::Mul(buffer.count(), Xdata, Bdata, dXdata, ctx()); math::Mul(buffer.count(), Xdata, Bdata, dXdata, ctx());
// Compute T2 = T1 / Normalizer^{2} // Compute T2 = T1 / Normalizer^{2}
math::Square(inner_dim, Ndata, BInnerdata, ctx()); math::Square(inner_dim, Ndata, BInnerdata, ctx());
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
dim, inner_dim, 1, CblasNoTrans,
1.f, Dmult, BInnerdata, dim, inner_dim, 1,
0.f, Bdata, ctx()); 1.f, Dmult, BInnerdata,
0.f, Bdata, ctx());
math::Div(buffer.count(), dXdata, Bdata, dXdata, ctx()); math::Div(buffer.count(), dXdata, Bdata, dXdata, ctx());
// Compute T3 = (dy - T2) / Normalizer // Compute T3 = (dy - T2) / Normalizer
math::Sub(buffer.count(), dYdata, dXdata, dXdata, ctx()); math::Sub(buffer.count(), dYdata, dXdata, dXdata, ctx());
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
dim, inner_dim, 1, CblasNoTrans,
1.f, Dmult, Ndata, dim, inner_dim, 1,
0.f, Bdata, ctx()); 1.f, Dmult, Ndata,
0.f, Bdata, ctx());
math::Div(buffer.count(), dXdata, Bdata, dXdata, ctx()); math::Div(buffer.count(), dXdata, Bdata, dXdata, ctx());
Ndata += inner_dim; Ndata += inner_dim;
Xdata += buffer.count(); Xdata += buffer.count();
......
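The L2Norm forward pass reduces each inner column with a GEMV against a vector of ones, takes the square root, and broadcasts the result back with a rank-1 GEMM before the division. A direct loop-based sketch of the same forward computation for one [dim x inner_dim] slice; the epsilon that the full op may fold into the norm buffer is omitted here, so a zero column would divide by zero:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // y[d][j] = x[d][j] / sqrt(scale * sum_d x[d][j]^2), with
    // scale = 1 / dim when mode == "MEAN" and 1 otherwise.
    void L2NormalizeSlice(const std::vector<float>& x, std::vector<float>& y,
                          std::size_t dim, std::size_t inner_dim,
                          bool mean_mode) {
        const float scale = mean_mode ? 1.f / static_cast<float>(dim) : 1.f;
        for (std::size_t j = 0; j < inner_dim; ++j) {
            float ssq = 0.f;
            for (std::size_t d = 0; d < dim; ++d)
                ssq += x[d * inner_dim + j] * x[d * inner_dim + j];
            const float norm = std::sqrt(scale * ssq);
            for (std::size_t d = 0; d < dim; ++d)
                y[d * inner_dim + j] = x[d * inner_dim + j] / norm;
        }
    }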
...@@ -48,19 +48,26 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() { ...@@ -48,19 +48,26 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() {
// Setup RNN // Setup RNN
#if CUDNN_VERSION_MIN(7, 0, 0) #if CUDNN_VERSION_MIN(7, 0, 0)
CUDNN_CHECK(cudnnSetRNNDescriptor( CUDNN_CHECK(cudnnSetRNNDescriptor(
ctx()->cudnn_handle(), rnn_desc, ctx()->cudnn_handle(),
hidden_size, num_layers, rnn_desc,
dropout_desc, hidden_size,
rnn_input_mode, rnn_direction, rnn_mode, num_layers,
CUDNN_RNN_ALGO_STANDARD, dropout_desc,
CUDNNType<T>::type)); rnn_input_mode,
rnn_direction,
rnn_mode,
CUDNN_RNN_ALGO_STANDARD,
CUDNNType<T>::type));
#else #else
CUDNN_CHECK(cudnnSetRNNDescriptor( CUDNN_CHECK(cudnnSetRNNDescriptor(
rnn_desc, rnn_desc,
hidden_size, num_layers, hidden_size,
dropout_desc, num_layers,
rnn_input_mode, rnn_direction, rnn_mode, dropout_desc,
CUDNNType<T>::type)); rnn_input_mode,
rnn_direction,
rnn_mode,
CUDNNType<T>::type));
#endif #endif
// Setup Xs & Ys & Y // Setup Xs & Ys & Y
...@@ -68,8 +75,6 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() { ...@@ -68,8 +75,6 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() {
xs_desc->Set<T>({ batch_size, input_dim, 1 }, { input_dim, 1, 1 }); xs_desc->Set<T>({ batch_size, input_dim, 1 }, { input_dim, 1, 1 });
ys_desc.reset(new cudnnTensorDescriptors(seq_length)); ys_desc.reset(new cudnnTensorDescriptors(seq_length));
ys_desc->Set<T>({ batch_size, output_dim, 1 }, { output_dim, 1, 1 }); ys_desc->Set<T>({ batch_size, output_dim, 1 }, { output_dim, 1, 1 });
CUDNN_CHECK(cudnnGetRNNWorkspaceSize(ctx()->cudnn_handle(),
rnn_desc, seq_length, xs_desc->descs(), &workspace_size));
output_dims = { seq_length, batch_size, output_dim }; output_dims = { seq_length, batch_size, output_dim };
// Setup Hx & Cx & Hy & Cy // Setup Hx & Cx & Hy & Cy
...@@ -82,8 +87,10 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() { ...@@ -82,8 +87,10 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() {
// Setup packed weights // Setup packed weights
size_t weights_size; int64_t weights_count; size_t weights_size; int64_t weights_count;
CUDNN_CHECK(cudnnGetRNNParamsSize( CUDNN_CHECK(cudnnGetRNNParamsSize(
ctx()->cudnn_handle(), rnn_desc, xs_desc->descs()[0], ctx()->cudnn_handle(),
&weights_size, CUDNNType<T>::type)); rnn_desc, xs_desc->descs()[0],
&weights_size,
CUDNNType<T>::type));
weights_count = (int64_t)weights_size / sizeof(T); weights_count = (int64_t)weights_size / sizeof(T);
CHECK_EQ(weights_count, Input(1).count()) CHECK_EQ(weights_count, Input(1).count())
<< "\nModel request " << "Tensor(" << Input(1).name() << ")'s " << "\nModel request " << "Tensor(" << Input(1).name() << ")'s "
...@@ -96,8 +103,11 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() { ...@@ -96,8 +103,11 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() {
// Determine the RNN workspace // Determine the RNN workspace
CUDNN_CHECK(cudnnGetRNNWorkspaceSize( CUDNN_CHECK(cudnnGetRNNWorkspaceSize(
ctx()->cudnn_handle(), rnn_desc, seq_length, ctx()->cudnn_handle(),
xs_desc->descs(), &workspace_size)); rnn_desc,
seq_length,
xs_desc->descs(),
&workspace_size));
} }
template <class Context> template <typename T> template <class Context> template <typename T>
...@@ -125,8 +135,9 @@ void CuDNNRecurrentOp<Context>::RunWithType() { ...@@ -125,8 +135,9 @@ void CuDNNRecurrentOp<Context>::RunWithType() {
auto handle = ctx()->cudnn_handle(); auto handle = ctx()->cudnn_handle();
if (phase() == "TRAIN") { if (phase() == "TRAIN") {
CUDNN_CHECK(cudnnGetRNNTrainingReserveSize(handle, CUDNN_CHECK(cudnnGetRNNTrainingReserveSize(
rnn_desc, seq_length, xs_desc->descs(), &reserve_size)); handle, rnn_desc, seq_length,
xs_desc->descs(), &reserve_size));
auto* reserveT = ws()->CreateTensor(mount_name( auto* reserveT = ws()->CreateTensor(mount_name(
"rnn/reserve"))->Reshape({ (int64_t)reserve_size }); "rnn/reserve"))->Reshape({ (int64_t)reserve_size });
auto* RSdata = reserveT->template mutable_data<uint8_t, Context>(); auto* RSdata = reserveT->template mutable_data<uint8_t, Context>();
...@@ -182,8 +193,9 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() { ...@@ -182,8 +193,9 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() {
auto* WSdata = ws()->template caches<Context>({ workspace_size })[0]; auto* WSdata = ws()->template caches<Context>({ workspace_size })[0];
// Check the ReserveSpace // Check the ReserveSpace
CUDNN_CHECK(cudnnGetRNNTrainingReserveSize(ctx()->cudnn_handle(), CUDNN_CHECK(cudnnGetRNNTrainingReserveSize(
rnn_desc, seq_length, xs_desc->descs(), &reserve_size)); ctx()->cudnn_handle(), rnn_desc, seq_length,
xs_desc->descs(), &reserve_size));
auto* reserveT = ws()->GetTensor(mount_name("rnn/reserve")); auto* reserveT = ws()->GetTensor(mount_name("rnn/reserve"));
CHECK_EQ(reserve_size, reserveT->nbytes()); CHECK_EQ(reserve_size, reserveT->nbytes());
#if CUDNN_VERSION_MIN(6,0,0) #if CUDNN_VERSION_MIN(6,0,0)
...@@ -215,6 +227,12 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() { ...@@ -215,6 +227,12 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() {
} }
if (Output(1)->name() != "NULL") { if (Output(1)->name() != "NULL") {
math::Set(
Output(1)->count(),
cast::to<T>(0.f),
YsData(1),
ctx()
); // CuDNN accumulates the gradient of weights
CUDNN_CHECK(cudnnRNNBackwardWeights(handle, rnn_desc, CUDNN_CHECK(cudnnRNNBackwardWeights(handle, rnn_desc,
seq_length, seq_length,
xs_desc->descs(), XsData(0), // X xs_desc->descs(), XsData(0), // X
......
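The added math::Set zero-fills the weight-gradient buffer because, as the inline comment notes, cudnnRNNBackwardWeights accumulates into dw instead of overwriting it; a stale buffer would silently carry the previous step's gradients. A tiny standalone sketch of the pattern, with the accumulating library call stood in by a plain += loop:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Stand-in for an accumulating backward kernel: dW += contribution.
    void AccumulateWeightGrad(const std::vector<float>& contribution,
                              std::vector<float>& dW) {
        for (std::size_t i = 0; i < dW.size(); ++i) dW[i] += contribution[i];
    }

    void BackwardWeights(const std::vector<float>& contribution,
                         std::vector<float>& dW) {
        // Required before any accumulate-only API (e.g.
        // cudnnRNNBackwardWeights): start from zero so the result is
        // exactly this step's gradient.
        std::fill(dW.begin(), dW.end(), 0.f);
        AccumulateWeightGrad(contribution, dW);
    }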
...@@ -6,10 +6,10 @@ namespace dragon { ...@@ -6,10 +6,10 @@ namespace dragon {
template <class Context> template <class Context>
void AdamUpdateOp<Context>::ComputeUpdates(Tensor* dX) { void AdamUpdateOp<Context>::ComputeUpdates(Tensor* dX) {
Tensor* m = ws()->CreateTensor( auto* M = ws()->CreateTensor(
"/mnt/" + Slot() + "/adam/m") "/mnt/" + Slot() + "/adam/m")
->ReshapeLike(*dX); ->ReshapeLike(*dX);
Tensor* v = ws()->CreateTensor( auto* V = ws()->CreateTensor(
"/mnt/" + Slot() + "/adam/v") "/mnt/" + Slot() + "/adam/v")
->ReshapeLike(*dX); ->ReshapeLike(*dX);
...@@ -18,8 +18,8 @@ void AdamUpdateOp<Context>::ComputeUpdates(Tensor* dX) { ...@@ -18,8 +18,8 @@ void AdamUpdateOp<Context>::ComputeUpdates(Tensor* dX) {
float coeff = sqrt(1. - pow(beta2, t)) / (1. - pow(beta1, t)); float coeff = sqrt(1. - pow(beta2, t)) / (1. - pow(beta1, t));
lr = Param("base_lr") * coeff * this->lr_mult; lr = Param("base_lr") * coeff * this->lr_mult;
auto* dXdata = dX->template mutable_data<float, Context>(); auto* dXdata = dX->template mutable_data<float, Context>();
auto* Mdata = m->mutable_data<float, Context>(); auto* Mdata = M->template mutable_data<float, Context>();
auto* Vdata = v->mutable_data<float, Context>(); auto* Vdata = V->template mutable_data<float, Context>();
kernel::AdamUpdate(dX->count(), lr, beta1, kernel::AdamUpdate(dX->count(), lr, beta1,
beta2, eps, dXdata, Mdata, Vdata, ctx()); beta2, eps, dXdata, Mdata, Vdata, ctx());
......
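For context, the /adam/m and /adam/v tensors created above hold Adam's first- and second-moment estimates, and the coeff folded into lr is the usual bias correction. Below is a minimal sketch of one Adam step over a flat gradient buffer; it assumes kernel::AdamUpdate follows the standard rule and overwrites the gradient with the final update (mirroring dXdata being mutable), which is an assumption rather than a statement about the actual kernel:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // One Adam step: g is overwritten with the update to subtract from the
    // parameters; m and v are the persistent moment buffers.
    void AdamUpdate(std::vector<float>& g, std::vector<float>& m,
                    std::vector<float>& v, float base_lr, float beta1,
                    float beta2, float eps, int t) {
        const double coeff = std::sqrt(1.0 - std::pow(beta2, t)) /
                             (1.0 - std::pow(beta1, t));
        const float lr = static_cast<float>(base_lr * coeff);  // bias correction
        for (std::size_t i = 0; i < g.size(); ++i) {
            m[i] = beta1 * m[i] + (1.f - beta1) * g[i];
            v[i] = beta2 * v[i] + (1.f - beta2) * g[i] * g[i];
            g[i] = lr * m[i] / (std::sqrt(v[i]) + eps);
        }
    }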
...@@ -7,13 +7,13 @@ namespace dragon { ...@@ -7,13 +7,13 @@ namespace dragon {
template <class Context> template <class Context>
void NesterovUpdateOp<Context>::ComputeUpdates(Tensor* dX) { void NesterovUpdateOp<Context>::ComputeUpdates(Tensor* dX) {
Tensor* h = ws()->CreateTensor( auto* H = ws()->CreateTensor(
"/mnt/" + Slot() + "/nesterov/h") "/mnt/" + Slot() + "/nesterov/h")
->ReshapeLike(*dX); ->ReshapeLike(*dX);
lr = Param("base_lr") * this->lr_mult, momentum = Param("momentum"); lr = Param("base_lr") * this->lr_mult, momentum = Param("momentum");
auto* dXdata = dX->template mutable_data<float, Context>(); auto* dXdata = dX->template mutable_data<float, Context>();
auto* Hdata = h->template mutable_data<float, Context>(); auto* Hdata = H->template mutable_data<float, Context>();
kernel::NesterovUpdate(dX->count(), lr, kernel::NesterovUpdate(dX->count(), lr,
momentum, dXdata, Hdata, ctx()); momentum, dXdata, Hdata, ctx());
......
...@@ -6,14 +6,14 @@ namespace dragon { ...@@ -6,14 +6,14 @@ namespace dragon {
template <class Context> template <class Context>
void RMSPropUpdateOp<Context>::ComputeUpdates(Tensor* dX) { void RMSPropUpdateOp<Context>::ComputeUpdates(Tensor* dX) {
Tensor* h = ws()->CreateTensor( auto* H = ws()->CreateTensor(
"/mnt/" + Slot() + "/rmsprop/h") "/mnt/" + Slot() + "/rmsprop/h")
->ReshapeLike(*dX); ->ReshapeLike(*dX);
lr = Param("base_lr") * this->lr_mult; lr = Param("base_lr") * this->lr_mult;
decay = Param("decay"), eps = Param("eps"); decay = Param("decay"), eps = Param("eps");
auto* dXdata = dX->template mutable_data<float, Context>(); auto* dXdata = dX->template mutable_data<float, Context>();
auto* Hdata = h->template mutable_data<float, Context>(); auto* Hdata = H->template mutable_data<float, Context>();
kernel::RMSPropUpdate(dX->count(), lr, kernel::RMSPropUpdate(dX->count(), lr,
decay, eps, dXdata, Hdata, ctx()); decay, eps, dXdata, Hdata, ctx());
......
...@@ -7,7 +7,7 @@ namespace dragon { ...@@ -7,7 +7,7 @@ namespace dragon {
template <class Context> template <class Context>
void SGDUpdateOp<Context>::ComputeUpdates(Tensor* dX) { void SGDUpdateOp<Context>::ComputeUpdates(Tensor* dX) {
Tensor* h = ws()->CreateTensor( auto* H = ws()->CreateTensor(
"/mnt/" + Slot() + "/sgd/h") "/mnt/" + Slot() + "/sgd/h")
->ReshapeLike(*dX); ->ReshapeLike(*dX);
...@@ -15,7 +15,7 @@ void SGDUpdateOp<Context>::ComputeUpdates(Tensor* dX) { ...@@ -15,7 +15,7 @@ void SGDUpdateOp<Context>::ComputeUpdates(Tensor* dX) {
// Momentum Correction, See arXiv:1706.02677 // Momentum Correction, See arXiv:1706.02677
if (old_lr > 0) { correction = lr / old_lr; } old_lr = lr; if (old_lr > 0) { correction = lr / old_lr; } old_lr = lr;
auto* dXdata = dX->template mutable_data<float, Context>(); auto* dXdata = dX->template mutable_data<float, Context>();
auto* Hdata = h->template mutable_data<float, Context>(); auto* Hdata = H->template mutable_data<float, Context>();
kernel::SGDUpdate(dX->count(), lr, kernel::SGDUpdate(dX->count(), lr,
momentum * correction, dXdata, Hdata, ctx()); momentum * correction, dXdata, Hdata, ctx());
......
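The momentum correction cited above (arXiv:1706.02677) rescales the history term by lr / old_lr whenever the learning rate changes, so a momentum buffer built under the old rate stays consistent with the new one; the diff passes momentum * correction straight into the kernel. A sketch of one corrected step, assuming the kernel computes h = corrected_momentum * h + lr * g and takes the update from h:

    #include <cstddef>
    #include <vector>

    struct SGDState {
        std::vector<float> h;   // momentum history
        float old_lr = -1.f;    // learning rate used for the previous step
    };

    // Overwrites g with the update; h <- momentum * correction * h + lr * g.
    void SGDMomentumStep(std::vector<float>& g, SGDState& s,
                         float lr, float momentum) {
        const float correction = (s.old_lr > 0.f) ? lr / s.old_lr : 1.f;
        s.old_lr = lr;
        for (std::size_t i = 0; i < g.size(); ++i) {
            s.h[i] = momentum * correction * s.h[i] + lr * g[i];
            g[i] = s.h[i];
        }
    }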
...@@ -8,7 +8,7 @@ namespace dragon { ...@@ -8,7 +8,7 @@ namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void BiasAddOp<Context>::RunWithType() { void BiasAddOp<Context>::RunWithType() {
TENSOR_FILL(Input(1), vector<int64_t>(1, dim)); TENSOR_FILL(Input(1), vector<int64_t>({ dim }));
DECLARE_MULTIPLIER(multiplier, inner_dim); DECLARE_MULTIPLIER(multiplier, inner_dim);
auto* Bdata = Input(1).template data<T, Context>(); auto* Bdata = Input(1).template data<T, Context>();
...@@ -17,7 +17,7 @@ void BiasAddOp<Context>::RunWithType() { ...@@ -17,7 +17,7 @@ void BiasAddOp<Context>::RunWithType() {
// Copy X to Y firstly if necessary // Copy X to Y firstly if necessary
Output(0)->template CopyFrom<Context>(Input(0), ctx()); Output(0)->template CopyFrom<Context>(Input(0), ctx());
kernel::BiasAdd(Output(0)->count(), outer_dim, dim, inner_dim, kernel::BiasAdd(outer_dim, dim, inner_dim,
data_format, Bdata, multiplier, Ydata, ctx()); data_format, Bdata, multiplier, Ydata, ctx());
} }
......
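BiasAdd broadcasts a length-dim bias over every outer x inner position; the DECLARE_MULTIPLIER vector of ones merely lets the broadcast be phrased as a rank-1 GEMM on devices where that is faster. A naive sketch of the NCHW case, where outer_dim is the batch size, dim the channel count and inner_dim the spatial size (the NHWC case permutes which axis plays which role):

    #include <cstddef>
    #include <vector>

    // y[o][d][i] += b[d] for every outer index o and inner index i.
    void BiasAdd(std::vector<float>& y, const std::vector<float>& b,
                 std::size_t outer_dim, std::size_t dim,
                 std::size_t inner_dim) {
        for (std::size_t o = 0; o < outer_dim; ++o)
            for (std::size_t d = 0; d < dim; ++d)
                for (std::size_t i = 0; i < inner_dim; ++i)
                    y[(o * dim + d) * inner_dim + i] += b[d];
    }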
...@@ -30,9 +30,9 @@ void BilinearResizeOp<Context>::RunOnDevice() { ...@@ -30,9 +30,9 @@ void BilinearResizeOp<Context>::RunOnDevice() {
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
dims[spatial_axis + i] = dsize(i); dims[spatial_axis + i] = dsize(i);
} else if (!shape_like_desc.empty()) { } else if (!shape_like_desc.empty()) {
Tensor* shape_like_tensor = ws()->GetTensor(shape_like_desc); auto* sl = ws()->GetTensor(shape_like_desc);
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
dims[spatial_axis + i] = shape_like_tensor->dim(spatial_axis + i); dims[spatial_axis + i] = sl->dim(spatial_axis + i);
} else { } else {
CHECK(fy != -1.f && fx != -1.f) CHECK(fy != -1.f && fx != -1.f)
<< "\nThe fx and fy should be set."; << "\nThe fx and fy should be set.";
......
...@@ -95,19 +95,23 @@ void ConvOpBase<Context>::Wx( ...@@ -95,19 +95,23 @@ void ConvOpBase<Context>::Wx(
for (int g = 0; g < group; g++) { for (int g = 0; g < group; g++) {
if (data_format == "NCHW") { if (data_format == "NCHW") {
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
conv_out_channels / group, CblasNoTrans,
conv_out_spatial_dim, conv_out_channels / group,
kernel_dim, conv_out_spatial_dim,
kernel_dim,
1.f, weights + weight_offset * g, 1.f, weights + weight_offset * g,
col_buffer + col_offset * g, col_buffer + col_offset * g,
0.f, y + output_offset * g, ctx()); 0.f, y + output_offset * g, ctx());
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
math::Gemm( math::Gemm(
CblasNoTrans, CblasTrans, CblasNoTrans,
conv_out_spatial_dim, conv_out_channels, CblasTrans,
kernel_dim, conv_out_spatial_dim,
1.f, col_buffer, weights, 0.f, y, ctx()); conv_out_channels,
kernel_dim,
1.f, col_buffer, weights,
0.f, y, ctx());
} }
} }
} }
...@@ -115,7 +119,7 @@ void ConvOpBase<Context>::Wx( ...@@ -115,7 +119,7 @@ void ConvOpBase<Context>::Wx(
template <class Context> template <typename T> template <class Context> template <typename T>
void ConvOpBase<Context>::Pb(const T* bias, T* y) { void ConvOpBase<Context>::Pb(const T* bias, T* y) {
DECLARE_MULTIPLIER(multiplier, out_spatial_dim); DECLARE_MULTIPLIER(multiplier, out_spatial_dim);
kernel::BiasAdd(Output(0)->count(), kernel::BiasAdd(
Input(0).dim(0), num_output, out_spatial_dim, Input(0).dim(0), num_output, out_spatial_dim,
data_format, bias, multiplier, y, ctx()); data_format, bias, multiplier, y, ctx());
} }
...@@ -127,18 +131,23 @@ void ConvOpBase<Context>::Dx(const T* dy, const T* weights, T* dx) { ...@@ -127,18 +131,23 @@ void ConvOpBase<Context>::Dx(const T* dy, const T* weights, T* dx) {
for (int g = 0; g < group; g++) { for (int g = 0; g < group; g++) {
if (data_format == "NCHW") { if (data_format == "NCHW") {
math::Gemm( math::Gemm(
CblasTrans, CblasNoTrans, CblasTrans,
kernel_dim, conv_out_spatial_dim, CblasNoTrans,
conv_out_channels / group, kernel_dim,
conv_out_spatial_dim,
conv_out_channels / group,
1.f, weights + weight_offset * g, 1.f, weights + weight_offset * g,
dy + output_offset * g, dy + output_offset * g,
0.f, col_buffer + col_offset * g, ctx()); 0.f, col_buffer + col_offset * g, ctx());
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
math::Gemm( math::Gemm(
CblasNoTrans, CblasNoTrans, CblasNoTrans,
conv_out_spatial_dim, kernel_dim, CblasNoTrans,
conv_out_channels, conv_out_spatial_dim,
1.f, dy, weights, 0.f, col_buffer, ctx()); kernel_dim,
conv_out_channels,
1.f, dy, weights,
0.f, col_buffer, ctx());
} }
} }
if (!is_1x1) Col2Im(col_buffer, dx); if (!is_1x1) Col2Im(col_buffer, dx);
...@@ -158,19 +167,23 @@ void ConvOpBase<Context>::Dw(const T* dy, const T* x, T *dw) { ...@@ -158,19 +167,23 @@ void ConvOpBase<Context>::Dw(const T* dy, const T* x, T *dw) {
for (int g = 0; g < group; g++) { for (int g = 0; g < group; g++) {
if (data_format == "NCHW") { if (data_format == "NCHW") {
math::Gemm( math::Gemm(
CblasNoTrans, CblasTrans, CblasNoTrans,
conv_out_channels / group, CblasTrans,
kernel_dim, conv_out_channels / group,
conv_out_spatial_dim, kernel_dim,
conv_out_spatial_dim,
1.f, dy + output_offset * g, 1.f, dy + output_offset * g,
col_buffer + col_offset * g, col_buffer + col_offset * g,
0.f, dw + weight_offset * g, ctx()); 0.f, dw + weight_offset * g, ctx());
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
math::Gemm( math::Gemm(
CblasTrans, CblasNoTrans, CblasTrans,
conv_out_channels, kernel_dim, CblasNoTrans,
conv_out_spatial_dim, conv_out_channels,
1.f, dy, col_buffer, 0.f, dw, ctx()); kernel_dim,
conv_out_spatial_dim,
1.f, dy, col_buffer,
0.f, dw, ctx());
} }
} }
} }
...@@ -180,14 +193,18 @@ void ConvOpBase<Context>::Db(const T* dy, T* db) { ...@@ -180,14 +193,18 @@ void ConvOpBase<Context>::Db(const T* dy, T* db) {
DECLARE_MULTIPLIER(multiplier, out_spatial_dim); DECLARE_MULTIPLIER(multiplier, out_spatial_dim);
if (data_format == "NCHW") { if (data_format == "NCHW") {
math::Gemv( math::Gemv(
CblasNoTrans, num_output, out_spatial_dim, CblasNoTrans,
1.f, dy, multiplier, num_output,
0.f, db, ctx()); out_spatial_dim,
1.f, dy, multiplier,
0.f, db, ctx());
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
math::Gemv( math::Gemv(
CblasTrans, out_spatial_dim, num_output, CblasTrans,
1.f, dy, multiplier, out_spatial_dim,
0.f, db, ctx()); num_output,
1.f, dy, multiplier,
0.f, db, ctx());
} }
} }
......
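The Wx/Dx/Dw/Db helpers above implement im2col convolution: forward is a per-group GEMM of the weights against the column buffer, Dx runs the transposed GEMM back into the column buffer before Col2Im scatters it, Dw multiplies dy by the column buffer, and Db reduces dy with a GEMV against a vector of ones. A naive sketch of the per-group forward GEMM for the NCHW branch; the flat row-major layout is assumed for illustration only:

    #include <cstddef>
    #include <vector>

    // Per-group forward GEMM of im2col convolution, NCHW case:
    //   y_g [Cout/G x OutHW] = w_g [Cout/G x KDim] * col_g [KDim x OutHW],
    // where KDim = (Cin/G) * Kh * Kw.
    void GroupForwardGemm(const std::vector<float>& w_g,
                          const std::vector<float>& col_g,
                          std::vector<float>& y_g,
                          std::size_t cout_per_group,
                          std::size_t kernel_dim,
                          std::size_t out_spatial_dim) {
        for (std::size_t m = 0; m < cout_per_group; ++m)
            for (std::size_t n = 0; n < out_spatial_dim; ++n) {
                float acc = 0.f;
                for (std::size_t k = 0; k < kernel_dim; ++k)
                    acc += w_g[m * kernel_dim + k] *
                           col_g[k * out_spatial_dim + n];
                y_g[m * out_spatial_dim + n] = acc;  // beta == 0
            }
    }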
...@@ -28,9 +28,10 @@ void CuDNNBiasAddOp<Context>::RunWithType() { ...@@ -28,9 +28,10 @@ void CuDNNBiasAddOp<Context>::RunWithType() {
// Copy X to Y firstly if necessary // Copy X to Y firstly if necessary
Output(0)->template CopyFrom<Context>(Input(0), ctx()); Output(0)->template CopyFrom<Context>(Input(0), ctx());
CUDNN_CHECK(cudnnAddTensor(ctx()->cudnn_handle(), CUDNN_CHECK(cudnnAddTensor(
ctx()->cudnn_handle(),
CUDNNType<T>::one, bias_desc, Bdata, CUDNNType<T>::one, bias_desc, Bdata,
CUDNNType<T>::one, output_desc, Ydata)); CUDNNType<T>::one, output_desc, Ydata));
} }
template <class Context> template <class Context>
...@@ -70,9 +71,10 @@ void CuDNNBiasAddGradientOp<Context>::RunWithType() { ...@@ -70,9 +71,10 @@ void CuDNNBiasAddGradientOp<Context>::RunWithType() {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
T* dBdata = Output(1)->template mutable_data<T, Context>(); T* dBdata = Output(1)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardBias(ctx()->cudnn_handle(), CUDNN_CHECK(cudnnConvolutionBackwardBias(
ctx()->cudnn_handle(),
CUDNNType<T>::one, input_desc, dYdata, CUDNNType<T>::one, input_desc, dYdata,
CUDNNType<T>::zero, bias_desc, dBdata)); CUDNNType<T>::zero, bias_desc, dBdata));
if (Output(0)->name() != "NULL" && if (Output(0)->name() != "NULL" &&
Output(0)->name() != Input(-1).name()) { Output(0)->name() != Input(-1).name()) {
......
...@@ -13,32 +13,38 @@ template <class Context> ...@@ -13,32 +13,38 @@ template <class Context>
void CuDNNConv2dOp<Context>::SetConvDescFromInputs() { void CuDNNConv2dOp<Context>::SetConvDescFromInputs() {
if (XIsType(Input(0), float)) { if (XIsType(Input(0), float)) {
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], stride[0], stride[1],
dilation[0], dilation[1], dilation[0], dilation[1],
CUDNN_CROSS_CORRELATION, CUDNN_CROSS_CORRELATION,
CUDNN_DATA_FLOAT)); CUDNN_DATA_FLOAT));
#else #else
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], 1, 1, stride[0], stride[1],
CUDNN_CROSS_CORRELATION)); 1, 1,
CUDNN_CROSS_CORRELATION));
#endif #endif
} else if (XIsType(Input(0), float16)) { } else if (XIsType(Input(0), float16)) {
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT; compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], stride[0], stride[1],
dilation[0], dilation[1], dilation[0], dilation[1],
CUDNN_CROSS_CORRELATION, CUDNN_CROSS_CORRELATION,
compute_type)); compute_type));
#else #else
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], 1, 1, stride[0], stride[1],
CUDNN_CROSS_CORRELATION)); 1, 1,
CUDNN_CROSS_CORRELATION));
#endif #endif
} }
#if CUDNN_VERSION_MIN(7, 0, 0) #if CUDNN_VERSION_MIN(7, 0, 0)
...@@ -58,12 +64,15 @@ void CuDNNConv2dOp<Context>::ResetDesc() { ...@@ -58,12 +64,15 @@ void CuDNNConv2dOp<Context>::ResetDesc() {
// Determine the input & output shape // Determine the input & output shape
input_dims = Input(0).dims(); input_dims = Input(0).dims();
cudnnSetTensor4dDescWithGroup<T>( cudnnSetTensor4dDescWithGroup<T>(
&input_desc, data_format, Input(0).dims(), cudnn_group); &input_desc, data_format,
Input(0).dims(), cudnn_group);
cudnnSetTensor4dDescWithGroup<T>( cudnnSetTensor4dDescWithGroup<T>(
&output_desc, data_format, Output(0)->dims(), cudnn_group); &output_desc, data_format,
Output(0)->dims(), cudnn_group);
if (HasBias()) { if (HasBias()) {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(
&output2b_desc, data_format, Output(0)->dims()); &output2b_desc, data_format,
Output(0)->dims());
} }
// Determine the misc // Determine the misc
if (data_format == "NCHW") { if (data_format == "NCHW") {
...@@ -104,15 +113,18 @@ void CuDNNConv2dOp<Context>::ResetDesc() { ...@@ -104,15 +113,18 @@ void CuDNNConv2dOp<Context>::ResetDesc() {
// Now, Select the appropriate algorithm // Now, Select the appropriate algorithm
CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm( CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(
ctx()->cudnn_handle(), input_desc, ctx()->cudnn_handle(),
filter_desc, conv_desc, output_desc, input_desc, filter_desc,
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, conv_desc, output_desc,
WORKSPACE_LIMIT_BYTES, &fwd_algo)); CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
WORKSPACE_LIMIT_BYTES,
&fwd_algo));
CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize( CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(
ctx()->cudnn_handle(), input_desc, ctx()->cudnn_handle(),
filter_desc, conv_desc, output_desc, input_desc, filter_desc,
fwd_algo, &fwd_data_size)); conv_desc, output_desc,
fwd_algo, &fwd_data_size));
} }
} }
...@@ -169,32 +181,38 @@ template <class Context> ...@@ -169,32 +181,38 @@ template <class Context>
void CuDNNConv2dGradientOp<Context>::SetConvDescFromInputs() { void CuDNNConv2dGradientOp<Context>::SetConvDescFromInputs() {
if (XIsType(Input(0), float)) { if (XIsType(Input(0), float)) {
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], stride[0], stride[1],
dilation[0], dilation[1], dilation[0], dilation[1],
CUDNN_CROSS_CORRELATION, CUDNN_CROSS_CORRELATION,
CUDNN_DATA_FLOAT)); CUDNN_DATA_FLOAT));
#else #else
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], 1, 1, stride[0], stride[1],
CUDNN_CROSS_CORRELATION)); 1, 1,
CUDNN_CROSS_CORRELATION));
#endif #endif
} else if (XIsType(Input(0), float16)) { } else if (XIsType(Input(0), float16)) {
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT; compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], stride[0], stride[1],
dilation[0], dilation[1], dilation[0], dilation[1],
CUDNN_CROSS_CORRELATION, CUDNN_CROSS_CORRELATION,
compute_type)); compute_type));
#else #else
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], 1, 1, stride[0], stride[1],
CUDNN_CROSS_CORRELATION)); 1, 1,
CUDNN_CROSS_CORRELATION));
#endif #endif
} }
#if CUDNN_VERSION_MIN(7, 0, 0) #if CUDNN_VERSION_MIN(7, 0, 0)
...@@ -214,12 +232,15 @@ void CuDNNConv2dGradientOp<Context>::ResetDesc() { ...@@ -214,12 +232,15 @@ void CuDNNConv2dGradientOp<Context>::ResetDesc() {
// Determine the input & output shape // Determine the input & output shape
input_dims = Input(0).dims(); input_dims = Input(0).dims();
cudnnSetTensor4dDescWithGroup<T>( cudnnSetTensor4dDescWithGroup<T>(
&input_desc, data_format, Input(-1).dims(), cudnn_group); &input_desc, data_format,
Input(-1).dims(), cudnn_group);
cudnnSetTensor4dDescWithGroup<T>( cudnnSetTensor4dDescWithGroup<T>(
&output_desc, data_format, Input(0).dims(), cudnn_group); &output_desc, data_format,
Input(0).dims(), cudnn_group);
if (HasBias()) { if (HasBias()) {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(
&input2b_desc, data_format, Input(-1).dims()); &input2b_desc, data_format,
Input(-1).dims());
} }
// Determine the misc // Determine the misc
if (data_format == "NCHW") { if (data_format == "NCHW") {
...@@ -260,26 +281,32 @@ void CuDNNConv2dGradientOp<Context>::ResetDesc() { ...@@ -260,26 +281,32 @@ void CuDNNConv2dGradientOp<Context>::ResetDesc() {
// Now, Select the appropriate algorithm // Now, Select the appropriate algorithm
CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm( CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(
ctx()->cudnn_handle(), output_desc, ctx()->cudnn_handle(),
input_desc, conv_desc, filter_desc, output_desc, input_desc,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, conv_desc, filter_desc,
WORKSPACE_LIMIT_BYTES, &bwd_filter_algo)); CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
WORKSPACE_LIMIT_BYTES,
&bwd_filter_algo));
CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize( CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(
ctx()->cudnn_handle(), output_desc, ctx()->cudnn_handle(),
input_desc, conv_desc, filter_desc, output_desc, input_desc,
bwd_filter_algo, &bwd_filter_size)); conv_desc, filter_desc,
bwd_filter_algo, &bwd_filter_size));
CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm( CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(
ctx()->cudnn_handle(), filter_desc, ctx()->cudnn_handle(),
input_desc, conv_desc, output_desc, filter_desc, input_desc,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, conv_desc, output_desc,
WORKSPACE_LIMIT_BYTES, &bwd_data_algo)); CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
WORKSPACE_LIMIT_BYTES,
&bwd_data_algo));
CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize( CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(
ctx()->cudnn_handle(), filter_desc, ctx()->cudnn_handle(),
input_desc, conv_desc, output_desc, filter_desc, input_desc,
bwd_data_algo, &bwd_data_size)); conv_desc, output_desc,
bwd_data_algo, &bwd_data_size));
} }
} }
...@@ -296,28 +323,31 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() { ...@@ -296,28 +323,31 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
if (Output(2)->name() != "NULL") { if (Output(2)->name() != "NULL") {
T* dBdata = Output(2)->template mutable_data<T, Context>(); T* dBdata = Output(2)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardBias(
cudnn_handle,
CUDNNType<T>::one, input2b_desc, dYdata, CUDNNType<T>::one, input2b_desc, dYdata,
CUDNNType<T>::zero, bias_desc, dBdata)); CUDNNType<T>::zero, bias_desc, dBdata));
} }
for (int g = 0; g < cudnn_group; g++) { for (int g = 0; g < cudnn_group; g++) {
if (Output(1)->name() != "NULL") { if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardFilter(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardFilter(
cudnn_handle,
CUDNNType<T>::one, output_desc, Xdata + x_offset * g, CUDNNType<T>::one, output_desc, Xdata + x_offset * g,
input_desc, dYdata + y_offset * g, input_desc, dYdata + y_offset * g,
conv_desc, bwd_filter_algo, WSdata, bwd_filter_size, conv_desc, bwd_filter_algo, WSdata, bwd_filter_size,
CUDNNType<T>::zero, filter_desc, dWdata + weight_offset * g)); CUDNNType<T>::zero, filter_desc, dWdata + weight_offset * g));
} }
if (Output(0)->name() != "NULL") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardData(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardData(
cudnn_handle,
CUDNNType<T>::one, filter_desc, Wdata + weight_offset * g, CUDNNType<T>::one, filter_desc, Wdata + weight_offset * g,
input_desc, dYdata + y_offset * g, input_desc, dYdata + y_offset * g,
conv_desc, bwd_data_algo, WSdata, bwd_data_size, conv_desc, bwd_data_algo, WSdata, bwd_data_size,
CUDNNType<T>::zero, output_desc, dXdata + x_offset * g)); CUDNNType<T>::zero, output_desc, dXdata + x_offset * g));
} }
} }
......
...@@ -13,32 +13,37 @@ template <class Context> ...@@ -13,32 +13,37 @@ template <class Context>
void CuDNNConvTranspose2dOp<Context>::SetConvDescFromInputs() { void CuDNNConvTranspose2dOp<Context>::SetConvDescFromInputs() {
if (XIsType(Input(0), float)) { if (XIsType(Input(0), float)) {
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], stride[0], stride[1],
dilation[0], dilation[1], dilation[0], dilation[1],
CUDNN_CROSS_CORRELATION, CUDNN_CROSS_CORRELATION,
CUDNN_DATA_FLOAT)); CUDNN_DATA_FLOAT));
#else #else
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], 1, 1, stride[0], stride[1], 1, 1,
CUDNN_CROSS_CORRELATION)); CUDNN_CROSS_CORRELATION));
#endif #endif
} else if (XIsType(Input(0), float16)) { } else if (XIsType(Input(0), float16)) {
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT; compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], stride[0], stride[1],
dilation[0], dilation[1], dilation[0], dilation[1],
CUDNN_CROSS_CORRELATION, CUDNN_CROSS_CORRELATION,
compute_type)); compute_type));
#else #else
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], 1, 1, stride[0], stride[1],
CUDNN_CROSS_CORRELATION)); 1, 1,
CUDNN_CROSS_CORRELATION));
#endif #endif
} }
#if CUDNN_VERSION_MIN(7, 0, 0) #if CUDNN_VERSION_MIN(7, 0, 0)
...@@ -58,12 +63,15 @@ void CuDNNConvTranspose2dOp<Context>::ResetDesc() { ...@@ -58,12 +63,15 @@ void CuDNNConvTranspose2dOp<Context>::ResetDesc() {
// Determine the input & output shape // Determine the input & output shape
output_dims = Output(0)->dims(); output_dims = Output(0)->dims();
cudnnSetTensor4dDescWithGroup<T>( cudnnSetTensor4dDescWithGroup<T>(
&input_desc, data_format, Input(0).dims(), cudnn_group); &input_desc, data_format,
Input(0).dims(), cudnn_group);
cudnnSetTensor4dDescWithGroup<T>( cudnnSetTensor4dDescWithGroup<T>(
&output_desc, data_format, Output(0)->dims(), cudnn_group); &output_desc, data_format,
Output(0)->dims(), cudnn_group);
if (HasBias()) { if (HasBias()) {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(
&output2b_desc, data_format, Output(0)->dims()); &output2b_desc, data_format,
Output(0)->dims());
} }
// Determine the misc // Determine the misc
if (data_format == "NCHW") { if (data_format == "NCHW") {
...@@ -102,15 +110,18 @@ void CuDNNConvTranspose2dOp<Context>::ResetDesc() { ...@@ -102,15 +110,18 @@ void CuDNNConvTranspose2dOp<Context>::ResetDesc() {
// Now, Select the appropriate algorithm // Now, Select the appropriate algorithm
CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm( CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(
ctx()->cudnn_handle(), filter_desc, ctx()->cudnn_handle(),
input_desc, conv_desc, output_desc, filter_desc, input_desc,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, conv_desc, output_desc,
WORKSPACE_LIMIT_BYTES, &fwd_algo)); CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
WORKSPACE_LIMIT_BYTES,
&fwd_algo));
CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize( CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(
ctx()->cudnn_handle(), filter_desc, ctx()->cudnn_handle(),
input_desc, conv_desc, output_desc, filter_desc, input_desc,
fwd_algo, &fwd_data_size)); conv_desc, output_desc,
fwd_algo, &fwd_data_size));
} }
} }
...@@ -130,18 +141,20 @@ void CuDNNConvTranspose2dOp<Context>::RunWithType() { ...@@ -130,18 +141,20 @@ void CuDNNConvTranspose2dOp<Context>::RunWithType() {
auto cudnn_handle = ctx()->cudnn_handle(); auto cudnn_handle = ctx()->cudnn_handle();
for (int g = 0; g < cudnn_group; g++) { for (int g = 0; g < cudnn_group; g++) {
CUDNN_CHECK(cudnnConvolutionBackwardData(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardData(
cudnn_handle,
CUDNNType<T>::one, filter_desc, Wdata + weight_offset * g, CUDNNType<T>::one, filter_desc, Wdata + weight_offset * g,
input_desc, Xdata + x_offset * g, input_desc, Xdata + x_offset * g,
conv_desc, fwd_algo, WSdata, fwd_data_size, conv_desc, fwd_algo, WSdata, fwd_data_size,
CUDNNType<T>::zero, output_desc, Ydata + y_offset * g)); CUDNNType<T>::zero, output_desc, Ydata + y_offset * g));
} }
if (HasBias()) { if (HasBias()) {
auto* Bdata = Input(2).template data<T, Context>(); auto* Bdata = Input(2).template data<T, Context>();
CUDNN_CHECK(cudnnAddTensor(cudnn_handle, CUDNN_CHECK(cudnnAddTensor(
cudnn_handle,
CUDNNType<T>::one, bias_desc, Bdata, CUDNNType<T>::one, bias_desc, Bdata,
CUDNNType<T>::one, output2b_desc, Ydata)); CUDNNType<T>::one, output2b_desc, Ydata));
} }
} }
...@@ -167,32 +180,38 @@ template <class Context> ...@@ -167,32 +180,38 @@ template <class Context>
void CuDNNConvTranspose2dGradientOp<Context>::SetConvDescFromInputs() { void CuDNNConvTranspose2dGradientOp<Context>::SetConvDescFromInputs() {
if (XIsType(Input(0), float)) { if (XIsType(Input(0), float)) {
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], stride[0], stride[1],
dilation[0], dilation[1], dilation[0], dilation[1],
CUDNN_CROSS_CORRELATION, CUDNN_CROSS_CORRELATION,
CUDNN_DATA_FLOAT)); CUDNN_DATA_FLOAT));
#else #else
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], 1, 1, stride[0], stride[1],
CUDNN_CROSS_CORRELATION)); 1, 1,
CUDNN_CROSS_CORRELATION));
#endif #endif
} else if (XIsType(Input(0), float16)) { } else if (XIsType(Input(0), float16)) {
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT; compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], stride[0], stride[1],
dilation[0], dilation[1], dilation[0], dilation[1],
CUDNN_CROSS_CORRELATION, CUDNN_CROSS_CORRELATION,
compute_type)); compute_type));
#else #else
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_l[0], pad_l[1], pad_l[0], pad_l[1],
stride[0], stride[1], 1, 1, stride[0], stride[1],
CUDNN_CROSS_CORRELATION)); 1, 1,
CUDNN_CROSS_CORRELATION));
#endif #endif
} }
#if CUDNN_VERSION_MIN(7, 0, 0) #if CUDNN_VERSION_MIN(7, 0, 0)
...@@ -212,12 +231,15 @@ void CuDNNConvTranspose2dGradientOp<Context>::ResetDesc() { ...@@ -212,12 +231,15 @@ void CuDNNConvTranspose2dGradientOp<Context>::ResetDesc() {
// Determine the input & output shape // Determine the input & output shape
output_dims = Input(-1).dims(); output_dims = Input(-1).dims();
cudnnSetTensor4dDescWithGroup<T>( cudnnSetTensor4dDescWithGroup<T>(
&input_desc, data_format, Input(-1).dims(), cudnn_group); &input_desc, data_format,
Input(-1).dims(), cudnn_group);
cudnnSetTensor4dDescWithGroup<T>( cudnnSetTensor4dDescWithGroup<T>(
&output_desc, data_format, Input(0).dims(), cudnn_group); &output_desc, data_format,
Input(0).dims(), cudnn_group);
if (HasBias()) { if (HasBias()) {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(
&input2b_desc, data_format, Input(-1).dims()); &input2b_desc, data_format,
Input(-1).dims());
} }
// Determine the misc // Determine the misc
if (data_format == "NCHW") { if (data_format == "NCHW") {
...@@ -256,26 +278,32 @@ void CuDNNConvTranspose2dGradientOp<Context>::ResetDesc() { ...@@ -256,26 +278,32 @@ void CuDNNConvTranspose2dGradientOp<Context>::ResetDesc() {
// Now, Select the appropriate algorithm // Now, Select the appropriate algorithm
CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm( CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(
ctx()->cudnn_handle(), input_desc, ctx()->cudnn_handle(),
output_desc, conv_desc, filter_desc, input_desc, output_desc,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, conv_desc, filter_desc,
WORKSPACE_LIMIT_BYTES, &bwd_filter_algo)); CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
WORKSPACE_LIMIT_BYTES,
&bwd_filter_algo));
CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize( CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(
ctx()->cudnn_handle(), input_desc, ctx()->cudnn_handle(),
output_desc, conv_desc, filter_desc, input_desc, output_desc,
bwd_filter_algo, &bwd_filter_size)); conv_desc, filter_desc,
bwd_filter_algo, &bwd_filter_size));
CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm( CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(
ctx()->cudnn_handle(), input_desc, ctx()->cudnn_handle(),
filter_desc, conv_desc, output_desc, input_desc, filter_desc,
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, conv_desc, output_desc,
WORKSPACE_LIMIT_BYTES, &bwd_data_algo)); CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
WORKSPACE_LIMIT_BYTES,
&bwd_data_algo));
CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize( CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(
ctx()->cudnn_handle(), input_desc, ctx()->cudnn_handle(),
filter_desc, conv_desc, output_desc, input_desc, filter_desc,
bwd_data_algo, &bwd_data_size)); conv_desc, output_desc,
bwd_data_algo, &bwd_data_size));
} }
} }
...@@ -292,28 +320,31 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() { ...@@ -292,28 +320,31 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() {
if (Output(2)->name() != "NULL") { if (Output(2)->name() != "NULL") {
T* dBdata = Output(2)->template mutable_data<T, Context>(); T* dBdata = Output(2)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardBias(
cudnn_handle,
CUDNNType<T>::one, input2b_desc, dYdata, CUDNNType<T>::one, input2b_desc, dYdata,
CUDNNType<T>::zero, bias_desc, dBdata)); CUDNNType<T>::zero, bias_desc, dBdata));
} }
for (int g = 0; g < cudnn_group; g++) { for (int g = 0; g < cudnn_group; g++) {
if (Output(1)->name() != "NULL") { if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardFilter(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardFilter(
cudnn_handle,
CUDNNType<T>::one, input_desc, dYdata + y_offset * g, CUDNNType<T>::one, input_desc, dYdata + y_offset * g,
output_desc, Xdata + x_offset * g, output_desc, Xdata + x_offset * g,
conv_desc, bwd_filter_algo, WSdata, bwd_filter_size, conv_desc, bwd_filter_algo, WSdata, bwd_filter_size,
CUDNNType<T>::zero, filter_desc, dWdata + weight_offset * g)); CUDNNType<T>::zero, filter_desc, dWdata + weight_offset * g));
} }
if (Output(0)->name() != "NULL") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionForward(cudnn_handle, CUDNN_CHECK(cudnnConvolutionForward(
cudnn_handle,
CUDNNType<T>::one, input_desc, dYdata + y_offset * g, CUDNNType<T>::one, input_desc, dYdata + y_offset * g,
filter_desc, Wdata + weight_offset * g, filter_desc, Wdata + weight_offset * g,
conv_desc, bwd_data_algo, WSdata, bwd_data_size, conv_desc, bwd_data_algo, WSdata, bwd_data_size,
CUDNNType<T>::zero, output_desc, dXdata + x_offset * g)); CUDNNType<T>::zero, output_desc, dXdata + x_offset * g));
} }
} }
......
...@@ -28,16 +28,20 @@ void CuDNNDepthwiseConv2dOp<Context>::RunWithType() { ...@@ -28,16 +28,20 @@ void CuDNNDepthwiseConv2dOp<Context>::RunWithType() {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
kernel::DepthwiseConv2d(Input(0).dim(0), channels, kernel::DepthwiseConv2d(
input_shape[0], input_shape[1], output_shape[0], output_shape[1], Input(0).dim(0), channels,
kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1], input_shape[0], input_shape[1],
data_format, Xdata, Wdata, Ydata, ctx()); output_shape[0], output_shape[1],
kernel_shape[0], kernel_shape[1],
stride[0], pad_l[0], pad_l[1],
data_format, Xdata, Wdata, Ydata, ctx());
if (HasBias()) { if (HasBias()) {
auto* Bdata = Input(2).template data<T, Context>(); auto* Bdata = Input(2).template data<T, Context>();
CUDNN_CHECK(cudnnAddTensor(ctx()->cudnn_handle(), CUDNN_CHECK(cudnnAddTensor(
ctx()->cudnn_handle(),
CUDNNType<T>::one, bias_desc, Bdata, CUDNNType<T>::one, bias_desc, Bdata,
CUDNNType<T>::one, output_desc, Ydata)); CUDNNType<T>::one, output_desc, Ydata));
} }
} }
...@@ -83,18 +87,24 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::RunWithType() { ...@@ -83,18 +87,24 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::RunWithType() {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
math::Set(Output(1)->count(), cast::to<T>(0.f), dWdata, ctx()); math::Set(Output(1)->count(), cast::to<T>(0.f), dWdata, ctx());
kernel::DepthwiseConv2dWGrad(Input(0).dim(0), channels, kernel::DepthwiseConv2dWGrad(
input_shape[0], input_shape[1], output_shape[0], output_shape[1], Input(0).dim(0), channels,
kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1], input_shape[0], input_shape[1],
data_format, dYdata, Xdata, dWdata, ctx()); output_shape[0], output_shape[1],
kernel_shape[0], kernel_shape[1],
stride[0], pad_l[0], pad_l[1],
data_format, dYdata, Xdata, dWdata, ctx());
} }
if (Output(0)->name() != "NULL") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
kernel::DepthwiseConv2dGrad(Input(0).dim(0), channels, kernel::DepthwiseConv2dGrad(
input_shape[0], input_shape[1], output_shape[0], output_shape[1], Input(0).dim(0), channels,
kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1], input_shape[0], input_shape[1],
data_format, dYdata, Wdata, dXdata, ctx()); output_shape[0], output_shape[1],
kernel_shape[0], kernel_shape[1],
stride[0], pad_l[0], pad_l[1],
data_format, dYdata, Wdata, dXdata, ctx());
} }
} }
} }
......
...@@ -10,24 +10,28 @@ void CuDNNPool2dOp<Context>::RunWithType() { ...@@ -10,24 +10,28 @@ void CuDNNPool2dOp<Context>::RunWithType() {
cudnnSetTensor4dDesc<T>(&output_desc, this->data_format, Output(0)); cudnnSetTensor4dDesc<T>(&output_desc, this->data_format, Output(0));
#if CUDNN_VERSION_MIN(5, 0, 0) #if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnSetPooling2dDescriptor( CUDNN_CHECK(cudnnSetPooling2dDescriptor(
pool_desc, pool_mode, CUDNN_PROPAGATE_NAN, pool_desc,
this->kernel_shape[0], this->kernel_shape[1], pool_mode,
this->pad_l[0], this->pad_l[1], CUDNN_PROPAGATE_NAN,
this->stride[0], this->stride[1])); this->kernel_shape[0], this->kernel_shape[1],
this->pad_l[0], this->pad_l[1],
this->stride[0], this->stride[1]));
#else #else
CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4( CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(
pool_desc, pool_mode, CUDNN_PROPAGATE_NAN, pool_desc,
this->kernel_shape[0], this->kernel_shape[1], pool_mode,
this->pad_l[0], this->pad_l[1], CUDNN_PROPAGATE_NAN,
this->stride[0], this->stride[1])); this->kernel_shape[0], this->kernel_shape[1],
this->pad_l[0], this->pad_l[1],
this->stride[0], this->stride[1]));
#endif #endif
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnPoolingForward( CUDNN_CHECK(cudnnPoolingForward(
ctx()->cudnn_handle(), pool_desc, ctx()->cudnn_handle(), pool_desc,
CUDNNType<T>::one, input_desc, Xdata, CUDNNType<T>::one, input_desc, Xdata,
CUDNNType<T>::zero, output_desc, Ydata)); CUDNNType<T>::zero, output_desc, Ydata));
} }
template <class Context> template <class Context>
...@@ -47,16 +51,20 @@ void CuDNNPool2dGradientOp<Context>::RunWithType() { ...@@ -47,16 +51,20 @@ void CuDNNPool2dGradientOp<Context>::RunWithType() {
cudnnSetTensor4dDesc<T>(&output_desc, this->data_format, Output(0)); cudnnSetTensor4dDesc<T>(&output_desc, this->data_format, Output(0));
#if CUDNN_VERSION_MIN(5, 0, 0) #if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnSetPooling2dDescriptor( CUDNN_CHECK(cudnnSetPooling2dDescriptor(
pool_desc, pool_mode, CUDNN_PROPAGATE_NAN, pool_desc,
this->kernel_shape[0], this->kernel_shape[1], pool_mode,
this->pad_l[0], this->pad_l[1], CUDNN_PROPAGATE_NAN,
this->stride[0], this->stride[1])); this->kernel_shape[0], this->kernel_shape[1],
this->pad_l[0], this->pad_l[1],
this->stride[0], this->stride[1]));
#else #else
CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4( CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(
pool_desc, pool_mode, CUDNN_PROPAGATE_NAN, pool_desc,
this->kernel_shape[0], this->kernel_shape[1], pool_mode,
this->pad_l[0], this->pad_l[1], CUDNN_PROPAGATE_NAN,
this->stride[0], this->stride[1])); this->kernel_shape[0], this->kernel_shape[1],
this->pad_l[0], this->pad_l[1],
this->stride[0], this->stride[1]));
#endif #endif
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
...@@ -65,9 +73,9 @@ void CuDNNPool2dGradientOp<Context>::RunWithType() { ...@@ -65,9 +73,9 @@ void CuDNNPool2dGradientOp<Context>::RunWithType() {
CUDNN_CHECK(cudnnPoolingBackward( CUDNN_CHECK(cudnnPoolingBackward(
ctx()->cudnn_handle(), pool_desc, ctx()->cudnn_handle(), pool_desc,
CUDNNType<T>::one, input_desc, Ydata, CUDNNType<T>::one, input_desc, Ydata,
input_desc, dYdata, output_desc, Xdata, input_desc, dYdata, output_desc, Xdata,
CUDNNType<T>::zero, output_desc, dXdata)); CUDNNType<T>::zero, output_desc, dXdata));
} }
template <class Context> template <class Context>
......
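cudnnPoolingBackward needs y, dy and x to recompute which input fed each output; the plain Pool2d path further below instead records an explicit argmax mask during the forward pass. A naive single-plane sketch of that mask-recording max pooling, with padding omitted for brevity (so it only matches the pad == 0 case):

    #include <limits>

    // Naive NCHW max pooling over one channel plane, recording the argmax
    // ("mask") of every output cell so a backward pass can scatter dY back
    // to exactly that input position.
    void MaxPool2dPlane(const float* x, int h, int w,
                        int kh, int kw, int sh, int sw,
                        float* y, int* mask, int pool_h, int pool_w) {
        for (int ph = 0; ph < pool_h; ++ph) {
            for (int pw = 0; pw < pool_w; ++pw) {
                float best = -std::numeric_limits<float>::infinity();
                int best_idx = -1;
                for (int i = ph * sh; i < ph * sh + kh && i < h; ++i)
                    for (int j = pw * sw; j < pw * sw + kw && j < w; ++j)
                        if (x[i * w + j] > best) {
                            best = x[i * w + j];
                            best_idx = i * w + j;
                        }
                y[ph * pool_w + pw] = best;
                mask[ph * pool_w + pw] = best_idx;
            }
        }
    }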
...@@ -14,10 +14,13 @@ void DepthwiseConv2dOp<Context>::RunWithType() { ...@@ -14,10 +14,13 @@ void DepthwiseConv2dOp<Context>::RunWithType() {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
kernel::DepthwiseConv2d(Input(0).dim(0), channels, kernel::DepthwiseConv2d(
input_shape[0], input_shape[1], output_shape[0], output_shape[1], Input(0).dim(0), channels,
kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1], input_shape[0], input_shape[1],
data_format, Xdata, Wdata, Ydata, ctx()); output_shape[0], output_shape[1],
kernel_shape[0], kernel_shape[1],
stride[0], pad_l[0], pad_l[1],
data_format, Xdata, Wdata, Ydata, ctx());
if (HasBias()) { if (HasBias()) {
auto* Bdata = Input(2).template data<T, Context>(); auto* Bdata = Input(2).template data<T, Context>();
...@@ -57,19 +60,30 @@ void DepthwiseConv2dGradientOp<Context>::RunWithType() { ...@@ -57,19 +60,30 @@ void DepthwiseConv2dGradientOp<Context>::RunWithType() {
if (Output(1)->name() != "NULL") { if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
math::Set(Output(1)->count(), cast::to<T>(0.f), dWdata, ctx()); math::Set(
kernel::DepthwiseConv2dWGrad(Input(0).dim(0), channels, Output(1)->count(),
input_shape[0], input_shape[1], output_shape[0], output_shape[1], cast::to<T>(0.f),
kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1], dWdata,
data_format, dYdata, Xdata, dWdata, ctx()); ctx()
); // Zero the gradient of W
kernel::DepthwiseConv2dWGrad(
Input(0).dim(0), channels,
input_shape[0], input_shape[1],
output_shape[0], output_shape[1],
kernel_shape[0], kernel_shape[1],
stride[0], pad_l[0], pad_l[1],
data_format, dYdata, Xdata, dWdata, ctx());
} }
if (Output(0)->name() != "NULL") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
kernel::DepthwiseConv2dGrad(Input(0).dim(0), channels, kernel::DepthwiseConv2dGrad(
input_shape[0], input_shape[1], output_shape[0], output_shape[1], Input(0).dim(0), channels,
kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1], input_shape[0], input_shape[1],
data_format, dYdata, Wdata, dXdata, ctx()); output_shape[0], output_shape[1],
kernel_shape[0], kernel_shape[1],
stride[0], pad_l[0], pad_l[1],
data_format, dYdata, Wdata, dXdata, ctx());
} }
} }
} }
......
...@@ -30,9 +30,9 @@ void NNResizeOp<Context>::RunOnDevice() { ...@@ -30,9 +30,9 @@ void NNResizeOp<Context>::RunOnDevice() {
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
dims[spatial_axis + i] = dsize(i); dims[spatial_axis + i] = dsize(i);
} else if (!shape_like_desc.empty()) { } else if (!shape_like_desc.empty()) {
Tensor* shape_like_tensor = ws()->GetTensor(shape_like_desc); auto* sl = ws()->GetTensor(shape_like_desc);
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
dims[spatial_axis + i] = shape_like_tensor->dim(spatial_axis + i); dims[spatial_axis + i] = sl->dim(spatial_axis + i);
} else { } else {
CHECK(fy != -1.f && fx != -1.f) CHECK(fy != -1.f && fx != -1.f)
<< "\nThe fx and fy should be set."; << "\nThe fx and fy should be set.";
......
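The three branches above decide the output spatial dims for NNResize: an explicit dsize, a shape-like tensor fetched from the workspace, or the fy/fx scale factors. A standalone sketch of that selection logic follows; the names are simplified, and the final scale-factor branch is an assumption, since the hunk ends at the CHECK.

#include <cstdint>
#include <stdexcept>
#include <vector>

// Simplified stand-in for NNResizeOp's dim resolution (not Dragon's API).
std::vector<int64_t> ResolveResizeDims(
    std::vector<int64_t> dims,                // input dims, e.g. NCHW
    int spatial_axis,                         // 2 for NCHW, 1 for NHWC
    const std::vector<int64_t>& dsize,        // explicit output H, W (may be empty)
    const std::vector<int64_t>& shape_like,   // dims of the shape-like tensor (may be empty)
    float fy, float fx) {                     // -1.f means "not set"
  if (!dsize.empty()) {
    for (int i = 0; i < 2; i++) dims[spatial_axis + i] = dsize[i];
  } else if (!shape_like.empty()) {
    for (int i = 0; i < 2; i++) dims[spatial_axis + i] = shape_like[spatial_axis + i];
  } else {
    if (fy == -1.f || fx == -1.f)
      throw std::runtime_error("The fx and fy should be set.");
    // Assumed behavior: scale the input extents (the hunk stops at the CHECK).
    dims[spatial_axis] = static_cast<int64_t>(dims[spatial_axis] * fy);
    dims[spatial_axis + 1] = static_cast<int64_t>(dims[spatial_axis + 1] * fx);
  }
  return dims;
}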
...@@ -71,10 +71,11 @@ void Pool2dOp<Context>::MAXRunWithType() { ...@@ -71,10 +71,11 @@ void Pool2dOp<Context>::MAXRunWithType() {
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
auto* Mdata = mask->template mutable_data<int, Context>(); auto* Mdata = mask->template mutable_data<int, Context>();
kernel::MAXPool2d(n, c, h, w, pool_h, pool_w, kernel::MAXPool2d(
n, c, h, w, pool_h, pool_w,
kernel_shape[0], kernel_shape[1], kernel_shape[0], kernel_shape[1],
stride[0], stride[1], pad_l[0], pad_l[1], stride[0], stride[1], pad_l[0], pad_l[1],
data_format, Xdata, Mdata, Ydata, ctx()); data_format, Xdata, Mdata, Ydata, ctx());
} }
template <class Context> template <typename T> template <class Context> template <typename T>
...@@ -82,10 +83,11 @@ void Pool2dOp<Context>::AVGRunWithType() { ...@@ -82,10 +83,11 @@ void Pool2dOp<Context>::AVGRunWithType() {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
kernel::AVGPool2d(n, c, h, w, pool_h, pool_w, kernel::AVGPool2d(
n, c, h, w, pool_h, pool_w,
kernel_shape[0], kernel_shape[1], kernel_shape[0], kernel_shape[1],
stride[0], stride[1], pad_l[0], pad_l[1], stride[0], stride[1], pad_l[0], pad_l[1],
data_format, Xdata, Ydata, ctx()); data_format, Xdata, Ydata, ctx());
} }
template <class Context> template <class Context>
...@@ -123,10 +125,11 @@ void Pool2dGradientOp<Context>::MAXRunWithType() { ...@@ -123,10 +125,11 @@ void Pool2dGradientOp<Context>::MAXRunWithType() {
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* Mdata = mask->template data<int, Context>(); auto* Mdata = mask->template data<int, Context>();
kernel::MAXPool2dGrad(n, c, h, w, pool_h, pool_w, kernel::MAXPool2dGrad(
n, c, h, w, pool_h, pool_w,
kernel_shape[0], kernel_shape[1], kernel_shape[0], kernel_shape[1],
stride[0], stride[1], pad_l[0], pad_l[1], stride[0], stride[1], pad_l[0], pad_l[1],
data_format, dYdata, Mdata, dXdata, ctx()); data_format, dYdata, Mdata, dXdata, ctx());
} }
template <class Context> template <typename T> template <class Context> template <typename T>
...@@ -134,10 +137,11 @@ void Pool2dGradientOp<Context>::AVGRunWithType() { ...@@ -134,10 +137,11 @@ void Pool2dGradientOp<Context>::AVGRunWithType() {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
kernel::AVGPool2dGrad(n, c, h, w, pool_h, pool_w, kernel::AVGPool2dGrad(
n, c, h, w, pool_h, pool_w,
kernel_shape[0], kernel_shape[1], kernel_shape[0], kernel_shape[1],
stride[0], stride[1], pad_l[0], pad_l[1], stride[0], stride[1], pad_l[0], pad_l[1],
data_format, dYdata, dXdata, ctx()); data_format, dYdata, dXdata, ctx());
} }
template <class Context> template <class Context>
......
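The pool_h / pool_w passed to these kernels are the precomputed output extents. As a reminder, the textbook floor-mode arithmetic is sketched below; whether Pool2dOp rounds with floor or ceil (Caffe-style) is not visible in this diff, so treat the formula as a reference only.

#include <cstdint>

// Conventional floor-mode pooling output size for one spatial axis.
// For ceil mode, replace the division with (num + stride - 1) / stride.
inline int64_t PoolOutDim(
    int64_t in, int64_t kernel,
    int64_t stride, int64_t pad_l, int64_t pad_r) {
  int64_t num = in + pad_l + pad_r - kernel;
  return num / stride + 1;
}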
...@@ -12,10 +12,15 @@ void ROIAlignOp<Context>::RunWithType() { ...@@ -12,10 +12,15 @@ void ROIAlignOp<Context>::RunWithType() {
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
kernel::ROIAlign( kernel::ROIAlign(
Input(0).dim(1), Input(0).dim(2), Input(0).dim(3), Input(0).dim(1),
pool_h, pool_w, Input(1).dim(0), Input(0).dim(2),
spatial_scale, sampling_ratio, Input(0).dim(3),
Xdata, Rdata, Ydata, ctx()); pool_h, pool_w,
Input(1).dim(0),
spatial_scale,
sampling_ratio,
Xdata, Rdata,
Ydata, ctx());
} }
template <class Context> template <class Context>
...@@ -47,10 +52,15 @@ void ROIAlignGradientOp<Context>::RunWithType() { ...@@ -47,10 +52,15 @@ void ROIAlignGradientOp<Context>::RunWithType() {
math::Set(Output(0)->count(), cast::to<T>(0.f), dXdata, ctx()); math::Set(Output(0)->count(), cast::to<T>(0.f), dXdata, ctx());
kernel::ROIAlignGrad( kernel::ROIAlignGrad(
Output(0)->dim(1), Output(0)->dim(2), Output(0)->dim(3), Output(0)->dim(1),
pool_h, pool_w, Input(1).dim(0), Output(0)->dim(2),
spatial_scale, sampling_ratio, Output(0)->dim(3),
dYdata, Rdata, dXdata, ctx()); pool_h, pool_w,
Input(1).dim(0),
spatial_scale,
sampling_ratio,
dYdata, Rdata,
dXdata, ctx());
} }
template <class Context> template <class Context>
...@@ -66,10 +76,15 @@ void ROIAlignGradientOp<Context>::RunWithFloat16() { ...@@ -66,10 +76,15 @@ void ROIAlignGradientOp<Context>::RunWithFloat16() {
kernel::TypeA2B(Input(-1).count(), dYdata, WSdata[0], ctx()); kernel::TypeA2B(Input(-1).count(), dYdata, WSdata[0], ctx());
kernel::ROIAlignGrad( kernel::ROIAlignGrad(
Output(0)->dim(1), Output(0)->dim(2), Output(0)->dim(3), Output(0)->dim(1),
pool_h, pool_w, Input(1).dim(0), Output(0)->dim(2),
spatial_scale, sampling_ratio, Output(0)->dim(3),
WSdata[0], Rdata, WSdata[1], ctx()); pool_h, pool_w,
Input(1).dim(0),
spatial_scale,
sampling_ratio,
WSdata[0], Rdata,
WSdata[1], ctx());
kernel::TypeA2B(Output(0)->count(), WSdata[1], dXdata, ctx()); kernel::TypeA2B(Output(0)->count(), WSdata[1], dXdata, ctx());
} }
......
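RunWithFloat16 above follows a cast-compute-cast fallback: convert the half-precision dY into a float workspace buffer with kernel::TypeA2B, run the float-only ROIAlignGrad, then convert the result back. A generic sketch of that pattern (illustrative names, not Dragon's API) follows.

#include <cstddef>

// Generic fp16-fallback wrapper: HalfT is the storage type, the two cast
// functors mirror kernel::TypeA2B, and Kernel is the float-only gradient.
template <typename HalfT, typename Kernel, typename ToFloat, typename ToHalf>
void RunGradViaFloat(
    const HalfT* dy, HalfT* dx,
    float* ws0, float* ws1,          // scratch buffers (WSdata[0] / WSdata[1])
    size_t dy_count, size_t dx_count,
    Kernel kernel, ToFloat to_float, ToHalf to_half) {
  to_float(dy_count, dy, ws0);       // half -> float
  kernel(ws0, ws1);                  // gradient math in float
  to_half(dx_count, ws1, dx);        // float -> half
}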
...@@ -16,9 +16,14 @@ void ROIPoolOp<Context>::RunWithType() { ...@@ -16,9 +16,14 @@ void ROIPoolOp<Context>::RunWithType() {
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
kernel::ROIPool( kernel::ROIPool(
Input(0).dim(1), Input(0).dim(2), Input(0).dim(3), Input(0).dim(1),
pool_h, pool_w, Input(1).dim(0), spatial_scale, Input(0).dim(2),
Xdata, Rdata, Mdata, Ydata, ctx()); Input(0).dim(3),
pool_h, pool_w,
Input(1).dim(0),
spatial_scale,
Xdata, Rdata, Mdata,
Ydata, ctx());
} }
template <class Context> template <class Context>
...@@ -51,10 +56,15 @@ void ROIPoolGradientOp<Context>::RunWithType() { ...@@ -51,10 +56,15 @@ void ROIPoolGradientOp<Context>::RunWithType() {
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
kernel::ROIPoolGrad( kernel::ROIPoolGrad(
Output(0)->dim(0), Output(0)->dim(1), Output(0)->dim(0),
Output(0)->dim(2), Output(0)->dim(3), Output(0)->dim(1),
pool_h, pool_w, Input(1).dim(0), spatial_scale, Output(0)->dim(2),
dYdata, Rdata, Mdata, dXdata, ctx()); Output(0)->dim(3),
pool_h, pool_w,
Input(1).dim(0),
spatial_scale,
dYdata, Rdata, Mdata,
dXdata, ctx());
} }
template <class Context> template <class Context>
...@@ -73,10 +83,15 @@ void ROIPoolGradientOp<Context>::RunWithFloat16() { ...@@ -73,10 +83,15 @@ void ROIPoolGradientOp<Context>::RunWithFloat16() {
kernel::TypeA2B(Input(-1).count(), dYdata, WSdata[0], ctx()); kernel::TypeA2B(Input(-1).count(), dYdata, WSdata[0], ctx());
kernel::ROIPoolGrad( kernel::ROIPoolGrad(
Output(0)->dim(0), Output(0)->dim(1), Output(0)->dim(0),
Output(0)->dim(2), Output(0)->dim(3), Output(0)->dim(1),
pool_h, pool_w, Input(1).dim(0), spatial_scale, Output(0)->dim(2),
WSdata[0], Rdata, Mdata, WSdata[1], ctx()); Output(0)->dim(3),
pool_h, pool_w,
Input(1).dim(0),
spatial_scale,
WSdata[0], Rdata, Mdata,
WSdata[1], ctx());
kernel::TypeA2B(Output(0)->count(), WSdata[1], dXdata, ctx()); kernel::TypeA2B(Output(0)->count(), WSdata[1], dXdata, ctx());
} }
......
...@@ -145,18 +145,6 @@ message GradientProto { ...@@ -145,18 +145,6 @@ message GradientProto {
optional string external = 3; optional string external = 3;
} }
// Record the updater information
message UpdaterProto {
// The operator name to use.
optional string name = 1;
// The operator type.
optional string type = 2;
// The tensor to update.
repeated string tensor = 3;
// The arguments.
repeated Argument arg = 4;
}
// Graph Definition // Graph Definition
message GraphDef { message GraphDef {
// The graph name. // The graph name.
...@@ -181,6 +169,4 @@ message GraphDef { ...@@ -181,6 +169,4 @@ message GraphDef {
// The gradients information. // The gradients information.
repeated GradientProto gradient = 9; repeated GradientProto gradient = 9;
// The updaters information.
repeated UpdaterProto updater = 10;
} }
\ No newline at end of file
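With UpdaterProto and the repeated updater field removed, a graph definition no longer carries updater metadata. Below is a hedged C++ sketch of filling the remaining fields through the generated protobuf API; the dragon namespace, the header path, and the set_name call are assumptions, while gradient (field 9) and external (field 3) appear verbatim in the hunk above.

#include "proto/dragon.pb.h"   // assumed path of the generated header

int main() {
  dragon::GraphDef def;                // namespace assumed
  def.set_name("ExampleGraph");        // "The graph name." field
  auto* grad = def.add_gradient();     // repeated GradientProto gradient = 9;
  grad->set_external("loss");          // optional string external = 3;
  // There is no add_updater() anymore: UpdaterProto was dropped, so update
  // rules have to be described outside of GraphDef.
  return 0;
}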