Commit 96f7277e by Ting PAN

Add Cambricon's CNML Context

1 parent 5cd0761b
Showing with 4958 additions and 1148 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in Dragon.
Dragon 0.2.2.12 (20181120)
DRAGON_VERSION == 2212
Changes (w.r.t. Dragon 0.2.2.11):
Preview Features:
- Added Cambricon's CNML context.
- Added the support for Int8(Char) Tensor.
- Removed the cuda device id query from pointer.
- Added ``DropBlock2dOp``
- Added ``MaximumOp``, ``MinimumOp``, ``NLLLossOp``.
- Added CuDNN support for ``BiasAddOp``.
- Optimized memory usage of ``DropoutOp``.
- Replaced ``thread_local`` with platform TLS solution.
- Changed the default norm eps from 1e-3 to 1e-5,
affected: ``BatchNorm``, ``BatchRenorm``, ``GroupNorm``, ``InstanceNorm``, ``L2Norm``.
- Enforced CUDA FP16 support (i.e. Removed ``WITH_CUDA_FP16``).
- [PyTorch] Added ``torch.one_hot``.
- [PyTorch] Added ``torch.log``, ``Tensor.log``, ``torch.exp`` and ``Tensor.exp``.
- [PyTorch] Added ``torch.minimum``, ``torch.maximum``,
``torch.clamp``, ``Tensor.clamp``, ``Tensor.clamp_``.
- [PyTorch] Added ``nn.ELU`` and ``nn.SELU``.
- [PyTorch] Added ``nn.GroupNorm``.
- [PyTorch] Added ``nn.NLLLoss``, ``nn.BCEWithLogitsLoss``,
``nn.L1Loss``, ``nn.MSELoss``, ``nn.SmoothL1Loss``.
- [PyTorch] Added ``nn.DropBlock2d``.
- [PyTorch] Added ``train`` and ``eval`` mode for Module,
affected: ``nn.BatchNorm``, ``nn.Dropout``.
- [PyTorch] Deprecated the ``size_average`` and ``reduce`` in
``nn.Loss``, added ``reduction`` instead.
- [PyTorch] ``torch.save`` can save both ``torch.Tensor`` and other pickle values.
- [PyCaffe] Added ``DropBlockLayer``.
Bugs fixed:
- Fixed the uncomputed output in ``BiasAddGradientOp``.
- Fixed the incorrect gradients of ``ClipGradientOp``.
- Fixed the wrong results of ``math::Inv`` under ``CPUContext``.
- Fixed the issue that the default device is used on initializing NCCL.
- Removed the strict shape check in ``SmoothL1Op``.
- Fixed wrong CXX API exporting under Win32.
- [PyTorch] Fixed an issue that multiple ``GradientGather`` are triggered by one Operator.
- [PyTorch] Fixed the schema check by in-place fundamental ops.
- [PyTorch] Fixed the missing shape and dtype after ``Tensor.copy_``.
- [PyTorch] Fixed an issue that ``Tensor.fill_`` and ``Tensor.zero_``
will change the data type of a non-empty Tensor.
- [PyTorch] Fixed the Python2 Int(s) check.
------------------------------------------------------------------------
\ No newline at end of file
......@@ -8,10 +8,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
unzip \
ssh \
vim \
libtbb-dev \
libsdl2-dev \
libnuma-dev \
libprotobuf-dev \
protobuf-compiler \
libopencv-dev \
libopenblas-dev \
libboost-all-dev \
python3-pip \
python3-dev \
python3-pyqt4 \
......@@ -40,3 +44,5 @@ RUN git clone https://github.com/seetaresearch/Dragon.git && \
wget http://dragon.seetatech.com/download/docker/ubuntu-16.04-cpu-openblas/CMakeLists.txt && \
mkdir build && cd build && cmake .. && make install -j8 && cd .. && rm -rf build && \
cd python && python3 setup.py install
RUN rm /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python && ln -s /usr/bin/pip3 /usr/bin/pip
\ No newline at end of file
......@@ -9,10 +9,14 @@ RUN rm /etc/apt/sources.list.d/cuda.list && rm /etc/apt/sources.list.d/nvidia-ml
unzip \
ssh \
vim \
libtbb-dev \
libsdl2-dev \
libnuma-dev \
libprotobuf-dev \
protobuf-compiler \
libopencv-dev \
libopenblas-dev \
libboost-all-dev \
libnccl2 \
libnccl-dev \
python3-pip \
......@@ -43,3 +47,5 @@ RUN git clone https://github.com/seetaresearch/Dragon.git && \
wget http://dragon.seetatech.com/download/docker/ubuntu-16.04-cuda9.0-cudnn7/CMakeLists.txt && \
mkdir build && cd build && cmake .. && make install -j8 && cd .. && rm -rf build && \
cd python && python3 setup.py install
RUN rm /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python && ln -s /usr/bin/pip3 /usr/bin/pip
\ No newline at end of file
......@@ -17,7 +17,6 @@ option(WITH_SSE "Set ON to use SSE 4.1" ON)
option(WITH_MPI "Set ON to use MPI" OFF)
option(WITH_MPI_CUDA "Set ON to use MPI-CUDA" OFF)
option(WITH_MPI_NCCL "Set ON to use MPI-NCCL" OFF)
option(WITH_CUDA_FP16 "Set ON to use FP16" ON)
# Set your 3rdparty
set(3RDPARTY_DIR ${PROJECT_SOURCE_DIR}/../3rdparty)
......@@ -163,10 +162,6 @@ if (WITH_MPI_NCCL)
ADD_DEFINITIONS(-DWITH_MPI_NCCL)
message(STATUS "Use MPI-NCCL [Optional]")
endif()
if (WITH_CUDA_FP16)
ADD_DEFINITIONS(-DWITH_CUDA_FP16)
message(STATUS "Use CUDA FP16 [Optional]")
endif()
# ---[ Flags
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_ARCH}")
......
......@@ -13,6 +13,7 @@
#define DRAGON_CORE_COMMON_H_
#include <ctime>
#include <random>
#include <climits>
#include <memory>
#include <string>
......@@ -49,25 +50,35 @@ using Map = std::unordered_map<Key, Value>;
template <typename Value>
using Set = std::unordered_set<Value> ;
/*
* Define the Kernel version.
*
* | Major(2) | Minor(2) | Patch(11) |
*/
#define DRAGON_VERSION 2211
/* * * * * * * * * * * * * * * * * * * * *
* *
* Kernel Version *
* *
* Major(2) | Minor(2) | Patch(12) *
* *
* * * * * * * * * * * * * * * * * * * * */
#define DRAGON_VERSION 2212
/* * * * * * * * * * * * * * * * * * * * *
* *
* Default Random Seed *
* *
* * * * * * * * * * * * * * * * * * * * */
/*
* Define the default random seed.
*/
#define DEFAULT_RNG_SEED 3
/*
* Define the common marcos.
*/
#ifdef _MSC_VER
#if _MSC_VER < 1900
#define thread_local __declspec(thread)
#endif
/* * * * * * * * * * * * * * * * * * * * *
* *
* Macros *
* *
* * * * * * * * * * * * * * * * * * * * */
// Avoid using "thread_local" on VS2013 or older Xcode (lacking support)
#if defined(__clang__) || defined(__GNUC__)
#define TLS_OBJECT __thread
#else
#define TLS_OBJECT __declspec(thread)
#endif
#define CONCATENATE_IMPL(s1, s2) s1##s2
......
......@@ -12,15 +12,8 @@
#ifndef DRAGON_CORE_CONTEXT_H_
#define DRAGON_CORE_CONTEXT_H_
#include <random>
#include <ctime>
#include "core/common.h"
#ifdef WITH_CUDA
#include "utils/cuda_device.h"
#endif
namespace dragon {
class CPUContext {
......@@ -45,7 +38,7 @@ class CPUContext {
#else
data = malloc(nbytes);
#endif
CHECK(data) << "Malloc mem: " << nbytes << " bytes failed.";
CHECK(data) << "\nMalloc mem: " << nbytes << " bytes failed.";
return data;
}
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_CORE_CONTEXT_CNML_H_
#define DRAGON_CORE_CONTEXT_CNML_H_
/* CAMBRICON's CNRT && CNML Environment */
#include "core/common.h"
struct cnrtStream;
struct cnmlCpuTensor;
struct cnmlTensor;
struct cnmlFusionOp;
typedef struct cnrtStream* cnrtStream_t;
typedef struct cnmlCpuTensor* cnmlCpuTensor_t;
typedef struct cnmlTensor* cnmlTensor_t;
typedef struct cnmlFusionOp* cnmlFusionOp_t;
namespace dragon {
class CNRTObject;
class CNMLContext {
 public:
    /*! Construct from a DeviceOption; requires device_type() == CNML. */
    CNMLContext(const DeviceOption& option)
        : device_id_(option.device_id()),
          random_seed_(option.has_random_seed() ?
              option.random_seed() : DEFAULT_RNG_SEED) {
        CHECK_EQ(option.device_type(), CNML);
    }

    /*! Construct for an explicit device id with the default RNG seed. */
    CNMLContext(const int device_id = 0)
        : device_id_(device_id),
          random_seed_(DEFAULT_RNG_SEED) {}

    void SwitchToDevice(int stream_id);

    // Stream 0 is reserved for the default stream (mirrors CUDAContext),
    // so the parameterless overload selects stream 1.
    inline void SwitchToDevice() { SwitchToDevice(1); }

    // NOTE(review): "Compution" spelling kept — it matches the shared
    // context interface used by the other contexts in this project.
    void FinishDeviceCompution();

    /*! Allocate nbytes of device memory. */
    static void* New(size_t nbytes);

    /*! Zero-fill nbytes at ptr. */
    static void Memset(
        size_t nbytes,
        void* ptr);

    inline void MemsetAsync(
        size_t nbytes,
        void* ptr) {
        Memset(nbytes, ptr);
    }

    template<class DstContext, class SrcContext>
    static void Memcpy(
        size_t nbytes,
        void* dst,
        const void* src);

    template<class DstContext, class SrcContext>
    inline void MemcpyAsync(
        size_t nbytes,
        void* dst,
        const void* src) {
        // Fixed: arguments must follow Memcpy's declared
        // (nbytes, dst, src) order; the previous (dst, src, nbytes)
        // call would fail once this template is instantiated.
        Memcpy<DstContext, SrcContext>(nbytes, dst, src);
    }

    /*! Free memory returned by New(). */
    static void Delete(void* data);

    inline int device_id() const { return device_id_; }
    inline void set_stream_id(int stream_id) { stream_id_ = stream_id; }

    inline cnrtStream_t cnrt_stream() {
        return cnrt_stream(device_id_, stream_id_);
    }

    static cnrtStream_t cnrt_stream(
        int device_id,
        int stream_id);

    static std::mutex& mutex() { static std::mutex m; return m; }

    // NOTE(review): this commit replaces "thread_local" with the TLS_OBJECT
    // macro elsewhere (see core/common.h) — confirm this member compiles
    // on toolchains lacking C++11 thread_local (e.g. VS2013).
    static thread_local CNRTObject cnrt_object_;

 private:
    int device_id_, stream_id_ = 1, random_seed_;
    unique_ptr<std::mt19937> rand_generator_;
};
} // namespace dragon
#endif // DRAGON_CORE_CONTEXT_CNML_H_
\ No newline at end of file
......@@ -12,8 +12,9 @@
#ifndef DRAGON_CORE_CONTEXT_CUDA_H_
#define DRAGON_CORE_CONTEXT_CUDA_H_
/* NVIDIA's CUDA Environment */
#include "core/common.h"
#include "core/context.h"
#include "utils/cuda_device.h"
#include "utils/cudnn_device.h"
......@@ -52,13 +53,13 @@ class CUDAObject {
}
// follow the caffe2,
// each device takes a group of non-bl0cking streams
// each device takes a group of non-blocking streams
// the stream 0 is reserved for default stream,
// as some computations really require it,
// e.g. cublas.asum() and mixed cpu/cuda operations
// besides, somes calls, such as cudnn.conv() and cudnn.rnn(),
// produce wrong results if running them on non-blocking streams
// note that caffe2 also use default streams (within CuDNNState)
// note that caffe2 also uses default streams (within CuDNNState)
cudaStream_t GetStream(int device_id, int stream_id) {
vector<cudaStream_t>& dev_streams = cuda_streams[device_id];
if (dev_streams.size() <= (unsigned)stream_id)
......@@ -140,7 +141,7 @@ class CUDAContext {
inline static void* New(size_t nbytes) {
void* data;
cudaMalloc(&data, nbytes);
CHECK(data) << "Malloc cuda mem: "
CHECK(data) << "\nMalloc cuda mem: "
<< nbytes << " bytes failed.";
return data;
}
......@@ -199,11 +200,11 @@ class CUDAContext {
static cudaStream_t cuda_stream(
int device_id,
int stream_id) {
return cuda_object_.GetStream(device_id, stream_id);
return cuda_object()->GetStream(device_id, stream_id);
}
cublasHandle_t cublas_handle() {
return cuda_object_.GetCuBLASHandle(device_id_, stream_id_);
return cuda_object()->GetCuBLASHandle(device_id_, stream_id_);
}
inline std::mt19937* rand_generator() {
......@@ -227,13 +228,17 @@ class CUDAContext {
#ifdef WITH_CUDNN
cudnnHandle_t cudnn_handle() {
return cuda_object_.GetCuDNNHandle(device_id_, stream_id_);
return cuda_object()->GetCuDNNHandle(device_id_, stream_id_);
}
#endif
static std::mutex& mutex() { static std::mutex m; return m; }
static thread_local CUDAObject cuda_object_;
static CUDAObject* cuda_object() {
static TLS_OBJECT CUDAObject* cuda_object_;
if (!cuda_object_) cuda_object_ = new CUDAObject();
return cuda_object_;
}
private:
int device_id_, stream_id_ = 1, random_seed_;
......
......@@ -48,10 +48,10 @@ class GraphBase {
Workspace* ws_;
};
class Graph final : public GraphBase {
class Graph : public GraphBase {
public:
Graph(const GraphDef& meta_graph, Workspace* ws);
~Graph() { for (auto* op : ops_) delete op; }
virtual ~Graph() { for (auto* op : ops_) delete op; }
bool Create(
const GraphDef& optimized_graph,
......@@ -73,7 +73,7 @@ class Graph final : public GraphBase {
inline Workspace* ws() const { return ws_; }
private:
protected:
void ForwardShareDyeing(string u, string ancestor);
void ForwardPruneDyeing(
string u,
......@@ -98,6 +98,9 @@ DECLARE_REGISTRY(
const GraphDef&,
Workspace*);
#define REGISTER_GRAPH(name, ...) \
REGISTER_CLASS(GraphRegistry, name, __VA_ARGS__)
} // namespace dragon
#endif // DRAGON_CORE_GRAPH_H_
\ No newline at end of file
......@@ -12,30 +12,49 @@
#ifndef DRAGON_CORE_MIXEDMEM_H_
#define DRAGON_CORE_MIXEDMEM_H_
#include "context.h"
#include "context_cuda.h"
#include "core/context.h"
#include "core/context_cuda.h"
#include "core/context_cnml.h"
namespace dragon {
typedef enum {
NCHW,
NHWC,
} DataOrder;
class MixedMemory {
public:
enum State {
typedef enum {
UNINITIALIZED,
STATE_AT_CPU,
STATE_AT_CUDA,
STATE_AT_CNML,
SWITCHED,
SYNCED };
SYNCED,
} State;
MixedMemory() : cpu_ptr_(nullptr), cuda_ptr_(nullptr) {}
MixedMemory() : cpu_ptr_(nullptr),
cuda_ptr_(nullptr), cnml_ptr_(nullptr) {}
MixedMemory(const TypeMeta& meta, const size_t nbytes)
: meta_(meta), nbytes_(nbytes),
cpu_ptr_(nullptr), cuda_ptr_(nullptr) {}
: meta_(meta), nbytes_(nbytes), cpu_ptr_(nullptr),
cuda_ptr_(nullptr), cnml_ptr_(nullptr) {}
~MixedMemory();
const void* cpu_data();
const void* cuda_data();
const void* cnml_data();
void* mutable_cpu_data();
void* mutable_cuda_data();
void* mutable_cnml_data();
void* malloc_cnml_data();
void fetch_cnml_data(void** data);
cnmlCpuTensor_t& cnml_cpu_tensor();
cnmlTensor_t& cnml_mlu_tensor();
void set_cpu_data(void* cpu_ptr, size_t nbytes);
void SwitchToDevice();
......@@ -43,23 +62,35 @@ class MixedMemory {
inline size_t nbytes() const { return nbytes_; }
inline void* cpu_ptr() { state_ = STATE_AT_CPU; return cpu_ptr_; }
inline void* cuda_ptr() { state_ = STATE_AT_CUDA; return cuda_ptr_; }
inline size_t nchunks() const { return nchunks_; }
void set_nchunks(size_t nchunks) { nchunks_ = nchunks; }
inline State state() const { return state_; }
inline DataOrder order() const { return order_; }
inline void set_order(DataOrder order) { order_ = order; }
const Map<string, string> info() const;
void ToCUDA();
void ToCPU();
void ToCUDA();
private:
void* cpu_ptr_, *cuda_ptr_;
bool own_cpu_ptr_ = true;
State state_ = UNINITIALIZED;
size_t nbytes_ = 0;
TypeMeta meta_;
size_t nbytes_ = 0, nchunks_ = 1;
DataOrder order_ = NCHW;
State state_ = UNINITIALIZED;
void* cpu_ptr_, *cuda_ptr_, *cnml_ptr_;
int own_cpu_ptr_ = 1, ptr_device_ = 0;
/* For CAMBRICON's CNML Environment */
cnmlCpuTensor_t cnml_cpu_tensor_ = nullptr;
cnmlTensor_t cnml_mlu_tensor_ = nullptr;
};
} // namespace dragon
#endif
\ No newline at end of file
#endif // DRAGON_CORE_MIXEDMEM_H_
\ No newline at end of file
......@@ -44,7 +44,9 @@ class OperatorBase {
const string& anchor);
inline void SwitchToPhase(const string& phase) { phase_ = phase; }
virtual void Run(int stream_id = 1) { NOT_IMPLEMENTED; }
virtual void Fusion(void* graph) { NOT_IMPLEMENTED; }
inline const string& name() const { return def_.name(); }
inline const string& type() const { return def_.type(); }
......@@ -186,12 +188,22 @@ DECLARE_REGISTRY(
const OperatorDef&,
Workspace*);
/* NVIDIA's Accelerated Library - CUDNN */
DECLARE_REGISTRY(
CUDNNOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
/* CAMBRICON's Accelerated Library - CNML */
DECLARE_REGISTRY(
CNMLOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
#define TENSOR_FILL_WITH_TYPE(tensor, shape, type) \
if (tensor.count() == 0) { \
CHECK(ws()->GetFiller(tensor.name())) \
......@@ -310,6 +322,9 @@ DECLARE_REGISTRY(
#define INSTANTIATE_CUDNN_OPERATOR(name) \
template class CuDNN##name##Op<CUDAContext>;
#define INSTANTIATE_CNML_OPERATOR(name) \
template class CnML##name##Op<CNMLContext>;
#define REGISTER_CPU_OPERATOR(name, ...) \
REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
......@@ -319,6 +334,9 @@ DECLARE_REGISTRY(
#define REGISTER_CUDNN_OPERATOR(name, ...) \
REGISTER_CLASS(CUDNNOperatorRegistry, name, __VA_ARGS__)
#define REGISTER_CNML_OPERATOR(name, ...) \
REGISTER_CLASS(CNMLOperatorRegistry, name, __VA_ARGS__)
#define DEPLOY_CPU(name) \
REGISTER_CPU_OPERATOR(name, name##Op<CPUContext>); \
INSTANTIATE_OPERATOR(name, CPUContext);
......@@ -336,6 +354,10 @@ DECLARE_REGISTRY(
REGISTER_CUDNN_OPERATOR(name, CuDNN##name##Op<CUDAContext>); \
INSTANTIATE_CUDNN_OPERATOR(name);
#define DEPLOY_CNML(name) \
REGISTER_CNML_OPERATOR(name, CnML##name##Op<CNMLContext>); \
INSTANTIATE_CNML_OPERATOR(name);
} // namespace dragon
#endif // DRAGON_CORE_OPERATOR_H_
\ No newline at end of file
......@@ -10,7 +10,7 @@
// ------------------------------------------------------------
#ifndef DRAGON_CORE_TENSOR_H_
#define DRAONG_CORE_TENSOR_H_
#define DRAGON_CORE_TENSOR_H_
#include "core/common.h"
#include "core/mixedmem.h"
......@@ -103,16 +103,20 @@ class Tensor {
return offset;
}
inline string DimString() const {
if (ndim() == 0) return "(0,)";
static inline string DimString(
const vector<TIndex>& dims) {
if (dims.size() == 0) return "(0,)";
std::stringstream ss;
ss << "(";
for (int i = 0; i < ndim() - 1; i++) ss << dim(i) << ",";
if (ndim() == 1) ss << dim(0) << ",)";
else ss << dim(ndim() - 1) << ")";
for (int i = 0; i < dims.size() - 1; i++)
ss << dims[i] << ",";
if (dims.size() == 1) ss << dims[0] << ",)";
else ss << dims.back() << ")";
return ss.str();
}
inline string DimString() const { return DimString(dims_); }
inline bool is_corrupted() const { return is_corrupted_; }
inline void Corrupt() { is_corrupted_ = true; }
......@@ -156,9 +160,12 @@ class Tensor {
} else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CUDAContext>()) {
*data_ptr = mem->mutable_cuda_data();
} else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CNMLContext>()) {
*data_ptr = mem->mutable_cnml_data();
} else {
LOG(FATAL) << "Unknown memory type. "
<< "Only CPU or CUDA is supported.";
LOG(FATAL) << "Unknown memory type.\n"
<< "Only CPU, CUDA and CNML are supported.";
}
}
}
......@@ -173,9 +180,12 @@ class Tensor {
} else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CUDAContext>()) {
return mem->cuda_data();
} else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CNMLContext>()) {
return mem->cnml_data();
} else {
LOG(FATAL) << "Unknown memory type. "
<< "Only CPU or CUDA are supported.";
LOG(FATAL) << "Unknown memory type.\n"
<< "Only CPU, CUDA, and CNML are supported.";
return nullptr;
}
}
......@@ -295,4 +305,4 @@ class Tensor {
} // namespace dragon
#endif // DRAONG_CORE_TENSOR_H_
\ No newline at end of file
#endif // DRAGON_CORE_TENSOR_H_
\ No newline at end of file
......@@ -18,6 +18,9 @@
namespace dragon {
typedef char int8;
typedef unsigned char uint8;
#ifdef _MSC_VER
typedef struct __declspec(align(2)) {
......@@ -49,8 +52,8 @@ inline const TypeMeta& TypeStringToMeta(
{ "int64", TypeMeta::Make<int64_t>() },
{ "float64", TypeMeta::Make<double>() },
{ "float16", TypeMeta::Make<float16>() },
{ "uint8", TypeMeta::Make<uint8_t>() },
{ "int8", TypeMeta::Make<char>() },
{ "uint8", TypeMeta::Make<uint8>() },
{ "int8", TypeMeta::Make<int8>() },
};
static TypeMeta unknown_type;
return s2m_type_map.count(str_type) ?
......@@ -66,8 +69,8 @@ inline const std::string TypeMetaToString(
{ TypeMeta::Id<int64_t>(), "int64" },
{ TypeMeta::Id<double>(), "float64", },
{ TypeMeta::Id<float16>(), "float16" },
{ TypeMeta::Id<uint8_t>(), "uint8" },
{ TypeMeta::Id<char>(), "int8" }
{ TypeMeta::Id<uint8>(), "uint8" },
{ TypeMeta::Id<int8>(), "int8" }
};
return m2s_type_map.count(meta.id()) ?
m2s_type_map[meta.id()] : "unknown";
......
......@@ -47,8 +47,8 @@ class Workspace {
recompute_flag->Reshape({ 1 });
recompute_flag->mutable_data<bool, CPUContext>()[0] = false;
for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) {
string name = "/opt/mirror_stage/buffer_" +
dragon_cast<string, int>(i);
string name = "/opt/mirror_stage/buffer_"
+ std::to_string(i);
Tensor* buffer = CreateTensor(name);
head->mutable_data<string, CPUContext>()[i] = "";
}
......@@ -277,7 +277,8 @@ class Workspace {
inline bool SetProxy(
const string& key,
const string& proxy) {
if (proxy_map_.count(key))
if (key == proxy) return false;
if (proxy_map_.count(key) > 0)
return proxy_map_[key] == proxy;
proxy_map_[key] = proxy;
return true;
......
......@@ -23,7 +23,7 @@ class DropoutOp final : public Operator<Context> {
DropoutOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
use_scale(OperatorBase::Arg<bool>("scale", true)) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5);
GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
}
USE_OPERATOR_FUNCTIONS;
......@@ -42,7 +42,7 @@ class DropoutGradientOp final : public Operator<Context> {
DropoutGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
use_scale(OperatorBase::Arg<bool>("scale", true)) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5);
GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
}
USE_OPERATOR_FUNCTIONS;
......@@ -53,7 +53,6 @@ class DropoutGradientOp final : public Operator<Context> {
protected:
DECLARE_ARGUMENT_WITH_DESC(float, prob);
bool use_scale;
Tensor* mask;
};
DEFINE_ARGUMENT_WITH_DESC(float, DropoutOp, prob);
......@@ -70,7 +69,7 @@ public:
: Operator<Context>(def, ws), states_initialized(false),
use_scale(OperatorBase::Arg<bool>("scale", true)),
random_seed(DEFAULT_RNG_SEED) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5);
GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateDropoutDescriptor(&dropout_desc));
......@@ -101,7 +100,7 @@ public:
: Operator<Context>(def, ws), states_initialized(false),
use_scale(OperatorBase::Arg<bool>("scale", true)),
random_seed(DEFAULT_RNG_SEED) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5);
GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateDropoutDescriptor(&dropout_desc));
......
......@@ -21,7 +21,7 @@ class ReluOp : public Operator<Context> {
public:
ReluOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
slope(OperatorBase::Arg<float>("slope", 0.0)) {}
slope(OperatorBase::Arg<float>("slope", 0.f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
......@@ -36,7 +36,7 @@ class ReluGradientOp : public Operator<Context> {
public:
ReluGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
slope(OperatorBase::Arg<float>("slope", 0.0)) {}
slope(OperatorBase::Arg<float>("slope", 0.f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
......
......@@ -48,8 +48,6 @@ class SoftmaxGradientOp final : public Operator<Context> {
#ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNSoftmaxOp final : public Operator<Context> {
public:
......@@ -70,8 +68,7 @@ class CuDNNSoftmaxOp final : public Operator<Context> {
template <typename T> void RunWithType();
protected:
int axis;
TIndex outer_dim, inner_dim;
TIndex axis, outer_dim, inner_dim;
cudnnTensorDescriptor_t input_desc, output_desc;
};
......@@ -95,8 +92,7 @@ class CuDNNSoftmaxGradientOp final : public Operator<Context> {
template <typename T> void RunWithType();
protected:
int axis;
TIndex outer_dim, inner_dim;
TIndex axis, outer_dim, inner_dim;
cudnnTensorDescriptor_t input_desc, output_desc;
};
......
......@@ -55,7 +55,7 @@ class AffineGradientOp final : public Operator<Context> {
#ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
#if CUDNN_VERSION_MIN(6, 0, 0)
template <class Context>
class CuDNNAffineOpBase : public Operator<Context> {
......@@ -152,6 +152,8 @@ protected:
Tensor sum_result;
};
#endif
#endif // WITH_CUDNN
} // namespace dragon
......
......@@ -36,11 +36,17 @@ class ClipOp final : public Operator<Context> {
template <class Context>
class ClipGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(ClipGradientOp);
ClipGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
low(OperatorBase::Arg<float>("low", -FLT_MAX)),
high(OperatorBase::Arg<float>("high", FLT_MAX)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float low, high;
};
} // namespace dragon
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_ARITHMETIC_MAXIMUM_OP_H_
#define DRAGON_OPERATORS_ARITHMETIC_MAXIMUM_OP_H_
#include "core/operator.h"
namespace dragon {
// Element-wise maximum of two inputs (per changelog: ``MaximumOp``).
// Supports both a same-shape path and a broadcast path; the exact
// broadcast semantics live in the .cc/.cu implementation — confirm there.
template <class Context>
class MaximumOp final : public Operator<Context> {
public:
// Trivial ctor/dtor via the project's helper macro.
USE_SIMPLE_CTOR_DTOR(MaximumOp);
USE_OPERATOR_FUNCTIONS;
// Dispatches to the typed implementation by input dtype/shapes.
void RunOnDevice() override;
// Same-shape (element-wise) path.
template <typename T> void EltwiseRunWithType();
// Path where one input is broadcast against the other.
template <typename T> void BroadcastRunWithType();
};
// Backward pass for MaximumOp: routes the incoming gradient to the
// input(s) that produced the maximum. Exact masking logic is in the
// .cc/.cu implementation — confirm there.
template <class Context>
class MaximumGradientOp final : public Operator<Context> {
public:
// Trivial ctor/dtor via the project's helper macro.
USE_SIMPLE_CTOR_DTOR(MaximumGradientOp);
USE_OPERATOR_FUNCTIONS;
// Dispatches to the typed implementation by input dtype/shapes.
void RunOnDevice() override;
// Same-shape (element-wise) gradient path.
template <typename T> void EltwiseRunWithType();
// Gradient path for the broadcast case.
template <typename T> void BroadcastRunWithType();
};
} // namespace dragon
#endif // DRAGON_OPERATORS_ARITHMETIC_MAXIMUM_OP_H_
\ No newline at end of file
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_ARITHMETIC_MINIMUM_OP_H_
#define DRAGON_OPERATORS_ARITHMETIC_MINIMUM_OP_H_
#include "core/operator.h"
namespace dragon {
// Element-wise minimum of two inputs (per changelog: ``MinimumOp``).
// Supports both a same-shape path and a broadcast path; the exact
// broadcast semantics live in the .cc/.cu implementation — confirm there.
template <class Context>
class MinimumOp final : public Operator<Context> {
public:
// Trivial ctor/dtor via the project's helper macro.
USE_SIMPLE_CTOR_DTOR(MinimumOp);
USE_OPERATOR_FUNCTIONS;
// Dispatches to the typed implementation by input dtype/shapes.
void RunOnDevice() override;
// Same-shape (element-wise) path.
template <typename T> void EltwiseRunWithType();
// Path where one input is broadcast against the other.
template <typename T> void BroadcastRunWithType();
};
// Backward pass for MinimumOp: routes the incoming gradient to the
// input(s) that produced the minimum. Exact masking logic is in the
// .cc/.cu implementation — confirm there.
template <class Context>
class MinimumGradientOp final : public Operator<Context> {
public:
// Trivial ctor/dtor via the project's helper macro.
USE_SIMPLE_CTOR_DTOR(MinimumGradientOp);
USE_OPERATOR_FUNCTIONS;
// Dispatches to the typed implementation by input dtype/shapes.
void RunOnDevice() override;
// Same-shape (element-wise) gradient path.
template <typename T> void EltwiseRunWithType();
// Gradient path for the broadcast case.
template <typename T> void BroadcastRunWithType();
};
} // namespace dragon
#endif // DRAGON_OPERATORS_ARITHMETIC_MINIMUM_OP_H_
\ No newline at end of file
......@@ -43,8 +43,6 @@ public:
#if CUDNN_VERSION_MIN(7, 0, 0)
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNCTCLossOp final : public Operator<Context> {
public:
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// -------------------------------------------------------------
#ifndef DRAGON_OPERATORS_LOSS_NLL_LOSS_OP_H_
#define DRAGON_OPERATORS_LOSS_NLL_LOSS_OP_H_
#include "core/operator.h"
namespace dragon {
// Negative log-likelihood loss (per changelog: ``NLLLossOp``).
// NOTE(review): whether the input is expected to hold log-probabilities
// is defined by the .cc/.cu implementation — confirm there.
template <class Context>
class NLLLossOp : public Operator<Context> {
 public:
    NLLLossOp(
        const OperatorDef& def,
        Workspace* ws)
        : Operator<Context>(def, ws),
          axis(OperatorBase::Arg<int>("axis", 1)),
          normalization(OperatorBase::Arg<string>(
              "normalization", "VALID")) {
        // Copy the optional "ignore_labels" argument into a CPU tensor
        // so the kernels can mask those classes out of the loss.
        auto xs = OperatorBase::Args<int>("ignore_labels");
        if (!xs.empty()) {
            ignores.Reshape({ (TIndex)xs.size() });
            auto* Idata = ignores.mutable_data<int, CPUContext>();
            // size_t index: avoids the signed/unsigned comparison
            // against xs.size() in the original loop.
            for (size_t i = 0; i < xs.size(); ++i) Idata[i] = xs[i];
        }
    }
    USE_OPERATOR_FUNCTIONS;

    void RunOnDevice() override;
    template <typename Tx, typename Ty> void RunWithType();

 protected:
    TIndex axis, outer_dim, inner_dim;     // reduction geometry around `axis`
    Tensor losses, flags, ignores;         // scratch + ignored-label list
    string normalization;                  // "VALID" by default
};
// Backward pass for NLLLossOp; mirrors its arguments so the gradient
// uses the same axis, normalization mode, and ignored-label set.
template <class Context>
class NLLLossGradientOp : public Operator<Context> {
 public:
    NLLLossGradientOp(
        const OperatorDef& def,
        Workspace* ws)
        : Operator<Context>(def, ws),
          axis(OperatorBase::Arg<int>("axis", 1)),
          normalization(OperatorBase::Arg<string>(
              "normalization", "VALID")) {
        // Copy the optional "ignore_labels" argument into a CPU tensor
        // so the kernels can zero the gradient for those classes.
        auto xs = OperatorBase::Args<int>("ignore_labels");
        if (!xs.empty()) {
            ignores.Reshape({ (TIndex)xs.size() });
            auto* Idata = ignores.mutable_data<int, CPUContext>();
            // size_t index: avoids the signed/unsigned comparison
            // against xs.size() in the original loop.
            for (size_t i = 0; i < xs.size(); ++i) Idata[i] = xs[i];
        }
    }
    USE_OPERATOR_FUNCTIONS;

    void RunOnDevice() override;
    template <typename Tx, typename Ty> void RunWithType();

 protected:
    TIndex axis, outer_dim, inner_dim;     // reduction geometry around `axis`
    Tensor ignores, flags;                 // ignored-label list + scratch
    string normalization;                  // "VALID" by default
};
} // namespace dragon
#endif // DRAGON_OPERATORS_LOSS_NLL_LOSS_OP_H_
\ No newline at end of file
......@@ -22,7 +22,8 @@ class InitializeOp : public Operator<Context> {
public:
InitializeOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
shape_desc(OperatorBase::Arg<string>("shape", "")) {
shape_desc(OperatorBase::Arg<string>("shape", "")),
dtype(OperatorBase::Arg<string>("dtype", "float32")) {
GET_ARGUMENTS_WITH_DESC(int, dims);
}
USE_OPERATOR_FUNCTIONS;
......@@ -32,19 +33,29 @@ class InitializeOp : public Operator<Context> {
protected:
DECLARE_ARGUMENTS_WITH_DESC(int, dims);
string shape_desc;
string shape_desc, dtype;
TensorFiller filler;
};
template <class Context>
class FillOp final : public InitializeOp<Context> {
class FillOp final : public Operator<Context> {
public:
FillOp(const OperatorDef& def, Workspace* ws)
: InitializeOp<Context>(def, ws) {
this->filler.set_type("constant");
this->filler.set_value(OperatorBase::Arg<float>("value", 0.0));
: Operator<Context>(def, ws),
shape_desc(OperatorBase::Arg<string>("shape", "")),
dtype(OperatorBase::Arg<string>("dtype", "float32")),
value(OperatorBase::Arg<float>("value", 0.0)) {
GET_ARGUMENTS_WITH_DESC(int, dims);
}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
DECLARE_ARGUMENTS_WITH_DESC(int, dims);
string shape_desc, dtype;
float value;
};
template <class Context>
......@@ -130,6 +141,7 @@ public:
};
DEFINE_ARGUMENTS_WITH_DESC(int, InitializeOp, dims);
DEFINE_ARGUMENTS_WITH_DESC(int, FillOp, dims);
} // namespace
......
......@@ -25,7 +25,7 @@ class BatchNormOp final : public Operator<Context> {
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)),
momentum(OperatorBase::Arg<float>("momentum", 0.9f)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)),
eps(OperatorBase::Arg<float>("eps", 1e-5f)),
use_stats(OperatorBase::Arg<int>("use_stats", -1)),
mode(OperatorBase::Arg<string>("mode", "DEFAULT")) {
if (axis != -1)
......@@ -81,7 +81,7 @@ class FusedBatchNormOp : public Operator<Context> {
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)),
momentum(OperatorBase::Arg<float>("momentum", 0.9f)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)),
eps(OperatorBase::Arg<float>("eps", 1e-5f)),
use_stats(OperatorBase::Arg<int>("use_stats", -1)) {}
USE_OPERATOR_FUNCTIONS;
......@@ -105,7 +105,7 @@ class FusedBatchNormGradientOp : public Operator<Context> {
FusedBatchNormGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)),
eps(OperatorBase::Arg<float>("eps", 1e-5f)),
use_stats(OperatorBase::Arg<int>("use_stats", -1)) {}
USE_OPERATOR_FUNCTIONS;
......@@ -127,14 +127,13 @@ class FusedBatchNormGradientOp : public Operator<Context> {
#if CUDNN_VERSION_MIN(5, 0, 0)
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNBatchNormOp final : public FusedBatchNormOp<Context> {
class CuDNNBatchNormOp final
: public FusedBatchNormOp<Context> {
public:
CuDNNBatchNormOp(const OperatorDef& def, Workspace* ws)
: FusedBatchNormOp<Context>(def, ws),
eps64(OperatorBase::Arg<float>("eps", 1e-3f)) {
eps64(OperatorBase::Arg<float>("eps", 1e-5f)) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bn_desc));
......@@ -167,11 +166,12 @@ class CuDNNBatchNormOp final : public FusedBatchNormOp<Context> {
};
template <class Context>
class CuDNNBatchNormGradientOp final : public FusedBatchNormGradientOp<Context> {
class CuDNNBatchNormGradientOp final
: public FusedBatchNormGradientOp<Context> {
public:
CuDNNBatchNormGradientOp(const OperatorDef& def, Workspace* ws)
: FusedBatchNormGradientOp<Context>(def, ws),
eps64(OperatorBase::Arg<float>("eps", 1e-3f)) {
eps64(OperatorBase::Arg<float>("eps", 1e-5f)) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bn_desc));
......
......@@ -23,7 +23,7 @@ class BatchRenormOp final : public Operator<Context> {
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)),
momentum(OperatorBase::Arg<float>("momentum", 0.9f)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)),
eps(OperatorBase::Arg<float>("eps", 1e-5f)),
r_max(OperatorBase::Arg<float>("r_max", 3.f)),
d_max(OperatorBase::Arg<float>("d_max", 5.f)),
t_delta(OperatorBase::Arg<float>("t_delta", 1.f)),
......
......@@ -23,7 +23,7 @@ class GroupNormOp final : public Operator<Context> {
: Operator<Context>(def, ws),
group(OperatorBase::Arg<int>("group", 32)),
axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)) {
eps(OperatorBase::Arg<float>("eps", 1e-5f)) {
if (axis != -1)
CHECK_EQ(axis, 1)
<< "\nThe axis can only be set to 1.";
......@@ -73,7 +73,7 @@ class FusedGroupNormOp final : public Operator<Context> {
: Operator<Context>(def, ws),
group(OperatorBase::Arg<int>("group", 32)),
axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)) {}
eps(OperatorBase::Arg<float>("eps", 1e-5f)) {}
USE_OPERATOR_FUNCTIONS;
void Setup();
......
......@@ -22,9 +22,10 @@ class InstanceNormOp final : public Operator<Context> {
InstanceNormOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)) {
eps(OperatorBase::Arg<float>("eps", 1e-5f)) {
if (axis != -1)
CHECK_EQ(axis, 1) << "\nThe axis can only be set to 1.";
CHECK_EQ(axis, 1)
<< "\nThe axis can only be set to 1.";
}
USE_OPERATOR_FUNCTIONS;
......@@ -47,7 +48,8 @@ class InstanceNormGradientOp final : public Operator<Context> {
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)) {
if (axis != -1)
CHECK_EQ(axis, 1) << "\nThe axis can only be set to 1.";
CHECK_EQ(axis, 1)
<< "\nThe axis can only be set to 1.";
}
USE_OPERATOR_FUNCTIONS;
......
......@@ -23,7 +23,7 @@ class L2NormOp final : public Operator<Context> {
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", 0)),
num_axes(OperatorBase::Arg<int>("num_axes", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)),
eps(OperatorBase::Arg<float>("eps", 1e-5f)),
mode(OperatorBase::Arg<string>("mode", "SUM")) {}
USE_OPERATOR_FUNCTIONS;
......
......@@ -20,8 +20,6 @@ namespace dragon {
#if CUDNN_VERSION_MIN(5, 0, 0)
#include "utils/cudnn_device.h"
class cudnnTensorDescriptors {
public:
cudnnTensorDescriptors(const int num_descs) {
......
......@@ -21,7 +21,8 @@ class BiasAddOp final : public Operator<Context> {
public:
BiasAddOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
data_format(OperatorBase::Arg<string>("data_format", "NCHW")) {}
data_format(OperatorBase::Arg<string>(
"data_format", "NCHW")) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
......@@ -37,7 +38,8 @@ class BiasAddGradientOp final : public Operator<Context> {
public:
BiasAddGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
data_format(OperatorBase::Arg<string>("data_format", "NCHW")) {}
data_format(OperatorBase::Arg<string>(
"data_format", "NCHW")) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
......@@ -48,6 +50,62 @@ class BiasAddGradientOp final : public Operator<Context> {
string data_format;
};
#ifdef WITH_CUDNN
// cuDNN-backed bias addition operator: adds a per-channel bias to the
// input, with the channel axis chosen by data_format (default "NCHW").
template <class Context>
class CuDNNBiasAddOp final : public Operator<Context> {
public:
// Reads the "data_format" argument and creates the cuDNN tensor
// descriptors for the bias and the output.
CuDNNBiasAddOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
data_format(OperatorBase::Arg<string>(
"data_format", "NCHW")) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc));
}
USE_OPERATOR_FUNCTIONS;
// Destroys the descriptors created in the constructor.
~CuDNNBiasAddOp() {
CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc));
CUDNN_CHECK(cudnnDestroyTensorDescriptor(output_desc));
}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
// (outer, channel, inner) factorization of the output shape;
// presumably computed when the op runs — confirm in the .cc file.
TIndex outer_dim, dim, inner_dim;
string data_format;
cudnnTensorDescriptor_t bias_desc, output_desc;
};
// cuDNN-backed gradient of BiasAdd: reduces the incoming gradient over
// the non-channel axes selected by data_format (default "NCHW").
template <class Context>
class CuDNNBiasAddGradientOp final : public Operator<Context> {
public:
// Reads the "data_format" argument and creates the cuDNN tensor
// descriptors for the incoming gradient and the bias gradient.
CuDNNBiasAddGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
data_format(OperatorBase::Arg<string>(
"data_format", "NCHW")) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc));
}
USE_OPERATOR_FUNCTIONS;
// Destroys the descriptors created in the constructor.
~CuDNNBiasAddGradientOp() {
CUDNN_CHECK(cudnnDestroyTensorDescriptor(input_desc));
CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc));
}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
// (outer, channel, inner) factorization of the input shape;
// presumably computed when the op runs — confirm in the .cc file.
TIndex outer_dim, dim, inner_dim;
string data_format;
cudnnTensorDescriptor_t input_desc, bias_desc;
};
#endif // WITH_CUDNN
} // namespace dragon
#endif // DRAGON_OPERATORS_VISION_BIAS_ADD_OP_H_
\ No newline at end of file
......@@ -50,8 +50,6 @@ class Conv2dGradientOp : public Conv2dOp<Context> {
#ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNConv2dOp final : public Conv2dOp<Context> {
public:
......@@ -97,7 +95,7 @@ class CuDNNConv2dOp final : public Conv2dOp<Context> {
cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc;
size_t fwd_data_size;
TIndex bias_offset, cudnn_group;
TIndex cudnn_group;
vector<TIndex> input_dims;
bool enable_tensor_core;
};
......@@ -148,7 +146,7 @@ class CuDNNConv2dGradientOp final : public Conv2dGradientOp<Context> {
cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc;
size_t bwd_filter_size, bwd_data_size;
TIndex bias_offset, cudnn_group;
TIndex cudnn_group;
vector<TIndex> input_dims;
bool enable_tensor_core;
};
......
......@@ -84,6 +84,7 @@ class ConvOpBase : public Operator<Context> {
ctx());
} else LOG(FATAL) << "ConvNd has not been implemented yet";
}
template <typename T> void Col2Im(const T* col, T* im) {
if (Input(0).ndim() == 4) {
kernel::Col2Im2d<T, Context>(conv_in_channels,
......
......@@ -54,8 +54,6 @@ class Conv2dTransposeGradientOp : public Conv2dTransposeOp<Context> {
#ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNConv2dTransposeOp final : public Conv2dTransposeOp<Context> {
public:
......@@ -100,7 +98,7 @@ class CuDNNConv2dTransposeOp final : public Conv2dTransposeOp<Context> {
cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc;
size_t fwd_data_size;
TIndex bias_offset, cudnn_group;
TIndex cudnn_group;
vector<TIndex> input_dims;
bool enable_tensor_core;
};
......@@ -150,7 +148,7 @@ public:
cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc;
size_t bwd_filter_size, bwd_data_size;
TIndex bias_offset, cudnn_group;
TIndex cudnn_group;
vector<TIndex> input_dims;
bool enable_tensor_core;
};
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_VISION_DROP_BLOCK_OP_H_
#define DRAGON_OPERATORS_VISION_DROP_BLOCK_OP_H_
#include "core/operator.h"
#include "utils/math_functions.h"
namespace dragon {
// DropBlock regularization over 2d feature maps: drops contiguous
// block_size x block_size regions instead of independent units
// (presumably following Ghiasi et al., "DropBlock" — confirm in the
// kernel implementation).
template <class Context>
class DropBlock2dOp final : public Operator<Context> {
public:
// block_size: side of each dropped square region (default 7).
// alpha / decrement: scheduling factors applied to the drop
//     probability; exact schedule lives in the .cc — confirm there.
// keep_prob: desc-able argument, probability of keeping a unit (0.9).
// The op is phase-aware ("TRAIN"/"TEST") via SwitchToPhase.
DropBlock2dOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
block_size(OperatorBase::Arg<int>("block_size", 7)),
alpha(OperatorBase::Arg<float>("alpha", 1.f)),
decrement(OperatorBase::Arg<float>("decrement", 0.f)),
data_format(OperatorBase::Arg<string>("data_format", "NCHW")) {
GET_ARGUMENT_WITH_DESC(float, keep_prob, 0.9f);
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
DECLARE_ARGUMENT_WITH_DESC(float, keep_prob);
// seed_h/seed_w: extent of the region where block centers may be
// seeded; n/c/h/w cache the input dimensions.
TIndex block_size, seed_h, seed_w;
TIndex n, c, h, w;
// gamma: per-unit seeding rate derived from keep_prob and
// block_size; apply_prob starts at 1 (no dropping) — presumably
// annealed by alpha/decrement across steps, confirm in the .cc.
float alpha, decrement, apply_prob = 1., gamma;
string data_format;
vector<TIndex> seed_dims;
};
// Gradient of DropBlock2d: re-applies the mask produced in the forward
// pass to the incoming gradient. Phase-aware like the forward op.
template <class Context>
class DropBlock2dGradientOp final : public Operator<Context> {
public:
DropBlock2dGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws) {
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
};
DEFINE_ARGUMENT_WITH_DESC(float, DropBlock2dOp, keep_prob);
} // namespace dragon
#endif // DRAGON_OPERATORS_VISION_DROP_BLOCK_OP_H_
\ No newline at end of file
......@@ -16,7 +16,10 @@
namespace dragon {
enum LRNMode { ACROSS_CHANNELS, WITHIN_CHANNEL };
// Normalization region for LRN: across adjacent channels at a spatial
// position, or within a spatial window of a single channel.
typedef enum {
ACROSS_CHANNELS,
WITHIN_CHANNEL,
} LRNMode;
template <class Context>
class LRNOp : public Operator<Context> {
......@@ -82,8 +85,6 @@ class LRNGradientOp : public Operator<Context> {
#ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNLRNOp final : public LRNOp<Context> {
public:
......
......@@ -73,7 +73,7 @@ inline void LoadCaffeModel(
const string& layer_name = layer.name();
string prefix = layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++) {
string tensor_name = prefix + dragon_cast<string, int>(j);
string tensor_name = prefix + std::to_string(j);
if (!ws->HasTensor(tensor_name))
LOG(WARNING) << "Tensor(" << tensor_name << ") "
<< "does not exist in any Graphs, skip.";
......@@ -114,7 +114,7 @@ inline void SavaCaffeModel(
int layer_idx = -1;
for (int i = 0; i < tensors.size(); i++) {
if (tensors[i]->count() <= 0) continue;
vector<string> splits = SplitString(
vector<string> splits = str::split(
tensors[i]->name(), "/param:");
if (layer_hash.count(splits[0]) == 0) {
layer_hash[splits[0]] = ++layer_idx;
......
......@@ -28,6 +28,10 @@ template<> inline int dragon_cast<int, float>(float val) {
return static_cast<int>(val);
}
template<> inline int64_t dragon_cast<int64_t, float>(float val) {
return static_cast<int64_t>(val);
}
template<> inline float dragon_cast<float, float>(float val) {
return val;
}
......@@ -127,7 +131,7 @@ template<> inline float32 dragon_cast<float32, float>(float val) {
return dragon_cast<float32, float16>(t);
}
#ifdef WITH_CUDA_FP16
#ifdef WITH_CUDA
template<> inline half dragon_cast<half, float>(float val) {
#if CUDA_VERSION_MIN(9, 0, 0)
......@@ -165,7 +169,7 @@ template<> inline half2 dragon_cast<half2, float16>(float16 val) {
}
#endif // WITH_CUDA_FP16
#endif // WITH_CUDA
} // namespace dragon
......
......@@ -101,16 +101,10 @@ inline int CUDA_NUM_DEVICES() {
return count;
}
inline int CUDA_DEVICE() {
int gpu_id;
cudaGetDevice(&gpu_id);
return gpu_id;
}
inline int CUDA_DEVICE(const void* ptr) {
cudaPointerAttributes attr;
CUDA_CHECK(cudaPointerGetAttributes(&attr, ptr));
return attr.device;
inline int CUDA_GET_DEVICE() {
int device_id;
cudaGetDevice(&device_id);
return device_id;
}
struct CUDADeviceProps {
......@@ -132,7 +126,7 @@ inline const cudaDeviceProp& GetDeviceProperty(
}
inline bool CUDA_TRUE_FP16_AVAILABLE() {
int device = CUDA_DEVICE();
int device = CUDA_GET_DEVICE();
auto& prop = GetDeviceProperty(device);
return prop.major >= 6;
}
......@@ -141,7 +135,7 @@ inline bool TENSOR_CORE_AVAILABLE() {
#if CUDA_VERSION < 9000
return false;
#else
int device = CUDA_DEVICE();
int device = CUDA_GET_DEVICE();
auto& prop = GetDeviceProperty(device);
return prop.major >= 7;
#endif
......@@ -149,23 +143,16 @@ inline bool TENSOR_CORE_AVAILABLE() {
class DeviceGuard {
public:
DeviceGuard(int newDevice)
: previous_(CUDA_DEVICE()) {
if (previous_ != newDevice)
CUDA_CHECK(cudaSetDevice(newDevice));
DeviceGuard(int new_id) : prev_id(CUDA_GET_DEVICE()) {
if (prev_id != new_id) CUDA_CHECK(cudaSetDevice(new_id));
}
~DeviceGuard() {
CUDA_CHECK(cudaSetDevice(previous_));
}
~DeviceGuard() { CUDA_CHECK(cudaSetDevice(prev_id)); }
private:
int previous_;
int prev_id;
};
#define CUDA_FP16_NOT_COMPILED \
LOG(FATAL) << "CUDA-FP16 was not compiled."
#else
#define CUDA_NOT_COMPILED \
......
......@@ -55,7 +55,6 @@ template<> class CUDNNType<double> {
typedef double BNParamType;
};
#ifdef WITH_CUDA_FP16
template<> class CUDNNType<float16> {
public:
static const cudnnDataType_t type = CUDNN_DATA_HALF;
......@@ -63,37 +62,63 @@ template<> class CUDNNType<float16> {
static const void *one, *zero;
typedef float BNParamType;
};
#endif
template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, Tensor* tensor);
void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc,
Tensor* tensor);
template <typename T>
void cudnnSetTensor4dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, Tensor* tensor);
void cudnnSetTensor4dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
Tensor* tensor);
template <typename T>
void cudnnSetTensor5dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, Tensor* tensor);
void cudnnSetTensor5dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
Tensor* tensor);
template <typename T>
void cudnnSetTensor3dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, Tensor* tensor);
void cudnnSetTensor3dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
Tensor* tensor);
template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, const std::vector<int64_t>& dims);
void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc,
const std::vector<int64_t>& dims);
template <typename T>
void cudnnSetTensor4dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims);
void cudnnSetTensor4dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims);
template <typename T>
void cudnnSetTensor4dDescWithGroup(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims, const int64_t group);
void cudnnSetTensor4dDescWithGroup(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims,
const int64_t group);
template <typename T>
void cudnnSetTensor5dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims);
void cudnnSetTensor5dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims);
template <typename T>
void cudnnSetTensor3dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims);
void cudnnSetTensor3dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims);
template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc,
void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc,
const std::vector<int64_t>& dims,
const std::vector<int64_t>& strides);
......
......@@ -69,7 +69,7 @@ template <typename T, class Context>
void RandomBernoulli(
const int n,
const float p,
uint32_t* x,
T* x,
Context* ctx);
/******************** Level-1 ********************/
......
......@@ -25,21 +25,21 @@ typedef int64_t TIndex;
template <typename T, class Context>
void Dropout(
const int count,
T prob,
T scale,
float prob,
float scale,
const T* x,
uint32_t* mask,
uint32_t* mask32,
uint8_t* mask8,
T* y,
Context* ctx);
template <typename T, class Context>
void DropoutGrad(
template <typename Tx, typename Tm, class Context>
void ApplyMask(
const int count,
T prob,
T scale,
const T* dy,
const uint32_t* mask,
T* dx,
const float scale,
const Tx* x,
const Tm* mask,
Tx* y,
Context* ctx);
/******************** activation.elu ********************/
......@@ -234,10 +234,95 @@ void Clip(
const float low,
const float high,
const T* x,
T* mask,
T* y,
Context* ctx);
template <typename T, class Context>
void ClipGrad(
const int count,
const float low,
const float high,
const T* x,
const T* dy,
T* dx,
Context* ctx);
/******************** arithmetic.maximum ********************/
template <typename T, class Context>
void MaximumE(
const int count,
const T* x1,
const T* x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MaximumB(
const int count,
const T* x1,
const T x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MaximumEGrad(
const int count,
const T* x1,
const T* x2,
const T* dy,
T* dx1,
T* dx2,
Context* ctx);
template <typename T, class Context>
void MaximumBGrad(
const int count,
const T* x1,
const T x2,
const T* dy,
T* dx1,
/* T* dx2, */
Context* ctx);
/******************** arithmetic.minimum ********************/
template <typename T, class Context>
void MinimumE(
const int count,
const T* x1,
const T* x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MinimumB(
const int count,
const T* x1,
const T x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MinimumEGrad(
const int count,
const T* x1,
const T* x2,
const T* dy,
T* dx1,
T* dx2,
Context* ctx);
template <typename T, class Context>
void MinimumBGrad(
const int count,
const T* x1,
const T x2,
const T* dy,
T* dx1,
/* T* dx2, */
Context* ctx);
/******************** control_flow.compare ********************/
template <typename T, class Context>
......@@ -257,6 +342,34 @@ void AbsGrad(
T* dx,
Context* ctx);
/******************** loss.nll_loss ********************/
template <typename Tx, typename Ty, class Context>
void NLLLoss(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const Tx* log_prob,
const Ty* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
Context* ctx);
template <typename Tx, typename Ty, class Context>
void NLLLossGrad(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const Tx* prob,
const Ty* labels,
const int* ignores,
const int num_ignores,
Tx* dx,
float* flags,
Context* ctx);
/******************** loss.sigmoid_cross_entropy ********************/
template <typename T, class Context>
......@@ -902,6 +1015,23 @@ void Col2Im2d(
T* im,
Context* ctx);
/******************** vision.drop_block ********************/
template <class Context>
void DropBlock2d(
const int N,
const int C,
const int H,
const int W,
const int seed_h,
const int seed_w,
const int block_size,
const float gamma,
const string& data_format,
uint32_t* seed,
int* mask,
Context* ctx);
/******************** vision.nn_resize ********************/
template <typename T, class Context>
......
......@@ -111,7 +111,7 @@ void Axpby(
const T beta,
T* y);
} // namespace ssd
} // namespace sse
} // namespace dragon
......
......@@ -18,11 +18,11 @@
#include <iostream>
#include <cstdlib>
#include "utils/cast.h"
namespace dragon {
inline std::vector<std::string> SplitString(
namespace str {
inline std::vector<std::string> split(
const std::string& str,
const std::string& c) {
std::vector<std::string> ret;
......@@ -36,17 +36,7 @@ inline std::vector<std::string> SplitString(
return ret;
}
#define DEFINE_NUMBER2STRING(T) \
template<> inline std::string dragon_cast<std::string, T>(T val) { \
std::stringstream ss; ss << val; return ss.str(); \
}
DEFINE_NUMBER2STRING(int);
DEFINE_NUMBER2STRING(unsigned long long);
template<> inline int dragon_cast<int, std::string>(std::string val) {
return atoi(val.c_str());
}
} // namespace str
} // namespace dragon
......
......@@ -2,6 +2,7 @@ message(STATUS "Found CXX Module: ${CMAKE_CURRENT_LIST_DIR}")
FILE(GLOB_RECURSE MODULE_FILES *.h *.hpp *.c *.cpp *.cu *.cc)
FILE(GLOB_RECURSE SRC_FILES ../../src/*.c ../../src/*.cpp ../../src/*.cu ../../src/*.cc)
LIST(REMOVE_ITEM SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/../../src/operators/misc/python_op.cc)
# ---[ Target
if (WITH_CUDA)
......@@ -36,7 +37,9 @@ if(WIN32)
TARGET_LINK_LIBRARIES(${PROJECT_NAME}_cxx shlwapi.lib)
endif()
SET_TARGET_PROPERTIES(${PROJECT_NAME}_cxx PROPERTIES OUTPUT_NAME dragon_cxx)
SET_TARGET_PROPERTIES(${PROJECT_NAME}_cxx PROPERTIES OUTPUT_NAME dragon)
SET_TARGET_PROPERTIES(${PROJECT_NAME}_cxx PROPERTIES DEFINE_SYMBOL DRAGON_CXX_EXPORTS)
# ---[ Install
INSTALL(TARGETS ${PROJECT_NAME}_cxx DESTINATION ${PROJECT_BINARY_DIR}/../lib)
\ No newline at end of file
INSTALL(TARGETS ${PROJECT_NAME}_cxx DESTINATION ${PROJECT_BINARY_DIR}/../api/lib)
FILE(INSTALL dragon.h DESTINATION ${PROJECT_BINARY_DIR}/../api/include)
\ No newline at end of file
#include "dragon.h"
#include "core/common.h"
#include "utils/logging.h"
namespace dragon {
......
......@@ -12,6 +12,12 @@
namespace dragon {
/* * * * * * * * * * * * * * * * * * * * *
* *
* Workspace *
* *
* * * * * * * * * * * * * * * * * * * * */
Map<string, unique_ptr < Workspace > > g_workspaces;
Map<string, vector<string> > sub_workspaces;
std::mutex g_mutex;
......@@ -29,7 +35,8 @@ Workspace* CreateWorkspace(const std::string& name){
Workspace* ResetWorkspace(const std::string& name) {
std::unique_lock<std::mutex> lock(g_mutex);
CHECK(g_workspaces.count(name))
<< "\nWorkspace(" << name << ") does not exist, can not be reset.";
<< "\nWorkspace(" << name << ") does not exist."
<< "\nCan not be reset.";
LOG(INFO) << "Reset the Workspace(" << name << ").";
g_workspaces[name].reset(new Workspace(name));
for (auto& sub_workspace : sub_workspaces[name]) {
......@@ -43,7 +50,8 @@ Workspace* ResetWorkspace(const std::string& name) {
void ReleaseWorkspace(const std::string& name) {
std::unique_lock<std::mutex> lock(g_mutex);
CHECK(g_workspaces.count(name))
<< "\nWorkspace(" << name << ") does not exist, can not be released.";
<< "\nWorkspace(" << name << ") does not exist."
<< "\nCan not be released.";
LOG(INFO) << "Release the Workspace(" << name << ").";
g_workspaces[name].reset();
g_workspaces.erase(name);
......@@ -61,6 +69,12 @@ void MoveWorkspace(
<< "into the Workspace(" << target_ws->name() << ").";
}
/* * * * * * * * * * * * * * * * * * * * *
* *
* Graph *
* *
* * * * * * * * * * * * * * * * * * * * */
std::string CreateGraph(
const std::string& graph_file,
Workspace* ws) {
......@@ -102,6 +116,19 @@ std::string CreateGraph(
return meta_graph.name();
}
// Runs a previously created graph on the given workspace.
// The two empty strings are the include/exclude rule filters
// (run everything); stream_id selects the execution stream.
void RunGraph(
const std::string& graph_name,
Workspace* ws,
const int stream_id) {
ws->RunGraph(graph_name, "", "", stream_id);
}
/* * * * * * * * * * * * * * * * * * * * *
* *
* Tensor *
* *
* * * * * * * * * * * * * * * * * * * * */
void CreateTensor(
const std::string& name,
Workspace* ws) {
......@@ -109,6 +136,32 @@ void CreateTensor(
}
// Copies a named tensor out of the workspace into freshly malloc'd
// host memory and reports its shape through `shape`.
// Ownership: the caller must free() the returned pointer.
// Fatals if the tensor does not exist or has never been computed
// (meta id == 0). Note: T is not checked against the tensor's actual
// data type — the cast is unconditional.
template <typename T>
T* FetchTensor(
const std::string& name,
vector<TIndex>& shape,
Workspace* ws){
if (!ws->HasTensor(name)){
LOG(FATAL) << "Tensor(" << name << ")"
<< " doesn't exist, try create it before.";
}
Tensor* tensor = ws->GetTensor(name);
if (tensor->meta().id() == 0){
LOG(FATAL) << "Tensor(" << name << ")"
<< " has not been computed yet";
}
shape = tensor->dims();
// Device-to-host copy when the data currently lives on the GPU,
// plain host copy otherwise.
void* data = malloc(tensor->nbytes());
if (tensor->memory_state() == MixedMemory::STATE_AT_CUDA) {
CUDAContext::Memcpy<CPUContext, CUDAContext>(
tensor->nbytes(), data, tensor->raw_data<CUDAContext>());
} else {
CPUContext::Memcpy<CPUContext, CPUContext>(
tensor->nbytes(), data, tensor->raw_data<CPUContext>());
}
return static_cast<T*>(data);
}
template <typename T>
void FeedTensor(
const std::string& name,
const vector<TIndex>& shape,
......@@ -135,6 +188,12 @@ void FeedTensor(
}
}
/* * * * * * * * * * * * * * * * * * * * *
* *
* I / O *
* *
* * * * * * * * * * * * * * * * * * * * */
void TransplantCaffeModel(
const std::string& input_model,
const std::string& output_model) {
......@@ -146,7 +205,7 @@ void TransplantCaffeModel(
const string& layer_name = layer.name();
string prefix = layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++) {
string tensor_name = prefix + dragon_cast<string, int>(j);
string tensor_name = prefix + std::to_string(j);
BlobProto blob = layer.blobs(j);
TensorProto* proto = protos.add_protos();
proto->set_data_type(TensorProto_DataType_FLOAT);
......@@ -218,7 +277,7 @@ void LoadCaffemodel(
const string& layer_name = layer.name();
string prefix = scope + layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++){
string tensor_name = prefix + dragon_cast<string, int>(j);
string tensor_name = prefix + std::to_string(j);
if (!ws->HasTensor(tensor_name))
ws->CreateTensor(tensor_name);
BlobProto blob = layer.blobs(j);
......@@ -248,63 +307,54 @@ void LoadCaffemodel(
}
}
void RunGraph(
const std::string& graph_name,
Workspace* ws,
const int stream_id) {
ws->RunGraph(graph_name, "", "", stream_id);
}
template <typename T>
T* FetchTensor(
const std::string& name,
vector<TIndex>& shape,
Workspace* ws){
if (!ws->HasTensor(name)){
LOG(FATAL) << "Tensor(" << name << ")"
<< " doesn't exist, try create it before.";
}
Tensor* tensor = ws->GetTensor(name);
if (tensor->meta().id() == 0){
LOG(FATAL) << "Tensor(" << name << ")"
<< " has not been computed yet";
}
shape = tensor->dims();
void* data = malloc(tensor->nbytes());
if (tensor->memory_state() == MixedMemory::STATE_AT_CUDA) {
CUDAContext::Memcpy<CPUContext, CUDAContext>(
tensor->nbytes(), data, tensor->raw_data<CUDAContext>());
} else {
CPUContext::Memcpy<CPUContext, CPUContext>(
tensor->nbytes(), data, tensor->raw_data<CPUContext>());
}
return static_cast<T*>(data);
}
/* * * * * * * * * * * * * * * * * * * * *
* *
* Config *
* *
* * * * * * * * * * * * * * * * * * * * */
// Sets the global logging threshold from its string name
// (e.g. "INFO", "WARNING") by converting it to a severity value.
void SetLogLevel(const std::string& level) {
SetLogDestination(StrToLogSeverity(level));
}
template float* FetchTensor<float>(
/* * * * * * * * * * * * * * * * * * * * *
* *
* Template *
* *
* * * * * * * * * * * * * * * * * * * * */
template DRAGON_API float* FetchTensor<float>(
const std::string&,
std::vector<TIndex>&,
Workspace*);
template void FeedTensor<float>(
template DRAGON_API float16* FetchTensor<float16>(
const std::string&,
std::vector<TIndex>&,
Workspace*);
template DRAGON_API void FeedTensor<float>(
const std::string&,
const std::vector<TIndex>&,
const float*,
const Device&,
Workspace*);
template void FeedTensor<int>(
template DRAGON_API void FeedTensor<float16>(
const std::string&,
const std::vector<TIndex>&,
const float16*,
const Device&,
Workspace*);
template DRAGON_API void FeedTensor<int>(
const std::string&,
const std::vector<TIndex>&,
const int*,
const Device&,
Workspace*);
template void FeedTensor<uint8_t>(
template DRAGON_API void FeedTensor<uint8_t>(
const std::string&,
const std::vector<TIndex>&,
const uint8_t*,
......
......@@ -16,10 +16,28 @@
#include <cstdint>
#include <vector>
#ifdef WIN32
#define EXPORT __declspec(dllexport)
#ifdef _MSC_VER
#ifdef DRAGON_CXX_EXPORTS
#define DRAGON_API __declspec(dllexport)
#else
#define DRAGON_API __declspec(dllimport)
#endif
#else
#define EXPORT
#define DRAGON_API
#endif
/* * * * * * * * * * * * * * * * * * * * *
* *
* Internal Headers *
* *
* * * * * * * * * * * * * * * * * * * * */
#ifdef DRAGON_CXX_EXPORTS
#include "core/types.h"
#else
namespace dragon {
struct float16;
}
#endif
namespace dragon {
......@@ -28,72 +46,102 @@ typedef int64_t TIndex;
class Workspace;
class Device {
class DRAGON_API Device {
public:
EXPORT Device();
EXPORT explicit Device(std::string device_type);
EXPORT Device(std::string device_type, int device_id);
Device();
explicit Device(std::string device_type);
Device(std::string device_type, int device_id);
EXPORT const int& device_type() const { return device_type_; }
EXPORT const int device_id() const { return device_id_; }
const int& device_type() const { return device_type_; }
const int device_id() const { return device_id_; }
private:
int device_type_, device_id_;
};
EXPORT Workspace* CreateWorkspace(const std::string& name);
/* * * * * * * * * * * * * * * * * * * * *
* *
* Workspace *
* *
* * * * * * * * * * * * * * * * * * * * */
EXPORT Workspace* ResetWorkspace(const std::string& name);
DRAGON_API Workspace* CreateWorkspace(const std::string& name);
EXPORT void ReleaseWorkspace(const std::string& name);
DRAGON_API Workspace* ResetWorkspace(const std::string& name);
EXPORT void MoveWorkspace(Workspace* main, Workspace* sub);
DRAGON_API void ReleaseWorkspace(const std::string& name);
EXPORT std::string CreateGraph(
DRAGON_API void MoveWorkspace(Workspace* main, Workspace* sub);
/* * * * * * * * * * * * * * * * * * * * *
* *
* Graph *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API std::string CreateGraph(
const std::string& graph_file,
Workspace* ws);
EXPORT std::string CreateGraph(
DRAGON_API std::string CreateGraph(
const std::string& graph_file,
const Device& device,
Workspace* ws);
EXPORT void RunGraph(
DRAGON_API void RunGraph(
const std::string& graph_name,
Workspace* ws,
const int stream_id = 1);
EXPORT void CreateTensor(
/* * * * * * * * * * * * * * * * * * * * *
* *
* Tensor *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API void CreateTensor(
const std::string& name,
Workspace* ws);
template <typename T>
EXPORT void FeedTensor(
DRAGON_API T* FetchTensor(
const std::string& name,
const std::vector<TIndex>& shape,
const T* data,
const Device& device,
std::vector<TIndex>& shape,
Workspace* ws);
template <typename T>
EXPORT T* FetchTensor(
DRAGON_API void FeedTensor(
const std::string& name,
std::vector<TIndex>& shape,
const std::vector<TIndex>& shape,
const T* data,
const Device& device,
Workspace* ws);
EXPORT void LoadCaffemodel(
/* * * * * * * * * * * * * * * * * * * * *
* *
* I / O *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API void LoadCaffemodel(
const std::string& model_file,
Workspace* ws);
EXPORT void TransplantCaffeModel(
DRAGON_API void TransplantCaffeModel(
const std::string& input_model,
const std::string& output_model);
EXPORT void LoadDragonmodel(
DRAGON_API void LoadDragonmodel(
const std::string& model_file,
Workspace* ws);
EXPORT void SetLogLevel(const std::string& level);
/* * * * * * * * * * * * * * * * * * * * *
* *
* Config *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API void SetLogLevel(const std::string& level);
} // namespace dragon
......
......@@ -19,7 +19,8 @@ Workspace* ws() { return g_workspace; }
TypeId CTypeToFetcher(TypeId type) {
static Map<TypeId,TypeId> c_type_map {
{ TypeMeta::Id<uint8_t>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<int8>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<uint8>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<int>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<int64_t>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<float>(), TypeMeta::Id<NumpyFetcher>() },
......@@ -197,6 +198,11 @@ inline PyObject* FeedTensorCC(PyObject* self, PyObject* args) {
}
}
// Python binding invoked at interpreter shutdown: destroys all global
// workspaces so their resources are released deterministically before
// the process exits. Always returns Python True.
inline PyObject* OnModuleExitCC(PyObject* self, PyObject* args) {
g_workspaces.clear();
Py_RETURN_TRUE;
}
#define PYFUNC(name) {#name, name, METH_VARARGS, ""}
#define PYENDFUNC {nullptr, nullptr, 0, nullptr}
......@@ -255,6 +261,7 @@ PyMethodDef* GetAllMethods() {
PYFUNC(SnapshotCC),
/**** Config ****/
PYFUNC(SetLogLevelCC),
PYFUNC(OnModuleExitCC),
PYENDFUNC,
};
return g_python_methods;
......@@ -272,9 +279,11 @@ void common_init() {
}
#ifdef WITH_PYTHON3
static struct PyModuleDef libdragon = { PyModuleDef_HEAD_INIT,
static struct PyModuleDef libdragon = {
PyModuleDef_HEAD_INIT,
"libdragon", "", -1,
GetAllMethods() };
GetAllMethods()
};
PyMODINIT_FUNC PyInit_libdragon(void) {
PyObject* module = PyModule_Create(&libdragon);
......@@ -285,7 +294,8 @@ PyMODINIT_FUNC PyInit_libdragon(void) {
#else // WITH_PYTHON2
PyMODINIT_FUNC initlibdragon(void) {
PyObject* moudle = Py_InitModule("libdragon", GetAllMethods());
PyObject* moudle = Py_InitModule(
"libdragon", GetAllMethods());
if (moudle == nullptr) return;
common_init();
}
......
......@@ -31,7 +31,8 @@ class TensorFetcherBase {
class TensorFeederBase {
public:
virtual ~TensorFeederBase() {}
virtual PyObject* Feed(const DeviceOption& option,
virtual PyObject* Feed(
const DeviceOption& option,
PyArrayObject* array,
Tensor* tensor) = 0;
};
......@@ -61,7 +62,7 @@ class NumpyFetcher : public TensorFetcherBase {
PyErr_SetString(PyExc_RuntimeError, s.c_str());
return nullptr;
}
// create a empty array with r shape
// create an empty array with the same shape
PyObject* array = PyArray_SimpleNew(
tensor.ndim(), npy_dims.data(), npy_type);
// copy the tensor data to the numpy array
......@@ -88,7 +89,8 @@ class StringFetcher : public TensorFetcherBase {
class NumpyFeeder : public TensorFeederBase {
public:
PyObject* Feed(const DeviceOption& option,
PyObject* Feed(
const DeviceOption& option,
PyArrayObject* original_array,
Tensor* tensor) override {
PyArrayObject* array = PyArray_GETCONTIGUOUS(original_array);
......@@ -100,7 +102,6 @@ class NumpyFeeder : public TensorFeederBase {
if (meta.id() != tensor->meta().id() && tensor->meta().id() != 0)
LOG(WARNING) << "Feed Tensor(" << tensor->name() << ")"
<< " with different data type from original one.";
tensor->SetMeta(meta);
int ndim = PyArray_NDIM(array);
npy_intp* npy_dims = PyArray_DIMS(array);
vector<TIndex> dims;
......@@ -110,16 +111,16 @@ class NumpyFeeder : public TensorFeederBase {
#ifdef WITH_CUDA
CUDAContext context(option);
context.SwitchToDevice();
auto* data = tensor->raw_mutable_data<CUDAContext>(meta);
context.Memcpy<CUDAContext, CPUContext>(tensor->nbytes(),
tensor->raw_mutable_data<CUDAContext>(),
static_cast<void*>(PyArray_DATA(array)));
data, static_cast<void*>(PyArray_DATA(array)));
#else
LOG(FATAL) << "CUDA was not compiled.";
#endif
} else {
auto* data = tensor->raw_mutable_data<CPUContext>(meta);
CPUContext::Memcpy<CPUContext, CPUContext>(tensor->nbytes(),
tensor->raw_mutable_data<CPUContext>(),
static_cast<void*>(PyArray_DATA(array)));
data, static_cast<void*>(PyArray_DATA(array)));
}
Py_XDECREF(array);
Py_RETURN_TRUE;
......
......@@ -25,4 +25,4 @@ inline PyObject* IsCUDADriverSufficientCC(PyObject* self, PyObject* args) {
#endif
}
#endif // DRAGON_PYTHON_PY_MPI_H_
\ No newline at end of file
#endif // DRAGON_PYTHON_PY_CUDA_H_
\ No newline at end of file
......@@ -94,7 +94,6 @@ PyObject* TensorFromShapeCC(PyObject* self, PyObject* args) {
if (meta.id() != tensor->meta().id() && tensor->meta().id() != 0)
LOG(WARNING) << "Set Tensor(" << tensor->name() << ")"
<< " with different data type from original one.";
tensor->SetMeta(meta);
int ndim = PyList_Size(shape);
CHECK_GT(ndim, 0)
<< "\nThe len of shape should be greater than 1. Got " << ndim << ".";
......@@ -112,9 +111,9 @@ PyObject* TensorFromShapeCC(PyObject* self, PyObject* args) {
if (dev_opt.device_type() == CUDA) {
CUDAContext ctx(dev_opt);
ctx.SwitchToDevice();
tensor->raw_mutable_data<CUDAContext>();
tensor->raw_mutable_data<CUDAContext>(meta);
} else {
tensor->raw_mutable_data<CPUContext>();
tensor->raw_mutable_data<CPUContext>(meta);
}
Py_RETURN_TRUE;
}
......@@ -173,19 +172,19 @@ PyObject* TensorFromTensorCC(PyObject* self, PyObject* args) {
Tensor* srcT = ws()->GetTensor(src_name);
Tensor* dstT = ws()->CreateTensor(dst_name);
dstT->ReshapeLike(*srcT);
dstT->SetMeta(srcT->meta());
const TypeMeta& meta = srcT->meta();
if (dst_ctx.device_type() == DeviceType::CUDA) {
if (src_ctx.device_type() == DeviceType::CUDA) {
// CUDA <- CUDA
CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CUDAContext>(),
dstT->raw_mutable_data<CUDAContext>(meta),
srcT->raw_data<CUDAContext>());
} else {
// CUDA <- CPU
CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CUDAContext>(),
dstT->raw_mutable_data<CUDAContext>(meta),
srcT->raw_data<CPUContext>());
}
} else {
......@@ -193,13 +192,13 @@ PyObject* TensorFromTensorCC(PyObject* self, PyObject* args) {
// CPU <- CUDA
CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CPUContext>(),
dstT->raw_mutable_data<CPUContext>(meta),
srcT->raw_data<CUDAContext>());
} else {
// CPU <- CPU
CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CPUContext>(),
dstT->raw_mutable_data<CPUContext>(meta),
srcT->raw_data<CPUContext>());
}
}
......
......@@ -23,8 +23,8 @@ inline const int TypeMetaToNPY(const TypeMeta& meta) {
{ TypeMeta::Id<int64_t>(), NPY_INT64 },
{ TypeMeta::Id<double>(), NPY_FLOAT64 },
{ TypeMeta::Id<float16>(), NPY_FLOAT16 },
{ TypeMeta::Id<uint8_t>(), NPY_UINT8 },
{ TypeMeta::Id<char>(), NPY_INT8 }
{ TypeMeta::Id<uint8>(), NPY_UINT8 },
{ TypeMeta::Id<int8>(), NPY_INT8 }
};
return m2npy_type_map.count(meta.id()) ? m2npy_type_map[meta.id()] : -1;
}
......@@ -36,11 +36,12 @@ inline const TypeMeta& TypeNPYToMeta(int npy_type) {
{ NPY_INT64, TypeMeta::Make<int64_t>() },
{ NPY_FLOAT64, TypeMeta::Make<double>() },
{ NPY_FLOAT16, TypeMeta::Make<float16>() },
{ NPY_UINT8, TypeMeta::Make<uint8_t>() },
{ NPY_INT8, TypeMeta::Make<char>() },
{ NPY_UINT8, TypeMeta::Make<uint8>() },
{ NPY_INT8, TypeMeta::Make<int8>() },
};
static TypeMeta unknown_type;
return npy2m_type_map.count(npy_type) ? npy2m_type_map[npy_type] : unknown_type;
return npy2m_type_map.count(npy_type) ?
npy2m_type_map[npy_type] : unknown_type;
}
#endif // DRAGON_PYTHON_PY_TYPES_H_
\ No newline at end of file
......@@ -26,11 +26,11 @@ option = {}
REGISTERED_OPERATORS = set(s for s in RegisteredOperatorsCC())
NO_GRADIENT_OPERATORS = set(s for s in NoGradientOperatorsCC())
# The current device, 'CPU' or 'CUDA'
# The current device, 'CPU', 'CUDA' or 'CNML'
option['device'] = 'CPU'
# The device id
option['gpu_id'] = 0
option['device_id'] = 0
# Whether to use cuDNN if possible
option['use_cudnn'] = False
......@@ -44,6 +44,9 @@ option['debug_mode'] = False
# Whether to share grads
option['share_grads'] = True
# Optional graph type
option['graph_type'] = ''
# Whether to log the meta graphs
option['log_meta_graph'] = False
......@@ -84,7 +87,7 @@ def IsCUDADriverSufficient():
def EnableCUDA(gpu_id=0, use_cudnn=True):
"""Enable CUDA mode globally.
"""Enable NVIDIA's CUDA mode globally.
Parameters
----------
......@@ -100,9 +103,28 @@ def EnableCUDA(gpu_id=0, use_cudnn=True):
"""
global option
option['device'] = 'CUDA'
option['gpu_id'] = gpu_id
option['device_id'] = gpu_id
option['use_cudnn'] = use_cudnn
def EnableCNML(mlu_id=0):
    """Enable Cambricon's CNML mode globally.

    Parameters
    ----------
    mlu_id : int
        The id of MLU to use.

    Returns
    -------
    None

    """
    global option
    option['device'] = 'CNML'
    option['device_id'] = mlu_id
# TODO(PhyscalX): please not use @setter
# TODO(PhyscalX): seems that it can't change the global value
......@@ -133,7 +155,6 @@ def GetRandomSeed():
The global random seed.
"""
global option
return option['random_seed']
......@@ -151,7 +172,7 @@ def SetGPU(id):
"""
global option
option['gpu_id'] = id
option['device_id'] = id
def GetGPU():
......@@ -163,8 +184,7 @@ def GetGPU():
The global id of GPU.
"""
global option
return option['gpu_id']
return option['device_id']
def SetDebugMode(enabled=True):
......@@ -186,6 +206,25 @@ def SetDebugMode(enabled=True):
option['debug_mode'] = enabled
def SetGraphType(graph_type=''):
    """Set the type of the graph to be used globally.

    Passing an empty string selects the default DAG graph.

    Parameters
    ----------
    graph_type : str
        The graph type.

    Returns
    -------
    None

    """
    global option
    option.update(graph_type=graph_type)
def LogMetaGraph(enabled=True):
"""Enable to log meta graph globally.
......
......@@ -737,7 +737,7 @@ class Tensor(object):
Parameters
----------
new_value : basic type, list or numpy.ndarray
new_value : number, list or numpy.ndarray
The values to set.
Returns
......
......@@ -325,5 +325,7 @@ def GetTensorInfo(tensor, stream=1):
info['mem'].append('CPU'); info['device_id'] = 0
if 'CUDA' in info:
info['mem'].append('CUDA'); info['device_id'] = int(info['CUDA'])
if 'CNML' in info:
info['mem'].append('CNML'); info['device_id'] = int(info['CNML'])
info['init'] = len(info['mem']) > 0
return info
\ No newline at end of file
......@@ -439,7 +439,7 @@ def FetchTensor(tensor):
Returns
-------
numpy.ndarray
ndarray
The values copied from the backend.
References
......@@ -457,7 +457,7 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None):
----------
tensor : Tensor or str
The tensor to feed.
ndarray : basic type, list or numpy.ndarray
ndarray : number, list or ndarray
The values to feed.
force_cpu : boolean
Whether force to feed to cpu context.
......@@ -488,25 +488,23 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None):
"""
name = tensor.name if hasattr(tensor, 'name') else str(tensor)
dev = None
if force_cpu is True: dev = utils.MakeDeviceOption(0, 0)
if force_cpu is True:
dev = utils.MakeDeviceOption(0, 0)
else:
from dragon.core.scope import _DEVICE_SCOPE
if _DEVICE_SCOPE != '':
supports = {'/cpu': 0, '/gpu': 1}
supports = {'/cpu': 0, '/gpu': 1, '/mlu': 2}
dev = pb.DeviceOption()
dev.device_type = supports[_DEVICE_SCOPE.split(':')[0]]
dev.gpu_id = int(_DEVICE_SCOPE.split(':')[1])
dev.device_id = int(_DEVICE_SCOPE.split(':')[1])
else:
from dragon.config import option
if option['device'] == 'CUDA':
dev = utils.MakeDeviceOption(1, option['gpu_id'])
elif option['device'] == 'CPU':
dev = utils.MakeDeviceOption(1, option['device_id'])
else:
dev = utils.MakeDeviceOption(0, 0)
if not isinstance(array, np.ndarray):
if not isinstance(array, list):
array = [array]
auto_data_type = np.float32 if dtype is None else dtype
else:
auto_data_type = array.dtype if dtype is None else dtype
......@@ -573,8 +571,8 @@ def RunGraph(graph_name, inputs=(), outputs=[], stage=None, return_outputs=True)
Returns
-------
None, numpy.ndarray or list of numpy.ndarray
The outputs, format as numpy.ndarray.
None, ndarray or list of ndarray
The outputs, format as ndarray.
See Also
--------
......
......@@ -42,6 +42,7 @@ List Brief
`BilinearResize`_ Resize the image with Bi-linear method.
`BiasAdd`_ Add the bias across channels to a ``NCHW`` or ``NHWC`` input.
`DenseConcat`_ Memory-efficient concatenation for DenseNet. `[Huang et.al, 2017] <http://arxiv.org/abs/1608.06993>`_.
`DropBlock2d`_ Randomly drop the outputs according to the spatial blocks. `[Ghiasi et.al, 2018] <https://arxiv.org/abs/1810.12890>`_.
=================== ======================================================================
Recurrent
......@@ -76,6 +77,7 @@ Loss
============================= ======================================================================
List Brief
============================= ======================================================================
`NLLLoss`_ Negative log-likelihood loss with sparse labels.
`SparseSoftmaxCrossEntropy`_ SoftmaxCrossEntropy with sparse labels.
`SigmoidCrossEntropy`_ SigmoidCrossEntropy.
`SoftmaxCrossEntropy`_ SoftmaxCrossEntropy with dense(one-hot) labels.
......@@ -102,6 +104,8 @@ List Brief
`Exp`_ Calculate the exponential of input.
`Square`_ Calculate the square of input.
`Sqrt`_ Calculate the sqrt of input.
`Maximum`_ Return the max value of given two inputs.
`Minimum`_ Return the min value of given two inputs.
`Clip`_ Clip the input to be between lower and higher bounds.
`Matmul`_ Matrix Multiplication.
`InnerProduct`_ InnerProduct Function.
......@@ -215,6 +219,7 @@ List Brief
.. _BilinearResize: operators/vision.html#dragon.operators.vision.BilinearResize
.. _BiasAdd: operators/vision.html#dragon.operators.vision.BiasAdd
.. _DenseConcat: operators/vision.html#dragon.operators.vision.DenseConcat
.. _DropBlock2d: operators/vision.html#dragon.operators.vision.DropBlock2d
.. _RNN: operators/recurrent.html#dragon.operators.recurrent.RNN
.. _LSTM: operators/recurrent.html#dragon.operators.recurrent.LSTM
......@@ -231,6 +236,7 @@ List Brief
.. _Softmax: operators/activation.html#dragon.operators.activation.Softmax
.. _Dropout: operators/activation.html#dragon.operators.activation.Dropout
.. _NLLLoss: operators/loss.html#dragon.operators.loss.NLLLoss
.. _SparseSoftmaxCrossEntropy: operators/loss.html#dragon.operators.loss.SparseSoftmaxCrossEntropy
.. _SigmoidCrossEntropy: operators/loss.html#dragon.operators.loss.SigmoidCrossEntropy
.. _SoftmaxCrossEntropy: operators/loss.html#dragon.operators.loss.SoftmaxCrossEntropy
......@@ -246,6 +252,8 @@ List Brief
.. _Mul: operators/arithmetic.html#dragon.operators.arithmetic.Mul
.. _Div: operators/arithmetic.html#dragon.operators.arithmetic.Div
.. _Clip: operators/arithmetic.html#dragon.operators.arithmetic.Clip
.. _Maximum: operators/arithmetic.html#dragon.operators.arithmetic.Maximum
.. _Minimum: operators/arithmetic.html#dragon.operators.arithmetic.Minimum
.. _Pow: operators/arithmetic.html#dragon.operators.arithmetic.Pow
.. _Log: operators/arithmetic.html#dragon.operators.arithmetic.Log
.. _Exp: operators/arithmetic.html#dragon.operators.arithmetic.Exp
......
......@@ -32,6 +32,7 @@ List Brief
`LRNLayer`_ The implementation of ``LRNLayer``.
`NNResizeLayer`_ The implementation of ``NNResizeLayer``.
`BilinearResizeLayer`_ The implementation of ``BilinearResizeLayer``.
`DropBlockLayer`_ The implementation of ``DropBlockLayer``.
====================== =============================================================================
......@@ -160,6 +161,7 @@ API Reference
.. _LRNLayer: #dragon.vm.caffe.layers.vision.LRNLayer
.. _NNResizeLayer: #dragon.vm.caffe.layers.vision.NNResizeLayer
.. _BilinearResizeLayer: #dragon.vm.caffe.layers.vision.BilinearResizeLayer
.. _DropBlockLayer: #dragon.vm.caffe.layers.vision.DropBlockLayer
.. _ReLULayer: #dragon.vm.caffe.layers.neuron.ReLULayer
.. _PReLULayer: #dragon.vm.caffe.layers.neuron.PReLULayer
......
......@@ -15,6 +15,7 @@ from __future__ import print_function
import sys
import logging
import atexit
try:
from dragon.libdragon import *
......@@ -22,3 +23,5 @@ except ImportError as e:
logging.critical(
'Cannot import dragon. Error: {0}'.format(str(e)))
sys.exit(1)
atexit.register(OnModuleExitCC)
\ No newline at end of file
......@@ -101,7 +101,8 @@ class DataTransformer(Process):
im = im.reshape((datum.height, datum.width, datum.channels))
# random scale
random_scale = npr.uniform() * (self._max_random_scale - self._min_random_scale) \
random_scale = npr.uniform() * (
self._max_random_scale - self._min_random_scale) \
+ self._min_random_scale
if random_scale != 1.0:
if sys.version_info >= (3, 0):
......@@ -110,7 +111,9 @@ class DataTransformer(Process):
else:
# NOTE: opencv-python2 has a long-standing bug
# that leads to duplicate CUDA handles created at gpu:0
new_shape = (int(im.shape[1] * random_scale), int(im.shape[0] * random_scale))
new_shape = (
int(np.ceil(im.shape[1] * random_scale)),
int(np.ceil(im.shape[0] * random_scale)))
im = PIL.Image.fromarray(im)
im = im.resize(new_shape, PIL.Image.BILINEAR)
im = np.array(im)
......
......@@ -9,10 +9,12 @@
#
# ------------------------------------------------------------
import numpy as np
from dragon.core.tensor import Tensor
INT_MAX = 2147483647
def CheckInputs(inputs, *args):
def Verify(inputs, min_num, max_num):
# type checking
......@@ -44,6 +46,17 @@ def ParseArguments(locals):
return dict(__all__, **kwargs)
def WrapConstants(constants, dtype='float32'):
    """Wrap a python/numpy constant into a Tensor carrying its value.

    A Tensor passes through untouched; any other value is first coerced
    to a numpy array of ``dtype``, then stored into a fresh Tensor whose
    shape mirrors the array's.
    """
    if isinstance(constants, Tensor):
        return constants
    if not isinstance(constants, np.ndarray):
        constants = np.array(constants, dtype=dtype)
    wrapped = Tensor()
    wrapped.set_value(constants)
    wrapped.shape = constants.shape
    return wrapped
def AddArgumentWithDesc(arguments, property, name, as_target=True):
if isinstance(property, Tensor):
if as_target:
......
......@@ -115,6 +115,70 @@ def Div(inputs, **kwargs):
return output
def Maximum(inputs, **kwargs):
    """Return the max value of given two inputs.

    Parameters
    ----------
    inputs : list
        The input tensors, A and B. Plain numbers and numpy arrays are
        also accepted; they are wrapped into constant Tensors first.

    Returns
    -------
    Tensor
        The output tensor.

    """
    # Wrap python/numpy constants so both operands are Tensors.
    # NOTE: keep these assignments before ParseArguments(locals()) —
    # that call captures the current local variables as the operator
    # arguments, so the wrapped values must already be in place.
    inputs[0] = WrapConstants(inputs[0], dtype='float32')
    inputs[1] = WrapConstants(inputs[1], dtype='float32')
    CheckInputs(inputs, 2)
    arguments = ParseArguments(locals())
    output = Tensor.CreateOperator(nout=1, op_type='Maximum', **arguments)
    # Shape inference: start from A's shape, but prefer B's when B has
    # a higher rank (presumably the broadcasting case — TODO confirm
    # against the C++ op).
    if inputs[0].shape is not None and \
        inputs[1].shape is not None:
        output.shape = inputs[0].shape[:]
        if output.shape != inputs[1].shape and \
            len(output.shape) < len(inputs[1].shape):
            output.shape = inputs[1].shape
    return output
def Minimum(inputs, **kwargs):
    """Return the min value of given two inputs.

    Parameters
    ----------
    inputs : list
        The input tensors, A and B. Plain numbers and numpy arrays are
        also accepted; they are wrapped into constant Tensors first.

    Returns
    -------
    Tensor
        The output tensor.

    """
    # Wrap python/numpy constants so both operands are Tensors.
    # NOTE: keep these assignments before ParseArguments(locals()) —
    # that call captures the current local variables as the operator
    # arguments, so the wrapped values must already be in place.
    inputs[0] = WrapConstants(inputs[0], dtype='float32')
    inputs[1] = WrapConstants(inputs[1], dtype='float32')
    CheckInputs(inputs, 2)
    arguments = ParseArguments(locals())
    output = Tensor.CreateOperator(nout=1, op_type='Minimum', **arguments)
    # Shape inference: start from A's shape, but prefer B's when B has
    # a higher rank (presumably the broadcasting case — TODO confirm
    # against the C++ op).
    if inputs[0].shape is not None and \
        inputs[1].shape is not None:
        output.shape = inputs[0].shape[:]
        if output.shape != inputs[1].shape and \
            len(output.shape) < len(inputs[1].shape):
            output.shape = inputs[1].shape
    return output
def Clip(inputs, low=None, high=None, **kwargs):
"""Clip the input to be between lower and higher bounds.
......
......@@ -36,15 +36,19 @@ def _wrap_output_shape(output, shape):
return output
def Fill(shape, value=0, **kwargs):
def Fill(shape, value=0, dtype='float32', **kwargs):
"""Return a Tensor with specific value filled.
If ``dtype`` is ``None``, ``float32`` will be used.
Parameters
----------
shape : list, tuple or Tensor
The output shape.
value : basic numerical type
The value to fill.
dtype : str
The optional data type.
Returns
-------
......
......@@ -19,6 +19,46 @@ from . import *
from .activation import Softmax
def NLLLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs):
    """Negative log-likelihood loss with sparse labels.

    Parameters
    ----------
    inputs : list of Tensor
        The inputs, represent [input, sparse_labels].
    axis : int
        The axis of softmax function.
    normalization : str
        The normalization, ``UNIT``, ``FULL``, ``VALID``, ``BATCH_SIZE`` or ``NONE``.
    ignore_labels : tuple or list
        The label ids to ignore. Default is ``empty``.

    Returns
    -------
    Tensor
        The loss.

    Notes
    -----
    Set the normalization to ``UNIT`` will return unreduced losses.

    """
    CheckInputs(inputs, 2)
    arguments = ParseArguments(locals())
    output = Tensor.CreateOperator(nout=1, op_type='NLLLoss', **arguments)
    # Shape inference, driven by the reduction mode.
    if inputs[0].shape is not None:
        # Any reduced mode collapses the loss to a single value.
        if normalization != 'UNIT': output.shape = [1]
        elif all(dim is not None for dim in inputs[0].shape):
            # UNIT (unreduced): one loss per position over every dim
            # except `axis`.
            outer_dim = int(np.prod(inputs[0].shape[0 : axis]))
            inner_dim = int(np.prod(inputs[0].shape[axis + 1 :]))
            output.shape = [outer_dim * inner_dim]
        else: output.shape = [None]
    return output
def SparseSoftmaxCrossEntropy(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs):
"""SoftmaxCrossEntropy with sparse labels.
......
......@@ -16,8 +16,10 @@ from __future__ import print_function
from . import *
def BatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3,
use_stats=-1, mode='DEFAULT', **kwargs):
def BatchNorm(
inputs, axis=-1, momentum=0.9, eps=1e-5,
use_stats=-1, mode='DEFAULT', **kwargs
):
"""Batch Normalization. `[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_.
It follows the implementation of `Caffe`_, that scale procedure is moved to `ops.Scale(*args, **kwargs)`_.
......@@ -70,9 +72,11 @@ def BatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3,
return output
def BatchRenorm(inputs, axis=-1, momentum=0.9, eps=1e-3,
def BatchRenorm(
inputs, axis=-1, momentum=0.9, eps=1e-5,
r_max=3.0, d_max=5.0, t_delta=0.001,
use_stats=-1, mode='DEFAULT', **kwargs):
use_stats=-1, mode='DEFAULT', **kwargs
):
"""Batch Renormalization. `[Ioffe, 2017] <https://arxiv.org/abs/1702.03275>`_.
It follows the implementation of `Caffe`_, that scale procedure is moved to `ops.Scale(*args, **kwargs)`_.
......@@ -131,7 +135,10 @@ def BatchRenorm(inputs, axis=-1, momentum=0.9, eps=1e-3,
return output
def FusedBatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3, use_stats=-1, **kwargs):
def FusedBatchNorm(
inputs, axis=-1, momentum=0.9, eps=1e-5,
use_stats=-1, **kwargs
):
"""Batch Normalization, with scale procedure after normalization.
Parameters
......@@ -170,7 +177,7 @@ def FusedBatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3, use_stats=-1, **kwar
return output
def GroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs):
def GroupNorm(inputs, group=32, axis=-1, eps=1e-5, **kwargs):
"""Group Normalization. `[Wu & He, 2018] <https://arxiv.org/abs/1803.08494>`_.
Parameters
......@@ -203,7 +210,7 @@ def GroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs):
return output
def FusedGroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs):
def FusedGroupNorm(inputs, group=32, axis=-1, eps=1e-5, **kwargs):
"""Group Normalization, with scale procedure after normalization.
Parameters
......@@ -236,7 +243,7 @@ def FusedGroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs):
return output
def InstanceNorm(inputs, axis=-1, eps=1e-3, **kwargs):
def InstanceNorm(inputs, axis=-1, eps=1e-5, **kwargs):
"""Instance Normalization. `[Ulyanov et.al, 2016] <https://arxiv.org/abs/1607.08022>`_
Parameters
......
......@@ -630,3 +630,44 @@ def DenseConcat(inputs, growth_rate=0, axis=1, **kwargs):
output.shape[axis] += inputs[i].shape[axis]
return output
def DropBlock2d(inputs, block_size=7, keep_prob=0.9,
                alpha=1., decrement=0., data_format='NCHW', **kwargs):
    """Randomly drop the outputs according to the spatial blocks. `[Ghiasi et.al, 2018] <https://arxiv.org/abs/1810.12890>`_.

    Set the ``decrement`` to schedule ``keep_prob`` for each iteration.

    Set the ``alpha`` to decrease ``gamma`` for different stages.

    Parameters
    ----------
    inputs : Tensor
        The input tensor.
    block_size : int
        The size of dropping block.
    keep_prob : float or Tensor
        The prob of keeping. Default is ``0.9``.
    alpha : float
        The scale factor to gamma.
    decrement : float
        The decrement to keep prob.
    data_format : str
        The data format, ``NCHW`` or ``NHWC``.

    Returns
    -------
    Tensor
        The output tensor.

    """
    CheckInputs(inputs, 1)
    arguments = ParseArguments(locals())
    # ``keep_prob`` may be a Tensor (see the docstring); register it as
    # a described argument so its value can be resolved at run time.
    arguments = AddArgumentWithDesc(arguments, keep_prob, 'keep_prob', as_target=False)
    output = Tensor.CreateOperator(nout=1, op_type='DropBlock2d', **arguments)
    # Dropping does not change the spatial layout: output keeps the
    # input's shape.
    if inputs.shape is not None:
        output.shape = inputs.shape[:]
    return output
\ No newline at end of file
......@@ -51,6 +51,7 @@ NNResize = vision.NNResize
BilinearResize = vision.BilinearResize
BiasAdd = vision.BiasAdd
DenseConcat = vision.DenseConcat
DropBlock2d = vision.DropBlock2d
# recurrent
LSTMCell = recurrent.LSTMCell
......@@ -70,6 +71,7 @@ Softmax = act.Softmax
Dropout = act.Dropout
# loss
NLLLoss = loss.NLLLoss
SparseSoftmaxCrossEntropy = loss.SparseSoftmaxCrossEntropy
SigmoidCrossEntropy = loss.SigmoidCrossEntropy
SoftmaxCrossEntropy = loss.SoftmaxCrossEntropy
......@@ -85,6 +87,8 @@ Add = math.Add
Sub = math.Sub
Mul = math.Mul
Div = math.Div
Maximum = math.Maximum
Minimum = math.Minimum
Clip = math.Clip
Matmul = math.Matmul
Pow = math.Pow
......
......@@ -35,7 +35,11 @@ message Argument {
repeated string strings=7;
}
enum DeviceType { CPU = 0; CUDA = 1; OPENCL = 2; }
// The supported compute backends.
enum DeviceType {
    CPU = 0;   // Host CPU
    CUDA = 1;  // NVIDIA's CUDA device (GPU)
    CNML = 2;  // Cambricon's CNML device (MLU)
}
message DeviceOption {
optional DeviceType device_type = 1 [default = CPU];
......
......@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='dragon.proto',
package='dragon',
serialized_pb=_b('\n\x0c\x64ragon.proto\x12\x06\x64ragon\"\xfe\x01\n\x0bTensorProto\x12\x0c\n\x04\x64ims\x18\x01 \x03(\x05\x12\x36\n\tdata_type\x18\x02 \x01(\x0e\x32\x1c.dragon.TensorProto.DataType:\x05\x46LOAT\x12\x16\n\nfloat_data\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x16\n\nint32_data\x18\x04 \x03(\x05\x42\x02\x10\x01\x12\x11\n\tbyte_data\x18\x05 \x01(\x0c\x12\x13\n\x0bstring_data\x18\x06 \x03(\x0c\x12\x0c\n\x04name\x18\x07 \x01(\t\"C\n\x08\x44\x61taType\x12\t\n\x05\x46LOAT\x10\x01\x12\t\n\x05INT32\x10\x02\x12\x08\n\x04\x42YTE\x10\x03\x12\n\n\x06STRING\x10\x04\x12\x0b\n\x07\x46LOAT16\x10\x0c\"3\n\x0cTensorProtos\x12#\n\x06protos\x18\x01 \x03(\x0b\x32\x13.dragon.TensorProto\"\x80\x01\n\x08\x41rgument\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\t\n\x01\x66\x18\x02 \x01(\x02\x12\t\n\x01i\x18\x03 \x01(\x05\x12\x0b\n\x03i64\x18\t \x01(\x03\x12\t\n\x01s\x18\x04 \x01(\t\x12\t\n\x01\x62\x18\x08 \x01(\x08\x12\x0e\n\x06\x66loats\x18\x05 \x03(\x02\x12\x0c\n\x04ints\x18\x06 \x03(\x05\x12\x0f\n\x07strings\x18\x07 \x03(\t\"z\n\x0c\x44\x65viceOption\x12,\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\x12.dragon.DeviceType:\x03\x43PU\x12\x14\n\tdevice_id\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0brandom_seed\x18\x03 \x01(\r:\x01\x33\x12\x0e\n\x06\x65ngine\x18\x04 \x01(\t\"\x94\x01\n\x0bOperatorDef\x12\r\n\x05input\x18\x01 \x03(\t\x12\x0e\n\x06output\x18\x02 \x03(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0c\n\x04type\x18\x04 \x01(\t\x12\x1d\n\x03\x61rg\x18\x05 \x03(\x0b\x32\x10.dragon.Argument\x12+\n\rdevice_option\x18\x06 \x01(\x0b\x32\x14.dragon.DeviceOption\"=\n\x0eGradientTarget\x12\x0c\n\x04\x63ost\x18\x01 \x01(\t\x12\x0b\n\x03wrt\x18\x02 \x01(\t\x12\x10\n\x08\x65xternal\x18\x03 \x01(\t\"Y\n\x0cUpdateTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06tensor\x18\x03 \x03(\t\x12\x1d\n\x03\x61rg\x18\x04 \x03(\x0b\x32\x10.dragon.Argument\"\x94\x02\n\x0cTensorFiller\x12\x0e\n\x06tensor\x18\x01 \x01(\t\x12\x16\n\x04type\x18\x02 
\x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03low\x18\x04 \x01(\x02:\x01\x30\x12\x0f\n\x04high\x18\x05 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x06 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x07 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x08 \x01(\x02:\x01\x33\x12@\n\rvariance_norm\x18\t \x01(\x0e\x32!.dragon.TensorFiller.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"\xfb\x01\n\x08GraphDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1f\n\x02op\x18\x02 \x03(\x0b\x32\x13.dragon.OperatorDef\x12\x12\n\ngraph_type\x18\x03 \x01(\t\x12+\n\rdevice_option\x18\x05 \x01(\x0b\x32\x14.dragon.DeviceOption\x12\x1d\n\x03\x61rg\x18\x06 \x03(\x0b\x32\x10.dragon.Argument\x12\x0e\n\x06target\x18\x07 \x03(\t\x12(\n\x08g_target\x18\x08 \x03(\x0b\x32\x16.dragon.GradientTarget\x12&\n\x08u_target\x18\t \x03(\x0b\x32\x14.dragon.UpdateTarget*+\n\nDeviceType\x12\x07\n\x03\x43PU\x10\x00\x12\x08\n\x04\x43UDA\x10\x01\x12\n\n\x06OPENCL\x10\x02')
serialized_pb=_b('\n\x0c\x64ragon.proto\x12\x06\x64ragon\"\xfe\x01\n\x0bTensorProto\x12\x0c\n\x04\x64ims\x18\x01 \x03(\x05\x12\x36\n\tdata_type\x18\x02 \x01(\x0e\x32\x1c.dragon.TensorProto.DataType:\x05\x46LOAT\x12\x16\n\nfloat_data\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x16\n\nint32_data\x18\x04 \x03(\x05\x42\x02\x10\x01\x12\x11\n\tbyte_data\x18\x05 \x01(\x0c\x12\x13\n\x0bstring_data\x18\x06 \x03(\x0c\x12\x0c\n\x04name\x18\x07 \x01(\t\"C\n\x08\x44\x61taType\x12\t\n\x05\x46LOAT\x10\x01\x12\t\n\x05INT32\x10\x02\x12\x08\n\x04\x42YTE\x10\x03\x12\n\n\x06STRING\x10\x04\x12\x0b\n\x07\x46LOAT16\x10\x0c\"3\n\x0cTensorProtos\x12#\n\x06protos\x18\x01 \x03(\x0b\x32\x13.dragon.TensorProto\"\x80\x01\n\x08\x41rgument\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\t\n\x01\x66\x18\x02 \x01(\x02\x12\t\n\x01i\x18\x03 \x01(\x05\x12\x0b\n\x03i64\x18\t \x01(\x03\x12\t\n\x01s\x18\x04 \x01(\t\x12\t\n\x01\x62\x18\x08 \x01(\x08\x12\x0e\n\x06\x66loats\x18\x05 \x03(\x02\x12\x0c\n\x04ints\x18\x06 \x03(\x05\x12\x0f\n\x07strings\x18\x07 \x03(\t\"z\n\x0c\x44\x65viceOption\x12,\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\x12.dragon.DeviceType:\x03\x43PU\x12\x14\n\tdevice_id\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0brandom_seed\x18\x03 \x01(\r:\x01\x33\x12\x0e\n\x06\x65ngine\x18\x04 \x01(\t\"\x94\x01\n\x0bOperatorDef\x12\r\n\x05input\x18\x01 \x03(\t\x12\x0e\n\x06output\x18\x02 \x03(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0c\n\x04type\x18\x04 \x01(\t\x12\x1d\n\x03\x61rg\x18\x05 \x03(\x0b\x32\x10.dragon.Argument\x12+\n\rdevice_option\x18\x06 \x01(\x0b\x32\x14.dragon.DeviceOption\"=\n\x0eGradientTarget\x12\x0c\n\x04\x63ost\x18\x01 \x01(\t\x12\x0b\n\x03wrt\x18\x02 \x01(\t\x12\x10\n\x08\x65xternal\x18\x03 \x01(\t\"Y\n\x0cUpdateTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06tensor\x18\x03 \x03(\t\x12\x1d\n\x03\x61rg\x18\x04 \x03(\x0b\x32\x10.dragon.Argument\"\x94\x02\n\x0cTensorFiller\x12\x0e\n\x06tensor\x18\x01 \x01(\t\x12\x16\n\x04type\x18\x02 
\x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03low\x18\x04 \x01(\x02:\x01\x30\x12\x0f\n\x04high\x18\x05 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x06 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x07 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x08 \x01(\x02:\x01\x33\x12@\n\rvariance_norm\x18\t \x01(\x0e\x32!.dragon.TensorFiller.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"\xfb\x01\n\x08GraphDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1f\n\x02op\x18\x02 \x03(\x0b\x32\x13.dragon.OperatorDef\x12\x12\n\ngraph_type\x18\x03 \x01(\t\x12+\n\rdevice_option\x18\x05 \x01(\x0b\x32\x14.dragon.DeviceOption\x12\x1d\n\x03\x61rg\x18\x06 \x03(\x0b\x32\x10.dragon.Argument\x12\x0e\n\x06target\x18\x07 \x03(\t\x12(\n\x08g_target\x18\x08 \x03(\x0b\x32\x16.dragon.GradientTarget\x12&\n\x08u_target\x18\t \x03(\x0b\x32\x14.dragon.UpdateTarget*)\n\nDeviceType\x12\x07\n\x03\x43PU\x10\x00\x12\x08\n\x04\x43UDA\x10\x01\x12\x08\n\x04\x43NML\x10\x02')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
......@@ -38,21 +38,21 @@ _DEVICETYPE = _descriptor.EnumDescriptor(
options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='OPENCL', index=2, number=2,
name='CNML', index=2, number=2,
options=None,
type=None),
],
containing_type=None,
options=None,
serialized_start=1427,
serialized_end=1470,
serialized_end=1468,
)
_sym_db.RegisterEnumDescriptor(_DEVICETYPE)
DeviceType = enum_type_wrapper.EnumTypeWrapper(_DEVICETYPE)
CPU = 0
CUDA = 1
OPENCL = 2
CNML = 2
_TENSORPROTO_DATATYPE = _descriptor.EnumDescriptor(
......
......@@ -14,7 +14,6 @@ from __future__ import division
from __future__ import print_function
import pprint
import numpy as np
import dragon.core.workspace as ws
from dragon.core.tensor import Tensor
......@@ -43,7 +42,7 @@ class BaseUpdater(object):
self._defaults = {
'scale_gradient': scale_gradient,
'clip_gradient': clip_gradient,
'l2_decay': l2_decay
'l2_decay': l2_decay,
}
self._param_group = []
self._slot = slot
......@@ -77,7 +76,7 @@ class BaseUpdater(object):
defaults = self.__dict__.get('_defaults')
if item in defaults:
if self._registered:
return ws.FetchTensor(self._slot + '/' + item)[0]
return ws.FetchTensor(self._slot + '/' + item)
else: return defaults[item]
return self.__dict__[item]
......@@ -85,9 +84,8 @@ class BaseUpdater(object):
defaults = self.__dict__.get('_defaults')
if defaults is not None and key in defaults:
if self._registered:
# convert all defaults as float32 for convenience
ws.FeedTensor(self._slot + '/' + key,
np.array([value], dtype=np.float32))
ws.FeedTensor(self._slot + '/' + key, value,
dtype='float32', force_cpu=True)
else:
self._defaults[key] = value
else:
......@@ -96,8 +94,8 @@ class BaseUpdater(object):
def register_in_workspace(self):
if not self._registered:
for k, v in self._defaults.items():
# convert all defaults as float32 for convenience
ws.FeedTensor(self._slot + "/" + k, np.array([v], dtype=np.float32))
ws.FeedTensor(self._slot + "/" + k, v,
dtype='float32', force_cpu=True)
self._registered = True
if self._verbose:
from dragon.config import logger
......
......@@ -14,7 +14,7 @@ from __future__ import division
from __future__ import print_function
version = '0.2.2'
full_version = '0.2.2.11'
full_version = '0.2.2.13'
release = False
if not release:
......
......@@ -19,7 +19,8 @@ from .vision import ConvolutionLayer, \
ROIPoolingLayer, \
ROIAlignLayer, \
NNResizeLayer, \
BilinearResizeLayer
BilinearResizeLayer, \
DropBlockLayer
from .neuron import ReLULayer, \
PReLULayer, \
......
......@@ -446,10 +446,13 @@ class InstanceNormLayer(Layer):
The implementation of ``InstanceNormLayer``.
Introduced by `[Ulyanov et.al, 2016] <https://arxiv.org/abs/1607.08022>`_
"""
def __init__(self, LayerParameter):
super(InstanceNormLayer, self).__init__(LayerParameter)
self._param = {'axis': 1}
param = LayerParameter.instance_norm_param
self._param = {'eps': param.eps,
'axis': 1}
def Setup(self, bottom):
super(InstanceNormLayer, self).Setup(bottom)
......
......@@ -250,7 +250,7 @@ class NNResizeLayer(Layer):
Parameters
----------
shape : caffe_pb2. BlobShape
shape : caffe_pb2.BlobShape
The output shape. Refer `ResizeParameter.shape`_.
fx : float
The scale factor of height. Refer `ResizeParameter.fx`_.
......@@ -283,7 +283,7 @@ class BilinearResizeLayer(Layer):
Parameters
----------
shape : caffe_pb2. BlobShape
shape : caffe_pb2.BlobShape
The output shape. Refer `ResizeParameter.shape`_.
fx : float
The scale factor of height. Refer `ResizeParameter.fx`_.
......@@ -309,3 +309,33 @@ class BilinearResizeLayer(Layer):
raise ValueError('The second bottom should be provided to determine the shape.')
self._param['shape_like'] = bottom[1]
return ops.BilinearResize(input, **self._param)
class DropBlockLayer(Layer):
"""The implementation of ``DropBlock2dLayer``.
Parameters
----------
block_size : int
The size of dropping block. Refer ``DropBlockParameter.block_size``.
keep_prob : float
The prob of keeping. Refer ``DropBlockParameter.keep_prob``.
alpha : float
The scale factor to gamma. Refer ``DropBlockParameter.alpha``.
decrement : float
The decrement to keep prob. Refer ``DropBlockParameter.decrement``.
"""
def __init__(self, LayerParameter):
super(DropBlockLayer, self).__init__(LayerParameter)
param = LayerParameter.drop_block_param
self._param = {'block_size': param.block_size,
'keep_prob': param.keep_prob,
'alpha': param.alpha,
'decrement': param.decrement,
'data_format': 'NCHW'}
def Setup(self, bottom):
super(DropBlockLayer, self).Setup(bottom)
input = bottom[0] if isinstance(bottom, list) else bottom
return ops.DropBlock2d(input, **self._param)
\ No newline at end of file
......@@ -424,7 +424,9 @@ message LayerParameter {
optional DenseConcatParameter dense_concat_param = 163;
optional FocalLossParameter focal_loss_param = 164;
optional GatherParameter gather_param = 165;
optional GroupNormParameter group_norm_param = 166;
optional InstanceNormParameter instance_norm_param = 166;
optional GroupNormParameter group_norm_param = 167;
optional DropBlockParameter drop_block_param = 168;
}
// Message that stores parameters used to apply transformation
......@@ -537,7 +539,7 @@ message BatchNormParameter {
optional float moving_average_fraction = 2 [default = 0.9];
// Small value to add to the variance estimate so that we don't divide by
// zero.
optional float eps = 3 [default = 1e-3];
optional float eps = 3 [default = 1e-5];
}
message BiasParameter {
......@@ -595,7 +597,7 @@ message ConvolutionParameter {
repeated uint32 stride = 6; // The stride; defaults to 1
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the
// algorithme à trous from Holschneider et al. 1987.)
// algorithme ¨¤ trous from Holschneider et al. 1987.)
repeated uint32 dilation = 18; // The dilation; defaults to 1
// For 2D convolution only, the *_h and *_w versions may also be used to
......@@ -1456,7 +1458,7 @@ message NormalizeParameter {
// Whether or not scale parameters are shared across channels.
optional bool channel_shared = 3 [default = true];
// Epsilon for not dividing by zero while normalizing variance
optional float eps = 4 [default = 1e-3];
optional float eps = 4 [default = 1e-5];
}
message ParallelParameter {
......@@ -1492,7 +1494,7 @@ message ProposalParameter {
message BatchRenormParameter {
optional bool use_global_stats = 1;
optional float moving_average_fraction = 2 [default = 0.9];
optional float eps = 3 [default = 1e-3];
optional float eps = 3 [default = 1e-5];
optional float r_max = 4 [default = 3.0];
optional float d_max = 5 [default = 5.0];
optional float t_delta = 6 [default = 0.001];
......@@ -1513,17 +1515,18 @@ message GatherParameter {
optional int32 axis = 1 [default = 0];
}
message GroupNormParameter {
// If false, accumulate global mean/variance values via a moving average. If
// true, use those accumulated values instead of computing mean/variance
// across the batch.
optional bool use_global_stats = 1;
// How much does the moving average decay each iteration?
optional float moving_average_fraction = 2 [default = 0.9];
// Small value to add to the variance estimate so that we don't divide by
// zero.
optional float eps = 3 [default = 1e-3];
optional uint32 group = 5 [default = 32]; // The group size
// Parameters for instance normalization.
message InstanceNormParameter {
  // Small value added to the variance estimate to avoid dividing by zero.
  optional float eps = 1 [default = 1e-5];
}
// Parameters for group normalization.
message GroupNormParameter {
  // Small value added to the variance estimate to avoid dividing by zero.
  optional float eps = 1 [default = 1e-5];
  optional int32 group = 2 [default = 32]; // The group size
}
// Parameters for the DropBlock (2d) regularization layer.
message DropBlockParameter {
  // Side length of the square block to drop.
  optional int32 block_size = 1 [default = 7];
  // Probability of keeping an activation unit.
  optional float keep_prob = 2 [default = 0.9];
  // NOTE(review): presumably a scaling factor on the drop rate — confirm
  // against the DropBlock op implementation.
  optional float alpha = 3 [default = 1.0];
  // NOTE(review): looks like a per-iteration decrement used to anneal
  // keep_prob over training (0 disables annealing) — confirm with the op.
  optional float decrement = 4 [default = 0.0];
}
......@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='caffe.proto',
package='caffe',
serialized_pb=_b('\n\x0b\x63\x61\x66\x66\x65.proto\x12\x05\x63\x61\x66\x66\x65\"\x1c\n\tBlobShape\x12\x0f\n\x03\x64im\x18\x01 \x03(\x03\x42\x02\x10\x01\"\xcc\x01\n\tBlobProto\x12\x1f\n\x05shape\x18\x07 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x10\n\x04\x64\x61ta\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04\x64iff\x18\x06 \x03(\x02\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_data\x18\x08 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_diff\x18\t \x03(\x01\x42\x02\x10\x01\x12\x0e\n\x03num\x18\x01 \x01(\x05:\x01\x30\x12\x13\n\x08\x63hannels\x18\x02 \x01(\x05:\x01\x30\x12\x11\n\x06height\x18\x03 \x01(\x05:\x01\x30\x12\x10\n\x05width\x18\x04 \x01(\x05:\x01\x30\"2\n\x0f\x42lobProtoVector\x12\x1f\n\x05\x62lobs\x18\x01 \x03(\x0b\x32\x10.caffe.BlobProto\"\x91\x01\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02\x12\x16\n\x07\x65ncoded\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x0e\n\x06labels\x18\x08 \x03(\x05\"\x8a\x02\n\x0f\x46illerParameter\x12\x16\n\x04type\x18\x01 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x02 \x01(\x02:\x01\x30\x12\x0e\n\x03min\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03max\x18\x04 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x05 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x06 \x01(\x02:\x01\x31\x12\x12\n\x06sparse\x18\x07 \x01(\x05:\x02-1\x12\x42\n\rvariance_norm\x18\x08 \x01(\x0e\x32#.caffe.FillerParameter.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x41VERAGE\x10\x02\"\x8e\x02\n\x0cNetParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05input\x18\x03 \x03(\t\x12%\n\x0binput_shape\x18\x08 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x11\n\tinput_dim\x18\x04 \x03(\x05\x12\x1d\n\x0e\x66orce_backward\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\x1e\n\x05state\x18\x06 
\x01(\x0b\x32\x0f.caffe.NetState\x12\x19\n\ndebug_info\x18\x07 \x01(\x08:\x05\x66\x61lse\x12$\n\x05layer\x18\x64 \x03(\x0b\x32\x15.caffe.LayerParameter\x12\'\n\x06layers\x18\x02 \x03(\x0b\x32\x17.caffe.V1LayerParameter\"\xc9\n\n\x0fSolverParameter\x12\x0b\n\x03net\x18\x18 \x01(\t\x12&\n\tnet_param\x18\x19 \x01(\x0b\x32\x13.caffe.NetParameter\x12\x11\n\ttrain_net\x18\x01 \x01(\t\x12\x10\n\x08test_net\x18\x02 \x03(\t\x12,\n\x0ftrain_net_param\x18\x15 \x01(\x0b\x32\x13.caffe.NetParameter\x12+\n\x0etest_net_param\x18\x16 \x03(\x0b\x32\x13.caffe.NetParameter\x12$\n\x0btrain_state\x18\x1a \x01(\x0b\x32\x0f.caffe.NetState\x12#\n\ntest_state\x18\x1b \x03(\x0b\x32\x0f.caffe.NetState\x12\x11\n\ttest_iter\x18\x03 \x03(\x05\x12\x18\n\rtest_interval\x18\x04 \x01(\x05:\x01\x30\x12 \n\x11test_compute_loss\x18\x13 \x01(\x08:\x05\x66\x61lse\x12!\n\x13test_initialization\x18 \x01(\x08:\x04true\x12\x0f\n\x07\x62\x61se_lr\x18\x05 \x01(\x02\x12\x10\n\x08stage_lr\x18\x32 \x03(\x02\x12\x12\n\nstage_iter\x18\x33 \x03(\x05\x12\x0f\n\x07\x64isplay\x18\x06 \x01(\x05\x12\x17\n\x0c\x61verage_loss\x18! 
\x01(\x05:\x01\x31\x12\x10\n\x08max_iter\x18\x07 \x01(\x05\x12\x14\n\titer_size\x18$ \x01(\x05:\x01\x31\x12\x11\n\tlr_policy\x18\x08 \x01(\t\x12\r\n\x05gamma\x18\t \x01(\x02\x12\r\n\x05power\x18\n \x01(\x02\x12\x10\n\x08momentum\x18\x0b \x01(\x02\x12\x14\n\x0cweight_decay\x18\x0c \x01(\x02\x12\x1f\n\x13regularization_type\x18\x1d \x01(\t:\x02L2\x12\x10\n\x08stepsize\x18\r \x01(\x05\x12\x11\n\tstepvalue\x18\" \x03(\x05\x12\x1a\n\x0e\x63lip_gradients\x18# \x01(\x02:\x02-1\x12\x13\n\x08snapshot\x18\x0e \x01(\x05:\x01\x30\x12\x17\n\x0fsnapshot_prefix\x18\x0f \x01(\t\x12\x1c\n\rsnapshot_diff\x18\x10 \x01(\x08:\x05\x66\x61lse\x12K\n\x0fsnapshot_format\x18% \x01(\x0e\x32%.caffe.SolverParameter.SnapshotFormat:\x0b\x42INARYPROTO\x12;\n\x0bsolver_mode\x18\x11 \x01(\x0e\x32!.caffe.SolverParameter.SolverMode:\x03GPU\x12\x14\n\tdevice_id\x18\x12 \x01(\x05:\x01\x30\x12\x17\n\x0brandom_seed\x18\x14 \x01(\x03:\x02-1\x12\x11\n\x04type\x18( \x01(\t:\x03SGD\x12\x15\n\x05\x64\x65lta\x18\x1f \x01(\x02:\x06\x31\x65-008\x12\x18\n\tmomentum2\x18\' \x01(\x02:\x05\x30.999\x12\x17\n\trms_decay\x18& \x01(\x02:\x04\x30.99\x12\x19\n\ndebug_info\x18\x17 \x01(\x08:\x05\x66\x61lse\x12\"\n\x14snapshot_after_train\x18\x1c \x01(\x08:\x04true\x12;\n\x0bsolver_type\x18\x1e \x01(\x0e\x32!.caffe.SolverParameter.SolverType:\x03SGD\"+\n\x0eSnapshotFormat\x12\x08\n\x04HDF5\x10\x00\x12\x0f\n\x0b\x42INARYPROTO\x10\x01\"\x1e\n\nSolverMode\x12\x07\n\x03\x43PU\x10\x00\x12\x07\n\x03GPU\x10\x01\"U\n\nSolverType\x12\x07\n\x03SGD\x10\x00\x12\x0c\n\x08NESTEROV\x10\x01\x12\x0b\n\x07\x41\x44\x41GRAD\x10\x02\x12\x0b\n\x07RMSPROP\x10\x03\x12\x0c\n\x08\x41\x44\x41\x44\x45LTA\x10\x04\x12\x08\n\x04\x41\x44\x41M\x10\x05\"l\n\x0bSolverState\x12\x0c\n\x04iter\x18\x01 \x01(\x05\x12\x13\n\x0blearned_net\x18\x02 \x01(\t\x12!\n\x07history\x18\x03 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x17\n\x0c\x63urrent_step\x18\x04 \x01(\x05:\x01\x30\"N\n\x08NetState\x12!\n\x05phase\x18\x01 
\x01(\x0e\x32\x0c.caffe.Phase:\x04TEST\x12\x10\n\x05level\x18\x02 \x01(\x05:\x01\x30\x12\r\n\x05stage\x18\x03 \x03(\t\"\x85\x01\n\x0cNetStateRule\x12\x1b\n\x05phase\x18\x01 \x01(\x0e\x32\x0c.caffe.Phase\x12\x11\n\tmin_level\x18\x02 \x01(\x05\x12\x11\n\tmax_level\x18\x03 \x01(\x05\x12\r\n\x05stage\x18\x04 \x03(\t\x12\x11\n\tnot_stage\x18\x05 \x03(\t\x12\x10\n\x08mpi_rank\x18\x06 \x03(\r\"\xa3\x01\n\tParamSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\nshare_mode\x18\x02 \x01(\x0e\x32\x1d.caffe.ParamSpec.DimCheckMode\x12\x12\n\x07lr_mult\x18\x03 \x01(\x02:\x01\x31\x12\x15\n\ndecay_mult\x18\x04 \x01(\x02:\x01\x31\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xcb\x19\n\x0eLayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06\x62ottom\x18\x03 \x03(\t\x12\x0b\n\x03top\x18\x04 \x03(\t\x12\x1c\n\x0cmirror_stage\x18\xa2\x01 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x05phase\x18\n \x01(\x0e\x32\x0c.caffe.Phase\x12\x13\n\x0bloss_weight\x18\x05 \x03(\x02\x12\x1f\n\x05param\x18\x06 \x03(\x0b\x32\x10.caffe.ParamSpec\x12\x1f\n\x05\x62lobs\x18\x07 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x16\n\x0epropagate_down\x18\x0b \x03(\x08\x12$\n\x07include\x18\x08 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18\t \x03(\x0b\x32\x13.caffe.NetStateRule\x12\x37\n\x0ftransform_param\x18\x64 \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18\x65 \x01(\x0b\x32\x14.caffe.LossParameter\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x66 \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18g \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12\x34\n\x10\x62\x61tch_norm_param\x18\x8b\x01 \x01(\x0b\x32\x19.caffe.BatchNormParameter\x12)\n\nbias_param\x18\x8d\x01 \x01(\x0b\x32\x14.caffe.BiasParameter\x12,\n\x0c\x63oncat_param\x18h \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18i \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18j 
\x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12)\n\ncrop_param\x18\x90\x01 \x01(\x0b\x32\x14.caffe.CropParameter\x12(\n\ndata_param\x18k \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18l \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18m \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18n \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12\'\n\telu_param\x18\x8c\x01 \x01(\x0b\x32\x13.caffe.ELUParameter\x12+\n\x0b\x65mbed_param\x18\x89\x01 \x01(\x0b\x32\x15.caffe.EmbedParameter\x12&\n\texp_param\x18o \x01(\x0b\x32\x13.caffe.ExpParameter\x12/\n\rflatten_param\x18\x87\x01 \x01(\x0b\x32\x17.caffe.FlattenParameter\x12\x31\n\x0fhdf5_data_param\x18p \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18q \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18r \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18s \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18t \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18u \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12+\n\x0binput_param\x18\x8f\x01 \x01(\x0b\x32\x15.caffe.InputParameter\x12\'\n\tlog_param\x18\x86\x01 \x01(\x0b\x32\x13.caffe.LogParameter\x12&\n\tlrn_param\x18v \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18w \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18x \x01(\x0b\x32\x13.caffe.MVNParameter\x12\x33\n\x0fparameter_param\x18\x91\x01 \x01(\x0b\x32\x19.caffe.ParameterParameter\x12.\n\rpooling_param\x18y \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18z \x01(\x0b\x32\x15.caffe.PowerParameter\x12+\n\x0bprelu_param\x18\x83\x01 \x01(\x0b\x32\x15.caffe.PReLUParameter\x12-\n\x0cpython_param\x18\x82\x01 \x01(\x0b\x32\x16.caffe.PythonParameter\x12\x33\n\x0freduction_param\x18\x88\x01 \x01(\x0b\x32\x19.caffe.ReductionParameter\x12(\n\nrelu_param\x18{ 
\x01(\x0b\x32\x14.caffe.ReLUParameter\x12/\n\rreshape_param\x18\x85\x01 \x01(\x0b\x32\x17.caffe.ReshapeParameter\x12+\n\x0bscale_param\x18\x8e\x01 \x01(\x0b\x32\x15.caffe.ScaleParameter\x12.\n\rsigmoid_param\x18| \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18} \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12\'\n\tspp_param\x18\x84\x01 \x01(\x0b\x32\x13.caffe.SPPParameter\x12*\n\x0bslice_param\x18~ \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18\x7f \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x33\n\x0fthreshold_param\x18\x80\x01 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12)\n\ntile_param\x18\x8a\x01 \x01(\x0b\x32\x14.caffe.TileParameter\x12\x36\n\x11window_data_param\x18\x81\x01 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x36\n\x11roi_pooling_param\x18\x97\x01 \x01(\x0b\x32\x1a.caffe.ROIPoolingParameter\x12;\n\x14smooth_l1_loss_param\x18\x98\x01 \x01(\x0b\x32\x1c.caffe.SmoothL1LossParameter\x12\'\n\tmpi_param\x18\x99\x01 \x01(\x0b\x32\x13.caffe.MPIParameter\x12/\n\rpermute_param\x18\x9a\x01 \x01(\x0b\x32\x17.caffe.PermuteParameter\x12\x33\n\x0fnormalize_param\x18\x9b\x01 \x01(\x0b\x32\x19.caffe.NormalizeParameter\x12\x31\n\x0eparallel_param\x18\x9d\x01 \x01(\x0b\x32\x18.caffe.ParallelParameter\x12-\n\x0cresize_param\x18\x9e\x01 \x01(\x0b\x32\x16.caffe.ResizeParameter\x12\x36\n\x11\x65xpand_dims_param\x18\x9f\x01 \x01(\x0b\x32\x1a.caffe.ExpandDimsParameter\x12\x31\n\x0eproposal_param\x18\xa0\x01 \x01(\x0b\x32\x18.caffe.ProposalParameter\x12\x38\n\x12\x62\x61tch_renorm_param\x18\xa1\x01 \x01(\x0b\x32\x1b.caffe.BatchRenormParameter\x12\x38\n\x12\x64\x65nse_concat_param\x18\xa3\x01 \x01(\x0b\x32\x1b.caffe.DenseConcatParameter\x12\x34\n\x10\x66ocal_loss_param\x18\xa4\x01 \x01(\x0b\x32\x19.caffe.FocalLossParameter\x12-\n\x0cgather_param\x18\xa5\x01 \x01(\x0b\x32\x16.caffe.GatherParameter\x12\x34\n\x10group_norm_param\x18\xa6\x01 \x01(\x0b\x32\x19.caffe.GroupNormParameter\"\xa7\x02\n\x17TransformationParameter\x12\x10\n\x05scale\x18\x01 
\x01(\x02:\x01\x31\x12\x15\n\x06mirror\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x14\n\tcrop_size\x18\x03 \x01(\r:\x01\x30\x12\x12\n\x07padding\x18\x0b \x01(\r:\x01\x30\x12\x11\n\tmean_file\x18\x04 \x01(\t\x12\x12\n\nmean_value\x18\x05 \x03(\x02\x12\x1a\n\x0b\x66orce_color\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\nforce_gray\x18\x07 \x01(\x08:\x05\x66\x61lse\x12!\n\x12\x63olor_augmentation\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x10min_random_scale\x18\t \x01(\x02:\x01\x31\x12\x1b\n\x10max_random_scale\x18\n \x01(\x02:\x01\x31\"\xf5\x01\n\rLossParameter\x12\x14\n\x0cignore_label\x18\x01 \x01(\x05\x12\x44\n\rnormalization\x18\x03 \x01(\x0e\x32&.caffe.LossParameter.NormalizationMode:\x05VALID\x12\x11\n\tnormalize\x18\x02 \x01(\x08\x1a\'\n\x13\x45xpandDimsParameter\x12\x10\n\x04\x61xis\x18\x01 \x01(\x05:\x02-1\"L\n\x11NormalizationMode\x12\x08\n\x04\x46ULL\x10\x00\x12\t\n\x05VALID\x10\x01\x12\x0e\n\nBATCH_SIZE\x10\x02\x12\x08\n\x04NONE\x10\x03\x12\x08\n\x04UNIT\x10\x04\"L\n\x11\x41\x63\x63uracyParameter\x12\x10\n\x05top_k\x18\x01 \x01(\r:\x01\x31\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x14\n\x0cignore_label\x18\x03 \x01(\x05\"M\n\x0f\x41rgMaxParameter\x12\x1a\n\x0bout_max_val\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x05top_k\x18\x02 \x01(\r:\x01\x31\x12\x0c\n\x04\x61xis\x18\x03 \x01(\x05\"9\n\x0f\x43oncatParameter\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x15\n\nconcat_dim\x18\x01 \x01(\r:\x01\x31\"h\n\x12\x42\x61tchNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x30.001\"]\n\rBiasParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\"L\n\x18\x43ontrastiveLossParameter\x12\x11\n\x06margin\x18\x01 \x01(\x02:\x01\x31\x12\x1d\n\x0elegacy_version\x18\x02 
\x01(\x08:\x05\x66\x61lse\"\xfc\x03\n\x14\x43onvolutionParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12\x0b\n\x03pad\x18\x03 \x03(\r\x12\x13\n\x0bkernel_size\x18\x04 \x03(\r\x12\x0e\n\x06stride\x18\x06 \x03(\r\x12\x10\n\x08\x64ilation\x18\x12 \x03(\r\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x10\n\x08kernel_h\x18\x0b \x01(\r\x12\x10\n\x08kernel_w\x18\x0c \x01(\r\x12\x10\n\x08stride_h\x18\r \x01(\r\x12\x10\n\x08stride_w\x18\x0e \x01(\r\x12\x10\n\x05group\x18\x05 \x01(\r:\x01\x31\x12-\n\rweight_filler\x18\x07 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x08 \x01(\x0b\x32\x16.caffe.FillerParameter\x12;\n\x06\x65ngine\x18\x0f \x01(\x0e\x32\".caffe.ConvolutionParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x10 \x01(\x05:\x01\x31\x12\x1e\n\x0f\x66orce_nd_im2col\x18\x11 \x01(\x08:\x05\x66\x61lse\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"0\n\rCropParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x32\x12\x0e\n\x06offset\x18\x02 \x03(\r\"\xa4\x02\n\rDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x31\n\x07\x62\x61\x63kend\x18\x08 \x01(\x0e\x32\x17.caffe.DataParameter.DB:\x07LEVELDB\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\"\n\x13\x66orce_encoded_color\x18\t \x01(\x08:\x05\x66\x61lse\x12\x13\n\x08prefetch\x18\n \x01(\r:\x01\x35\"\x1b\n\x02\x44\x42\x12\x0b\n\x07LEVELDB\x10\x00\x12\x08\n\x04LMDB\x10\x01\"I\n\x10\x44ropoutParameter\x12\x1a\n\rdropout_ratio\x18\x01 \x01(\x02:\x03\x30.5\x12\x19\n\x0bscale_train\x18\x02 \x01(\x08:\x04true\"\xa0\x01\n\x12\x44ummyDataParameter\x12+\n\x0b\x64\x61ta_filler\x18\x01 
\x03(\x0b\x32\x16.caffe.FillerParameter\x12\x1f\n\x05shape\x18\x06 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x0b\n\x03num\x18\x02 \x03(\r\x12\x10\n\x08\x63hannels\x18\x03 \x03(\r\x12\x0e\n\x06height\x18\x04 \x03(\r\x12\r\n\x05width\x18\x05 \x03(\r\"\xa5\x01\n\x10\x45ltwiseParameter\x12\x39\n\toperation\x18\x01 \x01(\x0e\x32!.caffe.EltwiseParameter.EltwiseOp:\x03SUM\x12\r\n\x05\x63oeff\x18\x02 \x03(\x02\x12\x1e\n\x10stable_prod_grad\x18\x03 \x01(\x08:\x04true\"\'\n\tEltwiseOp\x12\x08\n\x04PROD\x10\x00\x12\x07\n\x03SUM\x10\x01\x12\x07\n\x03MAX\x10\x02\" \n\x0c\x45LUParameter\x12\x10\n\x05\x61lpha\x18\x01 \x01(\x02:\x01\x31\"\xac\x01\n\x0e\x45mbedParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x11\n\tinput_dim\x18\x02 \x01(\r\x12\x17\n\tbias_term\x18\x03 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"D\n\x0c\x45xpParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"9\n\x10\x46lattenParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x14\n\x08\x65nd_axis\x18\x02 \x01(\x05:\x02-1\"O\n\x11HDF5DataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x16\n\x07shuffle\x18\x03 \x01(\x08:\x05\x66\x61lse\"(\n\x13HDF5OutputParameter\x12\x11\n\tfile_name\x18\x01 \x01(\t\"^\n\x12HingeLossParameter\x12\x30\n\x04norm\x18\x01 \x01(\x0e\x32\x1e.caffe.HingeLossParameter.Norm:\x02L1\"\x16\n\x04Norm\x12\x06\n\x02L1\x10\x01\x12\x06\n\x02L2\x10\x02\"\x97\x02\n\x12ImageDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x15\n\nbatch_size\x18\x04 \x01(\r:\x01\x31\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x16\n\x07shuffle\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x15\n\nnew_height\x18\t \x01(\r:\x01\x30\x12\x14\n\tnew_width\x18\n \x01(\r:\x01\x30\x12\x16\n\x08is_color\x18\x0b \x01(\x08:\x04true\x12\x10\n\x05scale\x18\x02 
\x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\x0c \x01(\t:\x00\"\'\n\x15InfogainLossParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\"\xcb\x01\n\x15InnerProductParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0f\n\x04\x61xis\x18\x05 \x01(\x05:\x01\x31\x12\x18\n\ttranspose\x18\x06 \x01(\x08:\x05\x66\x61lse\"1\n\x0eInputParameter\x12\x1f\n\x05shape\x18\x01 \x03(\x0b\x32\x10.caffe.BlobShape\"D\n\x0cLogParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"\xb8\x02\n\x0cLRNParameter\x12\x15\n\nlocal_size\x18\x01 \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x02 \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x03 \x01(\x02:\x04\x30.75\x12\x44\n\x0bnorm_region\x18\x04 \x01(\x0e\x32\x1e.caffe.LRNParameter.NormRegion:\x0f\x41\x43ROSS_CHANNELS\x12\x0c\n\x01k\x18\x05 \x01(\x02:\x01\x31\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.LRNParameter.Engine:\x07\x44\x45\x46\x41ULT\"5\n\nNormRegion\x12\x13\n\x0f\x41\x43ROSS_CHANNELS\x10\x00\x12\x12\n\x0eWITHIN_CHANNEL\x10\x01\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xbd\x01\n\x13MemoryDataParameter\x12\x12\n\nbatch_size\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\r\n\x05width\x18\x04 \x01(\r\x12;\n\x05\x64type\x18\x05 \x01(\x0e\x32#.caffe.MemoryDataParameter.DataType:\x07\x46LOAT32\"$\n\x08\x44\x61taType\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\x0b\n\x07\x46LOAT16\x10\x01\"e\n\x0cMVNParameter\x12 \n\x12normalize_variance\x18\x01 
\x01(\x08:\x04true\x12\x1e\n\x0f\x61\x63ross_channels\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x13\n\x03\x65ps\x18\x03 \x01(\x02:\x06\x31\x65-009\"5\n\x12ParameterParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\"\xa2\x03\n\x10PoolingParameter\x12\x35\n\x04pool\x18\x01 \x01(\x0e\x32\".caffe.PoolingParameter.PoolMethod:\x03MAX\x12\x0e\n\x03pad\x18\x04 \x01(\r:\x01\x30\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x13\n\x0bkernel_size\x18\x02 \x01(\r\x12\x10\n\x08kernel_h\x18\x05 \x01(\r\x12\x10\n\x08kernel_w\x18\x06 \x01(\r\x12\x11\n\x06stride\x18\x03 \x01(\r:\x01\x31\x12\x10\n\x08stride_h\x18\x07 \x01(\r\x12\x10\n\x08stride_w\x18\x08 \x01(\r\x12\x37\n\x06\x65ngine\x18\x0b \x01(\x0e\x32\x1e.caffe.PoolingParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x1d\n\x0eglobal_pooling\x18\x0c \x01(\x08:\x05\x66\x61lse\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Y\n\x13ROIPoolingParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"F\n\x0ePowerParameter\x12\x10\n\x05power\x18\x01 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"g\n\x0fPythonParameter\x12\x0e\n\x06module\x18\x01 \x01(\t\x12\r\n\x05layer\x18\x02 \x01(\t\x12\x13\n\tparam_str\x18\x03 \x01(\t:\x00\x12 \n\x11share_in_parallel\x18\x04 \x01(\x08:\x05\x66\x61lse\"\xad\x01\n\x12ReductionParameter\x12=\n\toperation\x18\x01 \x01(\x0e\x32%.caffe.ReductionParameter.ReductionOp:\x03SUM\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x10\n\x05\x63oeff\x18\x03 
\x01(\x02:\x01\x31\"5\n\x0bReductionOp\x12\x07\n\x03SUM\x10\x01\x12\x08\n\x04\x41SUM\x10\x02\x12\t\n\x05SUMSQ\x10\x03\x12\x08\n\x04MEAN\x10\x04\"\x8d\x01\n\rReLUParameter\x12\x19\n\x0enegative_slope\x18\x01 \x01(\x02:\x01\x30\x12\x34\n\x06\x65ngine\x18\x02 \x01(\x0e\x32\x1b.caffe.ReLUParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Z\n\x10ReshapeParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\x08num_axes\x18\x03 \x01(\x05:\x02-1\"\xa5\x01\n\x0eScaleParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x18\n\tbias_term\x18\x04 \x01(\x08:\x05\x66\x61lse\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"x\n\x10SigmoidParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SigmoidParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"L\n\x0eSliceParameter\x12\x0f\n\x04\x61xis\x18\x03 \x01(\x05:\x01\x31\x12\x13\n\x0bslice_point\x18\x02 \x03(\r\x12\x14\n\tslice_dim\x18\x01 \x01(\r:\x01\x31\"\x89\x01\n\x10SoftmaxParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SoftmaxParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"r\n\rTanHParameter\x12\x34\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1b.caffe.TanHParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"T\n\rTileParameter\x12\x0f\n\x04\x61xis\x18\x01 
\x01(\x05:\x01\x31\x12\r\n\x05tiles\x18\x02 \x01(\x05\x12#\n\tmultiples\x18\x03 \x01(\x0b\x32\x10.caffe.BlobShape\"*\n\x12ThresholdParameter\x12\x14\n\tthreshold\x18\x01 \x01(\x02:\x01\x30\"\xc1\x02\n\x13WindowDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0c\x66g_threshold\x18\x07 \x01(\x02:\x03\x30.5\x12\x19\n\x0c\x62g_threshold\x18\x08 \x01(\x02:\x03\x30.5\x12\x19\n\x0b\x66g_fraction\x18\t \x01(\x02:\x04\x30.25\x12\x16\n\x0b\x63ontext_pad\x18\n \x01(\r:\x01\x30\x12\x17\n\tcrop_mode\x18\x0b \x01(\t:\x04warp\x12\x1b\n\x0c\x63\x61\x63he_images\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\r \x01(\t:\x00\"\xeb\x01\n\x0cSPPParameter\x12\x16\n\x0epyramid_height\x18\x01 \x01(\r\x12\x31\n\x04pool\x18\x02 \x01(\x0e\x32\x1e.caffe.SPPParameter.PoolMethod:\x03MAX\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.SPPParameter.Engine:\x07\x44\x45\x46\x41ULT\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xe0\x13\n\x10V1LayerParameter\x12\x0e\n\x06\x62ottom\x18\x02 \x03(\t\x12\x0b\n\x03top\x18\x03 \x03(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12$\n\x07include\x18 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18! 
\x03(\x0b\x32\x13.caffe.NetStateRule\x12/\n\x04type\x18\x05 \x01(\x0e\x32!.caffe.V1LayerParameter.LayerType\x12\x1f\n\x05\x62lobs\x18\x06 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x0e\n\x05param\x18\xe9\x07 \x03(\t\x12>\n\x0f\x62lob_share_mode\x18\xea\x07 \x03(\x0e\x32$.caffe.V1LayerParameter.DimCheckMode\x12\x10\n\x08\x62lobs_lr\x18\x07 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x08 \x03(\x02\x12\x13\n\x0bloss_weight\x18# \x03(\x02\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x1b \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18\x17 \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12,\n\x0c\x63oncat_param\x18\t \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18( \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18\n \x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12(\n\ndata_param\x18\x0b \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18\x0c \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18\x1a \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18\x18 \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12&\n\texp_param\x18) \x01(\x0b\x32\x13.caffe.ExpParameter\x12\x31\n\x0fhdf5_data_param\x18\r \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18\x0e \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18\x1d \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18\x0f \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18\x10 \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18\x11 \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12&\n\tlrn_param\x18\x12 \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18\x16 \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18\" \x01(\x0b\x32\x13.caffe.MVNParameter\x12.\n\rpooling_param\x18\x13 \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18\x15 
\x01(\x0b\x32\x15.caffe.PowerParameter\x12(\n\nrelu_param\x18\x1e \x01(\x0b\x32\x14.caffe.ReLUParameter\x12.\n\rsigmoid_param\x18& \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18\' \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12*\n\x0bslice_param\x18\x1f \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18% \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x32\n\x0fthreshold_param\x18\x19 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12\x35\n\x11window_data_param\x18\x14 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x37\n\x0ftransform_param\x18$ \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18* \x01(\x0b\x32\x14.caffe.LossParameter\x12&\n\x05layer\x18\x01 \x01(\x0b\x32\x17.caffe.V0LayerParameter\"\xd8\x04\n\tLayerType\x12\x08\n\x04NONE\x10\x00\x12\n\n\x06\x41\x42SVAL\x10#\x12\x0c\n\x08\x41\x43\x43URACY\x10\x01\x12\n\n\x06\x41RGMAX\x10\x1e\x12\x08\n\x04\x42NLL\x10\x02\x12\n\n\x06\x43ONCAT\x10\x03\x12\x14\n\x10\x43ONTRASTIVE_LOSS\x10%\x12\x0f\n\x0b\x43ONVOLUTION\x10\x04\x12\x08\n\x04\x44\x41TA\x10\x05\x12\x11\n\rDECONVOLUTION\x10\'\x12\x0b\n\x07\x44ROPOUT\x10\x06\x12\x0e\n\nDUMMY_DATA\x10 
\x12\x12\n\x0e\x45UCLIDEAN_LOSS\x10\x07\x12\x0b\n\x07\x45LTWISE\x10\x19\x12\x07\n\x03\x45XP\x10&\x12\x0b\n\x07\x46LATTEN\x10\x08\x12\r\n\tHDF5_DATA\x10\t\x12\x0f\n\x0bHDF5_OUTPUT\x10\n\x12\x0e\n\nHINGE_LOSS\x10\x1c\x12\n\n\x06IM2COL\x10\x0b\x12\x0e\n\nIMAGE_DATA\x10\x0c\x12\x11\n\rINFOGAIN_LOSS\x10\r\x12\x11\n\rINNER_PRODUCT\x10\x0e\x12\x07\n\x03LRN\x10\x0f\x12\x0f\n\x0bMEMORY_DATA\x10\x1d\x12\x1d\n\x19MULTINOMIAL_LOGISTIC_LOSS\x10\x10\x12\x07\n\x03MVN\x10\"\x12\x0b\n\x07POOLING\x10\x11\x12\t\n\x05POWER\x10\x1a\x12\x08\n\x04RELU\x10\x12\x12\x0b\n\x07SIGMOID\x10\x13\x12\x1e\n\x1aSIGMOID_CROSS_ENTROPY_LOSS\x10\x1b\x12\x0b\n\x07SILENCE\x10$\x12\x0b\n\x07SOFTMAX\x10\x14\x12\x10\n\x0cSOFTMAX_LOSS\x10\x15\x12\t\n\x05SPLIT\x10\x16\x12\t\n\x05SLICE\x10!\x12\x08\n\x04TANH\x10\x17\x12\x0f\n\x0bWINDOW_DATA\x10\x18\x12\r\n\tTHRESHOLD\x10\x1f\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xfd\x07\n\x10V0LayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x12\n\nnum_output\x18\x03 \x01(\r\x12\x16\n\x08\x62iasterm\x18\x04 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x06 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0e\n\x03pad\x18\x07 \x01(\r:\x01\x30\x12\x12\n\nkernelsize\x18\x08 \x01(\r\x12\x10\n\x05group\x18\t \x01(\r:\x01\x31\x12\x11\n\x06stride\x18\n \x01(\r:\x01\x31\x12\x35\n\x04pool\x18\x0b \x01(\x0e\x32\".caffe.V0LayerParameter.PoolMethod:\x03MAX\x12\x1a\n\rdropout_ratio\x18\x0c \x01(\x02:\x03\x30.5\x12\x15\n\nlocal_size\x18\r \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x0e \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x0f \x01(\x02:\x04\x30.75\x12\x0c\n\x01k\x18\x16 \x01(\x02:\x01\x31\x12\x0e\n\x06source\x18\x10 \x01(\t\x12\x10\n\x05scale\x18\x11 \x01(\x02:\x01\x31\x12\x10\n\x08meanfile\x18\x12 \x01(\t\x12\x11\n\tbatchsize\x18\x13 \x01(\r\x12\x13\n\x08\x63ropsize\x18\x14 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x15 
\x01(\x08:\x05\x66\x61lse\x12\x1f\n\x05\x62lobs\x18\x32 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x10\n\x08\x62lobs_lr\x18\x33 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x34 \x03(\x02\x12\x14\n\trand_skip\x18\x35 \x01(\r:\x01\x30\x12\x1d\n\x10\x64\x65t_fg_threshold\x18\x36 \x01(\x02:\x03\x30.5\x12\x1d\n\x10\x64\x65t_bg_threshold\x18\x37 \x01(\x02:\x03\x30.5\x12\x1d\n\x0f\x64\x65t_fg_fraction\x18\x38 \x01(\x02:\x04\x30.25\x12\x1a\n\x0f\x64\x65t_context_pad\x18: \x01(\r:\x01\x30\x12\x1b\n\rdet_crop_mode\x18; \x01(\t:\x04warp\x12\x12\n\x07new_num\x18< \x01(\x05:\x01\x30\x12\x17\n\x0cnew_channels\x18= \x01(\x05:\x01\x30\x12\x15\n\nnew_height\x18> \x01(\x05:\x01\x30\x12\x14\n\tnew_width\x18? \x01(\x05:\x01\x30\x12\x1d\n\x0eshuffle_images\x18@ \x01(\x08:\x05\x66\x61lse\x12\x15\n\nconcat_dim\x18\x41 \x01(\r:\x01\x31\x12\x36\n\x11hdf5_output_param\x18\xe9\x07 \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"W\n\x0ePReLUParameter\x12&\n\x06\x66iller\x18\x01 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1d\n\x0e\x63hannel_shared\x18\x02 \x01(\x08:\x05\x66\x61lse\")\n\x15SmoothL1LossParameter\x12\x10\n\x05sigma\x18\x01 \x01(\x02:\x01\x31\"H\n\x0cMPIParameter\x12\x0f\n\x04root\x18\x01 \x01(\r:\x01\x30\x12\x12\n\x07\x63omm_id\x18\x02 \x01(\x04:\x01\x30\x12\x13\n\x08group_id\x18\x03 \x01(\x04:\x01\x30\"!\n\x10PermuteParameter\x12\r\n\x05order\x18\x01 \x03(\r\"\x92\x01\n\x12NormalizeParameter\x12\x1c\n\x0e\x61\x63ross_spatial\x18\x01 \x01(\x08:\x04true\x12,\n\x0cscale_filler\x18\x02 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1c\n\x0e\x63hannel_shared\x18\x03 \x01(\x08:\x04true\x12\x12\n\x03\x65ps\x18\x04 \x01(\x02:\x05\x30.001\"d\n\x11ParallelParameter\x12\x1d\n\x0emultiple_nodes\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x16\n\x07shuffle\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x18\n\tpartition\x18\x03 \x01(\x08:\x05\x66\x61lse\"R\n\x0fResizeParameter\x12\x1f\n\x05shape\x18\x01 
\x01(\x0b\x32\x10.caffe.BlobShape\x12\x0e\n\x02\x66x\x18\x02 \x01(\x02:\x02-1\x12\x0e\n\x02\x66y\x18\x03 \x01(\x02:\x02-1\"\'\n\x13\x45xpandDimsParameter\x12\x10\n\x04\x61xis\x18\x01 \x01(\x05:\x02-1\"\x90\x02\n\x11ProposalParameter\x12\x0e\n\x06stride\x18\x01 \x03(\x05\x12\r\n\x05ratio\x18\x02 \x03(\x02\x12\r\n\x05scale\x18\x03 \x03(\x02\x12\x1b\n\rpre_nms_top_n\x18\x04 \x01(\r:\x04\x36\x30\x30\x30\x12\x1b\n\x0epost_nms_top_n\x18\x05 \x01(\r:\x03\x33\x30\x30\x12\x17\n\nnms_thresh\x18\x06 \x01(\x02:\x03\x30.7\x12\x14\n\x08min_size\x18\x07 \x01(\r:\x02\x31\x36\x12\x14\n\tmin_level\x18\x08 \x01(\x05:\x01\x32\x12\x14\n\tmax_level\x18\t \x01(\x05:\x01\x35\x12\x1c\n\x0f\x63\x61nonical_scale\x18\n \x01(\x05:\x03\x32\x32\x34\x12\x1a\n\x0f\x63\x61nonical_level\x18\x0b \x01(\x05:\x01\x34\"\xa6\x01\n\x14\x42\x61tchRenormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x30.001\x12\x10\n\x05r_max\x18\x04 \x01(\x02:\x01\x33\x12\x10\n\x05\x64_max\x18\x05 \x01(\x02:\x01\x35\x12\x16\n\x07t_delta\x18\x06 \x01(\x02:\x05\x30.001\"?\n\x14\x44\x65nseConcatParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x16\n\x0bgrowth_rate\x18\x02 \x01(\x05:\x01\x30\"N\n\x12\x46ocalLossParameter\x12\x13\n\x05\x61lpha\x18\x01 \x01(\x02:\x04\x30.25\x12\x10\n\x05gamma\x18\x02 \x01(\x02:\x01\x32\x12\x11\n\x06neg_id\x18\x03 \x01(\x05:\x01\x30\"\"\n\x0fGatherParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x30\"{\n\x12GroupNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x30.001\x12\x11\n\x05group\x18\x05 \x01(\r:\x02\x33\x32*\x1c\n\x05Phase\x12\t\n\x05TRAIN\x10\x00\x12\x08\n\x04TEST\x10\x01')
serialized_pb=_b('\n\x0b\x63\x61\x66\x66\x65.proto\x12\x05\x63\x61\x66\x66\x65\"\x1c\n\tBlobShape\x12\x0f\n\x03\x64im\x18\x01 \x03(\x03\x42\x02\x10\x01\"\xcc\x01\n\tBlobProto\x12\x1f\n\x05shape\x18\x07 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x10\n\x04\x64\x61ta\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04\x64iff\x18\x06 \x03(\x02\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_data\x18\x08 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_diff\x18\t \x03(\x01\x42\x02\x10\x01\x12\x0e\n\x03num\x18\x01 \x01(\x05:\x01\x30\x12\x13\n\x08\x63hannels\x18\x02 \x01(\x05:\x01\x30\x12\x11\n\x06height\x18\x03 \x01(\x05:\x01\x30\x12\x10\n\x05width\x18\x04 \x01(\x05:\x01\x30\"2\n\x0f\x42lobProtoVector\x12\x1f\n\x05\x62lobs\x18\x01 \x03(\x0b\x32\x10.caffe.BlobProto\"\x91\x01\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02\x12\x16\n\x07\x65ncoded\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x0e\n\x06labels\x18\x08 \x03(\x05\"\x8a\x02\n\x0f\x46illerParameter\x12\x16\n\x04type\x18\x01 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x02 \x01(\x02:\x01\x30\x12\x0e\n\x03min\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03max\x18\x04 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x05 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x06 \x01(\x02:\x01\x31\x12\x12\n\x06sparse\x18\x07 \x01(\x05:\x02-1\x12\x42\n\rvariance_norm\x18\x08 \x01(\x0e\x32#.caffe.FillerParameter.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x41VERAGE\x10\x02\"\x8e\x02\n\x0cNetParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05input\x18\x03 \x03(\t\x12%\n\x0binput_shape\x18\x08 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x11\n\tinput_dim\x18\x04 \x03(\x05\x12\x1d\n\x0e\x66orce_backward\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\x1e\n\x05state\x18\x06 
\x01(\x0b\x32\x0f.caffe.NetState\x12\x19\n\ndebug_info\x18\x07 \x01(\x08:\x05\x66\x61lse\x12$\n\x05layer\x18\x64 \x03(\x0b\x32\x15.caffe.LayerParameter\x12\'\n\x06layers\x18\x02 \x03(\x0b\x32\x17.caffe.V1LayerParameter\"\xc9\n\n\x0fSolverParameter\x12\x0b\n\x03net\x18\x18 \x01(\t\x12&\n\tnet_param\x18\x19 \x01(\x0b\x32\x13.caffe.NetParameter\x12\x11\n\ttrain_net\x18\x01 \x01(\t\x12\x10\n\x08test_net\x18\x02 \x03(\t\x12,\n\x0ftrain_net_param\x18\x15 \x01(\x0b\x32\x13.caffe.NetParameter\x12+\n\x0etest_net_param\x18\x16 \x03(\x0b\x32\x13.caffe.NetParameter\x12$\n\x0btrain_state\x18\x1a \x01(\x0b\x32\x0f.caffe.NetState\x12#\n\ntest_state\x18\x1b \x03(\x0b\x32\x0f.caffe.NetState\x12\x11\n\ttest_iter\x18\x03 \x03(\x05\x12\x18\n\rtest_interval\x18\x04 \x01(\x05:\x01\x30\x12 \n\x11test_compute_loss\x18\x13 \x01(\x08:\x05\x66\x61lse\x12!\n\x13test_initialization\x18 \x01(\x08:\x04true\x12\x0f\n\x07\x62\x61se_lr\x18\x05 \x01(\x02\x12\x10\n\x08stage_lr\x18\x32 \x03(\x02\x12\x12\n\nstage_iter\x18\x33 \x03(\x05\x12\x0f\n\x07\x64isplay\x18\x06 \x01(\x05\x12\x17\n\x0c\x61verage_loss\x18! 
\x01(\x05:\x01\x31\x12\x10\n\x08max_iter\x18\x07 \x01(\x05\x12\x14\n\titer_size\x18$ \x01(\x05:\x01\x31\x12\x11\n\tlr_policy\x18\x08 \x01(\t\x12\r\n\x05gamma\x18\t \x01(\x02\x12\r\n\x05power\x18\n \x01(\x02\x12\x10\n\x08momentum\x18\x0b \x01(\x02\x12\x14\n\x0cweight_decay\x18\x0c \x01(\x02\x12\x1f\n\x13regularization_type\x18\x1d \x01(\t:\x02L2\x12\x10\n\x08stepsize\x18\r \x01(\x05\x12\x11\n\tstepvalue\x18\" \x03(\x05\x12\x1a\n\x0e\x63lip_gradients\x18# \x01(\x02:\x02-1\x12\x13\n\x08snapshot\x18\x0e \x01(\x05:\x01\x30\x12\x17\n\x0fsnapshot_prefix\x18\x0f \x01(\t\x12\x1c\n\rsnapshot_diff\x18\x10 \x01(\x08:\x05\x66\x61lse\x12K\n\x0fsnapshot_format\x18% \x01(\x0e\x32%.caffe.SolverParameter.SnapshotFormat:\x0b\x42INARYPROTO\x12;\n\x0bsolver_mode\x18\x11 \x01(\x0e\x32!.caffe.SolverParameter.SolverMode:\x03GPU\x12\x14\n\tdevice_id\x18\x12 \x01(\x05:\x01\x30\x12\x17\n\x0brandom_seed\x18\x14 \x01(\x03:\x02-1\x12\x11\n\x04type\x18( \x01(\t:\x03SGD\x12\x15\n\x05\x64\x65lta\x18\x1f \x01(\x02:\x06\x31\x65-008\x12\x18\n\tmomentum2\x18\' \x01(\x02:\x05\x30.999\x12\x17\n\trms_decay\x18& \x01(\x02:\x04\x30.99\x12\x19\n\ndebug_info\x18\x17 \x01(\x08:\x05\x66\x61lse\x12\"\n\x14snapshot_after_train\x18\x1c \x01(\x08:\x04true\x12;\n\x0bsolver_type\x18\x1e \x01(\x0e\x32!.caffe.SolverParameter.SolverType:\x03SGD\"+\n\x0eSnapshotFormat\x12\x08\n\x04HDF5\x10\x00\x12\x0f\n\x0b\x42INARYPROTO\x10\x01\"\x1e\n\nSolverMode\x12\x07\n\x03\x43PU\x10\x00\x12\x07\n\x03GPU\x10\x01\"U\n\nSolverType\x12\x07\n\x03SGD\x10\x00\x12\x0c\n\x08NESTEROV\x10\x01\x12\x0b\n\x07\x41\x44\x41GRAD\x10\x02\x12\x0b\n\x07RMSPROP\x10\x03\x12\x0c\n\x08\x41\x44\x41\x44\x45LTA\x10\x04\x12\x08\n\x04\x41\x44\x41M\x10\x05\"l\n\x0bSolverState\x12\x0c\n\x04iter\x18\x01 \x01(\x05\x12\x13\n\x0blearned_net\x18\x02 \x01(\t\x12!\n\x07history\x18\x03 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x17\n\x0c\x63urrent_step\x18\x04 \x01(\x05:\x01\x30\"N\n\x08NetState\x12!\n\x05phase\x18\x01 
\x01(\x0e\x32\x0c.caffe.Phase:\x04TEST\x12\x10\n\x05level\x18\x02 \x01(\x05:\x01\x30\x12\r\n\x05stage\x18\x03 \x03(\t\"\x85\x01\n\x0cNetStateRule\x12\x1b\n\x05phase\x18\x01 \x01(\x0e\x32\x0c.caffe.Phase\x12\x11\n\tmin_level\x18\x02 \x01(\x05\x12\x11\n\tmax_level\x18\x03 \x01(\x05\x12\r\n\x05stage\x18\x04 \x03(\t\x12\x11\n\tnot_stage\x18\x05 \x03(\t\x12\x10\n\x08mpi_rank\x18\x06 \x03(\r\"\xa3\x01\n\tParamSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\nshare_mode\x18\x02 \x01(\x0e\x32\x1d.caffe.ParamSpec.DimCheckMode\x12\x12\n\x07lr_mult\x18\x03 \x01(\x02:\x01\x31\x12\x15\n\ndecay_mult\x18\x04 \x01(\x02:\x01\x31\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xbd\x1a\n\x0eLayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06\x62ottom\x18\x03 \x03(\t\x12\x0b\n\x03top\x18\x04 \x03(\t\x12\x1c\n\x0cmirror_stage\x18\xa2\x01 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x05phase\x18\n \x01(\x0e\x32\x0c.caffe.Phase\x12\x13\n\x0bloss_weight\x18\x05 \x03(\x02\x12\x1f\n\x05param\x18\x06 \x03(\x0b\x32\x10.caffe.ParamSpec\x12\x1f\n\x05\x62lobs\x18\x07 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x16\n\x0epropagate_down\x18\x0b \x03(\x08\x12$\n\x07include\x18\x08 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18\t \x03(\x0b\x32\x13.caffe.NetStateRule\x12\x37\n\x0ftransform_param\x18\x64 \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18\x65 \x01(\x0b\x32\x14.caffe.LossParameter\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x66 \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18g \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12\x34\n\x10\x62\x61tch_norm_param\x18\x8b\x01 \x01(\x0b\x32\x19.caffe.BatchNormParameter\x12)\n\nbias_param\x18\x8d\x01 \x01(\x0b\x32\x14.caffe.BiasParameter\x12,\n\x0c\x63oncat_param\x18h \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18i \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18j 
\x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12)\n\ncrop_param\x18\x90\x01 \x01(\x0b\x32\x14.caffe.CropParameter\x12(\n\ndata_param\x18k \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18l \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18m \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18n \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12\'\n\telu_param\x18\x8c\x01 \x01(\x0b\x32\x13.caffe.ELUParameter\x12+\n\x0b\x65mbed_param\x18\x89\x01 \x01(\x0b\x32\x15.caffe.EmbedParameter\x12&\n\texp_param\x18o \x01(\x0b\x32\x13.caffe.ExpParameter\x12/\n\rflatten_param\x18\x87\x01 \x01(\x0b\x32\x17.caffe.FlattenParameter\x12\x31\n\x0fhdf5_data_param\x18p \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18q \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18r \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18s \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18t \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18u \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12+\n\x0binput_param\x18\x8f\x01 \x01(\x0b\x32\x15.caffe.InputParameter\x12\'\n\tlog_param\x18\x86\x01 \x01(\x0b\x32\x13.caffe.LogParameter\x12&\n\tlrn_param\x18v \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18w \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18x \x01(\x0b\x32\x13.caffe.MVNParameter\x12\x33\n\x0fparameter_param\x18\x91\x01 \x01(\x0b\x32\x19.caffe.ParameterParameter\x12.\n\rpooling_param\x18y \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18z \x01(\x0b\x32\x15.caffe.PowerParameter\x12+\n\x0bprelu_param\x18\x83\x01 \x01(\x0b\x32\x15.caffe.PReLUParameter\x12-\n\x0cpython_param\x18\x82\x01 \x01(\x0b\x32\x16.caffe.PythonParameter\x12\x33\n\x0freduction_param\x18\x88\x01 \x01(\x0b\x32\x19.caffe.ReductionParameter\x12(\n\nrelu_param\x18{ 
\x01(\x0b\x32\x14.caffe.ReLUParameter\x12/\n\rreshape_param\x18\x85\x01 \x01(\x0b\x32\x17.caffe.ReshapeParameter\x12+\n\x0bscale_param\x18\x8e\x01 \x01(\x0b\x32\x15.caffe.ScaleParameter\x12.\n\rsigmoid_param\x18| \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18} \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12\'\n\tspp_param\x18\x84\x01 \x01(\x0b\x32\x13.caffe.SPPParameter\x12*\n\x0bslice_param\x18~ \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18\x7f \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x33\n\x0fthreshold_param\x18\x80\x01 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12)\n\ntile_param\x18\x8a\x01 \x01(\x0b\x32\x14.caffe.TileParameter\x12\x36\n\x11window_data_param\x18\x81\x01 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x36\n\x11roi_pooling_param\x18\x97\x01 \x01(\x0b\x32\x1a.caffe.ROIPoolingParameter\x12;\n\x14smooth_l1_loss_param\x18\x98\x01 \x01(\x0b\x32\x1c.caffe.SmoothL1LossParameter\x12\'\n\tmpi_param\x18\x99\x01 \x01(\x0b\x32\x13.caffe.MPIParameter\x12/\n\rpermute_param\x18\x9a\x01 \x01(\x0b\x32\x17.caffe.PermuteParameter\x12\x33\n\x0fnormalize_param\x18\x9b\x01 \x01(\x0b\x32\x19.caffe.NormalizeParameter\x12\x31\n\x0eparallel_param\x18\x9d\x01 \x01(\x0b\x32\x18.caffe.ParallelParameter\x12-\n\x0cresize_param\x18\x9e\x01 \x01(\x0b\x32\x16.caffe.ResizeParameter\x12\x36\n\x11\x65xpand_dims_param\x18\x9f\x01 \x01(\x0b\x32\x1a.caffe.ExpandDimsParameter\x12\x31\n\x0eproposal_param\x18\xa0\x01 \x01(\x0b\x32\x18.caffe.ProposalParameter\x12\x38\n\x12\x62\x61tch_renorm_param\x18\xa1\x01 \x01(\x0b\x32\x1b.caffe.BatchRenormParameter\x12\x38\n\x12\x64\x65nse_concat_param\x18\xa3\x01 \x01(\x0b\x32\x1b.caffe.DenseConcatParameter\x12\x34\n\x10\x66ocal_loss_param\x18\xa4\x01 \x01(\x0b\x32\x19.caffe.FocalLossParameter\x12-\n\x0cgather_param\x18\xa5\x01 \x01(\x0b\x32\x16.caffe.GatherParameter\x12:\n\x13instance_norm_param\x18\xa6\x01 \x01(\x0b\x32\x1c.caffe.InstanceNormParameter\x12\x34\n\x10group_norm_param\x18\xa7\x01 
\x01(\x0b\x32\x19.caffe.GroupNormParameter\x12\x34\n\x10\x64rop_block_param\x18\xa8\x01 \x01(\x0b\x32\x19.caffe.DropBlockParameter\"\xa7\x02\n\x17TransformationParameter\x12\x10\n\x05scale\x18\x01 \x01(\x02:\x01\x31\x12\x15\n\x06mirror\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x14\n\tcrop_size\x18\x03 \x01(\r:\x01\x30\x12\x12\n\x07padding\x18\x0b \x01(\r:\x01\x30\x12\x11\n\tmean_file\x18\x04 \x01(\t\x12\x12\n\nmean_value\x18\x05 \x03(\x02\x12\x1a\n\x0b\x66orce_color\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\nforce_gray\x18\x07 \x01(\x08:\x05\x66\x61lse\x12!\n\x12\x63olor_augmentation\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x10min_random_scale\x18\t \x01(\x02:\x01\x31\x12\x1b\n\x10max_random_scale\x18\n \x01(\x02:\x01\x31\"\xf5\x01\n\rLossParameter\x12\x14\n\x0cignore_label\x18\x01 \x01(\x05\x12\x44\n\rnormalization\x18\x03 \x01(\x0e\x32&.caffe.LossParameter.NormalizationMode:\x05VALID\x12\x11\n\tnormalize\x18\x02 \x01(\x08\x1a\'\n\x13\x45xpandDimsParameter\x12\x10\n\x04\x61xis\x18\x01 \x01(\x05:\x02-1\"L\n\x11NormalizationMode\x12\x08\n\x04\x46ULL\x10\x00\x12\t\n\x05VALID\x10\x01\x12\x0e\n\nBATCH_SIZE\x10\x02\x12\x08\n\x04NONE\x10\x03\x12\x08\n\x04UNIT\x10\x04\"L\n\x11\x41\x63\x63uracyParameter\x12\x10\n\x05top_k\x18\x01 \x01(\r:\x01\x31\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x14\n\x0cignore_label\x18\x03 \x01(\x05\"M\n\x0f\x41rgMaxParameter\x12\x1a\n\x0bout_max_val\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x05top_k\x18\x02 \x01(\r:\x01\x31\x12\x0c\n\x04\x61xis\x18\x03 \x01(\x05\"9\n\x0f\x43oncatParameter\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x15\n\nconcat_dim\x18\x01 \x01(\r:\x01\x31\"i\n\x12\x42\x61tchNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x13\n\x03\x65ps\x18\x03 \x01(\x02:\x06\x31\x65-005\"]\n\rBiasParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 
\x01(\x0b\x32\x16.caffe.FillerParameter\"L\n\x18\x43ontrastiveLossParameter\x12\x11\n\x06margin\x18\x01 \x01(\x02:\x01\x31\x12\x1d\n\x0elegacy_version\x18\x02 \x01(\x08:\x05\x66\x61lse\"\xfc\x03\n\x14\x43onvolutionParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12\x0b\n\x03pad\x18\x03 \x03(\r\x12\x13\n\x0bkernel_size\x18\x04 \x03(\r\x12\x0e\n\x06stride\x18\x06 \x03(\r\x12\x10\n\x08\x64ilation\x18\x12 \x03(\r\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x10\n\x08kernel_h\x18\x0b \x01(\r\x12\x10\n\x08kernel_w\x18\x0c \x01(\r\x12\x10\n\x08stride_h\x18\r \x01(\r\x12\x10\n\x08stride_w\x18\x0e \x01(\r\x12\x10\n\x05group\x18\x05 \x01(\r:\x01\x31\x12-\n\rweight_filler\x18\x07 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x08 \x01(\x0b\x32\x16.caffe.FillerParameter\x12;\n\x06\x65ngine\x18\x0f \x01(\x0e\x32\".caffe.ConvolutionParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x10 \x01(\x05:\x01\x31\x12\x1e\n\x0f\x66orce_nd_im2col\x18\x11 \x01(\x08:\x05\x66\x61lse\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"0\n\rCropParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x32\x12\x0e\n\x06offset\x18\x02 \x03(\r\"\xa4\x02\n\rDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x31\n\x07\x62\x61\x63kend\x18\x08 \x01(\x0e\x32\x17.caffe.DataParameter.DB:\x07LEVELDB\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\"\n\x13\x66orce_encoded_color\x18\t \x01(\x08:\x05\x66\x61lse\x12\x13\n\x08prefetch\x18\n \x01(\r:\x01\x35\"\x1b\n\x02\x44\x42\x12\x0b\n\x07LEVELDB\x10\x00\x12\x08\n\x04LMDB\x10\x01\"I\n\x10\x44ropoutParameter\x12\x1a\n\rdropout_ratio\x18\x01 
\x01(\x02:\x03\x30.5\x12\x19\n\x0bscale_train\x18\x02 \x01(\x08:\x04true\"\xa0\x01\n\x12\x44ummyDataParameter\x12+\n\x0b\x64\x61ta_filler\x18\x01 \x03(\x0b\x32\x16.caffe.FillerParameter\x12\x1f\n\x05shape\x18\x06 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x0b\n\x03num\x18\x02 \x03(\r\x12\x10\n\x08\x63hannels\x18\x03 \x03(\r\x12\x0e\n\x06height\x18\x04 \x03(\r\x12\r\n\x05width\x18\x05 \x03(\r\"\xa5\x01\n\x10\x45ltwiseParameter\x12\x39\n\toperation\x18\x01 \x01(\x0e\x32!.caffe.EltwiseParameter.EltwiseOp:\x03SUM\x12\r\n\x05\x63oeff\x18\x02 \x03(\x02\x12\x1e\n\x10stable_prod_grad\x18\x03 \x01(\x08:\x04true\"\'\n\tEltwiseOp\x12\x08\n\x04PROD\x10\x00\x12\x07\n\x03SUM\x10\x01\x12\x07\n\x03MAX\x10\x02\" \n\x0c\x45LUParameter\x12\x10\n\x05\x61lpha\x18\x01 \x01(\x02:\x01\x31\"\xac\x01\n\x0e\x45mbedParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x11\n\tinput_dim\x18\x02 \x01(\r\x12\x17\n\tbias_term\x18\x03 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"D\n\x0c\x45xpParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"9\n\x10\x46lattenParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x14\n\x08\x65nd_axis\x18\x02 \x01(\x05:\x02-1\"O\n\x11HDF5DataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x16\n\x07shuffle\x18\x03 \x01(\x08:\x05\x66\x61lse\"(\n\x13HDF5OutputParameter\x12\x11\n\tfile_name\x18\x01 \x01(\t\"^\n\x12HingeLossParameter\x12\x30\n\x04norm\x18\x01 \x01(\x0e\x32\x1e.caffe.HingeLossParameter.Norm:\x02L1\"\x16\n\x04Norm\x12\x06\n\x02L1\x10\x01\x12\x06\n\x02L2\x10\x02\"\x97\x02\n\x12ImageDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x15\n\nbatch_size\x18\x04 \x01(\r:\x01\x31\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x16\n\x07shuffle\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x15\n\nnew_height\x18\t 
\x01(\r:\x01\x30\x12\x14\n\tnew_width\x18\n \x01(\r:\x01\x30\x12\x16\n\x08is_color\x18\x0b \x01(\x08:\x04true\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\x0c \x01(\t:\x00\"\'\n\x15InfogainLossParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\"\xcb\x01\n\x15InnerProductParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0f\n\x04\x61xis\x18\x05 \x01(\x05:\x01\x31\x12\x18\n\ttranspose\x18\x06 \x01(\x08:\x05\x66\x61lse\"1\n\x0eInputParameter\x12\x1f\n\x05shape\x18\x01 \x03(\x0b\x32\x10.caffe.BlobShape\"D\n\x0cLogParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"\xb8\x02\n\x0cLRNParameter\x12\x15\n\nlocal_size\x18\x01 \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x02 \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x03 \x01(\x02:\x04\x30.75\x12\x44\n\x0bnorm_region\x18\x04 \x01(\x0e\x32\x1e.caffe.LRNParameter.NormRegion:\x0f\x41\x43ROSS_CHANNELS\x12\x0c\n\x01k\x18\x05 \x01(\x02:\x01\x31\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.LRNParameter.Engine:\x07\x44\x45\x46\x41ULT\"5\n\nNormRegion\x12\x13\n\x0f\x41\x43ROSS_CHANNELS\x10\x00\x12\x12\n\x0eWITHIN_CHANNEL\x10\x01\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xbd\x01\n\x13MemoryDataParameter\x12\x12\n\nbatch_size\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\r\n\x05width\x18\x04 \x01(\r\x12;\n\x05\x64type\x18\x05 
\x01(\x0e\x32#.caffe.MemoryDataParameter.DataType:\x07\x46LOAT32\"$\n\x08\x44\x61taType\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\x0b\n\x07\x46LOAT16\x10\x01\"e\n\x0cMVNParameter\x12 \n\x12normalize_variance\x18\x01 \x01(\x08:\x04true\x12\x1e\n\x0f\x61\x63ross_channels\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x13\n\x03\x65ps\x18\x03 \x01(\x02:\x06\x31\x65-009\"5\n\x12ParameterParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\"\xa2\x03\n\x10PoolingParameter\x12\x35\n\x04pool\x18\x01 \x01(\x0e\x32\".caffe.PoolingParameter.PoolMethod:\x03MAX\x12\x0e\n\x03pad\x18\x04 \x01(\r:\x01\x30\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x13\n\x0bkernel_size\x18\x02 \x01(\r\x12\x10\n\x08kernel_h\x18\x05 \x01(\r\x12\x10\n\x08kernel_w\x18\x06 \x01(\r\x12\x11\n\x06stride\x18\x03 \x01(\r:\x01\x31\x12\x10\n\x08stride_h\x18\x07 \x01(\r\x12\x10\n\x08stride_w\x18\x08 \x01(\r\x12\x37\n\x06\x65ngine\x18\x0b \x01(\x0e\x32\x1e.caffe.PoolingParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x1d\n\x0eglobal_pooling\x18\x0c \x01(\x08:\x05\x66\x61lse\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Y\n\x13ROIPoolingParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"F\n\x0ePowerParameter\x12\x10\n\x05power\x18\x01 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"g\n\x0fPythonParameter\x12\x0e\n\x06module\x18\x01 \x01(\t\x12\r\n\x05layer\x18\x02 \x01(\t\x12\x13\n\tparam_str\x18\x03 \x01(\t:\x00\x12 \n\x11share_in_parallel\x18\x04 \x01(\x08:\x05\x66\x61lse\"\xad\x01\n\x12ReductionParameter\x12=\n\toperation\x18\x01 \x01(\x0e\x32%.caffe.ReductionParameter.ReductionOp:\x03SUM\x12\x0f\n\x04\x61xis\x18\x02 
\x01(\x05:\x01\x30\x12\x10\n\x05\x63oeff\x18\x03 \x01(\x02:\x01\x31\"5\n\x0bReductionOp\x12\x07\n\x03SUM\x10\x01\x12\x08\n\x04\x41SUM\x10\x02\x12\t\n\x05SUMSQ\x10\x03\x12\x08\n\x04MEAN\x10\x04\"\x8d\x01\n\rReLUParameter\x12\x19\n\x0enegative_slope\x18\x01 \x01(\x02:\x01\x30\x12\x34\n\x06\x65ngine\x18\x02 \x01(\x0e\x32\x1b.caffe.ReLUParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Z\n\x10ReshapeParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\x08num_axes\x18\x03 \x01(\x05:\x02-1\"\xa5\x01\n\x0eScaleParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x18\n\tbias_term\x18\x04 \x01(\x08:\x05\x66\x61lse\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"x\n\x10SigmoidParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SigmoidParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"L\n\x0eSliceParameter\x12\x0f\n\x04\x61xis\x18\x03 \x01(\x05:\x01\x31\x12\x13\n\x0bslice_point\x18\x02 \x03(\r\x12\x14\n\tslice_dim\x18\x01 \x01(\r:\x01\x31\"\x89\x01\n\x10SoftmaxParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SoftmaxParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"r\n\rTanHParameter\x12\x34\n\x06\x65ngine\x18\x01 
\x01(\x0e\x32\x1b.caffe.TanHParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"T\n\rTileParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\r\n\x05tiles\x18\x02 \x01(\x05\x12#\n\tmultiples\x18\x03 \x01(\x0b\x32\x10.caffe.BlobShape\"*\n\x12ThresholdParameter\x12\x14\n\tthreshold\x18\x01 \x01(\x02:\x01\x30\"\xc1\x02\n\x13WindowDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0c\x66g_threshold\x18\x07 \x01(\x02:\x03\x30.5\x12\x19\n\x0c\x62g_threshold\x18\x08 \x01(\x02:\x03\x30.5\x12\x19\n\x0b\x66g_fraction\x18\t \x01(\x02:\x04\x30.25\x12\x16\n\x0b\x63ontext_pad\x18\n \x01(\r:\x01\x30\x12\x17\n\tcrop_mode\x18\x0b \x01(\t:\x04warp\x12\x1b\n\x0c\x63\x61\x63he_images\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\r \x01(\t:\x00\"\xeb\x01\n\x0cSPPParameter\x12\x16\n\x0epyramid_height\x18\x01 \x01(\r\x12\x31\n\x04pool\x18\x02 \x01(\x0e\x32\x1e.caffe.SPPParameter.PoolMethod:\x03MAX\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.SPPParameter.Engine:\x07\x44\x45\x46\x41ULT\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xe0\x13\n\x10V1LayerParameter\x12\x0e\n\x06\x62ottom\x18\x02 \x03(\t\x12\x0b\n\x03top\x18\x03 \x03(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12$\n\x07include\x18 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18! 
\x03(\x0b\x32\x13.caffe.NetStateRule\x12/\n\x04type\x18\x05 \x01(\x0e\x32!.caffe.V1LayerParameter.LayerType\x12\x1f\n\x05\x62lobs\x18\x06 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x0e\n\x05param\x18\xe9\x07 \x03(\t\x12>\n\x0f\x62lob_share_mode\x18\xea\x07 \x03(\x0e\x32$.caffe.V1LayerParameter.DimCheckMode\x12\x10\n\x08\x62lobs_lr\x18\x07 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x08 \x03(\x02\x12\x13\n\x0bloss_weight\x18# \x03(\x02\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x1b \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18\x17 \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12,\n\x0c\x63oncat_param\x18\t \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18( \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18\n \x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12(\n\ndata_param\x18\x0b \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18\x0c \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18\x1a \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18\x18 \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12&\n\texp_param\x18) \x01(\x0b\x32\x13.caffe.ExpParameter\x12\x31\n\x0fhdf5_data_param\x18\r \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18\x0e \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18\x1d \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18\x0f \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18\x10 \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18\x11 \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12&\n\tlrn_param\x18\x12 \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18\x16 \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18\" \x01(\x0b\x32\x13.caffe.MVNParameter\x12.\n\rpooling_param\x18\x13 \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18\x15 
\x01(\x0b\x32\x15.caffe.PowerParameter\x12(\n\nrelu_param\x18\x1e \x01(\x0b\x32\x14.caffe.ReLUParameter\x12.\n\rsigmoid_param\x18& \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18\' \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12*\n\x0bslice_param\x18\x1f \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18% \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x32\n\x0fthreshold_param\x18\x19 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12\x35\n\x11window_data_param\x18\x14 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x37\n\x0ftransform_param\x18$ \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18* \x01(\x0b\x32\x14.caffe.LossParameter\x12&\n\x05layer\x18\x01 \x01(\x0b\x32\x17.caffe.V0LayerParameter\"\xd8\x04\n\tLayerType\x12\x08\n\x04NONE\x10\x00\x12\n\n\x06\x41\x42SVAL\x10#\x12\x0c\n\x08\x41\x43\x43URACY\x10\x01\x12\n\n\x06\x41RGMAX\x10\x1e\x12\x08\n\x04\x42NLL\x10\x02\x12\n\n\x06\x43ONCAT\x10\x03\x12\x14\n\x10\x43ONTRASTIVE_LOSS\x10%\x12\x0f\n\x0b\x43ONVOLUTION\x10\x04\x12\x08\n\x04\x44\x41TA\x10\x05\x12\x11\n\rDECONVOLUTION\x10\'\x12\x0b\n\x07\x44ROPOUT\x10\x06\x12\x0e\n\nDUMMY_DATA\x10 
\x12\x12\n\x0e\x45UCLIDEAN_LOSS\x10\x07\x12\x0b\n\x07\x45LTWISE\x10\x19\x12\x07\n\x03\x45XP\x10&\x12\x0b\n\x07\x46LATTEN\x10\x08\x12\r\n\tHDF5_DATA\x10\t\x12\x0f\n\x0bHDF5_OUTPUT\x10\n\x12\x0e\n\nHINGE_LOSS\x10\x1c\x12\n\n\x06IM2COL\x10\x0b\x12\x0e\n\nIMAGE_DATA\x10\x0c\x12\x11\n\rINFOGAIN_LOSS\x10\r\x12\x11\n\rINNER_PRODUCT\x10\x0e\x12\x07\n\x03LRN\x10\x0f\x12\x0f\n\x0bMEMORY_DATA\x10\x1d\x12\x1d\n\x19MULTINOMIAL_LOGISTIC_LOSS\x10\x10\x12\x07\n\x03MVN\x10\"\x12\x0b\n\x07POOLING\x10\x11\x12\t\n\x05POWER\x10\x1a\x12\x08\n\x04RELU\x10\x12\x12\x0b\n\x07SIGMOID\x10\x13\x12\x1e\n\x1aSIGMOID_CROSS_ENTROPY_LOSS\x10\x1b\x12\x0b\n\x07SILENCE\x10$\x12\x0b\n\x07SOFTMAX\x10\x14\x12\x10\n\x0cSOFTMAX_LOSS\x10\x15\x12\t\n\x05SPLIT\x10\x16\x12\t\n\x05SLICE\x10!\x12\x08\n\x04TANH\x10\x17\x12\x0f\n\x0bWINDOW_DATA\x10\x18\x12\r\n\tTHRESHOLD\x10\x1f\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xfd\x07\n\x10V0LayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x12\n\nnum_output\x18\x03 \x01(\r\x12\x16\n\x08\x62iasterm\x18\x04 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x06 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0e\n\x03pad\x18\x07 \x01(\r:\x01\x30\x12\x12\n\nkernelsize\x18\x08 \x01(\r\x12\x10\n\x05group\x18\t \x01(\r:\x01\x31\x12\x11\n\x06stride\x18\n \x01(\r:\x01\x31\x12\x35\n\x04pool\x18\x0b \x01(\x0e\x32\".caffe.V0LayerParameter.PoolMethod:\x03MAX\x12\x1a\n\rdropout_ratio\x18\x0c \x01(\x02:\x03\x30.5\x12\x15\n\nlocal_size\x18\r \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x0e \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x0f \x01(\x02:\x04\x30.75\x12\x0c\n\x01k\x18\x16 \x01(\x02:\x01\x31\x12\x0e\n\x06source\x18\x10 \x01(\t\x12\x10\n\x05scale\x18\x11 \x01(\x02:\x01\x31\x12\x10\n\x08meanfile\x18\x12 \x01(\t\x12\x11\n\tbatchsize\x18\x13 \x01(\r\x12\x13\n\x08\x63ropsize\x18\x14 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x15 
\x01(\x08:\x05\x66\x61lse\x12\x1f\n\x05\x62lobs\x18\x32 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x10\n\x08\x62lobs_lr\x18\x33 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x34 \x03(\x02\x12\x14\n\trand_skip\x18\x35 \x01(\r:\x01\x30\x12\x1d\n\x10\x64\x65t_fg_threshold\x18\x36 \x01(\x02:\x03\x30.5\x12\x1d\n\x10\x64\x65t_bg_threshold\x18\x37 \x01(\x02:\x03\x30.5\x12\x1d\n\x0f\x64\x65t_fg_fraction\x18\x38 \x01(\x02:\x04\x30.25\x12\x1a\n\x0f\x64\x65t_context_pad\x18: \x01(\r:\x01\x30\x12\x1b\n\rdet_crop_mode\x18; \x01(\t:\x04warp\x12\x12\n\x07new_num\x18< \x01(\x05:\x01\x30\x12\x17\n\x0cnew_channels\x18= \x01(\x05:\x01\x30\x12\x15\n\nnew_height\x18> \x01(\x05:\x01\x30\x12\x14\n\tnew_width\x18? \x01(\x05:\x01\x30\x12\x1d\n\x0eshuffle_images\x18@ \x01(\x08:\x05\x66\x61lse\x12\x15\n\nconcat_dim\x18\x41 \x01(\r:\x01\x31\x12\x36\n\x11hdf5_output_param\x18\xe9\x07 \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"W\n\x0ePReLUParameter\x12&\n\x06\x66iller\x18\x01 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1d\n\x0e\x63hannel_shared\x18\x02 \x01(\x08:\x05\x66\x61lse\")\n\x15SmoothL1LossParameter\x12\x10\n\x05sigma\x18\x01 \x01(\x02:\x01\x31\"H\n\x0cMPIParameter\x12\x0f\n\x04root\x18\x01 \x01(\r:\x01\x30\x12\x12\n\x07\x63omm_id\x18\x02 \x01(\x04:\x01\x30\x12\x13\n\x08group_id\x18\x03 \x01(\x04:\x01\x30\"!\n\x10PermuteParameter\x12\r\n\x05order\x18\x01 \x03(\r\"\x93\x01\n\x12NormalizeParameter\x12\x1c\n\x0e\x61\x63ross_spatial\x18\x01 \x01(\x08:\x04true\x12,\n\x0cscale_filler\x18\x02 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1c\n\x0e\x63hannel_shared\x18\x03 \x01(\x08:\x04true\x12\x13\n\x03\x65ps\x18\x04 \x01(\x02:\x06\x31\x65-005\"d\n\x11ParallelParameter\x12\x1d\n\x0emultiple_nodes\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x16\n\x07shuffle\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x18\n\tpartition\x18\x03 \x01(\x08:\x05\x66\x61lse\"R\n\x0fResizeParameter\x12\x1f\n\x05shape\x18\x01 
\x01(\x0b\x32\x10.caffe.BlobShape\x12\x0e\n\x02\x66x\x18\x02 \x01(\x02:\x02-1\x12\x0e\n\x02\x66y\x18\x03 \x01(\x02:\x02-1\"\'\n\x13\x45xpandDimsParameter\x12\x10\n\x04\x61xis\x18\x01 \x01(\x05:\x02-1\"\x90\x02\n\x11ProposalParameter\x12\x0e\n\x06stride\x18\x01 \x03(\x05\x12\r\n\x05ratio\x18\x02 \x03(\x02\x12\r\n\x05scale\x18\x03 \x03(\x02\x12\x1b\n\rpre_nms_top_n\x18\x04 \x01(\r:\x04\x36\x30\x30\x30\x12\x1b\n\x0epost_nms_top_n\x18\x05 \x01(\r:\x03\x33\x30\x30\x12\x17\n\nnms_thresh\x18\x06 \x01(\x02:\x03\x30.7\x12\x14\n\x08min_size\x18\x07 \x01(\r:\x02\x31\x36\x12\x14\n\tmin_level\x18\x08 \x01(\x05:\x01\x32\x12\x14\n\tmax_level\x18\t \x01(\x05:\x01\x35\x12\x1c\n\x0f\x63\x61nonical_scale\x18\n \x01(\x05:\x03\x32\x32\x34\x12\x1a\n\x0f\x63\x61nonical_level\x18\x0b \x01(\x05:\x01\x34\"\xa7\x01\n\x14\x42\x61tchRenormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x13\n\x03\x65ps\x18\x03 \x01(\x02:\x06\x31\x65-005\x12\x10\n\x05r_max\x18\x04 \x01(\x02:\x01\x33\x12\x10\n\x05\x64_max\x18\x05 \x01(\x02:\x01\x35\x12\x16\n\x07t_delta\x18\x06 \x01(\x02:\x05\x30.001\"?\n\x14\x44\x65nseConcatParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x16\n\x0bgrowth_rate\x18\x02 \x01(\x05:\x01\x30\"N\n\x12\x46ocalLossParameter\x12\x13\n\x05\x61lpha\x18\x01 \x01(\x02:\x04\x30.25\x12\x10\n\x05gamma\x18\x02 \x01(\x02:\x01\x32\x12\x11\n\x06neg_id\x18\x03 \x01(\x05:\x01\x30\"\"\n\x0fGatherParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x30\",\n\x15InstanceNormParameter\x12\x13\n\x03\x65ps\x18\x01 \x01(\x02:\x06\x31\x65-005\"<\n\x12GroupNormParameter\x12\x13\n\x03\x65ps\x18\x01 \x01(\x02:\x06\x31\x65-005\x12\x11\n\x05group\x18\x02 \x01(\x05:\x02\x33\x32\"k\n\x12\x44ropBlockParameter\x12\x15\n\nblock_size\x18\x01 \x01(\x05:\x01\x37\x12\x16\n\tkeep_prob\x18\x02 \x01(\x02:\x03\x30.9\x12\x10\n\x05\x61lpha\x18\x03 \x01(\x02:\x01\x31\x12\x14\n\tdecrement\x18\x04 
\x01(\x02:\x01\x30*\x1c\n\x05Phase\x12\t\n\x05TRAIN\x10\x00\x12\x08\n\x04TEST\x10\x01')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
......@@ -40,8 +40,8 @@ _PHASE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=17641,
serialized_end=17669,
serialized_start=17850,
serialized_end=17878,
)
_sym_db.RegisterEnumDescriptor(_PHASE)
......@@ -209,8 +209,8 @@ _LOSSPARAMETER_NORMALIZATIONMODE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=6595,
serialized_end=6671,
serialized_start=6709,
serialized_end=6785,
)
_sym_db.RegisterEnumDescriptor(_LOSSPARAMETER_NORMALIZATIONMODE)
......@@ -235,8 +235,8 @@ _CONVOLUTIONPARAMETER_ENGINE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=7634,
serialized_end=7677,
serialized_start=7749,
serialized_end=7792,
)
_sym_db.RegisterEnumDescriptor(_CONVOLUTIONPARAMETER_ENGINE)
......@@ -257,8 +257,8 @@ _DATAPARAMETER_DB = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=7995,
serialized_end=8022,
serialized_start=8110,
serialized_end=8137,
)
_sym_db.RegisterEnumDescriptor(_DATAPARAMETER_DB)
......@@ -283,8 +283,8 @@ _ELTWISEPARAMETER_ELTWISEOP = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=8389,
serialized_end=8428,
serialized_start=8504,
serialized_end=8543,
)
_sym_db.RegisterEnumDescriptor(_ELTWISEPARAMETER_ELTWISEOP)
......@@ -305,8 +305,8 @@ _HINGELOSSPARAMETER_NORM = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=8963,
serialized_end=8985,
serialized_start=9078,
serialized_end=9100,
)
_sym_db.RegisterEnumDescriptor(_HINGELOSSPARAMETER_NORM)
......@@ -327,8 +327,8 @@ _LRNPARAMETER_NORMREGION = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=9852,
serialized_end=9905,
serialized_start=9967,
serialized_end=10020,
)
_sym_db.RegisterEnumDescriptor(_LRNPARAMETER_NORMREGION)
......@@ -353,8 +353,8 @@ _LRNPARAMETER_ENGINE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=7634,
serialized_end=7677,
serialized_start=7749,
serialized_end=7792,
)
_sym_db.RegisterEnumDescriptor(_LRNPARAMETER_ENGINE)
......@@ -375,8 +375,8 @@ _MEMORYDATAPARAMETER_DATATYPE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=10106,
serialized_end=10142,
serialized_start=10221,
serialized_end=10257,
)
_sym_db.RegisterEnumDescriptor(_MEMORYDATAPARAMETER_DATATYPE)
......@@ -401,8 +401,8 @@ _POOLINGPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=10630,
serialized_end=10676,
serialized_start=10745,
serialized_end=10791,
)
_sym_db.RegisterEnumDescriptor(_POOLINGPARAMETER_POOLMETHOD)
......@@ -427,8 +427,8 @@ _POOLINGPARAMETER_ENGINE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=7634,
serialized_end=7677,
serialized_start=7749,
serialized_end=7792,
)
_sym_db.RegisterEnumDescriptor(_POOLINGPARAMETER_ENGINE)
......@@ -457,8 +457,8 @@ _REDUCTIONPARAMETER_REDUCTIONOP = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=11112,
serialized_end=11165,
serialized_start=11227,
serialized_end=11280,
)
_sym_db.RegisterEnumDescriptor(_REDUCTIONPARAMETER_REDUCTIONOP)
......@@ -483,8 +483,8 @@ _RELUPARAMETER_ENGINE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=7634,
serialized_end=7677,
serialized_start=7749,
serialized_end=7792,
)
_sym_db.RegisterEnumDescriptor(_RELUPARAMETER_ENGINE)
......@@ -509,8 +509,8 @@ _SIGMOIDPARAMETER_ENGINE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=7634,
serialized_end=7677,
serialized_start=7749,
serialized_end=7792,
)
_sym_db.RegisterEnumDescriptor(_SIGMOIDPARAMETER_ENGINE)
......@@ -535,8 +535,8 @@ _SOFTMAXPARAMETER_ENGINE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=7634,
serialized_end=7677,
serialized_start=7749,
serialized_end=7792,
)
_sym_db.RegisterEnumDescriptor(_SOFTMAXPARAMETER_ENGINE)
......@@ -561,8 +561,8 @@ _TANHPARAMETER_ENGINE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=7634,
serialized_end=7677,
serialized_start=7749,
serialized_end=7792,
)
_sym_db.RegisterEnumDescriptor(_TANHPARAMETER_ENGINE)
......@@ -587,8 +587,8 @@ _SPPPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=10630,
serialized_end=10676,
serialized_start=10745,
serialized_end=10791,
)
_sym_db.RegisterEnumDescriptor(_SPPPARAMETER_POOLMETHOD)
......@@ -613,8 +613,8 @@ _SPPPARAMETER_ENGINE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=7634,
serialized_end=7677,
serialized_start=7749,
serialized_end=7792,
)
_sym_db.RegisterEnumDescriptor(_SPPPARAMETER_ENGINE)
......@@ -787,8 +787,8 @@ _V1LAYERPARAMETER_LAYERTYPE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=14604,
serialized_end=15204,
serialized_start=14719,
serialized_end=15319,
)
_sym_db.RegisterEnumDescriptor(_V1LAYERPARAMETER_LAYERTYPE)
......@@ -835,8 +835,8 @@ _V0LAYERPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=10630,
serialized_end=10676,
serialized_start=10745,
serialized_end=10791,
)
_sym_db.RegisterEnumDescriptor(_V0LAYERPARAMETER_POOLMETHOD)
......@@ -2269,12 +2269,26 @@ _LAYERPARAMETER = _descriptor.Descriptor(
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='group_norm_param', full_name='caffe.LayerParameter.group_norm_param', index=71,
name='instance_norm_param', full_name='caffe.LayerParameter.instance_norm_param', index=71,
number=166, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='group_norm_param', full_name='caffe.LayerParameter.group_norm_param', index=72,
number=167, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='drop_block_param', full_name='caffe.LayerParameter.drop_block_param', index=73,
number=168, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
......@@ -2287,7 +2301,7 @@ _LAYERPARAMETER = _descriptor.Descriptor(
oneofs=[
],
serialized_start=2850,
serialized_end=6125,
serialized_end=6239,
)
......@@ -2386,8 +2400,8 @@ _TRANSFORMATIONPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=6128,
serialized_end=6423,
serialized_start=6242,
serialized_end=6537,
)
......@@ -2416,8 +2430,8 @@ _LOSSPARAMETER_EXPANDDIMSPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=6554,
serialized_end=6593,
serialized_start=6668,
serialized_end=6707,
)
_LOSSPARAMETER = _descriptor.Descriptor(
......@@ -2460,8 +2474,8 @@ _LOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=6426,
serialized_end=6671,
serialized_start=6540,
serialized_end=6785,
)
......@@ -2504,8 +2518,8 @@ _ACCURACYPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=6673,
serialized_end=6749,
serialized_start=6787,
serialized_end=6863,
)
......@@ -2548,8 +2562,8 @@ _ARGMAXPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=6751,
serialized_end=6828,
serialized_start=6865,
serialized_end=6942,
)
......@@ -2585,8 +2599,8 @@ _CONCATPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=6830,
serialized_end=6887,
serialized_start=6944,
serialized_end=7001,
)
......@@ -2614,7 +2628,7 @@ _BATCHNORMPARAMETER = _descriptor.Descriptor(
_descriptor.FieldDescriptor(
name='eps', full_name='caffe.BatchNormParameter.eps', index=2,
number=3, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.001,
has_default_value=True, default_value=1e-005,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
......@@ -2629,8 +2643,8 @@ _BATCHNORMPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=6889,
serialized_end=6993,
serialized_start=7003,
serialized_end=7108,
)
......@@ -2673,8 +2687,8 @@ _BIASPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=6995,
serialized_end=7088,
serialized_start=7110,
serialized_end=7203,
)
......@@ -2710,8 +2724,8 @@ _CONTRASTIVELOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=7090,
serialized_end=7166,
serialized_start=7205,
serialized_end=7281,
)
......@@ -2860,8 +2874,8 @@ _CONVOLUTIONPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=7169,
serialized_end=7677,
serialized_start=7284,
serialized_end=7792,
)
......@@ -2897,8 +2911,8 @@ _CROPPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=7679,
serialized_end=7727,
serialized_start=7794,
serialized_end=7842,
)
......@@ -2991,8 +3005,8 @@ _DATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=7730,
serialized_end=8022,
serialized_start=7845,
serialized_end=8137,
)
......@@ -3028,8 +3042,8 @@ _DROPOUTPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8024,
serialized_end=8097,
serialized_start=8139,
serialized_end=8212,
)
......@@ -3093,8 +3107,8 @@ _DUMMYDATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8100,
serialized_end=8260,
serialized_start=8215,
serialized_end=8375,
)
......@@ -3138,8 +3152,8 @@ _ELTWISEPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8263,
serialized_end=8428,
serialized_start=8378,
serialized_end=8543,
)
......@@ -3168,8 +3182,8 @@ _ELUPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8430,
serialized_end=8462,
serialized_start=8545,
serialized_end=8577,
)
......@@ -3226,8 +3240,8 @@ _EMBEDPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8465,
serialized_end=8637,
serialized_start=8580,
serialized_end=8752,
)
......@@ -3270,8 +3284,8 @@ _EXPPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8639,
serialized_end=8707,
serialized_start=8754,
serialized_end=8822,
)
......@@ -3307,8 +3321,8 @@ _FLATTENPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8709,
serialized_end=8766,
serialized_start=8824,
serialized_end=8881,
)
......@@ -3351,8 +3365,8 @@ _HDF5DATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8768,
serialized_end=8847,
serialized_start=8883,
serialized_end=8962,
)
......@@ -3381,8 +3395,8 @@ _HDF5OUTPUTPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8849,
serialized_end=8889,
serialized_start=8964,
serialized_end=9004,
)
......@@ -3412,8 +3426,8 @@ _HINGELOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8891,
serialized_end=8985,
serialized_start=9006,
serialized_end=9100,
)
......@@ -3519,8 +3533,8 @@ _IMAGEDATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=8988,
serialized_end=9267,
serialized_start=9103,
serialized_end=9382,
)
......@@ -3549,8 +3563,8 @@ _INFOGAINLOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=9269,
serialized_end=9308,
serialized_start=9384,
serialized_end=9423,
)
......@@ -3614,8 +3628,8 @@ _INNERPRODUCTPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=9311,
serialized_end=9514,
serialized_start=9426,
serialized_end=9629,
)
......@@ -3644,8 +3658,8 @@ _INPUTPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=9516,
serialized_end=9565,
serialized_start=9631,
serialized_end=9680,
)
......@@ -3688,8 +3702,8 @@ _LOGPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=9567,
serialized_end=9635,
serialized_start=9682,
serialized_end=9750,
)
......@@ -3755,8 +3769,8 @@ _LRNPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=9638,
serialized_end=9950,
serialized_start=9753,
serialized_end=10065,
)
......@@ -3814,8 +3828,8 @@ _MEMORYDATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=9953,
serialized_end=10142,
serialized_start=10068,
serialized_end=10257,
)
......@@ -3858,8 +3872,8 @@ _MVNPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=10144,
serialized_end=10245,
serialized_start=10259,
serialized_end=10360,
)
......@@ -3888,8 +3902,8 @@ _PARAMETERPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=10247,
serialized_end=10300,
serialized_start=10362,
serialized_end=10415,
)
......@@ -3997,8 +4011,8 @@ _POOLINGPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=10303,
serialized_end=10721,
serialized_start=10418,
serialized_end=10836,
)
......@@ -4041,8 +4055,8 @@ _ROIPOOLINGPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=10723,
serialized_end=10812,
serialized_start=10838,
serialized_end=10927,
)
......@@ -4085,8 +4099,8 @@ _POWERPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=10814,
serialized_end=10884,
serialized_start=10929,
serialized_end=10999,
)
......@@ -4136,8 +4150,8 @@ _PYTHONPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=10886,
serialized_end=10989,
serialized_start=11001,
serialized_end=11104,
)
......@@ -4181,8 +4195,8 @@ _REDUCTIONPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=10992,
serialized_end=11165,
serialized_start=11107,
serialized_end=11280,
)
......@@ -4219,8 +4233,8 @@ _RELUPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=11168,
serialized_end=11309,
serialized_start=11283,
serialized_end=11424,
)
......@@ -4263,8 +4277,8 @@ _RESHAPEPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=11311,
serialized_end=11401,
serialized_start=11426,
serialized_end=11516,
)
......@@ -4321,8 +4335,8 @@ _SCALEPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=11404,
serialized_end=11569,
serialized_start=11519,
serialized_end=11684,
)
......@@ -4352,8 +4366,8 @@ _SIGMOIDPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=11571,
serialized_end=11691,
serialized_start=11686,
serialized_end=11806,
)
......@@ -4396,8 +4410,8 @@ _SLICEPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=11693,
serialized_end=11769,
serialized_start=11808,
serialized_end=11884,
)
......@@ -4434,8 +4448,8 @@ _SOFTMAXPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=11772,
serialized_end=11909,
serialized_start=11887,
serialized_end=12024,
)
......@@ -4465,8 +4479,8 @@ _TANHPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=11911,
serialized_end=12025,
serialized_start=12026,
serialized_end=12140,
)
......@@ -4509,8 +4523,8 @@ _TILEPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=12027,
serialized_end=12111,
serialized_start=12142,
serialized_end=12226,
)
......@@ -4539,8 +4553,8 @@ _THRESHOLDPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=12113,
serialized_end=12155,
serialized_start=12228,
serialized_end=12270,
)
......@@ -4653,8 +4667,8 @@ _WINDOWDATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=12158,
serialized_end=12479,
serialized_start=12273,
serialized_end=12594,
)
......@@ -4699,8 +4713,8 @@ _SPPPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=12482,
serialized_end=12717,
serialized_start=12597,
serialized_end=12832,
)
......@@ -5025,8 +5039,8 @@ _V1LAYERPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=12720,
serialized_end=15248,
serialized_start=12835,
serialized_end=15363,
)
......@@ -5315,8 +5329,8 @@ _V0LAYERPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=15251,
serialized_end=16272,
serialized_start=15366,
serialized_end=16387,
)
......@@ -5352,8 +5366,8 @@ _PRELUPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=16274,
serialized_end=16361,
serialized_start=16389,
serialized_end=16476,
)
......@@ -5382,8 +5396,8 @@ _SMOOTHL1LOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=16363,
serialized_end=16404,
serialized_start=16478,
serialized_end=16519,
)
......@@ -5426,8 +5440,8 @@ _MPIPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=16406,
serialized_end=16478,
serialized_start=16521,
serialized_end=16593,
)
......@@ -5456,8 +5470,8 @@ _PERMUTEPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=16480,
serialized_end=16513,
serialized_start=16595,
serialized_end=16628,
)
......@@ -5492,7 +5506,7 @@ _NORMALIZEPARAMETER = _descriptor.Descriptor(
_descriptor.FieldDescriptor(
name='eps', full_name='caffe.NormalizeParameter.eps', index=3,
number=4, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.001,
has_default_value=True, default_value=1e-005,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
......@@ -5507,8 +5521,8 @@ _NORMALIZEPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=16516,
serialized_end=16662,
serialized_start=16631,
serialized_end=16778,
)
......@@ -5551,8 +5565,8 @@ _PARALLELPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=16664,
serialized_end=16764,
serialized_start=16780,
serialized_end=16880,
)
......@@ -5595,8 +5609,8 @@ _RESIZEPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=16766,
serialized_end=16848,
serialized_start=16882,
serialized_end=16964,
)
......@@ -5625,8 +5639,8 @@ _EXPANDDIMSPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=6554,
serialized_end=6593,
serialized_start=6668,
serialized_end=6707,
)
......@@ -5725,8 +5739,8 @@ _PROPOSALPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=16892,
serialized_end=17164,
serialized_start=17008,
serialized_end=17280,
)
......@@ -5754,7 +5768,7 @@ _BATCHRENORMPARAMETER = _descriptor.Descriptor(
_descriptor.FieldDescriptor(
name='eps', full_name='caffe.BatchRenormParameter.eps', index=2,
number=3, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.001,
has_default_value=True, default_value=1e-005,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
......@@ -5790,8 +5804,8 @@ _BATCHRENORMPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=17167,
serialized_end=17333,
serialized_start=17283,
serialized_end=17450,
)
......@@ -5827,8 +5841,8 @@ _DENSECONCATPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=17335,
serialized_end=17398,
serialized_start=17452,
serialized_end=17515,
)
......@@ -5871,8 +5885,8 @@ _FOCALLOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=17400,
serialized_end=17478,
serialized_start=17517,
serialized_end=17595,
)
......@@ -5901,8 +5915,38 @@ _GATHERPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=17480,
serialized_end=17514,
serialized_start=17597,
serialized_end=17631,
)
_INSTANCENORMPARAMETER = _descriptor.Descriptor(
name='InstanceNormParameter',
full_name='caffe.InstanceNormParameter',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='eps', full_name='caffe.InstanceNormParameter.eps', index=0,
number=1, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=1e-005,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=17633,
serialized_end=17677,
)
......@@ -5914,30 +5958,67 @@ _GROUPNORMPARAMETER = _descriptor.Descriptor(
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='use_global_stats', full_name='caffe.GroupNormParameter.use_global_stats', index=0,
number=1, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
name='eps', full_name='caffe.GroupNormParameter.eps', index=0,
number=1, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=1e-005,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='group', full_name='caffe.GroupNormParameter.group', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=True, default_value=32,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=17679,
serialized_end=17739,
)
_DROPBLOCKPARAMETER = _descriptor.Descriptor(
name='DropBlockParameter',
full_name='caffe.DropBlockParameter',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='block_size', full_name='caffe.DropBlockParameter.block_size', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=True, default_value=7,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='moving_average_fraction', full_name='caffe.GroupNormParameter.moving_average_fraction', index=1,
name='keep_prob', full_name='caffe.DropBlockParameter.keep_prob', index=1,
number=2, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.9,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='eps', full_name='caffe.GroupNormParameter.eps', index=2,
name='alpha', full_name='caffe.DropBlockParameter.alpha', index=2,
number=3, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.001,
has_default_value=True, default_value=1,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='group', full_name='caffe.GroupNormParameter.group', index=3,
number=5, type=13, cpp_type=3, label=1,
has_default_value=True, default_value=32,
name='decrement', full_name='caffe.DropBlockParameter.decrement', index=3,
number=4, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
......@@ -5952,8 +6033,8 @@ _GROUPNORMPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=17516,
serialized_end=17639,
serialized_start=17741,
serialized_end=17848,
)
_BLOBPROTO.fields_by_name['shape'].message_type = _BLOBSHAPE
......@@ -6044,7 +6125,9 @@ _LAYERPARAMETER.fields_by_name['batch_renorm_param'].message_type = _BATCHRENORM
_LAYERPARAMETER.fields_by_name['dense_concat_param'].message_type = _DENSECONCATPARAMETER
_LAYERPARAMETER.fields_by_name['focal_loss_param'].message_type = _FOCALLOSSPARAMETER
_LAYERPARAMETER.fields_by_name['gather_param'].message_type = _GATHERPARAMETER
_LAYERPARAMETER.fields_by_name['instance_norm_param'].message_type = _INSTANCENORMPARAMETER
_LAYERPARAMETER.fields_by_name['group_norm_param'].message_type = _GROUPNORMPARAMETER
_LAYERPARAMETER.fields_by_name['drop_block_param'].message_type = _DROPBLOCKPARAMETER
_LOSSPARAMETER_EXPANDDIMSPARAMETER.containing_type = _LOSSPARAMETER
_LOSSPARAMETER.fields_by_name['normalization'].enum_type = _LOSSPARAMETER_NORMALIZATIONMODE
_LOSSPARAMETER_NORMALIZATIONMODE.containing_type = _LOSSPARAMETER
......@@ -6215,7 +6298,9 @@ DESCRIPTOR.message_types_by_name['BatchRenormParameter'] = _BATCHRENORMPARAMETER
DESCRIPTOR.message_types_by_name['DenseConcatParameter'] = _DENSECONCATPARAMETER
DESCRIPTOR.message_types_by_name['FocalLossParameter'] = _FOCALLOSSPARAMETER
DESCRIPTOR.message_types_by_name['GatherParameter'] = _GATHERPARAMETER
DESCRIPTOR.message_types_by_name['InstanceNormParameter'] = _INSTANCENORMPARAMETER
DESCRIPTOR.message_types_by_name['GroupNormParameter'] = _GROUPNORMPARAMETER
DESCRIPTOR.message_types_by_name['DropBlockParameter'] = _DROPBLOCKPARAMETER
DESCRIPTOR.enum_types_by_name['Phase'] = _PHASE
BlobShape = _reflection.GeneratedProtocolMessageType('BlobShape', (_message.Message,), dict(
......@@ -6737,6 +6822,13 @@ GatherParameter = _reflection.GeneratedProtocolMessageType('GatherParameter', (_
))
_sym_db.RegisterMessage(GatherParameter)
InstanceNormParameter = _reflection.GeneratedProtocolMessageType('InstanceNormParameter', (_message.Message,), dict(
DESCRIPTOR = _INSTANCENORMPARAMETER,
__module__ = 'caffe_pb2'
# @@protoc_insertion_point(class_scope:caffe.InstanceNormParameter)
))
_sym_db.RegisterMessage(InstanceNormParameter)
GroupNormParameter = _reflection.GeneratedProtocolMessageType('GroupNormParameter', (_message.Message,), dict(
DESCRIPTOR = _GROUPNORMPARAMETER,
__module__ = 'caffe_pb2'
......@@ -6744,6 +6836,13 @@ GroupNormParameter = _reflection.GeneratedProtocolMessageType('GroupNormParamete
))
_sym_db.RegisterMessage(GroupNormParameter)
DropBlockParameter = _reflection.GeneratedProtocolMessageType('DropBlockParameter', (_message.Message,), dict(
DESCRIPTOR = _DROPBLOCKPARAMETER,
__module__ = 'caffe_pb2'
# @@protoc_insertion_point(class_scope:caffe.DropBlockParameter)
))
_sym_db.RegisterMessage(DropBlockParameter)
_BLOBSHAPE.fields_by_name['dim'].has_options = True
_BLOBSHAPE.fields_by_name['dim']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))
......
......@@ -24,7 +24,7 @@ def convert_to_tensor(value, dtype=None, name=None, **kwargs):
Parameters
----------
value : basic type, list or numpy.ndarray
value : number, list or numpy.ndarray
The value to convert.
dtype : Dtype or None
The data type. If ``None``, inferred from the type of `value`.
......
......@@ -15,6 +15,7 @@ import numpy as np
import dragon.core.mpi as mpi
import dragon.core.workspace as ws
import dragon.protos.dragon_pb2 as pb
from dragon.core.utils import MakeArgument
from dragon.core.gradient_maker import GraphGradientMaker
from dragon.core.scope import GetOperatorName, GetTensorName
......@@ -156,6 +157,7 @@ def GraphDef_Opt(meta_graph):
OX = 3 if option['share_grads'] else 2
if option['debug_mode']: OX = 1
meta_graph.arg.add().CopyFrom(MakeArgument('optimization_level', OX))
meta_graph.graph_type = option['graph_type']
def GraphDef_Device(meta_graph):
......@@ -181,11 +183,12 @@ def GraphDef_Device(meta_graph):
"""
from dragon.config import option
if option['device'] is not 'None':
supports = {'CPU': 0, 'CUDA': 1}
supports = {'CPU': 0, 'CUDA': 1, 'CNML': 2}
device_option = pb.DeviceOption()
device_option.device_type = supports[option['device']]
device_option.device_id = option['gpu_id']
device_option.device_id = option['device_id']
device_option.random_seed = option['random_seed']
if option['device'] == 'CUDA':
if option['use_cudnn']: device_option.engine = 'CUDNN'
meta_graph.device_option.CopyFrom(device_option)
......@@ -217,16 +220,16 @@ def function(inputs=None, outputs=None, givens=None, updater=None):
Examples
--------
>>> x = Tensor('x').Variable()
>>> x = Tensor('x', dtype='float32').Variable()
>>> y = x * 2
>>> f = theano.function(outputs=y)
>>> x.set_value(np.ones((2, 3), dtype=np.float32))
>>> f = function(outputs=y)
>>> x.set_value(np.ones((2, 3)))
>>> print(f())
>>> [[ 2. 2. 2.]
[ 2. 2. 2.]]
>>> f = theano.function(inputs=x, outputs=y)
>>> print(f(np.ones((2, 3), dtype=np.float32)))
>>> f = function(inputs=x, outputs=y)
>>> print(f(np.ones((2, 3)))
>>> [[ 2. 2. 2.]
[ 2. 2. 2.]]
......@@ -339,13 +342,15 @@ def eval(self, feed_dict=None):
raise TypeError('The key of feed_dict key should be a Tensor.')
if key.shape is not None:
if len(key.shape) != len(value.shape):
raise RuntimeError('The Tensor({}) was limited to {} dimensions, \
while feed a value with {} dimensions.'.
format(key.name, len(key.shape), len(value.shape)))
raise RuntimeError(
'The Tensor({}) was limited to {} dimensions, \
while feed a value with {} dimensions.'.format(
key.name, len(key.shape), len(value.shape)))
for i in range(len(key.shape)):
if key.shape[i] is None: continue
if key.shape[i] != value.shape[i]:
raise RuntimeError('The shape of Tensor({}) was limited as ('.format(key.name) +
raise RuntimeError(
'The shape of Tensor({}) was limited as ('.format(key.name) +
','.join([str(dim) for dim in key.shape]) + '), ' +
'while feed a value with (' + ','.join([str(dim) for dim in value.shape]) + ').')
return self._eval_func(*feed_dict.values())
......
......@@ -20,7 +20,7 @@ def shared(value, name=None, **kwargs):
Parameters
----------
value : basic type, list or numpy.ndarray
value : number, list or numpy.ndarray
The numerical values.
name : str
The name of tensor.
......
......@@ -42,6 +42,7 @@ class Module(object):
self._buffers = OrderedDict()
self._persistent_key = self._op = None
self._ctx = ('CPU', 0)
self.training = True
def __getattr__(self, item):
if '_parameters' in self.__dict__:
......@@ -363,3 +364,12 @@ class Module(object):
def run(self, inputs, outputs, auto_grad=True):
meta = ('PERSISTENT', self.persistent_key, self.op)
return RunOperator(inputs, outputs, meta, auto_grad=auto_grad)
def train(self, mode=True):
self.training = mode
for module in self.children():
module.train(mode)
return self
def eval(self):
return self.train(False)
\ No newline at end of file
......@@ -10,20 +10,35 @@
# ------------------------------------------------------------
"""We move the Module & Parameter to ``torch`` instead of ``torch.nn``,
as it will be reused by the ``torch.ops``.
"""
from dragon.vm.torch.module import Module
from dragon.vm.torch.tensor import Parameter
from .modules.conv import Conv2d, ConvTranspose2d
from .modules.pooling import MaxPool2d, AvgPool2d
from .modules.activation import ReLU, LeakyReLU, Sigmoid, Softmax
from .modules.activation import (
ReLU, LeakyReLU, ELU, SELU,
Sigmoid, Softmax,
)
from .modules.linear import Linear
from .modules.loss import CrossEntropyLoss
from .modules.loss import (
BCEWithLogitsLoss,
NLLLoss, CrossEntropyLoss,
L1Loss, MSELoss, SmoothL1Loss,
)
from .modules.container import Container, Sequential, ModuleList
from .modules.batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d
from .modules.groupnorm import GroupNorm1d, GroupNorm2d, GroupNorm3d
from .modules.affine import Affine
from .modules.dropout import Dropout, Dropout2d, Dropout3d
from .modules.dropblock import DropBlock2d
from .modules.rnn import RNNBase, RNN, LSTM, GRU
from . import init
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/pytorch/pytorch/blob/master/torch/nn/functional.py>
#
# ------------------------------------------------------------
import warnings
class _Reduction:
@staticmethod
def get_enum(reduction):
if reduction == 'none':
return 0
if reduction == 'elementwise_mean':
return 1
if reduction == 'sum':
return 2
raise ValueError(reduction + " is not a valid value for reduction")
# In order to support previous versions, accept boolean size_average and reduce
# and convert them into the new constants for now
# We use these functions in torch/legacy as well, in which case we'll silence the warning
@staticmethod
def legacy_get_string(size_average, reduce, emit_warning=True):
warning = "size_average and reduce args will be deprecated, please use reduction='{}' instead."
if size_average is None:
size_average = True
if reduce is None:
reduce = True
if size_average and reduce:
ret = 'elementwise_mean'
elif reduce:
ret = 'sum'
else:
ret = 'none'
if emit_warning:
warnings.warn(warning.format(ret))
return ret
@staticmethod
def legacy_get_enum(size_average, reduce, emit_warning=True):
return _Reduction.get_enum(_Reduction.legacy_get_string(size_average, reduce, emit_warning))
\ No newline at end of file
......@@ -55,6 +55,47 @@ class LeakyReLU(Module):
return self.run(inputs, outputs)
class ELU(Module):
    """Exponential linear unit activation, optionally applied in place."""

    def __init__(self, alpha=1.0, inplace=False):
        super(ELU, self).__init__()
        self.alpha = alpha
        self._inplace = inplace
        self.register_op()

    def register_op(self):
        # Single-input / single-output Dragon op with one scalar argument.
        self.op_meta = {
            'op_type': 'Elu',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {'alpha': self.alpha},
        }

    def forward(self, x):
        inputs = [x]
        self.unify_devices(inputs)
        out = x if self._inplace else self.register_output(x.dtype)
        return self.run(inputs, [out])
class SELU(Module):
    """Scaled exponential linear unit activation, optionally in place."""

    def __init__(self, inplace=False):
        super(SELU, self).__init__()
        self._inplace = inplace
        self.register_op()

    def register_op(self):
        # The SElu op takes no extra arguments.
        self.op_meta = {
            'op_type': 'SElu',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {},
        }

    def forward(self, x):
        inputs = [x]
        self.unify_devices(inputs)
        out = x if self._inplace else self.register_output(x.dtype)
        return self.run(inputs, [out])
class Sigmoid(Module):
def __init__(self, inplace=False):
super(Sigmoid, self).__init__()
......
......@@ -102,7 +102,7 @@ class _BatchNorm(Module):
inputs = [input] + self.inputs
self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)]
phase = 'TRAIN' if input.requires_grad else 'TEST'
phase = 'TRAIN' if self.training else 'TEST'
# Normalize the input by using batch stats ALWAYS
# Note that the update of moving average is meaningless(
# Because we can not remove it. Why? Ask nvidia and cuDNN -:)
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.nn import Module
class DropBlock2d(Module):
    """DropBlock regularization over NCHW feature maps.

    Acts as the identity when the module is in evaluation mode.
    """

    def __init__(self, block_size=7, kp=0.9,
                 alpha=1., decrement=0., inplace=False):
        super(DropBlock2d, self).__init__()
        self.kp = kp                    # keep probability
        self.block_size = block_size
        self.alpha = alpha
        self.decrement = decrement
        self.inplace = inplace
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'DropBlock2d',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {
                'block_size': self.block_size,
                'keep_prob': self.kp,
                'alpha': self.alpha,
                'decrement': self.decrement,
                'data_format': 'NCHW',
                'phase': 'TRAIN',
            }
        }

    def forward(self, input):
        # Identity during evaluation.
        if not self.training:
            return input
        inputs = [input]
        self.unify_devices(inputs)
        out = input if self.inplace else self.register_output(input.dtype)
        return self.run(inputs, [out])
\ No newline at end of file
......@@ -34,7 +34,7 @@ class Dropout(Module):
}
def forward(self, input):
if not input.requires_grad: return input
if not self.training: return input
inputs = [input]
self.unify_devices(inputs)
outputs = [input if self.inplace else self.register_output(input.dtype)]
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.tensor import Tensor
from dragon.vm.torch.nn import Module, Parameter
from dragon.vm.torch.ops.creation import zeros, ones
from dragon.vm.torch.module import RunOperator
class _GroupNorm(Module):
    """Base class for the GroupNorm variants.

    Registers either the fused op (with learnable scale/shift) or the
    plain op, normalizing over ``group`` channel groups along axis 1
    (NCHW data format).
    """
    def __init__(self, num_features, group=32,
                 eps=1e-5, affine=True):
        super(_GroupNorm, self).__init__()
        self.num_features = num_features
        self.group = group
        self.eps = eps
        self.affine = affine
        if self.affine:
            # Learnable per-channel scale and shift.
            self.weight = Parameter(Tensor(num_features))
            self.bias = Parameter(Tensor(num_features))
        else:
            self.weight = self.bias = None
        # Extra op inputs exist only in the affine (fused) case.
        self.inputs = [self.weight, self.bias] if self.affine else []
        self.reset_parameters()
        self.register_op()

    def reset_parameters(self):
        # NOTE(review): weight is drawn from uniform_(), not set to ones —
        # confirm this default is intentional.
        if self.affine:
            self.weight.data.uniform_()
            self.bias.data.zero_()

    def register_op(self):
        self.op_meta = {
            'op_type': 'FusedGroupNorm' if self.affine else 'GroupNorm',
            'n_inputs': 3 if self.affine else 1, 'n_outputs': 1,
            'arguments': {
                'group': self.group,
                'axis': 1, # Data format: NCHW
                'eps': self.eps,
            }
        }

    def forward(self, input):
        inputs = [input] + self.inputs
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)


class GroupNorm1d(_GroupNorm):
    """Dragon does not use separate backend functions."""
    pass


class GroupNorm2d(_GroupNorm):
    """Dragon does not use separate backend functions."""
    pass


class GroupNorm3d(_GroupNorm):
    """Dragon does not use separate backend functions."""
    pass
\ No newline at end of file
......@@ -18,50 +18,176 @@ from __future__ import division
from __future__ import print_function
from dragon.vm.torch.nn import Module
def _assert_no_grad(variable):
assert not variable.requires_grad, \
"nn criterions don't compute the gradient w.r.t. targets - please " \
"mark these variables as not requiring gradients"
from dragon.vm.torch.nn.functional import _Reduction
class _Loss(Module):
    """Base class for loss modules; resolves the reduction mode.

    Fixed: the rendered block retained the pre-change ``__init__``
    header and ``self.size_average`` assignment alongside the new code,
    which is not valid Python; this resolves it to the new form.
    """

    def __init__(self, size_average=None, reduce=None, reduction='elementwise_mean'):
        super(_Loss, self).__init__()
        # Legacy size_average/reduce flags take precedence over ``reduction``
        # (emits a deprecation warning via _Reduction).
        if size_average is not None or reduce is not None:
            self.reduction = _Reduction.legacy_get_string(size_average, reduce)
        else:
            self.reduction = reduction
class _WeightedLoss(_Loss):
    """Loss base with an optional per-class weight.

    Fixed: removed the stale pre-change ``__init__`` header that was
    left interleaved with the new one, which broke the class body.
    """

    def __init__(self, weight=None, size_average=None, reduce=None, reduction='elementwise_mean'):
        super(_WeightedLoss, self).__init__(size_average, reduce, reduction)
        self.weight = weight
        # TODO(PhyscalX): Dragon will support it later :).
        if weight is not None:
            raise NotImplementedError('WeightedLoss has been not implemented yet.')
class NLLLoss(_WeightedLoss):
    """Negative log likelihood loss (targets are class indices).

    Fixed: ``(self.ignore_index)`` is just an int, not a tuple — the
    ``ignore_labels`` argument now receives ``(self.ignore_index,)``.
    """

    def __init__(self, weight=None, size_average=None, ignore_index=-100,
                 reduce=None, reduction='elementwise_mean'):
        super(NLLLoss, self).__init__(weight, size_average, reduce, reduction)
        self.ignore_index = ignore_index
        # Map torch-style reduction onto Dragon's normalization mode.
        self.normalization = {
            'elementwise_mean': 'VALID',
            'sum': 'None',
            'none': 'UNIT'}[self.reduction]
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'NLLLoss',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {
                'axis': 1,
                'normalization': self.normalization,
                # Must be a sequence: a bare parenthesized int is not one.
                'ignore_labels': () if self.ignore_index < 0 else (self.ignore_index,),
            }
        }

    def forward(self, input, target):
        inputs = [input, target]; self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
class BCEWithLogitsLoss(_WeightedLoss):
    """Sigmoid + binary cross entropy fused into one op."""

    def __init__(self, weight=None, size_average=None, reduce=None,
                 reduction='elementwise_mean', pos_weight=None):
        super(BCEWithLogitsLoss, self).__init__(weight, size_average, reduce, reduction)
        if pos_weight is not None:
            raise NotImplementedError('Positive weight has been not implemented yet.')
        # Map torch-style reduction onto Dragon's normalization mode.
        reduction_to_norm = {
            'elementwise_mean': 'VALID',
            'sum': 'None',
            'none': 'UNIT',
        }
        self.normalization = reduction_to_norm[self.reduction]
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'SigmoidCrossEntropy',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {'normalization': self.normalization},
        }

    def forward(self, input, target):
        inputs = [input, target]
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
class CrossEntropyLoss(_WeightedLoss):
    """Softmax cross entropy with sparse (index) targets.

    Fixed: (1) removed the stale pre-change lines (old ``__init__``
    header, ``self.reduce``, old ``op_type``/``normalization`` entries
    and the ``_assert_no_grad`` call) that were interleaved with the
    new code; (2) ``(self.ignore_index)`` is a bare int, not a tuple —
    ``ignore_labels`` now receives ``(self.ignore_index,)``.
    """

    def __init__(self, weight=None, size_average=None, ignore_index=-100,
                 reduce=None, reduction='elementwise_mean'):
        super(CrossEntropyLoss, self).__init__(weight, size_average, reduce, reduction)
        self.ignore_index = ignore_index
        # Map torch-style reduction onto Dragon's normalization mode.
        self.normalization = {
            'elementwise_mean': 'VALID',
            'sum': 'None',
            'none': 'UNIT'}[self.reduction]
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'SparseSoftmaxCrossEntropy',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {
                'axis': 1,
                'normalization': self.normalization,
                # Must be a sequence: a bare parenthesized int is not one.
                'ignore_labels': () if self.ignore_index < 0 else (self.ignore_index,),
            }
        }

    def forward(self, input, target):
        inputs = [input, target]; self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
class L1Loss(_Loss):
    """Mean absolute error loss."""

    def __init__(self, size_average=None, reduce=None, reduction='elementwise_mean'):
        super(L1Loss, self).__init__(size_average, reduce, reduction)
        # NOTE(review): reduction='none' has no entry here and raises
        # KeyError at construction — confirm whether that is intended.
        norm_map = {'elementwise_mean': 'BATCH_SIZE', 'sum': 'None'}
        self.normalization = norm_map[self.reduction]
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'L1Loss',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {'normalization': self.normalization},
        }

    def forward(self, input, target):
        inputs = [input, target]
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
class MSELoss(_Loss):
    """Mean squared error loss (backed by Dragon's ``L2Loss`` op)."""

    def __init__(self, size_average=None, reduce=None, reduction='elementwise_mean'):
        super(MSELoss, self).__init__(size_average, reduce, reduction)
        # NOTE(review): reduction='none' has no entry here and raises
        # KeyError at construction — confirm whether that is intended.
        norm_map = {'elementwise_mean': 'BATCH_SIZE', 'sum': 'None'}
        self.normalization = norm_map[self.reduction]
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'L2Loss',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {'normalization': self.normalization},
        }

    def forward(self, input, target):
        inputs = [input, target]
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
class SmoothL1Loss(_Loss):
    """Huber-style smooth L1 loss with transition point ``beta``."""

    def __init__(self, size_average=None, beta=1.0,
                 reduce=None, reduction='elementwise_mean'):
        super(SmoothL1Loss, self).__init__(size_average, reduce, reduction)
        # NOTE(review): reduction='none' has no entry here and raises
        # KeyError at construction — confirm whether that is intended.
        norm_map = {'elementwise_mean': 'BATCH_SIZE', 'sum': 'None'}
        self.normalization = norm_map[self.reduction]
        self.beta = beta
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'SmoothL1Loss',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {
                'beta': self.beta,
                'normalization': self.normalization,
            }
        }

    def forward(self, input, target):
        inputs = [input, target]
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
......@@ -11,11 +11,12 @@
from .creation import (
zeros, zeros_like, ones, ones_like,
rand, randn
one_hot, rand, randn,
)
from .arithmetic import (
add, sub, mul, div,
add, sub, mul, div, log, exp,
maximum, minimum, clamp,
)
from .ndarray import (
......
......@@ -16,14 +16,15 @@ from __future__ import print_function
from dragon.vm.torch.tensor import Tensor
from dragon.vm.torch.ops.primitive import MakeContext, WrapScalar
from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.ops.modules.arithmetic import Fundamental
from dragon.vm.torch.ops.modules.arithmetic import (
Fundamental, Log, Exp,
Maximum, Minimum, Clamp,
)
def _fundamental(input, value, op='Add', out=None):
if not isinstance(value, Tensor):
if not isinstance(value, (int, float)):
raise TypeError('Type of value should be numerical, got {}.'
.format(type(value)))
value = WrapScalar(value, input._dtype, input._ctx)
ctx = MakeContext(inputs=[input, value])
key = 'torch/ops/{}/{}:{}'.format(op.lower(), ctx[0].lower(), ctx[1])
......@@ -33,17 +34,63 @@ def _fundamental(input, value, op='Add', out=None):
def _rfundamental(input, value, op='RAdd', out=None):
    """Run a reflected fundamental op (i.e. ``value <op> input``)."""
    if isinstance(value, (int, float)):
        # Promote the python scalar to a tensor on input's device.
        value = WrapScalar(value, input._dtype, input._ctx)
    elif not isinstance(value, Tensor):
        raise TypeError('Type of value should be numerical, got {}.'
                        .format(type(value)))
    ctx = MakeContext(inputs=[input, value])
    key = 'torch/ops/{}/{}:{}'.format(op.lower(), ctx[0].lower(), ctx[1])
    module = get_module(Fundamental, key, ctx, op_type=op)
    # Reflected: the scalar/value operand comes first.
    return module.forward(value, input, out)
def _maximum(input, other, out=None):
    """Run the Maximum op; either operand may be a python number."""
    if not isinstance(input, Tensor):
        input = WrapScalar(input, 'float32', other._ctx)
        dtype = other._dtype
    else:
        dtype = input._dtype
        if not isinstance(other, Tensor):
            other = WrapScalar(other, 'float32', input._ctx)
    ctx = MakeContext(inputs=[input])
    key = 'torch/ops/maximum/{}:{}'.format(ctx[0].lower(), ctx[1])
    return get_module(Maximum, key, ctx).forward(input, other, out, dtype)
def _minimum(input, other, out=None):
    """Run the Minimum op; either operand may be a python number."""
    if not isinstance(input, Tensor):
        input = WrapScalar(input, 'float32', other._ctx)
        dtype = other._dtype
    else:
        dtype = input._dtype
        if not isinstance(other, Tensor):
            other = WrapScalar(other, 'float32', input._ctx)
    ctx = MakeContext(inputs=[input])
    key = 'torch/ops/minimum/{}:{}'.format(ctx[0].lower(), ctx[1])
    return get_module(Minimum, key, ctx).forward(input, other, out, dtype)
def _clamp(input, min=None, max=None, out=None):
    """Run the Clip op, bounding elements into [min, max]."""
    ctx = MakeContext(inputs=[input])
    # Bounds are part of the cache key since they are op arguments.
    key = 'torch/ops/clamp/{}:{}/min:{}/max:{}'.format(
        ctx[0].lower(), ctx[1], min, max)
    module = get_module(Clamp, key, ctx, min=min, max=max)
    return module.forward(input, out)
def _exp(input, out=None):
    """Run the element-wise Exp op."""
    ctx = MakeContext(inputs=[input])
    key = 'torch/ops/exp/{}:{}'.format(ctx[0].lower(), ctx[1])
    return get_module(Exp, key, ctx).forward(input, out)


def _log(input, out=None):
    """Run the element-wise Log op."""
    ctx = MakeContext(inputs=[input])
    key = 'torch/ops/log/{}:{}'.format(ctx[0].lower(), ctx[1])
    return get_module(Log, key, ctx).forward(input, out)
def add(input, value, out=None):
"""Add the ``input`` and ``value`` into the output tensor.
......@@ -126,3 +173,106 @@ def div(input, value, out=None):
"""
return _fundamental(input, value, out=out, op='Div')
def maximum(input, other, out=None):
    """Return the max values of the given two inputs.

    Parameters
    ----------
    input : vm.torch.Tensor or number
        The first input.
    other : vm.torch.Tensor or number
        The second input.
    out : vm.torch.Tensor or None
        The optional output tensor.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _maximum(input, other, out)


def minimum(input, other, out=None):
    """Return the min values of the given two inputs.

    Parameters
    ----------
    input : vm.torch.Tensor or number
        The first input.
    other : vm.torch.Tensor or number
        The second input.
    out : vm.torch.Tensor or None
        The optional output tensor.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _minimum(input, other, out)


def clamp(input, min=None, max=None, out=None):
    """Clamp all elements into the range [min, max].

    A bound of ``None`` leaves that side open.

    Parameters
    ----------
    input : vm.torch.Tensor
        The input tensor.
    min : number or None
        The min value.
    max : number or None
        The max value.
    out : vm.torch.Tensor or None
        The optional output tensor.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _clamp(input, min, max, out)


def log(input, out=None):
    """Compute the natural logarithm of input.

    Parameters
    ----------
    input : vm.torch.Tensor
        The input tensor.
    out : vm.torch.Tensor or None
        The optional output tensor.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _log(input, out)


def exp(input, out=None):
    """Compute the exponential of input.

    Parameters
    ----------
    input : vm.torch.Tensor
        The input tensor.
    out : vm.torch.Tensor or None
        The optional output tensor.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _exp(input, out)
......@@ -21,12 +21,18 @@ from dragon.vm.torch.execute_engine import RunOperator
from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.autograd.grad_mode import no_grad
from dragon.vm.torch.ops.primitive import MakeContext
from dragon.vm.torch.ops.arithmetic import _fundamental, _rfundamental
from dragon.vm.torch.ops.arithmetic import (
_fundamental, _rfundamental, _log, _exp,
_clamp,
)
from dragon.vm.torch.ops.ndarray import (
reshape, squeeze, unsqueeze,
_permute, _repeat, _crop,
_fill, _reduce, _arg_reduce,
)
from dragon.vm.torch.ops.modules.dtype import AsType
......@@ -53,9 +59,14 @@ def copy_(self, src, non_blocking=False):
The ``self`` tensor.
"""
# Copy memory
FromTensor(
src, CTX_TO_DEVICE_OPTION[tuple(src._ctx)],
self.name, CTX_TO_DEVICE_OPTION[tuple(self._ctx)])
self._dtype = src._dtype
# Transfer the static shape if necessary
self._static_shape = src.size() \
if self._static_shape else None
return self
......@@ -295,6 +306,76 @@ def rdiv(self, value):
return _rfundamental(self, value, op='RDiv')
def clamp(self, min=None, max=None):
    """Return a new tensor with all elements clamped into the range [min, max].

    Parameters
    ----------
    min : number or None
        The min value.
    max : number or None
        The max value.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _clamp(self, min, max)


def clamp_(self, min=None, max=None):
    """Clamp all elements into the range [min, max], in place.

    Parameters
    ----------
    min : number or None
        The min value.
    max : number or None
        The max value.

    Returns
    -------
    vm.torch.Tensor
        The self tensor, modified in place.

    """
    # Passing self as ``out`` makes the op write back into this tensor.
    return _clamp(self, min, max, self)


def log(self):
    """Compute the natural logarithm of this tensor.

    Parameters
    ----------
    None

    Returns
    -------
    vm.torch.Tensor
        The log tensor.

    """
    return _log(self)


def exp(self):
    """Compute the exponential of this tensor.

    Parameters
    ----------
    None

    Returns
    -------
    vm.torch.Tensor
        The exp tensor.

    """
    return _exp(self)
Tensor.add = add
Tensor.add_ = add_
Tensor.__radd__ = radd
......@@ -308,6 +389,10 @@ Tensor.div = div
Tensor.div_ = div_
Tensor.__rdiv__ = rdiv
Tensor.__rtruediv__ = rdiv
Tensor.clamp = clamp
Tensor.clamp_ = clamp_
Tensor.log = log
Tensor.exp = exp
##############################################
......@@ -387,16 +472,12 @@ def _unsqueeze_(self, dim=None):
def view(self, *args):
if self._static_shape:
raise RuntimeError('Can not view a leaf variable, it owns the static sizes.')
return reshape(self, shape=args)
def view_as(self, other):
if not isinstance(other, Tensor):
raise ValueError('The other should be a torch tensor.')
if self._static_shape:
raise RuntimeError('Can not view a leaf variable, it owns the static sizes.')
return reshape(self, shape=None, shape_like=other)
......
......@@ -13,14 +13,20 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.ops.primitive import MakeContext, CanonicalAxis
from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.tensor import LeafTensor
from dragon.vm.torch.execute_engine import RunOperator
from dragon.vm.torch.ops.primitive import MakeContext
from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.ops.modules.creation import OneHot
__all__= [
'zeros', 'zeros_like', 'ones', 'ones_like',
'rand', 'randn',
'one_hot', 'rand', 'randn',
]
......@@ -180,3 +186,26 @@ def randn(*sizes, **kwargs):
inputs = []; outputs = [out]; ctx = MakeContext(inputs, outputs)
meta = ('ONCE', 'RandomNormal', ctx)
return RunOperator(inputs, outputs, meta, **arguments)
def one_hot(input, depth):
    """Return a one-hot tensor according to the given input.

    Parameters
    ----------
    input : vm.torch.Tensor
        The input tensor (class indices).
    depth : int
        The depth of channels (number of classes).

    Returns
    -------
    vm.torch.FloatTensor
        The output tensor.

    """
    ctx = MakeContext(inputs=[input])
    # depth is an op argument, so it is part of the module cache key.
    key = 'torch/ops/one_hot/{}:{}/depth:{}'.format(
        ctx[0].lower(), ctx[1], depth)
    module = get_module(OneHot, key, ctx, depth=depth)
    return module.forward(input)
\ No newline at end of file
......@@ -38,3 +38,125 @@ class Fundamental(BaseModule):
inputs = [x1, x2]; self.unify_devices(inputs)
outputs = [y] if y else [self.register_output(x1.dtype)]
return self.run(inputs, outputs)
class Maximum(BaseModule):
    """Module wrapper for Dragon's ``Maximum`` op (max of two inputs)."""

    def __init__(self, key, ctx, **kwargs):
        super(Maximum, self).__init__(key, ctx, **kwargs)
        self.register_arguments()
        self.register_op()

    def register_arguments(self):
        """No arguments for the maximum op."""
        pass

    def register_op(self):
        self.op_meta = {
            'op_type': 'Maximum',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {}
        }

    def forward(self, x1, x2, y, dtype):
        # dtype is passed in explicitly: a wrapped-scalar operand does not
        # carry the original tensor's dtype.
        inputs = [x1, x2]; self.unify_devices(inputs)
        outputs = [y] if y else [self.register_output(dtype)]
        return self.run(inputs, outputs)
class Minimum(BaseModule):
    """Module wrapper for Dragon's ``Minimum`` op (min of two inputs)."""

    def __init__(self, key, ctx, **kwargs):
        super(Minimum, self).__init__(key, ctx, **kwargs)
        self.register_arguments()
        self.register_op()

    def register_arguments(self):
        """No arguments for the minimum op."""
        pass

    def register_op(self):
        self.op_meta = {
            'op_type': 'Minimum',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {},
        }

    def forward(self, x1, x2, y, dtype):
        inputs = [x1, x2]
        self.unify_devices(inputs)
        if y:
            outputs = [y]
        else:
            # dtype comes from the caller since a wrapped scalar
            # does not carry the original tensor's dtype.
            outputs = [self.register_output(dtype)]
        return self.run(inputs, outputs)
class Clamp(BaseModule):
    """Module wrapper for Dragon's ``Clip`` op: bound elements into [min, max]."""

    def __init__(self, key, ctx, **kwargs):
        super(Clamp, self).__init__(key, ctx, **kwargs)
        # None leaves a bound open; otherwise coerce to float.
        low = kwargs.get('min', None)
        high = kwargs.get('max', None)
        self.min = float(low) if low is not None else None
        self.max = float(high) if high is not None else None
        self.register_arguments()
        self.register_op()

    def register_arguments(self):
        """No arguments for the clamp op."""
        pass

    def register_op(self):
        self.op_meta = {
            'op_type': 'Clip',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {
                'low': self.min,
                'high': self.max,
            }
        }

    def forward(self, x, y):
        inputs = [x]
        self.unify_devices(inputs)
        outputs = [y] if y else [self.register_output(x.dtype)]
        return self.run(inputs, outputs)
class Log(BaseModule):
    """Module wrapper for Dragon's ``Log`` op."""

    def __init__(self, key, ctx, **kwargs):
        super(Log, self).__init__(key, ctx, **kwargs)
        self.register_arguments()
        self.register_op()

    def register_arguments(self):
        """No arguments for the log op."""
        pass

    def register_op(self):
        self.op_meta = {
            'op_type': 'Log',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {},
        }

    def forward(self, x, y):
        inputs = [x]
        self.unify_devices(inputs)
        out = y if y else self.register_output(x.dtype)
        return self.run(inputs, [out])
class Exp(BaseModule):
    """Module wrapper for Dragon's ``Exp`` op."""

    def __init__(self, key, ctx, **kwargs):
        super(Exp, self).__init__(key, ctx, **kwargs)
        self.register_arguments()
        self.register_op()

    def register_arguments(self):
        """No arguments for the exp op."""
        pass

    def register_op(self):
        self.op_meta = {
            'op_type': 'Exp',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {},
        }

    def forward(self, x, y):
        inputs = [x]
        self.unify_devices(inputs)
        out = y if y else self.register_output(x.dtype)
        return self.run(inputs, [out])
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.ops.modules.base import BaseModule
class OneHot(BaseModule):
    """Module wrapper for Dragon's ``OneHot`` op."""

    def __init__(self, key, ctx, **kwargs):
        super(OneHot, self).__init__(key, ctx, **kwargs)
        # Number of classes in the one-hot dimension.
        self.depth = kwargs.get('depth', 1)
        self.register_arguments()
        self.register_op()

    def register_arguments(self):
        """No arguments for the one-hot op."""
        pass

    def register_op(self):
        self.op_meta = {
            'op_type': 'OneHot',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {
                'depth': self.depth,
            }
        }

    def forward(self, x):
        inputs = [x]; self.unify_devices(inputs)
        outputs = [self.register_output(x.dtype)]
        return self.run(inputs, outputs)
\ No newline at end of file
......@@ -22,6 +22,7 @@ class Fill(BaseModule):
super(Fill, self).__init__(key, ctx, **kwargs)
self.len_shape = kwargs.get('len_shape', 0)
self.value = kwargs.get('value', 0.0)
self.dtype = kwargs.get('dtype', 'float32')
self.register_arguments()
self.register_op()
......@@ -34,6 +35,7 @@ class Fill(BaseModule):
'op_type': 'Fill',
'n_inputs': 0, 'n_outputs': 1,
'arguments': {
'dtype': self.dtype,
'value': float(self.value),
'dims_desc': [d for d in self.shape] if len(self.shape) > 0 else None,
}
......
......@@ -62,9 +62,10 @@ def _repeat(input, times):
def _fill(input, shape, value):
ctx = MakeContext(inputs=[input]); len_shape = len(shape)
key = 'torch/ops/fill/{}:{}/ndims:#{}/value:{}'.format(
ctx[0].lower(), ctx[1], len_shape, value)
module = get_module(Fill, key, ctx, len_shape=len_shape, value=value)
key = 'torch/ops/fill/{}:{}/dtype:{}/ndims:#{}/value:{}'.format(
ctx[0].lower(), ctx[1], input._dtype, len_shape, value)
module = get_module(Fill, key, ctx, len_shape=len_shape,
value=value, dtype=input._dtype)
return module.forward(input, shape)
......
......@@ -35,7 +35,7 @@ def _update(param, grad, op_type, slot,
lr_mult=1.0, decay_mult=1.0):
ctx = MakeContext(inputs=[param])
key = 'torch/ops/{}/{}:{}/{}/{}'.format(op_type.lower(),
ctx[0].lower(),ctx[1], slot, param.name)
ctx[0].lower(), ctx[1], slot, param.name)
module = get_module(Update, key, ctx, op_type=op_type,
lr_mult=lr_mult, decay_mult=decay_mult, slot=slot)
return module.forward(param, grad)
\ No newline at end of file
......@@ -72,10 +72,9 @@ class Optimizer(object):
param_temp = group['slot'] + '/{}'
for k, v in group.items():
if k in self._mutable_parameters:
# convert all defaults as float32 for convenience
dg.workspace.FeedTensor(param_temp.format(
self._mutable_parameters[k]),
np.array([v], dtype=np.float32))
self._mutable_parameters[k]), v,
dtype='float32', force_cpu=True)
def _run_update_ops(self, group):
"""Generate & Run UpdateOps.
......@@ -107,10 +106,12 @@ class Optimizer(object):
# Run regular update ops
for p, g in zip(params, grads):
_update(p, g, op_type=self._update_type,
_update(p, g,
op_type=self._update_type,
slot=group['slot'],
lr_mult=group.get('lr_mult', 1.0),
decay_mult=group.get('decay_mult', 1.0))
decay_mult=group.get('decay_mult', 1.0)
)
def zero_grad(self):
"""Set all gradients to zeros.
......
......@@ -17,9 +17,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import io
import os, sys, io
from dragon.core.tensor_utils import ToPyArrayEx
if sys.version_info[0] == 2:
import cPickle as pickle
......@@ -61,17 +60,27 @@ def _with_file_like(f, mode, body):
f.close()
def _save(obj, f, pickle_module, pickle_protocol):
"""Pickle the object into binary file.
def _save_dict(obj):
"""Recursively save the dict."""
if not isinstance(obj, dict):
raise ValueError('Currently only the state dict can be saved.')
py_dict = type(obj)()
for k, v in obj.items():
if isinstance(v, dict): py_dict[k] = _save_dict(v)
elif hasattr(v, 'name'): py_dict[k] = ToPyArrayEx(v)
else: py_dict[k] = v
return py_dict
"""
def _save(obj, f, pickle_module, pickle_protocol):
"""Pickle the object into binary file."""
if not isinstance(obj, dict):
raise ValueError('Currently only the state dict can be saved.')
from collections import OrderedDict
from dragon.core.tensor_utils import ToPyArrayEx
py_dict = OrderedDict()
py_dict = type(obj)()
for k, v in obj.items():
py_dict[k] = ToPyArrayEx(v)
if isinstance(v, dict): py_dict[k] = _save_dict(v)
elif hasattr(v, 'name'): py_dict[k] = ToPyArrayEx(v)
else: py_dict[k] = v
pickle_module.dump(py_dict, f, pickle_protocol)
......
......@@ -13,8 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import copy
import six
import numpy as np
import dragon as dg
import dragon.core.tensor_utils as tensor_utils
......@@ -73,12 +72,12 @@ class Tensor(object):
self._init_from_numpy(args[0])
else:
# + class torch.Tensor(size)
if not isinstance(args[0], int):
if not isinstance(args[0], six.integer_types):
raise ValueError('Excepted integer as size.')
self._init_from_shape(args[0])
else:
# + torch.Tensor(*sizes)
if not all(type(arg) is int for arg in args):
if not all(isinstance(arg, six.integer_types) for arg in args):
raise ValueError('Excepted integer(s) as sizes.')
self._init_from_shape(shape=args)
......@@ -90,7 +89,7 @@ class Tensor(object):
self._ignored_grads = {self.name + '_grad'} if not self._requires_grad else None
def _init_from_shape(self, shape):
if isinstance(shape, int): shape = [shape]
if isinstance(shape, six.integer_types): shape = [shape]
self._static_shape = Size(shape)
self._dg_tensor = tensor_utils.FromShape(shape, self._dtype,
ctx=CTX_TO_DEVICE_OPTION[tuple(self._ctx)], name=TPool.get('leaf'))
......@@ -904,6 +903,72 @@ class Tensor(object):
"""
raise NotImplementedError('Refer torch.ops.builtin.div_')
def clamp(self, min=None, max=None):
"""Return a tensor that all elements are clamped into the range [min, max].
Parameters
----------
min : numerical or None
The min value.
max : numerical or None
The max value.
Returns
-------
vm.torch.Tensor
The output tensor.
"""
raise NotImplementedError('Refer torch.ops.builtin.clamp')
def clamp_(self, min=None, max=None):
"""Clamp all elements are clamped into the range [min, max].
Parameters
----------
min : numerical or None
The min value.
max : numerical or None
The max value.
Returns
-------
vm.torch.Tensor
The output tensor.
"""
raise NotImplementedError('Refer torch.ops.builtin.clamp_')
def log(self):
"""Compute the natural logarithm of this tensor.
Parameters
----------
None
Returns
-------
vm.torch.Tensor
The log tensor.
"""
raise NotImplementedError('Refer torch.ops.builtin.log')
def exp(self):
"""Compute the exponential of this tensor.
Parameters
----------
None
Returns
-------
vm.torch.Tensor
The exp tensor.
"""
raise NotImplementedError('Refer torch.ops.builtin.exp')
def mean(self, dim=None, keepdim=False):
"""Returns the mean of all elements or elements along the given dim.
......
......@@ -42,7 +42,7 @@ find_modules()
setup(name = 'dragon',
version='0.2.2.11',
version='0.2.2.12',
description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework',
url='https://github.com/seetaresearch/Dragon',
author='Ting Pan',
......
......@@ -3,10 +3,6 @@
namespace dragon {
#ifdef WITH_CUDA
thread_local CUDAObject CUDAContext::cuda_object_;
#endif // WITH_CUDA
// cpu <- gpu
template<> void CPUContext::Memcpy<CPUContext, CUDAContext>(
size_t nbytes,
......
......@@ -246,6 +246,9 @@ GraphDef Graph::Share(const GraphDef& optimized_graph) {
*g.mutable_op(i)->mutable_input(j)
= renamed_[op.input(j)];
}
// handle handcraft cases
if (op.type() == "BiasAddGradient")
renamed_[op.output(0)] = g.op(i).input(2);
for (int j = 0; j < op.output_size(); j++) {
if (whitelist.count(op.output(j)) == 0 &&
renamed_.count(op.output(j)) &&
......@@ -443,9 +446,10 @@ Graph::Graph(const GraphDef& meta_graph, Workspace* ws)
}
// store the final graph as a tensor for visualization
Tensor* string_tensor = ws_->CreateTensor("GraphDef_" + optimized_graph.name());
string_tensor->Reshape({ 1 });
string* data = string_tensor->mutable_data<string, CPUContext>();
Tensor* graphT = ws_->CreateTensor(
"GraphDef_" + optimized_graph.name());
graphT->Reshape({ 1 });
auto* data = graphT->mutable_data<string, CPUContext>();
data[0] = optimized_graph.SerializeAsString();
// create
......@@ -473,11 +477,22 @@ bool Graph::Run(
return true;
}
DEFINE_REGISTRY(GraphRegistry, GraphBase, const GraphDef&, Workspace*);
GraphBase* NewGraph(
const GraphDef& meta_graph,
Workspace* ws) {
if (!meta_graph.has_graph_type() ||
meta_graph.graph_type().empty())
return new Graph(meta_graph, ws);
GraphBase* NewGraph(const GraphDef& meta_graph, Workspace* ws) {
if (!meta_graph.has_graph_type()) return new Graph(meta_graph, ws);
return GraphRegistry()->Create(meta_graph.graph_type(), meta_graph, ws);
return GraphRegistry()->Create(
meta_graph.graph_type(), meta_graph, ws);
}
DEFINE_REGISTRY(
GraphRegistry,
GraphBase,
const GraphDef&,
Workspace*
);
} // namespace dragon
\ No newline at end of file
......@@ -4,10 +4,10 @@
namespace dragon {
#define str dragon_cast<std::string, int>
bool GraphGradientMaker::CheckGrad(const OperatorDef& forward_op,
const Set<string>& targets, vector< pair<string, int> >& gen_grads) {
bool GraphGradientMaker::CheckGrad(
const OperatorDef& forward_op,
const Set<string>& targets,
vector< pair<string, int> >& gen_grads) {
if (NoGradientRegistry()->Has(forward_op.type())) {
for (auto& input : forward_op.input())
blacklist_set_.insert(input);
......@@ -41,10 +41,11 @@ bool GraphGradientMaker::CheckGrad(const OperatorDef& forward_op,
string GraphGradientMaker::GetOperatorName() {
if (op_prefix_.empty()) return "runtime";
return op_prefix_ + str(cur_op_idx_++) + op_suffix_;
return op_prefix_ + std::to_string(cur_op_idx_++) + op_suffix_;
}
void GraphGradientMaker::Make(const GraphDef& forward_def,
void GraphGradientMaker::Make(
const GraphDef& forward_def,
const vector<string>& targets,
GraphDef& new_def) {
Map<string, int> inputs_count, grads_count;
......@@ -61,9 +62,10 @@ void GraphGradientMaker::Make(const GraphDef& forward_def,
}
}
for (auto& t : targets) targets_set.insert(t);
// PLAY for the backward
for (int i = forward_def.op_size() - 1; i >= 0; i--) {
// collect inputs & outputs, generate grad
// collect inputs & outputs, generate RAW grad ops
const OperatorDef& op = forward_def.op(i);
vector< pair<string, int> > gen_grads;
bool is_skip = CheckGrad(op, targets_set, gen_grads);
......@@ -76,8 +78,9 @@ void GraphGradientMaker::Make(const GraphDef& forward_def,
g_outputs.emplace_back(g_output);
}
Gradient grad = MakeGradientForOp(op, g_outputs);
// post-process grad ops
unique_ptr<OperatorDef> gather_op;
// process the RAW grad ops
vector<OperatorDef> gather_ops;
for (auto& g_op : grad.ops) {
// set op name
g_op.set_name(GetOperatorName());
......@@ -112,27 +115,32 @@ void GraphGradientMaker::Make(const GraphDef& forward_def,
string original_name = op.input(original_idx);
if (inputs_count[original_name] > 1) {
// split
string split_name = *output + "_autosplit_" + str(grads_count[*output]++);
string split_name = *output + "_autosplit_"
+ std::to_string(grads_count[*output]++);
if (!is_skip) all_split_grads.insert(split_name);
// gather
if (grads_count[*output] == inputs_count[original_name]) {
gather_op.reset(new OperatorDef());
gather_op->set_name(GetOperatorName());
gather_op->set_type("GradientGather");
gather_op->add_output(*output);
OperatorDef gather_op;
gather_op.set_name(GetOperatorName());
gather_op.set_type("GradientGather");
gather_op.add_output(*output);
if (g_op.has_device_option())
gather_op->mutable_device_option()->CopyFrom(g_op.device_option());
gather_op.mutable_device_option()
->CopyFrom(g_op.device_option());
for (int j = 0; j < grads_count[*output]; j++) {
string key = *output + "_autosplit_" + str(j);
if (all_split_grads.count(key)) gather_op->add_input(key);
string key = *output + "_autosplit_" + std::to_string(j);
if (all_split_grads.count(key)) gather_op.add_input(key);
}
gather_ops.emplace_back(gather_op);
}
*output = split_name;
}
}
}
// append ops
// now, append the required ops
if (!is_skip) {
// 1) GradientGenerateOp
if (gen_grads.size() > 0) {
vector<string> op_inputs, op_outputs;
Argument arg_defaults; arg_defaults.set_name("defaults");
......@@ -143,21 +151,24 @@ void GraphGradientMaker::Make(const GraphDef& forward_def,
op_outputs.emplace_back(output);
arg_defaults.add_floats(grad.defaults[gen_grad.second]);
}
OperatorDef generate_op = MakeOperatorDef("GradientGenerate",
GetOperatorName(),
op_inputs,
op_outputs,
OperatorDef generate_op = MakeOperatorDef(
"GradientGenerate", GetOperatorName(),
op_inputs, op_outputs,
vector<Argument>(1, arg_defaults));
if (op.has_device_option())
generate_op.mutable_device_option()->CopyFrom(op.device_option());
generate_op.mutable_device_option()
->CopyFrom(op.device_option());
new_def.add_op()->CopyFrom(generate_op);
}
for (auto& g_op : grad.ops) {
// 2) GradientOp
for (auto& g_op : grad.ops)
new_def.add_op()->CopyFrom(g_op);
}
}
if (gather_op) new_def.add_op()->CopyFrom(*gather_op);
// done
// 3) GradientGatherOp
for (auto& gather_op : gather_ops)
new_def.add_op()->CopyFrom(gather_op);
// done!
if (!is_skip) {
for (int i = 0; i < op.input_size(); i++) {
if (!grad.g_inputs[i].empty())
......@@ -189,7 +200,9 @@ void GraphGradientMaker::Make(const GraphDef& forward_def,
} \
*op->mutable_output(ix) = temp_grad;}
void GraphGradientMaker::Share(const string& grads_prefix, GraphDef& graph) {
void GraphGradientMaker::Share(
const string& grads_prefix,
GraphDef& graph) {
Map<string, int> ref_count;
// count the refs for detecting leaf nodes
for (auto& op : graph.op()) {
......@@ -205,8 +218,7 @@ void GraphGradientMaker::Share(const string& grads_prefix, GraphDef& graph) {
Map<string, string> temporary_grads;
std::deque<string> grads_pool;
for (int i = 0; i < TEMPORARY_GRADS_LIMITS; i++)
grads_pool.push_back(grads_prefix + ":" +
dragon_cast<string, int>(i));
grads_pool.push_back(grads_prefix + ":" + std::to_string(i));
for (int i = 0; i < graph.op_size(); i++) {
OperatorDef* op = graph.mutable_op(i);
......
......@@ -31,12 +31,14 @@ void MixedMemory::ToCUDA() {
switch (state_) {
case UNINITIALIZED:
cuda_ptr_ = CUDAContext::New(nbytes_);
ptr_device_ = CUDA_GET_DEVICE();
state_ = STATE_AT_CUDA;
break;
case STATE_AT_CPU:
if (cuda_ptr_ == nullptr)
if (cuda_ptr_ == nullptr) {
cuda_ptr_ = CUDAContext::New(nbytes_);
CUDAContext::Memcpy<CUDAContext, CPUContext>(
ptr_device_ = CUDA_GET_DEVICE();
} CUDAContext::Memcpy<CUDAContext, CPUContext>(
nbytes_, cuda_ptr_, cpu_ptr_);
state_ = SYNCED;
break;
......@@ -66,6 +68,10 @@ const void* MixedMemory::cuda_data() {
return (const void*)cuda_ptr_;
}
const void* MixedMemory::cnml_data() {
return (const void*)cnml_ptr_;
}
void* MixedMemory::mutable_cpu_data() {
ToCPU();
state_ = STATE_AT_CPU;
......@@ -78,6 +84,11 @@ void* MixedMemory::mutable_cuda_data() {
return cuda_ptr_;
}
void* MixedMemory::mutable_cnml_data() {
state_ = STATE_AT_CNML;
return cnml_ptr_;
}
void MixedMemory::set_cpu_data(void* cpu_ptr, size_t nbytes) {
bool use_cudahost_mem = false;
#ifdef WITH_CUDA_HOST_MEM
......@@ -123,9 +134,11 @@ MixedMemory::~MixedMemory() {
void MixedMemory::SwitchToDevice() {
if (cuda_ptr_) {
#ifdef WITH_CUDA
int ptr_device = CUDA_DEVICE(cuda_ptr_);
int cur_device = CUDA_DEVICE();
if (ptr_device != cur_device) state_ = SWITCHED;
int cur_device = CUDA_GET_DEVICE();
if (cur_device != ptr_device_) {
state_ = SWITCHED;
ptr_device_ = cur_device;
}
#endif
}
}
......@@ -134,12 +147,12 @@ void MixedMemory::SwitchToCUDADevice(int device_id) {
#ifdef WITH_CUDA
DeviceGuard gurad(device_id);
if (cuda_ptr_) {
int ptr_device = CUDA_DEVICE(cuda_ptr_);
if (ptr_device != device_id) state_ = SWITCHED;
if (device_id != ptr_device_) {
state_ = SWITCHED;
ptr_device_ = device_id;
}
}
ToCUDA();
#else
CUDA_NOT_COMPILED;
#endif
}
......@@ -148,6 +161,7 @@ const Map<string, string> MixedMemory::info() const {
{ UNINITIALIZED, "UNINITIALIZED" },
{ STATE_AT_CPU, "CPU" },
{ STATE_AT_CUDA, "CUDA" },
{ STATE_AT_CNML, "CNML" },
{ SYNCED, "DEVICE" },
{ SWITCHED, "DEVICE" },
};
......@@ -155,15 +169,14 @@ const Map<string, string> MixedMemory::info() const {
string _state_ = STATE_TO_STRING[state_];
if (_state_ == "DEVICE") {
if (cuda_ptr_) _state_ = "CUDA";
else if (cnml_ptr_) _state_ = "CNML";
else LOG(FATAL) << "Device activated, "
<< "but got invalid mem pointer.";
}
s2s["mem_at"] = _state_;
if (cpu_ptr_) s2s["CPU"] = "0";
#ifdef WITH_CUDA
if (cuda_ptr_) s2s["CUDA"] =
dragon_cast<string, int>(CUDA_DEVICE(cuda_ptr_));
#endif
if (cuda_ptr_) s2s["CUDA"] = std::to_string(ptr_device_);
else if (cnml_ptr_) s2s["CNML"] = std::to_string(ptr_device_);
return s2s;
}
......
......@@ -5,7 +5,8 @@
namespace dragon {
OperatorBase::OperatorBase(
const OperatorDef& def, Workspace* ws)
const OperatorDef& def,
Workspace* ws)
: def_(def), ws_(ws), anchor_(def.name()) {
for (auto& arg : def_.arg()) {
CHECK_GT(arg.name().size(), 0);
......@@ -73,6 +74,8 @@ OperatorBase* TryCreateOperator(
CUDNNOperatorRegistry()->Has(key))
return CUDNNOperatorRegistry()->Create(key, def, ws);
return CUDAOperatorRegistry()->Create(key, def, ws);
case CNML:
return CNMLOperatorRegistry()->Create(key, def, ws);
default:
LOG(FATAL) << "Unknown device type: "
<< def.device_option().device_type();
......@@ -198,7 +201,8 @@ void Operator<Context>::ElimateCorruption() {
int idx = safe_heads.front();
safe_heads.pop();
Tensor* buffer = ws()->GetTensor(
"/opt/mirror_stage/buffer_" + dragon_cast<string, int>(idx));
"/opt/mirror_stage/buffer_"
+ std::to_string(idx));
Output(i)->Move(buffer->memory());
head_data[idx] = Output(i)->name();
}
......@@ -220,8 +224,8 @@ void Operator<Context>::CleanResource() {
for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->is_corrupted() &&
head_to_idx.count(Output(i)->name())) {
string used = "/opt/mirror_stage/buffer_" +
dragon_cast<string, int>(head_to_idx[Output(i)->name()]);
string used = "/opt/mirror_stage/buffer_"
+ std::to_string(head_to_idx[Output(i)->name()]);
Tensor* buffer = ws()->GetTensor(used);
if (Output(i)->memory() != buffer->memory())
buffer->Move(Output(i)->memory());
......@@ -248,6 +252,12 @@ DEFINE_REGISTRY(
Workspace*);
DEFINE_REGISTRY(
CNMLOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
DEFINE_REGISTRY(
GradientRegistry,
GradientMakerBase,
const OperatorDef&,
......@@ -291,9 +301,12 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings)
template void Operator<CPUContext>::ElimateCorruption();
template void Operator<CUDAContext>::ElimateCorruption();
template void Operator<CNMLContext>::ElimateCorruption();
template void Operator<CPUContext>::MakeResource();
template void Operator<CUDAContext>::MakeResource();
template void Operator<CNMLContext>::MakeResource();
template void Operator<CPUContext>::CleanResource();
template void Operator<CUDAContext>::CleanResource();
template void Operator<CNMLContext>::CleanResource();
} // namespace dragon
\ No newline at end of file
......@@ -16,7 +16,8 @@ GraphBase* Workspace::CreateGraph(const GraphDef& meta_graph) {
Workspace::~Workspace() {
for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) {
string name = "/opt/mirror_stage/buffer_" + dragon_cast<string, int>(i);
string name = "/opt/mirror_stage/buffer_"
+ std::to_string(i);
if (tensor_map_.count(name) > 0) {
MixedMemory* mem = tensor_map_[name]->memory();
if (mem != nullptr) delete mem;
......
......@@ -32,8 +32,8 @@ void CuDNNDropoutOp<Context>::RunWithType() {
ctx()->cudnn_handle(), &states_size));
std::lock_guard<std::mutex> lk(CUDAContext::mutex());
Tensor* states = ws()->CreateTensor(
"/share/cudnn/dropout:" + dragon_cast<string,
unsigned long long>(random_seed) + "/states");
"/share/cudnn/dropout:" + std::to_string(
random_seed) + "/states");
if (states->count() > 0) {
auto* Sdata = states->template mutable_data<uint8_t, Context>();
CUDNN_CHECK(cudnnRestoreDropoutDescriptor(
......@@ -67,9 +67,7 @@ void CuDNNDropoutOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......@@ -89,14 +87,16 @@ void CuDNNDropoutGradientOp<Context>::RunWithType() {
ctx()->cudnn_handle(), &states_size));
std::lock_guard<std::mutex> lk(CUDAContext::mutex());
Tensor* states = ws()->CreateTensor(
"/share/cudnn/dropout:" + dragon_cast<string,
unsigned long long>(random_seed) + "/states");
"/share/cudnn/dropout:" + std::to_string(
random_seed) + "/states");
if (states->count() > 0) {
auto* Sdata = states->template mutable_data<uint8_t, Context>();
CUDNN_CHECK(cudnnRestoreDropoutDescriptor(
dropout_desc, ctx()->cudnn_handle(), prob(),
Sdata, states_size, random_seed));
} else { LOG(FATAL) << "Missing states with seed: " << random_seed; }
} else {
LOG(FATAL) << "Missing states with seed: " << random_seed;
}
}
auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
......@@ -119,9 +119,7 @@ void CuDNNDropoutGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......
......@@ -24,9 +24,7 @@ void CuDNNEluOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......@@ -52,9 +50,7 @@ void CuDNNEluGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......
......@@ -31,9 +31,7 @@ void CuDNNReluOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......@@ -69,9 +67,7 @@ void CuDNNReluGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......
......@@ -29,9 +29,7 @@ void CuDNNSigmoidOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......@@ -65,9 +63,7 @@ void CuDNNSigmoidGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......
......@@ -14,8 +14,7 @@ void CuDNNSoftmaxOp<Context>::RunWithType() {
auto* Xdata = Input(0).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnSoftmaxForward(
ctx()->cudnn_handle(),
CUDNN_CHECK(cudnnSoftmaxForward(ctx()->cudnn_handle(),
CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL,
CUDNNType<T>::one, input_desc, Xdata,
CUDNNType<T>::zero, output_desc, Ydata));
......@@ -23,15 +22,13 @@ void CuDNNSoftmaxOp<Context>::RunWithType() {
template <class Context>
void CuDNNSoftmaxOp<Context>::RunOnDevice() {
if (axis == -1) axis = (int)Input(0).ndim() - 1;
if (axis == -1) axis = (TIndex)Input(0).ndim() - 1;
outer_dim = Input(0).count(0, axis);
inner_dim = Input(0).count(axis + 1);
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......@@ -47,8 +44,7 @@ void CuDNNSoftmaxGradientOp<Context>::RunWithType() {
auto* dYdata = Input(-1).template data<T, Context>();
auto* Ydata = Input(0).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnSoftmaxBackward(
ctx()->cudnn_handle(),
CUDNN_CHECK(cudnnSoftmaxBackward(ctx()->cudnn_handle(),
CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL,
CUDNNType<T>::one, input_desc, Ydata, input_desc, dYdata,
CUDNNType<T>::zero, output_desc, dXdata));
......@@ -56,15 +52,13 @@ void CuDNNSoftmaxGradientOp<Context>::RunWithType() {
template <class Context>
void CuDNNSoftmaxGradientOp<Context>::RunOnDevice() {
if (axis == -1) axis = (int)Input(0).ndim() - 1;
if (axis == -1) axis = (TIndex)Input(0).ndim() - 1;
outer_dim = Input(0).count(0, axis);
inner_dim = Input(0).count(axis + 1);
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......
......@@ -29,9 +29,7 @@ void CuDNNTanhOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......@@ -65,9 +63,7 @@ void CuDNNTanhGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......
......@@ -8,22 +8,31 @@ template <class Context> template <typename T>
void DropoutOp<Context>::RunWithType() {
auto* Xdata = Input(0).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>();
float scale = use_scale ? 1.0 / (1.0 - prob()) : 1.0;
float scale = use_scale ? 1.f / (1.f - prob()) : 1.f;
if (phase() == "TEST") {
if (Output(0) != &Input(0)) {
ctx()->template Copy<T, Context, Context>(
Output(0)->count(), Ydata, Xdata);
if (scale == 1.0) math::Scal<T, Context>(
Output(0)->count(), 1.0 - prob(), Ydata, ctx());
}
if (!use_scale) {
math::Scal<T, Context>(Output(0)->count(),
1.0 - prob(), Ydata, ctx());
}
} else if (phase() == "TRAIN") {
Tensor* mask = ws()->CreateTensor(
"/mnt/" + anchor() + "/dropout/mask");
mask->ReshapeLike(Input(0));
uint32_t* Mdata = mask->template mutable_data<uint32_t, Context>();
auto WSdata = ws()->template caches<Context>({
mask->count() * sizeof(uint32_t) });
auto* Mdata = mask->template mutable_data<uint8_t, Context>();
kernel::Dropout<T, Context>(
Output(0)->count(), prob(), scale,
Xdata, Mdata, Ydata, ctx());
Xdata, (uint32_t*)WSdata[0],
Mdata, Ydata, ctx());
} else LOG(FATAL) << "Incorrect Op phase: " << phase();
}
......@@ -32,6 +41,7 @@ void DropoutOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
}
......@@ -39,22 +49,25 @@ DEPLOY_CPU(Dropout);
#ifdef WITH_CUDA
DEPLOY_CUDA(Dropout);
#endif
OPERATOR_SCHEMA(Dropout).NumInputs(1).NumOutputs(1).Inplace({ { 0, 0 } });
OPERATOR_SCHEMA(Dropout)
.NumInputs(1).NumOutputs(1)
.Inplace({ { 0, 0 } });
template <class Context> template <typename T>
void DropoutGradientOp<Context>::RunWithType() {
mask = ws()->GetTensor("/mnt/" + anchor() + "/dropout/mask");
auto* mask = ws()->GetTensor(
"/mnt/" + anchor() + "/dropout/mask");
auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* Mdata = mask->template data<uint32_t, Context>();
float scale = use_scale ? 1.0 / (1.0 - prob()) : 1.0;
auto* Mdata = mask->template data<uint8_t, Context>();
float scale = use_scale ? 1.f / (1.f - prob()) : 1.f;
if (phase() == "TEST") { NOT_IMPLEMENTED; }
else if (phase() == "TRAIN") {
kernel::DropoutGrad<T, Context>(
Output(0)->count(), prob(), scale,
dYdata, Mdata, dXdata, ctx());
ctx()->FinishDeviceCompution();
mask->Reset();
kernel::ApplyMask<T, uint8_t, Context>(mask->count(),
scale, dYdata, Mdata, dXdata, ctx());
} else LOG(FATAL) << "Incorrect Op phase: " << phase();
}
......@@ -63,14 +76,17 @@ void DropoutGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
DEPLOY_CPU(DropoutGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(DropoutGradient);
#endif
OPERATOR_SCHEMA(DropoutGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 } });
OPERATOR_SCHEMA(DropoutGradient)
.NumInputs(2).NumOutputs(1)
.Inplace({ { 1, 0 } });
class GetDropoutGradient final : public GradientMakerBase {
public:
......
......@@ -148,7 +148,9 @@ DEPLOY_CPU(AddGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(AddGradient);
#endif
OPERATOR_SCHEMA(AddGradient).NumInputs(1).NumOutputs(2);
OPERATOR_SCHEMA(AddGradient)
.NumInputs(1).NumOutputs(2)
.Inplace({ { 0, 0 } });
class GetAddGradient : public GradientMakerBase {
public:
......
......@@ -7,15 +7,11 @@ namespace dragon {
template <class Context> template <typename T>
void ClipOp<Context>::RunWithType() {
Tensor* mask = ws()->CreateTensor(
"/mnt/" + anchor() + "/clip/mask");
mask->ReshapeLike(Input(0));
auto* Xdata = Input(0).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>();
auto* Mdata = mask->template mutable_data<T, Context>();
kernel::Clip<T, Context>(Output(0)->count(),
low, high, Xdata, Mdata, Ydata, ctx());
low, high, Xdata, Ydata, ctx());
}
template <class Context>
......@@ -30,19 +26,16 @@ DEPLOY_CPU(Clip);
#ifdef WITH_CUDA
DEPLOY_CUDA(Clip);
#endif
OPERATOR_SCHEMA(Clip)
.NumInputs(1).NumOutputs(1)
.Inplace({ { 0, 0 } });
OPERATOR_SCHEMA(Clip).NumInputs(1).NumOutputs(1);
template <class Context> template <typename T>
void ClipGradientOp<Context>::RunWithType() {
Tensor* mask = ws()->GetTensor(
"/mnt/" + anchor() + "/clip/mask");
auto* Xdata = Input(0).template data<T, Context>();
auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* Mdata = mask->template data<T, Context>();
math::Mul<T, Context>(Output(0)->count(),
dXdata, Mdata, dXdata, ctx());
kernel::ClipGrad<T, Context>(Output(0)->count(),
low, high, Xdata, dYdata, dXdata, ctx());
}
template <class Context>
......@@ -57,16 +50,14 @@ DEPLOY_CPU(ClipGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(ClipGradient);
#endif
OPERATOR_SCHEMA(ClipGradient)
.NumInputs(2).NumOutputs(1)
.Inplace({ { 1, 0 } });
OPERATOR_SCHEMA(ClipGradient).NumInputs(2).NumOutputs(1);
class GetClipGradient final : public GradientMakerBase {
public:
GRADIENT_MAKER_CTOR(GetClipGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
vector<string> {O(0), GO(0)},
vector<string> {I(0), GO(0)},
vector<string> {GI(0)});
}
};
......
......@@ -5,6 +5,8 @@
#include "utils/math_functions.h"
#include "operators/arithmetic/affine_op.h"
#if CUDNN_VERSION_MIN(6, 0, 0)
namespace dragon {
template <class Context> template <typename T>
......@@ -48,9 +50,7 @@ void CuDNNAffineOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......@@ -213,4 +213,6 @@ DEPLOY_CUDNN(AffineGradient);
} // namespace dragon
#endif
#endif // WITH_CUDNN
\ No newline at end of file
......@@ -146,6 +146,7 @@ void DivGradientOp<Context>::BroadcastRunWithType(int type) {
if (Output(0)->name() != "ignore") {
auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>();
CHECK(dy != dx1) << "\nCan't set inplace if X2 was broadcast.";
if (type == 0 || type == 1) {
DECLARE_MULTIPLIER(multiplier, outer_dim);
math::Gemm<T, Context>(
......@@ -185,7 +186,9 @@ DEPLOY_CPU(DivGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(DivGradient);
#endif
OPERATOR_SCHEMA(DivGradient).NumInputs(3).NumOutputs(2);
OPERATOR_SCHEMA(DivGradient)
.NumInputs(3).NumOutputs(2)
.Inplace({ { 2, 0 } });
class GetDivGradient final : public GradientMakerBase {
public:
......
......@@ -29,7 +29,8 @@ void LogGradientOp<Context>::RunWithType() {
auto* Xdata = Input(0).template data<T, Context>();
auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
math::Div<T, Context>(Output(0)->count(), dYdata, Xdata, dXdata, ctx());
math::Div<T, Context>(Output(0)->count(),
dYdata, Xdata, dXdata, ctx());
}
template <class Context>
......
#include "core/workspace.h"
#include "utils/op_kernel.h"
#include "utils/math_functions.h"
#include "operators/arithmetic/maximum_op.h"
namespace dragon {
template <class Context> template <typename T>
void MaximumOp<Context>::EltwiseRunWithType() {
    //  Element-wise path: both inputs have identical shapes.
    auto* a = Input(0).template data<T, Context>();
    auto* b = Input(1).template data<T, Context>();
    auto* y = Output(0)->template mutable_data<T, Context>();
    //  Y[i] = max(A[i], B[i])
    kernel::MaximumE<T, Context>(
        Output(0)->count(), a, b, y, ctx());
}
template <class Context> template <typename T>
void MaximumOp<Context>::BroadcastRunWithType() {
    //  Broadcast path: exactly one input holds a single element.
    const T* tensor_data; T* y;
    float scalar_host; T scalar;
    if (Input(0).count() == 1) {
        //  Input(0) is the scalar, Input(1) is the full tensor.
        Output(0)->ReshapeLike(Input(1));
        scalar_host = Input(0).template data<float, CPUContext>()[0];
        scalar = dragon_cast<T, float>(scalar_host);
        tensor_data = Input(1).template data<T, Context>();
        y = Output(0)->template mutable_data<T, Context>();
    } else if (Input(1).count() == 1) {
        //  Input(1) is the scalar, Input(0) is the full tensor.
        Output(0)->ReshapeLike(Input(0));
        scalar_host = Input(1).template data<float, CPUContext>()[0];
        scalar = dragon_cast<T, float>(scalar_host);
        tensor_data = Input(0).template data<T, Context>();
        y = Output(0)->template mutable_data<T, Context>();
    } else { LOG(FATAL) << "Either Input(0) or Input(1) should be a scalar."; }
    //  Y[i] = max(X[i], scalar)
    kernel::MaximumB<T, Context>(
        Output(0)->count(), tensor_data, scalar, y, ctx());
}
template <class Context>
void MaximumOp<Context>::RunOnDevice() {
    //  Same shapes -> element-wise; otherwise one side must be a scalar
    //  (the broadcast path verifies that and reshapes the output itself).
    const bool same_shape = Input(0).dims() == Input(1).dims();
    if (same_shape) Output(0)->ReshapeLike(Input(0));
    if (XIsType(Input(0), float)) {
        if (same_shape) EltwiseRunWithType<float>();
        else BroadcastRunWithType<float>();
    } else {
        //  Only float32 is instantiated for this op.
        LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
    }
}
//  Register the generic implementation for CPU ...
DEPLOY_CPU(Maximum);
#ifdef WITH_CUDA
//  ... and for CUDA when compiled in.
DEPLOY_CUDA(Maximum);
#endif
//  Schema: Maximum(X1, X2) -> Y.
OPERATOR_SCHEMA(Maximum).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void MaximumGradientOp<Context>::EltwiseRunWithType() {
    //  Element-wise gradient: route each dY[i] to whichever
    //  input won the max (as decided by the kernel).
    auto* a  = Input(0).template data<T, Context>();
    auto* b  = Input(1).template data<T, Context>();
    auto* dy = Input(-1).template data<T, Context>();
    auto* da = Output(0)->template mutable_data<T, Context>();
    auto* db = Output(1)->template mutable_data<T, Context>();
    kernel::MaximumEGrad<T, Context>(
        Output(0)->count(), a, b, dy, da, db, ctx());
}
template <class Context> template <typename T>
void MaximumGradientOp<Context>::BroadcastRunWithType() {
    //  Broadcast gradient: one input is a scalar. dX1data always points
    //  at the gradient of the *tensor* input, dX2data at the gradient of
    //  the *scalar* input — note the Output index therefore flips between
    //  the two branches below.
    T max_val; float x2_val;
    const T* Xdata; T* dX1data; float* dX2data;
    auto* dYdata = Input(-1).template data<T, Context>();
    if (Input(0).count() == 1) {
        //  Input(0) is the scalar: tensor grad goes to Output(1),
        //  scalar grad to Output(0).
        x2_val = Input(0).template data<float, CPUContext>()[0];
        max_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(1).template data<T, Context>();
        dX1data = Output(1)->template mutable_data<T, Context>();
        dX2data = Output(0)->template mutable_data<float, Context>();
        kernel::MaximumBGrad<T, Context>(Output(1)->count(),
            Xdata, max_val, dYdata, dX1data, ctx());
    } else if (Input(1).count() == 1) {
        //  Input(1) is the scalar: tensor grad goes to Output(0),
        //  scalar grad to Output(1).
        x2_val = Input(1).template data<float, CPUContext>()[0];
        max_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(0).template data<T, Context>();
        dX1data = Output(0)->template mutable_data<T, Context>();
        dX2data = Output(1)->template mutable_data<float, Context>();
        kernel::MaximumBGrad<T, Context>(Output(0)->count(),
            Xdata, max_val, dYdata, dX1data, ctx());
    } else { LOG(FATAL) << "Either Input(0) or Input(1) should be a scalar."; }
    //  we simply zero the grad of scalar
    //  (deliberate: the true scalar gradient — a reduction of dY over the
    //  winning positions — is not computed here).
    math::Set<float, Context>(1, 0, dX2data, ctx());
}
template <class Context>
void MaximumGradientOp<Context>::RunOnDevice() {
    //  Each gradient matches the shape of its corresponding input.
    Output(0)->ReshapeLike(Input(0));
    Output(1)->ReshapeLike(Input(1));
    const bool same_shape = Input(0).dims() == Input(1).dims();
    if (XIsType(Input(0), float)) {
        if (same_shape) EltwiseRunWithType<float>();
        else BroadcastRunWithType<float>();
    } else {
        //  Only float32 is instantiated for this op.
        LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
    }
}
//  Register the generic implementation for CPU ...
DEPLOY_CPU(MaximumGradient);
#ifdef WITH_CUDA
//  ... and for CUDA when compiled in.
DEPLOY_CUDA(MaximumGradient);
#endif
//  Schema: MaximumGradient(X1, X2, dY) -> (dX1, dX2).
OPERATOR_SCHEMA(MaximumGradient).NumInputs(3).NumOutputs(2);
//  Gradient maker: emits one MaximumGradient op taking both forward
//  inputs plus the output gradient, and producing both input gradients.
class GetMaximumGradient final : public GradientMakerBase {
 public:
    GRADIENT_MAKER_CTOR(GetMaximumGradient);
    vector<OperatorDef> MakeDefs() override {
        return SingleDef(def.type() + "Gradient", "",
            vector<string> {I(0), I(1), GO(0)},
            vector<string> {GI(0), GI(1)});
    }
};
REGISTER_GRADIENT(Maximum, GetMaximumGradient);
} // namespace dragon
\ No newline at end of file
#include "core/workspace.h"
#include "utils/op_kernel.h"
#include "utils/math_functions.h"
#include "operators/arithmetic/minimum_op.h"
namespace dragon {
template <class Context> template <typename T>
void MinimumOp<Context>::EltwiseRunWithType() {
    //  Element-wise path: both inputs have identical shapes.
    auto* a = Input(0).template data<T, Context>();
    auto* b = Input(1).template data<T, Context>();
    auto* y = Output(0)->template mutable_data<T, Context>();
    //  Y[i] = min(A[i], B[i])
    kernel::MinimumE<T, Context>(
        Output(0)->count(), a, b, y, ctx());
}
template <class Context> template <typename T>
void MinimumOp<Context>::BroadcastRunWithType() {
    //  Broadcast path: exactly one input holds a single element.
    //  Xdata points at the tensor input, min_val at the scalar value
    //  (read on CPU as float, then cast to T).
    T min_val; float x2_val; const T* Xdata; T* Ydata;
    if (Input(0).count() == 1) {
        //  Input(0) is the scalar, Input(1) is the full tensor.
        Output(0)->ReshapeLike(Input(1));
        x2_val = Input(0).template data<float, CPUContext>()[0];
        min_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(1).template data<T, Context>();
        Ydata = Output(0)->template mutable_data<T, Context>();
    } else if (Input(1).count() == 1) {
        //  Input(1) is the scalar, Input(0) is the full tensor.
        Output(0)->ReshapeLike(Input(0));
        x2_val = Input(1).template data<float, CPUContext>()[0];
        min_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(0).template data<T, Context>();
        Ydata = Output(0)->template mutable_data<T, Context>();
    } else { LOG(FATAL) << "Either Input(0) or Input(1) should be a scalar."; }
    //  Y[i] = min(X[i], min_val)
    kernel::MinimumB<T, Context>(Output(0)->count(),
        Xdata, min_val, Ydata, ctx());
}
template <class Context>
void MinimumOp<Context>::RunOnDevice() {
    //  Same shapes -> element-wise; otherwise one side must be a scalar
    //  (the broadcast path verifies that and reshapes the output itself).
    const bool same_shape = Input(0).dims() == Input(1).dims();
    if (same_shape) Output(0)->ReshapeLike(Input(0));
    if (XIsType(Input(0), float)) {
        if (same_shape) EltwiseRunWithType<float>();
        else BroadcastRunWithType<float>();
    } else {
        //  Only float32 is instantiated for this op.
        LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
    }
}
//  Register the generic implementation for CPU ...
DEPLOY_CPU(Minimum);
#ifdef WITH_CUDA
//  ... and for CUDA when compiled in.
DEPLOY_CUDA(Minimum);
#endif
//  Schema: Minimum(X1, X2) -> Y.
OPERATOR_SCHEMA(Minimum).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void MinimumGradientOp<Context>::EltwiseRunWithType() {
    // Route each dY element to whichever input held the minimum there.
    auto* a = Input(0).template data<T, Context>();
    auto* b = Input(1).template data<T, Context>();
    auto* dy = Input(-1).template data<T, Context>();
    auto* da = Output(0)->template mutable_data<T, Context>();
    auto* db = Output(1)->template mutable_data<T, Context>();
    kernel::MinimumEGrad<T, Context>(
        Output(0)->count(), a, b, dy, da, db, ctx());
}
template <class Context> template <typename T>
void MinimumGradientOp<Context>::BroadcastRunWithType() {
    // Broadcast gradient: one input is a scalar. The tensor side gets an
    // element-wise routed gradient; the scalar side is simply zeroed below.
    // NOTE: despite the names, dX1data points at the *tensor* input's grad
    // and dX2data at the *scalar* input's grad in both branches.
    T min_val; float x2_val;
    const T* Xdata; T* dX1data; float* dX2data;
    auto* dYdata = Input(-1).template data<T, Context>();
    if (Input(0).count() == 1) {
        // Input(0) is the scalar: Output(1) receives the tensor gradient,
        // Output(0) (the scalar's grad) is zeroed at the bottom.
        x2_val = Input(0).template data<float, CPUContext>()[0];
        min_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(1).template data<T, Context>();
        dX1data = Output(1)->template mutable_data<T, Context>();
        dX2data = Output(0)->template mutable_data<float, Context>();
        kernel::MinimumBGrad<T, Context>(Output(1)->count(),
            Xdata, min_val, dYdata, dX1data, ctx());
    } else if (Input(1).count() == 1) {
        // Input(1) is the scalar: Output(0) receives the tensor gradient.
        x2_val = Input(1).template data<float, CPUContext>()[0];
        min_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(0).template data<T, Context>();
        dX1data = Output(0)->template mutable_data<T, Context>();
        dX2data = Output(1)->template mutable_data<float, Context>();
        kernel::MinimumBGrad<T, Context>(Output(0)->count(),
            Xdata, min_val, dYdata, dX1data, ctx());
    } else { LOG(FATAL) << "Either Input(0) or Input(1) should be a scalar."; }
    // we simply zero the grad of scalar
    // (the reduction of dY over positions where the scalar won is
    //  intentionally skipped)
    math::Set<float, Context>(1, 0, dX2data, ctx());
}
template <class Context>
void MinimumGradientOp<Context>::RunOnDevice() {
    // Gradients always match the shapes of the corresponding inputs.
    Output(0)->ReshapeLike(Input(0));
    Output(1)->ReshapeLike(Input(1));
    const bool eltwise = Input(0).dims() == Input(1).dims();
    if (XIsType(Input(0), float)) {
        if (eltwise) EltwiseRunWithType<float>();
        else BroadcastRunWithType<float>();
    } else {
        LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
    }
}
// Deploy the gradient op on CPU (and CUDA when compiled in).
DEPLOY_CPU(MinimumGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(MinimumGradient);
#endif
// MinimumGradient: (X1, X2, dY) -> (dX1, dX2).
OPERATOR_SCHEMA(MinimumGradient).NumInputs(3).NumOutputs(2);
// Maker: MinimumGradient consumes both inputs and dY,
// and produces gradients for both inputs.
class GetMinimumGradient final : public GradientMakerBase {
 public:
    GRADIENT_MAKER_CTOR(GetMinimumGradient);
    vector<OperatorDef> MakeDefs() override {
        const vector<string> grad_inputs({ I(0), I(1), GO(0) });
        const vector<string> grad_outputs({ GI(0), GI(1) });
        return SingleDef(def.type() + "Gradient", "",
            grad_inputs, grad_outputs);
    }
};
REGISTER_GRADIENT(Minimum, GetMinimumGradient);
} // namespace dragon
\ No newline at end of file
......@@ -134,6 +134,7 @@ void MulGradientOp<Context>::BroadcastRunWithType(int type) {
if (Output(0)->name() != "ignore") {
auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>();
CHECK(dy != dx1) << "\nCan't set inplace if X2 was broadcast.";
if (type == 0 || type == 1) {
DECLARE_MULTIPLIER(multiplier, outer_dim);
math::Gemm<T, Context>(
......@@ -173,7 +174,9 @@ DEPLOY_CPU(MulGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(MulGradient);
#endif
OPERATOR_SCHEMA(MulGradient).NumInputs(3).NumOutputs(2);
OPERATOR_SCHEMA(MulGradient)
.NumInputs(3).NumOutputs(2)
.Inplace({ { 2, 0 } });
class GetMulGradient : public GradientMakerBase {
public:
......
......@@ -15,6 +15,7 @@ void PowOp<Context>::RunWithType() {
dragon_cast<T, float>(value), Ydata, ctx());
return;
}
auto* Xdata = Input(0).template data<T, Context>();
ctx()->template Copy<T, Context, Context>(count, Ydata, Xdata);
if (scale != 1.f) math::Scal<T, Context>(count, scale, Ydata, ctx());
......
......@@ -150,7 +150,9 @@ DEPLOY_CPU(SubGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(SubGradient);
#endif
OPERATOR_SCHEMA(SubGradient).NumInputs(1).NumOutputs(2);
OPERATOR_SCHEMA(SubGradient)
.NumInputs(1).NumOutputs(2)
.Inplace({ { 0, 0 } });
class GetSubGradient : public GradientMakerBase {
public:
......
......@@ -7,8 +7,6 @@
#include "operators/control_flow/scan_op.h"
#include "operators/ndarray/slice_op.h"
#define str dragon_cast<string, int>
namespace dragon {
template <class Context>
......@@ -29,14 +27,14 @@ void ScanOp<Context>::InitTemplate() {
for (int i = 0; i < nseqs; i++) {
OperatorDef* op = template_def.add_op();
op->CopyFrom(slice_def);
op->set_name(name() + "(BodyOp." + str(i) + ")");
op->set_name(name() + "(BodyOp." + std::to_string(i) + ")");
op->add_input(Input(i).name());
terms[Input(i).name()] = Input(i).name() + "@1";
}
for (int i = 0; i < nrepeats; i++) {
OperatorDef* op = template_def.add_op();
op->CopyFrom(func_def.op(i));
op->set_name(name() + "(BodyOp." + str(i + nseqs) + ")@1");
op->set_name(name() + "(BodyOp." + std::to_string(i + nseqs) + ")@1");
// replace inputs term
for (int j = 0; j < op->input_size(); j++) {
string* input = op->mutable_input(j);
......@@ -61,8 +59,8 @@ void ScanOp<Context>::UpdateTerms(int cur_step) {
string prev, now;
// update sequences term
for (int i = 0; i < nseqs; i++) {
prev = Input(i).name() + "@" + str(cur_step - 1);
now = Input(i).name() + "@" + str(cur_step);
prev = Input(i).name() + "@" + std::to_string(cur_step - 1);
now = Input(i).name() + "@" + std::to_string(cur_step);
terms[prev] = now;
}
if (cur_step < 3) return;
......@@ -70,8 +68,8 @@ void ScanOp<Context>::UpdateTerms(int cur_step) {
// only support the latest one-step (as Theano's done)
for (int i = 0; i < nout; i++) {
if (default_outputs[i].empty()) continue;
prev = Output(i)->name() + "@" + str(cur_step - 2);
now = Output(i)->name() + "@" + str(cur_step - 1);
prev = Output(i)->name() + "@" + std::to_string(cur_step - 2);
now = Output(i)->name() + "@" + std::to_string(cur_step - 1);
terms[prev] = now;
}
}
......@@ -90,7 +88,7 @@ void ScanOp<Context>::UnrollTemplate() {
if (graphs.count(nsteps)) return;
new_def.CopyFrom(template_def);
new_def.set_name(name() + "(ScanLen." + str(nsteps) + ")");
new_def.set_name(name() + "(ScanLen." + std::to_string(nsteps) + ")");
Argument phase; phase.set_name("phase");
phase.set_s(this->phase()); new_def.add_arg()->CopyFrom(phase);
for (int idx = 0; idx < nseqs; idx++) {
......@@ -100,7 +98,7 @@ void ScanOp<Context>::UnrollTemplate() {
op->mutable_arg(1)->set_i(nslices);
// add slices as outputs
for (int t = 1; t <= nslices; t++) {
string slice = op->input(0) + "@" + str(t);
string slice = op->input(0) + "@" + std::to_string(t);
op->add_output(slice);
}
}
......@@ -111,7 +109,8 @@ void ScanOp<Context>::UnrollTemplate() {
for (int idx = copy_l; idx < copy_r; idx++) {
OperatorDef* op = new_def.add_op();
op->CopyFrom(new_def.op(idx));
op->set_name(SplitString(op->name(), "@")[0] + "@" + str(t));
op->set_name(str::split(op->name(), "@")[0]
+ "@" + std::to_string(t));
// replace inputs
for (int j = 0; j < op->input_size(); j++) {
string* input = op->mutable_input(j);
......@@ -120,18 +119,19 @@ void ScanOp<Context>::UnrollTemplate() {
// replace outputs
for (int j = 0; j < op->output_size(); j++) {
string* output = op->mutable_output(j);
terms[*output] = SplitString(*output, "@")[0] + "@" + str(t);
terms[*output] = str::split(*output, "@")[0]
+ "@" + std::to_string(t);
*output = terms[*output];
}
}
}
for (int i = 0; i < nout; i++) {
// solve the last step only
new_def.add_target(func_def.target(i) + "@" + str(nsteps));
new_def.add_target(func_def.target(i) + "@" + std::to_string(nsteps));
// concat all steps if necessary
if (Output(i)->name() == "ignore") continue;
OperatorDef* op = new_def.add_op();
op->set_name(name() + "(BodyOp." + str(nseqs + nrepeats + i) + ")");
op->set_name(name() + "(BodyOp." + std::to_string(nseqs + nrepeats + i) + ")");
op->set_type("Concat");
Argument arg_axis, arg_nin;
arg_axis.set_name("axis"); arg_axis.set_i(axis);
......@@ -139,7 +139,7 @@ void ScanOp<Context>::UnrollTemplate() {
op->add_arg()->CopyFrom(arg_axis);
op->add_arg()->CopyFrom(arg_nin);
for (int t = 1; t <= nsteps; t++)
op->add_input(Output(i)->name() + "@" + str(t));
op->add_input(Output(i)->name() + "@" + std::to_string(t));
op->add_output(Output(i)->name());
// solve all the all steps
new_def.add_target(Output(i)->name());
......@@ -195,7 +195,7 @@ void ScanGradientOp<Context>::MakeOps(const GraphDef& forward_def,
maker.Make(forward_def, targets, new_def);
// post-process
new_def.set_name(name() + "(ScanLen." + str(nsteps) + ")");
new_def.set_name(name() + "(ScanLen." + std::to_string(nsteps) + ")");
for (auto& target : targets) {
for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "ignore") continue;
......
#include "core/workspace.h"
#include "utils/op_kernel.h"
#include "utils/math_functions.h"
#include "operators/loss/nll_loss_op.h"
namespace dragon {
template <class Context> template <typename Tx, typename Ty>
void NLLLossOp<Context>::RunWithType() {
    // Tx: type of the log-probabilities, Ty: type of the target labels.
    auto* LPdata = Input(0).template data<Tx, Context>();
    auto* Tdata = Input(1).template data<Ty, Context>();
    // Optional list of label values to ignore (no loss contribution).
    auto* Idata = !ignores.count() ? nullptr :
        ignores.template data<int, Context>();
    auto* Ldata = losses.template mutable_data<float, Context>();
    auto* Fdata = flags.template mutable_data<float, Context>();
    kernel::NLLLoss<Tx, Ty, Context>(
        outer_dim, Input(0).dim(axis), inner_dim,
        LPdata, Tdata, Idata, ignores.count(),
        Ldata, Fdata, ctx());
    if (normalization == "UNIT") {
        // No reduction: output keeps the input shape minus the class axis.
        vector<TIndex> output_dims = Input(0).dims();
        output_dims.erase(output_dims.begin() + axis);
        Output(0)->Reshape(output_dims);
        Output(0)->template CopyFrom<Context>(losses, ctx());
        return;
    }
    // Otherwise reduce to a scalar with the requested normalizer.
    float normalizer = 1;
    if (normalization == "VALID") {
        // Average over contributing elements (flags presumably mark the
        // non-ignored positions — see kernel::NLLLoss); max() guards
        // against division by zero.
        normalizer = std::max(
            math::ASum<float, Context>(
                flags.count(), Fdata), 1.f);
    } else if (normalization == "BATCH_SIZE") {
        normalizer = Input(0).dim(0);
    } else if (normalization == "FULL") {
        normalizer = outer_dim * inner_dim;
    }
    float loss = math::ASum<float, Context>(losses.count(), Ldata);
    Output(0)->Reshape({ 1 });
    auto* Ydata = Output(0)->template mutable_data<float, Context>();
    math::Set<float, Context>(1, loss / normalizer, Ydata, ctx());
}
template <class Context>
void NLLLossOp<Context>::RunOnDevice() {
    // Pin work to the default stream before touching workspace buffers.
    ctx()->set_stream_id(0);
    outer_dim = Input(0).count(0, axis);
    inner_dim = Input(0).count(axis + 1);
    const auto num_preds = outer_dim * inner_dim;
    CHECK_EQ(num_preds, Input(1).count())
        << "\nNumber of predictions must match the number of labels.";
    losses.Reshape({ num_preds });
    flags.Reshape({ num_preds });
    // Dispatch on (log-prob type, label type).
    if (XIsType(Input(0), float)) {
        if (XIsType(Input(1), float)) RunWithType<float, float>();
        else if (XIsType(Input(1), int64_t)) RunWithType<float, int64_t>();
        else LOG(FATAL) << DTypeHelper(Input(1), { "float32", "int64" });
    } else if (XIsType(Input(0), float16)) {
        if (XIsType(Input(1), float)) RunWithType<float16, float>();
        else if (XIsType(Input(1), int64_t)) RunWithType<float16, int64_t>();
        else LOG(FATAL) << DTypeHelper(Input(1), { "float32", "int64" });
    } else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
// Deploy the forward op on CPU (and CUDA when compiled in).
DEPLOY_CPU(NLLLoss);
#ifdef WITH_CUDA
DEPLOY_CUDA(NLLLoss);
#endif
// NLLLoss: (log-probs, labels) -> loss.
OPERATOR_SCHEMA(NLLLoss).NumInputs(2).NumOutputs(1);
template <class Context> template <typename Tx, typename Ty>
void NLLLossGradientOp<Context>::RunWithType() {
    // Tx: type of the log-probabilities, Ty: type of the target labels.
    auto* LPdata = Input(0).template data<Tx, Context>();
    auto* Tdata = Input(1).template data<Ty, Context>();
    // Optional list of label values to ignore (zero gradient there).
    auto* Idata = !ignores.count() ? nullptr :
        ignores.template data<int, Context>();
    auto* dXdata = Output(0)->template mutable_data<Tx, Context>();
    auto* Fdata = flags.template mutable_data<float, Context>();
    // Start from a zeroed gradient; kernel::NLLLossGrad fills the rest.
    math::Set<Tx, Context>(Output(0)->count(),
        dragon_cast<Tx, float>(0.) , dXdata, ctx());
    kernel::NLLLossGrad<Tx, Ty, Context>(
        outer_dim, Output(0)->dim(axis), inner_dim,
        LPdata, Tdata, Idata, ignores.count(),
        dXdata, Fdata, ctx());
    if (normalization == "UNIT") {
        // Per-element dY: broadcast each incoming gradient across the class
        // axis, convert it to Tx, then scale dX element-wise.
        auto* dYdata = Input(-1).template data<float, Context>();
        // Two scratch buffers: [0] float-expanded dY, [1] its Tx conversion.
        vector<void*> WSdata = ws()->template caches<Context>(
            { Input(0).count() * sizeof(float),
            Input(0).count() * sizeof(Tx) });
        kernel::SumGrad<float, Context>(
            Input(0).count() / Input(0).dim(axis),
            Input(0).dim(axis), inner_dim,
            1.0, dYdata, (float*)WSdata[0], ctx());
        kernel::TypeA2B<float, Tx, Context>(Input(0).count(),
            (const float*)WSdata[0], (Tx*)WSdata[1], ctx());
        math::Mul<Tx, Context>(Output(0)->count(),
            (Tx*)WSdata[1], dXdata, dXdata, ctx());
        return;
    }
    // Scalar dY: scale the whole gradient by dY / normalizer.
    float normalizer = 1;
    if (normalization == "VALID") {
        // Average over contributing elements; max() guards div-by-zero.
        normalizer = std::max(
            math::ASum<float, Context>(
                flags.count(), Fdata), 1.f);
    } else if (normalization == "BATCH_SIZE") {
        normalizer = Input(0).dim(0);
    } else if (normalization == "FULL") {
        normalizer = outer_dim * inner_dim;
    }
    auto* dYdata = Input(-1).template data<float, Context>();
    // Bring the scalar dY to the host to use it as a BLAS scale factor.
    float dYdata_host; ctx()->template Copy<float, CPUContext, Context>(
        1, &dYdata_host, dYdata);
    math::Scal<Tx, Context>(Output(0)->count(),
        dYdata_host / normalizer, dXdata, ctx());
}
template <class Context>
void NLLLossGradientOp<Context>::RunOnDevice() {
    // Pin work to the default stream before touching workspace buffers.
    ctx()->set_stream_id(0);
    outer_dim = Input(0).count(0, axis);
    inner_dim = Input(0).count(axis + 1);
    // The gradient matches the log-probabilities' shape.
    Output(0)->ReshapeLike(Input(0));
    flags.Reshape({ outer_dim * inner_dim });
    // Dispatch on (log-prob type, label type).
    if (XIsType(Input(0), float)) {
        if (XIsType(Input(1), float)) RunWithType<float, float>();
        else if (XIsType(Input(1), int64_t)) RunWithType<float, int64_t>();
        else LOG(FATAL) << DTypeHelper(Input(1), { "float32", "int64" });
    } else if (XIsType(Input(0), float16)) {
        if (XIsType(Input(1), float)) RunWithType<float16, float>();
        else if (XIsType(Input(1), int64_t)) RunWithType<float16, int64_t>();
        else LOG(FATAL) << DTypeHelper(Input(1), { "float32", "int64" });
    } else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
// Deploy the gradient op on CPU (and CUDA when compiled in).
DEPLOY_CPU(NLLLossGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(NLLLossGradient);
#endif
// NLLLossGradient: (log-probs, labels, dY) -> d(log-probs).
OPERATOR_SCHEMA(NLLLossGradient).NumInputs(3).NumOutputs(1);
// Maker: NLLLossGradient consumes both inputs and dY,
// and produces a gradient for the log-probabilities only.
class GetNLLLossGradient final : public GradientMakerBase {
 public:
    GRADIENT_MAKER_CTOR(GetNLLLossGradient);
    vector<OperatorDef> MakeDefs() override {
        const vector<string> grad_inputs({ I(0), I(1), GO(0) });
        const vector<string> grad_outputs({ GI(0) });
        return SingleDef(def.type() + "Gradient", "",
            grad_inputs, grad_outputs);
    }
};
REGISTER_GRADIENT(NLLLoss, GetNLLLossGradient);
} // namespace dragon
\ No newline at end of file
......@@ -18,7 +18,9 @@ void SigmoidFocalLossOp<Context>::RunWithType() {
Xdata, Tdata, Ldata, Fdata, ctx());
if (normalization == "UNIT") {
Output(0)->ReshapeLike(losses);
vector<TIndex> output_dims = Input(0).dims();
output_dims.erase(output_dims.begin() + axis);
Output(0)->Reshape(output_dims);
Output(0)->template CopyFrom<Context>(losses, ctx());
return;
}
......
......@@ -43,9 +43,9 @@ template <class Context>
void SmoothL1LossOp<Context>::RunOnDevice() {
ctx()->set_stream_id(0); // enforce default stream
CHECK(Input(0).dims() == Input(1).dims());
if (InputSize() > 2) CHECK(Input(0).dims() == Input(2).dims());
if (InputSize() > 3) CHECK(Input(0).dims() == Input(3).dims());
CHECK(Input(0).count() == Input(1).count());
if (InputSize() > 2) CHECK(Input(0).count() == Input(2).count());
if (InputSize() > 3) CHECK(Input(0).count() == Input(3).count());
Output(0)->Reshape({ 1 });
diff = ws()->CreateTensor("/mnt/" + anchor() + "/smoothl1_loss/diff");
......
......@@ -36,7 +36,9 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() {
Ldata, Fdata, ctx());
if (normalization == "UNIT") {
Output(0)->ReshapeLike(losses);
vector<TIndex> output_dims = Input(0).dims();
output_dims.erase(output_dims.begin() + axis);
Output(0)->Reshape(output_dims);
Output(0)->template CopyFrom<Context>(losses, ctx());
return;
}
......
......@@ -28,6 +28,36 @@ void InitializeOp<Context>::RunOnDevice() {
RunWithType<float>();
}
template <class Context> template <typename T>
void FillOp<Context>::RunWithType() {
    // Fill the whole output with the (float) fill value cast to T.
    auto* y = Output(0)->template mutable_data<T, Context>();
    math::Set<T, Context>(
        Output(0)->count(), dragon_cast<T, float>(value), y, ctx());
}
template <class Context>
void FillOp<Context>::RunOnDevice() {
    // Determine the output shape either from the "dims" arguments or
    // from a shape-holding int32 tensor referenced by "shape_desc".
    vector<TIndex> output_shape;
    if (shape_desc.empty()) {
        // determine the shape from dimensions
        int ndims = (int)std::max(dims_value.size(), dims_desc.size());
        for (int i = 0; i < ndims; i++) output_shape.push_back(dims(i));
    } else {
        // determine the shape from given shape
        Tensor* shape = ws()->GetTensor(shape_desc);
        CHECK(shape->IsType<int>()) << "\nThe type of shape should be int32.";
        auto* shape_data = shape->template data<int, CPUContext>();
        for (int i = 0; i < shape->count(); i++) output_shape.push_back(shape_data[i]);
    }
    Output(0)->Reshape(output_shape);
    // Dispatch on the requested dtype.
    if (dtype == "float32") RunWithType<float>();
    // BUG FIX: this branch previously tested "float32" again, so the
    // float16 path was unreachable and "float16" hit LOG(FATAL) despite
    // being listed as supported below.
    else if (dtype == "float16") RunWithType<float16>();
    else if (dtype == "int32") RunWithType<int>();
    else if (dtype == "int64") RunWithType<int64_t>();
    else LOG(FATAL) << DTypeHelper(dtype,
        { "float32", "float16", "int32", "int64" });
}
// constant
DEPLOY_CPU(Fill);
#ifdef WITH_CUDA
......
......@@ -3,14 +3,6 @@
namespace dragon {
string DimString(const vector<TIndex>& shape) {
std::stringstream ss;
ss << "(";
for (int i = 0; i < shape.size() - 1; i++) ss << shape[i] << ",";
ss << shape[shape.size() - 1] << ")";
return ss.str();
}
template <class Context>
void ReshapeOp<Context>::RunOnDevice() {
if (shape_desc.size() > 0 || shape_value.size() > 0) {
......@@ -56,7 +48,7 @@ void ReshapeOp<Context>::RunOnDevice() {
CHECK_EQ(Input(0).count() % total_count, 0)
<< "\nCan not change the total size: "
<< Input(0).DimString()
<< " -> " << DimString(new_shape);
<< " -> " << Tensor::DimString(new_shape);
new_shape[i] = Input(0).count() / total_count;
total_count *= new_shape[i];
break;
......@@ -66,7 +58,7 @@ void ReshapeOp<Context>::RunOnDevice() {
CHECK_EQ(total_count, Input(0).count())
<< "\nCan not change the total size."
<< Input(0).DimString()
<< " -> " << DimString(new_shape);
<< " -> " << Tensor::DimString(new_shape);
Output(0)->Reshape(new_shape);
Output(0)->SetMeta(Input(0).meta());
Output(0)->Share(Input(0).memory());
......
......@@ -123,14 +123,9 @@ template <class Context>
void CuDNNBatchNormOp<Context>::RunOnDevice() {
Setup();
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
}
REGISTER_CUDNN_OPERATOR(
......@@ -317,7 +312,6 @@ template <class Context>
void CuDNNBatchNormGradientOp<Context>::RunOnDevice() {
Setup();
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) {
if (this->use_global_stats) InferenceRunWithType<float>();
else TrainingRunWithType<float>();
......@@ -327,12 +321,6 @@ void CuDNNBatchNormGradientOp<Context>::RunOnDevice() {
LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
} else TrainingRunWithType<float16>();
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) {
if (this->use_global_stats) InferenceRunWithType<float>();
else TrainingRunWithType<float>();
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
}
REGISTER_CUDNN_OPERATOR(
......
......@@ -25,8 +25,8 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() {
CUDNN_CHECK(cudnnDropoutGetStatesSize(
ctx()->cudnn_handle(), &states_size));
std::lock_guard<std::mutex> lk(CUDAContext::mutex());
Tensor* states = ws()->CreateTensor("/share/cudnn/dropout:" +
dragon_cast<string, unsigned long long>(random_seed) + "/states");
Tensor* states = ws()->CreateTensor("/share/cudnn/dropout:"
+ std::to_string(random_seed) + "/states");
if (states->count() > 0) {
auto* Sdata = states->template mutable_data<uint8_t, Context>();
CUDNN_CHECK(cudnnRestoreDropoutDescriptor(
......@@ -160,9 +160,7 @@ void CuDNNRecurrentOp<Context>::RunOnDevice() {
ctx()->set_stream_id(0); // enforce default stream
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......@@ -240,9 +238,7 @@ void CuDNNRecurrentGradientOp<Context>::RunOnDevice() {
Output(3)->ReshapeLike(Input(3)); // dCx
if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......
......@@ -31,8 +31,9 @@ void CollectiveUpdateOp<Context>::InitNCCL() {
ncclUniqueId id;
if (comm_rank == comm_root) NCCL_CHECK(ncclGetUniqueId(&id));
MPI_Bcast((void *)&id, sizeof(id), MPI_BYTE, comm_root, comm);
NCCL_CHECK(ncclCommInitRank(&nccl_comm, comm_size, id, comm_rank));
ctx()->SwitchToDevice();
closure = CUDAClosure<Context>(ctx());
NCCL_CHECK(ncclCommInitRank(&nccl_comm, comm_size, id, comm_rank));
#else
LOG(FATAL) << "NCCL was not compiled.";
#endif
......
......@@ -29,6 +29,7 @@ void BiasAddOp<Context>::RunOnDevice() {
dim = Input(0).dim(-1);
inner_dim = Input(0).count(1) / dim;
} else LOG(FATAL) << "Unknown data format: " << data_format;
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
......@@ -38,7 +39,9 @@ DEPLOY_CPU(BiasAdd);
#ifdef WITH_CUDA
DEPLOY_CUDA(BiasAdd);
#endif
OPERATOR_SCHEMA(BiasAdd).NumInputs(2).NumOutputs(1).Inplace({ { 0, 0 } });
OPERATOR_SCHEMA(BiasAdd)
.NumInputs(2).NumOutputs(1)
.Inplace({ { 0, 0 } });
template <class Context> template <typename T>
void BiasAddGradientOp<Context>::RunWithType() {
......@@ -62,6 +65,12 @@ void BiasAddGradientOp<Context>::RunWithType() {
dYdata += y_offset;
}
}
if (Output(0)->name() != "ignore" &&
Output(0)->name() != Input(-1).name()) {
Output(0)->ReshapeLike(Input(-1));
Output(0)->template CopyFrom<Context>(Input(-1), ctx());
}
}
template <class Context>
......@@ -85,7 +94,9 @@ DEPLOY_CPU(BiasAddGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(BiasAddGradient);
#endif
OPERATOR_SCHEMA(BiasAddGradient).NumInputs(3).NumOutputs(2);
OPERATOR_SCHEMA(BiasAddGradient)
.NumInputs(3).NumOutputs(2)
.Inplace({ { 2, 0 } });
class GetBiasAddGradient final : public GradientMakerBase {
public:
......
......@@ -13,12 +13,12 @@ void Conv2dOp<Context>::RunWithType() {
auto* Wdata = Input(1).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>();
for (int n = 0; n < Input(0).dim(0); n++) {
for (int n = 0; n < Input(0).dim(0); n++)
Wx(Xdata + n * x_offset, Wdata, Ydata + n * y_offset);
if (HasBias()) {
auto* Bdata = Input(2).template data<T, Context>();
Pb(Bdata, Ydata + n * y_offset);
}
Pb(Bdata, Ydata);
}
}
......
......@@ -94,19 +94,9 @@ void ConvOpBase<Context>::Wx(
template <class Context> template <typename T>
void ConvOpBase<Context>::Pb(const T* bias, T* y) {
DECLARE_MULTIPLIER(multiplier, out_spatial_dim);
if (data_format == "NCHW") {
math::Gemm<T, Context>(
CblasNoTrans, CblasNoTrans,
num_output, out_spatial_dim, 1,
1.0, bias, multiplier,
1.0, y, ctx());
} else if (data_format == "NHWC") {
math::Gemm<T, Context>(
CblasNoTrans, CblasNoTrans,
out_spatial_dim, num_output, 1,
1.0, multiplier, bias,
1.0, y, ctx());
}
kernel::BiasAdd<T, Context>(Output(0)->count(),
Input(0).dim(0), num_output, out_spatial_dim,
data_format, bias, multiplier, y, ctx());
}
template <class Context> template <typename T>
......@@ -117,8 +107,7 @@ void ConvOpBase<Context>::Dx(const T* dy, const T* weights, T* dx) {
if (data_format == "NCHW") {
math::Gemm<T, Context>(
CblasTrans, CblasNoTrans,
kernel_dim,
conv_out_spatial_dim,
kernel_dim, conv_out_spatial_dim,
conv_out_channels / group,
1.0, weights + weight_offset * g,
dy + output_offset * g,
......@@ -126,8 +115,7 @@ void ConvOpBase<Context>::Dx(const T* dy, const T* weights, T* dx) {
} else if (data_format == "NHWC") {
math::Gemm<T, Context>(
CblasNoTrans, CblasTrans,
conv_out_spatial_dim,
kernel_dim,
conv_out_spatial_dim, kernel_dim,
conv_out_channels / group,
1.0, dy + output_offset * g,
weights + weight_offset * g,
......
#ifdef WITH_CUDNN
#include "core/workspace.h"
#include "utils/filler.h"
#include "operators/vision/bias_add_op.h"
namespace dragon {
template <class Context> template <typename T>
void CuDNNBiasAddOp<Context>::RunWithType() {
    // Lazily create/fill the bias tensor with `dim` channels.
    TENSOR_FILL(Input(1), vector<TIndex>(1, dim));
    // Describe the bias as a single-channel-axis 4D tensor and the output
    // as (outer, channels, 1, inner) / (outer, 1, inner, channels).
    if (data_format == "NCHW") {
        cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
            vector<TIndex>({ 1, dim, 1, 1 }));
        cudnnSetTensor4dDesc<T>(&output_desc, data_format,
            vector<TIndex>({ outer_dim, dim, 1, inner_dim }));
    } else if (data_format == "NHWC") {
        cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
            vector<TIndex>({ 1, 1, 1, dim }));
        cudnnSetTensor4dDesc<T>(&output_desc, data_format,
            vector<TIndex>({ outer_dim, 1, inner_dim, dim }));
    }
    auto* Bdata = Input(1).template data<T, Context>();
    auto* Ydata = Output(0)->template mutable_data<T, Context>();
    // Y = 1 * bias + 1 * Y (broadcast over non-channel axes).
    // NOTE(review): this accumulates onto Y, so it relies on Output(0)
    // already holding the input (in-place BiasAdd) — confirm vs. schema.
    CUDNN_CHECK(cudnnAddTensor(ctx()->cudnn_handle(),
        CUDNNType<T>::one, bias_desc, Bdata,
        CUDNNType<T>::one, output_desc, Ydata));
}
template <class Context>
void CuDNNBiasAddOp<Context>::RunOnDevice() {
    // Split the input shape into (outer, channels, inner) per data format.
    outer_dim = Input(0).dim(0);
    if (data_format == "NCHW") {
        dim = Input(0).dim(1);
        inner_dim = Input(0).count(2);
    } else if (data_format == "NHWC") {
        dim = Input(0).dim(-1);
        inner_dim = Input(0).count(1) / dim;
    } else LOG(FATAL) << "Unknown data format: " << data_format;
    Output(0)->ReshapeLike(Input(0));
    if (XIsType(Input(0), float)) RunWithType<float>();
    else if (XIsType(Input(0), float16)) RunWithType<float16>();
    else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
DEPLOY_CUDNN(BiasAdd);
template <class Context> template <typename T>
void CuDNNBiasAddGradientOp<Context>::RunWithType() {
    // Descriptors: dY carries the full input shape, dB one value per channel.
    if (data_format == "NCHW") {
        cudnnSetTensor4dDesc<T>(&input_desc, data_format,
            vector<TIndex>({ outer_dim, dim, 1, inner_dim }));
        cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
            vector<TIndex>({ 1, dim, 1, 1 }));
    } else if (data_format == "NHWC") {
        cudnnSetTensor4dDesc<T>(&input_desc, data_format,
            vector<TIndex>({ outer_dim, 1, inner_dim, dim }));
        cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
            vector<TIndex>({ 1, 1, 1, dim }));
    }
    auto* dYdata = Input(-1).template data<T, Context>();
    T* dBdata = Output(1)->template mutable_data<T, Context>(ctx());
    // dB = 1 * reduce(dY) + 1 * dB: beta is one, so the reduction is
    // accumulated into dB rather than overwriting it.
    CUDNN_CHECK(cudnnConvolutionBackwardBias(ctx()->cudnn_handle(),
        CUDNNType<T>::one, input_desc, dYdata,
        CUDNNType<T>::one, bias_desc, dBdata));
    // dX is a pass-through of dY; copy only when the gradient is requested
    // and not already shared in-place with dY.
    if (Output(0)->name() != "ignore" &&
        Output(0)->name() != Input(-1).name()) {
        Output(0)->ReshapeLike(Input(-1));
        Output(0)->template CopyFrom<Context>(Input(-1), ctx());
    }
}
template <class Context>
void CuDNNBiasAddGradientOp<Context>::RunOnDevice() {
    // Split the input shape into (outer, channels, inner) per data format.
    outer_dim = Input(0).dim(0);
    if (data_format == "NCHW") {
        dim = Input(0).dim(1);
        inner_dim = Input(0).count(2);
    } else if (data_format == "NHWC") {
        dim = Input(0).dim(-1);
        inner_dim = Input(0).count(1) / dim;
    } else LOG(FATAL) << "Unknown data format: " << data_format;
    // The bias gradient matches the bias shape.
    Output(1)->ReshapeLike(Input(1));
    if (XIsType(Input(0), float)) RunWithType<float>();
    else if (XIsType(Input(0), float16)) RunWithType<float16>();
    else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
DEPLOY_CUDNN(BiasAddGradient);
} // namespace dragon
#endif // WITH_CUDNN
\ No newline at end of file
......@@ -32,15 +32,12 @@ void CuDNNConv2dOp<Context>::ResetDesc() {
// determine the bias shape
if (HasBias()) {
bias_offset = num_output / cudnn_group;
if (data_format == "NCHW") {
cudnnSetTensor4dDesc<T>(
&bias_desc, data_format,
vector<TIndex>({ 1, bias_offset, 1, 1 }));
cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
vector<TIndex>({ 1, num_output, 1, 1 }));
} else if (data_format == "NHWC") {
cudnnSetTensor4dDesc<T>(
&bias_desc, data_format,
vector<TIndex>({ 1, 1, 1, bias_offset }));
cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
vector<TIndex>({ 1, 1, 1, num_output }));
}
}
......@@ -86,12 +83,13 @@ void CuDNNConv2dOp<Context>::RunWithType() {
filter_desc, Wdata + weight_offset * g,
conv_desc, fwd_algo, WSdata, fwd_data_size,
CUDNNType<T>::zero, output_desc, Ydata + y_offset * g));
}
if (HasBias()) {
auto* bias = Input(2).template data<T, Context>();
auto* Bdata = Input(2).template data<T, Context>();
CUDNN_CHECK(cudnnAddTensor(cudnn_handle,
CUDNNType<T>::one, bias_desc, bias + bias_offset * g,
CUDNNType<T>::one, output_desc, Ydata + y_offset * g));
}
CUDNNType<T>::one, bias_desc, Bdata,
CUDNNType<T>::one, output_desc, Ydata));
}
}
......@@ -128,7 +126,6 @@ void CuDNNConv2dOp<Context>::RunOnDevice() {
#endif
RunWithType<float>();
} else if (XIsType(Input(0), float16)) {
#ifdef WITH_CUDA_FP16
#if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc,
......@@ -150,7 +147,6 @@ void CuDNNConv2dOp<Context>::RunOnDevice() {
conv_desc, CUDNN_TENSOR_OP_MATH));
#endif
RunWithType<float16>();
#endif // WITH_CUDA_FP16
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......@@ -179,15 +175,12 @@ void CuDNNConv2dGradientOp<Context>::ResetDesc() {
// determine the bias shape
if (HasBias()) {
bias_offset = num_output / cudnn_group;
if (data_format == "NCHW") {
cudnnSetTensor4dDesc<T>(
&bias_desc, data_format,
vector<TIndex>({ 1, bias_offset, 1, 1 }));
cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
vector<TIndex>({ 1, num_output, 1, 1 }));
} else if (data_format == "NHWC") {
cudnnSetTensor4dDesc<T>(&
bias_desc, data_format,
vector<TIndex>({ 1, 1, 1, bias_offset }));
cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
vector<TIndex>({ 1, 1, 1, num_output }));
}
}
......@@ -234,13 +227,14 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
auto cudnn_handle = ctx()->cudnn_handle();
for (int g = 0; g < cudnn_group; g++) {
if (Output(2)->name() != "ignore") {
T* dBdata = Output(2)->template mutable_data<T, Context>(ctx());
CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle,
CUDNNType<T>::one, input_desc, dYdata + y_offset * g,
CUDNNType<T>::one, bias_desc, dBdata + bias_offset * g));
CUDNNType<T>::one, input_desc, dYdata,
CUDNNType<T>::one, bias_desc, dBdata));
}
for (int g = 0; g < cudnn_group; g++) {
if (Output(1)->name() != "ignore") {
auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(ctx());
......@@ -295,7 +289,6 @@ void CuDNNConv2dGradientOp<Context>::RunOnDevice() {
#endif
RunWithType<float>();
} else if (XIsType(Input(0), float16)) {
#ifdef WITH_CUDA_FP16
#if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc,
......@@ -317,7 +310,6 @@ void CuDNNConv2dGradientOp<Context>::RunOnDevice() {
conv_desc, CUDNN_TENSOR_OP_MATH));
#endif
RunWithType<float16>();
#endif // WITH_CUDA_FP16
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......
......@@ -32,15 +32,12 @@ void CuDNNConv2dTransposeOp<Context>::ResetDesc() {
// determine the bias shape
if (HasBias()) {
bias_offset = num_output / cudnn_group;
if (data_format == "NCHW") {
cudnnSetTensor4dDesc<T>(
&bias_desc, data_format,
vector<TIndex>({ 1, bias_offset, 1, 1 }));
cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
vector<TIndex>({ 1, num_output, 1, 1 }));
} else if (data_format == "NHWC") {
cudnnSetTensor4dDesc<T>(
&bias_desc, data_format,
vector<TIndex>({ 1, 1, 1, bias_offset }));
cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
vector<TIndex>({ 1, 1, 1, num_output }));
}
}
......@@ -86,12 +83,13 @@ void CuDNNConv2dTransposeOp<Context>::RunWithType() {
input_desc, Xdata + x_offset * g,
conv_desc, fwd_algo, WSdata, fwd_data_size,
CUDNNType<T>::zero, output_desc, Ydata + y_offset * g));
}
if (HasBias()) {
auto* bias = Input(2).template data<T, Context>();
auto* Bdata = Input(2).template data<T, Context>();
CUDNN_CHECK(cudnnAddTensor(cudnn_handle,
CUDNNType<T>::one, bias_desc, bias + bias_offset * g,
CUDNNType<T>::one, output_desc, Ydata + y_offset * g));
}
CUDNNType<T>::one, bias_desc, Bdata,
CUDNNType<T>::one, output_desc, Ydata));
}
}
......@@ -128,7 +126,6 @@ void CuDNNConv2dTransposeOp<Context>::RunOnDevice() {
#endif
RunWithType<float>();
} else if (XIsType(Input(0), float16)) {
#ifdef WITH_CUDA_FP16
#if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc,
......@@ -150,7 +147,6 @@ void CuDNNConv2dTransposeOp<Context>::RunOnDevice() {
conv_desc, CUDNN_TENSOR_OP_MATH));
#endif
RunWithType<float16>();
#endif // WITH_CUDA_FP16
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......@@ -179,15 +175,12 @@ void CuDNNConv2dTransposeGradientOp<Context>::ResetDesc() {
// determine the bias shape
if (HasBias()) {
bias_offset = num_output / cudnn_group;
if (data_format == "NCHW") {
cudnnSetTensor4dDesc<T>(
&bias_desc, data_format,
vector<TIndex>({ 1, bias_offset, 1, 1 }));
cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
vector<TIndex>({ 1, num_output, 1, 1 }));
} else if (data_format == "NHWC") {
cudnnSetTensor4dDesc<T>(
&bias_desc, data_format,
vector<TIndex>({ 1, 1, 1, bias_offset }));
cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
vector<TIndex>({ 1, 1, 1, num_output }));
}
}
......@@ -234,13 +227,14 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunWithType() {
auto cudnn_handle = ctx()->cudnn_handle();
for (int g = 0; g < cudnn_group; g++) {
if (Output(2)->name() != "ignore") {
T* dBdata = Output(2)->template mutable_data<T, Context>(ctx());
CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle,
CUDNNType<T>::one, input_desc, dYdata + y_offset * g,
CUDNNType<T>::one, bias_desc, dBdata + bias_offset * g));
CUDNNType<T>::one, input_desc, dYdata,
CUDNNType<T>::one, bias_desc, dBdata));
}
for (int g = 0; g < cudnn_group; g++) {
if (Output(1)->name() != "ignore") {
auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(ctx());
......@@ -295,7 +289,6 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunOnDevice() {
#endif
RunWithType<float>();
} else if (XIsType(Input(0), float16)) {
#ifdef WITH_CUDA_FP16
#if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc,
......@@ -317,7 +310,6 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunOnDevice() {
conv_desc, CUDNN_TENSOR_OP_MATH));
#endif
RunWithType<float16>();
#endif // WITH_CUDA_FP16
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
......
......@@ -26,14 +26,9 @@ void CuDNNLRNOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (this->mode == "ACROSS_CHANNELS") {
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
} else if (this->mode == "WITHIN_CHANNEL") {
LRNOp<Context>::RunOnDevice();
} else {
......@@ -69,14 +64,9 @@ void CuDNNLRNGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
if (this->mode == "ACROSS_CHANNELS") {
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
} else if (this->mode == "WITHIN_CHANNEL") {
LRNGradientOp<Context>::RunOnDevice();
} else {
......
......@@ -34,14 +34,9 @@ template <class Context>
void CuDNNPooling2dOp<Context>::RunOnDevice() {
Pooling2dOp<Context>::Reshape();
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
}
DEPLOY_CUDNN(Pooling2d);
......@@ -79,14 +74,9 @@ template <class Context>
void CuDNNPooling2dGradientOp<Context>::RunOnDevice() {
Pooling2dGradientOp<Context>::Reshape();
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
}
DEPLOY_CUDNN(Pooling2dGradient);
......
......@@ -51,7 +51,8 @@ void DenseConcatGradientOp<Context>::ElimateCorruption() {
}
int idx = safe_heads.front();
safe_heads.pop();
Tensor* buffer = ws()->GetTensor("/opt/mirror_stage/buffer_" + dragon_cast<string, int>(idx));
Tensor* buffer = ws()->GetTensor(
"/opt/mirror_stage/buffer_" + std::to_string(idx));
Input(0).Move(buffer->memory());
head_data[idx] = Input(0).name();
if (XIsType(Input(-2), float)) RestoreX1<float>();
......@@ -86,7 +87,8 @@ void DenseConcatGradientOp<Context>::ElimateCorruption() {
<< "\nadd WORKSPACE_MAX_CORRUPTED_SIZE for more powerful mirror stage ?";
int idx = safe_heads.front();
safe_heads.pop();
Tensor* buffer = ws()->GetTensor("/opt/mirror_stage/buffer_" + dragon_cast<string, int>(idx));
Tensor* buffer = ws()->GetTensor(
"/opt/mirror_stage/buffer_" + std::to_string(idx));
Output(i)->Move(buffer->memory());
head_data[idx] = Output(i)->name();
}
......
#include "core/workspace.h"
#include "utils/op_kernel.h"
#include "utils/math_functions.h"
#include "operators/vision/drop_block_op.h"
namespace dragon {
template <class Context> template <typename T>
void DropBlock2dOp<Context>::RunWithType() {
    auto* Xdata = Input(0).template data<T, Context>();
    auto* Ydata = Output(0)->template mutable_data<T, Context>();
    if (phase() == "TEST") {
        // Inference: DropBlock is a no-op; copy only if not running in-place.
        if (Output(0) != &Input(0)) {
            ctx()->template Copy<T, Context, Context>(
                Output(0)->count(), Ydata, Xdata);
        }
    } else if (phase() == "TRAIN") {
        // Persist the mask and its normalizer so the backward pass
        // (same anchor) can re-apply the identical scaling.
        auto* mask = ws()->CreateTensor(
            "/mnt/" + anchor() + "/drop_block/mask");
        auto* norm = ws()->CreateTensor(
            "/mnt/" + anchor() + "/drop_block/norm");
        mask->ReshapeLike(Input(0));
        norm->Reshape(vector<TIndex>({ 1 }));
        // Scratch buffers: [0] RNG draws over the seed region,
        // [1] int mask, [2] float copy of the mask (for summation).
        auto WSdata = ws()->template caches<Context>({
            n * c * seed_h * seed_w * sizeof(uint32_t),
            mask->count() * sizeof(int),
            mask->count() * sizeof(float)});
        auto* Mdata = mask->template mutable_data<uint8_t, Context>();
        auto* Ndata = norm->template mutable_data<float, CPUContext>();
        // fill the mask with ones
        math::Set<int, Context>(mask->count(),
            1, (int*)WSdata[1], ctx());
        // generate 2d mask from seed region
        kernel::DropBlock2d<Context>(n, c, h, w,
            seed_h, seed_w, block_size, gamma, data_format,
            (uint32_t*)WSdata[0], (int*)WSdata[1], ctx());
        // convert to float mask for counting
        kernel::TypeA2B<int, float, Context>(mask->count(),
            (int*)WSdata[1], (float*)WSdata[2], ctx());
        // convert to uint8 mask for applying
        kernel::TypeA2B<int, uint8_t, Context>(mask->count(),
            (int*)WSdata[1], Mdata, ctx());
        // count && apply
        float normalizer = math::ASum<float, Context>(
            mask->count(), (float*)WSdata[2]);
        // Guard against an all-zero mask before dividing.
        normalizer = std::max(normalizer, 1.f);
        // Rescale kept units by count / #kept; cached for backward.
        Ndata[0] = normalizer = mask->count() / normalizer;
        kernel::ApplyMask<T, uint8_t, Context>(mask->count(),
            normalizer, Xdata, Mdata, Ydata, ctx());
    } else LOG(FATAL) << "Incorrect Op phase: " << phase();
}
template <class Context>
void DropBlock2dOp<Context>::RunOnDevice() {
    ctx()->set_stream_id(0);  // enforce default stream
    // Decode the spatial layout; anything else is a configuration error.
    if (data_format == "NCHW") {
        n = Input(0).dim(0), c = Input(0).dim(1);
        h = Input(0).dim(2), w = Input(0).dim(3);
    } else if (data_format == "NHWC") {
        n = Input(0).dim(0), c = Input(0).dim(-1);
        h = Input(0).dim(1), w = Input(0).dim(2);
    } else {
        // Fix: previously an unknown data_format fell through silently,
        // leaving n/c/h/w stale from a prior run.
        LOG(FATAL) << "Unknown data format: " << data_format;
    }
    seed_h = h - block_size + 1;
    seed_w = w - block_size + 1;
    // Fix: "Excepted" -> "Expected" in the error message.
    CHECK(seed_h > 0 && seed_w > 0)
        << "\nExpected block_size <= feat_size.";
    Output(0)->ReshapeLike(Input(0));
    // Anneal apply_prob down towards keep_prob by "decrement" per run.
    if (decrement > 0 && apply_prob > keep_prob()) {
        apply_prob -= decrement;
    } else { apply_prob = keep_prob(); }
    // gamma: per-unit seed probability, scaled by the feature/seed area
    // ratio (cf. the DropBlock formulation).
    gamma = (1.f - apply_prob) / (block_size * block_size);
    gamma *= (alpha * (h * w) / (seed_h * seed_w));
    if (XIsType(Input(0), float)) RunWithType<float>();
    else if (XIsType(Input(0), float16)) RunWithType<float16>();
    else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
DEPLOY_CPU(DropBlock2d);
#ifdef WITH_CUDA
DEPLOY_CUDA(DropBlock2d);
#endif
OPERATOR_SCHEMA(DropBlock2d)
.NumInputs(1).NumOutputs(1)
.Inplace({ { 0, 0 } });
template <class Context> template <typename T>
void DropBlock2dGradientOp<Context>::RunWithType() {
    // Fetch the mask and normalizer cached by the forward pass
    // under the same anchor.
    auto* mask = ws()->GetTensor(
        "/mnt/" + anchor() + "/drop_block/mask");
    auto* norm = ws()->GetTensor(
        "/mnt/" + anchor() + "/drop_block/norm");
    auto* dYdata = Input(-1).template data<T, Context>();
    auto* dXdata = Output(0)->template mutable_data<T, Context>();
    auto* Mdata = mask->template data<uint8_t, Context>();
    auto* Ndata = norm->template mutable_data<float, CPUContext>();
    if (phase() == "TEST") { NOT_IMPLEMENTED; }
    else if (phase() == "TRAIN") {
        // dX = dY * mask * normalizer, mirroring the forward scaling.
        kernel::ApplyMask<T, uint8_t, Context>(mask->count(),
            Ndata[0], dYdata, Mdata, dXdata, ctx());
    } else LOG(FATAL) << "Incorrect Op phase: " << phase();
}
template <class Context>
void DropBlock2dGradientOp<Context>::RunOnDevice() {
    Output(0)->ReshapeLike(Input(0));
    // Dispatch on the input dtype; only fp32 and fp16 are supported.
    if (XIsType(Input(0), float)) {
        RunWithType<float>();
    } else if (XIsType(Input(0), float16)) {
        RunWithType<float16>();
    } else {
        LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
    }
}
DEPLOY_CPU(DropBlock2dGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(DropBlock2dGradient);
#endif
OPERATOR_SCHEMA(DropBlock2dGradient)
.NumInputs(2).NumOutputs(1)
.Inplace({ { 1, 0 } });
// Gradient maker: emits "DropBlock2dGradient" with inputs {Y, dY}
// and output {dX}.
class GetDropBlock2dGradient final : public GradientMakerBase {
 public:
    GRADIENT_MAKER_CTOR(GetDropBlock2dGradient);
    vector<OperatorDef> MakeDefs() override {
        return SingleDef(def.type() + "Gradient", "",
            vector<string> {O(0), GO(0)},
            vector<string> {GI(0)});
    }
};
REGISTER_GRADIENT(DropBlock2d, GetDropBlock2dGradient);
} // namespace dragon
\ No newline at end of file
......@@ -35,7 +35,11 @@ message Argument {
repeated string strings=7;
}
enum DeviceType { CPU = 0; CUDA = 1; OPENCL = 2; }
enum DeviceType {
CPU = 0;
CUDA = 1;
CNML = 2;
}
message DeviceOption {
optional DeviceType device_type = 1 [default = CPU];
......
......@@ -20,8 +20,6 @@ static_cast<void *>(&CUDNNType<double>::oneval);
const void* CUDNNType<double>::zero =
static_cast<void *>(&CUDNNType<double>::zeroval);
#ifdef WITH_CUDA_FP16
float CUDNNType<float16>::oneval = 1.0;
float CUDNNType<float16>::zeroval = 0.0;
const void* CUDNNType<float16>::one =
......@@ -29,8 +27,6 @@ static_cast<void*>(&CUDNNType<float16>::oneval);
const void* CUDNNType<float16>::zero =
static_cast<void*>(&CUDNNType<float16>::zeroval);
#endif
template <typename T>
void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc,
......@@ -173,8 +169,7 @@ void cudnnSetTensor4dDesc(
<< "\nThe num of dimensions of Tensor("
<< tensor->name() << ") "
<< "should be 4, but got " << tensor->ndim() << ".";
cudnnSetTensor4dDesc<T>(
desc, data_format, tensor->dims());
cudnnSetTensor4dDesc<T>(desc, data_format, tensor->dims());
}
template <typename T>
......@@ -186,8 +181,7 @@ void cudnnSetTensor5dDesc(
<< "\nThe num of dimensions of Tensor("
<< tensor->name() << ") "
<< "should be 5, but got " << tensor->ndim() << ".";
cudnnSetTensor5dDesc<T>(
desc, data_format, tensor->dims());
cudnnSetTensor5dDesc<T>(desc, data_format, tensor->dims());
}
template <typename T>
......@@ -300,9 +294,6 @@ template void cudnnSetTensorDesc<double>(
const vector<TIndex>&,
const vector<TIndex>&);
#ifdef WITH_CUDA_FP16
template void cudnnSetTensorDesc<float16>(
cudnnTensorDescriptor_t*,
Tensor*);
......@@ -352,8 +343,6 @@ template void cudnnSetTensorDesc<float16>(
const vector<TIndex>&,
const vector<TIndex>&);
#endif // WITH_CUDA_FP16
} // namespace dragon
#endif // WITH_CUDNN
\ No newline at end of file
......@@ -35,8 +35,7 @@ LogSeverity StrToLogSeverity(std::string level) {
}
std::string GenLogHashKey(const char* file, int line) {
return std::string(file) +
dragon_cast<std::string, int>(line);
return std::string(file) + std::to_string(line);
}
int EveryNRegister(
......
......@@ -49,6 +49,21 @@ template <> void Set<int, CPUContext>(
#endif // WITH_SSE
}
template <> void Set<int64_t, CPUContext>(
    const int n,
    const int64_t alpha,
    int64_t* x,
    CPUContext* ctx) {
    // Zero fill takes the dedicated memset fast path.
    if (alpha == 0) {
        memset(x, 0, sizeof(int64_t) * n);
    } else {
#ifdef WITH_OMP
        #pragma omp parallel for num_threads(GET_OMP_THREADS(n))
#endif
        for (int idx = 0; idx < n; ++idx) x[idx] = alpha;
    }
}
template <> void Set<float16, CPUContext>(
const int n,
const float16 alpha,
......@@ -148,19 +163,36 @@ template <> void RandomTruncatedNormal<float16, CPUContext>(
NOT_IMPLEMENTED;
}
template <> void RandomBernoulli<float, CPUContext>(
// Draw n Bernoulli(p) samples into x using the context's RNG.
// Fix: removed a stray duplicate "#pragma omp parallel for" line left by
// a merge; two consecutive parallel-for pragmas are ill-formed OpenMP.
// NOTE(review): all threads share *rng here — concurrent draws from one
// std::bernoulli_distribution/engine race under OpenMP; confirm intent.
template <typename T>
void _RandomBernoulli(
    const int n,
    const float p,
    T* x,
    CPUContext* ctx) {
    std::bernoulli_distribution distribution(p);
    auto* rng = ctx->rand_generator();
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(n))
#endif
    for (int i = 0; i < n; ++i) x[i] = distribution(*rng);
}
template <> void RandomBernoulli<uint8_t, CPUContext>(
    const int n,
    const float p,
    uint8_t* x,
    CPUContext* ctx) {
    // Thin dispatcher; T is deduced from the output pointer.
    _RandomBernoulli(n, p, x, ctx);
}
template <> void RandomBernoulli<uint32_t, CPUContext>(
    const int n,
    const float p,
    uint32_t* x,
    CPUContext* ctx) {
    // Thin dispatcher; T is deduced from the output pointer.
    _RandomBernoulli(n, p, x, ctx);
}
/******************** Level-1 ********************/
template <> void Add<float, CPUContext>(
......@@ -311,6 +343,14 @@ template <> void Log<float, CPUContext>(
for (int i = 0; i < n; ++i) y[i] = std::log(x[i]);
}
// Natural log for fp16 has no CPU implementation; aborts via the macro.
template <> void Log<float16, CPUContext>(
    int n,
    const float16* x,
    float16* y,
    CPUContext* ctx) {
    CPU_FP16_NOT_SUPPORTED;
}
template <> void Square<float, CPUContext>(
int n,
const float* x,
......@@ -379,7 +419,7 @@ template <> void Inv<float, CPUContext>(
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(n))
#endif
for (int i = 0; i < n; ++i) y[i] = numerator / y[i];
for (int i = 0; i < n; ++i) y[i] = numerator / x[i];
}
template <> void Inv<float16, CPUContext>(
......
......@@ -53,6 +53,22 @@ template <> void Set<int, CUDAContext>(
}
}
template <> void Set<int64_t, CUDAContext>(
    const int n,
    const int64_t alpha,
    int64_t* x,
    CUDAContext* ctx) {
    // Zero fill maps onto an async memset on the context's stream.
    if (alpha == 0) {
        CUDA_CHECK(cudaMemsetAsync(x, 0,
            sizeof(int64_t) * n, ctx->cuda_stream()));
    }
    else {
        // Otherwise launch the generic _Set fill kernel (defined
        // elsewhere in this file) on the same stream.
        _Set<int64_t>
            << < CUDA_BLOCKS(n), CUDA_THREADS,
                0, ctx->cuda_stream() >> >(n, alpha, x);
    }
}
template <> void RandomUniform<uint32_t, CUDAContext>(
const int n,
const float low,
......@@ -75,16 +91,6 @@ template <> void RandomNormal<float, CUDAContext>(
CURAND_CHECK(curandGenerateNormal(rng, x, n, mu, sigma));
}
template <> void RandomBernoulli<float, CUDAContext>(
const int n,
const float p,
unsigned int* x,
CUDAContext* ctx) {
// curand could not generate bernoulli distribution
// we recommend implement it within specfic case, e.g. Dropout
NOT_IMPLEMENTED;
}
/******************** Level-1 ********************/
template <typename T>
......
......@@ -28,7 +28,6 @@ template <> void Set<float16, CUDAContext>(
const float16 alpha,
float16* x,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) {
_SetHalf<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
......@@ -40,12 +39,8 @@ template <> void Set<float16, CUDAContext>(
<< < CUDA_BLOCKS(n), CUDA_THREADS,
0, ctx->cuda_stream() >> >(n, alpha, x);
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
#ifdef WITH_CUDA_FP16
__global__ void _TypeFloat2Half(
const int n,
const float* a,
......@@ -54,7 +49,6 @@ __global__ void _TypeFloat2Half(
b[idx] = __float2half(a[idx]);
}
}
#endif
template <> void RandomNormal<float16, CUDAContext>(
const int n,
......@@ -62,7 +56,6 @@ template <> void RandomNormal<float16, CUDAContext>(
const float sigma,
float16* x,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
float* xf32 = (float*)CUDAContext::New(n * sizeof(float));
CURAND_CHECK(curandGenerateNormal(
ctx->curand_generator(), xf32, n, mu, sigma));
......@@ -71,14 +64,10 @@ template <> void RandomNormal<float16, CUDAContext>(
0, ctx->cuda_stream() >> >(n,
xf32, reinterpret_cast<half*>(x));
CUDAContext::Delete(xf32);
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
/******************** Level-1 ********************/
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _AddHalf(
const int n,
......@@ -104,7 +93,6 @@ __global__ void _AddHalf2(
#endif
}
}
#endif
template <> void Add<float16, CUDAContext>(
int n,
......@@ -112,7 +100,6 @@ template <> void Add<float16, CUDAContext>(
const float16* b,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) {
_AddHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
......@@ -128,12 +115,8 @@ template <> void Add<float16, CUDAContext>(
reinterpret_cast<const half*>(b),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _SubHalf(
const int n,
......@@ -159,7 +142,6 @@ __global__ void _SubHalf2(
#endif
}
}
#endif
template <> void Sub<float16, CUDAContext>(
int n,
......@@ -167,7 +149,6 @@ template <> void Sub<float16, CUDAContext>(
const float16* b,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) {
_SubHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
......@@ -183,12 +164,8 @@ template <> void Sub<float16, CUDAContext>(
reinterpret_cast<const half*>(b),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _MulHalf(
const int n,
......@@ -214,7 +191,6 @@ __global__ void _MulHalf2(
#endif
}
}
#endif
template <> void Mul<float16, CUDAContext>(
int n,
......@@ -222,7 +198,6 @@ template <> void Mul<float16, CUDAContext>(
const float16* b,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) {
_MulHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
......@@ -238,12 +213,8 @@ template <> void Mul<float16, CUDAContext>(
reinterpret_cast<const half*>(b),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _DivHalf(
const int n,
......@@ -256,7 +227,6 @@ __global__ void _DivHalf(
#endif
}
}
#endif
template <> void Div<float16, CUDAContext>(
int n,
......@@ -264,19 +234,59 @@ template <> void Div<float16, CUDAContext>(
const float16* b,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_DivHalf<half>
<< < CUDA_BLOCKS(n), CUDA_THREADS,
0, ctx->cuda_stream() >> >(n,
reinterpret_cast<const half*>(a),
reinterpret_cast<const half*>(b),
reinterpret_cast<half*>(y));
#else
CUDA_FP16_NOT_COMPILED;
}
// Element-wise natural log on half data, one value per index.
// NOTE(review): on __CUDA_ARCH__ < 530 the guarded body compiles away
// and y is left unwritten — confirm callers never target pre-sm_53.
template <typename T>
__global__ void _LogHalf(
    const int n,
    const T* a,
    T* y) {
    CUDA_1D_KERNEL_LOOP(idx, n) {
#if __CUDA_ARCH__ >= 530
        y[idx] = hlog(a[idx]);
#endif
    }
}
// Vectorized variant: each half2 element carries two packed halves.
// NOTE(review): same sm_53 guard as _LogHalf — y is untouched on
// older architectures.
template <typename T>
__global__ void _LogHalf2(
    const int n,
    const T* a,
    T* y) {
    CUDA_1D_KERNEL_LOOP(idx, n) {
#if __CUDA_ARCH__ >= 530
        y[idx] = h2log(a[idx]);
#endif
    }
}
template <> void Log<float16, CUDAContext>(
    int n,
    const float16* x,
    float16* y,
    CUDAContext* ctx) {
    // Even counts use the packed half2 kernel (two values per thread
    // element, so the launch covers n/2 items); odd counts fall back
    // to the scalar half kernel.
    if ((n & 1) == 0) {
        _LogHalf2<half2>
            << < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
                0, ctx->cuda_stream() >> >(n >> 1,
                    reinterpret_cast<const half2*>(x),
                    reinterpret_cast<half2*>(y));
    }
    else {
        _LogHalf<half>
            << < CUDA_BLOCKS(n), CUDA_THREADS,
                0, ctx->cuda_stream() >> >(n,
                    reinterpret_cast<const half*>(x),
                    reinterpret_cast<half*>(y));
    }
}
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _SquareHalf(
const int n,
......@@ -300,14 +310,12 @@ __global__ void _SquareHalf2(
#endif
}
}
#endif
template <> void Square<float16, CUDAContext>(
int n,
const float16* x,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) {
_SquareHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
......@@ -321,12 +329,8 @@ template <> void Square<float16, CUDAContext>(
reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _SqrtHalf(
int n,
......@@ -350,14 +354,12 @@ __global__ void _SqrtHalf2(
#endif
}
}
#endif
template <> void Sqrt<float16, CUDAContext>(
int n,
const float16* x,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) {
_SqrtHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
......@@ -371,12 +373,8 @@ template <> void Sqrt<float16, CUDAContext>(
reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _PowHalf(
const int n,
......@@ -402,7 +400,6 @@ __global__ void _PowHalf2(
#endif
}
}
#endif
template <> void Pow<float16, CUDAContext>(
int n,
......@@ -410,7 +407,6 @@ template <> void Pow<float16, CUDAContext>(
const float16* x,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
CHECK(alpha == float(2)) << "fp16 only support the power of 2";
if ((n & 1) == 0) {
_PowHalf2<half2>
......@@ -425,12 +421,8 @@ template <> void Pow<float16, CUDAContext>(
alpha, reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _InvHalf(
const int n,
......@@ -456,7 +448,6 @@ __global__ void _InvHalf2(
#endif
}
}
#endif
template <> void Inv<float16, CUDAContext>(
const int n,
......@@ -464,7 +455,6 @@ template <> void Inv<float16, CUDAContext>(
const float16* x,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) {
_InvHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
......@@ -480,9 +470,6 @@ template <> void Inv<float16, CUDAContext>(
reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
/******************** Level-2 ********************/
......@@ -492,15 +479,11 @@ template <> void Scal<float16, CUDAContext>(
const float alpha,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
CUBLAS_CHECK(cublasScalEx(
ctx->cublas_handle(), n,
&alpha, CUDA_R_32F,
y, CUDA_R_16F, 1,
CUDA_R_32F));
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
template <> void Scale<float16, CUDAContext>(
......@@ -519,7 +502,6 @@ template <> void Dot<float16, CUDAContext>(
const float16* b,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
CUBLAS_CHECK(cublasDotEx(
ctx->cublas_handle(), n,
a, CUDA_R_16F, 1,
......@@ -527,12 +509,8 @@ template <> void Dot<float16, CUDAContext>(
y, CUDA_R_16F,
CUDA_R_32F));
ctx->FinishDeviceCompution();
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _AddScalarHalf(
const int n,
......@@ -556,14 +534,12 @@ __global__ void _AddScalarHalf2(
#endif
}
}
#endif
template <> void AddScalar<float16, CUDAContext>(
const int n,
const float alpha,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) {
_AddScalarHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
......@@ -577,12 +553,8 @@ template <> void AddScalar<float16, CUDAContext>(
dragon_cast<half, float>(alpha),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _MulScalarHalf(
const int n,
......@@ -606,14 +578,12 @@ __global__ void _MulScalarHalf2(
#endif
}
}
#endif
template <> void MulScalar<float16, CUDAContext>(
const int n,
const float alpha,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) {
_MulScalarHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
......@@ -627,9 +597,6 @@ template <> void MulScalar<float16, CUDAContext>(
dragon_cast<half, float>(alpha),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
template <> void Axpy<float16, CUDAContext>(
......@@ -638,16 +605,12 @@ template <> void Axpy<float16, CUDAContext>(
const float16* x,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
CUBLAS_CHECK(cublasAxpyEx(
ctx->cublas_handle(), n,
&alpha, CUDA_R_32F,
x, CUDA_R_16F, 1,
y, CUDA_R_16F, 1,
CUDA_R_32F));
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
template <> void Axpby<float16, CUDAContext>(
......@@ -667,7 +630,6 @@ template <> void RandomUniform<float16, CUDAContext>(
const float high,
float16* x,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
float* xf32 = (float*)ctx->New(n * sizeof(float));
CURAND_CHECK(curandGenerateUniform(
ctx->curand_generator(), xf32, n));
......@@ -679,9 +641,6 @@ template <> void RandomUniform<float16, CUDAContext>(
if (range != 1.f) Scal<float16, CUDAContext>(n, range, x, ctx);
if (low != 0.f) AddScalar<float16, CUDAContext>(n, low, x, ctx);
ctx->Delete(xf32);
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
/******************** Level-3 ********************/
......@@ -699,7 +658,6 @@ template <> void Gemm<float16, CUDAContext>(
float16* C,
CUDAContext* ctx,
TensorProto_DataType math_type) {
#ifdef WITH_CUDA_FP16
int lda = (TransA == CblasNoTrans) ? K : M;
int ldb = (TransB == CblasNoTrans) ? N : K;
cublasOperation_t cuTransA = (TransA == CblasNoTrans) ?
......@@ -782,9 +740,6 @@ template <> void Gemm<float16, CUDAContext>(
} else {
LOG(FATAL) << "Unsupported math type";
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
template <> void Gemv<float16, CUDAContext>(
......@@ -798,7 +753,6 @@ template <> void Gemv<float16, CUDAContext>(
float16* y,
CUDAContext* ctx,
TensorProto_DataType math_type) {
#ifdef WITH_CUDA_FP16
cublasOperation_t cuTransA = (TransA == CblasNoTrans) ?
CUBLAS_OP_T : CUBLAS_OP_N;
int m = (cuTransA == CUBLAS_OP_N) ? N : M;
......@@ -881,9 +835,6 @@ template <> void Gemv<float16, CUDAContext>(
} else {
LOG(FATAL) << "Unsupported math type";
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
} // namespace math
......
......@@ -21,30 +21,65 @@ template<> void Dropout<float, CPUContext>(
float prob,
float scale,
const float* x,
uint32_t* mask,
uint32_t* mask32,
uint8_t* mask8,
float* y,
CPUContext* ctx) {
uint32_t thresh = static_cast<uint32_t>(UINT_MAX * prob);
math::RandomBernoulli<float, CPUContext>(count, 1 - prob, mask, ctx);
math::RandomBernoulli<uint8_t, CPUContext>(
count, 1 - prob, mask8, ctx);
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) y[i] = x[i] * mask[i] * scale;
for (int i = 0; i < count; ++i) {
y[i] = x[i] * mask8[i] * scale;
}
}
template<> void DropoutGrad<float, CPUContext>(
// FP16 dropout is not implemented on CPU; aborts via the macro.
// Fix: removed stray parameter lines (dy / mask / dx) left over from the
// old DropoutGrad signature, which made this specialization ill-formed
// (duplicate, mismatched parameters).
template<> void Dropout<float16, CPUContext>(
    const int count,
    float prob,
    float scale,
    const float16* x,
    uint32_t* mask32,
    uint8_t* mask8,
    float16* y,
    CPUContext* ctx) {
    CPU_FP16_NOT_SUPPORTED;
}
// Scale the kept elements: y[i] = x[i] * mask[i] * scale.
// Fix: removed two leftover lines ("dx[i] = dy[i] * mask[i] * scale;")
// that referenced identifiers not in this function's scope and did not
// compile — residue from the removed DropoutGrad kernel.
template <typename Tx, typename Tm>
void _ApplyMask(
    const int count,
    const float scale,
    const Tx* x,
    const Tm* mask,
    Tx* y) {
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int i = 0; i < count; ++i) {
        y[i] = x[i] * mask[i] * scale;
    }
}
template <> void ApplyMask<float, uint8_t, CPUContext>(
    const int count,
    const float scale,
    const float* x,
    const uint8_t* mask,
    float* y,
    CPUContext* ctx) {
    // Thin dispatcher; template arguments are deduced from the pointers.
    _ApplyMask(count, scale, x, mask, y);
}
// FP16 mask application has no CPU implementation; aborts via the macro.
template <> void ApplyMask<float16, uint8_t, CPUContext>(
    const int count,
    const float scale,
    const float16* x,
    const uint8_t* mask,
    float16* y,
    CPUContext* ctx) {
    CPU_FP16_NOT_SUPPORTED;
}
/******************** activation.elu ********************/
......@@ -479,19 +514,161 @@ template <> void Clip<float, CPUContext>(
const float low,
const float high,
const float* x,
float* mask,
float* y,
CPUContext* ctx) {
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
mask[i] = 1.0;
if (x[i] < low || x[i] > high) mask[i] = 0.0;
y[i] = std::max(low, std::min(x[i], high));
}
}
template <> void ClipGrad<float, CPUContext>(
    const int count,
    const float low,
    const float high,
    const float* x,
    const float* dy,
    float* dx,
    CPUContext* ctx) {
    // Pass the gradient through only where x stayed inside [low, high];
    // clipped positions get zero gradient.
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int idx = 0; idx < count; ++idx) {
        const float val = x[idx];
        const bool clipped = (val < low) || (val > high);
        dx[idx] = clipped ? 0.f : dy[idx];
    }
}
/******************** arithmetic.maximum ********************/
template <> void MaximumE<float, CPUContext>(
    const int count,
    const float* x1,
    const float* x2,
    float* y,
    CPUContext* ctx) {
    // Element-wise maximum of two arrays: y = max(x1, x2).
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int idx = 0; idx < count; ++idx)
        y[idx] = std::max(x1[idx], x2[idx]);
}
template <> void MaximumB<float, CPUContext>(
    const int count,
    const float* x1,
    const float x2,
    float* y,
    CPUContext* ctx) {
    // Broadcast maximum against a scalar: y = max(x1, x2).
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int idx = 0; idx < count; ++idx)
        y[idx] = std::max(x1[idx], x2);
}
template <> void MaximumEGrad<float, CPUContext>(
    const int count,
    const float* x1,
    const float* x2,
    const float* dy,
    float* dx1,
    float* dx2,
    CPUContext* ctx) {
    // Route the gradient to whichever operand was strictly larger;
    // ties (and NaNs) send it to x2, matching the forward comparison.
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int idx = 0; idx < count; ++idx) {
        if (x1[idx] > x2[idx]) {
            dx1[idx] = dy[idx]; dx2[idx] = 0;
        } else {
            dx1[idx] = 0; dx2[idx] = dy[idx];
        }
    }
}
template <> void MaximumBGrad<float, CPUContext>(
    const int count,
    const float* x1,
    const float x2,
    const float* dy,
    float* dx1,
    /* float* dx2, */
    CPUContext* ctx) {
    // Scalar branch gets no gradient; dx1 receives dy only where x1 won.
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int idx = 0; idx < count; ++idx)
        dx1[idx] = (x1[idx] > x2) ? dy[idx] : 0;
}
/******************** arithmetic.minimum ********************/
template <> void MinimumE<float, CPUContext>(
    const int count,
    const float* x1,
    const float* x2,
    float* y,
    CPUContext* ctx) {
    // Element-wise minimum of two arrays: y = min(x1, x2).
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int idx = 0; idx < count; ++idx)
        y[idx] = std::min(x1[idx], x2[idx]);
}
template <> void MinimumB<float, CPUContext>(
    const int count,
    const float* x1,
    const float x2,
    float* y,
    CPUContext* ctx) {
    // Broadcast minimum against a scalar: y = min(x1, x2).
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int idx = 0; idx < count; ++idx)
        y[idx] = std::min(x1[idx], x2);
}
template <> void MinimumEGrad<float, CPUContext>(
    const int count,
    const float* x1,
    const float* x2,
    const float* dy,
    float* dx1,
    float* dx2,
    CPUContext* ctx) {
    // Route the gradient to whichever operand was strictly smaller;
    // ties (and NaNs) send it to x2, matching the forward comparison.
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int idx = 0; idx < count; ++idx) {
        if (x1[idx] < x2[idx]) {
            dx1[idx] = dy[idx]; dx2[idx] = 0;
        } else {
            dx1[idx] = 0; dx2[idx] = dy[idx];
        }
    }
}
template <> void MinimumBGrad<float, CPUContext>(
    const int count,
    const float* x1,
    const float x2,
    const float* dy,
    float* dx1,
    /* float* dx2, */
    CPUContext* ctx) {
    // Scalar branch gets no gradient; dx1 receives dy only where x1 won.
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int idx = 0; idx < count; ++idx)
        dx1[idx] = (x1[idx] < x2) ? dy[idx] : 0;
}
/******************** control_flow.compare ********************/
template <> void Equal<float, CPUContext>(
......@@ -524,6 +701,189 @@ template<> void AbsGrad<float, CPUContext>(
}
}
/******************** loss.nll_loss ********************/
// Negative log-likelihood over (outer, axis, inner) layout:
// losses[idx] = -log_prob[label channel]; ignored labels yield zero
// loss and a zero flag, counted labels set the flag to one.
template <typename Tx, typename Ty>
void _NLLLoss(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const Tx* log_prob,
    const Ty* labels,
    const int* ignores,
    const int num_ignores,
    Tx* losses,
    Tx* flags) {
    const int* ignores_end = ignores + num_ignores;
    for (int oix = 0; oix < outer_dim; ++oix) {
        for (int iix = 0; iix < inner_dim; ++iix) {
            const int idx = oix * inner_dim + iix;
            const int label = static_cast<int>(labels[idx]);
            if (std::find(ignores, ignores_end, label) != ignores_end) {
                losses[idx] = flags[idx] = 0;
            } else {
                losses[idx] = -log_prob[
                    (oix * axis_dim + label) * inner_dim + iix];
                flags[idx] = 1;
            }
        }
    }
}
template <> void NLLLoss<float, float, CPUContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float* log_prob,
    const float* labels,
    const int* ignores,
    const int num_ignores,
    float* losses,
    float* flags,
    CPUContext* ctx) {
    // Thin dispatcher; Tx/Ty are deduced from the pointer arguments.
    _NLLLoss(outer_dim, axis_dim, inner_dim,
             log_prob, labels, ignores,
             num_ignores, losses, flags);
}
// FP16 log-probs are not supported on CPU; aborts via the macro.
template <> void NLLLoss<float16, float, CPUContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float16* log_prob,
    const float* labels,
    const int* ignores,
    const int num_ignores,
    float* losses,
    float* flags,
    CPUContext* ctx) {
    CPU_FP16_NOT_SUPPORTED;
}
template <> void NLLLoss<float, int64_t, CPUContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float* log_prob,
    const int64_t* labels,
    const int* ignores,
    const int num_ignores,
    float* losses,
    float* flags,
    CPUContext* ctx) {
    // Thin dispatcher; Tx/Ty are deduced from the pointer arguments.
    _NLLLoss(outer_dim, axis_dim, inner_dim,
             log_prob, labels, ignores,
             num_ignores, losses, flags);
}
/* NLLLoss, float16 log-probs with int64 labels: not implemented on
 * CPU — aborts via the CPU_FP16_NOT_SUPPORTED macro. */
template <> void NLLLoss<float16, int64_t, CPUContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float16* log_prob,
    const int64_t* labels,
    const int* ignores,
    const int num_ignores,
    float* losses,
    float* flags,
    CPUContext* ctx) {
    CPU_FP16_NOT_SUPPORTED;
}
/*! Gradient of the negative log-likelihood.
 *  Writes -1 into dx at each non-ignored (outer, label, inner) slot
 *  (dx is presumably pre-zeroed by the caller — the kernel never
 *  clears it). flags[0] is reset here and counts the contributing
 *  positions, for normalization by the reduction that follows.
 *  log_prob is accepted for signature symmetry but not read. */
template <typename Tx, typename Ty>
void _NLLLossGrad(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const Tx* log_prob,
    const Ty* labels,
    const int* ignores,
    const int num_ignores,
    Tx* dx,
    Tx* flags) {
    flags[0] = 0;
    const int num_preds = outer_dim * inner_dim;
    for (int idx = 0; idx < num_preds; ++idx) {
        const int oix = idx / inner_dim;
        const int iix = idx % inner_dim;
        const int label = static_cast<int>(labels[idx]);
        bool ignored = false;
        for (int k = 0; k < num_ignores; ++k) {
            if (label == ignores[k]) { ignored = true; break; }
        }
        if (!ignored) {
            dx[(oix * axis_dim + label) * inner_dim + iix] = -1;
            flags[0] += 1;
        }
    }
}
/* NLLLossGrad, float32 with float32 labels: forwards to the generic
 * CPU kernel above. */
template<> void NLLLossGrad<float, float, CPUContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float* log_prob,
    const float* labels,
    const int* ignores,
    const int num_ignores,
    float* dx,
    float* flags,
    CPUContext* ctx) {
    /* ctx is unused: the CPU kernel runs synchronously inline */
    _NLLLossGrad<float, float>(
        outer_dim, axis_dim, inner_dim,
        log_prob, labels, ignores,
        num_ignores, dx, flags);
}
/* NLLLossGrad, float16: not implemented on CPU — aborts via the
 * CPU_FP16_NOT_SUPPORTED macro. */
template<> void NLLLossGrad<float16, float, CPUContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float16* log_prob,
    const float* labels,
    const int* ignores,
    const int num_ignores,
    float16* dx,
    float* flags,
    CPUContext* ctx) {
    CPU_FP16_NOT_SUPPORTED;
}
/* NLLLossGrad, float32 with int64 labels: forwards to the generic
 * CPU kernel above. */
template<> void NLLLossGrad<float, int64_t, CPUContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float* log_prob,
    const int64_t* labels,
    const int* ignores,
    const int num_ignores,
    float* dx,
    float* flags,
    CPUContext* ctx) {
    /* ctx is unused: the CPU kernel runs synchronously inline */
    _NLLLossGrad<float, int64_t>(
        outer_dim, axis_dim, inner_dim,
        log_prob, labels, ignores,
        num_ignores, dx, flags);
}
/* NLLLossGrad, float16 with int64 labels: not implemented on CPU —
 * aborts via the CPU_FP16_NOT_SUPPORTED macro. */
template<> void NLLLossGrad<float16, int64_t, CPUContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float16* log_prob,
    const int64_t* labels,
    const int* ignores,
    const int num_ignores,
    float16* dx,
    float* flags,
    CPUContext* ctx) {
    CPU_FP16_NOT_SUPPORTED;
}
/******************** loss.sigmoid_cross_entropy ********************/
template <> void SigmoidCrossEntropy<float, CPUContext>(
......@@ -2706,6 +3066,94 @@ template<> void Col2Im2d<float, CPUContext>(
} else LOG(FATAL) << "Unknown data format: " << data_format;
}
/******************** vision.drop_block ********************/
/* DropBlock (NCHW): for every fired seed position, clear the
 * block_size x block_size square anchored at (y, x) in the mask.
 * `seed` holds one bernoulli draw per (n, c, y, x) over the
 * (seed_h, seed_w) anchor grid and is consumed strictly in that
 * order via seed_idx; `mask[...] &= 0` zeroes without reading.
 * NOTE(review): no bounds check on y+i / x+j — presumably
 * seed_h <= H - block_size + 1 and seed_w <= W - block_size + 1;
 * confirm at the caller. */
void _DropBlock2d_NCHW(
    const int N,
    const int C,
    const int H,
    const int W,
    const int seed_h,
    const int seed_w,
    const int block_size,
    const uint32_t* seed,
    int* mask) {
    TIndex seed_idx = 0;
    for (int n = 0; n < N; ++n) {
        for (int c = 0; c < C; ++c) {
            const int nc = (n * C + c) * H;
            for (int y = 0; y < seed_h; ++y) {
                for (int x = 0; x < seed_w; ++x) {
                    if (seed[seed_idx] > 0) {
                        for (int i = 0; i < block_size; ++i) {
                            const int nch = (nc + y + i) * W;
                            for (int j = 0; j < block_size; ++j) {
                                mask[nch + x + j] &= 0;
                            } // end j
                        } // end i
                    }
                    seed_idx++;
                } // end x
            } // end y
        } // end c
    } // end n
}
/* DropBlock (NHWC): same scheme as the NCHW variant — the seed grid
 * is still traversed in (n, c, y, x) order, only the mask addressing
 * changes to channel-last ((n*H + h)*W + w)*C + c. */
void _DropBlock2d_NHWC(
    const int N,
    const int C,
    const int H,
    const int W,
    const int seed_h,
    const int seed_w,
    const int block_size,
    const uint32_t* seed,
    int* mask) {
    TIndex seed_idx = 0;
    for (int n = 0; n < N; ++n) {
        for (int c = 0; c < C; ++c) {
            for (int y = 0; y < seed_h; ++y) {
                for (int x = 0; x < seed_w; ++x) {
                    if (seed[seed_idx] > 0) {
                        for (int i = 0; i < block_size; ++i) {
                            const int nh = (n * H + y + i) * W;
                            for (int j = 0; j < block_size; ++j) {
                                mask[(nh + x + j) * C + c] &= 0;
                            } // end j
                        } // end i
                    }
                    seed_idx++;
                } // end x
            } // end y
        } // end c
    } // end n
}
/* DropBlock2d (CPU): draw one bernoulli(gamma) seed per anchor
 * position, then clear the corresponding squares in `mask` for the
 * given layout. NOTE(review): mask is only ever cleared here — it is
 * presumably pre-filled with ones by the caller; confirm. */
template <> void DropBlock2d<CPUContext>(
    const int N,
    const int C,
    const int H,
    const int W,
    const int seed_h,
    const int seed_w,
    const int block_size,
    const float gamma,
    const string& data_format,
    uint32_t* seed,
    int* mask,
    CPUContext* ctx) {
    /* one seed per (n, c, anchor_y, anchor_x) */
    const int count = N * C * seed_h * seed_w;
    math::RandomBernoulli<uint32_t, CPUContext>(
        count, gamma, seed, ctx);
    if (data_format == "NCHW") {
        _DropBlock2d_NCHW(N, C, H, W,
            seed_h, seed_w, block_size, seed, mask);
    } else if (data_format == "NHWC") {
        _DropBlock2d_NHWC(N, C, H, W,
            seed_h, seed_w, block_size, seed, mask);
    } else LOG(FATAL) << "Unknown data format: " << data_format;
}
/******************** vision.nn_resize ********************/
template <typename T>
......
......@@ -19,12 +19,14 @@ template<typename T>
__global__ void _Dropout(
const int count,
const uint32_t thresh,
const T scale,
const float scale,
const T* x,
const uint32_t* mask,
const uint32_t* mask32,
uint8_t* mask8,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
y[idx] = x[idx] * (mask[idx] > thresh) * scale;
mask8[idx] = (mask32[idx] > thresh);
y[idx] = x[idx] * mask8[idx] * scale;
}
}
......@@ -33,44 +35,42 @@ template<> void Dropout<float, CUDAContext>(
float prob,
float scale,
const float* x,
uint32_t* mask,
uint32_t* mask32,
uint8_t* mask8,
float* y,
CUDAContext* ctx) {
uint32_t thresh = static_cast<uint32_t>(UINT_MAX * prob);
math::RandomUniform<uint32_t, CUDAContext>(
count, float(0), float(UINT_MAX), mask, ctx);
count, float(0), float(UINT_MAX), mask32, ctx);
auto thresh = static_cast<uint32_t>(UINT_MAX * prob);
_Dropout<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
count, thresh, scale, x, mask, y);
0, ctx->cuda_stream() >> >(count,
thresh, scale, x, mask32, mask8, y);
}
/*! Apply a saved dropout mask: y = x * mask * scale.
 *  Tm is the mask element type (uint8_t in the float specialization);
 *  a zero mask entry drops the unit, a non-zero one keeps it scaled.
 *  (This region previously contained the removed `_DropoutGrad`
 *  lines merged with this kernel; only the new kernel is kept.) */
template <typename Tx, typename Tm>
__global__ void _ApplyMask(
    const int count,
    const float scale,
    const Tx* x,
    const Tm* mask,
    Tx* y) {
    CUDA_1D_KERNEL_LOOP(idx, count) {
        y[idx] = x[idx] * mask[idx] * scale;
    }
}
/* ApplyMask, float32 data with a uint8 mask: launches the generic
 * kernel on the context's stream. (This region previously contained
 * the removed `DropoutGrad` launcher merged with this one; only the
 * new launcher is kept.) */
template <> void ApplyMask<float, uint8_t, CUDAContext>(
    const int count,
    const float scale,
    const float* x,
    const uint8_t* mask,
    float* y,
    CUDAContext* ctx) {
    _ApplyMask<float, uint8_t>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count,
                scale, x, mask, y);
}
/******************** activation.prelu ********************/
......@@ -753,13 +753,9 @@ __global__ void _Clip(
const T low,
const T high,
const T* x,
T* mask,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
mask[idx] = 1.0;
if (x[idx] > high || x[idx] < low) mask[idx] = 0.0;
y[idx] = x[idx] > high ? high : x[idx];
y[idx] = x[idx] < low ? low : x[idx];
y[idx] = max(low, min(x[idx], high));
}
}
......@@ -768,13 +764,237 @@ template <> void Clip<float, CUDAContext>(
const float low,
const float high,
const float* x,
float* mask,
float* y,
CUDAContext* ctx) {
_Clip<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, low, high, x, y);
}
/*! Gradient of clip(x, low, high): the incoming gradient passes
 *  through where x was inside the interval and is zeroed where the
 *  forward pass saturated. */
template <typename T>
__global__ void _ClipGrad(
    const int count,
    const T low,
    const T high,
    const T* x,
    const T* dy,
    T* dx) {
    CUDA_1D_KERNEL_LOOP(i, count) {
        const T v = x[i];
        if (v < low || v > high) dx[i] = 0;
        else dx[i] = dy[i];
    }
}
/* ClipGrad, float32: launches the gradient kernel on the context's
 * stream. (The stale removed-side argument line `low, high, x, mask,
 * y);` left over from the diff is dropped — only the dy/dx call
 * remains, matching _ClipGrad's signature.) */
template <> void ClipGrad<float, CUDAContext>(
    const int count,
    const float low,
    const float high,
    const float* x,
    const float* dy,
    float* dx,
    CUDAContext* ctx) {
    _ClipGrad<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count,
                low, high, x, dy, dx);
}
/******************** arithmetic.maximum ********************/
/*! Elementwise maximum of two same-shaped inputs: y = max(x1, x2). */
template <typename T>
__global__ void _MaximumE(
    const int count,
    const T* x1,
    const T* x2,
    T* y) {
    CUDA_1D_KERNEL_LOOP(i, count) {
        const T a = x1[i];
        const T b = x2[i];
        y[i] = max(a, b);
    }
}
/* MaximumE, float32: elementwise max launcher (E = elementwise). */
template <> void MaximumE<float, CUDAContext>(
    const int count,
    const float* x1,
    const float* x2,
    float* y,
    CUDAContext* ctx) {
    _MaximumE<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count, x1, x2, y);
}
/*! Maximum against a broadcast scalar: y = max(x1, x2). */
template <typename T>
__global__ void _MaximumB(
    const int count,
    const T* x1,
    const T x2,
    T* y) {
    CUDA_1D_KERNEL_LOOP(i, count) {
        const T a = x1[i];
        y[i] = max(a, x2);
    }
}
/* MaximumB, float32: broadcast-scalar max launcher (B = broadcast). */
template <> void MaximumB<float, CUDAContext>(
    const int count,
    const float* x1,
    const float x2,
    float* y,
    CUDAContext* ctx) {
    _MaximumB<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count, x1, x2, y);
}
/*! Gradient of elementwise max: the winning input takes the whole
 *  incoming gradient; on ties it goes to x2. */
template <typename T>
__global__ void _MaximumEGrad(
    const int count,
    const T* x1,
    const T* x2,
    const T* dy,
    T* dx1,
    T* dx2) {
    CUDA_1D_KERNEL_LOOP(i, count) {
        if (x1[i] > x2[i]) {
            dx1[i] = dy[i];
            dx2[i] = 0;
        } else {
            dx1[i] = 0;
            dx2[i] = dy[i];
        }
    }
}
/* MaximumEGrad, float32: routes dy into dx1/dx2 per-element. */
template <> void MaximumEGrad<float, CUDAContext>(
    const int count,
    const float* x1,
    const float* x2,
    const float* dy,
    float* dx1,
    float* dx2,
    CUDAContext* ctx) {
    _MaximumEGrad<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count, x1, x2, dy, dx1, dx2);
}
/*! Gradient of max against a broadcast scalar: dy reaches x1 only
 *  where it strictly beat x2; the scalar gets no gradient here. */
template <typename T>
__global__ void _MaximumBGrad(
    const int count,
    const T* x1,
    const T x2,
    const T* dy,
    T* dx1) {
    CUDA_1D_KERNEL_LOOP(i, count) {
        if (x1[i] > x2) dx1[i] = dy[i];
        else dx1[i] = 0;
    }
}
/* MaximumBGrad, float32: only dx1 is produced — the broadcast scalar
 * slot is commented out of the signature. */
template <> void MaximumBGrad<float, CUDAContext>(
    const int count,
    const float* x1,
    const float x2,
    const float* dy,
    float* dx1,
    /* float* dx2, */
    CUDAContext* ctx) {
    _MaximumBGrad<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count, x1, x2, dy, dx1);
}
/******************** arithmetic.minimum ********************/
/*! Elementwise minimum of two same-shaped inputs: y = min(x1, x2). */
template <typename T>
__global__ void _MinimumE(
    const int count,
    const T* x1,
    const T* x2,
    T* y) {
    CUDA_1D_KERNEL_LOOP(i, count) {
        const T a = x1[i];
        const T b = x2[i];
        y[i] = min(a, b);
    }
}
/* MinimumE, float32: elementwise min launcher (E = elementwise). */
template <> void MinimumE<float, CUDAContext>(
    const int count,
    const float* x1,
    const float* x2,
    float* y,
    CUDAContext* ctx) {
    _MinimumE<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count, x1, x2, y);
}
/*! Minimum against a broadcast scalar: y = min(x1, x2). */
template <typename T>
__global__ void _MinimumB(
    const int count,
    const T* x1,
    const T x2,
    T* y) {
    CUDA_1D_KERNEL_LOOP(i, count) {
        const T a = x1[i];
        y[i] = min(a, x2);
    }
}
/* MinimumB, float32: broadcast-scalar min launcher (B = broadcast). */
template <> void MinimumB<float, CUDAContext>(
    const int count,
    const float* x1,
    const float x2,
    float* y,
    CUDAContext* ctx) {
    _MinimumB<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count, x1, x2, y);
}
/*! Gradient of elementwise min: the winning input takes the whole
 *  incoming gradient; on ties it goes to x2. */
template <typename T>
__global__ void _MinimumEGrad(
    const int count,
    const T* x1,
    const T* x2,
    const T* dy,
    T* dx1,
    T* dx2) {
    CUDA_1D_KERNEL_LOOP(i, count) {
        if (x1[i] < x2[i]) {
            dx1[i] = dy[i];
            dx2[i] = 0;
        } else {
            dx1[i] = 0;
            dx2[i] = dy[i];
        }
    }
}
/* MinimumEGrad, float32: routes dy into dx1/dx2 per-element. */
template <> void MinimumEGrad<float, CUDAContext>(
    const int count,
    const float* x1,
    const float* x2,
    const float* dy,
    float* dx1,
    float* dx2,
    CUDAContext* ctx) {
    _MinimumEGrad<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count, x1, x2, dy, dx1, dx2);
}
/*! Gradient of min against a broadcast scalar: dy reaches x1 only
 *  where it was strictly below x2; the scalar gets no gradient. */
template <typename T>
__global__ void _MinimumBGrad(
    const int count,
    const T* x1,
    const T x2,
    const T* dy,
    T* dx1) {
    CUDA_1D_KERNEL_LOOP(i, count) {
        if (x1[i] < x2) dx1[i] = dy[i];
        else dx1[i] = 0;
    }
}
/* MinimumBGrad, float32: only dx1 is produced — the broadcast scalar
 * slot is commented out of the signature. */
template <> void MinimumBGrad<float, CUDAContext>(
    const int count,
    const float* x1,
    const float x2,
    const float* dy,
    float* dx1,
    /* float* dx2, */
    CUDAContext* ctx) {
    _MinimumBGrad<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count, x1, x2, dy, dx1);
}
/******************** control_flow.compare ********************/
......@@ -825,6 +1045,145 @@ template<> void AbsGrad<float, CUDAContext>(
0, ctx->cuda_stream() >> >(count, dy, dx);
}
/******************** loss.nll_loss ********************/
/* NLLLoss kernel: one thread per (outer, inner) position. Ignored
 * labels produce loss 0 / flag 0; valid ones produce
 * -log_prob[(oix, label, iix)] and flag 1 for later normalization. */
template <typename Tx, typename Ty>
__global__ void _NLLLoss(
    const int count,
    const int axis_dim,
    const int inner_dim,
    const Tx* log_prob,
    const Ty* labels,
    const int* ignores,
    const int num_ignores,
    Tx* losses,
    Tx* flags) {
    CUDA_1D_KERNEL_LOOP(idx, count) {
        const int oix = idx / inner_dim;
        const int iix = idx % inner_dim;
        const int label = labels[oix * inner_dim + iix];
        int k;
        /* linear scan of the (small) ignore list */
        for (k = 0; k < num_ignores; k++) {
            if (label == ignores[k]) {
                losses[idx] = flags[idx] = 0;
                break;
            }
        }
        /* k == num_ignores means the scan fell through: label is valid */
        if (k == num_ignores) {
            losses[idx] = -log_prob[
                (oix * axis_dim + label) * inner_dim + iix];
            flags[idx] = 1;
        }
    }
}
/* NLLLoss, float32 log-probs with float32 labels: one thread per
 * prediction (outer_dim * inner_dim). */
template <> void NLLLoss<float, float, CUDAContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float* log_prob,
    const float* labels,
    const int* ignores,
    const int num_ignores,
    float* losses,
    float* flags,
    CUDAContext* ctx) {
    const int num_preds = outer_dim * inner_dim;
    _NLLLoss<float, float>
        << < CUDA_BLOCKS(num_preds), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(
                num_preds, axis_dim, inner_dim,
                log_prob, labels, ignores,
                num_ignores, losses, flags);
}
/* NLLLoss, float32 log-probs with int64 labels: one thread per
 * prediction (outer_dim * inner_dim). */
template <> void NLLLoss<float, int64_t, CUDAContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float* log_prob,
    const int64_t* labels,
    const int* ignores,
    const int num_ignores,
    float* losses,
    float* flags,
    CUDAContext* ctx) {
    const int num_preds = outer_dim * inner_dim;
    _NLLLoss<float, int64_t>
        << < CUDA_BLOCKS(num_preds), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(
                num_preds, axis_dim, inner_dim,
                log_prob, labels, ignores,
                num_ignores, losses, flags);
}
/* NLLLossGrad kernel: one thread per (outer, inner) position.
 * Valid positions write -1 into dx at their label slot (dx presumably
 * pre-zeroed by the caller — TODO confirm) and set flags[idx] = 1;
 * ignored positions only clear their flag. Unlike the CPU variant,
 * flags here is per-element (reduced to a count later). */
template <typename Tx, typename Ty>
__global__ void _NLLLossGrad(
    const int count,
    const int axis_dim,
    const int inner_dim,
    const Tx* log_prob,
    const Ty* labels,
    const int* ignores,
    const int num_ignores,
    Tx* dx,
    Tx* flags) {
    CUDA_1D_KERNEL_LOOP(idx, count) {
        const int oix = idx / inner_dim;
        const int iix = idx % inner_dim;
        const int label = labels[oix * inner_dim + iix];
        int k;
        for (k = 0; k < num_ignores; k++)
            if (label == ignores[k]) break;
        if (k != num_ignores) {
            flags[idx] = 0;
        } else {
            dx[(oix * axis_dim + label) * inner_dim + iix] = -1;
            flags[idx] = 1;
        }
    }
}
/* NLLLossGrad, float32 with float32 labels: one thread per
 * prediction (outer_dim * inner_dim). */
template<> void NLLLossGrad<float, float, CUDAContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float* log_prob,
    const float* labels,
    const int* ignores,
    const int num_ignores,
    float* dx,
    float* flags,
    CUDAContext* ctx) {
    const int num_preds = outer_dim * inner_dim;
    _NLLLossGrad<float, float>
        << < CUDA_BLOCKS(num_preds), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(
                num_preds, axis_dim, inner_dim,
                log_prob, labels, ignores,
                num_ignores, dx, flags);
}
/* NLLLossGrad, float32 with int64 labels: one thread per prediction
 * (outer_dim * inner_dim). */
template<> void NLLLossGrad<float, int64_t, CUDAContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float* log_prob,
    const int64_t* labels,
    const int* ignores,
    const int num_ignores,
    float* dx,
    float* flags,
    CUDAContext* ctx) {
    const int num_preds = outer_dim * inner_dim;
    _NLLLossGrad<float, int64_t>
        << < CUDA_BLOCKS(num_preds), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(
                num_preds, axis_dim, inner_dim,
                log_prob, labels, ignores,
                num_ignores, dx, flags);
}
/******************** loss.sigmoid_cross_entropy ********************/
template <typename T>
......@@ -2856,8 +3215,7 @@ __global__ void _BiasAdd_NCHW(
const T* bias,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
const int bias_idx = (idx / inner_dim) % dim;
y[idx] += bias[bias_idx];
y[idx] += bias[(idx / inner_dim) % dim];
}
}
......@@ -3395,6 +3753,95 @@ template <> void Col2Im2d<float, CUDAContext>(
} else LOG(FATAL) << "Unknown data format: " << data_format;
}
/******************** vision.drop_block ********************/
/* DropBlock kernel (NCHW): one thread per seed anchor. A seed fires
 * when its uniform draw is below thresh (probability gamma), then the
 * block_size x block_size square anchored at (y, x) is cleared with
 * atomicAnd(..., 0) — atomic because neighbouring anchors' squares
 * overlap. The template parameter T is unused (launched as <int>). */
template <typename T>
__global__ void _DropBlock2d_NCHW(
    const int count,
    const int C,
    const int H,
    const int W,
    const int seed_h,
    const int seed_w,
    const int block_size,
    const uint32_t thresh,
    const uint32_t* seed,
    int* mask) {
    CUDA_1D_KERNEL_LOOP(idx, count) {
        if (seed[idx] < thresh) {
            /* decompose idx over the (N, C, seed_h, seed_w) anchor grid */
            const int x = idx % seed_w;
            const int y = (idx / seed_w) % seed_h;
            const int c = (idx / seed_w / seed_h) % C;
            const int n = (idx / seed_w / seed_h) / C;
            const int nc = (n * C + c) * H;
            for (int i = 0; i < block_size; ++i) {
                const int nch = (nc + y + i) * W;
                for (int j = 0; j < block_size; ++j)
                    atomicAnd(&mask[nch + x + j], 0);
            }
        }
    }
}
/* DropBlock kernel (NHWC): same anchor decomposition as the NCHW
 * variant (the seed grid itself is laid out (N, C, seed_h, seed_w));
 * only the mask addressing switches to channel-last. */
template <typename T>
__global__ void _DropBlock2d_NHWC(
    const int count,
    const int C,
    const int H,
    const int W,
    const int seed_h,
    const int seed_w,
    const int block_size,
    const uint32_t thresh,
    const uint32_t* seed,
    int* mask) {
    CUDA_1D_KERNEL_LOOP(idx, count) {
        if (seed[idx] < thresh) {
            const int x = idx % seed_w;
            const int y = (idx / seed_w) % seed_h;
            const int c = (idx / seed_w / seed_h) % C;
            const int n = (idx / seed_w / seed_h) / C;
            for (int i = 0; i < block_size; ++i) {
                const int nh = (n * H + y + i) * W;
                for (int j = 0; j < block_size; ++j)
                    atomicAnd(&mask[(nh + x + j) * C + c], 0);
            }
        }
    }
}
/* DropBlock2d (CUDA): unlike the CPU path (bernoulli seeds), this
 * draws uniform uint32 seeds and fires where seed < UINT_MAX * gamma,
 * which is equivalent in distribution. mask is only cleared here —
 * presumably pre-filled with ones by the caller; confirm. */
template <> void DropBlock2d<CUDAContext>(
    const int N,
    const int C,
    const int H,
    const int W,
    const int seed_h,
    const int seed_w,
    const int block_size,
    const float gamma,
    const string& data_format,
    uint32_t* seed,
    int* mask,
    CUDAContext* ctx) {
    /* one seed per (n, c, anchor_y, anchor_x) */
    const int count = N * C * seed_h * seed_w;
    math::RandomUniform<uint32_t, CUDAContext>(
        count, 0.f, float(UINT_MAX), seed, ctx);
    auto thresh = static_cast<uint32_t>(UINT_MAX * gamma);
    if (data_format == "NCHW") {
        _DropBlock2d_NCHW<int>
            << < CUDA_BLOCKS(count), CUDA_THREADS,
                0, ctx->cuda_stream() >> >(count,
                    C, H, W, seed_h, seed_w, block_size,
                    thresh, seed, mask);
    } else if(data_format == "NHWC") {
        _DropBlock2d_NHWC<int>
            << < CUDA_BLOCKS(count), CUDA_THREADS,
                0, ctx->cuda_stream() >> >(count,
                    C, H, W, seed_h, seed_w, block_size,
                    thresh, seed, mask);
    } else LOG(FATAL) << "Unknown data format: " << data_format;
}
/******************** vision.nn_resize ********************/
template <typename T>
......
......@@ -13,9 +13,77 @@ namespace dragon {
namespace kernel {
/******************** activation.dropout ********************/
/* Dropout forward in fp16: record the keep decision (mask32 > thresh)
 * into the compact uint8 mask, then y = x * scale * mask.
 * NOTE(review): the __CUDA_ARCH__ guard means the body is compiled
 * out below sm_53 (no half arithmetic) — the kernel silently writes
 * nothing on such devices. */
__global__ void _DropoutHalf(
    const int count,
    const uint32_t thresh,
    const half scale,
    const half* x,
    const uint32_t* mask32,
    uint8_t* mask8,
    half* y) {
    CUDA_1D_KERNEL_LOOP(idx, count) {
#if __CUDA_ARCH__ >= 530
        mask8[idx] = (mask32[idx] > thresh);
        y[idx] = __hmul(__hmul(x[idx], scale),
            __float2half((float)mask8[idx]));
#endif
    }
}
/* Dropout, float16: draw uniform uint32 randoms into mask32, then the
 * kernel thresholds them (keep iff mask32 > UINT_MAX * prob) and
 * saves the decision into the compact uint8 mask for the backward
 * ApplyMask pass. */
template<> void Dropout<float16, CUDAContext>(
    const int count,
    float prob,
    float scale,
    const float16* x,
    uint32_t* mask32,
    uint8_t* mask8,
    float16* y,
    CUDAContext* ctx) {
    math::RandomUniform<uint32_t, CUDAContext>(
        count, float(0), float(UINT_MAX), mask32, ctx);
    auto thresh = static_cast<uint32_t>(UINT_MAX * prob);
    _DropoutHalf
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count,
                thresh, dragon_cast<half, float>(scale),
                reinterpret_cast<const half*>(x),
                mask32, mask8, reinterpret_cast<half*>(y));
}
/* Apply a saved dropout mask in fp16: y = x * scale * mask.
 * Compiled out below sm_53 (no half arithmetic) by the arch guard. */
template <typename Tm>
__global__ void _ApplyMaskHalf(
    const int count,
    const half scale,
    const half* x,
    const Tm* mask,
    half* y) {
    CUDA_1D_KERNEL_LOOP(idx, count) {
#if __CUDA_ARCH__ >= 530
        y[idx] = __hmul(__hmul(x[idx], scale),
            __float2half((float)mask[idx]));
#endif
    }
}
/* ApplyMask, float16 data with a uint8 mask: launches the half
 * kernel on the context's stream; scale is converted host-side. */
template <> void ApplyMask<float16, uint8_t, CUDAContext>(
    const int count,
    const float scale,
    const float16* x,
    const uint8_t* mask,
    float16* y,
    CUDAContext* ctx) {
    _ApplyMaskHalf<uint8_t>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count,
                dragon_cast<half, float>(scale),
                reinterpret_cast<const half*>(x),
                mask, reinterpret_cast<half*>(y));
}
/******************** activation.relu ********************/
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _ReluHalf(
const int count,
......@@ -45,7 +113,6 @@ __global__ void _ReluHalf2(
#endif
}
}
#endif
template<> void Relu<float16, CUDAContext>(
const int count,
......@@ -53,8 +120,7 @@ template<> void Relu<float16, CUDAContext>(
const float16* x,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((count & 1) == 0 == 0) {
if ((count & 1) == 0) {
_ReluHalf2<half2>
<< < CUDA_BLOCKS(count >> 1), CUDA_THREADS,
0, ctx->cuda_stream() >> > (count >> 1,
......@@ -69,14 +135,10 @@ template<> void Relu<float16, CUDAContext>(
reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
/******************** arithmetic.affine ********************/
#ifdef WITH_CUDA_FP16
template <typename T>
__global__ void _AffineWithOBiasHalf(
const int count,
......@@ -112,7 +174,6 @@ __global__ void _AffineWithBiasHalf(
#endif
}
}
#endif
template<> void Affine<float16, CUDAContext>(
const int count,
......@@ -125,7 +186,6 @@ template<> void Affine<float16, CUDAContext>(
const float16* beta_multiplier,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if (beta != nullptr) {
_AffineWithBiasHalf<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
......@@ -144,9 +204,151 @@ template<> void Affine<float16, CUDAContext>(
reinterpret_cast<const half*>(alpha),
reinterpret_cast<half*>(y));
}
#else
CUDA_FP16_NOT_COMPILED;
}
/******************** loss.nll_loss ********************/
/* NLLLoss kernel for fp16 log-probs: same ignore-scan protocol as the
 * float kernel; the selected log-prob is negated in half (__hneg) and
 * widened to float for the loss buffer. Compiled out below sm_53. */
template <typename Ty>
__global__ void _NLLLossHalf(
    const int count,
    const int axis_dim,
    const int inner_dim,
    const half* log_prob,
    const Ty* labels,
    const int* ignores,
    const int num_ignores,
    float* losses,
    float* flags) {
    CUDA_1D_KERNEL_LOOP(idx, count) {
#if __CUDA_ARCH__ >= 530
        const int oix = idx / inner_dim;
        const int iix = idx % inner_dim;
        const int label = labels[oix * inner_dim + iix];
        int k;
        for (k = 0; k < num_ignores; k++) {
            if (label == ignores[k]) {
                losses[idx] = flags[idx] = 0;
                break;
            }
        }
        /* scan fell through: label is not ignored */
        if (k == num_ignores) {
            losses[idx] = __half2float(__hneg(
                log_prob[(oix * axis_dim + label) * inner_dim + iix]));
            flags[idx] = 1;
        }
#endif
    }
}
/* NLLLoss, float16 log-probs with float32 labels: one thread per
 * prediction (outer_dim * inner_dim). */
template <> void NLLLoss<float16, float, CUDAContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float16* log_prob,
    const float* labels,
    const int* ignores,
    const int num_ignores,
    float* losses,
    float* flags,
    CUDAContext* ctx) {
    const int num_preds = outer_dim * inner_dim;
    _NLLLossHalf<float>
        << < CUDA_BLOCKS(num_preds), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(
                num_preds, axis_dim, inner_dim,
                reinterpret_cast<const half*>(log_prob), labels,
                ignores, num_ignores, losses, flags);
}
/* NLLLoss, float16 log-probs with int64 labels: one thread per
 * prediction (outer_dim * inner_dim). */
template <> void NLLLoss<float16, int64_t, CUDAContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float16* log_prob,
    const int64_t* labels,
    const int* ignores,
    const int num_ignores,
    float* losses,
    float* flags,
    CUDAContext* ctx) {
    const int num_preds = outer_dim * inner_dim;
    _NLLLossHalf<int64_t>
        << < CUDA_BLOCKS(num_preds), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(
                num_preds, axis_dim, inner_dim,
                reinterpret_cast<const half*>(log_prob), labels,
                ignores, num_ignores, losses, flags);
}
/* NLLLossGrad kernel for fp16: valid positions write half(-1) into dx
 * at their label slot (dx presumably pre-zeroed by the caller — TODO
 * confirm) and set flags[idx] = 1; ignored positions clear the flag.
 * Compiled out below sm_53. */
template <typename Ty>
__global__ void _NLLLossGradHalf(
    const int count,
    const int axis_dim,
    const int inner_dim,
    const half* log_prob,
    const Ty* labels,
    const int* ignores,
    const int num_ignores,
    half* dx,
    float* flags) {
    CUDA_1D_KERNEL_LOOP(idx, count) {
#if __CUDA_ARCH__ >= 530
        const int oix = idx / inner_dim;
        const int iix = idx % inner_dim;
        const int label = labels[oix * inner_dim + iix];
        int k;
        for (k = 0; k < num_ignores; k++)
            if (label == ignores[k]) break;
        if (k != num_ignores) {
            flags[idx] = 0;
        } else {
            dx[(oix * axis_dim + label) * inner_dim + iix] = __float2half(-1.);
            flags[idx] = 1;
        }
#endif
    }
}
/* NLLLossGrad, float16 with float32 labels: one thread per
 * prediction (outer_dim * inner_dim). */
template<> void NLLLossGrad<float16, float, CUDAContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float16* log_prob,
    const float* labels,
    const int* ignores,
    const int num_ignores,
    float16* dx,
    float* flags,
    CUDAContext* ctx) {
    const int num_preds = outer_dim * inner_dim;
    _NLLLossGradHalf<float>
        << < CUDA_BLOCKS(num_preds), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(
                num_preds, axis_dim, inner_dim,
                reinterpret_cast<const half*>(log_prob), labels,
                ignores, num_ignores,
                reinterpret_cast<half*>(dx), flags);
}
/* NLLLossGrad, float16 with int64 labels: one thread per prediction
 * (outer_dim * inner_dim). */
template<> void NLLLossGrad<float16, int64_t, CUDAContext>(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const float16* log_prob,
    const int64_t* labels,
    const int* ignores,
    const int num_ignores,
    float16* dx,
    float* flags,
    CUDAContext* ctx) {
    const int num_preds = outer_dim * inner_dim;
    _NLLLossGradHalf<int64_t>
        << < CUDA_BLOCKS(num_preds), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(
                num_preds, axis_dim, inner_dim,
                reinterpret_cast<const half*>(log_prob), labels,
                ignores, num_ignores,
                reinterpret_cast<half*>(dx), flags);
}
/******************** loss.sparse_softmax_cross_entropy ********************/
......@@ -304,11 +506,11 @@ template<> void SparseSoftmaxCrossEntropyGrad<float16, int64_t, CUDAContext>(
reinterpret_cast<const half*>(prob), labels,
ignores, num_ignores,
reinterpret_cast<half*>(dx), flags);
}
/******************** misc.astype ********************/
#ifdef WITH_CUDA_FP16
__global__ void _TypeHalf2Float(
const int count,
const half* a,
......@@ -334,7 +536,6 @@ __global__ void _TypeHalf2Half(
b[idx] = a[idx];
}
}
#endif
#define DEFINE_TYPE_DISABLE_FP16(type) \
template <> void TypeA2B<float16, type, CUDAContext>( \
......@@ -376,7 +577,6 @@ __global__ void _TypeHalf2Half(
a, reinterpret_cast<half*>(b)); \
}
#ifdef WITH_CUDA_FP16
template <> void TypeA2B<float16, float16, CUDAContext>(
const int count,
const float16* a,
......@@ -388,29 +588,15 @@ template <> void TypeA2B<float16, float16, CUDAContext>(
reinterpret_cast<const half*>(a),
reinterpret_cast<half*>(b));
}
DEFINE_TYPE_ENABLE_FP16_FP32;
DEFINE_TYPE_DISABLE_FP16(double);
DEFINE_TYPE_DISABLE_FP16(int);
DEFINE_TYPE_DISABLE_FP16(int64_t);
DEFINE_TYPE_DISABLE_FP16(uint8_t);
#else
template <> void TypeA2B<float16, float16, CUDAContext>(
const int count,
const float16* a,
float16* b,
CUDAContext* ctx) {
LOG(FATAL) << "CUDAContext has not implemented: float16 -> float16";
}
DEFINE_TYPE_DISABLE_FP16(float);
DEFINE_TYPE_DISABLE_FP16(double);
DEFINE_TYPE_DISABLE_FP16(int);
DEFINE_TYPE_DISABLE_FP16(int64_t);
DEFINE_TYPE_DISABLE_FP16(uint8_t);
#endif
/******************** misc.image_data ********************/
#ifdef WITH_CUDA_FP16
template <typename Tx, typename Ty>
__global__ void _ImageDataHalf_NCHW(
const int count,
......@@ -453,7 +639,6 @@ __global__ void _ImageDataHalf_NHWC(
y[idx] = __float2half(raw_value);
}
}
#endif
template <> void ImageData<float, float16, CUDAContext>(
const int count,
......@@ -467,7 +652,6 @@ template <> void ImageData<float, float16, CUDAContext>(
const float* x,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if (data_format == "NCHW") {
_ImageDataHalf_NCHW<float, half>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
......@@ -481,9 +665,6 @@ template <> void ImageData<float, float16, CUDAContext>(
N, C, H, W, mean_values, std_values,
x, reinterpret_cast<half*>(y));
} else LOG(FATAL) << "Unknown data format: " << data_format;
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
template <> void ImageData<uint8_t, float16, CUDAContext>(
......@@ -498,7 +679,6 @@ template <> void ImageData<uint8_t, float16, CUDAContext>(
const uint8_t* x,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if (data_format == "NCHW") {
_ImageDataHalf_NCHW<uint8_t, half>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
......@@ -512,9 +692,6 @@ template <> void ImageData<uint8_t, float16, CUDAContext>(
N, C, H, W, mean_values, std_values,
x, reinterpret_cast<half*>(y));
} else LOG(FATAL) << "Unknown data format: " << data_format;
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
/******************** ndarray.concat ********************/
......@@ -549,7 +726,6 @@ template <> void Concat<float16, CUDAContext>(
const float16* x,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_ConcatHalf<half>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count,
......@@ -557,9 +733,6 @@ template <> void Concat<float16, CUDAContext>(
x_concat_dim, y_concat_dim, concat_offset,
reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y));
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
template <typename T>
......@@ -592,7 +765,6 @@ template <> void ConcatGrad<float16, CUDAContext>(
const float16* dy,
float16* dx,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_ConcatGradHalf<half>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count,
......@@ -600,9 +772,6 @@ template <> void ConcatGrad<float16, CUDAContext>(
x_concat_dim, y_concat_dim, concat_offset,
reinterpret_cast<const half*>(dy),
reinterpret_cast<half*>(dx));
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
/******************** ndarray.transpose ********************/
......@@ -636,16 +805,12 @@ template <> void Transpose<float16, CUDAContext>(
const float16* x,
float16* y,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_TransposeHalf<half>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count,
ndim, order, old_steps, new_steps,
reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y));
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
template <typename T>
......@@ -677,21 +842,16 @@ template <> void TransposeGrad<float16, CUDAContext>(
const float16* dy,
float16* dx,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_TransposeGradHalf<half>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count,
ndim, order, old_steps, new_steps,
reinterpret_cast<const half*>(dy),
reinterpret_cast<half*>(dx));
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
/******************** update.adam_update ********************/
#ifdef WITH_CUDA_FP16
__global__ void _AdamUpdateHalf(
const int count,
const half lr,
......@@ -720,7 +880,6 @@ __global__ void _AdamUpdateHalf(
#endif
}
}
#endif
template <> void AdamUpdate<float16, CUDAContext>(
const int count,
......@@ -732,7 +891,6 @@ template <> void AdamUpdate<float16, CUDAContext>(
float16* m,
float16* v,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_AdamUpdateHalf
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count,
......@@ -743,14 +901,10 @@ template <> void AdamUpdate<float16, CUDAContext>(
reinterpret_cast<half*>(g),
reinterpret_cast<half*>(m),
reinterpret_cast<half*>(v));
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
/******************** update.nesterov_update ********************/
#ifdef WITH_CUDA_FP16
__global__ void _NesterovUpdateHalf(
const int count,
const half lr,
......@@ -794,7 +948,6 @@ __global__ void _NesterovUpdateHalf2(
#endif
}
}
#endif
template <> void NesterovUpdate<float16, CUDAContext>(
const int count,
......@@ -803,8 +956,7 @@ template <> void NesterovUpdate<float16, CUDAContext>(
float16* g,
float16* h,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((count & 1) == 0 == 0) {
if ((count & 1) == 0) {
_NesterovUpdateHalf2
<< < CUDA_BLOCKS(count >> 1), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count >> 1,
......@@ -821,14 +973,10 @@ template <> void NesterovUpdate<float16, CUDAContext>(
reinterpret_cast<half*>(g),
reinterpret_cast<half*>(h));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
/******************** update.rmsprop_update ********************/
#ifdef WITH_CUDA_FP16
__global__ void _RMSPropUpdateHalf(
const int count,
const half lr,
......@@ -851,7 +999,6 @@ __global__ void _RMSPropUpdateHalf(
#endif
}
}
#endif
template <> void RMSPropUpdate<float16, CUDAContext>(
const int count,
......@@ -861,7 +1008,6 @@ template <> void RMSPropUpdate<float16, CUDAContext>(
float16* g,
float16* h,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_RMSPropUpdateHalf
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count,
......@@ -870,14 +1016,10 @@ template <> void RMSPropUpdate<float16, CUDAContext>(
dragon_cast<half, float>(eps),
reinterpret_cast<half*>(g),
reinterpret_cast<half*>(h));
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
/******************** update.sgd_update ********************/
#ifdef WITH_CUDA_FP16
__global__ void _SGDUpdateHalf(
const int count,
const half lr,
......@@ -911,7 +1053,6 @@ __global__ void _SGDUpdateHalf2(
#endif
}
}
#endif
template <> void SGDUpdate<float16, CUDAContext>(
const int count,
......@@ -920,8 +1061,7 @@ template <> void SGDUpdate<float16, CUDAContext>(
float16* g,
float16* h,
CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((count & 1) == 0 == 0) {
if ((count & 1) == 0) {
_SGDUpdateHalf2
<< < CUDA_BLOCKS(count >> 1), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count >> 1,
......@@ -938,9 +1078,6 @@ template <> void SGDUpdate<float16, CUDAContext>(
reinterpret_cast<half*>(g),
reinterpret_cast<half*>(h));
}
#else
CUDA_FP16_NOT_COMPILED;
#endif
}
} // namespace kernel
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!