Commit 96f7277e by Ting PAN

Add Cambricon's CNML Context

1 parent 5cd0761b
Showing with 4958 additions and 1148 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in Dragon.
Dragon 0.2.2.12 (20181120)
DRAGON_VERSION == 2212
Changes (w.r.t. Dragon 0.2.2.11):
Preview Features:
- Added Cambricon's CNML context.
- Added the support for Int8(Char) Tensor.
- Removed the cuda device id query from pointer.
- Added ``DropBlock2dOp``
- Added ``MaximumOp``, ``MinimumOp``, ``NLLLossOp``.
- Added CuDNN support for ``BiasAddOp``.
- Optimized memory usage of ``DropoutOp``.
- Replaced ``thread_local`` with platform TLS solution.
- Changed the default norm eps from 1e-3 to 1e-5,
affected: ``BatchNorm``, ``BatchRenorm``, ``GroupNorm``, ``InstanceNorm``, ``L2Norm``.
- Enforced CUDA FP16 support (i.e. Removed ``WITH_CUDA_FP16``).
- [PyTorch] Added ``torch.one_hot``.
- [PyTorch] Added ``torch.log``, ``Tensor.log``, ``torch.exp`` and ``Tensor.exp``.
- [PyTorch] Added ``torch.minimum``, ``torch.maximum``,
``torch.clamp``, ``Tensor.clamp``, ``Tensor.clamp_``.
- [PyTorch] Added ``nn.ELU`` and ``nn.SELU``.
- [PyTorch] Added ``nn.GroupNorm``.
- [PyTorch] Added ``nn.NLLLoss``, ``nn.BCEWithLogitsLoss``,
``nn.L1Loss``, ``nn.MSELoss``, ``nn.SmoothL1Loss``.
- [PyTorch] Added ``nn.DropBlock2d``.
- [PyTorch] Added ``train`` and ``eval`` mode for Module,
affected: ``nn.BatchNorm``, ``nn.Dropout``.
- [PyTorch] Deprecated the ``size_average`` and ``reduce`` in
``nn.Loss``, added ``reduction`` instead.
- [PyTorch] ``torch.save`` can save both ``torch.Tensor`` and other pickle values.
- [PyCaffe] Added ``DropBlockLayer``.
Bugs fixed:
- Fixed the uncomputed output in ``BiasAddGradientOp``.
- Fixed the incorrect gradients of ``ClipGradientOp``.
- Fixed the wrong results of ``math::Inv`` under ``CPUContext``.
- Fixed the issue that the default device is used on initializing NCCL.
- Removed the strictly shape check in ``SmoothL1Op``.
- Fixed wrong CXX API exporting under Win32.
- [PyTorch] Fixed an issue that multiple ``GradientGather`` are triggered by one Operator.
- [PyTorch] Fixed the schema check for in-place fundamental ops.
- [PyTorch] Fixed the missing shape and dtype after ``Tensor.copy_``.
- [PyTorch] Fixed an issue that ``Tensor.fill_`` and ``Tensor.zero_``
will change the data type of a non-empty Tensor.
- [PyTorch] Fixed the Python2 Int(s) check.
------------------------------------------------------------------------
\ No newline at end of file
...@@ -8,10 +8,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ...@@ -8,10 +8,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
unzip \ unzip \
ssh \ ssh \
vim \ vim \
libtbb-dev \
libsdl2-dev \
libnuma-dev \ libnuma-dev \
libprotobuf-dev \ libprotobuf-dev \
protobuf-compiler \ protobuf-compiler \
libopencv-dev \
libopenblas-dev \ libopenblas-dev \
libboost-all-dev \
python3-pip \ python3-pip \
python3-dev \ python3-dev \
python3-pyqt4 \ python3-pyqt4 \
...@@ -40,3 +44,5 @@ RUN git clone https://github.com/seetaresearch/Dragon.git && \ ...@@ -40,3 +44,5 @@ RUN git clone https://github.com/seetaresearch/Dragon.git && \
wget http://dragon.seetatech.com/download/docker/ubuntu-16.04-cpu-openblas/CMakeLists.txt && \ wget http://dragon.seetatech.com/download/docker/ubuntu-16.04-cpu-openblas/CMakeLists.txt && \
mkdir build && cd build && cmake .. && make install -j8 && cd .. && rm -rf build && \ mkdir build && cd build && cmake .. && make install -j8 && cd .. && rm -rf build && \
cd python && python3 setup.py install cd python && python3 setup.py install
RUN rm /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python && ln -s /usr/bin/pip3 /usr/bin/pip
\ No newline at end of file
...@@ -9,10 +9,14 @@ RUN rm /etc/apt/sources.list.d/cuda.list && rm /etc/apt/sources.list.d/nvidia-ml ...@@ -9,10 +9,14 @@ RUN rm /etc/apt/sources.list.d/cuda.list && rm /etc/apt/sources.list.d/nvidia-ml
unzip \ unzip \
ssh \ ssh \
vim \ vim \
libtbb-dev \
libsdl2-dev \
libnuma-dev \ libnuma-dev \
libprotobuf-dev \ libprotobuf-dev \
protobuf-compiler \ protobuf-compiler \
libopencv-dev \
libopenblas-dev \ libopenblas-dev \
libboost-all-dev \
libnccl2 \ libnccl2 \
libnccl-dev \ libnccl-dev \
python3-pip \ python3-pip \
...@@ -43,3 +47,5 @@ RUN git clone https://github.com/seetaresearch/Dragon.git && \ ...@@ -43,3 +47,5 @@ RUN git clone https://github.com/seetaresearch/Dragon.git && \
wget http://dragon.seetatech.com/download/docker/ubuntu-16.04-cuda9.0-cudnn7/CMakeLists.txt && \ wget http://dragon.seetatech.com/download/docker/ubuntu-16.04-cuda9.0-cudnn7/CMakeLists.txt && \
mkdir build && cd build && cmake .. && make install -j8 && cd .. && rm -rf build && \ mkdir build && cd build && cmake .. && make install -j8 && cd .. && rm -rf build && \
cd python && python3 setup.py install cd python && python3 setup.py install
RUN rm /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python && ln -s /usr/bin/pip3 /usr/bin/pip
\ No newline at end of file
...@@ -17,7 +17,6 @@ option(WITH_SSE "Set ON to use SSE 4.1" ON) ...@@ -17,7 +17,6 @@ option(WITH_SSE "Set ON to use SSE 4.1" ON)
option(WITH_MPI "Set ON to use MPI" OFF) option(WITH_MPI "Set ON to use MPI" OFF)
option(WITH_MPI_CUDA "Set ON to use MPI-CUDA" OFF) option(WITH_MPI_CUDA "Set ON to use MPI-CUDA" OFF)
option(WITH_MPI_NCCL "Set ON to use MPI-NCCL" OFF) option(WITH_MPI_NCCL "Set ON to use MPI-NCCL" OFF)
option(WITH_CUDA_FP16 "Set ON to use FP16" ON)
# Set your 3rdparty # Set your 3rdparty
set(3RDPARTY_DIR ${PROJECT_SOURCE_DIR}/../3rdparty) set(3RDPARTY_DIR ${PROJECT_SOURCE_DIR}/../3rdparty)
...@@ -163,10 +162,6 @@ if (WITH_MPI_NCCL) ...@@ -163,10 +162,6 @@ if (WITH_MPI_NCCL)
ADD_DEFINITIONS(-DWITH_MPI_NCCL) ADD_DEFINITIONS(-DWITH_MPI_NCCL)
message(STATUS "Use MPI-NCCL [Optional]") message(STATUS "Use MPI-NCCL [Optional]")
endif() endif()
if (WITH_CUDA_FP16)
ADD_DEFINITIONS(-DWITH_CUDA_FP16)
message(STATUS "Use CUDA FP16 [Optional]")
endif()
# ---[ Flags # ---[ Flags
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_ARCH}") set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_ARCH}")
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#define DRAGON_CORE_COMMON_H_ #define DRAGON_CORE_COMMON_H_
#include <ctime> #include <ctime>
#include <random>
#include <climits> #include <climits>
#include <memory> #include <memory>
#include <string> #include <string>
...@@ -49,25 +50,35 @@ using Map = std::unordered_map<Key, Value>; ...@@ -49,25 +50,35 @@ using Map = std::unordered_map<Key, Value>;
template <typename Value> template <typename Value>
using Set = std::unordered_set<Value> ; using Set = std::unordered_set<Value> ;
/* /* * * * * * * * * * * * * * * * * * * * *
* Define the Kernel version. * *
* * Kernel Version *
* | Major(2) | Minor(2) | Patch(11) | * *
*/ * Major(2) | Minor(2) | Patch(12) *
#define DRAGON_VERSION 2211 * *
* * * * * * * * * * * * * * * * * * * * */
#define DRAGON_VERSION 2212
/* * * * * * * * * * * * * * * * * * * * *
* *
* Default Random Seed *
* *
* * * * * * * * * * * * * * * * * * * * */
/*
* Define the default random seed.
*/
#define DEFAULT_RNG_SEED 3 #define DEFAULT_RNG_SEED 3
/* /* * * * * * * * * * * * * * * * * * * * *
* Define the common marcos. * *
*/ * Macros *
#ifdef _MSC_VER * *
#if _MSC_VER < 1900 * * * * * * * * * * * * * * * * * * * * */
#define thread_local __declspec(thread)
#endif // avoid using of "thread_local" for VS2013 or older Xcode
#if defined(__clang__) || defined(__GNUC__)
#define TLS_OBJECT __thread
#else
#define TLS_OBJECT __declspec(thread)
#endif #endif
#define CONCATENATE_IMPL(s1, s2) s1##s2 #define CONCATENATE_IMPL(s1, s2) s1##s2
......
...@@ -12,15 +12,8 @@ ...@@ -12,15 +12,8 @@
#ifndef DRAGON_CORE_CONTEXT_H_ #ifndef DRAGON_CORE_CONTEXT_H_
#define DRAGON_CORE_CONTEXT_H_ #define DRAGON_CORE_CONTEXT_H_
#include <random>
#include <ctime>
#include "core/common.h" #include "core/common.h"
#ifdef WITH_CUDA
#include "utils/cuda_device.h"
#endif
namespace dragon { namespace dragon {
class CPUContext { class CPUContext {
...@@ -45,7 +38,7 @@ class CPUContext { ...@@ -45,7 +38,7 @@ class CPUContext {
#else #else
data = malloc(nbytes); data = malloc(nbytes);
#endif #endif
CHECK(data) << "Malloc mem: " << nbytes << " bytes failed."; CHECK(data) << "\nMalloc mem: " << nbytes << " bytes failed.";
return data; return data;
} }
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_CORE_CONTEXT_CNML_H_
#define DRAGON_CORE_CONTEXT_CNML_H_

/* CAMBRICON's CNRT && CNML Environment */

#include "core/common.h"

// Opaque handles from the CNRT/CNML SDK, forward-declared so that
// including this header does not require the vendor headers.
struct cnrtStream;
struct cnmlCpuTensor;
struct cnmlTensor;
struct cnmlFusionOp;
typedef struct cnrtStream* cnrtStream_t;
typedef struct cnmlCpuTensor* cnmlCpuTensor_t;
typedef struct cnmlTensor* cnmlTensor_t;
typedef struct cnmlFusionOp* cnmlFusionOp_t;

namespace dragon {

class CNRTObject;

/*!
 * Device context for Cambricon's CNML library.
 *
 * Mirrors the CPUContext/CUDAContext interface so operators
 * templated on a Context type can also run on MLU devices.
 */
class CNMLContext {
 public:
    /*! Construct from a DeviceOption, taking its device id and
     *  random seed (or DEFAULT_RNG_SEED if the option has none). */
    CNMLContext(const DeviceOption& option)
        : device_id_(option.device_id()),
          random_seed_(option.has_random_seed() ?
              option.random_seed() : DEFAULT_RNG_SEED) {
        CHECK_EQ(option.device_type(), CNML);
    }

    /*! Construct for a given device id with the default random seed. */
    CNMLContext(const int device_id = 0)
        : device_id_(device_id),
          random_seed_(DEFAULT_RNG_SEED) {}

    /*! Bind this context to its device and select a stream. */
    void SwitchToDevice(int stream_id);

    /*! Stream 0 is reserved for the default stream,
     *  so stream 1 is the first general-purpose stream. */
    inline void SwitchToDevice() { SwitchToDevice(1); }

    /*! Block until queued device work has finished.
     *  (Spelling follows the same-named CUDAContext method.) */
    void FinishDeviceCompution();

    /*! Allocate nbytes of device memory. */
    static void* New(size_t nbytes);

    /*! Zero-fill nbytes at ptr on the device. */
    static void Memset(
        size_t              nbytes,
        void*               ptr);

    /*! Asynchronous variant of Memset; currently synchronous. */
    inline void MemsetAsync(
        size_t              nbytes,
        void*               ptr) {
        Memset(nbytes, ptr);
    }

    /*! Copy nbytes from src to dst across the given contexts. */
    template<class DstContext, class SrcContext>
    static void Memcpy(
        size_t              nbytes,
        void*               dst,
        const void*         src);

    /*! Asynchronous variant of Memcpy; currently synchronous. */
    template<class DstContext, class SrcContext>
    inline void MemcpyAsync(
        size_t              nbytes,
        void*               dst,
        const void*         src) {
        // BUGFIX: forward the arguments in declaration order
        // (nbytes, dst, src); the previous (dst, src, nbytes) order
        // did not match Memcpy's signature and fails to compile
        // when instantiated (void* is not convertible to size_t).
        Memcpy<DstContext, SrcContext>(nbytes, dst, src);
    }

    /*! Free device memory allocated by New(). */
    static void Delete(void* data);

    /*! Return the device id bound to this context. */
    inline int device_id() const { return device_id_; }

    /*! Select the stream used by subsequent calls. */
    inline void set_stream_id(int stream_id) { stream_id_ = stream_id; }

    /*! Return the CNRT stream of this context. */
    inline cnrtStream_t cnrt_stream() {
        return cnrt_stream(device_id_, stream_id_);
    }

    /*! Return the CNRT stream of the given device/stream pair. */
    static cnrtStream_t cnrt_stream(
        int                 device_id,
        int                 stream_id);

    /*! Global mutex guarding lazy creation of shared handles. */
    static std::mutex& mutex() { static std::mutex m; return m; }

    /*! Per-thread CNRT bookkeeping object.
     *  NOTE(review): CUDAContext replaced ``thread_local`` with the
     *  TLS_OBJECT macro in this commit; consider doing the same here
     *  for VS2013/old-Xcode compatibility — confirm with the .cc. */
    static thread_local CNRTObject cnrt_object_;

 private:
    int device_id_, stream_id_ = 1, random_seed_;
    unique_ptr<std::mt19937> rand_generator_;
};

}  // namespace dragon

#endif  // DRAGON_CORE_CONTEXT_CNML_H_
\ No newline at end of file
...@@ -12,8 +12,9 @@ ...@@ -12,8 +12,9 @@
#ifndef DRAGON_CORE_CONTEXT_CUDA_H_ #ifndef DRAGON_CORE_CONTEXT_CUDA_H_
#define DRAGON_CORE_CONTEXT_CUDA_H_ #define DRAGON_CORE_CONTEXT_CUDA_H_
/* NVIDIA's CUDA Environment */
#include "core/common.h" #include "core/common.h"
#include "core/context.h"
#include "utils/cuda_device.h" #include "utils/cuda_device.h"
#include "utils/cudnn_device.h" #include "utils/cudnn_device.h"
...@@ -52,13 +53,13 @@ class CUDAObject { ...@@ -52,13 +53,13 @@ class CUDAObject {
} }
// follow the caffe2, // follow the caffe2,
// each device takes a group of non-bl0cking streams // each device takes a group of non-blocking streams
// the stream 0 is reserved for default stream, // the stream 0 is reserved for default stream,
// as some computations really require it, // as some computations really require it,
// e.g. cublas.asum() and mixed cpu/cuda operations // e.g. cublas.asum() and mixed cpu/cuda operations
// besides, somes calls, such as cudnn.conv() and cudnn.rnn(), // besides, somes calls, such as cudnn.conv() and cudnn.rnn(),
// produce wrong results if running them on non-blocking streams // produce wrong results if running them on non-blocking streams
// note that caffe2 also use default streams (within CuDNNState) // note that caffe2 also uses default streams (within CuDNNState)
cudaStream_t GetStream(int device_id, int stream_id) { cudaStream_t GetStream(int device_id, int stream_id) {
vector<cudaStream_t>& dev_streams = cuda_streams[device_id]; vector<cudaStream_t>& dev_streams = cuda_streams[device_id];
if (dev_streams.size() <= (unsigned)stream_id) if (dev_streams.size() <= (unsigned)stream_id)
...@@ -140,7 +141,7 @@ class CUDAContext { ...@@ -140,7 +141,7 @@ class CUDAContext {
inline static void* New(size_t nbytes) { inline static void* New(size_t nbytes) {
void* data; void* data;
cudaMalloc(&data, nbytes); cudaMalloc(&data, nbytes);
CHECK(data) << "Malloc cuda mem: " CHECK(data) << "\nMalloc cuda mem: "
<< nbytes << " bytes failed."; << nbytes << " bytes failed.";
return data; return data;
} }
...@@ -199,11 +200,11 @@ class CUDAContext { ...@@ -199,11 +200,11 @@ class CUDAContext {
static cudaStream_t cuda_stream( static cudaStream_t cuda_stream(
int device_id, int device_id,
int stream_id) { int stream_id) {
return cuda_object_.GetStream(device_id, stream_id); return cuda_object()->GetStream(device_id, stream_id);
} }
cublasHandle_t cublas_handle() { cublasHandle_t cublas_handle() {
return cuda_object_.GetCuBLASHandle(device_id_, stream_id_); return cuda_object()->GetCuBLASHandle(device_id_, stream_id_);
} }
inline std::mt19937* rand_generator() { inline std::mt19937* rand_generator() {
...@@ -227,13 +228,17 @@ class CUDAContext { ...@@ -227,13 +228,17 @@ class CUDAContext {
#ifdef WITH_CUDNN #ifdef WITH_CUDNN
cudnnHandle_t cudnn_handle() { cudnnHandle_t cudnn_handle() {
return cuda_object_.GetCuDNNHandle(device_id_, stream_id_); return cuda_object()->GetCuDNNHandle(device_id_, stream_id_);
} }
#endif #endif
static std::mutex& mutex() { static std::mutex m; return m; } static std::mutex& mutex() { static std::mutex m; return m; }
static thread_local CUDAObject cuda_object_; static CUDAObject* cuda_object() {
static TLS_OBJECT CUDAObject* cuda_object_;
if (!cuda_object_) cuda_object_ = new CUDAObject();
return cuda_object_;
}
private: private:
int device_id_, stream_id_ = 1, random_seed_; int device_id_, stream_id_ = 1, random_seed_;
......
...@@ -48,10 +48,10 @@ class GraphBase { ...@@ -48,10 +48,10 @@ class GraphBase {
Workspace* ws_; Workspace* ws_;
}; };
class Graph final : public GraphBase { class Graph : public GraphBase {
public: public:
Graph(const GraphDef& meta_graph, Workspace* ws); Graph(const GraphDef& meta_graph, Workspace* ws);
~Graph() { for (auto* op : ops_) delete op; } virtual ~Graph() { for (auto* op : ops_) delete op; }
bool Create( bool Create(
const GraphDef& optimized_graph, const GraphDef& optimized_graph,
...@@ -73,7 +73,7 @@ class Graph final : public GraphBase { ...@@ -73,7 +73,7 @@ class Graph final : public GraphBase {
inline Workspace* ws() const { return ws_; } inline Workspace* ws() const { return ws_; }
private: protected:
void ForwardShareDyeing(string u, string ancestor); void ForwardShareDyeing(string u, string ancestor);
void ForwardPruneDyeing( void ForwardPruneDyeing(
string u, string u,
...@@ -98,6 +98,9 @@ DECLARE_REGISTRY( ...@@ -98,6 +98,9 @@ DECLARE_REGISTRY(
const GraphDef&, const GraphDef&,
Workspace*); Workspace*);
#define REGISTER_GRAPH(name, ...) \
REGISTER_CLASS(GraphRegistry, name, __VA_ARGS__)
} // namespace dragon } // namespace dragon
#endif // DRAGON_CORE_GRAPH_H_ #endif // DRAGON_CORE_GRAPH_H_
\ No newline at end of file
...@@ -12,30 +12,49 @@ ...@@ -12,30 +12,49 @@
#ifndef DRAGON_CORE_MIXEDMEM_H_ #ifndef DRAGON_CORE_MIXEDMEM_H_
#define DRAGON_CORE_MIXEDMEM_H_ #define DRAGON_CORE_MIXEDMEM_H_
#include "context.h" #include "core/context.h"
#include "context_cuda.h" #include "core/context_cuda.h"
#include "core/context_cnml.h"
namespace dragon { namespace dragon {
typedef enum {
NCHW,
NHWC,
} DataOrder;
class MixedMemory { class MixedMemory {
public: public:
enum State { typedef enum {
UNINITIALIZED, UNINITIALIZED,
STATE_AT_CPU, STATE_AT_CPU,
STATE_AT_CUDA, STATE_AT_CUDA,
STATE_AT_CNML,
SWITCHED, SWITCHED,
SYNCED }; SYNCED,
} State;
MixedMemory() : cpu_ptr_(nullptr), cuda_ptr_(nullptr) {} MixedMemory() : cpu_ptr_(nullptr),
cuda_ptr_(nullptr), cnml_ptr_(nullptr) {}
MixedMemory(const TypeMeta& meta, const size_t nbytes) MixedMemory(const TypeMeta& meta, const size_t nbytes)
: meta_(meta), nbytes_(nbytes), : meta_(meta), nbytes_(nbytes), cpu_ptr_(nullptr),
cpu_ptr_(nullptr), cuda_ptr_(nullptr) {} cuda_ptr_(nullptr), cnml_ptr_(nullptr) {}
~MixedMemory(); ~MixedMemory();
const void* cpu_data(); const void* cpu_data();
const void* cuda_data(); const void* cuda_data();
const void* cnml_data();
void* mutable_cpu_data(); void* mutable_cpu_data();
void* mutable_cuda_data(); void* mutable_cuda_data();
void* mutable_cnml_data();
void* malloc_cnml_data();
void fetch_cnml_data(void** data);
cnmlCpuTensor_t& cnml_cpu_tensor();
cnmlTensor_t& cnml_mlu_tensor();
void set_cpu_data(void* cpu_ptr, size_t nbytes); void set_cpu_data(void* cpu_ptr, size_t nbytes);
void SwitchToDevice(); void SwitchToDevice();
...@@ -43,23 +62,35 @@ class MixedMemory { ...@@ -43,23 +62,35 @@ class MixedMemory {
inline size_t nbytes() const { return nbytes_; } inline size_t nbytes() const { return nbytes_; }
inline void* cpu_ptr() { state_ = STATE_AT_CPU; return cpu_ptr_; } inline size_t nchunks() const { return nchunks_; }
inline void* cuda_ptr() { state_ = STATE_AT_CUDA; return cuda_ptr_; } void set_nchunks(size_t nchunks) { nchunks_ = nchunks; }
inline State state() const { return state_; } inline State state() const { return state_; }
inline DataOrder order() const { return order_; }
inline void set_order(DataOrder order) { order_ = order; }
const Map<string, string> info() const; const Map<string, string> info() const;
void ToCUDA();
void ToCPU(); void ToCPU();
void ToCUDA();
private: private:
void* cpu_ptr_, *cuda_ptr_;
bool own_cpu_ptr_ = true;
State state_ = UNINITIALIZED;
size_t nbytes_ = 0;
TypeMeta meta_; TypeMeta meta_;
size_t nbytes_ = 0, nchunks_ = 1;
DataOrder order_ = NCHW;
State state_ = UNINITIALIZED;
void* cpu_ptr_, *cuda_ptr_, *cnml_ptr_;
int own_cpu_ptr_ = 1, ptr_device_ = 0;
/* For CAMBRICON's CNML Environment */
cnmlCpuTensor_t cnml_cpu_tensor_ = nullptr;
cnmlTensor_t cnml_mlu_tensor_ = nullptr;
}; };
} // namespace dragon } // namespace dragon
#endif #endif // DRAGON_CORE_MIXEDMEM_H_
\ No newline at end of file \ No newline at end of file
...@@ -44,7 +44,9 @@ class OperatorBase { ...@@ -44,7 +44,9 @@ class OperatorBase {
const string& anchor); const string& anchor);
inline void SwitchToPhase(const string& phase) { phase_ = phase; } inline void SwitchToPhase(const string& phase) { phase_ = phase; }
virtual void Run(int stream_id = 1) { NOT_IMPLEMENTED; } virtual void Run(int stream_id = 1) { NOT_IMPLEMENTED; }
virtual void Fusion(void* graph) { NOT_IMPLEMENTED; }
inline const string& name() const { return def_.name(); } inline const string& name() const { return def_.name(); }
inline const string& type() const { return def_.type(); } inline const string& type() const { return def_.type(); }
...@@ -186,12 +188,22 @@ DECLARE_REGISTRY( ...@@ -186,12 +188,22 @@ DECLARE_REGISTRY(
const OperatorDef&, const OperatorDef&,
Workspace*); Workspace*);
/* NVIDIA's Accelerated Library - CUDNN */
DECLARE_REGISTRY( DECLARE_REGISTRY(
CUDNNOperatorRegistry, CUDNNOperatorRegistry,
OperatorBase, OperatorBase,
const OperatorDef&, const OperatorDef&,
Workspace*); Workspace*);
/* CAMBRICON's Accelerated Library - CNML */
DECLARE_REGISTRY(
CNMLOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
#define TENSOR_FILL_WITH_TYPE(tensor, shape, type) \ #define TENSOR_FILL_WITH_TYPE(tensor, shape, type) \
if (tensor.count() == 0) { \ if (tensor.count() == 0) { \
CHECK(ws()->GetFiller(tensor.name())) \ CHECK(ws()->GetFiller(tensor.name())) \
...@@ -310,6 +322,9 @@ DECLARE_REGISTRY( ...@@ -310,6 +322,9 @@ DECLARE_REGISTRY(
#define INSTANTIATE_CUDNN_OPERATOR(name) \ #define INSTANTIATE_CUDNN_OPERATOR(name) \
template class CuDNN##name##Op<CUDAContext>; template class CuDNN##name##Op<CUDAContext>;
#define INSTANTIATE_CNML_OPERATOR(name) \
template class CnML##name##Op<CNMLContext>;
#define REGISTER_CPU_OPERATOR(name, ...) \ #define REGISTER_CPU_OPERATOR(name, ...) \
REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__) REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
...@@ -319,6 +334,9 @@ DECLARE_REGISTRY( ...@@ -319,6 +334,9 @@ DECLARE_REGISTRY(
#define REGISTER_CUDNN_OPERATOR(name, ...) \ #define REGISTER_CUDNN_OPERATOR(name, ...) \
REGISTER_CLASS(CUDNNOperatorRegistry, name, __VA_ARGS__) REGISTER_CLASS(CUDNNOperatorRegistry, name, __VA_ARGS__)
#define REGISTER_CNML_OPERATOR(name, ...) \
REGISTER_CLASS(CNMLOperatorRegistry, name, __VA_ARGS__)
#define DEPLOY_CPU(name) \ #define DEPLOY_CPU(name) \
REGISTER_CPU_OPERATOR(name, name##Op<CPUContext>); \ REGISTER_CPU_OPERATOR(name, name##Op<CPUContext>); \
INSTANTIATE_OPERATOR(name, CPUContext); INSTANTIATE_OPERATOR(name, CPUContext);
...@@ -336,6 +354,10 @@ DECLARE_REGISTRY( ...@@ -336,6 +354,10 @@ DECLARE_REGISTRY(
REGISTER_CUDNN_OPERATOR(name, CuDNN##name##Op<CUDAContext>); \ REGISTER_CUDNN_OPERATOR(name, CuDNN##name##Op<CUDAContext>); \
INSTANTIATE_CUDNN_OPERATOR(name); INSTANTIATE_CUDNN_OPERATOR(name);
#define DEPLOY_CNML(name) \
REGISTER_CNML_OPERATOR(name, CnML##name##Op<CNMLContext>); \
INSTANTIATE_CNML_OPERATOR(name);
} // namespace dragon } // namespace dragon
#endif // DRAGON_CORE_OPERATOR_H_ #endif // DRAGON_CORE_OPERATOR_H_
\ No newline at end of file
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
// ------------------------------------------------------------ // ------------------------------------------------------------
#ifndef DRAGON_CORE_TENSOR_H_ #ifndef DRAGON_CORE_TENSOR_H_
#define DRAONG_CORE_TENSOR_H_ #define DRAGON_CORE_TENSOR_H_
#include "core/common.h" #include "core/common.h"
#include "core/mixedmem.h" #include "core/mixedmem.h"
...@@ -103,16 +103,20 @@ class Tensor { ...@@ -103,16 +103,20 @@ class Tensor {
return offset; return offset;
} }
inline string DimString() const { static inline string DimString(
if (ndim() == 0) return "(0,)"; const vector<TIndex>& dims) {
if (dims.size() == 0) return "(0,)";
std::stringstream ss; std::stringstream ss;
ss << "("; ss << "(";
for (int i = 0; i < ndim() - 1; i++) ss << dim(i) << ","; for (int i = 0; i < dims.size() - 1; i++)
if (ndim() == 1) ss << dim(0) << ",)"; ss << dims[i] << ",";
else ss << dim(ndim() - 1) << ")"; if (dims.size() == 1) ss << dims[0] << ",)";
else ss << dims.back() << ")";
return ss.str(); return ss.str();
} }
inline string DimString() const { return DimString(dims_); }
inline bool is_corrupted() const { return is_corrupted_; } inline bool is_corrupted() const { return is_corrupted_; }
inline void Corrupt() { is_corrupted_ = true; } inline void Corrupt() { is_corrupted_ = true; }
...@@ -156,9 +160,12 @@ class Tensor { ...@@ -156,9 +160,12 @@ class Tensor {
} else if (TypeMeta::Id<Context>() == } else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CUDAContext>()) { TypeMeta::Id<CUDAContext>()) {
*data_ptr = mem->mutable_cuda_data(); *data_ptr = mem->mutable_cuda_data();
} else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CNMLContext>()) {
*data_ptr = mem->mutable_cnml_data();
} else { } else {
LOG(FATAL) << "Unknown memory type. " LOG(FATAL) << "Unknown memory type.\n"
<< "Only CPU or CUDA is supported."; << "Only CPU, CUDA and CNML are supported.";
} }
} }
} }
...@@ -173,9 +180,12 @@ class Tensor { ...@@ -173,9 +180,12 @@ class Tensor {
} else if (TypeMeta::Id<Context>() == } else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CUDAContext>()) { TypeMeta::Id<CUDAContext>()) {
return mem->cuda_data(); return mem->cuda_data();
} else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CNMLContext>()) {
return mem->cnml_data();
} else { } else {
LOG(FATAL) << "Unknown memory type. " LOG(FATAL) << "Unknown memory type.\n"
<< "Only CPU or CUDA are supported."; << "Only CPU, CUDA, and CNML are supported.";
return nullptr; return nullptr;
} }
} }
...@@ -295,4 +305,4 @@ class Tensor { ...@@ -295,4 +305,4 @@ class Tensor {
} // namespace dragon } // namespace dragon
#endif // DRAONG_CORE_TENSOR_H_ #endif // DRAGON_CORE_TENSOR_H_
\ No newline at end of file \ No newline at end of file
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
namespace dragon { namespace dragon {
typedef char int8;
typedef unsigned char uint8;
#ifdef _MSC_VER #ifdef _MSC_VER
typedef struct __declspec(align(2)) { typedef struct __declspec(align(2)) {
...@@ -49,8 +52,8 @@ inline const TypeMeta& TypeStringToMeta( ...@@ -49,8 +52,8 @@ inline const TypeMeta& TypeStringToMeta(
{ "int64", TypeMeta::Make<int64_t>() }, { "int64", TypeMeta::Make<int64_t>() },
{ "float64", TypeMeta::Make<double>() }, { "float64", TypeMeta::Make<double>() },
{ "float16", TypeMeta::Make<float16>() }, { "float16", TypeMeta::Make<float16>() },
{ "uint8", TypeMeta::Make<uint8_t>() }, { "uint8", TypeMeta::Make<uint8>() },
{ "int8", TypeMeta::Make<char>() }, { "int8", TypeMeta::Make<int8>() },
}; };
static TypeMeta unknown_type; static TypeMeta unknown_type;
return s2m_type_map.count(str_type) ? return s2m_type_map.count(str_type) ?
...@@ -66,8 +69,8 @@ inline const std::string TypeMetaToString( ...@@ -66,8 +69,8 @@ inline const std::string TypeMetaToString(
{ TypeMeta::Id<int64_t>(), "int64" }, { TypeMeta::Id<int64_t>(), "int64" },
{ TypeMeta::Id<double>(), "float64", }, { TypeMeta::Id<double>(), "float64", },
{ TypeMeta::Id<float16>(), "float16" }, { TypeMeta::Id<float16>(), "float16" },
{ TypeMeta::Id<uint8_t>(), "uint8" }, { TypeMeta::Id<uint8>(), "uint8" },
{ TypeMeta::Id<char>(), "int8" } { TypeMeta::Id<int8>(), "int8" }
}; };
return m2s_type_map.count(meta.id()) ? return m2s_type_map.count(meta.id()) ?
m2s_type_map[meta.id()] : "unknown"; m2s_type_map[meta.id()] : "unknown";
......
...@@ -47,8 +47,8 @@ class Workspace { ...@@ -47,8 +47,8 @@ class Workspace {
recompute_flag->Reshape({ 1 }); recompute_flag->Reshape({ 1 });
recompute_flag->mutable_data<bool, CPUContext>()[0] = false; recompute_flag->mutable_data<bool, CPUContext>()[0] = false;
for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) { for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) {
string name = "/opt/mirror_stage/buffer_" + string name = "/opt/mirror_stage/buffer_"
dragon_cast<string, int>(i); + std::to_string(i);
Tensor* buffer = CreateTensor(name); Tensor* buffer = CreateTensor(name);
head->mutable_data<string, CPUContext>()[i] = ""; head->mutable_data<string, CPUContext>()[i] = "";
} }
...@@ -277,7 +277,8 @@ class Workspace { ...@@ -277,7 +277,8 @@ class Workspace {
inline bool SetProxy( inline bool SetProxy(
const string& key, const string& key,
const string& proxy) { const string& proxy) {
if (proxy_map_.count(key)) if (key == proxy) return false;
if (proxy_map_.count(key) > 0)
return proxy_map_[key] == proxy; return proxy_map_[key] == proxy;
proxy_map_[key] = proxy; proxy_map_[key] = proxy;
return true; return true;
......
...@@ -23,7 +23,7 @@ class DropoutOp final : public Operator<Context> { ...@@ -23,7 +23,7 @@ class DropoutOp final : public Operator<Context> {
DropoutOp(const OperatorDef& def, Workspace* ws) DropoutOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
use_scale(OperatorBase::Arg<bool>("scale", true)) { use_scale(OperatorBase::Arg<bool>("scale", true)) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5); GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", "")); SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -42,7 +42,7 @@ class DropoutGradientOp final : public Operator<Context> { ...@@ -42,7 +42,7 @@ class DropoutGradientOp final : public Operator<Context> {
DropoutGradientOp(const OperatorDef& def, Workspace* ws) DropoutGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
use_scale(OperatorBase::Arg<bool>("scale", true)) { use_scale(OperatorBase::Arg<bool>("scale", true)) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5); GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", "")); SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -53,7 +53,6 @@ class DropoutGradientOp final : public Operator<Context> { ...@@ -53,7 +53,6 @@ class DropoutGradientOp final : public Operator<Context> {
protected: protected:
DECLARE_ARGUMENT_WITH_DESC(float, prob); DECLARE_ARGUMENT_WITH_DESC(float, prob);
bool use_scale; bool use_scale;
Tensor* mask;
}; };
DEFINE_ARGUMENT_WITH_DESC(float, DropoutOp, prob); DEFINE_ARGUMENT_WITH_DESC(float, DropoutOp, prob);
...@@ -70,7 +69,7 @@ public: ...@@ -70,7 +69,7 @@ public:
: Operator<Context>(def, ws), states_initialized(false), : Operator<Context>(def, ws), states_initialized(false),
use_scale(OperatorBase::Arg<bool>("scale", true)), use_scale(OperatorBase::Arg<bool>("scale", true)),
random_seed(DEFAULT_RNG_SEED) { random_seed(DEFAULT_RNG_SEED) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5); GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", "")); SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc)); CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateDropoutDescriptor(&dropout_desc)); CUDNN_CHECK(cudnnCreateDropoutDescriptor(&dropout_desc));
...@@ -101,7 +100,7 @@ public: ...@@ -101,7 +100,7 @@ public:
: Operator<Context>(def, ws), states_initialized(false), : Operator<Context>(def, ws), states_initialized(false),
use_scale(OperatorBase::Arg<bool>("scale", true)), use_scale(OperatorBase::Arg<bool>("scale", true)),
random_seed(DEFAULT_RNG_SEED) { random_seed(DEFAULT_RNG_SEED) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5); GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", "")); SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc)); CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateDropoutDescriptor(&dropout_desc)); CUDNN_CHECK(cudnnCreateDropoutDescriptor(&dropout_desc));
......
...@@ -21,7 +21,7 @@ class ReluOp : public Operator<Context> { ...@@ -21,7 +21,7 @@ class ReluOp : public Operator<Context> {
public: public:
ReluOp(const OperatorDef& def, Workspace* ws) ReluOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
slope(OperatorBase::Arg<float>("slope", 0.0)) {} slope(OperatorBase::Arg<float>("slope", 0.f)) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override; void RunOnDevice() override;
...@@ -36,7 +36,7 @@ class ReluGradientOp : public Operator<Context> { ...@@ -36,7 +36,7 @@ class ReluGradientOp : public Operator<Context> {
public: public:
ReluGradientOp(const OperatorDef& def, Workspace* ws) ReluGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
slope(OperatorBase::Arg<float>("slope", 0.0)) {} slope(OperatorBase::Arg<float>("slope", 0.f)) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override; void RunOnDevice() override;
......
...@@ -48,8 +48,6 @@ class SoftmaxGradientOp final : public Operator<Context> { ...@@ -48,8 +48,6 @@ class SoftmaxGradientOp final : public Operator<Context> {
#ifdef WITH_CUDNN #ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context> template <class Context>
class CuDNNSoftmaxOp final : public Operator<Context> { class CuDNNSoftmaxOp final : public Operator<Context> {
public: public:
...@@ -70,8 +68,7 @@ class CuDNNSoftmaxOp final : public Operator<Context> { ...@@ -70,8 +68,7 @@ class CuDNNSoftmaxOp final : public Operator<Context> {
template <typename T> void RunWithType(); template <typename T> void RunWithType();
protected: protected:
int axis; TIndex axis, outer_dim, inner_dim;
TIndex outer_dim, inner_dim;
cudnnTensorDescriptor_t input_desc, output_desc; cudnnTensorDescriptor_t input_desc, output_desc;
}; };
...@@ -95,8 +92,7 @@ class CuDNNSoftmaxGradientOp final : public Operator<Context> { ...@@ -95,8 +92,7 @@ class CuDNNSoftmaxGradientOp final : public Operator<Context> {
template <typename T> void RunWithType(); template <typename T> void RunWithType();
protected: protected:
int axis; TIndex axis, outer_dim, inner_dim;
TIndex outer_dim, inner_dim;
cudnnTensorDescriptor_t input_desc, output_desc; cudnnTensorDescriptor_t input_desc, output_desc;
}; };
......
...@@ -55,7 +55,7 @@ class AffineGradientOp final : public Operator<Context> { ...@@ -55,7 +55,7 @@ class AffineGradientOp final : public Operator<Context> {
#ifdef WITH_CUDNN #ifdef WITH_CUDNN
#include "utils/cudnn_device.h" #if CUDNN_VERSION_MIN(6, 0, 0)
template <class Context> template <class Context>
class CuDNNAffineOpBase : public Operator<Context> { class CuDNNAffineOpBase : public Operator<Context> {
...@@ -152,6 +152,8 @@ protected: ...@@ -152,6 +152,8 @@ protected:
Tensor sum_result; Tensor sum_result;
}; };
#endif
#endif // WITH_CUDNN #endif // WITH_CUDNN
} // namespace dragon } // namespace dragon
......
...@@ -36,11 +36,17 @@ class ClipOp final : public Operator<Context> { ...@@ -36,11 +36,17 @@ class ClipOp final : public Operator<Context> {
template <class Context> template <class Context>
class ClipGradientOp final : public Operator<Context> { class ClipGradientOp final : public Operator<Context> {
public: public:
USE_SIMPLE_CTOR_DTOR(ClipGradientOp); ClipGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
low(OperatorBase::Arg<float>("low", -FLT_MAX)),
high(OperatorBase::Arg<float>("high", FLT_MAX)) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> void RunWithType(); template <typename T> void RunWithType();
protected:
float low, high;
}; };
} // namespace dragon } // namespace dragon
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_ARITHMETIC_MAXIMUM_OP_H_
#define DRAGON_OPERATORS_ARITHMETIC_MAXIMUM_OP_H_
#include "core/operator.h"
namespace dragon {
// Forward operator computing the element-wise maximum of its two inputs.
// EltwiseRunWithType handles same-shape inputs; BroadcastRunWithType handles
// the case where one operand must be broadcast against the other.
// NOTE(review): the exact broadcast rules live in the .cc/.cu implementation,
// which is not visible here — confirm before relying on them.
template <class Context>
class MaximumOp final : public Operator<Context> {
 public:
    // No extra operator arguments are read; the trivial ctor/dtor suffice.
    USE_SIMPLE_CTOR_DTOR(MaximumOp);
    USE_OPERATOR_FUNCTIONS;

    // Dispatches to the typed implementation based on the input dtype.
    void RunOnDevice() override;
    // Same-shape (element-wise) path.
    template <typename T> void EltwiseRunWithType();
    // Broadcasting path.
    template <typename T> void BroadcastRunWithType();
};
// Backward operator for MaximumOp: routes the incoming gradient to whichever
// input supplied the maximum at each position.
// NOTE(review): the per-element routing rule is defined in the .cc/.cu
// implementation, not visible in this header.
template <class Context>
class MaximumGradientOp final : public Operator<Context> {
 public:
    // No extra operator arguments are read; the trivial ctor/dtor suffice.
    USE_SIMPLE_CTOR_DTOR(MaximumGradientOp);
    USE_OPERATOR_FUNCTIONS;

    // Dispatches to the typed implementation based on the input dtype.
    void RunOnDevice() override;
    // Same-shape (element-wise) path.
    template <typename T> void EltwiseRunWithType();
    // Broadcasting path (mirrors the forward broadcast).
    template <typename T> void BroadcastRunWithType();
};
} // namespace dragon
#endif // DRAGON_OPERATORS_ARITHMETIC_MAXIMUM_OP_H_
\ No newline at end of file
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_ARITHMETIC_MINIMUM_OP_H_
#define DRAGON_OPERATORS_ARITHMETIC_MINIMUM_OP_H_
#include "core/operator.h"
namespace dragon {
// Forward operator computing the element-wise minimum of its two inputs.
// EltwiseRunWithType handles same-shape inputs; BroadcastRunWithType handles
// the case where one operand must be broadcast against the other.
// NOTE(review): the exact broadcast rules live in the .cc/.cu implementation,
// which is not visible here — confirm before relying on them.
template <class Context>
class MinimumOp final : public Operator<Context> {
 public:
    // No extra operator arguments are read; the trivial ctor/dtor suffice.
    USE_SIMPLE_CTOR_DTOR(MinimumOp);
    USE_OPERATOR_FUNCTIONS;

    // Dispatches to the typed implementation based on the input dtype.
    void RunOnDevice() override;
    // Same-shape (element-wise) path.
    template <typename T> void EltwiseRunWithType();
    // Broadcasting path.
    template <typename T> void BroadcastRunWithType();
};
// Backward operator for MinimumOp: routes the incoming gradient to whichever
// input supplied the minimum at each position.
// NOTE(review): the per-element routing rule is defined in the .cc/.cu
// implementation, not visible in this header.
template <class Context>
class MinimumGradientOp final : public Operator<Context> {
 public:
    // No extra operator arguments are read; the trivial ctor/dtor suffice.
    USE_SIMPLE_CTOR_DTOR(MinimumGradientOp);
    USE_OPERATOR_FUNCTIONS;

    // Dispatches to the typed implementation based on the input dtype.
    void RunOnDevice() override;
    // Same-shape (element-wise) path.
    template <typename T> void EltwiseRunWithType();
    // Broadcasting path (mirrors the forward broadcast).
    template <typename T> void BroadcastRunWithType();
};
} // namespace dragon
#endif // DRAGON_OPERATORS_ARITHMETIC_MINIMUM_OP_H_
\ No newline at end of file
...@@ -43,8 +43,6 @@ public: ...@@ -43,8 +43,6 @@ public:
#if CUDNN_VERSION_MIN(7, 0, 0) #if CUDNN_VERSION_MIN(7, 0, 0)
#include "utils/cudnn_device.h"
template <class Context> template <class Context>
class CuDNNCTCLossOp final : public Operator<Context> { class CuDNNCTCLossOp final : public Operator<Context> {
public: public:
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_LOSS_NLL_LOSS_OP_H_
#define DRAGON_OPERATORS_LOSS_NLL_LOSS_OP_H_
#include "core/operator.h"
namespace dragon {
// Forward operator for the negative log-likelihood loss.
// Arguments:
//   axis          - class axis of the input (default: 1)
//   normalization - reduction mode applied to the per-element losses
//                   (default: "VALID")
//   ignore_labels - label values excluded from the loss
template <class Context>
class NLLLossOp : public Operator<Context> {
 public:
    NLLLossOp(
        const OperatorDef&              def,
        Workspace*                      ws)
        : Operator<Context>(def, ws),
          axis(OperatorBase::Arg<int>("axis", 1)),
          normalization(OperatorBase::Arg<string>(
              "normalization", "VALID")) {
        // Stage the ignored label values in a CPU-side tensor so the
        // typed kernels can consume them later.
        auto xs = OperatorBase::Args<int>("ignore_labels");
        if (xs.size()) {
            ignores.Reshape({ (TIndex)xs.size() });
            auto* Idata = ignores.mutable_data<int, CPUContext>();
            // size_t index: xs.size() is unsigned, so an int counter
            // would trigger a signed/unsigned comparison.
            for (size_t i = 0; i < xs.size(); i++) Idata[i] = xs[i];
        }
    }
    USE_OPERATOR_FUNCTIONS;

    // Dispatches to RunWithType based on the input/label dtypes.
    void RunOnDevice() override;
    template <typename Tx, typename Ty> void RunWithType();

 protected:
    TIndex axis, outer_dim, inner_dim;
    // losses/flags: per-element scratch buffers; ignores: staged labels.
    Tensor losses, flags, ignores;
    string normalization;
};
// Backward operator for NLLLossOp. Reads the same arguments as the forward
// op so the gradient respects the identical axis, reduction mode, and
// ignored label set.
template <class Context>
class NLLLossGradientOp : public Operator<Context> {
 public:
    NLLLossGradientOp(
        const OperatorDef&              def,
        Workspace*                      ws)
        : Operator<Context>(def, ws),
          axis(OperatorBase::Arg<int>("axis", 1)),
          normalization(OperatorBase::Arg<string>(
              "normalization", "VALID")) {
        // Stage the ignored label values in a CPU-side tensor so the
        // typed kernels can consume them later.
        auto xs = OperatorBase::Args<int>("ignore_labels");
        if (xs.size()) {
            ignores.Reshape({ (TIndex)xs.size() });
            auto* Idata = ignores.mutable_data<int, CPUContext>();
            // size_t index: xs.size() is unsigned, so an int counter
            // would trigger a signed/unsigned comparison.
            for (size_t i = 0; i < xs.size(); i++) Idata[i] = xs[i];
        }
    }
    USE_OPERATOR_FUNCTIONS;

    // Dispatches to RunWithType based on the input/label dtypes.
    void RunOnDevice() override;
    template <typename Tx, typename Ty> void RunWithType();

 protected:
    TIndex axis, outer_dim, inner_dim;
    // flags: per-element scratch buffer; ignores: staged labels.
    Tensor ignores, flags;
    string normalization;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_LOSS_NLL_LOSS_OP_H_
\ No newline at end of file
...@@ -22,7 +22,8 @@ class InitializeOp : public Operator<Context> { ...@@ -22,7 +22,8 @@ class InitializeOp : public Operator<Context> {
public: public:
InitializeOp(const OperatorDef& def, Workspace* ws) InitializeOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
shape_desc(OperatorBase::Arg<string>("shape", "")) { shape_desc(OperatorBase::Arg<string>("shape", "")),
dtype(OperatorBase::Arg<string>("dtype", "float32")) {
GET_ARGUMENTS_WITH_DESC(int, dims); GET_ARGUMENTS_WITH_DESC(int, dims);
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -32,19 +33,29 @@ class InitializeOp : public Operator<Context> { ...@@ -32,19 +33,29 @@ class InitializeOp : public Operator<Context> {
protected: protected:
DECLARE_ARGUMENTS_WITH_DESC(int, dims); DECLARE_ARGUMENTS_WITH_DESC(int, dims);
string shape_desc; string shape_desc, dtype;
TensorFiller filler; TensorFiller filler;
}; };
template <class Context> template <class Context>
class FillOp final : public InitializeOp<Context> { class FillOp final : public Operator<Context> {
public: public:
FillOp(const OperatorDef& def, Workspace* ws) FillOp(const OperatorDef& def, Workspace* ws)
: InitializeOp<Context>(def, ws) { : Operator<Context>(def, ws),
this->filler.set_type("constant"); shape_desc(OperatorBase::Arg<string>("shape", "")),
this->filler.set_value(OperatorBase::Arg<float>("value", 0.0)); dtype(OperatorBase::Arg<string>("dtype", "float32")),
value(OperatorBase::Arg<float>("value", 0.0)) {
GET_ARGUMENTS_WITH_DESC(int, dims);
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
DECLARE_ARGUMENTS_WITH_DESC(int, dims);
string shape_desc, dtype;
float value;
}; };
template <class Context> template <class Context>
...@@ -130,6 +141,7 @@ public: ...@@ -130,6 +141,7 @@ public:
}; };
DEFINE_ARGUMENTS_WITH_DESC(int, InitializeOp, dims); DEFINE_ARGUMENTS_WITH_DESC(int, InitializeOp, dims);
DEFINE_ARGUMENTS_WITH_DESC(int, FillOp, dims);
} // namespace } // namespace
......
...@@ -25,7 +25,7 @@ class BatchNormOp final : public Operator<Context> { ...@@ -25,7 +25,7 @@ class BatchNormOp final : public Operator<Context> {
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)), axis(OperatorBase::Arg<int>("axis", -1)),
momentum(OperatorBase::Arg<float>("momentum", 0.9f)), momentum(OperatorBase::Arg<float>("momentum", 0.9f)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)), eps(OperatorBase::Arg<float>("eps", 1e-5f)),
use_stats(OperatorBase::Arg<int>("use_stats", -1)), use_stats(OperatorBase::Arg<int>("use_stats", -1)),
mode(OperatorBase::Arg<string>("mode", "DEFAULT")) { mode(OperatorBase::Arg<string>("mode", "DEFAULT")) {
if (axis != -1) if (axis != -1)
...@@ -81,7 +81,7 @@ class FusedBatchNormOp : public Operator<Context> { ...@@ -81,7 +81,7 @@ class FusedBatchNormOp : public Operator<Context> {
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)), axis(OperatorBase::Arg<int>("axis", -1)),
momentum(OperatorBase::Arg<float>("momentum", 0.9f)), momentum(OperatorBase::Arg<float>("momentum", 0.9f)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)), eps(OperatorBase::Arg<float>("eps", 1e-5f)),
use_stats(OperatorBase::Arg<int>("use_stats", -1)) {} use_stats(OperatorBase::Arg<int>("use_stats", -1)) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -105,7 +105,7 @@ class FusedBatchNormGradientOp : public Operator<Context> { ...@@ -105,7 +105,7 @@ class FusedBatchNormGradientOp : public Operator<Context> {
FusedBatchNormGradientOp(const OperatorDef& def, Workspace* ws) FusedBatchNormGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)), axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)), eps(OperatorBase::Arg<float>("eps", 1e-5f)),
use_stats(OperatorBase::Arg<int>("use_stats", -1)) {} use_stats(OperatorBase::Arg<int>("use_stats", -1)) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -127,14 +127,13 @@ class FusedBatchNormGradientOp : public Operator<Context> { ...@@ -127,14 +127,13 @@ class FusedBatchNormGradientOp : public Operator<Context> {
#if CUDNN_VERSION_MIN(5, 0, 0) #if CUDNN_VERSION_MIN(5, 0, 0)
#include "utils/cudnn_device.h"
template <class Context> template <class Context>
class CuDNNBatchNormOp final : public FusedBatchNormOp<Context> { class CuDNNBatchNormOp final
: public FusedBatchNormOp<Context> {
public: public:
CuDNNBatchNormOp(const OperatorDef& def, Workspace* ws) CuDNNBatchNormOp(const OperatorDef& def, Workspace* ws)
: FusedBatchNormOp<Context>(def, ws), : FusedBatchNormOp<Context>(def, ws),
eps64(OperatorBase::Arg<float>("eps", 1e-3f)) { eps64(OperatorBase::Arg<float>("eps", 1e-5f)) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc)); CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc)); CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bn_desc)); CUDNN_CHECK(cudnnCreateTensorDescriptor(&bn_desc));
...@@ -167,11 +166,12 @@ class CuDNNBatchNormOp final : public FusedBatchNormOp<Context> { ...@@ -167,11 +166,12 @@ class CuDNNBatchNormOp final : public FusedBatchNormOp<Context> {
}; };
template <class Context> template <class Context>
class CuDNNBatchNormGradientOp final : public FusedBatchNormGradientOp<Context> { class CuDNNBatchNormGradientOp final
: public FusedBatchNormGradientOp<Context> {
public: public:
CuDNNBatchNormGradientOp(const OperatorDef& def, Workspace* ws) CuDNNBatchNormGradientOp(const OperatorDef& def, Workspace* ws)
: FusedBatchNormGradientOp<Context>(def, ws), : FusedBatchNormGradientOp<Context>(def, ws),
eps64(OperatorBase::Arg<float>("eps", 1e-3f)) { eps64(OperatorBase::Arg<float>("eps", 1e-5f)) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc)); CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc)); CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bn_desc)); CUDNN_CHECK(cudnnCreateTensorDescriptor(&bn_desc));
......
...@@ -23,7 +23,7 @@ class BatchRenormOp final : public Operator<Context> { ...@@ -23,7 +23,7 @@ class BatchRenormOp final : public Operator<Context> {
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)), axis(OperatorBase::Arg<int>("axis", -1)),
momentum(OperatorBase::Arg<float>("momentum", 0.9f)), momentum(OperatorBase::Arg<float>("momentum", 0.9f)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)), eps(OperatorBase::Arg<float>("eps", 1e-5f)),
r_max(OperatorBase::Arg<float>("r_max", 3.f)), r_max(OperatorBase::Arg<float>("r_max", 3.f)),
d_max(OperatorBase::Arg<float>("d_max", 5.f)), d_max(OperatorBase::Arg<float>("d_max", 5.f)),
t_delta(OperatorBase::Arg<float>("t_delta", 1.f)), t_delta(OperatorBase::Arg<float>("t_delta", 1.f)),
......
...@@ -23,7 +23,7 @@ class GroupNormOp final : public Operator<Context> { ...@@ -23,7 +23,7 @@ class GroupNormOp final : public Operator<Context> {
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
group(OperatorBase::Arg<int>("group", 32)), group(OperatorBase::Arg<int>("group", 32)),
axis(OperatorBase::Arg<int>("axis", -1)), axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)) { eps(OperatorBase::Arg<float>("eps", 1e-5f)) {
if (axis != -1) if (axis != -1)
CHECK_EQ(axis, 1) CHECK_EQ(axis, 1)
<< "\nThe axis can only be set to 1."; << "\nThe axis can only be set to 1.";
...@@ -73,7 +73,7 @@ class FusedGroupNormOp final : public Operator<Context> { ...@@ -73,7 +73,7 @@ class FusedGroupNormOp final : public Operator<Context> {
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
group(OperatorBase::Arg<int>("group", 32)), group(OperatorBase::Arg<int>("group", 32)),
axis(OperatorBase::Arg<int>("axis", -1)), axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)) {} eps(OperatorBase::Arg<float>("eps", 1e-5f)) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void Setup(); void Setup();
......
...@@ -22,9 +22,10 @@ class InstanceNormOp final : public Operator<Context> { ...@@ -22,9 +22,10 @@ class InstanceNormOp final : public Operator<Context> {
InstanceNormOp(const OperatorDef& def, Workspace* ws) InstanceNormOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)), axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)) { eps(OperatorBase::Arg<float>("eps", 1e-5f)) {
if (axis != -1) if (axis != -1)
CHECK_EQ(axis, 1) << "\nThe axis can only be set to 1."; CHECK_EQ(axis, 1)
<< "\nThe axis can only be set to 1.";
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -47,7 +48,8 @@ class InstanceNormGradientOp final : public Operator<Context> { ...@@ -47,7 +48,8 @@ class InstanceNormGradientOp final : public Operator<Context> {
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)) { axis(OperatorBase::Arg<int>("axis", -1)) {
if (axis != -1) if (axis != -1)
CHECK_EQ(axis, 1) << "\nThe axis can only be set to 1."; CHECK_EQ(axis, 1)
<< "\nThe axis can only be set to 1.";
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
......
...@@ -23,7 +23,7 @@ class L2NormOp final : public Operator<Context> { ...@@ -23,7 +23,7 @@ class L2NormOp final : public Operator<Context> {
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", 0)), axis(OperatorBase::Arg<int>("axis", 0)),
num_axes(OperatorBase::Arg<int>("num_axes", -1)), num_axes(OperatorBase::Arg<int>("num_axes", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)), eps(OperatorBase::Arg<float>("eps", 1e-5f)),
mode(OperatorBase::Arg<string>("mode", "SUM")) {} mode(OperatorBase::Arg<string>("mode", "SUM")) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
......
...@@ -20,8 +20,6 @@ namespace dragon { ...@@ -20,8 +20,6 @@ namespace dragon {
#if CUDNN_VERSION_MIN(5, 0, 0) #if CUDNN_VERSION_MIN(5, 0, 0)
#include "utils/cudnn_device.h"
class cudnnTensorDescriptors { class cudnnTensorDescriptors {
public: public:
cudnnTensorDescriptors(const int num_descs) { cudnnTensorDescriptors(const int num_descs) {
......
...@@ -21,7 +21,8 @@ class BiasAddOp final : public Operator<Context> { ...@@ -21,7 +21,8 @@ class BiasAddOp final : public Operator<Context> {
public: public:
BiasAddOp(const OperatorDef& def, Workspace* ws) BiasAddOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
data_format(OperatorBase::Arg<string>("data_format", "NCHW")) {} data_format(OperatorBase::Arg<string>(
"data_format", "NCHW")) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override; void RunOnDevice() override;
...@@ -37,7 +38,8 @@ class BiasAddGradientOp final : public Operator<Context> { ...@@ -37,7 +38,8 @@ class BiasAddGradientOp final : public Operator<Context> {
public: public:
BiasAddGradientOp(const OperatorDef& def, Workspace* ws) BiasAddGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
data_format(OperatorBase::Arg<string>("data_format", "NCHW")) {} data_format(OperatorBase::Arg<string>(
"data_format", "NCHW")) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override; void RunOnDevice() override;
...@@ -48,6 +50,62 @@ class BiasAddGradientOp final : public Operator<Context> { ...@@ -48,6 +50,62 @@ class BiasAddGradientOp final : public Operator<Context> {
string data_format; string data_format;
}; };
#ifdef WITH_CUDNN
// cuDNN-backed forward BiasAdd: adds a per-channel bias to the input.
// The ctor reads "data_format" ("NCHW" default) and creates the two tensor
// descriptors; the dtor destroys them (RAII over the cuDNN handles).
template <class Context>
class CuDNNBiasAddOp final : public Operator<Context> {
 public:
    CuDNNBiasAddOp(const OperatorDef& def, Workspace* ws)
        : Operator<Context>(def, ws),
          data_format(OperatorBase::Arg<string>(
              "data_format", "NCHW")) {
        CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc));
        CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc));
    }
    USE_OPERATOR_FUNCTIONS;

    ~CuDNNBiasAddOp() {
        CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc));
        CUDNN_CHECK(cudnnDestroyTensorDescriptor(output_desc));
    }

    // Dispatches to the typed implementation based on the input dtype.
    void RunOnDevice() override;
    template <typename T> void RunWithType();

 protected:
    // Dimensions are computed at run time from the input and data_format
    // (done in the .cc/.cu implementation, not visible here).
    TIndex outer_dim, dim, inner_dim;
    string data_format;
    cudnnTensorDescriptor_t bias_desc, output_desc;
};
// cuDNN-backed backward BiasAdd: reduces the output gradient into the bias
// gradient. The ctor reads "data_format" ("NCHW" default) and creates the
// two tensor descriptors; the dtor destroys them (RAII over the handles).
template <class Context>
class CuDNNBiasAddGradientOp final : public Operator<Context> {
 public:
    CuDNNBiasAddGradientOp(const OperatorDef& def, Workspace* ws)
        : Operator<Context>(def, ws),
          data_format(OperatorBase::Arg<string>(
              "data_format", "NCHW")) {
        CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
        CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc));
    }
    USE_OPERATOR_FUNCTIONS;

    ~CuDNNBiasAddGradientOp() {
        CUDNN_CHECK(cudnnDestroyTensorDescriptor(input_desc));
        CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc));
    }

    // Dispatches to the typed implementation based on the input dtype.
    void RunOnDevice() override;
    template <typename T> void RunWithType();

 protected:
    // Dimensions are computed at run time from the input and data_format
    // (done in the .cc/.cu implementation, not visible here).
    TIndex outer_dim, dim, inner_dim;
    string data_format;
    cudnnTensorDescriptor_t input_desc, bias_desc;
};
#endif // WITH_CUDNN
} // namespace dragon } // namespace dragon
#endif // DRAGON_OPERATORS_VISION_BIAS_ADD_OP_H_ #endif // DRAGON_OPERATORS_VISION_BIAS_ADD_OP_H_
\ No newline at end of file
...@@ -50,8 +50,6 @@ class Conv2dGradientOp : public Conv2dOp<Context> { ...@@ -50,8 +50,6 @@ class Conv2dGradientOp : public Conv2dOp<Context> {
#ifdef WITH_CUDNN #ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context> template <class Context>
class CuDNNConv2dOp final : public Conv2dOp<Context> { class CuDNNConv2dOp final : public Conv2dOp<Context> {
public: public:
...@@ -97,7 +95,7 @@ class CuDNNConv2dOp final : public Conv2dOp<Context> { ...@@ -97,7 +95,7 @@ class CuDNNConv2dOp final : public Conv2dOp<Context> {
cudnnConvolutionDescriptor_t conv_desc; cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc; cudnnFilterDescriptor_t filter_desc;
size_t fwd_data_size; size_t fwd_data_size;
TIndex bias_offset, cudnn_group; TIndex cudnn_group;
vector<TIndex> input_dims; vector<TIndex> input_dims;
bool enable_tensor_core; bool enable_tensor_core;
}; };
...@@ -148,7 +146,7 @@ class CuDNNConv2dGradientOp final : public Conv2dGradientOp<Context> { ...@@ -148,7 +146,7 @@ class CuDNNConv2dGradientOp final : public Conv2dGradientOp<Context> {
cudnnConvolutionDescriptor_t conv_desc; cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc; cudnnFilterDescriptor_t filter_desc;
size_t bwd_filter_size, bwd_data_size; size_t bwd_filter_size, bwd_data_size;
TIndex bias_offset, cudnn_group; TIndex cudnn_group;
vector<TIndex> input_dims; vector<TIndex> input_dims;
bool enable_tensor_core; bool enable_tensor_core;
}; };
......
...@@ -84,6 +84,7 @@ class ConvOpBase : public Operator<Context> { ...@@ -84,6 +84,7 @@ class ConvOpBase : public Operator<Context> {
ctx()); ctx());
} else LOG(FATAL) << "ConvNd has not been implemented yet"; } else LOG(FATAL) << "ConvNd has not been implemented yet";
} }
template <typename T> void Col2Im(const T* col, T* im) { template <typename T> void Col2Im(const T* col, T* im) {
if (Input(0).ndim() == 4) { if (Input(0).ndim() == 4) {
kernel::Col2Im2d<T, Context>(conv_in_channels, kernel::Col2Im2d<T, Context>(conv_in_channels,
......
...@@ -54,8 +54,6 @@ class Conv2dTransposeGradientOp : public Conv2dTransposeOp<Context> { ...@@ -54,8 +54,6 @@ class Conv2dTransposeGradientOp : public Conv2dTransposeOp<Context> {
#ifdef WITH_CUDNN #ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context> template <class Context>
class CuDNNConv2dTransposeOp final : public Conv2dTransposeOp<Context> { class CuDNNConv2dTransposeOp final : public Conv2dTransposeOp<Context> {
public: public:
...@@ -100,7 +98,7 @@ class CuDNNConv2dTransposeOp final : public Conv2dTransposeOp<Context> { ...@@ -100,7 +98,7 @@ class CuDNNConv2dTransposeOp final : public Conv2dTransposeOp<Context> {
cudnnConvolutionDescriptor_t conv_desc; cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc; cudnnFilterDescriptor_t filter_desc;
size_t fwd_data_size; size_t fwd_data_size;
TIndex bias_offset, cudnn_group; TIndex cudnn_group;
vector<TIndex> input_dims; vector<TIndex> input_dims;
bool enable_tensor_core; bool enable_tensor_core;
}; };
...@@ -150,7 +148,7 @@ public: ...@@ -150,7 +148,7 @@ public:
cudnnConvolutionDescriptor_t conv_desc; cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc; cudnnFilterDescriptor_t filter_desc;
size_t bwd_filter_size, bwd_data_size; size_t bwd_filter_size, bwd_data_size;
TIndex bias_offset, cudnn_group; TIndex cudnn_group;
vector<TIndex> input_dims; vector<TIndex> input_dims;
bool enable_tensor_core; bool enable_tensor_core;
}; };
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_VISION_DROP_BLOCK_OP_H_
#define DRAGON_OPERATORS_VISION_DROP_BLOCK_OP_H_
#include "core/operator.h"
#include "utils/math_functions.h"
namespace dragon {
// Forward operator for 2d DropBlock-style regularization (presumably the
// scheme of Ghiasi et al., 2018 — the actual masking math lives in the
// .cc/.cu implementation, not visible here).
// Arguments:
//   block_size  - side length of each dropped block (default: 7)
//   keep_prob   - desc-capable keep probability (default: 0.9)
//   alpha       - scale applied via the arg (default: 1.0); exact use is
//                 defined in the implementation — confirm there
//   decrement   - per-step decrease applied to apply_prob (default: 0),
//                 presumably for scheduled drop rate — TODO confirm
//   data_format - "NCHW" (default) or "NHWC"
//   phase       - selects train/test behavior via SwitchToPhase
template <class Context>
class DropBlock2dOp final : public Operator<Context> {
 public:
    DropBlock2dOp(const OperatorDef& def, Workspace* ws)
        : Operator<Context>(def, ws),
          block_size(OperatorBase::Arg<int>("block_size", 7)),
          alpha(OperatorBase::Arg<float>("alpha", 1.f)),
          decrement(OperatorBase::Arg<float>("decrement", 0.f)),
          data_format(OperatorBase::Arg<string>("data_format", "NCHW")) {
        GET_ARGUMENT_WITH_DESC(float, keep_prob, 0.9f);
        SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
    }
    USE_OPERATOR_FUNCTIONS;

    // Dispatches to the typed implementation based on the input dtype.
    void RunOnDevice() override;
    template <typename T> void RunWithType();

 protected:
    DECLARE_ARGUMENT_WITH_DESC(float, keep_prob);
    // seed_h/seed_w: extent of the region where block centers may be seeded.
    TIndex block_size, seed_h, seed_w;
    // Cached input dimensions (batch, channels, height, width).
    TIndex n, c, h, w;
    // apply_prob starts at 1 and is reduced by decrement over time;
    // gamma is computed per run (uninitialized until then).
    float alpha, decrement, apply_prob = 1., gamma;
    string data_format;
    vector<TIndex> seed_dims;
};
// Backward operator for DropBlock2dOp: applies the forward mask to the
// incoming gradient. Reads only "phase" so the gradient matches the
// forward train/test behavior.
template <class Context>
class DropBlock2dGradientOp final : public Operator<Context> {
 public:
    DropBlock2dGradientOp(const OperatorDef& def, Workspace* ws)
        : Operator<Context>(def, ws) {
        SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
    }
    USE_OPERATOR_FUNCTIONS;

    // Dispatches to the typed implementation based on the input dtype.
    void RunOnDevice() override;
    template <typename T> void RunWithType();
};
DEFINE_ARGUMENT_WITH_DESC(float, DropBlock2dOp, keep_prob);
} // namespace dragon
#endif // DRAGON_OPERATORS_VISION_DROP_BLOCK_OP_H_
\ No newline at end of file
...@@ -16,7 +16,10 @@ ...@@ -16,7 +16,10 @@
namespace dragon { namespace dragon {
enum LRNMode { ACROSS_CHANNELS, WITHIN_CHANNEL }; typedef enum {
ACROSS_CHANNELS,
WITHIN_CHANNEL,
} LRNMode;
template <class Context> template <class Context>
class LRNOp : public Operator<Context> { class LRNOp : public Operator<Context> {
...@@ -82,8 +85,6 @@ class LRNGradientOp : public Operator<Context> { ...@@ -82,8 +85,6 @@ class LRNGradientOp : public Operator<Context> {
#ifdef WITH_CUDNN #ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context> template <class Context>
class CuDNNLRNOp final : public LRNOp<Context> { class CuDNNLRNOp final : public LRNOp<Context> {
public: public:
......
...@@ -73,7 +73,7 @@ inline void LoadCaffeModel( ...@@ -73,7 +73,7 @@ inline void LoadCaffeModel(
const string& layer_name = layer.name(); const string& layer_name = layer.name();
string prefix = layer_name + "/param:"; string prefix = layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++) { for (int j = 0; j < layer.blobs_size(); j++) {
string tensor_name = prefix + dragon_cast<string, int>(j); string tensor_name = prefix + std::to_string(j);
if (!ws->HasTensor(tensor_name)) if (!ws->HasTensor(tensor_name))
LOG(WARNING) << "Tensor(" << tensor_name << ") " LOG(WARNING) << "Tensor(" << tensor_name << ") "
<< "does not exist in any Graphs, skip."; << "does not exist in any Graphs, skip.";
...@@ -114,7 +114,7 @@ inline void SavaCaffeModel( ...@@ -114,7 +114,7 @@ inline void SavaCaffeModel(
int layer_idx = -1; int layer_idx = -1;
for (int i = 0; i < tensors.size(); i++) { for (int i = 0; i < tensors.size(); i++) {
if (tensors[i]->count() <= 0) continue; if (tensors[i]->count() <= 0) continue;
vector<string> splits = SplitString( vector<string> splits = str::split(
tensors[i]->name(), "/param:"); tensors[i]->name(), "/param:");
if (layer_hash.count(splits[0]) == 0) { if (layer_hash.count(splits[0]) == 0) {
layer_hash[splits[0]] = ++layer_idx; layer_hash[splits[0]] = ++layer_idx;
......
...@@ -28,6 +28,10 @@ template<> inline int dragon_cast<int, float>(float val) { ...@@ -28,6 +28,10 @@ template<> inline int dragon_cast<int, float>(float val) {
return static_cast<int>(val); return static_cast<int>(val);
} }
template<> inline int64_t dragon_cast<int64_t, float>(float val) {
return static_cast<int64_t>(val);
}
template<> inline float dragon_cast<float, float>(float val) { template<> inline float dragon_cast<float, float>(float val) {
return val; return val;
} }
...@@ -127,7 +131,7 @@ template<> inline float32 dragon_cast<float32, float>(float val) { ...@@ -127,7 +131,7 @@ template<> inline float32 dragon_cast<float32, float>(float val) {
return dragon_cast<float32, float16>(t); return dragon_cast<float32, float16>(t);
} }
#ifdef WITH_CUDA_FP16 #ifdef WITH_CUDA
template<> inline half dragon_cast<half, float>(float val) { template<> inline half dragon_cast<half, float>(float val) {
#if CUDA_VERSION_MIN(9, 0, 0) #if CUDA_VERSION_MIN(9, 0, 0)
...@@ -165,7 +169,7 @@ template<> inline half2 dragon_cast<half2, float16>(float16 val) { ...@@ -165,7 +169,7 @@ template<> inline half2 dragon_cast<half2, float16>(float16 val) {
} }
#endif // WITH_CUDA_FP16 #endif // WITH_CUDA
} // namespace dragon } // namespace dragon
......
...@@ -101,16 +101,10 @@ inline int CUDA_NUM_DEVICES() { ...@@ -101,16 +101,10 @@ inline int CUDA_NUM_DEVICES() {
return count; return count;
} }
inline int CUDA_DEVICE() { inline int CUDA_GET_DEVICE() {
int gpu_id; int device_id;
cudaGetDevice(&gpu_id); cudaGetDevice(&device_id);
return gpu_id; return device_id;
}
inline int CUDA_DEVICE(const void* ptr) {
cudaPointerAttributes attr;
CUDA_CHECK(cudaPointerGetAttributes(&attr, ptr));
return attr.device;
} }
struct CUDADeviceProps { struct CUDADeviceProps {
...@@ -132,7 +126,7 @@ inline const cudaDeviceProp& GetDeviceProperty( ...@@ -132,7 +126,7 @@ inline const cudaDeviceProp& GetDeviceProperty(
} }
inline bool CUDA_TRUE_FP16_AVAILABLE() { inline bool CUDA_TRUE_FP16_AVAILABLE() {
int device = CUDA_DEVICE(); int device = CUDA_GET_DEVICE();
auto& prop = GetDeviceProperty(device); auto& prop = GetDeviceProperty(device);
return prop.major >= 6; return prop.major >= 6;
} }
...@@ -141,7 +135,7 @@ inline bool TENSOR_CORE_AVAILABLE() { ...@@ -141,7 +135,7 @@ inline bool TENSOR_CORE_AVAILABLE() {
#if CUDA_VERSION < 9000 #if CUDA_VERSION < 9000
return false; return false;
#else #else
int device = CUDA_DEVICE(); int device = CUDA_GET_DEVICE();
auto& prop = GetDeviceProperty(device); auto& prop = GetDeviceProperty(device);
return prop.major >= 7; return prop.major >= 7;
#endif #endif
...@@ -149,23 +143,16 @@ inline bool TENSOR_CORE_AVAILABLE() { ...@@ -149,23 +143,16 @@ inline bool TENSOR_CORE_AVAILABLE() {
class DeviceGuard { class DeviceGuard {
public: public:
DeviceGuard(int newDevice) DeviceGuard(int new_id) : prev_id(CUDA_GET_DEVICE()) {
: previous_(CUDA_DEVICE()) { if (prev_id != new_id) CUDA_CHECK(cudaSetDevice(new_id));
if (previous_ != newDevice)
CUDA_CHECK(cudaSetDevice(newDevice));
} }
~DeviceGuard() { ~DeviceGuard() { CUDA_CHECK(cudaSetDevice(prev_id)); }
CUDA_CHECK(cudaSetDevice(previous_));
}
private: private:
int previous_; int prev_id;
}; };
#define CUDA_FP16_NOT_COMPILED \
LOG(FATAL) << "CUDA-FP16 was not compiled."
#else #else
#define CUDA_NOT_COMPILED \ #define CUDA_NOT_COMPILED \
......
...@@ -55,7 +55,6 @@ template<> class CUDNNType<double> { ...@@ -55,7 +55,6 @@ template<> class CUDNNType<double> {
typedef double BNParamType; typedef double BNParamType;
}; };
#ifdef WITH_CUDA_FP16
template<> class CUDNNType<float16> { template<> class CUDNNType<float16> {
public: public:
static const cudnnDataType_t type = CUDNN_DATA_HALF; static const cudnnDataType_t type = CUDNN_DATA_HALF;
...@@ -63,37 +62,63 @@ template<> class CUDNNType<float16> { ...@@ -63,37 +62,63 @@ template<> class CUDNNType<float16> {
static const void *one, *zero; static const void *one, *zero;
typedef float BNParamType; typedef float BNParamType;
}; };
#endif
template <typename T> template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, Tensor* tensor); void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc,
Tensor* tensor);
template <typename T> template <typename T>
void cudnnSetTensor4dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, Tensor* tensor); void cudnnSetTensor4dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
Tensor* tensor);
template <typename T> template <typename T>
void cudnnSetTensor5dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, Tensor* tensor); void cudnnSetTensor5dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
Tensor* tensor);
template <typename T> template <typename T>
void cudnnSetTensor3dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, Tensor* tensor); void cudnnSetTensor3dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
Tensor* tensor);
template <typename T> template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, const std::vector<int64_t>& dims); void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc,
const std::vector<int64_t>& dims);
template <typename T> template <typename T>
void cudnnSetTensor4dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims); void cudnnSetTensor4dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims);
template <typename T> template <typename T>
void cudnnSetTensor4dDescWithGroup(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims, const int64_t group); void cudnnSetTensor4dDescWithGroup(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims,
const int64_t group);
template <typename T> template <typename T>
void cudnnSetTensor5dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims); void cudnnSetTensor5dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims);
template <typename T> template <typename T>
void cudnnSetTensor3dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims); void cudnnSetTensor3dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims);
template <typename T> template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc,
const std::vector<int64_t>& dims, const std::vector<int64_t>& dims,
const std::vector<int64_t>& strides); const std::vector<int64_t>& strides);
......
...@@ -69,7 +69,7 @@ template <typename T, class Context> ...@@ -69,7 +69,7 @@ template <typename T, class Context>
void RandomBernoulli( void RandomBernoulli(
const int n, const int n,
const float p, const float p,
uint32_t* x, T* x,
Context* ctx); Context* ctx);
/******************** Level-1 ********************/ /******************** Level-1 ********************/
......
...@@ -25,21 +25,21 @@ typedef int64_t TIndex; ...@@ -25,21 +25,21 @@ typedef int64_t TIndex;
template <typename T, class Context> template <typename T, class Context>
void Dropout( void Dropout(
const int count, const int count,
T prob, float prob,
T scale, float scale,
const T* x, const T* x,
uint32_t* mask, uint32_t* mask32,
uint8_t* mask8,
T* y, T* y,
Context* ctx); Context* ctx);
template <typename T, class Context> template <typename Tx, typename Tm, class Context>
void DropoutGrad( void ApplyMask(
const int count, const int count,
T prob, const float scale,
T scale, const Tx* x,
const T* dy, const Tm* mask,
const uint32_t* mask, Tx* y,
T* dx,
Context* ctx); Context* ctx);
/******************** activation.elu ********************/ /******************** activation.elu ********************/
...@@ -234,10 +234,95 @@ void Clip( ...@@ -234,10 +234,95 @@ void Clip(
const float low, const float low,
const float high, const float high,
const T* x, const T* x,
T* mask,
T* y, T* y,
Context* ctx); Context* ctx);
template <typename T, class Context>
void ClipGrad(
const int count,
const float low,
const float high,
const T* x,
const T* dy,
T* dx,
Context* ctx);
/******************** arithmetic.maximum ********************/
template <typename T, class Context>
void MaximumE(
const int count,
const T* x1,
const T* x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MaximumB(
const int count,
const T* x1,
const T x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MaximumEGrad(
const int count,
const T* x1,
const T* x2,
const T* dy,
T* dx1,
T* dx2,
Context* ctx);
template <typename T, class Context>
void MaximumBGrad(
const int count,
const T* x1,
const T x2,
const T* dy,
T* dx1,
/* T* dx2, */
Context* ctx);
/******************** arithmetic.minimum ********************/
template <typename T, class Context>
void MinimumE(
const int count,
const T* x1,
const T* x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MinimumB(
const int count,
const T* x1,
const T x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MinimumEGrad(
const int count,
const T* x1,
const T* x2,
const T* dy,
T* dx1,
T* dx2,
Context* ctx);
template <typename T, class Context>
void MinimumBGrad(
const int count,
const T* x1,
const T x2,
const T* dy,
T* dx1,
/* T* dx2, */
Context* ctx);
/******************** control_flow.compare ********************/ /******************** control_flow.compare ********************/
template <typename T, class Context> template <typename T, class Context>
...@@ -257,6 +342,34 @@ void AbsGrad( ...@@ -257,6 +342,34 @@ void AbsGrad(
T* dx, T* dx,
Context* ctx); Context* ctx);
/******************** loss.nll_loss ********************/
template <typename Tx, typename Ty, class Context>
void NLLLoss(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const Tx* log_prob,
const Ty* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
Context* ctx);
template <typename Tx, typename Ty, class Context>
void NLLLossGrad(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const Tx* prob,
const Ty* labels,
const int* ignores,
const int num_ignores,
Tx* dx,
float* flags,
Context* ctx);
/******************** loss.sigmoid_cross_entropy ********************/ /******************** loss.sigmoid_cross_entropy ********************/
template <typename T, class Context> template <typename T, class Context>
...@@ -902,6 +1015,23 @@ void Col2Im2d( ...@@ -902,6 +1015,23 @@ void Col2Im2d(
T* im, T* im,
Context* ctx); Context* ctx);
/******************** vision.drop_block ********************/
template <class Context>
void DropBlock2d(
const int N,
const int C,
const int H,
const int W,
const int seed_h,
const int seed_w,
const int block_size,
const float gamma,
const string& data_format,
uint32_t* seed,
int* mask,
Context* ctx);
/******************** vision.nn_resize ********************/ /******************** vision.nn_resize ********************/
template <typename T, class Context> template <typename T, class Context>
......
...@@ -111,7 +111,7 @@ void Axpby( ...@@ -111,7 +111,7 @@ void Axpby(
const T beta, const T beta,
T* y); T* y);
} // namespace ssd } // namespace sse
} // namespace dragon } // namespace dragon
......
...@@ -18,11 +18,11 @@ ...@@ -18,11 +18,11 @@
#include <iostream> #include <iostream>
#include <cstdlib> #include <cstdlib>
#include "utils/cast.h"
namespace dragon { namespace dragon {
inline std::vector<std::string> SplitString( namespace str {
inline std::vector<std::string> split(
const std::string& str, const std::string& str,
const std::string& c) { const std::string& c) {
std::vector<std::string> ret; std::vector<std::string> ret;
...@@ -36,17 +36,7 @@ inline std::vector<std::string> SplitString( ...@@ -36,17 +36,7 @@ inline std::vector<std::string> SplitString(
return ret; return ret;
} }
#define DEFINE_NUMBER2STRING(T) \ } // namespace str
template<> inline std::string dragon_cast<std::string, T>(T val) { \
std::stringstream ss; ss << val; return ss.str(); \
}
DEFINE_NUMBER2STRING(int);
DEFINE_NUMBER2STRING(unsigned long long);
template<> inline int dragon_cast<int, std::string>(std::string val) {
return atoi(val.c_str());
}
} // namespace dragon } // namespace dragon
......
...@@ -2,6 +2,7 @@ message(STATUS "Found CXX Module: ${CMAKE_CURRENT_LIST_DIR}") ...@@ -2,6 +2,7 @@ message(STATUS "Found CXX Module: ${CMAKE_CURRENT_LIST_DIR}")
FILE(GLOB_RECURSE MODULE_FILES *.h *.hpp *.c *.cpp *.cu *.cc) FILE(GLOB_RECURSE MODULE_FILES *.h *.hpp *.c *.cpp *.cu *.cc)
FILE(GLOB_RECURSE SRC_FILES ../../src/*.c ../../src/*.cpp ../../src/*.cu ../../src/*.cc) FILE(GLOB_RECURSE SRC_FILES ../../src/*.c ../../src/*.cpp ../../src/*.cu ../../src/*.cc)
LIST(REMOVE_ITEM SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/../../src/operators/misc/python_op.cc)
# ---[ Target # ---[ Target
if (WITH_CUDA) if (WITH_CUDA)
...@@ -36,7 +37,9 @@ if(WIN32) ...@@ -36,7 +37,9 @@ if(WIN32)
TARGET_LINK_LIBRARIES(${PROJECT_NAME}_cxx shlwapi.lib) TARGET_LINK_LIBRARIES(${PROJECT_NAME}_cxx shlwapi.lib)
endif() endif()
SET_TARGET_PROPERTIES(${PROJECT_NAME}_cxx PROPERTIES OUTPUT_NAME dragon_cxx) SET_TARGET_PROPERTIES(${PROJECT_NAME}_cxx PROPERTIES OUTPUT_NAME dragon)
SET_TARGET_PROPERTIES(${PROJECT_NAME}_cxx PROPERTIES DEFINE_SYMBOL DRAGON_CXX_EXPORTS)
# ---[ Install # ---[ Install
INSTALL(TARGETS ${PROJECT_NAME}_cxx DESTINATION ${PROJECT_BINARY_DIR}/../lib) INSTALL(TARGETS ${PROJECT_NAME}_cxx DESTINATION ${PROJECT_BINARY_DIR}/../api/lib)
\ No newline at end of file FILE(INSTALL dragon.h DESTINATION ${PROJECT_BINARY_DIR}/../api/include)
\ No newline at end of file
#include "dragon.h" #include "dragon.h"
#include "core/common.h" #include "utils/logging.h"
namespace dragon { namespace dragon {
......
...@@ -12,6 +12,12 @@ ...@@ -12,6 +12,12 @@
namespace dragon { namespace dragon {
/* * * * * * * * * * * * * * * * * * * * *
* *
* Workspace *
* *
* * * * * * * * * * * * * * * * * * * * */
Map<string, unique_ptr < Workspace > > g_workspaces; Map<string, unique_ptr < Workspace > > g_workspaces;
Map<string, vector<string> > sub_workspaces; Map<string, vector<string> > sub_workspaces;
std::mutex g_mutex; std::mutex g_mutex;
...@@ -29,7 +35,8 @@ Workspace* CreateWorkspace(const std::string& name){ ...@@ -29,7 +35,8 @@ Workspace* CreateWorkspace(const std::string& name){
Workspace* ResetWorkspace(const std::string& name) { Workspace* ResetWorkspace(const std::string& name) {
std::unique_lock<std::mutex> lock(g_mutex); std::unique_lock<std::mutex> lock(g_mutex);
CHECK(g_workspaces.count(name)) CHECK(g_workspaces.count(name))
<< "\nWorkspace(" << name << ") does not exist, can not be reset."; << "\nWorkspace(" << name << ") does not exist."
<< "\nCan not be reset.";
LOG(INFO) << "Reset the Workspace(" << name << ")."; LOG(INFO) << "Reset the Workspace(" << name << ").";
g_workspaces[name].reset(new Workspace(name)); g_workspaces[name].reset(new Workspace(name));
for (auto& sub_workspace : sub_workspaces[name]) { for (auto& sub_workspace : sub_workspaces[name]) {
...@@ -43,7 +50,8 @@ Workspace* ResetWorkspace(const std::string& name) { ...@@ -43,7 +50,8 @@ Workspace* ResetWorkspace(const std::string& name) {
void ReleaseWorkspace(const std::string& name) { void ReleaseWorkspace(const std::string& name) {
std::unique_lock<std::mutex> lock(g_mutex); std::unique_lock<std::mutex> lock(g_mutex);
CHECK(g_workspaces.count(name)) CHECK(g_workspaces.count(name))
<< "\nWorkspace(" << name << ") does not exist, can not be released."; << "\nWorkspace(" << name << ") does not exist."
<< "\nCan not be released.";
LOG(INFO) << "Release the Workspace(" << name << ")."; LOG(INFO) << "Release the Workspace(" << name << ").";
g_workspaces[name].reset(); g_workspaces[name].reset();
g_workspaces.erase(name); g_workspaces.erase(name);
...@@ -61,6 +69,12 @@ void MoveWorkspace( ...@@ -61,6 +69,12 @@ void MoveWorkspace(
<< "into the Workspace(" << target_ws->name() << ")."; << "into the Workspace(" << target_ws->name() << ").";
} }
/* * * * * * * * * * * * * * * * * * * * *
* *
* Graph *
* *
* * * * * * * * * * * * * * * * * * * * */
std::string CreateGraph( std::string CreateGraph(
const std::string& graph_file, const std::string& graph_file,
Workspace* ws) { Workspace* ws) {
...@@ -102,6 +116,19 @@ std::string CreateGraph( ...@@ -102,6 +116,19 @@ std::string CreateGraph(
return meta_graph.name(); return meta_graph.name();
} }
void RunGraph(
const std::string& graph_name,
Workspace* ws,
const int stream_id) {
ws->RunGraph(graph_name, "", "", stream_id);
}
/* * * * * * * * * * * * * * * * * * * * *
* *
* Tensor *
* *
* * * * * * * * * * * * * * * * * * * * */
void CreateTensor( void CreateTensor(
const std::string& name, const std::string& name,
Workspace* ws) { Workspace* ws) {
...@@ -109,6 +136,32 @@ void CreateTensor( ...@@ -109,6 +136,32 @@ void CreateTensor(
} }
template <typename T> template <typename T>
T* FetchTensor(
const std::string& name,
vector<TIndex>& shape,
Workspace* ws){
if (!ws->HasTensor(name)){
LOG(FATAL) << "Tensor(" << name << ")"
<< " doesn't exist, try create it before.";
}
Tensor* tensor = ws->GetTensor(name);
if (tensor->meta().id() == 0){
LOG(FATAL) << "Tensor(" << name << ")"
<< " has not been computed yet";
}
shape = tensor->dims();
void* data = malloc(tensor->nbytes());
if (tensor->memory_state() == MixedMemory::STATE_AT_CUDA) {
CUDAContext::Memcpy<CPUContext, CUDAContext>(
tensor->nbytes(), data, tensor->raw_data<CUDAContext>());
} else {
CPUContext::Memcpy<CPUContext, CPUContext>(
tensor->nbytes(), data, tensor->raw_data<CPUContext>());
}
return static_cast<T*>(data);
}
template <typename T>
void FeedTensor( void FeedTensor(
const std::string& name, const std::string& name,
const vector<TIndex>& shape, const vector<TIndex>& shape,
...@@ -135,6 +188,12 @@ void FeedTensor( ...@@ -135,6 +188,12 @@ void FeedTensor(
} }
} }
/* * * * * * * * * * * * * * * * * * * * *
* *
* I / O *
* *
* * * * * * * * * * * * * * * * * * * * */
void TransplantCaffeModel( void TransplantCaffeModel(
const std::string& input_model, const std::string& input_model,
const std::string& output_model) { const std::string& output_model) {
...@@ -146,7 +205,7 @@ void TransplantCaffeModel( ...@@ -146,7 +205,7 @@ void TransplantCaffeModel(
const string& layer_name = layer.name(); const string& layer_name = layer.name();
string prefix = layer_name + "/param:"; string prefix = layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++) { for (int j = 0; j < layer.blobs_size(); j++) {
string tensor_name = prefix + dragon_cast<string, int>(j); string tensor_name = prefix + std::to_string(j);
BlobProto blob = layer.blobs(j); BlobProto blob = layer.blobs(j);
TensorProto* proto = protos.add_protos(); TensorProto* proto = protos.add_protos();
proto->set_data_type(TensorProto_DataType_FLOAT); proto->set_data_type(TensorProto_DataType_FLOAT);
...@@ -218,7 +277,7 @@ void LoadCaffemodel( ...@@ -218,7 +277,7 @@ void LoadCaffemodel(
const string& layer_name = layer.name(); const string& layer_name = layer.name();
string prefix = scope + layer_name + "/param:"; string prefix = scope + layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++){ for (int j = 0; j < layer.blobs_size(); j++){
string tensor_name = prefix + dragon_cast<string, int>(j); string tensor_name = prefix + std::to_string(j);
if (!ws->HasTensor(tensor_name)) if (!ws->HasTensor(tensor_name))
ws->CreateTensor(tensor_name); ws->CreateTensor(tensor_name);
BlobProto blob = layer.blobs(j); BlobProto blob = layer.blobs(j);
...@@ -248,63 +307,54 @@ void LoadCaffemodel( ...@@ -248,63 +307,54 @@ void LoadCaffemodel(
} }
} }
void RunGraph( /* * * * * * * * * * * * * * * * * * * * *
const std::string& graph_name, * *
Workspace* ws, * Config *
const int stream_id) { * *
ws->RunGraph(graph_name, "", "", stream_id); * * * * * * * * * * * * * * * * * * * * */
}
template <typename T>
T* FetchTensor(
const std::string& name,
vector<TIndex>& shape,
Workspace* ws){
if (!ws->HasTensor(name)){
LOG(FATAL) << "Tensor(" << name << ")"
<< " doesn't exist, try create it before.";
}
Tensor* tensor = ws->GetTensor(name);
if (tensor->meta().id() == 0){
LOG(FATAL) << "Tensor(" << name << ")"
<< " has not been computed yet";
}
shape = tensor->dims();
void* data = malloc(tensor->nbytes());
if (tensor->memory_state() == MixedMemory::STATE_AT_CUDA) {
CUDAContext::Memcpy<CPUContext, CUDAContext>(
tensor->nbytes(), data, tensor->raw_data<CUDAContext>());
} else {
CPUContext::Memcpy<CPUContext, CPUContext>(
tensor->nbytes(), data, tensor->raw_data<CPUContext>());
}
return static_cast<T*>(data);
}
void SetLogLevel(const std::string& level) { void SetLogLevel(const std::string& level) {
SetLogDestination(StrToLogSeverity(level)); SetLogDestination(StrToLogSeverity(level));
} }
template float* FetchTensor<float>( /* * * * * * * * * * * * * * * * * * * * *
* *
* Template *
* *
* * * * * * * * * * * * * * * * * * * * */
template DRAGON_API float* FetchTensor<float>(
const std::string&, const std::string&,
std::vector<TIndex>&, std::vector<TIndex>&,
Workspace*); Workspace*);
template void FeedTensor<float>( template DRAGON_API float16* FetchTensor<float16>(
const std::string&,
std::vector<TIndex>&,
Workspace*);
template DRAGON_API void FeedTensor<float>(
const std::string&, const std::string&,
const std::vector<TIndex>&, const std::vector<TIndex>&,
const float*, const float*,
const Device&, const Device&,
Workspace*); Workspace*);
template void FeedTensor<int>( template DRAGON_API void FeedTensor<float16>(
const std::string&,
const std::vector<TIndex>&,
const float16*,
const Device&,
Workspace*);
template DRAGON_API void FeedTensor<int>(
const std::string&, const std::string&,
const std::vector<TIndex>&, const std::vector<TIndex>&,
const int*, const int*,
const Device&, const Device&,
Workspace*); Workspace*);
template void FeedTensor<uint8_t>( template DRAGON_API void FeedTensor<uint8_t>(
const std::string&, const std::string&,
const std::vector<TIndex>&, const std::vector<TIndex>&,
const uint8_t*, const uint8_t*,
......
...@@ -16,10 +16,28 @@ ...@@ -16,10 +16,28 @@
#include <cstdint> #include <cstdint>
#include <vector> #include <vector>
#ifdef WIN32 #ifdef _MSC_VER
#define EXPORT __declspec(dllexport) #ifdef DRAGON_CXX_EXPORTS
#define DRAGON_API __declspec(dllexport)
#else
#define DRAGON_API __declspec(dllimport)
#endif
#else #else
#define EXPORT #define DRAGON_API
#endif
/* * * * * * * * * * * * * * * * * * * * *
* *
* Internal Headers *
* *
* * * * * * * * * * * * * * * * * * * * */
#ifdef DRAGON_CXX_EXPORTS
#include "core/types.h"
#else
namespace dragon {
struct float16;
}
#endif #endif
namespace dragon { namespace dragon {
...@@ -28,72 +46,102 @@ typedef int64_t TIndex; ...@@ -28,72 +46,102 @@ typedef int64_t TIndex;
class Workspace; class Workspace;
class Device { class DRAGON_API Device {
public: public:
EXPORT Device(); Device();
EXPORT explicit Device(std::string device_type); explicit Device(std::string device_type);
EXPORT Device(std::string device_type, int device_id); Device(std::string device_type, int device_id);
EXPORT const int& device_type() const { return device_type_; } const int& device_type() const { return device_type_; }
EXPORT const int device_id() const { return device_id_; } const int device_id() const { return device_id_; }
private: private:
int device_type_, device_id_; int device_type_, device_id_;
}; };
EXPORT Workspace* CreateWorkspace(const std::string& name); /* * * * * * * * * * * * * * * * * * * * *
* *
* Workspace *
* *
* * * * * * * * * * * * * * * * * * * * */
EXPORT Workspace* ResetWorkspace(const std::string& name); DRAGON_API Workspace* CreateWorkspace(const std::string& name);
EXPORT void ReleaseWorkspace(const std::string& name); DRAGON_API Workspace* ResetWorkspace(const std::string& name);
EXPORT void MoveWorkspace(Workspace* main, Workspace* sub); DRAGON_API void ReleaseWorkspace(const std::string& name);
EXPORT std::string CreateGraph( DRAGON_API void MoveWorkspace(Workspace* main, Workspace* sub);
/* * * * * * * * * * * * * * * * * * * * *
* *
* Graph *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API std::string CreateGraph(
const std::string& graph_file, const std::string& graph_file,
Workspace* ws); Workspace* ws);
EXPORT std::string CreateGraph( DRAGON_API std::string CreateGraph(
const std::string& graph_file, const std::string& graph_file,
const Device& device, const Device& device,
Workspace* ws); Workspace* ws);
EXPORT void RunGraph( DRAGON_API void RunGraph(
const std::string& graph_name, const std::string& graph_name,
Workspace* ws, Workspace* ws,
const int stream_id = 1); const int stream_id = 1);
EXPORT void CreateTensor( /* * * * * * * * * * * * * * * * * * * * *
* *
* Tensor *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API void CreateTensor(
const std::string& name, const std::string& name,
Workspace* ws); Workspace* ws);
template <typename T> template <typename T>
EXPORT void FeedTensor( DRAGON_API T* FetchTensor(
const std::string& name, const std::string& name,
const std::vector<TIndex>& shape, std::vector<TIndex>& shape,
const T* data,
const Device& device,
Workspace* ws); Workspace* ws);
template <typename T> template <typename T>
EXPORT T* FetchTensor( DRAGON_API void FeedTensor(
const std::string& name, const std::string& name,
std::vector<TIndex>& shape, const std::vector<TIndex>& shape,
const T* data,
const Device& device,
Workspace* ws); Workspace* ws);
EXPORT void LoadCaffemodel( /* * * * * * * * * * * * * * * * * * * * *
* *
* I / O *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API void LoadCaffemodel(
const std::string& model_file, const std::string& model_file,
Workspace* ws); Workspace* ws);
EXPORT void TransplantCaffeModel( DRAGON_API void TransplantCaffeModel(
const std::string& input_model, const std::string& input_model,
const std::string& output_model); const std::string& output_model);
EXPORT void LoadDragonmodel( DRAGON_API void LoadDragonmodel(
const std::string& model_file, const std::string& model_file,
Workspace* ws); Workspace* ws);
EXPORT void SetLogLevel(const std::string& level); /* * * * * * * * * * * * * * * * * * * * *
* *
* Config *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API void SetLogLevel(const std::string& level);
} // namespace dragon } // namespace dragon
......
...@@ -19,7 +19,8 @@ Workspace* ws() { return g_workspace; } ...@@ -19,7 +19,8 @@ Workspace* ws() { return g_workspace; }
TypeId CTypeToFetcher(TypeId type) { TypeId CTypeToFetcher(TypeId type) {
static Map<TypeId,TypeId> c_type_map { static Map<TypeId,TypeId> c_type_map {
{ TypeMeta::Id<uint8_t>(), TypeMeta::Id<NumpyFetcher>() }, { TypeMeta::Id<int8>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<uint8>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<int>(), TypeMeta::Id<NumpyFetcher>() }, { TypeMeta::Id<int>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<int64_t>(), TypeMeta::Id<NumpyFetcher>() }, { TypeMeta::Id<int64_t>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<float>(), TypeMeta::Id<NumpyFetcher>() }, { TypeMeta::Id<float>(), TypeMeta::Id<NumpyFetcher>() },
...@@ -197,6 +198,11 @@ inline PyObject* FeedTensorCC(PyObject* self, PyObject* args) { ...@@ -197,6 +198,11 @@ inline PyObject* FeedTensorCC(PyObject* self, PyObject* args) {
} }
} }
inline PyObject* OnModuleExitCC(PyObject* self, PyObject* args) {
g_workspaces.clear();
Py_RETURN_TRUE;
}
#define PYFUNC(name) {#name, name, METH_VARARGS, ""} #define PYFUNC(name) {#name, name, METH_VARARGS, ""}
#define PYENDFUNC {nullptr, nullptr, 0, nullptr} #define PYENDFUNC {nullptr, nullptr, 0, nullptr}
...@@ -255,6 +261,7 @@ PyMethodDef* GetAllMethods() { ...@@ -255,6 +261,7 @@ PyMethodDef* GetAllMethods() {
PYFUNC(SnapshotCC), PYFUNC(SnapshotCC),
/**** Config ****/ /**** Config ****/
PYFUNC(SetLogLevelCC), PYFUNC(SetLogLevelCC),
PYFUNC(OnModuleExitCC),
PYENDFUNC, PYENDFUNC,
}; };
return g_python_methods; return g_python_methods;
...@@ -272,9 +279,11 @@ void common_init() { ...@@ -272,9 +279,11 @@ void common_init() {
} }
#ifdef WITH_PYTHON3 #ifdef WITH_PYTHON3
static struct PyModuleDef libdragon = { PyModuleDef_HEAD_INIT, static struct PyModuleDef libdragon = {
PyModuleDef_HEAD_INIT,
"libdragon", "", -1, "libdragon", "", -1,
GetAllMethods() }; GetAllMethods()
};
PyMODINIT_FUNC PyInit_libdragon(void) { PyMODINIT_FUNC PyInit_libdragon(void) {
PyObject* module = PyModule_Create(&libdragon); PyObject* module = PyModule_Create(&libdragon);
...@@ -285,7 +294,8 @@ PyMODINIT_FUNC PyInit_libdragon(void) { ...@@ -285,7 +294,8 @@ PyMODINIT_FUNC PyInit_libdragon(void) {
#else // WITH_PYTHON2 #else // WITH_PYTHON2
PyMODINIT_FUNC initlibdragon(void) { PyMODINIT_FUNC initlibdragon(void) {
PyObject* moudle = Py_InitModule("libdragon", GetAllMethods()); PyObject* moudle = Py_InitModule(
"libdragon", GetAllMethods());
if (moudle == nullptr) return; if (moudle == nullptr) return;
common_init(); common_init();
} }
......
...@@ -31,7 +31,8 @@ class TensorFetcherBase { ...@@ -31,7 +31,8 @@ class TensorFetcherBase {
class TensorFeederBase { class TensorFeederBase {
public: public:
virtual ~TensorFeederBase() {} virtual ~TensorFeederBase() {}
virtual PyObject* Feed(const DeviceOption& option, virtual PyObject* Feed(
const DeviceOption& option,
PyArrayObject* array, PyArrayObject* array,
Tensor* tensor) = 0; Tensor* tensor) = 0;
}; };
...@@ -61,7 +62,7 @@ class NumpyFetcher : public TensorFetcherBase { ...@@ -61,7 +62,7 @@ class NumpyFetcher : public TensorFetcherBase {
PyErr_SetString(PyExc_RuntimeError, s.c_str()); PyErr_SetString(PyExc_RuntimeError, s.c_str());
return nullptr; return nullptr;
} }
// create a empty array with r shape // create a empty array with the same shape
PyObject* array = PyArray_SimpleNew( PyObject* array = PyArray_SimpleNew(
tensor.ndim(), npy_dims.data(), npy_type); tensor.ndim(), npy_dims.data(), npy_type);
// copy the tensor data to the numpy array // copy the tensor data to the numpy array
...@@ -88,7 +89,8 @@ class StringFetcher : public TensorFetcherBase { ...@@ -88,7 +89,8 @@ class StringFetcher : public TensorFetcherBase {
class NumpyFeeder : public TensorFeederBase { class NumpyFeeder : public TensorFeederBase {
public: public:
PyObject* Feed(const DeviceOption& option, PyObject* Feed(
const DeviceOption& option,
PyArrayObject* original_array, PyArrayObject* original_array,
Tensor* tensor) override { Tensor* tensor) override {
PyArrayObject* array = PyArray_GETCONTIGUOUS(original_array); PyArrayObject* array = PyArray_GETCONTIGUOUS(original_array);
...@@ -100,7 +102,6 @@ class NumpyFeeder : public TensorFeederBase { ...@@ -100,7 +102,6 @@ class NumpyFeeder : public TensorFeederBase {
if (meta.id() != tensor->meta().id() && tensor->meta().id() != 0) if (meta.id() != tensor->meta().id() && tensor->meta().id() != 0)
LOG(WARNING) << "Feed Tensor(" << tensor->name() << ")" LOG(WARNING) << "Feed Tensor(" << tensor->name() << ")"
<< " with different data type from original one."; << " with different data type from original one.";
tensor->SetMeta(meta);
int ndim = PyArray_NDIM(array); int ndim = PyArray_NDIM(array);
npy_intp* npy_dims = PyArray_DIMS(array); npy_intp* npy_dims = PyArray_DIMS(array);
vector<TIndex> dims; vector<TIndex> dims;
...@@ -110,16 +111,16 @@ class NumpyFeeder : public TensorFeederBase { ...@@ -110,16 +111,16 @@ class NumpyFeeder : public TensorFeederBase {
#ifdef WITH_CUDA #ifdef WITH_CUDA
CUDAContext context(option); CUDAContext context(option);
context.SwitchToDevice(); context.SwitchToDevice();
auto* data = tensor->raw_mutable_data<CUDAContext>(meta);
context.Memcpy<CUDAContext, CPUContext>(tensor->nbytes(), context.Memcpy<CUDAContext, CPUContext>(tensor->nbytes(),
tensor->raw_mutable_data<CUDAContext>(), data, static_cast<void*>(PyArray_DATA(array)));
static_cast<void*>(PyArray_DATA(array)));
#else #else
LOG(FATAL) << "CUDA was not compiled."; LOG(FATAL) << "CUDA was not compiled.";
#endif #endif
} else { } else {
auto* data = tensor->raw_mutable_data<CPUContext>(meta);
CPUContext::Memcpy<CPUContext, CPUContext>(tensor->nbytes(), CPUContext::Memcpy<CPUContext, CPUContext>(tensor->nbytes(),
tensor->raw_mutable_data<CPUContext>(), data, static_cast<void*>(PyArray_DATA(array)));
static_cast<void*>(PyArray_DATA(array)));
} }
Py_XDECREF(array); Py_XDECREF(array);
Py_RETURN_TRUE; Py_RETURN_TRUE;
......
...@@ -25,4 +25,4 @@ inline PyObject* IsCUDADriverSufficientCC(PyObject* self, PyObject* args) { ...@@ -25,4 +25,4 @@ inline PyObject* IsCUDADriverSufficientCC(PyObject* self, PyObject* args) {
#endif #endif
} }
#endif // DRAGON_PYTHON_PY_MPI_H_ #endif // DRAGON_PYTHON_PY_CUDA_H_
\ No newline at end of file \ No newline at end of file
...@@ -94,7 +94,6 @@ PyObject* TensorFromShapeCC(PyObject* self, PyObject* args) { ...@@ -94,7 +94,6 @@ PyObject* TensorFromShapeCC(PyObject* self, PyObject* args) {
if (meta.id() != tensor->meta().id() && tensor->meta().id() != 0) if (meta.id() != tensor->meta().id() && tensor->meta().id() != 0)
LOG(WARNING) << "Set Tensor(" << tensor->name() << ")" LOG(WARNING) << "Set Tensor(" << tensor->name() << ")"
<< " with different data type from original one."; << " with different data type from original one.";
tensor->SetMeta(meta);
int ndim = PyList_Size(shape); int ndim = PyList_Size(shape);
CHECK_GT(ndim, 0) CHECK_GT(ndim, 0)
<< "\nThe len of shape should be greater than 1. Got " << ndim << "."; << "\nThe len of shape should be greater than 1. Got " << ndim << ".";
...@@ -112,9 +111,9 @@ PyObject* TensorFromShapeCC(PyObject* self, PyObject* args) { ...@@ -112,9 +111,9 @@ PyObject* TensorFromShapeCC(PyObject* self, PyObject* args) {
if (dev_opt.device_type() == CUDA) { if (dev_opt.device_type() == CUDA) {
CUDAContext ctx(dev_opt); CUDAContext ctx(dev_opt);
ctx.SwitchToDevice(); ctx.SwitchToDevice();
tensor->raw_mutable_data<CUDAContext>(); tensor->raw_mutable_data<CUDAContext>(meta);
} else { } else {
tensor->raw_mutable_data<CPUContext>(); tensor->raw_mutable_data<CPUContext>(meta);
} }
Py_RETURN_TRUE; Py_RETURN_TRUE;
} }
...@@ -173,19 +172,19 @@ PyObject* TensorFromTensorCC(PyObject* self, PyObject* args) { ...@@ -173,19 +172,19 @@ PyObject* TensorFromTensorCC(PyObject* self, PyObject* args) {
Tensor* srcT = ws()->GetTensor(src_name); Tensor* srcT = ws()->GetTensor(src_name);
Tensor* dstT = ws()->CreateTensor(dst_name); Tensor* dstT = ws()->CreateTensor(dst_name);
dstT->ReshapeLike(*srcT); dstT->ReshapeLike(*srcT);
dstT->SetMeta(srcT->meta()); const TypeMeta& meta = srcT->meta();
if (dst_ctx.device_type() == DeviceType::CUDA) { if (dst_ctx.device_type() == DeviceType::CUDA) {
if (src_ctx.device_type() == DeviceType::CUDA) { if (src_ctx.device_type() == DeviceType::CUDA) {
// CUDA <- CUDA // CUDA <- CUDA
CUDAContext::Memcpy<CUDAContext, CUDAContext>( CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(), srcT->nbytes(),
dstT->raw_mutable_data<CUDAContext>(), dstT->raw_mutable_data<CUDAContext>(meta),
srcT->raw_data<CUDAContext>()); srcT->raw_data<CUDAContext>());
} else { } else {
// CUDA <- CPU // CUDA <- CPU
CUDAContext::Memcpy<CUDAContext, CUDAContext>( CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(), srcT->nbytes(),
dstT->raw_mutable_data<CUDAContext>(), dstT->raw_mutable_data<CUDAContext>(meta),
srcT->raw_data<CPUContext>()); srcT->raw_data<CPUContext>());
} }
} else { } else {
...@@ -193,13 +192,13 @@ PyObject* TensorFromTensorCC(PyObject* self, PyObject* args) { ...@@ -193,13 +192,13 @@ PyObject* TensorFromTensorCC(PyObject* self, PyObject* args) {
// CPU <- CUDA // CPU <- CUDA
CUDAContext::Memcpy<CUDAContext, CUDAContext>( CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(), srcT->nbytes(),
dstT->raw_mutable_data<CPUContext>(), dstT->raw_mutable_data<CPUContext>(meta),
srcT->raw_data<CUDAContext>()); srcT->raw_data<CUDAContext>());
} else { } else {
// CPU <- CPU // CPU <- CPU
CUDAContext::Memcpy<CUDAContext, CUDAContext>( CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(), srcT->nbytes(),
dstT->raw_mutable_data<CPUContext>(), dstT->raw_mutable_data<CPUContext>(meta),
srcT->raw_data<CPUContext>()); srcT->raw_data<CPUContext>());
} }
} }
......
...@@ -23,8 +23,8 @@ inline const int TypeMetaToNPY(const TypeMeta& meta) { ...@@ -23,8 +23,8 @@ inline const int TypeMetaToNPY(const TypeMeta& meta) {
{ TypeMeta::Id<int64_t>(), NPY_INT64 }, { TypeMeta::Id<int64_t>(), NPY_INT64 },
{ TypeMeta::Id<double>(), NPY_FLOAT64 }, { TypeMeta::Id<double>(), NPY_FLOAT64 },
{ TypeMeta::Id<float16>(), NPY_FLOAT16 }, { TypeMeta::Id<float16>(), NPY_FLOAT16 },
{ TypeMeta::Id<uint8_t>(), NPY_UINT8 }, { TypeMeta::Id<uint8>(), NPY_UINT8 },
{ TypeMeta::Id<char>(), NPY_INT8 } { TypeMeta::Id<int8>(), NPY_INT8 }
}; };
return m2npy_type_map.count(meta.id()) ? m2npy_type_map[meta.id()] : -1; return m2npy_type_map.count(meta.id()) ? m2npy_type_map[meta.id()] : -1;
} }
...@@ -36,11 +36,12 @@ inline const TypeMeta& TypeNPYToMeta(int npy_type) { ...@@ -36,11 +36,12 @@ inline const TypeMeta& TypeNPYToMeta(int npy_type) {
{ NPY_INT64, TypeMeta::Make<int64_t>() }, { NPY_INT64, TypeMeta::Make<int64_t>() },
{ NPY_FLOAT64, TypeMeta::Make<double>() }, { NPY_FLOAT64, TypeMeta::Make<double>() },
{ NPY_FLOAT16, TypeMeta::Make<float16>() }, { NPY_FLOAT16, TypeMeta::Make<float16>() },
{ NPY_UINT8, TypeMeta::Make<uint8_t>() }, { NPY_UINT8, TypeMeta::Make<uint8>() },
{ NPY_INT8, TypeMeta::Make<char>() }, { NPY_INT8, TypeMeta::Make<int8>() },
}; };
static TypeMeta unknown_type; static TypeMeta unknown_type;
return npy2m_type_map.count(npy_type) ? npy2m_type_map[npy_type] : unknown_type; return npy2m_type_map.count(npy_type) ?
npy2m_type_map[npy_type] : unknown_type;
} }
#endif // DRAGON_PYTHON_PY_TYPES_H_ #endif // DRAGON_PYTHON_PY_TYPES_H_
\ No newline at end of file
...@@ -26,11 +26,11 @@ option = {} ...@@ -26,11 +26,11 @@ option = {}
REGISTERED_OPERATORS = set(s for s in RegisteredOperatorsCC()) REGISTERED_OPERATORS = set(s for s in RegisteredOperatorsCC())
NO_GRADIENT_OPERATORS = set(s for s in NoGradientOperatorsCC()) NO_GRADIENT_OPERATORS = set(s for s in NoGradientOperatorsCC())
# The current device, 'CPU' or 'CUDA' # The current device, 'CPU', 'CUDA' or 'CNML'
option['device'] = 'CPU' option['device'] = 'CPU'
# The device id # The device id
option['gpu_id'] = 0 option['device_id'] = 0
# Whether to use cuDNN if possible # Whether to use cuDNN if possible
option['use_cudnn'] = False option['use_cudnn'] = False
...@@ -44,6 +44,9 @@ option['debug_mode'] = False ...@@ -44,6 +44,9 @@ option['debug_mode'] = False
# Whether to share grads # Whether to share grads
option['share_grads'] = True option['share_grads'] = True
# Optional graph type
option['graph_type'] = ''
# Whether to log the meta graphs # Whether to log the meta graphs
option['log_meta_graph'] = False option['log_meta_graph'] = False
...@@ -84,7 +87,7 @@ def IsCUDADriverSufficient(): ...@@ -84,7 +87,7 @@ def IsCUDADriverSufficient():
def EnableCUDA(gpu_id=0, use_cudnn=True): def EnableCUDA(gpu_id=0, use_cudnn=True):
"""Enable CUDA mode globally. """Enable NVIDIA's CUDA mode globally.
Parameters Parameters
---------- ----------
...@@ -100,9 +103,28 @@ def EnableCUDA(gpu_id=0, use_cudnn=True): ...@@ -100,9 +103,28 @@ def EnableCUDA(gpu_id=0, use_cudnn=True):
""" """
global option global option
option['device'] = 'CUDA' option['device'] = 'CUDA'
option['gpu_id'] = gpu_id option['device_id'] = gpu_id
option['use_cudnn'] = use_cudnn option['use_cudnn'] = use_cudnn
def EnableCNML(mlu_id=0):
"""Enable Cambricon's CNML mode globally.
Parameters
----------
device_id : int
The id of MLU to use.
Returns
-------
None
"""
global option
option['device'] = 'CNML'
option['device_id'] = mlu_id
# TODO(PhyscalX): please not use @setter # TODO(PhyscalX): please not use @setter
# TODO(PhyscalX): seems that it can't change the global value # TODO(PhyscalX): seems that it can't change the global value
...@@ -133,7 +155,6 @@ def GetRandomSeed(): ...@@ -133,7 +155,6 @@ def GetRandomSeed():
The global random seed. The global random seed.
""" """
global option
return option['random_seed'] return option['random_seed']
...@@ -151,7 +172,7 @@ def SetGPU(id): ...@@ -151,7 +172,7 @@ def SetGPU(id):
""" """
global option global option
option['gpu_id'] = id option['device_id'] = id
def GetGPU(): def GetGPU():
...@@ -163,8 +184,7 @@ def GetGPU(): ...@@ -163,8 +184,7 @@ def GetGPU():
The global id of GPU. The global id of GPU.
""" """
global option return option['device_id']
return option['gpu_id']
def SetDebugMode(enabled=True): def SetDebugMode(enabled=True):
...@@ -186,6 +206,25 @@ def SetDebugMode(enabled=True): ...@@ -186,6 +206,25 @@ def SetDebugMode(enabled=True):
option['debug_mode'] = enabled option['debug_mode'] = enabled
def SetGraphType(graph_type=''):
"""Set the graph type.
If empty, the default DAG graph will be used.
Parameters
----------
graph_type : str
The graph type.
Returns
-------
None
"""
global option
option['graph_type'] = graph_type
def LogMetaGraph(enabled=True): def LogMetaGraph(enabled=True):
"""Enable to log meta graph globally. """Enable to log meta graph globally.
......
...@@ -737,7 +737,7 @@ class Tensor(object): ...@@ -737,7 +737,7 @@ class Tensor(object):
Parameters Parameters
---------- ----------
new_value : basic type, list or numpy.ndarray new_value : number, list or numpy.ndarray
The values to set. The values to set.
Returns Returns
......
...@@ -325,5 +325,7 @@ def GetTensorInfo(tensor, stream=1): ...@@ -325,5 +325,7 @@ def GetTensorInfo(tensor, stream=1):
info['mem'].append('CPU'); info['device_id'] = 0 info['mem'].append('CPU'); info['device_id'] = 0
if 'CUDA' in info: if 'CUDA' in info:
info['mem'].append('CUDA'); info['device_id'] = int(info['CUDA']) info['mem'].append('CUDA'); info['device_id'] = int(info['CUDA'])
if 'CNML' in info:
info['mem'].append('CNML'); info['device_id'] = int(info['CNML'])
info['init'] = len(info['mem']) > 0 info['init'] = len(info['mem']) > 0
return info return info
\ No newline at end of file
...@@ -439,7 +439,7 @@ def FetchTensor(tensor): ...@@ -439,7 +439,7 @@ def FetchTensor(tensor):
Returns Returns
------- -------
numpy.ndarray ndarray
The values copied from the backend. The values copied from the backend.
References References
...@@ -457,7 +457,7 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None): ...@@ -457,7 +457,7 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None):
---------- ----------
tensor : Tensor or str tensor : Tensor or str
The tensor to feed. The tensor to feed.
ndarray : basic type, list or numpy.ndarray ndarray : number, list or ndarray
The values to feed. The values to feed.
force_cpu : boolean force_cpu : boolean
Whether force to feed to cpu context. Whether force to feed to cpu context.
...@@ -488,25 +488,23 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None): ...@@ -488,25 +488,23 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None):
""" """
name = tensor.name if hasattr(tensor, 'name') else str(tensor) name = tensor.name if hasattr(tensor, 'name') else str(tensor)
dev = None if force_cpu is True:
if force_cpu is True: dev = utils.MakeDeviceOption(0, 0) dev = utils.MakeDeviceOption(0, 0)
else: else:
from dragon.core.scope import _DEVICE_SCOPE from dragon.core.scope import _DEVICE_SCOPE
if _DEVICE_SCOPE != '': if _DEVICE_SCOPE != '':
supports = {'/cpu': 0, '/gpu': 1} supports = {'/cpu': 0, '/gpu': 1, '/mlu': 2}
dev = pb.DeviceOption() dev = pb.DeviceOption()
dev.device_type = supports[_DEVICE_SCOPE.split(':')[0]] dev.device_type = supports[_DEVICE_SCOPE.split(':')[0]]
dev.gpu_id = int(_DEVICE_SCOPE.split(':')[1]) dev.device_id = int(_DEVICE_SCOPE.split(':')[1])
else: else:
from dragon.config import option from dragon.config import option
if option['device'] == 'CUDA': if option['device'] == 'CUDA':
dev = utils.MakeDeviceOption(1, option['gpu_id']) dev = utils.MakeDeviceOption(1, option['device_id'])
elif option['device'] == 'CPU': else:
dev = utils.MakeDeviceOption(0, 0) dev = utils.MakeDeviceOption(0, 0)
if not isinstance(array, np.ndarray): if not isinstance(array, np.ndarray):
if not isinstance(array, list):
array = [array]
auto_data_type = np.float32 if dtype is None else dtype auto_data_type = np.float32 if dtype is None else dtype
else: else:
auto_data_type = array.dtype if dtype is None else dtype auto_data_type = array.dtype if dtype is None else dtype
...@@ -573,8 +571,8 @@ def RunGraph(graph_name, inputs=(), outputs=[], stage=None, return_outputs=True) ...@@ -573,8 +571,8 @@ def RunGraph(graph_name, inputs=(), outputs=[], stage=None, return_outputs=True)
Returns Returns
------- -------
None, numpy.ndarray or list of numpy.ndarray None, ndarray or list of ndarray
The outputs, format as numpy.ndarray. The outputs, format as ndarray.
See Also See Also
-------- --------
......
...@@ -42,6 +42,7 @@ List Brief ...@@ -42,6 +42,7 @@ List Brief
`BilinearResize`_ Resize the image with Bi-linear method. `BilinearResize`_ Resize the image with Bi-linear method.
`BiasAdd`_ Add the bias across channels to a ``NCHW`` or ``NHWC`` input. `BiasAdd`_ Add the bias across channels to a ``NCHW`` or ``NHWC`` input.
`DenseConcat`_ Memory-efficient concatenation for DenseNet. `[Huang et.al, 2017] <http://arxiv.org/abs/1608.06993>`_. `DenseConcat`_ Memory-efficient concatenation for DenseNet. `[Huang et.al, 2017] <http://arxiv.org/abs/1608.06993>`_.
`DropBlock2d`_ Randomly drop the outputs according to the spatial blocks. `[Ghiasi et.al, 2018] <https://arxiv.org/abs/1810.12890>`_.
=================== ====================================================================== =================== ======================================================================
Recurrent Recurrent
...@@ -76,6 +77,7 @@ Loss ...@@ -76,6 +77,7 @@ Loss
============================= ====================================================================== ============================= ======================================================================
List Brief List Brief
============================= ====================================================================== ============================= ======================================================================
`NLLLoss`_ Negative likelihood loss with sparse labels.
`SparseSoftmaxCrossEntropy`_ SoftmaxCrossEntropy with sparse labels. `SparseSoftmaxCrossEntropy`_ SoftmaxCrossEntropy with sparse labels.
`SigmoidCrossEntropy`_ SigmoidCrossEntropy. `SigmoidCrossEntropy`_ SigmoidCrossEntropy.
`SoftmaxCrossEntropy`_ SoftmaxCrossEntropy with dense(one-hot) labels. `SoftmaxCrossEntropy`_ SoftmaxCrossEntropy with dense(one-hot) labels.
...@@ -102,6 +104,8 @@ List Brief ...@@ -102,6 +104,8 @@ List Brief
`Exp`_ Calculate the exponential of input. `Exp`_ Calculate the exponential of input.
`Square`_ Calculate the square of input. `Square`_ Calculate the square of input.
`Sqrt`_ Calculate the sqrt of input. `Sqrt`_ Calculate the sqrt of input.
`Maximum`_ Return the max value of given two inputs.
`Minimum`_ Return the min value of given two inputs.
`Clip`_ Clip the input to be between lower and higher bounds. `Clip`_ Clip the input to be between lower and higher bounds.
`Matmul`_ Matrix Multiplication. `Matmul`_ Matrix Multiplication.
`InnerProduct`_ InnerProduct Function. `InnerProduct`_ InnerProduct Function.
...@@ -215,6 +219,7 @@ List Brief ...@@ -215,6 +219,7 @@ List Brief
.. _BilinearResize: operators/vision.html#dragon.operators.vision.BilinearResize .. _BilinearResize: operators/vision.html#dragon.operators.vision.BilinearResize
.. _BiasAdd: operators/vision.html#dragon.operators.vision.BiasAdd .. _BiasAdd: operators/vision.html#dragon.operators.vision.BiasAdd
.. _DenseConcat: operators/vision.html#dragon.operators.vision.DenseConcat .. _DenseConcat: operators/vision.html#dragon.operators.vision.DenseConcat
.. _DropBlock2d: operators/vision.html#dragon.operators.vision.DropBlock2d
.. _RNN: operators/recurrent.html#dragon.operators.recurrent.RNN .. _RNN: operators/recurrent.html#dragon.operators.recurrent.RNN
.. _LSTM: operators/recurrent.html#dragon.operators.recurrent.LSTM .. _LSTM: operators/recurrent.html#dragon.operators.recurrent.LSTM
...@@ -231,6 +236,7 @@ List Brief ...@@ -231,6 +236,7 @@ List Brief
.. _Softmax: operators/activation.html#dragon.operators.activation.Softmax .. _Softmax: operators/activation.html#dragon.operators.activation.Softmax
.. _Dropout: operators/activation.html#dragon.operators.activation.Dropout .. _Dropout: operators/activation.html#dragon.operators.activation.Dropout
.. _NLLLoss: operators/loss.html#dragon.operators.loss.NLLLoss
.. _SparseSoftmaxCrossEntropy: operators/loss.html#dragon.operators.loss.SparseSoftmaxCrossEntropy .. _SparseSoftmaxCrossEntropy: operators/loss.html#dragon.operators.loss.SparseSoftmaxCrossEntropy
.. _SigmoidCrossEntropy: operators/loss.html#dragon.operators.loss.SigmoidCrossEntropy .. _SigmoidCrossEntropy: operators/loss.html#dragon.operators.loss.SigmoidCrossEntropy
.. _SoftmaxCrossEntropy: operators/loss.html#dragon.operators.loss.SoftmaxCrossEntropy .. _SoftmaxCrossEntropy: operators/loss.html#dragon.operators.loss.SoftmaxCrossEntropy
...@@ -246,6 +252,8 @@ List Brief ...@@ -246,6 +252,8 @@ List Brief
.. _Mul: operators/arithmetic.html#dragon.operators.arithmetic.Mul .. _Mul: operators/arithmetic.html#dragon.operators.arithmetic.Mul
.. _Div: operators/arithmetic.html#dragon.operators.arithmetic.Div .. _Div: operators/arithmetic.html#dragon.operators.arithmetic.Div
.. _Clip: operators/arithmetic.html#dragon.operators.arithmetic.Clip .. _Clip: operators/arithmetic.html#dragon.operators.arithmetic.Clip
.. _Maximum: operators/arithmetic.html#dragon.operators.arithmetic.Maximum
.. _Minimum: operators/arithmetic.html#dragon.operators.arithmetic.Minimum
.. _Pow: operators/arithmetic.html#dragon.operators.arithmetic.Pow .. _Pow: operators/arithmetic.html#dragon.operators.arithmetic.Pow
.. _Log: operators/arithmetic.html#dragon.operators.arithmetic.Log .. _Log: operators/arithmetic.html#dragon.operators.arithmetic.Log
.. _Exp: operators/arithmetic.html#dragon.operators.arithmetic.Exp .. _Exp: operators/arithmetic.html#dragon.operators.arithmetic.Exp
......
...@@ -32,6 +32,7 @@ List Brief ...@@ -32,6 +32,7 @@ List Brief
`LRNLayer`_ The implementation of ``LRNLayer``. `LRNLayer`_ The implementation of ``LRNLayer``.
`NNResizeLayer`_ The implementation of ``NNResizeLayer``. `NNResizeLayer`_ The implementation of ``NNResizeLayer``.
`BilinearResizeLayer`_ The implementation of ``BilinearResizeLayer``. `BilinearResizeLayer`_ The implementation of ``BilinearResizeLayer``.
`DropBlockLayer`_ The implementation of ``DropBlockLayer``.
====================== ============================================================================= ====================== =============================================================================
...@@ -160,6 +161,7 @@ API Reference ...@@ -160,6 +161,7 @@ API Reference
.. _LRNLayer: #dragon.vm.caffe.layers.vision.LRNLayer .. _LRNLayer: #dragon.vm.caffe.layers.vision.LRNLayer
.. _NNResizeLayer: #dragon.vm.caffe.layers.vision.NNResizeLayer .. _NNResizeLayer: #dragon.vm.caffe.layers.vision.NNResizeLayer
.. _BilinearResizeLayer: #dragon.vm.caffe.layers.vision.BilinearResizeLayer .. _BilinearResizeLayer: #dragon.vm.caffe.layers.vision.BilinearResizeLayer
.. _DropBlockLayer: #dragon.vm.caffe.layers.vision.DropBlockLayer
.. _ReLULayer: #dragon.vm.caffe.layers.neuron.ReLULayer .. _ReLULayer: #dragon.vm.caffe.layers.neuron.ReLULayer
.. _PReLULayer: #dragon.vm.caffe.layers.neuron.PReLULayer .. _PReLULayer: #dragon.vm.caffe.layers.neuron.PReLULayer
......
...@@ -15,6 +15,7 @@ from __future__ import print_function ...@@ -15,6 +15,7 @@ from __future__ import print_function
import sys import sys
import logging import logging
import atexit
try: try:
from dragon.libdragon import * from dragon.libdragon import *
...@@ -22,3 +23,5 @@ except ImportError as e: ...@@ -22,3 +23,5 @@ except ImportError as e:
logging.critical( logging.critical(
'Cannot import dragon. Error: {0}'.format(str(e))) 'Cannot import dragon. Error: {0}'.format(str(e)))
sys.exit(1) sys.exit(1)
atexit.register(OnModuleExitCC)
\ No newline at end of file
...@@ -101,7 +101,8 @@ class DataTransformer(Process): ...@@ -101,7 +101,8 @@ class DataTransformer(Process):
im = im.reshape((datum.height, datum.width, datum.channels)) im = im.reshape((datum.height, datum.width, datum.channels))
# random scale # random scale
random_scale = npr.uniform() * (self._max_random_scale - self._min_random_scale) \ random_scale = npr.uniform() * (
self._max_random_scale - self._min_random_scale) \
+ self._min_random_scale + self._min_random_scale
if random_scale != 1.0: if random_scale != 1.0:
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
...@@ -110,7 +111,9 @@ class DataTransformer(Process): ...@@ -110,7 +111,9 @@ class DataTransformer(Process):
else: else:
# Fuck Fuck Fuck opencv-python2, it always has a BUG # Fuck Fuck Fuck opencv-python2, it always has a BUG
# that leads to duplicate cuDA handles created at gpu:0 # that leads to duplicate cuDA handles created at gpu:0
new_shape = (int(im.shape[1] * random_scale), int(im.shape[0] * random_scale)) new_shape = (
int(np.ceil(im.shape[1] * random_scale)),
int(np.ceil(im.shape[0] * random_scale)))
im = PIL.Image.fromarray(im) im = PIL.Image.fromarray(im)
im = im.resize(new_shape, PIL.Image.BILINEAR) im = im.resize(new_shape, PIL.Image.BILINEAR)
im = np.array(im) im = np.array(im)
......
...@@ -9,10 +9,12 @@ ...@@ -9,10 +9,12 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import numpy as np
from dragon.core.tensor import Tensor from dragon.core.tensor import Tensor
INT_MAX = 2147483647 INT_MAX = 2147483647
def CheckInputs(inputs, *args): def CheckInputs(inputs, *args):
def Verify(inputs, min_num, max_num): def Verify(inputs, min_num, max_num):
# type checking # type checking
...@@ -44,6 +46,17 @@ def ParseArguments(locals): ...@@ -44,6 +46,17 @@ def ParseArguments(locals):
return dict(__all__, **kwargs) return dict(__all__, **kwargs)
def WrapConstants(constants, dtype='float32'):
if not isinstance(constants, Tensor):
if not isinstance(constants, np.ndarray):
constants = np.array(constants, dtype=dtype)
tensor = Tensor()
tensor.set_value(constants)
tensor.shape = constants.shape
constants = tensor
return constants
def AddArgumentWithDesc(arguments, property, name, as_target=True): def AddArgumentWithDesc(arguments, property, name, as_target=True):
if isinstance(property, Tensor): if isinstance(property, Tensor):
if as_target: if as_target:
......
...@@ -115,6 +115,70 @@ def Div(inputs, **kwargs): ...@@ -115,6 +115,70 @@ def Div(inputs, **kwargs):
return output return output
def Maximum(inputs, **kwargs):
"""Return the max value of given two inputs.
Parameters
----------
inputs : list
The input tensors, A and B.
Returns
-------
Tensor
The output tensor.
"""
inputs[0] = WrapConstants(inputs[0], dtype='float32')
inputs[1] = WrapConstants(inputs[1], dtype='float32')
CheckInputs(inputs, 2)
arguments = ParseArguments(locals())
output = Tensor.CreateOperator(nout=1, op_type='Maximum', **arguments)
if inputs[0].shape is not None and \
inputs[1].shape is not None:
output.shape = inputs[0].shape[:]
if output.shape != inputs[1].shape and \
len(output.shape) < len(inputs[1].shape):
output.shape = inputs[1].shape
return output
def Minimum(inputs, **kwargs):
"""Return the min value of given two inputs.
Parameters
----------
inputs : list
The input tensors, A and B.
Returns
-------
Tensor
The output tensor.
"""
inputs[0] = WrapConstants(inputs[0], dtype='float32')
inputs[1] = WrapConstants(inputs[1], dtype='float32')
CheckInputs(inputs, 2)
arguments = ParseArguments(locals())
output = Tensor.CreateOperator(nout=1, op_type='Minimum', **arguments)
if inputs[0].shape is not None and \
inputs[1].shape is not None:
output.shape = inputs[0].shape[:]
if output.shape != inputs[1].shape and \
len(output.shape) < len(inputs[1].shape):
output.shape = inputs[1].shape
return output
def Clip(inputs, low=None, high=None, **kwargs): def Clip(inputs, low=None, high=None, **kwargs):
"""Clip the input to be between lower and higher bounds. """Clip the input to be between lower and higher bounds.
......
...@@ -36,15 +36,19 @@ def _wrap_output_shape(output, shape): ...@@ -36,15 +36,19 @@ def _wrap_output_shape(output, shape):
return output return output
def Fill(shape, value=0, **kwargs): def Fill(shape, value=0, dtype='float32', **kwargs):
"""Return a Tensor with specific value filled. """Return a Tensor with specific value filled.
If ``dtype`` is None, tensor
Parameters Parameters
---------- ----------
shape : list, tuple or Tensor shape : list, tuple or Tensor
The output shape. The output shape.
value : basic numerical type value : basic numerical type
The value to fill. The value to fill.
dtype : str
The optional data type.
Returns Returns
------- -------
......
...@@ -19,6 +19,46 @@ from . import * ...@@ -19,6 +19,46 @@ from . import *
from .activation import Softmax from .activation import Softmax
def NLLLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs):
"""Negative likelihood loss with sparse labels.
Parameters
----------
inputs : list of Tensor
The inputs, represent [input, sparse_labels].
axis : int
The axis of softmax function.
normalization : str
The normalization, ``UNIT``, ``FULL``, ``VALID``, ``BATCH_SIZE`` or ``NONE``.
ignore_label : tuple or list
The label id to ignore. Default is ``empty``.
Returns
-------
Tensor
The loss.
Notes
-----
Set the normalization to ``UNIT`` will return unreduced losses.
"""
CheckInputs(inputs, 2)
arguments = ParseArguments(locals())
output = Tensor.CreateOperator(nout=1, op_type='NLLLoss', **arguments)
if inputs[0].shape is not None:
if normalization != 'UNIT': output.shape = [1]
elif all(dim is not None for dim in inputs[0].shape):
outer_dim = int(np.prod(inputs[0].shape[0 : axis]))
inner_dim = int(np.prod(inputs[0].shape[axis + 1 :]))
output.shape = [outer_dim * inner_dim]
else: output.shape = [None]
return output
def SparseSoftmaxCrossEntropy(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs): def SparseSoftmaxCrossEntropy(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs):
"""SoftmaxCrossEntropy with sparse labels. """SoftmaxCrossEntropy with sparse labels.
......
...@@ -16,8 +16,10 @@ from __future__ import print_function ...@@ -16,8 +16,10 @@ from __future__ import print_function
from . import * from . import *
def BatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3, def BatchNorm(
use_stats=-1, mode='DEFAULT', **kwargs): inputs, axis=-1, momentum=0.9, eps=1e-5,
use_stats=-1, mode='DEFAULT', **kwargs
):
"""Batch Normalization. `[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_. """Batch Normalization. `[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_.
It follows the implementation of `Caffe`_, that scale procedure is moved to `ops.Scale(*args, **kwargs)`_. It follows the implementation of `Caffe`_, that scale procedure is moved to `ops.Scale(*args, **kwargs)`_.
...@@ -70,9 +72,11 @@ def BatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3, ...@@ -70,9 +72,11 @@ def BatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3,
return output return output
def BatchRenorm(inputs, axis=-1, momentum=0.9, eps=1e-3, def BatchRenorm(
inputs, axis=-1, momentum=0.9, eps=1e-5,
r_max=3.0, d_max=5.0, t_delta=0.001, r_max=3.0, d_max=5.0, t_delta=0.001,
use_stats=-1, mode='DEFAULT', **kwargs): use_stats=-1, mode='DEFAULT', **kwargs
):
"""Batch Renormalization. `[Ioffe, 2017] <https://arxiv.org/abs/1702.03275>`_. """Batch Renormalization. `[Ioffe, 2017] <https://arxiv.org/abs/1702.03275>`_.
It follows the implementation of `Caffe`_, that scale procedure is moved to `ops.Scale(*args, **kwargs)`_. It follows the implementation of `Caffe`_, that scale procedure is moved to `ops.Scale(*args, **kwargs)`_.
...@@ -131,7 +135,10 @@ def BatchRenorm(inputs, axis=-1, momentum=0.9, eps=1e-3, ...@@ -131,7 +135,10 @@ def BatchRenorm(inputs, axis=-1, momentum=0.9, eps=1e-3,
return output return output
def FusedBatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3, use_stats=-1, **kwargs): def FusedBatchNorm(
inputs, axis=-1, momentum=0.9, eps=1e-5,
use_stats=-1, **kwargs
):
"""Batch Normalization, with scale procedure after normalization. """Batch Normalization, with scale procedure after normalization.
Parameters Parameters
...@@ -170,7 +177,7 @@ def FusedBatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3, use_stats=-1, **kwar ...@@ -170,7 +177,7 @@ def FusedBatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3, use_stats=-1, **kwar
return output return output
def GroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs): def GroupNorm(inputs, group=32, axis=-1, eps=1e-5, **kwargs):
"""Group Normalization. `[Wu & He, 2018] <https://arxiv.org/abs/1803.08494>`_. """Group Normalization. `[Wu & He, 2018] <https://arxiv.org/abs/1803.08494>`_.
Parameters Parameters
...@@ -203,7 +210,7 @@ def GroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs): ...@@ -203,7 +210,7 @@ def GroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs):
return output return output
def FusedGroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs): def FusedGroupNorm(inputs, group=32, axis=-1, eps=1e-5, **kwargs):
"""Group Normalization, with scale procedure after normalization. """Group Normalization, with scale procedure after normalization.
Parameters Parameters
...@@ -236,7 +243,7 @@ def FusedGroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs): ...@@ -236,7 +243,7 @@ def FusedGroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs):
return output return output
def InstanceNorm(inputs, axis=-1, eps=1e-3, **kwargs): def InstanceNorm(inputs, axis=-1, eps=1e-5, **kwargs):
"""Instance Normalization. `[Ulyanov et.al, 2016] <https://arxiv.org/abs/1607.08022>`_ """Instance Normalization. `[Ulyanov et.al, 2016] <https://arxiv.org/abs/1607.08022>`_
Parameters Parameters
......
...@@ -630,3 +630,44 @@ def DenseConcat(inputs, growth_rate=0, axis=1, **kwargs): ...@@ -630,3 +630,44 @@ def DenseConcat(inputs, growth_rate=0, axis=1, **kwargs):
output.shape[axis] += inputs[i].shape[axis] output.shape[axis] += inputs[i].shape[axis]
return output return output
def DropBlock2d(inputs, block_size=7, keep_prob=0.9,
alpha=1., decrement=0., data_format='NCHW', **kwargs):
"""Randomly drop the outputs according to the spatial blocks. `[Ghiasi et.al, 2018] <https://arxiv.org/abs/1810.12890>`_.
Set the ``decrement`` to schedule ``keep_prob`` for each iteration.
Set the ``alpha`` to decrease ``gamma`` for different stages.
Parameters
----------
inputs : Tensor
The input tensor.
block_size : int
The size of dropping block.
keep_prob : float or Tensor
The prob of keeping. Default is ``0.9``.
alpha : float
The scale factor to gamma.
decrement : float
The decrement to keep prob.
data_format : str
The data format, ``NCHW`` or ``NHWC``.
Returns
-------
Tensor
The output tensor.
"""
CheckInputs(inputs, 1)
arguments = ParseArguments(locals())
arguments = AddArgumentWithDesc(arguments, keep_prob, 'keep_prob', as_target=False)
output = Tensor.CreateOperator(nout=1, op_type='DropBlock2d', **arguments)
if inputs.shape is not None:
output.shape = inputs.shape[:]
return output
\ No newline at end of file
...@@ -51,6 +51,7 @@ NNResize = vision.NNResize ...@@ -51,6 +51,7 @@ NNResize = vision.NNResize
BilinearResize = vision.BilinearResize BilinearResize = vision.BilinearResize
BiasAdd = vision.BiasAdd BiasAdd = vision.BiasAdd
DenseConcat = vision.DenseConcat DenseConcat = vision.DenseConcat
DropBlock2d = vision.DropBlock2d
# recurrent # recurrent
LSTMCell = recurrent.LSTMCell LSTMCell = recurrent.LSTMCell
...@@ -70,6 +71,7 @@ Softmax = act.Softmax ...@@ -70,6 +71,7 @@ Softmax = act.Softmax
Dropout = act.Dropout Dropout = act.Dropout
# loss # loss
NLLLoss = loss.NLLLoss
SparseSoftmaxCrossEntropy = loss.SparseSoftmaxCrossEntropy SparseSoftmaxCrossEntropy = loss.SparseSoftmaxCrossEntropy
SigmoidCrossEntropy = loss.SigmoidCrossEntropy SigmoidCrossEntropy = loss.SigmoidCrossEntropy
SoftmaxCrossEntropy = loss.SoftmaxCrossEntropy SoftmaxCrossEntropy = loss.SoftmaxCrossEntropy
...@@ -85,6 +87,8 @@ Add = math.Add ...@@ -85,6 +87,8 @@ Add = math.Add
Sub = math.Sub Sub = math.Sub
Mul = math.Mul Mul = math.Mul
Div = math.Div Div = math.Div
Maximum = math.Maximum
Minimum = math.Minimum
Clip = math.Clip Clip = math.Clip
Matmul = math.Matmul Matmul = math.Matmul
Pow = math.Pow Pow = math.Pow
......
...@@ -35,7 +35,11 @@ message Argument { ...@@ -35,7 +35,11 @@ message Argument {
repeated string strings=7; repeated string strings=7;
} }
enum DeviceType { CPU = 0; CUDA = 1; OPENCL = 2; } enum DeviceType {
CPU = 0;
CUDA = 1;
CNML = 2;
}
message DeviceOption { message DeviceOption {
optional DeviceType device_type = 1 [default = CPU]; optional DeviceType device_type = 1 [default = CPU];
......
...@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default() ...@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor( DESCRIPTOR = _descriptor.FileDescriptor(
name='dragon.proto', name='dragon.proto',
package='dragon', package='dragon',
serialized_pb=_b('\n\x0c\x64ragon.proto\x12\x06\x64ragon\"\xfe\x01\n\x0bTensorProto\x12\x0c\n\x04\x64ims\x18\x01 \x03(\x05\x12\x36\n\tdata_type\x18\x02 \x01(\x0e\x32\x1c.dragon.TensorProto.DataType:\x05\x46LOAT\x12\x16\n\nfloat_data\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x16\n\nint32_data\x18\x04 \x03(\x05\x42\x02\x10\x01\x12\x11\n\tbyte_data\x18\x05 \x01(\x0c\x12\x13\n\x0bstring_data\x18\x06 \x03(\x0c\x12\x0c\n\x04name\x18\x07 \x01(\t\"C\n\x08\x44\x61taType\x12\t\n\x05\x46LOAT\x10\x01\x12\t\n\x05INT32\x10\x02\x12\x08\n\x04\x42YTE\x10\x03\x12\n\n\x06STRING\x10\x04\x12\x0b\n\x07\x46LOAT16\x10\x0c\"3\n\x0cTensorProtos\x12#\n\x06protos\x18\x01 \x03(\x0b\x32\x13.dragon.TensorProto\"\x80\x01\n\x08\x41rgument\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\t\n\x01\x66\x18\x02 \x01(\x02\x12\t\n\x01i\x18\x03 \x01(\x05\x12\x0b\n\x03i64\x18\t \x01(\x03\x12\t\n\x01s\x18\x04 \x01(\t\x12\t\n\x01\x62\x18\x08 \x01(\x08\x12\x0e\n\x06\x66loats\x18\x05 \x03(\x02\x12\x0c\n\x04ints\x18\x06 \x03(\x05\x12\x0f\n\x07strings\x18\x07 \x03(\t\"z\n\x0c\x44\x65viceOption\x12,\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\x12.dragon.DeviceType:\x03\x43PU\x12\x14\n\tdevice_id\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0brandom_seed\x18\x03 \x01(\r:\x01\x33\x12\x0e\n\x06\x65ngine\x18\x04 \x01(\t\"\x94\x01\n\x0bOperatorDef\x12\r\n\x05input\x18\x01 \x03(\t\x12\x0e\n\x06output\x18\x02 \x03(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0c\n\x04type\x18\x04 \x01(\t\x12\x1d\n\x03\x61rg\x18\x05 \x03(\x0b\x32\x10.dragon.Argument\x12+\n\rdevice_option\x18\x06 \x01(\x0b\x32\x14.dragon.DeviceOption\"=\n\x0eGradientTarget\x12\x0c\n\x04\x63ost\x18\x01 \x01(\t\x12\x0b\n\x03wrt\x18\x02 \x01(\t\x12\x10\n\x08\x65xternal\x18\x03 \x01(\t\"Y\n\x0cUpdateTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06tensor\x18\x03 \x03(\t\x12\x1d\n\x03\x61rg\x18\x04 \x03(\x0b\x32\x10.dragon.Argument\"\x94\x02\n\x0cTensorFiller\x12\x0e\n\x06tensor\x18\x01 \x01(\t\x12\x16\n\x04type\x18\x02 
\x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03low\x18\x04 \x01(\x02:\x01\x30\x12\x0f\n\x04high\x18\x05 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x06 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x07 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x08 \x01(\x02:\x01\x33\x12@\n\rvariance_norm\x18\t \x01(\x0e\x32!.dragon.TensorFiller.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"\xfb\x01\n\x08GraphDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1f\n\x02op\x18\x02 \x03(\x0b\x32\x13.dragon.OperatorDef\x12\x12\n\ngraph_type\x18\x03 \x01(\t\x12+\n\rdevice_option\x18\x05 \x01(\x0b\x32\x14.dragon.DeviceOption\x12\x1d\n\x03\x61rg\x18\x06 \x03(\x0b\x32\x10.dragon.Argument\x12\x0e\n\x06target\x18\x07 \x03(\t\x12(\n\x08g_target\x18\x08 \x03(\x0b\x32\x16.dragon.GradientTarget\x12&\n\x08u_target\x18\t \x03(\x0b\x32\x14.dragon.UpdateTarget*+\n\nDeviceType\x12\x07\n\x03\x43PU\x10\x00\x12\x08\n\x04\x43UDA\x10\x01\x12\n\n\x06OPENCL\x10\x02') serialized_pb=_b('\n\x0c\x64ragon.proto\x12\x06\x64ragon\"\xfe\x01\n\x0bTensorProto\x12\x0c\n\x04\x64ims\x18\x01 \x03(\x05\x12\x36\n\tdata_type\x18\x02 \x01(\x0e\x32\x1c.dragon.TensorProto.DataType:\x05\x46LOAT\x12\x16\n\nfloat_data\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x16\n\nint32_data\x18\x04 \x03(\x05\x42\x02\x10\x01\x12\x11\n\tbyte_data\x18\x05 \x01(\x0c\x12\x13\n\x0bstring_data\x18\x06 \x03(\x0c\x12\x0c\n\x04name\x18\x07 \x01(\t\"C\n\x08\x44\x61taType\x12\t\n\x05\x46LOAT\x10\x01\x12\t\n\x05INT32\x10\x02\x12\x08\n\x04\x42YTE\x10\x03\x12\n\n\x06STRING\x10\x04\x12\x0b\n\x07\x46LOAT16\x10\x0c\"3\n\x0cTensorProtos\x12#\n\x06protos\x18\x01 \x03(\x0b\x32\x13.dragon.TensorProto\"\x80\x01\n\x08\x41rgument\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\t\n\x01\x66\x18\x02 \x01(\x02\x12\t\n\x01i\x18\x03 \x01(\x05\x12\x0b\n\x03i64\x18\t \x01(\x03\x12\t\n\x01s\x18\x04 \x01(\t\x12\t\n\x01\x62\x18\x08 
\x01(\x08\x12\x0e\n\x06\x66loats\x18\x05 \x03(\x02\x12\x0c\n\x04ints\x18\x06 \x03(\x05\x12\x0f\n\x07strings\x18\x07 \x03(\t\"z\n\x0c\x44\x65viceOption\x12,\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\x12.dragon.DeviceType:\x03\x43PU\x12\x14\n\tdevice_id\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0brandom_seed\x18\x03 \x01(\r:\x01\x33\x12\x0e\n\x06\x65ngine\x18\x04 \x01(\t\"\x94\x01\n\x0bOperatorDef\x12\r\n\x05input\x18\x01 \x03(\t\x12\x0e\n\x06output\x18\x02 \x03(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0c\n\x04type\x18\x04 \x01(\t\x12\x1d\n\x03\x61rg\x18\x05 \x03(\x0b\x32\x10.dragon.Argument\x12+\n\rdevice_option\x18\x06 \x01(\x0b\x32\x14.dragon.DeviceOption\"=\n\x0eGradientTarget\x12\x0c\n\x04\x63ost\x18\x01 \x01(\t\x12\x0b\n\x03wrt\x18\x02 \x01(\t\x12\x10\n\x08\x65xternal\x18\x03 \x01(\t\"Y\n\x0cUpdateTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06tensor\x18\x03 \x03(\t\x12\x1d\n\x03\x61rg\x18\x04 \x03(\x0b\x32\x10.dragon.Argument\"\x94\x02\n\x0cTensorFiller\x12\x0e\n\x06tensor\x18\x01 \x01(\t\x12\x16\n\x04type\x18\x02 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03low\x18\x04 \x01(\x02:\x01\x30\x12\x0f\n\x04high\x18\x05 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x06 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x07 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x08 \x01(\x02:\x01\x33\x12@\n\rvariance_norm\x18\t \x01(\x0e\x32!.dragon.TensorFiller.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"\xfb\x01\n\x08GraphDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1f\n\x02op\x18\x02 \x03(\x0b\x32\x13.dragon.OperatorDef\x12\x12\n\ngraph_type\x18\x03 \x01(\t\x12+\n\rdevice_option\x18\x05 \x01(\x0b\x32\x14.dragon.DeviceOption\x12\x1d\n\x03\x61rg\x18\x06 \x03(\x0b\x32\x10.dragon.Argument\x12\x0e\n\x06target\x18\x07 \x03(\t\x12(\n\x08g_target\x18\x08 
\x03(\x0b\x32\x16.dragon.GradientTarget\x12&\n\x08u_target\x18\t \x03(\x0b\x32\x14.dragon.UpdateTarget*)\n\nDeviceType\x12\x07\n\x03\x43PU\x10\x00\x12\x08\n\x04\x43UDA\x10\x01\x12\x08\n\x04\x43NML\x10\x02')
) )
_sym_db.RegisterFileDescriptor(DESCRIPTOR) _sym_db.RegisterFileDescriptor(DESCRIPTOR)
...@@ -38,21 +38,21 @@ _DEVICETYPE = _descriptor.EnumDescriptor( ...@@ -38,21 +38,21 @@ _DEVICETYPE = _descriptor.EnumDescriptor(
options=None, options=None,
type=None), type=None),
_descriptor.EnumValueDescriptor( _descriptor.EnumValueDescriptor(
name='OPENCL', index=2, number=2, name='CNML', index=2, number=2,
options=None, options=None,
type=None), type=None),
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=1427, serialized_start=1427,
serialized_end=1470, serialized_end=1468,
) )
_sym_db.RegisterEnumDescriptor(_DEVICETYPE) _sym_db.RegisterEnumDescriptor(_DEVICETYPE)
DeviceType = enum_type_wrapper.EnumTypeWrapper(_DEVICETYPE) DeviceType = enum_type_wrapper.EnumTypeWrapper(_DEVICETYPE)
CPU = 0 CPU = 0
CUDA = 1 CUDA = 1
OPENCL = 2 CNML = 2
_TENSORPROTO_DATATYPE = _descriptor.EnumDescriptor( _TENSORPROTO_DATATYPE = _descriptor.EnumDescriptor(
......
...@@ -14,7 +14,6 @@ from __future__ import division ...@@ -14,7 +14,6 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import pprint import pprint
import numpy as np
import dragon.core.workspace as ws import dragon.core.workspace as ws
from dragon.core.tensor import Tensor from dragon.core.tensor import Tensor
...@@ -43,7 +42,7 @@ class BaseUpdater(object): ...@@ -43,7 +42,7 @@ class BaseUpdater(object):
self._defaults = { self._defaults = {
'scale_gradient': scale_gradient, 'scale_gradient': scale_gradient,
'clip_gradient': clip_gradient, 'clip_gradient': clip_gradient,
'l2_decay': l2_decay 'l2_decay': l2_decay,
} }
self._param_group = [] self._param_group = []
self._slot = slot self._slot = slot
...@@ -77,7 +76,7 @@ class BaseUpdater(object): ...@@ -77,7 +76,7 @@ class BaseUpdater(object):
defaults = self.__dict__.get('_defaults') defaults = self.__dict__.get('_defaults')
if item in defaults: if item in defaults:
if self._registered: if self._registered:
return ws.FetchTensor(self._slot + '/' + item)[0] return ws.FetchTensor(self._slot + '/' + item)
else: return defaults[item] else: return defaults[item]
return self.__dict__[item] return self.__dict__[item]
...@@ -85,9 +84,8 @@ class BaseUpdater(object): ...@@ -85,9 +84,8 @@ class BaseUpdater(object):
defaults = self.__dict__.get('_defaults') defaults = self.__dict__.get('_defaults')
if defaults is not None and key in defaults: if defaults is not None and key in defaults:
if self._registered: if self._registered:
# convert all defaults as float32 for convenience ws.FeedTensor(self._slot + '/' + key, value,
ws.FeedTensor(self._slot + '/' + key, dtype='float32', force_cpu=True)
np.array([value], dtype=np.float32))
else: else:
self._defaults[key] = value self._defaults[key] = value
else: else:
...@@ -96,8 +94,8 @@ class BaseUpdater(object): ...@@ -96,8 +94,8 @@ class BaseUpdater(object):
def register_in_workspace(self): def register_in_workspace(self):
if not self._registered: if not self._registered:
for k, v in self._defaults.items(): for k, v in self._defaults.items():
# convert all defaults as float32 for convenience ws.FeedTensor(self._slot + "/" + k, v,
ws.FeedTensor(self._slot + "/" + k, np.array([v], dtype=np.float32)) dtype='float32', force_cpu=True)
self._registered = True self._registered = True
if self._verbose: if self._verbose:
from dragon.config import logger from dragon.config import logger
......
...@@ -14,7 +14,7 @@ from __future__ import division ...@@ -14,7 +14,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
version = '0.2.2' version = '0.2.2'
full_version = '0.2.2.11' full_version = '0.2.2.13'
release = False release = False
if not release: if not release:
......
...@@ -19,7 +19,8 @@ from .vision import ConvolutionLayer, \ ...@@ -19,7 +19,8 @@ from .vision import ConvolutionLayer, \
ROIPoolingLayer, \ ROIPoolingLayer, \
ROIAlignLayer, \ ROIAlignLayer, \
NNResizeLayer, \ NNResizeLayer, \
BilinearResizeLayer BilinearResizeLayer, \
DropBlockLayer
from .neuron import ReLULayer, \ from .neuron import ReLULayer, \
PReLULayer, \ PReLULayer, \
......
...@@ -446,10 +446,13 @@ class InstanceNormLayer(Layer): ...@@ -446,10 +446,13 @@ class InstanceNormLayer(Layer):
The implementation of ``InstanceNormLayer``. The implementation of ``InstanceNormLayer``.
Introduced by `[Ulyanov et.al, 2016] <https://arxiv.org/abs/1607.08022>`_ Introduced by `[Ulyanov et.al, 2016] <https://arxiv.org/abs/1607.08022>`_
""" """
def __init__(self, LayerParameter): def __init__(self, LayerParameter):
super(InstanceNormLayer, self).__init__(LayerParameter) super(InstanceNormLayer, self).__init__(LayerParameter)
self._param = {'axis': 1} param = LayerParameter.instance_norm_param
self._param = {'eps': param.eps,
'axis': 1}
def Setup(self, bottom): def Setup(self, bottom):
super(InstanceNormLayer, self).Setup(bottom) super(InstanceNormLayer, self).Setup(bottom)
......
...@@ -250,7 +250,7 @@ class NNResizeLayer(Layer): ...@@ -250,7 +250,7 @@ class NNResizeLayer(Layer):
Parameters Parameters
---------- ----------
shape : caffe_pb2. BlobShape shape : caffe_pb2.BlobShape
The output shape. Refer `ResizeParameter.shape`_. The output shape. Refer `ResizeParameter.shape`_.
fx : float fx : float
The scale factor of height. Refer `ResizeParameter.fx`_. The scale factor of height. Refer `ResizeParameter.fx`_.
...@@ -283,7 +283,7 @@ class BilinearResizeLayer(Layer): ...@@ -283,7 +283,7 @@ class BilinearResizeLayer(Layer):
Parameters Parameters
---------- ----------
shape : caffe_pb2. BlobShape shape : caffe_pb2.BlobShape
The output shape. Refer `ResizeParameter.shape`_. The output shape. Refer `ResizeParameter.shape`_.
fx : float fx : float
The scale factor of height. Refer `ResizeParameter.fx`_. The scale factor of height. Refer `ResizeParameter.fx`_.
...@@ -309,3 +309,33 @@ class BilinearResizeLayer(Layer): ...@@ -309,3 +309,33 @@ class BilinearResizeLayer(Layer):
raise ValueError('The second bottom should be provided to determine the shape.') raise ValueError('The second bottom should be provided to determine the shape.')
self._param['shape_like'] = bottom[1] self._param['shape_like'] = bottom[1]
return ops.BilinearResize(input, **self._param) return ops.BilinearResize(input, **self._param)
class DropBlockLayer(Layer):
"""The implementation of ``DropBlock2dLayer``.
Parameters
----------
block_size : int
The size of dropping block. Refer ``DropBlockParameter.block_size``.
keep_prob : float
The prob of keeping. Refer ``DropBlockParameter.keep_prob``.
alpha : float
The scale factor to gamma. Refer ``DropBlockParameter.alpha``.
decrement : float
The decrement to keep prob. Refer ``DropBlockParameter.decrement``.
"""
def __init__(self, LayerParameter):
super(DropBlockLayer, self).__init__(LayerParameter)
param = LayerParameter.drop_block_param
self._param = {'block_size': param.block_size,
'keep_prob': param.keep_prob,
'alpha': param.alpha,
'decrement': param.decrement,
'data_format': 'NCHW'}
def Setup(self, bottom):
super(DropBlockLayer, self).Setup(bottom)
input = bottom[0] if isinstance(bottom, list) else bottom
return ops.DropBlock2d(input, **self._param)
\ No newline at end of file
...@@ -424,7 +424,9 @@ message LayerParameter { ...@@ -424,7 +424,9 @@ message LayerParameter {
optional DenseConcatParameter dense_concat_param = 163; optional DenseConcatParameter dense_concat_param = 163;
optional FocalLossParameter focal_loss_param = 164; optional FocalLossParameter focal_loss_param = 164;
optional GatherParameter gather_param = 165; optional GatherParameter gather_param = 165;
optional GroupNormParameter group_norm_param = 166; optional InstanceNormParameter instance_norm_param = 166;
optional GroupNormParameter group_norm_param = 167;
optional DropBlockParameter drop_block_param = 168;
} }
// Message that stores parameters used to apply transformation // Message that stores parameters used to apply transformation
...@@ -537,7 +539,7 @@ message BatchNormParameter { ...@@ -537,7 +539,7 @@ message BatchNormParameter {
optional float moving_average_fraction = 2 [default = 0.9]; optional float moving_average_fraction = 2 [default = 0.9];
// Small value to add to the variance estimate so that we don't divide by // Small value to add to the variance estimate so that we don't divide by
// zero. // zero.
optional float eps = 3 [default = 1e-3]; optional float eps = 3 [default = 1e-5];
} }
message BiasParameter { message BiasParameter {
...@@ -595,7 +597,7 @@ message ConvolutionParameter { ...@@ -595,7 +597,7 @@ message ConvolutionParameter {
repeated uint32 stride = 6; // The stride; defaults to 1 repeated uint32 stride = 6; // The stride; defaults to 1
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the // holes. (Kernel dilation is sometimes referred to by its use in the
// algorithme à trous from Holschneider et al. 1987.) // algorithme ¨¤ trous from Holschneider et al. 1987.)
repeated uint32 dilation = 18; // The dilation; defaults to 1 repeated uint32 dilation = 18; // The dilation; defaults to 1
// For 2D convolution only, the *_h and *_w versions may also be used to // For 2D convolution only, the *_h and *_w versions may also be used to
...@@ -1456,7 +1458,7 @@ message NormalizeParameter { ...@@ -1456,7 +1458,7 @@ message NormalizeParameter {
// Whether or not scale parameters are shared across channels. // Whether or not scale parameters are shared across channels.
optional bool channel_shared = 3 [default = true]; optional bool channel_shared = 3 [default = true];
// Epsilon for not dividing by zero while normalizing variance // Epsilon for not dividing by zero while normalizing variance
optional float eps = 4 [default = 1e-3]; optional float eps = 4 [default = 1e-5];
} }
message ParallelParameter { message ParallelParameter {
...@@ -1492,7 +1494,7 @@ message ProposalParameter { ...@@ -1492,7 +1494,7 @@ message ProposalParameter {
message BatchRenormParameter { message BatchRenormParameter {
optional bool use_global_stats = 1; optional bool use_global_stats = 1;
optional float moving_average_fraction = 2 [default = 0.9]; optional float moving_average_fraction = 2 [default = 0.9];
optional float eps = 3 [default = 1e-3]; optional float eps = 3 [default = 1e-5];
optional float r_max = 4 [default = 3.0]; optional float r_max = 4 [default = 3.0];
optional float d_max = 5 [default = 5.0]; optional float d_max = 5 [default = 5.0];
optional float t_delta = 6 [default = 0.001]; optional float t_delta = 6 [default = 0.001];
...@@ -1513,17 +1515,18 @@ message GatherParameter { ...@@ -1513,17 +1515,18 @@ message GatherParameter {
optional int32 axis = 1 [default = 0]; optional int32 axis = 1 [default = 0];
} }
message GroupNormParameter { message InstanceNormParameter {
// If false, accumulate global mean/variance values via a moving average. If optional float eps = 1 [default = 1e-5];
// true, use those accumulated values instead of computing mean/variance
// across the batch.
optional bool use_global_stats = 1;
// How much does the moving average decay each iteration?
optional float moving_average_fraction = 2 [default = 0.9];
// Small value to add to the variance estimate so that we don't divide by
// zero.
optional float eps = 3 [default = 1e-3];
optional uint32 group = 5 [default = 32]; // The group size
} }
message GroupNormParameter {
optional float eps = 1 [default = 1e-5];
optional int32 group = 2 [default = 32]; // The group size
}
message DropBlockParameter {
optional int32 block_size = 1 [default = 7];
optional float keep_prob = 2 [default = 0.9];
optional float alpha = 3 [default = 1.0];
optional float decrement = 4 [default = 0.0];
}
...@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default() ...@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor( DESCRIPTOR = _descriptor.FileDescriptor(
name='caffe.proto', name='caffe.proto',
package='caffe', package='caffe',
serialized_pb=_b('\n\x0b\x63\x61\x66\x66\x65.proto\x12\x05\x63\x61\x66\x66\x65\"\x1c\n\tBlobShape\x12\x0f\n\x03\x64im\x18\x01 \x03(\x03\x42\x02\x10\x01\"\xcc\x01\n\tBlobProto\x12\x1f\n\x05shape\x18\x07 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x10\n\x04\x64\x61ta\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04\x64iff\x18\x06 \x03(\x02\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_data\x18\x08 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_diff\x18\t \x03(\x01\x42\x02\x10\x01\x12\x0e\n\x03num\x18\x01 \x01(\x05:\x01\x30\x12\x13\n\x08\x63hannels\x18\x02 \x01(\x05:\x01\x30\x12\x11\n\x06height\x18\x03 \x01(\x05:\x01\x30\x12\x10\n\x05width\x18\x04 \x01(\x05:\x01\x30\"2\n\x0f\x42lobProtoVector\x12\x1f\n\x05\x62lobs\x18\x01 \x03(\x0b\x32\x10.caffe.BlobProto\"\x91\x01\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02\x12\x16\n\x07\x65ncoded\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x0e\n\x06labels\x18\x08 \x03(\x05\"\x8a\x02\n\x0f\x46illerParameter\x12\x16\n\x04type\x18\x01 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x02 \x01(\x02:\x01\x30\x12\x0e\n\x03min\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03max\x18\x04 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x05 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x06 \x01(\x02:\x01\x31\x12\x12\n\x06sparse\x18\x07 \x01(\x05:\x02-1\x12\x42\n\rvariance_norm\x18\x08 \x01(\x0e\x32#.caffe.FillerParameter.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x41VERAGE\x10\x02\"\x8e\x02\n\x0cNetParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05input\x18\x03 \x03(\t\x12%\n\x0binput_shape\x18\x08 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x11\n\tinput_dim\x18\x04 \x03(\x05\x12\x1d\n\x0e\x66orce_backward\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\x1e\n\x05state\x18\x06 
\x01(\x0b\x32\x0f.caffe.NetState\x12\x19\n\ndebug_info\x18\x07 \x01(\x08:\x05\x66\x61lse\x12$\n\x05layer\x18\x64 \x03(\x0b\x32\x15.caffe.LayerParameter\x12\'\n\x06layers\x18\x02 \x03(\x0b\x32\x17.caffe.V1LayerParameter\"\xc9\n\n\x0fSolverParameter\x12\x0b\n\x03net\x18\x18 \x01(\t\x12&\n\tnet_param\x18\x19 \x01(\x0b\x32\x13.caffe.NetParameter\x12\x11\n\ttrain_net\x18\x01 \x01(\t\x12\x10\n\x08test_net\x18\x02 \x03(\t\x12,\n\x0ftrain_net_param\x18\x15 \x01(\x0b\x32\x13.caffe.NetParameter\x12+\n\x0etest_net_param\x18\x16 \x03(\x0b\x32\x13.caffe.NetParameter\x12$\n\x0btrain_state\x18\x1a \x01(\x0b\x32\x0f.caffe.NetState\x12#\n\ntest_state\x18\x1b \x03(\x0b\x32\x0f.caffe.NetState\x12\x11\n\ttest_iter\x18\x03 \x03(\x05\x12\x18\n\rtest_interval\x18\x04 \x01(\x05:\x01\x30\x12 \n\x11test_compute_loss\x18\x13 \x01(\x08:\x05\x66\x61lse\x12!\n\x13test_initialization\x18 \x01(\x08:\x04true\x12\x0f\n\x07\x62\x61se_lr\x18\x05 \x01(\x02\x12\x10\n\x08stage_lr\x18\x32 \x03(\x02\x12\x12\n\nstage_iter\x18\x33 \x03(\x05\x12\x0f\n\x07\x64isplay\x18\x06 \x01(\x05\x12\x17\n\x0c\x61verage_loss\x18! 
\x01(\x05:\x01\x31\x12\x10\n\x08max_iter\x18\x07 \x01(\x05\x12\x14\n\titer_size\x18$ \x01(\x05:\x01\x31\x12\x11\n\tlr_policy\x18\x08 \x01(\t\x12\r\n\x05gamma\x18\t \x01(\x02\x12\r\n\x05power\x18\n \x01(\x02\x12\x10\n\x08momentum\x18\x0b \x01(\x02\x12\x14\n\x0cweight_decay\x18\x0c \x01(\x02\x12\x1f\n\x13regularization_type\x18\x1d \x01(\t:\x02L2\x12\x10\n\x08stepsize\x18\r \x01(\x05\x12\x11\n\tstepvalue\x18\" \x03(\x05\x12\x1a\n\x0e\x63lip_gradients\x18# \x01(\x02:\x02-1\x12\x13\n\x08snapshot\x18\x0e \x01(\x05:\x01\x30\x12\x17\n\x0fsnapshot_prefix\x18\x0f \x01(\t\x12\x1c\n\rsnapshot_diff\x18\x10 \x01(\x08:\x05\x66\x61lse\x12K\n\x0fsnapshot_format\x18% \x01(\x0e\x32%.caffe.SolverParameter.SnapshotFormat:\x0b\x42INARYPROTO\x12;\n\x0bsolver_mode\x18\x11 \x01(\x0e\x32!.caffe.SolverParameter.SolverMode:\x03GPU\x12\x14\n\tdevice_id\x18\x12 \x01(\x05:\x01\x30\x12\x17\n\x0brandom_seed\x18\x14 \x01(\x03:\x02-1\x12\x11\n\x04type\x18( \x01(\t:\x03SGD\x12\x15\n\x05\x64\x65lta\x18\x1f \x01(\x02:\x06\x31\x65-008\x12\x18\n\tmomentum2\x18\' \x01(\x02:\x05\x30.999\x12\x17\n\trms_decay\x18& \x01(\x02:\x04\x30.99\x12\x19\n\ndebug_info\x18\x17 \x01(\x08:\x05\x66\x61lse\x12\"\n\x14snapshot_after_train\x18\x1c \x01(\x08:\x04true\x12;\n\x0bsolver_type\x18\x1e \x01(\x0e\x32!.caffe.SolverParameter.SolverType:\x03SGD\"+\n\x0eSnapshotFormat\x12\x08\n\x04HDF5\x10\x00\x12\x0f\n\x0b\x42INARYPROTO\x10\x01\"\x1e\n\nSolverMode\x12\x07\n\x03\x43PU\x10\x00\x12\x07\n\x03GPU\x10\x01\"U\n\nSolverType\x12\x07\n\x03SGD\x10\x00\x12\x0c\n\x08NESTEROV\x10\x01\x12\x0b\n\x07\x41\x44\x41GRAD\x10\x02\x12\x0b\n\x07RMSPROP\x10\x03\x12\x0c\n\x08\x41\x44\x41\x44\x45LTA\x10\x04\x12\x08\n\x04\x41\x44\x41M\x10\x05\"l\n\x0bSolverState\x12\x0c\n\x04iter\x18\x01 \x01(\x05\x12\x13\n\x0blearned_net\x18\x02 \x01(\t\x12!\n\x07history\x18\x03 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x17\n\x0c\x63urrent_step\x18\x04 \x01(\x05:\x01\x30\"N\n\x08NetState\x12!\n\x05phase\x18\x01 
\x01(\x0e\x32\x0c.caffe.Phase:\x04TEST\x12\x10\n\x05level\x18\x02 \x01(\x05:\x01\x30\x12\r\n\x05stage\x18\x03 \x03(\t\"\x85\x01\n\x0cNetStateRule\x12\x1b\n\x05phase\x18\x01 \x01(\x0e\x32\x0c.caffe.Phase\x12\x11\n\tmin_level\x18\x02 \x01(\x05\x12\x11\n\tmax_level\x18\x03 \x01(\x05\x12\r\n\x05stage\x18\x04 \x03(\t\x12\x11\n\tnot_stage\x18\x05 \x03(\t\x12\x10\n\x08mpi_rank\x18\x06 \x03(\r\"\xa3\x01\n\tParamSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\nshare_mode\x18\x02 \x01(\x0e\x32\x1d.caffe.ParamSpec.DimCheckMode\x12\x12\n\x07lr_mult\x18\x03 \x01(\x02:\x01\x31\x12\x15\n\ndecay_mult\x18\x04 \x01(\x02:\x01\x31\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xcb\x19\n\x0eLayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06\x62ottom\x18\x03 \x03(\t\x12\x0b\n\x03top\x18\x04 \x03(\t\x12\x1c\n\x0cmirror_stage\x18\xa2\x01 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x05phase\x18\n \x01(\x0e\x32\x0c.caffe.Phase\x12\x13\n\x0bloss_weight\x18\x05 \x03(\x02\x12\x1f\n\x05param\x18\x06 \x03(\x0b\x32\x10.caffe.ParamSpec\x12\x1f\n\x05\x62lobs\x18\x07 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x16\n\x0epropagate_down\x18\x0b \x03(\x08\x12$\n\x07include\x18\x08 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18\t \x03(\x0b\x32\x13.caffe.NetStateRule\x12\x37\n\x0ftransform_param\x18\x64 \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18\x65 \x01(\x0b\x32\x14.caffe.LossParameter\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x66 \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18g \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12\x34\n\x10\x62\x61tch_norm_param\x18\x8b\x01 \x01(\x0b\x32\x19.caffe.BatchNormParameter\x12)\n\nbias_param\x18\x8d\x01 \x01(\x0b\x32\x14.caffe.BiasParameter\x12,\n\x0c\x63oncat_param\x18h \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18i \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18j 
\x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12)\n\ncrop_param\x18\x90\x01 \x01(\x0b\x32\x14.caffe.CropParameter\x12(\n\ndata_param\x18k \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18l \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18m \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18n \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12\'\n\telu_param\x18\x8c\x01 \x01(\x0b\x32\x13.caffe.ELUParameter\x12+\n\x0b\x65mbed_param\x18\x89\x01 \x01(\x0b\x32\x15.caffe.EmbedParameter\x12&\n\texp_param\x18o \x01(\x0b\x32\x13.caffe.ExpParameter\x12/\n\rflatten_param\x18\x87\x01 \x01(\x0b\x32\x17.caffe.FlattenParameter\x12\x31\n\x0fhdf5_data_param\x18p \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18q \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18r \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18s \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18t \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18u \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12+\n\x0binput_param\x18\x8f\x01 \x01(\x0b\x32\x15.caffe.InputParameter\x12\'\n\tlog_param\x18\x86\x01 \x01(\x0b\x32\x13.caffe.LogParameter\x12&\n\tlrn_param\x18v \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18w \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18x \x01(\x0b\x32\x13.caffe.MVNParameter\x12\x33\n\x0fparameter_param\x18\x91\x01 \x01(\x0b\x32\x19.caffe.ParameterParameter\x12.\n\rpooling_param\x18y \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18z \x01(\x0b\x32\x15.caffe.PowerParameter\x12+\n\x0bprelu_param\x18\x83\x01 \x01(\x0b\x32\x15.caffe.PReLUParameter\x12-\n\x0cpython_param\x18\x82\x01 \x01(\x0b\x32\x16.caffe.PythonParameter\x12\x33\n\x0freduction_param\x18\x88\x01 \x01(\x0b\x32\x19.caffe.ReductionParameter\x12(\n\nrelu_param\x18{ 
\x01(\x0b\x32\x14.caffe.ReLUParameter\x12/\n\rreshape_param\x18\x85\x01 \x01(\x0b\x32\x17.caffe.ReshapeParameter\x12+\n\x0bscale_param\x18\x8e\x01 \x01(\x0b\x32\x15.caffe.ScaleParameter\x12.\n\rsigmoid_param\x18| \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18} \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12\'\n\tspp_param\x18\x84\x01 \x01(\x0b\x32\x13.caffe.SPPParameter\x12*\n\x0bslice_param\x18~ \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18\x7f \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x33\n\x0fthreshold_param\x18\x80\x01 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12)\n\ntile_param\x18\x8a\x01 \x01(\x0b\x32\x14.caffe.TileParameter\x12\x36\n\x11window_data_param\x18\x81\x01 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x36\n\x11roi_pooling_param\x18\x97\x01 \x01(\x0b\x32\x1a.caffe.ROIPoolingParameter\x12;\n\x14smooth_l1_loss_param\x18\x98\x01 \x01(\x0b\x32\x1c.caffe.SmoothL1LossParameter\x12\'\n\tmpi_param\x18\x99\x01 \x01(\x0b\x32\x13.caffe.MPIParameter\x12/\n\rpermute_param\x18\x9a\x01 \x01(\x0b\x32\x17.caffe.PermuteParameter\x12\x33\n\x0fnormalize_param\x18\x9b\x01 \x01(\x0b\x32\x19.caffe.NormalizeParameter\x12\x31\n\x0eparallel_param\x18\x9d\x01 \x01(\x0b\x32\x18.caffe.ParallelParameter\x12-\n\x0cresize_param\x18\x9e\x01 \x01(\x0b\x32\x16.caffe.ResizeParameter\x12\x36\n\x11\x65xpand_dims_param\x18\x9f\x01 \x01(\x0b\x32\x1a.caffe.ExpandDimsParameter\x12\x31\n\x0eproposal_param\x18\xa0\x01 \x01(\x0b\x32\x18.caffe.ProposalParameter\x12\x38\n\x12\x62\x61tch_renorm_param\x18\xa1\x01 \x01(\x0b\x32\x1b.caffe.BatchRenormParameter\x12\x38\n\x12\x64\x65nse_concat_param\x18\xa3\x01 \x01(\x0b\x32\x1b.caffe.DenseConcatParameter\x12\x34\n\x10\x66ocal_loss_param\x18\xa4\x01 \x01(\x0b\x32\x19.caffe.FocalLossParameter\x12-\n\x0cgather_param\x18\xa5\x01 \x01(\x0b\x32\x16.caffe.GatherParameter\x12\x34\n\x10group_norm_param\x18\xa6\x01 \x01(\x0b\x32\x19.caffe.GroupNormParameter\"\xa7\x02\n\x17TransformationParameter\x12\x10\n\x05scale\x18\x01 
\x01(\x02:\x01\x31\x12\x15\n\x06mirror\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x14\n\tcrop_size\x18\x03 \x01(\r:\x01\x30\x12\x12\n\x07padding\x18\x0b \x01(\r:\x01\x30\x12\x11\n\tmean_file\x18\x04 \x01(\t\x12\x12\n\nmean_value\x18\x05 \x03(\x02\x12\x1a\n\x0b\x66orce_color\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\nforce_gray\x18\x07 \x01(\x08:\x05\x66\x61lse\x12!\n\x12\x63olor_augmentation\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x10min_random_scale\x18\t \x01(\x02:\x01\x31\x12\x1b\n\x10max_random_scale\x18\n \x01(\x02:\x01\x31\"\xf5\x01\n\rLossParameter\x12\x14\n\x0cignore_label\x18\x01 \x01(\x05\x12\x44\n\rnormalization\x18\x03 \x01(\x0e\x32&.caffe.LossParameter.NormalizationMode:\x05VALID\x12\x11\n\tnormalize\x18\x02 \x01(\x08\x1a\'\n\x13\x45xpandDimsParameter\x12\x10\n\x04\x61xis\x18\x01 \x01(\x05:\x02-1\"L\n\x11NormalizationMode\x12\x08\n\x04\x46ULL\x10\x00\x12\t\n\x05VALID\x10\x01\x12\x0e\n\nBATCH_SIZE\x10\x02\x12\x08\n\x04NONE\x10\x03\x12\x08\n\x04UNIT\x10\x04\"L\n\x11\x41\x63\x63uracyParameter\x12\x10\n\x05top_k\x18\x01 \x01(\r:\x01\x31\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x14\n\x0cignore_label\x18\x03 \x01(\x05\"M\n\x0f\x41rgMaxParameter\x12\x1a\n\x0bout_max_val\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x05top_k\x18\x02 \x01(\r:\x01\x31\x12\x0c\n\x04\x61xis\x18\x03 \x01(\x05\"9\n\x0f\x43oncatParameter\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x15\n\nconcat_dim\x18\x01 \x01(\r:\x01\x31\"h\n\x12\x42\x61tchNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x30.001\"]\n\rBiasParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\"L\n\x18\x43ontrastiveLossParameter\x12\x11\n\x06margin\x18\x01 \x01(\x02:\x01\x31\x12\x1d\n\x0elegacy_version\x18\x02 
\x01(\x08:\x05\x66\x61lse\"\xfc\x03\n\x14\x43onvolutionParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12\x0b\n\x03pad\x18\x03 \x03(\r\x12\x13\n\x0bkernel_size\x18\x04 \x03(\r\x12\x0e\n\x06stride\x18\x06 \x03(\r\x12\x10\n\x08\x64ilation\x18\x12 \x03(\r\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x10\n\x08kernel_h\x18\x0b \x01(\r\x12\x10\n\x08kernel_w\x18\x0c \x01(\r\x12\x10\n\x08stride_h\x18\r \x01(\r\x12\x10\n\x08stride_w\x18\x0e \x01(\r\x12\x10\n\x05group\x18\x05 \x01(\r:\x01\x31\x12-\n\rweight_filler\x18\x07 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x08 \x01(\x0b\x32\x16.caffe.FillerParameter\x12;\n\x06\x65ngine\x18\x0f \x01(\x0e\x32\".caffe.ConvolutionParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x10 \x01(\x05:\x01\x31\x12\x1e\n\x0f\x66orce_nd_im2col\x18\x11 \x01(\x08:\x05\x66\x61lse\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"0\n\rCropParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x32\x12\x0e\n\x06offset\x18\x02 \x03(\r\"\xa4\x02\n\rDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x31\n\x07\x62\x61\x63kend\x18\x08 \x01(\x0e\x32\x17.caffe.DataParameter.DB:\x07LEVELDB\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\"\n\x13\x66orce_encoded_color\x18\t \x01(\x08:\x05\x66\x61lse\x12\x13\n\x08prefetch\x18\n \x01(\r:\x01\x35\"\x1b\n\x02\x44\x42\x12\x0b\n\x07LEVELDB\x10\x00\x12\x08\n\x04LMDB\x10\x01\"I\n\x10\x44ropoutParameter\x12\x1a\n\rdropout_ratio\x18\x01 \x01(\x02:\x03\x30.5\x12\x19\n\x0bscale_train\x18\x02 \x01(\x08:\x04true\"\xa0\x01\n\x12\x44ummyDataParameter\x12+\n\x0b\x64\x61ta_filler\x18\x01 
\x03(\x0b\x32\x16.caffe.FillerParameter\x12\x1f\n\x05shape\x18\x06 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x0b\n\x03num\x18\x02 \x03(\r\x12\x10\n\x08\x63hannels\x18\x03 \x03(\r\x12\x0e\n\x06height\x18\x04 \x03(\r\x12\r\n\x05width\x18\x05 \x03(\r\"\xa5\x01\n\x10\x45ltwiseParameter\x12\x39\n\toperation\x18\x01 \x01(\x0e\x32!.caffe.EltwiseParameter.EltwiseOp:\x03SUM\x12\r\n\x05\x63oeff\x18\x02 \x03(\x02\x12\x1e\n\x10stable_prod_grad\x18\x03 \x01(\x08:\x04true\"\'\n\tEltwiseOp\x12\x08\n\x04PROD\x10\x00\x12\x07\n\x03SUM\x10\x01\x12\x07\n\x03MAX\x10\x02\" \n\x0c\x45LUParameter\x12\x10\n\x05\x61lpha\x18\x01 \x01(\x02:\x01\x31\"\xac\x01\n\x0e\x45mbedParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x11\n\tinput_dim\x18\x02 \x01(\r\x12\x17\n\tbias_term\x18\x03 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"D\n\x0c\x45xpParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"9\n\x10\x46lattenParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x14\n\x08\x65nd_axis\x18\x02 \x01(\x05:\x02-1\"O\n\x11HDF5DataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x16\n\x07shuffle\x18\x03 \x01(\x08:\x05\x66\x61lse\"(\n\x13HDF5OutputParameter\x12\x11\n\tfile_name\x18\x01 \x01(\t\"^\n\x12HingeLossParameter\x12\x30\n\x04norm\x18\x01 \x01(\x0e\x32\x1e.caffe.HingeLossParameter.Norm:\x02L1\"\x16\n\x04Norm\x12\x06\n\x02L1\x10\x01\x12\x06\n\x02L2\x10\x02\"\x97\x02\n\x12ImageDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x15\n\nbatch_size\x18\x04 \x01(\r:\x01\x31\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x16\n\x07shuffle\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x15\n\nnew_height\x18\t \x01(\r:\x01\x30\x12\x14\n\tnew_width\x18\n \x01(\r:\x01\x30\x12\x16\n\x08is_color\x18\x0b \x01(\x08:\x04true\x12\x10\n\x05scale\x18\x02 
\x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\x0c \x01(\t:\x00\"\'\n\x15InfogainLossParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\"\xcb\x01\n\x15InnerProductParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0f\n\x04\x61xis\x18\x05 \x01(\x05:\x01\x31\x12\x18\n\ttranspose\x18\x06 \x01(\x08:\x05\x66\x61lse\"1\n\x0eInputParameter\x12\x1f\n\x05shape\x18\x01 \x03(\x0b\x32\x10.caffe.BlobShape\"D\n\x0cLogParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"\xb8\x02\n\x0cLRNParameter\x12\x15\n\nlocal_size\x18\x01 \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x02 \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x03 \x01(\x02:\x04\x30.75\x12\x44\n\x0bnorm_region\x18\x04 \x01(\x0e\x32\x1e.caffe.LRNParameter.NormRegion:\x0f\x41\x43ROSS_CHANNELS\x12\x0c\n\x01k\x18\x05 \x01(\x02:\x01\x31\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.LRNParameter.Engine:\x07\x44\x45\x46\x41ULT\"5\n\nNormRegion\x12\x13\n\x0f\x41\x43ROSS_CHANNELS\x10\x00\x12\x12\n\x0eWITHIN_CHANNEL\x10\x01\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xbd\x01\n\x13MemoryDataParameter\x12\x12\n\nbatch_size\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\r\n\x05width\x18\x04 \x01(\r\x12;\n\x05\x64type\x18\x05 \x01(\x0e\x32#.caffe.MemoryDataParameter.DataType:\x07\x46LOAT32\"$\n\x08\x44\x61taType\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\x0b\n\x07\x46LOAT16\x10\x01\"e\n\x0cMVNParameter\x12 \n\x12normalize_variance\x18\x01 
\x01(\x08:\x04true\x12\x1e\n\x0f\x61\x63ross_channels\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x13\n\x03\x65ps\x18\x03 \x01(\x02:\x06\x31\x65-009\"5\n\x12ParameterParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\"\xa2\x03\n\x10PoolingParameter\x12\x35\n\x04pool\x18\x01 \x01(\x0e\x32\".caffe.PoolingParameter.PoolMethod:\x03MAX\x12\x0e\n\x03pad\x18\x04 \x01(\r:\x01\x30\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x13\n\x0bkernel_size\x18\x02 \x01(\r\x12\x10\n\x08kernel_h\x18\x05 \x01(\r\x12\x10\n\x08kernel_w\x18\x06 \x01(\r\x12\x11\n\x06stride\x18\x03 \x01(\r:\x01\x31\x12\x10\n\x08stride_h\x18\x07 \x01(\r\x12\x10\n\x08stride_w\x18\x08 \x01(\r\x12\x37\n\x06\x65ngine\x18\x0b \x01(\x0e\x32\x1e.caffe.PoolingParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x1d\n\x0eglobal_pooling\x18\x0c \x01(\x08:\x05\x66\x61lse\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Y\n\x13ROIPoolingParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"F\n\x0ePowerParameter\x12\x10\n\x05power\x18\x01 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"g\n\x0fPythonParameter\x12\x0e\n\x06module\x18\x01 \x01(\t\x12\r\n\x05layer\x18\x02 \x01(\t\x12\x13\n\tparam_str\x18\x03 \x01(\t:\x00\x12 \n\x11share_in_parallel\x18\x04 \x01(\x08:\x05\x66\x61lse\"\xad\x01\n\x12ReductionParameter\x12=\n\toperation\x18\x01 \x01(\x0e\x32%.caffe.ReductionParameter.ReductionOp:\x03SUM\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x10\n\x05\x63oeff\x18\x03 
\x01(\x02:\x01\x31\"5\n\x0bReductionOp\x12\x07\n\x03SUM\x10\x01\x12\x08\n\x04\x41SUM\x10\x02\x12\t\n\x05SUMSQ\x10\x03\x12\x08\n\x04MEAN\x10\x04\"\x8d\x01\n\rReLUParameter\x12\x19\n\x0enegative_slope\x18\x01 \x01(\x02:\x01\x30\x12\x34\n\x06\x65ngine\x18\x02 \x01(\x0e\x32\x1b.caffe.ReLUParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Z\n\x10ReshapeParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\x08num_axes\x18\x03 \x01(\x05:\x02-1\"\xa5\x01\n\x0eScaleParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x18\n\tbias_term\x18\x04 \x01(\x08:\x05\x66\x61lse\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"x\n\x10SigmoidParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SigmoidParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"L\n\x0eSliceParameter\x12\x0f\n\x04\x61xis\x18\x03 \x01(\x05:\x01\x31\x12\x13\n\x0bslice_point\x18\x02 \x03(\r\x12\x14\n\tslice_dim\x18\x01 \x01(\r:\x01\x31\"\x89\x01\n\x10SoftmaxParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SoftmaxParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"r\n\rTanHParameter\x12\x34\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1b.caffe.TanHParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"T\n\rTileParameter\x12\x0f\n\x04\x61xis\x18\x01 
\x01(\x05:\x01\x31\x12\r\n\x05tiles\x18\x02 \x01(\x05\x12#\n\tmultiples\x18\x03 \x01(\x0b\x32\x10.caffe.BlobShape\"*\n\x12ThresholdParameter\x12\x14\n\tthreshold\x18\x01 \x01(\x02:\x01\x30\"\xc1\x02\n\x13WindowDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0c\x66g_threshold\x18\x07 \x01(\x02:\x03\x30.5\x12\x19\n\x0c\x62g_threshold\x18\x08 \x01(\x02:\x03\x30.5\x12\x19\n\x0b\x66g_fraction\x18\t \x01(\x02:\x04\x30.25\x12\x16\n\x0b\x63ontext_pad\x18\n \x01(\r:\x01\x30\x12\x17\n\tcrop_mode\x18\x0b \x01(\t:\x04warp\x12\x1b\n\x0c\x63\x61\x63he_images\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\r \x01(\t:\x00\"\xeb\x01\n\x0cSPPParameter\x12\x16\n\x0epyramid_height\x18\x01 \x01(\r\x12\x31\n\x04pool\x18\x02 \x01(\x0e\x32\x1e.caffe.SPPParameter.PoolMethod:\x03MAX\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.SPPParameter.Engine:\x07\x44\x45\x46\x41ULT\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xe0\x13\n\x10V1LayerParameter\x12\x0e\n\x06\x62ottom\x18\x02 \x03(\t\x12\x0b\n\x03top\x18\x03 \x03(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12$\n\x07include\x18 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18! 
\x03(\x0b\x32\x13.caffe.NetStateRule\x12/\n\x04type\x18\x05 \x01(\x0e\x32!.caffe.V1LayerParameter.LayerType\x12\x1f\n\x05\x62lobs\x18\x06 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x0e\n\x05param\x18\xe9\x07 \x03(\t\x12>\n\x0f\x62lob_share_mode\x18\xea\x07 \x03(\x0e\x32$.caffe.V1LayerParameter.DimCheckMode\x12\x10\n\x08\x62lobs_lr\x18\x07 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x08 \x03(\x02\x12\x13\n\x0bloss_weight\x18# \x03(\x02\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x1b \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18\x17 \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12,\n\x0c\x63oncat_param\x18\t \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18( \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18\n \x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12(\n\ndata_param\x18\x0b \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18\x0c \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18\x1a \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18\x18 \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12&\n\texp_param\x18) \x01(\x0b\x32\x13.caffe.ExpParameter\x12\x31\n\x0fhdf5_data_param\x18\r \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18\x0e \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18\x1d \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18\x0f \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18\x10 \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18\x11 \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12&\n\tlrn_param\x18\x12 \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18\x16 \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18\" \x01(\x0b\x32\x13.caffe.MVNParameter\x12.\n\rpooling_param\x18\x13 \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18\x15 
\x01(\x0b\x32\x15.caffe.PowerParameter\x12(\n\nrelu_param\x18\x1e \x01(\x0b\x32\x14.caffe.ReLUParameter\x12.\n\rsigmoid_param\x18& \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18\' \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12*\n\x0bslice_param\x18\x1f \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18% \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x32\n\x0fthreshold_param\x18\x19 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12\x35\n\x11window_data_param\x18\x14 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x37\n\x0ftransform_param\x18$ \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18* \x01(\x0b\x32\x14.caffe.LossParameter\x12&\n\x05layer\x18\x01 \x01(\x0b\x32\x17.caffe.V0LayerParameter\"\xd8\x04\n\tLayerType\x12\x08\n\x04NONE\x10\x00\x12\n\n\x06\x41\x42SVAL\x10#\x12\x0c\n\x08\x41\x43\x43URACY\x10\x01\x12\n\n\x06\x41RGMAX\x10\x1e\x12\x08\n\x04\x42NLL\x10\x02\x12\n\n\x06\x43ONCAT\x10\x03\x12\x14\n\x10\x43ONTRASTIVE_LOSS\x10%\x12\x0f\n\x0b\x43ONVOLUTION\x10\x04\x12\x08\n\x04\x44\x41TA\x10\x05\x12\x11\n\rDECONVOLUTION\x10\'\x12\x0b\n\x07\x44ROPOUT\x10\x06\x12\x0e\n\nDUMMY_DATA\x10 
\x12\x12\n\x0e\x45UCLIDEAN_LOSS\x10\x07\x12\x0b\n\x07\x45LTWISE\x10\x19\x12\x07\n\x03\x45XP\x10&\x12\x0b\n\x07\x46LATTEN\x10\x08\x12\r\n\tHDF5_DATA\x10\t\x12\x0f\n\x0bHDF5_OUTPUT\x10\n\x12\x0e\n\nHINGE_LOSS\x10\x1c\x12\n\n\x06IM2COL\x10\x0b\x12\x0e\n\nIMAGE_DATA\x10\x0c\x12\x11\n\rINFOGAIN_LOSS\x10\r\x12\x11\n\rINNER_PRODUCT\x10\x0e\x12\x07\n\x03LRN\x10\x0f\x12\x0f\n\x0bMEMORY_DATA\x10\x1d\x12\x1d\n\x19MULTINOMIAL_LOGISTIC_LOSS\x10\x10\x12\x07\n\x03MVN\x10\"\x12\x0b\n\x07POOLING\x10\x11\x12\t\n\x05POWER\x10\x1a\x12\x08\n\x04RELU\x10\x12\x12\x0b\n\x07SIGMOID\x10\x13\x12\x1e\n\x1aSIGMOID_CROSS_ENTROPY_LOSS\x10\x1b\x12\x0b\n\x07SILENCE\x10$\x12\x0b\n\x07SOFTMAX\x10\x14\x12\x10\n\x0cSOFTMAX_LOSS\x10\x15\x12\t\n\x05SPLIT\x10\x16\x12\t\n\x05SLICE\x10!\x12\x08\n\x04TANH\x10\x17\x12\x0f\n\x0bWINDOW_DATA\x10\x18\x12\r\n\tTHRESHOLD\x10\x1f\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xfd\x07\n\x10V0LayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x12\n\nnum_output\x18\x03 \x01(\r\x12\x16\n\x08\x62iasterm\x18\x04 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x06 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0e\n\x03pad\x18\x07 \x01(\r:\x01\x30\x12\x12\n\nkernelsize\x18\x08 \x01(\r\x12\x10\n\x05group\x18\t \x01(\r:\x01\x31\x12\x11\n\x06stride\x18\n \x01(\r:\x01\x31\x12\x35\n\x04pool\x18\x0b \x01(\x0e\x32\".caffe.V0LayerParameter.PoolMethod:\x03MAX\x12\x1a\n\rdropout_ratio\x18\x0c \x01(\x02:\x03\x30.5\x12\x15\n\nlocal_size\x18\r \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x0e \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x0f \x01(\x02:\x04\x30.75\x12\x0c\n\x01k\x18\x16 \x01(\x02:\x01\x31\x12\x0e\n\x06source\x18\x10 \x01(\t\x12\x10\n\x05scale\x18\x11 \x01(\x02:\x01\x31\x12\x10\n\x08meanfile\x18\x12 \x01(\t\x12\x11\n\tbatchsize\x18\x13 \x01(\r\x12\x13\n\x08\x63ropsize\x18\x14 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x15 
\x01(\x08:\x05\x66\x61lse\x12\x1f\n\x05\x62lobs\x18\x32 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x10\n\x08\x62lobs_lr\x18\x33 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x34 \x03(\x02\x12\x14\n\trand_skip\x18\x35 \x01(\r:\x01\x30\x12\x1d\n\x10\x64\x65t_fg_threshold\x18\x36 \x01(\x02:\x03\x30.5\x12\x1d\n\x10\x64\x65t_bg_threshold\x18\x37 \x01(\x02:\x03\x30.5\x12\x1d\n\x0f\x64\x65t_fg_fraction\x18\x38 \x01(\x02:\x04\x30.25\x12\x1a\n\x0f\x64\x65t_context_pad\x18: \x01(\r:\x01\x30\x12\x1b\n\rdet_crop_mode\x18; \x01(\t:\x04warp\x12\x12\n\x07new_num\x18< \x01(\x05:\x01\x30\x12\x17\n\x0cnew_channels\x18= \x01(\x05:\x01\x30\x12\x15\n\nnew_height\x18> \x01(\x05:\x01\x30\x12\x14\n\tnew_width\x18? \x01(\x05:\x01\x30\x12\x1d\n\x0eshuffle_images\x18@ \x01(\x08:\x05\x66\x61lse\x12\x15\n\nconcat_dim\x18\x41 \x01(\r:\x01\x31\x12\x36\n\x11hdf5_output_param\x18\xe9\x07 \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"W\n\x0ePReLUParameter\x12&\n\x06\x66iller\x18\x01 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1d\n\x0e\x63hannel_shared\x18\x02 \x01(\x08:\x05\x66\x61lse\")\n\x15SmoothL1LossParameter\x12\x10\n\x05sigma\x18\x01 \x01(\x02:\x01\x31\"H\n\x0cMPIParameter\x12\x0f\n\x04root\x18\x01 \x01(\r:\x01\x30\x12\x12\n\x07\x63omm_id\x18\x02 \x01(\x04:\x01\x30\x12\x13\n\x08group_id\x18\x03 \x01(\x04:\x01\x30\"!\n\x10PermuteParameter\x12\r\n\x05order\x18\x01 \x03(\r\"\x92\x01\n\x12NormalizeParameter\x12\x1c\n\x0e\x61\x63ross_spatial\x18\x01 \x01(\x08:\x04true\x12,\n\x0cscale_filler\x18\x02 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1c\n\x0e\x63hannel_shared\x18\x03 \x01(\x08:\x04true\x12\x12\n\x03\x65ps\x18\x04 \x01(\x02:\x05\x30.001\"d\n\x11ParallelParameter\x12\x1d\n\x0emultiple_nodes\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x16\n\x07shuffle\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x18\n\tpartition\x18\x03 \x01(\x08:\x05\x66\x61lse\"R\n\x0fResizeParameter\x12\x1f\n\x05shape\x18\x01 
\x01(\x0b\x32\x10.caffe.BlobShape\x12\x0e\n\x02\x66x\x18\x02 \x01(\x02:\x02-1\x12\x0e\n\x02\x66y\x18\x03 \x01(\x02:\x02-1\"\'\n\x13\x45xpandDimsParameter\x12\x10\n\x04\x61xis\x18\x01 \x01(\x05:\x02-1\"\x90\x02\n\x11ProposalParameter\x12\x0e\n\x06stride\x18\x01 \x03(\x05\x12\r\n\x05ratio\x18\x02 \x03(\x02\x12\r\n\x05scale\x18\x03 \x03(\x02\x12\x1b\n\rpre_nms_top_n\x18\x04 \x01(\r:\x04\x36\x30\x30\x30\x12\x1b\n\x0epost_nms_top_n\x18\x05 \x01(\r:\x03\x33\x30\x30\x12\x17\n\nnms_thresh\x18\x06 \x01(\x02:\x03\x30.7\x12\x14\n\x08min_size\x18\x07 \x01(\r:\x02\x31\x36\x12\x14\n\tmin_level\x18\x08 \x01(\x05:\x01\x32\x12\x14\n\tmax_level\x18\t \x01(\x05:\x01\x35\x12\x1c\n\x0f\x63\x61nonical_scale\x18\n \x01(\x05:\x03\x32\x32\x34\x12\x1a\n\x0f\x63\x61nonical_level\x18\x0b \x01(\x05:\x01\x34\"\xa6\x01\n\x14\x42\x61tchRenormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x30.001\x12\x10\n\x05r_max\x18\x04 \x01(\x02:\x01\x33\x12\x10\n\x05\x64_max\x18\x05 \x01(\x02:\x01\x35\x12\x16\n\x07t_delta\x18\x06 \x01(\x02:\x05\x30.001\"?\n\x14\x44\x65nseConcatParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x16\n\x0bgrowth_rate\x18\x02 \x01(\x05:\x01\x30\"N\n\x12\x46ocalLossParameter\x12\x13\n\x05\x61lpha\x18\x01 \x01(\x02:\x04\x30.25\x12\x10\n\x05gamma\x18\x02 \x01(\x02:\x01\x32\x12\x11\n\x06neg_id\x18\x03 \x01(\x05:\x01\x30\"\"\n\x0fGatherParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x30\"{\n\x12GroupNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x30.001\x12\x11\n\x05group\x18\x05 \x01(\r:\x02\x33\x32*\x1c\n\x05Phase\x12\t\n\x05TRAIN\x10\x00\x12\x08\n\x04TEST\x10\x01') serialized_pb=_b('\n\x0b\x63\x61\x66\x66\x65.proto\x12\x05\x63\x61\x66\x66\x65\"\x1c\n\tBlobShape\x12\x0f\n\x03\x64im\x18\x01 
\x03(\x03\x42\x02\x10\x01\"\xcc\x01\n\tBlobProto\x12\x1f\n\x05shape\x18\x07 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x10\n\x04\x64\x61ta\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04\x64iff\x18\x06 \x03(\x02\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_data\x18\x08 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_diff\x18\t \x03(\x01\x42\x02\x10\x01\x12\x0e\n\x03num\x18\x01 \x01(\x05:\x01\x30\x12\x13\n\x08\x63hannels\x18\x02 \x01(\x05:\x01\x30\x12\x11\n\x06height\x18\x03 \x01(\x05:\x01\x30\x12\x10\n\x05width\x18\x04 \x01(\x05:\x01\x30\"2\n\x0f\x42lobProtoVector\x12\x1f\n\x05\x62lobs\x18\x01 \x03(\x0b\x32\x10.caffe.BlobProto\"\x91\x01\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02\x12\x16\n\x07\x65ncoded\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x0e\n\x06labels\x18\x08 \x03(\x05\"\x8a\x02\n\x0f\x46illerParameter\x12\x16\n\x04type\x18\x01 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x02 \x01(\x02:\x01\x30\x12\x0e\n\x03min\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03max\x18\x04 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x05 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x06 \x01(\x02:\x01\x31\x12\x12\n\x06sparse\x18\x07 \x01(\x05:\x02-1\x12\x42\n\rvariance_norm\x18\x08 \x01(\x0e\x32#.caffe.FillerParameter.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x41VERAGE\x10\x02\"\x8e\x02\n\x0cNetParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05input\x18\x03 \x03(\t\x12%\n\x0binput_shape\x18\x08 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x11\n\tinput_dim\x18\x04 \x03(\x05\x12\x1d\n\x0e\x66orce_backward\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\x1e\n\x05state\x18\x06 \x01(\x0b\x32\x0f.caffe.NetState\x12\x19\n\ndebug_info\x18\x07 \x01(\x08:\x05\x66\x61lse\x12$\n\x05layer\x18\x64 
\x03(\x0b\x32\x15.caffe.LayerParameter\x12\'\n\x06layers\x18\x02 \x03(\x0b\x32\x17.caffe.V1LayerParameter\"\xc9\n\n\x0fSolverParameter\x12\x0b\n\x03net\x18\x18 \x01(\t\x12&\n\tnet_param\x18\x19 \x01(\x0b\x32\x13.caffe.NetParameter\x12\x11\n\ttrain_net\x18\x01 \x01(\t\x12\x10\n\x08test_net\x18\x02 \x03(\t\x12,\n\x0ftrain_net_param\x18\x15 \x01(\x0b\x32\x13.caffe.NetParameter\x12+\n\x0etest_net_param\x18\x16 \x03(\x0b\x32\x13.caffe.NetParameter\x12$\n\x0btrain_state\x18\x1a \x01(\x0b\x32\x0f.caffe.NetState\x12#\n\ntest_state\x18\x1b \x03(\x0b\x32\x0f.caffe.NetState\x12\x11\n\ttest_iter\x18\x03 \x03(\x05\x12\x18\n\rtest_interval\x18\x04 \x01(\x05:\x01\x30\x12 \n\x11test_compute_loss\x18\x13 \x01(\x08:\x05\x66\x61lse\x12!\n\x13test_initialization\x18 \x01(\x08:\x04true\x12\x0f\n\x07\x62\x61se_lr\x18\x05 \x01(\x02\x12\x10\n\x08stage_lr\x18\x32 \x03(\x02\x12\x12\n\nstage_iter\x18\x33 \x03(\x05\x12\x0f\n\x07\x64isplay\x18\x06 \x01(\x05\x12\x17\n\x0c\x61verage_loss\x18! \x01(\x05:\x01\x31\x12\x10\n\x08max_iter\x18\x07 \x01(\x05\x12\x14\n\titer_size\x18$ \x01(\x05:\x01\x31\x12\x11\n\tlr_policy\x18\x08 \x01(\t\x12\r\n\x05gamma\x18\t \x01(\x02\x12\r\n\x05power\x18\n \x01(\x02\x12\x10\n\x08momentum\x18\x0b \x01(\x02\x12\x14\n\x0cweight_decay\x18\x0c \x01(\x02\x12\x1f\n\x13regularization_type\x18\x1d \x01(\t:\x02L2\x12\x10\n\x08stepsize\x18\r \x01(\x05\x12\x11\n\tstepvalue\x18\" \x03(\x05\x12\x1a\n\x0e\x63lip_gradients\x18# \x01(\x02:\x02-1\x12\x13\n\x08snapshot\x18\x0e \x01(\x05:\x01\x30\x12\x17\n\x0fsnapshot_prefix\x18\x0f \x01(\t\x12\x1c\n\rsnapshot_diff\x18\x10 \x01(\x08:\x05\x66\x61lse\x12K\n\x0fsnapshot_format\x18% \x01(\x0e\x32%.caffe.SolverParameter.SnapshotFormat:\x0b\x42INARYPROTO\x12;\n\x0bsolver_mode\x18\x11 \x01(\x0e\x32!.caffe.SolverParameter.SolverMode:\x03GPU\x12\x14\n\tdevice_id\x18\x12 \x01(\x05:\x01\x30\x12\x17\n\x0brandom_seed\x18\x14 \x01(\x03:\x02-1\x12\x11\n\x04type\x18( \x01(\t:\x03SGD\x12\x15\n\x05\x64\x65lta\x18\x1f 
\x01(\x02:\x06\x31\x65-008\x12\x18\n\tmomentum2\x18\' \x01(\x02:\x05\x30.999\x12\x17\n\trms_decay\x18& \x01(\x02:\x04\x30.99\x12\x19\n\ndebug_info\x18\x17 \x01(\x08:\x05\x66\x61lse\x12\"\n\x14snapshot_after_train\x18\x1c \x01(\x08:\x04true\x12;\n\x0bsolver_type\x18\x1e \x01(\x0e\x32!.caffe.SolverParameter.SolverType:\x03SGD\"+\n\x0eSnapshotFormat\x12\x08\n\x04HDF5\x10\x00\x12\x0f\n\x0b\x42INARYPROTO\x10\x01\"\x1e\n\nSolverMode\x12\x07\n\x03\x43PU\x10\x00\x12\x07\n\x03GPU\x10\x01\"U\n\nSolverType\x12\x07\n\x03SGD\x10\x00\x12\x0c\n\x08NESTEROV\x10\x01\x12\x0b\n\x07\x41\x44\x41GRAD\x10\x02\x12\x0b\n\x07RMSPROP\x10\x03\x12\x0c\n\x08\x41\x44\x41\x44\x45LTA\x10\x04\x12\x08\n\x04\x41\x44\x41M\x10\x05\"l\n\x0bSolverState\x12\x0c\n\x04iter\x18\x01 \x01(\x05\x12\x13\n\x0blearned_net\x18\x02 \x01(\t\x12!\n\x07history\x18\x03 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x17\n\x0c\x63urrent_step\x18\x04 \x01(\x05:\x01\x30\"N\n\x08NetState\x12!\n\x05phase\x18\x01 \x01(\x0e\x32\x0c.caffe.Phase:\x04TEST\x12\x10\n\x05level\x18\x02 \x01(\x05:\x01\x30\x12\r\n\x05stage\x18\x03 \x03(\t\"\x85\x01\n\x0cNetStateRule\x12\x1b\n\x05phase\x18\x01 \x01(\x0e\x32\x0c.caffe.Phase\x12\x11\n\tmin_level\x18\x02 \x01(\x05\x12\x11\n\tmax_level\x18\x03 \x01(\x05\x12\r\n\x05stage\x18\x04 \x03(\t\x12\x11\n\tnot_stage\x18\x05 \x03(\t\x12\x10\n\x08mpi_rank\x18\x06 \x03(\r\"\xa3\x01\n\tParamSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\nshare_mode\x18\x02 \x01(\x0e\x32\x1d.caffe.ParamSpec.DimCheckMode\x12\x12\n\x07lr_mult\x18\x03 \x01(\x02:\x01\x31\x12\x15\n\ndecay_mult\x18\x04 \x01(\x02:\x01\x31\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xbd\x1a\n\x0eLayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06\x62ottom\x18\x03 \x03(\t\x12\x0b\n\x03top\x18\x04 \x03(\t\x12\x1c\n\x0cmirror_stage\x18\xa2\x01 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x05phase\x18\n \x01(\x0e\x32\x0c.caffe.Phase\x12\x13\n\x0bloss_weight\x18\x05 
\x03(\x02\x12\x1f\n\x05param\x18\x06 \x03(\x0b\x32\x10.caffe.ParamSpec\x12\x1f\n\x05\x62lobs\x18\x07 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x16\n\x0epropagate_down\x18\x0b \x03(\x08\x12$\n\x07include\x18\x08 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18\t \x03(\x0b\x32\x13.caffe.NetStateRule\x12\x37\n\x0ftransform_param\x18\x64 \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18\x65 \x01(\x0b\x32\x14.caffe.LossParameter\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x66 \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18g \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12\x34\n\x10\x62\x61tch_norm_param\x18\x8b\x01 \x01(\x0b\x32\x19.caffe.BatchNormParameter\x12)\n\nbias_param\x18\x8d\x01 \x01(\x0b\x32\x14.caffe.BiasParameter\x12,\n\x0c\x63oncat_param\x18h \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18i \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18j \x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12)\n\ncrop_param\x18\x90\x01 \x01(\x0b\x32\x14.caffe.CropParameter\x12(\n\ndata_param\x18k \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18l \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18m \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18n \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12\'\n\telu_param\x18\x8c\x01 \x01(\x0b\x32\x13.caffe.ELUParameter\x12+\n\x0b\x65mbed_param\x18\x89\x01 \x01(\x0b\x32\x15.caffe.EmbedParameter\x12&\n\texp_param\x18o \x01(\x0b\x32\x13.caffe.ExpParameter\x12/\n\rflatten_param\x18\x87\x01 \x01(\x0b\x32\x17.caffe.FlattenParameter\x12\x31\n\x0fhdf5_data_param\x18p \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18q \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18r \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18s \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18t 
\x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18u \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12+\n\x0binput_param\x18\x8f\x01 \x01(\x0b\x32\x15.caffe.InputParameter\x12\'\n\tlog_param\x18\x86\x01 \x01(\x0b\x32\x13.caffe.LogParameter\x12&\n\tlrn_param\x18v \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18w \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18x \x01(\x0b\x32\x13.caffe.MVNParameter\x12\x33\n\x0fparameter_param\x18\x91\x01 \x01(\x0b\x32\x19.caffe.ParameterParameter\x12.\n\rpooling_param\x18y \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18z \x01(\x0b\x32\x15.caffe.PowerParameter\x12+\n\x0bprelu_param\x18\x83\x01 \x01(\x0b\x32\x15.caffe.PReLUParameter\x12-\n\x0cpython_param\x18\x82\x01 \x01(\x0b\x32\x16.caffe.PythonParameter\x12\x33\n\x0freduction_param\x18\x88\x01 \x01(\x0b\x32\x19.caffe.ReductionParameter\x12(\n\nrelu_param\x18{ \x01(\x0b\x32\x14.caffe.ReLUParameter\x12/\n\rreshape_param\x18\x85\x01 \x01(\x0b\x32\x17.caffe.ReshapeParameter\x12+\n\x0bscale_param\x18\x8e\x01 \x01(\x0b\x32\x15.caffe.ScaleParameter\x12.\n\rsigmoid_param\x18| \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18} \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12\'\n\tspp_param\x18\x84\x01 \x01(\x0b\x32\x13.caffe.SPPParameter\x12*\n\x0bslice_param\x18~ \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18\x7f \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x33\n\x0fthreshold_param\x18\x80\x01 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12)\n\ntile_param\x18\x8a\x01 \x01(\x0b\x32\x14.caffe.TileParameter\x12\x36\n\x11window_data_param\x18\x81\x01 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x36\n\x11roi_pooling_param\x18\x97\x01 \x01(\x0b\x32\x1a.caffe.ROIPoolingParameter\x12;\n\x14smooth_l1_loss_param\x18\x98\x01 \x01(\x0b\x32\x1c.caffe.SmoothL1LossParameter\x12\'\n\tmpi_param\x18\x99\x01 \x01(\x0b\x32\x13.caffe.MPIParameter\x12/\n\rpermute_param\x18\x9a\x01 
\x01(\x0b\x32\x17.caffe.PermuteParameter\x12\x33\n\x0fnormalize_param\x18\x9b\x01 \x01(\x0b\x32\x19.caffe.NormalizeParameter\x12\x31\n\x0eparallel_param\x18\x9d\x01 \x01(\x0b\x32\x18.caffe.ParallelParameter\x12-\n\x0cresize_param\x18\x9e\x01 \x01(\x0b\x32\x16.caffe.ResizeParameter\x12\x36\n\x11\x65xpand_dims_param\x18\x9f\x01 \x01(\x0b\x32\x1a.caffe.ExpandDimsParameter\x12\x31\n\x0eproposal_param\x18\xa0\x01 \x01(\x0b\x32\x18.caffe.ProposalParameter\x12\x38\n\x12\x62\x61tch_renorm_param\x18\xa1\x01 \x01(\x0b\x32\x1b.caffe.BatchRenormParameter\x12\x38\n\x12\x64\x65nse_concat_param\x18\xa3\x01 \x01(\x0b\x32\x1b.caffe.DenseConcatParameter\x12\x34\n\x10\x66ocal_loss_param\x18\xa4\x01 \x01(\x0b\x32\x19.caffe.FocalLossParameter\x12-\n\x0cgather_param\x18\xa5\x01 \x01(\x0b\x32\x16.caffe.GatherParameter\x12:\n\x13instance_norm_param\x18\xa6\x01 \x01(\x0b\x32\x1c.caffe.InstanceNormParameter\x12\x34\n\x10group_norm_param\x18\xa7\x01 \x01(\x0b\x32\x19.caffe.GroupNormParameter\x12\x34\n\x10\x64rop_block_param\x18\xa8\x01 \x01(\x0b\x32\x19.caffe.DropBlockParameter\"\xa7\x02\n\x17TransformationParameter\x12\x10\n\x05scale\x18\x01 \x01(\x02:\x01\x31\x12\x15\n\x06mirror\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x14\n\tcrop_size\x18\x03 \x01(\r:\x01\x30\x12\x12\n\x07padding\x18\x0b \x01(\r:\x01\x30\x12\x11\n\tmean_file\x18\x04 \x01(\t\x12\x12\n\nmean_value\x18\x05 \x03(\x02\x12\x1a\n\x0b\x66orce_color\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\nforce_gray\x18\x07 \x01(\x08:\x05\x66\x61lse\x12!\n\x12\x63olor_augmentation\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x10min_random_scale\x18\t \x01(\x02:\x01\x31\x12\x1b\n\x10max_random_scale\x18\n \x01(\x02:\x01\x31\"\xf5\x01\n\rLossParameter\x12\x14\n\x0cignore_label\x18\x01 \x01(\x05\x12\x44\n\rnormalization\x18\x03 \x01(\x0e\x32&.caffe.LossParameter.NormalizationMode:\x05VALID\x12\x11\n\tnormalize\x18\x02 \x01(\x08\x1a\'\n\x13\x45xpandDimsParameter\x12\x10\n\x04\x61xis\x18\x01 
\x01(\x05:\x02-1\"L\n\x11NormalizationMode\x12\x08\n\x04\x46ULL\x10\x00\x12\t\n\x05VALID\x10\x01\x12\x0e\n\nBATCH_SIZE\x10\x02\x12\x08\n\x04NONE\x10\x03\x12\x08\n\x04UNIT\x10\x04\"L\n\x11\x41\x63\x63uracyParameter\x12\x10\n\x05top_k\x18\x01 \x01(\r:\x01\x31\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x14\n\x0cignore_label\x18\x03 \x01(\x05\"M\n\x0f\x41rgMaxParameter\x12\x1a\n\x0bout_max_val\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x05top_k\x18\x02 \x01(\r:\x01\x31\x12\x0c\n\x04\x61xis\x18\x03 \x01(\x05\"9\n\x0f\x43oncatParameter\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x15\n\nconcat_dim\x18\x01 \x01(\r:\x01\x31\"i\n\x12\x42\x61tchNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x13\n\x03\x65ps\x18\x03 \x01(\x02:\x06\x31\x65-005\"]\n\rBiasParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\"L\n\x18\x43ontrastiveLossParameter\x12\x11\n\x06margin\x18\x01 \x01(\x02:\x01\x31\x12\x1d\n\x0elegacy_version\x18\x02 \x01(\x08:\x05\x66\x61lse\"\xfc\x03\n\x14\x43onvolutionParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12\x0b\n\x03pad\x18\x03 \x03(\r\x12\x13\n\x0bkernel_size\x18\x04 \x03(\r\x12\x0e\n\x06stride\x18\x06 \x03(\r\x12\x10\n\x08\x64ilation\x18\x12 \x03(\r\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x10\n\x08kernel_h\x18\x0b \x01(\r\x12\x10\n\x08kernel_w\x18\x0c \x01(\r\x12\x10\n\x08stride_h\x18\r \x01(\r\x12\x10\n\x08stride_w\x18\x0e \x01(\r\x12\x10\n\x05group\x18\x05 \x01(\r:\x01\x31\x12-\n\rweight_filler\x18\x07 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x08 \x01(\x0b\x32\x16.caffe.FillerParameter\x12;\n\x06\x65ngine\x18\x0f \x01(\x0e\x32\".caffe.ConvolutionParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x10 
\x01(\x05:\x01\x31\x12\x1e\n\x0f\x66orce_nd_im2col\x18\x11 \x01(\x08:\x05\x66\x61lse\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"0\n\rCropParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x32\x12\x0e\n\x06offset\x18\x02 \x03(\r\"\xa4\x02\n\rDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x31\n\x07\x62\x61\x63kend\x18\x08 \x01(\x0e\x32\x17.caffe.DataParameter.DB:\x07LEVELDB\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\"\n\x13\x66orce_encoded_color\x18\t \x01(\x08:\x05\x66\x61lse\x12\x13\n\x08prefetch\x18\n \x01(\r:\x01\x35\"\x1b\n\x02\x44\x42\x12\x0b\n\x07LEVELDB\x10\x00\x12\x08\n\x04LMDB\x10\x01\"I\n\x10\x44ropoutParameter\x12\x1a\n\rdropout_ratio\x18\x01 \x01(\x02:\x03\x30.5\x12\x19\n\x0bscale_train\x18\x02 \x01(\x08:\x04true\"\xa0\x01\n\x12\x44ummyDataParameter\x12+\n\x0b\x64\x61ta_filler\x18\x01 \x03(\x0b\x32\x16.caffe.FillerParameter\x12\x1f\n\x05shape\x18\x06 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x0b\n\x03num\x18\x02 \x03(\r\x12\x10\n\x08\x63hannels\x18\x03 \x03(\r\x12\x0e\n\x06height\x18\x04 \x03(\r\x12\r\n\x05width\x18\x05 \x03(\r\"\xa5\x01\n\x10\x45ltwiseParameter\x12\x39\n\toperation\x18\x01 \x01(\x0e\x32!.caffe.EltwiseParameter.EltwiseOp:\x03SUM\x12\r\n\x05\x63oeff\x18\x02 \x03(\x02\x12\x1e\n\x10stable_prod_grad\x18\x03 \x01(\x08:\x04true\"\'\n\tEltwiseOp\x12\x08\n\x04PROD\x10\x00\x12\x07\n\x03SUM\x10\x01\x12\x07\n\x03MAX\x10\x02\" \n\x0c\x45LUParameter\x12\x10\n\x05\x61lpha\x18\x01 \x01(\x02:\x01\x31\"\xac\x01\n\x0e\x45mbedParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x11\n\tinput_dim\x18\x02 \x01(\r\x12\x17\n\tbias_term\x18\x03 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x04 
\x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"D\n\x0c\x45xpParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"9\n\x10\x46lattenParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x14\n\x08\x65nd_axis\x18\x02 \x01(\x05:\x02-1\"O\n\x11HDF5DataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x16\n\x07shuffle\x18\x03 \x01(\x08:\x05\x66\x61lse\"(\n\x13HDF5OutputParameter\x12\x11\n\tfile_name\x18\x01 \x01(\t\"^\n\x12HingeLossParameter\x12\x30\n\x04norm\x18\x01 \x01(\x0e\x32\x1e.caffe.HingeLossParameter.Norm:\x02L1\"\x16\n\x04Norm\x12\x06\n\x02L1\x10\x01\x12\x06\n\x02L2\x10\x02\"\x97\x02\n\x12ImageDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x15\n\nbatch_size\x18\x04 \x01(\r:\x01\x31\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x16\n\x07shuffle\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x15\n\nnew_height\x18\t \x01(\r:\x01\x30\x12\x14\n\tnew_width\x18\n \x01(\r:\x01\x30\x12\x16\n\x08is_color\x18\x0b \x01(\x08:\x04true\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\x0c \x01(\t:\x00\"\'\n\x15InfogainLossParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\"\xcb\x01\n\x15InnerProductParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0f\n\x04\x61xis\x18\x05 \x01(\x05:\x01\x31\x12\x18\n\ttranspose\x18\x06 \x01(\x08:\x05\x66\x61lse\"1\n\x0eInputParameter\x12\x1f\n\x05shape\x18\x01 \x03(\x0b\x32\x10.caffe.BlobShape\"D\n\x0cLogParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 
\x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"\xb8\x02\n\x0cLRNParameter\x12\x15\n\nlocal_size\x18\x01 \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x02 \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x03 \x01(\x02:\x04\x30.75\x12\x44\n\x0bnorm_region\x18\x04 \x01(\x0e\x32\x1e.caffe.LRNParameter.NormRegion:\x0f\x41\x43ROSS_CHANNELS\x12\x0c\n\x01k\x18\x05 \x01(\x02:\x01\x31\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.LRNParameter.Engine:\x07\x44\x45\x46\x41ULT\"5\n\nNormRegion\x12\x13\n\x0f\x41\x43ROSS_CHANNELS\x10\x00\x12\x12\n\x0eWITHIN_CHANNEL\x10\x01\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xbd\x01\n\x13MemoryDataParameter\x12\x12\n\nbatch_size\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\r\n\x05width\x18\x04 \x01(\r\x12;\n\x05\x64type\x18\x05 \x01(\x0e\x32#.caffe.MemoryDataParameter.DataType:\x07\x46LOAT32\"$\n\x08\x44\x61taType\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\x0b\n\x07\x46LOAT16\x10\x01\"e\n\x0cMVNParameter\x12 \n\x12normalize_variance\x18\x01 \x01(\x08:\x04true\x12\x1e\n\x0f\x61\x63ross_channels\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x13\n\x03\x65ps\x18\x03 \x01(\x02:\x06\x31\x65-009\"5\n\x12ParameterParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\"\xa2\x03\n\x10PoolingParameter\x12\x35\n\x04pool\x18\x01 \x01(\x0e\x32\".caffe.PoolingParameter.PoolMethod:\x03MAX\x12\x0e\n\x03pad\x18\x04 \x01(\r:\x01\x30\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x13\n\x0bkernel_size\x18\x02 \x01(\r\x12\x10\n\x08kernel_h\x18\x05 \x01(\r\x12\x10\n\x08kernel_w\x18\x06 \x01(\r\x12\x11\n\x06stride\x18\x03 \x01(\r:\x01\x31\x12\x10\n\x08stride_h\x18\x07 \x01(\r\x12\x10\n\x08stride_w\x18\x08 \x01(\r\x12\x37\n\x06\x65ngine\x18\x0b \x01(\x0e\x32\x1e.caffe.PoolingParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x1d\n\x0eglobal_pooling\x18\x0c 
\x01(\x08:\x05\x66\x61lse\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Y\n\x13ROIPoolingParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"F\n\x0ePowerParameter\x12\x10\n\x05power\x18\x01 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"g\n\x0fPythonParameter\x12\x0e\n\x06module\x18\x01 \x01(\t\x12\r\n\x05layer\x18\x02 \x01(\t\x12\x13\n\tparam_str\x18\x03 \x01(\t:\x00\x12 \n\x11share_in_parallel\x18\x04 \x01(\x08:\x05\x66\x61lse\"\xad\x01\n\x12ReductionParameter\x12=\n\toperation\x18\x01 \x01(\x0e\x32%.caffe.ReductionParameter.ReductionOp:\x03SUM\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x10\n\x05\x63oeff\x18\x03 \x01(\x02:\x01\x31\"5\n\x0bReductionOp\x12\x07\n\x03SUM\x10\x01\x12\x08\n\x04\x41SUM\x10\x02\x12\t\n\x05SUMSQ\x10\x03\x12\x08\n\x04MEAN\x10\x04\"\x8d\x01\n\rReLUParameter\x12\x19\n\x0enegative_slope\x18\x01 \x01(\x02:\x01\x30\x12\x34\n\x06\x65ngine\x18\x02 \x01(\x0e\x32\x1b.caffe.ReLUParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Z\n\x10ReshapeParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\x08num_axes\x18\x03 \x01(\x05:\x02-1\"\xa5\x01\n\x0eScaleParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x18\n\tbias_term\x18\x04 \x01(\x08:\x05\x66\x61lse\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"x\n\x10SigmoidParameter\x12\x37\n\x06\x65ngine\x18\x01 
\x01(\x0e\x32\x1e.caffe.SigmoidParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"L\n\x0eSliceParameter\x12\x0f\n\x04\x61xis\x18\x03 \x01(\x05:\x01\x31\x12\x13\n\x0bslice_point\x18\x02 \x03(\r\x12\x14\n\tslice_dim\x18\x01 \x01(\r:\x01\x31\"\x89\x01\n\x10SoftmaxParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SoftmaxParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"r\n\rTanHParameter\x12\x34\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1b.caffe.TanHParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"T\n\rTileParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\r\n\x05tiles\x18\x02 \x01(\x05\x12#\n\tmultiples\x18\x03 \x01(\x0b\x32\x10.caffe.BlobShape\"*\n\x12ThresholdParameter\x12\x14\n\tthreshold\x18\x01 \x01(\x02:\x01\x30\"\xc1\x02\n\x13WindowDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0c\x66g_threshold\x18\x07 \x01(\x02:\x03\x30.5\x12\x19\n\x0c\x62g_threshold\x18\x08 \x01(\x02:\x03\x30.5\x12\x19\n\x0b\x66g_fraction\x18\t \x01(\x02:\x04\x30.25\x12\x16\n\x0b\x63ontext_pad\x18\n \x01(\r:\x01\x30\x12\x17\n\tcrop_mode\x18\x0b \x01(\t:\x04warp\x12\x1b\n\x0c\x63\x61\x63he_images\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\r \x01(\t:\x00\"\xeb\x01\n\x0cSPPParameter\x12\x16\n\x0epyramid_height\x18\x01 \x01(\r\x12\x31\n\x04pool\x18\x02 
\x01(\x0e\x32\x1e.caffe.SPPParameter.PoolMethod:\x03MAX\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.SPPParameter.Engine:\x07\x44\x45\x46\x41ULT\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xe0\x13\n\x10V1LayerParameter\x12\x0e\n\x06\x62ottom\x18\x02 \x03(\t\x12\x0b\n\x03top\x18\x03 \x03(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12$\n\x07include\x18 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18! \x03(\x0b\x32\x13.caffe.NetStateRule\x12/\n\x04type\x18\x05 \x01(\x0e\x32!.caffe.V1LayerParameter.LayerType\x12\x1f\n\x05\x62lobs\x18\x06 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x0e\n\x05param\x18\xe9\x07 \x03(\t\x12>\n\x0f\x62lob_share_mode\x18\xea\x07 \x03(\x0e\x32$.caffe.V1LayerParameter.DimCheckMode\x12\x10\n\x08\x62lobs_lr\x18\x07 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x08 \x03(\x02\x12\x13\n\x0bloss_weight\x18# \x03(\x02\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x1b \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18\x17 \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12,\n\x0c\x63oncat_param\x18\t \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18( \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18\n \x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12(\n\ndata_param\x18\x0b \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18\x0c \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18\x1a \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18\x18 \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12&\n\texp_param\x18) \x01(\x0b\x32\x13.caffe.ExpParameter\x12\x31\n\x0fhdf5_data_param\x18\r \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18\x0e \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18\x1d 
\x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18\x0f \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18\x10 \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18\x11 \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12&\n\tlrn_param\x18\x12 \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18\x16 \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18\" \x01(\x0b\x32\x13.caffe.MVNParameter\x12.\n\rpooling_param\x18\x13 \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18\x15 \x01(\x0b\x32\x15.caffe.PowerParameter\x12(\n\nrelu_param\x18\x1e \x01(\x0b\x32\x14.caffe.ReLUParameter\x12.\n\rsigmoid_param\x18& \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18\' \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12*\n\x0bslice_param\x18\x1f \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18% \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x32\n\x0fthreshold_param\x18\x19 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12\x35\n\x11window_data_param\x18\x14 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x37\n\x0ftransform_param\x18$ \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18* \x01(\x0b\x32\x14.caffe.LossParameter\x12&\n\x05layer\x18\x01 \x01(\x0b\x32\x17.caffe.V0LayerParameter\"\xd8\x04\n\tLayerType\x12\x08\n\x04NONE\x10\x00\x12\n\n\x06\x41\x42SVAL\x10#\x12\x0c\n\x08\x41\x43\x43URACY\x10\x01\x12\n\n\x06\x41RGMAX\x10\x1e\x12\x08\n\x04\x42NLL\x10\x02\x12\n\n\x06\x43ONCAT\x10\x03\x12\x14\n\x10\x43ONTRASTIVE_LOSS\x10%\x12\x0f\n\x0b\x43ONVOLUTION\x10\x04\x12\x08\n\x04\x44\x41TA\x10\x05\x12\x11\n\rDECONVOLUTION\x10\'\x12\x0b\n\x07\x44ROPOUT\x10\x06\x12\x0e\n\nDUMMY_DATA\x10 
\x12\x12\n\x0e\x45UCLIDEAN_LOSS\x10\x07\x12\x0b\n\x07\x45LTWISE\x10\x19\x12\x07\n\x03\x45XP\x10&\x12\x0b\n\x07\x46LATTEN\x10\x08\x12\r\n\tHDF5_DATA\x10\t\x12\x0f\n\x0bHDF5_OUTPUT\x10\n\x12\x0e\n\nHINGE_LOSS\x10\x1c\x12\n\n\x06IM2COL\x10\x0b\x12\x0e\n\nIMAGE_DATA\x10\x0c\x12\x11\n\rINFOGAIN_LOSS\x10\r\x12\x11\n\rINNER_PRODUCT\x10\x0e\x12\x07\n\x03LRN\x10\x0f\x12\x0f\n\x0bMEMORY_DATA\x10\x1d\x12\x1d\n\x19MULTINOMIAL_LOGISTIC_LOSS\x10\x10\x12\x07\n\x03MVN\x10\"\x12\x0b\n\x07POOLING\x10\x11\x12\t\n\x05POWER\x10\x1a\x12\x08\n\x04RELU\x10\x12\x12\x0b\n\x07SIGMOID\x10\x13\x12\x1e\n\x1aSIGMOID_CROSS_ENTROPY_LOSS\x10\x1b\x12\x0b\n\x07SILENCE\x10$\x12\x0b\n\x07SOFTMAX\x10\x14\x12\x10\n\x0cSOFTMAX_LOSS\x10\x15\x12\t\n\x05SPLIT\x10\x16\x12\t\n\x05SLICE\x10!\x12\x08\n\x04TANH\x10\x17\x12\x0f\n\x0bWINDOW_DATA\x10\x18\x12\r\n\tTHRESHOLD\x10\x1f\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xfd\x07\n\x10V0LayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x12\n\nnum_output\x18\x03 \x01(\r\x12\x16\n\x08\x62iasterm\x18\x04 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x06 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0e\n\x03pad\x18\x07 \x01(\r:\x01\x30\x12\x12\n\nkernelsize\x18\x08 \x01(\r\x12\x10\n\x05group\x18\t \x01(\r:\x01\x31\x12\x11\n\x06stride\x18\n \x01(\r:\x01\x31\x12\x35\n\x04pool\x18\x0b \x01(\x0e\x32\".caffe.V0LayerParameter.PoolMethod:\x03MAX\x12\x1a\n\rdropout_ratio\x18\x0c \x01(\x02:\x03\x30.5\x12\x15\n\nlocal_size\x18\r \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x0e \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x0f \x01(\x02:\x04\x30.75\x12\x0c\n\x01k\x18\x16 \x01(\x02:\x01\x31\x12\x0e\n\x06source\x18\x10 \x01(\t\x12\x10\n\x05scale\x18\x11 \x01(\x02:\x01\x31\x12\x10\n\x08meanfile\x18\x12 \x01(\t\x12\x11\n\tbatchsize\x18\x13 \x01(\r\x12\x13\n\x08\x63ropsize\x18\x14 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x15 
\x01(\x08:\x05\x66\x61lse\x12\x1f\n\x05\x62lobs\x18\x32 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x10\n\x08\x62lobs_lr\x18\x33 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x34 \x03(\x02\x12\x14\n\trand_skip\x18\x35 \x01(\r:\x01\x30\x12\x1d\n\x10\x64\x65t_fg_threshold\x18\x36 \x01(\x02:\x03\x30.5\x12\x1d\n\x10\x64\x65t_bg_threshold\x18\x37 \x01(\x02:\x03\x30.5\x12\x1d\n\x0f\x64\x65t_fg_fraction\x18\x38 \x01(\x02:\x04\x30.25\x12\x1a\n\x0f\x64\x65t_context_pad\x18: \x01(\r:\x01\x30\x12\x1b\n\rdet_crop_mode\x18; \x01(\t:\x04warp\x12\x12\n\x07new_num\x18< \x01(\x05:\x01\x30\x12\x17\n\x0cnew_channels\x18= \x01(\x05:\x01\x30\x12\x15\n\nnew_height\x18> \x01(\x05:\x01\x30\x12\x14\n\tnew_width\x18? \x01(\x05:\x01\x30\x12\x1d\n\x0eshuffle_images\x18@ \x01(\x08:\x05\x66\x61lse\x12\x15\n\nconcat_dim\x18\x41 \x01(\r:\x01\x31\x12\x36\n\x11hdf5_output_param\x18\xe9\x07 \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"W\n\x0ePReLUParameter\x12&\n\x06\x66iller\x18\x01 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1d\n\x0e\x63hannel_shared\x18\x02 \x01(\x08:\x05\x66\x61lse\")\n\x15SmoothL1LossParameter\x12\x10\n\x05sigma\x18\x01 \x01(\x02:\x01\x31\"H\n\x0cMPIParameter\x12\x0f\n\x04root\x18\x01 \x01(\r:\x01\x30\x12\x12\n\x07\x63omm_id\x18\x02 \x01(\x04:\x01\x30\x12\x13\n\x08group_id\x18\x03 \x01(\x04:\x01\x30\"!\n\x10PermuteParameter\x12\r\n\x05order\x18\x01 \x03(\r\"\x93\x01\n\x12NormalizeParameter\x12\x1c\n\x0e\x61\x63ross_spatial\x18\x01 \x01(\x08:\x04true\x12,\n\x0cscale_filler\x18\x02 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1c\n\x0e\x63hannel_shared\x18\x03 \x01(\x08:\x04true\x12\x13\n\x03\x65ps\x18\x04 \x01(\x02:\x06\x31\x65-005\"d\n\x11ParallelParameter\x12\x1d\n\x0emultiple_nodes\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x16\n\x07shuffle\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x18\n\tpartition\x18\x03 \x01(\x08:\x05\x66\x61lse\"R\n\x0fResizeParameter\x12\x1f\n\x05shape\x18\x01 
\x01(\x0b\x32\x10.caffe.BlobShape\x12\x0e\n\x02\x66x\x18\x02 \x01(\x02:\x02-1\x12\x0e\n\x02\x66y\x18\x03 \x01(\x02:\x02-1\"\'\n\x13\x45xpandDimsParameter\x12\x10\n\x04\x61xis\x18\x01 \x01(\x05:\x02-1\"\x90\x02\n\x11ProposalParameter\x12\x0e\n\x06stride\x18\x01 \x03(\x05\x12\r\n\x05ratio\x18\x02 \x03(\x02\x12\r\n\x05scale\x18\x03 \x03(\x02\x12\x1b\n\rpre_nms_top_n\x18\x04 \x01(\r:\x04\x36\x30\x30\x30\x12\x1b\n\x0epost_nms_top_n\x18\x05 \x01(\r:\x03\x33\x30\x30\x12\x17\n\nnms_thresh\x18\x06 \x01(\x02:\x03\x30.7\x12\x14\n\x08min_size\x18\x07 \x01(\r:\x02\x31\x36\x12\x14\n\tmin_level\x18\x08 \x01(\x05:\x01\x32\x12\x14\n\tmax_level\x18\t \x01(\x05:\x01\x35\x12\x1c\n\x0f\x63\x61nonical_scale\x18\n \x01(\x05:\x03\x32\x32\x34\x12\x1a\n\x0f\x63\x61nonical_level\x18\x0b \x01(\x05:\x01\x34\"\xa7\x01\n\x14\x42\x61tchRenormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x13\n\x03\x65ps\x18\x03 \x01(\x02:\x06\x31\x65-005\x12\x10\n\x05r_max\x18\x04 \x01(\x02:\x01\x33\x12\x10\n\x05\x64_max\x18\x05 \x01(\x02:\x01\x35\x12\x16\n\x07t_delta\x18\x06 \x01(\x02:\x05\x30.001\"?\n\x14\x44\x65nseConcatParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x16\n\x0bgrowth_rate\x18\x02 \x01(\x05:\x01\x30\"N\n\x12\x46ocalLossParameter\x12\x13\n\x05\x61lpha\x18\x01 \x01(\x02:\x04\x30.25\x12\x10\n\x05gamma\x18\x02 \x01(\x02:\x01\x32\x12\x11\n\x06neg_id\x18\x03 \x01(\x05:\x01\x30\"\"\n\x0fGatherParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x30\",\n\x15InstanceNormParameter\x12\x13\n\x03\x65ps\x18\x01 \x01(\x02:\x06\x31\x65-005\"<\n\x12GroupNormParameter\x12\x13\n\x03\x65ps\x18\x01 \x01(\x02:\x06\x31\x65-005\x12\x11\n\x05group\x18\x02 \x01(\x05:\x02\x33\x32\"k\n\x12\x44ropBlockParameter\x12\x15\n\nblock_size\x18\x01 \x01(\x05:\x01\x37\x12\x16\n\tkeep_prob\x18\x02 \x01(\x02:\x03\x30.9\x12\x10\n\x05\x61lpha\x18\x03 \x01(\x02:\x01\x31\x12\x14\n\tdecrement\x18\x04 
\x01(\x02:\x01\x30*\x1c\n\x05Phase\x12\t\n\x05TRAIN\x10\x00\x12\x08\n\x04TEST\x10\x01')
) )
_sym_db.RegisterFileDescriptor(DESCRIPTOR) _sym_db.RegisterFileDescriptor(DESCRIPTOR)
...@@ -40,8 +40,8 @@ _PHASE = _descriptor.EnumDescriptor( ...@@ -40,8 +40,8 @@ _PHASE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=17641, serialized_start=17850,
serialized_end=17669, serialized_end=17878,
) )
_sym_db.RegisterEnumDescriptor(_PHASE) _sym_db.RegisterEnumDescriptor(_PHASE)
...@@ -209,8 +209,8 @@ _LOSSPARAMETER_NORMALIZATIONMODE = _descriptor.EnumDescriptor( ...@@ -209,8 +209,8 @@ _LOSSPARAMETER_NORMALIZATIONMODE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=6595, serialized_start=6709,
serialized_end=6671, serialized_end=6785,
) )
_sym_db.RegisterEnumDescriptor(_LOSSPARAMETER_NORMALIZATIONMODE) _sym_db.RegisterEnumDescriptor(_LOSSPARAMETER_NORMALIZATIONMODE)
...@@ -235,8 +235,8 @@ _CONVOLUTIONPARAMETER_ENGINE = _descriptor.EnumDescriptor( ...@@ -235,8 +235,8 @@ _CONVOLUTIONPARAMETER_ENGINE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=7634, serialized_start=7749,
serialized_end=7677, serialized_end=7792,
) )
_sym_db.RegisterEnumDescriptor(_CONVOLUTIONPARAMETER_ENGINE) _sym_db.RegisterEnumDescriptor(_CONVOLUTIONPARAMETER_ENGINE)
...@@ -257,8 +257,8 @@ _DATAPARAMETER_DB = _descriptor.EnumDescriptor( ...@@ -257,8 +257,8 @@ _DATAPARAMETER_DB = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=7995, serialized_start=8110,
serialized_end=8022, serialized_end=8137,
) )
_sym_db.RegisterEnumDescriptor(_DATAPARAMETER_DB) _sym_db.RegisterEnumDescriptor(_DATAPARAMETER_DB)
...@@ -283,8 +283,8 @@ _ELTWISEPARAMETER_ELTWISEOP = _descriptor.EnumDescriptor( ...@@ -283,8 +283,8 @@ _ELTWISEPARAMETER_ELTWISEOP = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=8389, serialized_start=8504,
serialized_end=8428, serialized_end=8543,
) )
_sym_db.RegisterEnumDescriptor(_ELTWISEPARAMETER_ELTWISEOP) _sym_db.RegisterEnumDescriptor(_ELTWISEPARAMETER_ELTWISEOP)
...@@ -305,8 +305,8 @@ _HINGELOSSPARAMETER_NORM = _descriptor.EnumDescriptor( ...@@ -305,8 +305,8 @@ _HINGELOSSPARAMETER_NORM = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=8963, serialized_start=9078,
serialized_end=8985, serialized_end=9100,
) )
_sym_db.RegisterEnumDescriptor(_HINGELOSSPARAMETER_NORM) _sym_db.RegisterEnumDescriptor(_HINGELOSSPARAMETER_NORM)
...@@ -327,8 +327,8 @@ _LRNPARAMETER_NORMREGION = _descriptor.EnumDescriptor( ...@@ -327,8 +327,8 @@ _LRNPARAMETER_NORMREGION = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=9852, serialized_start=9967,
serialized_end=9905, serialized_end=10020,
) )
_sym_db.RegisterEnumDescriptor(_LRNPARAMETER_NORMREGION) _sym_db.RegisterEnumDescriptor(_LRNPARAMETER_NORMREGION)
...@@ -353,8 +353,8 @@ _LRNPARAMETER_ENGINE = _descriptor.EnumDescriptor( ...@@ -353,8 +353,8 @@ _LRNPARAMETER_ENGINE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=7634, serialized_start=7749,
serialized_end=7677, serialized_end=7792,
) )
_sym_db.RegisterEnumDescriptor(_LRNPARAMETER_ENGINE) _sym_db.RegisterEnumDescriptor(_LRNPARAMETER_ENGINE)
...@@ -375,8 +375,8 @@ _MEMORYDATAPARAMETER_DATATYPE = _descriptor.EnumDescriptor( ...@@ -375,8 +375,8 @@ _MEMORYDATAPARAMETER_DATATYPE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=10106, serialized_start=10221,
serialized_end=10142, serialized_end=10257,
) )
_sym_db.RegisterEnumDescriptor(_MEMORYDATAPARAMETER_DATATYPE) _sym_db.RegisterEnumDescriptor(_MEMORYDATAPARAMETER_DATATYPE)
...@@ -401,8 +401,8 @@ _POOLINGPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor( ...@@ -401,8 +401,8 @@ _POOLINGPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=10630, serialized_start=10745,
serialized_end=10676, serialized_end=10791,
) )
_sym_db.RegisterEnumDescriptor(_POOLINGPARAMETER_POOLMETHOD) _sym_db.RegisterEnumDescriptor(_POOLINGPARAMETER_POOLMETHOD)
...@@ -427,8 +427,8 @@ _POOLINGPARAMETER_ENGINE = _descriptor.EnumDescriptor( ...@@ -427,8 +427,8 @@ _POOLINGPARAMETER_ENGINE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=7634, serialized_start=7749,
serialized_end=7677, serialized_end=7792,
) )
_sym_db.RegisterEnumDescriptor(_POOLINGPARAMETER_ENGINE) _sym_db.RegisterEnumDescriptor(_POOLINGPARAMETER_ENGINE)
...@@ -457,8 +457,8 @@ _REDUCTIONPARAMETER_REDUCTIONOP = _descriptor.EnumDescriptor( ...@@ -457,8 +457,8 @@ _REDUCTIONPARAMETER_REDUCTIONOP = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=11112, serialized_start=11227,
serialized_end=11165, serialized_end=11280,
) )
_sym_db.RegisterEnumDescriptor(_REDUCTIONPARAMETER_REDUCTIONOP) _sym_db.RegisterEnumDescriptor(_REDUCTIONPARAMETER_REDUCTIONOP)
...@@ -483,8 +483,8 @@ _RELUPARAMETER_ENGINE = _descriptor.EnumDescriptor( ...@@ -483,8 +483,8 @@ _RELUPARAMETER_ENGINE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=7634, serialized_start=7749,
serialized_end=7677, serialized_end=7792,
) )
_sym_db.RegisterEnumDescriptor(_RELUPARAMETER_ENGINE) _sym_db.RegisterEnumDescriptor(_RELUPARAMETER_ENGINE)
...@@ -509,8 +509,8 @@ _SIGMOIDPARAMETER_ENGINE = _descriptor.EnumDescriptor( ...@@ -509,8 +509,8 @@ _SIGMOIDPARAMETER_ENGINE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=7634, serialized_start=7749,
serialized_end=7677, serialized_end=7792,
) )
_sym_db.RegisterEnumDescriptor(_SIGMOIDPARAMETER_ENGINE) _sym_db.RegisterEnumDescriptor(_SIGMOIDPARAMETER_ENGINE)
...@@ -535,8 +535,8 @@ _SOFTMAXPARAMETER_ENGINE = _descriptor.EnumDescriptor( ...@@ -535,8 +535,8 @@ _SOFTMAXPARAMETER_ENGINE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=7634, serialized_start=7749,
serialized_end=7677, serialized_end=7792,
) )
_sym_db.RegisterEnumDescriptor(_SOFTMAXPARAMETER_ENGINE) _sym_db.RegisterEnumDescriptor(_SOFTMAXPARAMETER_ENGINE)
...@@ -561,8 +561,8 @@ _TANHPARAMETER_ENGINE = _descriptor.EnumDescriptor( ...@@ -561,8 +561,8 @@ _TANHPARAMETER_ENGINE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=7634, serialized_start=7749,
serialized_end=7677, serialized_end=7792,
) )
_sym_db.RegisterEnumDescriptor(_TANHPARAMETER_ENGINE) _sym_db.RegisterEnumDescriptor(_TANHPARAMETER_ENGINE)
...@@ -587,8 +587,8 @@ _SPPPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor( ...@@ -587,8 +587,8 @@ _SPPPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=10630, serialized_start=10745,
serialized_end=10676, serialized_end=10791,
) )
_sym_db.RegisterEnumDescriptor(_SPPPARAMETER_POOLMETHOD) _sym_db.RegisterEnumDescriptor(_SPPPARAMETER_POOLMETHOD)
...@@ -613,8 +613,8 @@ _SPPPARAMETER_ENGINE = _descriptor.EnumDescriptor( ...@@ -613,8 +613,8 @@ _SPPPARAMETER_ENGINE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=7634, serialized_start=7749,
serialized_end=7677, serialized_end=7792,
) )
_sym_db.RegisterEnumDescriptor(_SPPPARAMETER_ENGINE) _sym_db.RegisterEnumDescriptor(_SPPPARAMETER_ENGINE)
...@@ -787,8 +787,8 @@ _V1LAYERPARAMETER_LAYERTYPE = _descriptor.EnumDescriptor( ...@@ -787,8 +787,8 @@ _V1LAYERPARAMETER_LAYERTYPE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=14604, serialized_start=14719,
serialized_end=15204, serialized_end=15319,
) )
_sym_db.RegisterEnumDescriptor(_V1LAYERPARAMETER_LAYERTYPE) _sym_db.RegisterEnumDescriptor(_V1LAYERPARAMETER_LAYERTYPE)
...@@ -835,8 +835,8 @@ _V0LAYERPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor( ...@@ -835,8 +835,8 @@ _V0LAYERPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=10630, serialized_start=10745,
serialized_end=10676, serialized_end=10791,
) )
_sym_db.RegisterEnumDescriptor(_V0LAYERPARAMETER_POOLMETHOD) _sym_db.RegisterEnumDescriptor(_V0LAYERPARAMETER_POOLMETHOD)
...@@ -2269,12 +2269,26 @@ _LAYERPARAMETER = _descriptor.Descriptor( ...@@ -2269,12 +2269,26 @@ _LAYERPARAMETER = _descriptor.Descriptor(
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='group_norm_param', full_name='caffe.LayerParameter.group_norm_param', index=71, name='instance_norm_param', full_name='caffe.LayerParameter.instance_norm_param', index=71,
number=166, type=11, cpp_type=10, label=1, number=166, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None, has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor(
name='group_norm_param', full_name='caffe.LayerParameter.group_norm_param', index=72,
number=167, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='drop_block_param', full_name='caffe.LayerParameter.drop_block_param', index=73,
number=168, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
], ],
extensions=[ extensions=[
], ],
...@@ -2287,7 +2301,7 @@ _LAYERPARAMETER = _descriptor.Descriptor( ...@@ -2287,7 +2301,7 @@ _LAYERPARAMETER = _descriptor.Descriptor(
oneofs=[ oneofs=[
], ],
serialized_start=2850, serialized_start=2850,
serialized_end=6125, serialized_end=6239,
) )
...@@ -2386,8 +2400,8 @@ _TRANSFORMATIONPARAMETER = _descriptor.Descriptor( ...@@ -2386,8 +2400,8 @@ _TRANSFORMATIONPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=6128, serialized_start=6242,
serialized_end=6423, serialized_end=6537,
) )
...@@ -2416,8 +2430,8 @@ _LOSSPARAMETER_EXPANDDIMSPARAMETER = _descriptor.Descriptor( ...@@ -2416,8 +2430,8 @@ _LOSSPARAMETER_EXPANDDIMSPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=6554, serialized_start=6668,
serialized_end=6593, serialized_end=6707,
) )
_LOSSPARAMETER = _descriptor.Descriptor( _LOSSPARAMETER = _descriptor.Descriptor(
...@@ -2460,8 +2474,8 @@ _LOSSPARAMETER = _descriptor.Descriptor( ...@@ -2460,8 +2474,8 @@ _LOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=6426, serialized_start=6540,
serialized_end=6671, serialized_end=6785,
) )
...@@ -2504,8 +2518,8 @@ _ACCURACYPARAMETER = _descriptor.Descriptor( ...@@ -2504,8 +2518,8 @@ _ACCURACYPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=6673, serialized_start=6787,
serialized_end=6749, serialized_end=6863,
) )
...@@ -2548,8 +2562,8 @@ _ARGMAXPARAMETER = _descriptor.Descriptor( ...@@ -2548,8 +2562,8 @@ _ARGMAXPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=6751, serialized_start=6865,
serialized_end=6828, serialized_end=6942,
) )
...@@ -2585,8 +2599,8 @@ _CONCATPARAMETER = _descriptor.Descriptor( ...@@ -2585,8 +2599,8 @@ _CONCATPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=6830, serialized_start=6944,
serialized_end=6887, serialized_end=7001,
) )
...@@ -2614,7 +2628,7 @@ _BATCHNORMPARAMETER = _descriptor.Descriptor( ...@@ -2614,7 +2628,7 @@ _BATCHNORMPARAMETER = _descriptor.Descriptor(
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='eps', full_name='caffe.BatchNormParameter.eps', index=2, name='eps', full_name='caffe.BatchNormParameter.eps', index=2,
number=3, type=2, cpp_type=6, label=1, number=3, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.001, has_default_value=True, default_value=1e-005,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
...@@ -2629,8 +2643,8 @@ _BATCHNORMPARAMETER = _descriptor.Descriptor( ...@@ -2629,8 +2643,8 @@ _BATCHNORMPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=6889, serialized_start=7003,
serialized_end=6993, serialized_end=7108,
) )
...@@ -2673,8 +2687,8 @@ _BIASPARAMETER = _descriptor.Descriptor( ...@@ -2673,8 +2687,8 @@ _BIASPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=6995, serialized_start=7110,
serialized_end=7088, serialized_end=7203,
) )
...@@ -2710,8 +2724,8 @@ _CONTRASTIVELOSSPARAMETER = _descriptor.Descriptor( ...@@ -2710,8 +2724,8 @@ _CONTRASTIVELOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=7090, serialized_start=7205,
serialized_end=7166, serialized_end=7281,
) )
...@@ -2860,8 +2874,8 @@ _CONVOLUTIONPARAMETER = _descriptor.Descriptor( ...@@ -2860,8 +2874,8 @@ _CONVOLUTIONPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=7169, serialized_start=7284,
serialized_end=7677, serialized_end=7792,
) )
...@@ -2897,8 +2911,8 @@ _CROPPARAMETER = _descriptor.Descriptor( ...@@ -2897,8 +2911,8 @@ _CROPPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=7679, serialized_start=7794,
serialized_end=7727, serialized_end=7842,
) )
...@@ -2991,8 +3005,8 @@ _DATAPARAMETER = _descriptor.Descriptor( ...@@ -2991,8 +3005,8 @@ _DATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=7730, serialized_start=7845,
serialized_end=8022, serialized_end=8137,
) )
...@@ -3028,8 +3042,8 @@ _DROPOUTPARAMETER = _descriptor.Descriptor( ...@@ -3028,8 +3042,8 @@ _DROPOUTPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8024, serialized_start=8139,
serialized_end=8097, serialized_end=8212,
) )
...@@ -3093,8 +3107,8 @@ _DUMMYDATAPARAMETER = _descriptor.Descriptor( ...@@ -3093,8 +3107,8 @@ _DUMMYDATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8100, serialized_start=8215,
serialized_end=8260, serialized_end=8375,
) )
...@@ -3138,8 +3152,8 @@ _ELTWISEPARAMETER = _descriptor.Descriptor( ...@@ -3138,8 +3152,8 @@ _ELTWISEPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8263, serialized_start=8378,
serialized_end=8428, serialized_end=8543,
) )
...@@ -3168,8 +3182,8 @@ _ELUPARAMETER = _descriptor.Descriptor( ...@@ -3168,8 +3182,8 @@ _ELUPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8430, serialized_start=8545,
serialized_end=8462, serialized_end=8577,
) )
...@@ -3226,8 +3240,8 @@ _EMBEDPARAMETER = _descriptor.Descriptor( ...@@ -3226,8 +3240,8 @@ _EMBEDPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8465, serialized_start=8580,
serialized_end=8637, serialized_end=8752,
) )
...@@ -3270,8 +3284,8 @@ _EXPPARAMETER = _descriptor.Descriptor( ...@@ -3270,8 +3284,8 @@ _EXPPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8639, serialized_start=8754,
serialized_end=8707, serialized_end=8822,
) )
...@@ -3307,8 +3321,8 @@ _FLATTENPARAMETER = _descriptor.Descriptor( ...@@ -3307,8 +3321,8 @@ _FLATTENPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8709, serialized_start=8824,
serialized_end=8766, serialized_end=8881,
) )
...@@ -3351,8 +3365,8 @@ _HDF5DATAPARAMETER = _descriptor.Descriptor( ...@@ -3351,8 +3365,8 @@ _HDF5DATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8768, serialized_start=8883,
serialized_end=8847, serialized_end=8962,
) )
...@@ -3381,8 +3395,8 @@ _HDF5OUTPUTPARAMETER = _descriptor.Descriptor( ...@@ -3381,8 +3395,8 @@ _HDF5OUTPUTPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8849, serialized_start=8964,
serialized_end=8889, serialized_end=9004,
) )
...@@ -3412,8 +3426,8 @@ _HINGELOSSPARAMETER = _descriptor.Descriptor( ...@@ -3412,8 +3426,8 @@ _HINGELOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8891, serialized_start=9006,
serialized_end=8985, serialized_end=9100,
) )
...@@ -3519,8 +3533,8 @@ _IMAGEDATAPARAMETER = _descriptor.Descriptor( ...@@ -3519,8 +3533,8 @@ _IMAGEDATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=8988, serialized_start=9103,
serialized_end=9267, serialized_end=9382,
) )
...@@ -3549,8 +3563,8 @@ _INFOGAINLOSSPARAMETER = _descriptor.Descriptor( ...@@ -3549,8 +3563,8 @@ _INFOGAINLOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=9269, serialized_start=9384,
serialized_end=9308, serialized_end=9423,
) )
...@@ -3614,8 +3628,8 @@ _INNERPRODUCTPARAMETER = _descriptor.Descriptor( ...@@ -3614,8 +3628,8 @@ _INNERPRODUCTPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=9311, serialized_start=9426,
serialized_end=9514, serialized_end=9629,
) )
...@@ -3644,8 +3658,8 @@ _INPUTPARAMETER = _descriptor.Descriptor( ...@@ -3644,8 +3658,8 @@ _INPUTPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=9516, serialized_start=9631,
serialized_end=9565, serialized_end=9680,
) )
...@@ -3688,8 +3702,8 @@ _LOGPARAMETER = _descriptor.Descriptor( ...@@ -3688,8 +3702,8 @@ _LOGPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=9567, serialized_start=9682,
serialized_end=9635, serialized_end=9750,
) )
...@@ -3755,8 +3769,8 @@ _LRNPARAMETER = _descriptor.Descriptor( ...@@ -3755,8 +3769,8 @@ _LRNPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=9638, serialized_start=9753,
serialized_end=9950, serialized_end=10065,
) )
...@@ -3814,8 +3828,8 @@ _MEMORYDATAPARAMETER = _descriptor.Descriptor( ...@@ -3814,8 +3828,8 @@ _MEMORYDATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=9953, serialized_start=10068,
serialized_end=10142, serialized_end=10257,
) )
...@@ -3858,8 +3872,8 @@ _MVNPARAMETER = _descriptor.Descriptor( ...@@ -3858,8 +3872,8 @@ _MVNPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=10144, serialized_start=10259,
serialized_end=10245, serialized_end=10360,
) )
...@@ -3888,8 +3902,8 @@ _PARAMETERPARAMETER = _descriptor.Descriptor( ...@@ -3888,8 +3902,8 @@ _PARAMETERPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=10247, serialized_start=10362,
serialized_end=10300, serialized_end=10415,
) )
...@@ -3997,8 +4011,8 @@ _POOLINGPARAMETER = _descriptor.Descriptor( ...@@ -3997,8 +4011,8 @@ _POOLINGPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=10303, serialized_start=10418,
serialized_end=10721, serialized_end=10836,
) )
...@@ -4041,8 +4055,8 @@ _ROIPOOLINGPARAMETER = _descriptor.Descriptor( ...@@ -4041,8 +4055,8 @@ _ROIPOOLINGPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=10723, serialized_start=10838,
serialized_end=10812, serialized_end=10927,
) )
...@@ -4085,8 +4099,8 @@ _POWERPARAMETER = _descriptor.Descriptor( ...@@ -4085,8 +4099,8 @@ _POWERPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=10814, serialized_start=10929,
serialized_end=10884, serialized_end=10999,
) )
...@@ -4136,8 +4150,8 @@ _PYTHONPARAMETER = _descriptor.Descriptor( ...@@ -4136,8 +4150,8 @@ _PYTHONPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=10886, serialized_start=11001,
serialized_end=10989, serialized_end=11104,
) )
...@@ -4181,8 +4195,8 @@ _REDUCTIONPARAMETER = _descriptor.Descriptor( ...@@ -4181,8 +4195,8 @@ _REDUCTIONPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=10992, serialized_start=11107,
serialized_end=11165, serialized_end=11280,
) )
...@@ -4219,8 +4233,8 @@ _RELUPARAMETER = _descriptor.Descriptor( ...@@ -4219,8 +4233,8 @@ _RELUPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=11168, serialized_start=11283,
serialized_end=11309, serialized_end=11424,
) )
...@@ -4263,8 +4277,8 @@ _RESHAPEPARAMETER = _descriptor.Descriptor( ...@@ -4263,8 +4277,8 @@ _RESHAPEPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=11311, serialized_start=11426,
serialized_end=11401, serialized_end=11516,
) )
...@@ -4321,8 +4335,8 @@ _SCALEPARAMETER = _descriptor.Descriptor( ...@@ -4321,8 +4335,8 @@ _SCALEPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=11404, serialized_start=11519,
serialized_end=11569, serialized_end=11684,
) )
...@@ -4352,8 +4366,8 @@ _SIGMOIDPARAMETER = _descriptor.Descriptor( ...@@ -4352,8 +4366,8 @@ _SIGMOIDPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=11571, serialized_start=11686,
serialized_end=11691, serialized_end=11806,
) )
...@@ -4396,8 +4410,8 @@ _SLICEPARAMETER = _descriptor.Descriptor( ...@@ -4396,8 +4410,8 @@ _SLICEPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=11693, serialized_start=11808,
serialized_end=11769, serialized_end=11884,
) )
...@@ -4434,8 +4448,8 @@ _SOFTMAXPARAMETER = _descriptor.Descriptor( ...@@ -4434,8 +4448,8 @@ _SOFTMAXPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=11772, serialized_start=11887,
serialized_end=11909, serialized_end=12024,
) )
...@@ -4465,8 +4479,8 @@ _TANHPARAMETER = _descriptor.Descriptor( ...@@ -4465,8 +4479,8 @@ _TANHPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=11911, serialized_start=12026,
serialized_end=12025, serialized_end=12140,
) )
...@@ -4509,8 +4523,8 @@ _TILEPARAMETER = _descriptor.Descriptor( ...@@ -4509,8 +4523,8 @@ _TILEPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=12027, serialized_start=12142,
serialized_end=12111, serialized_end=12226,
) )
...@@ -4539,8 +4553,8 @@ _THRESHOLDPARAMETER = _descriptor.Descriptor( ...@@ -4539,8 +4553,8 @@ _THRESHOLDPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=12113, serialized_start=12228,
serialized_end=12155, serialized_end=12270,
) )
...@@ -4653,8 +4667,8 @@ _WINDOWDATAPARAMETER = _descriptor.Descriptor( ...@@ -4653,8 +4667,8 @@ _WINDOWDATAPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=12158, serialized_start=12273,
serialized_end=12479, serialized_end=12594,
) )
...@@ -4699,8 +4713,8 @@ _SPPPARAMETER = _descriptor.Descriptor( ...@@ -4699,8 +4713,8 @@ _SPPPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=12482, serialized_start=12597,
serialized_end=12717, serialized_end=12832,
) )
...@@ -5025,8 +5039,8 @@ _V1LAYERPARAMETER = _descriptor.Descriptor( ...@@ -5025,8 +5039,8 @@ _V1LAYERPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=12720, serialized_start=12835,
serialized_end=15248, serialized_end=15363,
) )
...@@ -5315,8 +5329,8 @@ _V0LAYERPARAMETER = _descriptor.Descriptor( ...@@ -5315,8 +5329,8 @@ _V0LAYERPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=15251, serialized_start=15366,
serialized_end=16272, serialized_end=16387,
) )
...@@ -5352,8 +5366,8 @@ _PRELUPARAMETER = _descriptor.Descriptor( ...@@ -5352,8 +5366,8 @@ _PRELUPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=16274, serialized_start=16389,
serialized_end=16361, serialized_end=16476,
) )
...@@ -5382,8 +5396,8 @@ _SMOOTHL1LOSSPARAMETER = _descriptor.Descriptor( ...@@ -5382,8 +5396,8 @@ _SMOOTHL1LOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=16363, serialized_start=16478,
serialized_end=16404, serialized_end=16519,
) )
...@@ -5426,8 +5440,8 @@ _MPIPARAMETER = _descriptor.Descriptor( ...@@ -5426,8 +5440,8 @@ _MPIPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=16406, serialized_start=16521,
serialized_end=16478, serialized_end=16593,
) )
...@@ -5456,8 +5470,8 @@ _PERMUTEPARAMETER = _descriptor.Descriptor( ...@@ -5456,8 +5470,8 @@ _PERMUTEPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=16480, serialized_start=16595,
serialized_end=16513, serialized_end=16628,
) )
...@@ -5492,7 +5506,7 @@ _NORMALIZEPARAMETER = _descriptor.Descriptor( ...@@ -5492,7 +5506,7 @@ _NORMALIZEPARAMETER = _descriptor.Descriptor(
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='eps', full_name='caffe.NormalizeParameter.eps', index=3, name='eps', full_name='caffe.NormalizeParameter.eps', index=3,
number=4, type=2, cpp_type=6, label=1, number=4, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.001, has_default_value=True, default_value=1e-005,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
...@@ -5507,8 +5521,8 @@ _NORMALIZEPARAMETER = _descriptor.Descriptor( ...@@ -5507,8 +5521,8 @@ _NORMALIZEPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=16516, serialized_start=16631,
serialized_end=16662, serialized_end=16778,
) )
...@@ -5551,8 +5565,8 @@ _PARALLELPARAMETER = _descriptor.Descriptor( ...@@ -5551,8 +5565,8 @@ _PARALLELPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=16664, serialized_start=16780,
serialized_end=16764, serialized_end=16880,
) )
...@@ -5595,8 +5609,8 @@ _RESIZEPARAMETER = _descriptor.Descriptor( ...@@ -5595,8 +5609,8 @@ _RESIZEPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=16766, serialized_start=16882,
serialized_end=16848, serialized_end=16964,
) )
...@@ -5625,8 +5639,8 @@ _EXPANDDIMSPARAMETER = _descriptor.Descriptor( ...@@ -5625,8 +5639,8 @@ _EXPANDDIMSPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=6554, serialized_start=6668,
serialized_end=6593, serialized_end=6707,
) )
...@@ -5725,8 +5739,8 @@ _PROPOSALPARAMETER = _descriptor.Descriptor( ...@@ -5725,8 +5739,8 @@ _PROPOSALPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=16892, serialized_start=17008,
serialized_end=17164, serialized_end=17280,
) )
...@@ -5754,7 +5768,7 @@ _BATCHRENORMPARAMETER = _descriptor.Descriptor( ...@@ -5754,7 +5768,7 @@ _BATCHRENORMPARAMETER = _descriptor.Descriptor(
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='eps', full_name='caffe.BatchRenormParameter.eps', index=2, name='eps', full_name='caffe.BatchRenormParameter.eps', index=2,
number=3, type=2, cpp_type=6, label=1, number=3, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.001, has_default_value=True, default_value=1e-005,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
...@@ -5790,8 +5804,8 @@ _BATCHRENORMPARAMETER = _descriptor.Descriptor( ...@@ -5790,8 +5804,8 @@ _BATCHRENORMPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=17167, serialized_start=17283,
serialized_end=17333, serialized_end=17450,
) )
...@@ -5827,8 +5841,8 @@ _DENSECONCATPARAMETER = _descriptor.Descriptor( ...@@ -5827,8 +5841,8 @@ _DENSECONCATPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=17335, serialized_start=17452,
serialized_end=17398, serialized_end=17515,
) )
...@@ -5871,8 +5885,8 @@ _FOCALLOSSPARAMETER = _descriptor.Descriptor( ...@@ -5871,8 +5885,8 @@ _FOCALLOSSPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=17400, serialized_start=17517,
serialized_end=17478, serialized_end=17595,
) )
...@@ -5901,8 +5915,38 @@ _GATHERPARAMETER = _descriptor.Descriptor( ...@@ -5901,8 +5915,38 @@ _GATHERPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=17480, serialized_start=17597,
serialized_end=17514, serialized_end=17631,
)
_INSTANCENORMPARAMETER = _descriptor.Descriptor(
name='InstanceNormParameter',
full_name='caffe.InstanceNormParameter',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='eps', full_name='caffe.InstanceNormParameter.eps', index=0,
number=1, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=1e-005,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=17633,
serialized_end=17677,
) )
...@@ -5914,30 +5958,67 @@ _GROUPNORMPARAMETER = _descriptor.Descriptor( ...@@ -5914,30 +5958,67 @@ _GROUPNORMPARAMETER = _descriptor.Descriptor(
containing_type=None, containing_type=None,
fields=[ fields=[
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='use_global_stats', full_name='caffe.GroupNormParameter.use_global_stats', index=0, name='eps', full_name='caffe.GroupNormParameter.eps', index=0,
number=1, type=8, cpp_type=7, label=1, number=1, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=False, has_default_value=True, default_value=1e-005,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='group', full_name='caffe.GroupNormParameter.group', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=True, default_value=32,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=17679,
serialized_end=17739,
)
_DROPBLOCKPARAMETER = _descriptor.Descriptor(
name='DropBlockParameter',
full_name='caffe.DropBlockParameter',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='block_size', full_name='caffe.DropBlockParameter.block_size', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=True, default_value=7,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='moving_average_fraction', full_name='caffe.GroupNormParameter.moving_average_fraction', index=1, name='keep_prob', full_name='caffe.DropBlockParameter.keep_prob', index=1,
number=2, type=2, cpp_type=6, label=1, number=2, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.9, has_default_value=True, default_value=0.9,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='eps', full_name='caffe.GroupNormParameter.eps', index=2, name='alpha', full_name='caffe.DropBlockParameter.alpha', index=2,
number=3, type=2, cpp_type=6, label=1, number=3, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.001, has_default_value=True, default_value=1,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='group', full_name='caffe.GroupNormParameter.group', index=3, name='decrement', full_name='caffe.DropBlockParameter.decrement', index=3,
number=5, type=13, cpp_type=3, label=1, number=4, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=32, has_default_value=True, default_value=0,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
...@@ -5952,8 +6033,8 @@ _GROUPNORMPARAMETER = _descriptor.Descriptor( ...@@ -5952,8 +6033,8 @@ _GROUPNORMPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=17516, serialized_start=17741,
serialized_end=17639, serialized_end=17848,
) )
_BLOBPROTO.fields_by_name['shape'].message_type = _BLOBSHAPE _BLOBPROTO.fields_by_name['shape'].message_type = _BLOBSHAPE
...@@ -6044,7 +6125,9 @@ _LAYERPARAMETER.fields_by_name['batch_renorm_param'].message_type = _BATCHRENORM ...@@ -6044,7 +6125,9 @@ _LAYERPARAMETER.fields_by_name['batch_renorm_param'].message_type = _BATCHRENORM
_LAYERPARAMETER.fields_by_name['dense_concat_param'].message_type = _DENSECONCATPARAMETER _LAYERPARAMETER.fields_by_name['dense_concat_param'].message_type = _DENSECONCATPARAMETER
_LAYERPARAMETER.fields_by_name['focal_loss_param'].message_type = _FOCALLOSSPARAMETER _LAYERPARAMETER.fields_by_name['focal_loss_param'].message_type = _FOCALLOSSPARAMETER
_LAYERPARAMETER.fields_by_name['gather_param'].message_type = _GATHERPARAMETER _LAYERPARAMETER.fields_by_name['gather_param'].message_type = _GATHERPARAMETER
_LAYERPARAMETER.fields_by_name['instance_norm_param'].message_type = _INSTANCENORMPARAMETER
_LAYERPARAMETER.fields_by_name['group_norm_param'].message_type = _GROUPNORMPARAMETER _LAYERPARAMETER.fields_by_name['group_norm_param'].message_type = _GROUPNORMPARAMETER
_LAYERPARAMETER.fields_by_name['drop_block_param'].message_type = _DROPBLOCKPARAMETER
_LOSSPARAMETER_EXPANDDIMSPARAMETER.containing_type = _LOSSPARAMETER _LOSSPARAMETER_EXPANDDIMSPARAMETER.containing_type = _LOSSPARAMETER
_LOSSPARAMETER.fields_by_name['normalization'].enum_type = _LOSSPARAMETER_NORMALIZATIONMODE _LOSSPARAMETER.fields_by_name['normalization'].enum_type = _LOSSPARAMETER_NORMALIZATIONMODE
_LOSSPARAMETER_NORMALIZATIONMODE.containing_type = _LOSSPARAMETER _LOSSPARAMETER_NORMALIZATIONMODE.containing_type = _LOSSPARAMETER
...@@ -6215,7 +6298,9 @@ DESCRIPTOR.message_types_by_name['BatchRenormParameter'] = _BATCHRENORMPARAMETER ...@@ -6215,7 +6298,9 @@ DESCRIPTOR.message_types_by_name['BatchRenormParameter'] = _BATCHRENORMPARAMETER
DESCRIPTOR.message_types_by_name['DenseConcatParameter'] = _DENSECONCATPARAMETER DESCRIPTOR.message_types_by_name['DenseConcatParameter'] = _DENSECONCATPARAMETER
DESCRIPTOR.message_types_by_name['FocalLossParameter'] = _FOCALLOSSPARAMETER DESCRIPTOR.message_types_by_name['FocalLossParameter'] = _FOCALLOSSPARAMETER
DESCRIPTOR.message_types_by_name['GatherParameter'] = _GATHERPARAMETER DESCRIPTOR.message_types_by_name['GatherParameter'] = _GATHERPARAMETER
DESCRIPTOR.message_types_by_name['InstanceNormParameter'] = _INSTANCENORMPARAMETER
DESCRIPTOR.message_types_by_name['GroupNormParameter'] = _GROUPNORMPARAMETER DESCRIPTOR.message_types_by_name['GroupNormParameter'] = _GROUPNORMPARAMETER
DESCRIPTOR.message_types_by_name['DropBlockParameter'] = _DROPBLOCKPARAMETER
DESCRIPTOR.enum_types_by_name['Phase'] = _PHASE DESCRIPTOR.enum_types_by_name['Phase'] = _PHASE
BlobShape = _reflection.GeneratedProtocolMessageType('BlobShape', (_message.Message,), dict( BlobShape = _reflection.GeneratedProtocolMessageType('BlobShape', (_message.Message,), dict(
...@@ -6737,6 +6822,13 @@ GatherParameter = _reflection.GeneratedProtocolMessageType('GatherParameter', (_ ...@@ -6737,6 +6822,13 @@ GatherParameter = _reflection.GeneratedProtocolMessageType('GatherParameter', (_
)) ))
_sym_db.RegisterMessage(GatherParameter) _sym_db.RegisterMessage(GatherParameter)
InstanceNormParameter = _reflection.GeneratedProtocolMessageType('InstanceNormParameter', (_message.Message,), dict(
DESCRIPTOR = _INSTANCENORMPARAMETER,
__module__ = 'caffe_pb2'
# @@protoc_insertion_point(class_scope:caffe.InstanceNormParameter)
))
_sym_db.RegisterMessage(InstanceNormParameter)
GroupNormParameter = _reflection.GeneratedProtocolMessageType('GroupNormParameter', (_message.Message,), dict( GroupNormParameter = _reflection.GeneratedProtocolMessageType('GroupNormParameter', (_message.Message,), dict(
DESCRIPTOR = _GROUPNORMPARAMETER, DESCRIPTOR = _GROUPNORMPARAMETER,
__module__ = 'caffe_pb2' __module__ = 'caffe_pb2'
...@@ -6744,6 +6836,13 @@ GroupNormParameter = _reflection.GeneratedProtocolMessageType('GroupNormParamete ...@@ -6744,6 +6836,13 @@ GroupNormParameter = _reflection.GeneratedProtocolMessageType('GroupNormParamete
)) ))
_sym_db.RegisterMessage(GroupNormParameter) _sym_db.RegisterMessage(GroupNormParameter)
DropBlockParameter = _reflection.GeneratedProtocolMessageType('DropBlockParameter', (_message.Message,), dict(
DESCRIPTOR = _DROPBLOCKPARAMETER,
__module__ = 'caffe_pb2'
# @@protoc_insertion_point(class_scope:caffe.DropBlockParameter)
))
_sym_db.RegisterMessage(DropBlockParameter)
_BLOBSHAPE.fields_by_name['dim'].has_options = True _BLOBSHAPE.fields_by_name['dim'].has_options = True
_BLOBSHAPE.fields_by_name['dim']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) _BLOBSHAPE.fields_by_name['dim']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))
......
...@@ -24,7 +24,7 @@ def convert_to_tensor(value, dtype=None, name=None, **kwargs): ...@@ -24,7 +24,7 @@ def convert_to_tensor(value, dtype=None, name=None, **kwargs):
Parameters Parameters
---------- ----------
value : basic type, list or numpy.ndarray value : number, list or numpy.ndarray
The value to convert. The value to convert.
dtype : Dtype or None dtype : Dtype or None
The data type. If ``None``, inferred from the type of `value`. The data type. If ``None``, inferred from the type of `value`.
......
...@@ -15,6 +15,7 @@ import numpy as np ...@@ -15,6 +15,7 @@ import numpy as np
import dragon.core.mpi as mpi import dragon.core.mpi as mpi
import dragon.core.workspace as ws import dragon.core.workspace as ws
import dragon.protos.dragon_pb2 as pb import dragon.protos.dragon_pb2 as pb
from dragon.core.utils import MakeArgument from dragon.core.utils import MakeArgument
from dragon.core.gradient_maker import GraphGradientMaker from dragon.core.gradient_maker import GraphGradientMaker
from dragon.core.scope import GetOperatorName, GetTensorName from dragon.core.scope import GetOperatorName, GetTensorName
...@@ -156,6 +157,7 @@ def GraphDef_Opt(meta_graph): ...@@ -156,6 +157,7 @@ def GraphDef_Opt(meta_graph):
OX = 3 if option['share_grads'] else 2 OX = 3 if option['share_grads'] else 2
if option['debug_mode']: OX = 1 if option['debug_mode']: OX = 1
meta_graph.arg.add().CopyFrom(MakeArgument('optimization_level', OX)) meta_graph.arg.add().CopyFrom(MakeArgument('optimization_level', OX))
meta_graph.graph_type = option['graph_type']
def GraphDef_Device(meta_graph): def GraphDef_Device(meta_graph):
...@@ -181,11 +183,12 @@ def GraphDef_Device(meta_graph): ...@@ -181,11 +183,12 @@ def GraphDef_Device(meta_graph):
""" """
from dragon.config import option from dragon.config import option
if option['device'] is not 'None': if option['device'] is not 'None':
supports = {'CPU': 0, 'CUDA': 1} supports = {'CPU': 0, 'CUDA': 1, 'CNML': 2}
device_option = pb.DeviceOption() device_option = pb.DeviceOption()
device_option.device_type = supports[option['device']] device_option.device_type = supports[option['device']]
device_option.device_id = option['gpu_id'] device_option.device_id = option['device_id']
device_option.random_seed = option['random_seed'] device_option.random_seed = option['random_seed']
if option['device'] == 'CUDA':
if option['use_cudnn']: device_option.engine = 'CUDNN' if option['use_cudnn']: device_option.engine = 'CUDNN'
meta_graph.device_option.CopyFrom(device_option) meta_graph.device_option.CopyFrom(device_option)
...@@ -217,16 +220,16 @@ def function(inputs=None, outputs=None, givens=None, updater=None): ...@@ -217,16 +220,16 @@ def function(inputs=None, outputs=None, givens=None, updater=None):
Examples Examples
-------- --------
>>> x = Tensor('x').Variable() >>> x = Tensor('x', dtype='float32').Variable()
>>> y = x * 2 >>> y = x * 2
>>> f = theano.function(outputs=y) >>> f = function(outputs=y)
>>> x.set_value(np.ones((2, 3), dtype=np.float32)) >>> x.set_value(np.ones((2, 3)))
>>> print(f()) >>> print(f())
>>> [[ 2. 2. 2.] >>> [[ 2. 2. 2.]
[ 2. 2. 2.]] [ 2. 2. 2.]]
>>> f = theano.function(inputs=x, outputs=y) >>> f = function(inputs=x, outputs=y)
>>> print(f(np.ones((2, 3), dtype=np.float32))) >>> print(f(np.ones((2, 3)))
>>> [[ 2. 2. 2.] >>> [[ 2. 2. 2.]
[ 2. 2. 2.]] [ 2. 2. 2.]]
...@@ -339,13 +342,15 @@ def eval(self, feed_dict=None): ...@@ -339,13 +342,15 @@ def eval(self, feed_dict=None):
raise TypeError('The key of feed_dict key should be a Tensor.') raise TypeError('The key of feed_dict key should be a Tensor.')
if key.shape is not None: if key.shape is not None:
if len(key.shape) != len(value.shape): if len(key.shape) != len(value.shape):
raise RuntimeError('The Tensor({}) was limited to {} dimensions, \ raise RuntimeError(
while feed a value with {} dimensions.'. 'The Tensor({}) was limited to {} dimensions, \
format(key.name, len(key.shape), len(value.shape))) while feed a value with {} dimensions.'.format(
key.name, len(key.shape), len(value.shape)))
for i in range(len(key.shape)): for i in range(len(key.shape)):
if key.shape[i] is None: continue if key.shape[i] is None: continue
if key.shape[i] != value.shape[i]: if key.shape[i] != value.shape[i]:
raise RuntimeError('The shape of Tensor({}) was limited as ('.format(key.name) + raise RuntimeError(
'The shape of Tensor({}) was limited as ('.format(key.name) +
','.join([str(dim) for dim in key.shape]) + '), ' + ','.join([str(dim) for dim in key.shape]) + '), ' +
'while feed a value with (' + ','.join([str(dim) for dim in value.shape]) + ').') 'while feed a value with (' + ','.join([str(dim) for dim in value.shape]) + ').')
return self._eval_func(*feed_dict.values()) return self._eval_func(*feed_dict.values())
......
...@@ -20,7 +20,7 @@ def shared(value, name=None, **kwargs): ...@@ -20,7 +20,7 @@ def shared(value, name=None, **kwargs):
Parameters Parameters
---------- ----------
value : basic type, list or numpy.ndarray value : number, list or numpy.ndarray
The numerical values. The numerical values.
name : str name : str
The name of tensor. The name of tensor.
......
...@@ -42,6 +42,7 @@ class Module(object): ...@@ -42,6 +42,7 @@ class Module(object):
self._buffers = OrderedDict() self._buffers = OrderedDict()
self._persistent_key = self._op = None self._persistent_key = self._op = None
self._ctx = ('CPU', 0) self._ctx = ('CPU', 0)
self.training = True
def __getattr__(self, item): def __getattr__(self, item):
if '_parameters' in self.__dict__: if '_parameters' in self.__dict__:
...@@ -363,3 +364,12 @@ class Module(object): ...@@ -363,3 +364,12 @@ class Module(object):
def run(self, inputs, outputs, auto_grad=True): def run(self, inputs, outputs, auto_grad=True):
meta = ('PERSISTENT', self.persistent_key, self.op) meta = ('PERSISTENT', self.persistent_key, self.op)
return RunOperator(inputs, outputs, meta, auto_grad=auto_grad) return RunOperator(inputs, outputs, meta, auto_grad=auto_grad)
def train(self, mode=True):
self.training = mode
for module in self.children():
module.train(mode)
return self
def eval(self):
return self.train(False)
\ No newline at end of file
...@@ -10,20 +10,35 @@ ...@@ -10,20 +10,35 @@
# ------------------------------------------------------------ # ------------------------------------------------------------
"""We move the Module & Parameter to ``torch`` instead of ``torch.nn``, """We move the Module & Parameter to ``torch`` instead of ``torch.nn``,
as it will be reused by the ``torch.ops``. as it will be reused by the ``torch.ops``.
""" """
from dragon.vm.torch.module import Module from dragon.vm.torch.module import Module
from dragon.vm.torch.tensor import Parameter from dragon.vm.torch.tensor import Parameter
from .modules.conv import Conv2d, ConvTranspose2d from .modules.conv import Conv2d, ConvTranspose2d
from .modules.pooling import MaxPool2d, AvgPool2d from .modules.pooling import MaxPool2d, AvgPool2d
from .modules.activation import ReLU, LeakyReLU, Sigmoid, Softmax
from .modules.activation import (
ReLU, LeakyReLU, ELU, SELU,
Sigmoid, Softmax,
)
from .modules.linear import Linear from .modules.linear import Linear
from .modules.loss import CrossEntropyLoss
from .modules.loss import (
BCEWithLogitsLoss,
NLLLoss, CrossEntropyLoss,
L1Loss, MSELoss, SmoothL1Loss,
)
from .modules.container import Container, Sequential, ModuleList from .modules.container import Container, Sequential, ModuleList
from .modules.batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d from .modules.batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d
from .modules.groupnorm import GroupNorm1d, GroupNorm2d, GroupNorm3d
from .modules.affine import Affine from .modules.affine import Affine
from .modules.dropout import Dropout, Dropout2d, Dropout3d from .modules.dropout import Dropout, Dropout2d, Dropout3d
from .modules.dropblock import DropBlock2d
from .modules.rnn import RNNBase, RNN, LSTM, GRU from .modules.rnn import RNNBase, RNN, LSTM, GRU
from . import init from . import init
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/pytorch/pytorch/blob/master/torch/nn/functional.py>
#
# ------------------------------------------------------------
import warnings
class _Reduction:
@staticmethod
def get_enum(reduction):
if reduction == 'none':
return 0
if reduction == 'elementwise_mean':
return 1
if reduction == 'sum':
return 2
raise ValueError(reduction + " is not a valid value for reduction")
# In order to support previous versions, accept boolean size_average and reduce
# and convert them into the new constants for now
# We use these functions in torch/legacy as well, in which case we'll silence the warning
@staticmethod
def legacy_get_string(size_average, reduce, emit_warning=True):
warning = "size_average and reduce args will be deprecated, please use reduction='{}' instead."
if size_average is None:
size_average = True
if reduce is None:
reduce = True
if size_average and reduce:
ret = 'elementwise_mean'
elif reduce:
ret = 'sum'
else:
ret = 'none'
if emit_warning:
warnings.warn(warning.format(ret))
return ret
@staticmethod
def legacy_get_enum(size_average, reduce, emit_warning=True):
return _Reduction.get_enum(_Reduction.legacy_get_string(size_average, reduce, emit_warning))
\ No newline at end of file
...@@ -55,6 +55,47 @@ class LeakyReLU(Module): ...@@ -55,6 +55,47 @@ class LeakyReLU(Module):
return self.run(inputs, outputs) return self.run(inputs, outputs)
class ELU(Module):
    """Module wrapper registering the backend 'Elu' operator.

    Parameters
    ----------
    alpha : float
        The alpha argument forwarded to the backend operator.
    inplace : boolean
        Whether to write the result back into the input tensor.
    """

    def __init__(self, alpha=1.0, inplace=False):
        super(ELU, self).__init__()
        self.alpha = alpha
        self._inplace = inplace
        self.register_op()

    def register_op(self):
        # Operator meta is built once and reused on every forward call.
        self.op_meta = {
            'op_type': 'Elu',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {
                'alpha': self.alpha,
            }
        }

    def forward(self, x):
        inputs = [x]
        self.unify_devices(inputs)
        if self._inplace:
            outputs = [x]
        else:
            outputs = [self.register_output(x.dtype)]
        return self.run(inputs, outputs)
class SELU(Module):
    """Module wrapper registering the backend 'SElu' operator.

    Parameters
    ----------
    inplace : boolean
        Whether to write the result back into the input tensor.
    """

    def __init__(self, inplace=False):
        super(SELU, self).__init__()
        self._inplace = inplace
        self.register_op()

    def register_op(self):
        # 'SElu' takes no extra arguments; the meta dict is still required.
        self.op_meta = {
            'op_type': 'SElu',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {}
        }

    def forward(self, x):
        inputs = [x]
        self.unify_devices(inputs)
        if self._inplace:
            outputs = [x]
        else:
            outputs = [self.register_output(x.dtype)]
        return self.run(inputs, outputs)
class Sigmoid(Module): class Sigmoid(Module):
def __init__(self, inplace=False): def __init__(self, inplace=False):
super(Sigmoid, self).__init__() super(Sigmoid, self).__init__()
......
...@@ -102,7 +102,7 @@ class _BatchNorm(Module): ...@@ -102,7 +102,7 @@ class _BatchNorm(Module):
inputs = [input] + self.inputs inputs = [input] + self.inputs
self.unify_devices(inputs) self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)] outputs = [self.register_output(input.dtype)]
phase = 'TRAIN' if input.requires_grad else 'TEST' phase = 'TRAIN' if self.training else 'TEST'
# Normalize the input by using batch stats ALWAYS # Normalize the input by using batch stats ALWAYS
# Note that the update of moving average is meaningless( # Note that the update of moving average is meaningless(
# Because we can not remove it. Why? Ask nvidia and cuDNN -:) # Because we can not remove it. Why? Ask nvidia and cuDNN -:)
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.nn import Module
class DropBlock2d(Module):
    """Module wrapper registering the backend 'DropBlock2d' operator.

    Acts as an identity function when ``self.training`` is False;
    otherwise runs the backend operator over NCHW inputs.

    Parameters
    ----------
    block_size : int
        The size of the dropped blocks (backend 'block_size' argument).
    kp : float
        The keep probability (backend 'keep_prob' argument).
    alpha : float
        Forwarded to the backend 'alpha' argument.
    decrement : float
        Forwarded to the backend 'decrement' argument.
    inplace : boolean
        Whether to write the result back into the input tensor.
    """

    def __init__(self, block_size=7, kp=0.9,
                 alpha=1., decrement=0., inplace=False):
        super(DropBlock2d, self).__init__()
        self.kp = kp
        self.block_size = block_size
        self.alpha = alpha
        self.decrement = decrement
        self.inplace = inplace
        self.register_op()

    def register_op(self):
        # The phase is fixed to 'TRAIN' because eval mode short-circuits
        # in forward() and the operator is never run.
        self.op_meta = {
            'op_type': 'DropBlock2d',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {
                'block_size': self.block_size,
                'keep_prob': self.kp,
                'alpha': self.alpha,
                'decrement': self.decrement,
                'data_format': 'NCHW',
                'phase': 'TRAIN',
            }
        }

    def forward(self, input):
        # Identity in eval mode: dropout-style ops only apply while training.
        if not self.training:
            return input
        inputs = [input]
        self.unify_devices(inputs)
        if self.inplace:
            outputs = [input]
        else:
            outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
\ No newline at end of file
...@@ -34,7 +34,7 @@ class Dropout(Module): ...@@ -34,7 +34,7 @@ class Dropout(Module):
} }
def forward(self, input): def forward(self, input):
if not input.requires_grad: return input if not self.training: return input
inputs = [input] inputs = [input]
self.unify_devices(inputs) self.unify_devices(inputs)
outputs = [input if self.inplace else self.register_output(input.dtype)] outputs = [input if self.inplace else self.register_output(input.dtype)]
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.tensor import Tensor
from dragon.vm.torch.nn import Module, Parameter
from dragon.vm.torch.ops.creation import zeros, ones
from dragon.vm.torch.module import RunOperator
class _GroupNorm(Module):
    """Shared implementation behind GroupNorm1d/2d/3d.

    Registers either the 'FusedGroupNorm' operator (with learnable scale
    and shift) or the plain 'GroupNorm' operator, normalizing over channel
    axis 1 (NCHW layout).

    Parameters
    ----------
    num_features : int
        The number of channels; sizes the affine parameters.
    group : int
        The number of groups forwarded to the backend.
    eps : float
        The epsilon forwarded to the backend.
    affine : boolean
        Whether to allocate learnable weight/bias parameters.
    """

    def __init__(self, num_features, group=32,
                 eps=1e-5, affine=True):
        super(_GroupNorm, self).__init__()
        self.num_features = num_features
        self.group = group
        self.eps = eps
        self.affine = affine
        if self.affine:
            # One learnable scale and shift value per channel.
            self.weight = Parameter(Tensor(num_features))
            self.bias = Parameter(Tensor(num_features))
            self.inputs = [self.weight, self.bias]
        else:
            self.weight = self.bias = None
            self.inputs = []
        self.reset_parameters()
        self.register_op()

    def reset_parameters(self):
        # Matches the original initialization: uniform weight, zero bias.
        if self.affine:
            self.weight.data.uniform_()
            self.bias.data.zero_()

    def register_op(self):
        fused = self.affine
        self.op_meta = {
            'op_type': 'FusedGroupNorm' if fused else 'GroupNorm',
            'n_inputs': 3 if fused else 1, 'n_outputs': 1,
            'arguments': {
                'group': self.group,
                'axis': 1,  # Data format: NCHW
                'eps': self.eps,
            }
        }

    def forward(self, input):
        inputs = [input] + self.inputs
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
class GroupNorm1d(_GroupNorm):
    """Dragon does not use separate backend functions.

    Behaves exactly like ``_GroupNorm``; presumably exists only to mirror
    the dimension-suffixed ``torch.nn`` naming — TODO confirm.
    """
    pass
class GroupNorm2d(_GroupNorm):
    """Dragon does not use separate backend functions.

    Behaves exactly like ``_GroupNorm``; presumably exists only to mirror
    the dimension-suffixed ``torch.nn`` naming — TODO confirm.
    """
    pass
class GroupNorm3d(_GroupNorm):
    """Dragon does not use separate backend functions.

    Behaves exactly like ``_GroupNorm``; presumably exists only to mirror
    the dimension-suffixed ``torch.nn`` naming — TODO confirm.
    """
    pass
\ No newline at end of file
...@@ -18,50 +18,176 @@ from __future__ import division ...@@ -18,50 +18,176 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.vm.torch.nn import Module from dragon.vm.torch.nn import Module
from dragon.vm.torch.nn.functional import _Reduction
def _assert_no_grad(variable):
assert not variable.requires_grad, \
"nn criterions don't compute the gradient w.r.t. targets - please " \
"mark these variables as not requiring gradients"
class _Loss(Module): class _Loss(Module):
def __init__(self, size_average=True): def __init__(self, size_average=None, reduce=None, reduction='elementwise_mean'):
super(_Loss, self).__init__() super(_Loss, self).__init__()
self.size_average = size_average if size_average is not None or reduce is not None:
self.reduction = _Reduction.legacy_get_string(size_average, reduce)
else:
self.reduction = reduction
class _WeightedLoss(_Loss): class _WeightedLoss(_Loss):
def __init__(self, weight=None, size_average=True): def __init__(self, weight=None, size_average=None, reduce=None, reduction='elementwise_mean'):
super(_WeightedLoss, self).__init__(size_average) super(_WeightedLoss, self).__init__(size_average, reduce, reduction)
self.weight = weight self.weight = weight
# TODO(PhyscalX): Dragon will support it later :).
if weight is not None: if weight is not None:
raise NotImplementedError('WeightedLoss has been not implemented yet.') raise NotImplementedError('WeightedLoss has been not implemented yet.')
class NLLLoss(_WeightedLoss):
    """Module wrapper registering the backend 'NLLLoss' operator.

    Parameters
    ----------
    weight : Tensor or None
        Per-class weights. Not implemented yet (rejected by _WeightedLoss).
    size_average : boolean or None
        Deprecated; folded into ``reduction`` via _Reduction.
    ignore_index : int
        A negative value disables ignoring; a non-negative value is passed
        to the backend as the single ignored label.
    reduce : boolean or None
        Deprecated; folded into ``reduction`` via _Reduction.
    reduction : str
        One of 'elementwise_mean', 'sum', 'none'.
    """

    def __init__(self, weight=None, size_average=None, ignore_index=-100,
                 reduce=None, reduction='elementwise_mean'):
        super(NLLLoss, self).__init__(weight, size_average, reduce, reduction)
        self.ignore_index = ignore_index
        # Map the reduction string onto the backend normalization mode.
        self.normalization = {
            'elementwise_mean': 'VALID',
            'sum': 'None',
            'none': 'UNIT'}[self.reduction]
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'NLLLoss',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {
                'axis': 1,
                'normalization': self.normalization,
                # BUG FIX: ``(x)`` is just ``x`` — a one-element tuple needs
                # a trailing comma. Without it a bare int was passed where
                # a sequence of labels is expected (cf. the ``()`` default).
                'ignore_labels': () if self.ignore_index < 0 else (self.ignore_index,),
            }
        }

    def forward(self, input, target):
        inputs = [input, target]; self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
class BCEWithLogitsLoss(_WeightedLoss):
    """Module wrapper registering the backend 'SigmoidCrossEntropy' operator.

    ``weight`` and ``pos_weight`` are accepted for API compatibility but
    both raise NotImplementedError when given.
    """

    def __init__(self, weight=None, size_average=None, reduce=None,
                 reduction='elementwise_mean', pos_weight=None):
        super(BCEWithLogitsLoss, self).__init__(weight, size_average, reduce, reduction)
        if pos_weight is not None:
            raise NotImplementedError('Positive weight has been not implemented yet.')
        # NOTE(review): 'None' (not 'NONE') matches the sibling losses in
        # this file — confirm the backend accepts this spelling.
        reduction_to_normalization = {
            'elementwise_mean': 'VALID',
            'sum': 'None',
            'none': 'UNIT'}
        self.normalization = reduction_to_normalization[self.reduction]
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'SigmoidCrossEntropy',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {
                'normalization': self.normalization,
            }
        }

    def forward(self, input, target):
        inputs = [input, target]
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
class CrossEntropyLoss(_WeightedLoss): class CrossEntropyLoss(_WeightedLoss):
def __init__(self, weight=None, size_average=True, ignore_index=-100, reduce=True): def __init__(self, weight=None, size_average=None, ignore_index=-100,
super(CrossEntropyLoss, self).__init__(weight, size_average) reduce=None, reduction='elementwise_mean'):
super(CrossEntropyLoss, self).__init__(weight, size_average, reduce, reduction)
self.ignore_index = ignore_index self.ignore_index = ignore_index
self.reduce = reduce self.normalization = {
'elementwise_mean': 'VALID',
'sum': 'None',
'none': 'UNIT'}[self.reduction]
self.register_op() self.register_op()
def register_op(self): def register_op(self):
self.op_meta = { self.op_meta = {
'op_type': 'SparseSoftmaxCrossEntropy' if self.reduce else 'SoftmaxCrossEntropy', 'op_type': 'SparseSoftmaxCrossEntropy',
'n_inputs': 2, 'n_outputs': 1, 'n_inputs': 2, 'n_outputs': 1,
'arguments': { 'arguments': {
'axis': 1, 'axis': 1,
'normalization': 'VALID' if self.size_average else 'NONE', 'normalization': self.normalization,
'ignore_labels': () if self.ignore_index < 0 else (self.ignore_index), 'ignore_labels': () if self.ignore_index < 0 else (self.ignore_index),
} }
} }
def forward(self, input, target): def forward(self, input, target):
_assert_no_grad(target) inputs = [input, target]; self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)]
return self.run(inputs, outputs)
class L1Loss(_Loss):
    """Module wrapper registering the backend 'L1Loss' operator.

    Only 'elementwise_mean' and 'sum' reductions are supported; any other
    reduction raises KeyError from the mapping below.
    """

    def __init__(self, size_average=None, reduce=None, reduction='elementwise_mean'):
        super(L1Loss, self).__init__(size_average, reduce, reduction)
        # Map the reduction string onto the backend normalization mode.
        reduction_to_normalization = {
            'elementwise_mean': 'BATCH_SIZE',
            'sum': 'None'}
        self.normalization = reduction_to_normalization[self.reduction]
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'L1Loss',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {
                'normalization': self.normalization,
            }
        }

    def forward(self, input, target):
        inputs = [input, target]
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
class MSELoss(_Loss):
    """Module wrapper registering the backend 'L2Loss' operator.

    Only 'elementwise_mean' and 'sum' reductions are supported; any other
    reduction raises KeyError from the mapping below.
    """

    def __init__(self, size_average=None, reduce=None, reduction='elementwise_mean'):
        super(MSELoss, self).__init__(size_average, reduce, reduction)
        # Map the reduction string onto the backend normalization mode.
        reduction_to_normalization = {
            'elementwise_mean': 'BATCH_SIZE',
            'sum': 'None'}
        self.normalization = reduction_to_normalization[self.reduction]
        self.register_op()

    def register_op(self):
        self.op_meta = {
            'op_type': 'L2Loss',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {
                'normalization': self.normalization,
            }
        }

    def forward(self, input, target):
        inputs = [input, target]
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
class SmoothL1Loss(_Loss):
    """The smooth-L1 (Huber-style) loss module."""

    def __init__(self, size_average=None, beta=1.0,
                 reduce=None, reduction='elementwise_mean'):
        super(SmoothL1Loss, self).__init__(size_average, reduce, reduction)
        # Translate the torch-style reduction into Dragon's normalization flag.
        reduction_to_norm = {
            'elementwise_mean': 'BATCH_SIZE',
            'sum': 'None',
        }
        self.normalization = reduction_to_norm[self.reduction]
        # Threshold between the quadratic and linear regions.
        self.beta = beta
        self.register_op()

    def register_op(self):
        # Describe the backing Dragon operator for this module.
        self.op_meta = {
            'op_type': 'SmoothL1Loss',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {
                'beta': self.beta,
                'normalization': self.normalization,
            }
        }

    def forward(self, input, target):
        """Compute the smooth-L1 loss between ``input`` and ``target``."""
        inputs = [input, target]
        self.unify_devices(inputs)
        outputs = [self.register_output(input.dtype)]
        return self.run(inputs, outputs)
...@@ -11,11 +11,12 @@ ...@@ -11,11 +11,12 @@
from .creation import ( from .creation import (
zeros, zeros_like, ones, ones_like, zeros, zeros_like, ones, ones_like,
rand, randn one_hot, rand, randn,
) )
from .arithmetic import ( from .arithmetic import (
add, sub, mul, div, add, sub, mul, div, log, exp,
maximum, minimum, clamp,
) )
from .ndarray import ( from .ndarray import (
......
...@@ -16,14 +16,15 @@ from __future__ import print_function ...@@ -16,14 +16,15 @@ from __future__ import print_function
from dragon.vm.torch.tensor import Tensor from dragon.vm.torch.tensor import Tensor
from dragon.vm.torch.ops.primitive import MakeContext, WrapScalar from dragon.vm.torch.ops.primitive import MakeContext, WrapScalar
from dragon.vm.torch.ops.factory import get_module from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.ops.modules.arithmetic import Fundamental
from dragon.vm.torch.ops.modules.arithmetic import (
Fundamental, Log, Exp,
Maximum, Minimum, Clamp,
)
def _fundamental(input, value, op='Add', out=None): def _fundamental(input, value, op='Add', out=None):
if not isinstance(value, Tensor): if not isinstance(value, Tensor):
if not isinstance(value, (int, float)):
raise TypeError('Type of value should be numerical, got {}.'
.format(type(value)))
value = WrapScalar(value, input._dtype, input._ctx) value = WrapScalar(value, input._dtype, input._ctx)
ctx = MakeContext(inputs=[input, value]) ctx = MakeContext(inputs=[input, value])
key = 'torch/ops/{}/{}:{}'.format(op.lower(), ctx[0].lower(), ctx[1]) key = 'torch/ops/{}/{}:{}'.format(op.lower(), ctx[0].lower(), ctx[1])
...@@ -33,17 +34,63 @@ def _fundamental(input, value, op='Add', out=None): ...@@ -33,17 +34,63 @@ def _fundamental(input, value, op='Add', out=None):
def _rfundamental(input, value, op='RAdd', out=None): def _rfundamental(input, value, op='RAdd', out=None):
if not isinstance(value, Tensor): if not isinstance(value, Tensor):
if not isinstance(value, (int, float)):
raise TypeError('Type of value should be numerical, got {}.'
.format(type(value)))
value = WrapScalar(value, input._dtype, input._ctx) value = WrapScalar(value, input._dtype, input._ctx)
ctx = MakeContext(inputs=[input, value]) ctx = MakeContext(inputs=[input, value])
key = 'torch/ops/{}/{}:{}'.format(op.lower(), ctx[0].lower(), ctx[1]) key = 'torch/ops/{}/{}:{}'.format(op.lower(), ctx[0].lower(), ctx[1])
module = get_module(Fundamental, key, ctx, op_type=op) module = get_module(Fundamental, key, ctx, op_type=op)
return module.forward(value, input, out) return module.forward(value, input, out)
def _maximum(input, other, out=None):
    """Dispatch the element-wise ``Maximum`` op; a scalar operand is wrapped."""
    if not isinstance(input, Tensor):
        # Scalar lhs: materialize it on the rhs tensor's device.
        input = WrapScalar(input, 'float32', other._ctx)
        dtype = other._dtype
    else:
        if not isinstance(other, Tensor):
            # Scalar rhs: materialize it on the lhs tensor's device.
            other = WrapScalar(other, 'float32', input._ctx)
        dtype = input._dtype
    ctx = MakeContext(inputs=[input])
    key = 'torch/ops/maximum/{}:{}'.format(ctx[0].lower(), ctx[1])
    module = get_module(Maximum, key, ctx)
    return module.forward(input, other, out, dtype)
def _minimum(input, other, out=None):
    """Dispatch the element-wise ``Minimum`` op; a scalar operand is wrapped."""
    if not isinstance(input, Tensor):
        # Scalar lhs: materialize it on the rhs tensor's device.
        input = WrapScalar(input, 'float32', other._ctx)
        dtype = other._dtype
    else:
        if not isinstance(other, Tensor):
            # Scalar rhs: materialize it on the lhs tensor's device.
            other = WrapScalar(other, 'float32', input._ctx)
        dtype = input._dtype
    ctx = MakeContext(inputs=[input])
    key = 'torch/ops/minimum/{}:{}'.format(ctx[0].lower(), ctx[1])
    module = get_module(Minimum, key, ctx)
    return module.forward(input, other, out, dtype)
def _clamp(input, min=None, max=None, out=None):
    """Dispatch the clamp (Dragon ``Clip``) op over [min, max]."""
    ctx = MakeContext(inputs=[input])
    dev_type, dev_id = ctx[0].lower(), ctx[1]
    # Bounds participate in the cache key so each (min, max) pair
    # gets its own module instance.
    key = 'torch/ops/clamp/{}:{}/min:{}/max:{}'.format(
        dev_type, dev_id, min, max)
    module = get_module(Clamp, key, ctx, min=min, max=max)
    return module.forward(input, out)
def _exp(input, out=None):
    """Dispatch the element-wise ``Exp`` op."""
    ctx = MakeContext(inputs=[input])
    dev_type, dev_id = ctx[0].lower(), ctx[1]
    key = 'torch/ops/exp/{}:{}'.format(dev_type, dev_id)
    module = get_module(Exp, key, ctx)
    return module.forward(input, out)
def _log(input, out=None):
    """Dispatch the element-wise ``Log`` op."""
    ctx = MakeContext(inputs=[input])
    dev_type, dev_id = ctx[0].lower(), ctx[1]
    key = 'torch/ops/log/{}:{}'.format(dev_type, dev_id)
    module = get_module(Log, key, ctx)
    return module.forward(input, out)
def add(input, value, out=None): def add(input, value, out=None):
"""Add the ``input`` and ``value`` into the output tensor. """Add the ``input`` and ``value`` into the output tensor.
...@@ -126,3 +173,106 @@ def div(input, value, out=None): ...@@ -126,3 +173,106 @@ def div(input, value, out=None):
""" """
return _fundamental(input, value, out=out, op='Div') return _fundamental(input, value, out=out, op='Div')
def maximum(input, other, out=None):
    """Compute the element-wise maximum of the two inputs.

    Either operand may be a scalar; it is then wrapped on the
    other operand's device before dispatch.

    Parameters
    ----------
    input : vm.torch.Tensor
        The first input tensor.
    other : vm.torch.Tensor
        The second input tensor.
    out : vm.torch.Tensor or None
        The optional output tensor.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _maximum(input, other, out=out)
def minimum(input, other, out=None):
    """Compute the element-wise minimum of the two inputs.

    Either operand may be a scalar; it is then wrapped on the
    other operand's device before dispatch.

    Parameters
    ----------
    input : vm.torch.Tensor
        The first input tensor.
    other : vm.torch.Tensor
        The second input tensor.
    out : vm.torch.Tensor or None
        The optional output tensor.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _minimum(input, other, out=out)
def clamp(input, min=None, max=None, out=None):
    """Clamp all elements of ``input`` into the range [min, max].

    Parameters
    ----------
    input : vm.torch.Tensor
        The input tensor.
    min : numerical or None
        The lower bound, or None to leave it unbounded below.
    max : numerical or None
        The upper bound, or None to leave it unbounded above.
    out : vm.torch.Tensor or None
        The optional output tensor.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _clamp(input, min=min, max=max, out=out)
def log(input, out=None):
    """Compute the element-wise natural logarithm of ``input``.

    Parameters
    ----------
    input : vm.torch.Tensor
        The input tensor.
    out : vm.torch.Tensor or None
        The optional output tensor.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _log(input, out=out)
def exp(input, out=None):
    """Compute the element-wise exponential of ``input``.

    Parameters
    ----------
    input : vm.torch.Tensor
        The input tensor.
    out : vm.torch.Tensor or None
        The optional output tensor.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _exp(input, out=out)
...@@ -21,12 +21,18 @@ from dragon.vm.torch.execute_engine import RunOperator ...@@ -21,12 +21,18 @@ from dragon.vm.torch.execute_engine import RunOperator
from dragon.vm.torch.ops.factory import get_module from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.autograd.grad_mode import no_grad from dragon.vm.torch.autograd.grad_mode import no_grad
from dragon.vm.torch.ops.primitive import MakeContext from dragon.vm.torch.ops.primitive import MakeContext
from dragon.vm.torch.ops.arithmetic import _fundamental, _rfundamental
from dragon.vm.torch.ops.arithmetic import (
_fundamental, _rfundamental, _log, _exp,
_clamp,
)
from dragon.vm.torch.ops.ndarray import ( from dragon.vm.torch.ops.ndarray import (
reshape, squeeze, unsqueeze, reshape, squeeze, unsqueeze,
_permute, _repeat, _crop, _permute, _repeat, _crop,
_fill, _reduce, _arg_reduce, _fill, _reduce, _arg_reduce,
) )
from dragon.vm.torch.ops.modules.dtype import AsType from dragon.vm.torch.ops.modules.dtype import AsType
...@@ -53,9 +59,14 @@ def copy_(self, src, non_blocking=False): ...@@ -53,9 +59,14 @@ def copy_(self, src, non_blocking=False):
The ``self`` tensor. The ``self`` tensor.
""" """
# Copy memory
FromTensor( FromTensor(
src, CTX_TO_DEVICE_OPTION[tuple(src._ctx)], src, CTX_TO_DEVICE_OPTION[tuple(src._ctx)],
self.name, CTX_TO_DEVICE_OPTION[tuple(self._ctx)]) self.name, CTX_TO_DEVICE_OPTION[tuple(self._ctx)])
self._dtype = src._dtype
# Transfer the static shape if necessary
self._static_shape = src.size() \
if self._static_shape else None
return self return self
...@@ -295,6 +306,76 @@ def rdiv(self, value): ...@@ -295,6 +306,76 @@ def rdiv(self, value):
return _rfundamental(self, value, op='RDiv') return _rfundamental(self, value, op='RDiv')
def clamp(self, min=None, max=None):
    """Return a new tensor with all elements clamped into [min, max].

    Parameters
    ----------
    min : numerical or None
        The lower bound, or None to leave it unbounded below.
    max : numerical or None
        The upper bound, or None to leave it unbounded above.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    return _clamp(self, min, max)
def clamp_(self, min=None, max=None):
    """Clamp all elements of this tensor in place into [min, max].

    Parameters
    ----------
    min : numerical or None
        The lower bound, or None to leave it unbounded below.
    max : numerical or None
        The upper bound, or None to leave it unbounded above.

    Returns
    -------
    vm.torch.Tensor
        The output tensor (this tensor, written in place).

    """
    # Pass ``self`` as the output to make the op in-place.
    return _clamp(self, min, max, self)
def log(self):
    """Compute the element-wise natural logarithm of this tensor.

    Returns
    -------
    vm.torch.Tensor
        The log tensor.

    """
    return _log(self)
def exp(self):
    """Compute the element-wise exponential of this tensor.

    Returns
    -------
    vm.torch.Tensor
        The exp tensor.

    """
    return _exp(self)
Tensor.add = add Tensor.add = add
Tensor.add_ = add_ Tensor.add_ = add_
Tensor.__radd__ = radd Tensor.__radd__ = radd
...@@ -308,6 +389,10 @@ Tensor.div = div ...@@ -308,6 +389,10 @@ Tensor.div = div
Tensor.div_ = div_ Tensor.div_ = div_
Tensor.__rdiv__ = rdiv Tensor.__rdiv__ = rdiv
Tensor.__rtruediv__ = rdiv Tensor.__rtruediv__ = rdiv
Tensor.clamp = clamp
Tensor.clamp_ = clamp_
Tensor.log = log
Tensor.exp = exp
############################################## ##############################################
...@@ -387,16 +472,12 @@ def _unsqueeze_(self, dim=None): ...@@ -387,16 +472,12 @@ def _unsqueeze_(self, dim=None):
def view(self, *args): def view(self, *args):
if self._static_shape:
raise RuntimeError('Can not view a leaf variable, it owns the static sizes.')
return reshape(self, shape=args) return reshape(self, shape=args)
def view_as(self, other): def view_as(self, other):
if not isinstance(other, Tensor): if not isinstance(other, Tensor):
raise ValueError('The other should be a torch tensor.') raise ValueError('The other should be a torch tensor.')
if self._static_shape:
raise RuntimeError('Can not view a leaf variable, it owns the static sizes.')
return reshape(self, shape=None, shape_like=other) return reshape(self, shape=None, shape_like=other)
......
...@@ -13,14 +13,20 @@ from __future__ import absolute_import ...@@ -13,14 +13,20 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.vm.torch.ops.primitive import MakeContext, CanonicalAxis
from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.tensor import LeafTensor from dragon.vm.torch.tensor import LeafTensor
from dragon.vm.torch.execute_engine import RunOperator from dragon.vm.torch.execute_engine import RunOperator
from dragon.vm.torch.ops.primitive import MakeContext from dragon.vm.torch.ops.primitive import MakeContext
from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.ops.modules.creation import OneHot
__all__= [ __all__= [
'zeros', 'zeros_like', 'ones', 'ones_like', 'zeros', 'zeros_like', 'ones', 'ones_like',
'rand', 'randn', 'one_hot', 'rand', 'randn',
] ]
...@@ -180,3 +186,26 @@ def randn(*sizes, **kwargs): ...@@ -180,3 +186,26 @@ def randn(*sizes, **kwargs):
inputs = []; outputs = [out]; ctx = MakeContext(inputs, outputs) inputs = []; outputs = [out]; ctx = MakeContext(inputs, outputs)
meta = ('ONCE', 'RandomNormal', ctx) meta = ('ONCE', 'RandomNormal', ctx)
return RunOperator(inputs, outputs, meta, **arguments) return RunOperator(inputs, outputs, meta, **arguments)
def one_hot(input, depth):
    """Return a one-hot tensor according to the given input.

    Parameters
    ----------
    input : vm.torch.Tensor
        The input tensor.
    depth : int
        The depth of channels.

    Returns
    -------
    vm.torch.FloatTensor
        The output tensor.

    """
    ctx = MakeContext(inputs=[input])
    # The depth participates in the cache key so each depth gets
    # its own module instance.
    key = 'torch/ops/one_hot/{}:{}/depth:{}'.format(
        ctx[0].lower(), ctx[1], depth)
    module = get_module(OneHot, key, ctx, depth=depth)
    return module.forward(input)
\ No newline at end of file
...@@ -38,3 +38,125 @@ class Fundamental(BaseModule): ...@@ -38,3 +38,125 @@ class Fundamental(BaseModule):
inputs = [x1, x2]; self.unify_devices(inputs) inputs = [x1, x2]; self.unify_devices(inputs)
outputs = [y] if y else [self.register_output(x1.dtype)] outputs = [y] if y else [self.register_output(x1.dtype)]
return self.run(inputs, outputs) return self.run(inputs, outputs)
class Maximum(BaseModule):
    """Module wrapper for the element-wise ``Maximum`` operator."""
    def __init__(self, key, ctx, **kwargs):
        super(Maximum, self).__init__(key, ctx, **kwargs)
        self.register_arguments()
        self.register_op()
    def register_arguments(self):
        """No arguments for maximum op."""
        pass
    def register_op(self):
        # Describe the backing Dragon operator for this module.
        self.op_meta = {
            'op_type': 'Maximum',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {}
        }
    def forward(self, x1, x2, y, dtype):
        # Write into ``y`` when provided, otherwise allocate a new
        # output of the caller-chosen ``dtype``.
        inputs = [x1, x2]; self.unify_devices(inputs)
        outputs = [y] if y else [self.register_output(dtype)]
        return self.run(inputs, outputs)
class Minimum(BaseModule):
    """Module wrapper for the element-wise ``Minimum`` operator."""

    def __init__(self, key, ctx, **kwargs):
        super(Minimum, self).__init__(key, ctx, **kwargs)
        self.register_arguments()
        self.register_op()

    def register_arguments(self):
        """No arguments for minimum op."""
        pass

    def register_op(self):
        # Describe the backing Dragon operator for this module.
        self.op_meta = {
            'op_type': 'Minimum',
            'n_inputs': 2, 'n_outputs': 1,
            'arguments': {}
        }

    def forward(self, x1, x2, y, dtype):
        inputs = [x1, x2]
        self.unify_devices(inputs)
        # Write into ``y`` when provided, otherwise allocate a new
        # output of the caller-chosen ``dtype``.
        if y:
            outputs = [y]
        else:
            outputs = [self.register_output(dtype)]
        return self.run(inputs, outputs)
class Clamp(BaseModule):
    """Module wrapper mapping torch ``clamp`` onto Dragon's ``Clip`` op."""

    def __init__(self, key, ctx, **kwargs):
        super(Clamp, self).__init__(key, ctx, **kwargs)
        # Bounds may be None (unbounded); Clip expects float bounds.
        low = kwargs.get('min', None)
        high = kwargs.get('max', None)
        self.min = float(low) if low is not None else None
        self.max = float(high) if high is not None else None
        self.register_arguments()
        self.register_op()

    def register_arguments(self):
        """No arguments for clamp op."""
        pass

    def register_op(self):
        # Describe the backing Dragon operator for this module.
        self.op_meta = {
            'op_type': 'Clip',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {
                'low': self.min,
                'high': self.max,
            }
        }

    def forward(self, x, y):
        inputs = [x]
        self.unify_devices(inputs)
        # Write into ``y`` when provided, otherwise allocate a new output.
        if y:
            outputs = [y]
        else:
            outputs = [self.register_output(x.dtype)]
        return self.run(inputs, outputs)
class Log(BaseModule):
    """Module wrapper for the element-wise ``Log`` operator."""

    def __init__(self, key, ctx, **kwargs):
        super(Log, self).__init__(key, ctx, **kwargs)
        self.register_arguments()
        self.register_op()

    def register_arguments(self):
        """No arguments for Log op."""
        pass

    def register_op(self):
        # Describe the backing Dragon operator for this module.
        self.op_meta = {
            'op_type': 'Log',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {}
        }

    def forward(self, x, y):
        inputs = [x]
        self.unify_devices(inputs)
        # Write into ``y`` when provided, otherwise allocate a new output.
        if y:
            outputs = [y]
        else:
            outputs = [self.register_output(x.dtype)]
        return self.run(inputs, outputs)
class Exp(BaseModule):
    """Module wrapper for the element-wise ``Exp`` operator."""
    def __init__(self, key, ctx, **kwargs):
        super(Exp, self).__init__(key, ctx, **kwargs)
        self.register_arguments()
        self.register_op()
    def register_arguments(self):
        """No arguments for Exp op."""
        pass
    def register_op(self):
        # Describe the backing Dragon operator for this module.
        self.op_meta = {
            'op_type': 'Exp',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {}
        }
    def forward(self, x, y):
        # Write into ``y`` when provided, otherwise allocate a new output.
        inputs = [x]; self.unify_devices(inputs)
        outputs = [y] if y else [self.register_output(x.dtype)]
        return self.run(inputs, outputs)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.ops.modules.base import BaseModule
class OneHot(BaseModule):
    """Module wrapper for the ``OneHot`` operator."""
    def __init__(self, key, ctx, **kwargs):
        super(OneHot, self).__init__(key, ctx, **kwargs)
        # Number of channels in the one-hot dimension.
        self.depth = kwargs.get('depth', 1)
        self.register_arguments()
        self.register_op()
    def register_arguments(self):
        """No arguments for one-hot op."""
        pass
    def register_op(self):
        # Describe the backing Dragon operator for this module.
        self.op_meta = {
            'op_type': 'OneHot',
            'n_inputs': 1, 'n_outputs': 1,
            'arguments': {
                'depth': self.depth,
            }
        }
    def forward(self, x):
        # The output keeps the input's dtype.
        inputs = [x]; self.unify_devices(inputs)
        outputs = [self.register_output(x.dtype)]
        return self.run(inputs, outputs)
\ No newline at end of file
...@@ -22,6 +22,7 @@ class Fill(BaseModule): ...@@ -22,6 +22,7 @@ class Fill(BaseModule):
super(Fill, self).__init__(key, ctx, **kwargs) super(Fill, self).__init__(key, ctx, **kwargs)
self.len_shape = kwargs.get('len_shape', 0) self.len_shape = kwargs.get('len_shape', 0)
self.value = kwargs.get('value', 0.0) self.value = kwargs.get('value', 0.0)
self.dtype = kwargs.get('dtype', 'float32')
self.register_arguments() self.register_arguments()
self.register_op() self.register_op()
...@@ -34,6 +35,7 @@ class Fill(BaseModule): ...@@ -34,6 +35,7 @@ class Fill(BaseModule):
'op_type': 'Fill', 'op_type': 'Fill',
'n_inputs': 0, 'n_outputs': 1, 'n_inputs': 0, 'n_outputs': 1,
'arguments': { 'arguments': {
'dtype': self.dtype,
'value': float(self.value), 'value': float(self.value),
'dims_desc': [d for d in self.shape] if len(self.shape) > 0 else None, 'dims_desc': [d for d in self.shape] if len(self.shape) > 0 else None,
} }
......
...@@ -62,9 +62,10 @@ def _repeat(input, times): ...@@ -62,9 +62,10 @@ def _repeat(input, times):
def _fill(input, shape, value): def _fill(input, shape, value):
ctx = MakeContext(inputs=[input]); len_shape = len(shape) ctx = MakeContext(inputs=[input]); len_shape = len(shape)
key = 'torch/ops/fill/{}:{}/ndims:#{}/value:{}'.format( key = 'torch/ops/fill/{}:{}/dtype:{}/ndims:#{}/value:{}'.format(
ctx[0].lower(), ctx[1], len_shape, value) ctx[0].lower(), ctx[1], input._dtype, len_shape, value)
module = get_module(Fill, key, ctx, len_shape=len_shape, value=value) module = get_module(Fill, key, ctx, len_shape=len_shape,
value=value, dtype=input._dtype)
return module.forward(input, shape) return module.forward(input, shape)
......
...@@ -35,7 +35,7 @@ def _update(param, grad, op_type, slot, ...@@ -35,7 +35,7 @@ def _update(param, grad, op_type, slot,
lr_mult=1.0, decay_mult=1.0): lr_mult=1.0, decay_mult=1.0):
ctx = MakeContext(inputs=[param]) ctx = MakeContext(inputs=[param])
key = 'torch/ops/{}/{}:{}/{}/{}'.format(op_type.lower(), key = 'torch/ops/{}/{}:{}/{}/{}'.format(op_type.lower(),
ctx[0].lower(),ctx[1], slot, param.name) ctx[0].lower(), ctx[1], slot, param.name)
module = get_module(Update, key, ctx, op_type=op_type, module = get_module(Update, key, ctx, op_type=op_type,
lr_mult=lr_mult, decay_mult=decay_mult, slot=slot) lr_mult=lr_mult, decay_mult=decay_mult, slot=slot)
return module.forward(param, grad) return module.forward(param, grad)
\ No newline at end of file
...@@ -72,10 +72,9 @@ class Optimizer(object): ...@@ -72,10 +72,9 @@ class Optimizer(object):
param_temp = group['slot'] + '/{}' param_temp = group['slot'] + '/{}'
for k, v in group.items(): for k, v in group.items():
if k in self._mutable_parameters: if k in self._mutable_parameters:
# convert all defaults as float32 for convenience
dg.workspace.FeedTensor(param_temp.format( dg.workspace.FeedTensor(param_temp.format(
self._mutable_parameters[k]), self._mutable_parameters[k]), v,
np.array([v], dtype=np.float32)) dtype='float32', force_cpu=True)
def _run_update_ops(self, group): def _run_update_ops(self, group):
"""Generate & Run UpdateOps. """Generate & Run UpdateOps.
...@@ -107,10 +106,12 @@ class Optimizer(object): ...@@ -107,10 +106,12 @@ class Optimizer(object):
# Run regular update ops # Run regular update ops
for p, g in zip(params, grads): for p, g in zip(params, grads):
_update(p, g, op_type=self._update_type, _update(p, g,
op_type=self._update_type,
slot=group['slot'], slot=group['slot'],
lr_mult=group.get('lr_mult', 1.0), lr_mult=group.get('lr_mult', 1.0),
decay_mult=group.get('decay_mult', 1.0)) decay_mult=group.get('decay_mult', 1.0)
)
def zero_grad(self): def zero_grad(self):
"""Set all gradients to zeros. """Set all gradients to zeros.
......
...@@ -17,9 +17,8 @@ from __future__ import absolute_import ...@@ -17,9 +17,8 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os, sys, io
import sys from dragon.core.tensor_utils import ToPyArrayEx
import io
if sys.version_info[0] == 2: if sys.version_info[0] == 2:
import cPickle as pickle import cPickle as pickle
...@@ -61,17 +60,27 @@ def _with_file_like(f, mode, body): ...@@ -61,17 +60,27 @@ def _with_file_like(f, mode, body):
f.close() f.close()
def _save(obj, f, pickle_module, pickle_protocol): def _save_dict(obj):
"""Pickle the object into binary file. """Recursively save the dict."""
if not isinstance(obj, dict):
raise ValueError('Currently only the state dict can be saved.')
py_dict = type(obj)()
for k, v in obj.items():
if isinstance(v, dict): py_dict[k] = _save_dict(v)
elif hasattr(v, 'name'): py_dict[k] = ToPyArrayEx(v)
else: py_dict[k] = v
return py_dict
"""
def _save(obj, f, pickle_module, pickle_protocol):
"""Pickle the object into binary file."""
if not isinstance(obj, dict): if not isinstance(obj, dict):
raise ValueError('Currently only the state dict can be saved.') raise ValueError('Currently only the state dict can be saved.')
from collections import OrderedDict py_dict = type(obj)()
from dragon.core.tensor_utils import ToPyArrayEx
py_dict = OrderedDict()
for k, v in obj.items(): for k, v in obj.items():
py_dict[k] = ToPyArrayEx(v) if isinstance(v, dict): py_dict[k] = _save_dict(v)
elif hasattr(v, 'name'): py_dict[k] = ToPyArrayEx(v)
else: py_dict[k] = v
pickle_module.dump(py_dict, f, pickle_protocol) pickle_module.dump(py_dict, f, pickle_protocol)
......
...@@ -13,8 +13,7 @@ from __future__ import absolute_import ...@@ -13,8 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import sys import six
import copy
import numpy as np import numpy as np
import dragon as dg import dragon as dg
import dragon.core.tensor_utils as tensor_utils import dragon.core.tensor_utils as tensor_utils
...@@ -73,12 +72,12 @@ class Tensor(object): ...@@ -73,12 +72,12 @@ class Tensor(object):
self._init_from_numpy(args[0]) self._init_from_numpy(args[0])
else: else:
# + class torch.Tensor(size) # + class torch.Tensor(size)
if not isinstance(args[0], int): if not isinstance(args[0], six.integer_types):
raise ValueError('Excepted integer as size.') raise ValueError('Excepted integer as size.')
self._init_from_shape(args[0]) self._init_from_shape(args[0])
else: else:
# + torch.Tensor(*sizes) # + torch.Tensor(*sizes)
if not all(type(arg) is int for arg in args): if not all(isinstance(arg, six.integer_types) for arg in args):
raise ValueError('Excepted integer(s) as sizes.') raise ValueError('Excepted integer(s) as sizes.')
self._init_from_shape(shape=args) self._init_from_shape(shape=args)
...@@ -90,7 +89,7 @@ class Tensor(object): ...@@ -90,7 +89,7 @@ class Tensor(object):
self._ignored_grads = {self.name + '_grad'} if not self._requires_grad else None self._ignored_grads = {self.name + '_grad'} if not self._requires_grad else None
def _init_from_shape(self, shape): def _init_from_shape(self, shape):
if isinstance(shape, int): shape = [shape] if isinstance(shape, six.integer_types): shape = [shape]
self._static_shape = Size(shape) self._static_shape = Size(shape)
self._dg_tensor = tensor_utils.FromShape(shape, self._dtype, self._dg_tensor = tensor_utils.FromShape(shape, self._dtype,
ctx=CTX_TO_DEVICE_OPTION[tuple(self._ctx)], name=TPool.get('leaf')) ctx=CTX_TO_DEVICE_OPTION[tuple(self._ctx)], name=TPool.get('leaf'))
...@@ -904,6 +903,72 @@ class Tensor(object): ...@@ -904,6 +903,72 @@ class Tensor(object):
""" """
raise NotImplementedError('Refer torch.ops.builtin.div_') raise NotImplementedError('Refer torch.ops.builtin.div_')
def clamp(self, min=None, max=None):
    """Return a tensor that all elements are clamped into the range [min, max].

    Interface stub: the real implementation is bound onto ``Tensor``
    at import time by ``torch.ops.builtin``.

    Parameters
    ----------
    min : numerical or None
        The min value.
    max : numerical or None
        The max value.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    raise NotImplementedError('Refer torch.ops.builtin.clamp')
def clamp_(self, min=None, max=None):
    """Clamp all elements into the range [min, max], in place.

    Interface stub: the real implementation is bound onto ``Tensor``
    at import time by ``torch.ops.builtin``.

    Parameters
    ----------
    min : numerical or None
        The min value.
    max : numerical or None
        The max value.

    Returns
    -------
    vm.torch.Tensor
        The output tensor.

    """
    raise NotImplementedError('Refer torch.ops.builtin.clamp_')
def log(self):
    """Compute the natural logarithm of this tensor.

    Interface stub: the real implementation is bound onto ``Tensor``
    at import time by ``torch.ops.builtin``.

    Returns
    -------
    vm.torch.Tensor
        The log tensor.

    """
    raise NotImplementedError('Refer torch.ops.builtin.log')
def exp(self):
    """Compute the exponential of this tensor.

    Interface stub: the real implementation is bound onto ``Tensor``
    at import time by ``torch.ops.builtin``.

    Returns
    -------
    vm.torch.Tensor
        The exp tensor.

    """
    raise NotImplementedError('Refer torch.ops.builtin.exp')
def mean(self, dim=None, keepdim=False): def mean(self, dim=None, keepdim=False):
"""Returns the mean of all elements or elements along the given dim. """Returns the mean of all elements or elements along the given dim.
......
...@@ -42,7 +42,7 @@ find_modules() ...@@ -42,7 +42,7 @@ find_modules()
setup(name = 'dragon', setup(name = 'dragon',
version='0.2.2.11', version='0.2.2.12',
description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework', description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework',
url='https://github.com/seetaresearch/Dragon', url='https://github.com/seetaresearch/Dragon',
author='Ting Pan', author='Ting Pan',
......
...@@ -3,10 +3,6 @@ ...@@ -3,10 +3,6 @@
namespace dragon { namespace dragon {
#ifdef WITH_CUDA
thread_local CUDAObject CUDAContext::cuda_object_;
#endif // WITH_CUDA
// cpu <- gpu // cpu <- gpu
template<> void CPUContext::Memcpy<CPUContext, CUDAContext>( template<> void CPUContext::Memcpy<CPUContext, CUDAContext>(
size_t nbytes, size_t nbytes,
......
...@@ -246,6 +246,9 @@ GraphDef Graph::Share(const GraphDef& optimized_graph) { ...@@ -246,6 +246,9 @@ GraphDef Graph::Share(const GraphDef& optimized_graph) {
*g.mutable_op(i)->mutable_input(j) *g.mutable_op(i)->mutable_input(j)
= renamed_[op.input(j)]; = renamed_[op.input(j)];
} }
// handle handcraft cases
if (op.type() == "BiasAddGradient")
renamed_[op.output(0)] = g.op(i).input(2);
for (int j = 0; j < op.output_size(); j++) { for (int j = 0; j < op.output_size(); j++) {
if (whitelist.count(op.output(j)) == 0 && if (whitelist.count(op.output(j)) == 0 &&
renamed_.count(op.output(j)) && renamed_.count(op.output(j)) &&
...@@ -443,9 +446,10 @@ Graph::Graph(const GraphDef& meta_graph, Workspace* ws) ...@@ -443,9 +446,10 @@ Graph::Graph(const GraphDef& meta_graph, Workspace* ws)
} }
// store the final graph as a tensor for visualization // store the final graph as a tensor for visualization
Tensor* string_tensor = ws_->CreateTensor("GraphDef_" + optimized_graph.name()); Tensor* graphT = ws_->CreateTensor(
string_tensor->Reshape({ 1 }); "GraphDef_" + optimized_graph.name());
string* data = string_tensor->mutable_data<string, CPUContext>(); graphT->Reshape({ 1 });
auto* data = graphT->mutable_data<string, CPUContext>();
data[0] = optimized_graph.SerializeAsString(); data[0] = optimized_graph.SerializeAsString();
// create // create
...@@ -473,11 +477,22 @@ bool Graph::Run( ...@@ -473,11 +477,22 @@ bool Graph::Run(
return true; return true;
} }
DEFINE_REGISTRY(GraphRegistry, GraphBase, const GraphDef&, Workspace*); GraphBase* NewGraph(
const GraphDef& meta_graph,
Workspace* ws) {
if (!meta_graph.has_graph_type() ||
meta_graph.graph_type().empty())
return new Graph(meta_graph, ws);
GraphBase* NewGraph(const GraphDef& meta_graph, Workspace* ws) { return GraphRegistry()->Create(
if (!meta_graph.has_graph_type()) return new Graph(meta_graph, ws); meta_graph.graph_type(), meta_graph, ws);
return GraphRegistry()->Create(meta_graph.graph_type(), meta_graph, ws);
} }
DEFINE_REGISTRY(
GraphRegistry,
GraphBase,
const GraphDef&,
Workspace*
);
} // namespace dragon } // namespace dragon
\ No newline at end of file
...@@ -4,10 +4,10 @@ ...@@ -4,10 +4,10 @@
namespace dragon { namespace dragon {
#define str dragon_cast<std::string, int> bool GraphGradientMaker::CheckGrad(
const OperatorDef& forward_op,
bool GraphGradientMaker::CheckGrad(const OperatorDef& forward_op, const Set<string>& targets,
const Set<string>& targets, vector< pair<string, int> >& gen_grads) { vector< pair<string, int> >& gen_grads) {
if (NoGradientRegistry()->Has(forward_op.type())) { if (NoGradientRegistry()->Has(forward_op.type())) {
for (auto& input : forward_op.input()) for (auto& input : forward_op.input())
blacklist_set_.insert(input); blacklist_set_.insert(input);
...@@ -41,10 +41,11 @@ bool GraphGradientMaker::CheckGrad(const OperatorDef& forward_op, ...@@ -41,10 +41,11 @@ bool GraphGradientMaker::CheckGrad(const OperatorDef& forward_op,
string GraphGradientMaker::GetOperatorName() { string GraphGradientMaker::GetOperatorName() {
if (op_prefix_.empty()) return "runtime"; if (op_prefix_.empty()) return "runtime";
return op_prefix_ + str(cur_op_idx_++) + op_suffix_; return op_prefix_ + std::to_string(cur_op_idx_++) + op_suffix_;
} }
void GraphGradientMaker::Make(const GraphDef& forward_def, void GraphGradientMaker::Make(
const GraphDef& forward_def,
const vector<string>& targets, const vector<string>& targets,
GraphDef& new_def) { GraphDef& new_def) {
Map<string, int> inputs_count, grads_count; Map<string, int> inputs_count, grads_count;
...@@ -61,9 +62,10 @@ void GraphGradientMaker::Make(const GraphDef& forward_def, ...@@ -61,9 +62,10 @@ void GraphGradientMaker::Make(const GraphDef& forward_def,
} }
} }
for (auto& t : targets) targets_set.insert(t); for (auto& t : targets) targets_set.insert(t);
// PLAY for the backward // PLAY for the backward
for (int i = forward_def.op_size() - 1; i >= 0; i--) { for (int i = forward_def.op_size() - 1; i >= 0; i--) {
// collect inputs & outputs, generate grad // collect inputs & outputs, generate RAW grad ops
const OperatorDef& op = forward_def.op(i); const OperatorDef& op = forward_def.op(i);
vector< pair<string, int> > gen_grads; vector< pair<string, int> > gen_grads;
bool is_skip = CheckGrad(op, targets_set, gen_grads); bool is_skip = CheckGrad(op, targets_set, gen_grads);
...@@ -76,8 +78,9 @@ void GraphGradientMaker::Make(const GraphDef& forward_def, ...@@ -76,8 +78,9 @@ void GraphGradientMaker::Make(const GraphDef& forward_def,
g_outputs.emplace_back(g_output); g_outputs.emplace_back(g_output);
} }
Gradient grad = MakeGradientForOp(op, g_outputs); Gradient grad = MakeGradientForOp(op, g_outputs);
// post-process grad ops
unique_ptr<OperatorDef> gather_op; // process the RAW grad ops
vector<OperatorDef> gather_ops;
for (auto& g_op : grad.ops) { for (auto& g_op : grad.ops) {
// set op name // set op name
g_op.set_name(GetOperatorName()); g_op.set_name(GetOperatorName());
...@@ -112,27 +115,32 @@ void GraphGradientMaker::Make(const GraphDef& forward_def, ...@@ -112,27 +115,32 @@ void GraphGradientMaker::Make(const GraphDef& forward_def,
string original_name = op.input(original_idx); string original_name = op.input(original_idx);
if (inputs_count[original_name] > 1) { if (inputs_count[original_name] > 1) {
// split // split
string split_name = *output + "_autosplit_" + str(grads_count[*output]++); string split_name = *output + "_autosplit_"
+ std::to_string(grads_count[*output]++);
if (!is_skip) all_split_grads.insert(split_name); if (!is_skip) all_split_grads.insert(split_name);
// gather // gather
if (grads_count[*output] == inputs_count[original_name]) { if (grads_count[*output] == inputs_count[original_name]) {
gather_op.reset(new OperatorDef()); OperatorDef gather_op;
gather_op->set_name(GetOperatorName()); gather_op.set_name(GetOperatorName());
gather_op->set_type("GradientGather"); gather_op.set_type("GradientGather");
gather_op->add_output(*output); gather_op.add_output(*output);
if (g_op.has_device_option()) if (g_op.has_device_option())
gather_op->mutable_device_option()->CopyFrom(g_op.device_option()); gather_op.mutable_device_option()
->CopyFrom(g_op.device_option());
for (int j = 0; j < grads_count[*output]; j++) { for (int j = 0; j < grads_count[*output]; j++) {
string key = *output + "_autosplit_" + str(j); string key = *output + "_autosplit_" + std::to_string(j);
if (all_split_grads.count(key)) gather_op->add_input(key); if (all_split_grads.count(key)) gather_op.add_input(key);
} }
gather_ops.emplace_back(gather_op);
} }
*output = split_name; *output = split_name;
} }
} }
} }
// append ops
// now, append the required ops
if (!is_skip) { if (!is_skip) {
// 1) GradientGenerateOp
if (gen_grads.size() > 0) { if (gen_grads.size() > 0) {
vector<string> op_inputs, op_outputs; vector<string> op_inputs, op_outputs;
Argument arg_defaults; arg_defaults.set_name("defaults"); Argument arg_defaults; arg_defaults.set_name("defaults");
...@@ -143,21 +151,24 @@ void GraphGradientMaker::Make(const GraphDef& forward_def, ...@@ -143,21 +151,24 @@ void GraphGradientMaker::Make(const GraphDef& forward_def,
op_outputs.emplace_back(output); op_outputs.emplace_back(output);
arg_defaults.add_floats(grad.defaults[gen_grad.second]); arg_defaults.add_floats(grad.defaults[gen_grad.second]);
} }
OperatorDef generate_op = MakeOperatorDef("GradientGenerate", OperatorDef generate_op = MakeOperatorDef(
GetOperatorName(), "GradientGenerate", GetOperatorName(),
op_inputs, op_inputs, op_outputs,
op_outputs,
vector<Argument>(1, arg_defaults)); vector<Argument>(1, arg_defaults));
if (op.has_device_option()) if (op.has_device_option())
generate_op.mutable_device_option()->CopyFrom(op.device_option()); generate_op.mutable_device_option()
->CopyFrom(op.device_option());
new_def.add_op()->CopyFrom(generate_op); new_def.add_op()->CopyFrom(generate_op);
} }
for (auto& g_op : grad.ops) { // 2) GradientOp
for (auto& g_op : grad.ops)
new_def.add_op()->CopyFrom(g_op); new_def.add_op()->CopyFrom(g_op);
} }
} // 3) GradientGatherOp
if (gather_op) new_def.add_op()->CopyFrom(*gather_op); for (auto& gather_op : gather_ops)
// done new_def.add_op()->CopyFrom(gather_op);
// done!
if (!is_skip) { if (!is_skip) {
for (int i = 0; i < op.input_size(); i++) { for (int i = 0; i < op.input_size(); i++) {
if (!grad.g_inputs[i].empty()) if (!grad.g_inputs[i].empty())
...@@ -189,7 +200,9 @@ void GraphGradientMaker::Make(const GraphDef& forward_def, ...@@ -189,7 +200,9 @@ void GraphGradientMaker::Make(const GraphDef& forward_def,
} \ } \
*op->mutable_output(ix) = temp_grad;} *op->mutable_output(ix) = temp_grad;}
void GraphGradientMaker::Share(const string& grads_prefix, GraphDef& graph) { void GraphGradientMaker::Share(
const string& grads_prefix,
GraphDef& graph) {
Map<string, int> ref_count; Map<string, int> ref_count;
// count the refs for detecting leaf nodes // count the refs for detecting leaf nodes
for (auto& op : graph.op()) { for (auto& op : graph.op()) {
...@@ -205,8 +218,7 @@ void GraphGradientMaker::Share(const string& grads_prefix, GraphDef& graph) { ...@@ -205,8 +218,7 @@ void GraphGradientMaker::Share(const string& grads_prefix, GraphDef& graph) {
Map<string, string> temporary_grads; Map<string, string> temporary_grads;
std::deque<string> grads_pool; std::deque<string> grads_pool;
for (int i = 0; i < TEMPORARY_GRADS_LIMITS; i++) for (int i = 0; i < TEMPORARY_GRADS_LIMITS; i++)
grads_pool.push_back(grads_prefix + ":" + grads_pool.push_back(grads_prefix + ":" + std::to_string(i));
dragon_cast<string, int>(i));
for (int i = 0; i < graph.op_size(); i++) { for (int i = 0; i < graph.op_size(); i++) {
OperatorDef* op = graph.mutable_op(i); OperatorDef* op = graph.mutable_op(i);
......
...@@ -31,12 +31,14 @@ void MixedMemory::ToCUDA() { ...@@ -31,12 +31,14 @@ void MixedMemory::ToCUDA() {
switch (state_) { switch (state_) {
case UNINITIALIZED: case UNINITIALIZED:
cuda_ptr_ = CUDAContext::New(nbytes_); cuda_ptr_ = CUDAContext::New(nbytes_);
ptr_device_ = CUDA_GET_DEVICE();
state_ = STATE_AT_CUDA; state_ = STATE_AT_CUDA;
break; break;
case STATE_AT_CPU: case STATE_AT_CPU:
if (cuda_ptr_ == nullptr) if (cuda_ptr_ == nullptr) {
cuda_ptr_ = CUDAContext::New(nbytes_); cuda_ptr_ = CUDAContext::New(nbytes_);
CUDAContext::Memcpy<CUDAContext, CPUContext>( ptr_device_ = CUDA_GET_DEVICE();
} CUDAContext::Memcpy<CUDAContext, CPUContext>(
nbytes_, cuda_ptr_, cpu_ptr_); nbytes_, cuda_ptr_, cpu_ptr_);
state_ = SYNCED; state_ = SYNCED;
break; break;
...@@ -66,6 +68,10 @@ const void* MixedMemory::cuda_data() { ...@@ -66,6 +68,10 @@ const void* MixedMemory::cuda_data() {
return (const void*)cuda_ptr_; return (const void*)cuda_ptr_;
} }
const void* MixedMemory::cnml_data() {
return (const void*)cnml_ptr_;
}
void* MixedMemory::mutable_cpu_data() { void* MixedMemory::mutable_cpu_data() {
ToCPU(); ToCPU();
state_ = STATE_AT_CPU; state_ = STATE_AT_CPU;
...@@ -78,6 +84,11 @@ void* MixedMemory::mutable_cuda_data() { ...@@ -78,6 +84,11 @@ void* MixedMemory::mutable_cuda_data() {
return cuda_ptr_; return cuda_ptr_;
} }
void* MixedMemory::mutable_cnml_data() {
state_ = STATE_AT_CNML;
return cnml_ptr_;
}
void MixedMemory::set_cpu_data(void* cpu_ptr, size_t nbytes) { void MixedMemory::set_cpu_data(void* cpu_ptr, size_t nbytes) {
bool use_cudahost_mem = false; bool use_cudahost_mem = false;
#ifdef WITH_CUDA_HOST_MEM #ifdef WITH_CUDA_HOST_MEM
...@@ -123,9 +134,11 @@ MixedMemory::~MixedMemory() { ...@@ -123,9 +134,11 @@ MixedMemory::~MixedMemory() {
void MixedMemory::SwitchToDevice() { void MixedMemory::SwitchToDevice() {
if (cuda_ptr_) { if (cuda_ptr_) {
#ifdef WITH_CUDA #ifdef WITH_CUDA
int ptr_device = CUDA_DEVICE(cuda_ptr_); int cur_device = CUDA_GET_DEVICE();
int cur_device = CUDA_DEVICE(); if (cur_device != ptr_device_) {
if (ptr_device != cur_device) state_ = SWITCHED; state_ = SWITCHED;
ptr_device_ = cur_device;
}
#endif #endif
} }
} }
...@@ -134,12 +147,12 @@ void MixedMemory::SwitchToCUDADevice(int device_id) { ...@@ -134,12 +147,12 @@ void MixedMemory::SwitchToCUDADevice(int device_id) {
#ifdef WITH_CUDA #ifdef WITH_CUDA
DeviceGuard gurad(device_id); DeviceGuard gurad(device_id);
if (cuda_ptr_) { if (cuda_ptr_) {
int ptr_device = CUDA_DEVICE(cuda_ptr_); if (device_id != ptr_device_) {
if (ptr_device != device_id) state_ = SWITCHED; state_ = SWITCHED;
ptr_device_ = device_id;
}
} }
ToCUDA(); ToCUDA();
#else
CUDA_NOT_COMPILED;
#endif #endif
} }
...@@ -148,6 +161,7 @@ const Map<string, string> MixedMemory::info() const { ...@@ -148,6 +161,7 @@ const Map<string, string> MixedMemory::info() const {
{ UNINITIALIZED, "UNINITIALIZED" }, { UNINITIALIZED, "UNINITIALIZED" },
{ STATE_AT_CPU, "CPU" }, { STATE_AT_CPU, "CPU" },
{ STATE_AT_CUDA, "CUDA" }, { STATE_AT_CUDA, "CUDA" },
{ STATE_AT_CNML, "CNML" },
{ SYNCED, "DEVICE" }, { SYNCED, "DEVICE" },
{ SWITCHED, "DEVICE" }, { SWITCHED, "DEVICE" },
}; };
...@@ -155,15 +169,14 @@ const Map<string, string> MixedMemory::info() const { ...@@ -155,15 +169,14 @@ const Map<string, string> MixedMemory::info() const {
string _state_ = STATE_TO_STRING[state_]; string _state_ = STATE_TO_STRING[state_];
if (_state_ == "DEVICE") { if (_state_ == "DEVICE") {
if (cuda_ptr_) _state_ = "CUDA"; if (cuda_ptr_) _state_ = "CUDA";
else if (cnml_ptr_) _state_ = "CNML";
else LOG(FATAL) << "Device activated, " else LOG(FATAL) << "Device activated, "
<< "but got invalid mem pointer."; << "but got invalid mem pointer.";
} }
s2s["mem_at"] = _state_; s2s["mem_at"] = _state_;
if (cpu_ptr_) s2s["CPU"] = "0"; if (cpu_ptr_) s2s["CPU"] = "0";
#ifdef WITH_CUDA if (cuda_ptr_) s2s["CUDA"] = std::to_string(ptr_device_);
if (cuda_ptr_) s2s["CUDA"] = else if (cnml_ptr_) s2s["CNML"] = std::to_string(ptr_device_);
dragon_cast<string, int>(CUDA_DEVICE(cuda_ptr_));
#endif
return s2s; return s2s;
} }
......
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
namespace dragon { namespace dragon {
OperatorBase::OperatorBase( OperatorBase::OperatorBase(
const OperatorDef& def, Workspace* ws) const OperatorDef& def,
Workspace* ws)
: def_(def), ws_(ws), anchor_(def.name()) { : def_(def), ws_(ws), anchor_(def.name()) {
for (auto& arg : def_.arg()) { for (auto& arg : def_.arg()) {
CHECK_GT(arg.name().size(), 0); CHECK_GT(arg.name().size(), 0);
...@@ -73,6 +74,8 @@ OperatorBase* TryCreateOperator( ...@@ -73,6 +74,8 @@ OperatorBase* TryCreateOperator(
CUDNNOperatorRegistry()->Has(key)) CUDNNOperatorRegistry()->Has(key))
return CUDNNOperatorRegistry()->Create(key, def, ws); return CUDNNOperatorRegistry()->Create(key, def, ws);
return CUDAOperatorRegistry()->Create(key, def, ws); return CUDAOperatorRegistry()->Create(key, def, ws);
case CNML:
return CNMLOperatorRegistry()->Create(key, def, ws);
default: default:
LOG(FATAL) << "Unknown device type: " LOG(FATAL) << "Unknown device type: "
<< def.device_option().device_type(); << def.device_option().device_type();
...@@ -198,7 +201,8 @@ void Operator<Context>::ElimateCorruption() { ...@@ -198,7 +201,8 @@ void Operator<Context>::ElimateCorruption() {
int idx = safe_heads.front(); int idx = safe_heads.front();
safe_heads.pop(); safe_heads.pop();
Tensor* buffer = ws()->GetTensor( Tensor* buffer = ws()->GetTensor(
"/opt/mirror_stage/buffer_" + dragon_cast<string, int>(idx)); "/opt/mirror_stage/buffer_"
+ std::to_string(idx));
Output(i)->Move(buffer->memory()); Output(i)->Move(buffer->memory());
head_data[idx] = Output(i)->name(); head_data[idx] = Output(i)->name();
} }
...@@ -220,8 +224,8 @@ void Operator<Context>::CleanResource() { ...@@ -220,8 +224,8 @@ void Operator<Context>::CleanResource() {
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->is_corrupted() && if (Output(i)->is_corrupted() &&
head_to_idx.count(Output(i)->name())) { head_to_idx.count(Output(i)->name())) {
string used = "/opt/mirror_stage/buffer_" + string used = "/opt/mirror_stage/buffer_"
dragon_cast<string, int>(head_to_idx[Output(i)->name()]); + std::to_string(head_to_idx[Output(i)->name()]);
Tensor* buffer = ws()->GetTensor(used); Tensor* buffer = ws()->GetTensor(used);
if (Output(i)->memory() != buffer->memory()) if (Output(i)->memory() != buffer->memory())
buffer->Move(Output(i)->memory()); buffer->Move(Output(i)->memory());
...@@ -248,6 +252,12 @@ DEFINE_REGISTRY( ...@@ -248,6 +252,12 @@ DEFINE_REGISTRY(
Workspace*); Workspace*);
DEFINE_REGISTRY( DEFINE_REGISTRY(
CNMLOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
DEFINE_REGISTRY(
GradientRegistry, GradientRegistry,
GradientMakerBase, GradientMakerBase,
const OperatorDef&, const OperatorDef&,
...@@ -291,9 +301,12 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings) ...@@ -291,9 +301,12 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings)
template void Operator<CPUContext>::ElimateCorruption(); template void Operator<CPUContext>::ElimateCorruption();
template void Operator<CUDAContext>::ElimateCorruption(); template void Operator<CUDAContext>::ElimateCorruption();
template void Operator<CNMLContext>::ElimateCorruption();
template void Operator<CPUContext>::MakeResource(); template void Operator<CPUContext>::MakeResource();
template void Operator<CUDAContext>::MakeResource(); template void Operator<CUDAContext>::MakeResource();
template void Operator<CNMLContext>::MakeResource();
template void Operator<CPUContext>::CleanResource(); template void Operator<CPUContext>::CleanResource();
template void Operator<CUDAContext>::CleanResource(); template void Operator<CUDAContext>::CleanResource();
template void Operator<CNMLContext>::CleanResource();
} // namespace dragon } // namespace dragon
\ No newline at end of file
...@@ -16,7 +16,8 @@ GraphBase* Workspace::CreateGraph(const GraphDef& meta_graph) { ...@@ -16,7 +16,8 @@ GraphBase* Workspace::CreateGraph(const GraphDef& meta_graph) {
Workspace::~Workspace() { Workspace::~Workspace() {
for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) { for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) {
string name = "/opt/mirror_stage/buffer_" + dragon_cast<string, int>(i); string name = "/opt/mirror_stage/buffer_"
+ std::to_string(i);
if (tensor_map_.count(name) > 0) { if (tensor_map_.count(name) > 0) {
MixedMemory* mem = tensor_map_[name]->memory(); MixedMemory* mem = tensor_map_[name]->memory();
if (mem != nullptr) delete mem; if (mem != nullptr) delete mem;
......
...@@ -32,8 +32,8 @@ void CuDNNDropoutOp<Context>::RunWithType() { ...@@ -32,8 +32,8 @@ void CuDNNDropoutOp<Context>::RunWithType() {
ctx()->cudnn_handle(), &states_size)); ctx()->cudnn_handle(), &states_size));
std::lock_guard<std::mutex> lk(CUDAContext::mutex()); std::lock_guard<std::mutex> lk(CUDAContext::mutex());
Tensor* states = ws()->CreateTensor( Tensor* states = ws()->CreateTensor(
"/share/cudnn/dropout:" + dragon_cast<string, "/share/cudnn/dropout:" + std::to_string(
unsigned long long>(random_seed) + "/states"); random_seed) + "/states");
if (states->count() > 0) { if (states->count() > 0) {
auto* Sdata = states->template mutable_data<uint8_t, Context>(); auto* Sdata = states->template mutable_data<uint8_t, Context>();
CUDNN_CHECK(cudnnRestoreDropoutDescriptor( CUDNN_CHECK(cudnnRestoreDropoutDescriptor(
...@@ -67,9 +67,7 @@ void CuDNNDropoutOp<Context>::RunOnDevice() { ...@@ -67,9 +67,7 @@ void CuDNNDropoutOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
...@@ -89,14 +87,16 @@ void CuDNNDropoutGradientOp<Context>::RunWithType() { ...@@ -89,14 +87,16 @@ void CuDNNDropoutGradientOp<Context>::RunWithType() {
ctx()->cudnn_handle(), &states_size)); ctx()->cudnn_handle(), &states_size));
std::lock_guard<std::mutex> lk(CUDAContext::mutex()); std::lock_guard<std::mutex> lk(CUDAContext::mutex());
Tensor* states = ws()->CreateTensor( Tensor* states = ws()->CreateTensor(
"/share/cudnn/dropout:" + dragon_cast<string, "/share/cudnn/dropout:" + std::to_string(
unsigned long long>(random_seed) + "/states"); random_seed) + "/states");
if (states->count() > 0) { if (states->count() > 0) {
auto* Sdata = states->template mutable_data<uint8_t, Context>(); auto* Sdata = states->template mutable_data<uint8_t, Context>();
CUDNN_CHECK(cudnnRestoreDropoutDescriptor( CUDNN_CHECK(cudnnRestoreDropoutDescriptor(
dropout_desc, ctx()->cudnn_handle(), prob(), dropout_desc, ctx()->cudnn_handle(), prob(),
Sdata, states_size, random_seed)); Sdata, states_size, random_seed));
} else { LOG(FATAL) << "Missing states with seed: " << random_seed; } } else {
LOG(FATAL) << "Missing states with seed: " << random_seed;
}
} }
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
...@@ -119,9 +119,7 @@ void CuDNNDropoutGradientOp<Context>::RunOnDevice() { ...@@ -119,9 +119,7 @@ void CuDNNDropoutGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
......
...@@ -24,9 +24,7 @@ void CuDNNEluOp<Context>::RunOnDevice() { ...@@ -24,9 +24,7 @@ void CuDNNEluOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
...@@ -52,9 +50,7 @@ void CuDNNEluGradientOp<Context>::RunOnDevice() { ...@@ -52,9 +50,7 @@ void CuDNNEluGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
......
...@@ -31,9 +31,7 @@ void CuDNNReluOp<Context>::RunOnDevice() { ...@@ -31,9 +31,7 @@ void CuDNNReluOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
...@@ -69,9 +67,7 @@ void CuDNNReluGradientOp<Context>::RunOnDevice() { ...@@ -69,9 +67,7 @@ void CuDNNReluGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
......
...@@ -29,9 +29,7 @@ void CuDNNSigmoidOp<Context>::RunOnDevice() { ...@@ -29,9 +29,7 @@ void CuDNNSigmoidOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
...@@ -65,9 +63,7 @@ void CuDNNSigmoidGradientOp<Context>::RunOnDevice() { ...@@ -65,9 +63,7 @@ void CuDNNSigmoidGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
......
...@@ -14,8 +14,7 @@ void CuDNNSoftmaxOp<Context>::RunWithType() { ...@@ -14,8 +14,7 @@ void CuDNNSoftmaxOp<Context>::RunWithType() {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnSoftmaxForward( CUDNN_CHECK(cudnnSoftmaxForward(ctx()->cudnn_handle(),
ctx()->cudnn_handle(),
CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL,
CUDNNType<T>::one, input_desc, Xdata, CUDNNType<T>::one, input_desc, Xdata,
CUDNNType<T>::zero, output_desc, Ydata)); CUDNNType<T>::zero, output_desc, Ydata));
...@@ -23,15 +22,13 @@ void CuDNNSoftmaxOp<Context>::RunWithType() { ...@@ -23,15 +22,13 @@ void CuDNNSoftmaxOp<Context>::RunWithType() {
template <class Context> template <class Context>
void CuDNNSoftmaxOp<Context>::RunOnDevice() { void CuDNNSoftmaxOp<Context>::RunOnDevice() {
if (axis == -1) axis = (int)Input(0).ndim() - 1; if (axis == -1) axis = (TIndex)Input(0).ndim() - 1;
outer_dim = Input(0).count(0, axis); outer_dim = Input(0).count(0, axis);
inner_dim = Input(0).count(axis + 1); inner_dim = Input(0).count(axis + 1);
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
...@@ -47,8 +44,7 @@ void CuDNNSoftmaxGradientOp<Context>::RunWithType() { ...@@ -47,8 +44,7 @@ void CuDNNSoftmaxGradientOp<Context>::RunWithType() {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
auto* Ydata = Input(0).template data<T, Context>(); auto* Ydata = Input(0).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnSoftmaxBackward( CUDNN_CHECK(cudnnSoftmaxBackward(ctx()->cudnn_handle(),
ctx()->cudnn_handle(),
CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL,
CUDNNType<T>::one, input_desc, Ydata, input_desc, dYdata, CUDNNType<T>::one, input_desc, Ydata, input_desc, dYdata,
CUDNNType<T>::zero, output_desc, dXdata)); CUDNNType<T>::zero, output_desc, dXdata));
...@@ -56,15 +52,13 @@ void CuDNNSoftmaxGradientOp<Context>::RunWithType() { ...@@ -56,15 +52,13 @@ void CuDNNSoftmaxGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void CuDNNSoftmaxGradientOp<Context>::RunOnDevice() { void CuDNNSoftmaxGradientOp<Context>::RunOnDevice() {
if (axis == -1) axis = (int)Input(0).ndim() - 1; if (axis == -1) axis = (TIndex)Input(0).ndim() - 1;
outer_dim = Input(0).count(0, axis); outer_dim = Input(0).count(0, axis);
inner_dim = Input(0).count(axis + 1); inner_dim = Input(0).count(axis + 1);
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
......
...@@ -29,9 +29,7 @@ void CuDNNTanhOp<Context>::RunOnDevice() { ...@@ -29,9 +29,7 @@ void CuDNNTanhOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
...@@ -65,9 +63,7 @@ void CuDNNTanhGradientOp<Context>::RunOnDevice() { ...@@ -65,9 +63,7 @@ void CuDNNTanhGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
......
...@@ -8,22 +8,31 @@ template <class Context> template <typename T> ...@@ -8,22 +8,31 @@ template <class Context> template <typename T>
void DropoutOp<Context>::RunWithType() { void DropoutOp<Context>::RunWithType() {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
float scale = use_scale ? 1.0 / (1.0 - prob()) : 1.0; float scale = use_scale ? 1.f / (1.f - prob()) : 1.f;
if (phase() == "TEST") { if (phase() == "TEST") {
if (Output(0) != &Input(0)) { if (Output(0) != &Input(0)) {
ctx()->template Copy<T, Context, Context>( ctx()->template Copy<T, Context, Context>(
Output(0)->count(), Ydata, Xdata); Output(0)->count(), Ydata, Xdata);
if (scale == 1.0) math::Scal<T, Context>( }
Output(0)->count(), 1.0 - prob(), Ydata, ctx()); if (!use_scale) {
math::Scal<T, Context>(Output(0)->count(),
1.0 - prob(), Ydata, ctx());
} }
} else if (phase() == "TRAIN") { } else if (phase() == "TRAIN") {
Tensor* mask = ws()->CreateTensor( Tensor* mask = ws()->CreateTensor(
"/mnt/" + anchor() + "/dropout/mask"); "/mnt/" + anchor() + "/dropout/mask");
mask->ReshapeLike(Input(0)); mask->ReshapeLike(Input(0));
uint32_t* Mdata = mask->template mutable_data<uint32_t, Context>();
auto WSdata = ws()->template caches<Context>({
mask->count() * sizeof(uint32_t) });
auto* Mdata = mask->template mutable_data<uint8_t, Context>();
kernel::Dropout<T, Context>( kernel::Dropout<T, Context>(
Output(0)->count(), prob(), scale, Output(0)->count(), prob(), scale,
Xdata, Mdata, Ydata, ctx()); Xdata, (uint32_t*)WSdata[0],
Mdata, Ydata, ctx());
} else LOG(FATAL) << "Incorrect Op phase: " << phase(); } else LOG(FATAL) << "Incorrect Op phase: " << phase();
} }
...@@ -32,6 +41,7 @@ void DropoutOp<Context>::RunOnDevice() { ...@@ -32,6 +41,7 @@ void DropoutOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
} }
...@@ -39,22 +49,25 @@ DEPLOY_CPU(Dropout); ...@@ -39,22 +49,25 @@ DEPLOY_CPU(Dropout);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(Dropout); DEPLOY_CUDA(Dropout);
#endif #endif
OPERATOR_SCHEMA(Dropout).NumInputs(1).NumOutputs(1).Inplace({ { 0, 0 } }); OPERATOR_SCHEMA(Dropout)
.NumInputs(1).NumOutputs(1)
.Inplace({ { 0, 0 } });
template <class Context> template <typename T> template <class Context> template <typename T>
void DropoutGradientOp<Context>::RunWithType() { void DropoutGradientOp<Context>::RunWithType() {
mask = ws()->GetTensor("/mnt/" + anchor() + "/dropout/mask"); auto* mask = ws()->GetTensor(
"/mnt/" + anchor() + "/dropout/mask");
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* Mdata = mask->template data<uint32_t, Context>(); auto* Mdata = mask->template data<uint8_t, Context>();
float scale = use_scale ? 1.0 / (1.0 - prob()) : 1.0;
float scale = use_scale ? 1.f / (1.f - prob()) : 1.f;
if (phase() == "TEST") { NOT_IMPLEMENTED; } if (phase() == "TEST") { NOT_IMPLEMENTED; }
else if (phase() == "TRAIN") { else if (phase() == "TRAIN") {
kernel::DropoutGrad<T, Context>( kernel::ApplyMask<T, uint8_t, Context>(mask->count(),
Output(0)->count(), prob(), scale, scale, dYdata, Mdata, dXdata, ctx());
dYdata, Mdata, dXdata, ctx());
ctx()->FinishDeviceCompution();
mask->Reset();
} else LOG(FATAL) << "Incorrect Op phase: " << phase(); } else LOG(FATAL) << "Incorrect Op phase: " << phase();
} }
...@@ -63,14 +76,17 @@ void DropoutGradientOp<Context>::RunOnDevice() { ...@@ -63,14 +76,17 @@ void DropoutGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" }); else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
DEPLOY_CPU(DropoutGradient); DEPLOY_CPU(DropoutGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(DropoutGradient); DEPLOY_CUDA(DropoutGradient);
#endif #endif
OPERATOR_SCHEMA(DropoutGradient).NumInputs(2).NumOutputs(1).Inplace({ { 1, 0 } }); OPERATOR_SCHEMA(DropoutGradient)
.NumInputs(2).NumOutputs(1)
.Inplace({ { 1, 0 } });
class GetDropoutGradient final : public GradientMakerBase { class GetDropoutGradient final : public GradientMakerBase {
public: public:
......
...@@ -148,7 +148,9 @@ DEPLOY_CPU(AddGradient); ...@@ -148,7 +148,9 @@ DEPLOY_CPU(AddGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(AddGradient); DEPLOY_CUDA(AddGradient);
#endif #endif
OPERATOR_SCHEMA(AddGradient).NumInputs(1).NumOutputs(2); OPERATOR_SCHEMA(AddGradient)
.NumInputs(1).NumOutputs(2)
.Inplace({ { 0, 0 } });
class GetAddGradient : public GradientMakerBase { class GetAddGradient : public GradientMakerBase {
public: public:
......
...@@ -7,15 +7,11 @@ namespace dragon { ...@@ -7,15 +7,11 @@ namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void ClipOp<Context>::RunWithType() { void ClipOp<Context>::RunWithType() {
Tensor* mask = ws()->CreateTensor(
"/mnt/" + anchor() + "/clip/mask");
mask->ReshapeLike(Input(0));
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
auto* Mdata = mask->template mutable_data<T, Context>();
kernel::Clip<T, Context>(Output(0)->count(), kernel::Clip<T, Context>(Output(0)->count(),
low, high, Xdata, Mdata, Ydata, ctx()); low, high, Xdata, Ydata, ctx());
} }
template <class Context> template <class Context>
...@@ -30,19 +26,16 @@ DEPLOY_CPU(Clip); ...@@ -30,19 +26,16 @@ DEPLOY_CPU(Clip);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(Clip); DEPLOY_CUDA(Clip);
#endif #endif
OPERATOR_SCHEMA(Clip) OPERATOR_SCHEMA(Clip).NumInputs(1).NumOutputs(1);
.NumInputs(1).NumOutputs(1)
.Inplace({ { 0, 0 } });
template <class Context> template <typename T> template <class Context> template <typename T>
void ClipGradientOp<Context>::RunWithType() { void ClipGradientOp<Context>::RunWithType() {
Tensor* mask = ws()->GetTensor( auto* Xdata = Input(0).template data<T, Context>();
"/mnt/" + anchor() + "/clip/mask"); auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* Mdata = mask->template data<T, Context>();
math::Mul<T, Context>(Output(0)->count(), kernel::ClipGrad<T, Context>(Output(0)->count(),
dXdata, Mdata, dXdata, ctx()); low, high, Xdata, dYdata, dXdata, ctx());
} }
template <class Context> template <class Context>
...@@ -57,16 +50,14 @@ DEPLOY_CPU(ClipGradient); ...@@ -57,16 +50,14 @@ DEPLOY_CPU(ClipGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(ClipGradient); DEPLOY_CUDA(ClipGradient);
#endif #endif
OPERATOR_SCHEMA(ClipGradient) OPERATOR_SCHEMA(ClipGradient).NumInputs(2).NumOutputs(1);
.NumInputs(2).NumOutputs(1)
.Inplace({ { 1, 0 } });
class GetClipGradient final : public GradientMakerBase { class GetClipGradient final : public GradientMakerBase {
public: public:
GRADIENT_MAKER_CTOR(GetClipGradient); GRADIENT_MAKER_CTOR(GetClipGradient);
vector<OperatorDef> MakeDefs() override { vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "", return SingleDef(def.type() + "Gradient", "",
vector<string> {O(0), GO(0)}, vector<string> {I(0), GO(0)},
vector<string> {GI(0)}); vector<string> {GI(0)});
} }
}; };
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
#include "utils/math_functions.h" #include "utils/math_functions.h"
#include "operators/arithmetic/affine_op.h" #include "operators/arithmetic/affine_op.h"
#if CUDNN_VERSION_MIN(6, 0, 0)
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
...@@ -48,9 +50,7 @@ void CuDNNAffineOp<Context>::RunOnDevice() { ...@@ -48,9 +50,7 @@ void CuDNNAffineOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
...@@ -213,4 +213,6 @@ DEPLOY_CUDNN(AffineGradient); ...@@ -213,4 +213,6 @@ DEPLOY_CUDNN(AffineGradient);
} // namespace dragon } // namespace dragon
#endif
#endif // WITH_CUDNN #endif // WITH_CUDNN
\ No newline at end of file
...@@ -146,6 +146,7 @@ void DivGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -146,6 +146,7 @@ void DivGradientOp<Context>::BroadcastRunWithType(int type) {
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "ignore") {
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
CHECK(dy != dx1) << "\nCan't set inplace if X2 was broadcast.";
if (type == 0 || type == 1) { if (type == 0 || type == 1) {
DECLARE_MULTIPLIER(multiplier, outer_dim); DECLARE_MULTIPLIER(multiplier, outer_dim);
math::Gemm<T, Context>( math::Gemm<T, Context>(
...@@ -185,7 +186,9 @@ DEPLOY_CPU(DivGradient); ...@@ -185,7 +186,9 @@ DEPLOY_CPU(DivGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(DivGradient); DEPLOY_CUDA(DivGradient);
#endif #endif
OPERATOR_SCHEMA(DivGradient).NumInputs(3).NumOutputs(2); OPERATOR_SCHEMA(DivGradient)
.NumInputs(3).NumOutputs(2)
.Inplace({ { 2, 0 } });
class GetDivGradient final : public GradientMakerBase { class GetDivGradient final : public GradientMakerBase {
public: public:
......
...@@ -29,7 +29,8 @@ void LogGradientOp<Context>::RunWithType() { ...@@ -29,7 +29,8 @@ void LogGradientOp<Context>::RunWithType() {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
math::Div<T, Context>(Output(0)->count(), dYdata, Xdata, dXdata, ctx()); math::Div<T, Context>(Output(0)->count(),
dYdata, Xdata, dXdata, ctx());
} }
template <class Context> template <class Context>
......
#include "core/workspace.h"
#include "utils/op_kernel.h"
#include "utils/math_functions.h"
#include "operators/arithmetic/maximum_op.h"
namespace dragon {
template <class Context> template <typename T>
void MaximumOp<Context>::EltwiseRunWithType() {
    /*! Element-wise maximum of two same-sized inputs: Y = max(X1, X2). */
    auto* a = Input(0).template data<T, Context>();
    auto* b = Input(1).template data<T, Context>();
    auto* y = Output(0)->template mutable_data<T, Context>();
    kernel::MaximumE<T, Context>(
        Output(0)->count(), a, b, y, ctx());
}
template <class Context> template <typename T>
void MaximumOp<Context>::BroadcastRunWithType() {
    /*! Broadcast path: one of the two inputs must be a single-element
     *  (scalar) tensor; Y = max(X, scalar) over the other input. */
    T max_val; float x2_val; const T* Xdata; T* Ydata;
    if (Input(0).count() == 1) {
        // Input(0) is the scalar; output takes the shape of Input(1).
        Output(0)->ReshapeLike(Input(1));
        // NOTE(review): the scalar is read as float32 on the CPU regardless
        // of T — presumably the scalar tensor is always fp32; verify.
        x2_val = Input(0).template data<float, CPUContext>()[0];
        max_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(1).template data<T, Context>();
        Ydata = Output(0)->template mutable_data<T, Context>();
    } else if (Input(1).count() == 1) {
        // Input(1) is the scalar; output takes the shape of Input(0).
        Output(0)->ReshapeLike(Input(0));
        x2_val = Input(1).template data<float, CPUContext>()[0];
        max_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(0).template data<T, Context>();
        Ydata = Output(0)->template mutable_data<T, Context>();
    } else { LOG(FATAL) << "Either Input(0) or Input(1) should be a scalar."; }
    kernel::MaximumB<T, Context>(Output(0)->count(),
        Xdata, max_val, Ydata, ctx());
}
template <class Context>
void MaximumOp<Context>::RunOnDevice() {
    /*! Identical shapes take the element-wise path; otherwise one input
     *  must be a scalar and the broadcast path reshapes the output itself. */
    const bool eltwise = Input(0).dims() == Input(1).dims();
    if (eltwise) Output(0)->ReshapeLike(Input(0));
    if (!XIsType(Input(0), float)) {
        LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
    } else if (eltwise) {
        EltwiseRunWithType<float>();
    } else {
        BroadcastRunWithType<float>();
    }
}
// Register the CPU (and, when built with CUDA, the GPU) implementations.
DEPLOY_CPU(Maximum);
#ifdef WITH_CUDA
DEPLOY_CUDA(Maximum);
#endif
// Maximum: two inputs (X1, X2), one output (Y).
OPERATOR_SCHEMA(Maximum).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void MaximumGradientOp<Context>::EltwiseRunWithType() {
    /*! Element-wise backward: the kernel distributes dY between dX1 and
     *  dX2 based on which input supplied each output element. */
    auto* a = Input(0).template data<T, Context>();
    auto* b = Input(1).template data<T, Context>();
    auto* dy = Input(-1).template data<T, Context>();
    auto* da = Output(0)->template mutable_data<T, Context>();
    auto* db = Output(1)->template mutable_data<T, Context>();
    kernel::MaximumEGrad<T, Context>(
        Output(0)->count(), a, b, dy, da, db, ctx());
}
template <class Context> template <typename T>
void MaximumGradientOp<Context>::BroadcastRunWithType() {
    /*! Broadcast backward: one input is a scalar. The non-scalar input's
     *  gradient is computed by the kernel; the scalar's gradient is zeroed. */
    T max_val; float x2_val;
    const T* Xdata; T* dX1data; float* dX2data;
    auto* dYdata = Input(-1).template data<T, Context>();
    if (Input(0).count() == 1) {
        // Input(0) is the scalar: dX1 -> Output(1), dX2 (scalar) -> Output(0).
        // NOTE(review): scalar is read as float32 on CPU regardless of T,
        // and its grad buffer is float* — assumes the scalar is fp32; verify.
        x2_val = Input(0).template data<float, CPUContext>()[0];
        max_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(1).template data<T, Context>();
        dX1data = Output(1)->template mutable_data<T, Context>();
        dX2data = Output(0)->template mutable_data<float, Context>();
        kernel::MaximumBGrad<T, Context>(Output(1)->count(),
            Xdata, max_val, dYdata, dX1data, ctx());
    } else if (Input(1).count() == 1) {
        // Input(1) is the scalar: dX1 -> Output(0), dX2 (scalar) -> Output(1).
        x2_val = Input(1).template data<float, CPUContext>()[0];
        max_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(0).template data<T, Context>();
        dX1data = Output(0)->template mutable_data<T, Context>();
        dX2data = Output(1)->template mutable_data<float, Context>();
        kernel::MaximumBGrad<T, Context>(Output(0)->count(),
            Xdata, max_val, dYdata, dX1data, ctx());
    } else { LOG(FATAL) << "Either Input(0) or Input(1) should be a scalar."; }
    // we simply zero the grad of scalar
    math::Set<float, Context>(1, 0, dX2data, ctx());
}
template <class Context>
void MaximumGradientOp<Context>::RunOnDevice() {
    /*! Gradients always mirror the shapes of the forward inputs. */
    Output(0)->ReshapeLike(Input(0));
    Output(1)->ReshapeLike(Input(1));
    if (!XIsType(Input(0), float)) {
        LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
    } else if (Input(0).dims() == Input(1).dims()) {
        EltwiseRunWithType<float>();
    } else {
        BroadcastRunWithType<float>();
    }
}
// Register the CPU (and, when built with CUDA, the GPU) implementations.
DEPLOY_CPU(MaximumGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(MaximumGradient);
#endif
// MaximumGradient: inputs (X1, X2, dY), outputs (dX1, dX2).
OPERATOR_SCHEMA(MaximumGradient).NumInputs(3).NumOutputs(2);
/*! Gradient maker: the backward op consumes both forward inputs plus dY
 *  and produces gradients for both inputs. */
class GetMaximumGradient final : public GradientMakerBase {
 public:
    GRADIENT_MAKER_CTOR(GetMaximumGradient);
    vector<OperatorDef> MakeDefs() override {
        const vector<string> grad_inputs({ I(0), I(1), GO(0) });
        const vector<string> grad_outputs({ GI(0), GI(1) });
        return SingleDef(def.type() + "Gradient", "",
            grad_inputs, grad_outputs);
    }
};

REGISTER_GRADIENT(Maximum, GetMaximumGradient);
} // namespace dragon
\ No newline at end of file
#include "core/workspace.h"
#include "utils/op_kernel.h"
#include "utils/math_functions.h"
#include "operators/arithmetic/minimum_op.h"
namespace dragon {
template <class Context> template <typename T>
void MinimumOp<Context>::EltwiseRunWithType() {
    /*! Element-wise minimum of two same-sized inputs: Y = min(X1, X2). */
    auto* a = Input(0).template data<T, Context>();
    auto* b = Input(1).template data<T, Context>();
    auto* y = Output(0)->template mutable_data<T, Context>();
    kernel::MinimumE<T, Context>(
        Output(0)->count(), a, b, y, ctx());
}
template <class Context> template <typename T>
void MinimumOp<Context>::BroadcastRunWithType() {
    /*! Broadcast path: one of the two inputs must be a single-element
     *  (scalar) tensor; Y = min(X, scalar) over the other input. */
    T min_val; float x2_val; const T* Xdata; T* Ydata;
    if (Input(0).count() == 1) {
        // Input(0) is the scalar; output takes the shape of Input(1).
        Output(0)->ReshapeLike(Input(1));
        // NOTE(review): the scalar is read as float32 on the CPU regardless
        // of T — presumably the scalar tensor is always fp32; verify.
        x2_val = Input(0).template data<float, CPUContext>()[0];
        min_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(1).template data<T, Context>();
        Ydata = Output(0)->template mutable_data<T, Context>();
    } else if (Input(1).count() == 1) {
        // Input(1) is the scalar; output takes the shape of Input(0).
        Output(0)->ReshapeLike(Input(0));
        x2_val = Input(1).template data<float, CPUContext>()[0];
        min_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(0).template data<T, Context>();
        Ydata = Output(0)->template mutable_data<T, Context>();
    } else { LOG(FATAL) << "Either Input(0) or Input(1) should be a scalar."; }
    kernel::MinimumB<T, Context>(Output(0)->count(),
        Xdata, min_val, Ydata, ctx());
}
template <class Context>
void MinimumOp<Context>::RunOnDevice() {
    /*! Identical shapes take the element-wise path; otherwise one input
     *  must be a scalar and the broadcast path reshapes the output itself. */
    const bool eltwise = Input(0).dims() == Input(1).dims();
    if (eltwise) Output(0)->ReshapeLike(Input(0));
    if (!XIsType(Input(0), float)) {
        LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
    } else if (eltwise) {
        EltwiseRunWithType<float>();
    } else {
        BroadcastRunWithType<float>();
    }
}
// Register the CPU (and, when built with CUDA, the GPU) implementations.
DEPLOY_CPU(Minimum);
#ifdef WITH_CUDA
DEPLOY_CUDA(Minimum);
#endif
// Minimum: two inputs (X1, X2), one output (Y).
OPERATOR_SCHEMA(Minimum).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void MinimumGradientOp<Context>::EltwiseRunWithType() {
    /*! Element-wise backward: the kernel distributes dY between dX1 and
     *  dX2 based on which input supplied each output element. */
    auto* a = Input(0).template data<T, Context>();
    auto* b = Input(1).template data<T, Context>();
    auto* dy = Input(-1).template data<T, Context>();
    auto* da = Output(0)->template mutable_data<T, Context>();
    auto* db = Output(1)->template mutable_data<T, Context>();
    kernel::MinimumEGrad<T, Context>(
        Output(0)->count(), a, b, dy, da, db, ctx());
}
template <class Context> template <typename T>
void MinimumGradientOp<Context>::BroadcastRunWithType() {
    /*! Broadcast backward: one input is a scalar. The non-scalar input's
     *  gradient is computed by the kernel; the scalar's gradient is zeroed. */
    T min_val; float x2_val;
    const T* Xdata; T* dX1data; float* dX2data;
    auto* dYdata = Input(-1).template data<T, Context>();
    if (Input(0).count() == 1) {
        // Input(0) is the scalar: dX1 -> Output(1), dX2 (scalar) -> Output(0).
        // NOTE(review): scalar is read as float32 on CPU regardless of T,
        // and its grad buffer is float* — assumes the scalar is fp32; verify.
        x2_val = Input(0).template data<float, CPUContext>()[0];
        min_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(1).template data<T, Context>();
        dX1data = Output(1)->template mutable_data<T, Context>();
        dX2data = Output(0)->template mutable_data<float, Context>();
        kernel::MinimumBGrad<T, Context>(Output(1)->count(),
            Xdata, min_val, dYdata, dX1data, ctx());
    } else if (Input(1).count() == 1) {
        // Input(1) is the scalar: dX1 -> Output(0), dX2 (scalar) -> Output(1).
        x2_val = Input(1).template data<float, CPUContext>()[0];
        min_val = dragon_cast<T, float>(x2_val);
        Xdata = Input(0).template data<T, Context>();
        dX1data = Output(0)->template mutable_data<T, Context>();
        dX2data = Output(1)->template mutable_data<float, Context>();
        kernel::MinimumBGrad<T, Context>(Output(0)->count(),
            Xdata, min_val, dYdata, dX1data, ctx());
    } else { LOG(FATAL) << "Either Input(0) or Input(1) should be a scalar."; }
    // we simply zero the grad of scalar
    math::Set<float, Context>(1, 0, dX2data, ctx());
}
template <class Context>
void MinimumGradientOp<Context>::RunOnDevice() {
    /*! Gradients always mirror the shapes of the forward inputs. */
    Output(0)->ReshapeLike(Input(0));
    Output(1)->ReshapeLike(Input(1));
    if (!XIsType(Input(0), float)) {
        LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
    } else if (Input(0).dims() == Input(1).dims()) {
        EltwiseRunWithType<float>();
    } else {
        BroadcastRunWithType<float>();
    }
}
// Register the CPU (and, when built with CUDA, the GPU) implementations.
DEPLOY_CPU(MinimumGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(MinimumGradient);
#endif
// MinimumGradient: inputs (X1, X2, dY), outputs (dX1, dX2).
OPERATOR_SCHEMA(MinimumGradient).NumInputs(3).NumOutputs(2);
/*! Gradient maker: the backward op consumes both forward inputs plus dY
 *  and produces gradients for both inputs. */
class GetMinimumGradient final : public GradientMakerBase {
 public:
    GRADIENT_MAKER_CTOR(GetMinimumGradient);
    vector<OperatorDef> MakeDefs() override {
        const vector<string> grad_inputs({ I(0), I(1), GO(0) });
        const vector<string> grad_outputs({ GI(0), GI(1) });
        return SingleDef(def.type() + "Gradient", "",
            grad_inputs, grad_outputs);
    }
};

REGISTER_GRADIENT(Minimum, GetMinimumGradient);
} // namespace dragon
\ No newline at end of file
...@@ -134,6 +134,7 @@ void MulGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -134,6 +134,7 @@ void MulGradientOp<Context>::BroadcastRunWithType(int type) {
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "ignore") {
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
CHECK(dy != dx1) << "\nCan't set inplace if X2 was broadcast.";
if (type == 0 || type == 1) { if (type == 0 || type == 1) {
DECLARE_MULTIPLIER(multiplier, outer_dim); DECLARE_MULTIPLIER(multiplier, outer_dim);
math::Gemm<T, Context>( math::Gemm<T, Context>(
...@@ -173,7 +174,9 @@ DEPLOY_CPU(MulGradient); ...@@ -173,7 +174,9 @@ DEPLOY_CPU(MulGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(MulGradient); DEPLOY_CUDA(MulGradient);
#endif #endif
OPERATOR_SCHEMA(MulGradient).NumInputs(3).NumOutputs(2); OPERATOR_SCHEMA(MulGradient)
.NumInputs(3).NumOutputs(2)
.Inplace({ { 2, 0 } });
class GetMulGradient : public GradientMakerBase { class GetMulGradient : public GradientMakerBase {
public: public:
......
...@@ -15,6 +15,7 @@ void PowOp<Context>::RunWithType() { ...@@ -15,6 +15,7 @@ void PowOp<Context>::RunWithType() {
dragon_cast<T, float>(value), Ydata, ctx()); dragon_cast<T, float>(value), Ydata, ctx());
return; return;
} }
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
ctx()->template Copy<T, Context, Context>(count, Ydata, Xdata); ctx()->template Copy<T, Context, Context>(count, Ydata, Xdata);
if (scale != 1.f) math::Scal<T, Context>(count, scale, Ydata, ctx()); if (scale != 1.f) math::Scal<T, Context>(count, scale, Ydata, ctx());
......
...@@ -150,7 +150,9 @@ DEPLOY_CPU(SubGradient); ...@@ -150,7 +150,9 @@ DEPLOY_CPU(SubGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(SubGradient); DEPLOY_CUDA(SubGradient);
#endif #endif
OPERATOR_SCHEMA(SubGradient).NumInputs(1).NumOutputs(2); OPERATOR_SCHEMA(SubGradient)
.NumInputs(1).NumOutputs(2)
.Inplace({ { 0, 0 } });
class GetSubGradient : public GradientMakerBase { class GetSubGradient : public GradientMakerBase {
public: public:
......
...@@ -7,8 +7,6 @@ ...@@ -7,8 +7,6 @@
#include "operators/control_flow/scan_op.h" #include "operators/control_flow/scan_op.h"
#include "operators/ndarray/slice_op.h" #include "operators/ndarray/slice_op.h"
#define str dragon_cast<string, int>
namespace dragon { namespace dragon {
template <class Context> template <class Context>
...@@ -29,14 +27,14 @@ void ScanOp<Context>::InitTemplate() { ...@@ -29,14 +27,14 @@ void ScanOp<Context>::InitTemplate() {
for (int i = 0; i < nseqs; i++) { for (int i = 0; i < nseqs; i++) {
OperatorDef* op = template_def.add_op(); OperatorDef* op = template_def.add_op();
op->CopyFrom(slice_def); op->CopyFrom(slice_def);
op->set_name(name() + "(BodyOp." + str(i) + ")"); op->set_name(name() + "(BodyOp." + std::to_string(i) + ")");
op->add_input(Input(i).name()); op->add_input(Input(i).name());
terms[Input(i).name()] = Input(i).name() + "@1"; terms[Input(i).name()] = Input(i).name() + "@1";
} }
for (int i = 0; i < nrepeats; i++) { for (int i = 0; i < nrepeats; i++) {
OperatorDef* op = template_def.add_op(); OperatorDef* op = template_def.add_op();
op->CopyFrom(func_def.op(i)); op->CopyFrom(func_def.op(i));
op->set_name(name() + "(BodyOp." + str(i + nseqs) + ")@1"); op->set_name(name() + "(BodyOp." + std::to_string(i + nseqs) + ")@1");
// replace inputs term // replace inputs term
for (int j = 0; j < op->input_size(); j++) { for (int j = 0; j < op->input_size(); j++) {
string* input = op->mutable_input(j); string* input = op->mutable_input(j);
...@@ -61,8 +59,8 @@ void ScanOp<Context>::UpdateTerms(int cur_step) { ...@@ -61,8 +59,8 @@ void ScanOp<Context>::UpdateTerms(int cur_step) {
string prev, now; string prev, now;
// update sequences term // update sequences term
for (int i = 0; i < nseqs; i++) { for (int i = 0; i < nseqs; i++) {
prev = Input(i).name() + "@" + str(cur_step - 1); prev = Input(i).name() + "@" + std::to_string(cur_step - 1);
now = Input(i).name() + "@" + str(cur_step); now = Input(i).name() + "@" + std::to_string(cur_step);
terms[prev] = now; terms[prev] = now;
} }
if (cur_step < 3) return; if (cur_step < 3) return;
...@@ -70,8 +68,8 @@ void ScanOp<Context>::UpdateTerms(int cur_step) { ...@@ -70,8 +68,8 @@ void ScanOp<Context>::UpdateTerms(int cur_step) {
// only support the latest one-step (as Theano's done) // only support the latest one-step (as Theano's done)
for (int i = 0; i < nout; i++) { for (int i = 0; i < nout; i++) {
if (default_outputs[i].empty()) continue; if (default_outputs[i].empty()) continue;
prev = Output(i)->name() + "@" + str(cur_step - 2); prev = Output(i)->name() + "@" + std::to_string(cur_step - 2);
now = Output(i)->name() + "@" + str(cur_step - 1); now = Output(i)->name() + "@" + std::to_string(cur_step - 1);
terms[prev] = now; terms[prev] = now;
} }
} }
...@@ -90,7 +88,7 @@ void ScanOp<Context>::UnrollTemplate() { ...@@ -90,7 +88,7 @@ void ScanOp<Context>::UnrollTemplate() {
if (graphs.count(nsteps)) return; if (graphs.count(nsteps)) return;
new_def.CopyFrom(template_def); new_def.CopyFrom(template_def);
new_def.set_name(name() + "(ScanLen." + str(nsteps) + ")"); new_def.set_name(name() + "(ScanLen." + std::to_string(nsteps) + ")");
Argument phase; phase.set_name("phase"); Argument phase; phase.set_name("phase");
phase.set_s(this->phase()); new_def.add_arg()->CopyFrom(phase); phase.set_s(this->phase()); new_def.add_arg()->CopyFrom(phase);
for (int idx = 0; idx < nseqs; idx++) { for (int idx = 0; idx < nseqs; idx++) {
...@@ -100,7 +98,7 @@ void ScanOp<Context>::UnrollTemplate() { ...@@ -100,7 +98,7 @@ void ScanOp<Context>::UnrollTemplate() {
op->mutable_arg(1)->set_i(nslices); op->mutable_arg(1)->set_i(nslices);
// add slices as outputs // add slices as outputs
for (int t = 1; t <= nslices; t++) { for (int t = 1; t <= nslices; t++) {
string slice = op->input(0) + "@" + str(t); string slice = op->input(0) + "@" + std::to_string(t);
op->add_output(slice); op->add_output(slice);
} }
} }
...@@ -111,7 +109,8 @@ void ScanOp<Context>::UnrollTemplate() { ...@@ -111,7 +109,8 @@ void ScanOp<Context>::UnrollTemplate() {
for (int idx = copy_l; idx < copy_r; idx++) { for (int idx = copy_l; idx < copy_r; idx++) {
OperatorDef* op = new_def.add_op(); OperatorDef* op = new_def.add_op();
op->CopyFrom(new_def.op(idx)); op->CopyFrom(new_def.op(idx));
op->set_name(SplitString(op->name(), "@")[0] + "@" + str(t)); op->set_name(str::split(op->name(), "@")[0]
+ "@" + std::to_string(t));
// replace inputs // replace inputs
for (int j = 0; j < op->input_size(); j++) { for (int j = 0; j < op->input_size(); j++) {
string* input = op->mutable_input(j); string* input = op->mutable_input(j);
...@@ -120,18 +119,19 @@ void ScanOp<Context>::UnrollTemplate() { ...@@ -120,18 +119,19 @@ void ScanOp<Context>::UnrollTemplate() {
// replace outputs // replace outputs
for (int j = 0; j < op->output_size(); j++) { for (int j = 0; j < op->output_size(); j++) {
string* output = op->mutable_output(j); string* output = op->mutable_output(j);
terms[*output] = SplitString(*output, "@")[0] + "@" + str(t); terms[*output] = str::split(*output, "@")[0]
+ "@" + std::to_string(t);
*output = terms[*output]; *output = terms[*output];
} }
} }
} }
for (int i = 0; i < nout; i++) { for (int i = 0; i < nout; i++) {
// solve the last step only // solve the last step only
new_def.add_target(func_def.target(i) + "@" + str(nsteps)); new_def.add_target(func_def.target(i) + "@" + std::to_string(nsteps));
// concat all steps if necessary // concat all steps if necessary
if (Output(i)->name() == "ignore") continue; if (Output(i)->name() == "ignore") continue;
OperatorDef* op = new_def.add_op(); OperatorDef* op = new_def.add_op();
op->set_name(name() + "(BodyOp." + str(nseqs + nrepeats + i) + ")"); op->set_name(name() + "(BodyOp." + std::to_string(nseqs + nrepeats + i) + ")");
op->set_type("Concat"); op->set_type("Concat");
Argument arg_axis, arg_nin; Argument arg_axis, arg_nin;
arg_axis.set_name("axis"); arg_axis.set_i(axis); arg_axis.set_name("axis"); arg_axis.set_i(axis);
...@@ -139,7 +139,7 @@ void ScanOp<Context>::UnrollTemplate() { ...@@ -139,7 +139,7 @@ void ScanOp<Context>::UnrollTemplate() {
op->add_arg()->CopyFrom(arg_axis); op->add_arg()->CopyFrom(arg_axis);
op->add_arg()->CopyFrom(arg_nin); op->add_arg()->CopyFrom(arg_nin);
for (int t = 1; t <= nsteps; t++) for (int t = 1; t <= nsteps; t++)
op->add_input(Output(i)->name() + "@" + str(t)); op->add_input(Output(i)->name() + "@" + std::to_string(t));
op->add_output(Output(i)->name()); op->add_output(Output(i)->name());
// solve all the all steps // solve all the all steps
new_def.add_target(Output(i)->name()); new_def.add_target(Output(i)->name());
...@@ -195,7 +195,7 @@ void ScanGradientOp<Context>::MakeOps(const GraphDef& forward_def, ...@@ -195,7 +195,7 @@ void ScanGradientOp<Context>::MakeOps(const GraphDef& forward_def,
maker.Make(forward_def, targets, new_def); maker.Make(forward_def, targets, new_def);
// post-process // post-process
new_def.set_name(name() + "(ScanLen." + str(nsteps) + ")"); new_def.set_name(name() + "(ScanLen." + std::to_string(nsteps) + ")");
for (auto& target : targets) { for (auto& target : targets) {
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "ignore") continue; if (Output(i)->name() == "ignore") continue;
......
#include "core/workspace.h"
#include "utils/op_kernel.h"
#include "utils/math_functions.h"
#include "operators/loss/nll_loss_op.h"
namespace dragon {
template <class Context> template <typename Tx, typename Ty>
void NLLLossOp<Context>::RunWithType() {
    /*! Compute per-element NLL losses, then reduce them according to the
     *  "normalization" mode. Tx: prediction dtype, Ty: target dtype. */
    // LPdata: predictions (presumably log-probabilities — TODO confirm
    // against kernel::NLLLoss); Tdata: target class indices.
    auto* LPdata = Input(0).template data<Tx, Context>();
    auto* Tdata = Input(1).template data<Ty, Context>();
    // Idata: optional ignored-label values; nullptr when none are set.
    auto* Idata = !ignores.count() ? nullptr :
        ignores.template data<int, Context>();
    auto* Ldata = losses.template mutable_data<float, Context>();
    auto* Fdata = flags.template mutable_data<float, Context>();
    kernel::NLLLoss<Tx, Ty, Context>(
        outer_dim, Input(0).dim(axis), inner_dim,
            LPdata, Tdata, Idata, ignores.count(),
                Ldata, Fdata, ctx());
    if (normalization == "UNIT") {
        // Unreduced mode: return per-unit losses with the class axis removed.
        vector<TIndex> output_dims = Input(0).dims();
        output_dims.erase(output_dims.begin() + axis);
        Output(0)->Reshape(output_dims);
        Output(0)->template CopyFrom<Context>(losses, ctx());
        return;
    }
    float normalizer = 1;
    if (normalization == "VALID") {
        // Average over the non-ignored elements (clamped to at least one).
        normalizer = std::max(
            math::ASum<float, Context>(
                flags.count(), Fdata), 1.f);
    } else if (normalization == "BATCH_SIZE") {
        normalizer = Input(0).dim(0);
    } else if (normalization == "FULL") {
        normalizer = outer_dim * inner_dim;
    }
    // Reduced modes emit a single scalar: sum(losses) / normalizer.
    float loss = math::ASum<float, Context>(losses.count(), Ldata);
    Output(0)->Reshape({ 1 });
    auto* Ydata = Output(0)->template mutable_data<float, Context>();
    math::Set<float, Context>(1, loss / normalizer, Ydata, ctx());
}
template <class Context>
void NLLLossOp<Context>::RunOnDevice() {
    ctx()->set_stream_id(0);  // enforce default stream
    // Treat Input(0) as [outer_dim, C, inner_dim] with C = dim(axis);
    // Input(1) supplies one label per (outer, inner) position.
    outer_dim = Input(0).count(0, axis);
    inner_dim = Input(0).count(axis + 1);
    CHECK_EQ(outer_dim * inner_dim, Input(1).count())
        << "\nNumber of predictions must match the number of labels.";
    // Scratch tensors: one loss value and one valid-flag per prediction.
    losses.Reshape({ outer_dim * inner_dim });
    flags.Reshape({ outer_dim * inner_dim });
    // Dispatch on (prediction dtype, target dtype).
    if (XIsType(Input(0), float)) {
        if (XIsType(Input(1), float)) RunWithType<float, float>();
        else if (XIsType(Input(1), int64_t)) RunWithType<float, int64_t>();
        else LOG(FATAL) << DTypeHelper(Input(1), { "float32", "int64" });
    } else if (XIsType(Input(0), float16)) {
        if (XIsType(Input(1), float)) RunWithType<float16, float>();
        else if (XIsType(Input(1), int64_t)) RunWithType<float16, int64_t>();
        else LOG(FATAL) << DTypeHelper(Input(1), { "float32", "int64" });
    } else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
// Register the CPU (and, when built with CUDA, the GPU) implementations.
DEPLOY_CPU(NLLLoss);
#ifdef WITH_CUDA
DEPLOY_CUDA(NLLLoss);
#endif
// NLLLoss: inputs (Predictions, Targets), one output (Loss).
OPERATOR_SCHEMA(NLLLoss).NumInputs(2).NumOutputs(1);
template <class Context> template <typename Tx, typename Ty>
void NLLLossGradientOp<Context>::RunWithType() {
    /*! Backward pass: fill dX from the targets, then scale it according
     *  to the "normalization" mode used in the forward pass. */
    auto* LPdata = Input(0).template data<Tx, Context>();
    auto* Tdata = Input(1).template data<Ty, Context>();
    // Idata: optional ignored-label values; nullptr when none are set.
    auto* Idata = !ignores.count() ? nullptr :
        ignores.template data<int, Context>();
    auto* dXdata = Output(0)->template mutable_data<Tx, Context>();
    auto* Fdata = flags.template mutable_data<float, Context>();
    // Zero dX first: the kernel only writes at the target positions.
    math::Set<Tx, Context>(Output(0)->count(),
        dragon_cast<Tx, float>(0.) , dXdata, ctx());
    kernel::NLLLossGrad<Tx, Ty, Context>(
        outer_dim, Output(0)->dim(axis), inner_dim,
            LPdata, Tdata, Idata, ignores.count(),
                dXdata, Fdata, ctx());
    if (normalization == "UNIT") {
        // Unreduced mode: dY has one value per unit. Broadcast each dY
        // value over the class axis (SumGrad), convert float -> Tx, then
        // scale dX element-wise. Two workspace caches hold the temporaries.
        auto* dYdata = Input(-1).template data<float, Context>();
        vector<void*> WSdata = ws()->template caches<Context>(
            { Input(0).count() * sizeof(float),
                    Input(0).count() * sizeof(Tx) });
        kernel::SumGrad<float, Context>(
            Input(0).count() / Input(0).dim(axis),
                Input(0).dim(axis), inner_dim,
                    1.0, dYdata, (float*)WSdata[0], ctx());
        kernel::TypeA2B<float, Tx, Context>(Input(0).count(),
            (const float*)WSdata[0], (Tx*)WSdata[1], ctx());
        math::Mul<Tx, Context>(Output(0)->count(),
            (Tx*)WSdata[1], dXdata, dXdata, ctx());
        return;
    }
    float normalizer = 1;
    if (normalization == "VALID") {
        // Match the forward pass: average over non-ignored elements.
        normalizer = std::max(
            math::ASum<float, Context>(
                flags.count(), Fdata), 1.f);
    } else if (normalization == "BATCH_SIZE") {
        normalizer = Input(0).dim(0);
    } else if (normalization == "FULL") {
        normalizer = outer_dim * inner_dim;
    }
    // Reduced modes: dY is a scalar; fetch it to the host and scale dX.
    auto* dYdata = Input(-1).template data<float, Context>();
    float dYdata_host; ctx()->template Copy<float, CPUContext, Context>(
        1, &dYdata_host, dYdata);
    math::Scal<Tx, Context>(Output(0)->count(),
        dYdata_host / normalizer, dXdata, ctx());
}
template <class Context>
void NLLLossGradientOp<Context>::RunOnDevice() {
    ctx()->set_stream_id(0);  // enforce default stream
    // Same [outer_dim, C, inner_dim] factorization as the forward op.
    outer_dim = Input(0).count(0, axis);
    inner_dim = Input(0).count(axis + 1);
    Output(0)->ReshapeLike(Input(0));
    // Valid-flags scratch, one entry per prediction.
    flags.Reshape({ outer_dim * inner_dim });
    // Dispatch on (prediction dtype, target dtype).
    if (XIsType(Input(0), float)) {
        if (XIsType(Input(1), float)) RunWithType<float, float>();
        else if (XIsType(Input(1), int64_t)) RunWithType<float, int64_t>();
        else LOG(FATAL) << DTypeHelper(Input(1), { "float32", "int64" });
    } else if (XIsType(Input(0), float16)) {
        if (XIsType(Input(1), float)) RunWithType<float16, float>();
        else if (XIsType(Input(1), int64_t)) RunWithType<float16, int64_t>();
        else LOG(FATAL) << DTypeHelper(Input(1), { "float32", "int64" });
    } else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
// Register the CPU (and, when built with CUDA, the GPU) implementations.
DEPLOY_CPU(NLLLossGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(NLLLossGradient);
#endif
// NLLLossGradient: inputs (Predictions, Targets, dY), one output (dX).
OPERATOR_SCHEMA(NLLLossGradient).NumInputs(3).NumOutputs(1);
/*! Gradient maker: the backward op consumes both forward inputs plus dY
 *  and produces the gradient of the predictions only. */
class GetNLLLossGradient final : public GradientMakerBase {
 public:
    GRADIENT_MAKER_CTOR(GetNLLLossGradient);
    vector<OperatorDef> MakeDefs() override {
        const vector<string> grad_inputs({ I(0), I(1), GO(0) });
        const vector<string> grad_outputs({ GI(0) });
        return SingleDef(def.type() + "Gradient", "",
            grad_inputs, grad_outputs);
    }
};

REGISTER_GRADIENT(NLLLoss, GetNLLLossGradient);
} // namespace dragon
\ No newline at end of file
...@@ -18,7 +18,9 @@ void SigmoidFocalLossOp<Context>::RunWithType() { ...@@ -18,7 +18,9 @@ void SigmoidFocalLossOp<Context>::RunWithType() {
Xdata, Tdata, Ldata, Fdata, ctx()); Xdata, Tdata, Ldata, Fdata, ctx());
if (normalization == "UNIT") { if (normalization == "UNIT") {
Output(0)->ReshapeLike(losses); vector<TIndex> output_dims = Input(0).dims();
output_dims.erase(output_dims.begin() + axis);
Output(0)->Reshape(output_dims);
Output(0)->template CopyFrom<Context>(losses, ctx()); Output(0)->template CopyFrom<Context>(losses, ctx());
return; return;
} }
......
...@@ -43,9 +43,9 @@ template <class Context> ...@@ -43,9 +43,9 @@ template <class Context>
void SmoothL1LossOp<Context>::RunOnDevice() { void SmoothL1LossOp<Context>::RunOnDevice() {
ctx()->set_stream_id(0); // enforce default stream ctx()->set_stream_id(0); // enforce default stream
CHECK(Input(0).dims() == Input(1).dims()); CHECK(Input(0).count() == Input(1).count());
if (InputSize() > 2) CHECK(Input(0).dims() == Input(2).dims()); if (InputSize() > 2) CHECK(Input(0).count() == Input(2).count());
if (InputSize() > 3) CHECK(Input(0).dims() == Input(3).dims()); if (InputSize() > 3) CHECK(Input(0).count() == Input(3).count());
Output(0)->Reshape({ 1 }); Output(0)->Reshape({ 1 });
diff = ws()->CreateTensor("/mnt/" + anchor() + "/smoothl1_loss/diff"); diff = ws()->CreateTensor("/mnt/" + anchor() + "/smoothl1_loss/diff");
......
...@@ -36,7 +36,9 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() { ...@@ -36,7 +36,9 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() {
Ldata, Fdata, ctx()); Ldata, Fdata, ctx());
if (normalization == "UNIT") { if (normalization == "UNIT") {
Output(0)->ReshapeLike(losses); vector<TIndex> output_dims = Input(0).dims();
output_dims.erase(output_dims.begin() + axis);
Output(0)->Reshape(output_dims);
Output(0)->template CopyFrom<Context>(losses, ctx()); Output(0)->template CopyFrom<Context>(losses, ctx());
return; return;
} }
......
...@@ -28,6 +28,36 @@ void InitializeOp<Context>::RunOnDevice() { ...@@ -28,6 +28,36 @@ void InitializeOp<Context>::RunOnDevice() {
RunWithType<float>(); RunWithType<float>();
} }
template <class Context> template <typename T>
void FillOp<Context>::RunWithType() {
    /*! Fill the entire output with the scalar "value", cast to T. */
    const auto count = Output(0)->count();
    auto* Ydata = Output(0)->template mutable_data<T, Context>();
    math::Set<T, Context>(
        count, dragon_cast<T, float>(value), Ydata, ctx());
}
template <class Context>
void FillOp<Context>::RunOnDevice() {
    /*! Resolve the output shape — either from the dimension arguments or
     *  from an int32 shape tensor — then fill it per the "dtype" argument. */
    vector<TIndex> output_shape;
    if (shape_desc.empty()) {
        // determine the shape from dimensions
        int ndims = (int)std::max(dims_value.size(), dims_desc.size());
        for (int i = 0; i < ndims; i++) output_shape.push_back(dims(i));
    } else {
        // determine the shape from the given shape tensor
        Tensor* shape = ws()->GetTensor(shape_desc);
        CHECK(shape->IsType<int>()) << "\nThe type of shape should be int32.";
        auto* shape_data = shape->template data<int, CPUContext>();
        for (int i = 0; i < shape->count(); i++)
            output_shape.push_back(shape_data[i]);
    }
    Output(0)->Reshape(output_shape);
    if (dtype == "float32") RunWithType<float>();
    // BUGFIX: this branch previously compared against "float32" again,
    // making the float16 path unreachable (it always fell through to float).
    else if (dtype == "float16") RunWithType<float16>();
    else if (dtype == "int32") RunWithType<int>();
    else if (dtype == "int64") RunWithType<int64_t>();
    else LOG(FATAL) << DTypeHelper(dtype,
        { "float32", "float16", "int32", "int64" });
}
// constant // constant
DEPLOY_CPU(Fill); DEPLOY_CPU(Fill);
#ifdef WITH_CUDA #ifdef WITH_CUDA
......
...@@ -3,14 +3,6 @@ ...@@ -3,14 +3,6 @@
namespace dragon { namespace dragon {
string DimString(const vector<TIndex>& shape) {
std::stringstream ss;
ss << "(";
for (int i = 0; i < shape.size() - 1; i++) ss << shape[i] << ",";
ss << shape[shape.size() - 1] << ")";
return ss.str();
}
template <class Context> template <class Context>
void ReshapeOp<Context>::RunOnDevice() { void ReshapeOp<Context>::RunOnDevice() {
if (shape_desc.size() > 0 || shape_value.size() > 0) { if (shape_desc.size() > 0 || shape_value.size() > 0) {
...@@ -56,7 +48,7 @@ void ReshapeOp<Context>::RunOnDevice() { ...@@ -56,7 +48,7 @@ void ReshapeOp<Context>::RunOnDevice() {
CHECK_EQ(Input(0).count() % total_count, 0) CHECK_EQ(Input(0).count() % total_count, 0)
<< "\nCan not change the total size: " << "\nCan not change the total size: "
<< Input(0).DimString() << Input(0).DimString()
<< " -> " << DimString(new_shape); << " -> " << Tensor::DimString(new_shape);
new_shape[i] = Input(0).count() / total_count; new_shape[i] = Input(0).count() / total_count;
total_count *= new_shape[i]; total_count *= new_shape[i];
break; break;
...@@ -66,7 +58,7 @@ void ReshapeOp<Context>::RunOnDevice() { ...@@ -66,7 +58,7 @@ void ReshapeOp<Context>::RunOnDevice() {
CHECK_EQ(total_count, Input(0).count()) CHECK_EQ(total_count, Input(0).count())
<< "\nCan not change the total size." << "\nCan not change the total size."
<< Input(0).DimString() << Input(0).DimString()
<< " -> " << DimString(new_shape); << " -> " << Tensor::DimString(new_shape);
Output(0)->Reshape(new_shape); Output(0)->Reshape(new_shape);
Output(0)->SetMeta(Input(0).meta()); Output(0)->SetMeta(Input(0).meta());
Output(0)->Share(Input(0).memory()); Output(0)->Share(Input(0).memory());
......
...@@ -123,14 +123,9 @@ template <class Context> ...@@ -123,14 +123,9 @@ template <class Context>
void CuDNNBatchNormOp<Context>::RunOnDevice() { void CuDNNBatchNormOp<Context>::RunOnDevice() {
Setup(); Setup();
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
} }
REGISTER_CUDNN_OPERATOR( REGISTER_CUDNN_OPERATOR(
...@@ -317,7 +312,6 @@ template <class Context> ...@@ -317,7 +312,6 @@ template <class Context>
void CuDNNBatchNormGradientOp<Context>::RunOnDevice() { void CuDNNBatchNormGradientOp<Context>::RunOnDevice() {
Setup(); Setup();
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) { if (XIsType(Input(0), float)) {
if (this->use_global_stats) InferenceRunWithType<float>(); if (this->use_global_stats) InferenceRunWithType<float>();
else TrainingRunWithType<float>(); else TrainingRunWithType<float>();
...@@ -327,12 +321,6 @@ void CuDNNBatchNormGradientOp<Context>::RunOnDevice() { ...@@ -327,12 +321,6 @@ void CuDNNBatchNormGradientOp<Context>::RunOnDevice() {
LOG(FATAL) << DTypeHelper(Input(0), { "float32" }); LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
} else TrainingRunWithType<float16>(); } else TrainingRunWithType<float16>();
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); } else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) {
if (this->use_global_stats) InferenceRunWithType<float>();
else TrainingRunWithType<float>();
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
} }
REGISTER_CUDNN_OPERATOR( REGISTER_CUDNN_OPERATOR(
......
...@@ -25,8 +25,8 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() { ...@@ -25,8 +25,8 @@ void CuDNNRecurrentOpBase<Context>::ResetDesc() {
CUDNN_CHECK(cudnnDropoutGetStatesSize( CUDNN_CHECK(cudnnDropoutGetStatesSize(
ctx()->cudnn_handle(), &states_size)); ctx()->cudnn_handle(), &states_size));
std::lock_guard<std::mutex> lk(CUDAContext::mutex()); std::lock_guard<std::mutex> lk(CUDAContext::mutex());
Tensor* states = ws()->CreateTensor("/share/cudnn/dropout:" + Tensor* states = ws()->CreateTensor("/share/cudnn/dropout:"
dragon_cast<string, unsigned long long>(random_seed) + "/states"); + std::to_string(random_seed) + "/states");
if (states->count() > 0) { if (states->count() > 0) {
auto* Sdata = states->template mutable_data<uint8_t, Context>(); auto* Sdata = states->template mutable_data<uint8_t, Context>();
CUDNN_CHECK(cudnnRestoreDropoutDescriptor( CUDNN_CHECK(cudnnRestoreDropoutDescriptor(
...@@ -160,9 +160,7 @@ void CuDNNRecurrentOp<Context>::RunOnDevice() { ...@@ -160,9 +160,7 @@ void CuDNNRecurrentOp<Context>::RunOnDevice() {
ctx()->set_stream_id(0); // enforce default stream ctx()->set_stream_id(0); // enforce default stream
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
...@@ -240,9 +238,7 @@ void CuDNNRecurrentGradientOp<Context>::RunOnDevice() { ...@@ -240,9 +238,7 @@ void CuDNNRecurrentGradientOp<Context>::RunOnDevice() {
Output(3)->ReshapeLike(Input(3)); // dCx Output(3)->ReshapeLike(Input(3)); // dCx
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
#ifdef WITH_CUDA_FP16
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
#endif
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
......
...@@ -31,8 +31,9 @@ void CollectiveUpdateOp<Context>::InitNCCL() { ...@@ -31,8 +31,9 @@ void CollectiveUpdateOp<Context>::InitNCCL() {
ncclUniqueId id; ncclUniqueId id;
if (comm_rank == comm_root) NCCL_CHECK(ncclGetUniqueId(&id)); if (comm_rank == comm_root) NCCL_CHECK(ncclGetUniqueId(&id));
MPI_Bcast((void *)&id, sizeof(id), MPI_BYTE, comm_root, comm); MPI_Bcast((void *)&id, sizeof(id), MPI_BYTE, comm_root, comm);
NCCL_CHECK(ncclCommInitRank(&nccl_comm, comm_size, id, comm_rank)); ctx()->SwitchToDevice();
closure = CUDAClosure<Context>(ctx()); closure = CUDAClosure<Context>(ctx());
NCCL_CHECK(ncclCommInitRank(&nccl_comm, comm_size, id, comm_rank));
#else #else
LOG(FATAL) << "NCCL was not compiled."; LOG(FATAL) << "NCCL was not compiled.";
#endif #endif
......
...@@ -29,6 +29,7 @@ void BiasAddOp<Context>::RunOnDevice() { ...@@ -29,6 +29,7 @@ void BiasAddOp<Context>::RunOnDevice() {
dim = Input(0).dim(-1); dim = Input(0).dim(-1);
inner_dim = Input(0).count(1) / dim; inner_dim = Input(0).count(1) / dim;
} else LOG(FATAL) << "Unknown data format: " << data_format; } else LOG(FATAL) << "Unknown data format: " << data_format;
Output(0)->ReshapeLike(Input(0));
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
...@@ -38,7 +39,9 @@ DEPLOY_CPU(BiasAdd); ...@@ -38,7 +39,9 @@ DEPLOY_CPU(BiasAdd);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(BiasAdd); DEPLOY_CUDA(BiasAdd);
#endif #endif
OPERATOR_SCHEMA(BiasAdd).NumInputs(2).NumOutputs(1).Inplace({ { 0, 0 } }); OPERATOR_SCHEMA(BiasAdd)
.NumInputs(2).NumOutputs(1)
.Inplace({ { 0, 0 } });
template <class Context> template <typename T> template <class Context> template <typename T>
void BiasAddGradientOp<Context>::RunWithType() { void BiasAddGradientOp<Context>::RunWithType() {
...@@ -62,6 +65,12 @@ void BiasAddGradientOp<Context>::RunWithType() { ...@@ -62,6 +65,12 @@ void BiasAddGradientOp<Context>::RunWithType() {
dYdata += y_offset; dYdata += y_offset;
} }
} }
if (Output(0)->name() != "ignore" &&
Output(0)->name() != Input(-1).name()) {
Output(0)->ReshapeLike(Input(-1));
Output(0)->template CopyFrom<Context>(Input(-1), ctx());
}
} }
template <class Context> template <class Context>
...@@ -85,7 +94,9 @@ DEPLOY_CPU(BiasAddGradient); ...@@ -85,7 +94,9 @@ DEPLOY_CPU(BiasAddGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(BiasAddGradient); DEPLOY_CUDA(BiasAddGradient);
#endif #endif
OPERATOR_SCHEMA(BiasAddGradient).NumInputs(3).NumOutputs(2); OPERATOR_SCHEMA(BiasAddGradient)
.NumInputs(3).NumOutputs(2)
.Inplace({ { 2, 0 } });
class GetBiasAddGradient final : public GradientMakerBase { class GetBiasAddGradient final : public GradientMakerBase {
public: public:
......
...@@ -13,12 +13,12 @@ void Conv2dOp<Context>::RunWithType() { ...@@ -13,12 +13,12 @@ void Conv2dOp<Context>::RunWithType() {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
for (int n = 0; n < Input(0).dim(0); n++) { for (int n = 0; n < Input(0).dim(0); n++)
Wx(Xdata + n * x_offset, Wdata, Ydata + n * y_offset); Wx(Xdata + n * x_offset, Wdata, Ydata + n * y_offset);
if (HasBias()) { if (HasBias()) {
auto* Bdata = Input(2).template data<T, Context>(); auto* Bdata = Input(2).template data<T, Context>();
Pb(Bdata, Ydata + n * y_offset); Pb(Bdata, Ydata);
}
} }
} }
......
...@@ -94,19 +94,9 @@ void ConvOpBase<Context>::Wx( ...@@ -94,19 +94,9 @@ void ConvOpBase<Context>::Wx(
template <class Context> template <typename T> template <class Context> template <typename T>
void ConvOpBase<Context>::Pb(const T* bias, T* y) { void ConvOpBase<Context>::Pb(const T* bias, T* y) {
DECLARE_MULTIPLIER(multiplier, out_spatial_dim); DECLARE_MULTIPLIER(multiplier, out_spatial_dim);
if (data_format == "NCHW") { kernel::BiasAdd<T, Context>(Output(0)->count(),
math::Gemm<T, Context>( Input(0).dim(0), num_output, out_spatial_dim,
CblasNoTrans, CblasNoTrans, data_format, bias, multiplier, y, ctx());
num_output, out_spatial_dim, 1,
1.0, bias, multiplier,
1.0, y, ctx());
} else if (data_format == "NHWC") {
math::Gemm<T, Context>(
CblasNoTrans, CblasNoTrans,
out_spatial_dim, num_output, 1,
1.0, multiplier, bias,
1.0, y, ctx());
}
} }
template <class Context> template <typename T> template <class Context> template <typename T>
...@@ -117,8 +107,7 @@ void ConvOpBase<Context>::Dx(const T* dy, const T* weights, T* dx) { ...@@ -117,8 +107,7 @@ void ConvOpBase<Context>::Dx(const T* dy, const T* weights, T* dx) {
if (data_format == "NCHW") { if (data_format == "NCHW") {
math::Gemm<T, Context>( math::Gemm<T, Context>(
CblasTrans, CblasNoTrans, CblasTrans, CblasNoTrans,
kernel_dim, kernel_dim, conv_out_spatial_dim,
conv_out_spatial_dim,
conv_out_channels / group, conv_out_channels / group,
1.0, weights + weight_offset * g, 1.0, weights + weight_offset * g,
dy + output_offset * g, dy + output_offset * g,
...@@ -126,8 +115,7 @@ void ConvOpBase<Context>::Dx(const T* dy, const T* weights, T* dx) { ...@@ -126,8 +115,7 @@ void ConvOpBase<Context>::Dx(const T* dy, const T* weights, T* dx) {
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
math::Gemm<T, Context>( math::Gemm<T, Context>(
CblasNoTrans, CblasTrans, CblasNoTrans, CblasTrans,
conv_out_spatial_dim, conv_out_spatial_dim, kernel_dim,
kernel_dim,
conv_out_channels / group, conv_out_channels / group,
1.0, dy + output_offset * g, 1.0, dy + output_offset * g,
weights + weight_offset * g, weights + weight_offset * g,
......
#ifdef WITH_CUDNN
#include "core/workspace.h"
#include "utils/filler.h"
#include "operators/vision/bias_add_op.h"
namespace dragon {
template <class Context> template <typename T>
void CuDNNBiasAddOp<Context>::RunWithType() {
    // Add the bias (Input(1), `dim` channels) onto the output tensor via
    // cudnnAddTensor. Fill the bias tensor first if it is still empty.
    TENSOR_FILL(Input(1), vector<TIndex>(1, dim));
    // Describe both tensors as 4D in the requested layout; the spatial
    // extent is folded into `inner_dim` (NCHW: trailing dim, NHWC: dim 2).
    if (data_format == "NCHW") {
        cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
            vector<TIndex>({ 1, dim, 1, 1 }));
        cudnnSetTensor4dDesc<T>(&output_desc, data_format,
            vector<TIndex>({ outer_dim, dim, 1, inner_dim }));
    } else if (data_format == "NHWC") {
        cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
            vector<TIndex>({ 1, 1, 1, dim }));
        cudnnSetTensor4dDesc<T>(&output_desc, data_format,
            vector<TIndex>({ outer_dim, 1, inner_dim, dim }));
    }
    auto* Bdata = Input(1).template data<T, Context>();
    auto* Ydata = Output(0)->template mutable_data<T, Context>();
    // beta == one: the bias is ACCUMULATED into Ydata rather than written.
    // NOTE(review): this assumes Output(0) already contains the input data
    // (BiasAdd is registered with Inplace {0, 0}) — confirm for the
    // non-inplace call path.
    CUDNN_CHECK(cudnnAddTensor(ctx()->cudnn_handle(),
        CUDNNType<T>::one, bias_desc, Bdata,
        CUDNNType<T>::one, output_desc, Ydata));
}
template <class Context>
void CuDNNBiasAddOp<Context>::RunOnDevice() {
    // Decompose the input into (outer, channel, inner) extents per layout.
    auto& X = Input(0);
    if (data_format == "NCHW") {
        outer_dim = X.dim(0);
        dim = X.dim(1);
        inner_dim = X.count(2);
    } else if (data_format == "NHWC") {
        outer_dim = X.dim(0);
        dim = X.dim(-1);
        inner_dim = X.count(1) / dim;
    } else {
        LOG(FATAL) << "Unknown data format: " << data_format;
    }
    Output(0)->ReshapeLike(X);
    // Dispatch on the element type of the input.
    if (XIsType(X, float)) {
        RunWithType<float>();
    } else if (XIsType(X, float16)) {
        RunWithType<float16>();
    } else {
        LOG(FATAL) << DTypeHelper(X, { "float32", "float16" });
    }
}
DEPLOY_CUDNN(BiasAdd);
template <class Context> template <typename T>
void CuDNNBiasAddGradientOp<Context>::RunWithType() {
    // Reduce dY over all non-channel axes to form the bias gradient, using
    // cudnnConvolutionBackwardBias; spatial extent is folded into inner_dim.
    if (data_format == "NCHW") {
        cudnnSetTensor4dDesc<T>(&input_desc, data_format,
            vector<TIndex>({ outer_dim, dim, 1, inner_dim }));
        cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
            vector<TIndex>({ 1, dim, 1, 1 }));
    } else if (data_format == "NHWC") {
        cudnnSetTensor4dDesc<T>(&input_desc, data_format,
            vector<TIndex>({ outer_dim, 1, inner_dim, dim }));
        cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
            vector<TIndex>({ 1, 1, 1, dim }));
    }
    auto* dYdata = Input(-1).template data<T, Context>();
    // mutable_data is requested with ctx() here (unlike elsewhere).
    // NOTE(review): beta == one means the result is ACCUMULATED into dBdata;
    // presumably the ctx()-taking overload zero-fills the buffer first —
    // confirm, otherwise stale gradients would leak in.
    T* dBdata = Output(1)->template mutable_data<T, Context>(ctx());
    CUDNN_CHECK(cudnnConvolutionBackwardBias(ctx()->cudnn_handle(),
        CUDNNType<T>::one, input_desc, dYdata,
        CUDNNType<T>::one, bias_desc, dBdata));
    // dX of a bias add is dY itself: copy it through unless the caller
    // ignores dX or the op runs in-place (Output(0) aliases Input(-1)).
    if (Output(0)->name() != "ignore" &&
        Output(0)->name() != Input(-1).name()) {
        Output(0)->ReshapeLike(Input(-1));
        Output(0)->template CopyFrom<Context>(Input(-1), ctx());
    }
}
template <class Context>
void CuDNNBiasAddGradientOp<Context>::RunOnDevice() {
    // Decompose the input into (outer, channel, inner) extents per layout.
    auto& X = Input(0);
    if (data_format == "NCHW") {
        outer_dim = X.dim(0);
        dim = X.dim(1);
        inner_dim = X.count(2);
    } else if (data_format == "NHWC") {
        outer_dim = X.dim(0);
        dim = X.dim(-1);
        inner_dim = X.count(1) / dim;
    } else {
        LOG(FATAL) << "Unknown data format: " << data_format;
    }
    // The bias gradient takes the shape of the bias input.
    Output(1)->ReshapeLike(Input(1));
    // Dispatch on the element type of the input.
    if (XIsType(X, float)) {
        RunWithType<float>();
    } else if (XIsType(X, float16)) {
        RunWithType<float16>();
    } else {
        LOG(FATAL) << DTypeHelper(X, { "float32", "float16" });
    }
}
DEPLOY_CUDNN(BiasAddGradient);
} // namespace dragon
#endif // WITH_CUDNN
\ No newline at end of file
...@@ -32,15 +32,12 @@ void CuDNNConv2dOp<Context>::ResetDesc() { ...@@ -32,15 +32,12 @@ void CuDNNConv2dOp<Context>::ResetDesc() {
// determine the bias shape // determine the bias shape
if (HasBias()) { if (HasBias()) {
bias_offset = num_output / cudnn_group;
if (data_format == "NCHW") { if (data_format == "NCHW") {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
&bias_desc, data_format, vector<TIndex>({ 1, num_output, 1, 1 }));
vector<TIndex>({ 1, bias_offset, 1, 1 }));
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
&bias_desc, data_format, vector<TIndex>({ 1, 1, 1, num_output }));
vector<TIndex>({ 1, 1, 1, bias_offset }));
} }
} }
...@@ -86,12 +83,13 @@ void CuDNNConv2dOp<Context>::RunWithType() { ...@@ -86,12 +83,13 @@ void CuDNNConv2dOp<Context>::RunWithType() {
filter_desc, Wdata + weight_offset * g, filter_desc, Wdata + weight_offset * g,
conv_desc, fwd_algo, WSdata, fwd_data_size, conv_desc, fwd_algo, WSdata, fwd_data_size,
CUDNNType<T>::zero, output_desc, Ydata + y_offset * g)); CUDNNType<T>::zero, output_desc, Ydata + y_offset * g));
}
if (HasBias()) { if (HasBias()) {
auto* bias = Input(2).template data<T, Context>(); auto* Bdata = Input(2).template data<T, Context>();
CUDNN_CHECK(cudnnAddTensor(cudnn_handle, CUDNN_CHECK(cudnnAddTensor(cudnn_handle,
CUDNNType<T>::one, bias_desc, bias + bias_offset * g, CUDNNType<T>::one, bias_desc, Bdata,
CUDNNType<T>::one, output_desc, Ydata + y_offset * g)); CUDNNType<T>::one, output_desc, Ydata));
}
} }
} }
...@@ -128,7 +126,6 @@ void CuDNNConv2dOp<Context>::RunOnDevice() { ...@@ -128,7 +126,6 @@ void CuDNNConv2dOp<Context>::RunOnDevice() {
#endif #endif
RunWithType<float>(); RunWithType<float>();
} else if (XIsType(Input(0), float16)) { } else if (XIsType(Input(0), float16)) {
#ifdef WITH_CUDA_FP16
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT; compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc,
...@@ -150,7 +147,6 @@ void CuDNNConv2dOp<Context>::RunOnDevice() { ...@@ -150,7 +147,6 @@ void CuDNNConv2dOp<Context>::RunOnDevice() {
conv_desc, CUDNN_TENSOR_OP_MATH)); conv_desc, CUDNN_TENSOR_OP_MATH));
#endif #endif
RunWithType<float16>(); RunWithType<float16>();
#endif // WITH_CUDA_FP16
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); } else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
...@@ -179,15 +175,12 @@ void CuDNNConv2dGradientOp<Context>::ResetDesc() { ...@@ -179,15 +175,12 @@ void CuDNNConv2dGradientOp<Context>::ResetDesc() {
// determine the bias shape // determine the bias shape
if (HasBias()) { if (HasBias()) {
bias_offset = num_output / cudnn_group;
if (data_format == "NCHW") { if (data_format == "NCHW") {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
&bias_desc, data_format, vector<TIndex>({ 1, num_output, 1, 1 }));
vector<TIndex>({ 1, bias_offset, 1, 1 }));
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
cudnnSetTensor4dDesc<T>(& cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
bias_desc, data_format, vector<TIndex>({ 1, 1, 1, num_output }));
vector<TIndex>({ 1, 1, 1, bias_offset }));
} }
} }
...@@ -234,13 +227,14 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() { ...@@ -234,13 +227,14 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
auto cudnn_handle = ctx()->cudnn_handle(); auto cudnn_handle = ctx()->cudnn_handle();
for (int g = 0; g < cudnn_group; g++) {
if (Output(2)->name() != "ignore") { if (Output(2)->name() != "ignore") {
T* dBdata = Output(2)->template mutable_data<T, Context>(ctx()); T* dBdata = Output(2)->template mutable_data<T, Context>(ctx());
CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle,
CUDNNType<T>::one, input_desc, dYdata + y_offset * g, CUDNNType<T>::one, input_desc, dYdata,
CUDNNType<T>::one, bias_desc, dBdata + bias_offset * g)); CUDNNType<T>::one, bias_desc, dBdata));
} }
for (int g = 0; g < cudnn_group; g++) {
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "ignore") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(ctx()); auto* dWdata = Output(1)->template mutable_data<T, Context>(ctx());
...@@ -295,7 +289,6 @@ void CuDNNConv2dGradientOp<Context>::RunOnDevice() { ...@@ -295,7 +289,6 @@ void CuDNNConv2dGradientOp<Context>::RunOnDevice() {
#endif #endif
RunWithType<float>(); RunWithType<float>();
} else if (XIsType(Input(0), float16)) { } else if (XIsType(Input(0), float16)) {
#ifdef WITH_CUDA_FP16
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT; compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc,
...@@ -317,7 +310,6 @@ void CuDNNConv2dGradientOp<Context>::RunOnDevice() { ...@@ -317,7 +310,6 @@ void CuDNNConv2dGradientOp<Context>::RunOnDevice() {
conv_desc, CUDNN_TENSOR_OP_MATH)); conv_desc, CUDNN_TENSOR_OP_MATH));
#endif #endif
RunWithType<float16>(); RunWithType<float16>();
#endif // WITH_CUDA_FP16
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); } else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
......
...@@ -32,15 +32,12 @@ void CuDNNConv2dTransposeOp<Context>::ResetDesc() { ...@@ -32,15 +32,12 @@ void CuDNNConv2dTransposeOp<Context>::ResetDesc() {
// determine the bias shape // determine the bias shape
if (HasBias()) { if (HasBias()) {
bias_offset = num_output / cudnn_group;
if (data_format == "NCHW") { if (data_format == "NCHW") {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
&bias_desc, data_format, vector<TIndex>({ 1, num_output, 1, 1 }));
vector<TIndex>({ 1, bias_offset, 1, 1 }));
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
&bias_desc, data_format, vector<TIndex>({ 1, 1, 1, num_output }));
vector<TIndex>({ 1, 1, 1, bias_offset }));
} }
} }
...@@ -86,12 +83,13 @@ void CuDNNConv2dTransposeOp<Context>::RunWithType() { ...@@ -86,12 +83,13 @@ void CuDNNConv2dTransposeOp<Context>::RunWithType() {
input_desc, Xdata + x_offset * g, input_desc, Xdata + x_offset * g,
conv_desc, fwd_algo, WSdata, fwd_data_size, conv_desc, fwd_algo, WSdata, fwd_data_size,
CUDNNType<T>::zero, output_desc, Ydata + y_offset * g)); CUDNNType<T>::zero, output_desc, Ydata + y_offset * g));
}
if (HasBias()) { if (HasBias()) {
auto* bias = Input(2).template data<T, Context>(); auto* Bdata = Input(2).template data<T, Context>();
CUDNN_CHECK(cudnnAddTensor(cudnn_handle, CUDNN_CHECK(cudnnAddTensor(cudnn_handle,
CUDNNType<T>::one, bias_desc, bias + bias_offset * g, CUDNNType<T>::one, bias_desc, Bdata,
CUDNNType<T>::one, output_desc, Ydata + y_offset * g)); CUDNNType<T>::one, output_desc, Ydata));
}
} }
} }
...@@ -128,7 +126,6 @@ void CuDNNConv2dTransposeOp<Context>::RunOnDevice() { ...@@ -128,7 +126,6 @@ void CuDNNConv2dTransposeOp<Context>::RunOnDevice() {
#endif #endif
RunWithType<float>(); RunWithType<float>();
} else if (XIsType(Input(0), float16)) { } else if (XIsType(Input(0), float16)) {
#ifdef WITH_CUDA_FP16
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT; compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc,
...@@ -150,7 +147,6 @@ void CuDNNConv2dTransposeOp<Context>::RunOnDevice() { ...@@ -150,7 +147,6 @@ void CuDNNConv2dTransposeOp<Context>::RunOnDevice() {
conv_desc, CUDNN_TENSOR_OP_MATH)); conv_desc, CUDNN_TENSOR_OP_MATH));
#endif #endif
RunWithType<float16>(); RunWithType<float16>();
#endif // WITH_CUDA_FP16
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); } else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
...@@ -179,15 +175,12 @@ void CuDNNConv2dTransposeGradientOp<Context>::ResetDesc() { ...@@ -179,15 +175,12 @@ void CuDNNConv2dTransposeGradientOp<Context>::ResetDesc() {
// determine the bias shape // determine the bias shape
if (HasBias()) { if (HasBias()) {
bias_offset = num_output / cudnn_group;
if (data_format == "NCHW") { if (data_format == "NCHW") {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
&bias_desc, data_format, vector<TIndex>({ 1, num_output, 1, 1 }));
vector<TIndex>({ 1, bias_offset, 1, 1 }));
} else if (data_format == "NHWC") { } else if (data_format == "NHWC") {
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
&bias_desc, data_format, vector<TIndex>({ 1, 1, 1, num_output }));
vector<TIndex>({ 1, 1, 1, bias_offset }));
} }
} }
...@@ -234,13 +227,14 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunWithType() { ...@@ -234,13 +227,14 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunWithType() {
auto cudnn_handle = ctx()->cudnn_handle(); auto cudnn_handle = ctx()->cudnn_handle();
for (int g = 0; g < cudnn_group; g++) {
if (Output(2)->name() != "ignore") { if (Output(2)->name() != "ignore") {
T* dBdata = Output(2)->template mutable_data<T, Context>(ctx()); T* dBdata = Output(2)->template mutable_data<T, Context>(ctx());
CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle,
CUDNNType<T>::one, input_desc, dYdata + y_offset * g, CUDNNType<T>::one, input_desc, dYdata,
CUDNNType<T>::one, bias_desc, dBdata + bias_offset * g)); CUDNNType<T>::one, bias_desc, dBdata));
} }
for (int g = 0; g < cudnn_group; g++) {
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "ignore") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(ctx()); auto* dWdata = Output(1)->template mutable_data<T, Context>(ctx());
...@@ -295,7 +289,6 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunOnDevice() { ...@@ -295,7 +289,6 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunOnDevice() {
#endif #endif
RunWithType<float>(); RunWithType<float>();
} else if (XIsType(Input(0), float16)) { } else if (XIsType(Input(0), float16)) {
#ifdef WITH_CUDA_FP16
#if CUDNN_VERSION_MIN(6, 0, 0) #if CUDNN_VERSION_MIN(6, 0, 0)
compute_type = CUDNN_DATA_FLOAT; compute_type = CUDNN_DATA_FLOAT;
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc, CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc,
...@@ -317,7 +310,6 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunOnDevice() { ...@@ -317,7 +310,6 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunOnDevice() {
conv_desc, CUDNN_TENSOR_OP_MATH)); conv_desc, CUDNN_TENSOR_OP_MATH));
#endif #endif
RunWithType<float16>(); RunWithType<float16>();
#endif // WITH_CUDA_FP16
} else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); } else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
} }
......
...@@ -26,14 +26,9 @@ void CuDNNLRNOp<Context>::RunOnDevice() { ...@@ -26,14 +26,9 @@ void CuDNNLRNOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (this->mode == "ACROSS_CHANNELS") { if (this->mode == "ACROSS_CHANNELS") {
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
} else if (this->mode == "WITHIN_CHANNEL") { } else if (this->mode == "WITHIN_CHANNEL") {
LRNOp<Context>::RunOnDevice(); LRNOp<Context>::RunOnDevice();
} else { } else {
...@@ -69,14 +64,9 @@ void CuDNNLRNGradientOp<Context>::RunOnDevice() { ...@@ -69,14 +64,9 @@ void CuDNNLRNGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
if (this->mode == "ACROSS_CHANNELS") { if (this->mode == "ACROSS_CHANNELS") {
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
} else if (this->mode == "WITHIN_CHANNEL") { } else if (this->mode == "WITHIN_CHANNEL") {
LRNGradientOp<Context>::RunOnDevice(); LRNGradientOp<Context>::RunOnDevice();
} else { } else {
......
...@@ -34,14 +34,9 @@ template <class Context> ...@@ -34,14 +34,9 @@ template <class Context>
void CuDNNPooling2dOp<Context>::RunOnDevice() { void CuDNNPooling2dOp<Context>::RunOnDevice() {
Pooling2dOp<Context>::Reshape(); Pooling2dOp<Context>::Reshape();
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
} }
DEPLOY_CUDNN(Pooling2d); DEPLOY_CUDNN(Pooling2d);
...@@ -79,14 +74,9 @@ template <class Context> ...@@ -79,14 +74,9 @@ template <class Context>
void CuDNNPooling2dGradientOp<Context>::RunOnDevice() { void CuDNNPooling2dGradientOp<Context>::RunOnDevice() {
Pooling2dGradientOp<Context>::Reshape(); Pooling2dGradientOp<Context>::Reshape();
#ifdef WITH_CUDA_FP16
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
else if (XIsType(Input(0), float16)) RunWithType<float16>(); else if (XIsType(Input(0), float16)) RunWithType<float16>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
#else
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
#endif
} }
DEPLOY_CUDNN(Pooling2dGradient); DEPLOY_CUDNN(Pooling2dGradient);
......
...@@ -51,7 +51,8 @@ void DenseConcatGradientOp<Context>::ElimateCorruption() { ...@@ -51,7 +51,8 @@ void DenseConcatGradientOp<Context>::ElimateCorruption() {
} }
int idx = safe_heads.front(); int idx = safe_heads.front();
safe_heads.pop(); safe_heads.pop();
Tensor* buffer = ws()->GetTensor("/opt/mirror_stage/buffer_" + dragon_cast<string, int>(idx)); Tensor* buffer = ws()->GetTensor(
"/opt/mirror_stage/buffer_" + std::to_string(idx));
Input(0).Move(buffer->memory()); Input(0).Move(buffer->memory());
head_data[idx] = Input(0).name(); head_data[idx] = Input(0).name();
if (XIsType(Input(-2), float)) RestoreX1<float>(); if (XIsType(Input(-2), float)) RestoreX1<float>();
...@@ -86,7 +87,8 @@ void DenseConcatGradientOp<Context>::ElimateCorruption() { ...@@ -86,7 +87,8 @@ void DenseConcatGradientOp<Context>::ElimateCorruption() {
<< "\nadd WORKSPACE_MAX_CORRUPTED_SIZE for more powerful mirror stage ?"; << "\nadd WORKSPACE_MAX_CORRUPTED_SIZE for more powerful mirror stage ?";
int idx = safe_heads.front(); int idx = safe_heads.front();
safe_heads.pop(); safe_heads.pop();
Tensor* buffer = ws()->GetTensor("/opt/mirror_stage/buffer_" + dragon_cast<string, int>(idx)); Tensor* buffer = ws()->GetTensor(
"/opt/mirror_stage/buffer_" + std::to_string(idx));
Output(i)->Move(buffer->memory()); Output(i)->Move(buffer->memory());
head_data[idx] = Output(i)->name(); head_data[idx] = Output(i)->name();
} }
......
#include "core/workspace.h"
#include "utils/op_kernel.h"
#include "utils/math_functions.h"
#include "operators/vision/drop_block_op.h"
namespace dragon {
template <class Context> template <typename T>
void DropBlock2dOp<Context>::RunWithType() {
    // DropBlock forward pass: TEST is a pass-through copy; TRAIN builds a
    // block mask, normalizes by the kept fraction, and applies it to X.
    auto* Xdata = Input(0).template data<T, Context>();
    auto* Ydata = Output(0)->template mutable_data<T, Context>();
    if (phase() == "TEST") {
        // Identity at inference; skip the copy when running in-place.
        if (Output(0) != &Input(0)) {
            ctx()->template Copy<T, Context, Context>(
                Output(0)->count(), Ydata, Xdata);
        }
    } else if (phase() == "TRAIN") {
        // Persist mask and normalizer so the gradient op can reuse them
        // (looked up again under the same "/mnt/<anchor>/drop_block/" keys).
        auto* mask = ws()->CreateTensor(
            "/mnt/" + anchor() + "/drop_block/mask");
        auto* norm = ws()->CreateTensor(
            "/mnt/" + anchor() + "/drop_block/norm");
        mask->ReshapeLike(Input(0));
        norm->Reshape(vector<TIndex>({ 1 }));
        // Scratch buffers: [0] random seeds over the seed region,
        // [1] int mask, [2] float copy of the mask for summation.
        auto WSdata = ws()->template caches<Context>({
            n * c * seed_h * seed_w * sizeof(uint32_t),
            mask->count() * sizeof(int),
            mask->count() * sizeof(float)});
        auto* Mdata = mask->template mutable_data<uint8_t, Context>();
        auto* Ndata = norm->template mutable_data<float, CPUContext>();
        // fill the mask with ones
        math::Set<int, Context>(mask->count(),
            1, (int*)WSdata[1], ctx());
        // generate 2d mask from seed region
        kernel::DropBlock2d<Context>(n, c, h, w,
            seed_h, seed_w, block_size, gamma, data_format,
            (uint32_t*)WSdata[0], (int*)WSdata[1], ctx());
        // convert to float mask for counting
        kernel::TypeA2B<int, float, Context>(mask->count(),
            (int*)WSdata[1], (float*)WSdata[2], ctx());
        // convert to uint8 mask for applying
        kernel::TypeA2B<int, uint8_t, Context>(mask->count(),
            (int*)WSdata[1], Mdata, ctx());
        // count && apply
        float normalizer = math::ASum<float, Context>(
            mask->count(), (float*)WSdata[2]);
        // Guard against a fully-dropped mask (division by zero).
        normalizer = std::max(normalizer, 1.f);
        // Scale surviving units by count/kept so activations keep their
        // expected magnitude; stash it for the backward pass.
        Ndata[0] = normalizer = mask->count() / normalizer;
        kernel::ApplyMask<T, uint8_t, Context>(mask->count(),
            normalizer, Xdata, Mdata, Ydata, ctx());
    } else LOG(FATAL) << "Incorrect Op phase: " << phase();
}
// Shape inference + hyper-parameter scheduling, then type dispatch.
// Reads N/C/H/W according to data_format, anneals apply_prob toward
// keep_prob, and derives the per-unit drop rate gamma.
template <class Context>
void DropBlock2dOp<Context>::RunOnDevice() {
    ctx()->set_stream_id(0);  // enforce default stream
    if (data_format == "NCHW") {
        n = Input(0).dim(0), c = Input(0).dim(1);
        h = Input(0).dim(2), w = Input(0).dim(3);
    } else if (data_format == "NHWC") {
        n = Input(0).dim(0), c = Input(0).dim(-1);
        h = Input(0).dim(1), w = Input(0).dim(2);
    } else {
        // Previously an unknown format fell through silently,
        // leaving n/c/h/w stale from a prior run.
        LOG(FATAL) << "Unknown data format: " << data_format;
    }
    // Seeds may be placed anywhere a full block still fits.
    seed_h = h - block_size + 1;
    seed_w = w - block_size + 1;
    CHECK(seed_h > 0 && seed_w > 0)
        << "\nExpected block_size <= feat_size.";
    Output(0)->ReshapeLike(Input(0));
    // Linearly anneal the applied drop probability down to keep_prob.
    if (decrement > 0 && apply_prob > keep_prob()) {
        apply_prob -= decrement;
    } else { apply_prob = keep_prob(); }
    // gamma: expected per-seed drop rate, corrected for block area
    // and for the valid seed region (see the DropBlock paper, Eq. 1).
    gamma = (1.f - apply_prob) / (block_size * block_size);
    gamma *= (alpha * (h * w) / (seed_h * seed_w));
    if (XIsType(Input(0), float)) RunWithType<float>();
    else if (XIsType(Input(0), float16)) RunWithType<float16>();
    else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
}
// Register device implementations for the forward op.
DEPLOY_CPU(DropBlock2d);
#ifdef WITH_CUDA
DEPLOY_CUDA(DropBlock2d);
#endif
// One input (X), one output (Y); Y may alias X's buffer in place.
OPERATOR_SCHEMA(DropBlock2d)
.NumInputs(1).NumOutputs(1)
.Inplace({ { 0, 0 } });
// Backward pass of DropBlock2d: reuse the mask and normalizer that
// the forward pass stashed in the workspace, and route the incoming
// gradient through the same masked/scaled path.
template <class Context> template <typename T>
void DropBlock2dGradientOp<Context>::RunWithType() {
    auto* mask = ws()->GetTensor(
        "/mnt/" + anchor() + "/drop_block/mask");
    auto* norm = ws()->GetTensor(
        "/mnt/" + anchor() + "/drop_block/norm");
    auto* grad_in = Input(-1).template data<T, Context>();
    auto* grad_out = Output(0)->template mutable_data<T, Context>();
    auto* mask_bytes = mask->template data<uint8_t, Context>();
    auto* norm_host = norm->template mutable_data<float, CPUContext>();
    if (phase() == "TRAIN") {
        // dX = dY * mask * normalizer
        kernel::ApplyMask<T, uint8_t, Context>(mask->count(),
            norm_host[0], grad_in, mask_bytes, grad_out, ctx());
    } else if (phase() == "TEST") {
        NOT_IMPLEMENTED;
    } else LOG(FATAL) << "Incorrect Op phase: " << phase();
}
// Shape the gradient like the input, then dispatch on data type.
template <class Context>
void DropBlock2dGradientOp<Context>::RunOnDevice() {
    Output(0)->ReshapeLike(Input(0));
    if (XIsType(Input(0), float)) {
        RunWithType<float>();
    } else if (XIsType(Input(0), float16)) {
        RunWithType<float16>();
    } else {
        LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
    }
}
// Register device implementations for the gradient op.
DEPLOY_CPU(DropBlock2dGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(DropBlock2dGradient);
#endif
// Inputs: {Y, dY}; output: {dX}; dX may alias dY's buffer in place.
OPERATOR_SCHEMA(DropBlock2dGradient)
.NumInputs(2).NumOutputs(1)
.Inplace({ { 1, 0 } });
// Gradient maker: emits a DropBlock2dGradient op consuming the
// forward output O(0) and its gradient GO(0), producing GI(0).
class GetDropBlock2dGradient final : public GradientMakerBase {
 public:
GRADIENT_MAKER_CTOR(GetDropBlock2dGradient);
vector<OperatorDef> MakeDefs() override {
return SingleDef(def.type() + "Gradient", "",
vector<string> {O(0), GO(0)},
vector<string> {GI(0)});
}
};
REGISTER_GRADIENT(DropBlock2d, GetDropBlock2dGradient);
} // namespace dragon
\ No newline at end of file
...@@ -35,7 +35,11 @@ message Argument { ...@@ -35,7 +35,11 @@ message Argument {
repeated string strings=7; repeated string strings=7;
} }
enum DeviceType { CPU = 0; CUDA = 1; OPENCL = 2; } enum DeviceType {
CPU = 0;
CUDA = 1;
CNML = 2;
}
message DeviceOption { message DeviceOption {
optional DeviceType device_type = 1 [default = CPU]; optional DeviceType device_type = 1 [default = CPU];
......
...@@ -20,8 +20,6 @@ static_cast<void *>(&CUDNNType<double>::oneval); ...@@ -20,8 +20,6 @@ static_cast<void *>(&CUDNNType<double>::oneval);
const void* CUDNNType<double>::zero = const void* CUDNNType<double>::zero =
static_cast<void *>(&CUDNNType<double>::zeroval); static_cast<void *>(&CUDNNType<double>::zeroval);
#ifdef WITH_CUDA_FP16
float CUDNNType<float16>::oneval = 1.0; float CUDNNType<float16>::oneval = 1.0;
float CUDNNType<float16>::zeroval = 0.0; float CUDNNType<float16>::zeroval = 0.0;
const void* CUDNNType<float16>::one = const void* CUDNNType<float16>::one =
...@@ -29,8 +27,6 @@ static_cast<void*>(&CUDNNType<float16>::oneval); ...@@ -29,8 +27,6 @@ static_cast<void*>(&CUDNNType<float16>::oneval);
const void* CUDNNType<float16>::zero = const void* CUDNNType<float16>::zero =
static_cast<void*>(&CUDNNType<float16>::zeroval); static_cast<void*>(&CUDNNType<float16>::zeroval);
#endif
template <typename T> template <typename T>
void cudnnSetTensorDesc( void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc, cudnnTensorDescriptor_t* desc,
...@@ -173,8 +169,7 @@ void cudnnSetTensor4dDesc( ...@@ -173,8 +169,7 @@ void cudnnSetTensor4dDesc(
<< "\nThe num of dimensions of Tensor(" << "\nThe num of dimensions of Tensor("
<< tensor->name() << ") " << tensor->name() << ") "
<< "should be 4, but got " << tensor->ndim() << "."; << "should be 4, but got " << tensor->ndim() << ".";
cudnnSetTensor4dDesc<T>( cudnnSetTensor4dDesc<T>(desc, data_format, tensor->dims());
desc, data_format, tensor->dims());
} }
template <typename T> template <typename T>
...@@ -186,8 +181,7 @@ void cudnnSetTensor5dDesc( ...@@ -186,8 +181,7 @@ void cudnnSetTensor5dDesc(
<< "\nThe num of dimensions of Tensor(" << "\nThe num of dimensions of Tensor("
<< tensor->name() << ") " << tensor->name() << ") "
<< "should be 5, but got " << tensor->ndim() << "."; << "should be 5, but got " << tensor->ndim() << ".";
cudnnSetTensor5dDesc<T>( cudnnSetTensor5dDesc<T>(desc, data_format, tensor->dims());
desc, data_format, tensor->dims());
} }
template <typename T> template <typename T>
...@@ -300,9 +294,6 @@ template void cudnnSetTensorDesc<double>( ...@@ -300,9 +294,6 @@ template void cudnnSetTensorDesc<double>(
const vector<TIndex>&, const vector<TIndex>&,
const vector<TIndex>&); const vector<TIndex>&);
#ifdef WITH_CUDA_FP16
template void cudnnSetTensorDesc<float16>( template void cudnnSetTensorDesc<float16>(
cudnnTensorDescriptor_t*, cudnnTensorDescriptor_t*,
Tensor*); Tensor*);
...@@ -352,8 +343,6 @@ template void cudnnSetTensorDesc<float16>( ...@@ -352,8 +343,6 @@ template void cudnnSetTensorDesc<float16>(
const vector<TIndex>&, const vector<TIndex>&,
const vector<TIndex>&); const vector<TIndex>&);
#endif // WITH_CUDA_FP16
} // namespace dragon } // namespace dragon
#endif // WITH_CUDNN #endif // WITH_CUDNN
\ No newline at end of file
...@@ -35,8 +35,7 @@ LogSeverity StrToLogSeverity(std::string level) { ...@@ -35,8 +35,7 @@ LogSeverity StrToLogSeverity(std::string level) {
} }
std::string GenLogHashKey(const char* file, int line) { std::string GenLogHashKey(const char* file, int line) {
return std::string(file) + return std::string(file) + std::to_string(line);
dragon_cast<std::string, int>(line);
} }
int EveryNRegister( int EveryNRegister(
......
...@@ -49,6 +49,21 @@ template <> void Set<int, CPUContext>( ...@@ -49,6 +49,21 @@ template <> void Set<int, CPUContext>(
#endif // WITH_SSE #endif // WITH_SSE
} }
// Fill the first n elements of x with alpha.
// Zero-fill delegates to memset, which is typically faster than a loop.
template <> void Set<int64_t, CPUContext>(
    const int n,
    const int64_t alpha,
    int64_t* x,
    CPUContext* ctx) {
    if (alpha != 0) {
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(n))
#endif
        for (int idx = 0; idx < n; ++idx) x[idx] = alpha;
    } else {
        memset(x, 0, sizeof(int64_t) * n);
    }
}
template <> void Set<float16, CPUContext>( template <> void Set<float16, CPUContext>(
const int n, const int n,
const float16 alpha, const float16 alpha,
...@@ -148,19 +163,36 @@ template <> void RandomTruncatedNormal<float16, CPUContext>( ...@@ -148,19 +163,36 @@ template <> void RandomTruncatedNormal<float16, CPUContext>(
NOT_IMPLEMENTED; NOT_IMPLEMENTED;
} }
template <> void RandomBernoulli<float, CPUContext>( template <typename T>
void _RandomBernoulli(
const int n, const int n,
const float p, const float p,
uint32_t* x, T* x,
CPUContext* ctx) { CPUContext* ctx) {
std::bernoulli_distribution distribution(p); std::bernoulli_distribution distribution(p);
auto* rng = ctx->rand_generator(); auto* rng = ctx->rand_generator();
#ifdef WITH_OMP #ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(n)) #pragma omp parallel for num_threads(GET_OMP_THREADS(n))
#endif #endif
for (int i = 0; i < n; ++i) x[i] = distribution(*rng); for (int i = 0; i < n; ++i) x[i] = distribution(*rng);
} }
// Draw n Bernoulli(p) samples into x as 0/1 bytes, using the
// context's RNG. Thin wrapper over the _RandomBernoulli helper above.
template <> void RandomBernoulli<uint8_t, CPUContext>(
const int n,
const float p,
uint8_t* x,
CPUContext* ctx) {
_RandomBernoulli<uint8_t>(n, p, x, ctx);
}
// Draw n Bernoulli(p) samples into x as 0/1 words, using the
// context's RNG. Thin wrapper over the _RandomBernoulli helper above.
template <> void RandomBernoulli<uint32_t, CPUContext>(
const int n,
const float p,
uint32_t* x,
CPUContext* ctx) {
_RandomBernoulli<uint32_t>(n, p, x, ctx);
}
/******************** Level-1 ********************/ /******************** Level-1 ********************/
template <> void Add<float, CPUContext>( template <> void Add<float, CPUContext>(
...@@ -311,6 +343,14 @@ template <> void Log<float, CPUContext>( ...@@ -311,6 +343,14 @@ template <> void Log<float, CPUContext>(
for (int i = 0; i < n; ++i) y[i] = std::log(x[i]); for (int i = 0; i < n; ++i) y[i] = std::log(x[i]);
} }
// float16 Log has no CPU implementation; this stub aborts via
// CPU_FP16_NOT_SUPPORTED (fp16 math is CUDA-only in this codebase).
template <> void Log<float16, CPUContext>(
int n,
const float16* x,
float16* y,
CPUContext* ctx) {
CPU_FP16_NOT_SUPPORTED;
}
template <> void Square<float, CPUContext>( template <> void Square<float, CPUContext>(
int n, int n,
const float* x, const float* x,
...@@ -379,7 +419,7 @@ template <> void Inv<float, CPUContext>( ...@@ -379,7 +419,7 @@ template <> void Inv<float, CPUContext>(
#ifdef WITH_OMP #ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(n)) #pragma omp parallel for num_threads(GET_OMP_THREADS(n))
#endif #endif
for (int i = 0; i < n; ++i) y[i] = numerator / y[i]; for (int i = 0; i < n; ++i) y[i] = numerator / x[i];
} }
template <> void Inv<float16, CPUContext>( template <> void Inv<float16, CPUContext>(
......
...@@ -53,6 +53,22 @@ template <> void Set<int, CUDAContext>( ...@@ -53,6 +53,22 @@ template <> void Set<int, CUDAContext>(
} }
} }
// Fill the first n elements of x with alpha on the context's stream.
// Zero-fill uses the async memset; otherwise launch the _Set kernel.
template <> void Set<int64_t, CUDAContext>(
const int n,
const int64_t alpha,
int64_t* x,
CUDAContext* ctx) {
if (alpha == 0) {
CUDA_CHECK(cudaMemsetAsync(x, 0,
sizeof(int64_t) * n, ctx->cuda_stream()));
}
else {
_Set<int64_t>
<< < CUDA_BLOCKS(n), CUDA_THREADS,
0, ctx->cuda_stream() >> >(n, alpha, x);
}
}
template <> void RandomUniform<uint32_t, CUDAContext>( template <> void RandomUniform<uint32_t, CUDAContext>(
const int n, const int n,
const float low, const float low,
...@@ -75,16 +91,6 @@ template <> void RandomNormal<float, CUDAContext>( ...@@ -75,16 +91,6 @@ template <> void RandomNormal<float, CUDAContext>(
CURAND_CHECK(curandGenerateNormal(rng, x, n, mu, sigma)); CURAND_CHECK(curandGenerateNormal(rng, x, n, mu, sigma));
} }
template <> void RandomBernoulli<float, CUDAContext>(
const int n,
const float p,
unsigned int* x,
CUDAContext* ctx) {
// curand could not generate bernoulli distribution
// we recommend implement it within specfic case, e.g. Dropout
NOT_IMPLEMENTED;
}
/******************** Level-1 ********************/ /******************** Level-1 ********************/
template <typename T> template <typename T>
......
...@@ -28,7 +28,6 @@ template <> void Set<float16, CUDAContext>( ...@@ -28,7 +28,6 @@ template <> void Set<float16, CUDAContext>(
const float16 alpha, const float16 alpha,
float16* x, float16* x,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) { if ((n & 1) == 0) {
_SetHalf<half2> _SetHalf<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS, << < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
...@@ -40,12 +39,8 @@ template <> void Set<float16, CUDAContext>( ...@@ -40,12 +39,8 @@ template <> void Set<float16, CUDAContext>(
<< < CUDA_BLOCKS(n), CUDA_THREADS, << < CUDA_BLOCKS(n), CUDA_THREADS,
0, ctx->cuda_stream() >> >(n, alpha, x); 0, ctx->cuda_stream() >> >(n, alpha, x);
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
#ifdef WITH_CUDA_FP16
__global__ void _TypeFloat2Half( __global__ void _TypeFloat2Half(
const int n, const int n,
const float* a, const float* a,
...@@ -54,7 +49,6 @@ __global__ void _TypeFloat2Half( ...@@ -54,7 +49,6 @@ __global__ void _TypeFloat2Half(
b[idx] = __float2half(a[idx]); b[idx] = __float2half(a[idx]);
} }
} }
#endif
template <> void RandomNormal<float16, CUDAContext>( template <> void RandomNormal<float16, CUDAContext>(
const int n, const int n,
...@@ -62,7 +56,6 @@ template <> void RandomNormal<float16, CUDAContext>( ...@@ -62,7 +56,6 @@ template <> void RandomNormal<float16, CUDAContext>(
const float sigma, const float sigma,
float16* x, float16* x,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
float* xf32 = (float*)CUDAContext::New(n * sizeof(float)); float* xf32 = (float*)CUDAContext::New(n * sizeof(float));
CURAND_CHECK(curandGenerateNormal( CURAND_CHECK(curandGenerateNormal(
ctx->curand_generator(), xf32, n, mu, sigma)); ctx->curand_generator(), xf32, n, mu, sigma));
...@@ -71,14 +64,10 @@ template <> void RandomNormal<float16, CUDAContext>( ...@@ -71,14 +64,10 @@ template <> void RandomNormal<float16, CUDAContext>(
0, ctx->cuda_stream() >> >(n, 0, ctx->cuda_stream() >> >(n,
xf32, reinterpret_cast<half*>(x)); xf32, reinterpret_cast<half*>(x));
CUDAContext::Delete(xf32); CUDAContext::Delete(xf32);
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
/******************** Level-1 ********************/ /******************** Level-1 ********************/
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _AddHalf( __global__ void _AddHalf(
const int n, const int n,
...@@ -104,7 +93,6 @@ __global__ void _AddHalf2( ...@@ -104,7 +93,6 @@ __global__ void _AddHalf2(
#endif #endif
} }
} }
#endif
template <> void Add<float16, CUDAContext>( template <> void Add<float16, CUDAContext>(
int n, int n,
...@@ -112,7 +100,6 @@ template <> void Add<float16, CUDAContext>( ...@@ -112,7 +100,6 @@ template <> void Add<float16, CUDAContext>(
const float16* b, const float16* b,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) { if ((n & 1) == 0) {
_AddHalf2<half2> _AddHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS, << < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
...@@ -128,12 +115,8 @@ template <> void Add<float16, CUDAContext>( ...@@ -128,12 +115,8 @@ template <> void Add<float16, CUDAContext>(
reinterpret_cast<const half*>(b), reinterpret_cast<const half*>(b),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _SubHalf( __global__ void _SubHalf(
const int n, const int n,
...@@ -159,7 +142,6 @@ __global__ void _SubHalf2( ...@@ -159,7 +142,6 @@ __global__ void _SubHalf2(
#endif #endif
} }
} }
#endif
template <> void Sub<float16, CUDAContext>( template <> void Sub<float16, CUDAContext>(
int n, int n,
...@@ -167,7 +149,6 @@ template <> void Sub<float16, CUDAContext>( ...@@ -167,7 +149,6 @@ template <> void Sub<float16, CUDAContext>(
const float16* b, const float16* b,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) { if ((n & 1) == 0) {
_SubHalf2<half2> _SubHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS, << < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
...@@ -183,12 +164,8 @@ template <> void Sub<float16, CUDAContext>( ...@@ -183,12 +164,8 @@ template <> void Sub<float16, CUDAContext>(
reinterpret_cast<const half*>(b), reinterpret_cast<const half*>(b),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _MulHalf( __global__ void _MulHalf(
const int n, const int n,
...@@ -214,7 +191,6 @@ __global__ void _MulHalf2( ...@@ -214,7 +191,6 @@ __global__ void _MulHalf2(
#endif #endif
} }
} }
#endif
template <> void Mul<float16, CUDAContext>( template <> void Mul<float16, CUDAContext>(
int n, int n,
...@@ -222,7 +198,6 @@ template <> void Mul<float16, CUDAContext>( ...@@ -222,7 +198,6 @@ template <> void Mul<float16, CUDAContext>(
const float16* b, const float16* b,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) { if ((n & 1) == 0) {
_MulHalf2<half2> _MulHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS, << < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
...@@ -238,12 +213,8 @@ template <> void Mul<float16, CUDAContext>( ...@@ -238,12 +213,8 @@ template <> void Mul<float16, CUDAContext>(
reinterpret_cast<const half*>(b), reinterpret_cast<const half*>(b),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _DivHalf( __global__ void _DivHalf(
const int n, const int n,
...@@ -256,7 +227,6 @@ __global__ void _DivHalf( ...@@ -256,7 +227,6 @@ __global__ void _DivHalf(
#endif #endif
} }
} }
#endif
template <> void Div<float16, CUDAContext>( template <> void Div<float16, CUDAContext>(
int n, int n,
...@@ -264,19 +234,59 @@ template <> void Div<float16, CUDAContext>( ...@@ -264,19 +234,59 @@ template <> void Div<float16, CUDAContext>(
const float16* b, const float16* b,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_DivHalf<half> _DivHalf<half>
<< < CUDA_BLOCKS(n), CUDA_THREADS, << < CUDA_BLOCKS(n), CUDA_THREADS,
0, ctx->cuda_stream() >> >(n, 0, ctx->cuda_stream() >> >(n,
reinterpret_cast<const half*>(a), reinterpret_cast<const half*>(a),
reinterpret_cast<const half*>(b), reinterpret_cast<const half*>(b),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
#else }
CUDA_FP16_NOT_COMPILED;
// Element-wise natural log for half data (one lane per element).
// hlog requires sm_53+; on older architectures the body compiles to
// a no-op and y is left untouched (matches the other half kernels).
template <typename T>
__global__ void _LogHalf(
const int n,
const T* a,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, n) {
#if __CUDA_ARCH__ >= 530
y[idx] = hlog(a[idx]);
#endif
}
}
// Element-wise natural log for half2 data (two lanes per element).
// h2log requires sm_53+; on older architectures the body compiles to
// a no-op and y is left untouched (matches the other half kernels).
template <typename T>
__global__ void _LogHalf2(
const int n,
const T* a,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, n) {
#if __CUDA_ARCH__ >= 530
y[idx] = h2log(a[idx]);
#endif
}
}
// Compute y = log(x) element-wise for float16 on CUDA.
// Even-length inputs are vectorized two lanes at a time via half2;
// odd lengths fall back to the scalar half kernel.
template <> void Log<float16, CUDAContext>(
    int n,
    const float16* x,
    float16* y,
    CUDAContext* ctx) {
    if (n & 1) {
        _LogHalf<half>
            << < CUDA_BLOCKS(n), CUDA_THREADS,
                0, ctx->cuda_stream() >> >(n,
                    reinterpret_cast<const half*>(x),
                    reinterpret_cast<half*>(y));
    } else {
        const int n2 = n >> 1;
        _LogHalf2<half2>
            << < CUDA_BLOCKS(n2), CUDA_THREADS,
                0, ctx->cuda_stream() >> >(n2,
                    reinterpret_cast<const half2*>(x),
                    reinterpret_cast<half2*>(y));
    }
}
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _SquareHalf( __global__ void _SquareHalf(
const int n, const int n,
...@@ -300,14 +310,12 @@ __global__ void _SquareHalf2( ...@@ -300,14 +310,12 @@ __global__ void _SquareHalf2(
#endif #endif
} }
} }
#endif
template <> void Square<float16, CUDAContext>( template <> void Square<float16, CUDAContext>(
int n, int n,
const float16* x, const float16* x,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) { if ((n & 1) == 0) {
_SquareHalf2<half2> _SquareHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS, << < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
...@@ -321,12 +329,8 @@ template <> void Square<float16, CUDAContext>( ...@@ -321,12 +329,8 @@ template <> void Square<float16, CUDAContext>(
reinterpret_cast<const half*>(x), reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _SqrtHalf( __global__ void _SqrtHalf(
int n, int n,
...@@ -350,14 +354,12 @@ __global__ void _SqrtHalf2( ...@@ -350,14 +354,12 @@ __global__ void _SqrtHalf2(
#endif #endif
} }
} }
#endif
template <> void Sqrt<float16, CUDAContext>( template <> void Sqrt<float16, CUDAContext>(
int n, int n,
const float16* x, const float16* x,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) { if ((n & 1) == 0) {
_SqrtHalf2<half2> _SqrtHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS, << < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
...@@ -371,12 +373,8 @@ template <> void Sqrt<float16, CUDAContext>( ...@@ -371,12 +373,8 @@ template <> void Sqrt<float16, CUDAContext>(
reinterpret_cast<const half*>(x), reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _PowHalf( __global__ void _PowHalf(
const int n, const int n,
...@@ -402,7 +400,6 @@ __global__ void _PowHalf2( ...@@ -402,7 +400,6 @@ __global__ void _PowHalf2(
#endif #endif
} }
} }
#endif
template <> void Pow<float16, CUDAContext>( template <> void Pow<float16, CUDAContext>(
int n, int n,
...@@ -410,7 +407,6 @@ template <> void Pow<float16, CUDAContext>( ...@@ -410,7 +407,6 @@ template <> void Pow<float16, CUDAContext>(
const float16* x, const float16* x,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
CHECK(alpha == float(2)) << "fp16 only support the power of 2"; CHECK(alpha == float(2)) << "fp16 only support the power of 2";
if ((n & 1) == 0) { if ((n & 1) == 0) {
_PowHalf2<half2> _PowHalf2<half2>
...@@ -425,12 +421,8 @@ template <> void Pow<float16, CUDAContext>( ...@@ -425,12 +421,8 @@ template <> void Pow<float16, CUDAContext>(
alpha, reinterpret_cast<const half*>(x), alpha, reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _InvHalf( __global__ void _InvHalf(
const int n, const int n,
...@@ -456,7 +448,6 @@ __global__ void _InvHalf2( ...@@ -456,7 +448,6 @@ __global__ void _InvHalf2(
#endif #endif
} }
} }
#endif
template <> void Inv<float16, CUDAContext>( template <> void Inv<float16, CUDAContext>(
const int n, const int n,
...@@ -464,7 +455,6 @@ template <> void Inv<float16, CUDAContext>( ...@@ -464,7 +455,6 @@ template <> void Inv<float16, CUDAContext>(
const float16* x, const float16* x,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) { if ((n & 1) == 0) {
_InvHalf2<half2> _InvHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS, << < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
...@@ -480,9 +470,6 @@ template <> void Inv<float16, CUDAContext>( ...@@ -480,9 +470,6 @@ template <> void Inv<float16, CUDAContext>(
reinterpret_cast<const half*>(x), reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
/******************** Level-2 ********************/ /******************** Level-2 ********************/
...@@ -492,15 +479,11 @@ template <> void Scal<float16, CUDAContext>( ...@@ -492,15 +479,11 @@ template <> void Scal<float16, CUDAContext>(
const float alpha, const float alpha,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
CUBLAS_CHECK(cublasScalEx( CUBLAS_CHECK(cublasScalEx(
ctx->cublas_handle(), n, ctx->cublas_handle(), n,
&alpha, CUDA_R_32F, &alpha, CUDA_R_32F,
y, CUDA_R_16F, 1, y, CUDA_R_16F, 1,
CUDA_R_32F)); CUDA_R_32F));
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
template <> void Scale<float16, CUDAContext>( template <> void Scale<float16, CUDAContext>(
...@@ -519,7 +502,6 @@ template <> void Dot<float16, CUDAContext>( ...@@ -519,7 +502,6 @@ template <> void Dot<float16, CUDAContext>(
const float16* b, const float16* b,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
CUBLAS_CHECK(cublasDotEx( CUBLAS_CHECK(cublasDotEx(
ctx->cublas_handle(), n, ctx->cublas_handle(), n,
a, CUDA_R_16F, 1, a, CUDA_R_16F, 1,
...@@ -527,12 +509,8 @@ template <> void Dot<float16, CUDAContext>( ...@@ -527,12 +509,8 @@ template <> void Dot<float16, CUDAContext>(
y, CUDA_R_16F, y, CUDA_R_16F,
CUDA_R_32F)); CUDA_R_32F));
ctx->FinishDeviceCompution(); ctx->FinishDeviceCompution();
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _AddScalarHalf( __global__ void _AddScalarHalf(
const int n, const int n,
...@@ -556,14 +534,12 @@ __global__ void _AddScalarHalf2( ...@@ -556,14 +534,12 @@ __global__ void _AddScalarHalf2(
#endif #endif
} }
} }
#endif
template <> void AddScalar<float16, CUDAContext>( template <> void AddScalar<float16, CUDAContext>(
const int n, const int n,
const float alpha, const float alpha,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) { if ((n & 1) == 0) {
_AddScalarHalf2<half2> _AddScalarHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS, << < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
...@@ -577,12 +553,8 @@ template <> void AddScalar<float16, CUDAContext>( ...@@ -577,12 +553,8 @@ template <> void AddScalar<float16, CUDAContext>(
dragon_cast<half, float>(alpha), dragon_cast<half, float>(alpha),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _MulScalarHalf( __global__ void _MulScalarHalf(
const int n, const int n,
...@@ -606,14 +578,12 @@ __global__ void _MulScalarHalf2( ...@@ -606,14 +578,12 @@ __global__ void _MulScalarHalf2(
#endif #endif
} }
} }
#endif
template <> void MulScalar<float16, CUDAContext>( template <> void MulScalar<float16, CUDAContext>(
const int n, const int n,
const float alpha, const float alpha,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if ((n & 1) == 0) { if ((n & 1) == 0) {
_MulScalarHalf2<half2> _MulScalarHalf2<half2>
<< < CUDA_BLOCKS(n >> 1), CUDA_THREADS, << < CUDA_BLOCKS(n >> 1), CUDA_THREADS,
...@@ -627,9 +597,6 @@ template <> void MulScalar<float16, CUDAContext>( ...@@ -627,9 +597,6 @@ template <> void MulScalar<float16, CUDAContext>(
dragon_cast<half, float>(alpha), dragon_cast<half, float>(alpha),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
template <> void Axpy<float16, CUDAContext>( template <> void Axpy<float16, CUDAContext>(
...@@ -638,16 +605,12 @@ template <> void Axpy<float16, CUDAContext>( ...@@ -638,16 +605,12 @@ template <> void Axpy<float16, CUDAContext>(
const float16* x, const float16* x,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
CUBLAS_CHECK(cublasAxpyEx( CUBLAS_CHECK(cublasAxpyEx(
ctx->cublas_handle(), n, ctx->cublas_handle(), n,
&alpha, CUDA_R_32F, &alpha, CUDA_R_32F,
x, CUDA_R_16F, 1, x, CUDA_R_16F, 1,
y, CUDA_R_16F, 1, y, CUDA_R_16F, 1,
CUDA_R_32F)); CUDA_R_32F));
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
template <> void Axpby<float16, CUDAContext>( template <> void Axpby<float16, CUDAContext>(
...@@ -667,7 +630,6 @@ template <> void RandomUniform<float16, CUDAContext>( ...@@ -667,7 +630,6 @@ template <> void RandomUniform<float16, CUDAContext>(
const float high, const float high,
float16* x, float16* x,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
float* xf32 = (float*)ctx->New(n * sizeof(float)); float* xf32 = (float*)ctx->New(n * sizeof(float));
CURAND_CHECK(curandGenerateUniform( CURAND_CHECK(curandGenerateUniform(
ctx->curand_generator(), xf32, n)); ctx->curand_generator(), xf32, n));
...@@ -679,9 +641,6 @@ template <> void RandomUniform<float16, CUDAContext>( ...@@ -679,9 +641,6 @@ template <> void RandomUniform<float16, CUDAContext>(
if (range != 1.f) Scal<float16, CUDAContext>(n, range, x, ctx); if (range != 1.f) Scal<float16, CUDAContext>(n, range, x, ctx);
if (low != 0.f) AddScalar<float16, CUDAContext>(n, low, x, ctx); if (low != 0.f) AddScalar<float16, CUDAContext>(n, low, x, ctx);
ctx->Delete(xf32); ctx->Delete(xf32);
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
/******************** Level-3 ********************/ /******************** Level-3 ********************/
...@@ -699,7 +658,6 @@ template <> void Gemm<float16, CUDAContext>( ...@@ -699,7 +658,6 @@ template <> void Gemm<float16, CUDAContext>(
float16* C, float16* C,
CUDAContext* ctx, CUDAContext* ctx,
TensorProto_DataType math_type) { TensorProto_DataType math_type) {
#ifdef WITH_CUDA_FP16
int lda = (TransA == CblasNoTrans) ? K : M; int lda = (TransA == CblasNoTrans) ? K : M;
int ldb = (TransB == CblasNoTrans) ? N : K; int ldb = (TransB == CblasNoTrans) ? N : K;
cublasOperation_t cuTransA = (TransA == CblasNoTrans) ? cublasOperation_t cuTransA = (TransA == CblasNoTrans) ?
...@@ -782,9 +740,6 @@ template <> void Gemm<float16, CUDAContext>( ...@@ -782,9 +740,6 @@ template <> void Gemm<float16, CUDAContext>(
} else { } else {
LOG(FATAL) << "Unsupported math type"; LOG(FATAL) << "Unsupported math type";
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
template <> void Gemv<float16, CUDAContext>( template <> void Gemv<float16, CUDAContext>(
...@@ -798,7 +753,6 @@ template <> void Gemv<float16, CUDAContext>( ...@@ -798,7 +753,6 @@ template <> void Gemv<float16, CUDAContext>(
float16* y, float16* y,
CUDAContext* ctx, CUDAContext* ctx,
TensorProto_DataType math_type) { TensorProto_DataType math_type) {
#ifdef WITH_CUDA_FP16
cublasOperation_t cuTransA = (TransA == CblasNoTrans) ? cublasOperation_t cuTransA = (TransA == CblasNoTrans) ?
CUBLAS_OP_T : CUBLAS_OP_N; CUBLAS_OP_T : CUBLAS_OP_N;
int m = (cuTransA == CUBLAS_OP_N) ? N : M; int m = (cuTransA == CUBLAS_OP_N) ? N : M;
...@@ -881,9 +835,6 @@ template <> void Gemv<float16, CUDAContext>( ...@@ -881,9 +835,6 @@ template <> void Gemv<float16, CUDAContext>(
} else { } else {
LOG(FATAL) << "Unsupported math type"; LOG(FATAL) << "Unsupported math type";
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
} // namespace math } // namespace math
......
...@@ -21,30 +21,65 @@ template<> void Dropout<float, CPUContext>( ...@@ -21,30 +21,65 @@ template<> void Dropout<float, CPUContext>(
float prob, float prob,
float scale, float scale,
const float* x, const float* x,
uint32_t* mask, uint32_t* mask32,
uint8_t* mask8,
float* y, float* y,
CPUContext* ctx) { CPUContext* ctx) {
uint32_t thresh = static_cast<uint32_t>(UINT_MAX * prob); math::RandomBernoulli<uint8_t, CPUContext>(
math::RandomBernoulli<float, CPUContext>(count, 1 - prob, mask, ctx); count, 1 - prob, mask8, ctx);
#ifdef WITH_OMP #ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count)) #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif #endif
for (int i = 0; i < count; ++i) y[i] = x[i] * mask[i] * scale; for (int i = 0; i < count; ++i) {
y[i] = x[i] * mask8[i] * scale;
}
} }
template<> void DropoutGrad<float, CPUContext>( template<> void Dropout<float16, CPUContext>(
const int count, const int count,
float prob, float prob,
float scale, float scale,
const float* dy, const float16* x,
const uint32_t* mask, uint32_t* mask32,
float* dx, uint8_t* mask8,
float16* y,
CPUContext* ctx) { CPUContext* ctx) {
CPU_FP16_NOT_SUPPORTED;
}
template <typename Tx, typename Tm>
void _ApplyMask(
const int count,
const float scale,
const Tx* x,
const Tm* mask,
Tx* y) {
#ifdef WITH_OMP #ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count)) #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif #endif
for (int i = 0; i < count; ++i) for (int i = 0; i < count; ++i) {
dx[i] = dy[i] * mask[i] * scale; y[i] = x[i] * mask[i] * scale;
}
}
template <> void ApplyMask<float, uint8_t, CPUContext>(
const int count,
const float scale,
const float* x,
const uint8_t* mask,
float* y,
CPUContext* ctx) {
_ApplyMask<float, uint8_t>(count, scale, x, mask, y);
}
template <> void ApplyMask<float16, uint8_t, CPUContext>(
const int count,
const float scale,
const float16* x,
const uint8_t* mask,
float16* y,
CPUContext* ctx) {
CPU_FP16_NOT_SUPPORTED;
} }
/******************** activation.elu ********************/ /******************** activation.elu ********************/
...@@ -479,19 +514,161 @@ template <> void Clip<float, CPUContext>( ...@@ -479,19 +514,161 @@ template <> void Clip<float, CPUContext>(
const float low, const float low,
const float high, const float high,
const float* x, const float* x,
float* mask,
float* y, float* y,
CPUContext* ctx) { CPUContext* ctx) {
#ifdef WITH_OMP #ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count)) #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif #endif
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
mask[i] = 1.0;
if (x[i] < low || x[i] > high) mask[i] = 0.0;
y[i] = std::max(low, std::min(x[i], high)); y[i] = std::max(low, std::min(x[i], high));
} }
} }
/*! Gradient of Clip: pass dy through where x stayed inside [low, high];
    elements that were clipped contribute zero gradient. */
template <> void ClipGrad<float, CPUContext>(
    const int count,
    const float low,
    const float high,
    const float* x,
    const float* dy,
    float* dx,
    CPUContext* ctx) {
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int i = 0; i < count; ++i) {
        const bool clipped = (x[i] < low) || (x[i] > high);
        dx[i] = clipped ? 0 : dy[i];
    }
}
/******************** arithmetic.maximum ********************/
/*! Element-wise maximum of two arrays: y[i] = max(x1[i], x2[i]). */
template <> void MaximumE<float, CPUContext>(
const int count,
const float* x1,
const float* x2,
float* y,
CPUContext* ctx) {
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
y[i] = std::max(x1[i], x2[i]);
}
}
/*! Broadcast maximum against a scalar: y[i] = max(x1[i], x2). */
template <> void MaximumB<float, CPUContext>(
const int count,
const float* x1,
const float x2,
float* y,
CPUContext* ctx) {
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
y[i] = std::max(x1[i], x2);
}
}
/*! Gradient of element-wise maximum: dy flows to whichever input was larger.
    On ties (x1[i] == x2[i]) the gradient goes to x2. */
template <> void MaximumEGrad<float, CPUContext>(
const int count,
const float* x1,
const float* x2,
const float* dy,
float* dx1,
float* dx2,
CPUContext* ctx) {
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
const bool dy_to_dx1 = x1[i] > x2[i];
dx1[i] = dy_to_dx1 ? dy[i] : 0;
dx2[i] = dy_to_dx1 ? 0 : dy[i];
}
}
/*! Gradient of broadcast maximum; the scalar operand gets no gradient
    (its slot is intentionally commented out in the signature). */
template <> void MaximumBGrad<float, CPUContext>(
const int count,
const float* x1,
const float x2,
const float* dy,
float* dx1,
/* float* dx2, */
CPUContext* ctx) {
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
dx1[i] = (x1[i] > x2) ? dy[i] : 0;
}
}
/******************** arithmetic.minimum ********************/
/*! Element-wise minimum of two arrays: y[i] = min(x1[i], x2[i]). */
template <> void MinimumE<float, CPUContext>(
const int count,
const float* x1,
const float* x2,
float* y,
CPUContext* ctx) {
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
y[i] = std::min(x1[i], x2[i]);
}
}
/*! Broadcast minimum against a scalar: y[i] = min(x1[i], x2). */
template <> void MinimumB<float, CPUContext>(
const int count,
const float* x1,
const float x2,
float* y,
CPUContext* ctx) {
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
y[i] = std::min(x1[i], x2);
}
}
/*! Gradient of element-wise minimum: dy flows to whichever input was smaller.
    On ties (x1[i] == x2[i]) the gradient goes to x2. */
template <> void MinimumEGrad<float, CPUContext>(
const int count,
const float* x1,
const float* x2,
const float* dy,
float* dx1,
float* dx2,
CPUContext* ctx) {
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
const bool dy_to_dx1 = x1[i] < x2[i];
dx1[i] = dy_to_dx1 ? dy[i] : 0;
dx2[i] = dy_to_dx1 ? 0 : dy[i];
}
}
/*! Gradient of broadcast minimum; the scalar operand gets no gradient. */
template <> void MinimumBGrad<float, CPUContext>(
const int count,
const float* x1,
const float x2,
const float* dy,
float* dx1,
/* float* dx2, */
CPUContext* ctx) {
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i) {
dx1[i] = (x1[i] < x2) ? dy[i] : 0;
}
}
/******************** control_flow.compare ********************/ /******************** control_flow.compare ********************/
template <> void Equal<float, CPUContext>( template <> void Equal<float, CPUContext>(
...@@ -524,6 +701,189 @@ template<> void AbsGrad<float, CPUContext>( ...@@ -524,6 +701,189 @@ template<> void AbsGrad<float, CPUContext>(
} }
} }
/******************** loss.nll_loss ********************/
/*! NLLLoss forward (CPU): for each of outer_dim * inner_dim predictions,
    losses[i] = -log_prob at the target class, flags[i] = 1; predictions
    whose label appears in `ignores` get losses[i] = flags[i] = 0. */
template <typename Tx, typename Ty>
void _NLLLoss(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const Tx* log_prob,
    const Ty* labels,
    const int* ignores,
    const int num_ignores,
    Tx* losses,
    Tx* flags) {
    const int num_preds = outer_dim * inner_dim;
    for (int idx = 0; idx < num_preds; ++idx) {
        const int oix = idx / inner_dim;
        const int iix = idx % inner_dim;
        const int label = labels[idx];
        // Linear scan of the (typically tiny) ignore list.
        bool ignored = false;
        for (int k = 0; k < num_ignores && !ignored; ++k)
            ignored = (label == ignores[k]);
        if (ignored) {
            losses[idx] = flags[idx] = 0;
        } else {
            losses[idx] = -log_prob[
                (oix * axis_dim + label) * inner_dim + iix];
            flags[idx] = 1;
        }
    }
}
/*! NLLLoss, float log-probs with float labels. */
template <> void NLLLoss<float, float, CPUContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* log_prob,
const float* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CPUContext* ctx) {
_NLLLoss<float, float>(
outer_dim, axis_dim, inner_dim,
log_prob, labels, ignores,
num_ignores, losses, flags);
}
/*! fp16 log-probs are not supported on CPU; aborts at runtime. */
template <> void NLLLoss<float16, float, CPUContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float16* log_prob,
const float* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CPUContext* ctx) {
CPU_FP16_NOT_SUPPORTED;
}
/*! NLLLoss, float log-probs with int64 labels. */
template <> void NLLLoss<float, int64_t, CPUContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* log_prob,
const int64_t* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CPUContext* ctx) {
_NLLLoss<float, int64_t>(
outer_dim, axis_dim, inner_dim,
log_prob, labels, ignores,
num_ignores, losses, flags);
}
/*! fp16 log-probs are not supported on CPU; aborts at runtime. */
template <> void NLLLoss<float16, int64_t, CPUContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float16* log_prob,
const int64_t* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CPUContext* ctx) {
CPU_FP16_NOT_SUPPORTED;
}
/*! NLLLoss backward (CPU): writes -1 into dx at each non-ignored target
    position and counts the valid targets into flags[0].
    NOTE(review): dx is not zeroed here — presumably the caller clears it
    before this runs; confirm at call sites. */
template <typename Tx, typename Ty>
void _NLLLossGrad(
    const int outer_dim,
    const int axis_dim,
    const int inner_dim,
    const Tx* log_prob,
    const Ty* labels,
    const int* ignores,
    const int num_ignores,
    Tx* dx,
    Tx* flags) {
    flags[0] = 0;  // accumulates the number of contributing targets
    const int num_preds = outer_dim * inner_dim;
    for (int idx = 0; idx < num_preds; ++idx) {
        const int oix = idx / inner_dim;
        const int iix = idx % inner_dim;
        const int label = labels[idx];
        bool ignored = false;
        for (int k = 0; k < num_ignores && !ignored; ++k)
            ignored = (label == ignores[k]);
        if (!ignored) {
            dx[(oix * axis_dim + label) * inner_dim + iix] = -1;
            flags[0]++;
        }
    }
}
/*! NLLLossGrad, float log-probs with float labels. */
template<> void NLLLossGrad<float, float, CPUContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* log_prob,
const float* labels,
const int* ignores,
const int num_ignores,
float* dx,
float* flags,
CPUContext* ctx) {
_NLLLossGrad<float, float>(
outer_dim, axis_dim, inner_dim,
log_prob, labels, ignores,
num_ignores, dx, flags);
}
/*! fp16 gradients are not supported on CPU; aborts at runtime. */
template<> void NLLLossGrad<float16, float, CPUContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float16* log_prob,
const float* labels,
const int* ignores,
const int num_ignores,
float16* dx,
float* flags,
CPUContext* ctx) {
CPU_FP16_NOT_SUPPORTED;
}
/*! NLLLossGrad, float log-probs with int64 labels. */
template<> void NLLLossGrad<float, int64_t, CPUContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* log_prob,
const int64_t* labels,
const int* ignores,
const int num_ignores,
float* dx,
float* flags,
CPUContext* ctx) {
_NLLLossGrad<float, int64_t>(
outer_dim, axis_dim, inner_dim,
log_prob, labels, ignores,
num_ignores, dx, flags);
}
/*! fp16 gradients are not supported on CPU; aborts at runtime. */
template<> void NLLLossGrad<float16, int64_t, CPUContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float16* log_prob,
const int64_t* labels,
const int* ignores,
const int num_ignores,
float16* dx,
float* flags,
CPUContext* ctx) {
CPU_FP16_NOT_SUPPORTED;
}
/******************** loss.sigmoid_cross_entropy ********************/ /******************** loss.sigmoid_cross_entropy ********************/
template <> void SigmoidCrossEntropy<float, CPUContext>( template <> void SigmoidCrossEntropy<float, CPUContext>(
...@@ -2706,6 +3066,94 @@ template<> void Col2Im2d<float, CPUContext>( ...@@ -2706,6 +3066,94 @@ template<> void Col2Im2d<float, CPUContext>(
} else LOG(FATAL) << "Unknown data format: " << data_format; } else LOG(FATAL) << "Unknown data format: " << data_format;
} }
/******************** vision.drop_block ********************/
/*! Clear block_size x block_size squares of an NCHW int mask wherever the
    corresponding seed element is nonzero. One seed per (n, c, y, x) over a
    seed_h x seed_w grid; seed_idx walks that grid in order.
    NOTE(review): assumes seed_h/seed_w leave room so that y + block_size <= H
    and x + block_size <= W — confirm at call sites. */
void _DropBlock2d_NCHW(
const int N,
const int C,
const int H,
const int W,
const int seed_h,
const int seed_w,
const int block_size,
const uint32_t* seed,
int* mask) {
TIndex seed_idx = 0;
for (int n = 0; n < N; ++n) {
for (int c = 0; c < C; ++c) {
const int nc = (n * C + c) * H;
for (int y = 0; y < seed_h; ++y) {
for (int x = 0; x < seed_w; ++x) {
if (seed[seed_idx] > 0) {
for (int i = 0; i < block_size; ++i) {
const int nch = (nc + y + i) * W;
for (int j = 0; j < block_size; ++j) {
// &= 0 zeroes the element while leaving already-cleared bits cleared.
mask[nch + x + j] &= 0;
} // end j
} // end i
}
seed_idx++;
} // end x
} // end y
} // end c
} // end n
}
/*! Same as _DropBlock2d_NCHW but indexes the mask with NHWC layout. */
void _DropBlock2d_NHWC(
const int N,
const int C,
const int H,
const int W,
const int seed_h,
const int seed_w,
const int block_size,
const uint32_t* seed,
int* mask) {
TIndex seed_idx = 0;
for (int n = 0; n < N; ++n) {
for (int c = 0; c < C; ++c) {
for (int y = 0; y < seed_h; ++y) {
for (int x = 0; x < seed_w; ++x) {
if (seed[seed_idx] > 0) {
for (int i = 0; i < block_size; ++i) {
const int nh = (n * H + y + i) * W;
for (int j = 0; j < block_size; ++j) {
mask[(nh + x + j) * C + c] &= 0;
} // end j
} // end i
}
seed_idx++;
} // end x
} // end y
} // end c
} // end n
}
/*! DropBlock2d (CPU): draw a Bernoulli(gamma) seed per (n, c, y, x) over the
    seed_h x seed_w grid, then zero a block_size x block_size square of `mask`
    around each active seed, honoring the NCHW/NHWC layout. */
template <> void DropBlock2d<CPUContext>(
const int N,
const int C,
const int H,
const int W,
const int seed_h,
const int seed_w,
const int block_size,
const float gamma,
const string& data_format,
uint32_t* seed,
int* mask,
CPUContext* ctx) {
const int count = N * C * seed_h * seed_w;
math::RandomBernoulli<uint32_t, CPUContext>(
count, gamma, seed, ctx);
if (data_format == "NCHW") {
_DropBlock2d_NCHW(N, C, H, W,
seed_h, seed_w, block_size, seed, mask);
} else if (data_format == "NHWC") {
_DropBlock2d_NHWC(N, C, H, W,
seed_h, seed_w, block_size, seed, mask);
} else LOG(FATAL) << "Unknown data format: " << data_format;
}
/******************** vision.nn_resize ********************/ /******************** vision.nn_resize ********************/
template <typename T> template <typename T>
......
...@@ -19,12 +19,14 @@ template<typename T> ...@@ -19,12 +19,14 @@ template<typename T>
__global__ void _Dropout( __global__ void _Dropout(
const int count, const int count,
const uint32_t thresh, const uint32_t thresh,
const T scale, const float scale,
const T* x, const T* x,
const uint32_t* mask, const uint32_t* mask32,
uint8_t* mask8,
T* y) { T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) { CUDA_1D_KERNEL_LOOP(idx, count) {
y[idx] = x[idx] * (mask[idx] > thresh) * scale; mask8[idx] = (mask32[idx] > thresh);
y[idx] = x[idx] * mask8[idx] * scale;
} }
} }
...@@ -33,44 +35,42 @@ template<> void Dropout<float, CUDAContext>( ...@@ -33,44 +35,42 @@ template<> void Dropout<float, CUDAContext>(
float prob, float prob,
float scale, float scale,
const float* x, const float* x,
uint32_t* mask, uint32_t* mask32,
uint8_t* mask8,
float* y, float* y,
CUDAContext* ctx) { CUDAContext* ctx) {
uint32_t thresh = static_cast<uint32_t>(UINT_MAX * prob);
math::RandomUniform<uint32_t, CUDAContext>( math::RandomUniform<uint32_t, CUDAContext>(
count, float(0), float(UINT_MAX), mask, ctx); count, float(0), float(UINT_MAX), mask32, ctx);
auto thresh = static_cast<uint32_t>(UINT_MAX * prob);
_Dropout<float> _Dropout<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >( 0, ctx->cuda_stream() >> >(count,
count, thresh, scale, x, mask, y); thresh, scale, x, mask32, mask8, y);
} }
template <typename T> template <typename Tx, typename Tm>
__global__ void _DropoutGrad( __global__ void _ApplyMask(
const int count, const int count,
const uint32_t thresh, const float scale,
const T scale, const Tx* x,
const T* dy, const Tm* mask,
const uint32_t* mask, Tx* y) {
T* dx) {
CUDA_1D_KERNEL_LOOP(idx, count) { CUDA_1D_KERNEL_LOOP(idx, count) {
dx[idx] = dy[idx] * (mask[idx] > thresh) * scale; y[idx] = x[idx] * mask[idx] * scale;
} }
} }
template<> void DropoutGrad<float, CUDAContext>( template <> void ApplyMask<float, uint8_t, CUDAContext>(
const int count, const int count,
float prob, const float scale,
float scale, const float* x,
const float* dy, const uint8_t* mask,
const uint32_t* mask, float* y,
float* dx,
CUDAContext* ctx) { CUDAContext* ctx) {
uint32_t thresh = static_cast<uint32_t>(UINT_MAX * prob); _ApplyMask<float, uint8_t>
_DropoutGrad<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >( 0, ctx->cuda_stream() >> >(count,
count, thresh, scale, dy, mask, dx); scale, x, mask, y);
} }
/******************** activation.prelu ********************/ /******************** activation.prelu ********************/
...@@ -753,13 +753,9 @@ __global__ void _Clip( ...@@ -753,13 +753,9 @@ __global__ void _Clip(
const T low, const T low,
const T high, const T high,
const T* x, const T* x,
T* mask,
T* y) { T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) { CUDA_1D_KERNEL_LOOP(idx, count) {
mask[idx] = 1.0; y[idx] = max(low, min(x[idx], high));
if (x[idx] > high || x[idx] < low) mask[idx] = 0.0;
y[idx] = x[idx] > high ? high : x[idx];
y[idx] = x[idx] < low ? low : x[idx];
} }
} }
...@@ -768,13 +764,237 @@ template <> void Clip<float, CUDAContext>( ...@@ -768,13 +764,237 @@ template <> void Clip<float, CUDAContext>(
const float low, const float low,
const float high, const float high,
const float* x, const float* x,
float* mask,
float* y, float* y,
CUDAContext* ctx) { CUDAContext* ctx) {
_Clip<float> _Clip<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, low, high, x, y);
}
/*! CUDA kernel for ClipGrad: dx = dy where x stayed inside [low, high],
    zero where the forward pass clipped. */
template <typename T>
__global__ void _ClipGrad(
const int count,
const T low,
const T high,
const T* x,
const T* dy,
T* dx) {
CUDA_1D_KERNEL_LOOP(idx, count) {
const T xi = x[idx];
dx[idx] = (xi < low || xi > high) ? 0 : dy[idx];
}
}
/*! ClipGrad<float, CUDAContext>: launch _ClipGrad over `count` elements on
    the context's stream. (Lines previously carried fused old-column diff
    text — `low, high, x, mask, y);` residue — removed here.) */
template <> void ClipGrad<float, CUDAContext>(
    const int count,
    const float low,
    const float high,
    const float* x,
    const float* dy,
    float* dx,
    CUDAContext* ctx) {
    _ClipGrad<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count,
                low, high, x, dy, dx);
}
/******************** arithmetic.maximum ********************/
/*! Element-wise maximum kernel: y[idx] = max(x1[idx], x2[idx]). */
template <typename T>
__global__ void _MaximumE(
const int count,
const T* x1,
const T* x2,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
y[idx] = max(x1[idx], x2[idx]);
}
}
template <> void MaximumE<float, CUDAContext>(
const int count,
const float* x1,
const float* x2,
float* y,
CUDAContext* ctx) {
_MaximumE<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, x1, x2, y);
}
/*! Broadcast maximum kernel against a scalar x2. */
template <typename T>
__global__ void _MaximumB(
const int count,
const T* x1,
const T x2,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
y[idx] = max(x1[idx], x2);
}
}
template <> void MaximumB<float, CUDAContext>(
const int count,
const float* x1,
const float x2,
float* y,
CUDAContext* ctx) {
_MaximumB<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, x1, x2, y);
}
/*! Gradient kernel: dy flows to the larger input; ties favor x2. */
template <typename T>
__global__ void _MaximumEGrad(
const int count,
const T* x1,
const T* x2,
const T* dy,
T* dx1,
T* dx2) {
CUDA_1D_KERNEL_LOOP(idx, count) {
const bool dy_to_dx1 = x1[idx] > x2[idx];
dx1[idx] = dy_to_dx1 ? dy[idx] : 0;
dx2[idx] = dy_to_dx1 ? 0 : dy[idx];
}
}
template <> void MaximumEGrad<float, CUDAContext>(
const int count,
const float* x1,
const float* x2,
const float* dy,
float* dx1,
float* dx2,
CUDAContext* ctx) {
_MaximumEGrad<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, x1, x2, dy, dx1, dx2);
}
/*! Broadcast gradient kernel; the scalar operand receives no gradient. */
template <typename T>
__global__ void _MaximumBGrad(
const int count,
const T* x1,
const T x2,
const T* dy,
T* dx1) {
CUDA_1D_KERNEL_LOOP(idx, count) {
dx1[idx] = (x1[idx] > x2) ? dy[idx] : 0;
}
}
template <> void MaximumBGrad<float, CUDAContext>(
const int count,
const float* x1,
const float x2,
const float* dy,
float* dx1,
/* float* dx2, */
CUDAContext* ctx) {
_MaximumBGrad<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, x1, x2, dy, dx1);
}
/******************** arithmetic.minimum ********************/
/*! Element-wise minimum kernel: y[idx] = min(x1[idx], x2[idx]). */
template <typename T>
__global__ void _MinimumE(
const int count,
const T* x1,
const T* x2,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
y[idx] = min(x1[idx], x2[idx]);
}
}
template <> void MinimumE<float, CUDAContext>(
const int count,
const float* x1,
const float* x2,
float* y,
CUDAContext* ctx) {
_MinimumE<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, x1, x2, y);
}
/*! Broadcast minimum kernel against a scalar x2. */
template <typename T>
__global__ void _MinimumB(
const int count,
const T* x1,
const T x2,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
y[idx] = min(x1[idx], x2);
}
}
template <> void MinimumB<float, CUDAContext>(
const int count,
const float* x1,
const float x2,
float* y,
CUDAContext* ctx) {
_MinimumB<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, x1, x2, y);
}
/*! Gradient kernel: dy flows to the smaller input; ties favor x2. */
template <typename T>
__global__ void _MinimumEGrad(
const int count,
const T* x1,
const T* x2,
const T* dy,
T* dx1,
T* dx2) {
CUDA_1D_KERNEL_LOOP(idx, count) {
const bool dy_to_dx1 = x1[idx] < x2[idx];
dx1[idx] = dy_to_dx1 ? dy[idx] : 0;
dx2[idx] = dy_to_dx1 ? 0 : dy[idx];
}
}
template <> void MinimumEGrad<float, CUDAContext>(
const int count,
const float* x1,
const float* x2,
const float* dy,
float* dx1,
float* dx2,
CUDAContext* ctx) {
_MinimumEGrad<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, x1, x2, dy, dx1, dx2);
}
/*! Broadcast gradient kernel; the scalar operand receives no gradient. */
template <typename T>
__global__ void _MinimumBGrad(
const int count,
const T* x1,
const T x2,
const T* dy,
T* dx1) {
CUDA_1D_KERNEL_LOOP(idx, count) {
dx1[idx] = (x1[idx] < x2) ? dy[idx] : 0;
}
}
/*! MinimumBGrad<float, CUDAContext>: launch _MinimumBGrad on the context's
    stream. (Removed a stray old-column closing brace left by the diff
    rendering on the final line.) */
template <> void MinimumBGrad<float, CUDAContext>(
    const int count,
    const float* x1,
    const float x2,
    const float* dy,
    float* dx1,
    /* float* dx2, */
    CUDAContext* ctx) {
    _MinimumBGrad<float>
        << < CUDA_BLOCKS(count), CUDA_THREADS,
            0, ctx->cuda_stream() >> >(count, x1, x2, dy, dx1);
}
/******************** control_flow.compare ********************/ /******************** control_flow.compare ********************/
...@@ -825,6 +1045,145 @@ template<> void AbsGrad<float, CUDAContext>( ...@@ -825,6 +1045,145 @@ template<> void AbsGrad<float, CUDAContext>(
0, ctx->cuda_stream() >> >(count, dy, dx); 0, ctx->cuda_stream() >> >(count, dy, dx);
} }
/******************** loss.nll_loss ********************/
/*! NLLLoss forward kernel: one thread per prediction (outer * inner);
    ignored labels produce losses = flags = 0, others losses = -log_prob
    at the target class and flags = 1. */
template <typename Tx, typename Ty>
__global__ void _NLLLoss(
const int count,
const int axis_dim,
const int inner_dim,
const Tx* log_prob,
const Ty* labels,
const int* ignores,
const int num_ignores,
Tx* losses,
Tx* flags) {
CUDA_1D_KERNEL_LOOP(idx, count) {
const int oix = idx / inner_dim;
const int iix = idx % inner_dim;
const int label = labels[oix * inner_dim + iix];
int k;
for (k = 0; k < num_ignores; k++) {
if (label == ignores[k]) {
losses[idx] = flags[idx] = 0;
break;
}
}
if (k == num_ignores) {
losses[idx] = -log_prob[
(oix * axis_dim + label) * inner_dim + iix];
flags[idx] = 1;
}
}
}
template <> void NLLLoss<float, float, CUDAContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* log_prob,
const float* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim;
_NLLLoss<float, float>
<< < CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, axis_dim, inner_dim,
log_prob, labels, ignores,
num_ignores, losses, flags);
}
template <> void NLLLoss<float, int64_t, CUDAContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* log_prob,
const int64_t* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim;
_NLLLoss<float, int64_t>
<< < CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, axis_dim, inner_dim,
log_prob, labels, ignores,
num_ignores, losses, flags);
}
/*! NLLLoss backward kernel: writes -1 at each non-ignored target position.
    Unlike the CPU path (which accumulates a single count in flags[0]),
    this kernel sets a per-prediction flag of 0/1 — presumably reduced by
    the caller; confirm against the operator code.
    NOTE(review): dx is not zeroed here — assumed pre-cleared by the caller. */
template <typename Tx, typename Ty>
__global__ void _NLLLossGrad(
const int count,
const int axis_dim,
const int inner_dim,
const Tx* log_prob,
const Ty* labels,
const int* ignores,
const int num_ignores,
Tx* dx,
Tx* flags) {
CUDA_1D_KERNEL_LOOP(idx, count) {
const int oix = idx / inner_dim;
const int iix = idx % inner_dim;
const int label = labels[oix * inner_dim + iix];
int k;
for (k = 0; k < num_ignores; k++)
if (label == ignores[k]) break;
if (k != num_ignores) {
flags[idx] = 0;
} else {
dx[(oix * axis_dim + label) * inner_dim + iix] = -1;
flags[idx] = 1;
}
}
}
template<> void NLLLossGrad<float, float, CUDAContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* log_prob,
const float* labels,
const int* ignores,
const int num_ignores,
float* dx,
float* flags,
CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim;
_NLLLossGrad<float, float>
<< < CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, axis_dim, inner_dim,
log_prob, labels, ignores,
num_ignores, dx, flags);
}
template<> void NLLLossGrad<float, int64_t, CUDAContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* log_prob,
const int64_t* labels,
const int* ignores,
const int num_ignores,
float* dx,
float* flags,
CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim;
_NLLLossGrad<float, int64_t>
<< < CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, axis_dim, inner_dim,
log_prob, labels, ignores,
num_ignores, dx, flags);
}
/******************** loss.sigmoid_cross_entropy ********************/ /******************** loss.sigmoid_cross_entropy ********************/
template <typename T> template <typename T>
...@@ -2856,8 +3215,7 @@ __global__ void _BiasAdd_NCHW( ...@@ -2856,8 +3215,7 @@ __global__ void _BiasAdd_NCHW(
const T* bias, const T* bias,
T* y) { T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) { CUDA_1D_KERNEL_LOOP(idx, count) {
const int bias_idx = (idx / inner_dim) % dim; y[idx] += bias[(idx / inner_dim) % dim];
y[idx] += bias[bias_idx];
} }
} }
...@@ -3395,6 +3753,95 @@ template <> void Col2Im2d<float, CUDAContext>( ...@@ -3395,6 +3753,95 @@ template <> void Col2Im2d<float, CUDAContext>(
} else LOG(FATAL) << "Unknown data format: " << data_format; } else LOG(FATAL) << "Unknown data format: " << data_format;
} }
/******************** vision.drop_block ********************/
/*! One thread per seed; a seed below `thresh` clears its
    block_size x block_size square of the NCHW mask via atomicAnd
    (blocks from neighboring seeds may overlap).
    NOTE(review): template parameter T is unused — mask is hard-typed int. */
template <typename T>
__global__ void _DropBlock2d_NCHW(
const int count,
const int C,
const int H,
const int W,
const int seed_h,
const int seed_w,
const int block_size,
const uint32_t thresh,
const uint32_t* seed,
int* mask) {
CUDA_1D_KERNEL_LOOP(idx, count) {
if (seed[idx] < thresh) {
const int x = idx % seed_w;
const int y = (idx / seed_w) % seed_h;
const int c = (idx / seed_w / seed_h) % C;
const int n = (idx / seed_w / seed_h) / C;
const int nc = (n * C + c) * H;
for (int i = 0; i < block_size; ++i) {
const int nch = (nc + y + i) * W;
for (int j = 0; j < block_size; ++j)
atomicAnd(&mask[nch + x + j], 0);
}
}
}
}
/*! Same as above but indexes the mask with NHWC layout. */
template <typename T>
__global__ void _DropBlock2d_NHWC(
const int count,
const int C,
const int H,
const int W,
const int seed_h,
const int seed_w,
const int block_size,
const uint32_t thresh,
const uint32_t* seed,
int* mask) {
CUDA_1D_KERNEL_LOOP(idx, count) {
if (seed[idx] < thresh) {
const int x = idx % seed_w;
const int y = (idx / seed_w) % seed_h;
const int c = (idx / seed_w / seed_h) % C;
const int n = (idx / seed_w / seed_h) / C;
for (int i = 0; i < block_size; ++i) {
const int nh = (n * H + y + i) * W;
for (int j = 0; j < block_size; ++j)
atomicAnd(&mask[(nh + x + j) * C + c], 0);
}
}
}
}
/*! DropBlock2d (CUDA): draw uniform seeds in [0, UINT_MAX]; a seed fires
    with probability gamma (seed < UINT_MAX * gamma), mirroring the CPU
    Bernoulli(gamma) path. */
template <> void DropBlock2d<CUDAContext>(
const int N,
const int C,
const int H,
const int W,
const int seed_h,
const int seed_w,
const int block_size,
const float gamma,
const string& data_format,
uint32_t* seed,
int* mask,
CUDAContext* ctx) {
const int count = N * C * seed_h * seed_w;
math::RandomUniform<uint32_t, CUDAContext>(
count, 0.f, float(UINT_MAX), seed, ctx);
auto thresh = static_cast<uint32_t>(UINT_MAX * gamma);
if (data_format == "NCHW") {
_DropBlock2d_NCHW<int>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count,
C, H, W, seed_h, seed_w, block_size,
thresh, seed, mask);
} else if(data_format == "NHWC") {
_DropBlock2d_NHWC<int>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count,
C, H, W, seed_h, seed_w, block_size,
thresh, seed, mask);
} else LOG(FATAL) << "Unknown data format: " << data_format;
}
/******************** vision.nn_resize ********************/ /******************** vision.nn_resize ********************/
template <typename T> template <typename T>
......
...@@ -13,9 +13,77 @@ namespace dragon { ...@@ -13,9 +13,77 @@ namespace dragon {
namespace kernel { namespace kernel {
/******************** activation.dropout ********************/
/*! fp16 dropout kernel: derive the uint8 keep-mask from the random uint32
    draw (> thresh keeps), then y = x * mask * scale in half precision.
    NOTE(review): the body is compiled out below sm_53, leaving mask8/y
    unwritten on such devices. */
__global__ void _DropoutHalf(
const int count,
const uint32_t thresh,
const half scale,
const half* x,
const uint32_t* mask32,
uint8_t* mask8,
half* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
#if __CUDA_ARCH__ >= 530
mask8[idx] = (mask32[idx] > thresh);
y[idx] = __hmul(__hmul(x[idx], scale),
__float2half((float)mask8[idx]));
#endif
}
}
/*! Dropout<float16>: fill mask32 with uniform draws in [0, UINT_MAX],
    then launch _DropoutHalf with thresh = UINT_MAX * prob. */
template<> void Dropout<float16, CUDAContext>(
const int count,
float prob,
float scale,
const float16* x,
uint32_t* mask32,
uint8_t* mask8,
float16* y,
CUDAContext* ctx) {
math::RandomUniform<uint32_t, CUDAContext>(
count, float(0), float(UINT_MAX), mask32, ctx);
auto thresh = static_cast<uint32_t>(UINT_MAX * prob);
_DropoutHalf
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count,
thresh, dragon_cast<half, float>(scale),
reinterpret_cast<const half*>(x),
mask32, mask8, reinterpret_cast<half*>(y));
}
/*! fp16 mask application kernel: y = x * mask * scale (half math). */
template <typename Tm>
__global__ void _ApplyMaskHalf(
const int count,
const half scale,
const half* x,
const Tm* mask,
half* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
#if __CUDA_ARCH__ >= 530
y[idx] = __hmul(__hmul(x[idx], scale),
__float2half((float)mask[idx]));
#endif
}
}
/*! ApplyMask<float16>: re-apply a saved uint8 mask (dropout backward). */
template <> void ApplyMask<float16, uint8_t, CUDAContext>(
const int count,
const float scale,
const float16* x,
const uint8_t* mask,
float16* y,
CUDAContext* ctx) {
_ApplyMaskHalf<uint8_t>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count,
dragon_cast<half, float>(scale),
reinterpret_cast<const half*>(x),
mask, reinterpret_cast<half*>(y));
}
/******************** activation.relu ********************/ /******************** activation.relu ********************/
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _ReluHalf( __global__ void _ReluHalf(
const int count, const int count,
...@@ -45,7 +113,6 @@ __global__ void _ReluHalf2( ...@@ -45,7 +113,6 @@ __global__ void _ReluHalf2(
#endif #endif
} }
} }
#endif
template<> void Relu<float16, CUDAContext>( template<> void Relu<float16, CUDAContext>(
const int count, const int count,
...@@ -53,8 +120,7 @@ template<> void Relu<float16, CUDAContext>( ...@@ -53,8 +120,7 @@ template<> void Relu<float16, CUDAContext>(
const float16* x, const float16* x,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16 if ((count & 1) == 0) {
if ((count & 1) == 0 == 0) {
_ReluHalf2<half2> _ReluHalf2<half2>
<< < CUDA_BLOCKS(count >> 1), CUDA_THREADS, << < CUDA_BLOCKS(count >> 1), CUDA_THREADS,
0, ctx->cuda_stream() >> > (count >> 1, 0, ctx->cuda_stream() >> > (count >> 1,
...@@ -69,14 +135,10 @@ template<> void Relu<float16, CUDAContext>( ...@@ -69,14 +135,10 @@ template<> void Relu<float16, CUDAContext>(
reinterpret_cast<const half*>(x), reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
/******************** arithmetic.affine ********************/ /******************** arithmetic.affine ********************/
#ifdef WITH_CUDA_FP16
template <typename T> template <typename T>
__global__ void _AffineWithOBiasHalf( __global__ void _AffineWithOBiasHalf(
const int count, const int count,
...@@ -112,7 +174,6 @@ __global__ void _AffineWithBiasHalf( ...@@ -112,7 +174,6 @@ __global__ void _AffineWithBiasHalf(
#endif #endif
} }
} }
#endif
template<> void Affine<float16, CUDAContext>( template<> void Affine<float16, CUDAContext>(
const int count, const int count,
...@@ -125,7 +186,6 @@ template<> void Affine<float16, CUDAContext>( ...@@ -125,7 +186,6 @@ template<> void Affine<float16, CUDAContext>(
const float16* beta_multiplier, const float16* beta_multiplier,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if (beta != nullptr) { if (beta != nullptr) {
_AffineWithBiasHalf<float> _AffineWithBiasHalf<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
...@@ -144,9 +204,151 @@ template<> void Affine<float16, CUDAContext>( ...@@ -144,9 +204,151 @@ template<> void Affine<float16, CUDAContext>(
reinterpret_cast<const half*>(alpha), reinterpret_cast<const half*>(alpha),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
} }
#else }
CUDA_FP16_NOT_COMPILED;
/******************** loss.nll_loss ********************/
/*! fp16 NLLLoss forward kernel: same logic as _NLLLoss, with the lookup
    negated in half precision and widened to float for the losses output.
    NOTE(review): body compiled out below sm_53 — outputs stay unwritten. */
template <typename Ty>
__global__ void _NLLLossHalf(
const int count,
const int axis_dim,
const int inner_dim,
const half* log_prob,
const Ty* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags) {
CUDA_1D_KERNEL_LOOP(idx, count) {
#if __CUDA_ARCH__ >= 530
const int oix = idx / inner_dim;
const int iix = idx % inner_dim;
const int label = labels[oix * inner_dim + iix];
int k;
for (k = 0; k < num_ignores; k++) {
if (label == ignores[k]) {
losses[idx] = flags[idx] = 0;
break;
}
}
if (k == num_ignores) {
losses[idx] = __half2float(__hneg(
log_prob[(oix * axis_dim + label) * inner_dim + iix]));
flags[idx] = 1;
}
#endif
}
}
template <> void NLLLoss<float16, float, CUDAContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float16* log_prob,
const float* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim;
_NLLLossHalf<float>
<< < CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, axis_dim, inner_dim,
reinterpret_cast<const half*>(log_prob), labels,
ignores, num_ignores, losses, flags);
}
template <> void NLLLoss<float16, int64_t, CUDAContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float16* log_prob,
const int64_t* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim;
_NLLLossHalf<int64_t>
<< < CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, axis_dim, inner_dim,
reinterpret_cast<const half*>(log_prob), labels,
ignores, num_ignores, losses, flags);
}
/*! fp16 NLLLoss backward kernel: -1 (as half) at each non-ignored target,
    per-prediction 0/1 flags. NOTE(review): dx assumed pre-cleared; body
    compiled out below sm_53. */
template <typename Ty>
__global__ void _NLLLossGradHalf(
const int count,
const int axis_dim,
const int inner_dim,
const half* log_prob,
const Ty* labels,
const int* ignores,
const int num_ignores,
half* dx,
float* flags) {
CUDA_1D_KERNEL_LOOP(idx, count) {
#if __CUDA_ARCH__ >= 530
const int oix = idx / inner_dim;
const int iix = idx % inner_dim;
const int label = labels[oix * inner_dim + iix];
int k;
for (k = 0; k < num_ignores; k++)
if (label == ignores[k]) break;
if (k != num_ignores) {
flags[idx] = 0;
} else {
dx[(oix * axis_dim + label) * inner_dim + iix] = __float2half(-1.);
flags[idx] = 1;
}
#endif
}
}
template<> void NLLLossGrad<float16, float, CUDAContext>(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float16* log_prob,
const float* labels,
const int* ignores,
const int num_ignores,
float16* dx,
float* flags,
CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim;
_NLLLossGradHalf<float>
<< < CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, axis_dim, inner_dim,
reinterpret_cast<const half*>(log_prob), labels,
ignores, num_ignores,
reinterpret_cast<half*>(dx), flags);
}
/*! NLLLossGrad specialization: fp16 gradients with int64 labels.
 *  Launches the half-precision gradient kernel with one thread per
 *  (outer, inner) prediction; mirrors the float-label variant.
 *  Fix: dropped the duplicated closing brace that trailed this
 *  definition — the function closes with a single brace, matching
 *  every sibling specialization above. */
template<> void NLLLossGrad<float16, int64_t, CUDAContext>(
    const int               outer_dim,
    const int               axis_dim,
    const int               inner_dim,
    const float16*          log_prob,
    const int64_t*          labels,
    const int*              ignores,
    const int               num_ignores,
    float16*                dx,
    float*                  flags,
    CUDAContext*            ctx) {
    // One kernel thread per prediction.
    const int num_preds = outer_dim * inner_dim;
    _NLLLossGradHalf<int64_t>
        << < CUDA_BLOCKS(num_preds), CUDA_THREADS,
             0, ctx->cuda_stream() >> >(
            num_preds, axis_dim, inner_dim,
                reinterpret_cast<const half*>(log_prob), labels,
                    ignores, num_ignores,
                        reinterpret_cast<half*>(dx), flags);
}
/******************** loss.sparse_softmax_cross_entropy ********************/ /******************** loss.sparse_softmax_cross_entropy ********************/
...@@ -304,11 +506,11 @@ template<> void SparseSoftmaxCrossEntropyGrad<float16, int64_t, CUDAContext>( ...@@ -304,11 +506,11 @@ template<> void SparseSoftmaxCrossEntropyGrad<float16, int64_t, CUDAContext>(
reinterpret_cast<const half*>(prob), labels, reinterpret_cast<const half*>(prob), labels,
ignores, num_ignores, ignores, num_ignores,
reinterpret_cast<half*>(dx), flags); reinterpret_cast<half*>(dx), flags);
} }
/******************** misc.astype ********************/ /******************** misc.astype ********************/
#ifdef WITH_CUDA_FP16
__global__ void _TypeHalf2Float( __global__ void _TypeHalf2Float(
const int count, const int count,
const half* a, const half* a,
...@@ -334,7 +536,6 @@ __global__ void _TypeHalf2Half( ...@@ -334,7 +536,6 @@ __global__ void _TypeHalf2Half(
b[idx] = a[idx]; b[idx] = a[idx];
} }
} }
#endif
#define DEFINE_TYPE_DISABLE_FP16(type) \ #define DEFINE_TYPE_DISABLE_FP16(type) \
template <> void TypeA2B<float16, type, CUDAContext>( \ template <> void TypeA2B<float16, type, CUDAContext>( \
...@@ -376,7 +577,6 @@ __global__ void _TypeHalf2Half( ...@@ -376,7 +577,6 @@ __global__ void _TypeHalf2Half(
a, reinterpret_cast<half*>(b)); \ a, reinterpret_cast<half*>(b)); \
} }
#ifdef WITH_CUDA_FP16
template <> void TypeA2B<float16, float16, CUDAContext>( template <> void TypeA2B<float16, float16, CUDAContext>(
const int count, const int count,
const float16* a, const float16* a,
...@@ -388,29 +588,15 @@ template <> void TypeA2B<float16, float16, CUDAContext>( ...@@ -388,29 +588,15 @@ template <> void TypeA2B<float16, float16, CUDAContext>(
reinterpret_cast<const half*>(a), reinterpret_cast<const half*>(a),
reinterpret_cast<half*>(b)); reinterpret_cast<half*>(b));
} }
DEFINE_TYPE_ENABLE_FP16_FP32; DEFINE_TYPE_ENABLE_FP16_FP32;
DEFINE_TYPE_DISABLE_FP16(double); DEFINE_TYPE_DISABLE_FP16(double);
DEFINE_TYPE_DISABLE_FP16(int); DEFINE_TYPE_DISABLE_FP16(int);
DEFINE_TYPE_DISABLE_FP16(int64_t); DEFINE_TYPE_DISABLE_FP16(int64_t);
DEFINE_TYPE_DISABLE_FP16(uint8_t); DEFINE_TYPE_DISABLE_FP16(uint8_t);
#else
template <> void TypeA2B<float16, float16, CUDAContext>(
const int count,
const float16* a,
float16* b,
CUDAContext* ctx) {
LOG(FATAL) << "CUDAContext has not implemented: float16 -> float16";
}
DEFINE_TYPE_DISABLE_FP16(float);
DEFINE_TYPE_DISABLE_FP16(double);
DEFINE_TYPE_DISABLE_FP16(int);
DEFINE_TYPE_DISABLE_FP16(int64_t);
DEFINE_TYPE_DISABLE_FP16(uint8_t);
#endif
/******************** misc.image_data ********************/ /******************** misc.image_data ********************/
#ifdef WITH_CUDA_FP16
template <typename Tx, typename Ty> template <typename Tx, typename Ty>
__global__ void _ImageDataHalf_NCHW( __global__ void _ImageDataHalf_NCHW(
const int count, const int count,
...@@ -453,7 +639,6 @@ __global__ void _ImageDataHalf_NHWC( ...@@ -453,7 +639,6 @@ __global__ void _ImageDataHalf_NHWC(
y[idx] = __float2half(raw_value); y[idx] = __float2half(raw_value);
} }
} }
#endif
template <> void ImageData<float, float16, CUDAContext>( template <> void ImageData<float, float16, CUDAContext>(
const int count, const int count,
...@@ -467,7 +652,6 @@ template <> void ImageData<float, float16, CUDAContext>( ...@@ -467,7 +652,6 @@ template <> void ImageData<float, float16, CUDAContext>(
const float* x, const float* x,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if (data_format == "NCHW") { if (data_format == "NCHW") {
_ImageDataHalf_NCHW<float, half> _ImageDataHalf_NCHW<float, half>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
...@@ -481,9 +665,6 @@ template <> void ImageData<float, float16, CUDAContext>( ...@@ -481,9 +665,6 @@ template <> void ImageData<float, float16, CUDAContext>(
N, C, H, W, mean_values, std_values, N, C, H, W, mean_values, std_values,
x, reinterpret_cast<half*>(y)); x, reinterpret_cast<half*>(y));
} else LOG(FATAL) << "Unknown data format: " << data_format; } else LOG(FATAL) << "Unknown data format: " << data_format;
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
template <> void ImageData<uint8_t, float16, CUDAContext>( template <> void ImageData<uint8_t, float16, CUDAContext>(
...@@ -498,7 +679,6 @@ template <> void ImageData<uint8_t, float16, CUDAContext>( ...@@ -498,7 +679,6 @@ template <> void ImageData<uint8_t, float16, CUDAContext>(
const uint8_t* x, const uint8_t* x,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
if (data_format == "NCHW") { if (data_format == "NCHW") {
_ImageDataHalf_NCHW<uint8_t, half> _ImageDataHalf_NCHW<uint8_t, half>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
...@@ -512,9 +692,6 @@ template <> void ImageData<uint8_t, float16, CUDAContext>( ...@@ -512,9 +692,6 @@ template <> void ImageData<uint8_t, float16, CUDAContext>(
N, C, H, W, mean_values, std_values, N, C, H, W, mean_values, std_values,
x, reinterpret_cast<half*>(y)); x, reinterpret_cast<half*>(y));
} else LOG(FATAL) << "Unknown data format: " << data_format; } else LOG(FATAL) << "Unknown data format: " << data_format;
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
/******************** ndarray.concat ********************/ /******************** ndarray.concat ********************/
...@@ -549,7 +726,6 @@ template <> void Concat<float16, CUDAContext>( ...@@ -549,7 +726,6 @@ template <> void Concat<float16, CUDAContext>(
const float16* x, const float16* x,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_ConcatHalf<half> _ConcatHalf<half>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, 0, ctx->cuda_stream() >> >(count,
...@@ -557,9 +733,6 @@ template <> void Concat<float16, CUDAContext>( ...@@ -557,9 +733,6 @@ template <> void Concat<float16, CUDAContext>(
x_concat_dim, y_concat_dim, concat_offset, x_concat_dim, y_concat_dim, concat_offset,
reinterpret_cast<const half*>(x), reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
template <typename T> template <typename T>
...@@ -592,7 +765,6 @@ template <> void ConcatGrad<float16, CUDAContext>( ...@@ -592,7 +765,6 @@ template <> void ConcatGrad<float16, CUDAContext>(
const float16* dy, const float16* dy,
float16* dx, float16* dx,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_ConcatGradHalf<half> _ConcatGradHalf<half>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, 0, ctx->cuda_stream() >> >(count,
...@@ -600,9 +772,6 @@ template <> void ConcatGrad<float16, CUDAContext>( ...@@ -600,9 +772,6 @@ template <> void ConcatGrad<float16, CUDAContext>(
x_concat_dim, y_concat_dim, concat_offset, x_concat_dim, y_concat_dim, concat_offset,
reinterpret_cast<const half*>(dy), reinterpret_cast<const half*>(dy),
reinterpret_cast<half*>(dx)); reinterpret_cast<half*>(dx));
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
/******************** ndarray.transpose ********************/ /******************** ndarray.transpose ********************/
...@@ -636,16 +805,12 @@ template <> void Transpose<float16, CUDAContext>( ...@@ -636,16 +805,12 @@ template <> void Transpose<float16, CUDAContext>(
const float16* x, const float16* x,
float16* y, float16* y,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_TransposeHalf<half> _TransposeHalf<half>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, 0, ctx->cuda_stream() >> >(count,
ndim, order, old_steps, new_steps, ndim, order, old_steps, new_steps,
reinterpret_cast<const half*>(x), reinterpret_cast<const half*>(x),
reinterpret_cast<half*>(y)); reinterpret_cast<half*>(y));
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
template <typename T> template <typename T>
...@@ -677,21 +842,16 @@ template <> void TransposeGrad<float16, CUDAContext>( ...@@ -677,21 +842,16 @@ template <> void TransposeGrad<float16, CUDAContext>(
const float16* dy, const float16* dy,
float16* dx, float16* dx,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_TransposeGradHalf<half> _TransposeGradHalf<half>
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, 0, ctx->cuda_stream() >> >(count,
ndim, order, old_steps, new_steps, ndim, order, old_steps, new_steps,
reinterpret_cast<const half*>(dy), reinterpret_cast<const half*>(dy),
reinterpret_cast<half*>(dx)); reinterpret_cast<half*>(dx));
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
/******************** update.adam_update ********************/ /******************** update.adam_update ********************/
#ifdef WITH_CUDA_FP16
__global__ void _AdamUpdateHalf( __global__ void _AdamUpdateHalf(
const int count, const int count,
const half lr, const half lr,
...@@ -720,7 +880,6 @@ __global__ void _AdamUpdateHalf( ...@@ -720,7 +880,6 @@ __global__ void _AdamUpdateHalf(
#endif #endif
} }
} }
#endif
template <> void AdamUpdate<float16, CUDAContext>( template <> void AdamUpdate<float16, CUDAContext>(
const int count, const int count,
...@@ -732,7 +891,6 @@ template <> void AdamUpdate<float16, CUDAContext>( ...@@ -732,7 +891,6 @@ template <> void AdamUpdate<float16, CUDAContext>(
float16* m, float16* m,
float16* v, float16* v,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_AdamUpdateHalf _AdamUpdateHalf
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, 0, ctx->cuda_stream() >> >(count,
...@@ -743,14 +901,10 @@ template <> void AdamUpdate<float16, CUDAContext>( ...@@ -743,14 +901,10 @@ template <> void AdamUpdate<float16, CUDAContext>(
reinterpret_cast<half*>(g), reinterpret_cast<half*>(g),
reinterpret_cast<half*>(m), reinterpret_cast<half*>(m),
reinterpret_cast<half*>(v)); reinterpret_cast<half*>(v));
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
/******************** update.nesterov_update ********************/ /******************** update.nesterov_update ********************/
#ifdef WITH_CUDA_FP16
__global__ void _NesterovUpdateHalf( __global__ void _NesterovUpdateHalf(
const int count, const int count,
const half lr, const half lr,
...@@ -794,7 +948,6 @@ __global__ void _NesterovUpdateHalf2( ...@@ -794,7 +948,6 @@ __global__ void _NesterovUpdateHalf2(
#endif #endif
} }
} }
#endif
template <> void NesterovUpdate<float16, CUDAContext>( template <> void NesterovUpdate<float16, CUDAContext>(
const int count, const int count,
...@@ -803,8 +956,7 @@ template <> void NesterovUpdate<float16, CUDAContext>( ...@@ -803,8 +956,7 @@ template <> void NesterovUpdate<float16, CUDAContext>(
float16* g, float16* g,
float16* h, float16* h,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16 if ((count & 1) == 0) {
if ((count & 1) == 0 == 0) {
_NesterovUpdateHalf2 _NesterovUpdateHalf2
<< < CUDA_BLOCKS(count >> 1), CUDA_THREADS, << < CUDA_BLOCKS(count >> 1), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count >> 1, 0, ctx->cuda_stream() >> >(count >> 1,
...@@ -821,14 +973,10 @@ template <> void NesterovUpdate<float16, CUDAContext>( ...@@ -821,14 +973,10 @@ template <> void NesterovUpdate<float16, CUDAContext>(
reinterpret_cast<half*>(g), reinterpret_cast<half*>(g),
reinterpret_cast<half*>(h)); reinterpret_cast<half*>(h));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
/******************** update.rmsprop_update ********************/ /******************** update.rmsprop_update ********************/
#ifdef WITH_CUDA_FP16
__global__ void _RMSPropUpdateHalf( __global__ void _RMSPropUpdateHalf(
const int count, const int count,
const half lr, const half lr,
...@@ -851,7 +999,6 @@ __global__ void _RMSPropUpdateHalf( ...@@ -851,7 +999,6 @@ __global__ void _RMSPropUpdateHalf(
#endif #endif
} }
} }
#endif
template <> void RMSPropUpdate<float16, CUDAContext>( template <> void RMSPropUpdate<float16, CUDAContext>(
const int count, const int count,
...@@ -861,7 +1008,6 @@ template <> void RMSPropUpdate<float16, CUDAContext>( ...@@ -861,7 +1008,6 @@ template <> void RMSPropUpdate<float16, CUDAContext>(
float16* g, float16* g,
float16* h, float16* h,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16
_RMSPropUpdateHalf _RMSPropUpdateHalf
<< < CUDA_BLOCKS(count), CUDA_THREADS, << < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count, 0, ctx->cuda_stream() >> >(count,
...@@ -870,14 +1016,10 @@ template <> void RMSPropUpdate<float16, CUDAContext>( ...@@ -870,14 +1016,10 @@ template <> void RMSPropUpdate<float16, CUDAContext>(
dragon_cast<half, float>(eps), dragon_cast<half, float>(eps),
reinterpret_cast<half*>(g), reinterpret_cast<half*>(g),
reinterpret_cast<half*>(h)); reinterpret_cast<half*>(h));
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
/******************** update.sgd_update ********************/ /******************** update.sgd_update ********************/
#ifdef WITH_CUDA_FP16
__global__ void _SGDUpdateHalf( __global__ void _SGDUpdateHalf(
const int count, const int count,
const half lr, const half lr,
...@@ -911,7 +1053,6 @@ __global__ void _SGDUpdateHalf2( ...@@ -911,7 +1053,6 @@ __global__ void _SGDUpdateHalf2(
#endif #endif
} }
} }
#endif
template <> void SGDUpdate<float16, CUDAContext>( template <> void SGDUpdate<float16, CUDAContext>(
const int count, const int count,
...@@ -920,8 +1061,7 @@ template <> void SGDUpdate<float16, CUDAContext>( ...@@ -920,8 +1061,7 @@ template <> void SGDUpdate<float16, CUDAContext>(
float16* g, float16* g,
float16* h, float16* h,
CUDAContext* ctx) { CUDAContext* ctx) {
#ifdef WITH_CUDA_FP16 if ((count & 1) == 0) {
if ((count & 1) == 0 == 0) {
_SGDUpdateHalf2 _SGDUpdateHalf2
<< < CUDA_BLOCKS(count >> 1), CUDA_THREADS, << < CUDA_BLOCKS(count >> 1), CUDA_THREADS,
0, ctx->cuda_stream() >> >(count >> 1, 0, ctx->cuda_stream() >> >(count >> 1,
...@@ -938,9 +1078,6 @@ template <> void SGDUpdate<float16, CUDAContext>( ...@@ -938,9 +1078,6 @@ template <> void SGDUpdate<float16, CUDAContext>(
reinterpret_cast<half*>(g), reinterpret_cast<half*>(g),
reinterpret_cast<half*>(h)); reinterpret_cast<half*>(h));
} }
#else
CUDA_FP16_NOT_COMPILED;
#endif
} }
} // namespace kernel } // namespace kernel
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!