Commit a96b9375 by Ting PAN

Update documentation

1 parent 01602fc9
<p align="center">
<img width="40%" src="https://dragon.seetatech.com/static/images/styles-dragon.png"/>
<img width="100%" src="https://dragon.seetatech.com/download/dragon/assets/banner.png"/>
</p>
[Dragon](https://dragon.seetatech.com) is a **C**(Computation)**G**(Graph)**V**(Virtual)**M**(Machine) based distributed deep learning framework.
It fuses several modern frameworks and integrations together, powered by a unified engine.
The computation between different programming styles is deterministic and reproducible.
[Dragon](https://dragon.seetatech.com) is a machine learning library that provides diverse programming styles for AI modeling. It builds a virtual machine for computation graphs by leveraging a carefully designed intermediate representation, which decouples execution from the specific invocation. As a result, it can fuse modern frameworks and integrations together, powered by a unified engine.
It is the first deep learning framework that focuses on developing multiple styles, rather than
promoting internal interfaces. We will always learn from the AI community to evolve Dragon over time.
Dragon is devoted to providing a universal but invisible interface for designing AI models. Developers can continue to use their codebase and familiar interfaces in this novel framework. It aims to relieve developers of the burden of porting projects written in other frameworks, while achieving similar or even better performance.
Dragon actively tracks the releases of [PyTorch](https://www.pytorch.org/) and [TensorFlow](https://www.tensorflow.org), and dispatches AI computation to diverse accelerators, including the newest NVIDIA GPUs and Apple Silicon processors. It is the first deep learning framework that focuses on developing multiple styles, rather than promoting a private interface. We will always learn from the AI community to evolve Dragon over time.
## Installation
See the [install guide](https://dragon.seetatech.com/install) for the pip package
or how to build from source.
See the [install guide](https://dragon.seetatech.com/install) for the pip package or how to build from source.
## License
[BSD 2-Clause license](https://github.com/seetaresearch/dragon/blob/master/LICENSE)
......@@ -41,7 +41,6 @@ from dragon.vm.dali.core.ops.image_ops import WarpAffine
from dragon.vm.dali.core.ops.math_ops import Normalize
from dragon.vm.dali.core.ops.random_ops import CoinFlip
from dragon.vm.dali.core.ops.random_ops import Uniform
from dragon.vm.dali.core.ops.reader_ops import CGRecordReader
from dragon.vm.dali.core.ops.reader_ops import TFRecordReader
__all__ = [_s for _s in dir() if not _s.startswith('_')]
......@@ -14,147 +14,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import multiprocessing as mp
import json
import os
# Import the placeholder factory unconditionally: both guarded imports
# below fall back to it, and either one can fail independently. Importing
# it only inside the first ``except`` left the name undefined whenever
# ``nvidia.dali`` was installed but ``codewithgpu`` was not.
from dragon.core.util import deprecation

try:
    from nvidia.dali import ops
    from nvidia.dali import tfrecord
    from nvidia.dali import tfrecord as tfrec
except ImportError:
    ops = deprecation.NotInstalled('nvidia.dali')
    tfrecord = deprecation.NotInstalled('nvidia.dali')
    # Keep the alias bound to the same kind of placeholder as ``tfrecord``.
    tfrec = deprecation.NotInstalled('nvidia.dali')
try:
    import codewithgpu
except ImportError:
    codewithgpu = deprecation.NotInstalled('codewithgpu')
from dragon.vm.dali.core.framework import context
from dragon.vm.dali.core.ops.builtin_ops import ExternalSource
class CGRecordReader(object):
    """Read examples from the CGRecord.

    Examples:

    ```python
    class MyPipeline(dali.Pipeline):
        def __init__(self):
            super(MyPipeline, self).__init__()
            # Assume that we have the following files:
            # /path/to/records/00000.data
            # /path/to/records/00000.index
            # /path/to/records/METADATA
            self.reader = dali.ops.CGRecordReader(
                path='/path/to/records',
                features=('image', 'label'),
                pipeline=self,
                # Shuffle locally in the next ``initial_fill`` examples
                # It turns to be weak with the decreasing of ``initial_fill``
                # and disabled if ``initial_fill`` is set to **1**
                random_shuffle=True, initial_fill=1024)

        def iter_setup(self):
            self.reader.feed_inputs()

        def define_graph(self):
            inputs = self.reader()
    ```

    """

    def __init__(
        self,
        path,
        features,
        pipeline,
        shard_id=0,
        num_shards=1,
        random_shuffle=False,
        initial_fill=1024,
        **kwargs
    ):
        """Create a ``CGRecordReader``.

        Parameters
        ----------
        path : str
            The folder of record files.
        features : Sequence[str], required
            The name of features to extract.
        pipeline : nvidia.dali.Pipeline, required
            The pipeline to connect to.
        shard_id : int, optional, default=0
            The index of partition to read.
        num_shards : int, optional, default=1
            The total number of partitions over dataset.
        random_shuffle : bool, optional, default=False
            Whether to shuffle the data.
        initial_fill : int, optional, default=1024
            The length of sampling sequence for shuffle.
        **kwargs
            Extra arguments forwarded to ``codewithgpu.DatasetReader``.

        """
        self._pipe = pipeline
        self._batch_size = pipeline.batch_size
        self._prefetch_depth = pipeline._prefetch_queue_depth
        # The queue holds at most ``prefetch_depth`` batches of examples.
        self._buffer = mp.Queue(self._prefetch_depth * self._batch_size)
        self._dataset_reader = codewithgpu.DatasetReader(
            path=path, output_queue=self._buffer,
            partition_idx=shard_id, num_partitions=num_shards,
            shuffle=random_shuffle, initial_fill=initial_fill, **kwargs)
        self._dataset_reader.start()
        # One external source per feature, created under the cpu device scope.
        with context.device('cpu'):
            self.features = dict((k, ExternalSource()) for k in features)

        def cleanup():
            self.terminate()

        # Make sure the started reader is terminated at interpreter exit.
        import atexit
        atexit.register(cleanup)

    def example_to_data(self, example):
        """Define the translation from example to array data.

        Override this method to implement the translation.
        ``feed_inputs`` expects the result to be a mapping from
        feature name to the data of a single example.

        Parameters
        ----------
        example : Any
            An item taken from the reader queue.

        Raises
        ------
        NotImplementedError
            Always, unless overridden.

        """
        raise NotImplementedError

    def feed_inputs(self):
        """Feed the data to edge references.

        Call this method in the ``Pipeline.iter_setup(...)``.

        Takes ``pipeline.batch_size`` examples from the queue, translates
        each one with ``example_to_data`` and feeds the per-feature lists
        to the pipeline.

        """
        feed_dict = collections.defaultdict(list)
        for _ in range(self._pipe.batch_size):
            data = self.example_to_data(self._buffer.get())
            for k, v in data.items():
                feed_dict[k].append(v)
        # A feature absent from the translated data is fed an empty list.
        for k, ref in self.features.items():
            self._pipe.feed_input(ref, feed_dict[k])

    def terminate(self):
        """Terminate the reader."""
        self._dataset_reader.terminate()
        self._dataset_reader.join()

    def __call__(self, *args, **kwargs):
        """Create the edge references for features.

        Call this method in the ``Pipeline.define_graph(...)``.

        Each stored feature object is called once and ``self.features``
        is replaced by the results, so later ``feed_inputs`` calls feed
        those results.

        Returns
        -------
        Dict[str, _EdgeReference]
            The feature reference dict.

        """
        self.features = dict((k, v()) for k, v in self.features.items())
        return self.features
tfrec = deprecation.NotInstalled('nvidia.dali')
class TFRecordReader(object):
......@@ -232,10 +101,14 @@ class TFRecordReader(object):
if meta_data_file is None:
raise FileNotFoundError('Excepted meta data file: %s' % meta_data_file)
with open(os.path.join(path, meta_data_file), 'r') as f:
features = f.read()
features = features.replace('tf.', 'tfrecord.')
features = features.replace('tf.io.', 'tfrecord.')
features = eval(features)
features = json.load(f)['features']
for k in list(features.keys()):
shape, dtype, default_value = features[k]
dtype = getattr(tfrec, 'string' if dtype == 'bytes' else dtype)
if shape is None:
features[k] = tfrec.VarLenFeature(dtype, default_value)
else:
features[k] = tfrec.FixedLenFeature(shape, dtype, default_value)
data_files.sort()
index_files.sort()
data = [os.path.join(path, e) for e in data_files]
......
......@@ -16,10 +16,10 @@ Requirements
pip install sphinx
```
- sphinx_seeta_theme
- sphinx-seeta-theme
```bash
pip install sphinx_seeta_theme
pip install sphinx-seeta-theme
```
- doxygen (C++ API only)
......
......@@ -2083,7 +2083,7 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
PREDEFINED = DRAGON_API= USE_MPI USE_CUDA USE_CUDNN USE_NCCL
PREDEFINED = DRAGON_API= USE_MPI USE_CUDA USE_CUDNN USE_MPS USE_NCCL
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The
......
......@@ -15,6 +15,9 @@ dragon/core
`class Graph <core/Graph.html>`_
: Graph to execute operators sequentially.
`class MPSContext <core/MPSContext.html>`_
: The mps device context.
`class Operator <core/Operator.html>`_
: The base operator class with context.
......@@ -22,10 +25,10 @@ dragon/core
: Class to record the schema of operator.
`class Tensor <core/Tensor.html>`_
: The base tensor class, manage memory or not.
: The base tensor class.
`class TypeMeta <core/TypeMeta.html>`_
: Metaclass for all types.
: The meta class for all types.
`class UnifiedMemory <core/UnifiedMemory.html>`_
: Memory to manage both the host and device data.
......@@ -39,6 +42,7 @@ dragon/core
core/CPUContext
core/CUDAContext
core/Graph
core/MPSContext
core/Operator
core/OpSchema
core/Tensor
......
......@@ -10,6 +10,29 @@ Constructors
.. doxygenfunction:: dragon::CPUContext::CPUContext(unsigned int random_seed)
.. doxygenfunction:: dragon::CPUContext::CPUContext(const DeviceOption &option)
Public Properties
-----------------
device
######
.. doxygenfunction:: dragon::CPUContext::device
rand_generator
##############
.. doxygenfunction:: dragon::CPUContext::rand_generator
stream
######
.. doxygenfunction:: dragon::CPUContext::stream
workspace
#########
.. doxygenfunction:: dragon::CPUContext::workspace
set_stream
##########
.. doxygenfunction:: dragon::CPUContext::set_stream
Public Functions
----------------
......@@ -49,26 +72,6 @@ SwitchToDevice
##############
.. doxygenfunction:: dragon::CPUContext::SwitchToDevice
device
######
.. doxygenfunction:: dragon::CPUContext::device
rand_generator
##############
.. doxygenfunction:: dragon::CPUContext::rand_generator
set_stream
##########
.. doxygenfunction:: dragon::CPUContext::set_stream
stream
######
.. doxygenfunction:: dragon::CPUContext::stream
workspace
#########
.. doxygenfunction:: dragon::CPUContext::workspace
.. raw:: html
<style>
......
......@@ -10,6 +10,65 @@ Constructors
.. doxygenfunction:: dragon::CUDAContext::CUDAContext(int device)
.. doxygenfunction:: dragon::CUDAContext::CUDAContext(const DeviceOption &option)
Public Properties
-----------------
cublas_handle
#############
.. doxygenfunction:: dragon::CUDAContext::cublas_handle
cuda_stream
###########
.. doxygenfunction:: dragon::CUDAContext::cuda_stream()
cuda_stream
###########
.. doxygenfunction:: dragon::CUDAContext::cuda_stream(int device, int stream)
cudnn_handle
############
.. doxygenfunction:: dragon::CUDAContext::cudnn_handle
curand_generator
################
.. doxygenfunction:: dragon::CUDAContext::curand_generator
current_device
##############
.. doxygenfunction:: dragon::CUDAContext::current_device
device
######
.. doxygenfunction:: dragon::CUDAContext::device
mutex
#####
.. doxygenfunction:: dragon::CUDAContext::mutex
objects
#######
.. doxygenfunction:: dragon::CUDAContext::objects
rand_generator
##############
.. doxygenfunction:: dragon::CUDAContext::rand_generator
stream
######
.. doxygenfunction:: dragon::CUDAContext::stream
workspace
#########
.. doxygenfunction:: dragon::CUDAContext::workspace()
workspace
#########
.. doxygenfunction:: dragon::CUDAContext::workspace(int device, int stream)
set_stream
##########
.. doxygenfunction:: dragon::CUDAContext::set_stream
Public Functions
----------------
......@@ -21,6 +80,10 @@ Delete
######
.. doxygenfunction:: dragon::CUDAContext::Delete
DeleteHost
##########
.. doxygenfunction:: dragon::CUDAContext::DeleteHost
FinishDeviceComputation
#######################
.. doxygenfunction:: dragon::CUDAContext::FinishDeviceComputation
......@@ -49,6 +112,10 @@ New
###
.. doxygenfunction:: dragon::CUDAContext::New
NewHost
#######
.. doxygenfunction:: dragon::CUDAContext::NewHost
SwitchToDevice
##############
.. doxygenfunction:: dragon::CUDAContext::SwitchToDevice
......@@ -57,50 +124,6 @@ SynchronizeStream
#################
.. doxygenfunction:: dragon::CUDAContext::SynchronizeStream
cublas_handle
#############
.. doxygenfunction:: dragon::CUDAContext::cublas_handle
cuda_stream
###########
.. doxygenfunction:: dragon::CUDAContext::cuda_stream()
cuda_stream
###########
.. doxygenfunction:: dragon::CUDAContext::cuda_stream(int device, int stream)
cudnn_handle
############
.. doxygenfunction:: dragon::CUDAContext::cudnn_handle
curand_generator
################
.. doxygenfunction:: dragon::CUDAContext::curand_generator
rand_generator
##############
.. doxygenfunction:: dragon::CUDAContext::rand_generator
device
######
.. doxygenfunction:: dragon::CUDAContext::device
set_stream
##########
.. doxygenfunction:: dragon::CUDAContext::set_stream
stream
######
.. doxygenfunction:: dragon::CUDAContext::stream
workspace
#########
.. doxygenfunction:: dragon::CUDAContext::workspace()
workspace
#########
.. doxygenfunction:: dragon::CUDAContext::workspace(int device, int stream)
.. raw:: html
<style>
......
......@@ -8,16 +8,8 @@ Constructors
.. doxygenfunction:: dragon::Graph::Graph(const GraphDef& def, Workspace* ws)
Public Functions
----------------
Create
######
.. doxygenfunction:: dragon::Graph::Create
Run
###
.. doxygenfunction:: dragon::Graph::Run
Public Properties
-----------------
arg
###
......@@ -31,14 +23,14 @@ def
###
.. doxygenfunction:: dragon::Graph::def
optimized_def
#############
.. doxygenfunction:: dragon::Graph::optimized_def
name
####
.. doxygenfunction:: dragon::Graph::name
optimized_def
#############
.. doxygenfunction:: dragon::Graph::optimized_def
phase
#####
.. doxygenfunction:: dragon::Graph::phase
......@@ -47,6 +39,17 @@ workspace
#########
.. doxygenfunction:: dragon::Graph::workspace
Public Functions
----------------
Create
######
.. doxygenfunction:: dragon::Graph::Create
Run
###
.. doxygenfunction:: dragon::Graph::Run
.. raw:: html
<style>
......
MPSContext
==========
.. doxygenclass:: dragon::MPSContext
Constructors
------------
.. doxygenfunction:: dragon::MPSContext::MPSContext()
.. doxygenfunction:: dragon::MPSContext::MPSContext(int device)
.. doxygenfunction:: dragon::MPSContext::MPSContext(const DeviceOption &option)
Public Properties
-----------------
current_device
##############
.. doxygenfunction:: dragon::MPSContext::current_device
device
######
.. doxygenfunction:: dragon::MPSContext::device
mps_stream
##########
.. doxygenfunction:: dragon::MPSContext::mps_stream
mutex
#####
.. doxygenfunction:: dragon::MPSContext::mutex
objects
#######
.. doxygenfunction:: dragon::MPSContext::objects
rand_generator
##############
.. doxygenfunction:: dragon::MPSContext::rand_generator
stream
######
.. doxygenfunction:: dragon::MPSContext::stream
workspace
#########
.. doxygenfunction:: dragon::MPSContext::workspace()
workspace
#########
.. doxygenfunction:: dragon::MPSContext::workspace(int device, int stream)
set_stream
##########
.. doxygenfunction:: dragon::MPSContext::set_stream
Public Functions
----------------
Delete
######
.. doxygenfunction:: dragon::MPSContext::Delete
FinishDeviceComputation
#######################
.. doxygenfunction:: dragon::MPSContext::FinishDeviceComputation
Memset
######
.. doxygenfunction:: dragon::MPSContext::Memset
MemsetAsync
###########
.. doxygenfunction:: dragon::MPSContext::MemsetAsync
Memcpy
######
.. doxygenfunction:: dragon::MPSContext::Memcpy(size_t n, void *dest, const void *src)
Memcpy
######
.. doxygenfunction:: dragon::MPSContext::Memcpy(size_t n, void *dest, const void *src, int device)
MemcpyAsync
###########
.. doxygenfunction:: dragon::MPSContext::MemcpyAsync
New
###
.. doxygenfunction:: dragon::MPSContext::New
NewShared
#########
.. doxygenfunction:: dragon::MPSContext::NewShared
NewSharedFromBytes
##################
.. doxygenfunction:: dragon::MPSContext::NewSharedFromBytes
NewSharedFromBuffer
###################
.. doxygenfunction:: dragon::MPSContext::NewSharedFromBuffer
SwitchToDevice
##############
.. doxygenfunction:: dragon::MPSContext::SwitchToDevice
SynchronizeStream
#################
.. doxygenfunction:: dragon::MPSContext::SynchronizeStream
.. raw:: html
<style>
h1:before {
content: "dragon::";
color: #103d3e;
}
</style>
......@@ -8,6 +8,45 @@ Constructors
.. doxygenfunction:: dragon::Operator::Operator(const OperatorDef &def, Workspace *ws)
Public Properties
-----------------
arg
###
.. doxygenfunction:: dragon::Operator::arg
args
####
.. doxygenfunction:: dragon::Operator::args
data_format
###########
.. doxygenfunction:: dragon::Operator::data_format
data_type
#########
.. doxygenfunction:: dragon::Operator::data_type
def
###
.. doxygenfunction:: dragon::Operator::def
name
####
.. doxygenfunction:: dragon::Operator::name
phase
#####
.. doxygenfunction:: dragon::Operator::phase
type
####
.. doxygenfunction:: dragon::Operator::type
workspace
#########
.. doxygenfunction:: dragon::Operator::workspace
Public Functions
----------------
......@@ -63,42 +102,6 @@ Run
###
.. doxygenfunction:: dragon::Operator::Run
arg
###
.. doxygenfunction:: dragon::Operator::arg
args
####
.. doxygenfunction:: dragon::Operator::args
data_format
###########
.. doxygenfunction:: dragon::Operator::data_format
data_type
#########
.. doxygenfunction:: dragon::Operator::data_type
def
###
.. doxygenfunction:: dragon::Operator::def
name
####
.. doxygenfunction:: dragon::Operator::name
type
####
.. doxygenfunction:: dragon::Operator::type
phase
#####
.. doxygenfunction:: dragon::Operator::phase
workspace
#########
.. doxygenfunction:: dragon::Operator::workspace
.. raw:: html
<style>
......
......@@ -12,48 +12,8 @@ Constructors
.. doxygenfunction:: dragon::Tensor::Tensor(const vec32_t &dims)
.. doxygenfunction:: dragon::Tensor::Tensor(const TypeMeta &meta)
Public Functions
----------------
CopyFrom
########
.. doxygenfunction:: dragon::Tensor::CopyFrom(Tensor &other, Context *ctx)
CopyFrom
########
.. doxygenfunction:: dragon::Tensor::CopyFrom(const vector<VectorType> &other)
CopyTo
######
.. doxygenfunction:: dragon::Tensor::CopyTo
DimString
#########
.. doxygenfunction:: dragon::Tensor::DimString() const
DimString
#########
.. doxygenfunction:: dragon::Tensor::DimString(const vector<int64_t> &dims)
IsType
######
.. doxygenfunction:: dragon::Tensor::IsType
MapFrom
#######
.. doxygenfunction:: dragon::Tensor::MapFrom
Reset
#####
.. doxygenfunction:: dragon::Tensor::Reset
Reshape
#######
.. doxygenfunction:: dragon::Tensor::Reshape
ReshapeLike
###########
.. doxygenfunction:: dragon::Tensor::ReshapeLike
Public Properties
-----------------
axis
####
......@@ -91,18 +51,6 @@ empty
#####
.. doxygenfunction:: dragon::Tensor::empty
has_memory
##########
.. doxygenfunction:: dragon::Tensor::has_memory
has_name
########
.. doxygenfunction:: dragon::Tensor::has_name
meta
####
.. doxygenfunction:: dragon::Tensor::meta
memory
######
.. doxygenfunction:: dragon::Tensor::memory
......@@ -111,6 +59,10 @@ memory_state
############
.. doxygenfunction:: dragon::Tensor::memory_state
meta
####
.. doxygenfunction:: dragon::Tensor::meta
mutable_data
############
.. doxygenfunction:: dragon::Tensor::mutable_data
......@@ -151,6 +103,57 @@ version
#######
.. doxygenfunction:: dragon::Tensor::version
has_memory
##########
.. doxygenfunction:: dragon::Tensor::has_memory
has_name
########
.. doxygenfunction:: dragon::Tensor::has_name
Public Functions
----------------
CopyFrom
########
.. doxygenfunction:: dragon::Tensor::CopyFrom(Tensor &other, Context *ctx)
CopyFrom
########
.. doxygenfunction:: dragon::Tensor::CopyFrom(const vector<VectorType> &other)
CopyTo
######
.. doxygenfunction:: dragon::Tensor::CopyTo
DimString
#########
.. doxygenfunction:: dragon::Tensor::DimString() const
DimString
#########
.. doxygenfunction:: dragon::Tensor::DimString(const vector<int64_t> &dims)
IsType
######
.. doxygenfunction:: dragon::Tensor::IsType
MapFrom
#######
.. doxygenfunction:: dragon::Tensor::MapFrom
Reset
#####
.. doxygenfunction:: dragon::Tensor::Reset
Reshape
#######
.. doxygenfunction:: dragon::Tensor::Reshape
ReshapeLike
###########
.. doxygenfunction:: dragon::Tensor::ReshapeLike
.. raw:: html
<style>
......
......@@ -9,6 +9,29 @@ Constructors
.. doxygenfunction:: dragon::TypeMeta::TypeMeta()
.. doxygenfunction:: dragon::TypeMeta::TypeMeta(const TypeMeta &src)
Public Properties
-----------------
copy
####
.. doxygenfunction:: dragon::TypeMeta::copy
ctor
####
.. doxygenfunction:: dragon::TypeMeta::ctor
dtor
####
.. doxygenfunction:: dragon::TypeMeta::dtor
id
##
.. doxygenfunction:: dragon::TypeMeta::id
itemsize
########
.. doxygenfunction:: dragon::TypeMeta::itemsize
Public Functions
----------------
......@@ -40,26 +63,6 @@ Match
#####
.. doxygenfunction:: dragon::TypeMeta::Match
copy
####
.. doxygenfunction:: dragon::TypeMeta::copy
ctor
####
.. doxygenfunction:: dragon::TypeMeta::ctor
dtor
####
.. doxygenfunction:: dragon::TypeMeta::dtor
id
##
.. doxygenfunction:: dragon::TypeMeta::id
itemsize
########
.. doxygenfunction:: dragon::TypeMeta::itemsize
.. raw:: html
<style>
......
......@@ -16,20 +16,8 @@ State
#####
.. doxygenenum:: dragon::UnifiedMemory::State
Public Functions
----------------
SwitchToCUDADevice
##################
.. doxygenfunction:: dragon::UnifiedMemory::SwitchToCUDADevice
ToCPU
#####
.. doxygenfunction:: dragon::UnifiedMemory::ToCPU
ToCUDA
######
.. doxygenfunction:: dragon::UnifiedMemory::ToCUDA
Public Properties
-----------------
cpu_data
########
......@@ -47,6 +35,26 @@ info
####
.. doxygenfunction:: dragon::UnifiedMemory::info
mps_data
########
.. doxygenfunction:: dragon::UnifiedMemory::mps_data
order
#####
.. doxygenfunction:: dragon::UnifiedMemory::order
size
####
.. doxygenfunction:: dragon::UnifiedMemory::size() const
size
####
.. doxygenfunction:: dragon::UnifiedMemory::size(const string &device_type, int device_id) const
state
#####
.. doxygenfunction:: dragon::UnifiedMemory::state
mutable_cpu_data
################
.. doxygenfunction:: dragon::UnifiedMemory::mutable_cpu_data
......@@ -63,17 +71,32 @@ set_cuda_data
#############
.. doxygenfunction:: dragon::UnifiedMemory::set_cuda_data
size
####
.. doxygenfunction:: dragon::UnifiedMemory::size() const
set_order
#########
.. doxygenfunction:: dragon::UnifiedMemory::set_order
size
####
.. doxygenfunction:: dragon::UnifiedMemory::size(const string &device_type, int device_id) const
Public Functions
----------------
state
SwitchToCUDADevice
##################
.. doxygenfunction:: dragon::UnifiedMemory::SwitchToCUDADevice
SwitchToMPSDevice
##################
.. doxygenfunction:: dragon::UnifiedMemory::SwitchToMPSDevice
ToCPU
#####
.. doxygenfunction:: dragon::UnifiedMemory::state
.. doxygenfunction:: dragon::UnifiedMemory::ToCPU
ToCUDA
######
.. doxygenfunction:: dragon::UnifiedMemory::ToCUDA
ToMPS
#####
.. doxygenfunction:: dragon::UnifiedMemory::ToMPS
.. raw:: html
......
......@@ -8,6 +8,29 @@ Constructors
.. doxygenfunction:: dragon::Workspace::Workspace(const string &name)
Public Properties
-----------------
data
####
.. doxygenfunction:: dragon::Workspace::data(size_t size, const string &name = "BufferShared")
data
####
.. doxygenfunction:: dragon::Workspace::data(int64_t size, const string &name = "BufferShared")
graphs
######
.. doxygenfunction:: dragon::Workspace::graphs
name
####
.. doxygenfunction:: dragon::Workspace::name
tensors
#######
.. doxygenfunction:: dragon::Workspace::tensors
Public Functions
----------------
......@@ -55,26 +78,6 @@ UniqueName
##########
.. doxygenfunction:: dragon::Workspace::UniqueName
data
####
.. doxygenfunction:: dragon::Workspace::data(size_t size, const string &name = "BufferShared")
data
####
.. doxygenfunction:: dragon::Workspace::data(int64_t size, const string &name = "BufferShared")
graphs
######
.. doxygenfunction:: dragon::Workspace::graphs
name
####
.. doxygenfunction:: dragon::Workspace::name
tensors
#######
.. doxygenfunction:: dragon::Workspace::tensors
.. raw:: html
<style>
......
......@@ -21,9 +21,6 @@ vm.dali.ops
`class Cast <ops/Cast.html>`_
: Cast the data type of input.
`class CGRecordReader <ops/CGRecordReader.html>`_
: Read examples from the cg-record file.
`class CoinFlip <ops/CoinFlip.html>`_
: Sample values from a bernoulli distribution.
......@@ -101,7 +98,6 @@ vm.dali.ops
ops/Brightness
ops/BrightnessContrast
ops/Cast
ops/CGRecordReader
ops/CoinFlip
ops/ColorSpaceConversion
ops/ColorTwist
......
CGRecordReader
===============
.. autoclass:: dragon.vm.dali.ops.CGRecordReader
__init__
--------
.. automethod:: dragon.vm.dali.ops.CGRecordReader.__init__
Methods
-------
example_to_data
###############
.. automethod:: dragon.vm.dali.ops.CGRecordReader.example_to_data
feed_inputs
###########
.. automethod:: dragon.vm.dali.ops.CGRecordReader.feed_inputs
__call__
########
.. automethod:: dragon.vm.dali.ops.CGRecordReader.__call__
.. raw:: html
<style>
h1:before {
content: "dali.ops.";
color: #103d3e;
}
</style>
......@@ -18,6 +18,9 @@ dragon.cuda
`get_device_capability(...) <cuda/get_device_capability.html>`_
: Return the capability of specified device.
`get_device_name(...) <cuda/get_device_name.html>`_
: Return the name of specified device.
`is_available(...) <cuda/is_available.html>`_
: Return a bool reporting if runtime is available.
......@@ -45,6 +48,7 @@ dragon.cuda
cuda/Stream
cuda/current_device
cuda/get_device_capability
cuda/get_device_name
cuda/is_available
cuda/memory_allocated
cuda/set_cublas_flags
......
get_device_name
===============
.. autofunction:: dragon.cuda.get_device_name
.. raw:: html
<style>
h1:before {
content: "dragon.cuda.";
color: #103d3e;
}
</style>
......@@ -12,6 +12,9 @@ vm.torch.cuda
`get_device_capability(...) <cuda/get_device_capability.html>`_
: Return the capability of specified device.
`get_device_name(...) <cuda/get_device_name.html>`_
: Return the name of specified device.
`is_available(...) <cuda/is_available.html>`_
: Return a bool reporting if runtime is available.
......@@ -26,6 +29,7 @@ vm.torch.cuda
cuda/current_device
cuda/get_device_capability
cuda/get_device_name
cuda/is_available
cuda/set_device
cuda/synchronize
......
get_device_name
===============
.. autofunction:: dragon.vm.torch.cuda.get_device_name
.. raw:: html
<style>
h1:before {
content: "torch.cuda.";
color: #103d3e;
}
</style>
......@@ -32,7 +32,7 @@ struct DRAGON_API TypeRegister {
};
/*!
* \brief Metaclass for all types.
* \brief The meta class for all types.
*
* TypeMeta is commonly used for type identification:
*
......
......@@ -7,16 +7,16 @@ namespace kernels {
namespace {
template <typename T, typename AccT>
template <typename T>
__global__ void
_BiasAdd(const int NxC, const int C, const T* x, const T* bias, T* y) {
const math::PlusFunctor<T> functor;
CUDA_1D_KERNEL_LOOP(i, NxC) {
y[i] = convert::To<T>(
convert::To<AccT>(x[i]) + convert::To<AccT>(__ldg(bias + i % C)));
y[i] = functor(x[i], __ldg(bias + i % C));
}
}
template <typename T, typename AccT>
template <typename T>
__global__ void _BiasAdd(
const int NxCxS,
const int S,
......@@ -24,43 +24,41 @@ __global__ void _BiasAdd(
const T* x,
const T* bias,
T* y) {
const math::PlusFunctor<T> functor;
CUDA_1D_KERNEL_LOOP(i, NxCxS) {
y[i] = convert::To<T>(
convert::To<AccT>(x[i]) + convert::To<AccT>(__ldg(bias + (i / S) % C)));
y[i] = functor(x[i], __ldg(bias + i / S % C));
}
}
} // namespace
#define DEFINE_KERNEL_LAUNCHER(T) \
template <> \
void BiasAdd<T, CUDAContext>( \
const int N, \
const int S, \
const int C, \
const T* x, \
const T* bias, \
T* y, \
CUDAContext* ctx) { \
const auto NxCxS = N * C * S; \
if (S == 1) { \
_BiasAdd<math::ScalarType<T>::type, math::AccumulatorType<T>::type> \
<<<CUDA_BLOCKS(NxCxS), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
NxCxS, \
C, \
reinterpret_cast<const math::ScalarType<T>::type*>(x), \
reinterpret_cast<const math::ScalarType<T>::type*>(bias), \
reinterpret_cast<math::ScalarType<T>::type*>(y)); \
} else { \
_BiasAdd<math::ScalarType<T>::type, math::AccumulatorType<T>::type> \
<<<CUDA_BLOCKS(NxCxS), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
NxCxS, \
S, \
C, \
reinterpret_cast<const math::ScalarType<T>::type*>(x), \
reinterpret_cast<const math::ScalarType<T>::type*>(bias), \
reinterpret_cast<math::ScalarType<T>::type*>(y)); \
} \
#define DEFINE_KERNEL_LAUNCHER(T) \
template <> \
void BiasAdd<T, CUDAContext>( \
const int N, \
const int S, \
const int C, \
const T* x, \
const T* bias, \
T* y, \
CUDAContext* ctx) { \
const auto NxCxS = N * C * S; \
if (S == 1) { \
_BiasAdd<<<CUDA_BLOCKS(NxCxS), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
NxCxS, \
C, \
reinterpret_cast<const math::ScalarType<T>::type*>(x), \
reinterpret_cast<const math::ScalarType<T>::type*>(bias), \
reinterpret_cast<math::ScalarType<T>::type*>(y)); \
} else { \
_BiasAdd<<<CUDA_BLOCKS(NxCxS), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
NxCxS, \
S, \
C, \
reinterpret_cast<const math::ScalarType<T>::type*>(x), \
reinterpret_cast<const math::ScalarType<T>::type*>(bias), \
reinterpret_cast<math::ScalarType<T>::type*>(y)); \
} \
}
DEFINE_KERNEL_LAUNCHER(uint8_t);
......
#include "dragon/kernels/vision/op_kernels.h"
namespace dragon {
namespace kernels {
namespace {
/*!
 * Metal shader source for the bias-add kernels, handed to MPSKernel by the
 * launcher in this file.
 *
 * - BiasAdd:        y[i] = x[i] + bias[i % C]
 * - SpatialBiasAdd: y[i] = x[i] + bias[i / S % C]
 *
 * C and S are supplied as function constants 0 and 1 respectively.
 * Kernels are instantiated for half and float, and for double only when
 * __HAVE_NATIVE_DOUBLE__ is defined; each instantiation is exported under
 * the host name "<kernel>_<type>".
 *
 * The text between R"( and )" is the program handed to the shader
 * compiler; editing it changes runtime behavior.
 */
const static string METAL_SHADERS = R"(
#include <metal_stdlib>
using namespace metal;
constant uint uint_arg1 [[function_constant(0)]]; // C
constant uint uint_arg2 [[function_constant(1)]]; // S
template <typename T>
kernel void BiasAdd(
device const T* x,
device const T* bias,
device T* y,
const uint index [[thread_position_in_grid]]) {
y[index] = x[index] + bias[index % uint_arg1];
}
template <typename T>
kernel void SpatialBiasAdd(
device const T* x,
device const T* bias,
device T* y,
const uint index [[thread_position_in_grid]]) {
y[index] = x[index] + bias[index / uint_arg2 % uint_arg1];
}
#define INSTANTIATE_KERNEL(name, T) \
template [[host_name(#name"_"#T)]] \
kernel void name(device const T*, device const T*, device T*, uint);
INSTANTIATE_KERNEL(BiasAdd, half);
INSTANTIATE_KERNEL(BiasAdd, float);
INSTANTIATE_KERNEL(SpatialBiasAdd, half);
INSTANTIATE_KERNEL(SpatialBiasAdd, float);
#if defined(__HAVE_NATIVE_DOUBLE__)
INSTANTIATE_KERNEL(BiasAdd, double);
INSTANTIATE_KERNEL(SpatialBiasAdd, double);
#endif // defined(__HAVE_NATIVE_DOUBLE__)
#undef INSTANTIATE_KERNEL
)";
} // namespace
/*!
 * Define the BiasAdd<T, MPSContext> specialization.
 *
 * The launcher binds C as function constant 0 and then picks the kernel:
 *   - S == 1: the "BiasAdd" kernel, with only constant 0 bound.
 *   - S != 1: the "Spatial"-prefixed kernel, with S additionally bound as
 *     function constant 1.
 * x, bias and y are encoded as buffers 0, 1 and 2, and (N * C * S) threads
 * are dispatched on the command buffer of the context's MPS stream.
 *
 * NOTE(review): MPSKernel::TypedString<T> presumably yields the
 * "<kernel>_<type>" host name exported by METAL_SHADERS - confirm.
 * NOTE(review): x, bias and y are cast to id<MTLBuffer>, so callers are
 * presumably expected to pass buffer handles, not raw pointers - confirm.
 */
#define DEFINE_KERNEL_LAUNCHER(T) \
template <> \
void BiasAdd<T, MPSContext>( \
const int N, \
const int S, \
const int C, \
const T* x, \
const T* bias, \
T* y, \
MPSContext* ctx) { \
const uint arg1 = C, arg2 = S; \
auto kernel = MPSKernel::TypedString<T>("BiasAdd"); \
vector<MPSConstant> args({MPSConstant(&arg1, MTLDataTypeUInt, 0)}); \
MTLComputePipelineState_t pso = nil; \
if (S == 1) { \
pso = MPSKernel(kernel, METAL_SHADERS).GetState(ctx, args); \
} else { \
args.emplace_back(MPSConstant(&arg2, MTLDataTypeUInt, 1)); \
pso = MPSKernel("Spatial" + kernel, METAL_SHADERS).GetState(ctx, args); \
} \
auto* command_buffer = ctx->mps_stream()->command_buffer(); \
auto* encoder = [command_buffer computeCommandEncoder]; \
[encoder setComputePipelineState:pso]; \
[encoder setBuffer:id<MTLBuffer>(x) offset:0 atIndex:0]; \
[encoder setBuffer:id<MTLBuffer>(bias) offset:0 atIndex:1]; \
[encoder setBuffer:id<MTLBuffer>(y) offset:0 atIndex:2]; \
MPSDispatchThreads((N * C * S), encoder, pso); \
[encoder endEncoding]; \
[encoder release]; \
}
// NOTE(review): METAL_SHADERS instantiates kernels only for half and float
// (plus double when __HAVE_NATIVE_DOUBLE__ is defined). The integer
// launchers below have no matching shader instantiation in this file;
// confirm they are never dispatched at runtime.
DEFINE_KERNEL_LAUNCHER(uint8_t);
DEFINE_KERNEL_LAUNCHER(int8_t);
DEFINE_KERNEL_LAUNCHER(int);
DEFINE_KERNEL_LAUNCHER(int64_t);
DEFINE_KERNEL_LAUNCHER(float16);
DEFINE_KERNEL_LAUNCHER(float);
DEFINE_KERNEL_LAUNCHER(double);
#undef DEFINE_KERNEL_LAUNCHER
} // namespace kernels
} // namespace dragon
......@@ -10,11 +10,11 @@ const static string METAL_SHADERS = R"(
#include <metal_stdlib>
using namespace metal;
constant int int_arg1 [[function_constant(0)]]; // C
constant int int_arg2 [[function_constant(1)]]; // H
constant int int_arg3 [[function_constant(2)]]; // W
constant int int_arg4 [[function_constant(3)]]; // out_h
constant int int_arg5 [[function_constant(4)]]; // out_w
constant int int_arg1 [[function_constant(0)]]; // C
constant int int_arg2 [[function_constant(1)]]; // H
constant int int_arg3 [[function_constant(2)]]; // W
constant int int_arg4 [[function_constant(3)]]; // out_h
constant int int_arg5 [[function_constant(4)]]; // out_w
constant float float_arg1 [[function_constant(5)]]; // spatial_scale
constant int int_arg6 [[function_constant(6)]]; // sampling_ratio
constant bool bool_arg1 [[function_constant(7)]]; // aligned
......
......@@ -31,11 +31,6 @@ void BiasAddOp<Context>::DoRunWithType() {
}
// RunOnDevice for BiasAddOp: hands off to DispatchHelper, restricted to the
// dtypes::Floating type list and keyed on the first input, Input(0).
// NOTE(review): presumably this resolves to DoRunWithType<T>() for the
// input's element type -- confirm against DispatchHelper.
template <class Context>
void BiasAddOp<Context>::RunOnDevice() {
DispatchHelper<dtypes::Floating>::Call(this, Input(0));
}
template <class Context>
template <typename T>
void BiasAddGradientOp<Context>::DoRunWithType() {
auto &dY = Input(0), *dX = Output(0), *dB = Output(1);
......@@ -67,20 +62,16 @@ void BiasAddGradientOp<Context>::DoRunWithType() {
}
}
// RunOnDevice for BiasAddGradientOp: hands off to DispatchHelper, restricted
// to the dtypes::Floating type list and keyed on Input(0) (the tensor bound
// to dY in DoRunWithType).
// NOTE(review): presumably this resolves to DoRunWithType<T>() for that
// tensor's element type -- confirm against DispatchHelper.
template <class Context>
void BiasAddGradientOp<Context>::RunOnDevice() {
DispatchHelper<dtypes::Floating>::Call(this, Input(0));
}
// Register the forward (BiasAdd) and gradient (BiasAddGradient) operators with
// each backend. The CUDA and MPS registrations are compiled only when
// USE_CUDA / USE_MPS is defined.
// NOTE(review): DEPLOY_CUDA_OPERATOR(BiasAdd) appears in two USE_CUDA sections
// below -- check whether one of them is a leftover that should be dropped.
DEPLOY_CPU_OPERATOR(BiasAdd);
#ifdef USE_CUDA
DEPLOY_CUDA_OPERATOR(BiasAdd);
#endif
DEPLOY_CPU_OPERATOR(BiasAddGradient);
#ifdef USE_CUDA
DEPLOY_CUDA_OPERATOR(BiasAdd);
DEPLOY_CUDA_OPERATOR(BiasAddGradient);
#endif
#ifdef USE_MPS
DEPLOY_MPS_OPERATOR(BiasAdd, BiasAdd);
DEPLOY_MPS_OPERATOR(BiasAddGradient, BiasAddGradient);
#endif
OPERATOR_SCHEMA(BiasAdd)
/* X, B */
......
......@@ -23,7 +23,9 @@ class BiasAddOp final : public Operator<Context> {
SIMPLE_CTOR_DTOR(BiasAddOp);
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
// Inline override: forwards to DispatchHelper over the dtypes::Floating type
// list, keyed on the first input, Input(0).
// NOTE(review): presumably this selects the DoRunWithType<T>() declared just
// below for the input's element type -- confirm against DispatchHelper.
void RunOnDevice() override {
DispatchHelper<dtypes::Floating>::Call(this, Input(0));
}
template <typename T>
void DoRunWithType();
......@@ -35,7 +37,9 @@ class BiasAddGradientOp final : public Operator<Context> {
SIMPLE_CTOR_DTOR(BiasAddGradientOp);
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
// Inline override for the gradient op: forwards to DispatchHelper over the
// dtypes::Floating type list, keyed on Input(0).
// NOTE(review): presumably this selects DoRunWithType<T>() for that input's
// element type -- confirm against DispatchHelper.
void RunOnDevice() override {
DispatchHelper<dtypes::Floating>::Call(this, Input(0));
}
template <typename T>
void DoRunWithType();
......@@ -58,7 +62,9 @@ class CuDNNBiasAddGradientOp final : public Operator<Context> {
CuDNNDestroyTensorDesc(&input_desc_);
}
void RunOnDevice() override;
// Inline override for the cuDNN gradient op: forwards to DispatchHelper over
// the dtypes::Floating type list, keyed on Input(0).
// NOTE(review): presumably this selects DoRunWithType<T>() for that input's
// element type -- confirm against DispatchHelper.
void RunOnDevice() override {
DispatchHelper<dtypes::Floating>::Call(this, Input(0));
}
template <typename T>
void DoRunWithType();
......
......@@ -39,11 +39,6 @@ void CuDNNBiasAddGradientOp<Context>::DoRunWithType() {
}
}
// RunOnDevice for CuDNNBiasAddGradientOp: hands off to DispatchHelper,
// restricted to the dtypes::Floating type list and keyed on Input(0).
// NOTE(review): presumably this resolves to DoRunWithType<T>() for the
// input's element type -- confirm against DispatchHelper.
template <class Context>
void CuDNNBiasAddGradientOp<Context>::RunOnDevice() {
DispatchHelper<dtypes::Floating>::Call(this, Input(0));
}
DEPLOY_CUDNN_OPERATOR(BiasAddGradient);
} // namespace dragon
......
......@@ -22,6 +22,7 @@ try:
import onnx
except ImportError:
onnx = None
from packaging.version import parse as version_parse
from dragon.core.autograph import context as eager_context
from dragon.core.autograph.graph_lib import GraphLib
......@@ -50,6 +51,9 @@ class DragonFrontend(object):
(12, '1.7.0'),
(13, '1.8.0'),
(14, '1.9.0'),
(15, '1.10.0'),
(16, '1.11.0'),
(17, '1.12.0'),
])
@classmethod
......@@ -254,9 +258,9 @@ class DragonFrontend(object):
detail_msg += ' * Opset = %d, ONNX >= %s,\n' % (k, v)
raise ValueError(detail_msg + '}')
onnx_version = cls.OPSET_VERSIONS[opset_version]
if onnx.__version__ < onnx_version:
if version_parse(onnx.__version__) < version_parse(onnx_version):
raise RuntimeError(
'OpSet {} requires ONNX version >= {}. '
'OpSet {} requires ONNX version >= {} '
'({} currently installed.)'
.format(opset_version, onnx_version, onnx.__version__))
return opset_version
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Python setup script."""
from __future__ import absolute_import
from __future__ import division
......@@ -81,7 +82,6 @@ class BuildPyCommand(setuptools.command.build_py.build_py):
"""Enhanced 'build_py' command."""
def build_packages(self):
clean_builds()
shutil.copytree('dragon/python', self.build_lib + '/dragon')
shutil.copytree('dali', self.build_lib + '/dragon/vm/dali')
shutil.copytree('keras', self.build_lib + '/dragon/vm/keras')
......
......@@ -3988,6 +3988,11 @@ class TestVisionOps(OpTestCase):
with dragon.device('cuda'):
self.test_bias_add()
# Re-run test_bias_add inside a dragon.device('mps') scope.
# The test is skipped (reason 'MPS unavailable') when TEST_MPS is false.
@unittest.skipIf(not TEST_MPS, 'MPS unavailable')
def test_bias_add_mps(self):
with dragon.device('mps'):
self.test_bias_add()
def test_conv1d(self, prec=1e-3, test_nhwc=True):
entries = [((2, 2, 2), (3, 2, 1), (3,), 1, 1, 0, 1, 1, 'NCHW'),
((2, 2, 2), (3, 2, 3), (3,), 3, 1, 1, 1, 1, 'NCHW'),
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!