|
dnnl_status_t DNNL_API | dnnl_primitive_attr_create (dnnl_primitive_attr_t *attr) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_clone (dnnl_primitive_attr_t *attr, const_dnnl_primitive_attr_t existing_attr) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_destroy (dnnl_primitive_attr_t attr) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_get_scratchpad_mode (const_dnnl_primitive_attr_t attr, dnnl_scratchpad_mode_t *mode) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_set_scratchpad_mode (dnnl_primitive_attr_t attr, dnnl_scratchpad_mode_t mode) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_get_output_scales (const_dnnl_primitive_attr_t attr, dnnl_dim_t *count, int *mask, const float **scales) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_set_output_scales (dnnl_primitive_attr_t attr, dnnl_dim_t count, int mask, const float *scales) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_get_scales (dnnl_primitive_attr_t attr, int arg, dnnl_dim_t *count, int *mask, const float **scales) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_set_scales (dnnl_primitive_attr_t attr, int arg, dnnl_dim_t count, int mask, const float *scales) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_get_zero_points (const_dnnl_primitive_attr_t attr, int arg, dnnl_dim_t *count, int *mask, const int32_t **zero_points) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_set_zero_points (dnnl_primitive_attr_t attr, int arg, dnnl_dim_t count, int mask, const int32_t *zero_points) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_get_post_ops (const_dnnl_primitive_attr_t attr, const_dnnl_post_ops_t *post_ops) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_set_post_ops (dnnl_primitive_attr_t attr, const_dnnl_post_ops_t post_ops) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_create (dnnl_post_ops_t *post_ops) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_destroy (dnnl_post_ops_t post_ops) |
|
int DNNL_API | dnnl_post_ops_len (const_dnnl_post_ops_t post_ops) |
|
dnnl_primitive_kind_t DNNL_API | dnnl_post_ops_get_kind (const_dnnl_post_ops_t post_ops, int index) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_append_sum (dnnl_post_ops_t post_ops, float scale) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_append_sum_v2 (dnnl_post_ops_t post_ops, float scale, dnnl_data_type_t data_type) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_get_params_sum (const_dnnl_post_ops_t post_ops, int index, float *scale) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_get_params_sum_v2 (const_dnnl_post_ops_t post_ops, int index, float *scale, dnnl_data_type_t *data_type) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_append_eltwise (dnnl_post_ops_t post_ops, float scale, dnnl_alg_kind_t alg_kind, float alpha, float beta) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_get_params_eltwise (const_dnnl_post_ops_t post_ops, int index, float *scale, dnnl_alg_kind_t *alg_kind, float *alpha, float *beta) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_append_dw_k3s1p1 (dnnl_post_ops_t post_ops, dnnl_data_type_t weights_data_type, dnnl_data_type_t bias_data_type, dnnl_data_type_t dst_data_type, dnnl_dim_t count, int mask, const float *scales) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_get_params_dw_k3s1p1 (const_dnnl_post_ops_t post_ops, int index, dnnl_data_type_t *weights_data_type, dnnl_data_type_t *bias_data_type, dnnl_data_type_t *dst_data_type, dnnl_dim_t *count, int *mask, const float **scales) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_append_dw_k3s2p1 (dnnl_post_ops_t post_ops, dnnl_data_type_t weights_data_type, dnnl_data_type_t bias_data_type, dnnl_data_type_t dst_data_type, dnnl_dim_t count, int mask, const float *scales) |
|
dnnl_status_t DNNL_API | dnnl_post_ops_get_params_dw_k3s2p1 (const_dnnl_post_ops_t post_ops, int index, dnnl_data_type_t *weights_data_type, dnnl_data_type_t *bias_data_type, dnnl_data_type_t *dst_data_type, dnnl_dim_t *count, int *mask, const float **scales) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_set_rnn_data_qparams (dnnl_primitive_attr_t attr, const float scale, const float shift) |
|
dnnl_status_t DNNL_API | dnnl_primitive_attr_set_rnn_weights_qparams (dnnl_primitive_attr_t attr, dnnl_dim_t count, int mask, const float *scales) |
|
Appends a depthwise post-op convolution with stride 1.
This post-op can only be fused with a 2D 1x1 convolution (convolution with weights spatial dimension equal to 1 i.e., kh=kw=1).
The kind of this post-op is dnnl_convolution.
The number of outputs for the primitive remains the same as before. The output size remains the same as that of the original primitive due to stride=1.
The Post-op can be defined as:
dst[:] <- scales * (conv_dw(conv_1x1))
See dev_guide_attributes_post_ops_depthwise and dev_guide_attributes_post_ops_depthwise_fusion for more info.
- Parameters
-
post_ops | Post-ops. |
weights_data_type | Weights data type of depthwise post-op |
bias_data_type | Bias data type of depthwise post-op |
dst_data_type | Output data type of depthwise post-op |
count | Length of the array of scaling factors scales. |
mask | Output scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the scales array. The set i-th bit indicates that a dedicated output scaling factor is used for each index along that dimension. The mask value of 0 implies a common scaling factor for the whole output tensor. |
scales | Pointer to a constant array of float scaling factors. |
- Returns
- dnnl_success on success and a status describing the error otherwise
Appends a depthwise post-op convolution with stride 2.
This post-op can only be fused with a 2D 1x1 convolution (convolution with weights spatial dimension equal to 1 i.e., kh=kw=1).
The kind of this post-op is dnnl_convolution.
The number of outputs for the primitive remains the same as before. The output spatial size can be derived as below:
output_height = ceil(output_height_1x1_convolution, stride)
output_width = ceil(output_width_1x1_convolution, stride)
The Post-op can be defined as:
dst[:] <- scales * (conv_dw(conv_1x1))
See dev_guide_attributes_post_ops_depthwise and dev_guide_attributes_post_ops_depthwise_fusion for more info.
- Parameters
-
post_ops | Post-ops. |
weights_data_type | Weights data type of depthwise post-op |
bias_data_type | Bias data type of depthwise post-op |
dst_data_type | Output data type of depthwise post-op |
count | Length of the array of scaling factors scales. |
mask | Output scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the scales array. The set i-th bit indicates that a dedicated output scaling factor is used for each index along that dimension. The mask value of 0 implies a common scaling factor for the whole output tensor. |
scales | Pointer to a constant array of float scaling factors. |
- Returns
- dnnl_success on success and a status describing the error otherwise
Appends an elementwise post-op.
The kind of this post operation is dnnl_eltwise.
In the simplest case when the elementwise is the only post operation, the computations would be:
dst[:] <- scale * eltwise_op (op(...)) // instead of dst[:] <- op(...)
where eltwise_op is configured with the given parameters.
- Parameters
-
post_ops | Post-ops. |
scale | Scaling factor. |
alg_kind | Elementwise algorithm for the post-op. |
alpha | Alpha parameter for the elementwise algorithm. |
beta | Beta parameter for the elementwise algorithm. |
- Returns
- dnnl_success on success and a status describing the error otherwise.
Appends an accumulation (sum) to post-ops. Prior to accumulating the result, the previous value is multiplied by a scale.
The kind of this post-op is dnnl_sum.
This feature may improve performance for cases like residual learning blocks, where the result of convolution is accumulated to the previously computed activations. The parameter scale may be used for the integer-based computations when the result and previous activations have different logical scaling factors.
In the simplest case when the accumulation is the only post-op, the computations would be:
dst[:] <- scale * dst[:] + op(...) // instead of dst[:] <- op(...)
- Note
- This post-op executes in-place and does not change the destination layout.
- Parameters
-
post_ops | Post-ops. |
scale | Accumulation scaling factor. |
- Returns
- dnnl_success on success and a status describing the error otherwise.
Appends an accumulation v2 (sum) to post-ops. Prior to accumulating the result, the previous value is multiplied by a scale.
The kind of this post-op is dnnl_sum.
This feature may improve performance for cases like residual learning blocks, where the result of convolution is accumulated to the previously computed activations. The parameter scale may be used for the integer-based computations when the result and previous activations have different logical scaling factors.
In the simplest case when the accumulation is the only post-op, the computations would be:
dst[:] <- scale * dst[:] + op(...) // instead of dst[:] <- op(...)
If data_type is specified, the original dst tensor will be reinterpreted as a tensor with the provided data type. Since it is a reinterpretation, data_type and the dst data type should have the same size. As a result, computations would be:
dst[:] <- scale * as_data_type(dst[:]) + op(...) // instead of dst[:] <- op(...)
- Note
- This post-op executes in-place and does not change the destination layout.
- Parameters
-
post_ops | Post-ops. |
scale | Accumulation scaling factor. |
data_type | Accumulation data_type. |
- Returns
- dnnl_success on success and a status describing the error otherwise.
Set quantization scale and shift parameters for RNN data tensors.
For performance reasons, the low-precision configuration of the RNN primitives expects input activations to have the unsigned 8-bit integer data type. The scale and shift parameters are used to quantize floating-point data to unsigned integer and must be passed to the RNN primitive using attributes.
The quantization formula is scale * (data + shift).
- Note
- Quantization scale and shift are common for src_layer, src_iter, dst_iter, and dst_layer.
Example usage:
int l = 2, t = 2, mb = 32, sic = 32, slc = 32, dic = 32, dlc = 32;
// Activations quantization parameters
float scale = ..., shift = ...;
dnnl_primitive_attr_t rnn_attr;
// Create default attributes
dnnl_primitive_attr_create(&rnn_attr);
// Set scale and shift for int8 quantization of activation
dnnl_primitive_attr_set_rnn_data_qparams(rnn_attr, scale, shift);
// Create and configure rnn op_desc
dnnl_rnn_desc_t rnn_d;
dnnl_primitive_desc_t rnn_pd;
dnnl_primitive_desc_create(&rnn_pd, &rnn_d, rnn_attr, engine, NULL);
- Parameters
-
attr | Primitive attributes. |
scale | The value to scale the data by. |
shift | The value to shift the data by. |
- Returns
- dnnl_success on success and a status describing the error otherwise.