3 Commits

55 changed files with 16700 additions and 824 deletions
  1. README.md (+1, -1)
  2. mindnlp/__init__.py (+3, -2)
  3. mindnlp/core/_C/__init__.py (+4, -1)
  4. mindnlp/core/_apis/__init__.py (+0, -0)
  5. mindnlp/core/_apis/cpu.py (+1223, -0)
  6. mindnlp/core/_apis/gpu.py (+1123, -0)
  7. mindnlp/core/_apis/meta.py (+379, -0)
  8. mindnlp/core/_apis/npu.py (+1596, -0)
  9. mindnlp/core/_dtype.py (+10, -2)
  10. mindnlp/core/_jit_internal.py (+3, -0)
  11. mindnlp/core/_op_prim/__init__.py (+0, -0)
  12. mindnlp/core/_op_prim/ascend/__init__.py (+0, -0)
  13. mindnlp/core/_op_prim/ascend/legacy.py (+3511, -0)
  14. mindnlp/core/_op_prim/ascend/pyboost.py (+877, -0)
  15. mindnlp/core/_op_prim/cpu/__init__.py (+0, -0)
  16. mindnlp/core/_op_prim/cpu/legacy.py (+3511, -0)
  17. mindnlp/core/_op_prim/gpu/__init__.py (+0, -0)
  18. mindnlp/core/_op_prim/gpu/legacy.py (+3511, -0)
  19. mindnlp/core/_prims/ascend/__init__.py (+2, -0)
  20. mindnlp/core/_prims/ascend/aclop.py (+82, -0)
  21. mindnlp/core/_prims/ascend/ascend.py (+0, -0)
  22. mindnlp/core/_prims/ascend/pyboost.py (+26, -0)
  23. mindnlp/core/_prims/ascend_310b/__init__.py (+0, -0)
  24. mindnlp/core/_prims/ascend_310b/ascend_310b.py (+0, -0)
  25. mindnlp/core/_prims/cpu.py (+0, -211)
  26. mindnlp/core/_prims/cpu/__init__.py (+2, -0)
  27. mindnlp/core/_prims/cpu/ms.py (+155, -0)
  28. mindnlp/core/_prims/cpu/numpy.py (+0, -0)
  29. mindnlp/core/_tensor.py (+38, -18)
  30. mindnlp/core/configs.py (+13, -3)
  31. mindnlp/core/cpu/__init__.py (+0, -0)
  32. mindnlp/core/cuda/__init__.py (+10, -1)
  33. mindnlp/core/dispatcher.py (+11, -123)
  34. mindnlp/core/executor.py (+0, -3)
  35. mindnlp/core/nn/functional.py (+79, -96)
  36. mindnlp/core/nn/init.py (+0, -1)
  37. mindnlp/core/nn/modules/adaptive.py (+5, -5)
  38. mindnlp/core/nn/modules/rnn.py (+5, -4)
  39. mindnlp/core/nn/utils/parametrize.py (+1, -1)
  40. mindnlp/core/npu/__init__.py (+3, -1)
  41. mindnlp/core/ops/array.py (+123, -100)
  42. mindnlp/core/ops/blas.py (+2, -2)
  43. mindnlp/core/ops/comparison.py (+14, -35)
  44. mindnlp/core/ops/creation.py (+16, -26)
  45. mindnlp/core/ops/inplace.py (+19, -12)
  46. mindnlp/core/ops/other.py (+169, -77)
  47. mindnlp/core/ops/pointwise.py (+21, -20)
  48. mindnlp/core/ops/random.py (+33, -58)
  49. mindnlp/core/ops/reduction.py (+19, -17)
  50. mindnlp/core/random.py (+1, -1)
  51. mindnlp/transformers/__init__.py (+3, -0)
  52. mindnlp/transformers/masking_utils.py (+0, -1)
  53. mindnlp/utils/safetensors_patch.py (+4, -2)
  54. tools/__init__.py (+0, -0)
  55. tools/op_auto_gen.py (+92, -0)

README.md (+1, -1)

@@ -70,7 +70,7 @@
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

inputs = tokenizer("Hello world!")
inputs = tokenizer("Hello world!", return_tensors='ms')
outputs = model(**inputs)
```



mindnlp/__init__.py (+3, -2)

@@ -18,7 +18,6 @@ MindNLP library.
"""
import os
import platform
from packaging import version

# huggingface env
if os.environ.get('HF_ENDPOINT', None) is None:
@@ -29,13 +28,15 @@ if 'RANK_TABLE_FILE' in os.environ:
del os.environ['RANK_TABLE_FILE']

import mindspore
from mindspore import context
from mindspore._c_expression import MSContext # pylint: disable=no-name-in-module, import-error
try:
from mindspore._c_expression import disable_multi_thread
except:
disable_multi_thread = None

if os.environ.get('DEVICE_TARGET', None) is not None:
mindspore.set_device(os.environ.get('DEVICE_TARGET'))
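# Example (a sketch): exporting DEVICE_TARGET=CPU before importing mindnlp makes this
# call mindspore.set_device('CPU') at import time.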

# for different ascend devices
if platform.system().lower() == 'linux':
SOC = MSContext.get_instance().get_ascend_soc_version()


mindnlp/core/_C/__init__.py (+4, -1)

@@ -198,7 +198,10 @@ class Generator:
Returns:
Current seed and offset.
"""
return self._generator(STEP, (self._seed, self._offset, step,))[:2]
outs = self._generator(STEP, (self._seed, self._offset, step,))[:2]
for o in outs:
o._device = self.device
return outs
default_generator = Generator()


mindnlp/core/_apis/__init__.py (+0, -0)


mindnlp/core/_apis/cpu.py (+1223, -0)

@@ -0,0 +1,1223 @@
import ctypes
import numbers
import math
import numpy as np
import mindspore
from mindspore._c_expression import _empty_instance
from mindnlp import core
from .._op_prim.cpu import legacy

def empty(*args, **kwargs):
return _empty_instance(*args, **kwargs, device='CPU')

def inplace_normal(input, mean, std, generator_):
out = np.random.normal(mean, std, input.shape).astype(core.dtype2np[input.dtype])
numpy_to_tensor_overwrite(out, input)

return input

def select_ext_view(input, dim, index):
return legacy.select_view(input, index, dim)

def inplace_copy(input, value):
if value.shape != input.shape:
value = legacy.fill_v2(input.shape, value)
# inplace_copy(input, value)
# t2t_overwrite(input, value)
# legacy.assign(input, value)
if hasattr(input, '_base'):
input._base.assign_value(value)
input.assign_value(value)
return input

def fill_scalar(size, fill_value, dtype):
if dtype is None:
return legacy.fill_v2(size, mindspore.Tensor(fill_value))
return legacy.cast(legacy.fill_v2(size, mindspore.Tensor(fill_value)), dtype)

def fill_tensor(size, fill_value, dtype):
return legacy.cast(legacy.fill_v2(size, fill_value), dtype)


def inplace_fill_scalar(input, value):
out = np.full_like(input.numpy(), value)
numpy_to_tensor_overwrite(out, input)
return input

def zeros_like(input, dtype):
if dtype is None:
return legacy.zeros_like(input)
return legacy.cast(legacy.zeros_like(input), dtype)

def tensor_shape(input):
return legacy.tensor_shape(input)

def arange(start, end, step, dtype):
return core.Tensor.from_numpy(np.arange(start, end, step, core.dtype2np[dtype]))

def broadcast_to(input, shape):
return legacy.broadcast_to(input, shape)

def zeros(shape, dtype):
return legacy.zeros(shape, dtype)

def inplace_uniform(input, from_, to_, generator_):
seed, _ = generator_._step(12)
np.random.seed(seed.item())
out = np.random.uniform(from_, to_, input.shape).astype(core.dtype2np[input.dtype])
numpy_to_tensor_overwrite(out, input)
return input

def sub(input, other, alpha):
return legacy.sub(input, legacy.mul(other, alpha))

def contiguous(input):
return input

def inplace_zero(input):
inplace_copy(input, legacy.zeros_like(input))
return input

def abs(input):
return legacy.abs(input)

def identity(input):
return legacy.identity(input)

def clone(input):
return cast(legacy.mul(input, 1), input.dtype)

def max(input):
return legacy.reduce_max(input, (), False)

def ones(shape, dtype):
return legacy.ones(shape, dtype)

def mean(input, dim, keepdim, dtype):
if dtype is not None:
input = legacy.cast(input, dtype)
if dim is None:
dim = ()
return legacy.reduce_mean(input, dim, keepdim)

def transpose_view(input, dim0, dim1):
"""
Transposes the input tensor along the specified dimensions.

Args:
input (Tensor): The input tensor.
dim0 (int): The first dimension to transpose.
dim1 (int): The second dimension to transpose.

Returns:
Tensor: The transposed tensor.
"""
ranks = list(range(input.ndim))
rank0 = ranks[dim0]
rank1 = ranks[dim1]
ranks[dim0] = rank1
ranks[dim1] = rank0
return legacy.transpose(input, tuple(ranks))
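# Example (a sketch): for a (2, 3, 4) tensor, transpose_view(x, 0, 2) builds the
# permutation (2, 1, 0), so legacy.transpose returns a tensor of shape (4, 3, 2).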

def matmul(self, other):
if self.ndim > 2:
if self.ndim == other.ndim:
return legacy.batch_mat_mul(self, other, False, False)
else:
self_shape = self.shape
other_shape = other.shape
if other.ndim == 2:
self = reshape(self, (-1, self_shape[-1]))
out = legacy.mat_mul(self, other, False, False)
return reshape(out, (*self_shape[:-1], out.shape[-1]))
if self.ndim == 2:
other = reshape(other, (-1, other_shape[-1]))
out = legacy.mat_mul(self, other, False, False)
return reshape(out, (*other_shape[:-1], out.shape[-1]))
return legacy.mat_mul(self, other, False, False)
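# Example (a sketch): matmul on shapes (2, 3, 4) x (4, 5) reshapes the left operand to
# (6, 4), runs legacy.mat_mul, and reshapes the result back to (2, 3, 5).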

def div(input, other):
return legacy.div(input, other)

def mul(input, other):
return legacy.mul(input, other)

def reduce_all(input, axis, keepdims):
return legacy.reduce_all(input, axis, keepdims)

def isclose(input, other, rtol, atol, equal_nan):
return legacy.is_close(input, other, rtol, atol, equal_nan)

def equal(input, other):
return legacy.reduce_all(legacy.equal(input, other), (), False)

def eq(input, other):
return legacy.equal(input, other)


def expand_dims(input, dim):
return legacy.expand_dims(input, dim)

def tile(input, dims):
return legacy.tile(input, dims)

py_slice = slice
def slice(self, dim, start, end, step):
ndim = self.ndim
begins = [0] * ndim
ends = [i for i in self.shape]
strides = [1] * ndim
begins[dim] = start
ends[dim] = end
strides[dim] = step
return legacy.strided_slice(self, tuple(begins), tuple(ends), tuple(strides), 0, 0, 0, 0, 0)
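# Example (a sketch): slice(x, dim=1, start=0, end=4, step=2) on a (3, 8) tensor calls
# strided_slice with begins=(0, 0), ends=(3, 4), strides=(1, 2), giving shape (3, 2).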

def pad_v3(input, new_pad, mode, value=None, contiguous=True):
return legacy.pad_v3(input, new_pad, value, mode, contiguous)

def cumsum(self, dim, dtype):
if self.shape[dim] == 0:
return core.tensor([], dtype=self.dtype, device=self.device)
return legacy.cum_sum(self, dim, False, False)

def reduce_any(input, axis, keepdims):
return legacy.reduce_any(input, axis, keepdims)

def concat(tensors, axis):
return legacy.concat(tensors, axis)

def numpy_to_tensor_overwrite(np_array, tensor):
if not np_array.flags.c_contiguous:
np_array = np.ascontiguousarray(np_array)

tensor_ptr = tensor.data_ptr()
ctypes.memmove(tensor_ptr, np_array.ctypes.data, tensor.nbytes)
return tensor

def t2t_overwrite(input, other):
other._device = input.device
ctypes.memmove(input.data_ptr(), other.data_ptr(), input.nbytes)
return input


def inplace_random(input, from_val=0, to_val=None, generator=None):
# pick the random number generator
rng = np.random
arr = input.numpy()
if np.issubdtype(arr.dtype, np.floating):
# floating-point dtypes
if to_val is None:
# default uniform distribution over [0, 1)
rnd = rng.random(size=arr.shape).astype(arr.dtype)
else:
rnd = (from_val + (to_val - from_val) * rng.random(size=arr.shape)).astype(arr.dtype)
elif np.issubdtype(arr.dtype, np.integer):
# integer dtypes
from_int = int(from_val)
if to_val is None:
# default range [0, dtype.max]
max_val = np.iinfo(arr.dtype).max
rnd = rng.randint(0, max_val + 1, size=arr.shape).astype(arr.dtype)
else:
# explicit range [from_int, to_int)
to_int = int(to_val)
# validate the arguments
if from_int >= to_int:
raise ValueError(f"Empty range for integers: from={from_int} >= to={to_int}")
# clamp to the dtype's integer bounds
dtype_min = np.iinfo(arr.dtype).min
dtype_max = np.iinfo(arr.dtype).max
from_int = np.clip(from_int, dtype_min, dtype_max)
to_int = np.clip(to_int, dtype_min + 1, dtype_max + 1)
rnd = rng.randint(from_int, to_int, size=arr.shape).astype(arr.dtype)
elif arr.dtype == bool:
# boolean dtypes (from_val/to_val are ignored)
rnd = rng.random(size=arr.shape) > 0.5
else:
raise TypeError(f"Unsupported data type: {arr.dtype}")
numpy_to_tensor_overwrite(rnd, input)

return input

def gather_d(input, dim, index):
return legacy.gather_d(input, dim, index)

def reshape(input, shape):
return legacy.reshape(input, shape)

def flatten(input, start_dim, end_dim):
if start_dim < 0:
start_dim = start_dim + input.ndim
if end_dim < 0:
end_dim = end_dim + input.ndim
input_shape = list(input.shape)
input_shape[start_dim:end_dim+1] = [-1]
return legacy.reshape(input, tuple(input_shape))
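# Example (a sketch): flatten(x, 1, 2) on a (2, 3, 4, 5) tensor collapses dims 1..2 into
# -1, i.e. a reshape to (2, -1, 5), giving shape (2, 12, 5).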

def sort(input, dim, descending, stable):
return legacy.sort(input, dim, descending)

def gather(input_params, input_indices, axis, batch_dim):
return legacy.gather(input_params, input_indices, axis, batch_dim)

def randint(low, high, shape, generator, dtype):
value = legacy.uniform_int(shape,
mindspore.tensor(low, dtype=mindspore.int32),
mindspore.tensor(high, dtype=mindspore.int32), 0, 0)
return value

def add(input, other, alpha=1):
if alpha == 1.0:
return legacy.add(input, other)
return legacy.add(input, legacy.mul(other, alpha))

def non_zero(input):
return legacy.non_zero(input)

def stop_gradient(input):
return legacy.stop_gradient(input)

def squeeze(input, axis):
return legacy.squeeze(input, axis)

def softmax(input, axis):
if axis is None:
axis = -1
return legacy.softmax(input, axis)

def topk(input, k, dim, largest, sorted):
if not largest:
input = -input
if dim is None or dim == input.ndim - 1:
if not largest:
res = legacy.top_k(input, k, sorted)
values, indices = -res[0], res[1]
return values, indices
return legacy.top_k(input, k, sorted)
input = transpose_view(input, dim, input.ndim - 1)
output = legacy.top_k(input, k, sorted)
values = transpose_view(output[0], dim, input.ndim - 1)
indices = transpose_view(output[1], dim, input.ndim - 1)
if not largest:
res = (-values, indices)
else:
res = (values, indices)
return res
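# Example (a sketch): with largest=False the input is negated first, legacy.top_k then
# picks the largest of the negated values, and the values are negated back so the
# smallest k elements are returned with their original sign.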

def strided_slice(input, begin, end, strides, begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0):
return legacy.strided_slice(input, tuple(begin), tuple(end), tuple(strides), begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask)

def strided_slice_grad(input, begin, end, strides, update, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask):
return legacy.strided_slice_grad(update, input.shape, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask)

def masked_select(input, mask):
return legacy.masked_select(input, mask)

def stack(values, axis=0):
return legacy.stack(values, axis)

def cast(input, dtype):
return legacy.cast(input, dtype)

def less(input, other):
return legacy.less(input, other)

def select(condition, x, y):
return legacy.select(condition, x, y)

def round(input, decimals):
return legacy.round(input, decimals)

def erfinv(input):
return legacy.erfinv(input)

def erf(input):
return legacy.erf(input)

def pow_scalar_tensor(input, exponent):
return legacy.pow(input, exponent)

def inplace_add(input, other, alpha):
if alpha != 1:
return inplace_copy(input, legacy.add(input, legacy.mul(other, alpha)))
return inplace_copy(input, legacy.add(input, other))

def clamp_scalar(value, min_value, max_value):
if min_value is not None:
value = legacy.maximum(value, min_value)
if max_value is not None:
value = legacy.minimum(value, max_value)
return value

def constant_pad_nd(input, pad, value):
return legacy.pad_v3(input, pad, value, 'constant', True)

def randn(size, generator, dtype):
return cast(legacy.standard_normal(tuple(size), 0, 0), dtype)

def rand(size, generator, dtype):
return cast(legacy.uniform_real(tuple(size), 0, 0), dtype)

def tril(input, diagonal):
return legacy.tril(input, diagonal)

def dense(input, weight, bias=None):
return legacy.dense(input, weight, bias)

def relu(input):
return legacy.re_lu(input)

def assign(input, value):
return inplace_copy(input, value)

def square(input):
return legacy.square(input)

def log(input):
if not input.dtype.is_floating_point:
input = cast(input, mindspore.float32)
return legacy.log(input)

def permute(input, dims):
return legacy.transpose(input, dims)

def ones_like(input, dtype):
if dtype is not None:
return cast(legacy.ones_like(input), dtype)
return legacy.ones_like(input)

def embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq):
return cast(legacy.gather(weight, input, 0, 0), weight.dtype)

def linspace(start, end, steps, dtype):
start = float(start)
end = float(end)
return legacy.lin_space(mindspore.Tensor(start), mindspore.Tensor(end), steps)

def masked_fill(input, mask, value):
if input.dtype.is_floating_point and isinstance(value, numbers.Number):
value = float(value)
return legacy.masked_fill(input, mask, value)

def sum(input, dim, keepdim, dtype):
if dim is None:
dim = ()
if input.dtype == mindspore.bool_:
input = cast(input, mindspore.int64)
if dtype is None:
return legacy.reduce_sum(input, dim, keepdim, False)
return legacy.reduce_sum(input.astype(dtype), dim, keepdim, False)

def conv2d(input, weight, bias=None, stride=1, padding='valid', dilation=1, groups=1):
pad_mode = 'pad'
pad = padding
if isinstance(padding, (tuple, list)):
pad = (padding[0], padding[0], padding[1], padding[1])
elif isinstance(padding, int):
pad = (padding,) * 4
if not isinstance(padding, (int, tuple, list)):
pad_mode = padding
pad = (0,) * 4
if isinstance(stride, int):
stride = (stride,) * 4

out_channels = weight.shape[0]
kernel_size = weight.shape[2:]

output = legacy.conv2_d(
input, weight,
out_channels,
kernel_size,
1,#mode=1,
pad_mode, #pad_mode=pad_mode,
pad, #pad=pad,
tuple(stride), #stride=tuple(stride),
dilation, #dilation=dilation,
groups, #group=groups,
"NCHW", #data_format="NCHW"
)
if bias is not None:
output = legacy.bias_add(output, bias, "NCHW")
return output

def conv2d_padding(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
return conv2d(input, weight, bias, stride, padding, dilation, groups)

def pow_tensor_scalar(input, scalar):
return legacy.pow(input, scalar)

def rsqrt(input):
return legacy.rsqrt(input)

def layer_norm(input, normalized_shape, weight, bias, eps=1e-5):
if weight is not None:
begin_axis = input.ndim - weight.ndim
else:
begin_axis = -1
return legacy.layer_norm(input, weight, bias, begin_axis, begin_axis, eps)

def argmin_with_value(input, axis, keep_dims):
return legacy.arg_min_with_value(input, axis, keep_dims)

def argmax_with_value(input, axis, keep_dims):
return legacy.arg_max_with_value(input, axis, keep_dims)

def silu(input):
return legacy.mul(input, legacy.sigmoid(input))

def less_equal(input_x, input_y):
return legacy.less_equal(input_x, input_y)

def not_equal(input_x, input_y):
return legacy.not_equal(input_x, input_y)


def logical_not(input):
return legacy.logical_not(input)

def tensor_scatter_update(input, indices, updates):
return legacy.tensor_scatter_update(input, indices, updates)

def isinf(input):
return legacy.is_inf(input)

def gelu(input, approximate):
return legacy.ge_lu(input)

def greater(input_x, input_y):
return legacy.greater(input_x, input_y)

def greater_equal(input_x, input_y):
return legacy.greater_equal(input_x, input_y)

def eye(n, m, dtype):
return legacy.eye(n, m, dtype)

def argmax(input, axis, keep_dims):
return legacy.arg_max_with_value(input, axis, keep_dims)[0]

def argmin(input, axis, keep_dims):
return legacy.arg_min_with_value(input, axis, keep_dims)[0]

def exp(input):
return legacy.exp(input)

def split_with_size(tensor, split_sizes, dim=0):
chunks = []
start = 0
for chunk_size in split_sizes:
end = start + chunk_size
slice_obj = [py_slice(None)] * tensor.dim()
slice_obj[dim] = py_slice(start, end)
chunks.append(tensor[tuple(slice_obj)])
start = end

return tuple(chunks)
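# Example (a sketch): split_with_size(x, (2, 3), dim=0) on a (5, 4) tensor returns views
# of shape (2, 4) and (3, 4), built by indexing with Python slice objects along dim 0.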


def cos(input):
return legacy.cos(input)

def sigmoid(input):
return legacy.sigmoid(input)

def sqrt(input):
return legacy.sqrt(input)

def chunk(input, chunks, dim=0):
return legacy.split(input, dim, chunks)

def sin(input):
return legacy.sin(input)

def neg(input):
return legacy.neg(input)

def bitwise_or_tensor(input_x, input_y):
return legacy.bitwise_or(input_x, input_y)

def bitwise_and_tensor(input_x, input_y):
return legacy.bitwise_and(input_x, input_y)

def non_zero_ext(input):
out = legacy.non_zero(input)
return unbind(out, 1, out.shape[1])

def unbind(input, dim, num):
return legacy.unstack(input, dim, num)

def log1p(input):
return legacy.log1p(input)

def log_softmax(input, axis, dtype):
if dtype is not None:
input = input.astype(dtype)
return legacy.log_softmax(input, axis)

def scatter(input, dim, index, src):
return legacy.tensor_scatter_elements(input, index, src, dim, "none")

def batch_norm(input, weight, bias, running_mean=None, runnning_var=None, training=False, momentum=0.1, epsilon=1e-5):
input_ndim = input.ndim
if input_ndim == 2:
return legacy.batch_norm(input, weight, bias, running_mean, runnning_var, training, epsilon, momentum, 'NCHW')
else:
input = transpose_view(input, 1, -1)
input_shape = input.shape
input = reshape(input, (-1, input.shape[-1]))
outs = legacy.batch_norm(input, weight, bias, running_mean, runnning_var, training, epsilon, momentum, 'NCHW')
out = reshape(outs[0], (*input_shape[:-1], -1))
out = transpose_view(out, 1, -1)

return out, outs[1], outs[2]

def tanh(input):
return legacy.tanh(input)

def dropout(input, p, seed, offset):
return legacy.dropout(input, 1-p, 0, 0)

def split_tensor(input, split_size_or_sections, dim):
if isinstance(split_size_or_sections, int):
num = input.shape[dim] // split_size_or_sections
return legacy.split(input, dim, num)

def bmm(input_x, input_y):
return legacy.batch_mat_mul(input_x, input_y, False, False)

def nllloss(input, target, weight, reduction, ingore_index):
return legacy.nll_loss(input, target, weight, reduction, ingore_index)

def nllloss_2d(input, target, weight, reduction, ingore_index):
input = reshape(transpose_view(input, 1, -1), (-1, input.shape[1]))
target = reshape(target, (-1,))
out = legacy.nll_loss(input, target, weight, reduction, ingore_index)
return out


def binary_cross_entropy_with_logits(input, target, weight, posWeight, reduction):
return legacy.bce_with_logits_loss(input, target, weight, posWeight, reduction)

def std(input, dim, correction, keepdim):
if dim is None:
dim = ()
return legacy.reduce_std(input, dim, bool(correction), keepdim)[0]

def linalg_vector_norm(x, ord=2, dim=None, keepdim=False, dtype=None):
return legacy.lp_norm(x, dim, int(ord), keepdim, 1e-12)

def rfft(input, n=None, dim=-1, norm=None):
if n is None:
n = input.shape[dim]
if input.shape[dim] < n:
pad_inf = (0, n - input.shape[dim])
pad_dims = (0, 0) * (input.ndim - (dim + 1)) + pad_inf
input = constant_pad_nd(input, pad_dims, 0.)
else:
input = narrow(input, dim, 0, n)
return legacy.fft_with_size(input, input.ndim, False, True, norm, True, ())

def narrow(input, dim, start, length):
begin = [0] * input.ndim
size = [i for i in input.shape]
begin[dim] = start
size[dim] = length
return legacy.slice(input, begin, size)

def conj(input):
return legacy.conj(input)

def irfft(input, n, dim, norm):
if input.shape[dim] < n:
pad_inf = (0, n - input.shape[dim])
pad_dims = (0, 0) * (input.ndim - (dim + 1)) + pad_inf
input = constant_pad_nd(input, pad_dims, 0.)
else:
input = narrow(input, dim, 0, n)
return legacy.fft_with_size(input, input.ndim, True, True, norm, True, ())

def avg_pool1d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True):
if isinstance(padding, int):
padding = (0, 0, 0, 0, padding, padding)
elif isinstance(padding, tuple):
if len(padding) != 1:
raise ValueError("For avg_pool1d, padding should be int or tuple of length 1.")
padding = (0, 0, 0, 0, padding[0], padding[1])
else:
raise TypeError("For avg_pool1d, padding should be int or tuple of length 1.")

if isinstance(stride, tuple):
if len(stride) != 1:
raise ValueError("For avg_pool1d, stride should be int or tuple of length 1.")
stride = stride[0]

input = expand_dims(input, 2)
input = expand_dims(input, 2)
input = legacy.avg_pool3_d(input, (1, 1, kernel_size), (1, 1, stride), 'pad', padding, ceil_mode, count_include_pad, 0, 'NCDHW')
input = squeeze(input, (2, 3))
return input

def fmod_scalar(input, other):
return legacy.floor_mod(input, other)

def conv1d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
pad_mode = 'pad'
pad = padding
if isinstance(padding, tuple):
pad = (0, 0, padding[0], padding[0])
elif isinstance(padding, int):
pad = (0, 0) + (padding,) * 2
if not isinstance(padding, (int, tuple)):
pad_mode = padding
pad = (0,) * 4

input = expand_dims(input, 2)
weight = expand_dims(weight, 2)

output = legacy.conv2_d(
input, weight,
weight.shape[0],
(1, weight.shape[-1]),
1,#mode=1,
pad_mode, #pad_mode=pad_mode,
pad, #pad=pad,
(1, stride) if isinstance(stride, int) else (1, *stride), #stride=tuple(stride),
(1, dilation) if isinstance(dilation, int) else (1, *dilation), #dilation=dilation,
groups, #group=groups,
"NCHW", #data_format="NCHW"
)


if bias is not None:
output = legacy.bias_add(output, bias, "NCHW")

output = squeeze(output, 2)
return output

def maximum(input, other):
return legacy.maximum(input, other)

def prod(input, axis, keepdims, dtype):
if axis is None:
axis = ()
return legacy.reduce_prod(input, axis, keepdims)

def mse_loss(input, target, reduction):
x = square(input - target)
average_flag = True
reduce_flag = True
if reduction == 'sum':
average_flag = False
if reduction == 'none':
reduce_flag = False

if reduce_flag and average_flag:
x = mean(x, tuple(range(x.ndim)), False, None)

if reduce_flag and not average_flag:
x = sum(x, tuple(range(x.ndim)), False, None)

return x

def adaptive_avg_pool2d(input, output_size):
return legacy.adaptive_avg_pool2_d(input, output_size)

def avg_pool2d(input, kernel_size, stride, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None):
if isinstance(padding, int):
padding = (0, 0, padding, padding, padding, padding)
elif isinstance(padding, tuple):
if len(padding) != 1:
raise ValueError("For avg_pool1d, padding should be int or tuple of length 1.")
padding = (0, 0, padding[0], padding[1], padding[2], padding[3])
else:
raise TypeError("For avg_pool1d, padding should be int or tuple of length 1.")

if isinstance(kernel_size, int):
kernel_size = (kernel_size, kernel_size)
if isinstance(stride, int):
stride = (stride, stride)

input = expand_dims(input, 2)
input = legacy.avg_pool3_d(input, (1, *kernel_size), (1, *stride), 'pad', padding, ceil_mode, count_include_pad, 0, 'NCDHW')
input = squeeze(input, 2)
return input

def bitwise_or_scalar(input, value):
return legacy.bitwise_or(input, value)

def floor_div(input, other):
return legacy.floor_div(input, other)

def minimum(input, other):
return legacy.minimum(input, other)

def reverse_v2(input, axis):
if isinstance(axis, int):
axis = (axis,)
return legacy.reverse_v2(input, axis)

def divmod(input, other, rounding_mode):
if rounding_mode == 'floor':
return legacy.floor_div(input, other)
elif rounding_mode == 'trunc':
if isinstance(input, numbers.Number):
input = mindspore.Tensor(input)
return legacy.truncate_div(input, other)
else:
raise ValueError(f'Invalid rounding mode: {rounding_mode}')

def pow(input, exponent):
return legacy.pow(input, exponent)


def bitwise_and_scalar(input, value):
return legacy.bitwise_and(input, value)

def rand_like(input, generator, dtype):
return rand(input.shape, generator, dtype)

def bincount(input, weights=None, minlength=0):
if weights is None:
weights = mindspore.Tensor(1, dtype=mindspore.int32)
return legacy.bincount(cast(input, mindspore.int32),
mindspore.Tensor(minlength, dtype=mindspore.int32),
weights)

def lgamma(input):
return legacy.lgamma(input)

def _deconv_output_length(pad_mode, filter_size, stride_size, dilation_size, padding):
"""Calculate the width and height of output."""
length = 0
filter_size = filter_size + (filter_size - 1) * (dilation_size - 1)
if pad_mode == 'valid':
if filter_size - stride_size > 0:
length = filter_size - stride_size
elif pad_mode == 'pad':
length = - padding + filter_size - stride_size

return length
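# Example (a sketch): with pad_mode='valid', filter_size=4, stride_size=2 and
# dilation_size=1, the effective filter stays 4, so the extra output length is 4 - 2 = 2.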


def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1):
pad_mode = 'pad'
pad = padding
if isinstance(padding, tuple):
pad = (0, 0, padding[0], padding[0])
elif isinstance(padding, int):
pad = (0, 0) + (padding,) * 2
if not isinstance(padding, (int, tuple)):
pad_mode = padding
pad = (0,) * 4

if isinstance(dilation, int):
dilation = (dilation, dilation)

in_channel, out_channels = weight.shape[0], weight.shape[1] * groups
kernel_size = weight.shape[2:]

n, _, h, w = input.shape
h_add = _deconv_output_length(pad_mode, kernel_size[0], stride[0], dilation[0], pad[0] + pad[1])
w_add = _deconv_output_length(pad_mode, kernel_size[1], stride[1], dilation[1], pad[2] + pad[3])

out = legacy.conv2_d_transpose(
input, weight,
(n, out_channels, h * stride[0] + h_add, w * stride[1] + w_add),
out_channels,
kernel_size,
pad_mode,
pad,
None,
1,
stride,
dilation,
groups,
'NCHW'
)
if bias is not None:
out = legacy.bias_add(out, bias, 'NCHW')
return out

def expm1(x):
return legacy.expm1(x)

py_min = min
def min(input):
return legacy.reduce_min(input, (), False)

def acos(x):
return legacy.a_cos(x)

def upsample_bilinear2d(input, size=None, scale_factor=None, align_corners=False):
return legacy.resize_bilinear_v2(input, size, align_corners, not align_corners)

def unstack_view(input, dim):
return legacy.unstack(input, dim, input.shape[dim])

def triu(input, diagonal=0):
return legacy.triu(input, diagonal)

def masked_scatter(input, mask, value):
return legacy.masked_scatter(input, mask, value)

def max_pool2d(input, kernel_size, stride=1, padding=0, dilation=1, ceil_mode=False, return_indices=False):
out, indices = legacy.max_pool_with_argmax_v2(input, kernel_size, stride, padding, dilation, ceil_mode, mindspore.int64)

if return_indices:
return out, indices
return out

def baddbmm(input, batch1, batch2, alpha=1, beta=1):
return add(mul(beta, input), mul(alpha, bmm(batch1, batch2)))

def inplace_fill_tensor(input, value):
out = np.full_like(input.numpy(), value)
numpy_to_tensor_overwrite(out, input)
return input

def softplus(input, beta=1, threshold=20):
return legacy.softplus(input)

def gather_nd(input, indices):
return legacy.gather_nd(input, indices)

def unique_consecutive(input, return_inverse, return_counts, dim):
return legacy.unique_consecutive(input, return_inverse, return_counts, dim)

def meshgrid(input, lambd):
return legacy.meshgrid(input, lambd)

def addcmul(input, tensor1, tensor2, value=1.0):
return legacy.addcmul(input, tensor1, tensor2, mindspore.Tensor(value))

def addmm(input, mat1, mat2, alpha=1.0, beta=1.0):
return add(mul(beta, input), mul(alpha, bmm(mat1, mat2)))

def im2col(input, kernel_size, dilation=1, padding=0, stride=1):
out = legacy.im2_col(input, kernel_size, stride, dilation, padding)
out_shape = out.shape[:1] + (-1,) + out.shape[-1:]
out = reshape(out, out_shape)
return out

def floor(input):
return legacy.floor(input)

def upsample_nearest2d(input, output_size, scale_factors):
if output_size is None:
tuple_len = py_min(len(input.shape) - 2, len(scale_factors))
output_size = tuple([math.floor(input.shape[i + 2] * scale_factors[i])
for i in range(tuple_len)])

return legacy.resize_nearest_neighbor(input, output_size, False, False)
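# Example (a sketch): for an input of shape (1, 3, 10, 10) and scale_factors=(2, 2),
# output_size is computed as (20, 20) before calling resize_nearest_neighbor.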

def upsample_bicubic2d(input, size=None, scale_factor=None, align_corners=False):
return legacy.resize_bicubic(input, size, align_corners, not align_corners)

def conv3d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
pad_mode = 'pad'
pad = padding
if isinstance(padding, (tuple, list)):
pad = (padding[0], padding[0], padding[1], padding[1], padding[2], padding[2])
elif isinstance(padding, int):
pad = (padding,) * 6
if not isinstance(padding, (int, tuple, list)):
pad_mode = padding
pad = (0,) * 6

out_channels = weight.shape[0]
kernel_size = weight.shape[2:]

output = legacy.conv3_d(input, weight,
out_channels,
kernel_size,
1,
pad_mode,
pad,
tuple(stride),
dilation,
groups,
"NCDHW")
if bias is not None:
output = legacy.bias_add(output, bias, 'NCHW')
return output



def normal_float_float(mean, std, size, dtype, generator):
out = np.random.normal(mean, std, size).astype(core.dtype2np[dtype])
out = mindspore.Tensor(out)
return out

def normal_tensor_tensor(mean, std, size, dtype, generator):
out = np.random.normal(mean.item(), std.item(), size).astype(core.dtype2np[dtype])
out = mindspore.Tensor(out)
return out

def inplace_relu(input):
return legacy.assign(input, legacy.re_lu(input))

def adaptive_avg_pool1d(input, output_size):
x_in_shape = input.shape
width = x_in_shape[2]
stride = width // output_size
kernel_size = width - (output_size - 1) * stride
stride = (1, width // output_size)
kernel_size = (1, kernel_size)
input = expand_dims(input, 2)
input = legacy.avg_pool(input, kernel_size, stride, "VALID", "NCHW")
input = squeeze(input, 2)
return input

def remainder_tensor_scalar(input, other):
out = sub(input, mul(floor_div(input, other), other), 1)
return out
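# Example (a sketch): remainder_tensor_scalar(7, 3) evaluates 7 - floor(7 / 3) * 3 = 1,
# matching the floor-division definition of the remainder.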

def outer(input, other):
input = reshape(input, (-1, 1))
y = mul(input, other)
return y

def view_as_complex(input):
real_part, imag_part = chunk(input, 2, -1)
return legacy.complex(squeeze(real_part, -1), squeeze(imag_part, -1))

def cdist(x1, x2, p):
return legacy.cdist(x1, x2, float(p))

def prelu(input, weight):
return legacy.p_re_lu(input, weight)

def reciprocal(input):
return legacy.reciprocal(input)

def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank, reduction, zero_infinity):
loss, log_alpha = legacy.ctc_loss_v2(log_probs, targets, input_lengths, target_lengths, blank, 'none', zero_infinity)
if reduction == 'sum':
loss = sum(loss, (), False, None)
if reduction == 'mean':
# input_type = loss.dtype
# target_length_t = target_lengths.clip(1., None)
# loss = loss.astype("float32")
loss = div(loss, target_lengths)
loss = mean(loss, (), False, None)
# loss = loss.astype(input_type)
return (loss, log_alpha)

def glu(input, dim=-1):
return legacy.glu(input, dim)

def one_hot(tensor, num_classes):
on_value = mindspore.Tensor(1, dtype=tensor.dtype)
off_value = mindspore.Tensor(0, dtype=tensor.dtype)
return legacy.one_hot(tensor, num_classes, on_value, off_value, -1)

def polar(abs, angle):
return legacy.polar(abs, angle)

def scatter_value(input, dim, index, src, reduce='none'):
if isinstance(src, numbers.Number):
src = fill_scalar(index.shape, src, dtype=input.dtype)
return legacy.tensor_scatter_elements(input, index, src, dim, reduce)

def pixel_shuffle(input, upscale_factor):
idx = input.shape
length = input.ndim
pre = idx[:-3]
c, h, w = idx[-3:]
c = c // upscale_factor ** 2
input_perm = pre + (c, upscale_factor, upscale_factor, h, w)
input = reshape(input, input_perm)
input_perm = [i for i in range(length - 2)]
input_perm = input_perm + [length, length - 2, length + 1, length - 1]
input_perm = tuple(input_perm)
input = permute(input, input_perm)
input = reshape(input, (pre + (c, upscale_factor * h, upscale_factor * w)))
return input
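# Example (a sketch): pixel_shuffle on a (1, 8, 4, 4) input with upscale_factor=2
# reshapes to (1, 2, 2, 2, 4, 4), permutes with (0, 1, 4, 2, 5, 3) and returns (1, 2, 8, 8).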

def rms_norm(input, weight, eps=1e-5):
input_dtype = input.dtype
input = cast(input, mindspore.float32)
variance = mean(pow(input, 2), -1, True, None)
input = mul(input, rsqrt(add(variance, eps, 1)))
return mul(weight, cast(input, input_dtype))

def count_nonzero(input, dims):
return legacy.count_non_zero(input, dims)

def index_add_ext(input, dim, index, source, alpha):
if alpha != 1:
source = mul(alpha, source)
return legacy.index_add(input, cast(index, mindspore.int32), source, dim, True, True)

def real(input):
return legacy.real(input)

def upsample_linear1d(input, output_size, scale_factor, align_corners=False):
coordinate_transformation_mode = "align_corners" if align_corners else "half_pixel"
return legacy.resize_linear1_d(input, output_size, coordinate_transformation_mode)

def imag(input):
return legacy.imag(input)

def bitwise_xor_tensor(input, other):
return legacy.bitwise_xor(input, other)

def grid_sampler_2d(input, grid, mode='bilinear', padding_mode='zeros', align_corners=False):
return legacy.grid_sampler2_d(input, grid, mode, padding_mode, align_corners)

def l1_loss(input, target, reduction='mean'):
loss = abs(sub(input, target))
if reduction == 'mean':
return mean(loss, (), False, False)
elif reduction == 'sum':
return sum(loss, (), False, False)
return loss

def leaky_relu(input, negative_slope):
select_op = maximum
if negative_slope > 1:
select_op = minimum
return select_op(mul(negative_slope, input), input)

def ceil(input):
return legacy.ceil(input)

def reduce_max(input, axis, keepdims):
return legacy.reduce_max(input, axis, keepdims)

def nan_to_num(input, nan=0.0, posinf=None, neginf=None):
return legacy.nan_to_num(input, nan, posinf, neginf)

def elu(input, alpha):
return legacy.elu(input, alpha)

def sign(input):
return legacy.sign(input)

def inplace_fill_diagonal(input, fill_value, wrap):
inplace_copy(input, legacy.fill_diagonal(input, float(fill_value), wrap))
return input

def clamp_tensor(value, min_value, max_value):
if min_value is not None:
value = legacy.maximum(value, min_value)
if max_value is not None:
value = legacy.minimum(value, max_value)
return value

def lstm(input, h, c, w, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout, proj_size):
return legacy.lstm(input, h, c, w, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout, proj_size)

def var(input, dim=None, correction=1, keepdim=False):
if dim is None:
input_mean = mean(input, (), False, None)
else:
input_mean = mean(input, dim=dim, keepdim=True, dtype=None)
# squared deviation from the mean
squared_diff = pow(sub(input, input_mean, 1), 2)
# compute the variance
if dim is None:
variance = mean(squared_diff, (), False, None)
n = input.numel() # total number of elements
else:
variance = mean(squared_diff, dim=dim, keepdim=keepdim, dtype=None)
n = input.size(dim) # number of elements along the given dim
# Bessel's correction for the unbiased estimate
if correction and n > 1:
variance = mul(variance, (n / (n - 1)))
return variance
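# Example (a sketch): for x = [1., 2., 3., 4.] the mean is 2.5, the mean squared
# deviation is 1.25, and with correction=1 (n=4) it is scaled by 4/3 to ~1.6667.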

def log2(input):
return div(log(input), math.log(2))

def bucketize(input, boundaries, right=False):
epsilon_ = 0. if right else 1.e-6
boundaries = [boundary + epsilon_ for boundary in boundaries]
return legacy.bucketize(input, boundaries)

def col2im(input, output_size, kernel_size, dilation=1, padding=0, stride=1):
return legacy.col2_im(input, output_size, kernel_size, dilation, padding, stride)

def randperm(n, generator, dtype):
seed, offset = generator._step(12) # pylint: disable=protected-access
return legacy.randperm_v2(n, seed, offset, dtype)

def gamma(shape, alpha, beta):
out = np.random.gamma(alpha, 1/beta, shape)
return core.Tensor.from_numpy(out)

def logical_or(input_x, input_y):
return legacy.logical_or(input_x, input_y)

def hswish(input):
return legacy.h_swish(input)

def logical_and(input_x, input_y):
return legacy.logical_and(input_x, input_y)

def logsigmoid(input):
output = sigmoid(input)
ret = log(output)
return ret

def as_strided(input, size, stride, storage_offset):
if len(size) != len(stride):
raise RuntimeError("mismatch in length of strides and shape.")
index = np.arange(0, size[0]*stride[0], stride[0])
for i in np.arange(1, len(size)):
tmp = np.arange(0, size[i]*stride[i], stride[i])
index = np.expand_dims(index, -1)
index = index + tmp
if storage_offset is not None:
index = index + storage_offset

if index.size == 0:
input_indices = mindspore.numpy.empty(index.shape, dtype=mindspore.int32)
else:
input_indices = mindspore.tensor(index.astype(np.int32))
out = gather(reshape(input, (-1,)), input_indices, 0, 0)
return out
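# Example (a sketch): as_strided(x, size=(2, 2), stride=(3, 1), storage_offset=0) on a
# flattened length-6 tensor gathers the flat indices [[0, 1], [3, 4]].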

def dropout2d(input_x, p):
return legacy.dropout2_d(input_x, p)

def linalg_qr(input_x, mode):
full_matrices = mode == 'complete'
return legacy.qr(input_x, full_matrices)

def diag(input, diagonal):
out = np.diag(input.numpy(), diagonal)
return core.Tensor.from_numpy(out)

def logit(input, eps=1e-5):
return legacy.logit(input, eps)

def relu6(input):
return legacy.re_lu6(input)

def logsumexp(input, dim, keepdim=False):
input_max = legacy.reduce_max(input, dim, keepdim)
input_exp = exp(sub(input, input_max))
input_sumexp = sum(input_exp, dim, keepdim, None)
input_logsumexp = log(input_sumexp)
if not keepdim:
input_max = squeeze(input_max, dim)
return add(input_logsumexp, input_max)
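# Example (a sketch): the max is subtracted before exp() so the sum stays finite, using
# logsumexp(x) = max(x) + log(sum(exp(x - max(x)))).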

def bernoulli(input, generator):
seed, offset = generator._step(12) # pylint: disable=protected-access
return legacy.bernoulli(input, seed, offset)

mindnlp/core/_apis/gpu.py (+1123, -0)

@@ -0,0 +1,1123 @@
import ctypes
import numbers
import math
import mindspore
from mindspore._c_expression import _empty_instance
from mindnlp import core
from .._op_prim.cpu import legacy

try:
from mindspore._c_expression import TensorPy as Tensor_
except:
from mindspore._c_expression import Tensor as Tensor_


def empty(size, dtype):
return mindspore.Tensor(Tensor_(shape=size, dtype=dtype))
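# Example (a sketch): empty((2, 3), mindspore.float32) constructs an uninitialized
# Tensor_ of that shape and dtype and wraps it in a mindspore.Tensor.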

def select_ext_view(input, dim, index):
return legacy.select_view(input, index, dim)

def inplace_copy(input, value):
if value.shape != input.shape:
value = legacy.fill_v2(input.shape, value)
# inplace_copy(input, value)
# legacy.assign(input, value)
if hasattr(input, '_base'):
input._base.assign_value(value)
input.assign_value(value)
return input

def fill_scalar(size, fill_value, dtype):
if dtype is None:
return legacy.fill_v2(size, mindspore.Tensor(fill_value))
return legacy.cast(legacy.fill_v2(size, mindspore.Tensor(fill_value)), dtype)

def fill_tensor(size, fill_value, dtype):
return legacy.cast(legacy.fill_v2(size, fill_value), dtype)

def zeros_like(input, dtype):
if dtype is None:
return legacy.zeros_like(input)
return legacy.cast(legacy.zeros_like(input), dtype)

def tensor_shape(input):
return legacy.tensor_shape(input)

def broadcast_to(input, shape):
return legacy.broadcast_to(input, shape)

def zeros(shape, dtype):
return legacy.zeros(shape, dtype)

def sub(input, other, alpha=1):
return legacy.sub(input, legacy.mul(other, alpha))

def contiguous(input):
return input

def inplace_zero(input):
inplace_copy(input, legacy.zeros_like(input))
return input

def abs(input):
return legacy.abs(input)

def identity(input):
return legacy.identity(input)

def clone(input):
return cast(legacy.mul(input, 1), input.dtype)

def max(input):
return legacy.reduce_max(input, (), False)

def ones(shape, dtype):
return legacy.ones(shape, dtype)

def mean(input, dim, keepdim, dtype):
if dtype is not None:
input = legacy.cast(input, dtype)
if dim is None:
dim = ()
return legacy.reduce_mean(input, dim, keepdim)

def transpose_view(input, dim0, dim1):
"""
Transposes the input tensor along the specified dimensions.

Args:
input (Tensor): The input tensor.
dim0 (int): The first dimension to transpose.
dim1 (int): The second dimension to transpose.

Returns:
Tensor: The transposed tensor.
"""
ranks = list(range(input.ndim))
rank0 = ranks[dim0]
rank1 = ranks[dim1]
ranks[dim0] = rank1
ranks[dim1] = rank0
return legacy.transpose(input, tuple(ranks))

def matmul(self, other):
if self.ndim > 2:
if self.ndim == other.ndim:
return legacy.batch_mat_mul(self, other, False, False)
else:
self_shape = self.shape
other_shape = other.shape
if other.ndim == 2:
self = reshape(self, (-1, self_shape[-1]))
out = legacy.mat_mul(self, other, False, False)
return reshape(out, (*self_shape[:-1], out.shape[-1]))
if self.ndim == 2:
other = reshape(other, (-1, other_shape[-1]))
out = legacy.mat_mul(self, other, False, False)
return reshape(out, (*other_shape[:-1], out.shape[-1]))
return legacy.mat_mul(self, other, False, False)

def div(input, other):
return legacy.div(input, other)

def mul(input, other):
return legacy.mul(input, other)

def reduce_all(input, axis, keepdims):
return legacy.reduce_all(input, axis, keepdims)

def isclose(input, other, rtol, atol, equal_nan):
return legacy.is_close(input, other, rtol, atol, equal_nan)

def equal(input, other):
return legacy.reduce_all(legacy.equal(input, other), (), False)

def eq(input, other):
return legacy.equal(input, other)


def expand_dims(input, dim):
return legacy.expand_dims(input, dim)

def tile(input, dims):
return legacy.tile(input, dims)

py_slice = slice
def slice(self, dim, start, end, step):
ndim = self.ndim
begins = [0] * ndim
ends = [i for i in self.shape]
strides = [1] * ndim
begins[dim] = start
ends[dim] = end
strides[dim] = step
return legacy.strided_slice(self, tuple(begins), tuple(ends), tuple(strides), 0, 0, 0, 0, 0)

def pad_v3(input, new_pad, mode, value=None, contiguous=True):
return legacy.pad_v3(input, new_pad, value, mode, contiguous)

def cumsum(self, dim, dtype):
if self.shape[dim] == 0:
return core.tensor([], dtype=self.dtype, device=self.device)
return legacy.cum_sum(self, dim, False, False)

def reduce_any(input, axis, keepdims):
return legacy.reduce_any(input, axis, keepdims)

def concat(tensors, axis):
return legacy.concat(tensors, axis)

def gather_d(input, dim, index):
return legacy.gather_d(input, dim, index)

def reshape(input, shape):
return legacy.reshape(input, shape)

def flatten(input, start_dim, end_dim):
if start_dim < 0:
start_dim = start_dim + input.ndim
if end_dim < 0:
end_dim = end_dim + input.ndim
input_shape = list(input.shape)
input_shape[start_dim:end_dim+1] = [-1]
return legacy.reshape(input, tuple(input_shape))

def sort(input, dim, descending, stable):
return legacy.sort(input, dim, descending)

def gather(input_params, input_indices, axis, batch_dim):
return legacy.gather(input_params, input_indices, axis, batch_dim)

def randint(low, high, shape, generator, dtype):
value = legacy.uniform_int(shape,
mindspore.tensor(low, dtype=mindspore.int32),
mindspore.tensor(high, dtype=mindspore.int32), 0, 0)
return value

def add(input, other, alpha=1):
if alpha == 1.0:
return legacy.add(input, other)
return legacy.add(input, legacy.mul(other, alpha))

def non_zero(input):
return legacy.non_zero(input)

def stop_gradient(input):
return legacy.stop_gradient(input)

def squeeze(input, axis):
return legacy.squeeze(input, axis)

def softmax(input, axis):
if axis is None:
axis = -1
return legacy.softmax(input, axis)

def topk(input, k, dim, largest, sorted):
if not largest:
input = -input
if dim is None or dim == input.ndim - 1:
if not largest:
res = legacy.top_k(input, k, sorted)
values, indices = -res[0], res[1]
return values, indices
return legacy.top_k(input, k, sorted)
input = transpose_view(input, dim, input.ndim - 1)
output = legacy.top_k(input, k, sorted)
values = transpose_view(output[0], dim, input.ndim - 1)
indices = transpose_view(output[1], dim, input.ndim - 1)
if not largest:
res = (-values, indices)
else:
res = (values, indices)
return res

def strided_slice(input, begin, end, strides, begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0):
return legacy.strided_slice(input, tuple(begin), tuple(end), tuple(strides), begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask)

def strided_slice_grad(input, begin, end, strides, update, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask):
return legacy.strided_slice_grad(update, input.shape, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask)

def masked_select(input, mask):
return legacy.masked_select(input, mask)

def stack(values, axis=0):
return legacy.stack(values, axis)

def cast(input, dtype):
return legacy.cast(input, dtype)

def less(input, other):
return legacy.less(input, other)

def select(condition, x, y):
return legacy.select(condition, x, y)

def round(input, decimals):
return legacy.round(input, decimals)

def erfinv(input):
return legacy.erfinv(input)

def erf(input):
return legacy.erf(input)

def pow_scalar_tensor(input, exponent):
return legacy.pow(input, exponent)

def inplace_add(input, other, alpha):
if alpha != 1:
return inplace_copy(input, legacy.add(input, legacy.mul(other, alpha)))
return inplace_copy(input, legacy.add(input, other))

def clamp_scalar(value, min_value, max_value):
if min_value is not None:
value = legacy.maximum(value, min_value)
if max_value is not None:
value = legacy.minimum(value, max_value)
return value

def constant_pad_nd(input, pad, value):
return legacy.pad_v3(input, pad, value, 'constant', True)

def randn(size, generator, dtype):
return cast(legacy.standard_normal(tuple(size), 0, 0), dtype)

def rand(size, generator, dtype):
return cast(legacy.uniform_real(tuple(size), 0, 0), dtype)

def tril(input, diagonal):
return legacy.tril(input, diagonal)

def dense(input, weight, bias=None):
return legacy.dense(input, weight, bias)

def relu(input):
return legacy.re_lu(input)

def assign(input, value):
return inplace_copy(input, value)

def square(input):
return legacy.square(input)

def log(input):
if not input.dtype.is_floating_point:
input = cast(input, mindspore.float32)
return legacy.log(input)

def permute(input, dims):
return legacy.transpose(input, dims)

def ones_like(input, dtype):
if dtype is not None:
return cast(legacy.ones_like(input), dtype)
return legacy.ones_like(input)

def embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq):
return cast(legacy.gather(weight, input, 0, 0), weight.dtype)

def linspace(start, end, steps, dtype):
start = float(start)
end = float(end)
return legacy.lin_space(mindspore.Tensor(start), mindspore.Tensor(end), steps)

def masked_fill(input, mask, value):
if input.dtype.is_floating_point and isinstance(value, numbers.Number):
value = float(value)
return legacy.masked_fill(input, mask, value)

def sum(input, dim, keepdim, dtype):
if dim is None:
dim = ()
if input.dtype == mindspore.bool_:
input = cast(input, mindspore.int64)
if dtype is None:
return legacy.reduce_sum(input, dim, keepdim, False)
return legacy.reduce_sum(input.astype(dtype), dim, keepdim, False)

def conv2d(input, weight, bias=None, stride=1, padding='valid', dilation=1, groups=1):
pad_mode = 'pad'
pad = padding
if isinstance(padding, (tuple, list)):
pad = (padding[0], padding[0], padding[1], padding[1])
elif isinstance(padding, int):
pad = (padding,) * 4
if not isinstance(padding, (int, tuple, list)):
pad_mode = padding
pad = (0,) * 4
if isinstance(stride, int):
stride = (stride,) * 4

out_channels = weight.shape[0]
kernel_size = weight.shape[2:]

output = legacy.conv2_d(
input, weight,
out_channels,
kernel_size,
1,#mode=1,
pad_mode, #pad_mode=pad_mode,
pad, #pad=pad,
tuple(stride), #stride=tuple(stride),
dilation, #dilation=dilation,
groups, #group=groups,
"NCHW", #data_format="NCHW"
)
if bias is not None:
output = legacy.bias_add(output, bias, "NCHW")
return output

def conv2d_padding(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
return conv2d(input, weight, bias, stride, padding, dilation, groups)

def pow_tensor_scalar(input, scalar):
return legacy.pow(input, scalar)

def rsqrt(input):
return legacy.rsqrt(input)

def layer_norm(input, normalized_shape, weight, bias, eps=1e-5):
if weight is not None:
begin_axis = input.ndim - weight.ndim
else:
begin_axis = -1
return legacy.layer_norm(input, weight, bias, begin_axis, begin_axis, eps)

def argmin_with_value(input, axis, keep_dims):
return legacy.arg_min_with_value(input, axis, keep_dims)

def argmax_with_value(input, axis, keep_dims):
return legacy.arg_max_with_value(input, axis, keep_dims)

def silu(input):
return legacy.mul(input, legacy.sigmoid(input))

def less_equal(input_x, input_y):
return legacy.less_equal(input_x, input_y)

def not_equal(input_x, input_y):
return legacy.not_equal(input_x, input_y)


def logical_not(input):
return legacy.logical_not(input)

def tensor_scatter_update(input, indices, updates):
return legacy.tensor_scatter_update(input, indices, updates)

def isinf(input):
return legacy.is_inf(input)

def gelu(input, approximate):
return legacy.ge_lu(input)

def greater(input_x, input_y):
return legacy.greater(input_x, input_y)

def greater_equal(input_x, input_y):
return legacy.greater_equal(input_x, input_y)

def eye(n, m, dtype):
return legacy.eye(n, m, dtype)

def argmax(input, axis, keep_dims):
return legacy.arg_max_with_value(input, axis, keep_dims)[0]

def argmin(input, axis, keep_dims):
return legacy.arg_min_with_value(input, axis, keep_dims)[0]

def exp(input):
return legacy.exp(input)

def split_with_size(tensor, split_sizes, dim=0):
chunks = []
start = 0
for chunk_size in split_sizes:
end = start + chunk_size
slice_obj = [py_slice(None)] * tensor.dim()
slice_obj[dim] = py_slice(start, end)
chunks.append(tensor[tuple(slice_obj)])
start = end

return tuple(chunks)


def cos(input):
return legacy.cos(input)

def sigmoid(input):
return legacy.sigmoid(input)

def sqrt(input):
return legacy.sqrt(input)

def chunk(input, chunks, dim=0):
return legacy.split(input, dim, chunks)

def sin(input):
return legacy.sin(input)

def neg(input):
return legacy.neg(input)

def bitwise_or_tensor(input_x, input_y):
return legacy.bitwise_or(input_x, input_y)

def bitwise_and_tensor(input_x, input_y):
return legacy.bitwise_and(input_x, input_y)

def non_zero_ext(input):
out = legacy.non_zero(input)
return unbind(out, 1, out.shape[1])

def unbind(input, dim, num):
return legacy.unstack(input, dim, num)

def log1p(input):
return legacy.log1p(input)

def log_softmax(input, axis, dtype):
if dtype is not None:
input = input.astype(dtype)
return legacy.log_softmax(input, axis)

def scatter(input, dim, index, src):
return legacy.tensor_scatter_elements(input, index, src, dim, "none")

def batch_norm(input, weight, bias, running_mean=None, runnning_var=None, training=False, momentum=0.1, epsilon=1e-5):
input_ndim = input.ndim
if input_ndim == 2:
return legacy.batch_norm(input, weight, bias, running_mean, runnning_var, training, epsilon, momentum, 'NCHW')
else:
input = transpose_view(input, 1, -1)
input_shape = input.shape
input = reshape(input, (-1, input.shape[-1]))
outs = legacy.batch_norm(input, weight, bias, running_mean, runnning_var, training, epsilon, momentum, 'NCHW')
out = reshape(outs[0], (*input_shape[:-1], -1))
out = transpose_view(out, 1, -1)

return out, outs[1], outs[2]

def tanh(input):
return legacy.tanh(input)

def dropout(input, p, seed, offset):
return legacy.dropout(input, 1-p, 0, 0)

def split_tensor(input, split_size_or_sections, dim):
if isinstance(split_size_or_sections, int):
num = input.shape[dim] // split_size_or_sections
return legacy.split(input, dim, num)

def bmm(input_x, input_y):
return legacy.batch_mat_mul(input_x, input_y, False, False)

def nllloss(input, target, weight, reduction, ingore_index):
return legacy.nll_loss(input, target, weight, reduction, ingore_index)

def nllloss_2d(input, target, weight, reduction, ingore_index):
input = reshape(transpose_view(input, 1, -1), (-1, input.shape[1]))
target = reshape(target, (-1,))
out = legacy.nll_loss(input, target, weight, reduction, ingore_index)
return out


def binary_cross_entropy_with_logits(input, target, weight, posWeight, reduction):
return legacy.bce_with_logits_loss(input, target, weight, posWeight, reduction)

def std(input, dim, correction, keepdim):
if dim is None:
dim = ()
return legacy.reduce_std(input, dim, bool(correction), keepdim)[0]

def linalg_vector_norm(x, ord=2, dim=None, keepdim=False, dtype=None):
return legacy.lp_norm(x, dim, int(ord), keepdim, 1e-12)

def rfft(input, n=None, dim=-1, norm=None):
if n is None:
n = input.shape[dim]
if input.shape[dim] < n:
pad_inf = (0, n - input.shape[dim])
pad_dims = (0, 0) * (input.ndim - (dim + 1)) + pad_inf
input = constant_pad_nd(input, pad_dims, 0.)
else:
input = narrow(input, dim, 0, n)
return legacy.fft_with_size(input, input.ndim, False, True, norm, True, ())

def narrow(input, dim, start, length):
begin = [0] * input.ndim
size = [i for i in input.shape]
begin[dim] = start
size[dim] = length
return legacy.slice(input, begin, size)

def conj(input):
return legacy.conj(input)

def irfft(input, n, dim, norm):
if input.shape[dim] < n:
pad_inf = (0, n - input.shape[dim])
pad_dims = (0, 0) * (input.ndim - (dim + 1)) + pad_inf
input = constant_pad_nd(input, pad_dims, 0.)
else:
input = narrow(input, dim, 0, n)
return legacy.fft_with_size(input, input.ndim, True, True, norm, True, ())

def avg_pool1d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True):
if isinstance(padding, int):
padding = (0, 0, 0, 0, padding, padding)
elif isinstance(padding, tuple):
if len(padding) != 1:
raise ValueError("For avg_pool1d, padding should be int or tuple of length 1.")
padding = (0, 0, 0, 0, padding[0], padding[1])
else:
raise TypeError("For avg_pool1d, padding should be int or tuple of length 1.")

if isinstance(stride, tuple):
if len(stride) != 1:
raise ValueError("For avg_pool1d, stride should be int or tuple of length 1.")
stride = stride[0]

input = expand_dims(input, 2)
input = expand_dims(input, 2)
input = legacy.avg_pool3_d(input, (1, 1, kernel_size), (1, 1, stride), 'pad', padding, ceil_mode, count_include_pad, 0, 'NCDHW')
input = squeeze(input, (2, 3))
return input

def fmod_scalar(input, other):
return legacy.floor_mod(input, other)

def conv1d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
pad_mode = 'pad'
pad = padding
if isinstance(padding, tuple):
pad = (0, 0, padding[0], padding[0])
elif isinstance(padding, int):
pad = (0, 0) + (padding,) * 2
if not isinstance(padding, (int, tuple)):
pad_mode = padding
pad = (0,) * 4

input = expand_dims(input, 2)
weight = expand_dims(weight, 2)

output = legacy.conv2_d(
input, weight,
weight.shape[0],
(1, weight.shape[-1]),
1,#mode=1,
pad_mode, #pad_mode=pad_mode,
pad, #pad=pad,
(1, stride) if isinstance(stride, int) else (1, *stride), #stride=tuple(stride),
(1, dilation) if isinstance(dilation, int) else (1, *dilation), #dilation=dilation,
groups, #group=groups,
"NCHW", #data_format="NCHW"
)


if bias is not None:
output = legacy.bias_add(output, bias, "NCHW")

output = squeeze(output, 2)
return output

def maximum(input, other):
return legacy.maximum(input, other)

def prod(input, axis, keepdims, dtype):
if axis is None:
axis = ()
return legacy.reduce_prod(input, axis, keepdims)

def mse_loss(input, target, reduction):
x = square(input - target)
average_flag = True
reduce_flag = True
if reduction == 'sum':
average_flag = False
if reduction == 'none':
reduce_flag = False

if reduce_flag and average_flag:
x = mean(x, tuple(range(x.ndim)), False, None)

if reduce_flag and not average_flag:
x = sum(x, tuple(range(x.ndim)), False, None)

return x

def adaptive_avg_pool2d(input, output_size):
return legacy.adaptive_avg_pool2_d(input, output_size)

def avg_pool2d(input, kernel_size, stride, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None):
    if isinstance(padding, int):
        padding = (0, 0, padding, padding, padding, padding)
    elif isinstance(padding, tuple):
        if len(padding) != 2:
            raise ValueError("For avg_pool2d, padding should be int or tuple of length 2.")
        padding = (0, 0, padding[0], padding[0], padding[1], padding[1])
    else:
        raise TypeError("For avg_pool2d, padding should be int or tuple of length 2.")

if isinstance(kernel_size, int):
kernel_size = (kernel_size, kernel_size)
if isinstance(stride, int):
stride = (stride, stride)

input = expand_dims(input, 2)
input = legacy.avg_pool3_d(input, (1, *kernel_size), (1, *stride), 'pad', padding, ceil_mode, count_include_pad, 0, 'NCDHW')
input = squeeze(input, 2)
return input

def bitwise_or_scalar(input, value):
return legacy.bitwise_or(input, value)

def floor_div(input, other):
return legacy.floor_div(input, other)

def minimum(input, other):
return legacy.minimum(input, other)

def reverse_v2(input, axis):
if isinstance(axis, int):
axis = (axis,)
return legacy.reverse_v2(input, axis)

def divmod(input, other, rounding_mode):
if rounding_mode == 'floor':
return legacy.floor_div(input, other)
elif rounding_mode == 'trunc':
if isinstance(input, numbers.Number):
input = mindspore.Tensor(input)
return legacy.truncate_div(input, other)
else:
raise ValueError(f'Invalid rounding mode: {rounding_mode}')
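
# Rounding-mode sketch (illustrative): for input=-7 and other=2, the exact quotient is -3.5;
# 'floor' rounds it to -4, while 'trunc' rounds toward zero, giving -3.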

def pow(input, exponent):
return legacy.pow(input, exponent)


def bitwise_and_scalar(input, value):
return legacy.bitwise_and(input, value)

def rand_like(input, generator, dtype):
return rand(input.shape, generator, dtype)

def bincount(input, weights=None, minlength=0):
if weights is None:
weights = mindspore.Tensor(1, dtype=mindspore.int32)
return legacy.bincount(cast(input, mindspore.int32),
mindspore.Tensor(minlength, dtype=mindspore.int32),
weights)

def lgamma(input):
return legacy.lgamma(input)

def _deconv_output_length(pad_mode, filter_size, stride_size, dilation_size, padding):
"""Calculate the width and height of output."""
length = 0
filter_size = filter_size + (filter_size - 1) * (dilation_size - 1)
if pad_mode == 'valid':
if filter_size - stride_size > 0:
length = filter_size - stride_size
elif pad_mode == 'pad':
length = - padding + filter_size - stride_size

return length
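
# Worked example (illustrative): with pad_mode='pad', filter_size=4, stride_size=2,
# dilation_size=1 and padding=2 (total over both sides), length = -2 + 4 - 2 = 0,
# so conv_transpose2d below produces H_out = H_in * stride + 0, which matches the usual
# (H_in - 1) * stride - 2 * pad + k transposed-convolution output size (output_padding=0).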


def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1):
pad_mode = 'pad'
pad = padding
if isinstance(padding, tuple):
pad = (0, 0, padding[0], padding[0])
elif isinstance(padding, int):
pad = (0, 0) + (padding,) * 2
if not isinstance(padding, (int, tuple)):
pad_mode = padding
pad = (0,) * 4

    if isinstance(stride, int):
        stride = (stride, stride)
    if isinstance(dilation, int):
        dilation = (dilation, dilation)

in_channel, out_channels = weight.shape[0], weight.shape[1] * groups
kernel_size = weight.shape[2:]

n, _, h, w = input.shape
h_add = _deconv_output_length(pad_mode, kernel_size[0], stride[0], dilation[0], pad[0] + pad[1])
w_add = _deconv_output_length(pad_mode, kernel_size[1], stride[1], dilation[1], pad[2] + pad[3])

out = legacy.conv2_d_transpose(
input, weight,
(n, out_channels, h * stride[0] + h_add, w * stride[1] + w_add),
out_channels,
kernel_size,
pad_mode,
pad,
None,
1,
stride,
dilation,
groups,
'NCHW'
)
if bias is not None:
out = legacy.bias_add(out, bias, 'NCHW')
return out

def expm1(x):
return legacy.expm1(x)

py_min = min
def min(input):
return legacy.reduce_min(input, (), False)

def acos(x):
return legacy.a_cos(x)

def upsample_bilinear2d(input, size=None, scale_factor=None, align_corners=False):
return legacy.resize_bilinear_v2(input, size, align_corners, not align_corners)

def unstack_view(input, dim):
return legacy.unstack(input, dim, input.shape[dim])

def triu(input, diagonal=0):
return legacy.triu(input, diagonal)

def masked_scatter(input, mask, value):
return legacy.masked_scatter(input, mask, value)

def max_pool2d(input, kernel_size, stride=1, padding=0, dilation=1, ceil_mode=False, return_indices=False):
out, indices = legacy.max_pool_with_argmax_v2(input, kernel_size, stride, padding, dilation, ceil_mode, mindspore.int64)

if return_indices:
return out, indices
return out

def baddbmm(input, batch1, batch2, alpha=1, beta=1):
return add(mul(beta, input), mul(alpha, bmm(batch1, batch2)))
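
# baddbmm computes beta * input + alpha * (batch1 @ batch2): with batch1 of shape (B, n, m)
# and batch2 of shape (B, m, p), the batched product is (B, n, p) and input broadcasts against it.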

def softplus(input, beta=1, threshold=20):
return legacy.softplus(input)

def gather_nd(input, indices):
return legacy.gather_nd(input, indices)

def unique_consecutive(input, return_inverse, return_counts, dim):
return legacy.unique_consecutive(input, return_inverse, return_counts, dim)

def meshgrid(input, lambd):
return legacy.meshgrid(input, lambd)

def addcmul(input, tensor1, tensor2, value=1.0):
return legacy.addcmul(input, tensor1, tensor2, mindspore.Tensor(value))

def addmm(input, mat1, mat2, alpha=1.0, beta=1.0):
return add(mul(beta, input), mul(alpha, bmm(mat1, mat2)))

def im2col(input, kernel_size, dilation=1, padding=0, stride=1):
out = legacy.im2_col(input, kernel_size, stride, dilation, padding)
out_shape = out.shape[:1] + (-1,) + out.shape[-1:]
out = reshape(out, out_shape)
return out

def floor(input):
return legacy.floor(input)

def upsample_nearest2d(input, output_size, scale_factors):
if output_size is None:
tuple_len = py_min(len(input.shape) - 2, len(scale_factors))
output_size = tuple([math.floor(input.shape[i + 2] * scale_factors[i])
for i in range(tuple_len)])

return legacy.resize_nearest_neighbor(input, output_size, False, False)

def upsample_bicubic2d(input, size=None, scale_factor=None, align_corners=False):
return legacy.resize_bicubic(input, size, align_corners, not align_corners)

def conv3d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
pad_mode = 'pad'
pad = padding
if isinstance(padding, (tuple, list)):
pad = (padding[0], padding[0], padding[1], padding[1], padding[2], padding[2])
elif isinstance(padding, int):
pad = (padding,) * 6
if not isinstance(padding, (int, tuple, list)):
pad_mode = padding
pad = (0,) * 6

    if isinstance(stride, int):
        stride = (stride, stride, stride)

    out_channels = weight.shape[0]
    kernel_size = weight.shape[2:]

    output = legacy.conv3_d(input, weight,
                           out_channels,
                           kernel_size,
                           1,
                           pad_mode,
                           pad,
                           tuple(stride),
                           dilation,
                           groups,
                           "NCDHW")
    if bias is not None:
        output = legacy.bias_add(output, bias, 'NCDHW')
    return output


def inplace_relu(input):
return legacy.assign(input, legacy.re_lu(input))

def adaptive_avg_pool1d(input, output_size):
x_in_shape = input.shape
width = x_in_shape[2]
stride = width // output_size
kernel_size = width - (output_size - 1) * stride
stride = (1, width // output_size)
kernel_size = (1, kernel_size)
input = expand_dims(input, 2)
input = legacy.avg_pool(input, kernel_size, stride, "VALID", "NCHW")
input = squeeze(input, 2)
return input

def remainder_tensor_scalar(input, other):
out = sub(input, mul(floor_div(input, other), other), 1)
return out

def outer(input, other):
input = reshape(input, (-1, 1))
y = mul(input, other)
return y

def view_as_complex(input):
real_part, imag_part = chunk(input, 2, -1)
return legacy.complex(squeeze(real_part, -1), squeeze(imag_part, -1))

def cdist(x1, x2, p):
return legacy.cdist(x1, x2, float(p))

def prelu(input, weight):
return legacy.p_re_lu(input, weight)

def reciprocal(input):
return legacy.reciprocal(input)

def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank, reduction, zero_infinity):
loss, log_alpha = legacy.ctc_loss_v2(log_probs, targets, input_lengths, target_lengths, blank, 'none', zero_infinity)
if reduction == 'sum':
loss = sum(loss, (), False, None)
if reduction == 'mean':
# input_type = loss.dtype
# target_length_t = target_lengths.clip(1., None)
# loss = loss.astype("float32")
loss = div(loss, target_lengths)
loss = mean(loss, (), False, None)
# loss = loss.astype(input_type)
return (loss, log_alpha)

def glu(input, dim=-1):
return legacy.glu(input, dim)

def one_hot(tensor, num_classes):
on_value = mindspore.Tensor(1, dtype=tensor.dtype)
off_value = mindspore.Tensor(0, dtype=tensor.dtype)
return legacy.one_hot(tensor, num_classes, on_value, off_value, -1)

def polar(abs, angle):
return legacy.polar(abs, angle)

def scatter_value(input, dim, index, src, reduce='none'):
if isinstance(src, numbers.Number):
src = fill_scalar(index.shape, src, dtype=input.dtype)
return legacy.tensor_scatter_elements(input, index, src, dim, reduce)

def pixel_shuffle(input, upscale_factor):
idx = input.shape
length = input.ndim
pre = idx[:-3]
c, h, w = idx[-3:]
c = c // upscale_factor ** 2
input_perm = pre + (c, upscale_factor, upscale_factor, h, w)
input = reshape(input, input_perm)
input_perm = [i for i in range(length - 2)]
input_perm = input_perm + [length, length - 2, length + 1, length - 1]
input_perm = tuple(input_perm)
input = permute(input, input_perm)
input = reshape(input, (pre + (c, upscale_factor * h, upscale_factor * w)))
return input
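
# Shape sketch for pixel_shuffle (illustrative): with upscale_factor r, an input
# (..., C * r * r, H, W) is reshaped to (..., C, r, r, H, W), the two r-blocks are
# interleaved with H and W via permute, and the result is reshaped to (..., C, r * H, r * W).
# E.g. a (1, 4, 3, 3) input with r=2 becomes (1, 1, 6, 6).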

def rms_norm(input, weight, eps=1e-5):
input_dtype = input.dtype
input = cast(input, mindspore.float32)
variance = mean(pow(input, 2), -1, True, None)
input = mul(input, rsqrt(add(variance, eps, 1)))
return mul(weight, cast(input, input_dtype))
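
# RMSNorm sketch: y = weight * x / sqrt(mean(x^2, last_dim) + eps). The statistics above
# are computed in float32, and the normalized value is cast back to the input dtype
# before it is scaled by weight.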

def count_nonzero(input, dims):
return legacy.count_non_zero(input, dims)

def index_add_ext(input, dim, index, source, alpha):
if alpha != 1:
source = mul(alpha, source)
return legacy.index_add(input, cast(index, mindspore.int32), source, dim, True, True)

def real(input):
return legacy.real(input)

def upsample_linear1d(input, output_size, scale_factor, align_corners=False):
coordinate_transformation_mode = "align_corners" if align_corners else "half_pixel"
return legacy.resize_linear1_d(input, output_size, coordinate_transformation_mode)

def imag(input):
return legacy.imag(input)

def bitwise_xor_tensor(input, other):
return legacy.bitwise_xor(input, other)

def grid_sampler_2d(input, grid, mode='bilinear', padding_mode='zeros', align_corners=False):
return legacy.grid_sampler2_d(input, grid, mode, padding_mode, align_corners)

def l1_loss(input, target, reduction='mean'):
loss = abs(sub(input, target))
if reduction == 'mean':
return mean(loss, (), False, False)
elif reduction == 'sum':
return sum(loss, (), False, False)
return loss

def leaky_relu(input, negative_slope):
select_op = maximum
if negative_slope > 1:
select_op = minimum
return select_op(mul(negative_slope, input), input)

def ceil(input):
return legacy.ceil(input)

def reduce_max(input, axis, keepdims):
return legacy.reduce_max(input, axis, keepdims)

def nan_to_num(input, nan=0.0, posinf=None, neginf=None):
return legacy.nan_to_num(input, nan, posinf, neginf)

def elu(input, alpha):
return legacy.elu(input, alpha)

def sign(input):
return legacy.sign(input)

def inplace_fill_diagonal(input, fill_value, wrap):
inplace_copy(input, legacy.fill_diagonal(input, float(fill_value), wrap))
return input

def clamp_tensor(value, min_value, max_value):
if min_value is not None:
value = legacy.maximum(value, min_value)
if max_value is not None:
value = legacy.minimum(value, max_value)
return value

def lstm(input, h, c, w, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout, proj_size):
return legacy.lstm(input, h, c, w, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout, proj_size)

def var(input, dim=None, correction=1, keepdim=False):
if dim is None:
input_mean = mean(input, (), False, None)
else:
input_mean = mean(input, dim=dim, keepdim=True, dtype=None)
    # squared difference from the mean
    squared_diff = pow(sub(input, input_mean, 1), 2)
    # variance
    if dim is None:
        variance = mean(squared_diff, (), False, None)
        n = input.numel()  # total number of elements
    else:
        variance = mean(squared_diff, dim=dim, keepdim=keepdim, dtype=None)
        n = input.size(dim)  # number of elements along the reduced dimension
    # Bessel's correction for the unbiased estimate
    if correction and n > 1:
        variance = mul(variance, (n / (n - 1)))
return variance
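
# Worked example for the correction above (illustrative): for [1., 2., 3., 4.] the
# population variance is 1.25; with correction (Bessel) it is scaled by n / (n - 1) = 4 / 3,
# giving about 1.6667, the unbiased estimate.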

def log2(input):
return div(log(input), math.log(2))

def bucketize(input, boundaries, right=False):
epsilon_ = 0. if right else 1.e-6
boundaries = [boundary + epsilon_ for boundary in boundaries]
return legacy.bucketize(input, boundaries)

def col2im(input, output_size, kernel_size, dilation=1, padding=0, stride=1):
return legacy.col2_im(input, output_size, kernel_size, dilation, padding, stride)

def randperm(n, generator, dtype):
seed, offset = generator._step(12) # pylint: disable=protected-access
return legacy.randperm_v2(n, seed, offset, dtype)

def logical_or(input_x, input_y):
return legacy.logical_or(input_x, input_y)

def hswish(input):
return legacy.h_swish(input)

def logical_and(input_x, input_y):
return legacy.logical_and(input_x, input_y)

def logsigmoid(input):
output = sigmoid(input)
ret = log(output)
return ret

def dropout2d(input_x, p):
return legacy.dropout2_d(input_x, p)

def linalg_qr(input_x, mode):
    full_matrices = mode == 'complete'
return legacy.qr(input_x, full_matrices)

def logit(input, eps=1e-5):
return legacy.logit(input, eps)

def relu6(input):
return legacy.re_lu6(input)

def logsumexp(input, dim, keepdim=False):
input_max = legacy.reduce_max(input, dim, keepdim)
input_exp = exp(sub(input, input_max))
input_sumexp = sum(input_exp, dim, keepdim, None)
input_logsumexp = log(input_sumexp)
if not keepdim:
input_max = squeeze(input_max, dim)
return add(input_logsumexp, input_max)
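
# Numerically stable identity used above:
# logsumexp(x, dim) = max(x, dim) + log(sum(exp(x - max(x, dim)), dim)),
# which keeps exp() from overflowing for large inputs.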

def bernoulli(input, generator):
    seed, offset = generator._step(12)  # pylint: disable=protected-access
    return legacy.bernoulli(input, seed, offset)

def arange(start, end, step, dtype):
return legacy.range(start, end, step, 100000)

def inplace_fill_scalar(input, value):
input.assign_value(fill_scalar(input.shape, value, input.dtype))
return input

def inplace_normal(input, mean, std, generator):
out = legacy.standard_normal(input.shape, 0, 0)
value = add(mul(out, std), mean)
return input.assign_value(value)

def inplace_uniform(input, from_, to_, generator_):
if input.dtype.is_floating_point:
uniform_real = legacy.uniform_real(tuple(input.shape), 0, 0)
value = add(mul(uniform_real, sub(to_, from_)), from_)
else:
value = legacy.uniform_int(input.shape,
mindspore.tensor(from_, dtype=mindspore.int32),
mindspore.tensor(to_, dtype=mindspore.int32), 0, 0)
return input.assign_value(value)

+ 379
- 0
mindnlp/core/_apis/meta.py View File

@@ -0,0 +1,379 @@
try:
from mindspore._c_expression import TensorPy as Tensor_
except:
from mindspore._c_expression import Tensor as Tensor_

import math
import numpy as np
from mindnlp import core

__all__ = []
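
# Note: these are shape-and-dtype-only "meta" kernels. Each one allocates an uninitialized
# Tensor_ with the inferred output shape and dtype and never computes real values, so code
# can be executed for shape/dtype inference without touching data. A rough sketch of the
# pattern, assuming a (2, 3) float32 meta tensor named x:
#     y = reshape(x, (3, 2))   # y.shape is (3, 2); no data is moved or initialized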

def arange(start, end, step, dtype):
out = Tensor_(shape=(math.ceil((end - start) / step), ), dtype=dtype)
return core.Tensor(out)

__all__.append('arange')

def broadcast_to(input, shape):
out_shape = ()
input_shape = input.shape
    if len(input_shape) != len(shape):
        input_shape = (1,) * (len(shape) - len(input_shape)) + input_shape
for idx, s in enumerate(shape):
if s == -1:
s = input_shape[idx]
out_shape += (s,)

out = Tensor_(shape=out_shape, dtype=input.dtype)
return core.Tensor(out)

__all__.append('broadcast_to')

def zeros(size, dtype):
out = Tensor_(shape=size, dtype=dtype)
return core.Tensor(out)

__all__.append('zeros')

def ones(size, dtype):
out = Tensor_(shape=size, dtype=dtype)
return core.Tensor(out)

__all__.append('ones')

def inplace_uniform(input, *args):
return input

__all__.append('inplace_uniform')

def inplace_fill_scalar(input, value):
return input

__all__.append('inplace_fill_scalar')

def inplace_normal(input, *args):
return input

__all__.append('inplace_normal')

def getitem(input, slice):
out = input.asnumpy()[slice]
out = Tensor_(shape=out.shape, dtype=input.dtype)
return core.Tensor(out)

__all__.append('getitem')

def sub(input, other, alpha):
if isinstance(input, core.Tensor):
return input
return other

__all__.append('sub')

def pad_v3(input, pad, mode, value):
out = np.pad(input.asnumpy(), pad, mode, constant_values=value)
out = Tensor_(shape=out.shape, dtype=input.dtype)
return core.Tensor(out)

__all__.append('pad_v3')

def abs(input):
return input

__all__.append('abs')

def cast(input, dtype):
out = Tensor_(shape=input.shape, dtype=dtype)
return core.Tensor(out)

__all__.append('cast')

def index_select(input, dim, index):
out = np.take(input.asnumpy(), index.asnumpy(), dim)
out = Tensor_(shape=out.shape, dtype=input.dtype)
return core.Tensor(out)

__all__.append('index_select')

def identity(input):
out = Tensor_(shape=input.shape, dtype=input.dtype)
return core.Tensor(out)

__all__.append('identity')

def contiguous(input):
return input

__all__.append('contiguous')

def inplace_copy(input, other):
return input

__all__.append('inplace_copy')

def div(input, other):
if isinstance(input, core.Tensor):
shape = input.shape
dtype = input.dtype
else:
shape = other.shape
dtype = other.dtype
out = Tensor_(shape=shape, dtype=dtype)
return core.Tensor(out)

__all__.append('div')

def pow_scalar_tensor(input, other):
out = Tensor_(shape=other.shape, dtype=other.dtype)
return core.Tensor(out)

__all__.append('pow_scalar_tensor')

def concat(tensors, dim):
shape = list(tensors[0].shape)
shape[dim] = sum([t.shape[dim] for t in tensors])
out = Tensor_(shape=tuple(shape), dtype=tensors[0].dtype)
return core.Tensor(out)

__all__.append('concat')

def tril(input, k):
return input

__all__.append('tril')

def reshape(input, shape):
out = Tensor_(shape=tuple(shape), dtype=input.dtype)
return core.Tensor(out)

__all__.append('reshape')

def linalg_vector_norm(input, p, dim, keepdim, dtype):
    input_shape = list(input.shape)
    if dim is None:
        dim = tuple(range(input.ndim))
    elif isinstance(dim, int):
        dim = (dim,)
    for d in dim:
input_shape[d] = 1 if keepdim else 0
new_shape = []
for s in input_shape:
if s != 0:
new_shape.append(s)
if dtype is None:
dtype = input.dtype
out = Tensor_(shape=tuple(new_shape), dtype=dtype)
return core.Tensor(out)

__all__.append('linalg_vector_norm')

def erfinv(input):
return input
__all__.append('erfinv')


def stop_gradient(input):
out = Tensor_(shape=input.shape, dtype=input.dtype)
return core.Tensor(out)

__all__.append('stop_gradient')

def log(input):
return input
__all__.append('log')

def mul(input, other):
out = Tensor_(shape=input.shape, dtype=input.dtype)
return core.Tensor(out)
__all__.append('mul')

def randn(size, generator, dtype):
out = Tensor_(shape=size, dtype=dtype)
return core.Tensor(out)

__all__.append('randn')

def zeros_like(input, *args, **kwargs):
out = Tensor_(shape=input.shape, dtype=input.dtype)
return core.Tensor(out)
__all__.append('zeros_like')

def inplace_add(input, other, alpha):
return input
__all__.append('inplace_add')

def clamp_scalar(input, *args):
return input
__all__.append('clamp_scalar')

def expand_dims(input, dim):
input_shape = list(input.shape)
input_shape.insert(dim, 1)

out = Tensor_(shape=tuple(input_shape), dtype=input.dtype)
    return core.Tensor(out)

__all__.append('expand_dims')

def floor_div(input, other):
return input
__all__.append('floor_div')

def sin(input):
return input

__all__.append('sin')

def cos(input):
return input

__all__.append('cos')

def triu(input, diagonal):
return input

__all__.append('triu')

def fill_scalar(size, fill_value, dtype):
if dtype is None:
dtype = core.get_default_dtype()
out = Tensor_(shape=size, dtype=dtype)
return core.Tensor(out)

__all__.append('fill_scalar')

def sqrt(input):
return input

__all__.append('sqrt')

def normal_float_float(mean, std, size, generator):
out = Tensor_(shape=size, dtype=core.float32)
return core.Tensor(out)


__all__.append('normal_float_float')

def stack(tensors, dim):
x_shape = list(tensors[0].shape)
x_shape.insert(dim, len(tensors))
out = Tensor_(shape=tuple(x_shape), dtype=tensors[0].dtype)
return core.Tensor(out)

__all__.append('stack')

def argmax_with_value(input, dim, keepdim):
out_shape = list(input.shape)
if keepdim:
out_shape[dim] = 1
else:
out_shape.pop(dim)

indices = Tensor_(shape=out_shape, dtype=core.int64)
values = Tensor_(shape=out_shape, dtype=input.dtype)

return core.Tensor(indices), core.Tensor(values)

__all__.append('argmax_with_value')

def tile(input, dims):
input_shape = input.shape
out_shape = [input_shape[i] * dims[i] for i in range(input.ndim)]
out = Tensor_(shape=tuple(out_shape), dtype=input.dtype)
return core.Tensor(out)

__all__.append('tile')

def flatten(input, start_dim, end_dim):
input_shape = list(input.shape)
if start_dim < 0:
start_dim = start_dim + input.ndim
if end_dim < 0:
end_dim = end_dim + input.ndim

    flatten_shape = input_shape[:start_dim] + [math.prod(input_shape[start_dim:end_dim + 1])] + input_shape[end_dim + 1:]
out = Tensor_(shape=tuple(flatten_shape), dtype=input.dtype)
return core.Tensor(out)

__all__.append('flatten')
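
# Shape sketch for the meta flatten above (illustrative): a (2, 3, 4) input with
# start_dim=1, end_dim=2 collapses dims 1..2 into 3 * 4 = 12, giving shape (2, 12).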

def cumsum(input, dim, dtype):
return input

__all__.append('cumsum')

def squeeze(input, dim):
    input_shape = list(input.shape)
    if isinstance(dim, int):
        dim = (dim,)
    new_shape = ()
    for idx, s in enumerate(input_shape):
        # drop a dimension only when it has size 1 and is either unspecified (dim is None) or listed in dim
        if s == 1 and (dim is None or idx in dim):
            continue
        new_shape += (s,)

out = Tensor_(shape=tuple(new_shape), dtype=input.dtype)
return core.Tensor(out)

__all__.append('squeeze')

def exp(input):
return input

__all__.append('exp')

def rand(size, generator, dtype):
out = Tensor_(shape=size, dtype=dtype)
return core.Tensor(out)

__all__.append('rand')

def add(input, other, alpha):
return input

__all__.append('add')

def neg(input):
return input

__all__.append('neg')

def expm1(input):
return input

__all__.append('expm1')

def reverse_v2(input, dims):
return input

__all__.append('reverse_v2')

def rsqrt(input):
return input

__all__.append('rsqrt')

def bitwise_xor_tensor(input, other):
return input

__all__.append('bitwise_xor_tensor')

def divmod(input, other, rounding_mode):
if isinstance(input, core.Tensor):
return input
return other

__all__.append('divmod')

def greater_equal(input, other):
if isinstance(input, core.Tensor):
return input
return other

__all__.append('greater_equal')

def inplace_zero(input):
    return input

__all__.append('inplace_zero')

def clone(input):
    return input

__all__.append('clone')

+ 1596
- 0
mindnlp/core/_apis/npu.py View File

@@ -0,0 +1,1596 @@
import mindspore
from mindspore._c_expression import _empty_instance
from ..configs import use_pyboost, ON_A1, ON_ORANGE_PI
from .._op_prim.ascend import legacy, pyboost
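
# Dispatch pattern used throughout this backend: each wrapper first tries the pyboost
# kernel when use_pyboost() is enabled and otherwise falls back to the corresponding
# legacy primitive, roughly (some_op is illustrative):
#
#     def some_op(x):
#         if use_pyboost():
#             return pyboost.some_op(x)   # eager kernel path
#         return legacy.some_op(x)        # legacy primitive fallback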


def empty(*args, **kwargs):
return _empty_instance(*args, **kwargs, device='Ascend')

def reshape(x, shape):
"""
Reshape the input tensor to the given shape.

Args:
x (Tensor): The input tensor.
shape (tuple): The target shape.

Returns:
Tensor: The reshaped tensor.
"""
if use_pyboost():
return pyboost.reshape_op(x, shape)
else:
return legacy.reshape(x, shape)

def contiguous(x):
"""
Returns a contiguous tensor containing the same data as the input tensor.

Args:
x (Tensor): The input tensor.

Returns:
Tensor: The contiguous tensor.
"""
    if use_pyboost():
return pyboost.contiguous_op(x)
else:
return x

def select_ext_view(input, dim, index):
"""
Selects a slice from the input tensor along the specified dimension.

Args:
input (Tensor): The input tensor.
dim (int): The dimension along which to select the slice.
index (int): The index of the slice to select.

Returns:
Tensor: The selected slice.
"""
if use_pyboost():
return pyboost.select_ext_view_op(input, dim, index)
else:
return legacy.select_view(input, index, dim)

def inplace_copy(self, value):
"""
Copies the data from the given tensor to the current tensor.

Args:
value (Tensor): The tensor from which to copy the data.
"""
    if use_pyboost():
return pyboost.inplace_copy_op(self, value)
else:
self.assign_value(value)
return self

def slice(input, dim, start, end, step):
"""
Slices the input tensor along the specified dimension.

Args:
input (Tensor): The input tensor.
dim (int): The dimension along which to slice.
start (int): The starting index of the slice.
end (int): The ending index of the slice.
step (int): The step size of the slice.

Returns:
Tensor: The sliced tensor.
"""
if use_pyboost():
return pyboost.slice_ext_op(input, dim, start, end, step)
else:
return legacy.slice(input, dim, start, end, step)

def embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq):
"""
Applies embedding to the input tensor.

Args:
input (Tensor): The input tensor.
weight (Tensor): The embedding weight tensor.
padding_idx (int): The index of the padding element.
max_norm (float): The maximum norm of the embedding vectors.
norm_type (float): The p-norm to use for normalization.
scale_grad_by_freq (bool): Whether to scale the gradient by frequency.

Returns:
Tensor: The embedded tensor.
"""
return pyboost.embedding_op(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)

def add(input, other, alpha): # pylint: disable=unused-argument
"""
Adds two tensors element-wise.

Args:
input (Tensor): The input tensor.
other (Tensor): The other tensor.
alpha (float): The scaling factor for the other tensor.

Returns:
Tensor: The result of the addition.
"""
if use_pyboost():
return pyboost.add_ext_op(input, other, alpha)
if alpha == 1.0:
return legacy.add(input, other)
return legacy.add(input, legacy.mul(other, alpha))

def layer_norm(input, normalized_shape, weight, bias, eps=1e-5):
"""
Applies layer normalization to the input tensor.

Args:
input (Tensor): The input tensor.
normalized_shape (tuple): The shape of the input tensor to be normalized.
weight (Tensor): The weight tensor.
bias (Tensor): The bias tensor.
eps (float): The epsilon value for numerical stability.

Returns:
Tensor: The normalized tensor.
"""
if use_pyboost():
return pyboost.layer_norm_ext_op(input, normalized_shape, weight, bias, eps)
if weight is not None:
begin_axis = input.ndim - weight.ndim
else:
begin_axis = -1
return legacy.layer_norm(input, weight, bias, begin_axis, begin_axis, eps)

def expand_dims(input, axis):
"""
Adds an extra dimension to the input tensor.

Args:
input (Tensor): The input tensor.
axis (int): The axis along which to add the dimension.

Returns:
Tensor: The expanded tensor.
"""
if use_pyboost():
return pyboost.expand_dims_op(input, axis)
return legacy.expand_dims(input, axis)

def cast(input, dtype):
"""
Casts the input tensor to the specified data type.

Args:
input (Tensor): The input tensor.
dtype (str): The target data type.

Returns:
Tensor: The casted tensor.
"""
return legacy.cast(input, dtype)

def sub(input, other, alpha):
"""
Subtracts the other tensor from the input tensor.

Args:
input (Tensor): The input tensor.
other (Tensor): The tensor to subtract.
alpha (float): The scale factor for the other tensor.

Returns:
Tensor: The result of the subtraction.
"""
if use_pyboost():
return pyboost.sub_ext_op(input, other, alpha)
return legacy.sub(input, legacy.mul(other, alpha))

def mul(input, other):
"""
Multiplies the input tensor with the other tensor.

Args:
input (Tensor): The input tensor.
other (Tensor): The tensor to multiply.

Returns:
Tensor: The result of the multiplication.
"""
if use_pyboost():
return pyboost.mul_op(input, other)
return legacy.mul(input, other)

def dense(input, weight, bias=None):
"""
Performs a dense (fully connected) operation.

Args:
input (Tensor): The input tensor.
weight (Tensor): The weight tensor.
bias (Tensor, optional): The bias tensor. Defaults to None.

Returns:
Tensor: The result of the dense operation.
"""
if use_pyboost():
return pyboost.dense_op(input, weight, bias)
return legacy.dense(input, weight, bias)

def transpose_view(input, dim0, dim1):
"""
Transposes the input tensor along the specified dimensions.

Args:
input (Tensor): The input tensor.
dim0 (int): The first dimension to transpose.
dim1 (int): The second dimension to transpose.

Returns:
Tensor: The transposed tensor.
"""
if use_pyboost():
return pyboost.transpose_ext_view_op(input, dim0, dim1)
ranks = list(range(input.ndim))
rank0 = ranks[dim0]
rank1 = ranks[dim1]
ranks[dim0] = rank1
ranks[dim1] = rank0
return legacy.transpose(input, ranks)

def matmul(input, other):
"""
Performs a matrix multiplication of the input tensor with another tensor.

Args:
input (Tensor): The input tensor.
other (Tensor): The other tensor.

Returns:
Tensor: The result of the matrix multiplication.
"""
if use_pyboost():
return pyboost.matmul_ext_op(input, other)
return legacy.mat_mul(input, other)

def div(input, other):
"""
Divides the input tensor by another tensor.

Args:
input (Tensor): The input tensor.
other (Tensor): The other tensor.

Returns:
Tensor: The result of the division.
"""
if use_pyboost():
return pyboost.div_op(input, other)
return legacy.div(input, other)

def divmod(input, other, rounding_mode):
"""
Divides the input tensor by another tensor and returns both the quotient and the remainder.

Args:
input (Tensor): The input tensor.
other (Tensor): The other tensor.
rounding_mode (str): The rounding mode to use.

Returns:
Tuple[Tensor, Tensor]: The quotient and the remainder.
"""
if use_pyboost():
return pyboost.divmod_op(input, other, rounding_mode)
if rounding_mode == 'floor':
return legacy.floor_div(input, other)
elif rounding_mode == 'trunc':
return legacy.truncate_div(input, other)
else:
raise ValueError(f'Invalid rounding mode: {rounding_mode}')

def softmax(input, axis=-1):
"""
Computes the softmax of the input tensor along the specified axis.

Args:
input (Tensor): The input tensor.
axis (int): The axis along which to compute the softmax.

Returns:
Tensor: The softmax of the input tensor.
"""
if use_pyboost():
return pyboost.softmax_impl(input, axis)
return legacy.softmax(input, axis)

def permute(input, axes=None):
"""
Transposes the dimensions of the input tensor according to the specified axes.

Args:
input (Tensor): The input tensor.
axes (Tuple[int]): The axes to transpose.

Returns:
Tensor: The transposed tensor.
"""
if use_pyboost():
return pyboost.transpose_view_op(input, axes)
return legacy.transpose(input, axes)

def gelu(input, approximate):
"""
Computes the Gaussian Error Linear Unit (GELU) activation function.

Args:
input (Tensor): The input tensor.

Returns:
Tensor: The GELU activation of the input tensor.
"""
if use_pyboost():
return pyboost.gelu_ext_op(input, approximate)
return legacy.ge_lu(input)

def tanh(input):
"""
Computes the hyperbolic tangent of the input tensor.

Args:
input (Tensor): The input tensor.

Returns:
Tensor: The hyperbolic tangent of the input tensor.
"""
if use_pyboost():
return pyboost.tanh_op(input)
return legacy.tanh(input)

def broadcast_to(input, shape):
"""
Broadcasts the input tensor to the specified shape.

Args:
input (Tensor): The input tensor.
shape (Tuple[int]): The shape to broadcast to.

Returns:
Tensor: The broadcasted tensor.
"""
if use_pyboost():
return pyboost.broadcast_to_view_op(input, shape)
return legacy.broadcast_to(input, shape)

def split_tensor(tensor, split_size_or_sections, dim):
"""
Splits a tensor into multiple sub-tensors.

Args:
tensor (Tensor): The input tensor.
split_size_or_sections (Union[int, Tuple[int]]): The size or number of sections to split the tensor into.
dim (int): The dimension along which to split the tensor.

Returns:
List[Tensor]: The list of split sub-tensors.
"""
if use_pyboost():
return pyboost.split_tensor_op(tensor, split_size_or_sections, dim)
return legacy.split(tensor, split_size_or_sections, dim)

def squeeze(input, dim):
"""
Removes dimensions of size 1 from the shape of the input tensor.

Args:
input (Tensor): The input tensor.
dim (Union[int, Tuple[int]]): The dimensions to squeeze.

Returns:
Tensor: The squeezed tensor.
"""
if use_pyboost():
return pyboost.squeeze_impl(input, dim)
return legacy.squeeze(input, dim)

def zeros(shape, dtype):
"""
Returns a tensor filled with zeros.

Args:
shape (Union[int, Tuple[int]]): The shape of the tensor.
dtype (str): The data type of the tensor.

Returns:
Tensor: The tensor filled with zeros.
"""
return legacy.zeros(shape, dtype)

def equal(input, other):
"""
Returns a tensor with boolean values, indicating element-wise equality.

Args:
input (Tensor): The input tensor.
other (Tensor): The tensor to compare with.

Returns:
Tensor: The tensor with boolean values.
"""
if use_pyboost():
return pyboost.equal_ext_op(input, other)
return legacy.equal(input, other).all()

def eq(input, other):
"""
Returns a tensor with boolean values, indicating element-wise equality.

Args:
input (Tensor): The input tensor.
other (Tensor): The tensor to compare with.

Returns:
Tensor: The tensor with boolean values.
"""
if use_pyboost():
return pyboost.equal_op(input, other)
return legacy.equal(input, other)


def sum(input, dim, keepdim, dtype):
"""
Returns the sum of elements over a specified dimension.

Args:
input (Tensor): The input tensor.
dim (Union[int, Tuple[int]]): The dimensions to sum over.
keepdim (bool): Whether to keep the dimensions of size one.

Returns:
Tensor: The tensor with summed elements.
"""
if use_pyboost():
return pyboost.sum_ext_op(input, dim, keepdim, dtype)
    if dtype is not None:
        input = input.astype(dtype)
    return legacy.reduce_sum(input, dim, keepdim)

def dropout(input, p, seed, offset):
"""
Returns a tensor with dropout applied element-wise.

Args:
input (Tensor): The input tensor.
p (float): The dropout probability.
seed (int): The random seed.

Returns:
Tensor: The tensor with dropout applied.
"""
if use_pyboost():
return pyboost.dropout_ext_op(input, p, seed, offset)
return legacy.dropout(input, 1-p, 0, 0)

def clone(input):
"""
Returns a copy of the input tensor.

Args:
input (Tensor): The input tensor.

Returns:
Tensor: The copied tensor.
"""
if use_pyboost():
return pyboost.clone_op(input)
return legacy.identity(input)

def inplace_normal(input, mean, std, generator):
"""
Returns a tensor with normal distribution applied element-wise.

Args:
input (Tensor): The input tensor.
mean (float): The mean of the normal distribution.
std (float): The standard deviation of the normal distribution.
seed (int): The random seed.

Returns:
Tensor: The tensor with normal distribution applied.
"""
seed, offset = generator._step(12)
if use_pyboost():
return pyboost.inplace_normal_op(input, mean, std, seed, offset)
return legacy.normal(input, mean, std, 0, 0)

def reduce_all(input, dim, keepdim):
"""
Returns the sum of all elements in the tensor.

Args:
input (Tensor): The input tensor.
dim (int): The dimension to reduce.
keepdim (bool): Whether to keep the reduced dimension.

Returns:
Tensor: The tensor with the sum of all elements.
"""
if use_pyboost():
return pyboost.reduce_all_impl(input, dim, keepdim)
return legacy.reduce_all(input, dim, keepdim)

def masked_fill(input, mask, value):
"""
Fills elements of the input tensor with the specified value where the mask is True.

Args:
input (Tensor): The input tensor.
mask (Tensor): The mask tensor.
value (float): The value to fill.

Returns:
Tensor: The tensor with elements filled.
"""
if use_pyboost():
return pyboost.masked_fill_op(input, mask, value)
return legacy.masked_fill(input, mask, value)

def isin(input, test_elements, assume_unique=False, invert=False):
"""
Checks if elements of input tensor are in test_elements.

Args:
input (Tensor): The input tensor.
test_elements (Tensor): The tensor to test against.
assume_unique (bool): If True, assumes that test_elements contains unique elements.
invert (bool): If True, inverts the result.

Returns:
Tensor: The tensor with boolean values indicating whether elements are in test_elements.
"""
if use_pyboost():
return pyboost.isin(input, test_elements, assume_unique, invert)
return legacy.isin(input, test_elements, assume_unique, invert)

def pad_v3(input, new_pad, mode, value=None, contiguous=True):
if input.dtype == mindspore.bool_:
input = cast(input, mindspore.int8)
out = legacy.pad_v3(input, new_pad, int(value), mode, contiguous)
return cast(out, mindspore.bool_)
return legacy.pad_v3(input, new_pad, value, mode, contiguous)

def log_softmax(input, axis=-1, dtype=None):
"""
Computes the log softmax of the input tensor along the specified axis.

Args:
input (Tensor): The input tensor.
axis (int): The axis along which to compute the log softmax.
dtype (dtype): The data type of the output tensor.

Returns:
Tensor: The tensor with log softmax values.
"""
if use_pyboost():
return pyboost.log_softmax_impl(input, axis)
return legacy.log_softmax(input, axis)

def not_equal(input, other):
"""
Computes the element-wise comparison of two tensors for inequality.

Args:
input (Tensor): The input tensor.
other (Tensor): The other tensor.

Returns:
Tensor: The tensor with boolean values indicating whether elements are not equal.
"""
if use_pyboost():
return pyboost.not_equal_op(input, other)
return legacy.not_equal(input, other)

def chunk(input, chunks, dim=0):
"""
Splits a tensor into a specified number of chunks.

Args:
input (Tensor): The input tensor.
chunks (int): The number of chunks to split the tensor into.
dim (int): The dimension along which to split the tensor.

Returns:
Tensor: The tensor split into chunks.
"""
if use_pyboost():
return pyboost.chunk_op(input, chunks, dim)
return legacy.split(input, dim, chunks)

def ones(shape, dtype):
"""
Returns a tensor filled with ones.

Args:
shape (tuple): The shape of the tensor.
dtype (dtype): The data type of the tensor.

Returns:
Tensor: The tensor filled with ones.
"""
return legacy.ones(shape, dtype)

def greater(input, other):
"""
Returns a tensor with boolean values indicating whether elements in the input tensor are greater than those in the other tensor.

Args:
input (Tensor): The input tensor.
other (Tensor): The other tensor.

Returns:
Tensor: The tensor with boolean values indicating whether elements are greater.
"""
if use_pyboost():
return pyboost.greater_op(input, other)
return legacy.greater(input, other)

def randint(low, high, shape, generator, dtype):
"""
Returns a tensor filled with random integers from low (inclusive) to high (exclusive).

Args:
low (int): The lower bound of the range.
high (int): The upper bound of the range.
shape (tuple): The shape of the tensor.
dtype (dtype): The data type of the tensor.

    Returns:
        Tensor: The tensor filled with random integers.
"""
seed, offset = generator._step(12)

if use_pyboost():
return pyboost.randint_op(low, high, shape, seed, offset, dtype)
value = legacy.uniform_int(shape,
mindspore.tensor(low, dtype=mindspore.int32),
mindspore.tensor(high, dtype=mindspore.int32), 0, 0)
    return value

def nllloss(input, target, weight, reduction, ingore_index):
if use_pyboost():
return pyboost.nllloss_impl(input, target, weight, reduction, ingore_index)
return legacy.nll_loss(input, target, weight, reduction, ingore_index)

def clamp_scalar(value, min_value, max_value):
if use_pyboost():
return pyboost.clamp_scalar_op(value, min_value, max_value)
if min_value is not None:
value = legacy.maximum(value, min_value)
if max_value is not None:
value = legacy.minimum(value, max_value)
return value

def cumsum(self, dim, dtype):
if use_pyboost():
return pyboost.cumsum_ext_op(self, dim, dtype)
return legacy.cum_sum(self, dim, False, False)

def reduce_any(input, axis, keepdims):
if use_pyboost():
return pyboost.reduce_any_impl(input, axis, keepdims)
return legacy.reduce_any(input, axis, keepdims)

def concat(tensors, axis):
if use_pyboost():
return pyboost.concat_impl(tensors, axis)
return legacy.concat(tensors, axis)

def gather_d(input, dim, index):
if use_pyboost():
return pyboost.gather_d_op(input, dim, index)
return legacy.gather_d(input, dim, index)

def greater_equal(input, other):
if use_pyboost():
return pyboost.greater_equal_op(input, other)
return legacy.greater_equal(input, other)

def less(input, other):
if use_pyboost():
return pyboost.less_op(input, other)
return legacy.less(input, other)

def less_equal(input, other):
if use_pyboost():
return pyboost.less_equal_op(input, other)
return legacy.less_equal(input, other)

def select(condition, input, other):
if use_pyboost():
return pyboost.select_op(condition, input, other)
return legacy.select(condition, input, other)

def mean(input, axis, keepdims, dtype):
if use_pyboost():
return pyboost.mean_ext_op(input, axis, keepdims, dtype)
return legacy.reduce_mean(input, axis, keepdims)

def index(input, index):
if use_pyboost():
return pyboost.index_op(input, index)
return legacy.index(input, index)

def scatter(input, dim, index, src):
if use_pyboost():
return pyboost.scatter_op(input, dim, index, src)
return legacy.tensor_scatter_elements(input, index, src, dim)

def tril(input, diagonal=0):
if use_pyboost():
return pyboost.tril_ext_op(input, diagonal)
return legacy.tril(input, diagonal)

def triu(input, diagonal=0):
if use_pyboost():
return pyboost.triu_impl(input, diagonal)
return legacy.triu(input, diagonal)

def inplace_index_put(input, indices, values, accumulate):
if use_pyboost():
return pyboost.inplace_index_put_op(input, indices, values, accumulate)
return legacy.tensor_scatter_elements(input, indices, values, accumulate)

def zeros_like(input, dtype):
if use_pyboost():
return pyboost.zeros_like_ext_op(input, dtype)
return legacy.zeros_like(input)

def ones_like(input, dtype):
if use_pyboost():
return pyboost.ones_like_ext_op(input, dtype)
return legacy.ones_like(input)

def tile(input, multiples):
return legacy.tile(input, multiples)

def arange(start, end, step, dtype):
if use_pyboost():
return pyboost.arange_op(start, end, step, dtype)
return legacy.range(start, end, step, 100000)

def fill_scalar(input, value, dtype):
if use_pyboost():
return pyboost.fill_scalar_op(input, value, dtype)
return legacy.fill(input, value)

def stop_gradient(input):
return legacy.stop_gradient(input)

def isinf(input):
if use_pyboost():
return pyboost.isinf_op(input)
return legacy.is_inf(input)

def sort(input, dim, descending, stable):
if use_pyboost():
return pyboost.sort_ext_op(input, dim, descending, stable)
return legacy.sort(input, dim, descending)

def prod(input, axis, keepdims, dtype):
if use_pyboost():
return pyboost.prod_ext_op(input, axis, keepdims, dtype)
return legacy.reduce_prod(input, axis, keepdims)

def isclose(input, other, rtol, atol, equal_nan):
if use_pyboost():
return pyboost.isclose_impl(input, other, rtol, atol, equal_nan)
return legacy.is_close(input, other, rtol, atol, equal_nan)

def argmax(input, axis, keepdims):
if use_pyboost():
return pyboost.argmax_ext_op(input, axis, keepdims)
return legacy.argmax(input, axis, keepdims)

def argmin(input, axis, keepdims):
if use_pyboost():
return pyboost.argmin_ext_op(input, axis, keepdims)
return legacy.argmin(input, axis, keepdims)


def bmm(input, other):
if use_pyboost():
return pyboost.bmm_ext_op(input, other)
return legacy.batch_mat_mul(input, other)

def topk(input, k, dim, largest, sorted):
if use_pyboost():
return pyboost.topk_ext_op(input, k, dim, largest, sorted)

if not largest:
input = -input
if dim is None or dim == input.ndim - 1:
if not largest:
res = legacy.top_k(input, k, sorted)
values, indices = -res[0], res[1]
return values, indices
return legacy.top_k(input, k, sorted)
input = transpose_view(input, dim, input.ndim - 1)
output = legacy.top_k(input, k, sorted)
values = transpose_view(output[0], dim, input.ndim - 1)
indices = transpose_view(output[1], dim, input.ndim - 1)
if not largest:
res = (-values, indices)
else:
res = (values, indices)
return res
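
# Fallback notes (illustrative): legacy.top_k only works on the last axis and only returns
# the largest values, so the code above (a) negates the input to obtain the smallest-k and
# negates the values back, and (b) transposes `dim` to the last axis and transposes the
# outputs back afterwards.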

def logical_not(input):
if use_pyboost():
return pyboost.logical_not_op(input)
return legacy.logical_not(input)

def rand(size, generator, dtype):
seed, offset = generator._step(12)
if use_pyboost():
return pyboost.rand_ext_op(size, seed, offset, dtype)
return legacy.uniform_real(size, 0, 0)

def inplace_uniform(input, from_, to, generator):
seed, offset = generator._step(12)
if use_pyboost():
return pyboost.uniform_ext_op(input, from_, to, seed, offset)

if input.dtype.is_floating_point:
out = legacy.uniform_real(input.shape, 0, 0)
value = legacy.add(legacy.mul(out, (legacy.sub(to, from_))), from_)
else:
value = legacy.uniform_int(input.shape,
mindspore.tensor(from_, dtype=mindspore.int32),
mindspore.tensor(to, dtype=mindspore.int32), 0, 0)
input.assign_value(legacy.cast(value, input.dtype))

def bitwise_or_tensor(input, other):
if use_pyboost():
return pyboost.bitwise_or_tensor_op(input, other)
return legacy.bitwise_or(input, other)

def bitwise_and_tensor(input, other):
if use_pyboost():
return pyboost.bitwise_and_tensor_op(input, other)
return legacy.bitwise_and(input, other)

def bitwise_or_scalar(input, other):
if use_pyboost():
return pyboost.bitwise_or_scalar_op(input, other)
return legacy.bitwise_or(input, other)


def max(input):
if use_pyboost():
return pyboost.max_op(input)
return legacy.reduce_max(input, (), False)

def stack(tensors, axis=0):
if use_pyboost():
return pyboost.stack_ext_impl(tensors, axis)
return legacy.stack(tensors, axis)

def narrow(input, dim, start, length):
if use_pyboost():
return pyboost.narrow_op(input, dim, start, length)
begin = [0] * input.ndim
size = [i for i in input.shape]
begin[dim] = start
size[dim] = length
return legacy.slice(input, begin, size)

def std(input, dim, correction, keepdim):
if use_pyboost():
return pyboost.std_op(input, dim, correction, keepdim)
    return legacy.reduce_std(input, dim, bool(correction), keepdim)[0]


def log(input):
if use_pyboost():
return pyboost.log_op(input)
return legacy.log(input)

def gather(input_params, input_indices, axis, batch_dim):
return legacy.gather(input_params, input_indices, axis, batch_dim)

def non_zero_ext(input):
if use_pyboost():
return pyboost.non_zero_ext_op(input)
return legacy.non_zero(input)

def binary_cross_entropy_with_logits(input, target, weight, posWeight, reduction):
if use_pyboost():
return pyboost.binary_cross_entropy_with_logits_impl(input, target, weight, posWeight, reduction)
return legacy.bce_with_logits_loss(input, target, weight, posWeight, reduction)

def rand_like(input, generator, dtype):
seed, offset = generator._step(123)
if use_pyboost():
return pyboost.rand_like_ext_op(input, seed, offset, dtype)
    return rand(input.shape, generator, dtype)

def floor_div(input, other):
if use_pyboost():
return pyboost.floor_div_op(input, other)
return legacy.floor_div(input, other)

def inplace_fill_scalar(input, value):
if use_pyboost():
return pyboost.inplace_fill_scalar_op(input, value)
input.assign_value(fill_scalar(input.shape, value, input.dtype))
return input

def linalg_vector_norm(x, ord=2, dim=None, keepdim=False, dtype=None):
    if use_pyboost():
        return pyboost.linalg_vector_norm_op(x, ord, dim, keepdim, dtype)
    return legacy.lp_norm(x, dim, int(ord), keepdim, 1e-12)

def non_zero(input):
if use_pyboost():
return pyboost.non_zero_op(input)
return legacy.non_zero(input)

def fmod_scalar(input, other):
if use_pyboost():
return pyboost.fmod_scalar_op(input, other)
return legacy.floor_mod(input, other)

def inplace_zero(input):
if use_pyboost():
return pyboost.inplace_zero_op(input)
input.assign_value(zeros(input.shape, input.dtype))
return input

def mse_loss(input, target, reduction):
if use_pyboost():
return pyboost.mse_loss_ext_op(input, target, reduction)

def abs(input):
if use_pyboost():
return pyboost.abs_op(input)
return legacy.abs(input)

def bincount(input, weights=None, minlength=0):
if use_pyboost():
return pyboost.bincount_ext_op(input, weights, minlength)
return legacy.bincount(input, minlength, weights)

def bitwise_and_scalar(input, other):
if use_pyboost():
return pyboost.bitwise_and_scalar_op(input, other)
return legacy.bitwise_and(input, other)

def argmax_with_value(input, axis, keep_dims):
if use_pyboost():
return pyboost.argmax_with_value_impl(input, axis, keep_dims)
return legacy.argmax(input, axis, keep_dims)

def index_select(input, dim, index):
if use_pyboost():
return pyboost.index_select_op(input, dim, index)
return legacy.gather(input, index, dim, 0)

def min(input):
if use_pyboost():
return pyboost.min_op(input)
return legacy.reduce_min(input, (), False)

def minimum(input, other):
if use_pyboost():
return pyboost.minimum_op(input, other)
return legacy.minimum(input, other)

def argmin_with_value(input, axis, keep_dims):
if use_pyboost():
return pyboost.argmin_with_value_impl(input, axis, keep_dims)
return legacy.argmin(input, axis, keep_dims)

def flatten(input, start_dim, end_dim):
if use_pyboost():
return pyboost.flatten_ext_op(input, start_dim, end_dim)
if start_dim < 0:
start_dim = start_dim + input.ndim
if end_dim < 0:
end_dim = end_dim + input.ndim
input_shape = list(input.shape)
    input_shape[start_dim:end_dim + 1] = [-1]
return legacy.reshape(input, tuple(input_shape))

def conv2d_padding(input, weight, bias=None, stride=1, padding='valid', dilation=1, groups=1):
if use_pyboost():
return pyboost.conv2d_padding_op(input, weight, bias, stride, padding, dilation, groups)
return legacy.conv2d(input, weight, bias, stride, padding, dilation, groups)

def conv2d(input, weight, bias=None, stride=1, padding='valid', dilation=1, groups=1):
if use_pyboost():
return pyboost.conv2d_ext_op(input, weight, bias, stride, padding, dilation, groups)
return legacy.conv2d(input, weight, bias, stride, padding, dilation, groups)

def cos(input):
if use_pyboost():
return pyboost.cos_op(input)
return legacy.cos(input)

def pow_tensor_scalar(input, exponent):
if use_pyboost():
return pyboost.pow_tensor_scalar_op(input, exponent)
return legacy.pow(input, exponent)

def sin(input):
if use_pyboost():
return pyboost.sin_op(input)
return legacy.sin(input)

def batch_norm(input, weight, bias, running_mean=None, running_var=None, training=False, momentum=0.1, epsilon=1e-5):
    if use_pyboost():
        return pyboost.batch_norm_ext_op(input, weight, bias, running_mean, running_var, training, momentum, epsilon)
    return legacy.batch_norm(input, weight, bias, running_mean, running_var, training, momentum, epsilon, 'NHWC')

def silu(input):
if use_pyboost():
return pyboost.silu_op(input)
return legacy.silu(input)

def rsqrt(input):
if use_pyboost():
return pyboost.rsqrt_op(input)
return legacy.rsqrt(input)

def sqrt(input):
if use_pyboost():
return pyboost.sqrt_op(input)
return legacy.sqrt(input)

def masked_scatter(input, mask, value):
return legacy.masked_scatter(input, mask, value)

def neg(input):
if use_pyboost():
return pyboost.neg_op(input)
return legacy.neg(input)

def log1p(input):
if use_pyboost():
return pyboost.log1p_op(input)
return legacy.log1p(input)

def pow_scalar_tensor(input, scalar):
if use_pyboost():
return pyboost.pow_scalar_tensor_op(input, scalar)
return legacy.pow(input, scalar)

def adaptive_avg_pool2d(input, output_size):
if use_pyboost():
return pyboost.adaptive_avg_pool2d_ext_op(input, output_size)
return legacy.adaptive_avg_pool2_d(input, output_size)


def exp(input):
if use_pyboost():
return pyboost.exp_op(input)
return legacy.exp(input)

def sigmoid(input):
if use_pyboost():
return pyboost.sigmoid_op(input)
return legacy.sigmoid(input)

def constant_pad_nd(input, pad, value=0.0):
if use_pyboost():
return pyboost.constant_pad_nd_op(input, pad, value)

def rfft(input, n=None, dim=-1, norm=None):
    if use_pyboost():
        return pyboost.rfft_op(input, n, dim, norm)
    if n is None:
        n = input.shape[dim]
    if input.shape[dim] < n:
pad_inf = (0, n - input.shape[dim])
pad_dims = (0, 0) * (input.ndim - (dim + 1)) + pad_inf
input = constant_pad_nd(input, pad_dims)
else:
input = narrow(input, dim, 0, n)
return legacy.fft_with_size(input, input.ndim, False, True, norm, True, ())

def avg_pool2d(input, kernel_size, stride, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None):
if use_pyboost():
return pyboost.avg_pool2d_op(input, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)

def conj(input):
if use_pyboost():
return pyboost.conj_op(input)
return legacy.conj(input)

def fill_tensor(size, value, dtype):
if use_pyboost():
return pyboost.fill_tensor_op(size, value, dtype)
return legacy.fill_v2(size, value)

def maximum(input, other):
if use_pyboost():
return pyboost.maximum_op(input, other)
return legacy.maximum(input, other)

def irfft(input, n, dim, norm):
if use_pyboost():
return pyboost.irfft_op(input, n, dim, norm)
    return legacy.fft_with_size(input, input.ndim, True, True, norm, True, ())

def randn(size, generator, dtype):
if use_pyboost():
seed, offset = generator._step(12)
return pyboost.randn_op(size, seed, offset, dtype)
return cast(legacy.standard_normal(size, 0, 0), dtype)

def avg_pool1d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True):
if use_pyboost():
return pyboost.avg_pool1d_op(input, kernel_size, stride, padding, ceil_mode, count_include_pad)
return legacy.avg_pool1d(input, kernel_size, stride, padding, ceil_mode, count_include_pad)

def pow(input, exponent):
if use_pyboost():
return pyboost.pow_op(input, exponent)
return legacy.pow(input, exponent)

def roll(input, shifts, axis):
if use_pyboost():
return pyboost.roll_impl(input, shifts, axis)
return legacy.roll(input, shifts, axis)

def conv1d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
if use_pyboost():
return pyboost.conv1d_ext_op(input, weight, bias, stride, padding, dilation, groups)
    return legacy.conv1d(input, weight, bias, padding, stride, dilation)

def conv1d_padding(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
if use_pyboost():
return pyboost.conv1d_padding_op(input, weight, bias, stride, padding, dilation, groups)
    return legacy.conv1d(input, weight, bias, padding, stride, dilation)

def square(input):
if use_pyboost():
return pyboost.square_op(input)
return legacy.square(input)

def lgamma(input):
return legacy.lgamma(input)

def reverse_v2(input, axis):
if isinstance(axis, int):
axis = (axis,)
if use_pyboost():
return pyboost.reverse_v2_impl(input, axis)
return legacy.reverse_v2(input, axis)

def unique_consecutive(input, return_inverse, return_counts, dim):
if use_pyboost():
return pyboost.unique_consecutive_impl(input, return_inverse, return_counts, dim)
return legacy.unique_consecutive(input, return_inverse, return_counts, dim)

def split_with_size(input, size, dim=0):
if use_pyboost():
return pyboost.split_with_size_op(input, size, dim)
return legacy.split_with_size(input, size, dim)

def softplus(input, beta=1, threshold=20):
if use_pyboost():
return pyboost.softplus_ext_op(input, beta, threshold)
return legacy.softplus(input, beta, threshold)

def remainder_tensor_scalar(input, other):
if use_pyboost():
return pyboost.remainder_tensor_scalar_op(input, other)
out = input - floor_div(input, other) * other
return out

def baddbmm(input, batch1, batch2, alpha=1, beta=1):
if use_pyboost():
return pyboost.baddbmm_op(input, batch1, batch2, alpha, beta)
return legacy.baddbmm(input, batch1, batch2, alpha, beta)

def floor(input):
if use_pyboost():
return pyboost.floor_op(input)
return legacy.floor(input)

def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1):
if use_pyboost():
return pyboost.conv_transpose2d_op(input, weight, bias, stride, padding, output_padding, groups, dilation)
return legacy.conv_transpose2d(input, weight, bias, stride, padding, output_padding, groups, dilation)

def relu(input):
if use_pyboost():
return pyboost.relu_op(input)
return legacy.re_lu(input)

def max_pool2d(input, kernel_size, stride=1, padding=0, dilation=1, ceil_mode=False, return_indices=False):
# out, indices = legacy.max_pool_with_argmax_v2(input, kernel_size, stride, padding, dilation, ceil_mode)

out, indices = legacy.max_pool_with_indices(input, kernel_size, stride, padding, dilation, ceil_mode)
if return_indices:
return out, indices
return out

def upsample_bilinear2d(input, size=None, scale_factor=None, align_corners=False):
if use_pyboost():
return pyboost.upsample_bilinear2d_op(input, size, scale_factor, align_corners)
    return legacy.resize_bilinear_v2(input, size, align_corners, not align_corners)

def group_norm(input, num_groups, weight=None, bias=None, eps=1e-5):
if use_pyboost():
return pyboost.group_norm_op(input, num_groups, weight, bias, eps)
return legacy.group_norm(input, num_groups, eps, affine)

def nllloss_2d(input, target, weight, reduction='mean', ignore_index=-100):
if use_pyboost():
return pyboost.nllloss_2d_op(input, target, weight, reduction, ignore_index)
    return legacy.nll_loss(input, target, weight, reduction, ignore_index)

def inplace_relu(input):
if use_pyboost():
return pyboost.inplace_relu_op(input)
return legacy.assign(input, legacy.re_lu(input))

def expm1(input):
if use_pyboost():
return pyboost.expm1_op(input)
return legacy.expm1(input)

def upsample_bicubic2d(input, size=None, scale_factor=None, align_corners=False):
if use_pyboost():
return pyboost.upsample_bicubic2d_op(input, size, scale_factor, align_corners)
    return legacy.resize_bicubic(input, size, align_corners, not align_corners)

def acos(input):
if use_pyboost():
return pyboost.acos_op(input)
return legacy.acos(input)

def cdist(x1, x2, p):
return legacy.cdist(x1, x2, float(p))

def unstack_view(input, dim):
if use_pyboost():
return pyboost.unstack_ext_view_op(input, dim)
return legacy.unstack(input, dim, input.shape[dim])

def l1_loss(input, target, reduction='mean'):
if use_pyboost():
return pyboost.l1_loss_ext_op(input, target, reduction)
return legacy.l1(input, target, reduction)

def diag(input, diagonal):
if use_pyboost():
return pyboost.diag_ext_op(input, diagonal)
return legacy.diag(input, diagonal)

def logsigmoid(input):
if use_pyboost():
return pyboost.logsigmoid_op(input)
return legacy.logsigmoid(input)

def one_hot(tensor, num_classes):
    on_value = mindspore.Tensor(1, dtype=tensor.dtype)
    off_value = mindspore.Tensor(0, dtype=tensor.dtype)
    if use_pyboost():
        return pyboost.one_hot_ext_impl(tensor, num_classes, on_value, off_value, -1)
    return legacy.one_hot(tensor, num_classes, on_value, off_value, -1)

def var(input, dim=None, correction=1, keepdim=False):
if use_pyboost():
return pyboost.var_op(input, dim, correction, keepdim)
return legacy.var(input, dim, correction, keepdim)

def linspace(start, end, steps, dtype=None):
if use_pyboost():
return pyboost.lin_space_ext_op(start, end, steps, dtype)
return legacy.lin_space(start, end, steps)

def masked_select(input, mask):
if use_pyboost():
return pyboost.masked_select_op(input, mask)
return legacy.masked_select(input, mask)

def glu(input, dim=-1):
if use_pyboost():
return pyboost.glu_impl(input, dim)
return legacy.glu(input, dim)

def scatter_value(input, dim, index, src, reduce='none'):
if use_pyboost():
return pyboost.scatter_value_op(input, dim, index, src, reduce)
return legacy.scatter(input, dim, index, src, reduce)

def unique_dim(input, sorted, return_inverse, dim):
if use_pyboost():
return pyboost.unique_dim_op(input, sorted, return_inverse, dim)
return legacy.unique_dim(input, sorted, return_inverse, dim)

def inplace_add(input, other, alpha):
if use_pyboost():
return pyboost.inplace_add_ext_op(input, other, alpha)
return legacy.inplace_add(input, other)

def logsumexp(input, dim, keepdim):
if use_pyboost():
return pyboost.logsumexp_op(input, dim, keepdim)
return legacy.logsumexp(input, dim, keepdim)

def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank, reduction, zero_infinity):
loss, log_alpha = legacy.ctc_loss_v2(log_probs, targets, input_lengths, target_lengths, blank, 'none', zero_infinity)
if reduction == 'sum':
loss = sum(loss, (), False, None)
if reduction == 'mean':
# input_type = loss.dtype
# target_length_t = target_lengths.clip(1., None)
# loss = loss.astype("float32")
loss = div(loss, target_lengths)
loss = mean(loss, (), False, None)
# loss = loss.astype(input_type)
return (loss, log_alpha)

def inplace_exponential(self, lambd, generator):
seed, offset = generator._step(12)
if use_pyboost():
return pyboost.inplace_exponential_op(self, lambd, seed, offset)
return legacy.expo(self, lambd, generator)

def im2col(input, kernel_size, dilation=1, padding=0, stride=1):
if use_pyboost() and not ON_A1:
return pyboost.im2col_ext_op(input, kernel_size, dilation, padding, stride)
out = legacy.im2_col(input, kernel_size, stride, dilation, padding)
out_shape = out.shape[:1] + (-1,) + out.shape[-1:]
out = reshape(out, out_shape)
return out

def upsample_nearest2d(input, output_size, scale_factors):
if use_pyboost():
return pyboost.upsample_nearest2d_op(input, output_size, scale_factors)
    return legacy.upsample_nearest2d(input, output_size, scale_factors)

def addmm(input, mat1, mat2, alpha=1.0, beta=1.0):
if use_pyboost():
return pyboost.addmm_op(input, mat1, mat2, alpha, beta)
return legacy.addmm(input, mat1, mat2, alpha, beta)

def meshgrid(input, lambd):
if use_pyboost():
return pyboost.meshgrid_impl(input, lambd)
return legacy.meshgrid(input, lambd)

def adaptive_avg_pool1d(input, output_size):
if use_pyboost():
return pyboost.adaptive_avg_pool1d_op(input, output_size)
return legacy.adaptive_avg_pool1d(input, output_size)

def conv3d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
if use_pyboost():
return pyboost.conv3d_ext_op(input, weight, bias, stride, padding, dilation, groups)
return legacy.conv3d(input, weight, bias, stride, padding, dilation, groups)

def outer(input, other):
if use_pyboost():
return pyboost.outer_op(input, other)
return legacy.outer(input, other)

def addcmul(input, tensor1, tensor2, value=1.0):
if use_pyboost():
return pyboost.addcmul_op(input, tensor1, tensor2, value)
return legacy.addcmul(input, tensor1, tensor2, value)

def prelu(input, weight):
if use_pyboost():
return pyboost.prelu_op(input, weight)
return legacy.p_re_lu(input, weight)

def reciprocal(input):
if use_pyboost():
return pyboost.reciprocal_op(input)
return legacy.reciprocal(input)

def index_add_ext(input, dim, index, source, alpha):
if use_pyboost():
return pyboost.index_add_ext_op(input, dim, index, source, alpha)
return legacy.index_add(input, dim, index, source, alpha)

def polar(abs, angle):
if use_pyboost():
return pyboost.polar_op(abs, angle)
return legacy.polar(abs, angle)

def upsample_linear1d(input, output_size, scale_factor, align_corners=False):
if use_pyboost():
return pyboost.upsample_linear1d_op(input, output_size, scale_factor, align_corners)
return legacy.upsample_linear1d(input, output_size, scale_factor, align_corners)

def grid_sampler_2d(input, grid, mode='bilinear', padding_mode='zeros', align_corners=False):
if use_pyboost():
return pyboost.grid_sampler_2d_impl(input, grid, mode, padding_mode, align_corners)
return legacy.grid_sampler_2d(input, grid, mode, padding_mode, align_corners)

def pixel_shuffle(input, upscale_factor):
if use_pyboost():
return pyboost.pixel_shuffle_op(input, upscale_factor)
return legacy.pixel_shuffle(input, upscale_factor)

def view_as_complex(input):
real_part, imag_part = chunk(input, 2, -1)
return legacy.complex(squeeze(real_part, -1), squeeze(imag_part, -1))

def rms_norm(input, weight, eps=1e-5):
if use_pyboost():
return pyboost.rms_norm_impl(input, weight, eps)[0]
input_dtype = input.dtype
input = cast(input, mindspore.float32)
variance = mean(pow(input, 2), -1, True, None)
input = mul(input, rsqrt(add(variance, eps, 1)))
return mul(weight, cast(input, input_dtype))

def normal_float_float(mean, std, size, dtype, generator):
seed, offset = generator._step(12)
if use_pyboost():
return pyboost.normal_float_float_op(mean, std, size, seed, offset)

def real(input):
if use_pyboost():
return pyboost.real_op(input)
return legacy.real(input)

def imag(input):
return legacy.imag(input)

def leaky_relu(input, negative_slope):
if use_pyboost():
return pyboost.leaky_relu_ext_op(input, negative_slope)
return legacy.leaky_relu(input, negative_slope)

def ceil(input):
if use_pyboost():
return pyboost.ceil_op(input)
return legacy.ceil(input)

def erf(input):
if use_pyboost():
return pyboost.erf_op(input)
return legacy.erf(input)

def cross(input, other, dim):
if use_pyboost():
return pyboost.cross_impl(input, other, dim)
return legacy.cross(input, other, dim)

def elu(input, alpha):
if use_pyboost():
return pyboost.elu_ext_impl(input, alpha)
return legacy.elu(input, alpha)

def reduce_max(input, axis, keepdims):
if use_pyboost():
return pyboost.reduce_max_impl(input, axis, keepdims)
return legacy.reduce_max(input, axis, keepdims)

def dynamic_rnn(x, w, b, seq_length, init_h, init_c):
return legacy.dynamic_rnn(x, w, b, seq_length, init_h, init_c,
'LSTM', 'UNIDIRECTIONAL', 1, False, 1.0, -1.0, 0, True, 'tanh', 0.0, True)

def nan_to_num(input, nan=0.0, posinf=None, neginf=None):
return legacy.nan_to_num(input, nan, posinf, neginf)

def round(input, decimals):
if use_pyboost():
return pyboost.round_op(input, decimals)
return legacy.round(input, decimals)

def fftn(input, s=None, dim=None, norm=None):
if use_pyboost():
return pyboost.fftn_op(input, s, dim, norm)

def eye(n, m=None, dtype=None):
if use_pyboost():
return pyboost.eye_op(n, m, dtype)
return legacy.eye(n, m, dtype)

def erfinv(input):
if use_pyboost():
return pyboost.erfinv_op(input)
return legacy.erfinv(input)

def logit(input, eps=1e-5):
return legacy.logit(input, eps)

def bitwise_xor_tensor(input, other):
if use_pyboost():
return pyboost.bitwise_xor_tensor_op(input, other)
return legacy.bitwise_xor(input, other)

def unique2(input, sorted, return_inverse, return_counts):
if use_pyboost():
return pyboost.unique2_op(input, sorted, return_inverse, return_counts)
return legacy.unique(input, sorted, return_inverse, return_counts)

def sign(input):
if use_pyboost():
return pyboost.sign_op(input)
return legacy.sign(input)

def log2(input):
if use_pyboost():
return pyboost.log2_op(input)
return legacy.log2(input)

def bucketize(input, boundaries, right=False):
epsilon_ = 0. if right else 1.e-6
boundaries = [boundary + epsilon_ for boundary in boundaries]
return legacy.bucketize(input, boundaries)

def inplace_fill_diagonal(input, fill_value, wrap):
if use_pyboost():
return pyboost.inplace_fill_diagonal_op(input, fill_value, wrap)
return legacy.fill_diagonal(input, fill_value, wrap)

def clamp_tensor(input, min, max):
if use_pyboost():
return pyboost.clamp_tensor_op(input, min, max)

def hswish(input):
if use_pyboost():
return pyboost.hswish_op(input)
return legacy.h_swish(input)

def logical_and(input, other):
if use_pyboost():
return pyboost.logical_and_op(input, other)
return legacy.logical_and(input, other)

def as_strided(input, size, stride, storage_offset):
if use_pyboost():
return pyboost.as_strided_op(input, size, stride, storage_offset)
return legacy.as_strided(input, size, stride, storage_offset)

def relu6(input):
if use_pyboost():
return pyboost.relu6_op(input)
return legacy.re_lu6(input)

def col2im(input, output_size, kernel_size, dilation=1, padding=0, stride=1):
if use_pyboost():
return pyboost.col2im_ext_op(input, output_size, kernel_size, dilation, padding, stride)
return legacy.col2im(input, output_size, kernel_size, dilation, padding, stride)

def flash_attention_score(query, key, value, real_shift, drop_mask, padding_mask, attn_mask, prefix, actual_seq_qlen, actual_seq_kvlen, head_num, keep_prob, scale_value, pre_tokens, next_tokens, inner_precise, input_layout, sparse_mode):
if use_pyboost():
return pyboost.flash_attention_score_impl(query, key, value, real_shift, drop_mask, padding_mask, attn_mask, prefix, actual_seq_qlen, actual_seq_kvlen, head_num, keep_prob, scale_value, pre_tokens, next_tokens, inner_precise, input_layout, sparse_mode)
return legacy.flash_attention_score(query, key, value, real_shift, drop_mask, padding_mask, attn_mask, prefix, actual_seq_qlen, actual_seq_kvlen, head_num, keep_prob, scale_value, pre_tokens, next_tokens, inner_precise, input_layout, sparse_mode)

def randperm(n, generator, dtype):
    seed, offset = generator._step(12)  # pylint: disable=protected-access
    if use_pyboost():
        return pyboost.randperm_ext_op(n, seed, offset, dtype)
    return legacy.randperm(n, seed)

def logical_or(input_x, input_y):
if use_pyboost():
return pyboost.logical_or_op(input_x, input_y)
return legacy.logical_or(input_x, input_y)

def dropout2d(input_x, p):
return legacy.dropout2_d(input_x, p)

def linalg_qr(input_x, mode):
if use_pyboost():
return pyboost.linalg_qr_op(input_x, mode)
    full_matrices = mode == 'complete'
return legacy.qr(input_x, full_matrices)

def bernoulli(input, generator):
seed, offset = generator._step(12)
if use_pyboost():
return pyboost.bernoulli_ext_op(input, seed, offset)
return legacy.bernoulli(input, seed, offset)

def multinomial(input, num_samples, replacement, generator):
seed, offset = generator._step(12) # pylint: disable=protected-access
return pyboost.multinomial_ext_op(input, num_samples, replacement, seed, offset)
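
All of the wrappers in this file follow the same two-path dispatch: try the pyboost kernel when use_pyboost() is enabled, otherwise fall back to the legacy primitive. A minimal sketch of that pattern, using a hypothetical operator name my_op that is not part of this diff:

def my_op(input, other):
    # Prefer the pyboost kernel when the global switch is on,
    # otherwise fall back to the legacy (graph-mode) primitive.
    if use_pyboost():
        return pyboost.my_op_impl(input, other)
    return legacy.my_op(input, other)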

+ 10
- 2
mindnlp/core/_dtype.py View File

@@ -4,7 +4,7 @@ from mindspore.common.dtype import *
from mindspore._c_expression import typing
from mindspore._c_expression.typing import Type

from .configs import ON_A1, SUPPORT_BF16
from .configs import ON_A1, SUPPORT_BF16, DEVICE_TARGET

if SUPPORT_BF16:
from mindspore.common.np_dtype import bfloat16 as np_bfloat16# pylint: disable=import-error
@@ -15,7 +15,7 @@ bool_alias = bool
float_alias = float
int_alias = int

if ON_A1:
if ON_A1 or DEVICE_TARGET == 'GPU':
    warnings.warn('MindSpore on GPU/910A does not support bfloat16, use float16 instead.')
bfloat16 = float16

@@ -124,3 +124,11 @@ py2dtype = {
float_alias: float,
int_alias: int64
}

mantissa_bits_map = {
int64: 63,
    float32: 23,   # FP32, single precision
    float64: 52,   # FP64, double precision
    float16: 10,   # FP16, half precision
    bfloat16: 7,   # BF16, bfloat16 (brain floating point)
}
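
As a quick sanity check of the table above (a sketch, not code from this diff): the unit roundoff of a floating-point format is 2 raised to the negative mantissa width, so the map reproduces the familiar machine-epsilon values.

def rounding_eps(dtype):
    # Hypothetical helper: derive the unit roundoff from the mantissa widths above.
    return 2.0 ** -mantissa_bits_map[dtype]

# rounding_eps(float32)  -> 2**-23 ≈ 1.19e-7
# rounding_eps(float64)  -> 2**-52 ≈ 2.22e-16
# rounding_eps(bfloat16) -> 2**-7  ≈ 7.8e-3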

+ 3
- 0
mindnlp/core/_jit_internal.py View File

@@ -95,3 +95,6 @@ class BroadcastingListCls:
BroadcastingList1 = BroadcastingListCls()
for i in range(2, 7):
globals()[f"BroadcastingList{i}"] = BroadcastingList1

def is_scripting():
    return False

+ 0
- 0
mindnlp/core/_op_prim/__init__.py View File


+ 0
- 0
mindnlp/core/_op_prim/ascend/__init__.py View File


+ 3511
- 0
mindnlp/core/_op_prim/ascend/legacy.py
File diff suppressed because it is too large
View File


+ 877
- 0
mindnlp/core/_op_prim/ascend/pyboost.py View File

@@ -0,0 +1,877 @@
from mindspore.ops.auto_generate.gen_ops_prim import *
from mindspore.ops.auto_generate.pyboost_inner_prim import *

abs_op = Abs().set_device('Ascend')

acos_ext_op = AcosExt().set_device('Ascend')

acosh_ext_op = AcoshExt().set_device('Ascend')

adamw_op = AdamW().set_device('Ascend')

adaptive_avg_pool1d_op = AdaptiveAvgPool1D().set_device('Ascend')

adaptive_avg_pool2d_ext_op = AdaptiveAvgPool2DExt().set_device('Ascend')

adaptive_avg_pool2d_grad_ext_op = AdaptiveAvgPool2DGradExt().set_device('Ascend')

adaptive_avg_pool3d_ext_op = AdaptiveAvgPool3DExt().set_device('Ascend')

adaptive_avg_pool3d_grad_ext_op = AdaptiveAvgPool3DGradExt().set_device('Ascend')

adaptive_max_pool1d_op = AdaptiveMaxPool1D().set_device('Ascend')

add_op = Add().set_device('Ascend')

add_ext_op = AddExt().set_device('Ascend')

add_layer_norm_grad_op = AddLayerNormGrad().set_device('Ascend')

add_layernorm_v2_op = AddLayerNormV2().set_device('Ascend')

add_rms_norm_op = AddRmsNorm().set_device('Ascend')

add_scalar_op = AddScalar().set_device('Ascend')

addbmm_op = Addbmm().set_device('Ascend')

addcdiv_ext_op = AddcdivExt().set_device('Ascend')

addcmul_ext_op = AddcmulExt().set_device('Ascend')

addmm_op = Addmm().set_device('Ascend')

addmv_op = Addmv().set_device('Ascend')

all_gather_matmul_op = AllGatherMatmul().set_device('Ascend')

arange_op = Arange().set_device('Ascend')

argmax_ext_op = ArgMaxExt().set_device('Ascend')

argmin_ext_op = ArgMinExt().set_device('Ascend')

argsort_op = ArgSort().set_device('Ascend')

as_strided_op = AsStrided().set_device('Ascend')

asin_ext_op = AsinExt().set_device('Ascend')

asinh_ext_op = AsinhExt().set_device('Ascend')

atan2_ext_op = Atan2Ext().set_device('Ascend')

atan_ext_op = AtanExt().set_device('Ascend')

atanh_op = Atanh().set_device('Ascend')

avg_pool1d_op = AvgPool1D().set_device('Ascend')

avg_pool2d_op = AvgPool2D().set_device('Ascend')

avg_pool2d_grad_op = AvgPool2DGrad().set_device('Ascend')

avg_pool3d_ext_op = AvgPool3DExt().set_device('Ascend')

avg_pool3d_grad_ext_op = AvgPool3DGradExt().set_device('Ascend')

baddbmm_op = Baddbmm().set_device('Ascend')

batch_norm_elemt_op = BatchNormElemt().set_device('Ascend')

batch_norm_elemt_grad_op = BatchNormElemtGrad().set_device('Ascend')

batch_norm_ext_op = BatchNormExt().set_device('Ascend')

batch_norm_gather_stats_with_counts_op = BatchNormGatherStatsWithCounts().set_device('Ascend')

batch_norm_reduce_grad_op = BatchNormReduceGrad().set_device('Ascend')

batch_norm_stats_op = BatchNormStats().set_device('Ascend')

bernoulli_ext_op = BernoulliExt().set_device('Ascend')

binary_cross_entropy_with_logits_backward_op = BinaryCrossEntropyWithLogitsBackward().set_device('Ascend')

bincount_ext_op = BincountExt().set_device('Ascend')

bitwise_and_scalar_op = BitwiseAndScalar().set_device('Ascend')

bitwise_and_tensor_op = BitwiseAndTensor().set_device('Ascend')

bitwise_not_op = BitwiseNot().set_device('Ascend')

bitwise_or_scalar_op = BitwiseOrScalar().set_device('Ascend')

bitwise_or_tensor_op = BitwiseOrTensor().set_device('Ascend')

bitwise_xor_scalar_op = BitwiseXorScalar().set_device('Ascend')

bitwise_xor_tensor_op = BitwiseXorTensor().set_device('Ascend')

bmm_ext_op = BatchMatMulExt().set_device('Ascend')

broadcast_to_view_op = BroadcastToView().set_device('Ascend')

ceil_op = Ceil().set_device('Ascend')

chunk_op = Chunk().set_device('Ascend')

chunk_view_op = ChunkView().set_device('Ascend')

clamp_scalar_op = ClampScalar().set_device('Ascend')

clamp_tensor_op = ClampTensor().set_device('Ascend')

clone_op = Clone().set_device('Ascend')

col2im_ext_op = Col2ImExt().set_device('Ascend')

col2im_grad_op = Col2ImGrad().set_device('Ascend')

constant_pad_nd_op = ConstantPadND().set_device('Ascend')

contiguous_op = Contiguous().set_device('Ascend')

conv1d_ext_op = Conv1DExt().set_device('Ascend')

conv1d_padding_op = Conv1DPadding().set_device('Ascend')

conv2d_ext_op = Conv2DExt().set_device('Ascend')

conv2d_padding_op = Conv2DPadding().set_device('Ascend')

conv3d_ext_op = Conv3DExt().set_device('Ascend')

conv3d_padding_op = Conv3DPadding().set_device('Ascend')

conv_transpose2d_op = ConvTranspose2D().set_device('Ascend')

convolution_op = Convolution().set_device('Ascend')

convolution_grad_op = ConvolutionGrad().set_device('Ascend')

convolution_str_op = ConvolutionStr().set_device('Ascend')

convolution_str_grad_op = ConvolutionStrGrad().set_device('Ascend')

copy_op = Copy().set_device('Ascend')

cos_op = Cos().set_device('Ascend')

cosh_op = Cosh().set_device('Ascend')

count_nonzero_op = CountNonZero().set_device('Ascend')

cummin_ext_op = CumminExt().set_device('Ascend')

cumsum_ext_op = CumsumExt().set_device('Ascend')

dense_op = Dense().set_device('Ascend')

diag_ext_op = DiagExt().set_device('Ascend')

dist_comm_all_gather_op = DistCommAllGather().set_device('Ascend')

dist_comm_all_gather_into_tensor_op = DistCommAllGatherIntoTensor().set_device('Ascend')

dist_comm_all_reduce_op = DistCommAllReduce().set_device('Ascend')

dist_comm_all_to_all_v_op = DistCommAllToAllV().set_device('Ascend')

dist_comm_all_to_all_v_single_op = DistCommAllToAllVSingle().set_device('Ascend')

dist_comm_barrier_op = DistCommBarrier().set_device('Ascend')

dist_comm_batch_isend_irecv_op = DistCommBatchIsendIrecv().set_device('Ascend')

dist_comm_broadcast_op = DistCommBroadcast().set_device('Ascend')

dist_comm_gather_op = DistCommGather().set_device('Ascend')

dist_comm_gather_into_tensor_op = DistCommGatherIntoTensor().set_device('Ascend')

dist_comm_irecv_op = DistCommIrecv().set_device('Ascend')

dist_comm_isend_op = DistCommIsend().set_device('Ascend')

dist_comm_reduce_op = DistCommReduce().set_device('Ascend')

dist_comm_reduce_scatter_op = DistCommReduceScatter().set_device('Ascend')

dist_comm_reduce_scatter_tensor_op = DistCommReduceScatterTensor().set_device('Ascend')

dist_comm_scatter_op = DistCommScatter().set_device('Ascend')

dist_comm_scatter_tensor_op = DistCommScatterTensor().set_device('Ascend')

div_op = Div().set_device('Ascend')

divmod_op = DivMod().set_device('Ascend')

divmods_op = DivMods().set_device('Ascend')

divs_op = Divs().set_device('Ascend')

dot_op = Dot().set_device('Ascend')

dropout_do_mask_ext_op = DropoutDoMaskExt().set_device('Ascend')

dropout_ext_op = DropoutExt().set_device('Ascend')

dropout_gen_mask_ext_op = DropoutGenMaskExt().set_device('Ascend')

dropout_grad_ext_op = DropoutGradExt().set_device('Ascend')

dynamic_quant_ext_op = DynamicQuantExt().set_device('Ascend')

elu_grad_ext_op = EluGradExt().set_device('Ascend')

embedding_op = Embedding().set_device('Ascend')

embedding_dense_backward_op = EmbeddingDenseBackward().set_device('Ascend')

equal_op = Equal().set_device('Ascend')

equal_ext_op = EqualExt().set_device('Ascend')

erf_op = Erf().set_device('Ascend')

erfc_op = Erfc().set_device('Ascend')

erfinv_op = Erfinv().set_device('Ascend')

exp_op = Exp().set_device('Ascend')

exp2_op = Exp2().set_device('Ascend')

expand_as_op = ExpandAs().set_device('Ascend')

expand_dims_op = ExpandDims().set_device('Ascend')

expand_dims_view_op = ExpandDimsView().set_device('Ascend')

expm1_op = Expm1().set_device('Ascend')

eye_op = Eye().set_device('Ascend')

fill_scalar_op = FillScalar().set_device('Ascend')

fill_tensor_op = FillTensor().set_device('Ascend')

flatten_ext_op = FlattenExt().set_device('Ascend')

floor_op = Floor().set_device('Ascend')

floor_div_op = FloorDiv().set_device('Ascend')

floor_div_scalar_op = FloorDivScalar().set_device('Ascend')

fmod_scalar_op = FmodScalar().set_device('Ascend')

fmod_tensor_op = FmodTensor().set_device('Ascend')

frac_op = Frac().set_device('Ascend')

full_like_op = FullLike().set_device('Ascend')

gather_d_op = GatherD().set_device('Ascend')

gather_d_grad_v2_op = GatherDGradV2().set_device('Ascend')

gcd_op = Gcd().set_device('Ascend')

gelu_op = GeLU().set_device('Ascend')

gelu_ext_op = GeluExt().set_device('Ascend')

gelu_grad_op = GeLUGrad().set_device('Ascend')

gelu_grad_ext_op = GeluGradExt().set_device('Ascend')

generator_op = Generator().set_device('Ascend')

gmm_op = Gmm().set_device('Ascend')

gmm_backward_op = GmmBackward().set_device('Ascend')

gmm_backward_fusion_op = GmmBackwardFusion().set_device('Ascend')

gmm_v2_op = GmmV2().set_device('Ascend')

gmm_v2_backward_op = GmmV2Backward().set_device('Ascend')

gmm_v2_backward_fusion_op = GmmV2BackwardFusion().set_device('Ascend')

greater_op = Greater().set_device('Ascend')

greater_equal_op = GreaterEqual().set_device('Ascend')

greater_equal_scalar_op = GreaterEqualScalar().set_device('Ascend')

group_norm_op = GroupNorm().set_device('Ascend')

group_norm_grad_op = GroupNormGrad().set_device('Ascend')

grouped_matmul_v2_op = GroupedMatmulV2().set_device('Ascend')

grouped_matmul_v4_op = GroupedMatmulV4().set_device('Ascend')

hardtanh_op = Hardtanh().set_device('Ascend')

hardtanh_grad_op = HardtanhGrad().set_device('Ascend')

histc_ext_op = HistcExt().set_device('Ascend')

hsigmoid_op = HSigmoid().set_device('Ascend')

hsigmoid_grad_op = HSigmoidGrad().set_device('Ascend')

hswish_op = HSwish().set_device('Ascend')

hswish_grad_op = HSwishGrad().set_device('Ascend')

im2col_ext_op = Im2ColExt().set_device('Ascend')

index_op = Index().set_device('Ascend')

index_add_ext_op = IndexAddExt().set_device('Ascend')

index_fill_scalar_op = IndexFillScalar().set_device('Ascend')

index_fill_tensor_op = IndexFillTensor().set_device('Ascend')

index_select_op = IndexSelect().set_device('Ascend')

inner_comm_all_gather_op = InnerCommAllGather().set_device('Ascend')

inner_comm_all_reduce_op = InnerCommAllReduce().set_device('Ascend')

inner_comm_all_to_all_v_op = InnerCommAllToAllV().set_device('Ascend')

inner_comm_irecv_op = InnerCommIrecv().set_device('Ascend')

inner_comm_isend_op = InnerCommIsend().set_device('Ascend')

inner_comm_reduce_scatter_op = InnerCommReduceScatter().set_device('Ascend')

inner_index_op = InnerIndex().set_device('Ascend')

inner_inplace_index_put_op = InnerInplaceIndexPut().set_device('Ascend')

inner_non_zero_op = InnerNonZero().set_device('Ascend')

inplace_add_ext_op = InplaceAddExt().set_device('Ascend')

inplace_addmm_op = InplaceAddmm().set_device('Ascend')

inplace_adds_ext_op = InplaceAddsExt().set_device('Ascend')

inplace_clamp_scalar_op = InplaceClampScalar().set_device('Ascend')

inplace_clamp_tensor_op = InplaceClampTensor().set_device('Ascend')

inplace_copy_op = InplaceCopy().set_device('Ascend')

inplace_div_op = InplaceDiv().set_device('Ascend')

inplace_divmod_op = InplaceDivMod().set_device('Ascend')

inplace_divmods_op = InplaceDivMods().set_device('Ascend')

inplace_divs_op = InplaceDivs().set_device('Ascend')

inplace_elu_op = InplaceElu().set_device('Ascend')

inplace_erfinv_op = InplaceErfinv().set_device('Ascend')

inplace_exp_op = InplaceExp().set_device('Ascend')

inplace_exponential_op = InplaceExponential().set_device('Ascend')

inplace_fill_diagonal_op = InplaceFillDiagonal().set_device('Ascend')

inplace_fill_scalar_op = InplaceFillScalar().set_device('Ascend')

inplace_fill_tensor_op = InplaceFillTensor().set_device('Ascend')

inplace_floor_op = InplaceFloor().set_device('Ascend')

inplace_floor_divide_op = InplaceFloorDivide().set_device('Ascend')

inplace_floor_divides_op = InplaceFloorDivides().set_device('Ascend')

inplace_grouped_matmul_add_op = InplaceGroupedMatmulAdd().set_device('Ascend')

inplace_hardtanh_op = InplaceHardtanh().set_device('Ascend')

inplace_index_add_op = InplaceIndexAddExt().set_device('Ascend')

inplace_index_put_op = InplaceIndexPut().set_device('Ascend')

inplace_log_op = InplaceLog().set_device('Ascend')

inplace_masked_fill_scalar_op = InplaceMaskedFillScalar().set_device('Ascend')

inplace_masked_fill_tensor_op = InplaceMaskedFillTensor().set_device('Ascend')

inplace_mul_op = InplaceMul().set_device('Ascend')

inplace_muls_op = InplaceMuls().set_device('Ascend')

inplace_normal_op = InplaceNormal().set_device('Ascend')

inplace_put_op = InplacePut().set_device('Ascend')

inplace_random_op = InplaceRandom().set_device('Ascend')

inplace_relu_op = InplaceReLU().set_device('Ascend')

inplace_scatter_add_op = InplaceScatterAdd().set_device('Ascend')

inplace_scatter_src_op = InplaceScatterSrc().set_device('Ascend')

inplace_scatter_src_reduce_op = InplaceScatterSrcReduce().set_device('Ascend')

inplace_scatter_value_op = InplaceScatterValue().set_device('Ascend')

inplace_scatter_value_reduce_op = InplaceScatterValueReduce().set_device('Ascend')

inplace_stop_gradient_op = InplaceStopGradient().set_device('Ascend')

inplace_sub_ext_op = InplaceSubExt().set_device('Ascend')

inplace_sub_scalar_op = InplaceSubScalar().set_device('Ascend')

inplace_tanh_op = InplaceTanh().set_device('Ascend')

inplace_threshold_op = InplaceThreshold().set_device('Ascend')

inplace_uniform_op = InplaceUniform().set_device('Ascend')

inplace_zero_op = InplaceZero().set_device('Ascend')

isfinite_op = IsFinite().set_device('Ascend')

isinf_op = IsInf().set_device('Ascend')

isneginf_op = IsNegInf().set_device('Ascend')

kl_div_op = KLDiv().set_device('Ascend')

kl_div_grad_op = KLDivGrad().set_device('Ascend')

kthvalue_op = Kthvalue().set_device('Ascend')

kv_cache_scatter_update_op = KVCacheScatterUpdate().set_device('Ascend')

l1_loss_backward_ext_op = L1LossBackwardExt().set_device('Ascend')

l1_loss_ext_op = L1LossExt().set_device('Ascend')

layer_norm_ext_op = LayerNormExt().set_device('Ascend')

layer_norm_grad_ext_op = LayerNormGradExt().set_device('Ascend')

leaky_relu_ext_op = LeakyReLUExt().set_device('Ascend')

leaky_relu_grad_ext_op = LeakyReLUGradExt().set_device('Ascend')

lerp_op = Lerp().set_device('Ascend')

lerp_scalar_op = LerpScalar().set_device('Ascend')

less_op = Less().set_device('Ascend')

less_equal_op = LessEqual().set_device('Ascend')

lin_space_ext_op = LinSpaceExt().set_device('Ascend')

linalg_qr_op = LinalgQr().set_device('Ascend')

linalg_vector_norm_op = LinalgVectorNorm().set_device('Ascend')

log_op = Log().set_device('Ascend')

log10_op = Log10().set_device('Ascend')

log1p_op = Log1p().set_device('Ascend')

log2_op = Log2().set_device('Ascend')

log_softmax_ext_op = LogSoftmaxExt().set_device('Ascend')

logaddexp_op = LogAddExp().set_device('Ascend')

logaddexp2_op = LogAddExp2().set_device('Ascend')

logical_and_op = LogicalAnd().set_device('Ascend')

logical_not_op = LogicalNot().set_device('Ascend')

logical_or_op = LogicalOr().set_device('Ascend')

logical_xor_op = LogicalXor().set_device('Ascend')

logsigmoid_op = LogSigmoid().set_device('Ascend')

logsigmoid_grad_op = LogSigmoidGrad().set_device('Ascend')

logsumexp_op = LogSumExp().set_device('Ascend')

masked_fill_op = MaskedFill().set_device('Ascend')

masked_select_op = MaskedSelect().set_device('Ascend')

masked_select_grad_op = MaskedSelectGrad().set_device('Ascend')

matmul_allreduce_add_rmsnorm_op = MatmulAllReduceAddRmsNorm().set_device('Ascend')

matmul_ext_op = MatMulExt().set_device('Ascend')

matmul_reduce_scatter_op = MatmulReduceScatter().set_device('Ascend')

matrix_inverse_ext_op = MatrixInverseExt().set_device('Ascend')

max_op = Max().set_device('Ascend')

max_dim_op = MaxDim().set_device('Ascend')

max_unpool2d_ext_op = MaxUnpool2DExt().set_device('Ascend')

maximum_op = Maximum().set_device('Ascend')

mean_ext_op = MeanExt().set_device('Ascend')

median_dim_op = MedianDim().set_device('Ascend')

median_ext_op = MedianExt().set_device('Ascend')

min_op = Min().set_device('Ascend')

min_dim_op = MinDim().set_device('Ascend')

minimum_op = Minimum().set_device('Ascend')

mish_ext_op = MishExt().set_device('Ascend')

mish_grad_ext_op = MishGradExt().set_device('Ascend')

mm_ext_op = Mm().set_device('Ascend')

moe_compute_expert_tokens_op = MoeComputeExpertTokens().set_device('Ascend')

moe_finalize_routing_op = MoeFinalizeRouting().set_device('Ascend')

moe_gating_top_k_softmax_op = MoeGatingTopKSoftmax().set_device('Ascend')

moe_init_routing_op = MoeInitRouting().set_device('Ascend')

moe_init_routing_v2_op = MoeInitRoutingV2().set_device('Ascend')

moe_token_permute_op = MoeTokenPermute().set_device('Ascend')

moe_token_permute_grad_op = MoeTokenPermuteGrad().set_device('Ascend')

moe_token_unpermute_op = MoeTokenUnpermute().set_device('Ascend')

moe_token_unpermute_grad_op = MoeTokenUnpermuteGrad().set_device('Ascend')

mse_loss_ext_op = MSELossExt().set_device('Ascend')

mse_loss_grad_ext_op = MSELossGradExt().set_device('Ascend')

mul_op = Mul().set_device('Ascend')

muls_op = Muls().set_device('Ascend')

multi_scale_deformable_attn_op = MultiScaleDeformableAttn().set_device('Ascend')

multi_scale_deformable_attn_grad_op = MultiScaleDeformableAttnGrad().set_device('Ascend')

multinomial_ext_op = MultinomialExt().set_device('Ascend')

mv_op = Mv().set_device('Ascend')

nansum_op = Nansum().set_device('Ascend')

narrow_op = Narrow().set_device('Ascend')

narrow_view_op = NarrowView().set_device('Ascend')

neg_op = Neg().set_device('Ascend')

new_ones_op = NewOnes().set_device('Ascend')

new_zeros_op = NewZeros().set_device('Ascend')

nllloss_2d_op = NLLLoss2d().set_device('Ascend')

nllloss_2d_grad_op = NLLLoss2dGrad().set_device('Ascend')

non_zero_op = NonZero().set_device('Ascend')

non_zero_ext_op = NonZeroExt().set_device('Ascend')

norm_op = Norm().set_device('Ascend')

normal_float_float_op = NormalFloatFloat().set_device('Ascend')

normal_float_tensor_op = NormalFloatTensor().set_device('Ascend')

normal_tensor_float_op = NormalTensorFloat().set_device('Ascend')

normal_tensor_tensor_op = NormalTensorTensor().set_device('Ascend')

not_equal_op = NotEqual().set_device('Ascend')

ones_like_ext_op = OnesLikeExt().set_device('Ascend')

outer_op = Outer().set_device('Ascend')

pixel_shuffle_op = PixelShuffle().set_device('Ascend')

polar_op = Polar().set_device('Ascend')

pow_op = Pow().set_device('Ascend')

pow_scalar_tensor_op = PowScalarTensor().set_device('Ascend')

pow_tensor_scalar_op = PowTensorScalar().set_device('Ascend')

prelu_op = PReLU().set_device('Ascend')

prelu_grad_op = PReLUGrad().set_device('Ascend')

prod_ext_op = ProdExt().set_device('Ascend')

quant_v2_op = QuantV2().set_device('Ascend')

rand_ext_op = RandExt().set_device('Ascend')

rand_like_ext_op = RandLikeExt().set_device('Ascend')

randint_op = RandInt().set_device('Ascend')

randint_like_op = RandIntLike().set_device('Ascend')

randn_op = Randn().set_device('Ascend')

randn_like_op = RandnLike().set_device('Ascend')

randperm_ext_op = RandpermExt().set_device('Ascend')

reciprocal_op = Reciprocal().set_device('Ascend')

reflection_pad_1d_op = ReflectionPad1D().set_device('Ascend')

reflection_pad_1d_grad_op = ReflectionPad1DGrad().set_device('Ascend')

reflection_pad_2d_op = ReflectionPad2D().set_device('Ascend')

reflection_pad_2d_grad_op = ReflectionPad2DGrad().set_device('Ascend')

reflection_pad_3d_op = ReflectionPad3D().set_device('Ascend')

reflection_pad_3d_grad_op = ReflectionPad3DGrad().set_device('Ascend')

relu_op = ReLU().set_device('Ascend')

relu_grad_op = ReluGrad().set_device('Ascend')

remainder_scalar_tensor_op = RemainderScalarTensor().set_device('Ascend')

remainder_tensor_scalar_op = RemainderTensorScalar().set_device('Ascend')

remainder_tensor_tensor_op = RemainderTensorTensor().set_device('Ascend')

repeat_op = Repeat().set_device('Ascend')

repeat_interleave_grad_op = RepeatInterleaveGrad().set_device('Ascend')

repeat_interleave_int_op = RepeatInterleaveInt().set_device('Ascend')

repeat_interleave_tensor_op = RepeatInterleaveTensor().set_device('Ascend')

replication_pad_1d_op = ReplicationPad1D().set_device('Ascend')

replication_pad_1d_grad_op = ReplicationPad1DGrad().set_device('Ascend')

replication_pad_2d_op = ReplicationPad2D().set_device('Ascend')

replication_pad_2d_grad_op = ReplicationPad2DGrad().set_device('Ascend')

replication_pad_3d_op = ReplicationPad3D().set_device('Ascend')

replication_pad_3d_grad_op = ReplicationPad3DGrad().set_device('Ascend')

reshape_op = Reshape().set_device('Ascend')

rms_norm_grad_op = RmsNormGrad().set_device('Ascend')

rotary_position_embedding_op = RotaryPositionEmbedding().set_device('Ascend')

rotary_position_embedding_grad_op = RotaryPositionEmbeddingGrad().set_device('Ascend')

round_op = Round().set_device('Ascend')

rsqrt_op = Rsqrt().set_device('Ascend')

scatter_op = Scatter().set_device('Ascend')

scatter_add_ext_op = ScatterAddExt().set_device('Ascend')

scatter_value_op = ScatterValue().set_device('Ascend')

select_op = Select().set_device('Ascend')

select_ext_view_op = SelectExtView().set_device('Ascend')

select_v2_op = SelectV2().set_device('Ascend')

selu_ext_op = SeLUExt().set_device('Ascend')

selu_grad_op = SeluGrad().set_device('Ascend')

sigmoid_op = Sigmoid().set_device('Ascend')

sigmoid_grad_op = SigmoidGrad().set_device('Ascend')

sign_op = Sign().set_device('Ascend')

silent_check_v2_op = SilentCheckV2().set_device('Ascend')

silent_check_v3_op = SilentCheckV3().set_device('Ascend')

silu_op = SiLU().set_device('Ascend')

silu_grad_op = SiLUGrad().set_device('Ascend')

sin_op = Sin().set_device('Ascend')

sinc_op = Sinc().set_device('Ascend')

sinh_op = Sinh().set_device('Ascend')

slice_op = Slice().set_device('Ascend')

slice_ext_op = SliceExt().set_device('Ascend')

slice_ext_view_op = SliceExtView().set_device('Ascend')

softmax_backward_op = SoftmaxBackward().set_device('Ascend')

softplus_ext_op = SoftplusExt().set_device('Ascend')

softplus_grad_ext_op = SoftplusGradExt().set_device('Ascend')

sort_ext_op = SortExt().set_device('Ascend')

speed_fusion_attention_op = SpeedFusionAttention().set_device('Ascend')

speed_fusion_attention_grad_op = SpeedFusionAttentionGrad().set_device('Ascend')

split_tensor_op = SplitTensor().set_device('Ascend')

split_tensor_view_op = SplitTensorView().set_device('Ascend')

split_with_size_op = SplitWithSize().set_device('Ascend')

split_with_size_view_op = SplitWithSizeView().set_device('Ascend')

sqrt_op = Sqrt().set_device('Ascend')

square_op = Square().set_device('Ascend')

std_op = Std().set_device('Ascend')

std_mean_op = StdMean().set_device('Ascend')

sub_op = Sub().set_device('Ascend')

sub_ext_op = SubExt().set_device('Ascend')

sub_scalar_op = SubScalar().set_device('Ascend')

sum_ext_op = SumExt().set_device('Ascend')

swiglu_op = Swiglu().set_device('Ascend')

swiglu_grad_op = SwigluGrad().set_device('Ascend')

t_ext_op = TExt().set_device('Ascend')

take_op = Take().set_device('Ascend')

tan_op = Tan().set_device('Ascend')

tanh_op = Tanh().set_device('Ascend')

tanh_grad_op = TanhGrad().set_device('Ascend')

threshold_op = Threshold().set_device('Ascend')

threshold_grad_op = ThresholdGrad().set_device('Ascend')

topk_ext_op = TopkExt().set_device('Ascend')

trace_ext_op = TraceExt().set_device('Ascend')

transpose_op = Transpose().set_device('Ascend')

transpose_ext_view_op = TransposeExtView().set_device('Ascend')

transpose_view_op = TransposeView().set_device('Ascend')

triangular_solve_op = TriangularSolve().set_device('Ascend')

tril_ext_op = TrilExt().set_device('Ascend')

trunc_op = Trunc().set_device('Ascend')

uniform_ext_op = UniformExt().set_device('Ascend')

unique2_op = Unique2().set_device('Ascend')

unique_dim_op = UniqueDim().set_device('Ascend')

unstack_ext_view_op = UnstackExtView().set_device('Ascend')

upsample_bicubic2d_op = UpsampleBicubic2D().set_device('Ascend')

upsample_bicubic2d_grad_op = UpsampleBicubic2DGrad().set_device('Ascend')

upsample_bilinear2d_op = UpsampleBilinear2D().set_device('Ascend')

upsample_bilinear2d_grad_op = UpsampleBilinear2DGrad().set_device('Ascend')

upsample_linear1d_op = UpsampleLinear1D().set_device('Ascend')

upsample_linear1d_grad_op = UpsampleLinear1DGrad().set_device('Ascend')

upsample_nearest1d_op = UpsampleNearest1D().set_device('Ascend')

upsample_nearest1d_grad_op = UpsampleNearest1DGrad().set_device('Ascend')

upsample_nearest2d_op = UpsampleNearest2D().set_device('Ascend')

upsample_nearest2d_grad_op = UpsampleNearest2DGrad().set_device('Ascend')

upsample_nearest3d_op = UpsampleNearest3D().set_device('Ascend')

upsample_nearest3d_grad_op = UpsampleNearest3DGrad().set_device('Ascend')

var_op = Var().set_device('Ascend')

var_mean_op = VarMean().set_device('Ascend')

view_as_op = ViewAs().set_device('Ascend')

xlogy_op = Xlogy().set_device('Ascend')

xlogy_scalar_other_op = XLogYScalarOther().set_device('Ascend')

xlogy_scalar_self_op = XLogYScalarSelf().set_device('Ascend')

zeros_like_ext_op = ZerosLikeExt().set_device('Ascend')


+ 0
- 0
mindnlp/core/_op_prim/cpu/__init__.py View File


+ 3511
- 0
mindnlp/core/_op_prim/cpu/legacy.py
File diff suppressed because it is too large
View File


+ 0
- 0
mindnlp/core/_op_prim/gpu/__init__.py View File


+ 3511
- 0
mindnlp/core/_op_prim/gpu/legacy.py
File diff suppressed because it is too large
View File


+ 2
- 0
mindnlp/core/_prims/ascend/__init__.py View File

@@ -0,0 +1,2 @@
from . import aclop
from . import pyboost

+ 82
- 0
mindnlp/core/_prims/ascend/aclop.py View File

@@ -0,0 +1,82 @@
import re
import inspect
from mindspore import ops
from mindspore.ops._primitive_cache import _get_cache_prim
from mindspore.ops.operations._grad_ops import StridedSliceGrad

__all__ = []

def camel_to_snake_case(camel_case_str):
snake_case_str = re.sub(r'(?<!^)(?=[A-Z])', '_', camel_case_str).lower()
return snake_case_str

op_func_no_init = '''
def {name}(*args):
    op = _get_cache_prim(ops.{op})().set_device('Ascend')
    return op(*args)
'''

op_func_with_init = '''
def {name}(*args):
    op = _get_cache_prim(ops.{op})(*args[-{idx}:]).set_device('Ascend')
    return op(*args[:-{idx}])
'''


old_op_list = list(filter(lambda s: s[0].isupper(), dir(ops)))
for old_op_name in old_op_list:
if old_op_name in ['P', 'Print', 'Assert', 'Custom', 'CustomOpBuilder', 'DataType', 'ReduceOp', 'TBERegOp', 'Tensor']:
continue
# print(old_op_name)
ops_class = getattr(ops, old_op_name, None)
init_signature = inspect.signature(ops_class.__init__)
if len(init_signature.parameters) > 1:
name = camel_to_snake_case(old_op_name)
init_args = list(init_signature.parameters.keys())
init_args.pop(0)
exec(op_func_with_init.format(name=name, op=old_op_name, idx=len(init_args)), globals())

else:
name = camel_to_snake_case(old_op_name)
exec(op_func_no_init.format(name=name, op=old_op_name), globals())

__all__.append(name)
# print(old_op_name, init_signature.parameters, call_signature.parameters)
# print(old_op_name, len(init_signature.parameters), len(call_signature.parameters))
# break

# normal_op = ops.StandardNormal().set_device('CPU')
# def normal(size):
# return normal_op(size)

# __all__.append('normal')

dyn_shape_op = ops.TensorShape().set_device('Ascend')
def dyn_shape(self):
    return dyn_shape_op(self)

__all__.append('dyn_shape')

# def strided_slice(input, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask):
# strided_slice_op = ops.StridedSlice(begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask).set_device('CPU')
# return strided_slice_op(input, begin, end, strides)

# __all__.append('strided_slice')

# def broadcast_to(input, shape):
# broadcast_to_op = ops.BroadcastTo(shape).set_device('CPU')
# return broadcast_to_op(input)

# __all__.append('broadcast_to')

def strided_slice_grad(input, begin, end, strides, update, begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0):
    strided_slice_grad = _get_cache_prim(StridedSliceGrad)(begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask).set_device('Ascend')
    return strided_slice_grad(update, input.shape, begin, end, strides)

__all__.append('strided_slice_grad')

# full_op = ops.FillV2().set_device('CPU')
# def full(shape, value):
# return full_op(shape, value)

# __all__.append('full')
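
The loop above builds one thin wrapper per legacy primitive by filling in these templates and exec-ing the result. Assuming the template pins the Ascend target, the expansion for ops.Concat (whose __init__ takes a single axis argument) looks roughly like this sketch:

def concat(*args):
    # args = (tensors, axis): the trailing init argument configures the cached
    # primitive, the remaining arguments are forwarded to the call.
    op = _get_cache_prim(ops.Concat)(*args[-1:]).set_device('Ascend')
    return op(*args[:-1])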

mindnlp/core/_prims/ascend.py → mindnlp/core/_prims/ascend/ascend.py View File


+ 26
- 0
mindnlp/core/_prims/ascend/pyboost.py View File

@@ -0,0 +1,26 @@
from mindspore.ops.auto_generate import gen_ops_prim
from mindspore.ops.auto_generate import pyboost_inner_prim
from mindspore._c_expression import _empty_instance

gen_ops_list = list(filter(lambda s: s.startswith("pyboost"), dir(gen_ops_prim)))
pyboost_inner_list = list(filter(lambda s: s.endswith("_impl"), dir(pyboost_inner_prim)))

__all__ = []

for pyboost_op_name in gen_ops_list:
op_name = pyboost_op_name.replace('pyboost_', '') + '_op'
func_name = op_name.replace('_op', '')
op_instance = getattr(gen_ops_prim, op_name, None)
if op_instance is not None:
__all__.append(func_name)
globals()[func_name] = getattr(gen_ops_prim, op_name).__class__().set_device('Ascend')

for op_name in pyboost_inner_list:
func_name = op_name.replace('_impl', '')
__all__.append(func_name)
globals()[func_name] = getattr(pyboost_inner_prim, op_name).__class__()

def empty(*args, **kwargs):
return _empty_instance(*args, **kwargs, device='Ascend')

__all__.append('empty')
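
The net effect of the loops above is that every pyboost primitive exported by gen_ops_prim and pyboost_inner_prim becomes a module-level callable bound to Ascend. A usage sketch, assuming AddExt is among the generated primitives:

import mindspore
from mindnlp.core._prims.ascend import pyboost as ascend_pyboost

x = mindspore.Tensor([1.0, 2.0])
y = mindspore.Tensor([3.0, 4.0])
# gen_ops_prim's pyboost_add_ext becomes add_ext() here, an Ascend-bound
# primitive instance called like a function: add_ext(input, other, alpha).
out = ascend_pyboost.add_ext(x, y, 1)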

+ 0
- 0
mindnlp/core/_prims/ascend_310b/__init__.py View File


mindnlp/core/_prims/ascend_310b.py → mindnlp/core/_prims/ascend_310b/ascend_310b.py View File


+ 0
- 211
mindnlp/core/_prims/cpu.py View File

@@ -1,211 +0,0 @@
import numbers
from mindspore.ops.auto_generate import gen_ops_prim
from mindspore.ops._primitive_cache import _get_cache_prim
from mindspore._c_expression import _empty_instance
from mindspore.ops.operations._grad_ops import StridedSliceGrad

import mindspore
from mindspore import ops

from mindnlp import core

__all__ = []
op_list = list(filter(lambda s: s.endswith("_op"), dir(gen_ops_prim)))

for op_name in op_list:
func_name = op_name.replace('_op', '')
__all__.append(func_name)
globals()[func_name] = getattr(gen_ops_prim, op_name).__class__().set_device('CPU')

def empty(*args, **kwargs):
return _empty_instance(*args, **kwargs, device='CPU')

normal_op = ops.StandardNormal().set_device('CPU')
def normal(*args, **kwargs):
return normal_op(*args, **kwargs)

__all__.append('normal')

full_op = ops.FillV2().set_device('CPU')
def full(*args):
return full_op(*args)

__all__.append('full')

range_op = ops.Range().set_device('CPU')
def arange(start, end, step, dtype):
return cast(range_op(start, end, step), dtype)

__all__.append('arange')


broadcast_to_op = ops.Primitive('BroadcastTo').set_device('CPU')
def broadcast_to(*args):
return broadcast_to_op(*args)

__all__.append('broadcast_to')

def concat(tensors, dim):
concat_op = ops.Concat(dim).set_device('CPU')
return concat_op(tensors)

__all__.append('concat')

zeros_op = ops.Zeros().set_device('CPU')
def zeros(*args):
return zeros_op(*args)

__all__.append('zeros')

ones_op = ops.Ones().set_device('CPU')
def ones(*args):
return ones_op(*args)

__all__.append('ones')

uniform_real_op = ops.UniformReal().set_device('CPU')
def uniform_real(*args):
return uniform_real_op(*args)

__all__.append('uniform_real')

def pad_v3(input_x, padding, mode='constant', value=None):
pad_op = ops.PadV3(mode=mode, paddings_contiguous=True).set_device('CPU')
if isinstance(value, (float, int)):
value = core.tensor(value, dtype=input_x.dtype)
return pad_op(input_x, padding, value)

__all__.append('pad_v3')

reduce_any_op = ops.ReduceAny().set_device('CPU')
reduce_any_keepdim_op = ops.ReduceAny(True).set_device('CPU')
def reduce_any(input, dim, keepdim):
if keepdim:
return reduce_any_keepdim_op(input, dim)
return reduce_any_op(input, dim)

__all__.append('reduce_any')

reduce_all_op = ops.ReduceAll().set_device('CPU')
reduce_all_keepdim_op = ops.ReduceAll(True).set_device('CPU')
def reduce_all(input, dim, keepdim):
if keepdim:
return reduce_all_keepdim_op(input, dim)
return reduce_all_op(input, dim)

__all__.append('reduce_all')

def isclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False):
is_close = _get_cache_prim(ops.IsClose)(rtol=rtol, atol=atol, equal_nan=equal_nan).set_device('CPU')
return is_close(input, other)

__all__.append('isclose')

tile_op = ops.Primitive('Tile').set_device('CPU')
def tile(*args):
return tile_op(*args)

__all__.append('tile')

def randint(low, high, shape, dtype, generator):
rand_op = ops.UniformInt().set_device('CPU')
output = rand_op(shape, mindspore.Tensor(low, mindspore.int32), mindspore.Tensor(high, mindspore.int32))
return cast(output, dtype)
# return mindspore.Tensor(np.random.randint(low, high, shape))

cast_op = ops.Cast().set_device('CPU')
def cast(input, dtype):
return cast_op(input, dtype)

__all__.append('cast')

def tril_ext(input, diagonal):
tril_op = ops.Tril(diagonal).set_device('CPU')
return tril_op(input)

def strided_slice(input, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask):
strided_slice_op = _get_cache_prim(ops.StridedSlice)(begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask).set_device('CPU')
return strided_slice_op(input, begin, end, strides)

__all__.append('strided_slice')

def strided_slice_grad(input, begin, end, strides, update, begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0):
strided_slice_grad = _get_cache_prim(StridedSliceGrad)(begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask).set_device('CPU')
return strided_slice_grad(update, input.shape, begin, end, strides)

__all__.append('strided_slice_grad')

def squeeze(input, dim):
squeeze_op = ops.Squeeze(dim).set_device('CPU')
return squeeze_op(input)

__all__.append('squeeze')

def sort_ext(input, dim, descending, stable):
sort_op = ops.Sort(dim, descending).set_device('CPU')
return sort_op(input)

__all__.append('sort_ext')

def stack(tensors, dim):
stack_op = ops.Stack(dim).set_device('CPU')
return stack_op(tensors)

__all__.append('stack')

def gather(input_params, input_indices, axis, batch_dims=0):
gather_op = _get_cache_prim(ops.Gather)(batch_dims).set_device('CPU')
return gather_op(input_params, input_indices, axis)

__all__.append('gather')

def softmax(input, dim):
softmax_op = ops.Softmax(dim).set_device('CPU')
return softmax_op(input)

__all__.append('softmax')

def topk(input, k, sorted=True):
topk_op = ops.TopK(sorted).set_device('CPU')
return topk_op(input, k)

__all__.append('topk')

dyn_shape_op = ops.TensorShape().set_device('CPU')
def dyn_shape(self):
return dyn_shape_op(self)

__all__.append('dyn_shape')

bitwise_and_op = ops.BitwiseAnd().set_device('CPU')
def bitwise_and_scalar(input, other):
return bitwise_and_op(input, other)

bitwise_right_shift_op = ops.RightShift().set_device('CPU')
def bitwise_right_shift(input, other):
if isinstance(input, numbers.Number):
if not isinstance(input, int):
raise TypeError(f"For 'bitwise_left_shift', 'input' must be an integer, but got input:{type(input)}.")
input = cast(input, other.dtype)
elif isinstance(other, numbers.Number):
if not isinstance(other, int):
raise TypeError(f"For 'bitwise_left_shift', 'other' must be an integer, but got other:{type(other)}.")
other = cast(other, input.dtype)
return bitwise_right_shift_op(input, other)

__all__.append('bitwise_right_shift')

embedding_op = ops.Gather().set_device('CPU')
def embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq):
return embedding_op(weight, input, 0)

__all__.append('embedding')


def randn(size, seed, offset, dtype):
rand_op = ops.StandardNormal()
output = rand_op(size)
return output

__all__.append('randn')


+ 2
- 0
mindnlp/core/_prims/cpu/__init__.py View File

@@ -0,0 +1,2 @@
from . import ms
from . import numpy

+ 155
- 0
mindnlp/core/_prims/cpu/ms.py View File

@@ -0,0 +1,155 @@
import re
import inspect
import ctypes
import numpy as np
from mindnlp import core
from mindspore import ops
from mindspore.ops.auto_generate import gen_ops_prim
from mindspore._c_expression import _empty_instance
from mindspore.ops._primitive_cache import _get_cache_prim
from mindspore.ops.operations._grad_ops import StridedSliceGrad

gen_ops_list = list(filter(lambda s: s.startswith("pyboost"), dir(gen_ops_prim)))

__all__ = []

def camel_to_snake_case(camel_case_str):
snake_case_str = re.sub(r'(?<!^)(?=[A-Z])', '_', camel_case_str).lower()
return snake_case_str

op_func_no_init = '''
def {name}(*args):
op = _get_cache_prim(ops.{op})().set_device('CPU')
return op(*args)
'''

op_func_with_init = '''
def {name}(*args):
op = _get_cache_prim(ops.{op})(*args[-{idx}:]).set_device('CPU')
return op(*args[:-{idx}])
'''


for pyboost_op_name in gen_ops_list:
op_name = pyboost_op_name.replace('pyboost_', '') + '_op'
func_name = op_name.replace('_op', '')
op_instance = getattr(gen_ops_prim, op_name, None)
if op_instance is not None:
__all__.append(func_name)
globals()[func_name] = getattr(gen_ops_prim, op_name).__class__().set_device('CPU')

def empty(*args, **kwargs):
return _empty_instance(*args, **kwargs, device='CPU')

__all__.append('empty')

old_op_list = list(filter(lambda s: s[0].isupper(), dir(ops)))
for old_op_name in old_op_list:
if old_op_name in ['P', 'Print', 'Assert', 'Custom', 'CustomOpBuilder', 'DataType', 'ReduceOp', 'TBERegOp', 'Tensor']:
continue
# print(old_op_name)
ops_class = getattr(ops, old_op_name, None)
init_signature = inspect.signature(ops_class.__init__)
if len(init_signature.parameters) > 1:
name = camel_to_snake_case(old_op_name)
init_args = list(init_signature.parameters.keys())
init_args.pop(0)
exec(op_func_with_init.format(name=name, op=old_op_name, idx=len(init_args)), globals())

else:
name = camel_to_snake_case(old_op_name)
exec(op_func_no_init.format(name=name, op=old_op_name), globals())

__all__.append(name)
# print(old_op_name, init_signature.parameters, call_signature.parameters)
# print(old_op_name, len(init_signature.parameters), len(call_signature.parameters))
# break

# normal_op = ops.StandardNormal().set_device('CPU')
# def normal(size):
# return normal_op(size)

# __all__.append('normal')
dyn_shape_op = ops.TensorShape().set_device('CPU')
def dyn_shape(self):
return dyn_shape_op(self)

__all__.append('dyn_shape')

# def strided_slice(input, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask):
# strided_slice_op = ops.StridedSlice(begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask).set_device('CPU')
# return strided_slice_op(input, begin, end, strides)

# __all__.append('strided_slice')

# def broadcast_to(input, shape):
# broadcast_to_op = ops.BroadcastTo(shape).set_device('CPU')
# return broadcast_to_op(input)

# __all__.append('broadcast_to')

def strided_slice_grad(input, begin, end, strides, update, begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0):
strided_slice_grad = _get_cache_prim(StridedSliceGrad)(begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask).set_device('CPU')
return strided_slice_grad(update, input.shape, begin, end, strides)

__all__.append('strided_slice_grad')

# full_op = ops.FillV2().set_device('CPU')
# def full(shape, value):
# return full_op(shape, value)

# __all__.append('full')

def numpy_to_tensor_overwrite(np_array, torch_tensor):
if not np_array.flags.c_contiguous:
np_array = np.ascontiguousarray(np_array)

tensor_ptr = torch_tensor.data_ptr()
ctypes.memmove(tensor_ptr, np_array.ctypes.data, torch_tensor.nbytes)
return torch_tensor

def inplace_uniform(input, from_, to_, seed, offset):
np.random.seed(seed.item())
out = np.random.uniform(from_, to_, input.shape).astype(core.dtype2np[input.dtype])
numpy_to_tensor_overwrite(out, input)
return input

__all__.append('inplace_uniform')

def inplace_normal(input, mean, std, seed, offset):
np.random.seed(seed.item())
out = np.random.normal(mean, std, input.shape).astype(core.dtype2np[input.dtype])
numpy_to_tensor_overwrite(out, input)

return input

__all__.append('inplace_normal')

# class GetItem(core.autograd.Function):
# @staticmethod
# def forward(ctx, input, slice):
# if isinstance(slice, tuple):
# new_slice = ()
# for s in slice:
# if isinstance(s, core.Tensor):
# s = s.numpy()
# new_slice += (s,)
# else:
# new_slice = slice
# out = input.asnumpy()[new_slice]

# ctx.save_for_backward(input)
# ctx.slice = slice
# if not isinstance(out, np.ndarray):
# out = np.array(out)
# return core.Tensor.from_numpy(out)

# @staticmethod
# def backward(ctx, grad_output):
# input, = ctx.saved_tensors
# slice = ctx.slice
# grad_input = core.zeros_like(input)
# grad_input[slice] = grad_output
# return grad_input
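
The in-place fills above generate values with NumPy and then memmove the raw bytes into the tensor's existing buffer. The same trick is shown below between two NumPy buffers so the sketch stays self-contained (shapes and dtypes must match, and the source must be C-contiguous):

import ctypes
import numpy as np

dst = np.zeros((2, 3), dtype=np.float32)           # stands in for the tensor's storage
src = np.random.uniform(0.0, 1.0, dst.shape).astype(dst.dtype)
src = np.ascontiguousarray(src)                    # memmove copies contiguous bytes

ctypes.memmove(dst.ctypes.data, src.ctypes.data, dst.nbytes)
# dst now holds the uniform samples without its buffer being reallocated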

mindnlp/core/_prims/numpy.py → mindnlp/core/_prims/cpu/numpy.py View File


+ 38
- 18
mindnlp/core/_tensor.py View File

@@ -24,7 +24,12 @@ from ._bind import get_device_in_context, device_, get_default_dtype
from ._utils import _rebuild_tensor_v2
from ._C.size import Size
from .configs import DEVICE_TARGET, CPU_USE_NUMPY_OP
from .dispatcher import device_map
device_map = {
'cpu': 'CPU',
'npu': 'Ascend',
'cuda': 'GPU'
}
if DEVICE_TARGET == 'Ascend':
import acl
@@ -134,6 +139,8 @@ def tensor(data, *, dtype=None, device=None, requires_grad=False):
if isinstance(data, float) and data == float('-inf'):
data = core.finfo(get_default_dtype()).min
elif isinstance(data, list) and float('-inf') in data:
data = [core.finfo(get_default_dtype()).min if d == float('-inf') else d for d in data]
if dtype is not None:
tensor = Tensor(data, dtype=dtype)
@@ -145,7 +152,8 @@ def tensor(data, *, dtype=None, device=None, requires_grad=False):
device.type = 'npu'
if device.type not in ['meta', 'cpu']:
tensor = tensor.to(device)
tensor.requires_grad_(requires_grad)
if requires_grad:
tensor.requires_grad_(requires_grad)
return tensor
def scalar_tensor(*args, **kwargs):
@@ -203,7 +211,7 @@ class TensorPlaceHolder:
return self.shape[0]
def __repr__(self) -> str:
# self.data_sync(True)
self.data_sync(True)
return Tensor_.__repr__(self)[:-1] + f', device={self.device})'
def __format__(self, format_spec):
@@ -226,16 +234,19 @@ class TensorPlaceHolder:
if isinstance(s, range):
s = list(s)
if isinstance(s, np.ndarray):
s = tensor(s)
s = tensor(s, device=self.device)
new_slices += (s,)
slices = new_slices
if self.device.type == 'npu':
out = ops.tensor_getitem(self, slices)
elif self.device.type == 'meta':
out = ops.getitem_np(self, slices)
else:
if CPU_USE_NUMPY_OP:
out = ops.getitem_np(self, slices)
else:
out = ops.getitem(self, slices)
# if CPU_USE_NUMPY_OP:
# out = ops.getitem_np(self, slices)
# else:
# out = ops.getitem(self, slices)
out = ops.tensor_getitem(self, slices)
out._device = self.device
return out
@@ -264,7 +275,11 @@ class TensorPlaceHolder:
if self.device.type == 'meta':
return self
elif self.device.type == 'npu':
if value.device != self.device:
value._device = self.device
if self.device.type == 'npu':
if value.device != self.device:
value._device = self.device
out = ops.tensor_setitem(self, slices, value)
@@ -913,6 +928,7 @@ class TensorPlaceHolder:
def data(self):
out = Tensor(self)
out._device = self.device
out._base = self
return out
@data.setter
@@ -920,13 +936,15 @@ class TensorPlaceHolder:
if isinstance(self, StubTensor) and isinstance(new_value, StubTensor):
self.stub = new_value.stub
else:
if self.device.type == 'cpu' and new_value.device.type == 'cpu' \
and self.shape == new_value.shape and self.dtype == new_value.dtype:
src_ct = ctypes.c_void_p(new_value.data_ptr())
dst_ct = ctypes.c_void_p(self.data_ptr())
ctypes.memmove(dst_ct, src_ct, self.nbytes)
else:
self.assign_value(new_value)
# if self.device.type == 'cpu' and new_value.device.type == 'cpu' \
# and self.shape == new_value.shape and self.dtype == new_value.dtype:
# src_ct = ctypes.c_void_p(new_value.data_ptr())
# dst_ct = ctypes.c_void_p(self.data_ptr())
# ctypes.memmove(dst_ct, src_ct, self.nbytes)
# else:
if getattr(self, '_base', None) is not None:
self._base.assign_value(new_value)
self.assign_value(new_value)
self._device = new_value.device
# Tensor.data_ptr
@@ -970,7 +988,8 @@ class TensorPlaceHolder:
# Tensor.diagonal_scatter
# Tensor.fill_diagonal_
def fill_diagonal_(self, value, wrap=False):
return ops.inplace_fill_diagonal(self, value, wrap)
# Tensor.fmax
@@ -1092,7 +1111,8 @@ class TensorPlaceHolder:
return self.expand(other.size())
# Tensor.exponential_
def exponential_(self, lambd=1, *, generator=None):
return ops.inplace_exponential(self, lambd, generator)
# Tensor.fix


+ 13
- 3
mindnlp/core/configs.py View File

@@ -8,11 +8,12 @@ SUPPORT_BF16 = DEVICE_TARGET == 'Ascend' and SOC not in ['ascend910', 'ascend310
ON_A1 = SOC == 'ascend910'
ON_A2 = SOC in ['ascend910b', 'ascend910_93']
ON_ORANGE_PI = '310b' in SOC
USE_PYBOOST = DEVICE_TARGET == 'Ascend'
DEFAULT_DTYPE = mindspore.float32
MS27 = '.'.join(mindspore.__version__.split('.')[:2]) >= '2.7'

CPU_USE_NUMPY_OP = DEVICE_TARGET != 'CPU'
# OP backend select
USE_PYBOOST = True
CPU_USE_NUMPY_OP = False

def set_pyboost(mode: bool):
"""set global pyboost"""
@@ -21,4 +22,13 @@ def set_pyboost(mode: bool):

def use_pyboost():
"""set global pyboost"""
return USE_PYBOOST
return USE_PYBOOST

def set_cpu_use_numpy(mode: bool):
"""set global pyboost"""
global CPU_USE_NUMPY_OP
CPU_USE_NUMPY_OP = mode

def cpu_use_numpy():
"""set global pyboost"""
return CPU_USE_NUMPY_OP
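
For context, a minimal usage sketch of the two toggles added above (the import path follows this file's location in the tree; the call sites are illustrative, not taken from this PR):

from mindnlp.core import configs

configs.set_pyboost(False)         # flips the flag returned by use_pyboost()
configs.set_cpu_use_numpy(True)    # flips the flag returned by cpu_use_numpy()
assert configs.use_pyboost() is False
assert configs.cpu_use_numpy() is True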

+ 0
- 0
mindnlp/core/cpu/__init__.py View File


+ 10
- 1
mindnlp/core/cuda/__init__.py View File

@@ -6,7 +6,9 @@ from mindspore.runtime import memory_reserved as ms_memory_reserved, \
memory_allocated as ms_memory_allocated, StreamCtx as StreamContext, Stream, empty_cache, \
reset_peak_memory_stats, reset_max_memory_allocated, max_memory_allocated, synchronize, \
current_stream
from mindspore.device_context.gpu import device_count
from mindspore.device_context.gpu import device_count as ms_device_count
from mindspore.hal import get_device_properties
from mindspore.communication import GlobalComm, get_group_size
from mindnlp import core
@@ -14,6 +16,13 @@ FloatTensor = core.FloatTensor
HalfTensor = core.FloatTensor
BFloat16Tensor = core.BFloat16Tensor
def device_count():
if not is_available():
return 0
if GlobalComm.INITED:
return get_group_size()
return 1
def manual_seed_all(seed: int):
manual_seed(seed)


+ 11
- 123
mindnlp/core/dispatcher.py View File

@@ -1,99 +1,8 @@
from mindnlp import core
from ._prims import ascend, cpu, numpy, meta, ascend_310b
from .configs import DEVICE_TARGET, CPU_USE_NUMPY_OP, SOC
from ._apis import npu, cpu, gpu, meta
from .configs import DEVICE_TARGET, SOC
from ._bind import is_autocast_enabled

device_map = {"cpu": "CPU", "npu": "Ascend", "cuda": "GPU"}

"""
__matmul__, addbmm, addmm, addmv, addr, baddbmm, bmm, chain_matmul, multi_dot,
conv1d, conv2d, conv3d, conv_transpose1d, conv_transpose2d, conv_transpose3d, GRUCell,
linear, LSTMCell, matmul, mm, mv, prelu, RNNCell
"""
AMP_AUTO_WHITE_LIST = [
"dense",
"matmul",
"addbmm",
"addmm",
"addmv",
"addr",
"baddbmm",
"bmm",
"chain_matmul",
"multi_dot",
"conv1d",
"conv2d",
"conv3d",
"conv_transpose1d",
"conv_transpose2d",
"conv_transpose3d",
"mm",
"mv",
"prelu",
]


"""
__pow__, __rdiv__, __rpow__, __rtruediv__, acos, asin, binary_cross_entropy_with_logits,
cosh, cosine_embedding_loss, cdist, cosine_similarity, cross_entropy,
cumprod, cumsum, dist, erfinv, exp, expm1, group_norm, hinge_embedding_loss,
kl_div, l1_loss, layer_norm, log, log_softmax, log10, log1p, log2, margin_ranking_loss, mse_loss,
multilabel_margin_loss, multi_margin_loss, nll_loss, norm, normalize, pdist, poisson_nll_loss,
pow, prod, reciprocal, rsqrt, sinh, smooth_l1_loss, soft_margin_loss, softmax, softmin, softplus,
sum, renorm, tan, triplet_margin_loss
"""

AMP_AUTO_BLACK_LIST = [
'acos',
'asin',
'binary_cross_entropy_with_logits',
'cosh',
'cosine_embedding_loss',
'cdist',
'cosine_similarity',
'cross_entropy',
'cumprod',
'cumsum',
'dist',
'erfinv',
'exp',
'expm1',
'group_norm',
'hinge_embedding_loss',
'kl_div',
'l1_loss',
'layer_norm',
'log',
'log_softmax',
'log10',
'log1p',
'log2',
'margin_ranking_loss',
'mse_loss',
'multilabel_margin_loss',
'multi_margin_loss',
'nll_loss',
'norm',
'normalize',
'pdist',
'poisson_nll_loss',
'pow',
'prod',
'reciprocal',
'rsqrt',
'sinh',
'smooth_l1_loss',
'soft_margin_loss',
'softmax',
'softmin',
'softplus',
'sum',
'renorm',
'tan',
'triplet_margin_loss',
]


class SingletonMeta(type):
_instances = {}

@@ -104,13 +13,14 @@ class SingletonMeta(type):
return cls._instances[cls]


class Dispatcher(metaclass=SingletonMeta):
def __init__(self):
self._registry = {"cpu": {}, "npu": {}, "gpu": {}, "numpy": {}, "meta": {}}
def register(self, func_name, device, func):
self._registry[device][func_name] = func
api_map = {
'cpu': cpu,
'npu': npu,
'meta': meta,
'cuda': gpu
}

class Dispatcher(metaclass=SingletonMeta):
def dispatch(self, func_name, *args, **kwargs):
device = kwargs.pop("device", None)
if isinstance(device, str):
@@ -138,16 +48,8 @@ class Dispatcher(metaclass=SingletonMeta):

device_type = device.type

if CPU_USE_NUMPY_OP and device_type == "cpu":
device_type = "numpy"

# if is_autocast_enabled(device_type):
# if func_name in AMP_AUTO_WHITE_LIST or func_name.replace('_ext', '') in AMP_AUTO_WHITE_LIST:
# func_name = func_name + "_fp16"

# elif func_name in AMP_AUTO_BLACK_LIST or func_name.replace('_ext', '') in AMP_AUTO_BLACK_LIST:
# func_name = func_name + "_fp32"
func = self._registry[device_type].get(func_name, None)
# func = self._registry[device_type].get(func_name, None)
func = getattr(api_map[device_type], func_name, None)
if func is None:
raise RuntimeError(
f"No implementation for function: {func_name} on {device_type}."
@@ -156,18 +58,4 @@ class Dispatcher(metaclass=SingletonMeta):


dispatcher = Dispatcher()
if SOC == "ascend310b":
for func_name in ascend_310b.__all__:
dispatcher.register(func_name, "npu", getattr(ascend_310b, func_name))
else:
for func_name in ascend.__all__:
dispatcher.register(func_name, "npu", getattr(ascend, func_name))

for func_name in cpu.__all__:
dispatcher.register(func_name, "cpu", getattr(cpu, func_name))

for func_name in numpy.__all__:
dispatcher.register(func_name, "numpy", getattr(numpy, func_name))

for func_name in meta.__all__:
dispatcher.register(func_name, "meta", getattr(meta, func_name))

+ 0
- 3
mindnlp/core/executor.py View File

@@ -2,9 +2,6 @@ from mindnlp import core
from .dispatcher import dispatcher

def execute(func_name, *args, **kwargs):
requires_grad = kwargs.pop('requires_grad', False)
user_created = kwargs.pop('user_created', False)

out, device = dispatcher.dispatch(func_name, *args, **kwargs)
if not isinstance(out, (tuple, list)):
out._device = device


+ 79
- 96
mindnlp/core/nn/functional.py View File

@@ -6,18 +6,15 @@ from typing import Optional, Tuple, List
from mindnlp import core
from mindnlp.core.executor import execute
from mindnlp.core._C import default_generator
from mindnlp.core.nn.modules.utils import _pair
from ..configs import ON_ORANGE_PI, use_pyboost, ON_A1, ON_A2
from ..configs import ON_A2, ON_A1
generator_step_ = 12
def gelu(input, *, approximate='none'):
if input.device.type == 'npu':
return execute('gelu_ext', input, approximate)
if approximate == 'tanh':
return execute('gelu', input)
return input * 0.5 * (1.0 + core.erf(input / core.sqrt(2.0)))
return execute('gelu', input, approximate)
def relu(input, inplace=False):
if inplace:
@@ -53,13 +50,13 @@ def glu(input, dim=-1):
return execute('glu', input, dim)
def softplus(input, beta=1, threshold=20):
return execute('softplus_ext', input, beta, threshold)
return execute('softplus', input, beta, threshold)
def logsigmoid(input):
return execute('logsigmoid', input)[0]
def leaky_relu(input, alpha=0.2):
return execute('leaky_relu_ext', input, alpha)
return execute('leaky_relu', input, alpha)
def prelu(input, weight):
return execute('prelu', input, weight)
@@ -114,11 +111,6 @@ def avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False, coun
return execute('avg_pool2d', input, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
def avg_pool3d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None):
if use_pyboost() and has_avg_pool3d:
return mint.nn.functional.avg_pool3d(input, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
if divisor_override is None:
divisor_override = 0
return ops.avg_pool3d(input, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
@@ -126,12 +118,15 @@ def adaptive_avg_pool1d(input, output_size):
return execute('adaptive_avg_pool1d', input, output_size)
def adaptive_avg_pool2d(input, output_size):
return execute('adaptive_avg_pool2d_ext', input, output_size)
return execute('adaptive_avg_pool2d', input, output_size)
def dropout(input, p=0.5, training=True, inplace=False):
if not training or p==0:
return input
out, _ = execute('dropout_ext', input, p)
seed, offset = default_generator._step(generator_step_)
seed._device = input.device
offset._device = input.device
out, _ = execute('dropout', input, p, seed, offset)
if inplace:
input.copy_(out)
return input
@@ -143,21 +138,7 @@ def dropout2d(input, p=0.5, training=False):
out, _ = execute('dropout2d', input, p)
return out
def drop_and_mask(keep_prob, seed=None):
seed0, seed1 = _get_seed(seed, "dropout")
dropout_op = ops.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)
dropout_op = _set_prim_op_user_data(dropout_op, "random_cache", False)
out, mask = dropout_op(input)
return out, mask
def linear(input, weight, bias=None):
if ON_ORANGE_PI:
input = input.to(core.float16)
weight = weight.to(core.float16)
if bias is not None:
bias = bias.to(core.float16)
return execute('dense', input, weight) + bias
return execute('dense', input, weight)
return execute('dense', input, weight, bias)
def binary_cross_entropy_with_logits(input, target, weight=None, reduction='mean', pos_weight=None):
@@ -165,31 +146,36 @@ def binary_cross_entropy_with_logits(input, target, weight=None, reduction='mean
target = target.unsqueeze(1).expand_as(input).to(input.dtype)
return execute('binary_cross_entropy_with_logits', input, target, weight, pos_weight, reduction)
return ops.binary_cross_entropy_with_logits(input, target.astype(input.dtype), weight, pos_weight, reduction)
def gumbel_softmax(logits: core.Tensor, tau: float = 1, hard: bool = False, eps: float = 1e-10, dim: int = -1) -> core.Tensor:
if eps != 1e-10:
warnings.warn("`eps` parameter is deprecated and has no effect.")
uniform_samples = _get_cache_prim(ops.UniformReal)()(logits.shape)
gumbels = -ops.log(-ops.log(uniform_samples + eps) + eps) # ~Gumbel(0, 1)
if eps != 1e-10:
warnings.warn("`eps` parameter is deprecated and has no effect.")
gumbels = (
-core.empty_like(logits, memory_format=core.legacy_contiguous_format)
.exponential_()
.log()
) # ~Gumbel(0,1)
gumbels = (logits + gumbels) / tau # ~Gumbel(logits,tau)
y_soft = softmax(gumbels, dim)
y_soft = gumbels.softmax(dim)
if hard:
# Straight through.
index = y_soft.argmax(dim)
y_hard = one_hot(index, logits.shape[dim])
ret = ops.stop_gradient(y_hard - y_soft) + y_soft
index = y_soft.max(dim, keepdim=True)[1]
y_hard = core.zeros_like(
logits, memory_format=core.legacy_contiguous_format
).scatter_(dim, index, 1.0)
ret = y_hard - y_soft.detach() + y_soft
else:
# Reparametrization trick.
ret = y_soft
return ret
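
The noise above relies on the identity that if E ~ Exponential(1) then -log(E) ~ Gumbel(0, 1), i.e. the same distribution as the old -log(-log(U)) construction with U ~ Uniform(0, 1). A quick numpy self-check (illustrative, not part of the PR):

import numpy as np

rng = np.random.default_rng(0)
u = rng.uniform(size=100_000)                        # U ~ Uniform(0, 1)
g_classic = -np.log(-np.log(u))                      # textbook Gumbel(0, 1) sample
g_via_exp = -np.log(rng.exponential(size=100_000))   # -log(Exp(1)), the form used above
print(g_classic.mean(), g_via_exp.mean())            # both approach ~0.5772 (Euler-Mascheroni)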
def log_softmax(input, dim=None, dtype=None):
if input.device.type == 'cpu':
return execute('log_softmax', input, dim)
return execute('log_softmax_ext', input, dim, dtype)
return execute('log_softmax', input, dim, dtype)
def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False):
return execute('embedding', input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)
@@ -309,6 +295,12 @@ def pad(input, pad, mode='constant', value=None):
if mode == "replicate":
mode = "edge"
return execute('pad_v3', input, new_pad, mode)
if input.dtype.is_floating_point:
value = float(value)
elif input.dtype == core.bool:
value = bool(value)
elif input.dtype in [core.int32, core.int64]:
value = int(value)
return execute('pad_v3', input, new_pad, mode, value)
out = input
if (isinstance(pad, tuple) and not pad):
@@ -332,9 +324,9 @@ def pad(input, pad, mode='constant', value=None):
return out
def nll_loss(input, target, weight=None, ignore_index=-100, reduction='mean'):
if input.device.type == 'npu':
return _nllloss_nd(input, target, weight, ignore_index, reduction)
return _inner_nll_loss(input, target, weight, ignore_index, reduction)
# if input.device.type == 'npu':
return _nllloss_nd(input, target, weight, ignore_index, reduction)
# return _inner_nll_loss(input, target, weight, ignore_index, reduction)
def _inner_nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
ndim = inputs.ndim
@@ -362,29 +354,29 @@ def _nll_loss(inputs, target, target_dim=-1, weight=None, ignore_index=None, red
if target.ndim == inputs.ndim - 1:
target = target.expand_dims(target_dim)
if ignore_index is not None:
non_pad_mask = ops.equal(target, ignore_index)
target = target.masked_fill(non_pad_mask, ops.cast(0, target.dtype))
non_pad_mask = core.eq(target, ignore_index)
target = target.masked_fill(non_pad_mask, core.cast(0, target.dtype))
else:
non_pad_mask = target
if weight is not None:
loss_weights = ops.gather(weight, target, 0)
loss_weights = core.gather(weight, target, 0)
orig_shape = inputs.shape
if inputs.ndim != 2:
inputs = inputs.view(orig_shape[:2] + (-1,))
weight = weight.view(weight.shape + (1,))
weighted_inputs = inputs * weight
weighted_inputs = weighted_inputs.view(orig_shape)
loss = ops.neg(ops.gather_d(weighted_inputs, target_dim, target))
smooth_loss = ops.neg(weighted_inputs.sum(axis=target_dim, keepdims=True))
loss = core.neg(core.gather_d(weighted_inputs, target_dim, target))
smooth_loss = core.neg(weighted_inputs.sum(axis=target_dim, keepdims=True))
else:
loss = ops.neg(ops.gather_d(inputs, target_dim, target))
smooth_loss = ops.neg(inputs.sum(axis=target_dim, keepdims=True))
loss_weights = ops.ones_like(loss)
loss = core.neg(core.gather_d(inputs, target_dim, target))
smooth_loss = core.neg(inputs.sum(axis=target_dim, keepdims=True))
loss_weights = core.ones_like(loss)
if ignore_index is not None:
loss = loss.masked_fill(non_pad_mask, ops.cast(0, loss.dtype))
loss_weights = loss_weights.masked_fill(non_pad_mask, ops.cast(0, loss_weights.dtype))
smooth_loss = smooth_loss.masked_fill(non_pad_mask, ops.cast(0, smooth_loss.dtype))
loss = loss.masked_fill(non_pad_mask, core.cast(0, loss.dtype))
loss_weights = loss_weights.masked_fill(non_pad_mask, core.cast(0, loss_weights.dtype))
smooth_loss = smooth_loss.masked_fill(non_pad_mask, core.cast(0, smooth_loss.dtype))
loss = loss.squeeze(target_dim)
smooth_loss = smooth_loss.squeeze(target_dim)
@@ -493,7 +485,7 @@ def _cross_entropy_for_class_indices(input, target, weight, ingore_index, reduct
else:
smooth_loss = -input.sum(class_dim)
ignore_mask = core.eq(target, ingore_index)
smooth_loss = core.masked_fill(smooth_loss, ignore_mask, 0)
smooth_loss = core.masked_fill(smooth_loss, ignore_mask, 0.)
if reduction == "mean":
true_mask = ~ignore_mask
if weight is not None:
@@ -519,10 +511,10 @@ def _cross_entropy_for_class_indices(input, target, weight, ingore_index, reduct
def mse_loss(input, target, reduction='mean'):
return execute('mse_loss_ext', input, target, reduction)
return execute('mse_loss', input, target, reduction)
def l1_loss(input, target, reduction='mean'):
return execute('l1_loss_ext', input, target, reduction)
return execute('l1_loss', input, target, reduction)
def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
input = input.to(core.float32)
@@ -534,10 +526,6 @@ def kl_div(logits, labels, reduction='mean', log_target=False):
labels = ops.log(labels)
return ops.kl_div(logits, labels, reduction)
def manual_softmax(x, dim=-1):
exp_x = ops.exp(x - ops.max(x, axis=dim, keepdims=True)[0])
return exp_x / ops.sum(exp_x, dim=dim, keepdim=True)
def softmax(input, dim=-1, *, dtype=None):
if dtype is not None:
input = input.to(dtype)
@@ -549,7 +537,7 @@ def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-5):
weight = core.ones(normalized_shape, dtype=input.dtype, device=input.device)
if bias is None:
bias = core.zeros(normalized_shape, dtype=input.dtype, device=input.device)
return execute('layer_norm_ext', input, normalized_shape, weight, bias, eps)[0]
return execute('layer_norm', input, normalized_shape, weight, bias, eps)[0]
def interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None, antialias=False):
@@ -749,7 +737,7 @@ def batch_norm(input, running_mean, running_var, weight=None, bias=None, trainin
bias = core.zeros(input.shape[1], dtype=input.dtype, device=input.device)
return execute(
'batch_norm_ext',
'batch_norm',
input,
running_mean,
running_var,
@@ -763,17 +751,17 @@ def batch_norm(input, running_mean, running_var, weight=None, bias=None, trainin
def conv1d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
if isinstance(padding, str):
return execute('conv1d_padding', input, weight, bias, stride, padding, dilation, groups)
return execute('conv1d_ext', input, weight, bias, stride, padding, dilation, groups)
return execute('conv1d', input, weight, bias, stride, padding, dilation, groups)
def conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
if isinstance(padding, str):
return execute('conv2d_padding', input, weight, bias, stride, padding, dilation, groups)
return execute('conv2d_ext', input, weight, bias, stride, padding, dilation, groups)
return execute('conv2d', input, weight, bias, stride, padding, dilation, groups)
def conv3d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
if isinstance(padding, str):
return execute('conv3d_padding', input, weight, bias, stride, padding, dilation, groups)
return execute('conv3d_ext', input, weight, bias, stride, padding, dilation, groups)
return execute('conv3d', input, weight, bias, stride, padding, dilation, groups)
pad_mode = 'pad'
pad = padding
@@ -836,8 +824,7 @@ def _deconv_output_length(pad_mode, filter_size, stride_size, dilation_size, pad
return length
def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1):
if use_pyboost():
return execute('conv_transpose2d', input, weight, bias, stride, padding, output_padding, groups, dilation)
return execute('conv_transpose2d', input, weight, bias, stride, padding, output_padding, groups, dilation)
# pad_mode = 'pad'
# pad = padding
@@ -927,7 +914,7 @@ def max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode
input_ndim = input.ndim
if input_ndim == 3:
input = input.unsqueeze(1)
out = execute('max_pool2d', input, kernel_size, stride, padding, dilation, ceil_mode=ceil_mode, return_indices=return_indices)
out = execute('max_pool2d', input, kernel_size, stride, padding, dilation, ceil_mode, return_indices)
if input_ndim == 3:
out = out.squeeze(1)
return out
@@ -959,28 +946,26 @@ def group_norm(input, num_groups, weight=None, bias=None, eps=1e-5):
weight = core.ones([input.shape[1]], dtype=input.dtype, device=input.device)
if bias is None:
bias = core.zeros([input.shape[1]], dtype=input.dtype, device=input.device)
return execute('group_norm', input, num_groups, weight, bias, eps)[0]
# input_shape = input.shape
# N = input_shape[0]
# C = input_shape[1]
# input_reshaped = input.view(1, N * num_groups, -1 if N!=0 else 1)
# outputs = batch_norm(input_reshaped, None, None, None, None, True, 0., eps)
# out = outputs.view(input_shape)
# affine_param_shape = [1] * input.ndim
# affine_param_shape[1] = C
# affine_param_shape = tuple(affine_param_shape)
# if weight is not None and bias is not None:
# if not ON_ORANGE_PI:
# out = bias.view(affine_param_shape).addcmul(out, weight.view(affine_param_shape), 1)
# else:
# out = core.addcmul(bias.view(affine_param_shape), out, weight.view(affine_param_shape), value=1)
# elif weight is not None:
# out = out.mul(weight.view(affine_param_shape))
# elif bias is not None:
# out = out.add(bias.view(affine_param_shape))
# return out
if input.device.type == 'npu':
return execute('group_norm', input, num_groups, weight, bias, eps)[0]
input_shape = input.shape
N = input_shape[0]
C = input_shape[1]
input_reshaped = input.view(1, N * num_groups, -1 if N!=0 else 1)
outputs = batch_norm(input_reshaped, None, None, None, None, True, 0., eps)
out = outputs.view(input_shape)
affine_param_shape = [1] * input.ndim
affine_param_shape[1] = C
affine_param_shape = tuple(affine_param_shape)
if weight is not None and bias is not None:
out = core.addcmul(bias.view(affine_param_shape), out, weight.view(affine_param_shape), value=1)
elif weight is not None:
out = out.mul(weight.view(affine_param_shape))
elif bias is not None:
out = out.add(bias.view(affine_param_shape))
return out
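
The restored CPU branch uses a standard reshape trick: viewing the input as (1, N * num_groups, -1) turns each (sample, group) slice into one "channel", so a training-mode batch_norm with zero momentum yields exactly the GroupNorm statistics, and the affine weight/bias are applied afterwards per real channel. A numpy sketch of that equivalence under assumed shapes:

import numpy as np

N, C, L, G, eps = 2, 6, 5, 3, 1e-5
x = np.random.randn(N, C, L).astype(np.float32)

# Direct group norm: statistics over each (sample, group) slice.
xg = x.reshape(N, G, -1)
direct = ((xg - xg.mean(-1, keepdims=True)) / np.sqrt(xg.var(-1, keepdims=True) + eps)).reshape(N, C, L)

# Reshape trick: each (sample, group) slice becomes one channel of a batch of size 1,
# which is what per-channel batch_norm statistics normalize over.
xb = x.reshape(1, N * G, -1)
trick = ((xb - xb.mean(-1, keepdims=True)) / np.sqrt(xb.var(-1, keepdims=True) + eps)).reshape(N, C, L)

assert np.allclose(direct, trick, atol=1e-6)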
def _in_projection(
@@ -1576,18 +1561,16 @@ def _none_or_dtype(input: Optional[core.Tensor]) -> Optional[int]:
raise RuntimeError("input to _none_or_dtype() must be None or core.Tensor")
def unfold(input, kernel_size, dilation=1, padding=0, stride=1):
if ON_A1:
return execute('im2col', input, kernel_size, dilation, padding, stride)
return execute('im2col_ext', input, kernel_size, dilation, padding, stride)
return execute('im2col', input, _pair(kernel_size), _pair(dilation), _pair(padding), _pair(stride))
def fold(input, output_size, kernel_size, dilation=1, padding=0, stride=1):
return execute('col2im_ext', input, output_size, kernel_size, dilation, padding, stride)
return execute('col2im', input, output_size, kernel_size, dilation, padding, stride)
def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reduction='mean', zero_infinity=False):
return execute('ctc_loss', log_probs, targets, input_lengths, target_lengths, blank, reduction, zero_infinity)
def one_hot(tensor, num_classes=-1):
return execute('one_hot_ext', tensor, num_classes)
return execute('one_hot', tensor, num_classes)
def pixel_shuffle(input, upscale_factor):
return execute('pixel_shuffle', input, upscale_factor)


+ 0
- 1
mindnlp/core/nn/init.py View File

@@ -565,7 +565,6 @@ def kaiming_uniform_(
with core.no_grad():
return tensor.uniform_(-bound, bound, generator=generator)
def kaiming_normal_(
tensor: Tensor,
a: float = 0,


+ 5
- 5
mindnlp/core/nn/modules/adaptive.py View File

@@ -5,10 +5,10 @@ from collections import namedtuple
from typing import List, Sequence
from mindnlp.core import Tensor
import mindnlp.core.nn.functional as F
from . import Sequential, ModuleList, Linear
from .module import Module
from ..functional import log_softmax
from ... import ops
__all__ = ['AdaptiveLogSoftmaxWithLoss']
@@ -223,7 +223,7 @@ class AdaptiveLogSoftmaxWithLoss(Module):
cluster_index = self.shortlist_size + i - 1
gather_inds = ops.index_fill(gather_inds, 0, row_indices, cluster_index)
cluster_logprob = log_softmax(cluster_output, dim=1)
cluster_logprob = F.log_softmax(cluster_output, dim=1)
local_logprob = cluster_logprob.gather(1, relative_target.unsqueeze(1))
output = ops.index_add(output, 0, row_indices, local_logprob.squeeze(1))
@@ -235,7 +235,7 @@ class AdaptiveLogSoftmaxWithLoss(Module):
"were found. ")
head_output = self.head(input)
head_logprob = log_softmax(head_output, dim=1)
head_logprob = F.log_softmax(head_output, dim=1)
output += ops.gather(head_logprob, 1, gather_inds.unsqueeze(1)).squeeze()
loss = (-output).mean()
@@ -247,13 +247,13 @@ class AdaptiveLogSoftmaxWithLoss(Module):
def _get_full_log_prob(self, input, head_output):
"""Given input tensor, and output of ``self.head``, compute the log of the full distribution."""
out = ops.zeros((head_output.shape[0], self.n_classes), dtype=input.dtype)
head_logprob = log_softmax(head_output, dim=1)
head_logprob = F.log_softmax(head_output, dim=1)
out[:, :self.shortlist_size] = head_logprob[:, :self.shortlist_size]
for i, (start_idx, stop_idx) in enumerate(zip(self.cutoffs, self.cutoffs[1:])):
cluster_output = self.tail[i](input)
cluster_logprob = log_softmax(cluster_output, dim=1)
cluster_logprob = F.log_softmax(cluster_output, dim=1)
output_logprob = cluster_logprob + head_logprob[:, self.shortlist_size + i].unsqueeze(1)
out[:, start_idx:stop_idx] = output_logprob


+ 5
- 4
mindnlp/core/nn/modules/rnn.py View File

@@ -247,7 +247,7 @@ class _DynamicLSTMCPUGPU(Module):
has_bias = False
else:
has_bias = True
if self.is_gpu:
if x.device.type == 'cuda':
weights = ops.concat((
w_ih.view(-1, 1, 1),
w_hh.view(-1, 1, 1),
@@ -261,12 +261,13 @@ class _DynamicLSTMCPUGPU(Module):
w_hh.view(-1, 1, 1),
bias.view(-1, 1, 1)
))
_lstm = _get_cache_prim(LSTMOP)(input_size, hidden_size, 1, has_bias, False, 0.0)
output, h_n, c_n, _, _ = _lstm(
output, h_n, c_n, _, _ = execute(
'lstm',
x,
h_0[0].unsqueeze(0),
h_0[1].unsqueeze(0),
weights.astype(x.dtype)
weights.astype(x.dtype),
input_size, hidden_size, 1, has_bias, False, 0.0, 0
)
return output, (h_n, c_n)


+ 1
- 1
mindnlp/core/nn/utils/parametrize.py View File

@@ -78,7 +78,7 @@ def _register_parameter_or_buffer(module, name, X):
def _maybe_set(dest: Tensor, src: Tensor) -> None:
dest.assign_value(src) # type: ignore[call-overload]
dest.copy_(src) # type: ignore[call-overload]
class ParametrizationList(ModuleList):


+ 3
- 1
mindnlp/core/npu/__init__.py View File

@@ -39,9 +39,11 @@ def manual_seed_all(seed: int):
manual_seed(seed)
def device_count():
if not is_available():
return 0
if GlobalComm.INITED:
return get_group_size()
return ms_device_count()
return 1
def current_device():
return core.device('npu', 0)


+ 123
- 100
mindnlp/core/ops/array.py View File

@@ -8,7 +8,6 @@ import mindspore
from mindnlp import core
from mindnlp.core.executor import execute
from .other import broadcast_tensors, broadcast_to
from ..configs import ON_ORANGE_PI
def t(input):
@@ -74,8 +73,6 @@ def chunk(input, chunks, dim=0):
# gather
def gather(input, dim, index):
if ON_ORANGE_PI:
return torch_gather(input, index, dim)
return execute("gather_d", input, dim, index)
def torch_gather(x, indices, axis=1):
@@ -131,7 +128,9 @@ def index_add(input, dim, index, source, *, alpha=1):
# index_select
def index_select(input, dim, index):
return execute("index_select", input, dim, index)
if input.device.type in ['npu', 'meta']:
return execute("index_select", input, dim, index)
return execute("gather", input, index, dim, 0)
# masked_select
def masked_select(input, mask):
@@ -167,15 +166,69 @@ def movedim(x, source, destination):
>>> print(output.shape)
(4, 3, 5)
"""
ndim = x.ndim
if len(source) != len(destination):
raise ValueError(
f"For `source` and `destination` arguments, the number of elements must be the same, but got 'source':"
f" {len(source)} and 'destination': {len(destination)}.")
perm = _get_moved_perm(ndim, source, destination)
return permute(x, perm)
return moveaxis(x, source, destination)
# moveaxis
def moveaxis(a, source, destination):
"""Raises ValueError if source, destination not in (-ndim(a), ndim(a))."""
if not source and not destination:
return a
if isinstance(source, int):
source = (source,)
if isinstance(destination, int):
destination = (destination,)
if len(source) != len(destination):
raise ValueError('The lengths of source and destination must equal')
a_rank = a.ndim
def _correct_axis(axis, rank):
if axis < 0:
return axis + rank
return axis
source = tuple(_correct_axis(axis, a_rank) for axis in source)
destination = tuple(_correct_axis(axis, a_rank) for axis in destination)
if a.ndim is not None:
perm = [i for i in range(a_rank) if i not in source]
for dest, src in sorted(zip(destination, source)):
assert dest <= len(perm)
perm.insert(dest, src)
else:
r = core.range(0, a_rank, 1)
def _remove_indices(a, b):
"""Remove indices (`b`) from `a`."""
items = core.unbind(
core.sort(core.stack(b))
)
i = 0
result = []
for item in items:
result.append(a[i:item])
i = item + 1
result.append(a[i:])
return core.concat(result, 0)
minus_sources = _remove_indices(r, source)
minus_dest = _remove_indices(r, destination)
perm = execute('scatter_nd',
core.unsqueeze(minus_dest, 1), minus_sources, [a_rank]
)
perm = execute('tensor_scatter_update',
perm, core.unsqueeze(destination, 1), source
)
a = core.permute(a, tuple(perm))
return a
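
For a concrete feel of the permutation built above: moving axis 0 of a rank-3 tensor to position 2 yields perm = [1, 2, 0], matching numpy's moveaxis (illustrative check only):

import numpy as np

a = np.zeros((4, 3, 5))
print(np.moveaxis(a, 0, 2).shape)        # (3, 5, 4)
print(np.transpose(a, (1, 2, 0)).shape)  # (3, 5, 4), i.e. perm = [1, 2, 0]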
def _get_moved_perm(ndim, source, destination):
"""
Helper function for movedim, returns permutation after moving axis
@@ -203,6 +256,7 @@ def _get_moved_perm(ndim, source, destination):
# narrow
def narrow(input, dim, start, length):
length = length.item() if not isinstance(length, int) else length
start = start.item() if not isinstance(start, int) else start
return execute("narrow", input, dim, start, length)
@@ -219,7 +273,7 @@ def nonzero(input, *, as_tuple=False):
# permute
def permute(input, dims):
assert isinstance(dims, tuple)
return execute("transpose_view", input, dims)
return execute("permute", input, dims)
# reshape
@@ -305,8 +359,6 @@ def squeeze(input, dim=None):
def stack(tensors, dim=0):
if tensors[0].device.type == "npu":
return execute("stack_ext", tensors, dim)
return execute("stack", tensors, dim)
@@ -373,6 +425,7 @@ def _take_along_dim_helper(self, indices, dim):
# take_along_dim
def take_along_dim(input, indices, dim=None, *, out=None):
input = input.clone() # input will be modified on CPU
if dim:
self_broadcasted, indices_broadcasted, dim = _take_along_dim_helper(input, indices, dim)
return gather(self_broadcasted, dim, indices_broadcasted)
@@ -400,17 +453,23 @@ def tensor_split(input, indices_or_sections, dim=0):
def tile(input, dims):
if isinstance(dims[0], (tuple, list)):
dims = dims[0]
return execute("tile", input, tuple(dims))
new_dims = ()
for d in dims:
if not isinstance(d, int):
d = d.item()
new_dims += (d,)
return execute("tile", input, tuple(new_dims))
# transpose
def transpose(input, dim0, dim1):
return execute("transpose_ext_view", input, dim0, dim1)
return execute("transpose_view", input, dim0, dim1)
# unbind
def unbind(input, dim=0):
return execute("unstack_ext_view", input, dim)
return execute("unstack_view", input, dim)
# unravel_index
@@ -418,7 +477,7 @@ def unbind(input, dim=0):
# unsqueeze
def unsqueeze(input, dim):
return execute("expand_dims_view", input, dim)
return execute("expand_dims", input, dim)
# vsplit
@@ -430,9 +489,6 @@ def unsqueeze(input, dim):
def where(condition, input=None, other=None):
if input is None and other is None:
return nonzero(condition, as_tuple=True)
if ON_ORANGE_PI:
out = condition * input + (~condition) * other
return out
return execute("select", condition, input, other)
@@ -469,7 +525,7 @@ def _do_slice(self, dim: int, index: slice, self_shape: list):
end = _get_index(index.stop, self_shape[dim])
if start == 0 and end == self_shape[dim] and step == 1:
return self
return execute('slice_ext', self, dim, start, end, step)
return execute('slice', self, dim, start, end, step)
def _wrap_index_to_tuple(index):
"""Wrap index to tuple"""
@@ -494,7 +550,7 @@ def _count_indexed_dims(indexes):
count += 1
return count
def _record_tensor_index(index, remain_indexes, dim):
def _record_tensor_index(index, remain_indexes, dim, device):
"""Record indexes remained to be used by aclnnIndex/aclnnIndexPut"""
if len(remain_indexes) > dim:
remain_indexes[dim] = index
@@ -502,7 +558,10 @@ def _record_tensor_index(index, remain_indexes, dim):
while dim > len(remain_indexes):
# use empty_tensor with dim_num 9 to indicate unused dim
remain_indexes.append(empty_tensor_9d)
if device.type == 'npu':
remain_indexes.append(empty_tensor_9d)
else:
remain_indexes.append(slice(None, None, None))
remain_indexes.append(index)
return remain_indexes
@@ -513,7 +572,7 @@ def _process_dim_in_multi_dim_index(prev_result, orig_tensor, index, dim, indexe
if isinstance(index, bool):
result = unsqueeze(prev_result, dim)
index_for_bool = tensor_1d if index else empty_tensor_1d
_record_tensor_index(index_for_bool, remain_indexes, dim)
_record_tensor_index(index_for_bool, remain_indexes, dim, prev_result.device)
prev_shape.insert(dim, 1)
dim += 1
return result, dim, remain_indexes, prev_shape
@@ -544,11 +603,11 @@ def _process_dim_in_multi_dim_index(prev_result, orig_tensor, index, dim, indexe
# process index with Tensor bool type
result = unsqueeze(prev_result, dim)
index_for_bool = tensor_1d if index else empty_tensor_1d
_record_tensor_index(index_for_bool, remain_indexes, dim)
_record_tensor_index(index_for_bool, remain_indexes, dim, prev_result.device)
prev_shape.insert(dim, 1)
dim += 1
return result, dim, remain_indexes, prev_shape
_record_tensor_index(index, remain_indexes, dim)
_record_tensor_index(index, remain_indexes, dim, prev_result.device)
dim += 1
return result, dim, remain_indexes, prev_shape
raise IndexError(f"Invalid tensor index type {index}")
@@ -597,7 +656,11 @@ def tensor_getitem(self, index):
self_viewed, remain_indexes = _process_multi_dim_index(self, indexes, remain_indexes, indexed_dims)
if not remain_indexes:
return self_viewed
return execute('index', self_viewed, remain_indexes)
if self.device.type == 'npu':
return execute('index', self_viewed, remain_indexes)
return getitem(self_viewed, tuple(remain_indexes) if len(remain_indexes) > 1 else remain_indexes[0])
def tensor_setitem(self, index, value):
@@ -634,7 +697,11 @@ def tensor_setitem(self, index, value):
if not remain_indexes:
execute('inplace_copy', self_viewed, value)
return self
execute('inplace_index_put', self_viewed, remain_indexes, value, False) # accumulate=False
if self.device.type == 'npu':
execute('inplace_index_put', self_viewed, remain_indexes, value, False) # accumulate=False
else:
setitem(self_viewed, tuple(remain_indexes) if len(remain_indexes) > 1 else remain_indexes[0], value)
return self
_SLICE_ERROR = (
@@ -642,18 +709,23 @@ _SLICE_ERROR = (
'newaxis (`None`) and integer or boolean arrays are valid indices'
)
def _as_index(idx, need_scalar=True):
def _as_index(idx, device, need_scalar=True):
"""Helper function to parse idx as an index.
"""
if isinstance(idx, numbers.Integral):
return idx, True
idx = core.tensor(idx)
if not isinstance(idx, core.Tensor):
idx = core.tensor(idx, dtype=core.int64, device=device)
if need_scalar and idx.ndim not in (None, 0):
raise IndexError(_SLICE_ERROR + ', got {!r}'.format(idx))
if idx.ndim == 0:
return idx.item(), True
if idx.device != device:
idx._device = device
return idx, False
def cumprod(x, axis=0, exclusive=False, reverse=False):
@@ -676,66 +748,6 @@ def cumprod(x, axis=0, exclusive=False, reverse=False):
return result
def moveaxis(a, source, destination):
"""Raises ValueError if source, destination not in (-ndim(a), ndim(a))."""
if not source and not destination:
return a
if isinstance(source, int):
source = (source,)
if isinstance(destination, int):
destination = (destination,)
if len(source) != len(destination):
raise ValueError('The lengths of source and destination must equal')
a_rank = a.ndim
def _correct_axis(axis, rank):
if axis < 0:
return axis + rank
return axis
source = tuple(_correct_axis(axis, a_rank) for axis in source)
destination = tuple(_correct_axis(axis, a_rank) for axis in destination)
if a.ndim is not None:
perm = [i for i in range(a_rank) if i not in source]
for dest, src in sorted(zip(destination, source)):
assert dest <= len(perm)
perm.insert(dest, src)
else:
r = core.range(0, a_rank, 1)
def _remove_indices(a, b):
"""Remove indices (`b`) from `a`."""
items = core.unbind(
core.sort(core.stack(b))
)
i = 0
result = []
for item in items:
result.append(a[i:item])
i = item + 1
result.append(a[i:])
return core.concat(result, 0)
minus_sources = _remove_indices(r, source)
minus_dest = _remove_indices(r, destination)
perm = execute('scatter_nd',
core.unsqueeze(minus_dest, 1), minus_sources, [a_rank]
)
perm = execute('tensor_scatter_update',
perm, core.unsqueeze(destination, 1), source
)
a = core.permute(a, tuple(perm))
return a
def _slice_helper(tensor, slice_spec, do_update=False, updates=None):
"""Helper function for __getitem__ and _with_index_update_helper.
"""
@@ -772,7 +784,7 @@ def _slice_helper(tensor, slice_spec, do_update=False, updates=None):
# strides.append(1)
new_axis_mask |= (1 << index)
else:
s, is_scalar = _as_index(s, False)
s, is_scalar = _as_index(s, tensor.device, False)
if is_scalar:
begin.append(s)
end.append(s + 1)
@@ -788,6 +800,8 @@ def _slice_helper(tensor, slice_spec, do_update=False, updates=None):
advanced_indices.append((index, s, ellipsis_mask != 0))
if do_update and not advanced_indices:
if 0 in updates.shape:
return tensor
return strided_slice_update(
tensor,
begin,
@@ -841,7 +855,7 @@ def _slice_helper(tensor, slice_spec, do_update=False, updates=None):
if not dims_contiguous or updates is not None:
if range(len(dims)) != dims:
tensor = moveaxis(tensor, dims, range(len(dims)))
tensor_shape_prefix = core.tensor(tensor.shape[: len(dims)])
tensor_shape_prefix = core.tensor(tensor.shape[: len(dims)], device=stacked_indices.device)
stacked_indices = where(
stacked_indices < 0,
stacked_indices + tensor_shape_prefix,
@@ -854,7 +868,7 @@ def _slice_helper(tensor, slice_spec, do_update=False, updates=None):
# only in this case the result dimensions of advanced indexing are in
# the middle of `updates`. In the non-contiguous case, those dimensions
# are always at the front.
if dims_contiguous:
if dims_contiguous and updates.ndim > 1:
batch_size = stacked_indices.ndim - 1
batch_start = dims[0]
if batch_start < 0:
@@ -866,6 +880,7 @@ def _slice_helper(tensor, slice_spec, do_update=False, updates=None):
updates = moveaxis(
updates, range_(batch_start, batch_size), range(batch_size)
)
updates = updates.broadcast_to(stacked_indices.shape[:-1] + tensor.shape[stacked_indices.shape[-1]:])
tensor = execute('tensor_scatter_update', tensor, stacked_indices, updates)
if range(len(dims)) != dims:
tensor = moveaxis(tensor, range(len(dims)), dims)
@@ -909,7 +924,7 @@ def _slice_helper(tensor, slice_spec, do_update=False, updates=None):
flat_shape = shape_tensor[:axis] + (-1,) + shape_tensor[axis + len(dims) :]
tensor = tensor.reshape(flat_shape)
return execute('gather', tensor, stacked_indices, axis)
return execute('gather', tensor, stacked_indices, axis, 0)
def _as_spec_tuple(slice_spec):
"""Convert slice_spec to tuple."""
@@ -930,8 +945,10 @@ def getitem(self, slice_spec):
isinstance(slice_spec, core.Tensor)
and slice_spec.dtype == core.bool
)
):
return masked_select(self, slice_spec)
):
if self.shape == slice_spec.shape:
return masked_select(self, slice_spec)
slice_spec = nonzero(slice_spec, as_tuple=True)
if not isinstance(slice_spec, tuple):
slice_spec = _as_spec_tuple(slice_spec)
@@ -948,7 +965,10 @@ def setitem(a, slice_spec, updates):
and slice_spec.dtype == core.bool
)
):
slice_spec = nonzero(slice_spec)
if slice_spec.shape == a.shape and (isinstance(updates, numbers.Number) or updates.ndim == 0):
a.masked_fill_(slice_spec, updates)
return a
slice_spec = nonzero(slice_spec, as_tuple=True)
if not isinstance(slice_spec, tuple):
slice_spec = _as_spec_tuple(slice_spec)
@@ -963,9 +983,12 @@ def strided_slice_update(input, begin, end, strides, update, begin_mask=0, end_m
sliced_tensor = execute('strided_slice', input, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask)
if update.shape != sliced_tensor.shape:
update = update.broadcast_to(sliced_tensor.shape)
update = update - sliced_tensor
update = update - sliced_tensor
updated_tensor = execute('strided_slice_grad', input, begin, end, strides, update, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask)
input.data = input + updated_tensor
out = input + updated_tensor
if input.dtype == core.bool:
out = out.astype(core.bool)
input.copy_(out)
return input
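
The update path works because strided_slice_grad scatters its argument into a zero tensor of the input's shape: at the sliced positions the sum is input + (update - sliced) = update, elsewhere it is input + 0, and the bool round-trip only keeps the dtype stable. A numpy sketch of the same idea for a 1-D slice (illustrative only):

import numpy as np

x = np.arange(6.0)             # [0. 1. 2. 3. 4. 5.]
upd = np.array([10.0, 20.0])   # new values intended for x[2:4]

delta = upd - x[2:4]           # update - sliced_tensor
scattered = np.zeros_like(x)   # conceptually what strided_slice_grad returns
scattered[2:4] = delta
print(x + scattered)           # [ 0.  1. 10. 20.  4.  5.]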
def getitem_np(input, slice):


+ 2
- 2
mindnlp/core/ops/blas.py View File

@@ -22,7 +22,7 @@ def baddbmm(input, batch1, batch2, *, beta=1, alpha=1):
# bmm
def bmm(input, other):
return execute('bmm_ext', input, other)
return execute('bmm', input, other)
# chain_matmul
@@ -61,7 +61,7 @@ def dot(input, other):
# matmul
def matmul(input, other):
return execute('matmul_ext', input, other)
return execute('matmul', input, other)
# matrix_power


+ 14
- 35
mindnlp/core/ops/comparison.py View File

@@ -19,16 +19,11 @@ def argsort(input, dim=-1, descending=False, stable=False):
def eq(input, other):
if not isinstance(other, numbers.Number) and other.device != input.device:
other = other.to(input.device)
return execute('equal', input, other)
return execute('eq', input, other)
# equal
def equal(input, other):
if input.device.type == 'npu':
return execute('equal_ext', input, other)
# if input.shape != other.shape:
# return False
out = eq(input, other)
return out.all()
return execute('equal', input, other)
# ge
def ge(input, other):
@@ -44,7 +39,10 @@ def greater(input, other):
# isclose
def isclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False):
if not isinstance(atol, numbers.Number):
atol = atol.item()
return execute('isclose', input, other, rtol, atol, equal_nan)
# isfinite
def isfinite(input):
@@ -53,7 +51,8 @@ def isfinite(input):
# isin
def in1d(ar1, ar2, invert=False):
ar1 = core.unsqueeze(ar1.ravel(), -1)
ar2 = ar2.ravel()
if not isinstance(ar2, numbers.Number):
ar2 = ar2.ravel()
included = core.eq(ar1, ar2)
# ops.reduce_sum only supports float
res = core.sum(included.to(core.float32), -1).to(core.bool_)
@@ -62,11 +61,8 @@ def in1d(ar1, ar2, invert=False):
return res
def isin(elements, test_elements, invert=False):
if elements.device.type != 'cpu':
res = in1d(elements, test_elements, invert=invert)
return core.reshape(res, elements.shape)
return execute('isin', elements, test_elements)
res = in1d(elements, test_elements, invert=invert)
return core.reshape(res, elements.shape)
# isinf
def isinf(input):
@@ -108,6 +104,8 @@ def maximum(input, other):
# minimum
def minimum(input, other):
if other.device != input.device:
other = other.to(input.device)
return execute('minimum', input, other)
# fmax
@@ -124,32 +122,13 @@ def not_equal(input, other):
# sort
def sort(input, *, dim=-1, descending=False, stable=False):
out = execute('sort_ext', input, dim, descending, stable)
out = execute('sort', input, dim, descending, stable)
return sort_out(values=out[0], indices=out[1])
# topk
def topk(input, k, dim=-1, largest=True, sorted=True):
if input.device.type == 'npu':
out = execute('topk_ext', input, k, dim, largest, sorted)
else:
if not largest:
input = -input
if dim is None or dim == input.ndim - 1:
if not largest:
res = execute('topk', input, k, sorted)
values, indices = -res[0], res[1]
return topk_out(values=values, indices=indices)
out = execute('topk', input, k, sorted)
return topk_out(values=out[0], indices=out[1])
input = input.swapaxes(dim, input.ndim - 1)
output = execute('topk', input, k, sorted)
values = output[0].swapaxes(dim, input.ndim - 1)
indices = output[1].swapaxes(dim, input.ndim - 1)
if not largest:
res = (-values, indices)
else:
res = (values, indices)
out = res
out = execute('topk', input, k, dim, largest, sorted)
return topk_out(values=out[0], indices=out[1])


+ 16
- 26
mindnlp/core/ops/creation.py View File

@@ -51,7 +51,7 @@ def zeros(*size, out=None, dtype=None, layout=None, device=None, requires_grad=F
s = s.item()
new_size += (s,)
output = execute('zeros', new_size, dtype, device=device, requires_grad=requires_grad, user_created=True)
output = execute('zeros', new_size, dtype, device=device)
if out is None:
return output
out.data = output
@@ -63,10 +63,7 @@ def zeros_like(input, *, dtype=None, layout=None, device=None, requires_grad=Fal
dtype = input.dtype
if device is None:
device = input.device
if device.type == 'cpu':
return execute('zeros_like', input, device=device, requires_grad=requires_grad, user_created=True)
return execute('zeros_like_ext', input, dtype,
device=device, requires_grad=requires_grad, user_created=True)
return execute('zeros_like', input, dtype, device=device)
# ones
def ones(*size, out=None, dtype=None, layout=None, device=None, requires_grad=False, **kwargs):
@@ -87,7 +84,7 @@ def ones(*size, out=None, dtype=None, layout=None, device=None, requires_grad=Fa
new_size += (s,)
output = execute('ones', new_size, dtype,
device=device, requires_grad=requires_grad, user_created=True)
device=device)
if out is None:
return output
out.data = output
@@ -101,10 +98,7 @@ def ones_like(input, *, dtype=None, layout=None, device=None, requires_grad=Fals
device = input.device
if isinstance(device, str):
device = core.device(device)
if device.type == 'cpu':
return execute('ones_like', input, device=device, requires_grad=requires_grad, user_created=True)
return execute('ones_like_ext', input, dtype,
device=device, requires_grad=requires_grad, user_created=True)
return execute('ones_like', input, dtype, device=device)
# arange
def arange(start=0, end=None, step=1, *, out=None, dtype=None, layout=None, device=None, requires_grad=False):
@@ -121,7 +115,7 @@ def arange(start=0, end=None, step=1, *, out=None, dtype=None, layout=None, devi
end = end.item() if isinstance(end, (core.Tensor, np.integer)) else end
step = step.item() if isinstance(step, (core.Tensor, np.integer)) else step
output = execute('arange', start, end, step, dtype, device=device, requires_grad=requires_grad, user_created=True)
output = execute('arange', start, end, step, dtype, device=device)
if out is None:
return output
out.data = output
@@ -136,7 +130,7 @@ def range(start=0, end=None, step=1, *, out=None, dtype=None, layout=None, devic
if device is None:
device = get_device_in_context()
output = execute('range', start, end + 1, step, 1000000,
device=device, requires_grad=requires_grad, user_created=True)
device=device)
if out is None:
return output
out.data = output
@@ -155,8 +149,7 @@ def linspace(start, end, steps, *, out=None, dtype=None, layout=None, device=Non
end = end.item() if isinstance(end, (core.Tensor, np.integer)) else end
steps = steps.item() if isinstance(steps, (core.Tensor, np.integer)) else steps
output = execute('lin_space_ext', start, end, steps, dtype,
device=device, requires_grad=requires_grad, user_created=True)
output = execute('linspace', start, end, steps, dtype, device=device)
if out is None:
return output
out.data = output
@@ -173,7 +166,7 @@ def eye(n, m=None, *, out=None, dtype=None, layout=None, device=None, requires_g
if m is None:
m = n
output = execute('eye', n, m, dtype,
device=device, requires_grad=requires_grad, user_created=True)
device=device)
if out is None:
return output
out.data = output
@@ -205,7 +198,7 @@ def empty(*size, out=None, dtype=None, layout=None, device=None,
def empty_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=None):
if device is None:
device = input.device
return empty(input.shape, dtype=input.dtype, layout=layout, device=device, requires_grad=requires_grad)
return empty(input.shape, dtype=input.dtype, layout=layout, device=device)
# empty_strided
@@ -216,16 +209,13 @@ def full(size, fill_value, *, out=None, dtype=None, layout=None, device=None, re
# dtype = get_default_dtype()
if device is None:
device = get_device_in_context()
if device.type == 'cpu':
output = execute('full', size, fill_value, device=device, requires_grad=requires_grad, user_created=True)
size = tuple([s if isinstance(s, int) else s.item() for s in size])
if isinstance(fill_value, numbers.Number):
output = execute('fill_scalar', size, fill_value, dtype,
device=device)
else:
size = [s if isinstance(s, int) else s.item() for s in size]
if isinstance(fill_value, numbers.Number):
output = execute('fill_scalar', size, fill_value, dtype,
device=device, requires_grad=requires_grad, user_created=True)
else:
output = execute('fill_tensor', size, fill_value, dtype,
device=device, requires_grad=requires_grad, user_created=True)
output = execute('fill_tensor', size, fill_value, dtype,
device=device)
if out is None:
return output
out.data = output
@@ -235,7 +225,7 @@ def full(size, fill_value, *, out=None, dtype=None, layout=None, device=None, re
def full_like(input, fill_value, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=None):
if dtype is None:
dtype = input.dtype
return full(input.shape, fill_value, dtype=dtype, layout=layout, device=input.device, requires_grad=requires_grad)
return full(input.shape, fill_value, dtype=dtype, layout=layout, device=input.device)
# quantize_per_tensor


+ 19
- 12
mindnlp/core/ops/inplace.py View File

@@ -12,12 +12,7 @@ def inplace_copy(self, other):
return self

def inplace_zero(input):
if input.device.type == 'npu':
execute('inplace_zero', input)
elif input.device.type == 'meta':
pass
else:
input.data = core.zeros_like(input)
execute('inplace_zero', input)
return input

def inplace_fill(input, value):
@@ -25,6 +20,7 @@ def inplace_fill(input, value):
execute('inplace_fill_scalar', input, value)
else:
execute('inplace_fill_tensor', input, value)

return input

def inplace_normal(input, mean=0, std=1, *, generator=None):
@@ -37,7 +33,6 @@ def inplace_normal(input, mean=0, std=1, *, generator=None):
std = std.item()

execute('inplace_normal', input, mean, std, generator, device=input.device)

return input

# uniform_
@@ -62,22 +57,32 @@ def inplace_uniform(input, *args, **kwargs):
generator_ = default_generator

execute("inplace_uniform", input, from_, to_, generator_)

return input

def inplace_add(input, other, alpha):
if isinstance(other, numbers.Number):
other = core.tensor(other, dtype=input.dtype, device=input.device)
execute('inplace_add_ext', input, other, alpha)
execute('inplace_add', input, other, alpha)
return input


def inplace_random(self, from_=0, to=None, *, generator=None):
if not generator:
generator = default_generator
seed, offset = generator._step( # pylint: disable=protected-access
generator_step_)
execute('inplace_random', self, from_, to, seed, offset, device=self.device)
execute('inplace_random', self, from_, to, generator, device=self.device)

return self

def inplace_exponential(self, lambd, generator):
if not generator:
generator = default_generator
execute('inplace_exponential', self, lambd, generator, device=self.device)
return self

def inplace_fill_diagonal(input, value, wrap):
execute("inplace_fill_diagonal", input, value, wrap)
return input

__all__ = [
'inplace_copy',
@@ -86,5 +91,7 @@ __all__ = [
'inplace_fill',
'inplace_uniform',
'inplace_add',
'inplace_random'
'inplace_random',
'inplace_exponential',
'inplace_fill_diagonal'
]

+ 169
- 77
mindnlp/core/ops/other.py View File

@@ -1,10 +1,9 @@
"""other op"""
import numpy as np
import mindspore
from mindspore.ops import gather
from mindnlp import core
from mindnlp.core.executor import execute
from ..configs import ON_A1
from ..configs import ON_A2
# atleast_2d
@@ -14,7 +13,7 @@ from ..configs import ON_A1
# bincount
def bincount(input, weights=None, minlength=0):
return execute('bincount_ext', input, weights, minlength)
return execute('bincount', input, weights, minlength)
# block_diag
@@ -28,7 +27,12 @@ def broadcast_tensors(*tensors):
# broadcast_to
def broadcast_to(input, shape):
return execute('broadcast_to', input, shape)
new_shape = ()
for s in shape:
if not isinstance(s, int):
s = s.item()
new_shape += (s,)
return execute('broadcast_to', input, new_shape)
# broadcast_shapes
@@ -74,9 +78,7 @@ def cdist(x1, x2, p=2.0, compute_mode="use_mm_for_euclid_dist_if_necessary"):
# clone
def clone(input, *, memory_format=core.preserve_format):
if input.device.type == 'npu':
return execute('clone', input)
return execute('identity', input)
return execute('clone', input)
# combinations
@@ -100,14 +102,14 @@ def clone(input, *, memory_format=core.preserve_format):
def cumsum(input, dim=None, dtype=None, **kwargs):
dim = kwargs.pop('axis', dim)
if input.dtype in [core.int64, core.bool]:
return execute('cumsum_ext', input.int(), dim, None).long()
return execute('cumsum', input.int(), dim, None).long()
if dtype is not None and dtype == core.int64:
return execute('cumsum_ext', input, dim, None).long()
return execute('cumsum_ext', input, dim, dtype)
return execute('cumsum', input, dim, None).long()
return execute('cumsum', input, dim, dtype)
# diag
def diag(input, diagonal=0, *, out=None):
return execute('diag_ext', input, diagonal)
return execute('diag', input, diagonal)
# diag_embed
@@ -548,7 +550,7 @@ def einsum(equation, *operands):
You can use this operator to perform diagonal, reducesum, transpose, matmul, mul, inner product operations, etc.
Note:
The sublist format is also supported. For example, einsum_ext(op1, sublist1, op2, sublist2, ..., sublist_out).
The sublist format is also supported. For example, einsum(op1, sublist1, op2, sublist2, ..., sublist_out).
In this format, equation can be derived by the sublists which are made up of Python's Ellipsis and list of
integers in [0, 52). Each operand is followed by a sublist and an output sublist is at the end.
Dynamic shape, dynamic rank input is not supported in `graph mode (mode=mindspore.GRAPH_MODE)
@@ -585,50 +587,50 @@ def einsum(equation, *operands):
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.array([1.0, 2.0, 4.0]), mindspore.float32)
>>> equation = "i->"
>>> output = ops.einsum_ext(equation, x)
>>> output = ops.einsum(equation, x)
>>> print(output)
7.0
>>> x = Tensor(np.array([1.0, 2.0, 4.0]), mindspore.float32)
>>> y = Tensor(np.array([2.0, 4.0, 3.0]), mindspore.float32)
>>> equation = "i,i->i"
>>> output = ops.einsum_ext(equation, x, y)
>>> output = ops.einsum(equation, x, y)
>>> print(output)
[ 2. 8. 12.]
>>> x = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), mindspore.float32)
>>> y = Tensor(np.array([[2.0, 3.0], [1.0, 2.0], [4.0, 5.0]]), mindspore.float32)
>>> equation = "ij,jk->ik"
>>> output = ops.einsum_ext(equation, x, y)
>>> output = ops.einsum(equation, x, y)
>>> print(output)
[[16. 22.]
[37. 52.]]
>>> x = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), mindspore.float32)
>>> equation = "ij->ji"
>>> output = ops.einsum_ext(equation, x)
>>> output = ops.einsum(equation, x)
>>> print(output)
[[1. 4.]
[2. 5.]
[3. 6.]]
>>> x = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), mindspore.float32)
>>> equation = "ij->j"
>>> output = ops.einsum_ext(equation, x)
>>> output = ops.einsum(equation, x)
>>> print(output)
[5. 7. 9.]
>>> x = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), mindspore.float32)
>>> equation = "...->"
>>> output = ops.einsum_ext(equation, x)
>>> output = ops.einsum(equation, x)
>>> print(output)
21.0
>>> x = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32)
>>> y = Tensor(np.array([2.0, 4.0, 1.0]), mindspore.float32)
>>> equation = "j,i->ji"
>>> output = ops.einsum_ext(equation, x, y)
>>> output = ops.einsum(equation, x, y)
>>> print(output)
[[ 2. 4. 1.]
[ 4. 8. 2.]
[ 6. 12. 3.]]
>>> x = mindspore.Tensor([1, 2, 3, 4], mindspore.float32)
>>> y = mindspore.Tensor([1, 2], mindspore.float32)
>>> output = ops.einsum_ext(x, [..., 1], y, [..., 2], [..., 1, 2])
>>> output = ops.einsum(x, [..., 1], y, [..., 2], [..., 1, 2])
>>> print(output)
[[1. 2.]
[2. 4.]
@@ -637,20 +639,13 @@ def einsum(equation, *operands):
"""
if isinstance(operands[0], (list, tuple)):
operands = operands[0]
if operands[0].device.type != 'npu':
return execute('einsum', equation, operands)
_equation, _operands = _einsum_convert_sublist(equation, *operands)
_einsum_check_inputargs(_equation, _operands)
return _einsum(_equation, _operands)
# flatten
def flatten(input, start_dim=0, end_dim=-1):
if input.device.type == 'cpu':
if end_dim < 0:
end_dim = input.ndim + end_dim
new_shape = input.shape[:start_dim] + (-1,) + input.shape[end_dim + 1:]
return input.reshape(new_shape)
return execute('flatten_ext', input, start_dim, end_dim)
return execute('flatten', input, start_dim, end_dim)
# flip
@@ -704,57 +699,139 @@ def ravel(input):
# repeat_interleave
def repeat_interleave(input, repeats, dim=None, *, output_size=None):
if input.device.type == 'npu' and ON_A1:
if isinstance(repeats, core.Tensor):
repeats = repeats.tolist()
if not isinstance(repeats, (tuple, list)):
repeats = (repeats,)
for index, element in enumerate(repeats):
if not isinstance(element, int):
raise TypeError(f"For 'Tensor.repeat', each element in {repeats} should be int, but got "
f"{type(element)} at index {index}.")
if dim is None:
input = input.ravel()
dim = 0
dim = dim + input.ndim if dim < 0 else dim
if sum(repeats) == 0:
out_shape = list(input.shape)
out_shape[dim] = 0
return core.Tensor(shape=tuple(out_shape), dtype=input.dtype)
if len(repeats) == 1:
repeats = repeats[0]
if input.dtype == mindspore.bool_:
input = input.to(mindspore.int32)
out = execute('repeat_elements', input, repeats, dim)
return out.to(mindspore.bool_)
return execute('repeat_elements', input, repeats, dim)
size = input.shape[dim]
if len(repeats) != size:
raise ValueError(f"For 'Tensor.repeat', the length of 'repeats' must be the same as the shape of the "
f"original tensor in the 'axis' dimension, but got the length of 'repeats' "
f"{len(repeats)}, the shape of the original tensor in the 'axis' dimension {size}.")
subs = core.split(input, 1, dim)
repeated_subs = []
for sub, rep in zip(subs, repeats):
if rep != 0:
repeated_subs.append(execute('repeat_elements', sub, rep, dim))
return core.concat(repeated_subs, dim)
def efficient_repeat_interleave(input_tensor, repeats, dim=None):
"""
Efficiently implements core.repeat_interleave; supports repeats given as an int, a list, or a Tensor.
Args:
input_tensor (Tensor): the input tensor.
repeats (int, list, or Tensor): the number of repetitions for each element.
dim (int, optional): the dimension along which to repeat. If None, the input tensor is flattened first.
Returns:
Tensor: the repeated tensor.
"""
if dim is None:
input_tensor = input_tensor.flatten()
dim = 0
# make sure dim is a valid dimension
if dim < 0:
dim += input_tensor.dim()
# normalize repeats to a LongTensor on the correct device
if isinstance(repeats, int):
return execute('repeat_interleave_int', input, repeats, dim, None)
return execute('repeat_interleave_tensor', input, repeats, dim, None)
repeats_tensor = core.tensor([repeats], device=input_tensor.device, dtype=core.long)
uniform_repeat = True
elif isinstance(repeats, (list, tuple)):
repeats_tensor = core.tensor(repeats, device=input_tensor.device, dtype=core.long)
uniform_repeat = False
elif isinstance(repeats, core.Tensor):
repeats_tensor = repeats.to(device=input_tensor.device, dtype=core.long)
uniform_repeat = False
else:
raise TypeError("repeats must be an int, a list, or a core.Tensor")
# size of the input tensor along the target dimension
dim_size = input_tensor.size(dim)
if uniform_repeat:
# Fast path: when every element repeats the same number of times,
# use expand + reshape (broadcasting) instead of a Python loop.
unsqueezed_tensor = input_tensor.unsqueeze(dim + 1)
expanded_shape = list(input_tensor.shape)
expanded_shape[dim] = -1
expanded_shape.insert(dim + 1, repeats_tensor.item())
expanded_tensor = unsqueezed_tensor.expand(*expanded_shape)
final_shape = list(input_tensor.shape)
final_shape[dim] *= repeats_tensor.item()
output = expanded_tensor.reshape(*final_shape)
else:
# Non-uniform repeats: build an explicit gather index.
# repeats must provide one count per element along the target dimension.
if len(repeats_tensor) != dim_size:
raise ValueError(f"repeats must have length {dim_size} along dimension {dim}, but got {len(repeats_tensor)}")
# Build the index, e.g. repeats_tensor = [2, 3, 1] -> index = [0, 0, 1, 1, 1, 2].
# The total repeat count is used to preallocate the index buffer.
total_repeats = repeats_tensor.sum().item()
index = core.zeros(total_repeats, dtype=core.long, device=input_tensor.device)
# starting position of each block (kept for reference):
# start_positions = core.cat([core.tensor([0], device=input_tensor.device), core.cumsum(repeats_tensor, dim=0)[:-1]])
# Fill the index with a loop; scatter or advanced indexing could be a lower-level optimization.
# Note: for very large non-uniform repeats this loop can become a bottleneck.
current_pos = 0
for i in range(dim_size):
repeat_count = repeats_tensor[i].item()
index[current_pos:current_pos + repeat_count] = i
current_pos += repeat_count
output = input_tensor.index_select(dim, index)
return output
def repeat_interleave(input, repeats, dim=None, *, output_size=None):
if input.device.type == 'npu' and ON_A2:
if isinstance(repeats, int):
return execute('repeat_interleave_int', input, repeats, dim, None)
return execute('repeat_interleave_tensor', input, repeats, dim, None)
return efficient_repeat_interleave(input, repeats, dim)
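A NumPy sketch of the non-uniform path in efficient_repeat_interleave above: build a gather index from the per-element repeat counts, then select along the target dimension (illustration only):

import numpy as np

x = np.array([[1, 2], [3, 4], [5, 6]])
repeats = [2, 0, 1]  # one count per row along dim 0
index = np.concatenate([np.full(r, i, dtype=np.int64) for i, r in enumerate(repeats)])
print(index)     # [0 0 2]
print(x[index])  # rows 0, 0 and 2 -> [[1 2] [1 2] [5 6]]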
# roll
def roll(input, shifts, dims=None):
return execute('roll', input, shifts, dims)
if input.device.type == 'npu':
return execute('roll', input, shifts, dims)
# dims is None: flatten first, roll the flat tensor, then restore the original shape
if dims is None:
original_shape = input.shape
flattened = input.flatten()
rolled_flattened = roll(flattened, shifts, dims=0)
return rolled_flattened.reshape(original_shape)
# normalize shifts and dims to tuples for uniform handling
if not isinstance(shifts, tuple):
shifts = (shifts,)
if not isinstance(dims, tuple):
dims = (dims,)
# shifts and dims must have matching lengths
if len(shifts) != len(dims):
raise ValueError("shifts and dims must have the same length")
result = input.clone()  # work on a copy so the original tensor is not modified
# process each (shift, dim) pair in turn
for shift, dim in zip(shifts, dims):
# validate the dimension
if dim >= result.dim():
raise ValueError("维度索引超出张量的维度范围")
# 获取该维度的长度
dim_size = result.size(dim)
# 处理负的 shift 值:正向移动 shift + dim_size 等同于反向移动 dim_size - shift
effective_shift = shift % dim_size
if effective_shift == 0:
continue  # shift of 0: nothing to do
# Slice along the dimension and re-concatenate:
# split the tensor into [0, dim_size - effective_shift) and [dim_size - effective_shift, dim_size),
# then swap the two parts.
slices_pre = [slice(None)] * result.dim()
slices_pre[dim] = slice(dim_size - effective_shift, None)
part1 = result[slices_pre]
slices_post = [slice(None)] * result.dim()
slices_post[dim] = slice(0, dim_size - effective_shift)
part2 = result[slices_post]
# concatenate the two parts along this dimension
result = core.concat((part1, part2), dim)
return result
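A NumPy check of the slice-and-concatenate roll used in the fallback above (the last effective_shift elements move to the front):

import numpy as np

x = np.arange(6)
shift = 2
n = x.shape[0]
effective = shift % n
rolled = np.concatenate((x[n - effective:], x[:n - effective]))
print(rolled)             # [4 5 0 1 2 3]
print(np.roll(x, shift))  # same result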
# searchsorted
def searchsorted(
@@ -774,7 +851,7 @@ def searchsorted(
# tril
def tril(input, diagonal=0):
return execute('tril_ext', input, diagonal)
return execute('tril', input, diagonal)
# tril_indices
@@ -921,6 +998,8 @@ def contains(self, key):
def stop_gradient(input):
return execute('stop_gradient', input)
def detach(input):
return stop_gradient(input)
def _get_unfold_indices(input_shape, dimension, size, step):
if dimension < 0:
@@ -935,7 +1014,7 @@ def _get_unfold_indices(input_shape, dimension, size, step):
def unfold(input, dimension, size, step):
_indices, _dimension = _get_unfold_indices(input.shape, dimension, size, step)
indices = core.tensor(_indices, device=input.device)
output = execute('gather', input, indices, _dimension)
output = execute('gather', input, indices, _dimension, 0)
output = core.moveaxis(output, _dimension + 1, -1)
return output
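For reference, the expected unfold result (windows of length size taken every step elements along dimension, with the window axis last, assumed to match torch.Tensor.unfold semantics) can be checked with a plain NumPy sketch:

import numpy as np

x = np.arange(7)
size, step = 3, 2
windows = np.stack([x[s:s + size] for s in range(0, x.shape[0] - size + 1, step)])
print(windows)
# [[0 1 2]
#  [2 3 4]
#  [4 5 6]]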
@@ -944,13 +1023,24 @@ def contiguous(input):
return execute('contiguous', input)
def dyn_shape(input):
return execute('dyn_shape', input)
return execute('tensor_shape', input)
def cross(input, other, dim=None, *, out=None):
if dim is None:
dim = -65530
return execute('cross', input, other, dim)
def cosine_similarity(x1, x2, dim=1, eps=1e-8):
dot_product = core.sum(x1 * x2, dim=dim)
# 2. L2 norms (||x1|| and ||x2||)
norm_vec1 = core.norm(x1, p=2, dim=dim)
norm_vec2 = core.norm(x2, p=2, dim=dim)
# 3. cosine similarity: (x1 · x2) / (||x1|| * ||x2|| + eps)
cosine_sim = dot_product / (norm_vec1 * norm_vec2 + eps)
return cosine_sim
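A quick NumPy check of the formula above on one pair of vectors (illustration only):

import numpy as np

x1, x2, eps = np.array([1.0, 2.0, 3.0]), np.array([2.0, 4.0, 6.0]), 1e-8
print(x1 @ x2 / (np.linalg.norm(x1) * np.linalg.norm(x2) + eps))  # ~1.0, since x2 is a positive multiple of x1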
__all__ = [
"bincount",
@@ -985,5 +1075,7 @@ __all__ = [
"diff",
'view_as_complex',
'view_as_real',
'bucketize'
'bucketize',
'cosine_similarity',
'detach'
]

+ 21
- 20
mindnlp/core/ops/pointwise.py View File

@@ -28,7 +28,7 @@ def arrcos(input):
# acosh
def acosh(input):
return execute("acosh_ext", input)
return execute("acosh", input)
# arccosh
@@ -38,19 +38,16 @@ def arccosh(input):
# add
def add(input, other, *, alpha=1):
if alpha != 1:
return execute("add_ext", input, other, alpha)
return execute('add', input, other)
return execute("add", input, other, alpha)
# addcdiv
def addcdiv(input, tensor1, tensor2, *, value=1):
return execute("addcdiv_ext", input, tensor1, tensor2, value)
return execute("addcdiv", input, tensor1, tensor2, value)
# addcmul
def addcmul(input, tensor1, tensor2, *, value=1):
return execute("addcmul_ext", input, tensor1, tensor2, value)
return execute("addcmul", input, tensor1, tensor2, value)
# angle
@@ -60,7 +57,7 @@ def angle(input):
# asin
def asin(input):
return execute("asin_ext", input)
return execute("asin", input)
# arcsin
@@ -70,7 +67,7 @@ def arcsin(input):
# asinh
def asinh(input):
return execute("asinh_ext", input)
return execute("asinh", input)
# arcsinh
@@ -80,7 +77,7 @@ def arcsinh(input):
# atan
def atan(input):
return execute("atan_ext", input)
return execute("atan", input)
# arctan
@@ -100,7 +97,7 @@ def arctanh(input):
# atan2
def atan2(input, other):
return execute("atan2_ext", input, other)
return execute("atan2", input, other)
# arctan2
@@ -155,6 +152,13 @@ def clamp(input, min=None, max=None):
return execute("clamp_scalar", input, min, max)
return execute("clamp_tensor", input, min, max)
def clamp_min(self, min):
return clamp(self, min, None)
def clamp_max(self, max):
return clamp(self, None, max)
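NumPy equivalents of the two helpers above, for illustration:

import numpy as np

x = np.array([-1.5, 0.2, 3.0])
print(np.maximum(x, 0.0))  # clamp_min(x, 0.0) -> [0.  0.2 3. ]
print(np.minimum(x, 1.0))  # clamp_max(x, 1.0) -> [-1.5  0.2  1. ]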
# clip
def clip(input, min=None, max=None):
@@ -457,8 +461,8 @@ def remainder(input, other):
# round
def round(input):
return execute("round", input)
def round(input, *, decimals=0):
return execute("round", input, decimals)
# rsqrt
@@ -521,10 +525,7 @@ def sub(input, other, *, alpha=1, out=None):
device = other.device
else:
device = input.device
if device == 'cpu':
output = execute("sub", input, alpha * other)
else:
output = execute("sub_ext", input, other, alpha)
output = execute("sub", input, other, alpha)
if out is None:
return output
out.copy_(output)
@@ -571,9 +572,7 @@ def relu(input):
def log_softmax(input, dim=None, dtype=None):
if input.device.type == 'cpu':
return execute('log_softmax', input, dim)
return execute('log_softmax_ext', input, dim, dtype)
return execute('log_softmax', input, dim, dtype)
__all__ = [
@@ -604,6 +603,8 @@ __all__ = [
"bitwise_right_shift",
"ceil",
"clamp",
"clamp_min",
"clamp_max",
"clip",
"cos",
"cosh",


+ 33
- 58
mindnlp/core/ops/random.py View File

@@ -12,7 +12,7 @@ generator_step_ = 12
def bernoulli(input, *, generator=None, out=None, **kwargs):
if generator is None:
generator = default_generator
output = execute("bernoulli_ext", input, generator)
output = execute("bernoulli", input, generator)
if out is None:
return output
out.data = output
@@ -22,10 +22,12 @@ def bernoulli(input, *, generator=None, out=None, **kwargs):
# multinomial
def multinomial(input, num_samples, replacement=False, *, generator=None, out=None):
"""custom multinomial"""
if not isinstance(num_samples, int):
num_samples = num_samples.item()
if generator is None:
generator = default_generator
if not ON_A1:
output = execute("multinomial_ext", input, num_samples, replacement, generator)
if input.device.type == 'npu':
output = execute("multinomial", input, num_samples, replacement, generator)
else:
if replacement:
@@ -60,7 +62,9 @@ def normal(mean=0.0, std=1.0, *, size=None, generator=None, out=None,
dtype=None, layout=None, device=None, pin_memory=None, requires_grad=False):
if generator is None:
generator = default_generator
seed, offset = generator._step(generator_step_) # pylint: disable=protected-access
if dtype is None:
dtype = get_default_dtype()
if device is None:
if out is None:
device = get_device_in_context()
@@ -70,28 +74,16 @@ def normal(mean=0.0, std=1.0, *, size=None, generator=None, out=None,
is_mean_tensor = isinstance(mean, core.Tensor)
is_std_tensor = isinstance(std, core.Tensor)
if device.type == 'cpu':
if is_mean_tensor and is_std_tensor:
size = (mean * std).shape
if is_mean_tensor and not is_std_tensor:
size = mean.shape
if not is_mean_tensor and is_std_tensor:
size = std.shape
if out is not None:
size = out.shape
output = execute('normal', size)
output = output * std - mean
else:
if is_mean_tensor and is_std_tensor:
output = execute("normal_tensor_tensor", mean, std, seed, offset, device=device)
if is_mean_tensor and not is_std_tensor:
output = execute("normal_tensor_float", mean, std, seed, offset, device=device)
if not is_mean_tensor and is_std_tensor:
output = execute("normal_float_tensor", mean, std, seed, offset, device=device)
if out is not None:
size = out.shape
output = execute("normal_float_float", float(mean), float(std), size, seed, offset, device=device)
if is_mean_tensor and is_std_tensor:
output = execute("normal_tensor_tensor", mean, std, size, dtype, generator, device=device)
if is_mean_tensor and not is_std_tensor:
output = execute("normal_tensor_float", mean, std, size, dtype, generator, device=device)
if not is_mean_tensor and is_std_tensor:
output = execute("normal_float_tensor", mean, std, size, dtype, generator, device=device)
if out is not None:
size = out.shape
output = execute("normal_float_float", float(mean), float(std), size, dtype, generator, device=device)
if out is None:
return output
@@ -120,18 +112,15 @@ def rand(
dtype = get_default_dtype()
if not generator:
generator = default_generator
seed, offset = generator._step(generator_step_) # pylint: disable=protected-access
if size and isinstance(size[0], (tuple, list)):
size = size[0]
output = execute(
"rand_ext",
"rand",
size,
seed,
offset,
generator,
dtype,
device=device,
requires_grad=requires_grad,
user_created=True,
)
if out is None:
return output
@@ -156,17 +145,13 @@ def rand_like(
if dtype is None:
dtype = input.dtype
seed, offset = default_generator._step( # pylint: disable=protected-access
generator_step_
)
return execute(
"rand_like_ext",
"rand_like",
input,
seed,
offset,
default_generator,
dtype,
device=device,
requires_grad=requires_grad,
)
@@ -197,10 +182,11 @@ def randint(
output = execute(
"randint",
low, high, size,
dtype,
generator,
dtype,
device=device,
)
if out is None:
return output
out.data = output
@@ -228,11 +214,12 @@ def randint_like(
if dtype is None:
dtype = input.dtype
seed, offset = default_generator._step( # pylint: disable=protected-access
generator_step_
)
return execute(
"randint_like_ext",
"randint_like",
input,
low,
high,
@@ -240,7 +227,6 @@ def randint_like(
offset,
dtype,
device=device,
requires_grad=requires_grad,
)
@@ -264,18 +250,15 @@ def randn(
dtype = get_default_dtype()
if not generator:
generator = default_generator
seed, offset = generator._step(generator_step_) # pylint: disable=protected-access
if size and isinstance(size[0], (tuple, list)):
size = size[0]
output = execute(
"randn",
size,
seed,
offset,
generator,
dtype,
device=device,
requires_grad=requires_grad,
user_created=True,
)
if out is None:
return output
@@ -300,17 +283,12 @@ def randn_like(
if dtype is None:
dtype = input.dtype
seed, offset = default_generator._step( # pylint: disable=protected-access
generator_step_
)
return execute(
"rand_like_ext",
"rand_like",
input,
seed,
offset,
default_generator,
dtype,
device=device,
requires_grad=requires_grad,
)
@@ -333,15 +311,12 @@ def randperm(
if not generator:
generator = default_generator
seed, offset = generator._step(generator_step_) # pylint: disable=protected-access
output = execute(
"randperm_ext",
"randperm",
n,
seed,
offset,
generator,
dtype,
device=device,
requires_grad=requires_grad,
)
if out is None:
return output


+ 19
- 17
mindnlp/core/ops/reduction.py View File

@@ -9,11 +9,11 @@ min_out = namedtuple('min_out', ['values', 'indices'])
# argmax
def argmax(input, dim=None, keepdim=False):
return execute('argmax_ext', input, dim, keepdim)
return execute('argmax', input, dim, keepdim)
# argmin
def argmin(input, dim=None, keepdim=False):
return execute('argmin_ext', input, dim, keepdim)
return execute('argmin', input, dim, keepdim)
# amax
def amax(input, dim, keepdim=False):
@@ -37,6 +37,8 @@ def all(input, dim=None, keepdim=False, *, dtype=None, **kwargs):
# any
def any(input, dim=None, keepdim=False):
if dim is None:
dim = ()
return execute('reduce_any', input, dim, keepdim)
# max
@@ -77,7 +79,7 @@ def logsumexp(input, dim, keepdim=False):
# mean
def mean(input, dim=None, keepdim=False, *, dtype=None, **kwargs):
dim = kwargs.pop('axis', dim)
return execute('mean_ext', input, dim, keepdim, dtype)
return execute('mean', input, dim, keepdim, dtype)
# nanmean
@@ -85,7 +87,7 @@ def mean(input, dim=None, keepdim=False, *, dtype=None, **kwargs):
# median
def median(input, dim=-1, keepdim=False):
if dim is None:
return execute('median_ext', input)
return execute('median', input)
return execute('median_dim', input, dim, keepdim)
# nanmedian
@@ -95,7 +97,7 @@ def median(input, dim=-1, keepdim=False):
# norm
def vector_norm_ext(input, p=2, dim=None, keepdim=False, *, dtype=None):
def vector_norm(input, p=2, dim=None, keepdim=False, *, dtype=None):
if float(p) in [0.0, 1.0, 2.0, 3.0]:
return execute('linalg_vector_norm', input, float(p), dim, keepdim, dtype)
if input.dtype in [core.bfloat16, core.float16, core.float32]:
@@ -107,12 +109,12 @@ def vector_norm_ext(input, p=2, dim=None, keepdim=False, *, dtype=None):
input = input.to(core.float32)
return execute('lp_norm_v2', input, p, dim, keepdim, 0.0).to(cast_dtype)
def matrix_norm_ext(A, ord='fro', dim=(-2, -1), keepdim=False, *, dtype=None):
def matrix_norm(A, ord='fro', dim=(-2, -1), keepdim=False, *, dtype=None):
ndim = A.ndim
row_axis, col_axis = _check_matrix_norm_axis(dim, ndim)
_check_matrix_norm_ord(ord)
if ord == 'fro':
return vector_norm_ext(A, 2, dim, keepdim, dtype=dtype)
return vector_norm(A, 2, dim, keepdim, dtype=dtype)
if ord == 'nuc':
res = _multi_svd_norm(A, row_axis, col_axis, 'sum')
return _reshape_matrix_norm(A, res, dim, keepdim)
@@ -127,24 +129,24 @@ def matrix_norm_ext(A, ord='fro', dim=(-2, -1), keepdim=False, *, dtype=None):
if not keepdim and col_axis > row_axis:
col_axis -= 1
if ord < 0:
return amin(vector_norm_ext(A, 1, row_axis, keepdim, dtype=dtype), col_axis, keepdim)
return amax(vector_norm_ext(A, 1, row_axis, keepdim, dtype=dtype), col_axis, keepdim)
return amin(vector_norm(A, 1, row_axis, keepdim, dtype=dtype), col_axis, keepdim)
return amax(vector_norm(A, 1, row_axis, keepdim, dtype=dtype), col_axis, keepdim)
def norm(input, p='fro', dim=None, keepdim=False, dtype=None):
if not isinstance(input, core.Tensor):
raise TypeError(f"For `norm_ext`, the `input` must be Tensor!, but get {type(input)}.")
raise TypeError(f"For `norm`, the `input` must be Tensor!, but get {type(input)}.")
if isinstance(p, (bool, int, float)):
return vector_norm_ext(input, p, dim, keepdim, dtype=dtype)
return vector_norm(input, p, dim, keepdim, dtype=dtype)
if p == 'fro':
if isinstance(dim, (list, tuple)) and len(dim) > 2:
raise ValueError(f"For `norm_ext`, the size of `dim` cannot be greater than 2 "
raise ValueError(f"For `norm`, the size of `dim` cannot be greater than 2 "
f"when the norm mode is `fro`.")
return execute('linalg_vector_norm', input, 2.0, dim, keepdim,
dtype if dtype is None else dtype)
if p == 'nuc':
dim = tuple(range(input.ndim)) if dim is None else dim
return matrix_norm_ext(input, p, dim, keepdim, dtype=dtype)
raise ValueError(f"For `norm_ext`, the value of `p` must be one of [int, float, inf, -inf, 'fro', 'nuc',] "
return matrix_norm(input, p, dim, keepdim, dtype=dtype)
raise ValueError(f"For `norm`, the value of `p` must be one of [int, float, inf, -inf, 'fro', 'nuc',] "
f"but got `{p}`.")
# nansum
@@ -153,7 +155,7 @@ def nansum(input, dim=None, keepdim=False, *, dtype=None):
# prod
def prod(input, dim=None, keepdim=False, *, dtype=None):
return execute('prod_ext', input, dim, keepdim, dtype)
return execute('prod', input, dim, keepdim, dtype)
# quantile
@@ -173,7 +175,7 @@ def sum(input, dim=None, keepdim=False, *, dtype=None, **kwargs):
dim = kwargs.pop('axis', dim)
if 0 in input.shape:
return core.tensor(0, dtype=dtype, device=input.device)
return execute('sum_ext', input, dim, keepdim, dtype)
return execute('sum', input, dim, keepdim, dtype)
# unique
def unique(input, sorted=True, return_inverse=False, return_counts=False, dim=None):
@@ -210,7 +212,7 @@ def var_mean(input, dim=None, *, correction=1, keepdim=False):
return execute('var_mean', input, dim, correction, keepdim)
# count_nonzero
def count_nonzero(input, dim=None):
def count_nonzero(input, dim=-1):
return execute('count_nonzero', input, dim)
__all__ = ['all', 'amax', 'amin', 'aminmax', 'any', 'argmax', 'argmin', 'count_nonzero',


+ 1
- 1
mindnlp/core/random.py View File

@@ -54,7 +54,7 @@ def manual_seed(seed):
is raised. Negative inputs are remapped to positive values with the formula
`0xffff_ffff_ffff_ffff + seed`.
"""
mindspore.set_seed(seed)
# mindspore.set_seed(seed + 1)
seed = int(seed)
# set_seed(seed)
return default_generator.manual_seed(seed)


+ 3
- 0
mindnlp/transformers/__init__.py View File

@@ -56,6 +56,9 @@ transformers.modeling_utils._get_resolved_checkpoint_files = _get_resolved_check
transformers.tokenization_utils_base.PreTrainedTokenizerBase.apply_chat_template = apply_chat_template_wrapper(
transformers.tokenization_utils_base.PreTrainedTokenizerBase.apply_chat_template
)
transformers.tokenization_utils_base.PreTrainedTokenizerBase.__call__ = apply_chat_template_wrapper(
transformers.tokenization_utils_base.PreTrainedTokenizerBase.__call__
)

transformers.pipelines.pipeline = dtype_wrapper(transformers.pipelines.pipeline)
transformers.modeling_utils.caching_allocator_warmup = empty_fn


+ 0
- 1
mindnlp/transformers/masking_utils.py View File

@@ -419,7 +419,6 @@ def sdpa_mask_older_torch(

if padding_mask is not None:
causal_mask = causal_mask * padding_mask[:, None, None, :]

# # Due to a bug in versions of torch<2.5, we need to update the mask in case a query is not attending to any
# # tokens (due to padding). See details in https://github.com/pytorch/pytorch/issues/110213
# if not _is_torch_greater_or_equal_than_2_5 and allow_torch_fix:


+ 4
- 2
mindnlp/utils/safetensors_patch.py View File

@@ -84,13 +84,15 @@ class PySafeSlice:
def ndim(self):
return len(self.shape)

def get(self, *args, **kwargs):
def get(self, slice=None):
nbytes = int(np.prod(self.shape)) * np.dtype(self.dtype).itemsize
buffer = bytearray(nbytes)
self.bufferfile.seek(self.start_offset)
self.bufferfile.readinto(buffer)
array = np.frombuffer(buffer, dtype=self.dtype).reshape(self.shape)
array = array.reshape(self.shape)
if slice is not None:
array = array[slice]
if not SUPPORT_BF16 and self.info["dtype"] == 'BF16':
array = array.astype(np.float16)
tensor = core.from_numpy(array)
@@ -128,7 +130,7 @@ class PySafeSlice:
return self.nelements * self.bits

def __getitem__(self, slice):
return self.get()[slice]
return self.get(slice)

def getSize(fileobject):
fileobject.seek(0, 2) # move the cursor to the end of the file


+ 0
- 0
tools/__init__.py View File


+ 92
- 0
tools/op_auto_gen.py View File

@@ -0,0 +1,92 @@
import re
import inspect
import importlib
import argparse

import mindspore
from mindspore import ops
from mindspore.ops.auto_generate import gen_ops_prim
from mindspore.ops.auto_generate import pyboost_inner_prim

def camel_to_snake_case_improved(camel_case_str):
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', camel_case_str)
snake_case_str = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
return snake_case_str
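Example conversions produced by the helper above (not part of the file):

print(camel_to_snake_case_improved('ReduceSum'))    # reduce_sum
print(camel_to_snake_case_improved('BatchMatMul'))  # batch_mat_mul
print(camel_to_snake_case_improved('ReLU6'))        # re_lu6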

op_func_no_init = '''
{name}_op = {op}().set_device('{device}')
def {name}(*args):
return {name}_op(*args)
'''

op_func_with_init = '''
def {name}(*args):
op = _get_cache_prim({op})(*args[-{idx}:]).set_device('{device}')
return op(*args[:-{idx}])
'''
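For illustration, with hypothetical values name='ones_like', op='OnesLike', device='CPU', the first template renders to a module-level primitive plus a thin wrapper:

ones_like_op = OnesLike().set_device('CPU')
def ones_like(*args):
    return ones_like_op(*args)

The second template is used when the primitive's constructor takes arguments: the trailing idx positional args are routed to the (cached) constructor via _get_cache_prim and the remaining args are passed to the call.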

def gen_legacy_op(gen_file, device='CPU'):
op_list = list(filter(lambda s: s[0].isupper(), ops.operations.__all__))
grad_op = list(filter(lambda s: s[0].isupper(), dir(mindspore.ops.operations._grad_ops)))

op_dict = {
'mindspore.ops.operations._grad_ops': grad_op,
'mindspore.ops.operations': op_list
}

with open(gen_file, 'w') as f:
f.write("from mindspore.ops.operations import *\n"
"from mindspore.ops.operations._grad_ops import *\n"
"from mindspore.ops._primitive_cache import _get_cache_prim\n\n")
for op_module, op_list in op_dict.items():
for old_op_name in op_list:
if old_op_name in ['P', 'Print', 'Assert', 'Custom', 'CustomOpBuilder', 'DataType', 'ReduceOp', 'TBERegOp', 'Tensor']:
continue

op_mod = importlib.import_module(op_module)
ops_class = getattr(op_mod, old_op_name, None)
init_signature = inspect.signature(ops_class.__init__)
name = camel_to_snake_case_improved(old_op_name)
if len(init_signature.parameters) > 1:
init_args = list(init_signature.parameters.keys())
init_args.pop(0)
code = op_func_with_init.format(name=name, op=old_op_name, idx=len(init_args), device=device)

else:
code = op_func_no_init.format(name=name, op=old_op_name, device=device)
f.write(code + '\n')
f.close()

def gen_aclnn_op(gen_file, device):
gen_ops_list = list(filter(lambda s: s.startswith("pyboost"), dir(gen_ops_prim)))
pyboost_inner_list = list(filter(lambda s: s.endswith("_impl"), dir(pyboost_inner_prim)))

with open(gen_file, 'w') as f:
f.write("from mindspore.ops.auto_generate.gen_ops_prim import *\n"
"from mindspore.ops.auto_generate.pyboost_inner_prim import *\n\n")

for pyboost_op_name in gen_ops_list:
op_name = pyboost_op_name.replace('pyboost_', '') + '_op'
op_instance = getattr(gen_ops_prim, op_name, None)
if op_instance is not None:
f.write(f"{op_name} = {getattr(gen_ops_prim, op_name).__class__.__name__}().set_device('Ascend')\n\n")

# for op_name in pyboost_inner_list:
# f.write(f"{op_name} = {getattr(pyboost_inner_prim, op_name).__class__.__name__}()\n\n")
f.close()

if __name__ == '__main__':
parser = argparse.ArgumentParser()
# positional argument
parser.add_argument('output_file', type=str)
parser.add_argument('--device', type=str, choices=['CPU', 'GPU', 'Ascend'])
parser.add_argument('--op_type', type=str, default='legacy', required=False, choices=['legacy', 'pyboost'])


args = parser.parse_args()
print(args)
if args.op_type == 'legacy':
gen_legacy_op(args.output_file, args.device)
elif args.op_type == 'pyboost':
gen_aclnn_op(args.output_file, args.device)
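Assuming the script is run from the repository root, a typical invocation might look like `python tools/op_auto_gen.py legacy_ops.py --device CPU --op_type legacy` (output filename hypothetical); it writes one thin wrapper function per primitive into the given file.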
