16 Commits

23 changed files with 2121 additions and 3 deletions
Split View
  1. +49
    -0
      configs/mobilenet_v3_large.yaml
  2. +39
    -0
      configs/parser.py
  3. +49
    -0
      configs/resnet50.yaml
  4. BIN
      tools/__init__.py
  5. +68
    -0
      tools/eval.py
  6. +48
    -0
      tools/export.py
  7. +87
    -0
      tools/train.py
  8. +123
    -0
      xbm/args.py
  9. BIN
      xbm/core/__init__.py
  10. +1
    -1
      xbm/core/criterion.py
  11. +45
    -0
      xbm/core/ema.py
  12. +4
    -0
      xbm/core/get_misc.py
  13. +8
    -1
      xbm/core/optimizer.py
  14. +1
    -1
      xbm/core/schedulers.py
  15. +77
    -0
      xbm/core/train_one_step_with_ema.py
  16. +16
    -0
      xbm/datasets/__init__.py
  17. +18
    -0
      xbm/datasets/augment/__init__.py
  18. +896
    -0
      xbm/datasets/augment/auto_augment.py
  19. +247
    -0
      xbm/datasets/augment/mixup.py
  20. +113
    -0
      xbm/datasets/augment/random_erasing.py
  21. BIN
      xbm/datasets/data_utils/__init__.py
  22. +72
    -0
      xbm/datasets/data_utils/moxing_adapter.py
  23. +160
    -0
      xbm/datasets/imagenet.py

+ 49
- 0
configs/mobilenet_v3_large.yaml View File

@@ -0,0 +1,49 @@
# Architecture
arch: mobilenet_v3_large

# ===== Dataset ===== #
data_url: obs://zhengxiawu/data/ImageNet2012
train_url: obs://zhengxiawu/project/wzhuang/output/mobilenetv3_large
set: ImageNet
num_classes: 1000
mix_up: 0.8
cutmix: 1.0
auto_augment: rand-m9-mstd0.5-inc1
interpolation: bicubic
re_prob: 0.1
re_mode: pixel
re_count: 1
mixup_prob: 1.
switch_prob: 0.5
mixup_mode: batch
image_size: 224


# ===== Learning Rate Policy ======== #
optimizer: adamw
base_lr: 0.004
warmup_lr: 0.00000007
min_lr: 0.0000006
lr_scheduler: cosine_lr
warmup_length: 20


# ===== Network training config ===== #
amp_level: O1
keep_bn_fp32: True
beta: [ 0.9, 0.999 ]
clip_global_norm_value: 5.
is_dynamic_loss_scale: True
epochs: 600
label_smoothing: 0.1
weight_decay: 0.05
momentum: 0.9
batch_size: 256

# ===== EMA ===== #
with_ema: True
ema_decay: 0.9999

# ===== Hardware setup ===== #
num_parallel_workers: 16
device_target: Ascend

+ 39
- 0
configs/parser.py View File

@@ -0,0 +1,39 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""parser function"""
USABLE_TYPES = set([float, int])


def trim_preceding_hyphens(st):
    """Strip leading '-' characters from a CLI option string.

    Uses ``str.lstrip`` so an empty or all-hyphen string returns '' instead
    of raising IndexError as the old ``while st[i] == "-"`` loop did.
    """
    return st.lstrip("-")


def arg_to_varname(st: str):
    """Turn a CLI flag like ``--mix-up=0.8`` into the variable name ``mix_up``.

    Leading hyphens are trimmed, inner hyphens become underscores, and any
    ``=value`` suffix is dropped.
    """
    name = trim_preceding_hyphens(st)
    name = name.replace("-", "_")
    return name.partition("=")[0]


def argv_to_vars(argv):
    """Return the variable names of every flag in ``argv`` except ``--config``.

    These names are later used to let explicitly-passed CLI flags override
    values loaded from the YAML config.
    """
    # Comprehension replaces the manual append loop; the walrus binding
    # avoids calling arg_to_varname twice per flag.
    return [
        varname
        for arg in argv
        if arg.startswith("-") and (varname := arg_to_varname(arg)) != "config"
    ]

+ 49
- 0
configs/resnet50.yaml View File

@@ -0,0 +1,49 @@
# Architecture
arch: resnet50

# ===== Dataset ===== #
data_url: obs://zhengxiawu/data/ImageNet2012
train_url: obs://zhengxiawu/project/wzhuang/output/resnet50
set: ImageNet
num_classes: 1000
mix_up: 0.8
cutmix: 1.0
auto_augment: rand-m9-mstd0.5-inc1
interpolation: bicubic
re_prob: 0.1
re_mode: pixel
re_count: 1
mixup_prob: 1.
switch_prob: 0.5
mixup_mode: batch
image_size: 224


# ===== Learning Rate Policy ======== #
optimizer: adamw
base_lr: 0.004
warmup_lr: 0.00000007
min_lr: 0.0000006
lr_scheduler: cosine_lr
warmup_length: 20


# ===== Network training config ===== #
amp_level: O1
keep_bn_fp32: True
beta: [ 0.9, 0.999 ]
clip_global_norm_value: 5.
is_dynamic_loss_scale: True
epochs: 600
label_smoothing: 0.1
weight_decay: 0.05
momentum: 0.9
batch_size: 256

# ===== EMA ===== #
with_ema: True
ema_decay: 0.9999

# ===== Hardware setup ===== #
num_parallel_workers: 16
device_target: Ascend

BIN
tools/__init__.py View File


+ 68
- 0
tools/eval.py View File

@@ -0,0 +1,68 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""eval"""

from mindspore import Model
from mindspore import context
from mindspore import nn
from mindspore.common import set_seed

from src.args import args
from src.tools.cell import cast_amp
from src.tools.criterion import get_criterion, NetWithLoss
from src.tools.get_misc import get_dataset, set_device, get_model, pretrained, get_train_one_step
from src.tools.optimizer import get_optimizer

set_seed(args.seed)


def main():
    """Evaluate a (pretrained) model on the validation split.

    Builds the network described by the module-global ``args``, optionally
    loads a checkpoint, and reports Loss / Top-1 / Top-5 accuracy.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
    context.set_context(enable_graph_kernel=False)
    if args.device_target == "Ascend":
        context.set_context(enable_auto_mixed_precision=True)
    set_device(args)

    # Build the network and cast it to the configured AMP level.
    net = get_model(args)
    cast_amp(net)
    criterion = get_criterion(args)

    net_with_loss = NetWithLoss(net, criterion)
    if args.pretrained:
        pretrained(args, net)

    data = get_dataset(args, training=False)
    batch_num = data.val_dataset.get_dataset_size()
    # The optimizer is only needed to construct the train-one-step wrapper;
    # no parameter update happens during evaluation.
    optimizer = get_optimizer(args, net, batch_num)

    net_with_loss = get_train_one_step(args, net_with_loss, optimizer)
    # WithEvalCell adds fp16 -> fp32 output casting for O2/O3/auto AMP.
    eval_network = nn.WithEvalCell(net, criterion, args.amp_level in ["O2", "O3", "auto"])
    eval_indexes = [0, 1, 2]
    eval_metrics = {'Loss': nn.Loss(),
                    'Top1-Acc': nn.Top1CategoricalAccuracy(),
                    'Top5-Acc': nn.Top5CategoricalAccuracy()}
    model = Model(net_with_loss, metrics=eval_metrics,
                  eval_network=eval_network,
                  eval_indexes=eval_indexes)
    # f-prefixes removed from placeholder-free literals (lint F541).
    print("=> begin eval")
    results = model.eval(data.val_dataset)
    print(f"=> eval results:{results}")
    print("=> eval success")


if __name__ == '__main__':
    main()

+ 48
- 0
tools/export.py View File

@@ -0,0 +1,48 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
##############export checkpoint file into air, onnx or mindir model#################
python export.py
"""

import numpy as np
from mindspore import Tensor, load_checkpoint, load_param_into_net, export, context
from mindspore import dtype as mstype

from src.args import args
from src.tools.cell import cast_amp
from src.tools.criterion import get_criterion, NetWithLoss
from src.tools.get_misc import get_model

# Configure graph-mode execution on the requested backend before building
# the network; export requires the same context as inference.
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)

if args.device_target in ["Ascend", "GPU"]:
    context.set_context(device_id=args.device_id)

if __name__ == '__main__':
    # Build the network, cast it to the configured AMP level, and wrap the
    # loss (the wrapper itself is not exported, only ``net`` is).
    net = get_model(args)
    criterion = get_criterion(args)
    cast_amp(net)
    net_with_loss = NetWithLoss(net, criterion)
    assert args.pretrained is not None, "checkpoint_path is None."

    # Load the trained weights into the bare network.
    param_dict = load_checkpoint(args.pretrained)
    load_param_into_net(net, param_dict)

    # Export in inference mode with full-precision weights.
    net.set_train(False)
    net.to_float(mstype.float32)

    # Trace with a dummy NCHW input of the configured image size.
    input_arr = Tensor(np.zeros([1, 3, args.image_size, args.image_size], np.float32))
    export(net, input_arr, file_name=args.arch, file_format=args.file_format)

+ 87
- 0
tools/train.py View File

@@ -0,0 +1,87 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train"""
import os

from mindspore import Model
from mindspore import context
from mindspore import nn
from mindspore.common import set_seed
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor

from src.args import args
from src.tools.callback import EvaluateCallBack
from src.tools.cell import cast_amp
from src.tools.criterion import get_criterion, NetWithLoss
from src.tools.get_misc import get_dataset, set_device, get_model, pretrained, get_train_one_step
from src.tools.optimizer import get_optimizer


def main():
    """Train the configured architecture on the configured dataset.

    All hyper-parameters come from the module-global ``args`` (CLI + YAML).
    Trains with per-epoch checkpointing and periodic evaluation; on
    ModelArts the checkpoints are copied back to ``args.train_url``.
    """
    set_seed(args.seed)
    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
    context.set_context(enable_graph_kernel=False)
    if args.device_target == "Ascend":
        context.set_context(enable_auto_mixed_precision=True)
    # rank identifies this process within a (possibly) multi-device run.
    rank = set_device(args)

    # get model and cast amp_level
    net = get_model(args)
    cast_amp(net)
    criterion = get_criterion(args)
    net_with_loss = NetWithLoss(net, criterion)
    if args.pretrained:
        pretrained(args, net)

    data = get_dataset(args)
    batch_num = data.train_dataset.get_dataset_size()
    optimizer = get_optimizer(args, net, batch_num)

    net_with_loss = get_train_one_step(args, net_with_loss, optimizer)

    # WithEvalCell casts outputs back to fp32 for O2/O3/auto AMP levels.
    eval_network = nn.WithEvalCell(net, criterion, args.amp_level in ["O2", "O3", "auto"])
    eval_indexes = [0, 1, 2]
    model = Model(net_with_loss, metrics={"acc", "loss"},
                  eval_network=eval_network,
                  eval_indexes=eval_indexes)

    # One checkpoint per epoch; at most ``args.save_every`` files are kept.
    config_ck = CheckpointConfig(save_checkpoint_steps=data.train_dataset.get_dataset_size(),
                                 keep_checkpoint_max=args.save_every)
    time_cb = TimeMonitor(data_size=data.train_dataset.get_dataset_size())

    ckpt_save_dir = "./ckpt_" + str(rank)
    if args.run_modelarts:
        # NOTE(review): assumes /cache is the writable local dir on ModelArts.
        ckpt_save_dir = "/cache/ckpt_" + str(rank)

    ckpoint_cb = ModelCheckpoint(prefix=args.arch + str(rank), directory=ckpt_save_dir,
                                 config=config_ck)
    loss_cb = LossMonitor()
    eval_cb = EvaluateCallBack(model, eval_dataset=data.val_dataset, src_url=ckpt_save_dir,
                               train_url=os.path.join(args.train_url, "ckpt_" + str(rank)),
                               total_epochs=args.epochs - args.start_epoch, save_freq=args.save_every)

    print("begin train")
    model.train(int(args.epochs - args.start_epoch), data.train_dataset,
                callbacks=[time_cb, ckpoint_cb, loss_cb, eval_cb],
                dataset_sink_mode=True)
    print("train success")

    if args.run_modelarts:
        import moxing as mox
        # Copy local checkpoints back to OBS so they survive job teardown.
        mox.file.copy_parallel(src_url=ckpt_save_dir, dst_url=os.path.join(args.train_url, "ckpt_" + str(rank)))


if __name__ == '__main__':
    main()

+ 123
- 0
xbm/args.py View File

@@ -0,0 +1,123 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""global args for Transformer in Transformer(TNT)"""
import argparse
import ast
import os
import sys

import yaml

from src.configs import parser as _parser

args = None


def parse_arguments():
    """Build the CLI parser and populate the module-global ``args``.

    After parsing, ``get_config()`` merges the YAML file given by --config;
    flags explicitly passed on the command line take precedence over YAML.
    """
    global args
    # NOTE(review): description inherited from the TNT repo this was adapted from.
    parser = argparse.ArgumentParser(description="MindSpore TNT Training")

    parser.add_argument("-a", "--arch", metavar="ARCH", default="ResNet50", help="model architecture")
    parser.add_argument("--accumulation_step", default=1, type=int, help="accumulation step")
    # NOTE(review): eval/train scripts also test for "auto", which is not an
    # accepted choice here — confirm whether "auto" should be allowed.
    parser.add_argument("--amp_level", default="O2", choices=["O0", "O1", "O2", "O3"], help="AMP Level")
    parser.add_argument("--batch_size", default=256, type=int, metavar="N",
                        help="mini-batch size (default: 256), this is the total "
                             "batch size of all Devices on the current node when "
                             "using Data Parallel or Distributed Data Parallel")
    parser.add_argument("--beta", default=[0.9, 0.999], type=lambda x: [float(a) for a in x.split(",")],
                        help="beta for optimizer")
    parser.add_argument("--with_ema", default=False, type=ast.literal_eval, help="training with ema")
    parser.add_argument("--ema_decay", default=0.9999, type=float, help="ema decay")
    parser.add_argument('--data_url', default="obs://zhengxiawu/data/ImageNet2012", help='location of data.')
    parser.add_argument("--device_id", default=0, type=int, help="device id")
    parser.add_argument("--device_num", default=8, type=int, help="device num")
    parser.add_argument("--device_target", default="Ascend", choices=["GPU", "Ascend"], type=str)
    parser.add_argument("--epochs", default=300, type=int, metavar="N", help="number of total epochs to run")
    parser.add_argument("--eps", default=1e-8, type=float)
    parser.add_argument("--file_format", type=str, choices=["AIR", "MINDIR"], default="MINDIR", help="file format")
    parser.add_argument("--in_chans", default=3, type=int)
    parser.add_argument("--is_dynamic_loss_scale", default=1, type=int, help="is_dynamic_loss_scale ")
    parser.add_argument("--keep_checkpoint_max", default=20, type=int, help="keep checkpoint max num")
    parser.add_argument("--optimizer", help="Which optimizer to use", default="sgd")
    parser.add_argument("--set", help="name of dataset", type=str, default="ImageNet")
    parser.add_argument("--mix_up", default=0., type=float, help="mix up")
    parser.add_argument("--mlp_ratio", help="mlp ", default=4., type=float)
    parser.add_argument("-j", "--num_parallel_workers", default=20, type=int, metavar="N",
                        help="number of data loading workers (default: 20)")
    parser.add_argument("--start_epoch", default=0, type=int, metavar="N",
                        help="manual epoch number (useful on restarts)")
    parser.add_argument("--warmup_length", default=0, type=int, help="number of warmup iterations")
    parser.add_argument("--warmup_lr", default=5e-7, type=float, help="warm up learning rate")
    parser.add_argument("--wd", "--weight_decay", default=0.05, type=float, metavar="W",
                        help="weight decay (default: 0.05)", dest="weight_decay")
    parser.add_argument("--loss_scale", default=1024, type=int, help="loss_scale")
    parser.add_argument("--lr", "--learning_rate", default=5e-4, type=float, help="initial lr", dest="lr")
    parser.add_argument("--lr_scheduler", default="cosine_annealing", help="schedule for the learning rate.")
    parser.add_argument("--lr_adjust", default=30, type=float, help="interval to drop lr")
    # Fixed: was type=int with a float default (0.97); passing any fractional
    # value on the CLI would crash in int(), and the multiplier is a float.
    parser.add_argument("--lr_gamma", default=0.97, type=float, help="multistep multiplier")
    parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum")
    parser.add_argument("--num_classes", default=1000, type=int)
    parser.add_argument("--pretrained", dest="pretrained", default=None, type=str, help="use pre-trained model")
    parser.add_argument("--config", help="Config file to use (see configs dir)", default=None, required=True)
    parser.add_argument("--seed", default=0, type=int, help="seed for initializing training. ")
    # Fixed help text: it claimed "(default:2)" while the default is 20.
    parser.add_argument("--save_every", default=20, type=int, help="save every ___ epochs(default: 20)")
    parser.add_argument("--label_smoothing", type=float, help="label smoothing to use, default 0.1", default=0.1)
    parser.add_argument("--image_size", default=224, help="image Size.", type=int)
    parser.add_argument('--train_url', default=None, help='location of training outputs.')
    parser.add_argument("--run_modelarts", type=ast.literal_eval, default=True, help="whether run on modelarts")
    args = parser.parse_args()

    get_config()


def get_config():
    """Merge the YAML config named by --config into the global ``args``.

    Flags explicitly present on the command line (as detected by
    ``argv_to_vars``) override the YAML values; everything else in the YAML
    is written into ``args.__dict__``.
    """
    global args
    override_args = _parser.argv_to_vars(sys.argv)
    # load yaml file
    if args.run_modelarts:
        # moxing provides file access to OBS object storage on ModelArts.
        import moxing as mox
        if not args.config.startswith("obs:/"):
            args.config = "obs:/" + args.config
        with mox.file.File(args.config, 'r') as f:
            yaml_txt = f.read()
    else:
        yaml_txt = open(args.config).read()

    # override args
    # NOTE(review): FullLoader is fine for trusted configs; consider
    # yaml.safe_load if configs could ever come from untrusted users.
    loaded_yaml = yaml.load(yaml_txt, Loader=yaml.FullLoader)

    # Re-apply explicitly-passed CLI values on top of the YAML contents.
    for v in override_args:
        loaded_yaml[v] = getattr(args, v)

    print(f"=> Reading YAML config from {args.config}")

    args.__dict__.update(loaded_yaml)
    print(args)

    # Make the device count visible to MindSpore's distributed launcher.
    if "DEVICE_NUM" not in os.environ.keys():
        os.environ["DEVICE_NUM"] = str(args.device_num)
        os.environ["RANK_SIZE"] = str(args.device_num)


def run_args():
    """Populate the module-global ``args`` exactly once."""
    global args
    if args is not None:
        return
    parse_arguments()


# Parse at import time so ``from xbm.args import args`` is ready to use.
run_args()

BIN
xbm/core/__init__.py View File


+ 1
- 1
xbm/core/criterion.py View File

@@ -1,4 +1,4 @@
# Copyright 2021 Huawei Technologies Co., Ltd
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.


+ 45
- 0
xbm/core/ema.py View File

@@ -0,0 +1,45 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ema define"""

import mindspore.nn as nn
from mindspore import Tensor
from mindspore import dtype as mstype
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P

_ema_op = C.MultitypeFuncGraph("grad_ema_op")
Assign = P.Assign()
AssignAdd = P.AssignAdd()


@_ema_op.register("Tensor", "Tensor", "Tensor")
def _ema_weights(factor, ema_weight, weight):
    """Refresh one EMA shadow weight: ema = factor * ema + (1 - factor) * w.

    Bug fix: the previous code used ``AssignAdd``, which ADDED the blended
    value on top of the stored shadow weight (ema += ema * factor + ...),
    making the shadow weights grow without bound. ``Assign`` — created at
    module level and previously unused — writes the blended value in place.
    """
    return Assign(ema_weight, ema_weight * factor + weight * (1 - factor))


class EMACell(nn.Cell):
    """EMACell Define.

    Maintains an exponential-moving-average ("shadow") copy of ``weights``;
    calling the cell with the current weights refreshes the shadow values
    via the module-level ``_ema_op`` multitype graph.
    """
    def __init__(self, weights, ema_decay=0.9999):
        super(EMACell, self).__init__()
        # Clone so the shadow weights are independent Parameters with a
        # distinguishing name prefix.
        self.ema_weights = weights.clone(prefix="_ema_weights")
        self.ema_decay = Tensor(ema_decay, mstype.float32)
        self.hyper_map = C.HyperMap()

    def construct(self, weights):
        # Apply the EMA update element-wise across all weight tensors.
        success = self.hyper_map(F.partial(_ema_op, self.ema_decay), self.ema_weights, weights)
        return success

+ 4
- 0
xbm/core/get_misc.py View File

@@ -121,4 +121,8 @@ def get_train_one_step(args, net_with_loss, optimizer):
net_with_loss = TrainClipGrad(net_with_loss, optimizer, scale_sense=scale_sense,
clip_global_norm_value=args.clip_global_norm_value,
use_global_norm=True)
if args.with_ema:
net_with_loss = TrainOneStepWithEMA(
net_with_loss, optimizer, scale_sense=scale_sense, with_ema=args.with_ema,
ema_decay=args.ema_decay)
return net_with_loss

+ 8
- 1
xbm/core/optimizer.py View File

@@ -61,6 +61,13 @@ def get_optimizer(args, model, batch_num):
eps=args.eps,
weight_decay=args.weight_decay
)
elif optim_type == "sgd":
optim = SGD(
params=params,
learning_rate=learning_rate,
momentum=args.momentum,
weight_decay=args.weight_decay
)
else:
raise ValueError(f"optimizer {optim_type} is not supported")

@@ -81,4 +88,4 @@ def get_param_groups(network):
# bn weight bias not using weight decay, be carefully for now x not include LN
no_decay_params.append(x)

return [{'params': no_decay_params, 'weight_decay': 0.0}, {'params': decay_params}]
return [{'params': no_decay_params, 'weight_decay': 0.0}, {'params': decay_params}]

+ 1
- 1
xbm/core/schedulers.py View File

@@ -1,4 +1,4 @@
# Copyright 2021 Huawei Technologies Co., Ltd
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.


+ 77
- 0
xbm/core/train_one_step_with_ema.py View File

@@ -0,0 +1,77 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""TrainOneStepWithEMA"""
import mindspore.nn as nn
from mindspore.common import RowTensor
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P

from src.trainer.ema import EMACell

_grad_scale = C.MultitypeFuncGraph("grad_scale")
reciprocal = P.Reciprocal()


@_grad_scale.register("Tensor", "Tensor")
def tensor_grad_scale(scale, grad):
    # Undo loss scaling: divide the gradient by the loss-scale factor,
    # casting the reciprocal to the gradient's dtype first.
    return grad * F.cast(reciprocal(scale), F.dtype(grad))


@_grad_scale.register("Tensor", "RowTensor")
def tensor_grad_scale_row_tensor(scale, grad):
    # Sparse-gradient variant: rescale only the values; the indices and
    # dense shape pass through unchanged.
    return RowTensor(grad.indices,
                     grad.values * F.cast(reciprocal(scale), F.dtype(grad.values)),
                     grad.dense_shape)


_grad_overflow = C.MultitypeFuncGraph("_grad_overflow")
grad_overflow = P.FloatStatus()


class TrainOneStepWithEMA(nn.TrainOneStepWithLossScaleCell):
    """TrainOneStepWithEMA.

    One training step with dynamic loss scaling; when ``with_ema`` is set,
    an exponential moving average of the weights is refreshed after every
    successfully applied (non-overflow) optimizer step.
    """

    def __init__(self, network, optimizer, scale_sense=1.0, with_ema=False, ema_decay=0.9999):
        super(TrainOneStepWithEMA, self).__init__(network, optimizer, scale_sense)
        self.print = P.Print()  # graph-mode-safe print op
        self.with_ema = with_ema
        if self.with_ema:
            # Shadow copy of the trainable weights, updated after each step.
            self.ema_model = EMACell(self.weights, ema_decay=ema_decay)

    def construct(self, *inputs):
        """Run one scaled forward/backward step; skip the update on overflow."""
        weights = self.weights
        loss = self.network(*inputs)
        scaling_sens = self.scale_sense

        # Arm overflow detection before computing scaled gradients.
        status, scaling_sens = self.start_overflow_check(loss, scaling_sens)

        # Scale the loss, backprop, then unscale the gradients.
        scaling_sens_filled = C.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss))
        grads = self.grad(self.network, weights)(*inputs, scaling_sens_filled)
        grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads)
        # apply grad reducer on grads (all-reduce in distributed training)
        grads = self.grad_reducer(grads)
        # get the overflow buffer
        cond = self.get_overflow_status(status, grads)
        overflow = self.process_loss_scale(cond)
        # if there is no overflow, do optimize
        if not overflow:
            loss = F.depend(loss, self.optimizer(grads))
            if self.with_ema:
                self.ema_model(self.weights)
        else:
            self.print("=============Over Flow, skipping=============")
        return loss

+ 16
- 0
xbm/datasets/__init__.py View File

@@ -0,0 +1,16 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""init datasets"""
from .imagenet import ImageNet

+ 18
- 0
xbm/datasets/augment/__init__.py View File

@@ -0,0 +1,18 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""init augment"""
from .auto_augment import pil_interp, rand_augment_transform
from .mixup import Mixup
from .random_erasing import RandomErasing

+ 896
- 0
xbm/datasets/augment/auto_augment.py View File

@@ -0,0 +1,896 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" AutoAugment, RandAugment, and AugMix for MindSpore

This code implements the searched ImageNet policies with various tweaks and improvements and
does not include any of the search code.

AA and RA Implementation adapted from:
https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/autoaugment.py

AugMix adapted from:
https://github.com/google-research/augmix

Papers:
AutoAugment: Learning Augmentation Policies from Data - https://arxiv.org/abs/1805.09501
Learning Data Augmentation Strategies for Object Detection - https://arxiv.org/abs/1906.11172
RandAugment: Practical automated data augmentation... - https://arxiv.org/abs/1909.13719
AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty - https://arxiv.org/abs/1912.02781

Hacked together by / Copyright 2020 Ross Wightman
"""
import math
import random
import re

import numpy as np
import PIL
from PIL import Image, ImageOps, ImageEnhance

_PIL_VER = tuple([int(x) for x in PIL.__version__.split('.')[:2]])

_FILL = (128, 128, 128)

# This signifies the max integer that the controller RNN could predict for the
# augmentation scheme.
_MAX_LEVEL = 10.

_HPARAMS_DEFAULT = dict(
translate_const=250,
img_mean=_FILL,
)

_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC)


def pil_interp(method):
    """Translate an interpolation name to the matching PIL resample constant.

    Unknown names fall back to bilinear, matching the original if/elif chain.
    """
    lookup = {
        'bicubic': Image.BICUBIC,
        'lanczos': Image.LANCZOS,
        'hamming': Image.HAMMING,
    }
    return lookup.get(method, Image.BILINEAR)


def _interpolation(kwargs):
"""_interpolation"""
interpolation = kwargs.pop('resample', Image.BILINEAR)
interpolation = random.choice(interpolation) \
if isinstance(interpolation, (list, tuple)) else interpolation
return interpolation

def _check_args_tf(kwargs):
    """Normalize transform kwargs in place.

    Drops 'fillcolor' on PIL < 5.0 (unsupported there) and resolves
    'resample' to a single concrete filter via ``_interpolation``.
    """
    if _PIL_VER < (5, 0):
        kwargs.pop('fillcolor', None)
    kwargs['resample'] = _interpolation(kwargs)


def shear_x(img, factor, **kwargs):
    """Shear the image horizontally by ``factor``."""
    _check_args_tf(kwargs)
    matrix = (1, factor, 0, 0, 1, 0)
    return img.transform(img.size, Image.AFFINE, matrix, **kwargs)


def shear_y(img, factor, **kwargs):
    """Shear the image vertically by ``factor``."""
    _check_args_tf(kwargs)
    matrix = (1, 0, 0, factor, 1, 0)
    return img.transform(img.size, Image.AFFINE, matrix, **kwargs)


def translate_x_rel(img, pct, **kwargs):
    """Translate horizontally by a fraction ``pct`` of the image width."""
    offset = pct * img.size[0]
    _check_args_tf(kwargs)
    return img.transform(img.size, Image.AFFINE, (1, 0, offset, 0, 1, 0), **kwargs)


def translate_y_rel(img, pct, **kwargs):
    """Translate vertically by a fraction ``pct`` of the image height."""
    offset = pct * img.size[1]
    _check_args_tf(kwargs)
    return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, offset), **kwargs)


def translate_x_abs(img, pixels, **kwargs):
    """Translate horizontally by an absolute ``pixels`` offset."""
    _check_args_tf(kwargs)
    matrix = (1, 0, pixels, 0, 1, 0)
    return img.transform(img.size, Image.AFFINE, matrix, **kwargs)


def translate_y_abs(img, pixels, **kwargs):
    """Translate vertically by an absolute ``pixels`` offset."""
    _check_args_tf(kwargs)
    matrix = (1, 0, 0, 0, 1, pixels)
    return img.transform(img.size, Image.AFFINE, matrix, **kwargs)


def rotate(img, degrees, **kwargs):
    """Rotate ``img`` by ``degrees``, dispatching on the installed PIL version.

    PIL >= 5.2 supports ``Image.rotate`` with transform kwargs directly;
    PIL >= 5.0 lacks that, so an equivalent affine transform about the image
    centre is built by hand; older PILs only receive the resample filter.
    """
    _check_args_tf(kwargs)
    if _PIL_VER >= (5, 2):
        func = img.rotate(degrees, **kwargs)
    elif _PIL_VER >= (5, 0):
        w, h = img.size
        post_trans = (0, 0)
        rotn_center = (w / 2.0, h / 2.0)
        # Image.transform's matrix maps output coords to input coords,
        # hence the negated angle.
        angle = -math.radians(degrees)
        matrix = [
            round(math.cos(angle), 15),
            round(math.sin(angle), 15),
            0.0,
            round(-math.sin(angle), 15),
            round(math.cos(angle), 15),
            0.0,
        ]

        def transform(x, y, matrix):
            # Apply affine matrix (a, b, c, d, e, f) to the point (x, y).
            (a, b, c, d, e, f) = matrix
            return a * x + b * y + c, d * x + e * y + f

        # Translate the rotation centre to the origin, rotate, translate back.
        matrix[2], matrix[5] = transform(
            -rotn_center[0] - post_trans[0], -rotn_center[1] - post_trans[1], matrix
        )
        matrix[2] += rotn_center[0]
        matrix[5] += rotn_center[1]
        func = img.transform(img.size, Image.AFFINE, matrix, **kwargs)
    else:
        func = img.rotate(degrees, resample=kwargs['resample'])
    return func


def auto_contrast(img, **__):
    """Maximize image contrast via ImageOps.autocontrast."""
    result = ImageOps.autocontrast(img)
    return result


def invert(img, **__):
    """Invert (negate) all pixel values."""
    result = ImageOps.invert(img)
    return result


def equalize(img, **__):
    """Equalize the image histogram."""
    result = ImageOps.equalize(img)
    return result


def solarize(img, thresh, **__):
    """Invert all pixel values at or above ``thresh``."""
    result = ImageOps.solarize(img, thresh)
    return result


def solarize_add(img, add, thresh=128, **__):
    """Add ``add`` to every pixel below ``thresh`` (clamped to 255).

    Only 'L' and 'RGB' images are processed; any other mode is returned
    unchanged.
    """
    if img.mode not in ("L", "RGB"):
        return img
    lut = [min(255, i + add) if i < thresh else i for i in range(256)]
    if img.mode == "RGB" and len(lut) == 256:
        lut = lut + lut + lut
    return img.point(lut)


def posterize(img, bits_to_keep, **__):
    """Keep only ``bits_to_keep`` bits per channel; >= 8 bits is a no-op."""
    if bits_to_keep >= 8:
        return img
    return ImageOps.posterize(img, bits_to_keep)


def contrast(img, factor, **__):
    """Adjust contrast; ``factor`` 1.0 leaves the image unchanged."""
    enhancer = ImageEnhance.Contrast(img)
    return enhancer.enhance(factor)


def color(img, factor, **__):
    """Adjust color saturation; ``factor`` 1.0 leaves the image unchanged."""
    enhancer = ImageEnhance.Color(img)
    return enhancer.enhance(factor)


def brightness(img, factor, **__):
    """Adjust brightness; ``factor`` 1.0 leaves the image unchanged."""
    enhancer = ImageEnhance.Brightness(img)
    return enhancer.enhance(factor)


def sharpness(img, factor, **__):
    """Adjust sharpness; ``factor`` 1.0 leaves the image unchanged."""
    enhancer = ImageEnhance.Sharpness(img)
    return enhancer.enhance(factor)


def _randomly_negate(v):
"""With 50% prob, negate the value"""
return -v if random.random() > 0.5 else v


def _rotate_level_to_arg(level, _hparams):
    """Map level 0..10 to a rotation angle in [-30, 30] degrees.

    (The previous docstring was a copy-paste of "_randomly_negate".)
    """
    # range [-30, 30]
    degrees = (level / _MAX_LEVEL) * 30.
    return (_randomly_negate(degrees),)


def _enhance_level_to_arg(level, _hparams):
    """Map level 0..10 to an enhancement factor in [0.1, 1.9]."""
    factor = (level / _MAX_LEVEL) * 1.8 + 0.1
    return (factor,)


def _enhance_increasing_level_to_arg(level, _hparams):
    """Map level to a factor in [0.1, 1.9], symmetric about the no-op 1.0.

    Severity grows as the factor moves away from 1.0 in either direction.
    """
    delta = (level / _MAX_LEVEL) * .9
    return (1.0 + _randomly_negate(delta),)


def _shear_level_to_arg(level, _hparams):
    """Map level 0..10 to a shear magnitude in [-0.3, 0.3]."""
    magnitude = (level / _MAX_LEVEL) * 0.3
    return (_randomly_negate(magnitude),)


def _translate_abs_level_to_arg(level, hparams):
    """Map level to an absolute pixel offset scaled by hparams['translate_const']."""
    pixels = (level / _MAX_LEVEL) * float(hparams['translate_const'])
    return (_randomly_negate(pixels),)


def _translate_rel_level_to_arg(level, hparams):
    """Map level to a relative translation fraction (default range [-0.45, 0.45])."""
    max_pct = hparams.get('translate_pct', 0.45)
    return (_randomly_negate((level / _MAX_LEVEL) * max_pct),)


def _posterize_level_to_arg(level, _hparams):
    """Map level to bits-to-keep in [0, 4] (TPU EfficientNet variant).

    Severity decreases as level grows: 'keep 0 up to 4 MSB of original image'.
    """
    bits = int((level / _MAX_LEVEL) * 4)
    return (bits,)


def _posterize_increasing_level_to_arg(level, hparams):
    """Map level to bits-to-keep in [4, 0] (Google research / UDA variant).

    Severity increases with level: 'keep 4 down to 0 MSB of original image'.
    """
    (bits,) = _posterize_level_to_arg(level, hparams)
    return (4 - bits,)


def _posterize_original_level_to_arg(level, _hparams):
    """Map level to bits-to-keep in [4, 8] (original AutoAugment paper variant).

    Severity decreases with level: 'keep 4 up to 8 MSB of image'.
    """
    bits = int((level / _MAX_LEVEL) * 4) + 4
    return (bits,)


def _solarize_level_to_arg(level, _hparams):
    """Map level to a solarize threshold in [0, 256]; severity decreases with level."""
    thresh = int((level / _MAX_LEVEL) * 256)
    return (thresh,)


def _solarize_increasing_level_to_arg(level, _hparams):
    """Map level to a solarize threshold in [0, 256]; severity increases with level."""
    (thresh,) = _solarize_level_to_arg(level, _hparams)
    return (256 - thresh,)


def _solarize_add_level_to_arg(level, _hparams):
    """Map level to a solarize_add amount in [0, 110]."""
    amount = int((level / _MAX_LEVEL) * 110)
    return (amount,)


# Maps op name -> callable(level, hparams) that converts a 0.._MAX_LEVEL
# magnitude into the op's positional argument tuple.  None means the op
# takes no magnitude-derived arguments.
LEVEL_TO_ARG = {
    'AutoContrast': None,
    'Equalize': None,
    'Invert': None,
    'Rotate': _rotate_level_to_arg,
    # There are several variations of the posterize level scaling in various Tensorflow/Google repositories/papers
    'Posterize': _posterize_level_to_arg,
    'PosterizeIncreasing': _posterize_increasing_level_to_arg,
    'PosterizeOriginal': _posterize_original_level_to_arg,
    'Solarize': _solarize_level_to_arg,
    'SolarizeIncreasing': _solarize_increasing_level_to_arg,
    'SolarizeAdd': _solarize_add_level_to_arg,
    'Color': _enhance_level_to_arg,
    'ColorIncreasing': _enhance_increasing_level_to_arg,
    'Contrast': _enhance_level_to_arg,
    'ContrastIncreasing': _enhance_increasing_level_to_arg,
    'Brightness': _enhance_level_to_arg,
    'BrightnessIncreasing': _enhance_increasing_level_to_arg,
    'Sharpness': _enhance_level_to_arg,
    'SharpnessIncreasing': _enhance_increasing_level_to_arg,
    'ShearX': _shear_level_to_arg,
    'ShearY': _shear_level_to_arg,
    'TranslateX': _translate_abs_level_to_arg,
    'TranslateY': _translate_abs_level_to_arg,
    'TranslateXRel': _translate_rel_level_to_arg,
    'TranslateYRel': _translate_rel_level_to_arg,
}

# Maps op name -> the PIL image-transform function that implements it.
# Note the *Increasing variants share implementations with the base ops;
# only their level->arg mapping (LEVEL_TO_ARG) differs.
NAME_TO_OP = {
    'AutoContrast': auto_contrast,
    'Equalize': equalize,
    'Invert': invert,
    'Rotate': rotate,
    'Posterize': posterize,
    'PosterizeIncreasing': posterize,
    'PosterizeOriginal': posterize,
    'Solarize': solarize,
    'SolarizeIncreasing': solarize,
    'SolarizeAdd': solarize_add,
    'Color': color,
    'ColorIncreasing': color,
    'Contrast': contrast,
    'ContrastIncreasing': contrast,
    'Brightness': brightness,
    'BrightnessIncreasing': brightness,
    'Sharpness': sharpness,
    'SharpnessIncreasing': sharpness,
    'ShearX': shear_x,
    'ShearY': shear_y,
    'TranslateX': translate_x_abs,
    'TranslateY': translate_y_abs,
    'TranslateXRel': translate_x_rel,
    'TranslateYRel': translate_y_rel,
}


class AugmentOp:
    """One augmentation op with an application probability and a magnitude.

    Bundles an image function from NAME_TO_OP with its level->argument
    mapping from LEVEL_TO_ARG, plus fill/resample kwargs from hparams.
    """

    def __init__(self, name, prob=0.5, magnitude=10, hparams=None):
        hparams = hparams or _HPARAMS_DEFAULT
        self.aug_fn = NAME_TO_OP[name]
        self.level_fn = LEVEL_TO_ARG[name]
        self.prob = prob
        self.magnitude = magnitude
        self.hparams = hparams.copy()
        fill = hparams['img_mean'] if 'img_mean' in hparams else _FILL
        interp = hparams['interpolation'] if 'interpolation' in hparams else _RANDOM_INTERPOLATION
        self.kwargs = dict(fillcolor=fill, resample=interp)
        # magnitude_std > 0 samples the magnitude from N(magnitude, std) per
        # call instead of keeping the fixed policy value; magnitude_std == inf
        # samples uniformly from [0, magnitude].
        # NOTE This is my own hack, being tested, not in papers or reference impls.
        self.magnitude_std = self.hparams.get('magnitude_std', 0)

    def __call__(self, img):
        """Apply the op to ``img`` with probability ``self.prob``."""
        if self.prob < 1.0 and random.random() > self.prob:
            return img
        magnitude = self.magnitude
        if self.magnitude_std:
            if self.magnitude_std == float('inf'):
                magnitude = random.uniform(0, magnitude)
            elif self.magnitude_std > 0:
                magnitude = random.gauss(magnitude, self.magnitude_std)
            magnitude = min(_MAX_LEVEL, max(0, magnitude))  # clip to valid range
        if self.level_fn is not None:
            level_args = self.level_fn(magnitude, self.hparams)
        else:
            level_args = tuple()
        return self.aug_fn(img, *level_args, **self.kwargs)


def auto_augment_policy_v0(hparams):
    """Build the ImageNet 'v0' AutoAugment policy as AugmentOp sub-policies.

    Each sub-policy is a pair of (op name, probability, magnitude) tuples
    applied in sequence; AutoAugment picks one sub-policy per image.
    """
    # ImageNet v0 policy from TPU EfficientNet impl, cannot find a paper reference.
    policy = [
        [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)],
        [('Color', 0.4, 9), ('Equalize', 0.6, 3)],
        [('Color', 0.4, 1), ('Rotate', 0.6, 8)],
        [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)],
        [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)],
        [('Color', 0.2, 0), ('Equalize', 0.8, 8)],
        [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)],
        [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)],
        [('Color', 0.6, 1), ('Equalize', 1.0, 2)],
        [('Invert', 0.4, 9), ('Rotate', 0.6, 0)],
        [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)],
        [('Color', 0.4, 7), ('Equalize', 0.6, 0)],
        [('Posterize', 0.4, 6), ('AutoContrast', 0.4, 7)],
        [('Solarize', 0.6, 8), ('Color', 0.6, 9)],
        [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)],
        [('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)],
        [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)],
        [('ShearY', 0.8, 0), ('Color', 0.6, 4)],
        [('Color', 1.0, 0), ('Rotate', 0.6, 2)],
        [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)],
        [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)],
        [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)],
        [('Posterize', 0.8, 2), ('Solarize', 0.6, 10)],  # This results in black image with Tpu posterize
        [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)],
        [('Color', 0.8, 6), ('Rotate', 0.4, 5)],
    ]
    # Instantiate every (name, prob, magnitude) tuple as an AugmentOp.
    pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy]
    return pc


def auto_augment_policy_v0r(hparams):
    """Build the ImageNet 'v0r' AutoAugment policy (v0 with PosterizeIncreasing).

    Identical to 'v0' except it uses the Google-research Posterize variant,
    where the number of bits discarded increases with magnitude.
    """
    # ImageNet v0 policy from TPU EfficientNet impl, with variation of Posterize used
    # in Google research implementation (number of bits discarded increases with magnitude)
    policy = [
        [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)],
        [('Color', 0.4, 9), ('Equalize', 0.6, 3)],
        [('Color', 0.4, 1), ('Rotate', 0.6, 8)],
        [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)],
        [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)],
        [('Color', 0.2, 0), ('Equalize', 0.8, 8)],
        [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)],
        [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)],
        [('Color', 0.6, 1), ('Equalize', 1.0, 2)],
        [('Invert', 0.4, 9), ('Rotate', 0.6, 0)],
        [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)],
        [('Color', 0.4, 7), ('Equalize', 0.6, 0)],
        [('PosterizeIncreasing', 0.4, 6), ('AutoContrast', 0.4, 7)],
        [('Solarize', 0.6, 8), ('Color', 0.6, 9)],
        [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)],
        [('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)],
        [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)],
        [('ShearY', 0.8, 0), ('Color', 0.6, 4)],
        [('Color', 1.0, 0), ('Rotate', 0.6, 2)],
        [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)],
        [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)],
        [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)],
        [('PosterizeIncreasing', 0.8, 2), ('Solarize', 0.6, 10)],
        [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)],
        [('Color', 0.8, 6), ('Rotate', 0.4, 5)],
    ]
    # Instantiate every (name, prob, magnitude) tuple as an AugmentOp.
    pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy]
    return pc


def auto_augment_policy_original(hparams):
    """Build the original AutoAugment ImageNet policy (arXiv:1805.09501).

    Each sub-policy is a pair of (op name, probability, magnitude) tuples;
    uses the paper's PosterizeOriginal level scaling (keep 4..8 MSB).
    """
    # ImageNet policy from https://arxiv.org/abs/1805.09501
    policy = [
        [('PosterizeOriginal', 0.4, 8), ('Rotate', 0.6, 9)],
        [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)],
        [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)],
        [('PosterizeOriginal', 0.6, 7), ('PosterizeOriginal', 0.6, 6)],
        [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)],
        [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)],
        [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)],
        [('PosterizeOriginal', 0.8, 5), ('Equalize', 1.0, 2)],
        [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)],
        [('Equalize', 0.6, 8), ('PosterizeOriginal', 0.4, 6)],
        [('Rotate', 0.8, 8), ('Color', 0.4, 0)],
        [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)],
        [('Equalize', 0.0, 7), ('Equalize', 0.8, 8)],
        [('Invert', 0.6, 4), ('Equalize', 1.0, 8)],
        [('Color', 0.6, 4), ('Contrast', 1.0, 8)],
        [('Rotate', 0.8, 8), ('Color', 1.0, 2)],
        [('Color', 0.8, 8), ('Solarize', 0.8, 7)],
        [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)],
        [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)],
        [('Color', 0.4, 0), ('Equalize', 0.6, 3)],
        [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)],
        [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)],
        [('Invert', 0.6, 4), ('Equalize', 1.0, 8)],
        [('Color', 0.6, 4), ('Contrast', 1.0, 8)],
        [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)],
    ]
    # Instantiate every (name, prob, magnitude) tuple as an AugmentOp.
    pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy]
    return pc


def auto_augment_policy_originalr(hparams):
    """Build the original AutoAugment policy with the research Posterize variant.

    Same sub-policies as 'original' but substitutes PosterizeIncreasing
    (bits discarded increase with magnitude).
    """
    # ImageNet policy from https://arxiv.org/abs/1805.09501 with research posterize variation
    policy = [
        [('PosterizeIncreasing', 0.4, 8), ('Rotate', 0.6, 9)],
        [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)],
        [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)],
        [('PosterizeIncreasing', 0.6, 7), ('PosterizeIncreasing', 0.6, 6)],
        [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)],
        [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)],
        [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)],
        [('PosterizeIncreasing', 0.8, 5), ('Equalize', 1.0, 2)],
        [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)],
        [('Equalize', 0.6, 8), ('PosterizeIncreasing', 0.4, 6)],
        [('Rotate', 0.8, 8), ('Color', 0.4, 0)],
        [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)],
        [('Equalize', 0.0, 7), ('Equalize', 0.8, 8)],
        [('Invert', 0.6, 4), ('Equalize', 1.0, 8)],
        [('Color', 0.6, 4), ('Contrast', 1.0, 8)],
        [('Rotate', 0.8, 8), ('Color', 1.0, 2)],
        [('Color', 0.8, 8), ('Solarize', 0.8, 7)],
        [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)],
        [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)],
        [('Color', 0.4, 0), ('Equalize', 0.6, 3)],
        [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)],
        [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)],
        [('Invert', 0.6, 4), ('Equalize', 1.0, 8)],
        [('Color', 0.6, 4), ('Contrast', 1.0, 8)],
        [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)],
    ]
    # Instantiate every (name, prob, magnitude) tuple as an AugmentOp.
    pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy]
    return pc


def auto_augment_policy(name='v0', hparams=None):
    """Look up and build a named AutoAugment policy.

    Known names: 'original', 'originalr', 'v0', 'v0r'.
    """
    hparams = hparams or _HPARAMS_DEFAULT
    builders = {
        'original': auto_augment_policy_original,
        'originalr': auto_augment_policy_originalr,
        'v0': auto_augment_policy_v0,
        'v0r': auto_augment_policy_v0r,
    }
    assert name in builders, 'Unknown AA policy (%s)' % name
    return builders[name](hparams)

class AutoAugment:
    """Apply one randomly chosen sub-policy (a sequence of ops) per call."""

    def __init__(self, policy):
        self.policy = policy

    def __call__(self, img):
        """Pick a random sub-policy and apply each of its ops in turn."""
        for op in random.choice(self.policy):
            img = op(img)
        return img


def auto_augment_transform(config_str, hparams):
    """
    Create a AutoAugment transform

    :param config_str: String defining configuration of auto augmentation. Consists of multiple sections separated by
    dashes ('-'). The first section names the AutoAugment policy (one of 'v0', 'v0r', 'original', 'originalr').
    The remaining sections, not order specific, are key/value flags; currently only
    'mstd' - float std deviation of magnitude noise applied
    Ex 'original-mstd0.5' results in AutoAugment with original policy, magnitude_std 0.5

    :param hparams: Other hparams (kwargs) for the AutoAugmentation scheme

    :return: A MindSpore compatible Transform
    """
    sections = config_str.split('-')
    policy_name = sections[0]
    for section in sections[1:]:
        parts = re.split(r'(\d.*)', section)
        if len(parts) < 2:
            # section carries no value; ignore it
            continue
        key, val = parts[:2]
        if key == 'mstd':
            # noise param injected via hparams for now
            hparams.setdefault('magnitude_std', float(val))
        else:
            assert False, 'Unknown AutoAugment config section'
    return AutoAugment(auto_augment_policy(policy_name, hparams=hparams))


# Default op pool for RandAugment (severity NOT monotonic with magnitude
# for Posterize/Solarize; see _RAND_INCREASING_TRANSFORMS for that).
_RAND_TRANSFORMS = [
    'AutoContrast',
    'Equalize',
    'Invert',
    'Rotate',
    'Posterize',
    'Solarize',
    'SolarizeAdd',
    'Color',
    'Contrast',
    'Brightness',
    'Sharpness',
    'ShearX',
    'ShearY',
    'TranslateXRel',
    'TranslateYRel',
    # 'Cutout' # NOTE I've implement this as random erasing separately
]

# Op pool used when the 'inc' flag is set: every op's severity increases
# with magnitude (the *Increasing level mappings).
_RAND_INCREASING_TRANSFORMS = [
    'AutoContrast',
    'Equalize',
    'Invert',
    'Rotate',
    'PosterizeIncreasing',
    'SolarizeIncreasing',
    'SolarizeAdd',
    'ColorIncreasing',
    'ContrastIncreasing',
    'BrightnessIncreasing',
    'SharpnessIncreasing',
    'ShearX',
    'ShearY',
    'TranslateXRel',
    'TranslateYRel',
    # 'Cutout' # NOTE I've implement this as random erasing separately
]

# These experimental weights are based loosely on the relative improvements mentioned in paper.
# They may not result in increased performance, but could likely be tuned to so.
# Unnormalized op-selection weights (weight index 0); normalized to
# probabilities by _select_rand_weights.  Zero-weight ops are never chosen.
_RAND_CHOICE_WEIGHTS_0 = {
    'Rotate': 0.3,
    'ShearX': 0.2,
    'ShearY': 0.2,
    'TranslateXRel': 0.1,
    'TranslateYRel': 0.1,
    'Color': .025,
    'Sharpness': 0.025,
    'AutoContrast': 0.025,
    'Solarize': .005,
    'SolarizeAdd': .005,
    'Contrast': .005,
    'Brightness': .005,
    'Equalize': .005,
    'Posterize': 0,
    'Invert': 0,
}


def _select_rand_weights(weight_idx=0, transforms=None):
    """Return normalized per-op selection probabilities for RandAugment."""
    transforms = transforms or _RAND_TRANSFORMS
    assert weight_idx == 0  # only one set of weights currently
    raw = np.array([_RAND_CHOICE_WEIGHTS_0[name] for name in transforms])
    return raw / np.sum(raw)


def rand_augment_ops(magnitude=10, hparams=None, transforms=None):
    """Build the list of AugmentOps used by RandAugment (prob fixed at 0.5)."""
    hparams = hparams or _HPARAMS_DEFAULT
    names = transforms or _RAND_TRANSFORMS
    ops = []
    for name in names:
        ops.append(AugmentOp(name, prob=0.5, magnitude=magnitude, hparams=hparams))
    return ops


class RandAugment:
    """Apply ``num_layers`` ops sampled from ``ops``, optionally weighted."""

    def __init__(self, ops, num_layers=2, choice_weights=None):
        self.ops = ops
        self.num_layers = num_layers
        self.choice_weights = choice_weights

    def __call__(self, img):
        """Sample ops and apply them in sequence.

        Sampling is without replacement when choice weights are given.
        """
        sampled = np.random.choice(
            self.ops, self.num_layers,
            replace=self.choice_weights is None, p=self.choice_weights)
        for op in sampled:
            img = op(img)
        return img


def rand_augment_transform(config_str, hparams):
    """
    Create a RandAugment transform

    :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by
    dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). The remaining
    sections, not order specific determine
    'm' - integer magnitude of rand augment
    'n' - integer num layers (number of transform ops selected per image)
    'w' - integer probabiliy weight index (index of a set of weights to influence choice of op)
    'mstd' - float std deviation of magnitude noise applied
    'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0)
    Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5
    'rand-mstd1-w0' results in magnitude_std 1.0, weights 0, default magnitude of 10 and num_layers 2

    :param hparams: Other hparams (kwargs) for the RandAugmentation scheme

    :return: A MindSpore compatible Transform
    """
    magnitude = _MAX_LEVEL  # default to _MAX_LEVEL for magnitude (currently 10)
    num_layers = 2  # default to 2 ops per image
    weight_idx = None  # default to no probability weights for op choice
    transforms = _RAND_TRANSFORMS
    config = config_str.split('-')
    assert config[0] == 'rand'
    # e.g. [rand, m9, mstd0.5, inc1]
    config = config[1:]
    for c in config:
        cs = re.split(r'(\d.*)', c)
        if len(cs) < 2:
            # flag carries no numeric value; ignore it
            continue
        key, val = cs[:2]
        if key == 'mstd':
            # noise param injected via hparams for now
            hparams.setdefault('magnitude_std', float(val))
        elif key == 'inc':
            # BUGFIX: bool('0') is True, so 'inc0' wrongly enabled the
            # increasing transforms; parse the digit numerically instead
            # to honor the documented default of 0 (disabled).
            if int(val):
                transforms = _RAND_INCREASING_TRANSFORMS
        elif key == 'm':
            magnitude = int(val)
        elif key == 'n':
            num_layers = int(val)
        elif key == 'w':
            weight_idx = int(val)
        else:
            assert False, 'Unknown RandAugment config section'
    ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)
    choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx)
    return RandAugment(ra_ops, num_layers, choice_weights=choice_weights)


# Op pool for AugMix chains; extends the paper's set with the severity-
# increasing enhance ops (marked below).
_AUGMIX_TRANSFORMS = [
    'AutoContrast',
    'ColorIncreasing',  # not in paper
    'ContrastIncreasing',  # not in paper
    'BrightnessIncreasing',  # not in paper
    'SharpnessIncreasing',  # not in paper
    'Equalize',
    'Rotate',
    'PosterizeIncreasing',
    'SolarizeIncreasing',
    'ShearX',
    'ShearY',
    'TranslateXRel',
    'TranslateYRel',
]


def augmix_ops(magnitude=10, hparams=None, transforms=None):
    """Build the list of AugmentOps used by AugMix (always applied, prob=1.0)."""
    hparams = hparams or _HPARAMS_DEFAULT
    names = transforms or _AUGMIX_TRANSFORMS
    ops = []
    for name in names:
        ops.append(AugmentOp(name, prob=1.0, magnitude=magnitude, hparams=hparams))
    return ops


class AugMixAugment:
    """ AugMix Transform
    Adapted and improved from impl here: https://github.com/google-research/augmix/blob/master/imagenet.py
    From paper: 'AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty -
    https://arxiv.org/abs/1912.02781

    Args:
        ops: list of AugmentOp to sample augmentation chains from.
        alpha: Dirichlet/Beta concentration used for mixing weights.
        width: number of parallel augmentation chains mixed together.
        depth: ops per chain; -1 samples a depth in [1, 3] per chain.
        blended: use the faster per-chain PIL-blend approximation (see below).
    """

    def __init__(self, ops, alpha=1., width=3, depth=-1, blended=False):
        self.ops = ops
        self.alpha = alpha
        self.width = width
        self.depth = depth
        self.blended = blended  # blended mode is faster but not well tested

    def _calc_blended_weights(self, ws, m):
        """Convert Dirichlet chain weights into sequential PIL blend alphas.

        Each returned alpha is the fraction of the *remaining* image mass a
        chain should contribute when blended one after another.
        """
        ws = ws * m
        cump = 1.
        rws = []
        # walk weights back-to-front, tracking the cumulative remaining mass
        for w in ws[::-1]:
            alpha = w / cump
            cump *= (1 - alpha)
            rws.append(alpha)
        return np.array(rws[::-1], dtype=np.float32)

    def _apply_blended(self, img, mixing_weights, m):
        """Mix augmentation chains via successive PIL blends (approximate)."""
        # This is my first crack and implementing a slightly faster mixed augmentation. Instead
        # of accumulating the mix for each chain in a Numpy array and then blending with original,
        # it recomputes the blending coefficients and applies one PIL image blend per chain.
        # TODO the results appear in the right ballpark but they differ by more than rounding.
        img_orig = img.copy()
        ws = self._calc_blended_weights(mixing_weights, m)
        for w in ws:
            # chain depth: fixed if configured, else random in [1, 3]
            depth = self.depth if self.depth > 0 else np.random.randint(1, 4)
            ops = np.random.choice(self.ops, depth, replace=True)
            img_aug = img_orig  # no ops are in-place, deep copy not necessary
            for op in ops:
                img_aug = op(img_aug)
            img = Image.blend(img, img_aug, w)
        return img

    def _apply_basic(self, img, mixing_weights, m):
        """Mix augmentation chains exactly, accumulating in a float array."""
        # This is a literal adaptation of the paper/official implementation without normalizations and
        # PIL <-> Numpy conversions between every op. It is still quite CPU compute heavy compared to the
        # typical augmentation transforms, could use a GPU / Kornia implementation.
        img_shape = img.size[0], img.size[1], len(img.getbands())
        mixed = np.zeros(img_shape, dtype=np.float32)
        for mw in mixing_weights:
            # chain depth: fixed if configured, else random in [1, 3]
            depth = self.depth if self.depth > 0 else np.random.randint(1, 4)
            ops = np.random.choice(self.ops, depth, replace=True)
            img_aug = img  # no ops are in-place, deep copy not necessary
            for op in ops:
                img_aug = op(img_aug)
            mixed += mw * np.asarray(img_aug, dtype=np.float32)
        np.clip(mixed, 0, 255., out=mixed)
        mixed = Image.fromarray(mixed.astype(np.uint8))
        # final blend between the original and the mixed augmentation result
        return Image.blend(img, mixed, m)

    def __call__(self, img):
        """Apply AugMix: sample chain weights and a final mix coefficient."""
        # per-chain weights ~ Dirichlet(alpha), overall mix ~ Beta(alpha, alpha)
        mixing_weights = np.float32(np.random.dirichlet([self.alpha] * self.width))
        m = np.float32(np.random.beta(self.alpha, self.alpha))
        if self.blended:
            mixed = self._apply_blended(img, mixing_weights, m)
        else:
            mixed = self._apply_basic(img, mixing_weights, m)
        return mixed


def augment_and_mix_transform(config_str, hparams):
    """ Create AugMix MindSpore transform

    :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated
    by dashes ('-'). The first section must be 'augmix'. The remaining sections, not order specific, determine
    'm' - integer magnitude (severity) of augmentation mix (default: 3)
    'w' - integer width of augmentation chain (default: 3)
    'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1)
    'a' - float alpha for the Dirichlet/Beta mixing distributions (default: 1.)
    'b' - integer (bool), blend each branch of chain into end result without a final blend, less CPU (default: 0)
    'mstd' - float std deviation of magnitude noise applied (default: inf, i.e. uniform magnitude sampling)
    Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2

    :param hparams: Other hparams (kwargs) for the Augmentation transforms

    :return: A MindSpore compatible Transform
    """
    magnitude = 3
    width = 3
    depth = -1
    alpha = 1.
    blended = False
    # AugMix defaults to uniform magnitude sampling (see AugmentOp.magnitude_std).
    hparams['magnitude_std'] = float('inf')
    config = config_str.split('-')
    assert config[0] == 'augmix'
    config = config[1:]
    for c in config:
        cs = re.split(r'(\d.*)', c)
        if len(cs) < 2:
            # flag carries no numeric value; ignore it
            continue
        key, val = cs[:2]
        if key == 'mstd':
            # BUGFIX: magnitude_std is unconditionally pre-set to inf above, so
            # setdefault() could never apply a user-provided 'mstd'; assign it
            # directly so the documented flag actually takes effect.
            hparams['magnitude_std'] = float(val)
        elif key == 'm':
            magnitude = int(val)
        elif key == 'w':
            width = int(val)
        elif key == 'd':
            depth = int(val)
        elif key == 'a':
            alpha = float(val)
        elif key == 'b':
            # BUGFIX: bool('0') is True, so 'b0' wrongly enabled blended mode;
            # parse the digit numerically to honor the documented default of 0.
            blended = bool(int(val))
        else:
            assert False, 'Unknown AugMix config section'
    ops = augmix_ops(magnitude=magnitude, hparams=hparams)
    return AugMixAugment(ops, alpha=alpha, width=width, depth=depth, blended=blended)

+ 247
- 0
xbm/datasets/augment/mixup.py View File

@@ -0,0 +1,247 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" Mixup and Cutmix

Papers:
mixup: Beyond Empirical Risk Minimization (https://arxiv.org/abs/1710.09412)

CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features (https://arxiv.org/abs/1905.04899)

Code Reference:
CutMix: https://github.com/clovaai/CutMix-PyTorch

Hacked together by / Copyright 2020 Ross Wightman
"""
import numpy as np
from mindspore import Tensor
from mindspore import dtype as mstype
from mindspore import ops as P


def one_hot(x, num_classes, on_value=1., off_value=0.):
    """Convert integer labels to a float32 one-hot matrix.

    Hot entries get ``on_value``, all others ``off_value`` (via clipping of
    a 0/1 identity-row lookup).
    """
    flat = x.reshape(-1)
    table = np.eye(num_classes)[flat]
    return np.clip(table, a_min=off_value, a_max=on_value, dtype=np.float32)


def mixup_target(target, num_classes, lam=1., smoothing=0.0):
    """Build a mixed, label-smoothed one-hot target from integer labels.

    Mixes each sample's smoothed one-hot vector with that of the batch-flipped
    sample using weight ``lam``.
    """
    off_value = smoothing / num_classes
    on_value = 1. - smoothing + off_value
    y1 = one_hot(target, num_classes, on_value=on_value, off_value=off_value)
    flipped = np.flip(target, axis=0)
    y2 = one_hot(flipped, num_classes, on_value=on_value, off_value=off_value)
    return y1 * lam + y2 * (1. - lam)


def rand_bbox(img_shape, lam, margin=0., count=None):
    """ Standard CutMix bounding-box
    Generates a random square bbox based on lambda value. This impl includes
    support for enforcing a border margin as percent of bbox dimensions.

    Args:
        img_shape (tuple): Image shape as tuple
        lam (float): Cutmix lambda value
        margin (float): Percentage of bbox dimension to enforce as margin (reduce amount of box outside image)
        count (int): Number of bbox to generate
    """
    cut_ratio = np.sqrt(1 - lam)
    img_h, img_w = img_shape[-2:]
    cut_h = int(img_h * cut_ratio)
    cut_w = int(img_w * cut_ratio)
    margin_y = int(margin * cut_h)
    margin_x = int(margin * cut_w)
    # sample the box centre, then clip the box edges to the image
    cy = np.random.randint(0 + margin_y, img_h - margin_y, size=count)
    cx = np.random.randint(0 + margin_x, img_w - margin_x, size=count)
    yl = np.clip(cy - cut_h // 2, 0, img_h)
    yh = np.clip(cy + cut_h // 2, 0, img_h)
    xl = np.clip(cx - cut_w // 2, 0, img_w)
    xh = np.clip(cx + cut_w // 2, 0, img_w)
    return yl, yh, xl, xh


def rand_bbox_minmax(img_shape, minmax, count=None):
    """ Min-Max CutMix bounding-box
    Inspired by Darknet cutmix impl, generates a random rectangular bbox
    based on min/max percent values applied to each dimension of the input image.

    Typical defaults for minmax are usually in the .2-.3 for min and .8-.9 range for max.

    Args:
        img_shape (tuple): Image shape as tuple
        minmax (tuple or list): Min and max bbox ratios (as percent of image size)
        count (int): Number of bbox to generate
    """
    assert len(minmax) == 2
    img_h, img_w = img_shape[-2:]
    lo, hi = minmax
    # sample each box side length, then a top-left corner that keeps it inside
    cut_h = np.random.randint(int(img_h * lo), int(img_h * hi), size=count)
    cut_w = np.random.randint(int(img_w * lo), int(img_w * hi), size=count)
    yl = np.random.randint(0, img_h - cut_h, size=count)
    xl = np.random.randint(0, img_w - cut_w, size=count)
    return yl, yl + cut_h, xl, xl + cut_w


def cutmix_bbox_and_lam(img_shape, lam, ratio_minmax=None, correct_lam=True, count=None):
    """Generate a cutmix bbox and optionally correct lam to the clipped box area."""
    if ratio_minmax is None:
        yl, yu, xl, xu = rand_bbox(img_shape, lam, count=count)
    else:
        yl, yu, xl, xu = rand_bbox_minmax(img_shape, ratio_minmax, count=count)
    if correct_lam or ratio_minmax is not None:
        # lam becomes the fraction of the image NOT covered by the box
        bbox_area = (yu - yl) * (xu - xl)
        lam = 1. - bbox_area / float(img_shape[-2] * img_shape[-1])
    return (yl, yu, xl, xu), lam


class Mixup:
    """ Mixup/Cutmix that applies different params to each element or whole batch

    Args:
        mixup_alpha (float): mixup alpha value, mixup is active if > 0.
        cutmix_alpha (float): cutmix alpha value, cutmix is active if > 0.
        cutmix_minmax (List[float]): cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None.
        prob (float): probability of applying mixup or cutmix per batch or element
        switch_prob (float): probability of switching to cutmix instead of mixup when both are active
        mode (str): how to apply mixup/cutmix params (per 'batch', 'pair' (pair of elements), 'elem' (element)
        correct_lam (bool): apply lambda correction when cutmix bbox clipped by image borders
        label_smoothing (float): apply label smoothing to the mixed target tensor
        num_classes (int): number of classes for target
    """

    def __init__(self, mixup_alpha=1., cutmix_alpha=0., cutmix_minmax=None, prob=1.0, switch_prob=0.5,
                 mode='batch', correct_lam=True, label_smoothing=0.1, num_classes=1000):
        self.mixup_alpha = mixup_alpha
        self.cutmix_alpha = cutmix_alpha
        self.cutmix_minmax = cutmix_minmax
        if self.cutmix_minmax is not None:
            assert len(self.cutmix_minmax) == 2
            # force cutmix alpha == 1.0 when minmax active to keep logic simple & safe
            self.cutmix_alpha = 1.0
        self.mix_prob = prob
        self.switch_prob = switch_prob
        self.label_smoothing = label_smoothing
        self.num_classes = num_classes
        self.mode = mode
        self.correct_lam = correct_lam  # correct lambda based on clipped area for cutmix
        self.mixup_enabled = True  # set to false to disable mixing (intended to be set by train loop)

    def _params_per_elem(self, batch_size):
        """Sample a per-element lam array and cutmix flag array for the batch."""
        lam = np.ones(batch_size, dtype=np.float32)
        # BUGFIX: np.bool was removed in NumPy 1.24; use the builtin bool dtype.
        use_cutmix = np.zeros(batch_size, dtype=bool)
        if self.mixup_enabled:
            if self.mixup_alpha > 0. and self.cutmix_alpha > 0.:
                use_cutmix = np.random.rand(batch_size) < self.switch_prob
                lam_mix = np.where(
                    use_cutmix,
                    np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size),
                    np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size))
            elif self.mixup_alpha > 0.:
                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size)
            elif self.cutmix_alpha > 0.:
                use_cutmix = np.ones(batch_size, dtype=bool)
                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size)
            else:
                assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true."
            # elements that miss mix_prob keep lam == 1 (no mixing)
            lam = np.where(np.random.rand(batch_size) < self.mix_prob, lam_mix.astype(np.float32), lam)
        return lam, use_cutmix

    def _params_per_batch(self):
        """Sample a single (lam, use_cutmix) pair for the whole batch."""
        lam = 1.
        use_cutmix = False
        if self.mixup_enabled and np.random.rand() < self.mix_prob:
            if self.mixup_alpha > 0. and self.cutmix_alpha > 0.:
                use_cutmix = np.random.rand() < self.switch_prob
                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha) if use_cutmix else \
                    np.random.beta(self.mixup_alpha, self.mixup_alpha)
            elif self.mixup_alpha > 0.:
                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha)
            elif self.cutmix_alpha > 0.:
                use_cutmix = True
                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha)
            else:
                assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true."
            lam = float(lam_mix)
        return lam, use_cutmix

    def _mix_elem(self, x):
        """Mix each element with its batch-reversed partner (per-element params)."""
        batch_size = len(x)
        lam_batch, use_cutmix = self._params_per_elem(batch_size)
        # BUGFIX: x is a NumPy array (see np.flip usage below), which has no
        # .clone(); use .copy() to keep an unmodified original as mixing source.
        x_orig = x.copy()
        for i in range(batch_size):
            j = batch_size - i - 1
            lam = lam_batch[i]
            if lam != 1.:
                if use_cutmix[i]:
                    (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
                        x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam)
                    x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh]
                    lam_batch[i] = lam
                else:
                    x[i] = x[i] * lam + x_orig[j] * (1 - lam)
        return P.ExpandDims()(Tensor(lam_batch, dtype=mstype.float32), 1)

    def _mix_pair(self, x):
        """Mix element pairs (i, batch-1-i) symmetrically (per-pair params)."""
        batch_size = len(x)
        lam_batch, use_cutmix = self._params_per_elem(batch_size // 2)
        # BUGFIX: NumPy arrays have no .clone(); use .copy() (see _mix_elem).
        x_orig = x.copy()
        for i in range(batch_size // 2):
            j = batch_size - i - 1
            lam = lam_batch[i]
            if lam != 1.:
                if use_cutmix[i]:
                    (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
                        x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam)
                    x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh]
                    x[j][:, yl:yh, xl:xh] = x_orig[i][:, yl:yh, xl:xh]
                    lam_batch[i] = lam
                else:
                    x[i] = x[i] * lam + x_orig[j] * (1 - lam)
                    x[j] = x[j] * lam + x_orig[i] * (1 - lam)
        lam_batch = np.concatenate((lam_batch, lam_batch[::-1]))
        return P.ExpandDims()(Tensor(lam_batch, dtype=mstype.float32), 1)

    def _mix_batch(self, x):
        """Mix the whole batch (in place) with its flipped self; return scalar lam."""
        lam, use_cutmix = self._params_per_batch()
        if lam == 1.:
            return 1.
        if use_cutmix:
            (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
                x.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam)
            x[:, :, yl:yh, xl:xh] = np.flip(x, axis=0)[:, :, yl:yh, xl:xh]
        else:
            x_flipped = np.flip(x, axis=0) * (1. - lam)
            x *= lam
            x += x_flipped
        return lam

    def __call__(self, x, target):
        """Mix a batch of images and build the matching soft targets.

        NOTE(review): in 'elem'/'pair' modes lam is a MindSpore tensor while
        mixup_target does NumPy math — verify that interaction with callers.
        """
        # the same to image, label
        assert len(x) % 2 == 0, 'Batch size should be even when using this'
        if self.mode == 'elem':
            lam = self._mix_elem(x)
        elif self.mode == 'pair':
            lam = self._mix_pair(x)
        else:
            lam = self._mix_batch(x)
        target = mixup_target(target, self.num_classes, lam, self.label_smoothing)
        return x.astype(np.float32), target.astype(np.float32)

+ 113
- 0
xbm/datasets/augment/random_erasing.py View File

@@ -0,0 +1,113 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" Random Erasing (Cutout)

Originally inspired by impl at https://github.com/zhunzhong07/Random-Erasing, Apache 2.0
Copyright Zhun Zhong & Liang Zheng

Hacked together by / Copyright 2020 Ross Wightman
"""
import math
import random

import numpy as np


def _get_pixels(per_pixel, rand_color, patch_size, dtype=np.float32):
"""_get_pixels"""
if per_pixel:
func = np.random.normal(size=patch_size).astype(dtype)
elif rand_color:
func = np.random.normal(size=(patch_size[0], 1, 1)).astype(dtype)
else:
func = np.zeros((patch_size[0], 1, 1), dtype=dtype)
return func


class RandomErasing:
    """ Randomly selects a rectangle region in an image and erases its pixels.
    'Random Erasing Data Augmentation' by Zhong et al.
    See https://arxiv.org/pdf/1708.04896.pdf

    This variant of RandomErasing is intended to be applied to either a batch
    or single image tensor after it has been normalized by dataset mean and std.
    Args:
        probability: Probability that the Random Erasing operation will be performed.
        min_area: Minimum percentage of erased area wrt input image area.
        max_area: Maximum percentage of erased area wrt input image area.
        min_aspect: Minimum aspect ratio of erased area.
        mode: pixel color mode, one of 'const', 'rand', or 'pixel'
            'const' - erase block is constant color of 0 for all channels
            'rand' - erase block is same per-channel random (normal) color
            'pixel' - erase block is per-pixel random (normal) color
        max_count: maximum number of erasing blocks per image, area per box is scaled by count.
            per-image count is randomly chosen between 1 and this value.
        num_splits: when > 1, the first 1/num_splits of a batch is left
            untouched (a "clean" portion of samples).
    """

    def __init__(self, probability=0.5, min_area=0.02, max_area=1 / 3, min_aspect=0.3,
                 max_aspect=None, mode='const', min_count=1, max_count=None, num_splits=0):
        self.probability = probability
        self.min_area = min_area
        self.max_area = max_area
        # Aspect ratio is sampled log-uniformly over [min_aspect, max_aspect].
        max_aspect = max_aspect or 1 / min_aspect
        self.log_aspect_ratio = (math.log(min_aspect), math.log(max_aspect))
        self.min_count = min_count
        self.max_count = max_count or min_count
        self.num_splits = num_splits
        mode = mode.lower()
        self.rand_color = False
        self.per_pixel = False
        if mode == 'rand':
            self.rand_color = True  # per block random normal
        elif mode == 'pixel':
            self.per_pixel = True  # per pixel random normal
        else:
            assert not mode or mode == 'const'

    def _erase(self, img, chan, img_h, img_w, dtype):
        """Erase random boxes of ``img`` (C, H, W) in place; return ``img``.

        With probability ``1 - self.probability`` the image is left untouched.
        Area per box is divided by the box count so total erased area stays
        in the configured range.
        """
        if random.random() > self.probability:
            return img
        area = img_h * img_w
        count = self.min_count if self.min_count == self.max_count else \
            random.randint(self.min_count, self.max_count)
        for _ in range(count):
            # Up to 10 attempts to sample a box that fits inside the image.
            for _ in range(10):
                target_area = random.uniform(self.min_area, self.max_area) * area / count
                aspect_ratio = math.exp(random.uniform(*self.log_aspect_ratio))
                h = int(round(math.sqrt(target_area * aspect_ratio)))
                w = int(round(math.sqrt(target_area / aspect_ratio)))
                if w < img_w and h < img_h:
                    top = random.randint(0, img_h - h)
                    left = random.randint(0, img_w - w)
                    img[:, top:top + h, left:left + w] = _get_pixels(
                        self.per_pixel, self.rand_color, (chan, h, w),
                        dtype=dtype)
                    break
        return img

    def __call__(self, x):
        """Apply random erasing to one image (C, H, W) or a batch (N, C, H, W)."""
        if len(x.shape) == 3:
            output = self._erase(x, *x.shape, x.dtype)
        else:
            # BUG FIX: output used to start as np.zeros_like(x), which
            # silently blanked every image before batch_start (the "clean"
            # split when num_splits > 1). Start from a copy of the input so
            # untouched images pass through unchanged.
            output = x.copy()
            batch_size, chan, img_h, img_w = x.shape
            # skip first slice of batch if num_splits is set (for clean portion of samples)
            batch_start = batch_size // self.num_splits if self.num_splits > 1 else 0
            for i in range(batch_start, batch_size):
                output[i] = self._erase(x[i], chan, img_h, img_w, x.dtype)
        return output

BIN
xbm/datasets/data_utils/__init__.py View File


+ 72
- 0
xbm/datasets/data_utils/moxing_adapter.py View File

@@ -0,0 +1,72 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Moxing adapter for ModelArts"""

import os

_global_sync_count = 0


def get_device_id():
    """Return the local device id from the ``DEVICE_ID`` env var (default 0)."""
    return int(os.getenv('DEVICE_ID', '0'))


def get_device_num():
    """Return the total device count from the ``RANK_SIZE`` env var (default 1)."""
    return int(os.getenv('RANK_SIZE', '1'))


def get_rank_id():
    """Return this process's global rank from the ``RANK_ID`` env var (default 0)."""
    return int(os.getenv('RANK_ID', '0'))


def get_job_id():
    """Return the ModelArts job id from the ``JOB_ID`` env var.

    Falls back to ``"default"`` when the variable is unset or empty.
    BUG FIX: the original only compared against ``""``, so an *unset*
    variable made ``os.getenv`` return ``None`` and the function leaked
    ``None`` to callers instead of ``"default"``.
    """
    job_id = os.getenv('JOB_ID', '')
    return job_id if job_id else "default"


def sync_data(from_path, to_path, threads=16):
    """
    Copy a dataset between OBS and the local filesystem via moxing.

    Download data from remote obs to local directory if the first url is remote url and the second one is local path
    Upload data from local directory to remote obs in contrast.

    Coordination: only one process per server performs the copy; every other
    process on that server busy-waits on a shared lock file until the copy
    has finished. NOTE(review): the lock file is never removed, so a second
    call relies on ``_global_sync_count`` giving it a fresh file name.
    """
    import moxing as mox
    import time
    global _global_sync_count
    # One lock-file name per sync_data() call in this process, so successive
    # syncs do not observe a stale "done" flag from an earlier call.
    sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count)
    _global_sync_count += 1

    # Each server contains 8 devices as most.
    # Only one device per server (id a multiple of min(device_num, 8)) copies.
    if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
        print("from path: ", from_path)
        print("to path: ", to_path)
        mox.file.copy_parallel(from_path, to_path, threads=threads)
        print("===finish data synchronization===")
        try:
            # Create the flag file that signals completion to the waiters;
            # ignore failure if another process created it first.
            os.mknod(sync_lock)
        except IOError:
            pass
        print("===save flag===")

    # All processes (including the copier) block here until the flag exists.
    while True:
        if os.path.exists(sync_lock):
            break
        time.sleep(1)

    print("Finish sync data from {} to {}.".format(from_path, to_path))

+ 160
- 0
xbm/datasets/imagenet.py View File

@@ -0,0 +1,160 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Data operations, will be used in train.py and eval.py
"""
import os

import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.vision.c_transforms as vision
import mindspore.dataset.vision.py_transforms as py_vision
from mindspore.dataset.vision.utils import Inter

from src.data.augment.auto_augment import pil_interp, rand_augment_transform
from src.data.augment.mixup import Mixup
from src.data.augment.random_erasing import RandomErasing
from .data_utils.moxing_adapter import sync_data


class ImageNet:
    """ImageNet dataset wrapper used by train.py and eval.py.

    Builds ``self.train_dataset`` and/or ``self.val_dataset`` from the
    ``train``/``val`` sub-folders of the dataset root. On ModelArts the
    data is first synced from OBS into a local cache directory, and both
    splits are always built regardless of ``training``.
    """

    def __init__(self, args, training=True):
        if args.run_modelarts:
            print('Syncing data.')
            data_root = '/cache/data'
            sync_data(args.data_url, data_root, threads=128)
            print('Create train and evaluate dataset.')
            build_train = True
        else:
            data_root = args.data_url
            build_train = training
        if build_train:
            self.train_dataset = create_dataset_imagenet(
                os.path.join(data_root, "train"), training=True, args=args)
        self.val_dataset = create_dataset_imagenet(
            os.path.join(data_root, "val"), training=False, args=args)


def create_dataset_imagenet(dataset_dir, args, repeat_num=1, training=True):
    """
    Create a train or eval imagenet2012 dataset pipeline.

    Args:
        dataset_dir(string): the path of dataset (ImageFolder layout).
        args: parsed config; fields read here include num_parallel_workers,
            image_size, interpolation, auto_augment, re_prob/re_mode/re_count,
            mix_up, cutmix, mixup_prob, switch_prob, mixup_mode,
            label_smoothing, num_classes and batch_size.
        repeat_num(int): the repeat times of dataset. Default: 1
        training(bool): whether dataset is used for train or eval.

    Returns:
        dataset: batched dataset (with Mixup/CutMix mapped in when training).
    """

    # _get_rank_info() returns (None, None) on a single device, so the
    # sharded branch below then runs with num_shards=None (no sharding).
    device_num, rank_id = _get_rank_info()
    shuffle = training
    if device_num == 1 or not training:
        data_set = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=args.num_parallel_workers,
                                         shuffle=shuffle)
    else:
        data_set = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=args.num_parallel_workers, shuffle=shuffle,
                                         num_shards=device_num, shard_id=rank_id)

    image_size = args.image_size

    # define map operations
    # BICUBIC: 3

    if training:
        # ImageNet channel statistics in [0, 1] range (ToTensor scales first).
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        aa_params = dict(
            translate_const=int(image_size * 0.45),
            img_mean=tuple([min(255, round(255 * x)) for x in mean]),
        )
        interpolation = args.interpolation  # e.g. 'bicubic' (from yaml config)
        auto_augment = args.auto_augment  # e.g. 'rand-m9-mstd0.5-inc1'
        # Only RandAugment policy strings are supported by this pipeline.
        assert auto_augment.startswith('rand')
        aa_params['interpolation'] = pil_interp(interpolation)

        # NOTE(review): the random-resized-crop below hard-codes
        # Inter.PILCUBIC regardless of args.interpolation — confirm intended.
        transform_img = [
            vision.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(3 / 4, 4 / 3),
                                          interpolation=Inter.PILCUBIC),
            vision.RandomHorizontalFlip(prob=0.5),
            py_vision.ToPIL()
        ]
        transform_img += [rand_augment_transform(auto_augment, aa_params)]
        # ToTensor converts to CHW float in [0, 1]; RandomErasing runs last,
        # on the normalized tensor.
        transform_img += [
            py_vision.ToTensor(),
            py_vision.Normalize(mean=mean, std=std),
            RandomErasing(args.re_prob, mode=args.re_mode, max_count=args.re_count)
        ]
    else:
        # Eval path normalizes uint8 HWC images directly (no ToTensor),
        # hence the mean/std scaled into the [0, 255] range.
        mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        # test transform complete
        transform_img = [
            vision.Decode(),
            vision.Resize(int(256 / 224 * image_size), interpolation=Inter.PILCUBIC),
            vision.CenterCrop(image_size),
            vision.Normalize(mean=mean, std=std),
            vision.HWC2CHW()
        ]

    transform_label = C.TypeCast(mstype.int32)

    data_set = data_set.map(input_columns="image", num_parallel_workers=args.num_parallel_workers,
                            operations=transform_img)
    data_set = data_set.map(input_columns="label", num_parallel_workers=args.num_parallel_workers,
                            operations=transform_label)
    if (args.mix_up > 0. or args.cutmix > 0.) and not training:
        # if use mixup and not training(False), one hot val data label
        # so eval label shape matches the soft labels produced for training.
        one_hot = C.OneHot(num_classes=args.num_classes)
        data_set = data_set.map(input_columns="label", num_parallel_workers=args.num_parallel_workers,
                                operations=one_hot)
    # apply batch operations
    data_set = data_set.batch(args.batch_size, drop_remainder=True,
                              num_parallel_workers=args.num_parallel_workers)

    if (args.mix_up > 0. or args.cutmix > 0.) and training:
        # Mixup/CutMix operate on whole batches, so this map must come
        # after batch().
        mixup_fn = Mixup(
            mixup_alpha=args.mix_up, cutmix_alpha=args.cutmix, cutmix_minmax=None,
            prob=args.mixup_prob, switch_prob=args.switch_prob, mode=args.mixup_mode,
            label_smoothing=args.label_smoothing, num_classes=args.num_classes)

        data_set = data_set.map(operations=mixup_fn, input_columns=["image", "label"],
                                num_parallel_workers=args.num_parallel_workers)

    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)

    return data_set


def _get_rank_info():
"""
get rank size and rank id
"""
rank_size = int(os.environ.get("RANK_SIZE", 1))

if rank_size > 1:
from mindspore.communication.management import get_rank, get_group_size
rank_size = get_group_size()
rank_id = get_rank()
else:
rank_size = rank_id = None

return rank_size, rank_id

Loading…
Cancel
Save
Baidu
map