I want to replace mx.symbol.SoftmaxOutput with a weighted version that assigns each class a weight according to its label's frequency in the whole dataset.
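For reference, the per-class weights used in the code below are meant to be normalized inverse frequencies. Roughly, I computed them like this sketch (the label_counts values here are placeholders, not my real counts; class 0 is the very frequent background class):

import numpy as np

# Placeholder per-class counts; the real values come from scanning the
# training labels.
label_counts = np.array([7.0e5, 6.5e3, 1.8e3, 1.7e3, 9.6e3,
                         1.7e3, 1.1e4, 7.0e2, 2.2e3, 1.7e3])

inv_freq = 1.0 / label_counts            # rarer class -> larger weight
cls_weight = inv_freq / inv_freq.mean()  # rescale so the weights average to 1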
The original (unweighted) function works well, used like below:
cls_prob = mx.symbol.SoftmaxOutput(data=data, label=label, multi_output=True, normalization='valid', use_ignore=True, ignore_label=-1, name='cls_prob')
My current code is below. It runs without errors, but the loss quickly explodes to NaN. I am working on a detection problem, and the RCNNL1 loss quickly becomes NaN when I use my code as a CustomOp. Another issue is that I have to ignore label -1, and I am not sure how to do that properly (a sketch of my current guess follows the code). Any help would be greatly appreciated.
import mxnet as mx
import numpy as np


class WeightedSoftmaxCrossEntropyLoss(mx.operator.CustomOp):
    def __init__(self, num_class):
        self.num_class = int(num_class)

    def forward(self, is_train, req, in_data, out_data, aux):
        data = in_data[0]
        label = in_data[1]
        # reuse the built-in op for the forward pass
        pred = mx.nd.SoftmaxOutput(data, label, multi_output=True,
                                   normalization='valid', use_ignore=True,
                                   ignore_label=-1, name='rcnn_cls_prob')
        self.assign(out_data[0], req[0], pred)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # per-class weights derived from label frequencies
        cls_weight = np.array([
            0.002852781814876101, 0.30715984513157385, 1.0932468996115976,
            1.1598757152765971, 0.20739109264009636, 1.1984256112776808,
            0.18746186040248036, 2.9009928470737023, 0.92140970338602113,
            1.200317380251021
        ])
        label = in_data[1]
        pred = out_data[0]
        label = label.asnumpy().astype('int32').reshape(-1)
        # (batch, class, spatial) -> (batch * spatial, class)
        pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1))
        pred = pred.reshape((label.shape[0], -1))
        # Need to ignore label -1 here (how?)
        out_inds = np.where(label == -1)[0]
        # label = label[keep_inds]
        one_hot = np.zeros((label.shape[0], self.num_class))
        one_hot[np.arange(label.shape[0]), label] = 1
        # gradient of softmax cross-entropy
        dx = pred - one_hot
        # dx[out_inds] = 0.0
        weighted_dx = cls_weight * dx / 4
        self.assign(in_grad[0], req[0], weighted_dx)


@mx.operator.register("weighted_softmax_ce_loss")
class WeightedSoftmaxCrossEntropyLossProp(mx.operator.CustomOpProp):
    def __init__(self, num_class):
        super(WeightedSoftmaxCrossEntropyLossProp, self).__init__(need_top_grad=False)
        self.num_class = num_class

    def list_arguments(self):
        return ['data', 'label']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shapes):
        data_shape = in_shapes[0]
        label_shape = (in_shapes[0][0],)
        output_shape = in_shapes[0]
        return [data_shape, label_shape], [output_shape], []

    def create_operator(self, ctx, in_shapes, in_dtypes):
        # create and return the CustomOp instance
        return WeightedSoftmaxCrossEntropyLoss(self.num_class)
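For what it's worth, here is my current guess at how the ignore handling should look inside backward (an untested sketch; clipping the -1 labels before the one-hot scatter, zeroing those rows, and normalizing by the valid count is my assumption about what use_ignore with normalization='valid' does internally). I am also unsure whether the weight should be applied per column, as in my code above, or per sample by the true class's weight, as below:

# Untested sketch of the ignore handling inside backward().
# `pred`, `label`, `cls_weight` are as in backward() above.
valid = label != -1                          # rows with a real label
safe_label = np.where(valid, label, 0)       # -1 would otherwise index the last class

one_hot = np.zeros((label.shape[0], self.num_class))
one_hot[np.arange(label.shape[0]), safe_label] = 1

dx = pred - one_hot
dx[~valid] = 0.0                             # ignored rows get zero gradient

# weight each sample by its true class's weight, then normalize by the
# number of valid samples (mimicking normalization='valid')
dx *= cls_weight[safe_label][:, None]
dx /= max(int(valid.sum()), 1)

In case it matters, I register and call the op as a drop-in replacement for the SoftmaxOutput line above, something like:

cls_prob = mx.symbol.Custom(data=data, label=label, op_type='weighted_softmax_ce_loss', num_class=10, name='cls_prob')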