How to train ResNet101 model from scratch in Caffe?

问题

I'm using the DeepLab_v2 version of Caffe to do semantic segmentation. I can fine-tune ResNet101 starting from the ImageNet-pretrained model, but I cannot train the model from scratch on custom data. Has anyone had a similar experience and managed to solve this issue?

This is what one functional block of the ResNet that I'm currently using for training looks like:

# First convolution of the block: reads "data" and writes "conv1".
# 64 output channels, 3x3 kernel, padding 1, stride 2, no bias term.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # Single param entry (named "conv1_0"); with bias_term disabled below,
  # the weights are the only learnable blob this layer declares.
  param {
    name: "conv1_0"
    lr_mult: 1
    decay_mult: 1
  }
  convolution_param {
    num_output: 64
    bias_term: false
    kernel_size: 3
    stride: 2
    pad: 1
    # Weights are initialised with the "msra" filler.
    weight_filler {
      type: "msra"
    }
  }
}

# Batch normalisation applied in place on "conv1" (bottom and top are the
# same blob).
layer {
  name: "bn_conv1"
  type: "BatchNorm"
  bottom: "conv1"
  top: "conv1"
  # All three param entries have lr_mult: 0.
  param {
    name: "bn_conv1_0"
    lr_mult: 0
  }
  param {
    name: "bn_conv1_1"
    lr_mult: 0
  }
  param {
    name: "bn_conv1_2"
    lr_mult: 0
  }
  batch_norm_param {
    # NOTE(review): use_global_stats is set to true here even though this
    # net is used for training -- confirm against the Caffe BatchNorm
    # documentation that this is intended when training from scratch.
    use_global_stats: true
  }
}

# Scale layer with a bias term, applied in place on "conv1" right after
# bn_conv1.
layer {
  name: "scale_conv1"
  type: "Scale"
  bottom: "conv1"
  top: "conv1"
  # Both param entries (scale and bias) have lr_mult: 0.
  param {
    name: "scale_conv1_0"
    lr_mult: 0
  }
  param {
    name: "scale_conv1_1"
    lr_mult: 0
  }
  scale_param {
    # Neither filler names a type, so Caffe's default filler type applies;
    # only the values are given (0.5 for the scale, -2 for the bias).
    filler {
      value: 0.5
    }
    bias_term: true
    bias_filler {
      value: -2
    }
  }
}

# ReLU activation applied in place on "conv1"; closes the
# conv -> BatchNorm -> Scale -> ReLU block.
layer {
  name: "conv1_relu"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}

I tried all kinds of variations, including use_global_stats: false. I am able to train a single block of the type above, but when I try to use all 101 layers, the model no longer converges. Any ideas?

来源：https://stackoverflow.com/questions/41595991/how-to-train-resnet101-model-from-scratch-in-caffe

标签

caffe