Hi,

Thank you very much for your interest in our work!

We didn't implement anything special. Here are the links to the code that we compiled together.

- Bilinear model: https://github.com/gy20073/compact_bilinear_pooling
- C3D: https://github.com/chuckcho/video-caffe. Also take a look at C3D v1.1 from Du Tran, for which a pre-trained ResNet model is available.
- 2D Inception: https://github.com/yjxiong/temporal-segment-networks/tree/master/models/hmdb51
- We changed the inception_4e layer so that it produces a 14x14x1024 conv feature map instead of 7x7x1024. The modified code is attached below.

Compile them together and you're good to use bilinear models with Inception/C3D. Other implementation details for training the network are provided in the paper. Good luck!

Best, Vivek


############################################################### inception_4e #######################################
# ---- inception_4e, 3x3 branch ------------------------------------------------
# Pipeline: 1x1 reduce conv -> BN -> ReLU -> 3x3 conv -> BN -> ReLU.
# Caffe convention for Convolution layers: the first `param` block applies to
# the weights, the second to the bias.
# NOTE(review): the "BN" layer type and `bn_param.frozen` are not mainline BVLC
# Caffe; presumably they come from the Caffe fork used by the linked
# temporal-segment-networks repository -- confirm before compiling.

# 1x1 convolution reducing inception_4d/output to 128 channels.
layer {
  name: "inception_4e/3x3_reduce"
  type: "Convolution"
  bottom: "inception_4d/output"
  top: "inception_4e/3x3_reduce"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}

# Batch normalization of the reduce output (frozen: true).
layer {
  name: "inception_4e/3x3_reduce_bn"
  type: "BN"
  bottom: "inception_4e/3x3_reduce"
  top: "inception_4e/3x3_reduce_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler {
      type: "constant"
      value: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# In-place ReLU (bottom and top are the same blob).
layer {
  name: "inception_4e/relu_3x3_reduce"
  type: "ReLU"
  bottom: "inception_4e/3x3_reduce_bn"
  top: "inception_4e/3x3_reduce_bn"
}

# 3x3 convolution to 192 channels. kernel 3 / pad 1 / stride 1 keeps the
# spatial size of its input (per the accompanying note: 14x14 rather than 7x7).
layer {
  name: "inception_4e/3x3"
  type: "Convolution"
  bottom: "inception_4e/3x3_reduce_bn"
  top: "inception_4e/3x3"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 192
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}

# Batch normalization of the 3x3 output (frozen: true).
layer {
  name: "inception_4e/3x3_bn"
  type: "BN"
  bottom: "inception_4e/3x3"
  top: "inception_4e/3x3_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler {
      type: "constant"
      value: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# In-place ReLU; "inception_4e/3x3_bn" is this branch's input to the Concat.
layer {
  name: "inception_4e/relu_3x3"
  type: "ReLU"
  bottom: "inception_4e/3x3_bn"
  top: "inception_4e/3x3_bn"
}
# ---- inception_4e, double 3x3 branch -----------------------------------------
# Pipeline: 1x1 reduce conv -> BN -> ReLU -> 3x3 conv -> BN -> ReLU
#           -> 3x3 conv -> BN -> ReLU.
# Caffe convention for Convolution layers: the first `param` block applies to
# the weights, the second to the bias.

# 1x1 convolution reducing inception_4d/output to 192 channels.
layer {
  name: "inception_4e/double_3x3_reduce"
  type: "Convolution"
  bottom: "inception_4d/output"
  top: "inception_4e/double_3x3_reduce"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 192
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}

# Batch normalization of the reduce output (frozen: true).
layer {
  name: "inception_4e/double_3x3_reduce_bn"
  type: "BN"
  bottom: "inception_4e/double_3x3_reduce"
  top: "inception_4e/double_3x3_reduce_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler {
      type: "constant"
      value: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# In-place ReLU (bottom and top are the same blob).
layer {
  name: "inception_4e/relu_double_3x3_reduce"
  type: "ReLU"
  bottom: "inception_4e/double_3x3_reduce_bn"
  top: "inception_4e/double_3x3_reduce_bn"
}

# First 3x3 convolution, 256 channels, pad 1. No stride is given here, so the
# layer relies on Caffe's default stride.
layer {
  name: "inception_4e/double_3x3_1"
  type: "Convolution"
  bottom: "inception_4e/double_3x3_reduce_bn"
  top: "inception_4e/double_3x3_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}

# Batch normalization of the first 3x3 output (frozen: true).
layer {
  name: "inception_4e/double_3x3_1_bn"
  type: "BN"
  bottom: "inception_4e/double_3x3_1"
  top: "inception_4e/double_3x3_1_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler {
      type: "constant"
      value: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# In-place ReLU.
layer {
  name: "inception_4e/relu_double_3x3_1"
  type: "ReLU"
  bottom: "inception_4e/double_3x3_1_bn"
  top: "inception_4e/double_3x3_1_bn"
}

# Second 3x3 convolution, 256 channels. kernel 3 / pad 1 / stride 1 keeps the
# spatial size of its input (per the accompanying note: 14x14 rather than 7x7).
layer {
  name: "inception_4e/double_3x3_2"
  type: "Convolution"
  bottom: "inception_4e/double_3x3_1_bn"
  top: "inception_4e/double_3x3_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}

# Batch normalization of the second 3x3 output (frozen: true).
layer {
  name: "inception_4e/double_3x3_2_bn"
  type: "BN"
  bottom: "inception_4e/double_3x3_2"
  top: "inception_4e/double_3x3_2_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler {
      type: "constant"
      value: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# In-place ReLU; "inception_4e/double_3x3_2_bn" is this branch's input to the
# Concat.
layer {
  name: "inception_4e/relu_double_3x3_2"
  type: "ReLU"
  bottom: "inception_4e/double_3x3_2_bn"
  top: "inception_4e/double_3x3_2_bn"
}
# ---- inception_4e, pooling branch --------------------------------------------
# 3x3 max pooling applied directly to inception_4d/output. With pad 1 and
# stride 1 the spatial size of the input is preserved (per the accompanying
# note: 14x14 rather than 7x7).
layer {
  name: "inception_4e/pool"
  type: "Pooling"
  bottom: "inception_4d/output"
  top: "inception_4e/pool"
  pooling_param {
    pool: MAX
    kernel_size: 3
    pad: 1
    stride: 1
  }
}
# ---- inception_4e, module output ---------------------------------------------
# Concatenates the three branch outputs in this order: 3x3 branch (192
# channels), double 3x3 branch (256 channels), pooling branch. No concat axis is
# given, so Caffe's default axis is used.
# NOTE(review): the accompanying note states 1024 output channels, which implies
# the pooling branch carries 576 channels from inception_4d/output -- confirm.
layer {
  name: "inception_4e/output"
  type: "Concat"
  bottom: "inception_4e/3x3_bn"
  bottom: "inception_4e/double_3x3_2_bn"
  bottom: "inception_4e/pool"
  top: "inception_4e/output"
}