|
| 1 | +## @package conv |
| 2 | +# Module caffe2.python.layers.conv |
| 3 | +from __future__ import absolute_import |
| 4 | +from __future__ import division |
| 5 | +from __future__ import print_function |
| 6 | +from __future__ import unicode_literals |
| 7 | + |
| 8 | +from caffe2.python import schema |
| 9 | +from caffe2.python.layers.layers import ( |
| 10 | + ModelLayer, |
| 11 | +) |
| 12 | +import numpy as np |
| 13 | + |
| 14 | + |
class Conv(ModelLayer):
    """
    Convolutional layer.

    Input:
        - input_record: schema.Scalar whose field_type().shape carries at
          least the number of input channels C (last dim for NHWC, first
          dim for NCHW)
        - output_dim: number of convolutional filters (output channels)
        - kernel_h, kernel_w: kernel sizes for height and width
        - stride_h, stride_w: strides for height and width
        - pad_b, pad_l, pad_r, pad_t: padding sizes; when the matching
          stride == 1, a 'None' value triggers automatic "same" padding,
          otherwise 'None' defaults to 0
        - order: data layout, either 'NHWC' or 'NCHW'
    """

    @staticmethod
    def _resolve_pads(kernel, stride, pad_a, pad_b):
        # Fill in unspecified (None) pads: "same" padding of (kernel - 1) / 2
        # (rounded down) when stride == 1, zero otherwise. Explicitly given
        # pads are passed through untouched.
        default = int((kernel - 1) / 2) if stride == 1 else 0
        return (default if pad_a is None else pad_a,
                default if pad_b is None else pad_b)

    def __init__(self, model, input_record, output_dim, kernel_h, kernel_w,
                 stride_h, stride_w, pad_b=None, pad_l=None, pad_r=None,
                 pad_t=None, order='NHWC', kernel_init=None, bias_init=None,
                 kernel_optim=None, bias_optim=None,
                 name='conv', **kwargs):

        super(Conv, self).__init__(model, name, input_record, **kwargs)
        assert isinstance(input_record, schema.Scalar), "Incorrect input type"
        # input num_channels (C) is needed
        input_dims = input_record.field_type().shape

        # NOTE: isinstance() is checked BEFORE the comparison so that a
        # non-int argument fails with the intended AssertionError message
        # rather than a TypeError raised by '>' in Python 3.
        assert (isinstance(kernel_h, int) and kernel_h > 0), (
            "kernel_h should be positive integer")
        assert (isinstance(kernel_w, int) and kernel_w > 0), (
            "kernel_w should be positive integer")
        self.kernel_h = kernel_h
        self.kernel_w = kernel_w

        assert (isinstance(stride_h, int) and stride_h > 0), (
            "stride_h should be positive integer")
        assert (isinstance(stride_w, int) and stride_w > 0), (
            "stride_w should be positive integer")
        self.stride_h = stride_h
        self.stride_w = stride_w

        # output_dim calculation (http://cs231n.github.io/convolutional-networks/)
        # output_dim_w = (input_dim_w - kernel_w + pad_r + pad_l) / stride_w + 1
        # so keeping output_dim_w == input_dim_w at stride_w == 1 requires
        # pad_r + pad_l == kernel_w - 1, split evenly between the two sides;
        # similar for pad_t and pad_b with kernel_h.
        # Auto padding is only performed for the stride == 1 case; for other
        # strides an unspecified pad defaults to 0.
        pad_t, pad_b = self._resolve_pads(kernel_h, stride_h, pad_t, pad_b)
        pad_r, pad_l = self._resolve_pads(kernel_w, stride_w, pad_r, pad_l)

        assert (isinstance(pad_t, int) and pad_t >= 0), "pad_t should be int >= 0"
        assert (isinstance(pad_b, int) and pad_b >= 0), "pad_b should be int >= 0"
        assert (isinstance(pad_r, int) and pad_r >= 0), "pad_r should be int >= 0"
        assert (isinstance(pad_l, int) and pad_l >= 0), "pad_l should be int >= 0"
        self.pad_t = pad_t
        self.pad_b = pad_b
        self.pad_r = pad_r
        self.pad_l = pad_l

        assert order in ['NHWC', 'NCHW'], "order should either 'NHWC' or 'NCHW'"
        self.order = order

        # Kernel blob layout follows the chosen data order; the number of
        # input channels is read from the matching axis of the input shape.
        if order == 'NHWC':
            input_c = input_dims[-1]
            kernel_shape = [output_dim, kernel_h, kernel_w, input_c]
        elif order == 'NCHW':
            input_c = input_dims[0]
            kernel_shape = [output_dim, input_c, kernel_h, kernel_w]
        assert input_c > 0, (
            "Number of input channels in conv parameters should be positive")

        # Default initializers: Xavier for the kernel, zeros for the bias.
        kernel_init = kernel_init if kernel_init else (
            'XavierFill', {}
        )
        bias_init = bias_init if bias_init else (
            'ConstantFill', {'value': 0.0}
        )

        self.kernel = self.create_param(
            param_name='conv_kernel',
            shape=kernel_shape,
            initializer=kernel_init,
            optimizer=kernel_optim,
        )

        self.bias = self.create_param(
            param_name='conv_bias',
            shape=[output_dim],
            initializer=bias_init,
            optimizer=bias_optim,
        )

        # the output_schema only has the num of output channels
        # output_h and output_w would be inferred internally
        self.output_schema = schema.Scalar(
            (np.float32, (output_dim,)),
            self.get_next_blob_reference('output')
        )

    def add_ops(self, net):
        """Emit the Conv operator wired to this layer's kernel and bias."""
        net.Conv(
            self.input_record.field_blobs() + [self.kernel, self.bias],
            self.output_schema.field_blobs(),
            kernel_h=self.kernel_h,
            kernel_w=self.kernel_w,
            stride_h=self.stride_h,
            stride_w=self.stride_w,
            pad_t=self.pad_t,
            pad_l=self.pad_l,
            pad_b=self.pad_b,
            pad_r=self.pad_r,
            order=self.order
        )
0 commit comments