import torch.nn as nn
import torch.nn.functional as F

-from pytorch_toolbelt.modules.dropblock import DropBlockScheduled, DropBlock2D
-from pytorch_toolbelt.modules import Identity
-
-
-def swish(x):
-    return x * x.sigmoid()
-
-
-def hard_sigmoid(x, inplace=False):
-    return F.relu6(x + 3, inplace) / 6
-
-
-def hard_swish(x, inplace=False):
-    return x * hard_sigmoid(x, inplace)
-
-
-class HardSigmoid(nn.Module):
-    def __init__(self, inplace=False):
-        super(HardSigmoid, self).__init__()
-        self.inplace = inplace
-
-    def forward(self, x):
-        return hard_sigmoid(x, inplace=self.inplace)
-
-
-class HardSwish(nn.Module):
-    def __init__(self, inplace=False):
-        super(HardSwish, self).__init__()
-        self.inplace = inplace
-
-    def forward(self, x):
-        return hard_swish(x, inplace=self.inplace)
+# from pytorch_toolbelt.modules.dropblock import DropBlockScheduled, DropBlock2D
+from pytorch_toolbelt.modules.activations import HardSwish, HardSigmoid
+from pytorch_toolbelt.modules.identity import Identity


def _make_divisible(v, divisor, min_value=None):
    """
    Ensure that all layers have a channel number that is divisible by 8
+
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
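For reference, the commit now delegates these activations to pytorch_toolbelt. The deleted helpers above are the piecewise-linear approximations used throughout MobileNetV3; a minimal standalone sketch mirroring the removed code (not the toolbelt internals):

    import torch
    import torch.nn.functional as F

    def hard_sigmoid(x, inplace=False):
        # relu6(x + 3) / 6: cheap piecewise-linear stand-in for sigmoid
        return F.relu6(x + 3, inplace) / 6

    def hard_swish(x, inplace=False):
        # x * hard_sigmoid(x): the h-swish activation from MobileNetV3
        return x * hard_sigmoid(x, inplace)

    # saturates at 0 below x = -3 and at 1 above x = 3
    print(hard_sigmoid(torch.tensor([-4.0, 0.0, 4.0])))  # tensor([0.0, 0.5, 1.0])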
@@ -59,9 +31,9 @@ def _make_divisible(v, divisor, min_value=None):
    return new_v


-# https://github.com/jonnedtc/Squeeze-Excitation-PyTorch/blob/master/networks.py
class SqEx(nn.Module):
-    """Squeeze-Excitation block, implemented in ONNX & CoreML friendly way
+    """Squeeze-Excitation block. Implemented in ONNX & CoreML friendly way.
+    Original implementation: https://github.com/jonnedtc/Squeeze-Excitation-PyTorch/blob/master/networks.py
    """

    def __init__(self, n_features, reduction=4):
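Only the signature of SqEx survives this hunk. As context for the "ONNX & CoreML friendly" claim, here is a hedged sketch of the usual conv-based variant from the linked repository (the exact body in this file may differ): 1x1 convolutions keep the graph four-dimensional end to end, which exporters handle more reliably than Linear layers on pooled tensors.

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class SqEx(nn.Module):
        """Sketch only; assumes the conv-based formulation."""
        def __init__(self, n_features, reduction=4):
            super(SqEx, self).__init__()
            squeezed = max(1, n_features // reduction)
            # 1x1 convs instead of nn.Linear: no flatten/reshape in the graph
            self.fc1 = nn.Conv2d(n_features, squeezed, kernel_size=1)
            self.fc2 = nn.Conv2d(squeezed, n_features, kernel_size=1)

        def forward(self, x):
            w = F.adaptive_avg_pool2d(x, 1)   # squeeze to N x C x 1 x 1
            w = F.relu(self.fc1(w))           # bottleneck excitation
            w = torch.sigmoid(self.fc2(w))    # per-channel gates in (0, 1)
            return x * w                      # reweight the input channels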
@@ -89,24 +61,26 @@ def __init__(self, inplanes, outplanes, expplanes, k=3, stride=1, drop_prob=0, n
        super(LinearBottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, expplanes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(expplanes)
-        self.db1 = DropBlockScheduled(DropBlock2D(drop_prob=drop_prob, block_size=7), start_value=0.,
-                                      stop_value=drop_prob, nr_steps=num_steps, start_step=start_step)
-        # TODO: first doesn't have act?
+        self.db1 = nn.Dropout2d(drop_prob)
+        # self.db1 = DropBlockScheduled(DropBlock2D(drop_prob=drop_prob, block_size=7), start_value=0.,
+        #                               stop_value=drop_prob, nr_steps=num_steps, start_step=start_step)
+        self.act1 = activation(**act_params)  # first does have act according to MobileNetV2

        self.conv2 = nn.Conv2d(expplanes, expplanes, kernel_size=k, stride=stride, padding=k // 2, bias=False,
                               groups=expplanes)
        self.bn2 = nn.BatchNorm2d(expplanes)
-        self.db2 = DropBlockScheduled(DropBlock2D(drop_prob=drop_prob, block_size=7), start_value=0.,
-                                      stop_value=drop_prob, nr_steps=num_steps, start_step=start_step)
+        self.db2 = nn.Dropout2d(drop_prob)
+        # self.db2 = DropBlockScheduled(DropBlock2D(drop_prob=drop_prob, block_size=7), start_value=0.,
+        #                               stop_value=drop_prob, nr_steps=num_steps, start_step=start_step)
        self.act2 = activation(**act_params)

        self.se = SqEx(expplanes) if SE else Identity()

        self.conv3 = nn.Conv2d(expplanes, outplanes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(outplanes)
-        self.db3 = DropBlockScheduled(DropBlock2D(drop_prob=drop_prob, block_size=7), start_value=0.,
-                                      stop_value=drop_prob, nr_steps=num_steps, start_step=start_step)
-        self.act3 = activation(**act_params)
+        self.db3 = nn.Dropout2d(drop_prob)
+        # self.db3 = DropBlockScheduled(DropBlock2D(drop_prob=drop_prob, block_size=7), start_value=0.,
+        #                               stop_value=drop_prob, nr_steps=num_steps, start_step=start_step)

        self.stride = stride
        self.expplanes = expplanes
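The swap from DropBlockScheduled to nn.Dropout2d changes the regularizer's granularity: DropBlock zeroes contiguous spatial patches on a schedule, while Dropout2d zeroes whole feature maps at a fixed rate. A quick behavioral check of the replacement (shapes are illustrative):

    import torch
    import torch.nn as nn

    db = nn.Dropout2d(p=0.5)
    db.train()                      # Dropout2d is the identity in eval() mode
    x = torch.ones(1, 4, 3, 3)
    y = db(x)
    # each 3x3 channel map is either all zeros or rescaled by 1 / (1 - p) = 2.0
    print(y[0, :, 0, 0])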
@@ -119,6 +93,7 @@ def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.db1(out)
+        out = self.act1(out)

        out = self.conv2(out)
        out = self.bn2(out)
@@ -130,10 +105,9 @@ def forward(self, x):
        out = self.conv3(out)
        out = self.bn3(out)
        out = self.db3(out)
-        out = self.act3(out)

        if self.stride == 1 and self.inplanes == self.outplanes:  # TODO: or add 1x1?
-            out = out + residual  # No inplace if there is in-place activation before
+            out += residual  # No inplace if there is in-place activation before

        return out

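With act3 gone, the projection back to outplanes stays linear, matching MobileNetV2's linear-bottleneck design. Pulling the pieces of forward() together (the middle of the method is elided between hunks, so the db2/act2/se ordering below is inferred from __init__, not shown in the diff):

    def forward_sketch(block, x):
        residual = x
        out = block.act1(block.db1(block.bn1(block.conv1(x))))    # 1x1 expand
        out = block.act2(block.db2(block.bn2(block.conv2(out))))  # kxk depthwise
        out = block.se(out)                                       # SqEx or Identity
        out = block.db3(block.bn3(block.conv3(out)))              # 1x1 project, no act
        if block.stride == 1 and block.inplanes == block.outplanes:
            out += residual
        return out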
@@ -187,7 +161,6 @@ def __init__(self, inplanes, num_classes, expplanes1, expplanes2):
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        self.conv2 = nn.Conv2d(expplanes1, expplanes2, kernel_size=1, stride=1, bias=False)
-        self.bn2 = nn.BatchNorm2d(expplanes2)
        self.act2 = HardSwish(inplace=True)

        self.dropout = nn.Dropout(p=0.2, inplace=True)
@@ -207,7 +180,6 @@ def forward(self, x):
        out = self.avgpool(out)

        out = self.conv2(out)
-        out = self.bn2(out)
        out = self.act2(out)

        # flatten for input to fully-connected layer
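Dropping bn2 in the classifier head follows the MobileNetV3 paper: once this 1x1 convolution sits behind global average pooling it runs at 1x1 spatial resolution, and the paper applies it without batch normalization. A sketch of the resulting head (channel sizes are the paper's Large-variant values, not necessarily this file's expplanes1/expplanes2; flatten plus the fully-connected classifier follow in the actual module):

    import torch.nn as nn
    from pytorch_toolbelt.modules.activations import HardSwish

    head = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        nn.Conv2d(960, 1280, kernel_size=1, stride=1, bias=False),  # no BN after this
        HardSwish(inplace=True),
    )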
@@ -246,16 +218,16 @@ def __init__(self, num_classes=1000, scale=1., in_channels=3, drop_prob=0.0, num
            [80, 184, 80, 1, 3, drop_prob, False, HardSwish],  # -> 14x14
            [80, 480, 112, 1, 3, drop_prob, True, HardSwish],  # -> 14x14
            [112, 672, 112, 1, 3, drop_prob, True, HardSwish],  # -> 14x14
-            [112, 672, 160, 1, 5, drop_prob, True, HardSwish],  # -> 14x14
-            [160, 672, 160, 2, 5, drop_prob, True, HardSwish],  # -> 7x7 #TODO
+            [112, 672, 160, 2, 5, drop_prob, True, HardSwish],  # -> 7x7
+            [160, 672, 160, 1, 5, drop_prob, True, HardSwish],  # -> 7x7
            [160, 960, 160, 1, 5, drop_prob, True, HardSwish],  # -> 7x7
        ]
        self.bottlenecks_setting_small = [
            # in, exp, out, s, k, dp, se, act
-            [16, 64, 24, 2, 3, 0, True, nn.ReLU],  # -> 56x56 #TODO
-            [24, 72, 24, 2, 3, 0, False, nn.ReLU],  # -> 28x28
-            [24, 88, 40, 1, 3, 0, False, nn.ReLU],  # -> 28x28
-            [40, 96, 40, 2, 5, 0, True, HardSwish],  # -> 14x14 #TODO
+            [16, 64, 16, 2, 3, 0, True, nn.ReLU],  # -> 56x56
+            [16, 72, 24, 2, 3, 0, False, nn.ReLU],  # -> 28x28
+            [24, 88, 24, 1, 3, 0, False, nn.ReLU],  # -> 28x28
+            [24, 96, 40, 2, 5, 0, True, HardSwish],  # -> 14x14
            [40, 240, 40, 1, 5, drop_prob, True, HardSwish],  # -> 14x14
            [40, 240, 40, 1, 5, drop_prob, True, HardSwish],  # -> 14x14
            [40, 120, 48, 1, 5, drop_prob, True, HardSwish],  # -> 14x14
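Each row of these tables configures one LinearBottleneck, following the column legend `# in, exp, out, s, k, dp, se, act`. A hedged illustration of the mapping (keyword names beyond those visible in the truncated __init__ signature are assumptions, and act_params is omitted):

    # in, exp, out, s, k, dp, se, act
    row = [112, 672, 160, 2, 5, 0.0, True, HardSwish]
    inplanes, expplanes, outplanes, stride, k, drop_prob, use_se, act = row
    block = LinearBottleneck(inplanes, outplanes, expplanes, k=k, stride=stride,
                             drop_prob=drop_prob, SE=use_se, activation=act)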
@@ -290,7 +262,6 @@ def __init__(self, num_classes=1000, scale=1., in_channels=3, drop_prob=0.0, num

    def _make_bottlenecks(self):
        layers = []
-
        modules = OrderedDict()
        stage_name = "Bottleneck"

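Building the stages through an OrderedDict gives every bottleneck a stable, readable name in the module tree and in state_dict keys. A minimal illustration of the idiom (the loop body is assumed, not taken from the commit):

    from collections import OrderedDict
    import torch.nn as nn

    modules = OrderedDict()
    stage_name = "Bottleneck"
    for i in range(3):
        # stand-in for the real per-stage constructor, e.g. LinearBottleneck(...)
        modules["{}_{}".format(stage_name, i)] = nn.Identity()
    bottlenecks = nn.Sequential(modules)
    # -> named children: Bottleneck_0, Bottleneck_1, Bottleneck_2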