Meta Byte Track
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

darknet.py 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  4. from torch import nn
  5. from .network_blocks import BaseConv, CSPLayer, DWConv, Focus, ResLayer, SPPBottleneck
  6. class Darknet(nn.Module):
  7. # number of blocks from dark2 to dark5.
  8. depth2blocks = {21: [1, 2, 2, 1], 53: [2, 8, 8, 4]}
  9. def __init__(
  10. self,
  11. depth,
  12. in_channels=3,
  13. stem_out_channels=32,
  14. out_features=("dark3", "dark4", "dark5"),
  15. ):
  16. """
  17. Args:
  18. depth (int): depth of darknet used in model, usually use [21, 53] for this param.
  19. in_channels (int): number of input channels, for example, use 3 for RGB image.
  20. stem_out_channels (int): number of output chanels of darknet stem.
  21. It decides channels of darknet layer2 to layer5.
  22. out_features (Tuple[str]): desired output layer name.
  23. """
  24. super().__init__()
  25. assert out_features, "please provide output features of Darknet"
  26. self.out_features = out_features
  27. self.stem = nn.Sequential(
  28. BaseConv(in_channels, stem_out_channels, ksize=3, stride=1, act="lrelu"),
  29. *self.make_group_layer(stem_out_channels, num_blocks=1, stride=2),
  30. )
  31. in_channels = stem_out_channels * 2 # 64
  32. num_blocks = Darknet.depth2blocks[depth]
  33. # create darknet with `stem_out_channels` and `num_blocks` layers.
  34. # to make model structure more clear, we don't use `for` statement in python.
  35. self.dark2 = nn.Sequential(
  36. *self.make_group_layer(in_channels, num_blocks[0], stride=2)
  37. )
  38. in_channels *= 2 # 128
  39. self.dark3 = nn.Sequential(
  40. *self.make_group_layer(in_channels, num_blocks[1], stride=2)
  41. )
  42. in_channels *= 2 # 256
  43. self.dark4 = nn.Sequential(
  44. *self.make_group_layer(in_channels, num_blocks[2], stride=2)
  45. )
  46. in_channels *= 2 # 512
  47. self.dark5 = nn.Sequential(
  48. *self.make_group_layer(in_channels, num_blocks[3], stride=2),
  49. *self.make_spp_block([in_channels, in_channels * 2], in_channels * 2),
  50. )
  51. def make_group_layer(self, in_channels: int, num_blocks: int, stride: int = 1):
  52. "starts with conv layer then has `num_blocks` `ResLayer`"
  53. return [
  54. BaseConv(in_channels, in_channels * 2, ksize=3, stride=stride, act="lrelu"),
  55. *[(ResLayer(in_channels * 2)) for _ in range(num_blocks)],
  56. ]
  57. def make_spp_block(self, filters_list, in_filters):
  58. m = nn.Sequential(
  59. *[
  60. BaseConv(in_filters, filters_list[0], 1, stride=1, act="lrelu"),
  61. BaseConv(filters_list[0], filters_list[1], 3, stride=1, act="lrelu"),
  62. SPPBottleneck(
  63. in_channels=filters_list[1],
  64. out_channels=filters_list[0],
  65. activation="lrelu",
  66. ),
  67. BaseConv(filters_list[0], filters_list[1], 3, stride=1, act="lrelu"),
  68. BaseConv(filters_list[1], filters_list[0], 1, stride=1, act="lrelu"),
  69. ]
  70. )
  71. return m
  72. def forward(self, x):
  73. outputs = {}
  74. x = self.stem(x)
  75. outputs["stem"] = x
  76. x = self.dark2(x)
  77. outputs["dark2"] = x
  78. x = self.dark3(x)
  79. outputs["dark3"] = x
  80. x = self.dark4(x)
  81. outputs["dark4"] = x
  82. x = self.dark5(x)
  83. outputs["dark5"] = x
  84. return {k: v for k, v in outputs.items() if k in self.out_features}
  85. class CSPDarknet(nn.Module):
  86. def __init__(
  87. self,
  88. dep_mul,
  89. wid_mul,
  90. out_features=("dark3", "dark4", "dark5"),
  91. depthwise=False,
  92. act="silu",
  93. ):
  94. super().__init__()
  95. assert out_features, "please provide output features of Darknet"
  96. self.out_features = out_features
  97. Conv = DWConv if depthwise else BaseConv
  98. base_channels = int(wid_mul * 64) # 64
  99. base_depth = max(round(dep_mul * 3), 1) # 3
  100. # stem
  101. self.stem = Focus(3, base_channels, ksize=3, act=act)
  102. # dark2
  103. self.dark2 = nn.Sequential(
  104. Conv(base_channels, base_channels * 2, 3, 2, act=act),
  105. CSPLayer(
  106. base_channels * 2,
  107. base_channels * 2,
  108. n=base_depth,
  109. depthwise=depthwise,
  110. act=act,
  111. ),
  112. )
  113. # dark3
  114. self.dark3 = nn.Sequential(
  115. Conv(base_channels * 2, base_channels * 4, 3, 2, act=act),
  116. CSPLayer(
  117. base_channels * 4,
  118. base_channels * 4,
  119. n=base_depth * 3,
  120. depthwise=depthwise,
  121. act=act,
  122. ),
  123. )
  124. # dark4
  125. self.dark4 = nn.Sequential(
  126. Conv(base_channels * 4, base_channels * 8, 3, 2, act=act),
  127. CSPLayer(
  128. base_channels * 8,
  129. base_channels * 8,
  130. n=base_depth * 3,
  131. depthwise=depthwise,
  132. act=act,
  133. ),
  134. )
  135. # dark5
  136. self.dark5 = nn.Sequential(
  137. Conv(base_channels * 8, base_channels * 16, 3, 2, act=act),
  138. SPPBottleneck(base_channels * 16, base_channels * 16, activation=act),
  139. CSPLayer(
  140. base_channels * 16,
  141. base_channels * 16,
  142. n=base_depth,
  143. shortcut=False,
  144. depthwise=depthwise,
  145. act=act,
  146. ),
  147. )
  148. def forward(self, x):
  149. outputs = {}
  150. x = self.stem(x)
  151. outputs["stem"] = x
  152. x = self.dark2(x)
  153. outputs["dark2"] = x
  154. x = self.dark3(x)
  155. outputs["dark3"] = x
  156. x = self.dark4(x)
  157. outputs["dark4"] = x
  158. x = self.dark5(x)
  159. outputs["dark5"] = x
  160. return {k: v for k, v in outputs.items() if k in self.out_features}