PytorchでSSDのあれこれ
SSD512に対応している。クラス数を変えてもOK。オリジナルのamdegroot/ssd.pytorch
はSSD512には対応していないので、そのあたりが優れてる。
midasklr/SSD.Pytorch
のコードはamdegroot/ssd.pytorch
をコピペして改変したようなのになっている。./data/scripts
下のシェルの使い方はオリジナルのREADMEに書いてある。また、midasklr/SSD.Pytorch
のデータセットを置いている場所が違っているので、少々コードを触らないと動かない部分がある。vgg16をefficient-netに変更するコードもあるけど、動かし方は良くわからない。
オリジナルのnet.vgg
のところがnet.base
に変わっている関係で学習済みモデル
https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth
が、そのままだとロードできない。オリジナルをssd300_mAP_77.43_v2_origin.pth
として保存して、以下のコードでmidasklr/SSD.Pytorch
でロードできるようになる(vgg
で始まるキーをbase
で始まるキーで書き直して保存し直しているだけ)。
# Convert the original amdegroot/ssd.pytorch checkpoint so that it can be
# loaded by midasklr/SSD.Pytorch: every state-dict key whose first dotted
# component starts with 'vgg' gets that component replaced by 'base'.
from collections import OrderedDict

import torch

# Build the network with CUDA tensors when a GPU is available.
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

from ssd import build_ssd

# map_location='cpu' lets the checkpoint deserialize even if it holds CUDA
# tensors and no GPU is present; load_state_dict() below copies the values
# into the model's own parameters.
weight = torch.load('./weights/ssd300_mAP_77.43_v2_origin.pth',
                    map_location='cpu')

new_weights = OrderedDict()
for key, value in weight.items():
    if key.startswith('vgg'):
        parts = key.split('.')
        parts[0] = 'base'
        key = '.'.join(parts)
    # Every entry is kept; only the 'vgg' ones were renamed above.
    new_weights[key] = value

# Loading into a freshly built model verifies that the renamed keys match.
net = build_ssd('test', 300, 21)
net.load_state_dict(new_weights)
torch.save(net.state_dict(), './weights/ssd300_mAP_77.43_v2.pth')
pytorch 1.7.1にすると、2箇所でエラーが発生する
RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function)
RuntimeError: index_select(): functions with out=... arguments don't support automatic differentiation, but one of the arguments requires grad.
次の修正で動くようになる。
@@ -21,7 +21,7 @@ class Detect(Function):
self.conf_thresh = conf_thresh
self.variance = cfg['SSD{}'.format(size)]['variance']
- def forward(self, loc_data, conf_data, prior_data):
+ def __call__(self, loc_data, conf_data, prior_data):
"""
Args:
loc_data: (tensor) Loc preds from loc layers
@@ -98,12 +98,13 @@ class SSD(nn.Module):
loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
if self.phase == "test":
- output = self.detect(
- loc.view(loc.size(0), -1, 4), # loc preds
- self.softmax(conf.view(conf.size(0), -1,
- self.num_classes)), # conf preds
- self.priors.type(type(x.data)) # default boxes
- )
+ with torch.no_grad():
+ output = self.detect(
+ loc.view(loc.size(0), -1, 4), # loc preds
+ self.softmax(conf.view(conf.size(0), -1,
+ self.num_classes)), # conf preds
+ self.priors.type(type(x.data)) # default boxes
+ )
else:
output = (
loc.view(loc.size(0), -1, 4),
https://s3.amazonaws.com/amdegroot-datasets/instances_trainval35k.json.zip
が使えなくなっていて、cocoデータセットで学習できない。
次のリンクを使えば良いらしい。
https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0
参考
ここを見るとvalからminivalを引いたものみたい。35000程度だから数的には35kだけど違うのかな?文章的には118287個の方をtrainval35kと呼んでるっぽい。
とりあえず、こんな感じに修正
@@ -22,7 +22,7 @@ if [ -z "$1" ]
echo "navigating to " $1 " ..."
cd $1
fi
-
+cp ~/note/data/coco_labels.txt ~/data/coco/
if [ ! -d images ]
then
mkdir -p ./images
@@ -64,7 +64,10 @@ echo "Creating trainval35k dataset..."
# Download annotations json
echo "Downloading trainval35k annotations from S3"
-curl -LO https://s3.amazonaws.com/amdegroot-datasets/instances_trainval35k.json.zip
+curl -L https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0 -o instances_valminusminival2014.json.zip
+unzip instances_valminusminival2014.json.zip
+mv instances_valminusminival2014.json instances_trainval35k.json
+rm instances_valminusminival2014.json.zip
vocの学習
python3 train.py
cocoの学習
python3 train.py --dataset COCO --num_class 81 --dataset_root=/home/{user}/data/coco
学習時に次のような警告が出る
VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
dtypeを指定して直す
@@ -219,7 +219,7 @@ class RandomSampleCrop(object):
labels (Tensor): the class labels for each bbox
"""
def __init__(self):
- self.sample_options = (
+ self.sample_options = np.array((
# using entire original input image
None,
# sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9
@@ -229,7 +229,7 @@ class RandomSampleCrop(object):
(0.9, None),
# randomly sample a patch
(None, None),
- )
+ ), dtype=object)
def __call__(self, image, boxes=None, labels=None):
height, width, _ = image.shape
pylintがおかしいので、これを試してみる
scriptモジュールを保存するコード。実行してTrue
が返ってきたら、(少なくともその入力に対しては)元のモデルと同等ということ。
# Export the test-phase SSD300 model as a TorchScript module and check that
# the scripted module reproduces the eager model's output on a dummy input.
import torch
from ssd import build_ssd

net = build_ssd('test', 300, 21)
# map_location='cpu' remaps tensors from any saved device, not only 'cuda:0'.
net_weights = torch.load('./weights/ssd300_mAP_77.43_v2.pth',
                         map_location='cpu')
net.load_state_dict(net_weights)

net_scripted = torch.jit.script(net)

# Only the first element of the returned 3-tuple is compared here.
dummy_input = torch.rand(1, 3, 300, 300)
a, _, _ = net(dummy_input)
b, _, _ = net_scripted(dummy_input)
# Prints True when both outputs are element-wise identical.
print(torch.all(a == b).item())

net_scripted.save('./weights/ssd300_voc.pt')
型宣言を追加。torchscriptを使用するとき、これはintでないと駄目ですとか、いろいろ出るので修正。nmsはいろいろ書き換えないとエラーが出るので掲載。
@@ -172,7 +173,7 @@ def log_sum_exp(x):
# Original author: Francisco Massa:
# https://github.com/fmassa/object-detection.torch
# Ported to PyTorch by Max deGroot (02/01/2017)
-def nms(boxes, scores, overlap=0.5, top_k=200):
+def nms(boxes: torch.Tensor, scores: torch.Tensor, overlap: float =0.5, top_k: int =200) -> Tuple[torch.Tensor, torch.Tensor, int]:
"""Apply non-maximum suppression at test time to avoid detecting too many
overlapping bounding boxes for a given object.
Args:
@@ -184,9 +185,9 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
The indices of the kept boxes with respect to num_priors.
"""
- keep = scores.new(scores.size(0)).zero_().long()
+ keep = scores.new_zeros(scores.size(0)).long()
if boxes.numel() == 0:
- return keep
+ return keep, scores, 0
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
@@ -195,14 +196,11 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
v, idx = scores.sort(0) # sort in ascending order
# I = I[v >= 0.01]
idx = idx[-top_k:] # indices of the top-k largest vals
- xx1 = boxes.new()
- yy1 = boxes.new()
- xx2 = boxes.new()
- yy2 = boxes.new()
- w = boxes.new()
- h = boxes.new()
-
- # keep = torch.Tensor()
+ xx1 = boxes.new_zeros(0)
+ yy1 = boxes.new_zeros(0)
+ xx2 = boxes.new_zeros(0)
+ yy2 = boxes.new_zeros(0)
+
count = 0
while idx.numel() > 0:
i = idx[-1] # index of current largest val
@@ -222,8 +220,6 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
yy1 = torch.clamp(yy1, min=y1[i])
xx2 = torch.clamp(xx2, max=x2[i])
yy2 = torch.clamp(yy2, max=y2[i])
- w.resize_as_(xx2)
- h.resize_as_(yy2)
w = xx2 - xx1
h = yy2 - yy1
# check sizes of xx1 and xx2.. after each iteration
@@ -236,4 +232,4 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
IoU = inter/union # store result in iou
# keep only elements with an IoU <= overlap
idx = idx[IoU.le(overlap)]
- return keep, count
+ return keep[:count], scores[keep[:count]], count
nmsの返す型を変えてしまった(あとで入れるsoft_nms
と合わせたいという理由で変更)のも込み込みでDetection
も修正が必要。
@@ -1,16 +1,15 @@
import torch
-from torch.autograd import Function
from ..box_utils import decode, nms
-from data import voc as cfg
+from typing import List
-
-class Detect(Function):
+@torch.jit.script
+class Detect:
@@ -19,9 +18,9 @@ class Detect(Function):
if nms_thresh <= 0:
raise ValueError('nms_threshold must be non negative.')
self.conf_thresh = conf_thresh
- self.variance = cfg['SSD{}'.format(size)]['variance']
+ self.variance = variance
- def __call__(self, loc_data, conf_data, prior_data):
+ def __call__(self, loc_data: torch.Tensor, conf_data: torch.Tensor, prior_data: torch.Tensor) -> torch.Tensor:
"""
Args:
loc_data: (tensor) Loc preds from loc layers
@@ -54,10 +53,9 @@ class Detect(Function):
l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
boxes = decoded_boxes[l_mask].view(-1, 4)
# idx of highest scoring and non-overlapping boxes per class
- ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
+ ids, scores, count = nms(boxes, scores, self.nms_thresh, self.top_k)
output[i, cl, :count] = \
- torch.cat((scores[ids[:count]].unsqueeze(1),
- boxes[ids[:count]]), 1)
+ torch.cat((scores.unsqueeze(1), boxes[ids]), 1)
flt = output.contiguous().view(num, -1, 5)
_, idx = flt[:, :, 0].sort(1, descending=True)
_, rank = idx.sort(1)
SSDも変更が必要。torchscriptだと途中でループを止めたりできないので、ループのindexを見て、途中でsources
への要素追加を挿んでる。torchscript抜きにしてもこう書いたほうが良いような。forward
メソッドが学習時と推論時で返す型がもともと違っていたので学習時に合わせる変更。
@@ -47,9 +51,9 @@ class SSD(nn.Module):
if phase == 'test':
self.softmax = nn.Softmax(dim=-1)
self.detect = Detect(num_classes, 0, 200, 0.01, 0.45, cfg['variance'])
- def forward(self, x):
+ def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]:
"""Applies network layers and ops on input image(s) x.
Args:
@@ -68,20 +72,16 @@ class SSD(nn.Module):
2: localization layers, Shape: [batch,num_priors*4]
3: priorbox layers, Shape: [2,num_priors*4]
"""
- sources = list()
- loc = list()
- conf = list()
+ sources = []
+ loc = []
+ conf = []
# apply vgg up to conv4_3 relu
- for k in range(23):
- x = self.base[k](x)
-
- s = self.L2Norm(x)
- sources.append(s)
-
- # apply vgg up to fc7
- for k in range(23, len(self.base)):
- x = self.base[k](x)
+ for i, layer in enumerate(self.base):
+ x = layer(x)
+ if i == 22:
+ s = self.L2Norm(x)
+ sources.append(s)
sources.append(x)
# apply extra layers and cache source layer outputs
@@ -91,19 +91,21 @@ class SSD(nn.Module):
sources.append(x)
# apply multibox head to source layers
- for (x, l, c) in zip(sources, self.loc, self.conf):
+ for i, (l, c) in enumerate(zip(self.loc, self.conf)):
+ x = sources[i]
loc.append(l(x).permute(0, 2, 3, 1).contiguous())
conf.append(c(x).permute(0, 2, 3, 1).contiguous())
loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
if self.phase == "test":
with torch.no_grad():
output = (self.detect(
loc.view(loc.size(0), -1, 4), # loc preds
self.softmax(conf.view(conf.size(0), -1,
self.num_classes)), # conf preds
self.priors, # default boxes
- )
+ ), None, None)
else:
output = (
loc.view(loc.size(0), -1, 4),
@@ -112,7 +114,7 @@ class SSD(nn.Module):
)
return output
変更したSSDのコードと、公式ドキュメントを参考にもう少し調べたい。
trainモードでtorchscriptモジュールを保存。回帰テストに使える
# Export the train-phase SSD300 model as a TorchScript module and compare all
# three outputs of the eager and scripted models on a dummy input.
import torch
from ssd import build_ssd

net = build_ssd('train', 300, 21)
# map_location='cpu' remaps tensors from any saved device, not only 'cuda:0'.
net_weights = torch.load('./weights/ssd300_mAP_77.43_v2.pth',
                         map_location='cpu')
net.load_state_dict(net_weights)

net_scripted = torch.jit.script(net)

dummy_input = torch.rand(1, 3, 300, 300)
loc, conf, prior = net(dummy_input)
loc2, conf2, prior2 = net_scripted(dummy_input)
# Each line prints True when the corresponding outputs are identical.
print(torch.all(loc == loc2).item())
print(torch.all(conf == conf2).item())
print(torch.all(prior == prior2).item())

net_scripted.save('./weights/ssd300_voc_train.pt')
webカメラを使ったデモの実行。
python demo/live.py --weights ./weights/ssd300_mAP_77.43_v2.pth
cv2.waitKey(1)
はキー押しっぱなしでも反応しないくらい待ち時間が短いのでcv2.waitKey(100)
とかにする。そうするとESCキーで停止出来る。