This commit is contained in:
sosokker 2024-04-28 02:25:11 +07:00
parent fda46f5b1b
commit adb6ec6497
50 changed files with 5800 additions and 1 deletion

.gitignore vendored
View File

@@ -186,4 +186,5 @@ dist-ssr
config.json
ActionDetector/
*.pth
*.cfg

View File

@@ -0,0 +1,52 @@
import os
import torch
import numpy as np
from .Actionsrecognition.Models import TwoStreamSpatialTemporalGraph
from .pose_utils import normalize_points_with_size, scale_pose
class TSSTG(object):
"""Two-Stream Spatial Temporal Graph Model Loader.
Args:
weight_file: (str) Path to trained weights file.
device: (str) Device to load the model on 'cpu' or 'cuda'.
"""
def __init__(self,
weight_file='./Models/TSSTG/tsstg-model.pth',
device='cuda'):
self.graph_args = {'strategy': 'spatial'}
self.class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
'Stand up', 'Sit down', 'Fall Down']
self.num_class = len(self.class_names)
self.device = device
self.model = TwoStreamSpatialTemporalGraph(self.graph_args, self.num_class).to(self.device)
self.model.load_state_dict(torch.load(weight_file))
self.model.eval()
def predict(self, pts, image_size):
"""Predict actions from single person skeleton points and score in time sequence.
Args:
pts: (numpy array) points and score in shape `(t, v, c)` where
t : inputs sequence (time steps).,
v : number of graph node (body parts).,
c : channel (x, y, score).,
image_size: (tuple of int) width, height of image frame.
Returns:
    (numpy array) Probability of each action class.
"""
pts[:, :, :2] = normalize_points_with_size(pts[:, :, :2], image_size[0], image_size[1])
pts[:, :, :2] = scale_pose(pts[:, :, :2])
pts = np.concatenate((pts, np.expand_dims((pts[:, 1, :] + pts[:, 2, :]) / 2, 1)), axis=1)
pts = torch.tensor(pts, dtype=torch.float32)
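# (t, v, c) -> (1, c, t, v): add a batch dimension for the graph model.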
pts = pts.permute(2, 0, 1)[None, :]
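# Motion stream: per-node (x, y) displacement between consecutive frames, shape (1, 2, t-1, v).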
mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
mot = mot.to(self.device)
pts = pts.to(self.device)
out = self.model((pts, mot))
return out.detach().cpu().numpy()
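A minimal usage sketch, assuming a trained weight file exists at the default path (13 input joints; predict() appends the 14th center node internally):

model = TSSTG(device='cuda')
pts = np.random.rand(30, 13, 3).astype(np.float32)  # (t, v, c): 30 frames, 13 joints, (x, y, score)
out = model.predict(pts, image_size=(640, 480))     # (1, num_class) action probabilities
print(model.class_names[out[0].argmax()])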

View File

@@ -0,0 +1,244 @@
### Reference from: https://github.com/yysijie/st-gcn/tree/master/net
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from .Utils import Graph
class GraphConvolution(nn.Module):
"""The basic module for applying a graph convolution.
Args:
- in_channels: (int) Number of channels in the input sequence data.
- out_channels: (int) Number of channels produced by the convolution.
- kernel_size: (int) Size of the graph convolving kernel.
- t_kernel_size: (int) Size of the temporal convolving kernel.
- t_stride: (int, optional) Stride of the temporal convolution. Default: 1
- t_padding: (int, optional) Temporal zero-padding added to both sides of
the input. Default: 0
- t_dilation: (int, optional) Spacing between temporal kernel elements. Default: 1
- bias: (bool, optional) If `True`, adds a learnable bias to the output.
Default: `True`
Shape:
- Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
A: Graph adjacency matrix in :math:`(K, V, V)`,
- Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`
where
:math:`N` is the batch size,
:math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
:math:`T_{in}/T_{out}` is the length of the input/output sequence,
:math:`V` is the number of graph nodes.
"""
def __init__(self, in_channels, out_channels, kernel_size,
t_kernel_size=1,
t_stride=1,
t_padding=0,
t_dilation=1,
bias=True):
super().__init__()
self.kernel_size = kernel_size
self.conv = nn.Conv2d(in_channels,
out_channels * kernel_size,
kernel_size=(t_kernel_size, 1),
padding=(t_padding, 0),
stride=(t_stride, 1),
dilation=(t_dilation, 1),
bias=bias)
def forward(self, x, A):
x = self.conv(x)
n, kc, t, v = x.size()
x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v)
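# Split channels into K partition subsets, then aggregate over graph nodes with each
# subset's adjacency: out[n, c, t, w] = sum_k sum_v x[n, k, c, t, v] * A[k, v, w].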
x = torch.einsum('nkctv,kvw->nctw', (x, A))
return x.contiguous()
class st_gcn(nn.Module):
"""Applies a spatial temporal graph convolution over an input graph sequence.
Args:
- in_channels: (int) Number of channels in the input sequence data.
- out_channels: (int) Number of channels produced by the convolution.
- kernel_size: (tuple) Size of the temporal convolving kernel and
graph convolving kernel.
- stride: (int, optional) Stride of the temporal convolution. Default: 1
- dropout: (float, optional) Dropout rate of the final output. Default: 0
- residual: (bool, optional) If `True`, applies a residual mechanism.
Default: `True`
Shape:
- Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
A: Graph adjacency matrix in :math:`(K, V, V)`,
- Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`
where
:math:`N` is the batch size,
:math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
:math:`T_{in}/T_{out}` is the length of the input/output sequence,
:math:`V` is the number of graph nodes.
"""
def __init__(self, in_channels, out_channels, kernel_size,
stride=1,
dropout=0,
residual=True):
super().__init__()
assert len(kernel_size) == 2
assert kernel_size[0] % 2 == 1
padding = ((kernel_size[0] - 1) // 2, 0)
self.gcn = GraphConvolution(in_channels, out_channels, kernel_size[1])
self.tcn = nn.Sequential(nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels,
out_channels,
(kernel_size[0], 1),
(stride, 1),
padding),
nn.BatchNorm2d(out_channels),
nn.Dropout(dropout, inplace=True)
)
if not residual:
self.residual = lambda x: 0
elif (in_channels == out_channels) and (stride == 1):
self.residual = lambda x: x
else:
self.residual = nn.Sequential(nn.Conv2d(in_channels,
out_channels,
kernel_size=1,
stride=(stride, 1)),
nn.BatchNorm2d(out_channels)
)
self.relu = nn.ReLU(inplace=True)
def forward(self, x, A):
res = self.residual(x)
x = self.gcn(x, A)
x = self.tcn(x) + res
return self.relu(x)
class StreamSpatialTemporalGraph(nn.Module):
"""Spatial temporal graph convolutional networks.
Args:
- in_channels: (int) Number of input channels.
- graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class.
- num_class: (int) Number of class outputs. If `None`, returns the pooled features of
the last st-gcn layer instead.
- edge_importance_weighting: (bool) If `True`, adds a learnable importance
weighting to the edges of the graph.
- **kwargs: (optional) Other parameters for graph convolution units.
Shape:
- Input: :math:`(N, in_channels, T_{in}, V_{in})`
- Output: :math:`(N, num_class)` where
:math:`N` is the batch size,
:math:`T_{in}` is the length of the input sequence,
:math:`V_{in}` is the number of graph nodes,
or, if num_class is `None`: :math:`(N, out_channels)` where
:math:`out_channels` is the number of output channels of the last layer.
"""
def __init__(self, in_channels, graph_args, num_class=None,
edge_importance_weighting=True, **kwargs):
super().__init__()
# Load graph.
graph = Graph(**graph_args)
A = torch.tensor(graph.A, dtype=torch.float32, requires_grad=False)
self.register_buffer('A', A)
# Networks.
spatial_kernel_size = A.size(0)
temporal_kernel_size = 9
kernel_size = (temporal_kernel_size, spatial_kernel_size)
kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}
self.data_bn = nn.BatchNorm1d(in_channels * A.size(1))
self.st_gcn_networks = nn.ModuleList((
st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0),
st_gcn(64, 64, kernel_size, 1, **kwargs),
st_gcn(64, 64, kernel_size, 1, **kwargs),
st_gcn(64, 64, kernel_size, 1, **kwargs),
st_gcn(64, 128, kernel_size, 2, **kwargs),
st_gcn(128, 128, kernel_size, 1, **kwargs),
st_gcn(128, 128, kernel_size, 1, **kwargs),
st_gcn(128, 256, kernel_size, 2, **kwargs),
st_gcn(256, 256, kernel_size, 1, **kwargs),
st_gcn(256, 256, kernel_size, 1, **kwargs)
))
# initialize parameters for edge importance weighting.
if edge_importance_weighting:
self.edge_importance = nn.ParameterList([
nn.Parameter(torch.ones(A.size()))
for i in self.st_gcn_networks
])
else:
self.edge_importance = [1] * len(self.st_gcn_networks)
if num_class is not None:
self.cls = nn.Conv2d(256, num_class, kernel_size=1)
else:
self.cls = lambda x: x
def forward(self, x):
# data normalization.
N, C, T, V = x.size()
x = x.permute(0, 3, 1, 2).contiguous() # (N, V, C, T)
x = x.view(N, V * C, T)
x = self.data_bn(x)
x = x.view(N, V, C, T)
x = x.permute(0, 2, 3, 1).contiguous()
x = x.view(N, C, T, V)
# forward.
for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
x = gcn(x, self.A * importance)
x = F.avg_pool2d(x, x.size()[2:])
x = self.cls(x)
x = x.view(x.size(0), -1)
return x
class TwoStreamSpatialTemporalGraph(nn.Module):
"""Two inputs spatial temporal graph convolutional networks.
Args:
- graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class.
- num_class: (int) Number of class outputs.
- edge_importance_weighting: (bool) If `True`, adds a learnable importance
weighting to the edges of the graph.
- **kwargs: (optional) Other parameters for graph convolution units.
Shape:
- Input: tuple of :math:`((N, 3, T, V), (N, 2, T, V))`
for the points and motion streams, where
:math:`N` is the batch size,
:math:`in_channels` is the data channels (3 for (x, y, score), 2 for (mot_x, mot_y)),
:math:`T` is the length of the input sequence,
:math:`V` is the number of graph nodes,
- Output: :math:`(N, num_class)`
"""
def __init__(self, graph_args, num_class, edge_importance_weighting=True,
**kwargs):
super().__init__()
self.pts_stream = StreamSpatialTemporalGraph(3, graph_args, None,
edge_importance_weighting,
**kwargs)
self.mot_stream = StreamSpatialTemporalGraph(2, graph_args, None,
edge_importance_weighting,
**kwargs)
self.fcn = nn.Linear(256 * 2, num_class)
def forward(self, inputs):
out1 = self.pts_stream(inputs[0])
out2 = self.mot_stream(inputs[1])
concat = torch.cat([out1, out2], dim=-1)
out = self.fcn(concat)
return torch.sigmoid(out)
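For reference, a hedged smoke test of the two-stream model on dummy inputs (shapes follow the docstring; the motion stream may carry T-1 frames since each stream pools its own temporal axis):

model = TwoStreamSpatialTemporalGraph({'strategy': 'spatial'}, num_class=7)
pts = torch.randn(2, 3, 30, 14)               # (N, 3, T, V): x, y, score per node
mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]  # (N, 2, T-1, V): frame-to-frame motion
out = model((pts, mot))                       # (N, 7) sigmoid scores per action class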

View File

@@ -0,0 +1,123 @@
### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py
import os
import torch
import numpy as np
class Graph:
"""The Graph to model the skeletons extracted by the Alpha-Pose.
Args:
- strategy: (string) must be one of the following candidates:
- uniform: Uniform Labeling,
- distance: Distance Partitioning,
- spatial: Spatial Configuration,
For more information, please refer to the section 'Partition Strategies'
in the ST-GCN paper (https://arxiv.org/abs/1801.07455).
- layout: (string) must be one of the following candidates:
- coco_cut: COCO format with 4 joints (L/R ears, L/R eyes) cut out.
- max_hop: (int) the maximal distance between two connected nodes.
- dilation: (int) controls the spacing between the kernel points.
"""
def __init__(self,
layout='coco_cut',
strategy='uniform',
max_hop=1,
dilation=1):
self.max_hop = max_hop
self.dilation = dilation
self.get_edge(layout)
self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop)
self.get_adjacency(strategy)
def get_edge(self, layout):
if layout == 'coco_cut':
self.num_node = 14
self_link = [(i, i) for i in range(self.num_node)]
neighbor_link = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10),
(10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)]
self.edge = self_link + neighbor_link
self.center = 13
else:
raise ValueError('This layout is not supported!')
def get_adjacency(self, strategy):
valid_hop = range(0, self.max_hop + 1, self.dilation)
adjacency = np.zeros((self.num_node, self.num_node))
for hop in valid_hop:
adjacency[self.hop_dis == hop] = 1
normalize_adjacency = normalize_digraph(adjacency)
if strategy == 'uniform':
A = np.zeros((1, self.num_node, self.num_node))
A[0] = normalize_adjacency
self.A = A
elif strategy == 'distance':
A = np.zeros((len(valid_hop), self.num_node, self.num_node))
for i, hop in enumerate(valid_hop):
A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis ==
hop]
self.A = A
elif strategy == 'spatial':
A = []
for hop in valid_hop:
a_root = np.zeros((self.num_node, self.num_node))
a_close = np.zeros((self.num_node, self.num_node))
a_further = np.zeros((self.num_node, self.num_node))
for i in range(self.num_node):
for j in range(self.num_node):
if self.hop_dis[j, i] == hop:
if self.hop_dis[j, self.center] == self.hop_dis[i, self.center]:
a_root[j, i] = normalize_adjacency[j, i]
elif self.hop_dis[j, self.center] > self.hop_dis[i, self.center]:
a_close[j, i] = normalize_adjacency[j, i]
else:
a_further[j, i] = normalize_adjacency[j, i]
if hop == 0:
A.append(a_root)
else:
A.append(a_root + a_close)
A.append(a_further)
A = np.stack(A)
self.A = A
#self.A = np.swapaxes(np.swapaxes(A, 0, 1), 1, 2)
else:
raise ValueError("This strategy is not supported!")
def get_hop_distance(num_node, edge, max_hop=1):
A = np.zeros((num_node, num_node))
for i, j in edge:
A[j, i] = 1
A[i, j] = 1
# compute hop steps
hop_dis = np.zeros((num_node, num_node)) + np.inf
transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)]
arrive_mat = (np.stack(transfer_mat) > 0)
for d in range(max_hop, -1, -1):
hop_dis[arrive_mat[d]] = d
return hop_dis
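# Hedged example: with edges [(0, 1), (1, 2)] and max_hop=1,
# hop_dis[0, 1] == hop_dis[1, 2] == 1 while hop_dis[0, 2] stays inf,
# since neither A^0 nor A^1 connects nodes two hops apart.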
def normalize_digraph(A):
Dl = np.sum(A, 0)
num_node = A.shape[0]
Dn = np.zeros((num_node, num_node))
for i in range(num_node):
if Dl[i] > 0:
Dn[i, i] = Dl[i]**(-1)
AD = np.dot(A, Dn)
return AD
def normalize_undigraph(A):
Dl = np.sum(A, 0)
num_node = A.shape[0]
Dn = np.zeros((num_node, num_node))
for i in range(num_node):
if Dl[i] > 0:
Dn[i, i] = Dl[i]**(-0.5)
DAD = np.dot(np.dot(Dn, A), Dn)
return DAD
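A hedged sketch of the adjacency built with the defaults used elsewhere in this commit ('coco_cut' layout, 'spatial' strategy):

graph = Graph(layout='coco_cut', strategy='spatial')
print(graph.A.shape)  # (3, 14, 14): root, closer and further partition subsets over 14 nodes

This K=3 leading dimension is what StreamSpatialTemporalGraph picks up as spatial_kernel_size via A.size(0).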

View File

@@ -0,0 +1,216 @@
import os
import time
import torch
import pickle
import numpy as np
import torch.nn.functional as F
from shutil import copyfile
from tqdm import tqdm
from torch.utils import data
from torch.optim.adadelta import Adadelta
from sklearn.model_selection import train_test_split
from .Models import *
from Visualizer import plot_graphs, plot_confusion_metrix
save_folder = 'saved/TSSTG(pts+mot)-01(cf+hm-hm)'
device = 'cuda'
epochs = 30
batch_size = 32
# DATA FILES.
# Should be in the format of
#   inputs: (N_samples, time_steps, graph_node, channels),
#   labels: (N_samples, num_class)
# with normalization already applied. By default the data is created by:
#   Data.create_dataset_(1-3).py
# where
#   time_steps: Number of frames in the input sequence, Default: 30
#   graph_node: Number of nodes in the skeleton, Default: 14
#   channels: Input channels (x, y and score), Default: 3
#   num_class: Number of pose classes to train, Default: 7
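# A hedged sketch of a minimal dummy file in this format (for smoke testing only;
# not one of the real dataset files listed below):
#   feats = np.zeros((8, 30, 14, 3), dtype=np.float32)           # (N, time_steps, graph_node, channels)
#   labls = np.eye(7, dtype=np.float32)[np.zeros(8, dtype=int)]  # (N, num_class) one-hot
#   with open('dummy-set.pkl', 'wb') as f:
#       pickle.dump((feats, labls), f)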
data_files = ['../Data/Coffee_room_new-set(labelXscrw).pkl',
'../Data/Home_new-set(labelXscrw).pkl']
class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
'Stand up', 'Sit down', 'Fall Down']
num_class = len(class_names)
def load_dataset(data_files, batch_size, split_size=0):
"""Load data files into torch DataLoader with/without spliting train-test.
"""
features, labels = [], []
for fil in data_files:
with open(fil, 'rb') as f:
fts, lbs = pickle.load(f)
features.append(fts)
labels.append(lbs)
del fts, lbs
features = np.concatenate(features, axis=0)
labels = np.concatenate(labels, axis=0)
if split_size > 0:
x_train, x_valid, y_train, y_valid = train_test_split(features, labels, test_size=split_size,
random_state=9)
train_set = data.TensorDataset(torch.tensor(x_train, dtype=torch.float32).permute(0, 3, 1, 2),
torch.tensor(y_train, dtype=torch.float32))
valid_set = data.TensorDataset(torch.tensor(x_valid, dtype=torch.float32).permute(0, 3, 1, 2),
torch.tensor(y_valid, dtype=torch.float32))
train_loader = data.DataLoader(train_set, batch_size, shuffle=True)
valid_loader = data.DataLoader(valid_set, batch_size)
else:
train_set = data.TensorDataset(torch.tensor(features, dtype=torch.float32).permute(0, 3, 1, 2),
torch.tensor(labels, dtype=torch.float32))
train_loader = data.DataLoader(train_set, batch_size, shuffle=True)
valid_loader = None
return train_loader, valid_loader
def accuracy_batch(y_pred, y_true):
return (y_pred.argmax(1) == y_true.argmax(1)).mean()
def set_training(model, mode=True):
for p in model.parameters():
p.requires_grad = mode
model.train(mode)
return model
if __name__ == '__main__':
save_folder = os.path.join(os.path.dirname(__file__), save_folder)
if not os.path.exists(save_folder):
os.makedirs(save_folder)
# DATA.
train_loader, _ = load_dataset(data_files[0:1], batch_size)
valid_loader, train_loader_ = load_dataset(data_files[1:2], batch_size, 0.2)
train_loader = data.DataLoader(data.ConcatDataset([train_loader.dataset, train_loader_.dataset]),
batch_size, shuffle=True)
dataloader = {'train': train_loader, 'valid': valid_loader}
del train_loader_
# MODEL.
graph_args = {'strategy': 'spatial'}
model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device)
#optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer = Adadelta(model.parameters())
losser = torch.nn.BCELoss()
# TRAINING.
loss_list = {'train': [], 'valid': []}
accu_list = {'train': [], 'valid': []}
for e in range(epochs):
print('Epoch {}/{}'.format(e, epochs - 1))
for phase in ['train', 'valid']:
if phase == 'train':
model = set_training(model, True)
else:
model = set_training(model, False)
run_loss = 0.0
run_accu = 0.0
with tqdm(dataloader[phase], desc=phase) as iterator:
for pts, lbs in iterator:
# Create motion input by distance of points (x, y) of the same node
# in two frames.
mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
mot = mot.to(device)
pts = pts.to(device)
lbs = lbs.to(device)
# Forward.
out = model((pts, mot))
loss = losser(out, lbs)
if phase == 'train':
# Backward.
model.zero_grad()
loss.backward()
optimizer.step()
run_loss += loss.item()
accu = accuracy_batch(out.detach().cpu().numpy(),
lbs.detach().cpu().numpy())
run_accu += accu
iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
loss.item(), accu))
iterator.update()
#break
loss_list[phase].append(run_loss / len(iterator))
accu_list[phase].append(run_accu / len(iterator))
#break
print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:'
' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1],
loss_list['valid'][-1], accu_list['valid'][-1]))
# SAVE.
torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model.pth'))
plot_graphs(list(loss_list.values()), list(loss_list.keys()),
'Last Train: {:.2f}, Valid: {:.2f}'.format(
loss_list['train'][-1], loss_list['valid'][-1]
), 'Loss', xlim=[0, epochs],
save=os.path.join(save_folder, 'loss_graph.png'))
plot_graphs(list(accu_list.values()), list(accu_list.keys()),
'Last Train: {:.2f}, Valid: {:.2f}'.format(
accu_list['train'][-1], accu_list['valid'][-1]
), 'Accu', xlim=[0, epochs],
save=os.path.join(save_folder, 'accu_graph.png'))
#break
del train_loader, valid_loader
model.load_state_dict(torch.load(os.path.join(save_folder, 'tsstg-model.pth')))
# EVALUATION.
model = set_training(model, False)
data_file = data_files[1]
eval_loader, _ = load_dataset([data_file], 32)
print('Evaluation.')
run_loss = 0.0
run_accu = 0.0
y_preds = []
y_trues = []
with tqdm(eval_loader, desc='eval') as iterator:
for pts, lbs in iterator:
mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
mot = mot.to(device)
pts = pts.to(device)
lbs = lbs.to(device)
out = model((pts, mot))
loss = losser(out, lbs)
run_loss += loss.item()
accu = accuracy_batch(out.detach().cpu().numpy(),
lbs.detach().cpu().numpy())
run_accu += accu
y_preds.extend(out.argmax(1).detach().cpu().numpy())
y_trues.extend(lbs.argmax(1).cpu().numpy())
iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
loss.item(), accu))
iterator.update()
run_loss = run_loss / len(iterator)
run_accu = run_accu / len(iterator)
plot_confusion_metrix(y_trues, y_preds, class_names, 'Eval on: {}\nLoss: {:.4f}, Accu: {:.4f}'.format(
os.path.basename(data_file), run_loss, run_accu
), 'true', save=os.path.join(save_folder, '{}-confusion_matrix.png'.format(
os.path.basename(data_file).split('.')[0])))
print('Eval Loss: {:.4f}, Accu: {:.4f}'.format(run_loss, run_accu))

View File

@@ -0,0 +1,204 @@
import os
import cv2
import time
import torch
import numpy as np
from queue import Queue
from threading import Thread, Lock
class CamLoader:
"""Use threading to capture a frame from camera for faster frame load.
Recommend for camera or webcam.
Args:
camera: (int, str) Source of camera or video.,
preprocess: (Callable function) to process the frame before return.
"""
def __init__(self, camera, preprocess=None, ori_return=False):
self.stream = cv2.VideoCapture(camera)
assert self.stream.isOpened(), 'Cannot read camera source!'
self.fps = self.stream.get(cv2.CAP_PROP_FPS)
self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
self.stopped = False
self.ret = False
self.frame = None
self.ori_frame = None
self.read_lock = Lock()
self.ori = ori_return
self.preprocess_fn = preprocess
def start(self):
self.t = Thread(target=self.update, args=()) # , daemon=True)
self.t.start()
c = 0
while not self.ret:
time.sleep(0.1)
c += 1
if c > 20:
self.stop()
raise TimeoutError('Cannot get a frame from the camera!')
return self
def update(self):
    while not self.stopped:
        ret, frame = self.stream.read()
        if not ret:
            # Stop instead of calling .copy() on a None frame when the read fails.
            self.ret = False
            self.stopped = True
            break
        self.read_lock.acquire()
        self.ori_frame = frame.copy()
        if self.preprocess_fn is not None:
            frame = self.preprocess_fn(frame)
        self.ret, self.frame = ret, frame
        self.read_lock.release()
def grabbed(self):
"""Return `True` if can read a frame."""
return self.ret
def getitem(self):
self.read_lock.acquire()
frame = self.frame.copy()
ori_frame = self.ori_frame.copy()
self.read_lock.release()
if self.ori:
return frame, ori_frame
else:
return frame
def stop(self):
if self.stopped:
return
self.stopped = True
if self.t.is_alive():
self.t.join()
self.stream.release()
def __del__(self):
if self.stream.isOpened():
self.stream.release()
def __exit__(self, exc_type, exc_val, exc_tb):
if self.stream.isOpened():
self.stream.release()
class CamLoader_Q:
"""Use threading and queue to capture a frame and store to queue for pickup in sequence.
Recommend for video file.
Args:
camera: (int, str) Source of camera or video.,
batch_size: (int) Number of batch frame to store in queue. Default: 1,
queue_size: (int) Maximum queue size. Default: 256,
preprocess: (Callable function) to process the frame before return.
"""
def __init__(self, camera, batch_size=1, queue_size=256, preprocess=None):
self.stream = cv2.VideoCapture(camera)
assert self.stream.isOpened(), 'Cannot read camera source!'
self.fps = self.stream.get(cv2.CAP_PROP_FPS)
self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# Queue for storing frames.
self.stopped = False
self.batch_size = batch_size
self.Q = Queue(maxsize=queue_size)
self.preprocess_fn = preprocess
def start(self):
    # Keep a handle to the thread; Thread.start() returns None, so chaining
    # the call would lose the reference.
    self.t = Thread(target=self.update, args=(), daemon=True)
    self.t.start()
c = 0
while not self.grabbed():
time.sleep(0.1)
c += 1
if c > 20:
self.stop()
raise TimeoutError('Cannot get a frame from the camera!')
return self
def update(self):
while not self.stopped:
if not self.Q.full():
frames = []
for k in range(self.batch_size):
ret, frame = self.stream.read()
if not ret:
self.stop()
return
if self.preprocess_fn is not None:
frame = self.preprocess_fn(frame)
frames.append(frame)
frames = np.stack(frames)
self.Q.put(frames)
else:
with self.Q.mutex:
self.Q.queue.clear()
# time.sleep(0.05)
def grabbed(self):
"""Return `True` if can read a frame."""
return self.Q.qsize() > 0
def getitem(self):
return self.Q.get().squeeze()
def stop(self):
if self.stopped:
return
self.stopped = True
self.stream.release()
def __len__(self):
return self.Q.qsize()
def __del__(self):
if self.stream.isOpened():
self.stream.release()
def __exit__(self, exc_type, exc_val, exc_tb):
if self.stream.isOpened():
self.stream.release()
if __name__ == '__main__':
fps_time = 0
# Using threading.
cam = CamLoader(0).start()
while cam.grabbed():
frames = cam.getitem()
frames = cv2.putText(frames, 'FPS: %f' % (1.0 / (time.time() - fps_time)),
(10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
fps_time = time.time()
cv2.imshow('frame', frames)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cam.stop()
cv2.destroyAllWindows()
# Normal video capture.
"""cam = cv2.VideoCapture(0)
while True:
ret, frame = cam.read()
if ret:
#time.sleep(0.05)
#frame = (cv2.flip(frame, 1) / 255.).astype(np.float)
frame = cv2.putText(frame, 'FPS: %f' % (1.0 / (time.time() - fps_time)),
(10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
fps_time = time.time()
cv2.imshow('frame', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cam.release()
cv2.destroyAllWindows()"""

View File

@@ -0,0 +1,85 @@
"""
This script to create .csv videos frames action annotation file.
- It will play a video frame by frame control the flow by [a] and [d]
to play previos or next frame.
- Open the annot_file (.csv) and label each frame of video with number
of action class.
"""
import os
import cv2
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
'Stand up', 'Sit down', 'Fall Down'] # label.
video_folder = '../Data/falldata/Home/Videos'
annot_file = '../Data/Home_new.csv'
index_video_to_play = 0 # Choose video to play.
def create_csv(folder):
list_file = sorted(os.listdir(folder))
cols = ['video', 'frame', 'label']
df = pd.DataFrame(columns=cols)
for fil in list_file:
cap = cv2.VideoCapture(os.path.join(folder, fil))
frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
video = np.array([fil] * frames_count)
frame = np.arange(1, frames_count + 1)
label = np.array([0] * frames_count)
rows = np.stack([video, frame, label], axis=1)
# DataFrame.append was removed in pandas 2.0; build with pd.concat instead.
df = pd.concat([df, pd.DataFrame(rows, columns=cols)],
               ignore_index=True)
cap.release()
df.to_csv(annot_file, index=False)
if not os.path.exists(annot_file):
create_csv(video_folder)
annot = pd.read_csv(annot_file)
video_list = annot.iloc[:, 0].unique()
video_file = os.path.join(video_folder, video_list[index_video_to_play])
print(os.path.basename(video_file))
annot = annot[annot['video'] == video_list[index_video_to_play]].reset_index(drop=True)
frames_idx = annot.iloc[:, 1].tolist()
cap = cv2.VideoCapture(video_file)
frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
assert frames_count == len(frames_idx), 'frame count not equal! {} and {}'.format(
len(frames_idx), frames_count
)
i = 0
while True:
cap.set(cv2.CAP_PROP_POS_FRAMES, i)
ret, frame = cap.read()
if ret:
cls_name = class_names[int(annot.iloc[i, -1]) - 1]
frame = cv2.resize(frame, (0, 0), fx=1.5, fy=1.5)
frame = cv2.putText(frame, 'Frame: {} Pose: {}'.format(i+1, cls_name),
(10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
cv2.imshow('frame', frame)
key = cv2.waitKey(0) & 0xFF
if key == ord('q'):
break
elif key == ord('d'):
i += 1
continue
elif key == ord('a'):
i -= 1
continue
else:
break
cap.release()
cv2.destroyAllWindows()

View File

@@ -0,0 +1,137 @@
"""
This script to extract skeleton joints position and score.
- This 'annot_folder' is a action class and bounding box for each frames that came with dataset.
Should be in format of [frame_idx, action_cls, xmin, ymin, xmax, ymax]
Use for crop a person to use in pose estimation model.
- If have no annotation file you can leave annot_folder = '' for use Detector model to get the
bounding box.
"""
import os
import cv2
import time
import torch
import pandas as pd
import numpy as np
import torchvision.transforms as transforms
from DetectorLoader import TinyYOLOv3_onecls
from PoseEstimateLoader import SPPE_FastPose
from fn import vis_frame_fast
save_path = '../../Data/Home_new-pose+score.csv'
annot_file = '../../Data/Home_new.csv' # from create_dataset_1.py
video_folder = '../Data/falldata/Home/Videos'
annot_folder = '../Data/falldata/Home/Annotation_files' # bounding box annotation for each frame.
# DETECTION MODEL.
detector = TinyYOLOv3_onecls()
# POSE MODEL.
inp_h = 320
inp_w = 256
pose_estimator = SPPE_FastPose(inp_h, inp_w)
# with score.
columns = ['video', 'frame', 'Nose_x', 'Nose_y', 'Nose_s', 'LShoulder_x', 'LShoulder_y', 'LShoulder_s',
'RShoulder_x', 'RShoulder_y', 'RShoulder_s', 'LElbow_x', 'LElbow_y', 'LElbow_s', 'RElbow_x',
'RElbow_y', 'RElbow_s', 'LWrist_x', 'LWrist_y', 'LWrist_s', 'RWrist_x', 'RWrist_y', 'RWrist_s',
'LHip_x', 'LHip_y', 'LHip_s', 'RHip_x', 'RHip_y', 'RHip_s', 'LKnee_x', 'LKnee_y', 'LKnee_s',
'RKnee_x', 'RKnee_y', 'RKnee_s', 'LAnkle_x', 'LAnkle_y', 'LAnkle_s', 'RAnkle_x', 'RAnkle_y',
'RAnkle_s', 'label']
def normalize_points_with_size(points_xy, width, height, flip=False):
points_xy[:, 0] /= width
points_xy[:, 1] /= height
if flip:
points_xy[:, 0] = 1 - points_xy[:, 0]
return points_xy
annot = pd.read_csv(annot_file)
vid_list = annot['video'].unique()
for vid in vid_list:
print(f'Process on: {vid}')
df = pd.DataFrame(columns=columns)
cur_row = 0
# Pose Labels.
frames_label = annot[annot['video'] == vid].reset_index(drop=True)
cap = cv2.VideoCapture(os.path.join(video_folder, vid))
frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# Bounding Box Labels. Use distinct names so the outer 'annot'/'annot_file' are not clobbered,
# and join the extension onto the filename rather than as a path component.
bb_file = os.path.join(annot_folder, vid.split('.')[0] + '.txt')
bb_annot = None
if os.path.exists(bb_file):
    bb_annot = pd.read_csv(bb_file, header=None,
                           names=['frame_idx', 'class', 'xmin', 'ymin', 'xmax', 'ymax'])
    bb_annot = bb_annot.dropna().reset_index(drop=True)
    assert frames_count == len(bb_annot), 'frame count not equal! {} and {}'.format(frames_count, len(bb_annot))
fps_time = 0
i = 1
while True:
ret, frame = cap.read()
if ret:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
cls_idx = int(frames_label[frames_label['frame'] == i]['label'].iloc[0])
if bb_annot is not None:
    bb = np.array(bb_annot.iloc[i-1, 2:].astype(int))
else:
bb = detector.detect(frame)[0, :4].numpy().astype(int)
bb[:2] = np.maximum(0, bb[:2] - 5)
bb[2:] = np.minimum(frame_size, bb[2:] + 5) if bb[2:].any() != 0 else bb[2:]
result = []
if bb.any() != 0:
result = pose_estimator.predict(frame, torch.tensor(bb[None, ...]),
torch.tensor([[1.0]]))
if len(result) > 0:
pt_norm = normalize_points_with_size(result[0]['keypoints'].numpy().copy(),
frame_size[0], frame_size[1])
pt_norm = np.concatenate((pt_norm, result[0]['kp_score']), axis=1)
#idx = result[0]['kp_score'] <= 0.05
#pt_norm[idx.squeeze()] = np.nan
row = [vid, i, *pt_norm.flatten().tolist(), cls_idx]
scr = result[0]['kp_score'].mean()
else:
row = [vid, i, *[np.nan] * (13 * 3), cls_idx]
scr = 0.0
df.loc[cur_row] = row
cur_row += 1
# VISUALIZE.
frame = vis_frame_fast(frame, result)
frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)
frame = cv2.putText(frame, 'Frame: {}, Pose: {}, Score: {:.4f}'.format(i, cls_idx, scr),
(10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
frame = frame[:, :, ::-1]
fps_time = time.time()
i += 1
cv2.imshow('frame', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
else:
break
cap.release()
cv2.destroyAllWindows()
if os.path.exists(save_path):
df.to_csv(save_path, mode='a', header=False, index=False)
else:
df.to_csv(save_path, mode='w', index=False)

View File

@@ -0,0 +1,127 @@
"""
This script creates the dataset and labels by cleaning off some NaNs, normalizing,
smoothing labels, and weighting labels by scores.
"""
import os
import pickle
import numpy as np
import pandas as pd
class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
'Stand up', 'Sit down', 'Fall Down']
main_parts = ['LShoulder_x', 'LShoulder_y', 'RShoulder_x', 'RShoulder_y', 'LHip_x', 'LHip_y',
'RHip_x', 'RHip_y']
main_idx_parts = [1, 2, 7, 8, -1] # 1.5
csv_pose_file = '../Data/Coffee_room_new-pose+score.csv'
save_path = '../../Data/Coffee_room_new-set(labelXscrw).pkl'
# Params.
smooth_labels_step = 8
n_frames = 30
skip_frame = 1
annot = pd.read_csv(csv_pose_file)
# Remove NaN.
idx = annot.iloc[:, 2:-1][main_parts].isna().sum(1) > 0
idx = np.where(idx)[0]
annot = annot.drop(idx)
# One-Hot Labels.
label_onehot = pd.get_dummies(annot['label'])
annot = annot.drop('label', axis=1).join(label_onehot)
cols = label_onehot.columns.values
def scale_pose(xy):
"""
Normalize pose points by scale with max/min value of each pose.
xy : (frames, parts, xy) or (parts, xy)
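Returns xy scaled per frame to the range [-1, 1] (NaNs are ignored via nanmin/nanmax).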
"""
if xy.ndim == 2:
xy = np.expand_dims(xy, 0)
xy_min = np.nanmin(xy, axis=1)
xy_max = np.nanmax(xy, axis=1)
for i in range(xy.shape[0]):
xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1
return xy.squeeze()
def seq_label_smoothing(labels, max_step=10):
steps = 0
remain_step = 0
target_label = 0
active_label = 0
start_change = 0
max_val = np.max(labels)
min_val = np.min(labels)
for i in range(labels.shape[0]):
if remain_step > 0:
if i >= start_change:
labels[i][active_label] = max_val * remain_step / steps
labels[i][target_label] = max_val * (steps - remain_step) / steps \
if max_val * (steps - remain_step) / steps else min_val
remain_step -= 1
continue
diff_index = np.where(np.argmax(labels[i:i+max_step], axis=1) - np.argmax(labels[i]) != 0)[0]
if len(diff_index) > 0:
start_change = i + remain_step // 2
steps = diff_index[0]
remain_step = steps
target_label = np.argmax(labels[i + remain_step])
active_label = np.argmax(labels[i])
return labels
feature_set = np.empty((0, n_frames, 14, 3))
labels_set = np.empty((0, len(cols)))
vid_list = annot['video'].unique()
for vid in vid_list:
print(f'Process on: {vid}')
data = annot[annot['video'] == vid].reset_index(drop=True).drop(columns='video')
# Label Smoothing.
eps = 0.1
data[cols] = data[cols] * (1 - eps) + (1 - data[cols]) * eps / (len(cols) - 1)
data[cols] = seq_label_smoothing(data[cols].values, smooth_labels_step)
# Separate continuous frames.
frames = data['frame'].values
frames_set = []
fs = [0]
for i in range(1, len(frames)):
if frames[i] < frames[i-1] + 10:
fs.append(i)
else:
frames_set.append(fs)
fs = [i]
frames_set.append(fs)
for fs in frames_set:
xys = data.iloc[fs, 1:-len(cols)].values.reshape(-1, 13, 3)
# Scale pose normalize.
xys[:, :, :2] = scale_pose(xys[:, :, :2])
# Add center point.
xys = np.concatenate((xys, np.expand_dims((xys[:, 1, :] + xys[:, 2, :]) / 2, 1)), axis=1)
# Weighting main parts score.
scr = xys[:, :, -1].copy()
scr[:, main_idx_parts] = np.minimum(scr[:, main_idx_parts] * 1.5, 1.0)
# Mean score.
scr = scr.mean(1)
# Targets.
lb = data.iloc[fs, -len(cols):].values
# Apply points score mean to all labels.
lb = lb * scr[:, None]
for i in range(xys.shape[0] - n_frames):
feature_set = np.append(feature_set, xys[i:i+n_frames][None, ...], axis=0)
labels_set = np.append(labels_set, lb[i:i+n_frames].mean(0)[None, ...], axis=0)
"""with open(save_path, 'wb') as f:
pickle.dump((feature_set, labels_set), f)"""

View File

@@ -0,0 +1,348 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from .Utils import build_targets, to_cpu, parse_model_config
def create_modules(module_defs):
"""
Constructs module list of layer blocks from module configuration in module_defs
"""
hyperparams = module_defs.pop(0)
output_filters = [int(hyperparams["channels"])] # [3]
module_list = nn.ModuleList()
for module_i, module_def in enumerate(module_defs):
modules = nn.Sequential()
if module_def["type"] == "convolutional":
bn = int(module_def["batch_normalize"])
filters = int(module_def["filters"])
kernel_size = int(module_def["size"])
pad = (kernel_size - 1) // 2
modules.add_module(
f"conv_{module_i}",
nn.Conv2d(
in_channels=output_filters[-1],
out_channels=filters,
kernel_size=kernel_size,
stride=int(module_def["stride"]),
padding=pad,
bias=not bn,
),
)
if bn:
modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
if module_def["activation"] == "leaky":
modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))
elif module_def["type"] == "maxpool":
kernel_size = int(module_def["size"])
stride = int(module_def["stride"])
if kernel_size == 2 and stride == 1:
modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
modules.add_module(f"maxpool_{module_i}", maxpool)
elif module_def["type"] == "upsample":
upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
modules.add_module(f"upsample_{module_i}", upsample)
elif module_def["type"] == "route":
layers = [int(x) for x in module_def["layers"].split(",")]
filters = sum([output_filters[1:][i] for i in layers])
modules.add_module(f"route_{module_i}", EmptyLayer())
elif module_def["type"] == "shortcut":
filters = output_filters[1:][int(module_def["from"])]
modules.add_module(f"shortcut_{module_i}", EmptyLayer())
elif module_def["type"] == "yolo":
anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
# Extract anchors
anchors = [int(x) for x in module_def["anchors"].split(",")]
anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
anchors = [anchors[i] for i in anchor_idxs]
num_classes = int(module_def["classes"])
img_size = int(hyperparams["height"])
# Define detection layer
yolo_layer = YOLOLayer(anchors, num_classes, img_size)
modules.add_module(f"yolo_{module_i}", yolo_layer)
# Register module list and number of output filters
module_list.append(modules)
output_filters.append(filters)
return hyperparams, module_list
class Upsample(nn.Module):
""" nn.Upsample is deprecated """
def __init__(self, scale_factor, mode="nearest"):
super(Upsample, self).__init__()
self.scale_factor = scale_factor
self.mode = mode
def forward(self, x):
x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
return x
class EmptyLayer(nn.Module):
"""Placeholder for 'route' and 'shortcut' layers"""
def __init__(self):
super(EmptyLayer, self).__init__()
class YOLOLayer(nn.Module):
"""Detection layer"""
def __init__(self, anchors, num_classes, img_dim=416):
super(YOLOLayer, self).__init__()
self.anchors = anchors
self.num_anchors = len(anchors)
self.num_classes = num_classes
self.ignore_thres = 0.5
self.mse_loss = nn.MSELoss()
self.bce_loss = nn.BCELoss()
self.obj_scale = 1
self.noobj_scale = 100
self.metrics = {}
self.img_dim = img_dim
self.grid_size = 0 # grid size
def compute_grid_offsets(self, grid_size, cuda=True):
self.grid_size = grid_size
g = self.grid_size
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
self.stride = self.img_dim / self.grid_size
# Calculate offsets for each grid
self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor)
self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor)
self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors])
self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
def forward(self, x, targets=None, img_dim=None):
# Tensors for cuda support
FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
self.img_dim = img_dim
num_samples = x.size(0)
grid_size = x.size(2)
prediction = (
x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
.permute(0, 1, 3, 4, 2)
.contiguous()
)
# Get outputs
x = torch.sigmoid(prediction[..., 0]) # Center x
y = torch.sigmoid(prediction[..., 1]) # Center y
w = prediction[..., 2] # Width
h = prediction[..., 3] # Height
pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
# If grid size does not match current we compute new offsets
if grid_size != self.grid_size:
self.compute_grid_offsets(grid_size, cuda=x.is_cuda)
# Add offset and scale with anchors
pred_boxes = FloatTensor(prediction[..., :4].shape)
pred_boxes[..., 0] = x.data + self.grid_x
pred_boxes[..., 1] = y.data + self.grid_y
pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
output = torch.cat(
(
pred_boxes.view(num_samples, -1, 4) * self.stride,
pred_conf.view(num_samples, -1, 1),
pred_cls.view(num_samples, -1, self.num_classes),
),
-1,
)
if targets is None:
return output, 0
else:
iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
pred_boxes=pred_boxes,
pred_cls=pred_cls,
target=targets,
anchors=self.scaled_anchors,
ignore_thres=self.ignore_thres,
)
# Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
loss_x = self.mse_loss(x[obj_mask.bool()], tx[obj_mask.bool()])
loss_y = self.mse_loss(y[obj_mask.bool()], ty[obj_mask.bool()])
loss_w = self.mse_loss(w[obj_mask.bool()], tw[obj_mask.bool()])
loss_h = self.mse_loss(h[obj_mask.bool()], th[obj_mask.bool()])
loss_conf_obj = self.bce_loss(pred_conf[obj_mask.bool()], tconf[obj_mask.bool()])
loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask.bool()], tconf[noobj_mask.bool()])
loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
loss_cls = self.bce_loss(pred_cls[obj_mask.bool()], tcls[obj_mask.bool()])
total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
# Metrics
cls_acc = 100 * class_mask[obj_mask.bool()].mean()
conf_obj = pred_conf[obj_mask.bool()].mean()
conf_noobj = pred_conf[noobj_mask.bool()].mean()
conf50 = (pred_conf > 0.5).float()
iou50 = (iou_scores > 0.5).float()
iou75 = (iou_scores > 0.75).float()
detected_mask = conf50 * class_mask * tconf
precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)
self.metrics = {
"loss": to_cpu(total_loss).item(),
"x": to_cpu(loss_x).item(),
"y": to_cpu(loss_y).item(),
"w": to_cpu(loss_w).item(),
"h": to_cpu(loss_h).item(),
"conf": to_cpu(loss_conf).item(),
"cls": to_cpu(loss_cls).item(),
"cls_acc": to_cpu(cls_acc).item(),
"recall50": to_cpu(recall50).item(),
"recall75": to_cpu(recall75).item(),
"precision": to_cpu(precision).item(),
"conf_obj": to_cpu(conf_obj).item(),
"conf_noobj": to_cpu(conf_noobj).item(),
"grid_size": grid_size,
}
return output, total_loss
class Darknet(nn.Module):
"""YOLOv3 object detection model"""
def __init__(self, config_path, img_size=416):
super(Darknet, self).__init__()
self.module_defs = parse_model_config(config_path)
self.hyperparams, self.module_list = create_modules(self.module_defs)
self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")]
self.img_size = img_size
self.seen = 0
self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
def forward(self, x, targets=None):
img_dim = x.shape[2]
loss = 0
layer_outputs, yolo_outputs = [], []
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
x = module(x)
elif module_def["type"] == "route":
x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1)
elif module_def["type"] == "shortcut":
layer_i = int(module_def["from"])
x = layer_outputs[-1] + layer_outputs[layer_i]
elif module_def["type"] == "yolo":
x, layer_loss = module[0](x, targets, img_dim)
loss += layer_loss
yolo_outputs.append(x)
layer_outputs.append(x)
yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
return yolo_outputs if targets is None else (loss, yolo_outputs)
def load_darknet_weights(self, weights_path):
"""Parses and loads the weights stored in 'weights_path'"""
# Open the weights file
with open(weights_path, "rb") as f:
header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values
self.header_info = header # Needed to write header when saving weights
self.seen = header[3] # number of images seen during training
weights = np.fromfile(f, dtype=np.float32) # The rest are weights
# Establish cutoff for loading backbone weights
cutoff = None
if "darknet53.conv.74" in weights_path:
cutoff = 75
ptr = 0
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
if i == cutoff:
break
if module_def["type"] == "convolutional":
conv_layer = module[0]
if module_def["batch_normalize"]:
# Load BN bias, weights, running mean and running variance
bn_layer = module[1]
num_b = bn_layer.bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.bias)
bn_layer.bias.data.copy_(bn_b)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.weight)
bn_layer.weight.data.copy_(bn_w)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.running_mean)
bn_layer.running_mean.data.copy_(bn_rm)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.running_var)
bn_layer.running_var.data.copy_(bn_rv)
ptr += num_b
else:
# Load conv. bias
num_b = conv_layer.bias.numel()
conv_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(conv_layer.bias)
conv_layer.bias.data.copy_(conv_b)
ptr += num_b
# Load conv. weights
num_w = conv_layer.weight.numel()
conv_w = torch.from_numpy(weights[ptr: ptr + num_w]).view_as(conv_layer.weight)
conv_layer.weight.data.copy_(conv_w)
ptr += num_w
def save_darknet_weights(self, path, cutoff=-1):
"""
:param path: path of the new weights file
:param cutoff: save layers between 0 and cutoff (cutoff = -1 -> all are saved)
"""
fp = open(path, "wb")
self.header_info[3] = self.seen
self.header_info.tofile(fp)
# Iterate through layers
for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
if module_def["type"] == "convolutional":
conv_layer = module[0]
# If batch norm, load bn first
if module_def["batch_normalize"]:
bn_layer = module[1]
bn_layer.bias.data.cpu().numpy().tofile(fp)
bn_layer.weight.data.cpu().numpy().tofile(fp)
bn_layer.running_mean.data.cpu().numpy().tofile(fp)
bn_layer.running_var.data.cpu().numpy().tofile(fp)
# Load conv bias
else:
conv_layer.bias.data.cpu().numpy().tofile(fp)
# Load conv weights
conv_layer.weight.data.cpu().numpy().tofile(fp)
fp.close()
def load_pretrain_to_custom_class(self, weights_pth_path):
state = torch.load(weights_pth_path)
own_state = self.state_dict()
for name, param in state.items():
if name not in own_state:
print(f'Model does not have this param: {name}!')
continue
if param.shape != own_state[name].shape:
print(f'Do not load this param: {name} cause it shape not equal! : '
f'{param.shape} into {own_state[name].shape}')
continue
own_state[name].copy_(param)
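A hedged usage sketch for the class above (mirrors how TinyYOLOv3_onecls constructs it later in this commit; the paths are the defaults referenced there):

model = Darknet('Models/yolo-tiny-onecls/yolov3-tiny-onecls.cfg', img_size=416)
model.load_state_dict(torch.load('Models/yolo-tiny-onecls/best-model.pth'))
model.eval()
with torch.no_grad():
    detections = model(torch.zeros(1, 3, 416, 416))  # (1, num_boxes, 5 + num_classes)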

View File

@@ -0,0 +1,415 @@
import cv2
import math
import time
import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader
def to_cpu(tensor):
return tensor.detach().cpu()
def load_classes(path):
"""
Loads class labels at 'path'
"""
with open(path, "r") as fp:
    names = fp.read().split("\n")[:-1]
return names
def weights_init_normal(m):
classname = m.__class__.__name__
if classname.find("Conv") != -1:
torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
elif classname.find("BatchNorm2d") != -1:
torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
torch.nn.init.constant_(m.bias.data, 0.0)
def rescale_boxes(boxes, current_dim, original_shape):
""" Rescales bounding boxes to the original shape """
orig_h, orig_w = original_shape
# The amount of padding that was added
pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
# Image height and width after padding is removed
unpad_h = current_dim - pad_y
unpad_w = current_dim - pad_x
# Rescale bounding boxes to dimension of original image
boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
return boxes
def xywh2xyxy(x):
y = x.new(x.shape)
y[..., 0] = x[..., 0] - x[..., 2] / 2
y[..., 1] = x[..., 1] - x[..., 3] / 2
y[..., 2] = x[..., 0] + x[..., 2] / 2
y[..., 3] = x[..., 1] + x[..., 3] / 2
return y
def ap_per_class(tp, conf, pred_cls, target_cls):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
tp: True positives (list).
conf: Objectness value from 0-1 (list).
pred_cls: Predicted object classes (list).
target_cls: True object classes (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes = np.unique(target_cls)
# Create Precision-Recall curve and compute AP for each class
ap, p, r = [], [], []
for c in tqdm.tqdm(unique_classes, desc="Computing AP"):
i = pred_cls == c
n_gt = (target_cls == c).sum() # Number of ground truth objects
n_p = i.sum() # Number of predicted objects
if n_p == 0 and n_gt == 0:
continue
elif n_p == 0 or n_gt == 0:
ap.append(0)
r.append(0)
p.append(0)
else:
# Accumulate FPs and TPs
fpc = (1 - tp[i]).cumsum()
tpc = (tp[i]).cumsum()
# Recall
recall_curve = tpc / (n_gt + 1e-16)
r.append(recall_curve[-1])
# Precision
precision_curve = tpc / (tpc + fpc)
p.append(precision_curve[-1])
# AP from recall-precision curve
ap.append(compute_ap(recall_curve, precision_curve))
# Compute F1 score (harmonic mean of precision and recall)
p, r, ap = np.array(p), np.array(r), np.array(ap)
f1 = 2 * p * r / (p + r + 1e-16)
return p, r, ap, f1, unique_classes.astype("int32")
def compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves.
Code originally from https://github.com/rbgirshick/py-faster-rcnn.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.0], recall, [1.0]))
mpre = np.concatenate(([0.0], precision, [0.0]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
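# Hedged worked example: recall = [0.5, 1.0], precision = [1.0, 0.5]
# -> mrec = [0, 0.5, 1.0, 1.0], envelope mpre = [1.0, 1.0, 0.5, 0.0]
# -> ap = 0.5 * 1.0 + 0.5 * 0.5 = 0.75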
def get_batch_statistics(outputs, targets, iou_threshold):
""" Compute true positives, predicted scores and predicted labels per sample """
batch_metrics = []
for sample_i in range(len(outputs)):
if outputs[sample_i] is None:
continue
output = outputs[sample_i]
pred_boxes = output[:, :4]
pred_scores = output[:, 4]
pred_labels = output[:, -1]
true_positives = np.zeros(pred_boxes.shape[0])
annotations = targets[targets[:, 0] == sample_i][:, 1:]
target_labels = annotations[:, 0] if len(annotations) else []
if len(annotations):
detected_boxes = []
target_boxes = annotations[:, 1:]
for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
# If targets are found break
if len(detected_boxes) == len(annotations):
break
# Ignore if label is not one of the target labels
if pred_label not in target_labels:
continue
iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
if iou >= iou_threshold and box_index not in detected_boxes:
true_positives[pred_i] = 1
detected_boxes += [box_index]
batch_metrics.append([true_positives, pred_scores, pred_labels])
return batch_metrics
def bbox_wh_iou(wh1, wh2):
wh2 = wh2.t()
w1, h1 = wh1[0], wh1[1]
w2, h2 = wh2[0], wh2[1]
inter_area = torch.min(w1, w2) * torch.min(h1, h2)
union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
return inter_area / union_area
def bbox_iou(box1, box2, x1y1x2y2=True):
"""
Returns the IoU of two bounding boxes
"""
if not x1y1x2y2:
# Transform from center and width to exact coordinates
b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
else:
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
# Get the coordinates of the intersection rectangle
inter_rect_x1 = torch.max(b1_x1, b2_x1)
inter_rect_y1 = torch.max(b1_y1, b2_y1)
inter_rect_x2 = torch.min(b1_x2, b2_x2)
inter_rect_y2 = torch.min(b1_y2, b2_y2)
# Intersection area
inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
inter_rect_y2 - inter_rect_y1 + 1, min=0
)
# Union Area
b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
return iou
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
"""
Removes detections with lower object confidence score than 'conf_thres' and performs
Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_score, class_pred)
"""
# From (center x, center y, width, height) to (x1, y1, x2, y2)
prediction[..., :4] = xywh2xyxy(prediction[..., :4])
output = [None for _ in range(len(prediction))]
for image_i, image_pred in enumerate(prediction):
# Filter out confidence scores below threshold
image_pred = image_pred[image_pred[:, 4] >= conf_thres]
# If none are remaining => process next image
if not image_pred.size(0):
continue
# Object confidence times class confidence
score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
# Sort by it
image_pred = image_pred[(-score).argsort()]
class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
# Perform non-maximum suppression
keep_boxes = []
while detections.size(0):
large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
label_match = detections[0, -1] == detections[:, -1]
# Indices of boxes with lower confidence scores, large IOUs and matching labels
invalid = large_overlap & label_match
weights = detections[invalid, 4:5]
# Merge overlapping bboxes by order of confidence
detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
keep_boxes += [detections[0]]
detections = detections[~invalid]
if keep_boxes:
output[image_i] = torch.stack(keep_boxes)
return output
def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):
ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor
FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor
nB = pred_boxes.size(0)
nA = pred_boxes.size(1)
nC = pred_cls.size(-1)
nG = pred_boxes.size(2)
# Output tensors
obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)
noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)
class_mask = FloatTensor(nB, nA, nG, nG).fill_(0)
iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)
tx = FloatTensor(nB, nA, nG, nG).fill_(0)
ty = FloatTensor(nB, nA, nG, nG).fill_(0)
tw = FloatTensor(nB, nA, nG, nG).fill_(0)
th = FloatTensor(nB, nA, nG, nG).fill_(0)
tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0)
# Convert to position relative to box
target_boxes = target[:, 2:6] * nG
gxy = target_boxes[:, :2]
gwh = target_boxes[:, 2:]
# Get anchors with best iou
ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
best_ious, best_n = ious.max(0)
# Separate target values
b, target_labels = target[:, :2].long().t()
gx, gy = gxy.t()
gw, gh = gwh.t()
gi, gj = gxy.long().t()
# Set masks
obj_mask[b, best_n, gj, gi] = 1
noobj_mask[b, best_n, gj, gi] = 0
# Set noobj mask to zero where iou exceeds ignore threshold
for i, anchor_ious in enumerate(ious.t()):
noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0
# Coordinates
tx[b, best_n, gj, gi] = gx - gx.floor()
ty[b, best_n, gj, gi] = gy - gy.floor()
# Width and height
tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
# One-hot encoding of label
tcls[b, best_n, gj, gi, target_labels] = 1
# Compute label correctness and iou at best anchor
class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False)
tconf = obj_mask.float()
return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions"""
    with open(path, 'r') as f:
        lines = f.read().split('\n')
    lines = [x for x in lines if x and not x.startswith('#')]
    lines = [x.strip() for x in lines]  # get rid of fringe whitespace
module_defs = []
for line in lines:
if line.startswith('['): # This marks the start of a new block
module_defs.append({})
module_defs[-1]['type'] = line[1:-1].rstrip()
if module_defs[-1]['type'] == 'convolutional':
module_defs[-1]['batch_normalize'] = 0
else:
key, value = line.split("=")
value = value.strip()
module_defs[-1][key.rstrip()] = value.strip()
return module_defs
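# Hedged illustration of the cfg syntax this parser expects (an assumed example,
# not a file shipped in this commit): a '[type]' header opens a block and
# 'key=value' lines fill it, e.g.:
#
#   [net]
#   channels=3
#   height=416
#
#   [convolutional]
#   batch_normalize=1
#   filters=16
#   size=3
#   stride=1
#   activation=leaky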
def parse_data_config(path):
"""Parses the data configuration file"""
options = dict()
options['gpus'] = '0,1,2,3'
options['num_workers'] = '10'
with open(path, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.strip()
if line == '' or line.startswith('#'):
continue
key, value = line.split('=')
options[key.strip()] = value.strip()
return options
def ResizePadding(height, width):
    desired_size = (height, width)
    def resizePadding(image, **kwargs):
        old_size = image.shape[:2]
        max_size_idx = old_size.index(max(old_size))
        ratio = float(desired_size[max_size_idx]) / max(old_size)
        new_size = tuple([int(x * ratio) for x in old_size])
        # If the other side still exceeds the target, rescale by the short side.
        if new_size > desired_size:
            min_size_idx = old_size.index(min(old_size))
            ratio = float(desired_size[min_size_idx]) / min(old_size)
            new_size = tuple([int(x * ratio) for x in old_size])
        image = cv2.resize(image, (new_size[1], new_size[0]))
        delta_w = desired_size[1] - new_size[1]
        delta_h = desired_size[0] - new_size[0]
top, bottom = delta_h // 2, delta_h - (delta_h // 2)
left, right = delta_w // 2, delta_w - (delta_w // 2)
image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT)
return image
return resizePadding
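# Behavior sketch (illustrative only; assumes an HxWxC numpy image and that
# cv2/numpy are imported at the top of this module): a 480x640 frame
# letterboxed to 416x416 keeps its aspect ratio and pads the short side
# with black borders, e.g.
#
#   resize_fn = ResizePadding(416, 416)
#   frame = np.zeros((480, 640, 3), dtype=np.uint8)   # hypothetical input
#   out = resize_fn(frame)                            # out.shape == (416, 416, 3)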
class AverageValueMeter(object):
def __init__(self):
self.reset()
self.val = 0
def add(self, value, n=1):
self.val = value
self.sum += value
self.var += value * value
self.n += n
if self.n == 0:
self.mean, self.std = np.nan, np.nan
elif self.n == 1:
self.mean = 0.0 + self.sum # This is to force a copy in torch/numpy
self.std = np.inf
self.mean_old = self.mean
self.m_s = 0.0
else:
self.mean = self.mean_old + (value - n * self.mean_old) / float(self.n)
self.m_s += (value - self.mean_old) * (value - self.mean)
self.mean_old = self.mean
self.std = np.sqrt(self.m_s / (self.n - 1.0))
def value(self):
return self.mean, self.std
def reset(self):
self.n = 0
self.sum = 0.0
self.var = 0.0
self.val = 0.0
self.mean = np.nan
self.mean_old = 0.0
self.m_s = 0.0
self.std = np.nan
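# Running-statistics sketch (illustrative only): the meter keeps an online
# mean/std of every value passed to `add`, e.g.
#
#   meter = AverageValueMeter()
#   meter.add(1.0)
#   meter.add(3.0)
#   meter.value()   # -> (2.0, ~1.414): mean and sample std of [1.0, 3.0]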

View File

@ -0,0 +1,117 @@
import time
import torch
import numpy as np
import torchvision.transforms as transforms
from queue import Queue
from threading import Thread
from .Detection.Models import Darknet
from .Detection.Utils import non_max_suppression, ResizePadding
class TinyYOLOv3_onecls(object):
"""Load trained Tiny-YOLOv3 one class (person) detection model.
Args:
input_size: (int) Size of input image must be divisible by 32. Default: 416,
config_file: (str) Path to Yolo model structure config file.,
weight_file: (str) Path to trained weights file.,
nms: (float) Non-Maximum Suppression overlap threshold.,
        conf_thres: (float) Minimum confidence threshold for keeping predicted bboxes.,
device: (str) Device to load the model on 'cpu' or 'cuda'.
"""
def __init__(self,
input_size=416,
config_file='Models/yolo-tiny-onecls/yolov3-tiny-onecls.cfg',
weight_file='Models/yolo-tiny-onecls/best-model.pth',
nms=0.2,
conf_thres=0.45,
device='cuda'):
self.input_size = input_size
self.model = Darknet(config_file).to(device)
self.model.load_state_dict(torch.load(weight_file))
self.model.eval()
self.device = device
self.nms = nms
self.conf_thres = conf_thres
self.resize_fn = ResizePadding(input_size, input_size)
self.transf_fn = transforms.ToTensor()
def detect(self, image, need_resize=True, expand_bb=5):
"""Feed forward to the model.
Args:
image: (numpy array) Single RGB image to detect.,
            need_resize: (bool) Resize the image to input_size before the forward pass;
                returned bboxes are scaled back to the original image size.,
            expand_bb: (int) Number of pixels to expand each box boundary by.
Returns:
            (torch.float32) One row per detected object:
                [top, left, bottom, right, bbox_score, class_score, class],
            or `None` if nothing is detected.
"""
image_size = (self.input_size, self.input_size)
if need_resize:
image_size = image.shape[:2]
image = self.resize_fn(image)
image = self.transf_fn(image)[None, ...]
scf = torch.min(self.input_size / torch.FloatTensor([image_size]), 1)[0]
detected = self.model(image.to(self.device))
detected = non_max_suppression(detected, self.conf_thres, self.nms)[0]
if detected is not None:
detected[:, [0, 2]] -= (self.input_size - scf * image_size[1]) / 2
detected[:, [1, 3]] -= (self.input_size - scf * image_size[0]) / 2
detected[:, 0:4] /= scf
detected[:, 0:2] = np.maximum(0, detected[:, 0:2] - expand_bb)
detected[:, 2:4] = np.minimum(image_size[::-1], detected[:, 2:4] + expand_bb)
return detected
class ThreadDetection(object):
def __init__(self,
dataloader,
model,
queue_size=256):
self.model = model
self.dataloader = dataloader
self.stopped = False
self.Q = Queue(maxsize=queue_size)
def start(self):
        t = Thread(target=self.update, args=(), daemon=True)
        t.start()
return self
def update(self):
while True:
if self.stopped:
return
images = self.dataloader.getitem()
outputs = self.model.detect(images)
if self.Q.full():
time.sleep(2)
self.Q.put((images, outputs))
def getitem(self):
return self.Q.get()
def stop(self):
self.stopped = True
def __len__(self):
return self.Q.qsize()
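# Usage sketch (illustrative only; this module uses relative imports, so call
# it from the package; the weight/cfg files and a CUDA device are assumed):
#
#   detector = TinyYOLOv3_onecls(device='cuda')
#   frame = ...                                # an RGB numpy image
#   detected = detector.detect(frame)          # (n, 7) tensor or None
#   # columns: 4 box coordinates, bbox_score, class_score, class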

View File

@ -0,0 +1 @@
tsstg-model.pth

View File

@ -0,0 +1,2 @@
fast_res50_256x192.pth
fast_res101_320x256.pth

View File

@ -0,0 +1,2 @@
best-model.pth
yolov3-tiny-onecls.cfg

View File

@ -0,0 +1,40 @@
import os
import cv2
import torch
from .SPPE.src.main_fast_inference import InferenNet_fast, InferenNet_fastRes50
from .SPPE.src.utils.img import crop_dets
from .pPose_nms import pose_nms
from .SPPE.src.utils.eval import getPrediction
class SPPE_FastPose(object):
def __init__(self,
backbone,
input_height=320,
input_width=256,
device='cuda',
path='./SPPE/models/sppe/'):
        assert backbone in ['resnet50', 'resnet101'], '{} backbone is not supported yet!'.format(backbone)
self.inp_h = input_height
self.inp_w = input_width
self.device = device
        # Join the weights directory with the expected file name; the inference
        # nets take a weights file, not a directory (names match their defaults).
        if backbone == 'resnet101':
            self.model = InferenNet_fast(os.path.join(path, 'fast_res101_320x256.pth')).to(device)
        else:
            self.model = InferenNet_fastRes50(os.path.join(path, 'fast_res50_256x192.pth')).to(device)
self.model.eval()
def predict(self, image, bboxs, bboxs_scores):
inps, pt1, pt2 = crop_dets(image, bboxs, self.inp_h, self.inp_w)
pose_hm = self.model(inps.to(self.device)).cpu().data
# Cut eyes and ears.
pose_hm = torch.cat([pose_hm[:, :1, ...], pose_hm[:, 5:, ...]], dim=1)
xy_hm, xy_img, scores = getPrediction(pose_hm, pt1, pt2, self.inp_h, self.inp_w,
pose_hm.shape[-2], pose_hm.shape[-1])
result = pose_nms(bboxs, bboxs_scores, xy_img, scores)
return result
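# Usage sketch (illustrative only; weight files under the default `path` and a
# CUDA device are assumed; `frame` and the box below are hypothetical):
#
#   pose_model = SPPE_FastPose('resnet50', device='cuda')
#   bboxs = torch.tensor([[100., 50., 300., 400.]])    # one person box
#   scores = torch.tensor([[0.9]])
#   poses = pose_model.predict(frame, bboxs, scores)   # skeleton keypoints + scores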

View File

@ -0,0 +1,51 @@
# [Human Falling Detection and Tracking](https://github.com/GajuuzZ/Human-Falling-Detect-Tracks)
Using Tiny-YOLO oneclass to detect each person in the frame,
[AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to get the skeleton pose, and an
[ST-GCN](https://github.com/yysijie/st-gcn) model to predict the action of each tracked
person from every 30 frames of its track.
It currently supports 7 actions: Standing, Walking, Sitting, Lying Down, Stand up, Sit down, Fall Down.
A minimal code sketch of this pipeline follows the demo clip below.
<div align="center">
<img src="sample1.gif" width="416">
</div>
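
The pipeline, sketched (the import paths and frame source here are hypothetical,
and the per-person tracking step is omitted for brevity):

```
# Hypothetical import paths; see the loader classes added in this commit.
from DetectorLoader import TinyYOLOv3_onecls
from PoseEstimateLoader import SPPE_FastPose
from ActionsEstLoader import TSSTG

detect_model = TinyYOLOv3_onecls()        # person bounding boxes
pose_model = SPPE_FastPose('resnet50')    # skeleton keypoints per box
action_model = TSSTG()                    # action from a 30-frame pose window

for frame in frames:                      # hypothetical RGB frame source
    detected = detect_model.detect(frame)
    if detected is None:
        continue
    poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
    # ...accumulate 30 frames of keypoints per tracked person, then e.g.:
    # out = action_model.predict(pts, frame.shape[:2][::-1])
```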
## Prerequisites
- Python > 3.6
- PyTorch > 1.3.1

Originally tested on: i7-8750H CPU @ 2.20GHz x12, GeForce RTX 2070 8GB, CUDA 10.2
## Data
This project trains a new Tiny-YOLO oneclass model to detect only person objects while also
reducing the model size. It is trained on a rotation-augmented [COCO](http://cocodataset.org/#home)
person keypoints dataset for more robust person detection over a wider range of body angles.

For action recognition, skeleton poses were extracted with AlphaPose from the
[Le2i](http://le2i.cnrs.fr/Fall-detection-Dataset?lang=fr) Fall Detection Dataset (Coffee room, Home),
and each action frame was labeled by hand to train the ST-GCN model.
## Pre-Trained Models
- Tiny-YOLO oneclass - [.pth](https://drive.google.com/file/d/1obEbWBSm9bXeg10FriJ7R2cGLRsg-AfP/view?usp=sharing),
[.cfg](https://drive.google.com/file/d/19sPzBZjAjuJQ3emRteHybm2SG25w9Wn5/view?usp=sharing)
- SPPE FastPose (AlphaPose) - [resnet101](https://drive.google.com/file/d/1N2MgE1Esq6CKYA6FyZVKpPwHRyOCrzA0/view?usp=sharing),
[resnet50](https://drive.google.com/file/d/1IPfCDRwCmQDnQy94nT1V-_NVtTEi4VmU/view?usp=sharing)
- ST-GCN action recognition - [tsstg](https://drive.google.com/file/d/1mQQ4JHe58ylKbBqTjuKzpwN2nwKOWJ9u/view?usp=sharing)
## Basic Use
1. Download all pre-trained models into the ./Models folder.
2. Run main.py:
```
python main.py ${video file or camera source}
```
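
For example (assuming `0` selects the default webcam and `video.mp4` is a hypothetical clip):

```
python main.py video.mp4
python main.py 0
```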
## Reference
- AlphaPose : https://github.com/Amanbhandula/AlphaPose
- ST-GCN : https://github.com/yysijie/st-gcn

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Jeff-sjtu
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1 @@
# pytorch-AlphaPose from: https://github.com/Amanbhandula/AlphaPose

View File

@ -0,0 +1,82 @@
import torch
import torch.nn as nn
import torch.utils.data
import torch.utils.data.distributed
import torch.nn.functional as F
import numpy as np
from .utils.img import flip, shuffleLR
from .utils.eval import getPrediction
from .models.FastPose import FastPose
import time
import sys
import torch._utils
try:
torch._utils._rebuild_tensor_v2
except AttributeError:
def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
tensor.requires_grad = requires_grad
tensor._backward_hooks = backward_hooks
return tensor
torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
class InferenNet(nn.Module):
def __init__(self, dataset, weights_file='./Models/sppe/fast_res101_320x256.pth'):
super().__init__()
self.pyranet = FastPose('resnet101').cuda()
print('Loading pose model from {}'.format(weights_file))
sys.stdout.flush()
self.pyranet.load_state_dict(torch.load(weights_file))
self.pyranet.eval()
self.dataset = dataset
def forward(self, x):
out = self.pyranet(x)
out = out.narrow(1, 0, 17)
flip_out = self.pyranet(flip(x))
flip_out = flip_out.narrow(1, 0, 17)
flip_out = flip(shuffleLR(
flip_out, self.dataset))
out = (flip_out + out) / 2
return out
class InferenNet_fast(nn.Module):
def __init__(self, weights_file='./Models/sppe/fast_res101_320x256.pth'):
super().__init__()
self.pyranet = FastPose('resnet101').cuda()
print('Loading pose model from {}'.format(weights_file))
self.pyranet.load_state_dict(torch.load(weights_file))
self.pyranet.eval()
def forward(self, x):
out = self.pyranet(x)
out = out.narrow(1, 0, 17)
return out
class InferenNet_fastRes50(nn.Module):
def __init__(self, weights_file='./Models/sppe/fast_res50_256x192.pth'):
super().__init__()
self.pyranet = FastPose('resnet50', 17).cuda()
print('Loading pose model from {}'.format(weights_file))
self.pyranet.load_state_dict(torch.load(weights_file))
self.pyranet.eval()
def forward(self, x):
out = self.pyranet(x)
return out

View File

@ -0,0 +1,32 @@
import torch.nn as nn
from torch.autograd import Variable
from .layers.SE_Resnet import SEResnet
from .layers.DUC import DUC
from ..opt import opt
class FastPose(nn.Module):
DIM = 128
def __init__(self, backbone='resnet101', num_join=opt.nClasses):
super(FastPose, self).__init__()
assert backbone in ['resnet50', 'resnet101']
self.preact = SEResnet(backbone)
self.suffle1 = nn.PixelShuffle(2)
self.duc1 = DUC(512, 1024, upscale_factor=2)
self.duc2 = DUC(256, 512, upscale_factor=2)
self.conv_out = nn.Conv2d(
self.DIM, num_join, kernel_size=3, stride=1, padding=1)
def forward(self, x: Variable):
out = self.preact(x)
out = self.suffle1(out)
out = self.duc1(out)
out = self.duc2(out)
out = self.conv_out(out)
return out

View File

@ -0,0 +1 @@
from . import *

View File

@ -0,0 +1,126 @@
import torch.nn as nn
from .layers.PRM import Residual as ResidualPyramid
from .layers.Residual import Residual as Residual
from torch.autograd import Variable
from SPPE.src.opt import opt
from collections import defaultdict
class Hourglass(nn.Module):
def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C):
super(Hourglass, self).__init__()
self.ResidualUp = ResidualPyramid if n >= 2 else Residual
self.ResidualDown = ResidualPyramid if n >= 3 else Residual
self.depth = n
self.nModules = nModules
self.nFeats = nFeats
self.net_type = net_type
self.B = B
self.C = C
self.inputResH = inputResH
self.inputResW = inputResW
self.up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW)
self.low1 = nn.Sequential(
nn.MaxPool2d(2),
self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
)
if n > 1:
self.low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C)
else:
self.low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
self.low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2)
self.up2 = nn.UpsamplingNearest2d(scale_factor=2)
self.upperBranch = self.up1
self.lowerBranch = nn.Sequential(
self.low1,
self.low2,
self.low3,
self.up2
)
def _make_residual(self, resBlock, useConv, inputResH, inputResW):
layer_list = []
for i in range(self.nModules):
layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW,
stride=1, net_type=self.net_type, useConv=useConv,
baseWidth=self.B, cardinality=self.C))
return nn.Sequential(*layer_list)
def forward(self, x: Variable):
up1 = self.upperBranch(x)
up2 = self.lowerBranch(x)
out = up1 + up2
return out
class PyraNet(nn.Module):
def __init__(self):
super(PyraNet, self).__init__()
B, C = opt.baseWidth, opt.cardinality
self.inputResH = opt.inputResH / 4
self.inputResW = opt.inputResW / 4
self.nStack = opt.nStack
self.cnv1 = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
nn.BatchNorm2d(64),
nn.ReLU(True)
)
self.r1 = nn.Sequential(
ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
nn.MaxPool2d(2)
)
self.r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
self.r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
self.preact = nn.Sequential(
self.cnv1,
self.r1,
self.r4,
self.r5
)
self.stack_layers = defaultdict(list)
for i in range(self.nStack):
hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C)
lin = nn.Sequential(
hg,
nn.BatchNorm2d(opt.nFeats),
nn.ReLU(True),
nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0),
nn.BatchNorm2d(opt.nFeats),
nn.ReLU(True)
)
tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0)
self.stack_layers['lin'].append(lin)
self.stack_layers['out'].append(tmpOut)
if i < self.nStack - 1:
lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0)
self.stack_layers['lin_'].append(lin_)
self.stack_layers['out_'].append(tmpOut_)
def forward(self, x: Variable):
out = []
inter = self.preact(x)
for i in range(self.nStack):
lin = self.stack_layers['lin'][i](inter)
tmpOut = self.stack_layers['out'][i](lin)
out.append(tmpOut)
if i < self.nStack - 1:
lin_ = self.stack_layers['lin_'][i](lin)
tmpOut_ = self.stack_layers['out_'][i](tmpOut)
inter = inter + lin_ + tmpOut_
return out
def createModel(**kw):
model = PyraNet()
return model

View File

@ -0,0 +1,236 @@
import torch.nn as nn
from .layers.PRM import Residual as ResidualPyramid
from .layers.Residual import Residual as Residual
from torch.autograd import Variable
import torch
from SPPE.src.opt import opt
import math
class Hourglass(nn.Module):
def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C):
super(Hourglass, self).__init__()
self.ResidualUp = ResidualPyramid if n >= 2 else Residual
self.ResidualDown = ResidualPyramid if n >= 3 else Residual
self.depth = n
self.nModules = nModules
self.nFeats = nFeats
self.net_type = net_type
self.B = B
self.C = C
self.inputResH = inputResH
self.inputResW = inputResW
up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW)
low1 = nn.Sequential(
nn.MaxPool2d(2),
self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
)
if n > 1:
low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C)
else:
low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2)
up2 = nn.UpsamplingNearest2d(scale_factor=2)
self.upperBranch = up1
self.lowerBranch = nn.Sequential(
low1,
low2,
low3,
up2
)
def _make_residual(self, resBlock, useConv, inputResH, inputResW):
layer_list = []
for i in range(self.nModules):
layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW,
stride=1, net_type=self.net_type, useConv=useConv,
baseWidth=self.B, cardinality=self.C))
return nn.Sequential(*layer_list)
def forward(self, x: Variable):
up1 = self.upperBranch(x)
up2 = self.lowerBranch(x)
# out = up1 + up2
out = torch.add(up1, up2)
return out
class PyraNet(nn.Module):
def __init__(self):
super(PyraNet, self).__init__()
B, C = opt.baseWidth, opt.cardinality
self.inputResH = opt.inputResH / 4
self.inputResW = opt.inputResW / 4
self.nStack = opt.nStack
conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
if opt.init:
nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3))
cnv1 = nn.Sequential(
conv1,
nn.BatchNorm2d(64),
nn.ReLU(True)
)
r1 = nn.Sequential(
ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
nn.MaxPool2d(2)
)
r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
self.preact = nn.Sequential(
cnv1,
r1,
r4,
r5
)
self.stack_lin = nn.ModuleList()
self.stack_out = nn.ModuleList()
self.stack_lin_ = nn.ModuleList()
self.stack_out_ = nn.ModuleList()
for i in range(self.nStack):
hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C)
conv1 = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
if opt.init:
nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
lin = nn.Sequential(
hg,
nn.BatchNorm2d(opt.nFeats),
nn.ReLU(True),
conv1,
nn.BatchNorm2d(opt.nFeats),
nn.ReLU(True)
)
tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0)
if opt.init:
nn.init.xavier_normal(tmpOut.weight)
self.stack_lin.append(lin)
self.stack_out.append(tmpOut)
if i < self.nStack - 1:
lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0)
if opt.init:
nn.init.xavier_normal(lin_.weight)
nn.init.xavier_normal(tmpOut_.weight)
self.stack_lin_.append(lin_)
self.stack_out_.append(tmpOut_)
def forward(self, x: Variable):
out = []
inter = self.preact(x)
for i in range(self.nStack):
lin = self.stack_lin[i](inter)
tmpOut = self.stack_out[i](lin)
out.append(tmpOut)
if i < self.nStack - 1:
lin_ = self.stack_lin_[i](lin)
tmpOut_ = self.stack_out_[i](tmpOut)
inter = inter + lin_ + tmpOut_
return out
class PyraNet_Inference(nn.Module):
def __init__(self):
super(PyraNet_Inference, self).__init__()
B, C = opt.baseWidth, opt.cardinality
self.inputResH = opt.inputResH / 4
self.inputResW = opt.inputResW / 4
self.nStack = opt.nStack
conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
if opt.init:
nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3))
cnv1 = nn.Sequential(
conv1,
nn.BatchNorm2d(64),
nn.ReLU(True)
)
r1 = nn.Sequential(
ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
nn.MaxPool2d(2)
)
r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
self.preact = nn.Sequential(
cnv1,
r1,
r4,
r5
)
self.stack_lin = nn.ModuleList()
self.stack_out = nn.ModuleList()
self.stack_lin_ = nn.ModuleList()
self.stack_out_ = nn.ModuleList()
for i in range(self.nStack):
hg = Hourglass(4, opt.nFeats, opt.nResidual,
self.inputResH, self.inputResW, 'preact', B, C)
conv1 = nn.Conv2d(opt.nFeats, opt.nFeats,
kernel_size=1, stride=1, padding=0)
if opt.init:
nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
lin = nn.Sequential(
hg,
nn.BatchNorm2d(opt.nFeats),
nn.ReLU(True),
conv1,
nn.BatchNorm2d(opt.nFeats),
nn.ReLU(True)
)
tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses,
kernel_size=1, stride=1, padding=0)
if opt.init:
nn.init.xavier_normal(tmpOut.weight)
self.stack_lin.append(lin)
self.stack_out.append(tmpOut)
if i < self.nStack - 1:
lin_ = nn.Conv2d(opt.nFeats, opt.nFeats,
kernel_size=1, stride=1, padding=0)
tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats,
kernel_size=1, stride=1, padding=0)
if opt.init:
nn.init.xavier_normal(lin_.weight)
nn.init.xavier_normal(tmpOut_.weight)
self.stack_lin_.append(lin_)
self.stack_out_.append(tmpOut_)
def forward(self, x: Variable):
inter = self.preact(x)
for i in range(self.nStack):
lin = self.stack_lin[i](inter)
tmpOut = self.stack_out[i](lin)
out = tmpOut
if i < self.nStack - 1:
lin_ = self.stack_lin_[i](lin)
tmpOut_ = self.stack_out_[i](tmpOut)
inter = inter + lin_ + tmpOut_
return out
def createModel(**kw):
model = PyraNet()
return model
def createModel_Inference(**kw):
model = PyraNet_Inference()
return model

View File

@ -0,0 +1,23 @@
import torch.nn as nn
import torch.nn.functional as F
class DUC(nn.Module):
"""
INPUT: inplanes, planes, upscale_factor
    OUTPUT: (planes // 4) * ht * wd
"""
def __init__(self, inplanes, planes, upscale_factor=2):
super(DUC, self).__init__()
self.conv = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1, bias=False)
self.bn = nn.BatchNorm2d(planes)
self.relu = nn.ReLU()
self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
x = self.pixel_shuffle(x)
return x
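# Shape self-check (a sketch): DUC trades channels for spatial resolution, so a
# (N, inplanes, H, W) input becomes (N, planes // 4, 2H, 2W) after PixelShuffle(2).
if __name__ == '__main__':
    import torch
    duc = DUC(inplanes=512, planes=1024, upscale_factor=2)
    x = torch.randn(1, 512, 8, 8)
    print(duc(x).shape)  # torch.Size([1, 256, 16, 16])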

View File

@ -0,0 +1,135 @@
import torch.nn as nn
from .util_models import ConcatTable, CaddTable, Identity
import math
from opt import opt
class Residual(nn.Module):
def __init__(self, numIn, numOut, inputResH, inputResW, stride=1,
net_type='preact', useConv=False, baseWidth=9, cardinality=4):
super(Residual, self).__init__()
self.con = ConcatTable([convBlock(numIn, numOut, inputResH,
inputResW, net_type, baseWidth, cardinality, stride),
skipLayer(numIn, numOut, stride, useConv)])
self.cadd = CaddTable(True)
def forward(self, x):
out = self.con(x)
out = self.cadd(out)
return out
def convBlock(numIn, numOut, inputResH, inputResW, net_type, baseWidth, cardinality, stride):
numIn = int(numIn)
numOut = int(numOut)
addTable = ConcatTable()
s_list = []
if net_type != 'no_preact':
s_list.append(nn.BatchNorm2d(numIn))
s_list.append(nn.ReLU(True))
conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1)
if opt.init:
nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
s_list.append(conv1)
s_list.append(nn.BatchNorm2d(numOut // 2))
s_list.append(nn.ReLU(True))
conv2 = nn.Conv2d(numOut // 2, numOut // 2,
kernel_size=3, stride=stride, padding=1)
if opt.init:
nn.init.xavier_normal(conv2.weight)
s_list.append(conv2)
s = nn.Sequential(*s_list)
addTable.add(s)
D = math.floor(numOut // baseWidth)
C = cardinality
s_list = []
if net_type != 'no_preact':
s_list.append(nn.BatchNorm2d(numIn))
s_list.append(nn.ReLU(True))
conv1 = nn.Conv2d(numIn, D, kernel_size=1, stride=stride)
if opt.init:
nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / C))
s_list.append(conv1)
s_list.append(nn.BatchNorm2d(D))
s_list.append(nn.ReLU(True))
s_list.append(pyramid(D, C, inputResH, inputResW))
s_list.append(nn.BatchNorm2d(D))
s_list.append(nn.ReLU(True))
a = nn.Conv2d(D, numOut // 2, kernel_size=1)
a.nBranchIn = C
if opt.init:
nn.init.xavier_normal(a.weight, gain=math.sqrt(1 / C))
s_list.append(a)
s = nn.Sequential(*s_list)
addTable.add(s)
    elewiseAdd = nn.Sequential(
addTable,
CaddTable(False)
)
conv2 = nn.Conv2d(numOut // 2, numOut, kernel_size=1)
if opt.init:
nn.init.xavier_normal(conv2.weight, gain=math.sqrt(1 / 2))
model = nn.Sequential(
        elewiseAdd,
nn.BatchNorm2d(numOut // 2),
nn.ReLU(True),
conv2
)
return model
def pyramid(D, C, inputResH, inputResW):
pyraTable = ConcatTable()
sc = math.pow(2, 1 / C)
for i in range(C):
scaled = 1 / math.pow(sc, i + 1)
conv1 = nn.Conv2d(D, D, kernel_size=3, stride=1, padding=1)
if opt.init:
nn.init.xavier_normal(conv1.weight)
s = nn.Sequential(
nn.FractionalMaxPool2d(2, output_ratio=(scaled, scaled)),
conv1,
nn.UpsamplingBilinear2d(size=(int(inputResH), int(inputResW))))
pyraTable.add(s)
pyra = nn.Sequential(
pyraTable,
CaddTable(False)
)
return pyra
class skipLayer(nn.Module):
def __init__(self, numIn, numOut, stride, useConv):
super(skipLayer, self).__init__()
self.identity = False
if numIn == numOut and stride == 1 and not useConv:
self.identity = True
else:
conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride)
if opt.init:
nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
self.m = nn.Sequential(
nn.BatchNorm2d(numIn),
nn.ReLU(True),
conv1
)
def forward(self, x):
if self.identity:
return x
else:
return self.m(x)

View File

@ -0,0 +1,54 @@
import torch.nn as nn
import math
from .util_models import ConcatTable, CaddTable, Identity
from opt import opt
def Residual(numIn, numOut, *arg, stride=1, net_type='preact', useConv=False, **kw):
con = ConcatTable([convBlock(numIn, numOut, stride, net_type),
skipLayer(numIn, numOut, stride, useConv)])
cadd = CaddTable(True)
return nn.Sequential(con, cadd)
def convBlock(numIn, numOut, stride, net_type):
s_list = []
if net_type != 'no_preact':
s_list.append(nn.BatchNorm2d(numIn))
s_list.append(nn.ReLU(True))
conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1)
if opt.init:
nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
s_list.append(conv1)
s_list.append(nn.BatchNorm2d(numOut // 2))
s_list.append(nn.ReLU(True))
conv2 = nn.Conv2d(numOut // 2, numOut // 2, kernel_size=3, stride=stride, padding=1)
if opt.init:
nn.init.xavier_normal(conv2.weight)
s_list.append(conv2)
s_list.append(nn.BatchNorm2d(numOut // 2))
s_list.append(nn.ReLU(True))
conv3 = nn.Conv2d(numOut // 2, numOut, kernel_size=1)
if opt.init:
nn.init.xavier_normal(conv3.weight)
s_list.append(conv3)
return nn.Sequential(*s_list)
def skipLayer(numIn, numOut, stride, useConv):
if numIn == numOut and stride == 1 and not useConv:
return Identity()
else:
conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride)
if opt.init:
nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
return nn.Sequential(
nn.BatchNorm2d(numIn),
nn.ReLU(True),
conv1
)

View File

@ -0,0 +1,82 @@
import torch.nn as nn
import torch.nn.functional as F
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, stride=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = F.relu(self.bn1(self.conv1(x)), inplace=True)
out = F.relu(self.bn2(self.conv2(out)), inplace=True)
out = self.bn3(self.conv3(out))
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = F.relu(out, inplace=True)
return out
class ResNet(nn.Module):
""" Resnet """
def __init__(self, architecture):
super(ResNet, self).__init__()
assert architecture in ["resnet50", "resnet101"]
self.inplanes = 64
self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3]
self.block = Bottleneck
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
self.layer1 = self.make_layer(self.block, 64, self.layers[0])
self.layer2 = self.make_layer(self.block, 128, self.layers[1], stride=2)
self.layer3 = self.make_layer(self.block, 256, self.layers[2], stride=2)
self.layer4 = self.make_layer(
self.block, 512, self.layers[3], stride=2)
def forward(self, x):
x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
return x
def stages(self):
return [self.layer1, self.layer2, self.layer3, self.layer4]
def make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)

View File

@ -0,0 +1,99 @@
import torch.nn as nn
from .SE_module import SELayer
import torch.nn.functional as F
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=False):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
if reduction:
self.se = SELayer(planes * 4)
self.reduc = reduction
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = F.relu(self.bn1(self.conv1(x)), inplace=True)
out = F.relu(self.bn2(self.conv2(out)), inplace=True)
out = self.conv3(out)
out = self.bn3(out)
if self.reduc:
out = self.se(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = F.relu(out)
return out
class SEResnet(nn.Module):
""" SEResnet """
def __init__(self, architecture):
super(SEResnet, self).__init__()
assert architecture in ["resnet50", "resnet101"]
self.inplanes = 64
self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3]
self.block = Bottleneck
self.conv1 = nn.Conv2d(3, 64, kernel_size=7,
stride=2, padding=3, bias=False)
self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self.make_layer(self.block, 64, self.layers[0])
self.layer2 = self.make_layer(
self.block, 128, self.layers[1], stride=2)
self.layer3 = self.make_layer(
self.block, 256, self.layers[2], stride=2)
self.layer4 = self.make_layer(
self.block, 512, self.layers[3], stride=2)
def forward(self, x):
x = self.maxpool(self.relu(self.bn1(self.conv1(x)))) # 64 * h/4 * w/4
x = self.layer1(x) # 256 * h/4 * w/4
x = self.layer2(x) # 512 * h/8 * w/8
x = self.layer3(x) # 1024 * h/16 * w/16
x = self.layer4(x) # 2048 * h/32 * w/32
return x
def stages(self):
return [self.layer1, self.layer2, self.layer3, self.layer4]
def make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
if downsample is not None:
layers.append(block(self.inplanes, planes, stride, downsample, reduction=True))
else:
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)

View File

@ -0,0 +1,19 @@
from torch import nn
class SELayer(nn.Module):
def __init__(self, channel, reduction=1):
super(SELayer, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channel, channel // reduction),
nn.ReLU(inplace=True),
nn.Linear(channel // reduction, channel),
nn.Sigmoid()
)
def forward(self, x):
b, c, _, _ = x.size()
y = self.avg_pool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y
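# Shape self-check (a sketch): the SE layer re-weights channels with a learned
# sigmoid gate, so the output shape always matches the input shape.
if __name__ == '__main__':
    import torch
    se = SELayer(channel=64)
    x = torch.randn(2, 64, 8, 8)
    print(se(x).shape)  # torch.Size([2, 64, 8, 8])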

View File

@ -0,0 +1 @@
from . import *

View File

@ -0,0 +1,37 @@
import torch
import torch.nn as nn
from torch.autograd import Variable
class ConcatTable(nn.Module):
def __init__(self, module_list=None):
super(ConcatTable, self).__init__()
self.modules_list = nn.ModuleList(module_list)
def forward(self, x: Variable):
y = []
for i in range(len(self.modules_list)):
y.append(self.modules_list[i](x))
return y
def add(self, module):
self.modules_list.append(module)
class CaddTable(nn.Module):
def __init__(self, inplace=False):
super(CaddTable, self).__init__()
self.inplace = inplace
def forward(self, x: Variable or list):
return torch.stack(x, 0).sum(0)
class Identity(nn.Module):
def __init__(self, params=None):
super(Identity, self).__init__()
self.params = nn.ParameterList(params)
def forward(self, x: Variable or list):
return x

View File

@ -0,0 +1,115 @@
"""import argparse
import torch
parser = argparse.ArgumentParser(description='PyTorch AlphaPose Training')
parser.add_argument("--return_counts", type=bool, default=True)
parser.add_argument("--mode", default='client')
parser.add_argument("--port", default=52162)
"----------------------------- General options -----------------------------"
parser.add_argument('--expID', default='default', type=str,
help='Experiment ID')
parser.add_argument('--dataset', default='coco', type=str,
help='Dataset choice: mpii | coco')
parser.add_argument('--nThreads', default=30, type=int,
help='Number of data loading threads')
parser.add_argument('--debug', default=False, type=bool,
help='Print the debug information')
parser.add_argument('--snapshot', default=1, type=int,
help='How often to take a snapshot of the model (0 = never)')
"----------------------------- AlphaPose options -----------------------------"
parser.add_argument('--addDPG', default=False, type=bool,
help='Train with data augmentation')
"----------------------------- Model options -----------------------------"
parser.add_argument('--netType', default='hgPRM', type=str,
help='Options: hgPRM | resnext')
parser.add_argument('--loadModel', default=None, type=str,
help='Provide full path to a previously trained model')
parser.add_argument('--Continue', default=False, type=bool,
help='Pick up where an experiment left off')
parser.add_argument('--nFeats', default=256, type=int,
help='Number of features in the hourglass')
parser.add_argument('--nClasses', default=33, type=int,
help='Number of output channel')
parser.add_argument('--nStack', default=8, type=int,
help='Number of hourglasses to stack')
"----------------------------- Hyperparameter options -----------------------------"
parser.add_argument('--LR', default=2.5e-4, type=float,
help='Learning rate')
parser.add_argument('--momentum', default=0, type=float,
help='Momentum')
parser.add_argument('--weightDecay', default=0, type=float,
help='Weight decay')
parser.add_argument('--crit', default='MSE', type=str,
help='Criterion type')
parser.add_argument('--optMethod', default='rmsprop', type=str,
help='Optimization method: rmsprop | sgd | nag | adadelta')
"----------------------------- Training options -----------------------------"
parser.add_argument('--nEpochs', default=50, type=int,
help='Number of hourglasses to stack')
parser.add_argument('--epoch', default=0, type=int,
help='Current epoch')
parser.add_argument('--trainBatch', default=40, type=int,
help='Train-batch size')
parser.add_argument('--validBatch', default=20, type=int,
help='Valid-batch size')
parser.add_argument('--trainIters', default=0, type=int,
help='Total train iters')
parser.add_argument('--valIters', default=0, type=int,
help='Total valid iters')
parser.add_argument('--init', default=None, type=str,
help='Initialization')
"----------------------------- Data options -----------------------------"
parser.add_argument('--inputResH', default=384, type=int,
help='Input image height')
parser.add_argument('--inputResW', default=320, type=int,
help='Input image width')
parser.add_argument('--outputResH', default=96, type=int,
help='Output heatmap height')
parser.add_argument('--outputResW', default=80, type=int,
help='Output heatmap width')
parser.add_argument('--scale', default=0.25, type=float,
help='Degree of scale augmentation')
parser.add_argument('--rotate', default=30, type=float,
help='Degree of rotation augmentation')
parser.add_argument('--hmGauss', default=1, type=int,
help='Heatmap gaussian size')
"----------------------------- PyraNet options -----------------------------"
parser.add_argument('--baseWidth', default=9, type=int,
help='Heatmap gaussian size')
parser.add_argument('--cardinality', default=5, type=int,
help='Heatmap gaussian size')
parser.add_argument('--nResidual', default=1, type=int,
help='Number of residual modules at each location in the pyranet')
"----------------------------- Distribution options -----------------------------"
parser.add_argument('--dist', dest='dist', type=int, default=1,
help='distributed training or not')
parser.add_argument('--backend', dest='backend', type=str, default='gloo',
help='backend for distributed training')
parser.add_argument('--port', dest='port',
help='port of server')
opt = parser.parse_args()"""
"""if opt.Continue:
opt = torch.load("../exp/{}/{}/option.pkl".format(opt.dataset, opt.expID))
opt.Continue = True
opt.nEpochs = 50
print("--- Continue ---")"""
class opt:
nClasses = 33
inputResH = 384
inputResW = 320
outputResH = 96
outputResW = 80
scale = 0.25
rotate = 30
hmGauss = 1

View File

@ -0,0 +1 @@
from . import *

View File

@ -0,0 +1,85 @@
import os
import h5py
from functools import reduce
import torch.utils.data as data
from ..pose import generateSampleBox
from opt import opt
class Mscoco(data.Dataset):
def __init__(self, train=True, sigma=1,
scale_factor=(0.2, 0.3), rot_factor=40, label_type='Gaussian'):
self.img_folder = '../data/coco/images' # root image folders
self.is_train = train # training set or test set
self.inputResH = opt.inputResH
self.inputResW = opt.inputResW
self.outputResH = opt.outputResH
self.outputResW = opt.outputResW
self.sigma = sigma
self.scale_factor = scale_factor
self.rot_factor = rot_factor
self.label_type = label_type
self.nJoints_coco = 17
self.nJoints_mpii = 16
self.nJoints = 33
self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16, 17)
self.flipRef = ((2, 3), (4, 5), (6, 7),
(8, 9), (10, 11), (12, 13),
(14, 15), (16, 17))
# create train/val split
with h5py.File('../data/coco/annot_clean.h5', 'r') as annot:
# train
self.imgname_coco_train = annot['imgname'][:-5887]
self.bndbox_coco_train = annot['bndbox'][:-5887]
self.part_coco_train = annot['part'][:-5887]
# val
self.imgname_coco_val = annot['imgname'][-5887:]
self.bndbox_coco_val = annot['bndbox'][-5887:]
self.part_coco_val = annot['part'][-5887:]
self.size_train = self.imgname_coco_train.shape[0]
self.size_val = self.imgname_coco_val.shape[0]
def __getitem__(self, index):
sf = self.scale_factor
if self.is_train:
part = self.part_coco_train[index]
bndbox = self.bndbox_coco_train[index]
imgname = self.imgname_coco_train[index]
else:
part = self.part_coco_val[index]
bndbox = self.bndbox_coco_val[index]
imgname = self.imgname_coco_val[index]
imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))
img_path = os.path.join(self.img_folder, imgname)
metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
'coco', sf, self, train=self.is_train)
inp, out_bigcircle, out_smallcircle, out, setMask = metaData
label = []
for i in range(opt.nStack):
if i < 2:
# label.append(out_bigcircle.clone())
label.append(out.clone())
elif i < 4:
# label.append(out_smallcircle.clone())
label.append(out.clone())
else:
label.append(out.clone())
return inp, label, setMask, 'coco'
def __len__(self):
if self.is_train:
return self.size_train
else:
return self.size_val

View File

@ -0,0 +1,122 @@
import os
import h5py
from functools import reduce
import torch.utils.data as data
from ..pose import generateSampleBox
from opt import opt
class Mscoco(data.Dataset):
def __init__(self, train=True, sigma=1,
scale_factor=0.25, rot_factor=30, label_type='Gaussian'):
self.img_folder = '../data/' # root image folders
self.is_train = train # training set or test set
self.inputResH = 320
self.inputResW = 256
self.outputResH = 80
self.outputResW = 64
self.sigma = sigma
self.scale_factor = (0.2, 0.3)
self.rot_factor = rot_factor
self.label_type = label_type
self.nJoints_coco = 17
self.nJoints_mpii = 16
self.nJoints = 33
self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8, # COCO
9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, # MPII
28, 29, 32, 33)
self.flipRef = ((2, 3), (4, 5), (6, 7), # COCO
(8, 9), (10, 11), (12, 13),
(14, 15), (16, 17),
(18, 23), (19, 22), (20, 21), # MPII
(28, 33), (29, 32), (30, 31))
'''
Create train/val split
'''
# COCO
with h5py.File('../data/coco/annot_clean.h5', 'r') as annot:
# train
self.imgname_coco_train = annot['imgname'][:-5887]
self.bndbox_coco_train = annot['bndbox'][:-5887]
self.part_coco_train = annot['part'][:-5887]
# val
self.imgname_coco_val = annot['imgname'][-5887:]
self.bndbox_coco_val = annot['bndbox'][-5887:]
self.part_coco_val = annot['part'][-5887:]
# MPII
with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot:
# train
self.imgname_mpii_train = annot['imgname'][:-1358]
self.bndbox_mpii_train = annot['bndbox'][:-1358]
self.part_mpii_train = annot['part'][:-1358]
# val
self.imgname_mpii_val = annot['imgname'][-1358:]
self.bndbox_mpii_val = annot['bndbox'][-1358:]
self.part_mpii_val = annot['part'][-1358:]
self.size_coco_train = self.imgname_coco_train.shape[0]
self.size_coco_val = self.imgname_coco_val.shape[0]
self.size_train = self.imgname_coco_train.shape[0] + self.imgname_mpii_train.shape[0]
self.size_val = self.imgname_coco_val.shape[0] + self.imgname_mpii_val.shape[0]
self.train, self.valid = [], []
def __getitem__(self, index):
sf = self.scale_factor
if self.is_train and index < self.size_coco_train: # COCO
part = self.part_coco_train[index]
bndbox = self.bndbox_coco_train[index]
imgname = self.imgname_coco_train[index]
imgset = 'coco'
elif self.is_train: # MPII
part = self.part_mpii_train[index - self.size_coco_train]
bndbox = self.bndbox_mpii_train[index - self.size_coco_train]
imgname = self.imgname_mpii_train[index - self.size_coco_train]
imgset = 'mpii'
elif index < self.size_coco_val:
part = self.part_coco_val[index]
bndbox = self.bndbox_coco_val[index]
imgname = self.imgname_coco_val[index]
imgset = 'coco'
else:
part = self.part_mpii_val[index - self.size_coco_val]
bndbox = self.bndbox_mpii_val[index - self.size_coco_val]
imgname = self.imgname_mpii_val[index - self.size_coco_val]
imgset = 'mpii'
if imgset == 'coco':
imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))
else:
imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13]
img_path = os.path.join(self.img_folder, imgset, 'images', imgname)
metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
imgset, sf, self, train=self.is_train)
inp, out_bigcircle, out_smallcircle, out, setMask = metaData
label = []
for i in range(opt.nStack):
if i < 2:
# label.append(out_bigcircle.clone())
label.append(out.clone())
elif i < 4:
# label.append(out_smallcircle.clone())
label.append(out.clone())
else:
label.append(out.clone())
return inp, label, setMask, imgset
def __len__(self):
if self.is_train:
return self.size_train
else:
return self.size_val

View File

@ -0,0 +1,84 @@
import os
import h5py
from functools import reduce
import torch.utils.data as data
from ..pose import generateSampleBox
from opt import opt
class Mpii(data.Dataset):
def __init__(self, train=True, sigma=1,
scale_factor=0.25, rot_factor=30, label_type='Gaussian'):
self.img_folder = '../data/mpii/images' # root image folders
self.is_train = train # training set or test set
self.inputResH = 320
self.inputResW = 256
self.outputResH = 80
self.outputResW = 64
self.sigma = sigma
self.scale_factor = (0.2, 0.3)
self.rot_factor = rot_factor
self.label_type = label_type
self.nJoints_mpii = 16
self.nJoints = 16
self.accIdxs = (1, 2, 3, 4, 5, 6,
11, 12, 15, 16)
self.flipRef = ((1, 6), (2, 5), (3, 4),
(11, 16), (12, 15), (13, 14))
# create train/val split
with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot:
# train
self.imgname_mpii_train = annot['imgname'][:-1358]
self.bndbox_mpii_train = annot['bndbox'][:-1358]
self.part_mpii_train = annot['part'][:-1358]
# val
self.imgname_mpii_val = annot['imgname'][-1358:]
self.bndbox_mpii_val = annot['bndbox'][-1358:]
self.part_mpii_val = annot['part'][-1358:]
self.size_train = self.imgname_mpii_train.shape[0]
self.size_val = self.imgname_mpii_val.shape[0]
self.train, self.valid = [], []
def __getitem__(self, index):
sf = self.scale_factor
if self.is_train:
part = self.part_mpii_train[index]
bndbox = self.bndbox_mpii_train[index]
imgname = self.imgname_mpii_train[index]
else:
part = self.part_mpii_val[index]
bndbox = self.bndbox_mpii_val[index]
imgname = self.imgname_mpii_val[index]
imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13]
img_path = os.path.join(self.img_folder, imgname)
metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
'mpii', sf, self, train=self.is_train)
inp, out_bigcircle, out_smallcircle, out, setMask = metaData
label = []
for i in range(opt.nStack):
if i < 2:
#label.append(out_bigcircle.clone())
label.append(out.clone())
elif i < 4:
#label.append(out_smallcircle.clone())
label.append(out.clone())
else:
label.append(out.clone())
return inp, label, setMask
def __len__(self):
if self.is_train:
return self.size_train
else:
return self.size_val

View File

@ -0,0 +1,216 @@
from ..opt import opt
try:
from utils import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks
except ImportError:
from .img import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks
import torch
class DataLogger(object):
def __init__(self):
self.clear()
def clear(self):
self.value = 0
self.sum = 0
self.cnt = 0
self.avg = 0
def update(self, value, n=1):
self.value = value
self.sum += value * n
self.cnt += n
self._cal_avg()
def _cal_avg(self):
self.avg = self.sum / self.cnt
def accuracy(output, label, dataset):
if type(output) == list:
return accuracy(output[opt.nStack - 1], label[opt.nStack - 1], dataset)
else:
return heatmapAccuracy(output.cpu().data, label.cpu().data, dataset.accIdxs)
def heatmapAccuracy(output, label, idxs):
preds = getPreds(output)
gt = getPreds(label)
norm = torch.ones(preds.size(0)) * opt.outputResH / 10
dists = calc_dists(preds, gt, norm)
#print(dists)
acc = torch.zeros(len(idxs) + 1)
avg_acc = 0
cnt = 0
for i in range(len(idxs)):
acc[i + 1] = dist_acc(dists[idxs[i] - 1])
if acc[i + 1] >= 0:
avg_acc = avg_acc + acc[i + 1]
cnt += 1
if cnt != 0:
acc[0] = avg_acc / cnt
return acc
def getPreds(hm):
""" get predictions from score maps in torch Tensor
return type: torch.LongTensor
"""
assert hm.dim() == 4, 'Score maps should be 4-dim'
maxval, idx = torch.max(hm.view(hm.size(0), hm.size(1), -1), 2)
maxval = maxval.view(hm.size(0), hm.size(1), 1)
idx = idx.view(hm.size(0), hm.size(1), 1) + 1
preds = idx.repeat(1, 1, 2).float()
preds[:, :, 0] = (preds[:, :, 0] - 1) % hm.size(3)
preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hm.size(3))
# pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
# preds *= pred_mask
return preds
def calc_dists(preds, target, normalize):
preds = preds.float().clone()
target = target.float().clone()
dists = torch.zeros(preds.size(1), preds.size(0))
for n in range(preds.size(0)):
for c in range(preds.size(1)):
if target[n, c, 0] > 0 and target[n, c, 1] > 0:
dists[c, n] = torch.dist(
preds[n, c, :], target[n, c, :]) / normalize[n]
else:
dists[c, n] = -1
return dists
def dist_acc(dists, thr=0.5):
""" Return percentage below threshold while ignoring values with a -1 """
if dists.ne(-1).sum() > 0:
return dists.le(thr).eq(dists.ne(-1)).float().sum() * 1.0 / dists.ne(-1).float().sum()
else:
        return -1
def postprocess(output):
p = getPreds(output)
for i in range(p.size(0)):
for j in range(p.size(1)):
hm = output[i][j]
pX, pY = int(round(p[i][j][0])), int(round(p[i][j][1]))
if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1:
diff = torch.Tensor((hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX]))
p[i][j] += diff.sign() * 0.25
p -= 0.5
return p
def getPrediction(hms, pt1, pt2, inpH, inpW, resH, resW):
"""
Get keypoint location from heatmaps
"""
assert hms.dim() == 4, 'Score maps should be 4-dim'
maxval, idx = torch.max(hms.view(hms.size(0), hms.size(1), -1), 2)
maxval = maxval.view(hms.size(0), hms.size(1), 1)
idx = idx.view(hms.size(0), hms.size(1), 1) + 1
preds = idx.repeat(1, 1, 2).float()
preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3)
preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3))
pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
preds *= pred_mask
# Very simple post-processing step to improve performance at tight PCK thresholds
"""for i in range(preds.size(0)):
for j in range(preds.size(1)):
hm = hms[i][j]
pX, pY = int(round(float(preds[i][j][0]))), int(round(float(preds[i][j][1])))
if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1:
diff = torch.Tensor(
(hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX]))
preds[i][j] += diff.sign() * 0.25
preds += 0.2"""
preds_tf = torch.zeros(preds.size())
preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW)
return preds, preds_tf, maxval
def getMultiPeakPrediction(hms, pt1, pt2, inpH, inpW, resH, resW):
assert hms.dim() == 4, 'Score maps should be 4-dim'
preds_img = {}
hms = hms.numpy()
for n in range(hms.shape[0]): # Number of samples
preds_img[n] = {} # Result of sample: n
for k in range(hms.shape[1]): # Number of keypoints
preds_img[n][k] = [] # Result of keypoint: k
hm = hms[n][k]
candidate_points = findPeak(hm)
res_pt = processPeaks(candidate_points, hm,
pt1[n], pt2[n], inpH, inpW, resH, resW)
preds_img[n][k] = res_pt
return preds_img
def getPrediction_batch(hms, pt1, pt2, inpH, inpW, resH, resW):
"""
Get keypoint location from heatmaps
pt1, pt2: [n, 2]
OUTPUT:
preds: [n, 17, 2]
"""
assert hms.dim() == 4, 'Score maps should be 4-dim'
flat_hms = hms.view(hms.size(0), hms.size(1), -1)
maxval, idx = torch.max(flat_hms, 2)
maxval = maxval.view(hms.size(0), hms.size(1), 1)
idx = idx.view(hms.size(0), hms.size(1), 1) + 1
preds = idx.repeat(1, 1, 2).float()
preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3)
preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3))
pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
preds *= pred_mask
# Very simple post-processing step to improve performance at tight PCK thresholds
idx_up = (idx - hms.size(3)).clamp(0, flat_hms.size(2) - 1)
idx_down = (idx + hms.size(3)).clamp(0, flat_hms.size(2) - 1)
idx_left = (idx - 1).clamp(0, flat_hms.size(2) - 1)
idx_right = (idx + 1).clamp(0, flat_hms.size(2) - 1)
maxval_up = flat_hms.gather(2, idx_up)
maxval_down = flat_hms.gather(2, idx_down)
maxval_left = flat_hms.gather(2, idx_left)
maxval_right = flat_hms.gather(2, idx_right)
diff1 = (maxval_right - maxval_left).sign() * 0.25
diff2 = (maxval_down - maxval_up).sign() * 0.25
    # Zero the offsets where the central difference is invalid: the vertical
    # offset (diff2) at the top/bottom rows and the horizontal offset (diff1)
    # at the left/right columns.
    diff2[idx_up <= hms.size(3)] = 0
    diff2[idx_down // hms.size(3) >= (hms.size(3) - 1)] = 0
    diff1[(idx_left % hms.size(3)) == 0] = 0
    diff1[(idx_left % hms.size(3)) == (hms.size(3) - 1)] = 0
preds[:, :, 0] += diff1.squeeze(-1)
preds[:, :, 1] += diff2.squeeze(-1)
preds_tf = torch.zeros(preds.size())
preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW)
return preds, preds_tf, maxval
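# Sketch of getPreds (illustrative only): the flat argmax of each heatmap is
# converted back to (x, y) grid coordinates, e.g. for a single 4x4 heatmap
# whose peak sits at row 2, column 3:
#
#   hm = torch.zeros(1, 1, 4, 4)
#   hm[0, 0, 2, 3] = 1.0
#   getPreds(hm)   # -> tensor([[[3., 2.]]])  (x, y)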

View File

@ -0,0 +1,534 @@
import numpy as np
import cv2
import torch
import scipy.misc
from torchvision import transforms
import torch.nn.functional as F
from scipy.ndimage import maximum_filter
from PIL import Image
from copy import deepcopy
import matplotlib
#matplotlib.use('agg')
import matplotlib.pyplot as plt
def im_to_torch(img):
img = np.array(img)
img = np.transpose(img, (2, 0, 1)) # C*H*W
img = to_torch(img).float()
if img.max() > 1:
img /= 255
return img
def torch_to_im(img):
img = to_numpy(img)
img = np.transpose(img, (1, 2, 0)) # C*H*W
return img
def load_image(img_path):
# H x W x C => C x H x W
return im_to_torch(scipy.misc.imread(img_path, mode='RGB'))
def to_numpy(tensor):
if torch.is_tensor(tensor):
return tensor.cpu().numpy()
elif type(tensor).__module__ != 'numpy':
raise ValueError("Cannot convert {} to numpy array"
.format(type(tensor)))
return tensor
def to_torch(ndarray):
if type(ndarray).__module__ == 'numpy':
return torch.from_numpy(ndarray)
elif not torch.is_tensor(ndarray):
raise ValueError("Cannot convert {} to torch tensor"
.format(type(ndarray)))
return ndarray
def drawCircle(img, pt, sigma):
img = to_numpy(img)
tmpSize = 3 * sigma
# Check that any part of the gaussian is in-bounds
ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]
if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
br[0] < 0 or br[1] < 0):
# If not, just return the image as is
return to_torch(img)
# Generate gaussian
size = 2 * tmpSize + 1
x = np.arange(0, size, 1, float)
y = x[:, np.newaxis]
x0 = y0 = size // 2
sigma = size / 4.0
# The gaussian is not normalized, we want the center value to equal 1
g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
g[g > 0] = 1
# Usable gaussian range
g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
# Image range
img_x = max(0, ul[0]), min(br[0], img.shape[1])
img_y = max(0, ul[1]), min(br[1], img.shape[0])
img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
return to_torch(img)
def drawGaussian(img, pt, sigma):
img = to_numpy(img)
tmpSize = 3 * sigma
# Check that any part of the gaussian is in-bounds
ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]
if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
br[0] < 0 or br[1] < 0):
# If not, just return the image as is
return to_torch(img)
# Generate gaussian
size = 2 * tmpSize + 1
x = np.arange(0, size, 1, float)
y = x[:, np.newaxis]
x0 = y0 = size // 2
sigma = size / 4.0
# The gaussian is not normalized, we want the center value to equal 1
g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
# Usable gaussian range
g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
# Image range
img_x = max(0, ul[0]), min(br[0], img.shape[1])
img_y = max(0, ul[1]), min(br[1], img.shape[0])
img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
return to_torch(img)
def drawBigCircle(img, pt, sigma):
img = to_numpy(img)
tmpSize = 3 * sigma
# Check that any part of the gaussian is in-bounds
ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]
if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
br[0] < 0 or br[1] < 0):
# If not, just return the image as is
return to_torch(img)
# Generate gaussian
size = 2 * tmpSize + 1
x = np.arange(0, size, 1, float)
y = x[:, np.newaxis]
x0 = y0 = size // 2
sigma = size / 4.0
# The gaussian is not normalized, we want the center value to equal 1
g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
g[g > 0.4] = 1
# Usable gaussian range
g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
# Image range
img_x = max(0, ul[0]), min(br[0], img.shape[1])
img_y = max(0, ul[1]), min(br[1], img.shape[0])
img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
return to_torch(img)
def drawSmallCircle(img, pt, sigma):
img = to_numpy(img)
tmpSize = 3 * sigma
# Check that any part of the gaussian is in-bounds
ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]
if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
br[0] < 0 or br[1] < 0):
# If not, just return the image as is
return to_torch(img)
# Generate gaussian
size = 2 * tmpSize + 1
x = np.arange(0, size, 1, float)
y = x[:, np.newaxis]
x0 = y0 = size // 2
sigma = size / 4.0
    # The gaussian is not normalized; we want the center value to equal 1.
g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
g[g > 0.5] = 1
# Usable gaussian range
g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
# Image range
img_x = max(0, ul[0]), min(br[0], img.shape[1])
img_y = max(0, ul[1]), min(br[1], img.shape[0])
img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
return to_torch(img)
def transformBox(pt, ul, br, inpH, inpW, resH, resW):
center = torch.zeros(2)
center[0] = (br[0] - 1 - ul[0]) / 2
center[1] = (br[1] - 1 - ul[1]) / 2
lenH = max(br[1] - ul[1], (br[0] - ul[0]) * inpH / inpW)
lenW = lenH * inpW / inpH
_pt = torch.zeros(2)
_pt[0] = pt[0] - ul[0]
_pt[1] = pt[1] - ul[1]
# Move to center
_pt[0] = _pt[0] + max(0, (lenW - 1) / 2 - center[0])
_pt[1] = _pt[1] + max(0, (lenH - 1) / 2 - center[1])
pt = (_pt * resH) / lenH
pt[0] = round(float(pt[0]))
pt[1] = round(float(pt[1]))
return pt.int()
def transformBoxInvert(pt, ul, br, inpH, inpW, resH, resW):
center = np.zeros(2)
center[0] = (br[0] - 1 - ul[0]) / 2
center[1] = (br[1] - 1 - ul[1]) / 2
lenH = max(br[1] - ul[1], (br[0] - ul[0]) * inpH / inpW)
lenW = lenH * inpW / inpH
_pt = (pt * lenH) / resH
_pt[0] = _pt[0] - max(0, (lenW - 1) / 2 - center[0])
_pt[1] = _pt[1] - max(0, (lenH - 1) / 2 - center[1])
new_point = np.zeros(2)
new_point[0] = _pt[0] + ul[0]
new_point[1] = _pt[1] + ul[1]
return new_point
def transformBoxInvert_batch(pt, ul, br, inpH, inpW, resH, resW):
"""
pt: [n, 17, 2]
ul: [n, 2]
br: [n, 2]
"""
num_pt = pt.shape[1]
center = (br - 1 - ul) / 2
size = br - ul
size[:, 0] *= (inpH / inpW)
lenH, _ = torch.max(size, dim=1) # [n,]
lenW = lenH * (inpW / inpH)
_pt = (pt * lenH[:, np.newaxis, np.newaxis]) / resH
_pt[:, :, 0] = _pt[:, :, 0] - ((lenW[:, np.newaxis].repeat(1, num_pt) - 1) /
2 - center[:, 0].unsqueeze(-1).repeat(1, num_pt)).clamp(min=0)
_pt[:, :, 1] = _pt[:, :, 1] - ((lenH[:, np.newaxis].repeat(1, num_pt) - 1) /
2 - center[:, 1].unsqueeze(-1).repeat(1, num_pt)).clamp(min=0)
new_point = torch.zeros(pt.size())
new_point[:, :, 0] = _pt[:, :, 0] + ul[:, 0].unsqueeze(-1).repeat(1, num_pt)
new_point[:, :, 1] = _pt[:, :, 1] + ul[:, 1].unsqueeze(-1).repeat(1, num_pt)
return new_point
def cropBox(img, ul, br, resH, resW):
ul = ul.int()
br = (br - 1).int()
# br = br.int()
lenH = max((br[1] - ul[1]).item(), (br[0] - ul[0]).item() * resH / resW)
lenW = lenH * resW / resH
if img.dim() == 2:
img = img[np.newaxis, :]
box_shape = [(br[1] - ul[1]).item(), (br[0] - ul[0]).item()]
pad_size = [(lenH - box_shape[0]) // 2, (lenW - box_shape[1]) // 2]
    # Zero out the image outside the box
if ul[1] > 0:
img[:, :ul[1], :] = 0
if ul[0] > 0:
img[:, :, :ul[0]] = 0
if br[1] < img.shape[1] - 1:
img[:, br[1] + 1:, :] = 0
if br[0] < img.shape[2] - 1:
img[:, :, br[0] + 1:] = 0
src = np.zeros((3, 2), dtype=np.float32)
dst = np.zeros((3, 2), dtype=np.float32)
src[0, :] = np.array(
[ul[0] - pad_size[1], ul[1] - pad_size[0]], np.float32)
src[1, :] = np.array(
[br[0] + pad_size[1], br[1] + pad_size[0]], np.float32)
dst[0, :] = 0
dst[1, :] = np.array([resW - 1, resH - 1], np.float32)
src[2:, :] = get_3rd_point(src[0, :], src[1, :])
dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
dst_img = cv2.warpAffine(torch_to_im(img), trans,
(resW, resH), flags=cv2.INTER_LINEAR)
return im_to_torch(torch.Tensor(dst_img))
def cv_rotate(img, rot, resW, resH):
center = np.array((resW - 1, resH - 1)) / 2
rot_rad = np.pi * rot / 180
src_dir = get_dir([0, (resH - 1) * -0.5], rot_rad)
dst_dir = np.array([0, (resH - 1) * -0.5], np.float32)
src = np.zeros((3, 2), dtype=np.float32)
dst = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center
src[1, :] = center + src_dir
dst[0, :] = [(resW - 1) * 0.5, (resH - 1) * 0.5]
dst[1, :] = np.array([(resW - 1) * 0.5, (resH - 1) * 0.5]) + dst_dir
src[2:, :] = get_3rd_point(src[0, :], src[1, :])
dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
dst_img = cv2.warpAffine(torch_to_im(img), trans,
(resW, resH), flags=cv2.INTER_LINEAR)
return im_to_torch(torch.Tensor(dst_img))
def flip(x):
assert (x.dim() == 3 or x.dim() == 4)
dim = x.dim() - 1
if '0.4.1' in torch.__version__ or '1.0' in torch.__version__:
return x.flip(dims=(dim,))
else:
is_cuda = False
if x.is_cuda:
is_cuda = True
x = x.cpu()
x = x.numpy().copy()
if x.ndim == 3:
x = np.transpose(np.fliplr(np.transpose(x, (0, 2, 1))), (0, 2, 1))
elif x.ndim == 4:
for i in range(x.shape[0]):
x[i] = np.transpose(
np.fliplr(np.transpose(x[i], (0, 2, 1))), (0, 2, 1))
# x = x.swapaxes(dim, 0)
# x = x[::-1, ...]
# x = x.swapaxes(0, dim)
x = torch.from_numpy(x.copy())
if is_cuda:
x = x.cuda()
return x
def shuffleLR(x, dataset):
flipRef = dataset.flipRef
assert (x.dim() == 3 or x.dim() == 4)
for pair in flipRef:
dim0, dim1 = pair
dim0 -= 1
dim1 -= 1
if x.dim() == 4:
tmp = x[:, dim1].clone()
x[:, dim1] = x[:, dim0].clone()
x[:, dim0] = tmp.clone()
#x[:, dim0], x[:, dim1] = deepcopy((x[:, dim1], x[:, dim0]))
else:
tmp = x[dim1].clone()
x[dim1] = x[dim0].clone()
x[dim0] = tmp.clone()
#x[dim0], x[dim1] = deepcopy((x[dim1], x[dim0]))
return x
def drawMPII(inps, preds):
assert inps.dim() == 4
    p_color = ['r', 'r', 'r', 'b', 'b', 'b',
               'black', 'black', 'black', 'black',
               'y', 'y', 'white', 'white', 'g', 'g']
nImg = inps.size(0)
imgs = []
for n in range(nImg):
img = to_numpy(inps[n])
img = np.transpose(img, (1, 2, 0))
imgs.append(img)
fig = plt.figure()
plt.imshow(imgs[0])
ax = fig.add_subplot(1, 1, 1)
#print(preds.shape)
for p in range(16):
x, y = preds[0][p]
cor = (round(x), round(y)), 10
ax.add_patch(plt.Circle(*cor, color=p_color[p]))
plt.axis('off')
plt.show()
return imgs
def drawCOCO(inps, preds, scores):
assert inps.dim() == 4
p_color = ['g', 'b', 'purple', 'b', 'purple',
'y', 'orange', 'y', 'orange', 'y', 'orange',
'pink', 'r', 'pink', 'r', 'pink', 'r']
nImg = inps.size(0)
imgs = []
for n in range(nImg):
img = to_numpy(inps[n])
img = np.transpose(img, (1, 2, 0))
imgs.append(img)
fig = plt.figure()
plt.imshow(imgs[0])
ax = fig.add_subplot(1, 1, 1)
#print(preds.shape)
for p in range(17):
if scores[0][p][0] < 0.2:
continue
x, y = preds[0][p]
cor = (round(x), round(y)), 3
ax.add_patch(plt.Circle(*cor, color=p_color[p]))
plt.axis('off')
plt.show()
return imgs
def get_3rd_point(a, b):
direct = a - b
return b + np.array([-direct[1], direct[0]], dtype=np.float32)
def get_dir(src_point, rot_rad):
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
src_result = [0, 0]
src_result[0] = src_point[0] * cs - src_point[1] * sn
src_result[1] = src_point[0] * sn + src_point[1] * cs
return src_result
def findPeak(hm):
mx = maximum_filter(hm, size=5)
idx = zip(*np.where((mx == hm) * (hm > 0.1)))
candidate_points = []
for (y, x) in idx:
candidate_points.append([x, y, hm[y][x]])
if len(candidate_points) == 0:
return torch.zeros(0)
candidate_points = np.array(candidate_points)
candidate_points = candidate_points[np.lexsort(-candidate_points.T)]
return torch.Tensor(candidate_points)
def processPeaks(candidate_points, hm, pt1, pt2, inpH, inpW, resH, resW):
# type: (Tensor, Tensor, Tensor, Tensor, float, float, float, float) -> List[Tensor]
if candidate_points.shape[0] == 0: # Low Response
maxval = np.max(hm.reshape(1, -1), 1)
idx = np.argmax(hm.reshape(1, -1), 1)
x = idx % resW
y = int(idx / resW)
candidate_points = np.zeros((1, 3))
candidate_points[0, 0:1] = x
candidate_points[0, 1:2] = y
candidate_points[0, 2:3] = maxval
res_pts = []
for i in range(candidate_points.shape[0]):
x, y, maxval = candidate_points[i][0], candidate_points[i][1], candidate_points[i][2]
if bool(maxval < 0.05) and len(res_pts) > 0:
pass
else:
if bool(x > 0) and bool(x < resW - 2):
if bool(hm[int(y)][int(x) + 1] - hm[int(y)][int(x) - 1] > 0):
x += 0.25
elif bool(hm[int(y)][int(x) + 1] - hm[int(y)][int(x) - 1] < 0):
x -= 0.25
if bool(y > 0) and bool(y < resH - 2):
if bool(hm[int(y) + 1][int(x)] - hm[int(y) - 1][int(x)] > 0):
y += (0.25 * inpH / inpW)
elif bool(hm[int(y) + 1][int(x)] - hm[int(y) - 1][int(x)] < 0):
y -= (0.25 * inpH / inpW)
#pt = torch.zeros(2)
pt = np.zeros(2)
pt[0] = x + 0.2
pt[1] = y + 0.2
pt = transformBoxInvert(pt, pt1, pt2, inpH, inpW, resH, resW)
res_pt = np.zeros(3)
res_pt[:2] = pt
res_pt[2] = maxval
res_pts.append(res_pt)
if maxval < 0.05:
break
return res_pts
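def _demo_find_peaks():
    # Illustrative usage sketch (not part of the original file): locate and
    # refine peaks on a toy 8x8 heatmap. The box corners pt1/pt2 and all
    # resolutions below are made-up values.
    hm = np.zeros((8, 8), dtype=np.float32)
    hm[3, 5] = 0.9
    peaks = findPeak(hm)  # rows of (x, y, score), sorted by score
    pts = processPeaks(peaks.numpy(), hm,
                       pt1=np.array([0., 0.]), pt2=np.array([64., 64.]),
                       inpH=64, inpW=64, resH=8, resW=8)
    return pts  # one refined point in original-image coordinates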
def crop_dets(img, boxes, height, width):
img = im_to_torch(img)
img_h = img.size(1)
img_w = img.size(2)
img[0].add_(-0.406)
img[1].add_(-0.457)
img[2].add_(-0.480)
inps = torch.zeros(len(boxes), 3, height, width)
pt1 = torch.zeros(len(boxes), 2)
pt2 = torch.zeros(len(boxes), 2)
for i, box in enumerate(boxes):
upLeft = torch.Tensor((float(box[0]), float(box[1])))
bottomRight = torch.Tensor((float(box[2]), float(box[3])))
h = bottomRight[1] - upLeft[1]
w = bottomRight[0] - upLeft[0]
if w > 100:
scaleRate = 0.2
else:
scaleRate = 0.3
upLeft[0] = max(0, upLeft[0] - w * scaleRate / 2)
upLeft[1] = max(0, upLeft[1] - h * scaleRate / 2)
bottomRight[0] = max(min(img_w - 1, bottomRight[0] + w * scaleRate / 2), upLeft[0] + 5)
bottomRight[1] = max(min(img_h - 1, bottomRight[1] + h * scaleRate / 2), upLeft[1] + 5)
inps[i] = cropBox(img.clone(), upLeft, bottomRight, height, width)
pt1[i] = upLeft
pt2[i] = bottomRight
return inps, pt1, pt2
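if __name__ == '__main__':
    # Illustrative usage sketch (not part of the original file): round-trip a
    # keypoint through the box-to-heatmap mapping. All coordinates and
    # resolutions below are made-up values; the inverse transform should land
    # within a pixel or two of the original point.
    ul = torch.Tensor([100., 50.])    # upper-left of a detected box
    br = torch.Tensor([300., 450.])   # bottom-right of the box
    pt = torch.Tensor([180., 200.])   # a keypoint in image coordinates
    hm_pt = transformBox(pt, ul, br, inpH=320, inpW=256, resH=80, resW=64)
    img_pt = transformBoxInvert(hm_pt.numpy().astype(np.float32),
                                ul.numpy(), br.numpy(),
                                inpH=320, inpW=256, resH=80, resW=64)
    print(hm_pt, img_pt)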

View File

@ -0,0 +1,169 @@
from utils import (load_image, drawGaussian, drawBigCircle, drawSmallCircle, cv_rotate,
cropBox, transformBox, flip, shuffleLR, drawCOCO)
from utils import getPrediction
import torch
import numpy as np
import random
from SPPE.src.opt import opt
def rnd(x):
return max(-2 * x, min(2 * x, np.random.randn(1)[0] * x))
def generateSampleBox(img_path, bndbox, part, nJoints, imgset, scale_factor, dataset, train=True):
nJoints_coco = 17
nJoints_mpii = 16
img = load_image(img_path)
if train:
img[0].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
img[1].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
img[2].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
ori_img = img.clone()
img[0].add_(-0.406)
img[1].add_(-0.457)
img[2].add_(-0.480)
upLeft = torch.Tensor((int(bndbox[0][0]), int(bndbox[0][1])))
bottomRight = torch.Tensor((int(bndbox[0][2]), int(bndbox[0][3])))
ht = bottomRight[1] - upLeft[1]
width = bottomRight[0] - upLeft[0]
imght = img.shape[1]
imgwidth = img.shape[2]
scaleRate = random.uniform(*scale_factor)
upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2)
upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2)
bottomRight[0] = min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2)
bottomRight[1] = min(imght - 1, bottomRight[1] + ht * scaleRate / 2)
    # Random patch sampling when DPG augmentation is enabled
if opt.addDPG:
PatchScale = random.uniform(0, 1)
if PatchScale > 0.85:
ratio = ht / width
if width < ht:
patchWidth = PatchScale * width
patchHt = patchWidth * ratio
else:
patchHt = PatchScale * ht
patchWidth = patchHt / ratio
xmin = upLeft[0] + random.uniform(0, 1) * (width - patchWidth)
ymin = upLeft[1] + random.uniform(0, 1) * (ht - patchHt)
xmax = xmin + patchWidth + 1
ymax = ymin + patchHt + 1
else:
xmin = max(1, min(upLeft[0] + np.random.normal(-0.0142, 0.1158) * width, imgwidth - 3))
ymin = max(1, min(upLeft[1] + np.random.normal(0.0043, 0.068) * ht, imght - 3))
xmax = min(max(xmin + 2, bottomRight[0] + np.random.normal(0.0154, 0.1337) * width), imgwidth - 3)
ymax = min(max(ymin + 2, bottomRight[1] + np.random.normal(-0.0013, 0.0711) * ht), imght - 3)
upLeft[0] = xmin
upLeft[1] = ymin
bottomRight[0] = xmax
bottomRight[1] = ymax
    # Count the joints that fall inside the box
jointNum = 0
if imgset == 'coco':
for i in range(17):
if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
jointNum += 1
else:
for i in range(16):
if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
jointNum += 1
    # Random crop when DPG augmentation is enabled
if opt.addDPG:
if jointNum > 13 and train:
switch = random.uniform(0, 1)
if switch > 0.96:
bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
elif switch > 0.92:
upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
elif switch > 0.88:
upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
elif switch > 0.84:
upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
elif switch > 0.80:
bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
elif switch > 0.76:
upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
elif switch > 0.72:
bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
elif switch > 0.68:
upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
ori_inp = cropBox(ori_img, upLeft, bottomRight, opt.inputResH, opt.inputResW)
inp = cropBox(img, upLeft, bottomRight, opt.inputResH, opt.inputResW)
if jointNum == 0:
inp = torch.zeros(3, opt.inputResH, opt.inputResW)
out_bigcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
out_smallcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
out = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
setMask = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
# Draw Label
if imgset == 'coco':
for i in range(nJoints_coco):
if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
out[i] = drawGaussian(out[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
setMask[i].add_(1)
elif imgset == 'mpii':
for i in range(nJoints_coco, nJoints_coco + nJoints_mpii):
if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \
and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]:
out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
setMask[i].add_(1)
else:
for i in range(nJoints_coco, nJoints_coco + nJoints_mpii):
if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \
and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]:
out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
if i != 6 + nJoints_coco and i != 7 + nJoints_coco:
setMask[i].add_(1)
if opt.debug:
preds_hm, preds_img, preds_scores = getPrediction(out.unsqueeze(0), upLeft.unsqueeze(0), bottomRight.unsqueeze(0), opt.inputResH,
opt.inputResW, opt.outputResH, opt.outputResW)
tmp_preds = preds_hm.mul(opt.inputResH / opt.outputResH)
drawCOCO(ori_inp.unsqueeze(0), tmp_preds, preds_scores)
if train:
# Flip
if random.uniform(0, 1) < 0.5:
inp = flip(inp)
ori_inp = flip(ori_inp)
out_bigcircle = shuffleLR(flip(out_bigcircle), dataset)
out_smallcircle = shuffleLR(flip(out_smallcircle), dataset)
out = shuffleLR(flip(out), dataset)
# Rotate
r = rnd(opt.rotate)
if random.uniform(0, 1) < 0.6:
r = 0
if r != 0:
inp = cv_rotate(inp, r, opt.inputResW, opt.inputResH)
out_bigcircle = cv_rotate(out_bigcircle, r, opt.outputResW, opt.outputResH)
out_smallcircle = cv_rotate(out_smallcircle, r, opt.outputResW, opt.outputResH)
out = cv_rotate(out, r, opt.outputResW, opt.outputResH)
return inp, out_bigcircle, out_smallcircle, out, setMask
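if __name__ == '__main__':
    # Illustrative usage sketch (not part of the original file): flipping a
    # heatmap horizontally must also swap left/right joint channels via
    # shuffleLR. The dataset object and joint pair below are made up
    # (flipRef uses 1-indexed pairs, as shuffleLR expects).
    class _ToyDataset:
        flipRef = [(1, 2)]

    hm = torch.zeros(4, 8, 8)
    hm[0, 2, 1] = 1.0                        # "left" joint response
    flipped = shuffleLR(flip(hm), _ToyDataset)
    assert flipped[1, 2, 6] == 1.0           # now the "right" joint, mirrored in x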

View File

@ -0,0 +1,192 @@
import time
import numpy as np
from collections import deque
from .linear_assignment import min_cost_matching, matching_cascade
from .kalman_filter import KalmanFilter
from .iou_matching import iou_cost
class TrackState:
"""Enumeration type for the single target track state. Newly created tracks are
classified as `tentative` until enough evidence has been collected. Then,
the track state is changed to `confirmed`. Tracks that are no longer alive
are classified as `deleted` to mark them for removal from the set of active
tracks.
"""
Tentative = 1
Confirmed = 2
Deleted = 3
class Detection(object):
    """This class represents the bounding box, keypoints, and score of a person
    detected in a single image.
    Args:
        tlbr: (float array) Bounding box in (xmin, ymin, xmax, ymax) format.
        keypoints: (float array) Of shape (node, pts).
        confidence: (float) Confidence score of the detection.
    """
def __init__(self, tlbr, keypoints, confidence):
self.tlbr = tlbr
self.keypoints = keypoints
self.confidence = confidence
def to_tlwh(self):
"""Get (top, left, width, height).
"""
ret = self.tlbr.copy()
ret[2:] = ret[2:] - ret[:2]
return ret
def to_xyah(self):
"""Get (x_center, y_center, aspect ratio, height).
"""
ret = self.to_tlwh()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
class Track:
def __init__(self, mean, covariance, track_id, n_init, max_age=30, buffer=30):
self.mean = mean
self.covariance = covariance
self.track_id = track_id
self.hist = 1
self.age = 1
self.time_since_update = 0
self.n_init = n_init
self.max_age = max_age
# keypoints list for use in Actions prediction.
self.keypoints_list = deque(maxlen=buffer)
self.state = TrackState.Tentative
def to_tlwh(self):
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
def to_tlbr(self):
ret = self.to_tlwh()
ret[2:] = ret[:2] + ret[2:]
return ret
def get_center(self):
return self.mean[:2].copy()
def predict(self, kf):
"""Propagate the state distribution to the current time step using a
Kalman filter prediction step.
"""
self.mean, self.covariance = kf.predict(self.mean, self.covariance)
self.age += 1
self.time_since_update += 1
def update(self, kf, detection):
"""Perform Kalman filter measurement update step.
"""
self.mean, self.covariance = kf.update(self.mean, self.covariance,
detection.to_xyah())
self.keypoints_list.append(detection.keypoints)
self.hist += 1
self.time_since_update = 0
if self.state == TrackState.Tentative and self.hist >= self.n_init:
self.state = TrackState.Confirmed
def mark_missed(self):
"""Mark this track as missed (no association at the current time step).
"""
if self.state == TrackState.Tentative:
self.state = TrackState.Deleted
elif self.time_since_update > self.max_age:
self.state = TrackState.Deleted
def is_tentative(self):
return self.state == TrackState.Tentative
def is_confirmed(self):
return self.state == TrackState.Confirmed
def is_deleted(self):
return self.state == TrackState.Deleted
class Tracker:
def __init__(self, max_iou_distance=0.7, max_age=30, n_init=5):
self.max_iou_dist = max_iou_distance
self.max_age = max_age
self.n_init = n_init
self.kf = KalmanFilter()
self.tracks = []
self._next_id = 1
def predict(self):
"""Propagate track state distributions one time step forward.
This function should be called once every time step, before `update`.
"""
for track in self.tracks:
track.predict(self.kf)
def update(self, detections):
"""Perform measurement update and track management.
Parameters
----------
detections : List[deep_sort.detection.Detection]
A list of detections at the current time step.
"""
# Run matching cascade.
matches, unmatched_tracks, unmatched_detections = self._match(detections)
# Update matched tracks set.
for track_idx, detection_idx in matches:
self.tracks[track_idx].update(self.kf, detections[detection_idx])
        # Mark unmatched tracks as missed.
for track_idx in unmatched_tracks:
self.tracks[track_idx].mark_missed()
        # Initiate new tracks from unmatched detections.
for detection_idx in unmatched_detections:
self._initiate_track(detections[detection_idx])
# Remove deleted tracks.
self.tracks = [t for t in self.tracks if not t.is_deleted()]
def _match(self, detections):
confirmed_tracks, unconfirmed_tracks = [], []
for i, t in enumerate(self.tracks):
if t.is_confirmed():
confirmed_tracks.append(i)
else:
unconfirmed_tracks.append(i)
matches_a, unmatched_tracks_a, unmatched_detections = matching_cascade(
iou_cost, self.max_iou_dist, self.max_age, self.tracks, detections, confirmed_tracks
)
track_candidates = unconfirmed_tracks + [
k for k in unmatched_tracks_a if self.tracks[k].time_since_update == 1]
unmatched_tracks_a = [
k for k in unmatched_tracks_a if self.tracks[k].time_since_update != 1]
matches_b, unmatched_tracks_b, unmatched_detections = min_cost_matching(
iou_cost, self.max_iou_dist, self.tracks, detections, track_candidates, unmatched_detections
)
matches = matches_a + matches_b
unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
return matches, unmatched_tracks, unmatched_detections
def _initiate_track(self, detection):
if detection.confidence < 0.4:
return
mean, covariance = self.kf.initiate(detection.to_xyah())
self.tracks.append(Track(mean, covariance, self._next_id, self.n_init, self.max_age))
self._next_id += 1
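if __name__ == '__main__':
    # Illustrative usage sketch (not part of the original file): track one
    # synthetic detection over a few frames. Boxes are (xmin, ymin, xmax,
    # ymax) and the keypoints are dummy values; the track should become
    # confirmed after n_init matched updates.
    tracker = Tracker(max_age=30, n_init=3)
    for frame_idx in range(10):
        box = np.array([50. + frame_idx, 60., 150. + frame_idx, 300.])
        dets = [Detection(box, np.zeros((14, 3)), confidence=0.9)]
        tracker.predict()       # Kalman prediction for every live track
        tracker.update(dets)    # association + measurement update
    print([t.track_id for t in tracker.tracks if t.is_confirmed()])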

View File

@ -0,0 +1,78 @@
import numpy as np
INFTY_COST = 1e+5
def iou(bbox, candidates):
"""Compute intersection over union.
Parameters
----------
bbox : ndarray
A bounding box in format `(xmin, ymin, xmax, ymax)`.
candidates : ndarray
A matrix of candidate bounding boxes (one per row) in the same format
as `bbox`.
Returns
-------
ndarray
The intersection over union in [0, 1] between the `bbox` and each
candidate. A higher score means a larger fraction of the `bbox` is
occluded by the candidate.
"""
#bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
bbox_tl, bbox_br = bbox[:2], bbox[2:]
candidates_tl = candidates[:, :2]
candidates_br = candidates[:, 2:] # + candidates[:, :2]
tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
wh = np.maximum(0., br - tl)
area_intersection = wh.prod(axis=1)
area_bbox = (bbox[2:] - bbox[:2]).prod()
area_candidates = (candidates[:, 2:] - candidates[:, :2]).prod(axis=1)
return area_intersection / (area_bbox + area_candidates - area_intersection)
def iou_cost(tracks, detections, track_indices=None, detection_indices=None):
"""An intersection over union distance metric.
Parameters
----------
tracks : List[Track]
A list of tracks.
detections : List[Detection]
A list of detections.
track_indices : Optional[List[int]]
A list of indices to tracks that should be matched. Defaults to
all `tracks`.
detection_indices : Optional[List[int]]
A list of indices to detections that should be matched. Defaults
to all `detections`.
Returns
-------
ndarray
Returns a cost matrix of shape
len(track_indices), len(detection_indices) where entry (i, j) is
`1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
for row, track_idx in enumerate(track_indices):
#if tracks[track_idx].time_since_update > 1:
# cost_matrix[row, :] = INFTY_COST
# continue
bbox = tracks[track_idx].to_tlbr()
candidates = np.asarray([detections[i].tlbr for i in detection_indices])
cost_matrix[row, :] = 1. - iou(bbox, candidates)
return cost_matrix
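if __name__ == '__main__':
    # Illustrative usage sketch (not part of the original file): IoU of a
    # query box against two candidates in (xmin, ymin, xmax, ymax) format.
    bbox = np.array([0., 0., 10., 10.])
    candidates = np.array([[5., 5., 15., 15.],     # overlap 25 / union 175
                           [20., 20., 30., 30.]])  # no overlap
    print(iou(bbox, candidates))  # -> [0.1428..., 0.]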

View File

@ -0,0 +1,198 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
class KalmanFilter(object):
"""A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space
x, y, a, h, vx, vy, va, vh
contains the bounding box center position (x, y), aspect ratio a, height h,
and their respective velocities.
Object motion follows a constant velocity model. The bounding box location
(x, y, a, h) is taken as direct observation of the state space (linear
observation model).
"""
def __init__(self):
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
self._motion_mat = np.eye(2 * ndim, 2 * ndim)
for i in range(ndim):
self._motion_mat[i, ndim + i] = dt
self._update_mat = np.eye(ndim, 2 * ndim)
# Motion and observation uncertainty are chosen relative to the current
# state estimate. These weights control the amount of uncertainty in
# the model. This is a bit hacky.
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initiate(self, measurement):
"""Create track from unassociated measurement.
Parameters
----------
measurement : ndarray
Bounding box coordinates (x, y, a, h) with center position (x, y),
aspect ratio a, and height h.
Returns
-------
(ndarray, ndarray)
Returns the mean vector (8 dimensional) and covariance matrix (8x8
dimensional) of the new track. Unobserved velocities are initialized
to 0 mean.
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
mean = np.r_[mean_pos, mean_vel]
std = [
2 * self._std_weight_position * measurement[3],
2 * self._std_weight_position * measurement[3],
1e-2,
2 * self._std_weight_position * measurement[3],
10 * self._std_weight_velocity * measurement[3],
10 * self._std_weight_velocity * measurement[3],
1e-5,
10 * self._std_weight_velocity * measurement[3]]
covariance = np.diag(np.square(std))
return mean, covariance
def predict(self, mean, covariance):
"""Run Kalman filter prediction step.
Parameters
----------
mean : ndarray
The 8 dimensional mean vector of the object state at the previous
time step.
covariance : ndarray
The 8x8 dimensional covariance matrix of the object state at the
previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted
state. Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[3],
self._std_weight_position * mean[3],
1e-2,
self._std_weight_position * mean[3]]
std_vel = [
self._std_weight_velocity * mean[3],
self._std_weight_velocity * mean[3],
1e-5,
self._std_weight_velocity * mean[3]]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
mean = np.dot(self._motion_mat, mean)
covariance = np.linalg.multi_dot((
self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
return mean, covariance
def project(self, mean, covariance):
"""Project state distribution to measurement space.
Parameters
----------
mean : ndarray
The state's mean vector (8 dimensional array).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
Returns
-------
(ndarray, ndarray)
Returns the projected mean and covariance matrix of the given state
estimate.
"""
std = [
self._std_weight_position * mean[3],
self._std_weight_position * mean[3],
1e-1,
self._std_weight_position * mean[3]]
innovation_cov = np.diag(np.square(std))
mean = np.dot(self._update_mat, mean)
covariance = np.linalg.multi_dot((
self._update_mat, covariance, self._update_mat.T))
return mean, covariance + innovation_cov
def update(self, mean, covariance, measurement):
"""Run Kalman filter correction step.
Parameters
----------
mean : ndarray
The predicted state's mean vector (8 dimensional).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
measurement : ndarray
The 4 dimensional measurement vector (x, y, a, h), where (x, y)
is the center position, a the aspect ratio, and h the height of the
bounding box.
Returns
-------
(ndarray, ndarray)
Returns the measurement-corrected state distribution.
"""
projected_mean, projected_cov = self.project(mean, covariance)
chol_factor, lower = scipy.linalg.cho_factor(
projected_cov, lower=True, check_finite=False)
kalman_gain = scipy.linalg.cho_solve(
(chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
check_finite=False).T
innovation = measurement - projected_mean
new_mean = mean + np.dot(innovation, kalman_gain.T)
new_covariance = covariance - np.linalg.multi_dot((
kalman_gain, projected_cov, kalman_gain.T))
return new_mean, new_covariance
def gating_distance(self, mean, covariance, measurements,
only_position=False):
"""Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Parameters
----------
mean : ndarray
Mean vector over the state distribution (8 dimensional).
covariance : ndarray
Covariance of the state distribution (8x8 dimensional).
measurements : ndarray
An Nx4 dimensional matrix of N measurements, each in
format (x, y, a, h) where (x, y) is the bounding box center
position, a the aspect ratio, and h the height.
only_position : Optional[bool]
If True, distance computation is done with respect to the bounding
box center position only.
Returns
-------
ndarray
Returns an array of length N, where the i-th element contains the
squared Mahalanobis distance between (mean, covariance) and
`measurements[i]`.
"""
mean, covariance = self.project(mean, covariance)
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
cholesky_factor = np.linalg.cholesky(covariance)
d = measurements - mean
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
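if __name__ == '__main__':
    # Illustrative usage sketch (not part of the original file): one full
    # initiate -> predict -> update cycle on made-up measurements in
    # (x_center, y_center, aspect_ratio, height) format.
    kf = KalmanFilter()
    mean, cov = kf.initiate(np.array([320., 240., 0.5, 200.]))
    mean, cov = kf.predict(mean, cov)     # constant-velocity prior
    z = np.array([324., 238., 0.5, 202.])
    mean, cov = kf.update(mean, cov, z)   # correction toward the measurement
    print(kf.gating_distance(mean, cov, z[np.newaxis, :]))  # squared Mahalanobis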

View File

@ -0,0 +1,191 @@
import numpy as np
#from sklearn.utils.linear_assignment_ import linear_assignment
from scipy.optimize import linear_sum_assignment
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919}
INFTY_COST = 1e+5
def min_cost_matching(distance_metric, max_distance, tracks, detections,
track_indices=None, detection_indices=None):
"""Solve linear assignment problem.
Parameters
----------
    distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
The distance metric is given a list of tracks and detections as well as
a list of N track indices and M detection indices. The metric should
return the NxM dimensional cost matrix, where element (i, j) is the
association cost between the i-th track in the given track indices and
the j-th detection in the given detection_indices.
max_distance : float
Gating threshold. Associations with cost larger than this value are
disregarded.
tracks : List[Track]
A list of predicted tracks at the current time step.
detections : List[Detection]
A list of detections at the current time step.
track_indices : List[int]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above).
detection_indices : List[int]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above).
Returns
-------
(List[(int, int)], List[int], List[int])
Returns a tuple with the following three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
if len(detection_indices) == 0 or len(track_indices) == 0:
return [], track_indices, detection_indices # Nothing to match.
cost_matrix = distance_metric(tracks, detections, track_indices, detection_indices)
cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
indices = linear_sum_assignment(cost_matrix)
indices = np.array(indices).transpose()
matches, unmatched_tracks, unmatched_detections = [], [], []
for col, detection_idx in enumerate(detection_indices):
if col not in indices[:, 1]:
unmatched_detections.append(detection_idx)
for row, track_idx in enumerate(track_indices):
if row not in indices[:, 0]:
unmatched_tracks.append(track_idx)
for row, col in indices:
track_idx = track_indices[row]
detection_idx = detection_indices[col]
if cost_matrix[row, col] > max_distance:
unmatched_tracks.append(track_idx)
unmatched_detections.append(detection_idx)
else:
matches.append((track_idx, detection_idx))
return matches, unmatched_tracks, unmatched_detections
def matching_cascade(distance_metric, max_distance, cascade_depth, tracks, detections,
track_indices=None, detection_indices=None):
"""Run matching cascade.
Parameters
----------
    distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
The distance metric is given a list of tracks and detections as well as
a list of N track indices and M detection indices. The metric should
return the NxM dimensional cost matrix, where element (i, j) is the
association cost between the i-th track in the given track indices and
the j-th detection in the given detection indices.
max_distance : float
Gating threshold. Associations with cost larger than this value are
disregarded.
    cascade_depth: int
        The cascade depth; this should be set to the maximum track age.
tracks : List[Track]
A list of predicted tracks at the current time step.
detections : List[Detection]
A list of detections at the current time step.
track_indices : Optional[List[int]]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above). Defaults to all tracks.
detection_indices : Optional[List[int]]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above). Defaults to all
detections.
Returns
-------
(List[(int, int)], List[int], List[int])
Returns a tuple with the following three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = list(range(len(tracks)))
if detection_indices is None:
detection_indices = list(range(len(detections)))
unmatched_detections = detection_indices
matches = []
for level in range(cascade_depth):
if len(unmatched_detections) == 0: # No detections left
break
track_indices_l = [k for k in track_indices
if tracks[k].time_since_update == 1 + level]
if len(track_indices_l) == 0: # Nothing to match at this level
continue
matches_l, _, unmatched_detections = min_cost_matching(
distance_metric, max_distance, tracks, detections, track_indices_l, unmatched_detections)
matches += matches_l
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
return matches, unmatched_tracks, unmatched_detections
def gate_cost_matrix(kf, cost_matrix, tracks, detections, track_indices, detection_indices,
gated_cost=INFTY_COST, only_position=False):
"""Invalidate infeasible entries in cost matrix based on the state
distributions obtained by Kalman filtering.
Parameters
----------
kf : The Kalman filter.
cost_matrix : ndarray
The NxM dimensional cost matrix, where N is the number of track indices
and M is the number of detection indices, such that entry (i, j) is the
association cost between `tracks[track_indices[i]]` and
`detections[detection_indices[j]]`.
tracks : List[Track]
A list of predicted tracks at the current time step.
detections : List[Detection]
A list of detections at the current time step.
track_indices : List[int]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above).
detection_indices : List[int]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above).
gated_cost : Optional[float]
Entries in the cost matrix corresponding to infeasible associations are
set this value. Defaults to a very large value.
only_position : Optional[bool]
If True, only the x, y position of the state distribution is considered
during gating. Defaults to False.
Returns
-------
ndarray
Returns the modified cost matrix.
"""
gating_dim = 2 if only_position else 4
gating_threshold = chi2inv95[gating_dim]
measurements = np.asarray([detections[i].to_xyah() for i in detection_indices])
for row, track_idx in enumerate(track_indices):
track = tracks[track_idx]
gating_distance = kf.gating_distance(track.mean, track.covariance,
measurements, only_position)
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
return cost_matrix
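if __name__ == '__main__':
    # Illustrative usage sketch (not part of the original file): a toy metric
    # over index lists (the track/detection objects are ignored here) showing
    # how the max_distance gate works in min_cost_matching. Entries above the
    # gate (0.8 and 0.9) are disregarded, so the 0.1/0.2 pairs win.
    def toy_metric(tracks, detections, track_indices, detection_indices):
        cost = np.array([[0.1, 0.9],
                         [0.8, 0.2]])
        return cost[np.ix_(track_indices, detection_indices)]

    matches, un_tracks, un_dets = min_cost_matching(
        toy_metric, max_distance=0.5, tracks=[None, None], detections=[None, None])
    print(matches)  # track 0 -> detection 0, track 1 -> detection 1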

View File

@ -0,0 +1,155 @@
import os
import cv2
import time
from fastapi import HTTPException
import torch
import argparse
import numpy as np
from .Detection.Utils import ResizePadding
from .CameraLoader import CamLoader, CamLoader_Q
from .DetectorLoader import TinyYOLOv3_onecls
from .PoseEstimateLoader import SPPE_FastPose
from .fn import draw_single
from .Track.Tracker import Detection, Tracker
from .ActionsEstLoader import TSSTG
from config import CONFIG_FILE, YOLO_WEIGHT_FILE, SPPE_WEIGHT_FILE, TSSTG_WEIGHT_FILE
INP_DETS = 384
INP_POSE = (224, 160)
POSE_BACKBONE = 'resnet50'
SHOW_DETECTED = False
SHOW_SKELETON = True
DEVICE = 'cuda'
resize_fn = ResizePadding(INP_DETS, INP_DETS)
def preproc(image):
"""preprocess function for CameraLoader.
"""
image = resize_fn(image)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
return image
def kpt2bbox(kpt, ex=20):
"""Get bbox that hold on all of the keypoints (x,y)
kpt: array of shape `(N, 2)`,
ex: (int) expand bounding box,
"""
return np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex,
kpt[:, 0].max() + ex, kpt[:, 1].max() + ex))
def generate_action_model_frame(source):
CAM_SOURCE = source
# Model initialization
detect_model = TinyYOLOv3_onecls(INP_DETS, device=DEVICE, config_file=CONFIG_FILE,
weight_file=YOLO_WEIGHT_FILE)
pose_model = SPPE_FastPose(POSE_BACKBONE, INP_POSE[0], INP_POSE[1], device=DEVICE, path=SPPE_WEIGHT_FILE)
action_model = TSSTG(weight_file=TSSTG_WEIGHT_FILE) # action model
# Tracker.
max_age = 30
tracker = Tracker(max_age=max_age, n_init=3)
cam = CamLoader(int(CAM_SOURCE) if CAM_SOURCE.isdigit() else CAM_SOURCE,
preprocess=preproc).start()
fps_time = 0
f = 0
while cam.grabbed():
f += 1
frame = cam.getitem()
image = frame.copy()
        # Detect human bounding boxes in the frame with the detector model.
detected = detect_model.detect(frame, need_resize=False, expand_bb=10)
        # Predict each track's bbox in the current frame from previous frames with the Kalman filter.
tracker.predict()
        # Merge the two sources of predicted bboxes together.
for track in tracker.tracks:
det = torch.tensor([track.to_tlbr().tolist() + [0.5, 1.0, 0.0]], dtype=torch.float32)
detected = torch.cat([detected, det], dim=0) if detected is not None else det
detections = [] # List of Detections object for tracking.
if detected is not None:
#detected = non_max_suppression(detected[None, :], 0.45, 0.2)[0]
            # Predict the skeleton pose inside each bbox.
poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
# Create Detections object.
detections = [Detection(kpt2bbox(ps['keypoints'].numpy()),
np.concatenate((ps['keypoints'].numpy(),
ps['kp_score'].numpy()), axis=1),
ps['kp_score'].mean().numpy()) for ps in poses]
# VISUALIZE.
if SHOW_DETECTED:
for bb in detected[:, 0:5]:
                    frame = cv2.rectangle(frame, (int(bb[0]), int(bb[1])), (int(bb[2]), int(bb[3])), (0, 0, 255), 1)
        # Update tracks by matching detections to existing tracks between the
        # current and previous frames, or create a new track if nothing matches.
tracker.update(detections)
# Predict Actions of each track.
for i, track in enumerate(tracker.tracks):
if not track.is_confirmed():
continue
track_id = track.track_id
bbox = track.to_tlbr().astype(int)
center = track.get_center().astype(int)
action = 'pending'
clr = (0, 255, 0)
            # Use a 30-frame window of keypoints for action prediction.
if len(track.keypoints_list) == 30:
pts = np.array(track.keypoints_list, dtype=np.float32)
out = action_model.predict(pts, frame.shape[:2])
action_name = action_model.class_names[out[0].argmax()]
action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100)
if action_name == 'Fall Down':
clr = (255, 0, 0)
elif action_name == 'Lying Down':
clr = (255, 200, 0)
# VISUALIZE.
if track.time_since_update == 0:
if SHOW_SKELETON:
frame = draw_single(frame, track.keypoints_list[-1])
frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1)
frame = cv2.putText(frame, str(track_id), (center[0], center[1]), cv2.FONT_HERSHEY_COMPLEX,
0.4, (255, 0, 0), 2)
frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15), cv2.FONT_HERSHEY_COMPLEX,
0.4, clr, 1)
# Show Frame.
frame = cv2.resize(frame, (0, 0), fx=2., fy=2.)
frame = cv2.putText(frame, '%d, FPS: %f' % (f, 1.0 / (time.time() - fps_time)),
(10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
frame = frame[:, :, ::-1]
fps_time = time.time()
# return frame for video streaming
ret, buffer = cv2.imencode('.jpg', frame)
if not ret:
# If encoding fails, raise an error to stop the streaming
raise HTTPException(status_code=500, detail="Frame encoding failed")
yield (b'--frame\r\n'
b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')
def output_action_detection():
pass
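if __name__ == '__main__':
    # Illustrative sketch (not part of the original commit): one way to
    # expose the MJPEG generator above through FastAPI. The app object,
    # route path, and uvicorn invocation are assumptions, not part of
    # this repository.
    import uvicorn
    from fastapi import FastAPI
    from fastapi.responses import StreamingResponse

    app = FastAPI()

    @app.get('/video/{source}')
    def stream_actions(source: str):
        # multipart/x-mixed-replace lets the browser render each JPEG part
        # as soon as it arrives, producing a live stream.
        return StreamingResponse(generate_action_model_frame(source),
                                 media_type='multipart/x-mixed-replace; boundary=frame')

    uvicorn.run(app, host='0.0.0.0', port=8000)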

View File

@ -0,0 +1,234 @@
import re
import collections.abc
import cv2
import time
import math
import torch
import numpy as np
RED = (0, 0, 255)
GREEN = (0, 255, 0)
BLUE = (255, 0, 0)
CYAN = (255, 255, 0)
YELLOW = (0, 255, 255)
ORANGE = (0, 165, 255)
PURPLE = (255, 0, 255)
"""COCO_PAIR = [(0, 1), (0, 2), (1, 3), (2, 4), # Head
(5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
(17, 11), (17, 12), # Body
(11, 13), (12, 14), (13, 15), (14, 16)]"""
COCO_PAIR = [(0, 13), (1, 2), (1, 3), (3, 5), (2, 4), (4, 6), (13, 7), (13, 8), # Body
(7, 9), (8, 10), (9, 11), (10, 12)]
POINT_COLORS = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), # Nose, LEye, REye, LEar, REar
(77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77), # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist
(204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), (0, 255, 255)] # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck
LINE_COLORS = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), (77, 255, 222),
(77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), (77, 222, 255),
(255, 156, 127), (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36)]
MPII_PAIR = [(8, 9), (11, 12), (11, 10), (2, 1), (1, 0), (13, 14), (14, 15), (3, 4), (4, 5),
(8, 7), (7, 6), (6, 2), (6, 3), (8, 12), (8, 13)]
numpy_type_map = {
'float64': torch.DoubleTensor,
'float32': torch.FloatTensor,
'float16': torch.HalfTensor,
'int64': torch.LongTensor,
'int32': torch.IntTensor,
'int16': torch.ShortTensor,
'int8': torch.CharTensor,
'uint8': torch.ByteTensor,
}
_use_shared_memory = True
def collate_fn(batch):
r"""Puts each data field into a tensor with outer dimension batch size"""
error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
elem_type = type(batch[0])
if isinstance(batch[0], torch.Tensor):
out = None
if _use_shared_memory:
# If we're in a background process, concatenate directly into a
# shared memory tensor to avoid an extra copy
numel = sum([x.numel() for x in batch])
storage = batch[0].storage()._new_shared(numel)
out = batch[0].new(storage)
return torch.stack(batch, 0, out=out)
elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
and elem_type.__name__ != 'string_':
elem = batch[0]
if elem_type.__name__ == 'ndarray':
# array of string classes and object
if re.search('[SaUO]', elem.dtype.str) is not None:
raise TypeError(error_msg.format(elem.dtype))
return torch.stack([torch.from_numpy(b) for b in batch], 0)
if elem.shape == (): # scalars
py_type = float if elem.dtype.name.startswith('float') else int
return numpy_type_map[elem.dtype.name](list(map(py_type, batch)))
elif isinstance(batch[0], int):
return torch.LongTensor(batch)
elif isinstance(batch[0], float):
return torch.DoubleTensor(batch)
elif isinstance(batch[0], (str, bytes)):
return batch
    elif isinstance(batch[0], collections.abc.Mapping):
        return {key: collate_fn([d[key] for d in batch]) for key in batch[0]}
    elif isinstance(batch[0], collections.abc.Sequence):
transposed = zip(*batch)
return [collate_fn(samples) for samples in transposed]
raise TypeError((error_msg.format(type(batch[0]))))
def collate_fn_list(batch):
img, inp, im_name = zip(*batch)
img = collate_fn(img)
im_name = collate_fn(im_name)
return img, inp, im_name
def draw_single(frame, pts, joint_format='coco'):
if joint_format == 'coco':
l_pair = COCO_PAIR
p_color = POINT_COLORS
line_color = LINE_COLORS
    elif joint_format == 'mpii':
        l_pair = MPII_PAIR
        p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
        line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
    else:
        raise NotImplementedError
part_line = {}
pts = np.concatenate((pts, np.expand_dims((pts[1, :] + pts[2, :]) / 2, 0)), axis=0)
for n in range(pts.shape[0]):
if pts[n, 2] <= 0.05:
continue
cor_x, cor_y = int(pts[n, 0]), int(pts[n, 1])
part_line[n] = (cor_x, cor_y)
cv2.circle(frame, (cor_x, cor_y), 3, p_color[n], -1)
for i, (start_p, end_p) in enumerate(l_pair):
if start_p in part_line and end_p in part_line:
start_xy = part_line[start_p]
end_xy = part_line[end_p]
cv2.line(frame, start_xy, end_xy, line_color[i], int(1*(pts[start_p, 2] + pts[end_p, 2]) + 1))
return frame
def vis_frame_fast(frame, im_res, joint_format='coco'):
"""
frame: frame image
im_res: im_res of predictions
format: coco or mpii
return rendered image
"""
if joint_format == 'coco':
l_pair = COCO_PAIR
p_color = POINT_COLORS
line_color = LINE_COLORS
    elif joint_format == 'mpii':
        l_pair = MPII_PAIR
        p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
        line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
    else:
        raise NotImplementedError
#im_name = im_res['imgname'].split('/')[-1]
img = frame
for human in im_res: # ['result']:
part_line = {}
kp_preds = human['keypoints']
kp_scores = human['kp_score']
kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[1, :]+kp_preds[2, :]) / 2, 0)))
kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[1, :]+kp_scores[2, :]) / 2, 0)))
# Draw keypoints
for n in range(kp_scores.shape[0]):
if kp_scores[n] <= 0.05:
continue
cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1])
part_line[n] = (cor_x, cor_y)
cv2.circle(img, (cor_x, cor_y), 4, p_color[n], -1)
# Draw limbs
for i, (start_p, end_p) in enumerate(l_pair):
if start_p in part_line and end_p in part_line:
start_xy = part_line[start_p]
end_xy = part_line[end_p]
                cv2.line(img, start_xy, end_xy, line_color[i], int(2 * (kp_scores[start_p] + kp_scores[end_p]) + 1))
return img
def vis_frame(frame, im_res, joint_format='coco'):
    """
    frame: frame image
    im_res: prediction results, a dict with 'imgname' and a 'result' list of per-person dicts
    joint_format: 'coco' or 'mpii'
    return rendered image
    """
if joint_format == 'coco':
l_pair = COCO_PAIR
p_color = POINT_COLORS
line_color = LINE_COLORS
elif joint_format == 'mpii':
l_pair = MPII_PAIR
p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
else:
raise NotImplementedError
im_name = im_res['imgname'].split('/')[-1]
img = frame
height, width = img.shape[:2]
img = cv2.resize(img, (int(width/2), int(height/2)))
for human in im_res['result']:
part_line = {}
kp_preds = human['keypoints']
kp_scores = human['kp_score']
kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[5, :]+kp_preds[6, :]) / 2, 0)))
kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[5, :]+kp_scores[6, :]) / 2, 0)))
# Draw keypoints
for n in range(kp_scores.shape[0]):
if kp_scores[n] <= 0.05:
continue
cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1])
part_line[n] = (int(cor_x/2), int(cor_y/2))
bg = img.copy()
cv2.circle(bg, (int(cor_x/2), int(cor_y/2)), 2, p_color[n], -1)
            # Alpha-blend the keypoint onto the image with score-based transparency.
            transparency = float(max(0, min(1, kp_scores[n])))
            img = cv2.addWeighted(bg, transparency, img, 1 - transparency, 0)
# Draw limbs
for i, (start_p, end_p) in enumerate(l_pair):
if start_p in part_line and end_p in part_line:
start_xy = part_line[start_p]
end_xy = part_line[end_p]
bg = img.copy()
X = (start_xy[0], end_xy[0])
Y = (start_xy[1], end_xy[1])
mX = np.mean(X)
mY = np.mean(Y)
length = ((Y[0] - Y[1]) ** 2 + (X[0] - X[1]) ** 2) ** 0.5
angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1]))
                stickwidth = int((kp_scores[start_p] + kp_scores[end_p]) + 1)
                polygon = cv2.ellipse2Poly((int(mX), int(mY)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
                cv2.fillConvexPoly(bg, polygon, line_color[i])
                #cv2.line(bg, start_xy, end_xy, line_color[i], (2 * (kp_scores[start_p] + kp_scores[end_p])) + 1)
                transparency = float(max(0, min(1, 0.5 * (kp_scores[start_p] + kp_scores[end_p]))))
                img = cv2.addWeighted(bg, transparency, img, 1 - transparency, 0)
img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC)
return img
def getTime(time1=0):
if not time1:
return time.time()
else:
interval = time.time() - time1
return time.time(), interval
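if __name__ == '__main__':
    # Illustrative usage sketch (not part of the original file): draw a random
    # 13-keypoint skeleton (x, y, score) on a blank frame. draw_single appends
    # the mid-shoulder "neck" joint itself before drawing; all values here are
    # made up.
    frame = np.zeros((240, 320, 3), dtype=np.uint8)
    pts = np.random.rand(13, 3).astype(np.float32)
    pts[:, 0] *= 320  # x in pixels
    pts[:, 1] *= 240  # y in pixels
    frame = draw_single(frame, pts)
    cv2.imwrite('skeleton_debug.jpg', frame)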

View File

@ -0,0 +1,284 @@
# -*- coding: utf-8 -*-
import torch
import json
import os
import zipfile
import time
from multiprocessing.dummy import Pool as ThreadPool
import numpy as np
''' Constant Configuration '''
delta1 = 1
mu = 1.7
delta2 = 2.65
gamma = 22.48
scoreThreds = 0.3
matchThreds = 5
areaThres = 0 # 40 * 40.5
alpha = 0.1
#pool = ThreadPool(4)
def pose_nms(bboxes, bbox_scores, pose_preds, pose_scores):
"""
Parametric Pose NMS algorithm
bboxes: bbox locations list (n, 4)
bbox_scores: bbox scores list (n,)
pose_preds: pose locations list (n, 17, 2)
pose_scores: pose scores list (n, 17, 1)
"""
global ori_pose_preds, ori_pose_scores, ref_dists
pose_scores[pose_scores == 0] = 1e-5
final_result = []
ori_bboxes = bboxes.clone()
ori_bbox_scores = bbox_scores.clone()
ori_pose_preds = pose_preds.clone()
ori_pose_scores = pose_scores.clone()
xmax = bboxes[:, 2]
xmin = bboxes[:, 0]
ymax = bboxes[:, 3]
ymin = bboxes[:, 1]
widths = xmax - xmin
heights = ymax - ymin
ref_dists = alpha * np.maximum(widths, heights)
nsamples = bboxes.shape[0]
human_scores = pose_scores.mean(dim=1)
human_ids = np.arange(nsamples)
# Do pPose-NMS
pick = []
merge_ids = []
while human_scores.shape[0] != 0:
# Pick the one with highest score
pick_id = torch.argmax(human_scores)
pick.append(human_ids[pick_id])
# num_visPart = torch.sum(pose_scores[pick_id] > 0.2)
# Get numbers of match keypoints by calling PCK_match
ref_dist = ref_dists[human_ids[pick_id]]
simi = get_parametric_distance(pick_id, pose_preds, pose_scores, ref_dist)
num_match_keypoints = PCK_match(pose_preds[pick_id], pose_preds, ref_dist)
# Delete humans who have more than matchThreds keypoints overlap and high similarity
delete_ids = torch.from_numpy(np.arange(human_scores.shape[0]))[
(simi > gamma) | (num_match_keypoints >= matchThreds)]
if delete_ids.shape[0] == 0:
delete_ids = pick_id
#else:
# delete_ids = torch.from_numpy(delete_ids)
merge_ids.append(human_ids[delete_ids])
pose_preds = np.delete(pose_preds, delete_ids, axis=0)
pose_scores = np.delete(pose_scores, delete_ids, axis=0)
human_ids = np.delete(human_ids, delete_ids)
human_scores = np.delete(human_scores, delete_ids, axis=0)
bbox_scores = np.delete(bbox_scores, delete_ids, axis=0)
assert len(merge_ids) == len(pick)
bboxs_pick = ori_bboxes[pick]
preds_pick = ori_pose_preds[pick]
scores_pick = ori_pose_scores[pick]
bbox_scores_pick = ori_bbox_scores[pick]
#final_result = pool.map(filter_result, zip(scores_pick, merge_ids, preds_pick, pick, bbox_scores_pick))
#final_result = [item for item in final_result if item is not None]
for j in range(len(pick)):
ids = np.arange(pose_preds.shape[1])
max_score = torch.max(scores_pick[j, ids, 0])
if max_score < scoreThreds:
continue
# Merge poses
merge_id = merge_ids[j]
merge_pose, merge_score = p_merge_fast(
preds_pick[j], ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick[j]])
max_score = torch.max(merge_score[ids])
if max_score < scoreThreds:
continue
xmax = max(merge_pose[:, 0])
xmin = min(merge_pose[:, 0])
ymax = max(merge_pose[:, 1])
ymin = min(merge_pose[:, 1])
if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < areaThres:
continue
final_result.append({
'bbox': bboxs_pick[j],
'bbox_score': bbox_scores_pick[j],
'keypoints': merge_pose - 0.3,
'kp_score': merge_score,
'proposal_score': torch.mean(merge_score) + bbox_scores_pick[j] + 1.25 * max(merge_score)
})
return final_result
def filter_result(args):
score_pick, merge_id, pred_pick, pick, bbox_score_pick = args
global ori_pose_preds, ori_pose_scores, ref_dists
ids = np.arange(17)
max_score = torch.max(score_pick[ids, 0])
if max_score < scoreThreds:
return None
# Merge poses
merge_pose, merge_score = p_merge_fast(
pred_pick, ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick])
max_score = torch.max(merge_score[ids])
if max_score < scoreThreds:
return None
xmax = max(merge_pose[:, 0])
xmin = min(merge_pose[:, 0])
ymax = max(merge_pose[:, 1])
ymin = min(merge_pose[:, 1])
if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < 40 * 40.5:
return None
return {
'keypoints': merge_pose - 0.3,
'kp_score': merge_score,
'proposal_score': torch.mean(merge_score) + bbox_score_pick + 1.25 * max(merge_score)
}
def p_merge(ref_pose, cluster_preds, cluster_scores, ref_dist):
"""
Score-weighted pose merging
INPUT:
ref_pose: reference pose -- [17, 2]
cluster_preds: redundant poses -- [n, 17, 2]
cluster_scores: redundant poses score -- [n, 17, 1]
ref_dist: reference scale -- Constant
OUTPUT:
final_pose: merged pose -- [17, 2]
final_score: merged score -- [17]
"""
dist = torch.sqrt(torch.sum(
torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2),
dim=2
)) # [n, 17]
kp_num = 17
ref_dist = min(ref_dist, 15)
mask = (dist <= ref_dist)
final_pose = torch.zeros(kp_num, 2)
final_score = torch.zeros(kp_num)
if cluster_preds.dim() == 2:
cluster_preds.unsqueeze_(0)
cluster_scores.unsqueeze_(0)
if mask.dim() == 1:
mask.unsqueeze_(0)
for i in range(kp_num):
cluster_joint_scores = cluster_scores[:, i][mask[:, i]] # [k, 1]
cluster_joint_location = cluster_preds[:, i, :][mask[:, i].unsqueeze(
-1).repeat(1, 2)].view((torch.sum(mask[:, i]), -1))
        # Get a normalized score
normed_scores = cluster_joint_scores / torch.sum(cluster_joint_scores)
# Merge poses by a weighted sum
final_pose[i, 0] = torch.dot(cluster_joint_location[:, 0], normed_scores.squeeze(-1))
final_pose[i, 1] = torch.dot(cluster_joint_location[:, 1], normed_scores.squeeze(-1))
final_score[i] = torch.dot(cluster_joint_scores.transpose(0, 1).squeeze(0), normed_scores.squeeze(-1))
return final_pose, final_score
def p_merge_fast(ref_pose, cluster_preds, cluster_scores, ref_dist):
"""
Score-weighted pose merging
INPUT:
ref_pose: reference pose -- [17, 2]
cluster_preds: redundant poses -- [n, 17, 2]
cluster_scores: redundant poses score -- [n, 17, 1]
ref_dist: reference scale -- Constant
OUTPUT:
final_pose: merged pose -- [17, 2]
final_score: merged score -- [17]
"""
dist = torch.sqrt(torch.sum(
torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2),
dim=2
))
kp_num = 17
ref_dist = min(ref_dist, 15)
mask = (dist <= ref_dist)
final_pose = torch.zeros(kp_num, 2)
final_score = torch.zeros(kp_num)
if cluster_preds.dim() == 2:
cluster_preds.unsqueeze_(0)
cluster_scores.unsqueeze_(0)
if mask.dim() == 1:
mask.unsqueeze_(0)
# Weighted Merge
masked_scores = cluster_scores.mul(mask.float().unsqueeze(-1))
normed_scores = masked_scores / torch.sum(masked_scores, dim=0)
final_pose = torch.mul(cluster_preds, normed_scores.repeat(1, 1, 2)).sum(dim=0)
final_score = torch.mul(masked_scores, normed_scores).sum(dim=0)
return final_pose, final_score
def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist):
pick_preds = all_preds[i]
pred_scores = keypoint_scores[i]
dist = torch.sqrt(torch.sum(
torch.pow(pick_preds[np.newaxis, :] - all_preds, 2),
dim=2
))
mask = (dist <= 1)
# Define a keypoints distance
score_dists = torch.zeros(all_preds.shape[0], all_preds.shape[1])
keypoint_scores.squeeze_()
if keypoint_scores.dim() == 1:
keypoint_scores.unsqueeze_(0)
if pred_scores.dim() == 1:
pred_scores.unsqueeze_(1)
    # Repeat the predicted scores so they broadcast against all poses.
pred_scores = pred_scores.repeat(1, all_preds.shape[0]).transpose(0, 1)
score_dists[mask] = torch.tanh(pred_scores[mask] / delta1) *\
torch.tanh(keypoint_scores[mask] / delta1)
point_dist = torch.exp((-1) * dist / delta2)
final_dist = torch.sum(score_dists, dim=1) + mu * torch.sum(point_dist, dim=1)
return final_dist
def PCK_match(pick_pred, all_preds, ref_dist):
dist = torch.sqrt(torch.sum(
torch.pow(pick_pred[np.newaxis, :] - all_preds, 2),
dim=2
))
ref_dist = min(ref_dist, 7)
num_match_keypoints = torch.sum(
dist / ref_dist <= 1,
dim=1
)
return num_match_keypoints
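if __name__ == '__main__':
    # Illustrative usage sketch (not part of the original file): PCK matching
    # between a picked pose and two candidates. The second candidate is far
    # away, so none of its joints fall within the reference distance.
    pick = torch.zeros(17, 2)
    others = torch.stack([pick + 0.5, pick + 100.])
    print(PCK_match(pick, others, ref_dist=10))  # -> tensor([17, 0])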

View File

@ -0,0 +1,27 @@
import numpy as np
def normalize_points_with_size(xy, width, height, flip=False):
"""Normalize scale points in image with size of image to (0-1).
xy : (frames, parts, xy) or (parts, xy)
"""
if xy.ndim == 2:
xy = np.expand_dims(xy, 0)
xy[:, :, 0] /= width
xy[:, :, 1] /= height
if flip:
xy[:, :, 0] = 1 - xy[:, :, 0]
return xy
def scale_pose(xy):
"""Normalize pose points by scale with max/min value of each pose.
xy : (frames, parts, xy) or (parts, xy)
"""
if xy.ndim == 2:
xy = np.expand_dims(xy, 0)
xy_min = np.nanmin(xy, axis=1)
xy_max = np.nanmax(xy, axis=1)
for i in range(xy.shape[0]):
xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1
return xy.squeeze()
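if __name__ == '__main__':
    # Illustrative usage sketch (not part of the original file): normalize a
    # single made-up 3-keypoint pose by image size, then rescale it to [-1, 1].
    pts = np.array([[[64., 120.],
                     [320., 240.],
                     [576., 360.]]], dtype=np.float32)
    pts = normalize_points_with_size(pts, width=640, height=480)
    pts = scale_pose(pts)
    print(pts.min(), pts.max())  # -> -1.0 1.0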