mirror of https://github.com/Sosokker/HomieCare.git (synced 2025-12-19 02:04:03 +01:00)
Add action detection model based on https://github.com/GajuuzZ/Human-Falling-Detect-Tracks
This commit is contained in:
parent
fda46f5b1b
commit
adb6ec6497
3  .gitignore  vendored
@@ -186,4 +186,5 @@ dist-ssr
config.json

ActionDetector/
*.pth
*.cfg
52  StreamServer/src/analytic/action/ActionsEstLoader.py  Normal file
@@ -0,0 +1,52 @@
import os
import torch
import numpy as np

from .Actionsrecognition.Models import TwoStreamSpatialTemporalGraph
from .pose_utils import normalize_points_with_size, scale_pose


class TSSTG(object):
    """Two-Stream Spatial Temporal Graph Model Loader.
    Args:
        weight_file: (str) Path to trained weights file.
        device: (str) Device to load the model on, 'cpu' or 'cuda'.
    """
    def __init__(self,
                 weight_file='./Models/TSSTG/tsstg-model.pth',
                 device='cuda'):
        self.graph_args = {'strategy': 'spatial'}
        self.class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
                            'Stand up', 'Sit down', 'Fall Down']
        self.num_class = len(self.class_names)
        self.device = device

        self.model = TwoStreamSpatialTemporalGraph(self.graph_args, self.num_class).to(self.device)
        self.model.load_state_dict(torch.load(weight_file))
        self.model.eval()

    def predict(self, pts, image_size):
        """Predict actions from a single person's skeleton points and scores in time sequence.
        Args:
            pts: (numpy array) points and scores in shape `(t, v, c)` where
                t : input sequence length (time steps),
                v : number of graph nodes (body parts),
                c : channels (x, y, score).
            image_size: (tuple of int) width, height of the image frame.
        Returns:
            (numpy array) Probability of each action class.
        """
        pts[:, :, :2] = normalize_points_with_size(pts[:, :, :2], image_size[0], image_size[1])
        pts[:, :, :2] = scale_pose(pts[:, :, :2])
        pts = np.concatenate((pts, np.expand_dims((pts[:, 1, :] + pts[:, 2, :]) / 2, 1)), axis=1)

        pts = torch.tensor(pts, dtype=torch.float32)
        pts = pts.permute(2, 0, 1)[None, :]

        mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
        mot = mot.to(self.device)
        pts = pts.to(self.device)

        out = self.model((pts, mot))

        return out.detach().cpu().numpy()
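A minimal usage sketch for the loader above — illustrative, not part of the commit; the weights path, device, and 30-frame window are assumptions taken from the defaults:

# Hedged sketch: feed a rolling buffer of 30 frames x 13 joints x (x, y, score);
# predict() appends the 14th (center) node itself.
import numpy as np
from ActionsEstLoader import TSSTG  # import path assumed; adjust to the package layout

action_model = TSSTG(weight_file='./Models/TSSTG/tsstg-model.pth', device='cuda')
pts = np.random.rand(30, 13, 3).astype(np.float32)  # stand-in for real tracked keypoints
probs = action_model.predict(pts, image_size=(640, 480))  # shape (1, 7)
print(action_model.class_names[probs[0].argmax()])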
244  StreamServer/src/analytic/action/Actionsrecognition/Models.py  Normal file
@@ -0,0 +1,244 @@
### Reference from: https://github.com/yysijie/st-gcn/tree/master/net

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from .Utils import Graph


class GraphConvolution(nn.Module):
    """The basic module for applying a graph convolution.
    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (int) Size of the graph convolving kernel.
        - t_kernel_size: (int) Size of the temporal convolving kernel.
        - t_stride: (int, optional) Stride of the temporal convolution. Default: 1
        - t_padding: (int, optional) Temporal zero-padding added to both sides of
            the input. Default: 0
        - t_dilation: (int, optional) Spacing between temporal kernel elements. Default: 1
        - bias: (bool, optional) If `True`, adds a learnable bias to the output.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
                 A: Graph adjacency matrix in :math:`(K, V, V)`,
        - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`

            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
                :math:`T_{in}/T_{out}` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.

    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 t_kernel_size=1,
                 t_stride=1,
                 t_padding=0,
                 t_dilation=1,
                 bias=True):
        super().__init__()

        self.kernel_size = kernel_size
        self.conv = nn.Conv2d(in_channels,
                              out_channels * kernel_size,
                              kernel_size=(t_kernel_size, 1),
                              padding=(t_padding, 0),
                              stride=(t_stride, 1),
                              dilation=(t_dilation, 1),
                              bias=bias)

    def forward(self, x, A):
        x = self.conv(x)
        n, kc, t, v = x.size()
        x = x.view(n, self.kernel_size, kc // self.kernel_size, t, v)
        x = torch.einsum('nkctv,kvw->nctw', (x, A))

        return x.contiguous()


class st_gcn(nn.Module):
    """Applies a spatial temporal graph convolution over an input graph sequence.
    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (tuple) Size of the temporal convolving kernel and
            graph convolving kernel.
        - stride: (int, optional) Stride of the temporal convolution. Default: 1
        - dropout: (int, optional) Dropout rate of the final output. Default: 0
        - residual: (bool, optional) If `True`, applies a residual mechanism.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
                 A: Graph adjacency matrix in :math:`(K, V, V)`,
        - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`
            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
                :math:`T_{in}/T_{out}` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.
    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1,
                 dropout=0,
                 residual=True):
        super().__init__()
        assert len(kernel_size) == 2
        assert kernel_size[0] % 2 == 1

        padding = ((kernel_size[0] - 1) // 2, 0)

        self.gcn = GraphConvolution(in_channels, out_channels, kernel_size[1])
        self.tcn = nn.Sequential(nn.BatchNorm2d(out_channels),
                                 nn.ReLU(inplace=True),
                                 nn.Conv2d(out_channels,
                                           out_channels,
                                           (kernel_size[0], 1),
                                           (stride, 1),
                                           padding),
                                 nn.BatchNorm2d(out_channels),
                                 nn.Dropout(dropout, inplace=True)
                                 )

        if not residual:
            self.residual = lambda x: 0
        elif (in_channels == out_channels) and (stride == 1):
            self.residual = lambda x: x
        else:
            self.residual = nn.Sequential(nn.Conv2d(in_channels,
                                                    out_channels,
                                                    kernel_size=1,
                                                    stride=(stride, 1)),
                                          nn.BatchNorm2d(out_channels)
                                          )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, A):
        res = self.residual(x)
        x = self.gcn(x, A)
        x = self.tcn(x) + res

        return self.relu(x)


class StreamSpatialTemporalGraph(nn.Module):
    """Spatial temporal graph convolutional networks.
    Args:
        - in_channels: (int) Number of input channels.
        - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class.
        - num_class: (int) Number of class outputs. If `None`, returns the pooled
            features of the last st-gcn layer instead.
        - edge_importance_weighting: (bool) If `True`, adds a learnable importance
            weighting to the edges of the graph.
        - **kwargs: (optional) Other parameters for graph convolution units.
    Shape:
        - Input: :math:`(N, in_channels, T_{in}, V_{in})`
        - Output: :math:`(N, num_class)` where
            :math:`N` is a batch size,
            :math:`T_{in}` is a length of input sequence,
            :math:`V_{in}` is the number of graph nodes,
          or, if num_class is `None`, :math:`(N, out_channels)` where
            :math:`out_channels` is the number of output channels of the last layer.
    """
    def __init__(self, in_channels, graph_args, num_class=None,
                 edge_importance_weighting=True, **kwargs):
        super().__init__()
        # Load graph.
        graph = Graph(**graph_args)
        A = torch.tensor(graph.A, dtype=torch.float32, requires_grad=False)
        self.register_buffer('A', A)

        # Networks.
        spatial_kernel_size = A.size(0)
        temporal_kernel_size = 9
        kernel_size = (temporal_kernel_size, spatial_kernel_size)
        kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}

        self.data_bn = nn.BatchNorm1d(in_channels * A.size(1))
        self.st_gcn_networks = nn.ModuleList((
            st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 128, kernel_size, 2, **kwargs),
            st_gcn(128, 128, kernel_size, 1, **kwargs),
            st_gcn(128, 128, kernel_size, 1, **kwargs),
            st_gcn(128, 256, kernel_size, 2, **kwargs),
            st_gcn(256, 256, kernel_size, 1, **kwargs),
            st_gcn(256, 256, kernel_size, 1, **kwargs)
        ))

        # Initialize parameters for edge importance weighting.
        if edge_importance_weighting:
            self.edge_importance = nn.ParameterList([
                nn.Parameter(torch.ones(A.size()))
                for i in self.st_gcn_networks
            ])
        else:
            self.edge_importance = [1] * len(self.st_gcn_networks)

        if num_class is not None:
            self.cls = nn.Conv2d(256, num_class, kernel_size=1)
        else:
            self.cls = lambda x: x

    def forward(self, x):
        # Data normalization.
        N, C, T, V = x.size()
        x = x.permute(0, 3, 1, 2).contiguous()  # (N, V, C, T)
        x = x.view(N, V * C, T)
        x = self.data_bn(x)
        x = x.view(N, V, C, T)
        x = x.permute(0, 2, 3, 1).contiguous()
        x = x.view(N, C, T, V)

        # Forward.
        for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
            x = gcn(x, self.A * importance)

        x = F.avg_pool2d(x, x.size()[2:])
        x = self.cls(x)
        x = x.view(x.size(0), -1)

        return x


class TwoStreamSpatialTemporalGraph(nn.Module):
    """Two-input spatial temporal graph convolutional networks.
    Args:
        - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class.
        - num_class: (int) Number of class outputs.
        - edge_importance_weighting: (bool) If `True`, adds a learnable importance
            weighting to the edges of the graph.
        - **kwargs: (optional) Other parameters for graph convolution units.
    Shape:
        - Input: a tuple :math:`((N, 3, T, V), (N, 2, T, V))`
            for the points and motion streams, where
            :math:`N` is a batch size,
            the channels are (x, y, score) for points and (mot_x, mot_y) for motion,
            :math:`T` is a length of input sequence,
            :math:`V` is the number of graph nodes,
        - Output: :math:`(N, num_class)`
    """
    def __init__(self, graph_args, num_class, edge_importance_weighting=True,
                 **kwargs):
        super().__init__()
        self.pts_stream = StreamSpatialTemporalGraph(3, graph_args, None,
                                                     edge_importance_weighting,
                                                     **kwargs)
        self.mot_stream = StreamSpatialTemporalGraph(2, graph_args, None,
                                                     edge_importance_weighting,
                                                     **kwargs)

        self.fcn = nn.Linear(256 * 2, num_class)

    def forward(self, inputs):
        out1 = self.pts_stream(inputs[0])
        out2 = self.mot_stream(inputs[1])

        concat = torch.cat([out1, out2], dim=-1)
        out = self.fcn(concat)

        return torch.sigmoid(out)
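A hedged shape check for the two-stream model (illustrative; the import path and batch/time sizes are assumptions). Both streams pool over time independently, so the motion stream's shorter sequence is fine:

import torch
from Actionsrecognition.Models import TwoStreamSpatialTemporalGraph  # path assumed

model = TwoStreamSpatialTemporalGraph({'strategy': 'spatial'}, num_class=7)
pts = torch.randn(8, 3, 30, 14)               # (N, C, T, V): x, y, score channels
mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]  # (N, 2, T-1, V) motion stream
out = model((pts, mot))
print(out.shape)                              # torch.Size([8, 7]), sigmoid scores per class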
123  StreamServer/src/analytic/action/Actionsrecognition/Utils.py  Normal file
@@ -0,0 +1,123 @@
### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py

import os
import torch
import numpy as np


class Graph:
    """The Graph to model the skeletons extracted by AlphaPose.
    Args:
        - strategy: (string) must be one of the following candidates
            - uniform: Uniform Labeling,
            - distance: Distance Partitioning,
            - spatial: Spatial Configuration,
            For more information, please refer to the section 'Partition Strategies'
            in the ST-GCN paper (https://arxiv.org/abs/1801.07455).
        - layout: (string) must be one of the following candidates
            - coco_cut: COCO format with 4 joints (L-R ears, L-R eyes) cut out.
        - max_hop: (int) the maximal distance between two connected nodes.
        - dilation: (int) controls the spacing between the kernel points.
    """
    def __init__(self,
                 layout='coco_cut',
                 strategy='uniform',
                 max_hop=1,
                 dilation=1):
        self.max_hop = max_hop
        self.dilation = dilation

        self.get_edge(layout)
        self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop)
        self.get_adjacency(strategy)

    def get_edge(self, layout):
        if layout == 'coco_cut':
            self.num_node = 14
            self_link = [(i, i) for i in range(self.num_node)]
            neighbor_link = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10),
                             (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)]
            self.edge = self_link + neighbor_link
            self.center = 13
        else:
            raise ValueError('This layout is not supported!')

    def get_adjacency(self, strategy):
        valid_hop = range(0, self.max_hop + 1, self.dilation)
        adjacency = np.zeros((self.num_node, self.num_node))
        for hop in valid_hop:
            adjacency[self.hop_dis == hop] = 1
        normalize_adjacency = normalize_digraph(adjacency)

        if strategy == 'uniform':
            A = np.zeros((1, self.num_node, self.num_node))
            A[0] = normalize_adjacency
            self.A = A
        elif strategy == 'distance':
            A = np.zeros((len(valid_hop), self.num_node, self.num_node))
            for i, hop in enumerate(valid_hop):
                A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis == hop]
            self.A = A
        elif strategy == 'spatial':
            A = []
            for hop in valid_hop:
                a_root = np.zeros((self.num_node, self.num_node))
                a_close = np.zeros((self.num_node, self.num_node))
                a_further = np.zeros((self.num_node, self.num_node))
                for i in range(self.num_node):
                    for j in range(self.num_node):
                        if self.hop_dis[j, i] == hop:
                            if self.hop_dis[j, self.center] == self.hop_dis[i, self.center]:
                                a_root[j, i] = normalize_adjacency[j, i]
                            elif self.hop_dis[j, self.center] > self.hop_dis[i, self.center]:
                                a_close[j, i] = normalize_adjacency[j, i]
                            else:
                                a_further[j, i] = normalize_adjacency[j, i]
                if hop == 0:
                    A.append(a_root)
                else:
                    A.append(a_root + a_close)
                    A.append(a_further)
            A = np.stack(A)
            self.A = A
            #self.A = np.swapaxes(np.swapaxes(A, 0, 1), 1, 2)
        else:
            raise ValueError("This strategy is not supported!")


def get_hop_distance(num_node, edge, max_hop=1):
    A = np.zeros((num_node, num_node))
    for i, j in edge:
        A[j, i] = 1
        A[i, j] = 1

    # Compute hop steps.
    hop_dis = np.zeros((num_node, num_node)) + np.inf
    transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)]
    arrive_mat = (np.stack(transfer_mat) > 0)
    for d in range(max_hop, -1, -1):
        hop_dis[arrive_mat[d]] = d
    return hop_dis


def normalize_digraph(A):
    Dl = np.sum(A, 0)
    num_node = A.shape[0]
    Dn = np.zeros((num_node, num_node))
    for i in range(num_node):
        if Dl[i] > 0:
            Dn[i, i] = Dl[i]**(-1)
    AD = np.dot(A, Dn)
    return AD


def normalize_undigraph(A):
    Dl = np.sum(A, 0)
    num_node = A.shape[0]
    Dn = np.zeros((num_node, num_node))
    for i in range(num_node):
        if Dl[i] > 0:
            Dn[i, i] = Dl[i]**(-0.5)
    DAD = np.dot(np.dot(Dn, A), Dn)
    return DAD
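For reference, normalize_digraph computes A·D⁻¹ with D_ii = Σ_j A_ji (column sums), so each column of the normalized adjacency sums to 1, and normalize_undigraph computes the symmetric D^(-1/2)·A·D^(-1/2). A quick, hedged check of the graph shapes (the import path is an assumption):

from Actionsrecognition.Utils import Graph  # path assumed

g = Graph(layout='coco_cut', strategy='spatial')
print(g.A.shape)  # (3, 14, 14): root / close / further partitions over 14 nodes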
216  StreamServer/src/analytic/action/Actionsrecognition/train.py  Normal file
@@ -0,0 +1,216 @@
import os
import time
import torch
import pickle
import numpy as np
import torch.nn.functional as F
from shutil import copyfile
from tqdm import tqdm
from torch.utils import data
from torch.optim.adadelta import Adadelta
from sklearn.model_selection import train_test_split

from .Models import *
from Visualizer import plot_graphs, plot_confusion_metrix


save_folder = 'saved/TSSTG(pts+mot)-01(cf+hm-hm)'

device = 'cuda'
epochs = 30
batch_size = 32

# DATA FILES.
# Should be in the format of
#   inputs: (N_samples, time_steps, graph_node, channels),
#   labels: (N_samples, num_class)
# with some normalization applied. Default data is created by
# Data.create_dataset_(1-3).py, where
#   time_steps: Number of frames in the input sequence. Default: 30
#   graph_node: Number of nodes in the skeleton. Default: 14
#   channels: Input data (x, y and scores). Default: 3
#   num_class: Number of pose classes to train. Default: 7

data_files = ['../Data/Coffee_room_new-set(labelXscrw).pkl',
              '../Data/Home_new-set(labelXscrw).pkl']
class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
               'Stand up', 'Sit down', 'Fall Down']
num_class = len(class_names)


def load_dataset(data_files, batch_size, split_size=0):
    """Load data files into torch DataLoaders, with or without a train-test split."""
    features, labels = [], []
    for fil in data_files:
        with open(fil, 'rb') as f:
            fts, lbs = pickle.load(f)
            features.append(fts)
            labels.append(lbs)
        del fts, lbs
    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels, axis=0)

    if split_size > 0:
        x_train, x_valid, y_train, y_valid = train_test_split(features, labels, test_size=split_size,
                                                              random_state=9)
        train_set = data.TensorDataset(torch.tensor(x_train, dtype=torch.float32).permute(0, 3, 1, 2),
                                       torch.tensor(y_train, dtype=torch.float32))
        valid_set = data.TensorDataset(torch.tensor(x_valid, dtype=torch.float32).permute(0, 3, 1, 2),
                                       torch.tensor(y_valid, dtype=torch.float32))
        train_loader = data.DataLoader(train_set, batch_size, shuffle=True)
        valid_loader = data.DataLoader(valid_set, batch_size)
    else:
        train_set = data.TensorDataset(torch.tensor(features, dtype=torch.float32).permute(0, 3, 1, 2),
                                       torch.tensor(labels, dtype=torch.float32))
        train_loader = data.DataLoader(train_set, batch_size, shuffle=True)
        valid_loader = None
    return train_loader, valid_loader


def accuracy_batch(y_pred, y_true):
    return (y_pred.argmax(1) == y_true.argmax(1)).mean()


def set_training(model, mode=True):
    for p in model.parameters():
        p.requires_grad = mode
    model.train(mode)
    return model


if __name__ == '__main__':
    save_folder = os.path.join(os.path.dirname(__file__), save_folder)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    # DATA.
    train_loader, _ = load_dataset(data_files[0:1], batch_size)
    valid_loader, train_loader_ = load_dataset(data_files[1:2], batch_size, 0.2)

    train_loader = data.DataLoader(data.ConcatDataset([train_loader.dataset, train_loader_.dataset]),
                                   batch_size, shuffle=True)
    dataloader = {'train': train_loader, 'valid': valid_loader}
    del train_loader_

    # MODEL.
    graph_args = {'strategy': 'spatial'}
    model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device)

    #optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    optimizer = Adadelta(model.parameters())

    losser = torch.nn.BCELoss()

    # TRAINING.
    loss_list = {'train': [], 'valid': []}
    accu_list = {'train': [], 'valid': []}
    for e in range(epochs):
        print('Epoch {}/{}'.format(e, epochs - 1))
        for phase in ['train', 'valid']:
            if phase == 'train':
                model = set_training(model, True)
            else:
                model = set_training(model, False)

            run_loss = 0.0
            run_accu = 0.0
            with tqdm(dataloader[phase], desc=phase) as iterator:
                for pts, lbs in iterator:
                    # Create the motion input from the displacement of each
                    # node's (x, y) points between two consecutive frames.
                    mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]

                    mot = mot.to(device)
                    pts = pts.to(device)
                    lbs = lbs.to(device)

                    # Forward.
                    out = model((pts, mot))
                    loss = losser(out, lbs)

                    if phase == 'train':
                        # Backward.
                        model.zero_grad()
                        loss.backward()
                        optimizer.step()

                    run_loss += loss.item()
                    accu = accuracy_batch(out.detach().cpu().numpy(),
                                          lbs.detach().cpu().numpy())
                    run_accu += accu

                    iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
                        loss.item(), accu))
                    iterator.update()
                    #break
            loss_list[phase].append(run_loss / len(iterator))
            accu_list[phase].append(run_accu / len(iterator))
        #break

        print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:'
              ' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1],
                                             loss_list['valid'][-1], accu_list['valid'][-1]))

        # SAVE.
        torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model.pth'))

        plot_graphs(list(loss_list.values()), list(loss_list.keys()),
                    'Last Train: {:.2f}, Valid: {:.2f}'.format(
                        loss_list['train'][-1], loss_list['valid'][-1]
                    ), 'Loss', xlim=[0, epochs],
                    save=os.path.join(save_folder, 'loss_graph.png'))
        plot_graphs(list(accu_list.values()), list(accu_list.keys()),
                    'Last Train: {:.2f}, Valid: {:.2f}'.format(
                        accu_list['train'][-1], accu_list['valid'][-1]
                    ), 'Accu', xlim=[0, epochs],
                    save=os.path.join(save_folder, 'accu_graph.png'))

        #break

    del train_loader, valid_loader

    model.load_state_dict(torch.load(os.path.join(save_folder, 'tsstg-model.pth')))

    # EVALUATION.
    model = set_training(model, False)
    data_file = data_files[1]
    eval_loader, _ = load_dataset([data_file], 32)

    print('Evaluation.')
    run_loss = 0.0
    run_accu = 0.0
    y_preds = []
    y_trues = []
    with tqdm(eval_loader, desc='eval') as iterator:
        for pts, lbs in iterator:
            mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
            mot = mot.to(device)
            pts = pts.to(device)
            lbs = lbs.to(device)

            out = model((pts, mot))
            loss = losser(out, lbs)

            run_loss += loss.item()
            accu = accuracy_batch(out.detach().cpu().numpy(),
                                  lbs.detach().cpu().numpy())
            run_accu += accu

            y_preds.extend(out.argmax(1).detach().cpu().numpy())
            y_trues.extend(lbs.argmax(1).cpu().numpy())

            iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
                loss.item(), accu))
            iterator.update()

    run_loss = run_loss / len(iterator)
    run_accu = run_accu / len(iterator)

    plot_confusion_metrix(y_trues, y_preds, class_names, 'Eval on: {}\nLoss: {:.4f}, Accu: {:.4f}'.format(
        os.path.basename(data_file), run_loss, run_accu
    ), 'true', save=os.path.join(save_folder, '{}-confusion_matrix.png'.format(
        os.path.basename(data_file).split('.')[0])))

    print('Eval Loss: {:.4f}, Accu: {:.4f}'.format(run_loss, run_accu))
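A hedged sketch of producing a dataset file in the format load_dataset() above expects (placeholder values and path, not a real dataset):

import pickle
import numpy as np

features = np.random.rand(100, 30, 14, 3).astype(np.float32)  # (N, time_steps, graph_node, channels)
labels = np.eye(7)[np.random.randint(0, 7, size=100)].astype(np.float32)  # (N, num_class) one-hot
with open('../Data/dummy-set.pkl', 'wb') as f:  # placeholder path
    pickle.dump((features, labels), f)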
204  StreamServer/src/analytic/action/CameraLoader.py  Normal file
@@ -0,0 +1,204 @@
import os
import cv2
import time
import torch
import numpy as np

from queue import Queue
from threading import Thread, Lock


class CamLoader:
    """Use a thread to capture frames from a camera for faster frame loading.
    Recommended for a camera or webcam.

    Args:
        camera: (int, str) Source of camera or video,
        preprocess: (Callable function) to process the frame before returning it.
    """
    def __init__(self, camera, preprocess=None, ori_return=False):
        self.stream = cv2.VideoCapture(camera)
        assert self.stream.isOpened(), 'Cannot read camera source!'
        self.fps = self.stream.get(cv2.CAP_PROP_FPS)
        self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                           int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))

        self.stopped = False
        self.ret = False
        self.frame = None
        self.ori_frame = None
        self.read_lock = Lock()
        self.ori = ori_return

        self.preprocess_fn = preprocess

    def start(self):
        self.t = Thread(target=self.update, args=())  # , daemon=True)
        self.t.start()
        c = 0
        while not self.ret:
            time.sleep(0.1)
            c += 1
            if c > 20:
                self.stop()
                raise TimeoutError('Can not get a frame from camera!!!')
        return self

    def update(self):
        while not self.stopped:
            ret, frame = self.stream.read()
            self.read_lock.acquire()
            if ret:
                # Guard the copy: `frame` is None when the read fails.
                self.ori_frame = frame.copy()
                if self.preprocess_fn is not None:
                    frame = self.preprocess_fn(frame)

            self.ret, self.frame = ret, frame
            self.read_lock.release()

    def grabbed(self):
        """Return `True` if a frame can be read."""
        return self.ret

    def getitem(self):
        self.read_lock.acquire()
        frame = self.frame.copy()
        ori_frame = self.ori_frame.copy()
        self.read_lock.release()
        if self.ori:
            return frame, ori_frame
        else:
            return frame

    def stop(self):
        if self.stopped:
            return
        self.stopped = True
        if self.t.is_alive():
            self.t.join()
        self.stream.release()

    def __del__(self):
        if self.stream.isOpened():
            self.stream.release()

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.stream.isOpened():
            self.stream.release()


class CamLoader_Q:
    """Use a thread and a queue to capture frames and store them for pickup in sequence.
    Recommended for a video file.

    Args:
        camera: (int, str) Source of camera or video,
        batch_size: (int) Number of frames per batch stored in the queue. Default: 1,
        queue_size: (int) Maximum queue size. Default: 256,
        preprocess: (Callable function) to process the frame before returning it.
    """
    def __init__(self, camera, batch_size=1, queue_size=256, preprocess=None):
        self.stream = cv2.VideoCapture(camera)
        assert self.stream.isOpened(), 'Cannot read camera source!'
        self.fps = self.stream.get(cv2.CAP_PROP_FPS)
        self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                           int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))

        # Queue for storing each frame.

        self.stopped = False
        self.batch_size = batch_size
        self.Q = Queue(maxsize=queue_size)

        self.preprocess_fn = preprocess

    def start(self):
        # Thread.start() returns None, so there is nothing useful to keep.
        Thread(target=self.update, args=(), daemon=True).start()
        c = 0
        while not self.grabbed():
            time.sleep(0.1)
            c += 1
            if c > 20:
                self.stop()
                raise TimeoutError('Can not get a frame from camera!!!')
        return self

    def update(self):
        while not self.stopped:
            if not self.Q.full():
                frames = []
                for k in range(self.batch_size):
                    ret, frame = self.stream.read()
                    if not ret:
                        self.stop()
                        return

                    if self.preprocess_fn is not None:
                        frame = self.preprocess_fn(frame)

                    frames.append(frame)
                frames = np.stack(frames)
                self.Q.put(frames)
            else:
                with self.Q.mutex:
                    self.Q.queue.clear()
            # time.sleep(0.05)

    def grabbed(self):
        """Return `True` if a frame can be read."""
        return self.Q.qsize() > 0

    def getitem(self):
        return self.Q.get().squeeze()

    def stop(self):
        if self.stopped:
            return
        self.stopped = True
        self.stream.release()

    def __len__(self):
        return self.Q.qsize()

    def __del__(self):
        if self.stream.isOpened():
            self.stream.release()

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.stream.isOpened():
            self.stream.release()


if __name__ == '__main__':
    fps_time = 0

    # Using threading.
    cam = CamLoader(0).start()
    while cam.grabbed():
        frames = cam.getitem()

        frames = cv2.putText(frames, 'FPS: %f' % (1.0 / (time.time() - fps_time)),
                             (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        fps_time = time.time()
        cv2.imshow('frame', frames)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cam.stop()
    cv2.destroyAllWindows()

    # Normal video capture.
    """cam = cv2.VideoCapture(0)
    while True:
        ret, frame = cam.read()
        if ret:
            #time.sleep(0.05)
            #frame = (cv2.flip(frame, 1) / 255.).astype(np.float)

            frame = cv2.putText(frame, 'FPS: %f' % (1.0 / (time.time() - fps_time)),
                                (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            fps_time = time.time()
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    cam.release()
    cv2.destroyAllWindows()"""
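A hedged sketch of CamLoader_Q, which the docstring recommends for video files ('video.mp4' is a placeholder path):

import cv2
from CameraLoader import CamLoader_Q  # import path assumed

cam = CamLoader_Q('video.mp4', queue_size=256).start()
while cam.grabbed():
    frame = cam.getitem()  # frames come out in order from the queue
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cam.stop()
cv2.destroyAllWindows()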
85  StreamServer/src/analytic/action/Data/create_dataset_1.py  Normal file
@@ -0,0 +1,85 @@
"""
This script creates a .csv action annotation file for video frames.

- It plays a video frame by frame; control the flow with [a] and [d]
    to step to the previous or next frame.
- Open the annot_file (.csv) and label each frame of the video with the
    action class number.
"""

import os
import cv2
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
               'Stand up', 'Sit down', 'Fall Down']  # label.

video_folder = '../Data/falldata/Home/Videos'
annot_file = '../Data/Home_new.csv'

index_video_to_play = 0  # Choose video to play.


def create_csv(folder):
    list_file = sorted(os.listdir(folder))
    cols = ['video', 'frame', 'label']
    df = pd.DataFrame(columns=cols)
    for fil in list_file:
        cap = cv2.VideoCapture(os.path.join(folder, fil))
        frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        video = np.array([fil] * frames_count)
        frame = np.arange(1, frames_count + 1)
        label = np.array([0] * frames_count)
        rows = np.stack([video, frame, label], axis=1)
        df = df.append(pd.DataFrame(rows, columns=cols),
                       ignore_index=True)
        cap.release()
    df.to_csv(annot_file, index=False)


if not os.path.exists(annot_file):
    create_csv(video_folder)

annot = pd.read_csv(annot_file)
video_list = annot.iloc[:, 0].unique()
video_file = os.path.join(video_folder, video_list[index_video_to_play])
print(os.path.basename(video_file))

annot = annot[annot['video'] == video_list[index_video_to_play]].reset_index(drop=True)
frames_idx = annot.iloc[:, 1].tolist()

cap = cv2.VideoCapture(video_file)
frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

assert frames_count == len(frames_idx), 'frame count not equal! {} and {}'.format(
    len(frames_idx), frames_count
)

i = 0
while True:
    cap.set(cv2.CAP_PROP_POS_FRAMES, i)
    ret, frame = cap.read()
    if ret:
        cls_name = class_names[int(annot.iloc[i, -1]) - 1]
        frame = cv2.resize(frame, (0, 0), fx=1.5, fy=1.5)
        frame = cv2.putText(frame, 'Frame: {} Pose: {}'.format(i+1, cls_name),
                            (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.imshow('frame', frame)

        key = cv2.waitKey(0) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('d'):
            i += 1
            continue
        elif key == ord('a'):
            i -= 1
            continue
    else:
        break

cap.release()
cv2.destroyAllWindows()
137  StreamServer/src/analytic/action/Data/create_dataset_2.py  Normal file
@@ -0,0 +1,137 @@
"""
This script extracts skeleton joint positions and scores.

- The 'annot_folder' holds the action class and bounding box for each frame,
    as shipped with the dataset. It should be in the format
    [frame_idx, action_cls, xmin, ymin, xmax, ymax] and is used to crop a
    person for the pose estimation model.
- If you have no annotation file, you can leave annot_folder = '' to use the
    detector model to get the bounding box.
"""

import os
import cv2
import time
import torch
import pandas as pd
import numpy as np
import torchvision.transforms as transforms

from DetectorLoader import TinyYOLOv3_onecls
from PoseEstimateLoader import SPPE_FastPose
from fn import vis_frame_fast

save_path = '../../Data/Home_new-pose+score.csv'

annot_file = '../../Data/Home_new.csv'  # from create_dataset_1.py
video_folder = '../Data/falldata/Home/Videos'
annot_folder = '../Data/falldata/Home/Annotation_files'  # bounding box annotation for each frame.

# DETECTION MODEL.
detector = TinyYOLOv3_onecls()

# POSE MODEL.
inp_h = 320
inp_w = 256
pose_estimator = SPPE_FastPose(inp_h, inp_w)

# with score.
columns = ['video', 'frame', 'Nose_x', 'Nose_y', 'Nose_s', 'LShoulder_x', 'LShoulder_y', 'LShoulder_s',
           'RShoulder_x', 'RShoulder_y', 'RShoulder_s', 'LElbow_x', 'LElbow_y', 'LElbow_s', 'RElbow_x',
           'RElbow_y', 'RElbow_s', 'LWrist_x', 'LWrist_y', 'LWrist_s', 'RWrist_x', 'RWrist_y', 'RWrist_s',
           'LHip_x', 'LHip_y', 'LHip_s', 'RHip_x', 'RHip_y', 'RHip_s', 'LKnee_x', 'LKnee_y', 'LKnee_s',
           'RKnee_x', 'RKnee_y', 'RKnee_s', 'LAnkle_x', 'LAnkle_y', 'LAnkle_s', 'RAnkle_x', 'RAnkle_y',
           'RAnkle_s', 'label']


def normalize_points_with_size(points_xy, width, height, flip=False):
    points_xy[:, 0] /= width
    points_xy[:, 1] /= height
    if flip:
        points_xy[:, 0] = 1 - points_xy[:, 0]
    return points_xy


annot = pd.read_csv(annot_file)
vid_list = annot['video'].unique()
for vid in vid_list:
    print(f'Process on: {vid}')
    df = pd.DataFrame(columns=columns)
    cur_row = 0

    # Pose Labels.
    frames_label = annot[annot['video'] == vid].reset_index(drop=True)

    cap = cv2.VideoCapture(os.path.join(video_folder, vid))
    frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    # Bounding Box Labels.
    # Note: build 'name.txt' by concatenation (os.path.join would treat '.txt'
    # as a subdirectory), and keep the boxes in their own variable so the pose
    # annotation table above is not clobbered on later iterations.
    bb_file = os.path.join(annot_folder, vid.split('.')[0] + '.txt')
    bb_annot = None
    if os.path.exists(bb_file):
        bb_annot = pd.read_csv(bb_file, header=None,
                               names=['frame_idx', 'class', 'xmin', 'ymin', 'xmax', 'ymax'])
        bb_annot = bb_annot.dropna().reset_index(drop=True)

        assert frames_count == len(bb_annot), 'frame count not equal! {} and {}'.format(
            frames_count, len(bb_annot))

    fps_time = 0
    i = 1
    while True:
        ret, frame = cap.read()
        if ret:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            cls_idx = int(frames_label[frames_label['frame'] == i]['label'])

            if bb_annot is not None:
                bb = np.array(bb_annot.iloc[i-1, 2:].astype(int))
            else:
                bb = detector.detect(frame)[0, :4].numpy().astype(int)
            bb[:2] = np.maximum(0, bb[:2] - 5)
            bb[2:] = np.minimum(frame_size, bb[2:] + 5) if bb[2:].any() != 0 else bb[2:]

            result = []
            if bb.any() != 0:
                result = pose_estimator.predict(frame, torch.tensor(bb[None, ...]),
                                                torch.tensor([[1.0]]))

            if len(result) > 0:
                pt_norm = normalize_points_with_size(result[0]['keypoints'].numpy().copy(),
                                                     frame_size[0], frame_size[1])
                pt_norm = np.concatenate((pt_norm, result[0]['kp_score']), axis=1)

                #idx = result[0]['kp_score'] <= 0.05
                #pt_norm[idx.squeeze()] = np.nan
                row = [vid, i, *pt_norm.flatten().tolist(), cls_idx]
                scr = result[0]['kp_score'].mean()
            else:
                row = [vid, i, *[np.nan] * (13 * 3), cls_idx]
                scr = 0.0

            df.loc[cur_row] = row
            cur_row += 1

            # VISUALIZE.
            frame = vis_frame_fast(frame, result)
            frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)
            frame = cv2.putText(frame, 'Frame: {}, Pose: {}, Score: {:.4f}'.format(i, cls_idx, scr),
                                (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            frame = frame[:, :, ::-1]
            fps_time = time.time()
            i += 1

            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break

    cap.release()
    cv2.destroyAllWindows()

    if os.path.exists(save_path):
        df.to_csv(save_path, mode='a', header=False, index=False)
    else:
        df.to_csv(save_path, mode='w', index=False)
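For reference, a hedged sketch of reading one of the per-video bounding-box annotation files described in the docstring above (the file name is a placeholder):

import pandas as pd

boxes = pd.read_csv('video (1).txt', header=None,  # placeholder file name
                    names=['frame_idx', 'class', 'xmin', 'ymin', 'xmax', 'ymax'])
print(boxes.head())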
127  StreamServer/src/analytic/action/Data/create_dataset_3.py  Normal file
@@ -0,0 +1,127 @@
"""
This script creates the dataset and labels by cleaning off NaN rows, normalizing,
label smoothing, and weighting labels by scores.

"""
import os
import pickle
import numpy as np
import pandas as pd


class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
               'Stand up', 'Sit down', 'Fall Down']
main_parts = ['LShoulder_x', 'LShoulder_y', 'RShoulder_x', 'RShoulder_y', 'LHip_x', 'LHip_y',
              'RHip_x', 'RHip_y']
main_idx_parts = [1, 2, 7, 8, -1]  # 1.5

csv_pose_file = '../Data/Coffee_room_new-pose+score.csv'
save_path = '../../Data/Coffee_room_new-set(labelXscrw).pkl'

# Params.
smooth_labels_step = 8
n_frames = 30
skip_frame = 1

annot = pd.read_csv(csv_pose_file)

# Remove NaN.
idx = annot.iloc[:, 2:-1][main_parts].isna().sum(1) > 0
idx = np.where(idx)[0]
annot = annot.drop(idx)
# One-Hot Labels.
label_onehot = pd.get_dummies(annot['label'])
annot = annot.drop('label', axis=1).join(label_onehot)
cols = label_onehot.columns.values


def scale_pose(xy):
    """
    Normalize pose points by scaling with the max/min values of each pose.
    xy : (frames, parts, xy) or (parts, xy)
    """
    if xy.ndim == 2:
        xy = np.expand_dims(xy, 0)
    xy_min = np.nanmin(xy, axis=1)
    xy_max = np.nanmax(xy, axis=1)
    for i in range(xy.shape[0]):
        xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1
    return xy.squeeze()


def seq_label_smoothing(labels, max_step=10):
    steps = 0
    remain_step = 0
    target_label = 0
    active_label = 0
    start_change = 0
    max_val = np.max(labels)
    min_val = np.min(labels)
    for i in range(labels.shape[0]):
        if remain_step > 0:
            if i >= start_change:
                labels[i][active_label] = max_val * remain_step / steps
                labels[i][target_label] = max_val * (steps - remain_step) / steps \
                    if max_val * (steps - remain_step) / steps else min_val
                remain_step -= 1
            continue

        diff_index = np.where(np.argmax(labels[i:i+max_step], axis=1) - np.argmax(labels[i]) != 0)[0]
        if len(diff_index) > 0:
            start_change = i + remain_step // 2
            steps = diff_index[0]
            remain_step = steps
            target_label = np.argmax(labels[i + remain_step])
            active_label = np.argmax(labels[i])
    return labels


feature_set = np.empty((0, n_frames, 14, 3))
labels_set = np.empty((0, len(cols)))
vid_list = annot['video'].unique()
for vid in vid_list:
    print(f'Process on: {vid}')
    data = annot[annot['video'] == vid].reset_index(drop=True).drop(columns='video')

    # Label Smoothing.
    esp = 0.1
    data[cols] = data[cols] * (1 - esp) + (1 - data[cols]) * esp / (len(cols) - 1)
    data[cols] = seq_label_smoothing(data[cols].values, smooth_labels_step)

    # Separate continuous frames.
    frames = data['frame'].values
    frames_set = []
    fs = [0]
    for i in range(1, len(frames)):
        if frames[i] < frames[i-1] + 10:
            fs.append(i)
        else:
            frames_set.append(fs)
            fs = [i]
    frames_set.append(fs)

    for fs in frames_set:
        xys = data.iloc[fs, 1:-len(cols)].values.reshape(-1, 13, 3)
        # Scale pose normalize.
        xys[:, :, :2] = scale_pose(xys[:, :, :2])
        # Add center point.
        xys = np.concatenate((xys, np.expand_dims((xys[:, 1, :] + xys[:, 2, :]) / 2, 1)), axis=1)

        # Weighting main parts score.
        scr = xys[:, :, -1].copy()
        scr[:, main_idx_parts] = np.minimum(scr[:, main_idx_parts] * 1.5, 1.0)
        # Mean score.
        scr = scr.mean(1)

        # Targets.
        lb = data.iloc[fs, -len(cols):].values
        # Apply points score mean to all labels.
        lb = lb * scr[:, None]

        for i in range(xys.shape[0] - n_frames):
            feature_set = np.append(feature_set, xys[i:i+n_frames][None, ...], axis=0)
            labels_set = np.append(labels_set, lb[i:i+n_frames].mean(0)[None, ...], axis=0)


"""with open(save_path, 'wb') as f:
    pickle.dump((feature_set, labels_set), f)"""
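A hedged demo of seq_label_smoothing above (assumes the function is in scope; the toy labels are placeholders): it ramps one-hot targets across a class transition instead of switching them abruptly.

import numpy as np

labels = np.eye(2)[[0, 0, 0, 0, 1, 1, 1, 1]].astype(float)
smoothed = seq_label_smoothing(labels.copy(), max_step=4)
print(smoothed.round(2))  # rows near the 0 -> 1 transition blend, e.g. [0.67, 0.33] then [0.33, 0.67]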
348  StreamServer/src/analytic/action/Detection/Models.py  Normal file
@@ -0,0 +1,348 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
|
||||
from .Utils import build_targets, to_cpu, parse_model_config
|
||||
|
||||
|
||||
def create_modules(module_defs):
|
||||
"""
|
||||
Constructs module list of layer blocks from module configuration in module_defs
|
||||
"""
|
||||
hyperparams = module_defs.pop(0)
|
||||
output_filters = [int(hyperparams["channels"])] # [3]
|
||||
module_list = nn.ModuleList()
|
||||
for module_i, module_def in enumerate(module_defs):
|
||||
modules = nn.Sequential()
|
||||
|
||||
if module_def["type"] == "convolutional":
|
||||
bn = int(module_def["batch_normalize"])
|
||||
filters = int(module_def["filters"])
|
||||
kernel_size = int(module_def["size"])
|
||||
pad = (kernel_size - 1) // 2
|
||||
modules.add_module(
|
||||
f"conv_{module_i}",
|
||||
nn.Conv2d(
|
||||
in_channels=output_filters[-1],
|
||||
out_channels=filters,
|
||||
kernel_size=kernel_size,
|
||||
stride=int(module_def["stride"]),
|
||||
padding=pad,
|
||||
bias=not bn,
|
||||
),
|
||||
)
|
||||
if bn:
|
||||
modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
|
||||
if module_def["activation"] == "leaky":
|
||||
modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))
|
||||
|
||||
elif module_def["type"] == "maxpool":
|
||||
kernel_size = int(module_def["size"])
|
||||
stride = int(module_def["stride"])
|
||||
if kernel_size == 2 and stride == 1:
|
||||
modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
|
||||
maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
|
||||
modules.add_module(f"maxpool_{module_i}", maxpool)
|
||||
|
||||
elif module_def["type"] == "upsample":
|
||||
upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
|
||||
modules.add_module(f"upsample_{module_i}", upsample)
|
||||
|
||||
elif module_def["type"] == "route":
|
||||
layers = [int(x) for x in module_def["layers"].split(",")]
|
||||
filters = sum([output_filters[1:][i] for i in layers])
|
||||
modules.add_module(f"route_{module_i}", EmptyLayer())
|
||||
|
||||
elif module_def["type"] == "shortcut":
|
||||
filters = output_filters[1:][int(module_def["from"])]
|
||||
modules.add_module(f"shortcut_{module_i}", EmptyLayer())
|
||||
|
||||
elif module_def["type"] == "yolo":
|
||||
anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
|
||||
# Extract anchors
|
||||
anchors = [int(x) for x in module_def["anchors"].split(",")]
|
||||
anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
|
||||
anchors = [anchors[i] for i in anchor_idxs]
|
||||
num_classes = int(module_def["classes"])
|
||||
img_size = int(hyperparams["height"])
|
||||
# Define detection layer
|
||||
yolo_layer = YOLOLayer(anchors, num_classes, img_size)
|
||||
modules.add_module(f"yolo_{module_i}", yolo_layer)
|
||||
# Register module list and number of output filters
|
||||
module_list.append(modules)
|
||||
output_filters.append(filters)
|
||||
|
||||
return hyperparams, module_list
|
||||
|
||||
|
||||
class Upsample(nn.Module):
|
||||
""" nn.Upsample is deprecated """
|
||||
def __init__(self, scale_factor, mode="nearest"):
|
||||
super(Upsample, self).__init__()
|
||||
self.scale_factor = scale_factor
|
||||
self.mode = mode
|
||||
|
||||
def forward(self, x):
|
||||
x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
|
||||
return x
|
||||
|
||||
|
||||
class EmptyLayer(nn.Module):
|
||||
"""Placeholder for 'route' and 'shortcut' layers"""
|
||||
def __init__(self):
|
||||
super(EmptyLayer, self).__init__()
|
||||
|
||||
|
||||
class YOLOLayer(nn.Module):
|
||||
"""Detection layer"""
|
||||
def __init__(self, anchors, num_classes, img_dim=416):
|
||||
super(YOLOLayer, self).__init__()
|
||||
self.anchors = anchors
|
||||
self.num_anchors = len(anchors)
|
||||
self.num_classes = num_classes
|
||||
self.ignore_thres = 0.5
|
||||
self.mse_loss = nn.MSELoss()
|
||||
self.bce_loss = nn.BCELoss()
|
||||
self.obj_scale = 1
|
||||
self.noobj_scale = 100
|
||||
self.metrics = {}
|
||||
self.img_dim = img_dim
|
||||
self.grid_size = 0 # grid size
|
||||
|
||||
def compute_grid_offsets(self, grid_size, cuda=True):
|
||||
self.grid_size = grid_size
|
||||
g = self.grid_size
|
||||
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
|
||||
self.stride = self.img_dim / self.grid_size
|
||||
# Calculate offsets for each grid
|
||||
self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor)
|
||||
self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor)
|
||||
self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors])
|
||||
self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
|
||||
self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
|
||||
|
||||
def forward(self, x, targets=None, img_dim=None):
|
||||
# Tensors for cuda support
|
||||
FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
|
||||
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
|
||||
ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
|
||||
|
||||
self.img_dim = img_dim
|
||||
num_samples = x.size(0)
|
||||
grid_size = x.size(2)
|
||||
|
||||
prediction = (
|
||||
x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
|
||||
.permute(0, 1, 3, 4, 2)
|
||||
.contiguous()
|
||||
)
|
||||
|
||||
# Get outputs
|
||||
x = torch.sigmoid(prediction[..., 0]) # Center x
|
||||
y = torch.sigmoid(prediction[..., 1]) # Center y
|
||||
w = prediction[..., 2] # Width
|
||||
h = prediction[..., 3] # Height
|
||||
pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
|
||||
pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
|
||||
|
||||
# If grid size does not match current we compute new offsets
|
||||
if grid_size != self.grid_size:
|
||||
self.compute_grid_offsets(grid_size, cuda=x.is_cuda)
|
||||
|
||||
# Add offset and scale with anchors
|
||||
pred_boxes = FloatTensor(prediction[..., :4].shape)
|
||||
pred_boxes[..., 0] = x.data + self.grid_x
|
||||
pred_boxes[..., 1] = y.data + self.grid_y
|
||||
pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
|
||||
pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
|
||||
|
||||
output = torch.cat(
|
||||
(
|
||||
pred_boxes.view(num_samples, -1, 4) * self.stride,
|
||||
pred_conf.view(num_samples, -1, 1),
|
||||
pred_cls.view(num_samples, -1, self.num_classes),
|
||||
),
|
||||
-1,
|
||||
)
|
||||
|
||||
if targets is None:
|
||||
return output, 0
|
||||
else:
|
||||
iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
|
||||
pred_boxes=pred_boxes,
|
||||
pred_cls=pred_cls,
|
||||
target=targets,
|
||||
anchors=self.scaled_anchors,
|
||||
ignore_thres=self.ignore_thres,
|
||||
)
|
||||
|
||||
# Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
|
||||
loss_x = self.mse_loss(x[obj_mask.bool()], tx[obj_mask.bool()])
|
||||
loss_y = self.mse_loss(y[obj_mask.bool()], ty[obj_mask.bool()])
|
||||
loss_w = self.mse_loss(w[obj_mask.bool()], tw[obj_mask.bool()])
|
||||
loss_h = self.mse_loss(h[obj_mask.bool()], th[obj_mask.bool()])
|
||||
loss_conf_obj = self.bce_loss(pred_conf[obj_mask.bool()], tconf[obj_mask.bool()])
|
||||
loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask.bool()], tconf[noobj_mask.bool()])
|
||||
loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
|
||||
loss_cls = self.bce_loss(pred_cls[obj_mask.bool()], tcls[obj_mask.bool()])
|
||||
total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
|
||||
|
||||
# Metrics
|
||||
cls_acc = 100 * class_mask[obj_mask.bool()].mean()
|
||||
conf_obj = pred_conf[obj_mask.bool()].mean()
|
||||
conf_noobj = pred_conf[noobj_mask.bool()].mean()
|
||||
conf50 = (pred_conf > 0.5).float()
|
||||
iou50 = (iou_scores > 0.5).float()
|
||||
iou75 = (iou_scores > 0.75).float()
|
||||
detected_mask = conf50 * class_mask * tconf
|
||||
precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
|
||||
recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
|
||||
recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)
|
||||
|
||||
self.metrics = {
|
||||
"loss": to_cpu(total_loss).item(),
|
||||
"x": to_cpu(loss_x).item(),
|
||||
"y": to_cpu(loss_y).item(),
|
||||
"w": to_cpu(loss_w).item(),
|
||||
"h": to_cpu(loss_h).item(),
|
||||
"conf": to_cpu(loss_conf).item(),
|
||||
"cls": to_cpu(loss_cls).item(),
|
||||
"cls_acc": to_cpu(cls_acc).item(),
|
||||
"recall50": to_cpu(recall50).item(),
|
||||
"recall75": to_cpu(recall75).item(),
|
||||
"precision": to_cpu(precision).item(),
|
||||
"conf_obj": to_cpu(conf_obj).item(),
|
||||
"conf_noobj": to_cpu(conf_noobj).item(),
|
||||
"grid_size": grid_size,
|
||||
}
|
||||
|
||||
return output, total_loss
|
||||
|
||||
|
||||
class Darknet(nn.Module):
|
||||
"""YOLOv3 object detection model"""
|
||||
def __init__(self, config_path, img_size=416):
|
||||
super(Darknet, self).__init__()
|
||||
self.module_defs = parse_model_config(config_path)
|
||||
self.hyperparams, self.module_list = create_modules(self.module_defs)
|
||||
self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")]
|
||||
self.img_size = img_size
|
||||
self.seen = 0
|
||||
self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
|
||||
|
||||
def forward(self, x, targets=None):
|
||||
img_dim = x.shape[2]
|
||||
loss = 0
|
||||
layer_outputs, yolo_outputs = [], []
|
||||
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
|
||||
if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
|
||||
x = module(x)
|
||||
elif module_def["type"] == "route":
|
||||
x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1)
|
||||
elif module_def["type"] == "shortcut":
|
||||
layer_i = int(module_def["from"])
|
||||
x = layer_outputs[-1] + layer_outputs[layer_i]
|
||||
elif module_def["type"] == "yolo":
|
||||
x, layer_loss = module[0](x, targets, img_dim)
|
||||
loss += layer_loss
|
||||
yolo_outputs.append(x)
|
||||
layer_outputs.append(x)
|
||||
yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
|
||||
return yolo_outputs if targets is None else (loss, yolo_outputs)

    def load_darknet_weights(self, weights_path):
        """Parses and loads the weights stored in 'weights_path'"""

        # Open the weights file
        with open(weights_path, "rb") as f:
            header = np.fromfile(f, dtype=np.int32, count=5)  # First five are header values
            self.header_info = header  # Needed to write header when saving weights
            self.seen = header[3]  # number of images seen during training
            weights = np.fromfile(f, dtype=np.float32)  # The rest are weights

        # Establish cutoff for loading backbone weights
        cutoff = None
        if "darknet53.conv.74" in weights_path:
            cutoff = 75

        ptr = 0
        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
            if i == cutoff:
                break
            if module_def["type"] == "convolutional":
                conv_layer = module[0]
                if module_def["batch_normalize"]:
                    # Load BN bias, weights, running mean and running variance
                    bn_layer = module[1]
                    num_b = bn_layer.bias.numel()  # Number of biases
                    # Bias
                    bn_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.bias)
                    bn_layer.bias.data.copy_(bn_b)
                    ptr += num_b
                    # Weight
                    bn_w = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.weight)
                    bn_layer.weight.data.copy_(bn_w)
                    ptr += num_b
                    # Running Mean
                    bn_rm = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.running_mean)
                    bn_layer.running_mean.data.copy_(bn_rm)
                    ptr += num_b
                    # Running Var
                    bn_rv = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.running_var)
                    bn_layer.running_var.data.copy_(bn_rv)
                    ptr += num_b
                else:
                    # Load conv. bias
                    num_b = conv_layer.bias.numel()
                    conv_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(conv_layer.bias)
                    conv_layer.bias.data.copy_(conv_b)
                    ptr += num_b
                # Load conv. weights
                num_w = conv_layer.weight.numel()
                conv_w = torch.from_numpy(weights[ptr: ptr + num_w]).view_as(conv_layer.weight)
                conv_layer.weight.data.copy_(conv_w)
                ptr += num_w

    def save_darknet_weights(self, path, cutoff=-1):
        """
        @:param path - path of the new weights file
        @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
        """
        fp = open(path, "wb")
        self.header_info[3] = self.seen
        self.header_info.tofile(fp)

        # Iterate through layers
        for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
            if module_def["type"] == "convolutional":
                conv_layer = module[0]
                # If batch norm, save bn parameters first
                if module_def["batch_normalize"]:
                    bn_layer = module[1]
                    bn_layer.bias.data.cpu().numpy().tofile(fp)
                    bn_layer.weight.data.cpu().numpy().tofile(fp)
                    bn_layer.running_mean.data.cpu().numpy().tofile(fp)
                    bn_layer.running_var.data.cpu().numpy().tofile(fp)
                # Save conv bias
                else:
                    conv_layer.bias.data.cpu().numpy().tofile(fp)
                # Save conv weights
                conv_layer.weight.data.cpu().numpy().tofile(fp)

        fp.close()
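    # Usage sketch (hypothetical weight paths): round-trip darknet-format weights.
    #   model = Darknet('Models/yolo-tiny-onecls/yolov3-tiny-onecls.cfg')
    #   model.load_darknet_weights('weights/yolov3-tiny.weights')
    #   model.save_darknet_weights('weights/checkpoint.weights', cutoff=-1)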

    def load_pretrain_to_custom_class(self, weights_pth_path):
        state = torch.load(weights_pth_path)

        own_state = self.state_dict()
        for name, param in state.items():
            if name not in own_state:
                print(f'Model does not have this param: {name}!')
                continue

            if param.shape != own_state[name].shape:
                print(f'Skip loading param: {name} because its shape does not match: '
                      f'{param.shape} vs {own_state[name].shape}')
                continue

            own_state[name].copy_(param)
415
StreamServer/src/analytic/action/Detection/Utils.py
Normal file
@ -0,0 +1,415 @@
import cv2
import math
import time
import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader


def to_cpu(tensor):
    return tensor.detach().cpu()


def load_classes(path):
    """
    Loads class labels at 'path'
    """
    with open(path, "r") as fp:
        names = fp.read().split("\n")[:-1]
    return names


def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)


def rescale_boxes(boxes, current_dim, original_shape):
    """ Rescales bounding boxes to the original shape """
    orig_h, orig_w = original_shape
    # The amount of padding that was added
    pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
    pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
    # Image height and width after padding is removed
    unpad_h = current_dim - pad_y
    unpad_w = current_dim - pad_x
    # Rescale bounding boxes to dimension of original image
    boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
    boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
    return boxes


def xywh2xyxy(x):
    y = x.new(x.shape)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y


def ap_per_class(tp, conf, pred_cls, target_cls):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp: True positives (list).
        conf: Objectness value from 0-1 (list).
        pred_cls: Predicted object classes (list).
        target_cls: True object classes (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes = np.unique(target_cls)

    # Create Precision-Recall curve and compute AP for each class
    ap, p, r = [], [], []
    for c in tqdm.tqdm(unique_classes, desc="Computing AP"):
        i = pred_cls == c
        n_gt = (target_cls == c).sum()  # Number of ground truth objects
        n_p = i.sum()  # Number of predicted objects

        if n_p == 0 and n_gt == 0:
            continue
        elif n_p == 0 or n_gt == 0:
            ap.append(0)
            r.append(0)
            p.append(0)
        else:
            # Accumulate FPs and TPs
            fpc = (1 - tp[i]).cumsum()
            tpc = (tp[i]).cumsum()

            # Recall
            recall_curve = tpc / (n_gt + 1e-16)
            r.append(recall_curve[-1])

            # Precision
            precision_curve = tpc / (tpc + fpc)
            p.append(precision_curve[-1])

            # AP from recall-precision curve
            ap.append(compute_ap(recall_curve, precision_curve))

    # Compute F1 score (harmonic mean of precision and recall)
    p, r, ap = np.array(p), np.array(r), np.array(ap)
    f1 = 2 * p * r / (p + r + 1e-16)

    return p, r, ap, f1, unique_classes.astype("int32")


def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall: The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap
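# Worked example (sketch): for recall=[0.5, 1.0] and precision=[1.0, 0.5],
# the precision envelope keeps [1.0, 0.5], so AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.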


def get_batch_statistics(outputs, targets, iou_threshold):
    """ Compute true positives, predicted scores and predicted labels per sample """
    batch_metrics = []
    for sample_i in range(len(outputs)):

        if outputs[sample_i] is None:
            continue

        output = outputs[sample_i]
        pred_boxes = output[:, :4]
        pred_scores = output[:, 4]
        pred_labels = output[:, -1]

        true_positives = np.zeros(pred_boxes.shape[0])

        annotations = targets[targets[:, 0] == sample_i][:, 1:]
        target_labels = annotations[:, 0] if len(annotations) else []
        if len(annotations):
            detected_boxes = []
            target_boxes = annotations[:, 1:]

            for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):

                # If all targets are found, break
                if len(detected_boxes) == len(annotations):
                    break

                # Ignore if label is not one of the target labels
                if pred_label not in target_labels:
                    continue

                iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
                if iou >= iou_threshold and box_index not in detected_boxes:
                    true_positives[pred_i] = 1
                    detected_boxes += [box_index]
        batch_metrics.append([true_positives, pred_scores, pred_labels])
    return batch_metrics


def bbox_wh_iou(wh1, wh2):
    wh2 = wh2.t()
    w1, h1 = wh1[0], wh1[1]
    w2, h2 = wh2[0], wh2[1]
    inter_area = torch.min(w1, w2) * torch.min(h1, h2)
    union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
    return inter_area / union_area


def bbox_iou(box1, box2, x1y1x2y2=True):
    """
    Returns the IoU of two bounding boxes
    """
    if not x1y1x2y2:
        # Transform from center and width to exact coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)
    # Intersection area
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
        inter_rect_y2 - inter_rect_y1 + 1, min=0
    )
    # Union Area
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)

    return iou
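# Numeric example (sketch): boxes [0, 0, 10, 10] and [5, 5, 15, 15] intersect in a
# 6x6 patch under the +1 pixel convention above, so IoU = 36 / (121 + 121 - 36) ~= 0.17.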


def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres' and performs
    Non-Maximum Suppression to further filter detections.
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    """
    # From (center x, center y, width, height) to (x1, y1, x2, y2)
    prediction[..., :4] = xywh2xyxy(prediction[..., :4])
    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # Filter out confidence scores below threshold
        image_pred = image_pred[image_pred[:, 4] >= conf_thres]
        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # Object confidence times class confidence
        score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
        # Sort by it
        image_pred = image_pred[(-score).argsort()]
        class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
        detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
        # Perform non-maximum suppression
        keep_boxes = []
        while detections.size(0):
            large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
            label_match = detections[0, -1] == detections[:, -1]
            # Indices of boxes with lower confidence scores, large IOUs and matching labels
            invalid = large_overlap & label_match
            weights = detections[invalid, 4:5]
            # Merge overlapping bboxes by order of confidence
            detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
            keep_boxes += [detections[0]]
            detections = detections[~invalid]
        if keep_boxes:
            output[image_i] = torch.stack(keep_boxes)

    return output
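# Note: unlike classic hard NMS, the loop above merges same-class overlapping boxes
# by confidence-weighted averaging of their coordinates before discarding them.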


def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):
    ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor
    FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor

    nB = pred_boxes.size(0)
    nA = pred_boxes.size(1)
    nC = pred_cls.size(-1)
    nG = pred_boxes.size(2)

    # Output tensors
    obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)
    noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)
    class_mask = FloatTensor(nB, nA, nG, nG).fill_(0)
    iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)
    tx = FloatTensor(nB, nA, nG, nG).fill_(0)
    ty = FloatTensor(nB, nA, nG, nG).fill_(0)
    tw = FloatTensor(nB, nA, nG, nG).fill_(0)
    th = FloatTensor(nB, nA, nG, nG).fill_(0)
    tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0)

    # Convert to position relative to box
    target_boxes = target[:, 2:6] * nG
    gxy = target_boxes[:, :2]
    gwh = target_boxes[:, 2:]
    # Get anchors with best iou
    ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
    best_ious, best_n = ious.max(0)
    # Separate target values
    b, target_labels = target[:, :2].long().t()
    gx, gy = gxy.t()
    gw, gh = gwh.t()
    gi, gj = gxy.long().t()
    # Set masks
    obj_mask[b, best_n, gj, gi] = 1
    noobj_mask[b, best_n, gj, gi] = 0

    # Set noobj mask to zero where iou exceeds ignore threshold
    for i, anchor_ious in enumerate(ious.t()):
        noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0

    # Coordinates
    tx[b, best_n, gj, gi] = gx - gx.floor()
    ty[b, best_n, gj, gi] = gy - gy.floor()
    # Width and height
    tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
    th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
    # One-hot encoding of label
    tcls[b, best_n, gj, gi, target_labels] = 1
    # Compute label correctness and iou at best anchor
    class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
    iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False)

    tconf = obj_mask.float()
    return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf


def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions"""
    with open(path, 'r') as file:
        lines = file.read().split('\n')
    lines = [x for x in lines if x and not x.startswith('#')]
    lines = [x.rstrip().lstrip() for x in lines]  # get rid of fringe whitespaces
    module_defs = []
    for line in lines:
        if line.startswith('['):  # This marks the start of a new block
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            key, value = line.split("=")
            value = value.strip()
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs
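# Example cfg snippet this parser understands (illustrative values, not from the repo):
#   [convolutional]
#   batch_normalize=1
#   filters=16
#   size=3
# Each [section] line starts a new module_def dict; key=value pairs fill it in.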


def parse_data_config(path):
    """Parses the data configuration file"""
    options = dict()
    options['gpus'] = '0,1,2,3'
    options['num_workers'] = '10'
    with open(path, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        key, value = line.split('=')
        options[key.strip()] = value.strip()
    return options


def ResizePadding(height, width):
    desired_size = (height, width)

    def resizePadding(image, **kwargs):
        old_size = image.shape[:2]
        max_size_idx = old_size.index(max(old_size))
        ratio = float(desired_size[max_size_idx]) / max(old_size)
        new_size = tuple([int(x * ratio) for x in old_size])

        if new_size > desired_size:
            min_size_idx = old_size.index(min(old_size))
            ratio = float(desired_size[min_size_idx]) / min(old_size)
            new_size = tuple([int(x * ratio) for x in old_size])

        image = cv2.resize(image, (new_size[1], new_size[0]))
        delta_w = desired_size[1] - new_size[1]
        delta_h = desired_size[0] - new_size[0]
        top, bottom = delta_h // 2, delta_h - (delta_h // 2)
        left, right = delta_w // 2, delta_w - (delta_w // 2)

        image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT)
        return image
    return resizePadding
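# Usage sketch: pad a frame to a square model input while keeping aspect ratio.
#   resize_fn = ResizePadding(416, 416)
#   padded = resize_fn(frame)  # frame is an HxWx3 numpy image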


class AverageValueMeter(object):
    def __init__(self):
        self.reset()
        self.val = 0

    def add(self, value, n=1):
        self.val = value
        self.sum += value
        self.var += value * value
        self.n += n

        if self.n == 0:
            self.mean, self.std = np.nan, np.nan
        elif self.n == 1:
            self.mean = 0.0 + self.sum  # This is to force a copy in torch/numpy
            self.std = np.inf
            self.mean_old = self.mean
            self.m_s = 0.0
        else:
            self.mean = self.mean_old + (value - n * self.mean_old) / float(self.n)
            self.m_s += (value - self.mean_old) * (value - self.mean)
            self.mean_old = self.mean
            self.std = np.sqrt(self.m_s / (self.n - 1.0))

    def value(self):
        return self.mean, self.std

    def reset(self):
        self.n = 0
        self.sum = 0.0
        self.var = 0.0
        self.val = 0.0
        self.mean = np.nan
        self.mean_old = 0.0
        self.m_s = 0.0
        self.std = np.nan
117
StreamServer/src/analytic/action/DetectorLoader.py
Normal file
@ -0,0 +1,117 @@
import time
import torch
import numpy as np
import torchvision.transforms as transforms

from queue import Queue
from threading import Thread

from .Detection.Models import Darknet
from .Detection.Utils import non_max_suppression, ResizePadding


class TinyYOLOv3_onecls(object):
    """Load trained Tiny-YOLOv3 one class (person) detection model.
    Args:
        input_size: (int) Size of the input image, must be divisible by 32. Default: 416,
        config_file: (str) Path to the Yolo model structure config file,
        weight_file: (str) Path to the trained weights file,
        nms: (float) Non-Maximum Suppression overlap threshold,
        conf_thres: (float) Minimum confidence threshold for keeping predicted bboxes,
        device: (str) Device to load the model on, 'cpu' or 'cuda'.
    """
    def __init__(self,
                 input_size=416,
                 config_file='Models/yolo-tiny-onecls/yolov3-tiny-onecls.cfg',
                 weight_file='Models/yolo-tiny-onecls/best-model.pth',
                 nms=0.2,
                 conf_thres=0.45,
                 device='cuda'):
        self.input_size = input_size
        self.model = Darknet(config_file).to(device)
        self.model.load_state_dict(torch.load(weight_file))
        self.model.eval()
        self.device = device

        self.nms = nms
        self.conf_thres = conf_thres

        self.resize_fn = ResizePadding(input_size, input_size)
        self.transf_fn = transforms.ToTensor()

    def detect(self, image, need_resize=True, expand_bb=5):
        """Feed forward to the model.
        Args:
            image: (numpy array) Single RGB image to detect,
            need_resize: (bool) Resize to input_size before feeding and return bboxes
                scaled back to the original image size,
            expand_bb: (int) Expand the boundary of the boxes by this many pixels.
        Returns:
            (torch.float32) One row per detected object:
                [top, left, bottom, right, bbox_score, class_score, class];
            returns `None` if nothing is detected.
        """
        image_size = (self.input_size, self.input_size)
        if need_resize:
            image_size = image.shape[:2]
            image = self.resize_fn(image)

        image = self.transf_fn(image)[None, ...]
        scf = torch.min(self.input_size / torch.FloatTensor([image_size]), 1)[0]

        detected = self.model(image.to(self.device))
        detected = non_max_suppression(detected, self.conf_thres, self.nms)[0]
        if detected is not None:
            detected[:, [0, 2]] -= (self.input_size - scf * image_size[1]) / 2
            detected[:, [1, 3]] -= (self.input_size - scf * image_size[0]) / 2
            detected[:, 0:4] /= scf

            detected[:, 0:2] = np.maximum(0, detected[:, 0:2] - expand_bb)
            detected[:, 2:4] = np.minimum(image_size[::-1], detected[:, 2:4] + expand_bb)

        return detected
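    # Usage sketch (assumes the default model files exist under ./Models):
    #   detector = TinyYOLOv3_onecls(device='cuda')
    #   boxes = detector.detect(rgb_frame)  # (n, 7) tensor or None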


class ThreadDetection(object):
    def __init__(self,
                 dataloader,
                 model,
                 queue_size=256):
        self.model = model

        self.dataloader = dataloader
        self.stopped = False
        self.Q = Queue(maxsize=queue_size)

    def start(self):
        Thread(target=self.update, args=(), daemon=True).start()
        return self

    def update(self):
        while True:
            if self.stopped:
                return

            images = self.dataloader.getitem()

            outputs = self.model.detect(images)

            if self.Q.full():
                time.sleep(2)
            self.Q.put((images, outputs))

    def getitem(self):
        return self.Q.get()

    def stop(self):
        self.stopped = True

    def __len__(self):
        return self.Q.qsize()
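    # Usage sketch (hypothetical `cam_loader` object exposing a getitem() method):
    #   det_thread = ThreadDetection(cam_loader, detector).start()
    #   frame, boxes = det_thread.getitem()
    #   det_thread.stop()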
1
StreamServer/src/analytic/action/Models/TSSTG/_.txt
Normal file
@ -0,0 +1 @@
tsstg-model.pth
2
StreamServer/src/analytic/action/Models/sppe/_.txt
Normal file
@ -0,0 +1,2 @@
fast_res50_256x192.pth
fast_res101_320x256.pth
2
StreamServer/src/analytic/action/Models/yolo-tiny-onecls/_.txt
Normal file
@ -0,0 +1,2 @@
best-model.pth
yolov3-tiny-onecls.cfg
40
StreamServer/src/analytic/action/PoseEstimateLoader.py
Normal file
@ -0,0 +1,40 @@
import os
import cv2
import torch

from .SPPE.src.main_fast_inference import InferenNet_fast, InferenNet_fastRes50
from .SPPE.src.utils.img import crop_dets
from .pPose_nms import pose_nms
from .SPPE.src.utils.eval import getPrediction


class SPPE_FastPose(object):
    def __init__(self,
                 backbone,
                 input_height=320,
                 input_width=256,
                 device='cuda',
                 path='./SPPE/models/sppe/'):
        assert backbone in ['resnet50', 'resnet101'], '{} backbone is not supported yet!'.format(backbone)

        self.inp_h = input_height
        self.inp_w = input_width
        self.device = device

        if backbone == 'resnet101':
            self.model = InferenNet_fast(path).to(device)
        else:
            self.model = InferenNet_fastRes50(path).to(device)
        self.model.eval()

    def predict(self, image, bboxs, bboxs_scores):
        inps, pt1, pt2 = crop_dets(image, bboxs, self.inp_h, self.inp_w)
        pose_hm = self.model(inps.to(self.device)).cpu().data

        # Cut eyes and ears.
        pose_hm = torch.cat([pose_hm[:, :1, ...], pose_hm[:, 5:, ...]], dim=1)

        xy_hm, xy_img, scores = getPrediction(pose_hm, pt1, pt2, self.inp_h, self.inp_w,
                                              pose_hm.shape[-2], pose_hm.shape[-1])
        result = pose_nms(bboxs, bboxs_scores, xy_img, scores)
        return result
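    # Usage sketch: estimate skeletons for the person boxes from the detector.
    #   pose_model = SPPE_FastPose('resnet50')
    #   poses = pose_model.predict(rgb_frame, detected[:, 0:4], detected[:, 4])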
51
StreamServer/src/analytic/action/README.md
Normal file
@ -0,0 +1,51 @@
<h1> Human Falling Detection and Tracking <a href="https://github.com/GajuuzZ/Human-Falling-Detect-Tracks">https://github.com/GajuuzZ/Human-Falling-Detect-Tracks</a> </h1>

Uses Tiny-YOLO oneclass to detect each person in the frame, then
[AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to get the skeleton pose, and finally an
[ST-GCN](https://github.com/yysijie/st-gcn) model to predict the action from every 30 frames
of each person's track.

It currently supports 7 actions: Standing, Walking, Sitting, Lying Down, Stand up, Sit down, Fall Down.

<div align="center">
    <img src="sample1.gif" width="416">
</div>

## Prerequisites

- Python > 3.6
- Pytorch > 1.3.1

Original test run on: i7-8750H CPU @ 2.20GHz x12, GeForce RTX 2070 8GB, CUDA 10.2

## Data

This project trained a new Tiny-YOLO oneclass model to detect only person objects and to reduce
model size. It was trained on a rotation-augmented [COCO](http://cocodataset.org/#home) person keypoints dataset
for more robust person detection over a wide range of body angles.

For action recognition, skeleton poses were extracted with AlphaPose from the [Le2i](http://le2i.cnrs.fr/Fall-detection-Dataset?lang=fr)
Fall detection Dataset (Coffee room, Home) and each action frame was labeled by hand
to train the ST-GCN model.

## Pre-Trained Models

- Tiny-YOLO oneclass - [.pth](https://drive.google.com/file/d/1obEbWBSm9bXeg10FriJ7R2cGLRsg-AfP/view?usp=sharing),
[.cfg](https://drive.google.com/file/d/19sPzBZjAjuJQ3emRteHybm2SG25w9Wn5/view?usp=sharing)
- SPPE FastPose (AlphaPose) - [resnet101](https://drive.google.com/file/d/1N2MgE1Esq6CKYA6FyZVKpPwHRyOCrzA0/view?usp=sharing),
[resnet50](https://drive.google.com/file/d/1IPfCDRwCmQDnQy94nT1V-_NVtTEi4VmU/view?usp=sharing)
- ST-GCN action recognition - [tsstg](https://drive.google.com/file/d/1mQQ4JHe58ylKbBqTjuKzpwN2nwKOWJ9u/view?usp=sharing)

## Basic Use

1. Download all pre-trained models into the ./Models folder.
2. Run main.py

```
python main.py ${video file or camera source}
```
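
The loaders added in this commit compose into one pipeline. A minimal sketch, assuming
the pre-trained models above are downloaded into ./Models and omitting the per-person
tracking and the 30-frame skeleton buffering that the full pipeline performs:

```python
import cv2
from DetectorLoader import TinyYOLOv3_onecls
from PoseEstimateLoader import SPPE_FastPose
from ActionsEstLoader import TSSTG

detector = TinyYOLOv3_onecls(device='cuda')            # person detector
pose_model = SPPE_FastPose('resnet50', device='cuda')  # skeleton estimator
action_model = TSSTG(device='cuda')                    # ST-GCN action classifier

frame = cv2.cvtColor(cv2.imread('frame.jpg'), cv2.COLOR_BGR2RGB)  # hypothetical input
detected = detector.detect(frame)          # (n, 7) boxes or None
if detected is not None:
    poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
    # TSSTG.predict expects a (t, v, c) keypoint sequence collected over
    # ~30 frames of one person's track, plus the frame size (width, height):
    # action_probs = action_model.predict(pts, frame.shape[:2][::-1])
```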

## Reference

- AlphaPose : https://github.com/Amanbhandula/AlphaPose
- ST-GCN : https://github.com/yysijie/st-gcn

21
StreamServer/src/analytic/action/SPPE/LICENSE
Normal file
@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 Jeff-sjtu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

1
StreamServer/src/analytic/action/SPPE/README.md
Normal file
@ -0,0 +1 @@
# pytorch-AlphaPose from: https://github.com/Amanbhandula/AlphaPose
82
StreamServer/src/analytic/action/SPPE/src/main_fast_inference.py
Normal file
@ -0,0 +1,82 @@
import torch
import torch.nn as nn
import torch.utils.data
import torch.utils.data.distributed
import torch.nn.functional as F
import numpy as np
from .utils.img import flip, shuffleLR
from .utils.eval import getPrediction
from .models.FastPose import FastPose

import time
import sys

import torch._utils
try:
    torch._utils._rebuild_tensor_v2
except AttributeError:
    def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
        tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
        tensor.requires_grad = requires_grad
        tensor._backward_hooks = backward_hooks
        return tensor
    torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2


class InferenNet(nn.Module):
    def __init__(self, dataset, weights_file='./Models/sppe/fast_res101_320x256.pth'):
        super().__init__()

        self.pyranet = FastPose('resnet101').cuda()
        print('Loading pose model from {}'.format(weights_file))
        sys.stdout.flush()
        self.pyranet.load_state_dict(torch.load(weights_file))
        self.pyranet.eval()

        self.dataset = dataset

    def forward(self, x):
        out = self.pyranet(x)
        out = out.narrow(1, 0, 17)

        flip_out = self.pyranet(flip(x))
        flip_out = flip_out.narrow(1, 0, 17)

        flip_out = flip(shuffleLR(
            flip_out, self.dataset))

        out = (flip_out + out) / 2

        return out


class InferenNet_fast(nn.Module):
    def __init__(self, weights_file='./Models/sppe/fast_res101_320x256.pth'):
        super().__init__()

        self.pyranet = FastPose('resnet101').cuda()
        print('Loading pose model from {}'.format(weights_file))
        self.pyranet.load_state_dict(torch.load(weights_file))
        self.pyranet.eval()

    def forward(self, x):
        out = self.pyranet(x)
        out = out.narrow(1, 0, 17)

        return out


class InferenNet_fastRes50(nn.Module):
    def __init__(self, weights_file='./Models/sppe/fast_res50_256x192.pth'):
        super().__init__()

        self.pyranet = FastPose('resnet50', 17).cuda()
        print('Loading pose model from {}'.format(weights_file))
        self.pyranet.load_state_dict(torch.load(weights_file))
        self.pyranet.eval()

    def forward(self, x):
        out = self.pyranet(x)

        return out
32
StreamServer/src/analytic/action/SPPE/src/models/FastPose.py
Normal file
@ -0,0 +1,32 @@
import torch.nn as nn
from torch.autograd import Variable

from .layers.SE_Resnet import SEResnet
from .layers.DUC import DUC
from ..opt import opt


class FastPose(nn.Module):
    DIM = 128

    def __init__(self, backbone='resnet101', num_join=opt.nClasses):
        super(FastPose, self).__init__()
        assert backbone in ['resnet50', 'resnet101']

        self.preact = SEResnet(backbone)

        self.suffle1 = nn.PixelShuffle(2)
        self.duc1 = DUC(512, 1024, upscale_factor=2)
        self.duc2 = DUC(256, 512, upscale_factor=2)

        self.conv_out = nn.Conv2d(
            self.DIM, num_join, kernel_size=3, stride=1, padding=1)

    def forward(self, x: Variable):
        out = self.preact(x)
        out = self.suffle1(out)
        out = self.duc1(out)
        out = self.duc2(out)

        out = self.conv_out(out)
        return out
@ -0,0 +1 @@
from . import *
126
StreamServer/src/analytic/action/SPPE/src/models/hg-prm.py
Normal file
@ -0,0 +1,126 @@
import torch.nn as nn
from .layers.PRM import Residual as ResidualPyramid
from .layers.Residual import Residual as Residual
from torch.autograd import Variable
from SPPE.src.opt import opt
from collections import defaultdict


class Hourglass(nn.Module):
    def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C):
        super(Hourglass, self).__init__()

        self.ResidualUp = ResidualPyramid if n >= 2 else Residual
        self.ResidualDown = ResidualPyramid if n >= 3 else Residual

        self.depth = n
        self.nModules = nModules
        self.nFeats = nFeats
        self.net_type = net_type
        self.B = B
        self.C = C
        self.inputResH = inputResH
        self.inputResW = inputResW

        self.up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW)
        self.low1 = nn.Sequential(
            nn.MaxPool2d(2),
            self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
        )
        if n > 1:
            self.low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C)
        else:
            self.low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)

        self.low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2)
        self.up2 = nn.UpsamplingNearest2d(scale_factor=2)

        self.upperBranch = self.up1
        self.lowerBranch = nn.Sequential(
            self.low1,
            self.low2,
            self.low3,
            self.up2
        )

    def _make_residual(self, resBlock, useConv, inputResH, inputResW):
        layer_list = []
        for i in range(self.nModules):
            layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW,
                                       stride=1, net_type=self.net_type, useConv=useConv,
                                       baseWidth=self.B, cardinality=self.C))
        return nn.Sequential(*layer_list)

    def forward(self, x: Variable):
        up1 = self.upperBranch(x)
        up2 = self.lowerBranch(x)
        out = up1 + up2
        return out


class PyraNet(nn.Module):
    def __init__(self):
        super(PyraNet, self).__init__()

        B, C = opt.baseWidth, opt.cardinality
        self.inputResH = opt.inputResH / 4
        self.inputResW = opt.inputResW / 4
        self.nStack = opt.nStack

        self.cnv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )
        self.r1 = nn.Sequential(
            ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
                            stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
            nn.MaxPool2d(2)
        )
        self.r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
                                  stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
        self.r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
                                  stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
        self.preact = nn.Sequential(
            self.cnv1,
            self.r1,
            self.r4,
            self.r5
        )
        self.stack_layers = defaultdict(list)
        for i in range(self.nStack):
            hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C)
            lin = nn.Sequential(
                hg,
                nn.BatchNorm2d(opt.nFeats),
                nn.ReLU(True),
                nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0),
                nn.BatchNorm2d(opt.nFeats),
                nn.ReLU(True)
            )
            tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0)
            self.stack_layers['lin'].append(lin)
            self.stack_layers['out'].append(tmpOut)
            if i < self.nStack - 1:
                lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
                tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0)
                self.stack_layers['lin_'].append(lin_)
                self.stack_layers['out_'].append(tmpOut_)

    def forward(self, x: Variable):
        out = []
        inter = self.preact(x)
        for i in range(self.nStack):
            lin = self.stack_layers['lin'][i](inter)
            tmpOut = self.stack_layers['out'][i](lin)
            out.append(tmpOut)
            if i < self.nStack - 1:
                lin_ = self.stack_layers['lin_'][i](lin)
                tmpOut_ = self.stack_layers['out_'][i](tmpOut)
                inter = inter + lin_ + tmpOut_
        return out


def createModel(**kw):
    model = PyraNet()
    return model
236
StreamServer/src/analytic/action/SPPE/src/models/hgPRM.py
Normal file
@ -0,0 +1,236 @@
import torch.nn as nn
from .layers.PRM import Residual as ResidualPyramid
from .layers.Residual import Residual as Residual
from torch.autograd import Variable
import torch
from SPPE.src.opt import opt
import math


class Hourglass(nn.Module):
    def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C):
        super(Hourglass, self).__init__()

        self.ResidualUp = ResidualPyramid if n >= 2 else Residual
        self.ResidualDown = ResidualPyramid if n >= 3 else Residual

        self.depth = n
        self.nModules = nModules
        self.nFeats = nFeats
        self.net_type = net_type
        self.B = B
        self.C = C
        self.inputResH = inputResH
        self.inputResW = inputResW

        up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW)
        low1 = nn.Sequential(
            nn.MaxPool2d(2),
            self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
        )
        if n > 1:
            low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C)
        else:
            low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)

        low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2)
        up2 = nn.UpsamplingNearest2d(scale_factor=2)

        self.upperBranch = up1
        self.lowerBranch = nn.Sequential(
            low1,
            low2,
            low3,
            up2
        )

    def _make_residual(self, resBlock, useConv, inputResH, inputResW):
        layer_list = []
        for i in range(self.nModules):
            layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW,
                                       stride=1, net_type=self.net_type, useConv=useConv,
                                       baseWidth=self.B, cardinality=self.C))
        return nn.Sequential(*layer_list)

    def forward(self, x: Variable):
        up1 = self.upperBranch(x)
        up2 = self.lowerBranch(x)
        # out = up1 + up2
        out = torch.add(up1, up2)
        return out


class PyraNet(nn.Module):
    def __init__(self):
        super(PyraNet, self).__init__()

        B, C = opt.baseWidth, opt.cardinality
        self.inputResH = opt.inputResH / 4
        self.inputResW = opt.inputResW / 4
        self.nStack = opt.nStack

        conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        if opt.init:
            nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3))

        cnv1 = nn.Sequential(
            conv1,
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )

        r1 = nn.Sequential(
            ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
                            stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
            nn.MaxPool2d(2)
        )
        r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
                             stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
        r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
                             stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
        self.preact = nn.Sequential(
            cnv1,
            r1,
            r4,
            r5
        )

        self.stack_lin = nn.ModuleList()
        self.stack_out = nn.ModuleList()
        self.stack_lin_ = nn.ModuleList()
        self.stack_out_ = nn.ModuleList()

        for i in range(self.nStack):
            hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C)
            conv1 = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
            if opt.init:
                nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
            lin = nn.Sequential(
                hg,
                nn.BatchNorm2d(opt.nFeats),
                nn.ReLU(True),
                conv1,
                nn.BatchNorm2d(opt.nFeats),
                nn.ReLU(True)
            )
            tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0)
            if opt.init:
                nn.init.xavier_normal(tmpOut.weight)
            self.stack_lin.append(lin)
            self.stack_out.append(tmpOut)
            if i < self.nStack - 1:
                lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
                tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0)
                if opt.init:
                    nn.init.xavier_normal(lin_.weight)
                    nn.init.xavier_normal(tmpOut_.weight)
                self.stack_lin_.append(lin_)
                self.stack_out_.append(tmpOut_)

    def forward(self, x: Variable):
        out = []
        inter = self.preact(x)
        for i in range(self.nStack):
            lin = self.stack_lin[i](inter)
            tmpOut = self.stack_out[i](lin)
            out.append(tmpOut)
            if i < self.nStack - 1:
                lin_ = self.stack_lin_[i](lin)
                tmpOut_ = self.stack_out_[i](tmpOut)
                inter = inter + lin_ + tmpOut_
        return out


class PyraNet_Inference(nn.Module):
    def __init__(self):
        super(PyraNet_Inference, self).__init__()

        B, C = opt.baseWidth, opt.cardinality
        self.inputResH = opt.inputResH / 4
        self.inputResW = opt.inputResW / 4
        self.nStack = opt.nStack

        conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        if opt.init:
            nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3))

        cnv1 = nn.Sequential(
            conv1,
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )

        r1 = nn.Sequential(
            ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
                            stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
            nn.MaxPool2d(2)
        )
        r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
                             stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
        r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
                             stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
        self.preact = nn.Sequential(
            cnv1,
            r1,
            r4,
            r5
        )

        self.stack_lin = nn.ModuleList()
        self.stack_out = nn.ModuleList()
        self.stack_lin_ = nn.ModuleList()
        self.stack_out_ = nn.ModuleList()

        for i in range(self.nStack):
            hg = Hourglass(4, opt.nFeats, opt.nResidual,
                           self.inputResH, self.inputResW, 'preact', B, C)
            conv1 = nn.Conv2d(opt.nFeats, opt.nFeats,
                              kernel_size=1, stride=1, padding=0)
            if opt.init:
                nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
            lin = nn.Sequential(
                hg,
                nn.BatchNorm2d(opt.nFeats),
                nn.ReLU(True),
                conv1,
                nn.BatchNorm2d(opt.nFeats),
                nn.ReLU(True)
            )
            tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses,
                               kernel_size=1, stride=1, padding=0)
            if opt.init:
                nn.init.xavier_normal(tmpOut.weight)
            self.stack_lin.append(lin)
            self.stack_out.append(tmpOut)
            if i < self.nStack - 1:
                lin_ = nn.Conv2d(opt.nFeats, opt.nFeats,
                                 kernel_size=1, stride=1, padding=0)
                tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats,
                                    kernel_size=1, stride=1, padding=0)
                if opt.init:
                    nn.init.xavier_normal(lin_.weight)
                    nn.init.xavier_normal(tmpOut_.weight)
                self.stack_lin_.append(lin_)
                self.stack_out_.append(tmpOut_)

    def forward(self, x: Variable):
        inter = self.preact(x)
        for i in range(self.nStack):
            lin = self.stack_lin[i](inter)
            tmpOut = self.stack_out[i](lin)
            out = tmpOut
            if i < self.nStack - 1:
                lin_ = self.stack_lin_[i](lin)
                tmpOut_ = self.stack_out_[i](tmpOut)
                inter = inter + lin_ + tmpOut_
        return out


def createModel(**kw):
    model = PyraNet()
    return model


def createModel_Inference(**kw):
    model = PyraNet_Inference()
    return model
23
StreamServer/src/analytic/action/SPPE/src/models/layers/DUC.py
Normal file
@ -0,0 +1,23 @@
import torch.nn as nn
import torch.nn.functional as F


class DUC(nn.Module):
    """
    INPUT: inplanes, planes, upscale_factor
    OUTPUT: (planes // 4) * ht * wd
    """
    def __init__(self, inplanes, planes, upscale_factor=2):
        super(DUC, self).__init__()
        self.conv = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1, bias=False)
        self.bn = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU()

        self.pixel_shuffle = nn.PixelShuffle(upscale_factor)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pixel_shuffle(x)
        return x
135
StreamServer/src/analytic/action/SPPE/src/models/layers/PRM.py
Normal file
@ -0,0 +1,135 @@
import torch.nn as nn
from .util_models import ConcatTable, CaddTable, Identity
import math
from opt import opt


class Residual(nn.Module):
    def __init__(self, numIn, numOut, inputResH, inputResW, stride=1,
                 net_type='preact', useConv=False, baseWidth=9, cardinality=4):
        super(Residual, self).__init__()

        self.con = ConcatTable([convBlock(numIn, numOut, inputResH,
                                          inputResW, net_type, baseWidth, cardinality, stride),
                                skipLayer(numIn, numOut, stride, useConv)])
        self.cadd = CaddTable(True)

    def forward(self, x):
        out = self.con(x)
        out = self.cadd(out)
        return out


def convBlock(numIn, numOut, inputResH, inputResW, net_type, baseWidth, cardinality, stride):
    numIn = int(numIn)
    numOut = int(numOut)

    addTable = ConcatTable()
    s_list = []
    if net_type != 'no_preact':
        s_list.append(nn.BatchNorm2d(numIn))
        s_list.append(nn.ReLU(True))

    conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1)
    if opt.init:
        nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
    s_list.append(conv1)

    s_list.append(nn.BatchNorm2d(numOut // 2))
    s_list.append(nn.ReLU(True))

    conv2 = nn.Conv2d(numOut // 2, numOut // 2,
                      kernel_size=3, stride=stride, padding=1)
    if opt.init:
        nn.init.xavier_normal(conv2.weight)
    s_list.append(conv2)

    s = nn.Sequential(*s_list)
    addTable.add(s)

    D = math.floor(numOut // baseWidth)
    C = cardinality
    s_list = []

    if net_type != 'no_preact':
        s_list.append(nn.BatchNorm2d(numIn))
        s_list.append(nn.ReLU(True))

    conv1 = nn.Conv2d(numIn, D, kernel_size=1, stride=stride)
    if opt.init:
        nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / C))

    s_list.append(conv1)
    s_list.append(nn.BatchNorm2d(D))
    s_list.append(nn.ReLU(True))
    s_list.append(pyramid(D, C, inputResH, inputResW))
    s_list.append(nn.BatchNorm2d(D))
    s_list.append(nn.ReLU(True))

    a = nn.Conv2d(D, numOut // 2, kernel_size=1)
    a.nBranchIn = C
    if opt.init:
        nn.init.xavier_normal(a.weight, gain=math.sqrt(1 / C))
    s_list.append(a)

    s = nn.Sequential(*s_list)
    addTable.add(s)

    elewiseAdd = nn.Sequential(
        addTable,
        CaddTable(False)
    )
    conv2 = nn.Conv2d(numOut // 2, numOut, kernel_size=1)
    if opt.init:
        nn.init.xavier_normal(conv2.weight, gain=math.sqrt(1 / 2))
    model = nn.Sequential(
        elewiseAdd,
        nn.BatchNorm2d(numOut // 2),
        nn.ReLU(True),
        conv2
    )
    return model


def pyramid(D, C, inputResH, inputResW):
    pyraTable = ConcatTable()
    sc = math.pow(2, 1 / C)
    for i in range(C):
        scaled = 1 / math.pow(sc, i + 1)
        conv1 = nn.Conv2d(D, D, kernel_size=3, stride=1, padding=1)
        if opt.init:
            nn.init.xavier_normal(conv1.weight)
        s = nn.Sequential(
            nn.FractionalMaxPool2d(2, output_ratio=(scaled, scaled)),
            conv1,
            nn.UpsamplingBilinear2d(size=(int(inputResH), int(inputResW))))
        pyraTable.add(s)
    pyra = nn.Sequential(
        pyraTable,
        CaddTable(False)
    )
    return pyra


class skipLayer(nn.Module):
    def __init__(self, numIn, numOut, stride, useConv):
        super(skipLayer, self).__init__()
        self.identity = False

        if numIn == numOut and stride == 1 and not useConv:
            self.identity = True
        else:
            conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride)
            if opt.init:
                nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
            self.m = nn.Sequential(
                nn.BatchNorm2d(numIn),
                nn.ReLU(True),
                conv1
            )

    def forward(self, x):
        if self.identity:
            return x
        else:
            return self.m(x)
54
StreamServer/src/analytic/action/SPPE/src/models/layers/Residual.py
Normal file
@ -0,0 +1,54 @@
import torch.nn as nn
import math
from .util_models import ConcatTable, CaddTable, Identity
from opt import opt


def Residual(numIn, numOut, *arg, stride=1, net_type='preact', useConv=False, **kw):
    con = ConcatTable([convBlock(numIn, numOut, stride, net_type),
                       skipLayer(numIn, numOut, stride, useConv)])
    cadd = CaddTable(True)
    return nn.Sequential(con, cadd)


def convBlock(numIn, numOut, stride, net_type):
    s_list = []
    if net_type != 'no_preact':
        s_list.append(nn.BatchNorm2d(numIn))
        s_list.append(nn.ReLU(True))

    conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1)
    if opt.init:
        nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
    s_list.append(conv1)

    s_list.append(nn.BatchNorm2d(numOut // 2))
    s_list.append(nn.ReLU(True))

    conv2 = nn.Conv2d(numOut // 2, numOut // 2, kernel_size=3, stride=stride, padding=1)
    if opt.init:
        nn.init.xavier_normal(conv2.weight)
    s_list.append(conv2)
    s_list.append(nn.BatchNorm2d(numOut // 2))
    s_list.append(nn.ReLU(True))

    conv3 = nn.Conv2d(numOut // 2, numOut, kernel_size=1)
    if opt.init:
        nn.init.xavier_normal(conv3.weight)
    s_list.append(conv3)

    return nn.Sequential(*s_list)


def skipLayer(numIn, numOut, stride, useConv):
    if numIn == numOut and stride == 1 and not useConv:
        return Identity()
    else:
        conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride)
        if opt.init:
            nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
        return nn.Sequential(
            nn.BatchNorm2d(numIn),
            nn.ReLU(True),
            conv1
        )
@ -0,0 +1,82 @@
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = F.relu(self.bn1(self.conv1(x)), inplace=True)
        out = F.relu(self.bn2(self.conv2(out)), inplace=True)
        out = self.bn3(self.conv3(out))

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = F.relu(out, inplace=True)

        return out


class ResNet(nn.Module):
    """ Resnet """
    def __init__(self, architecture):
        super(ResNet, self).__init__()
        assert architecture in ["resnet50", "resnet101"]
        self.inplanes = 64
        self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3]
        self.block = Bottleneck

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)

        self.layer1 = self.make_layer(self.block, 64, self.layers[0])
        self.layer2 = self.make_layer(self.block, 128, self.layers[1], stride=2)
        self.layer3 = self.make_layer(self.block, 256, self.layers[2], stride=2)

        self.layer4 = self.make_layer(
            self.block, 512, self.layers[3], stride=2)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def stages(self):
        return [self.layer1, self.layer2, self.layer3, self.layer4]

    def make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)
@ -0,0 +1,99 @@
import torch.nn as nn
from .SE_module import SELayer
import torch.nn.functional as F


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=False):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        if reduction:
            self.se = SELayer(planes * 4)

        self.reduc = reduction
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = F.relu(self.bn1(self.conv1(x)), inplace=True)
        out = F.relu(self.bn2(self.conv2(out)), inplace=True)

        out = self.conv3(out)
        out = self.bn3(out)
        if self.reduc:
            out = self.se(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = F.relu(out)

        return out


class SEResnet(nn.Module):
    """ SEResnet """

    def __init__(self, architecture):
        super(SEResnet, self).__init__()
        assert architecture in ["resnet50", "resnet101"]
        self.inplanes = 64
        self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3]
        self.block = Bottleneck

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7,
                               stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self.make_layer(self.block, 64, self.layers[0])
        self.layer2 = self.make_layer(
            self.block, 128, self.layers[1], stride=2)
        self.layer3 = self.make_layer(
            self.block, 256, self.layers[2], stride=2)

        self.layer4 = self.make_layer(
            self.block, 512, self.layers[3], stride=2)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))  # 64 * h/4 * w/4
        x = self.layer1(x)  # 256 * h/4 * w/4
        x = self.layer2(x)  # 512 * h/8 * w/8
        x = self.layer3(x)  # 1024 * h/16 * w/16
        x = self.layer4(x)  # 2048 * h/32 * w/32
        return x

    def stages(self):
        return [self.layer1, self.layer2, self.layer3, self.layer4]

    def make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        if downsample is not None:
            layers.append(block(self.inplanes, planes, stride, downsample, reduction=True))
        else:
            layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)
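As a quick sanity check on the backbone above: the stage comments in forward() promise a 32x spatial reduction and 2048 output channels. A minimal sketch verifying that, assuming this file is importable (the module name SE_Resnet below is an assumption, not part of the commit):

# Shape check for the SEResnet backbone (illustrative sketch).
import torch
from SE_Resnet import SEResnet   # hypothetical import path

net = SEResnet('resnet50').eval()
x = torch.randn(1, 3, 224, 224)            # dummy RGB frame
with torch.no_grad():
    feat = net(x)
print(feat.shape)                          # torch.Size([1, 2048, 7, 7]) = 2048 * h/32 * w/32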
@ -0,0 +1,19 @@
from torch import nn


class SELayer(nn.Module):
    def __init__(self, channel, reduction=1):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y
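The SELayer above is a squeeze-and-excitation block: global-average-pool each channel down to one number, run the channel vector through a small MLP ending in a sigmoid, and rescale the original channel maps by the result. A minimal sketch of the recalibration idea, omitting the learned MLP for brevity:

# Channel recalibration in the squeeze-and-excitation style (illustrative).
import torch

x = torch.randn(2, 8, 16, 16)              # (batch, channels, H, W)
w = torch.sigmoid(x.mean(dim=(2, 3)))      # squeeze: one gate per channel, in (0, 1)
y = x * w.view(2, 8, 1, 1)                 # excite: rescale each channel map
print(y.shape)                             # torch.Size([2, 8, 16, 16]), same shape, reweighted

Note that the committed layer defaults to reduction=1, i.e. the bottleneck MLP does not actually compress the channel vector.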
@ -0,0 +1 @@
from . import *
@ -0,0 +1,37 @@
import torch
import torch.nn as nn
from torch.autograd import Variable


class ConcatTable(nn.Module):
    def __init__(self, module_list=None):
        super(ConcatTable, self).__init__()

        self.modules_list = nn.ModuleList(module_list)

    def forward(self, x: Variable):
        y = []
        for i in range(len(self.modules_list)):
            y.append(self.modules_list[i](x))
        return y

    def add(self, module):
        self.modules_list.append(module)


class CaddTable(nn.Module):
    def __init__(self, inplace=False):
        super(CaddTable, self).__init__()
        self.inplace = inplace

    def forward(self, x: Variable or list):
        return torch.stack(x, 0).sum(0)


class Identity(nn.Module):
    def __init__(self, params=None):
        super(Identity, self).__init__()
        self.params = nn.ParameterList(params)

    def forward(self, x: Variable or list):
        return x
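ConcatTable fans one input out to several branches and CaddTable sums a list of tensors back together, mirroring the old Torch7 table layers. A minimal sketch of a two-branch residual-style block built from them (the branch modules here are placeholders, not from the commit):

# Fan-out / sum-reduce with the table modules above (illustrative).
import torch
import torch.nn as nn

branches = ConcatTable([nn.Conv2d(8, 8, 3, padding=1), nn.Identity()])
add = CaddTable()

x = torch.randn(1, 8, 16, 16)
ys = branches(x)          # list of two tensors, one per branch
out = add(ys)             # element-wise sum, shape (1, 8, 16, 16)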
115
StreamServer/src/analytic/action/SPPE/src/opt.py
Normal file
@ -0,0 +1,115 @@
"""import argparse
import torch

parser = argparse.ArgumentParser(description='PyTorch AlphaPose Training')
parser.add_argument("--return_counts", type=bool, default=True)
parser.add_argument("--mode", default='client')
parser.add_argument("--port", default=52162)

"----------------------------- General options -----------------------------"
parser.add_argument('--expID', default='default', type=str,
                    help='Experiment ID')
parser.add_argument('--dataset', default='coco', type=str,
                    help='Dataset choice: mpii | coco')
parser.add_argument('--nThreads', default=30, type=int,
                    help='Number of data loading threads')
parser.add_argument('--debug', default=False, type=bool,
                    help='Print the debug information')
parser.add_argument('--snapshot', default=1, type=int,
                    help='How often to take a snapshot of the model (0 = never)')

"----------------------------- AlphaPose options -----------------------------"
parser.add_argument('--addDPG', default=False, type=bool,
                    help='Train with data augmentation')

"----------------------------- Model options -----------------------------"
parser.add_argument('--netType', default='hgPRM', type=str,
                    help='Options: hgPRM | resnext')
parser.add_argument('--loadModel', default=None, type=str,
                    help='Provide full path to a previously trained model')
parser.add_argument('--Continue', default=False, type=bool,
                    help='Pick up where an experiment left off')
parser.add_argument('--nFeats', default=256, type=int,
                    help='Number of features in the hourglass')
parser.add_argument('--nClasses', default=33, type=int,
                    help='Number of output channels')
parser.add_argument('--nStack', default=8, type=int,
                    help='Number of hourglasses to stack')

"----------------------------- Hyperparameter options -----------------------------"
parser.add_argument('--LR', default=2.5e-4, type=float,
                    help='Learning rate')
parser.add_argument('--momentum', default=0, type=float,
                    help='Momentum')
parser.add_argument('--weightDecay', default=0, type=float,
                    help='Weight decay')
parser.add_argument('--crit', default='MSE', type=str,
                    help='Criterion type')
parser.add_argument('--optMethod', default='rmsprop', type=str,
                    help='Optimization method: rmsprop | sgd | nag | adadelta')


"----------------------------- Training options -----------------------------"
parser.add_argument('--nEpochs', default=50, type=int,
                    help='Number of epochs to train')
parser.add_argument('--epoch', default=0, type=int,
                    help='Current epoch')
parser.add_argument('--trainBatch', default=40, type=int,
                    help='Train-batch size')
parser.add_argument('--validBatch', default=20, type=int,
                    help='Valid-batch size')
parser.add_argument('--trainIters', default=0, type=int,
                    help='Total train iters')
parser.add_argument('--valIters', default=0, type=int,
                    help='Total valid iters')
parser.add_argument('--init', default=None, type=str,
                    help='Initialization')
"----------------------------- Data options -----------------------------"
parser.add_argument('--inputResH', default=384, type=int,
                    help='Input image height')
parser.add_argument('--inputResW', default=320, type=int,
                    help='Input image width')
parser.add_argument('--outputResH', default=96, type=int,
                    help='Output heatmap height')
parser.add_argument('--outputResW', default=80, type=int,
                    help='Output heatmap width')
parser.add_argument('--scale', default=0.25, type=float,
                    help='Degree of scale augmentation')
parser.add_argument('--rotate', default=30, type=float,
                    help='Degree of rotation augmentation')
parser.add_argument('--hmGauss', default=1, type=int,
                    help='Heatmap gaussian size')

"----------------------------- PyraNet options -----------------------------"
parser.add_argument('--baseWidth', default=9, type=int,
                    help='Base width of the pyramid features')
parser.add_argument('--cardinality', default=5, type=int,
                    help='Cardinality of the pyramid features')
parser.add_argument('--nResidual', default=1, type=int,
                    help='Number of residual modules at each location in the pyranet')

"----------------------------- Distribution options -----------------------------"
parser.add_argument('--dist', dest='dist', type=int, default=1,
                    help='distributed training or not')
parser.add_argument('--backend', dest='backend', type=str, default='gloo',
                    help='backend for distributed training')
parser.add_argument('--port', dest='port',
                    help='port of server')
opt = parser.parse_args()"""

"""if opt.Continue:
    opt = torch.load("../exp/{}/{}/option.pkl".format(opt.dataset, opt.expID))
    opt.Continue = True
    opt.nEpochs = 50
    print("--- Continue ---")"""


class opt:
    nClasses = 33
    inputResH = 384
    inputResW = 320
    outputResH = 96
    outputResW = 80
    scale = 0.25
    rotate = 30
    hmGauss = 1
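With the argparse block disabled, the remaining opt class is a plain namespace of fixed inference-time constants that the SPPE modules read as attributes. A hedged sketch of how downstream code consumes it:

# How the static opt namespace is consumed (illustrative).
from SPPE.src.opt import opt

heatmap_shape = (opt.nClasses, opt.outputResH, opt.outputResW)   # (33, 96, 80)
print(heatmap_shape)

# Caution: the dataset and augmentation code below also reads opt.nStack,
# opt.addDPG and opt.debug, which this trimmed-down class does not define;
# those training-only paths would raise AttributeError as committed.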
@ -0,0 +1 @@
from . import *
@ -0,0 +1,85 @@
import os
import h5py
from functools import reduce

import torch.utils.data as data
from ..pose import generateSampleBox
from opt import opt


class Mscoco(data.Dataset):
    def __init__(self, train=True, sigma=1,
                 scale_factor=(0.2, 0.3), rot_factor=40, label_type='Gaussian'):
        self.img_folder = '../data/coco/images'    # root image folders
        self.is_train = train                      # training set or test set
        self.inputResH = opt.inputResH
        self.inputResW = opt.inputResW
        self.outputResH = opt.outputResH
        self.outputResW = opt.outputResW
        self.sigma = sigma
        self.scale_factor = scale_factor
        self.rot_factor = rot_factor
        self.label_type = label_type

        self.nJoints_coco = 17
        self.nJoints_mpii = 16
        self.nJoints = 33

        self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8,
                        9, 10, 11, 12, 13, 14, 15, 16, 17)
        self.flipRef = ((2, 3), (4, 5), (6, 7),
                        (8, 9), (10, 11), (12, 13),
                        (14, 15), (16, 17))

        # create train/val split
        with h5py.File('../data/coco/annot_clean.h5', 'r') as annot:
            # train
            self.imgname_coco_train = annot['imgname'][:-5887]
            self.bndbox_coco_train = annot['bndbox'][:-5887]
            self.part_coco_train = annot['part'][:-5887]
            # val
            self.imgname_coco_val = annot['imgname'][-5887:]
            self.bndbox_coco_val = annot['bndbox'][-5887:]
            self.part_coco_val = annot['part'][-5887:]

        self.size_train = self.imgname_coco_train.shape[0]
        self.size_val = self.imgname_coco_val.shape[0]

    def __getitem__(self, index):
        sf = self.scale_factor

        if self.is_train:
            part = self.part_coco_train[index]
            bndbox = self.bndbox_coco_train[index]
            imgname = self.imgname_coco_train[index]
        else:
            part = self.part_coco_val[index]
            bndbox = self.bndbox_coco_val[index]
            imgname = self.imgname_coco_val[index]

        imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))
        img_path = os.path.join(self.img_folder, imgname)

        metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
                                     'coco', sf, self, train=self.is_train)

        inp, out_bigcircle, out_smallcircle, out, setMask = metaData

        label = []
        for i in range(opt.nStack):
            if i < 2:
                # label.append(out_bigcircle.clone())
                label.append(out.clone())
            elif i < 4:
                # label.append(out_smallcircle.clone())
                label.append(out.clone())
            else:
                label.append(out.clone())

        return inp, label, setMask, 'coco'

    def __len__(self):
        if self.is_train:
            return self.size_train
        else:
            return self.size_val
122
StreamServer/src/analytic/action/SPPE/src/utils/dataset/fuse.py
Normal file
@ -0,0 +1,122 @@
import os
import h5py
from functools import reduce

import torch.utils.data as data
from ..pose import generateSampleBox
from opt import opt


class Mscoco(data.Dataset):
    def __init__(self, train=True, sigma=1,
                 scale_factor=0.25, rot_factor=30, label_type='Gaussian'):
        self.img_folder = '../data/'    # root image folders
        self.is_train = train           # training set or test set
        self.inputResH = 320
        self.inputResW = 256
        self.outputResH = 80
        self.outputResW = 64
        self.sigma = sigma
        self.scale_factor = (0.2, 0.3)
        self.rot_factor = rot_factor
        self.label_type = label_type

        self.nJoints_coco = 17
        self.nJoints_mpii = 16
        self.nJoints = 33

        self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8,      # COCO
                        9, 10, 11, 12, 13, 14, 15, 16, 17,
                        18, 19, 20, 21, 22, 23,      # MPII
                        28, 29, 32, 33)

        self.flipRef = ((2, 3), (4, 5), (6, 7),      # COCO
                        (8, 9), (10, 11), (12, 13),
                        (14, 15), (16, 17),
                        (18, 23), (19, 22), (20, 21),    # MPII
                        (28, 33), (29, 32), (30, 31))

        '''
        Create train/val split
        '''
        # COCO
        with h5py.File('../data/coco/annot_clean.h5', 'r') as annot:
            # train
            self.imgname_coco_train = annot['imgname'][:-5887]
            self.bndbox_coco_train = annot['bndbox'][:-5887]
            self.part_coco_train = annot['part'][:-5887]
            # val
            self.imgname_coco_val = annot['imgname'][-5887:]
            self.bndbox_coco_val = annot['bndbox'][-5887:]
            self.part_coco_val = annot['part'][-5887:]
        # MPII
        with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot:
            # train
            self.imgname_mpii_train = annot['imgname'][:-1358]
            self.bndbox_mpii_train = annot['bndbox'][:-1358]
            self.part_mpii_train = annot['part'][:-1358]
            # val
            self.imgname_mpii_val = annot['imgname'][-1358:]
            self.bndbox_mpii_val = annot['bndbox'][-1358:]
            self.part_mpii_val = annot['part'][-1358:]

        self.size_coco_train = self.imgname_coco_train.shape[0]
        self.size_coco_val = self.imgname_coco_val.shape[0]
        self.size_train = self.imgname_coco_train.shape[0] + self.imgname_mpii_train.shape[0]
        self.size_val = self.imgname_coco_val.shape[0] + self.imgname_mpii_val.shape[0]
        self.train, self.valid = [], []

    def __getitem__(self, index):
        sf = self.scale_factor

        if self.is_train and index < self.size_coco_train:    # COCO
            part = self.part_coco_train[index]
            bndbox = self.bndbox_coco_train[index]
            imgname = self.imgname_coco_train[index]
            imgset = 'coco'
        elif self.is_train:                                   # MPII
            part = self.part_mpii_train[index - self.size_coco_train]
            bndbox = self.bndbox_mpii_train[index - self.size_coco_train]
            imgname = self.imgname_mpii_train[index - self.size_coco_train]
            imgset = 'mpii'
        elif index < self.size_coco_val:
            part = self.part_coco_val[index]
            bndbox = self.bndbox_coco_val[index]
            imgname = self.imgname_coco_val[index]
            imgset = 'coco'
        else:
            part = self.part_mpii_val[index - self.size_coco_val]
            bndbox = self.bndbox_mpii_val[index - self.size_coco_val]
            imgname = self.imgname_mpii_val[index - self.size_coco_val]
            imgset = 'mpii'

        if imgset == 'coco':
            imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))
        else:
            imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13]

        img_path = os.path.join(self.img_folder, imgset, 'images', imgname)

        metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
                                     imgset, sf, self, train=self.is_train)

        inp, out_bigcircle, out_smallcircle, out, setMask = metaData

        label = []
        for i in range(opt.nStack):
            if i < 2:
                # label.append(out_bigcircle.clone())
                label.append(out.clone())
            elif i < 4:
                # label.append(out_smallcircle.clone())
                label.append(out.clone())
            else:
                label.append(out.clone())

        return inp, label, setMask, imgset

    def __len__(self):
        if self.is_train:
            return self.size_train
        else:
            return self.size_val
@ -0,0 +1,84 @@
import os
import h5py
from functools import reduce

import torch.utils.data as data
from ..pose import generateSampleBox
from opt import opt


class Mpii(data.Dataset):
    def __init__(self, train=True, sigma=1,
                 scale_factor=0.25, rot_factor=30, label_type='Gaussian'):
        self.img_folder = '../data/mpii/images'    # root image folders
        self.is_train = train                      # training set or test set
        self.inputResH = 320
        self.inputResW = 256
        self.outputResH = 80
        self.outputResW = 64
        self.sigma = sigma
        self.scale_factor = (0.2, 0.3)
        self.rot_factor = rot_factor
        self.label_type = label_type

        self.nJoints_mpii = 16
        self.nJoints = 16

        self.accIdxs = (1, 2, 3, 4, 5, 6,
                        11, 12, 15, 16)
        self.flipRef = ((1, 6), (2, 5), (3, 4),
                        (11, 16), (12, 15), (13, 14))

        # create train/val split
        with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot:
            # train
            self.imgname_mpii_train = annot['imgname'][:-1358]
            self.bndbox_mpii_train = annot['bndbox'][:-1358]
            self.part_mpii_train = annot['part'][:-1358]
            # val
            self.imgname_mpii_val = annot['imgname'][-1358:]
            self.bndbox_mpii_val = annot['bndbox'][-1358:]
            self.part_mpii_val = annot['part'][-1358:]

        self.size_train = self.imgname_mpii_train.shape[0]
        self.size_val = self.imgname_mpii_val.shape[0]
        self.train, self.valid = [], []

    def __getitem__(self, index):
        sf = self.scale_factor

        if self.is_train:
            part = self.part_mpii_train[index]
            bndbox = self.bndbox_mpii_train[index]
            imgname = self.imgname_mpii_train[index]
        else:
            part = self.part_mpii_val[index]
            bndbox = self.bndbox_mpii_val[index]
            imgname = self.imgname_mpii_val[index]

        imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13]
        img_path = os.path.join(self.img_folder, imgname)

        metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
                                     'mpii', sf, self, train=self.is_train)

        inp, out_bigcircle, out_smallcircle, out, setMask = metaData

        label = []
        for i in range(opt.nStack):
            if i < 2:
                # label.append(out_bigcircle.clone())
                label.append(out.clone())
            elif i < 4:
                # label.append(out_smallcircle.clone())
                label.append(out.clone())
            else:
                label.append(out.clone())

        return inp, label, setMask

    def __len__(self):
        if self.is_train:
            return self.size_train
        else:
            return self.size_val
216
StreamServer/src/analytic/action/SPPE/src/utils/eval.py
Normal file
@ -0,0 +1,216 @@
from ..opt import opt
try:
    from utils import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks
except ImportError:
    from .img import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks
import torch


class DataLogger(object):
    def __init__(self):
        self.clear()

    def clear(self):
        self.value = 0
        self.sum = 0
        self.cnt = 0
        self.avg = 0

    def update(self, value, n=1):
        self.value = value
        self.sum += value * n
        self.cnt += n
        self._cal_avg()

    def _cal_avg(self):
        self.avg = self.sum / self.cnt


def accuracy(output, label, dataset):
    if type(output) == list:
        return accuracy(output[opt.nStack - 1], label[opt.nStack - 1], dataset)
    else:
        return heatmapAccuracy(output.cpu().data, label.cpu().data, dataset.accIdxs)


def heatmapAccuracy(output, label, idxs):
    preds = getPreds(output)
    gt = getPreds(label)

    norm = torch.ones(preds.size(0)) * opt.outputResH / 10
    dists = calc_dists(preds, gt, norm)
    # print(dists)
    acc = torch.zeros(len(idxs) + 1)
    avg_acc = 0
    cnt = 0
    for i in range(len(idxs)):
        acc[i + 1] = dist_acc(dists[idxs[i] - 1])
        if acc[i + 1] >= 0:
            avg_acc = avg_acc + acc[i + 1]
            cnt += 1
    if cnt != 0:
        acc[0] = avg_acc / cnt
    return acc


def getPreds(hm):
    """ get predictions from score maps in torch Tensor
        return type: torch.LongTensor
    """
    assert hm.dim() == 4, 'Score maps should be 4-dim'
    maxval, idx = torch.max(hm.view(hm.size(0), hm.size(1), -1), 2)

    maxval = maxval.view(hm.size(0), hm.size(1), 1)
    idx = idx.view(hm.size(0), hm.size(1), 1) + 1

    preds = idx.repeat(1, 1, 2).float()

    preds[:, :, 0] = (preds[:, :, 0] - 1) % hm.size(3)
    preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hm.size(3))

    # pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
    # preds *= pred_mask
    return preds


def calc_dists(preds, target, normalize):
    preds = preds.float().clone()
    target = target.float().clone()
    dists = torch.zeros(preds.size(1), preds.size(0))
    for n in range(preds.size(0)):
        for c in range(preds.size(1)):
            if target[n, c, 0] > 0 and target[n, c, 1] > 0:
                dists[c, n] = torch.dist(
                    preds[n, c, :], target[n, c, :]) / normalize[n]
            else:
                dists[c, n] = -1
    return dists


def dist_acc(dists, thr=0.5):
    """ Return percentage below threshold while ignoring values with a -1 """
    if dists.ne(-1).sum() > 0:
        return dists.le(thr).eq(dists.ne(-1)).float().sum() * 1.0 / dists.ne(-1).float().sum()
    else:
        return -1


def postprocess(output):
    p = getPreds(output)

    for i in range(p.size(0)):
        for j in range(p.size(1)):
            hm = output[i][j]
            pX, pY = int(round(p[i][j][0])), int(round(p[i][j][1]))
            if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1:
                diff = torch.Tensor((hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX]))
                p[i][j] += diff.sign() * 0.25
    p -= 0.5

    return p


def getPrediction(hms, pt1, pt2, inpH, inpW, resH, resW):
    """
    Get keypoint location from heatmaps
    """
    assert hms.dim() == 4, 'Score maps should be 4-dim'
    maxval, idx = torch.max(hms.view(hms.size(0), hms.size(1), -1), 2)

    maxval = maxval.view(hms.size(0), hms.size(1), 1)
    idx = idx.view(hms.size(0), hms.size(1), 1) + 1

    preds = idx.repeat(1, 1, 2).float()

    preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3)
    preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3))

    pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
    preds *= pred_mask

    # Very simple post-processing step to improve performance at tight PCK thresholds
    """for i in range(preds.size(0)):
        for j in range(preds.size(1)):
            hm = hms[i][j]
            pX, pY = int(round(float(preds[i][j][0]))), int(round(float(preds[i][j][1])))
            if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1:
                diff = torch.Tensor(
                    (hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX]))
                preds[i][j] += diff.sign() * 0.25
    preds += 0.2"""

    preds_tf = torch.zeros(preds.size())
    preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW)
    return preds, preds_tf, maxval


def getMultiPeakPrediction(hms, pt1, pt2, inpH, inpW, resH, resW):

    assert hms.dim() == 4, 'Score maps should be 4-dim'

    preds_img = {}
    hms = hms.numpy()
    for n in range(hms.shape[0]):        # Number of samples
        preds_img[n] = {}                # Result of sample: n
        for k in range(hms.shape[1]):    # Number of keypoints
            preds_img[n][k] = []         # Result of keypoint: k
            hm = hms[n][k]

            candidate_points = findPeak(hm)

            res_pt = processPeaks(candidate_points, hm,
                                  pt1[n], pt2[n], inpH, inpW, resH, resW)

            preds_img[n][k] = res_pt

    return preds_img


def getPrediction_batch(hms, pt1, pt2, inpH, inpW, resH, resW):
    """
    Get keypoint location from heatmaps
    pt1, pt2: [n, 2]
    OUTPUT:
        preds: [n, 17, 2]
    """

    assert hms.dim() == 4, 'Score maps should be 4-dim'
    flat_hms = hms.view(hms.size(0), hms.size(1), -1)
    maxval, idx = torch.max(flat_hms, 2)

    maxval = maxval.view(hms.size(0), hms.size(1), 1)
    idx = idx.view(hms.size(0), hms.size(1), 1) + 1

    preds = idx.repeat(1, 1, 2).float()

    preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3)
    preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3))

    pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
    preds *= pred_mask

    # Very simple post-processing step to improve performance at tight PCK thresholds
    idx_up = (idx - hms.size(3)).clamp(0, flat_hms.size(2) - 1)
    idx_down = (idx + hms.size(3)).clamp(0, flat_hms.size(2) - 1)
    idx_left = (idx - 1).clamp(0, flat_hms.size(2) - 1)
    idx_right = (idx + 1).clamp(0, flat_hms.size(2) - 1)

    maxval_up = flat_hms.gather(2, idx_up)
    maxval_down = flat_hms.gather(2, idx_down)
    maxval_left = flat_hms.gather(2, idx_left)
    maxval_right = flat_hms.gather(2, idx_right)

    diff1 = (maxval_right - maxval_left).sign() * 0.25
    diff2 = (maxval_down - maxval_up).sign() * 0.25
    diff1[idx_up <= hms.size(3)] = 0
    diff1[idx_down / hms.size(3) >= (hms.size(3) - 1)] = 0
    diff2[(idx_left % hms.size(3)) == 0] = 0
    diff2[(idx_left % hms.size(3)) == (hms.size(3) - 1)] = 0

    preds[:, :, 0] += diff1.squeeze(-1)
    preds[:, :, 1] += diff2.squeeze(-1)

    preds_tf = torch.zeros(preds.size())
    preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW)

    return preds, preds_tf, maxval
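getPreds flattens each (H, W) heatmap, takes the argmax, and converts the flat index back to (x, y) with a mod/div pair (the +1/-1 shift keeps the arithmetic 1-based in between). A minimal worked check, assuming getPreds from this eval.py is importable:

# Decoding a heatmap argmax the way getPreds does (illustrative).
import torch

hm = torch.zeros(1, 1, 3, 4)      # (batch, joints, H, W)
hm[0, 0, 1, 2] = 1.0              # peak at y=1, x=2; flat index = 1*4 + 2 = 6
preds = getPreds(hm)
print(preds)                      # tensor([[[2., 1.]]]) -> (x, y) of the peak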
534
StreamServer/src/analytic/action/SPPE/src/utils/img.py
Normal file
@ -0,0 +1,534 @@
import numpy as np
import cv2
import torch
import scipy.misc
from torchvision import transforms
import torch.nn.functional as F
from scipy.ndimage import maximum_filter

from PIL import Image
from copy import deepcopy
import matplotlib
# matplotlib.use('agg')
import matplotlib.pyplot as plt


def im_to_torch(img):
    img = np.array(img)
    img = np.transpose(img, (2, 0, 1))  # C*H*W
    img = to_torch(img).float()
    if img.max() > 1:
        img /= 255
    return img


def torch_to_im(img):
    img = to_numpy(img)
    img = np.transpose(img, (1, 2, 0))  # H*W*C
    return img


def load_image(img_path):
    # H x W x C => C x H x W
    return im_to_torch(scipy.misc.imread(img_path, mode='RGB'))


def to_numpy(tensor):
    if torch.is_tensor(tensor):
        return tensor.cpu().numpy()
    elif type(tensor).__module__ != 'numpy':
        raise ValueError("Cannot convert {} to numpy array"
                         .format(type(tensor)))
    return tensor


def to_torch(ndarray):
    if type(ndarray).__module__ == 'numpy':
        return torch.from_numpy(ndarray)
    elif not torch.is_tensor(ndarray):
        raise ValueError("Cannot convert {} to torch tensor"
                         .format(type(ndarray)))
    return ndarray


def drawCircle(img, pt, sigma):
    img = to_numpy(img)
    tmpSize = 3 * sigma
    # Check that any part of the gaussian is in-bounds
    ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
    br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]

    if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
            br[0] < 0 or br[1] < 0):
        # If not, just return the image as is
        return to_torch(img)

    # Generate gaussian
    size = 2 * tmpSize + 1
    x = np.arange(0, size, 1, float)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    sigma = size / 4.0
    # The gaussian is not normalized, we want the center value to equal 1
    g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
    g[g > 0] = 1
    # Usable gaussian range
    g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
    # Image range
    img_x = max(0, ul[0]), min(br[0], img.shape[1])
    img_y = max(0, ul[1]), min(br[1], img.shape[0])

    img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
    return to_torch(img)


def drawGaussian(img, pt, sigma):
    img = to_numpy(img)
    tmpSize = 3 * sigma
    # Check that any part of the gaussian is in-bounds
    ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
    br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]

    if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
            br[0] < 0 or br[1] < 0):
        # If not, just return the image as is
        return to_torch(img)

    # Generate gaussian
    size = 2 * tmpSize + 1
    x = np.arange(0, size, 1, float)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    sigma = size / 4.0
    # The gaussian is not normalized, we want the center value to equal 1
    g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))

    # Usable gaussian range
    g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
    # Image range
    img_x = max(0, ul[0]), min(br[0], img.shape[1])
    img_y = max(0, ul[1]), min(br[1], img.shape[0])

    img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
    return to_torch(img)


def drawBigCircle(img, pt, sigma):
    img = to_numpy(img)
    tmpSize = 3 * sigma
    # Check that any part of the gaussian is in-bounds
    ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
    br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]

    if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
            br[0] < 0 or br[1] < 0):
        # If not, just return the image as is
        return to_torch(img)

    # Generate gaussian
    size = 2 * tmpSize + 1
    x = np.arange(0, size, 1, float)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    sigma = size / 4.0
    # The gaussian is not normalized, we want the center value to equal 1
    g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
    g[g > 0.4] = 1
    # Usable gaussian range
    g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
    # Image range
    img_x = max(0, ul[0]), min(br[0], img.shape[1])
    img_y = max(0, ul[1]), min(br[1], img.shape[0])

    img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
    return to_torch(img)


def drawSmallCircle(img, pt, sigma):
    img = to_numpy(img)
    tmpSize = 3 * sigma
    # Check that any part of the gaussian is in-bounds
    ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
    br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]

    if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
            br[0] < 0 or br[1] < 0):
        # If not, just return the image as is
        return to_torch(img)

    # Generate gaussian
    size = 2 * tmpSize + 1
    x = np.arange(0, size, 1, float)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    sigma = size / 4.0
    # The gaussian is not normalized, we want the center value to equal 1
    g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
    g[g > 0.5] = 1
    # Usable gaussian range
    g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
    # Image range
    img_x = max(0, ul[0]), min(br[0], img.shape[1])
    img_y = max(0, ul[1]), min(br[1], img.shape[0])

    img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
    return to_torch(img)


def transformBox(pt, ul, br, inpH, inpW, resH, resW):
    center = torch.zeros(2)
    center[0] = (br[0] - 1 - ul[0]) / 2
    center[1] = (br[1] - 1 - ul[1]) / 2

    lenH = max(br[1] - ul[1], (br[0] - ul[0]) * inpH / inpW)
    lenW = lenH * inpW / inpH

    _pt = torch.zeros(2)
    _pt[0] = pt[0] - ul[0]
    _pt[1] = pt[1] - ul[1]
    # Move to center
    _pt[0] = _pt[0] + max(0, (lenW - 1) / 2 - center[0])
    _pt[1] = _pt[1] + max(0, (lenH - 1) / 2 - center[1])
    pt = (_pt * resH) / lenH
    pt[0] = round(float(pt[0]))
    pt[1] = round(float(pt[1]))
    return pt.int()


def transformBoxInvert(pt, ul, br, inpH, inpW, resH, resW):
    center = np.zeros(2)
    center[0] = (br[0] - 1 - ul[0]) / 2
    center[1] = (br[1] - 1 - ul[1]) / 2

    lenH = max(br[1] - ul[1], (br[0] - ul[0]) * inpH / inpW)
    lenW = lenH * inpW / inpH

    _pt = (pt * lenH) / resH
    _pt[0] = _pt[0] - max(0, (lenW - 1) / 2 - center[0])
    _pt[1] = _pt[1] - max(0, (lenH - 1) / 2 - center[1])

    new_point = np.zeros(2)
    new_point[0] = _pt[0] + ul[0]
    new_point[1] = _pt[1] + ul[1]
    return new_point


def transformBoxInvert_batch(pt, ul, br, inpH, inpW, resH, resW):
    """
    pt: [n, 17, 2]
    ul: [n, 2]
    br: [n, 2]
    """
    num_pt = pt.shape[1]
    center = (br - 1 - ul) / 2

    size = br - ul
    size[:, 0] *= (inpH / inpW)

    lenH, _ = torch.max(size, dim=1)   # [n,]
    lenW = lenH * (inpW / inpH)

    _pt = (pt * lenH[:, np.newaxis, np.newaxis]) / resH
    _pt[:, :, 0] = _pt[:, :, 0] - ((lenW[:, np.newaxis].repeat(1, num_pt) - 1) /
                                   2 - center[:, 0].unsqueeze(-1).repeat(1, num_pt)).clamp(min=0)
    _pt[:, :, 1] = _pt[:, :, 1] - ((lenH[:, np.newaxis].repeat(1, num_pt) - 1) /
                                   2 - center[:, 1].unsqueeze(-1).repeat(1, num_pt)).clamp(min=0)

    new_point = torch.zeros(pt.size())
    new_point[:, :, 0] = _pt[:, :, 0] + ul[:, 0].unsqueeze(-1).repeat(1, num_pt)
    new_point[:, :, 1] = _pt[:, :, 1] + ul[:, 1].unsqueeze(-1).repeat(1, num_pt)
    return new_point


def cropBox(img, ul, br, resH, resW):
    ul = ul.int()
    br = (br - 1).int()
    # br = br.int()
    lenH = max((br[1] - ul[1]).item(), (br[0] - ul[0]).item() * resH / resW)
    lenW = lenH * resW / resH
    if img.dim() == 2:
        img = img[np.newaxis, :]

    box_shape = [(br[1] - ul[1]).item(), (br[0] - ul[0]).item()]
    pad_size = [(lenH - box_shape[0]) // 2, (lenW - box_shape[1]) // 2]
    # Padding Zeros
    if ul[1] > 0:
        img[:, :ul[1], :] = 0
    if ul[0] > 0:
        img[:, :, :ul[0]] = 0
    if br[1] < img.shape[1] - 1:
        img[:, br[1] + 1:, :] = 0
    if br[0] < img.shape[2] - 1:
        img[:, :, br[0] + 1:] = 0

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)

    src[0, :] = np.array(
        [ul[0] - pad_size[1], ul[1] - pad_size[0]], np.float32)
    src[1, :] = np.array(
        [br[0] + pad_size[1], br[1] + pad_size[0]], np.float32)
    dst[0, :] = 0
    dst[1, :] = np.array([resW - 1, resH - 1], np.float32)

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    dst_img = cv2.warpAffine(torch_to_im(img), trans,
                             (resW, resH), flags=cv2.INTER_LINEAR)

    return im_to_torch(torch.Tensor(dst_img))


def cv_rotate(img, rot, resW, resH):
    center = np.array((resW - 1, resH - 1)) / 2
    rot_rad = np.pi * rot / 180

    src_dir = get_dir([0, (resH - 1) * -0.5], rot_rad)
    dst_dir = np.array([0, (resH - 1) * -0.5], np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)

    src[0, :] = center
    src[1, :] = center + src_dir
    dst[0, :] = [(resW - 1) * 0.5, (resH - 1) * 0.5]
    dst[1, :] = np.array([(resW - 1) * 0.5, (resH - 1) * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    dst_img = cv2.warpAffine(torch_to_im(img), trans,
                             (resW, resH), flags=cv2.INTER_LINEAR)

    return im_to_torch(torch.Tensor(dst_img))


def flip(x):
    assert (x.dim() == 3 or x.dim() == 4)
    dim = x.dim() - 1
    if '0.4.1' in torch.__version__ or '1.0' in torch.__version__:
        return x.flip(dims=(dim,))
    else:
        is_cuda = False
        if x.is_cuda:
            is_cuda = True
            x = x.cpu()
        x = x.numpy().copy()
        if x.ndim == 3:
            x = np.transpose(np.fliplr(np.transpose(x, (0, 2, 1))), (0, 2, 1))
        elif x.ndim == 4:
            for i in range(x.shape[0]):
                x[i] = np.transpose(
                    np.fliplr(np.transpose(x[i], (0, 2, 1))), (0, 2, 1))
        # x = x.swapaxes(dim, 0)
        # x = x[::-1, ...]
        # x = x.swapaxes(0, dim)

        x = torch.from_numpy(x.copy())
        if is_cuda:
            x = x.cuda()
    return x


def shuffleLR(x, dataset):
    flipRef = dataset.flipRef
    assert (x.dim() == 3 or x.dim() == 4)
    for pair in flipRef:
        dim0, dim1 = pair
        dim0 -= 1
        dim1 -= 1
        if x.dim() == 4:
            tmp = x[:, dim1].clone()
            x[:, dim1] = x[:, dim0].clone()
            x[:, dim0] = tmp.clone()
            # x[:, dim0], x[:, dim1] = deepcopy((x[:, dim1], x[:, dim0]))
        else:
            tmp = x[dim1].clone()
            x[dim1] = x[dim0].clone()
            x[dim0] = tmp.clone()
            # x[dim0], x[dim1] = deepcopy((x[dim1], x[dim0]))
    return x


def drawMPII(inps, preds):
    assert inps.dim() == 4
    p_color = ['g', 'b', 'purple', 'b', 'purple',
               'y', 'o', 'y', 'o', 'y', 'o',
               'pink', 'r', 'pink', 'r', 'pink', 'r']
    p_color = ['r', 'r', 'r', 'b', 'b', 'b',
               'black', 'black', 'black', 'black',
               'y', 'y', 'white', 'white', 'g', 'g']

    nImg = inps.size(0)
    imgs = []
    for n in range(nImg):
        img = to_numpy(inps[n])
        img = np.transpose(img, (1, 2, 0))
        imgs.append(img)

    fig = plt.figure()
    plt.imshow(imgs[0])
    ax = fig.add_subplot(1, 1, 1)
    # print(preds.shape)
    for p in range(16):
        x, y = preds[0][p]
        cor = (round(x), round(y)), 10
        ax.add_patch(plt.Circle(*cor, color=p_color[p]))
    plt.axis('off')

    plt.show()

    return imgs


def drawCOCO(inps, preds, scores):
    assert inps.dim() == 4
    p_color = ['g', 'b', 'purple', 'b', 'purple',
               'y', 'orange', 'y', 'orange', 'y', 'orange',
               'pink', 'r', 'pink', 'r', 'pink', 'r']

    nImg = inps.size(0)
    imgs = []
    for n in range(nImg):
        img = to_numpy(inps[n])
        img = np.transpose(img, (1, 2, 0))
        imgs.append(img)

    fig = plt.figure()
    plt.imshow(imgs[0])
    ax = fig.add_subplot(1, 1, 1)
    # print(preds.shape)
    for p in range(17):
        if scores[0][p][0] < 0.2:
            continue
        x, y = preds[0][p]
        cor = (round(x), round(y)), 3
        ax.add_patch(plt.Circle(*cor, color=p_color[p]))
    plt.axis('off')

    plt.show()

    return imgs


def get_3rd_point(a, b):
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_dir(src_point, rot_rad):
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)

    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs

    return src_result


def findPeak(hm):
    mx = maximum_filter(hm, size=5)
    idx = zip(*np.where((mx == hm) * (hm > 0.1)))
    candidate_points = []
    for (y, x) in idx:
        candidate_points.append([x, y, hm[y][x]])
    if len(candidate_points) == 0:
        return torch.zeros(0)
    candidate_points = np.array(candidate_points)
    candidate_points = candidate_points[np.lexsort(-candidate_points.T)]
    return torch.Tensor(candidate_points)


def processPeaks(candidate_points, hm, pt1, pt2, inpH, inpW, resH, resW):
    # type: (Tensor, Tensor, Tensor, Tensor, float, float, float, float) -> List[Tensor]

    if candidate_points.shape[0] == 0:   # Low Response
        maxval = np.max(hm.reshape(1, -1), 1)
        idx = np.argmax(hm.reshape(1, -1), 1)

        x = idx % resW
        y = int(idx / resW)

        candidate_points = np.zeros((1, 3))
        candidate_points[0, 0:1] = x
        candidate_points[0, 1:2] = y
        candidate_points[0, 2:3] = maxval

    res_pts = []
    for i in range(candidate_points.shape[0]):
        x, y, maxval = candidate_points[i][0], candidate_points[i][1], candidate_points[i][2]

        if bool(maxval < 0.05) and len(res_pts) > 0:
            pass
        else:
            if bool(x > 0) and bool(x < resW - 2):
                if bool(hm[int(y)][int(x) + 1] - hm[int(y)][int(x) - 1] > 0):
                    x += 0.25
                elif bool(hm[int(y)][int(x) + 1] - hm[int(y)][int(x) - 1] < 0):
                    x -= 0.25
            if bool(y > 0) and bool(y < resH - 2):
                if bool(hm[int(y) + 1][int(x)] - hm[int(y) - 1][int(x)] > 0):
                    y += (0.25 * inpH / inpW)
                elif bool(hm[int(y) + 1][int(x)] - hm[int(y) - 1][int(x)] < 0):
                    y -= (0.25 * inpH / inpW)

            # pt = torch.zeros(2)
            pt = np.zeros(2)
            pt[0] = x + 0.2
            pt[1] = y + 0.2

            pt = transformBoxInvert(pt, pt1, pt2, inpH, inpW, resH, resW)

            res_pt = np.zeros(3)
            res_pt[:2] = pt
            res_pt[2] = maxval

            res_pts.append(res_pt)

            if maxval < 0.05:
                break
    return res_pts


def crop_dets(img, boxes, height, width):
    img = im_to_torch(img)
    img_h = img.size(1)
    img_w = img.size(2)
    img[0].add_(-0.406)
    img[1].add_(-0.457)
    img[2].add_(-0.480)

    inps = torch.zeros(len(boxes), 3, height, width)
    pt1 = torch.zeros(len(boxes), 2)
    pt2 = torch.zeros(len(boxes), 2)
    for i, box in enumerate(boxes):
        upLeft = torch.Tensor((float(box[0]), float(box[1])))
        bottomRight = torch.Tensor((float(box[2]), float(box[3])))

        h = bottomRight[1] - upLeft[1]
        w = bottomRight[0] - upLeft[0]
        if w > 100:
            scaleRate = 0.2
        else:
            scaleRate = 0.3

        upLeft[0] = max(0, upLeft[0] - w * scaleRate / 2)
        upLeft[1] = max(0, upLeft[1] - h * scaleRate / 2)
        bottomRight[0] = max(min(img_w - 1, bottomRight[0] + w * scaleRate / 2), upLeft[0] + 5)
        bottomRight[1] = max(min(img_h - 1, bottomRight[1] + h * scaleRate / 2), upLeft[1] + 5)

        inps[i] = cropBox(img.clone(), upLeft, bottomRight, height, width)
        pt1[i] = upLeft
        pt2[i] = bottomRight

    return inps, pt1, pt2
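crop_dets is the glue between the person detector and the pose network: it normalizes the frame, pads each detector box by 20-30%, and warps every box to a fixed input size via cropBox. A hedged sketch of a call with a dummy frame and one box:

# Preparing detector boxes for the pose network (illustrative sketch).
import numpy as np

frame = np.zeros((480, 640, 3), dtype=np.float32)      # dummy H x W x C image in [0, 1]
boxes = [[100, 80, 300, 400]]                          # one (xmin, ymin, xmax, ymax) box
inps, pt1, pt2 = crop_dets(frame, boxes, 320, 256)     # e.g. inputResH=320, inputResW=256
print(inps.shape)                                      # torch.Size([1, 3, 320, 256])
print(pt1[0], pt2[0])                                  # padded box corners actually cropped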
169
StreamServer/src/analytic/action/SPPE/src/utils/pose.py
Normal file
@ -0,0 +1,169 @@
from utils import (load_image, drawGaussian, drawBigCircle, drawSmallCircle, cv_rotate,
                   cropBox, transformBox, flip, shuffleLR, drawCOCO)
from utils import getPrediction
import torch
import numpy as np
import random
from SPPE.src.opt import opt


def rnd(x):
    return max(-2 * x, min(2 * x, np.random.randn(1)[0] * x))


def generateSampleBox(img_path, bndbox, part, nJoints, imgset, scale_factor, dataset, train=True):

    nJoints_coco = 17
    nJoints_mpii = 16
    img = load_image(img_path)
    if train:
        img[0].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
        img[1].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
        img[2].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)

    ori_img = img.clone()
    img[0].add_(-0.406)
    img[1].add_(-0.457)
    img[2].add_(-0.480)

    upLeft = torch.Tensor((int(bndbox[0][0]), int(bndbox[0][1])))
    bottomRight = torch.Tensor((int(bndbox[0][2]), int(bndbox[0][3])))
    ht = bottomRight[1] - upLeft[1]
    width = bottomRight[0] - upLeft[0]
    imght = img.shape[1]
    imgwidth = img.shape[2]
    scaleRate = random.uniform(*scale_factor)

    upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2)
    upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2)
    bottomRight[0] = min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2)
    bottomRight[1] = min(imght - 1, bottomRight[1] + ht * scaleRate / 2)

    # Doing Random Sample
    if opt.addDPG:
        PatchScale = random.uniform(0, 1)
        if PatchScale > 0.85:
            ratio = ht / width
            if width < ht:
                patchWidth = PatchScale * width
                patchHt = patchWidth * ratio
            else:
                patchHt = PatchScale * ht
                patchWidth = patchHt / ratio

            xmin = upLeft[0] + random.uniform(0, 1) * (width - patchWidth)
            ymin = upLeft[1] + random.uniform(0, 1) * (ht - patchHt)

            xmax = xmin + patchWidth + 1
            ymax = ymin + patchHt + 1
        else:
            xmin = max(1, min(upLeft[0] + np.random.normal(-0.0142, 0.1158) * width, imgwidth - 3))
            ymin = max(1, min(upLeft[1] + np.random.normal(0.0043, 0.068) * ht, imght - 3))
            xmax = min(max(xmin + 2, bottomRight[0] + np.random.normal(0.0154, 0.1337) * width), imgwidth - 3)
            ymax = min(max(ymin + 2, bottomRight[1] + np.random.normal(-0.0013, 0.0711) * ht), imght - 3)

        upLeft[0] = xmin
        upLeft[1] = ymin
        bottomRight[0] = xmax
        bottomRight[1] = ymax

    # Counting Joints number
    jointNum = 0
    if imgset == 'coco':
        for i in range(17):
            if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
               and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
                jointNum += 1
    else:
        for i in range(16):
            if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
               and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
                jointNum += 1

    # Doing Random Crop
    if opt.addDPG:
        if jointNum > 13 and train:
            switch = random.uniform(0, 1)
            if switch > 0.96:
                bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
                bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
            elif switch > 0.92:
                upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
                bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
            elif switch > 0.88:
                upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
                bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
            elif switch > 0.84:
                upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
                upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
            elif switch > 0.80:
                bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
            elif switch > 0.76:
                upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
            elif switch > 0.72:
                bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
            elif switch > 0.68:
                upLeft[1] = (upLeft[1] + bottomRight[1]) / 2

    ori_inp = cropBox(ori_img, upLeft, bottomRight, opt.inputResH, opt.inputResW)
    inp = cropBox(img, upLeft, bottomRight, opt.inputResH, opt.inputResW)
    if jointNum == 0:
        inp = torch.zeros(3, opt.inputResH, opt.inputResW)

    out_bigcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
    out_smallcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
    out = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
    setMask = torch.zeros(nJoints, opt.outputResH, opt.outputResW)

    # Draw Label
    if imgset == 'coco':
        for i in range(nJoints_coco):
            if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
               and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
                out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
                out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
                out[i] = drawGaussian(out[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
                setMask[i].add_(1)
    elif imgset == 'mpii':
        for i in range(nJoints_coco, nJoints_coco + nJoints_mpii):
            if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \
               and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]:
                out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
                out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
                out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
                setMask[i].add_(1)
    else:
        for i in range(nJoints_coco, nJoints_coco + nJoints_mpii):
            if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \
               and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]:
                out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
                out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
                out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
                if i != 6 + nJoints_coco and i != 7 + nJoints_coco:
                    setMask[i].add_(1)

    if opt.debug:
        preds_hm, preds_img, preds_scores = getPrediction(out.unsqueeze(0), upLeft.unsqueeze(0), bottomRight.unsqueeze(0), opt.inputResH,
                                                          opt.inputResW, opt.outputResH, opt.outputResW)
        tmp_preds = preds_hm.mul(opt.inputResH / opt.outputResH)
        drawCOCO(ori_inp.unsqueeze(0), tmp_preds, preds_scores)

    if train:
        # Flip
        if random.uniform(0, 1) < 0.5:
            inp = flip(inp)
            ori_inp = flip(ori_inp)
            out_bigcircle = shuffleLR(flip(out_bigcircle), dataset)
            out_smallcircle = shuffleLR(flip(out_smallcircle), dataset)
            out = shuffleLR(flip(out), dataset)
        # Rotate
        r = rnd(opt.rotate)
        if random.uniform(0, 1) < 0.6:
            r = 0
        if r != 0:
            inp = cv_rotate(inp, r, opt.inputResW, opt.inputResH)
            out_bigcircle = cv_rotate(out_bigcircle, r, opt.outputResW, opt.outputResH)
            out_smallcircle = cv_rotate(out_smallcircle, r, opt.outputResW, opt.outputResH)
            out = cv_rotate(out, r, opt.outputResW, opt.outputResH)

    return inp, out_bigcircle, out_smallcircle, out, setMask
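One subtlety in the augmentation above: a horizontal flip of the input must be paired with shuffleLR, which swaps the left/right joint channels listed in the dataset's flipRef, otherwise the flipped labels would name the wrong limbs. A minimal sketch of that invariant, assuming flip and shuffleLR from utils/img.py are importable (the tiny dataset stub is hypothetical):

# Flip + left/right channel swap, as done in generateSampleBox (illustrative).
import torch

class _FakeDataset:
    flipRef = ((1, 2),)               # 1-based pair: channel 0 <-> channel 1

heat = torch.zeros(2, 4, 4)           # two joint channels
heat[0, 1, 3] = 1.0                   # "left" joint peak at x=3
flipped = shuffleLR(flip(heat), _FakeDataset())
print(flipped[1].nonzero())           # peak now on channel 1 ("right"), at mirrored x=0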
192
StreamServer/src/analytic/action/Track/Tracker.py
Normal file
@ -0,0 +1,192 @@
|
||||
import time
|
||||
import numpy as np
|
||||
from collections import deque
|
||||
|
||||
from .linear_assignment import min_cost_matching, matching_cascade
|
||||
from .kalman_filter import KalmanFilter
|
||||
from .iou_matching import iou_cost
|
||||
|
||||
|
||||
class TrackState:
|
||||
"""Enumeration type for the single target track state. Newly created tracks are
|
||||
classified as `tentative` until enough evidence has been collected. Then,
|
||||
the track state is changed to `confirmed`. Tracks that are no longer alive
|
||||
are classified as `deleted` to mark them for removal from the set of active
|
||||
tracks.
|
||||
"""
|
||||
Tentative = 1
|
||||
Confirmed = 2
|
||||
Deleted = 3
|
||||
|
||||
|
||||
class Detection(object):
|
||||
"""This class represents a bounding box, keypoints, score of person detected
|
||||
in a single image.
|
||||
|
||||
Args:
|
||||
tlbr: (float array) Of shape [top, left, bottom, right].,
|
||||
keypoints: (float array) Of shape [node, pts].,
|
||||
confidence: (float) Confidence score of detection.
|
||||
"""
|
||||
    def __init__(self, tlbr, keypoints, confidence):
        self.tlbr = tlbr
        self.keypoints = keypoints
        self.confidence = confidence

    def to_tlwh(self):
        """Get (top, left, width, height)."""
        ret = self.tlbr.copy()
        ret[2:] = ret[2:] - ret[:2]
        return ret

    def to_xyah(self):
        """Get (x_center, y_center, aspect ratio, height)."""
        ret = self.to_tlwh()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret


class Track:
    def __init__(self, mean, covariance, track_id, n_init, max_age=30, buffer=30):
        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.hist = 1
        self.age = 1
        self.time_since_update = 0
        self.n_init = n_init
        self.max_age = max_age

        # Keypoints list used for action prediction.
        self.keypoints_list = deque(maxlen=buffer)

        self.state = TrackState.Tentative

    def to_tlwh(self):
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    def to_tlbr(self):
        ret = self.to_tlwh()
        ret[2:] = ret[:2] + ret[2:]
        return ret

    def get_center(self):
        return self.mean[:2].copy()

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.
        """
        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
        self.age += 1
        self.time_since_update += 1

    def update(self, kf, detection):
        """Perform Kalman filter measurement update step."""
        self.mean, self.covariance = kf.update(self.mean, self.covariance,
                                               detection.to_xyah())
        self.keypoints_list.append(detection.keypoints)

        self.hist += 1
        self.time_since_update = 0
        if self.state == TrackState.Tentative and self.hist >= self.n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step)."""
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self.max_age:
            self.state = TrackState.Deleted

    def is_tentative(self):
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        return self.state == TrackState.Deleted


class Tracker:
    def __init__(self, max_iou_distance=0.7, max_age=30, n_init=5):
        self.max_iou_dist = max_iou_distance
        self.max_age = max_age
        self.n_init = n_init

        self.kf = KalmanFilter()
        self.tracks = []
        self._next_id = 1

    def predict(self):
        """Propagate track state distributions one time step forward.
        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    def update(self, detections):
        """Perform measurement update and track management.
        Parameters
        ----------
        detections : List[deep_sort.detection.Detection]
            A list of detections at the current time step.
        """
        # Run matching cascade.
        matches, unmatched_tracks, unmatched_detections = self._match(detections)

        # Update matched tracks.
        for track_idx, detection_idx in matches:
            self.tracks[track_idx].update(self.kf, detections[detection_idx])
        # Mark unmatched tracks as missed.
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        # Initiate a new track for each unmatched detection.
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])

        # Remove deleted tracks.
        self.tracks = [t for t in self.tracks if not t.is_deleted()]

    def _match(self, detections):
        confirmed_tracks, unconfirmed_tracks = [], []
        for i, t in enumerate(self.tracks):
            if t.is_confirmed():
                confirmed_tracks.append(i)
            else:
                unconfirmed_tracks.append(i)

        # Associate confirmed tracks using the matching cascade.
        matches_a, unmatched_tracks_a, unmatched_detections = matching_cascade(
            iou_cost, self.max_iou_dist, self.max_age, self.tracks, detections, confirmed_tracks
        )

        # Associate remaining tracks together with unconfirmed tracks using IOU.
        track_candidates = unconfirmed_tracks + [
            k for k in unmatched_tracks_a if self.tracks[k].time_since_update == 1]
        unmatched_tracks_a = [
            k for k in unmatched_tracks_a if self.tracks[k].time_since_update != 1]

        matches_b, unmatched_tracks_b, unmatched_detections = min_cost_matching(
            iou_cost, self.max_iou_dist, self.tracks, detections, track_candidates, unmatched_detections
        )

        matches = matches_a + matches_b
        unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
        return matches, unmatched_tracks, unmatched_detections

    def _initiate_track(self, detection):
        if detection.confidence < 0.4:
            return
        mean, covariance = self.kf.initiate(detection.to_xyah())
        self.tracks.append(Track(mean, covariance, self._next_id, self.n_init, self.max_age))
        self._next_id += 1
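A minimal sketch of the per-frame loop this tracker expects (illustrative only, not part of the committed file); `detections_per_frame` is a hypothetical list of per-frame `Detection` lists, built from pose keypoints as `action_model.py` does below:

# Illustrative usage sketch (not part of the commit): one predict/update cycle.
tracker = Tracker(max_age=30, n_init=3)
for frame_detections in detections_per_frame:   # hypothetical List[List[Detection]]
    tracker.predict()                           # Kalman prediction for every track
    tracker.update(frame_detections)            # associate, confirm, prune
    for track in tracker.tracks:
        if track.is_confirmed() and track.time_since_update == 0:
            print(track.track_id, track.to_tlbr())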
78
StreamServer/src/analytic/action/Track/iou_matching.py
Normal file
@@ -0,0 +1,78 @@
import numpy as np

INFTY_COST = 1e+5


def iou(bbox, candidates):
    """Compute intersection over union.
    Parameters
    ----------
    bbox : ndarray
        A bounding box in format `(xmin, ymin, xmax, ymax)`.
    candidates : ndarray
        A matrix of candidate bounding boxes (one per row) in the same format
        as `bbox`.

    Returns
    -------
    ndarray
        The intersection over union in [0, 1] between the `bbox` and each
        candidate. A higher score means a larger fraction of the `bbox` is
        occluded by the candidate.
    """
    # Boxes are already tlbr here; the original deep_sort code converted from
    # tlwh: bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
    bbox_tl, bbox_br = bbox[:2], bbox[2:]
    candidates_tl = candidates[:, :2]
    candidates_br = candidates[:, 2:]

    tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
               np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
    br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
               np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
    wh = np.maximum(0., br - tl)

    area_intersection = wh.prod(axis=1)
    area_bbox = (bbox[2:] - bbox[:2]).prod()
    area_candidates = (candidates[:, 2:] - candidates[:, :2]).prod(axis=1)
    return area_intersection / (area_bbox + area_candidates - area_intersection)


def iou_cost(tracks, detections, track_indices=None, detection_indices=None):
    """An intersection over union distance metric.
    Parameters
    ----------
    tracks : List[Track]
        A list of tracks.
    detections : List[Detection]
        A list of detections.
    track_indices : Optional[List[int]]
        A list of indices to tracks that should be matched. Defaults to
        all `tracks`.
    detection_indices : Optional[List[int]]
        A list of indices to detections that should be matched. Defaults
        to all `detections`.

    Returns
    -------
    ndarray
        Returns a cost matrix of shape
        len(track_indices), len(detection_indices) where entry (i, j) is
        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
    for row, track_idx in enumerate(track_indices):
        # The original deep_sort gated stale tracks here:
        # if tracks[track_idx].time_since_update > 1:
        #     cost_matrix[row, :] = INFTY_COST
        #     continue

        bbox = tracks[track_idx].to_tlbr()
        candidates = np.asarray([detections[i].tlbr for i in detection_indices])
        cost_matrix[row, :] = 1. - iou(bbox, candidates)

    return cost_matrix
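A quick sanity check of `iou` on hand-made tlbr boxes (illustrative, with made-up coordinates):

# Illustrative check (not part of the commit): one overlapping and one disjoint box.
import numpy as np
bbox = np.array([0., 0., 2., 2.])
candidates = np.array([[1., 1., 3., 3.],    # overlaps 1x1 of the 2x2 box
                       [4., 4., 5., 5.]])   # disjoint
print(iou(bbox, candidates))                # approx [1/7 = 0.1429, 0.0]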
198
StreamServer/src/analytic/action/Track/kalman_filter.py
Normal file
@@ -0,0 +1,198 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg


class KalmanFilter(object):
    """A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space
        x, y, a, h, vx, vy, va, vh

    contains the bounding box center position (x, y), aspect ratio a, height h,
    and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location
    (x, y, a, h) is taken as direct observation of the state space (linear
    observation model).
    """
    def __init__(self):
        ndim, dt = 4, 1.

        # Create Kalman filter model matrices.
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        self._update_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initiate(self, measurement):
        """Create track from unassociated measurement.
        Parameters
        ----------
        measurement : ndarray
            Bounding box coordinates (x, y, a, h) with center position (x, y),
            aspect ratio a, and height h.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (8 dimensional) and covariance matrix (8x8
            dimensional) of the new track. Unobserved velocities are initialized
            to 0 mean.
        """
        mean_pos = measurement
        mean_vel = np.zeros_like(mean_pos)
        mean = np.r_[mean_pos, mean_vel]

        std = [
            2 * self._std_weight_position * measurement[3],
            2 * self._std_weight_position * measurement[3],
            1e-2,
            2 * self._std_weight_position * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            1e-5,
            10 * self._std_weight_velocity * measurement[3]]
        covariance = np.diag(np.square(std))
        return mean, covariance

    def predict(self, mean, covariance):
        """Run Kalman filter prediction step.
        Parameters
        ----------
        mean : ndarray
            The 8 dimensional mean vector of the object state at the previous
            time step.
        covariance : ndarray
            The 8x8 dimensional covariance matrix of the object state at the
            previous time step.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the predicted
            state. Unobserved velocities are initialized to 0 mean.
        """
        std_pos = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-2,
            self._std_weight_position * mean[3]]
        std_vel = [
            self._std_weight_velocity * mean[3],
            self._std_weight_velocity * mean[3],
            1e-5,
            self._std_weight_velocity * mean[3]]
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))

        mean = np.dot(self._motion_mat, mean)
        covariance = np.linalg.multi_dot((
            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov

        return mean, covariance

    def project(self, mean, covariance):
        """Project state distribution to measurement space.
        Parameters
        ----------
        mean : ndarray
            The state's mean vector (8 dimensional array).
        covariance : ndarray
            The state's covariance matrix (8x8 dimensional).

        Returns
        -------
        (ndarray, ndarray)
            Returns the projected mean and covariance matrix of the given state
            estimate.
        """
        std = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-1,
            self._std_weight_position * mean[3]]
        innovation_cov = np.diag(np.square(std))

        mean = np.dot(self._update_mat, mean)
        covariance = np.linalg.multi_dot((
            self._update_mat, covariance, self._update_mat.T))
        return mean, covariance + innovation_cov

    def update(self, mean, covariance, measurement):
        """Run Kalman filter correction step.
        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector (8 dimensional).
        covariance : ndarray
            The state's covariance matrix (8x8 dimensional).
        measurement : ndarray
            The 4 dimensional measurement vector (x, y, a, h), where (x, y)
            is the center position, a the aspect ratio, and h the height of the
            bounding box.

        Returns
        -------
        (ndarray, ndarray)
            Returns the measurement-corrected state distribution.
        """
        projected_mean, projected_cov = self.project(mean, covariance)

        chol_factor, lower = scipy.linalg.cho_factor(
            projected_cov, lower=True, check_finite=False)
        kalman_gain = scipy.linalg.cho_solve(
            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
            check_finite=False).T
        innovation = measurement - projected_mean

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        new_covariance = covariance - np.linalg.multi_dot((
            kalman_gain, projected_cov, kalman_gain.T))
        return new_mean, new_covariance

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False):
        """Compute gating distance between state distribution and measurements.
        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (8 dimensional).
        covariance : ndarray
            Covariance of the state distribution (8x8 dimensional).
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements, each in
            format (x, y, a, h) where (x, y) is the bounding box center
            position, a the aspect ratio, and h the height.
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box center position only.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.
        """
        mean, covariance = self.project(mean, covariance)
        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        cholesky_factor = np.linalg.cholesky(covariance)
        d = measurements - mean
        z = scipy.linalg.solve_triangular(
            cholesky_factor, d.T, lower=True, check_finite=False,
            overwrite_b=True)
        squared_maha = np.sum(z * z, axis=0)
        return squared_maha
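A minimal round trip through the filter (illustrative; the measurement values are arbitrary):

# Illustrative round trip (not part of the commit): initiate -> predict -> update.
import numpy as np
kf = KalmanFilter()
measurement = np.array([320., 240., 0.5, 180.])   # arbitrary (x, y, a, h)
mean, cov = kf.initiate(measurement)
mean, cov = kf.predict(mean, cov)                  # constant-velocity step
mean, cov = kf.update(mean, cov, measurement)      # correct with a new observation
print(mean[:4])                                    # filtered (x, y, a, h)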
191
StreamServer/src/analytic/action/Track/linear_assignment.py
Normal file
@@ -0,0 +1,191 @@
import numpy as np
# The deprecated sklearn linear_assignment was replaced by scipy's implementation.
from scipy.optimize import linear_sum_assignment

"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}
INFTY_COST = 1e+5


def min_cost_matching(distance_metric, max_distance, tracks, detections,
                      track_indices=None, detection_indices=None):
    """Solve linear assignment problem.
    Parameters
    ----------
    distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
        The distance metric is given a list of tracks and detections as well as
        a list of N track indices and M detection indices. The metric should
        return the NxM dimensional cost matrix, where element (i, j) is the
        association cost between the i-th track in the given track indices and
        the j-th detection in the given detection indices.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    tracks : List[Track]
        A list of predicted tracks at the current time step.
    detections : List[Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above).
    detection_indices : List[int]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above).

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.
    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    if len(detection_indices) == 0 or len(track_indices) == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    cost_matrix = distance_metric(tracks, detections, track_indices, detection_indices)
    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
    indices = linear_sum_assignment(cost_matrix)
    indices = np.array(indices).transpose()

    matches, unmatched_tracks, unmatched_detections = [], [], []
    for col, detection_idx in enumerate(detection_indices):
        if col not in indices[:, 1]:
            unmatched_detections.append(detection_idx)
    for row, track_idx in enumerate(track_indices):
        if row not in indices[:, 0]:
            unmatched_tracks.append(track_idx)
    for row, col in indices:
        track_idx = track_indices[row]
        detection_idx = detection_indices[col]
        if cost_matrix[row, col] > max_distance:
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
        else:
            matches.append((track_idx, detection_idx))

    return matches, unmatched_tracks, unmatched_detections


def matching_cascade(distance_metric, max_distance, cascade_depth, tracks, detections,
                     track_indices=None, detection_indices=None):
    """Run matching cascade.
    Parameters
    ----------
    distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
        The distance metric is given a list of tracks and detections as well as
        a list of N track indices and M detection indices. The metric should
        return the NxM dimensional cost matrix, where element (i, j) is the
        association cost between the i-th track in the given track indices and
        the j-th detection in the given detection indices.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    cascade_depth: int
        The cascade depth, should be set to the maximum track age.
    tracks : List[Track]
        A list of predicted tracks at the current time step.
    detections : List[Detection]
        A list of detections at the current time step.
    track_indices : Optional[List[int]]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above). Defaults to all tracks.
    detection_indices : Optional[List[int]]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above). Defaults to all
        detections.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.
    """
    if track_indices is None:
        track_indices = list(range(len(tracks)))
    if detection_indices is None:
        detection_indices = list(range(len(detections)))

    unmatched_detections = detection_indices
    matches = []
    for level in range(cascade_depth):
        if len(unmatched_detections) == 0:  # No detections left.
            break

        # Match tracks in order of increasing time since their last update.
        track_indices_l = [k for k in track_indices
                           if tracks[k].time_since_update == 1 + level]
        if len(track_indices_l) == 0:  # Nothing to match at this level.
            continue

        matches_l, _, unmatched_detections = min_cost_matching(
            distance_metric, max_distance, tracks, detections, track_indices_l, unmatched_detections)
        matches += matches_l

    unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
    return matches, unmatched_tracks, unmatched_detections


def gate_cost_matrix(kf, cost_matrix, tracks, detections, track_indices, detection_indices,
                     gated_cost=INFTY_COST, only_position=False):
    """Invalidate infeasible entries in cost matrix based on the state
    distributions obtained by Kalman filtering.
    Parameters
    ----------
    kf : The Kalman filter.
    cost_matrix : ndarray
        The NxM dimensional cost matrix, where N is the number of track indices
        and M is the number of detection indices, such that entry (i, j) is the
        association cost between `tracks[track_indices[i]]` and
        `detections[detection_indices[j]]`.
    tracks : List[Track]
        A list of predicted tracks at the current time step.
    detections : List[Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above).
    detection_indices : List[int]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above).
    gated_cost : Optional[float]
        Entries in the cost matrix corresponding to infeasible associations are
        set to this value. Defaults to a very large value.
    only_position : Optional[bool]
        If True, only the x, y position of the state distribution is considered
        during gating. Defaults to False.

    Returns
    -------
    ndarray
        Returns the modified cost matrix.
    """
    gating_dim = 2 if only_position else 4
    gating_threshold = chi2inv95[gating_dim]
    measurements = np.asarray([detections[i].to_xyah() for i in detection_indices])
    for row, track_idx in enumerate(track_indices):
        track = tracks[track_idx]
        gating_distance = kf.gating_distance(track.mean, track.covariance,
                                             measurements, only_position)
        cost_matrix[row, gating_distance > gating_threshold] = gated_cost

    return cost_matrix
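A toy run of `min_cost_matching` with a hand-written metric (illustrative; the dummy metric ignores the track/detection objects and returns a fixed cost matrix):

# Illustrative toy run (not part of the commit): two tracks, two detections.
import numpy as np

def dummy_metric(tracks, detections, track_indices, detection_indices):
    # Fixed costs: track 0 matches detection 1 cheaply, track 1 matches detection 0.
    return np.array([[0.9, 0.1],
                     [0.2, 0.8]])

matches, unmatched_tracks, unmatched_detections = min_cost_matching(
    dummy_metric, max_distance=0.7, tracks=[None, None], detections=[None, None])
print(matches)  # [(0, 1), (1, 0)]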
0
StreamServer/src/analytic/action/__init__.py
Normal file
155
StreamServer/src/analytic/action/action_model.py
Normal file
@@ -0,0 +1,155 @@
import cv2
import time
import torch
import numpy as np
from fastapi import HTTPException

from .Detection.Utils import ResizePadding
from .CameraLoader import CamLoader, CamLoader_Q
from .DetectorLoader import TinyYOLOv3_onecls

from .PoseEstimateLoader import SPPE_FastPose
from .fn import draw_single

from .Track.Tracker import Detection, Tracker
from .ActionsEstLoader import TSSTG

from config import CONFIG_FILE, YOLO_WEIGHT_FILE, SPPE_WEIGHT_FILE, TSSTG_WEIGHT_FILE

INP_DETS = 384
INP_POSE = (224, 160)
POSE_BACKBONE = 'resnet50'
SHOW_DETECTED = False
SHOW_SKELETON = True
DEVICE = 'cuda'

resize_fn = ResizePadding(INP_DETS, INP_DETS)


def preproc(image):
    """Preprocess function for CameraLoader."""
    image = resize_fn(image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image


def kpt2bbox(kpt, ex=20):
    """Get a bbox that covers all of the keypoints (x, y).
    kpt: array of shape `(N, 2)`,
    ex: (int) padding to expand the bounding box by.
    """
    return np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex,
                     kpt[:, 0].max() + ex, kpt[:, 1].max() + ex))


def generate_action_model_frame(source):
    CAM_SOURCE = source

    # Model initialization.
    detect_model = TinyYOLOv3_onecls(INP_DETS, device=DEVICE, config_file=CONFIG_FILE,
                                     weight_file=YOLO_WEIGHT_FILE)
    pose_model = SPPE_FastPose(POSE_BACKBONE, INP_POSE[0], INP_POSE[1], device=DEVICE, path=SPPE_WEIGHT_FILE)
    action_model = TSSTG(weight_file=TSSTG_WEIGHT_FILE)  # Action model.

    # Tracker.
    max_age = 30
    tracker = Tracker(max_age=max_age, n_init=3)

    cam = CamLoader(int(CAM_SOURCE) if CAM_SOURCE.isdigit() else CAM_SOURCE,
                    preprocess=preproc).start()

    fps_time = 0
    f = 0
    while cam.grabbed():
        f += 1
        frame = cam.getitem()
        image = frame.copy()

        # Detect human bboxes in the frame with the detector model.
        detected = detect_model.detect(frame, need_resize=False, expand_bb=10)

        # Predict each track's bbox in the current frame from previous frames'
        # information with the Kalman filter.
        tracker.predict()
        # Merge the two sources of predicted bboxes together.
        for track in tracker.tracks:
            det = torch.tensor([track.to_tlbr().tolist() + [0.5, 1.0, 0.0]], dtype=torch.float32)
            detected = torch.cat([detected, det], dim=0) if detected is not None else det

        detections = []  # List of Detection objects for tracking.
        if detected is not None:
            #detected = non_max_suppression(detected[None, :], 0.45, 0.2)[0]
            # Predict the skeleton pose for each bbox.
            poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])

            # Create Detection objects.
            detections = [Detection(kpt2bbox(ps['keypoints'].numpy()),
                                    np.concatenate((ps['keypoints'].numpy(),
                                                    ps['kp_score'].numpy()), axis=1),
                                    ps['kp_score'].mean().numpy()) for ps in poses]

            # VISUALIZE.
            if SHOW_DETECTED:
                for bb in detected[:, 0:5]:
                    frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 1)

        # Update tracks by matching each track between the current and previous
        # frame, or create a new track if none matched.
        tracker.update(detections)

        # Predict actions of each track.
        for i, track in enumerate(tracker.tracks):
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            bbox = track.to_tlbr().astype(int)
            center = track.get_center().astype(int)

            action = 'pending'
            clr = (0, 255, 0)
            # Use a 30-frame time window for prediction.
            if len(track.keypoints_list) == 30:
                pts = np.array(track.keypoints_list, dtype=np.float32)
                out = action_model.predict(pts, frame.shape[:2])
                action_name = action_model.class_names[out[0].argmax()]
                action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100)
                if action_name == 'Fall Down':
                    clr = (255, 0, 0)
                elif action_name == 'Lying Down':
                    clr = (255, 200, 0)

            # VISUALIZE.
            if track.time_since_update == 0:
                if SHOW_SKELETON:
                    frame = draw_single(frame, track.keypoints_list[-1])
                frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1)
                frame = cv2.putText(frame, str(track_id), (center[0], center[1]), cv2.FONT_HERSHEY_COMPLEX,
                                    0.4, (255, 0, 0), 2)
                frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15), cv2.FONT_HERSHEY_COMPLEX,
                                    0.4, clr, 1)

        # Show frame.
        frame = cv2.resize(frame, (0, 0), fx=2., fy=2.)
        frame = cv2.putText(frame, '%d, FPS: %f' % (f, 1.0 / (time.time() - fps_time)),
                            (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        frame = frame[:, :, ::-1]
        fps_time = time.time()

        # Return the frame for video streaming.
        ret, buffer = cv2.imencode('.jpg', frame)
        if not ret:
            # If encoding fails, raise an error to stop the streaming.
            raise HTTPException(status_code=500, detail="Frame encoding failed")
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')


def output_action_detection():
    pass
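The generator yields multipart JPEG chunks, so it plugs into a FastAPI route via `StreamingResponse`. A minimal wiring sketch (the route path and `app` object are assumptions, not part of this commit):

# Illustrative wiring sketch (not part of the commit); the route is hypothetical.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

@app.get("/action-stream")
def action_stream(source: str = "0"):
    return StreamingResponse(generate_action_model_frame(source),
                             media_type="multipart/x-mixed-replace; boundary=frame")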
234
StreamServer/src/analytic/action/fn.py
Normal file
@@ -0,0 +1,234 @@
import re
import cv2
import time
import math
import collections.abc
import torch
import numpy as np

RED = (0, 0, 255)
GREEN = (0, 255, 0)
BLUE = (255, 0, 0)
CYAN = (255, 255, 0)
YELLOW = (0, 255, 255)
ORANGE = (0, 165, 255)
PURPLE = (255, 0, 255)

"""COCO_PAIR = [(0, 1), (0, 2), (1, 3), (2, 4),  # Head
             (5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
             (17, 11), (17, 12),  # Body
             (11, 13), (12, 14), (13, 15), (14, 16)]"""
COCO_PAIR = [(0, 13), (1, 2), (1, 3), (3, 5), (2, 4), (4, 6), (13, 7), (13, 8),  # Body
             (7, 9), (8, 10), (9, 11), (10, 12)]
POINT_COLORS = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0),  # Nose, LEye, REye, LEar, REar
                (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77),  # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist
                (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), (0, 255, 255)]  # LHip, RHip, LKnee, RKnee, LAnkle, RAnkle, Neck
LINE_COLORS = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), (77, 255, 222),
               (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), (77, 222, 255),
               (255, 156, 127), (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36)]

MPII_PAIR = [(8, 9), (11, 12), (11, 10), (2, 1), (1, 0), (13, 14), (14, 15), (3, 4), (4, 5),
             (8, 7), (7, 6), (6, 2), (6, 3), (8, 12), (8, 13)]

numpy_type_map = {
    'float64': torch.DoubleTensor,
    'float32': torch.FloatTensor,
    'float16': torch.HalfTensor,
    'int64': torch.LongTensor,
    'int32': torch.IntTensor,
    'int16': torch.ShortTensor,
    'int8': torch.CharTensor,
    'uint8': torch.ByteTensor,
}

_use_shared_memory = True


def collate_fn(batch):
    r"""Puts each data field into a tensor with outer dimension batch size."""
    error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
    elem_type = type(batch[0])

    if isinstance(batch[0], torch.Tensor):
        out = None
        if _use_shared_memory:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy.
            numel = sum([x.numel() for x in batch])
            storage = batch[0].storage()._new_shared(numel)
            out = batch[0].new(storage)
        return torch.stack(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        elem = batch[0]
        if elem_type.__name__ == 'ndarray':
            # Arrays of string classes and objects cannot be collated.
            if re.search('[SaUO]', elem.dtype.str) is not None:
                raise TypeError(error_msg.format(elem.dtype))
            return torch.stack([torch.from_numpy(b) for b in batch], 0)
        if elem.shape == ():  # scalars
            py_type = float if elem.dtype.name.startswith('float') else int
            return numpy_type_map[elem.dtype.name](list(map(py_type, batch)))
    elif isinstance(batch[0], int):
        return torch.LongTensor(batch)
    elif isinstance(batch[0], float):
        return torch.DoubleTensor(batch)
    elif isinstance(batch[0], (str, bytes)):
        return batch
    elif isinstance(batch[0], collections.abc.Mapping):
        return {key: collate_fn([d[key] for d in batch]) for key in batch[0]}
    elif isinstance(batch[0], collections.abc.Sequence):
        transposed = zip(*batch)
        return [collate_fn(samples) for samples in transposed]

    raise TypeError((error_msg.format(type(batch[0]))))


def collate_fn_list(batch):
    img, inp, im_name = zip(*batch)
    img = collate_fn(img)
    im_name = collate_fn(im_name)

    return img, inp, im_name


def draw_single(frame, pts, joint_format='coco'):
    if joint_format == 'coco':
        l_pair = COCO_PAIR
        p_color = POINT_COLORS
        line_color = LINE_COLORS
    elif joint_format == 'mpii':
        l_pair = MPII_PAIR
        p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
    else:
        raise NotImplementedError

    part_line = {}
    pts = np.concatenate((pts, np.expand_dims((pts[1, :] + pts[2, :]) / 2, 0)), axis=0)
    for n in range(pts.shape[0]):
        if pts[n, 2] <= 0.05:
            continue
        cor_x, cor_y = int(pts[n, 0]), int(pts[n, 1])
        part_line[n] = (cor_x, cor_y)
        cv2.circle(frame, (cor_x, cor_y), 3, p_color[n], -1)

    for i, (start_p, end_p) in enumerate(l_pair):
        if start_p in part_line and end_p in part_line:
            start_xy = part_line[start_p]
            end_xy = part_line[end_p]
            cv2.line(frame, start_xy, end_xy, line_color[i], int(1 * (pts[start_p, 2] + pts[end_p, 2]) + 1))
    return frame


def vis_frame_fast(frame, im_res, joint_format='coco'):
    """
    frame: frame image
    im_res: im_res of predictions
    joint_format: coco or mpii

    return rendered image
    """
    if joint_format == 'coco':
        l_pair = COCO_PAIR
        p_color = POINT_COLORS
        line_color = LINE_COLORS
    elif joint_format == 'mpii':
        l_pair = MPII_PAIR
        p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
    else:
        raise NotImplementedError

    #im_name = im_res['imgname'].split('/')[-1]
    img = frame
    for human in im_res:  # ['result']:
        part_line = {}
        kp_preds = human['keypoints']
        kp_scores = human['kp_score']
        kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[1, :] + kp_preds[2, :]) / 2, 0)))
        kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[1, :] + kp_scores[2, :]) / 2, 0)))
        # Draw keypoints.
        for n in range(kp_scores.shape[0]):
            if kp_scores[n] <= 0.05:
                continue
            cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1])
            part_line[n] = (cor_x, cor_y)
            cv2.circle(img, (cor_x, cor_y), 4, p_color[n], -1)
        # Draw limbs.
        for i, (start_p, end_p) in enumerate(l_pair):
            if start_p in part_line and end_p in part_line:
                start_xy = part_line[start_p]
                end_xy = part_line[end_p]
                cv2.line(img, start_xy, end_xy, line_color[i], int(2 * (kp_scores[start_p] + kp_scores[end_p]) + 1))
    return img


def vis_frame(frame, im_res, joint_format='coco'):
    """
    frame: frame image
    im_res: im_res of predictions
    joint_format: coco or mpii

    return rendered image
    """
    if joint_format == 'coco':
        l_pair = COCO_PAIR
        p_color = POINT_COLORS
        line_color = LINE_COLORS
    elif joint_format == 'mpii':
        l_pair = MPII_PAIR
        p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
        line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
    else:
        raise NotImplementedError

    im_name = im_res['imgname'].split('/')[-1]
    img = frame
    height, width = img.shape[:2]
    img = cv2.resize(img, (int(width / 2), int(height / 2)))
    for human in im_res['result']:
        part_line = {}
        kp_preds = human['keypoints']
        kp_scores = human['kp_score']
        kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[5, :] + kp_preds[6, :]) / 2, 0)))
        kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[5, :] + kp_scores[6, :]) / 2, 0)))
        # Draw keypoints.
        for n in range(kp_scores.shape[0]):
            if kp_scores[n] <= 0.05:
                continue
            cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1])
            part_line[n] = (int(cor_x / 2), int(cor_y / 2))
            bg = img.copy()
            cv2.circle(bg, (int(cor_x / 2), int(cor_y / 2)), 2, p_color[n], -1)
            # Blend the keypoint into the image, weighted by its confidence.
            transparency = float(max(0, min(1, kp_scores[n])))
            img = cv2.addWeighted(bg, transparency, img, 1 - transparency, 0)
        # Draw limbs.
        for i, (start_p, end_p) in enumerate(l_pair):
            if start_p in part_line and end_p in part_line:
                start_xy = part_line[start_p]
                end_xy = part_line[end_p]
                bg = img.copy()

                X = (start_xy[0], end_xy[0])
                Y = (start_xy[1], end_xy[1])
                mX = np.mean(X)
                mY = np.mean(Y)
                length = ((Y[0] - Y[1]) ** 2 + (X[0] - X[1]) ** 2) ** 0.5
                angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1]))
                stickwidth = int((kp_scores[start_p] + kp_scores[end_p]) + 1)
                polygon = cv2.ellipse2Poly((int(mX), int(mY)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
                cv2.fillConvexPoly(bg, polygon, line_color[i])
                #cv2.line(bg, start_xy, end_xy, line_color[i], (2 * (kp_scores[start_p] + kp_scores[end_p])) + 1)
                transparency = float(max(0, min(1, 0.5 * (kp_scores[start_p] + kp_scores[end_p]))))
                img = cv2.addWeighted(bg, transparency, img, 1 - transparency, 0)
    img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC)
    return img


def getTime(time1=0):
    if not time1:
        return time.time()
    else:
        interval = time.time() - time1
        return time.time(), interval
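A quick way to eyeball `draw_single` with synthetic keypoints (illustrative; 13 random parts in this repo's 13-joint layout, each with an (x, y, score) row):

# Illustrative smoke test (not part of the commit): draw random keypoints.
import numpy as np
frame = np.zeros((240, 320, 3), dtype=np.uint8)
pts = np.zeros((13, 3), dtype=np.float32)          # 13 parts, (x, y, score)
pts[:, 0] = np.random.uniform(20, 300, size=13)
pts[:, 1] = np.random.uniform(20, 220, size=13)
pts[:, 2] = 1.0                                    # full confidence
frame = draw_single(frame, pts)                    # appends a neck point internally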
284
StreamServer/src/analytic/action/pPose_nms.py
Normal file
@@ -0,0 +1,284 @@
# -*- coding: utf-8 -*-
import torch
import json
import os
import zipfile
import time
from multiprocessing.dummy import Pool as ThreadPool
import numpy as np

''' Constant Configuration '''
delta1 = 1
mu = 1.7
delta2 = 2.65
gamma = 22.48
scoreThreds = 0.3
matchThreds = 5
areaThres = 0  # 40 * 40.5
alpha = 0.1
#pool = ThreadPool(4)


def pose_nms(bboxes, bbox_scores, pose_preds, pose_scores):
    """
    Parametric Pose NMS algorithm
    bboxes: bbox locations list (n, 4)
    bbox_scores: bbox scores list (n,)
    pose_preds: pose locations list (n, 17, 2)
    pose_scores: pose scores list (n, 17, 1)
    """
    global ori_pose_preds, ori_pose_scores, ref_dists

    pose_scores[pose_scores == 0] = 1e-5

    final_result = []

    ori_bboxes = bboxes.clone()
    ori_bbox_scores = bbox_scores.clone()
    ori_pose_preds = pose_preds.clone()
    ori_pose_scores = pose_scores.clone()

    xmax = bboxes[:, 2]
    xmin = bboxes[:, 0]
    ymax = bboxes[:, 3]
    ymin = bboxes[:, 1]

    widths = xmax - xmin
    heights = ymax - ymin
    ref_dists = alpha * np.maximum(widths, heights)

    nsamples = bboxes.shape[0]
    human_scores = pose_scores.mean(dim=1)

    human_ids = np.arange(nsamples)
    # Do pPose-NMS.
    pick = []
    merge_ids = []
    while human_scores.shape[0] != 0:
        # Pick the one with the highest score.
        pick_id = torch.argmax(human_scores)
        pick.append(human_ids[pick_id])
        # num_visPart = torch.sum(pose_scores[pick_id] > 0.2)

        # Get the number of matched keypoints by calling PCK_match.
        ref_dist = ref_dists[human_ids[pick_id]]
        simi = get_parametric_distance(pick_id, pose_preds, pose_scores, ref_dist)
        num_match_keypoints = PCK_match(pose_preds[pick_id], pose_preds, ref_dist)

        # Delete humans who have more than matchThreds overlapping keypoints
        # and a high similarity.
        delete_ids = torch.from_numpy(np.arange(human_scores.shape[0]))[
            (simi > gamma) | (num_match_keypoints >= matchThreds)]

        if delete_ids.shape[0] == 0:
            delete_ids = pick_id
        #else:
        #    delete_ids = torch.from_numpy(delete_ids)

        merge_ids.append(human_ids[delete_ids])
        pose_preds = np.delete(pose_preds, delete_ids, axis=0)
        pose_scores = np.delete(pose_scores, delete_ids, axis=0)
        human_ids = np.delete(human_ids, delete_ids)
        human_scores = np.delete(human_scores, delete_ids, axis=0)
        bbox_scores = np.delete(bbox_scores, delete_ids, axis=0)

    assert len(merge_ids) == len(pick)
    bboxs_pick = ori_bboxes[pick]
    preds_pick = ori_pose_preds[pick]
    scores_pick = ori_pose_scores[pick]
    bbox_scores_pick = ori_bbox_scores[pick]
    #final_result = pool.map(filter_result, zip(scores_pick, merge_ids, preds_pick, pick, bbox_scores_pick))
    #final_result = [item for item in final_result if item is not None]

    for j in range(len(pick)):
        ids = np.arange(pose_preds.shape[1])
        max_score = torch.max(scores_pick[j, ids, 0])

        if max_score < scoreThreds:
            continue

        # Merge poses.
        merge_id = merge_ids[j]
        merge_pose, merge_score = p_merge_fast(
            preds_pick[j], ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick[j]])

        max_score = torch.max(merge_score[ids])
        if max_score < scoreThreds:
            continue

        xmax = max(merge_pose[:, 0])
        xmin = min(merge_pose[:, 0])
        ymax = max(merge_pose[:, 1])
        ymin = min(merge_pose[:, 1])

        if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < areaThres:
            continue

        final_result.append({
            'bbox': bboxs_pick[j],
            'bbox_score': bbox_scores_pick[j],
            'keypoints': merge_pose - 0.3,
            'kp_score': merge_score,
            'proposal_score': torch.mean(merge_score) + bbox_scores_pick[j] + 1.25 * max(merge_score)
        })

    return final_result


def filter_result(args):
    score_pick, merge_id, pred_pick, pick, bbox_score_pick = args
    global ori_pose_preds, ori_pose_scores, ref_dists
    ids = np.arange(17)
    max_score = torch.max(score_pick[ids, 0])

    if max_score < scoreThreds:
        return None

    # Merge poses.
    merge_pose, merge_score = p_merge_fast(
        pred_pick, ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick])

    max_score = torch.max(merge_score[ids])
    if max_score < scoreThreds:
        return None

    xmax = max(merge_pose[:, 0])
    xmin = min(merge_pose[:, 0])
    ymax = max(merge_pose[:, 1])
    ymin = min(merge_pose[:, 1])

    if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < 40 * 40.5:
        return None

    return {
        'keypoints': merge_pose - 0.3,
        'kp_score': merge_score,
        'proposal_score': torch.mean(merge_score) + bbox_score_pick + 1.25 * max(merge_score)
    }


def p_merge(ref_pose, cluster_preds, cluster_scores, ref_dist):
    """
    Score-weighted pose merging
    INPUT:
        ref_pose: reference pose -- [17, 2]
        cluster_preds: redundant poses -- [n, 17, 2]
        cluster_scores: redundant poses score -- [n, 17, 1]
        ref_dist: reference scale -- Constant
    OUTPUT:
        final_pose: merged pose -- [17, 2]
        final_score: merged score -- [17]
    """
    dist = torch.sqrt(torch.sum(
        torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2),
        dim=2
    ))  # [n, 17]

    kp_num = 17
    ref_dist = min(ref_dist, 15)

    mask = (dist <= ref_dist)
    final_pose = torch.zeros(kp_num, 2)
    final_score = torch.zeros(kp_num)

    if cluster_preds.dim() == 2:
        cluster_preds.unsqueeze_(0)
        cluster_scores.unsqueeze_(0)
    if mask.dim() == 1:
        mask.unsqueeze_(0)

    for i in range(kp_num):
        cluster_joint_scores = cluster_scores[:, i][mask[:, i]]  # [k, 1]
        cluster_joint_location = cluster_preds[:, i, :][mask[:, i].unsqueeze(
            -1).repeat(1, 2)].view((torch.sum(mask[:, i]), -1))

        # Get a normalized score.
        normed_scores = cluster_joint_scores / torch.sum(cluster_joint_scores)

        # Merge poses by a weighted sum.
        final_pose[i, 0] = torch.dot(cluster_joint_location[:, 0], normed_scores.squeeze(-1))
        final_pose[i, 1] = torch.dot(cluster_joint_location[:, 1], normed_scores.squeeze(-1))

        final_score[i] = torch.dot(cluster_joint_scores.transpose(0, 1).squeeze(0), normed_scores.squeeze(-1))

    return final_pose, final_score


def p_merge_fast(ref_pose, cluster_preds, cluster_scores, ref_dist):
    """
    Score-weighted pose merging
    INPUT:
        ref_pose: reference pose -- [17, 2]
        cluster_preds: redundant poses -- [n, 17, 2]
        cluster_scores: redundant poses score -- [n, 17, 1]
        ref_dist: reference scale -- Constant
    OUTPUT:
        final_pose: merged pose -- [17, 2]
        final_score: merged score -- [17]
    """
    dist = torch.sqrt(torch.sum(
        torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2),
        dim=2
    ))

    kp_num = 17
    ref_dist = min(ref_dist, 15)

    mask = (dist <= ref_dist)
    final_pose = torch.zeros(kp_num, 2)
    final_score = torch.zeros(kp_num)

    if cluster_preds.dim() == 2:
        cluster_preds.unsqueeze_(0)
        cluster_scores.unsqueeze_(0)
    if mask.dim() == 1:
        mask.unsqueeze_(0)

    # Weighted merge.
    masked_scores = cluster_scores.mul(mask.float().unsqueeze(-1))
    normed_scores = masked_scores / torch.sum(masked_scores, dim=0)

    final_pose = torch.mul(cluster_preds, normed_scores.repeat(1, 1, 2)).sum(dim=0)
    final_score = torch.mul(masked_scores, normed_scores).sum(dim=0)
    return final_pose, final_score


def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist):
    pick_preds = all_preds[i]
    pred_scores = keypoint_scores[i]
    dist = torch.sqrt(torch.sum(
        torch.pow(pick_preds[np.newaxis, :] - all_preds, 2),
        dim=2
    ))
    mask = (dist <= 1)

    # Define a keypoints distance.
    score_dists = torch.zeros(all_preds.shape[0], all_preds.shape[1])
    keypoint_scores.squeeze_()
    if keypoint_scores.dim() == 1:
        keypoint_scores.unsqueeze_(0)
    if pred_scores.dim() == 1:
        pred_scores.unsqueeze_(1)
    # The predicted scores are repeated to allow broadcasting.
    pred_scores = pred_scores.repeat(1, all_preds.shape[0]).transpose(0, 1)

    score_dists[mask] = torch.tanh(pred_scores[mask] / delta1) * \
        torch.tanh(keypoint_scores[mask] / delta1)

    point_dist = torch.exp((-1) * dist / delta2)
    final_dist = torch.sum(score_dists, dim=1) + mu * torch.sum(point_dist, dim=1)

    return final_dist


def PCK_match(pick_pred, all_preds, ref_dist):
    dist = torch.sqrt(torch.sum(
        torch.pow(pick_pred[np.newaxis, :] - all_preds, 2),
        dim=2
    ))
    ref_dist = min(ref_dist, 7)
    num_match_keypoints = torch.sum(
        dist / ref_dist <= 1,
        dim=1
    )

    return num_match_keypoints
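A small check of `PCK_match` on synthetic poses (illustrative; an exact copy of the reference pose matches all 17 keypoints, a distant pose matches none):

# Illustrative check (not part of the commit): count matched keypoints.
import torch
pose = torch.rand(17, 2) * 100
all_preds = torch.stack([pose, pose + 500.])    # an exact copy and a far-away pose
print(PCK_match(pose, all_preds, ref_dist=10))  # tensor([17, 0])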
27
StreamServer/src/analytic/action/pose_utils.py
Normal file
@@ -0,0 +1,27 @@
import numpy as np


def normalize_points_with_size(xy, width, height, flip=False):
    """Normalize points to the (0-1) range using the image size.
    xy : (frames, parts, xy) or (parts, xy)
    """
    if xy.ndim == 2:
        xy = np.expand_dims(xy, 0)
    xy[:, :, 0] /= width
    xy[:, :, 1] /= height
    if flip:
        xy[:, :, 0] = 1 - xy[:, :, 0]
    return xy


def scale_pose(xy):
    """Normalize pose points by scaling with the max/min values of each pose.
    xy : (frames, parts, xy) or (parts, xy)
    """
    if xy.ndim == 2:
        xy = np.expand_dims(xy, 0)
    xy_min = np.nanmin(xy, axis=1)
    xy_max = np.nanmax(xy, axis=1)
    for i in range(xy.shape[0]):
        xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1
    return xy.squeeze()
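A worked example of the two normalizations on a single 3-point pose (illustrative; float input is required since both functions divide in place):

# Illustrative example (not part of the commit): normalize then rescale to [-1, 1].
import numpy as np
pose = np.array([[ 80., 120.],
                 [160., 240.],
                 [240., 360.]], dtype=np.float32)   # (parts, xy) in pixels
norm = normalize_points_with_size(pose.copy(), width=320, height=480)
print(norm)                    # both axes become [0.25, 0.5, 0.75]
scaled = scale_pose(norm)
print(scaled)                  # each axis now spans exactly [-1, 1]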