diff --git a/.gitignore b/.gitignore
index ef76d3c..2b1a207 100644
--- a/.gitignore
+++ b/.gitignore
@@ -186,4 +186,5 @@ dist-ssr
config.json
-ActionDetector/
+*.pth
+*.cfg
\ No newline at end of file
diff --git a/StreamServer/src/analytic/action/ActionsEstLoader.py b/StreamServer/src/analytic/action/ActionsEstLoader.py
new file mode 100644
index 0000000..90d1818
--- /dev/null
+++ b/StreamServer/src/analytic/action/ActionsEstLoader.py
@@ -0,0 +1,52 @@
+import os
+import torch
+import numpy as np
+
+from .Actionsrecognition.Models import TwoStreamSpatialTemporalGraph
+from .pose_utils import normalize_points_with_size, scale_pose
+
+
+class TSSTG(object):
+ """Two-Stream Spatial Temporal Graph Model Loader.
+ Args:
+ weight_file: (str) Path to trained weights file.
+ device: (str) Device to load the model on 'cpu' or 'cuda'.
+ """
+ def __init__(self,
+ weight_file='./Models/TSSTG/tsstg-model.pth',
+ device='cuda'):
+ self.graph_args = {'strategy': 'spatial'}
+ self.class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
+ 'Stand up', 'Sit down', 'Fall Down']
+ self.num_class = len(self.class_names)
+ self.device = device
+
+ self.model = TwoStreamSpatialTemporalGraph(self.graph_args, self.num_class).to(self.device)
+ self.model.load_state_dict(torch.load(weight_file, map_location=self.device))
+ self.model.eval()
+
+ def predict(self, pts, image_size):
+ """Predict actions from single person skeleton points and score in time sequence.
+ Args:
+ pts: (numpy array) points and score in shape `(t, v, c)` where
+ t : inputs sequence (time steps).,
+ v : number of graph node (body parts).,
+ c : channel (x, y, score).,
+ image_size: (tuple of int) width, height of image frame.
+ Returns:
+ (numpy array) Probability of each class actions.
+ """
+ pts[:, :, :2] = normalize_points_with_size(pts[:, :, :2], image_size[0], image_size[1])
+ pts[:, :, :2] = scale_pose(pts[:, :, :2])
+ pts = np.concatenate((pts, np.expand_dims((pts[:, 1, :] + pts[:, 2, :]) / 2, 1)), axis=1)
+
+ pts = torch.tensor(pts, dtype=torch.float32)
+ pts = pts.permute(2, 0, 1)[None, :]
+
+ mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
+ mot = mot.to(self.device)
+ pts = pts.to(self.device)
+
+ out = self.model((pts, mot))
+
+ return out.detach().cpu().numpy()
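+
+# Minimal usage sketch (illustrative only; the random array below is a stand-in for real
+# pose-estimator output of shape (t, v, c) = (30 frames, 13 joints, (x, y, score)) in pixel
+# coordinates, and the weight file is the default path above):
+#
+#   tsstg = TSSTG(device='cpu')
+#   pts = np.random.rand(30, 13, 3).astype(np.float32)
+#   pts[:, :, :2] *= (640, 480)
+#   probs = tsstg.predict(pts, image_size=(640, 480))     # shape (1, num_class)
+#   print(tsstg.class_names[probs[0].argmax()])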
diff --git a/StreamServer/src/analytic/action/Actionsrecognition/Models.py b/StreamServer/src/analytic/action/Actionsrecognition/Models.py
new file mode 100644
index 0000000..62b66e4
--- /dev/null
+++ b/StreamServer/src/analytic/action/Actionsrecognition/Models.py
@@ -0,0 +1,244 @@
+### Reference from: https://github.com/yysijie/st-gcn/tree/master/net
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+
+from .Utils import Graph
+
+
+class GraphConvolution(nn.Module):
+ """The basic module for applying a graph convolution.
+ Args:
+ - in_channels: (int) Number of channels in the input sequence data.
+ - out_channels: (int) Number of channels produced by the convolution.
+ - kernel_size: (int) Size of the graph convolving kernel.
+ - t_kernel_size: (int) Size of the temporal convolving kernel.
+ - t_stride: (int, optional) Stride of the temporal convolution. Default: 1
+ - t_padding: (int, optional) Temporal zero-padding added to both sides of
+ the input. Default: 0
+ - t_dilation: (int, optional) Spacing between temporal kernel elements. Default: 1
+ - bias: (bool, optional) If `True`, adds a learnable bias to the output.
+ Default: `True`
+ Shape:
+ - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
+ A: Graph adjacency matrix in :math:`(K, V, V)`,
+ - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`
+
+ where
+ :math:`N` is the batch size,
+ :math:`K` is the spatial kernel size (:math:`K == kernel_size`),
+ :math:`T_{in}/T_{out}` is the length of the input/output sequence,
+ :math:`V` is the number of graph nodes.
+
+ """
+ def __init__(self, in_channels, out_channels, kernel_size,
+ t_kernel_size=1,
+ t_stride=1,
+ t_padding=0,
+ t_dilation=1,
+ bias=True):
+ super().__init__()
+
+ self.kernel_size = kernel_size
+ self.conv = nn.Conv2d(in_channels,
+ out_channels * kernel_size,
+ kernel_size=(t_kernel_size, 1),
+ padding=(t_padding, 0),
+ stride=(t_stride, 1),
+ dilation=(t_dilation, 1),
+ bias=bias)
+
+ def forward(self, x, A):
+ x = self.conv(x)
+ n, kc, t, v = x.size()
+ x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v)
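+ # Aggregate over the K adjacency partitions: (n, k, c, t, v) x (k, v, w) -> (n, c, t, w).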
+ x = torch.einsum('nkctv,kvw->nctw', (x, A))
+
+ return x.contiguous()
+
+
+class st_gcn(nn.Module):
+ """Applies a spatial temporal graph convolution over an input graph sequence.
+ Args:
+ - in_channels: (int) Number of channels in the input sequence data.
+ - out_channels: (int) Number of channels produced by the convolution.
+ - kernel_size: (tuple) Size of the temporal convolving kernel and
+ graph convolving kernel.
+ - stride: (int, optional) Stride of the temporal convolution. Default: 1
+ - dropout: (float, optional) Dropout rate of the final output. Default: 0
+ - residual: (bool, optional) If `True`, applies a residual mechanism.
+ Default: `True`
+ Shape:
+ - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
+ A: Graph adjacency matrix in :math:`(K, V, V)`,
+ - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`
+ where
+ :math:`N` is the batch size,
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
+ :math:`T_{in}/T_{out}` is the length of the input/output sequence,
+ :math:`V` is the number of graph nodes.
+ """
+ def __init__(self, in_channels, out_channels, kernel_size,
+ stride=1,
+ dropout=0,
+ residual=True):
+ super().__init__()
+ assert len(kernel_size) == 2
+ assert kernel_size[0] % 2 == 1
+
+ padding = ((kernel_size[0] - 1) // 2, 0)
+
+ self.gcn = GraphConvolution(in_channels, out_channels, kernel_size[1])
+ self.tcn = nn.Sequential(nn.BatchNorm2d(out_channels),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(out_channels,
+ out_channels,
+ (kernel_size[0], 1),
+ (stride, 1),
+ padding),
+ nn.BatchNorm2d(out_channels),
+ nn.Dropout(dropout, inplace=True)
+ )
+
+ if not residual:
+ self.residual = lambda x: 0
+ elif (in_channels == out_channels) and (stride == 1):
+ self.residual = lambda x: x
+ else:
+ self.residual = nn.Sequential(nn.Conv2d(in_channels,
+ out_channels,
+ kernel_size=1,
+ stride=(stride, 1)),
+ nn.BatchNorm2d(out_channels)
+ )
+ self.relu = nn.ReLU(inplace=True)
+
+ def forward(self, x, A):
+ res = self.residual(x)
+ x = self.gcn(x, A)
+ x = self.tcn(x) + res
+
+ return self.relu(x)
+
+
+class StreamSpatialTemporalGraph(nn.Module):
+ """Spatial temporal graph convolutional networks.
+ Args:
+ - in_channels: (int) Number of input channels.
+ - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class.
+ - num_class: (int) Number of class outputs. If `None`, return the pooled features of
+ the last st-gcn layer instead.
+ - edge_importance_weighting: (bool) If `True`, adds a learnable importance
+ weighting to the edges of the graph.
+ - **kwargs: (optional) Other parameters for graph convolution units.
+ Shape:
+ - Input: :math:`(N, in_channels, T_{in}, V_{in})`
+ - Output: :math:`(N, num_class)` where
+ :math:`N` is the batch size,
+ :math:`T_{in}` is the length of the input sequence,
+ :math:`V_{in}` is the number of graph nodes,
+ or, if num_class is `None`: `(N, out_channels)` where
+ :math:`out_channels` is the number of output channels of the last layer.
+ """
+ def __init__(self, in_channels, graph_args, num_class=None,
+ edge_importance_weighting=True, **kwargs):
+ super().__init__()
+ # Load graph.
+ graph = Graph(**graph_args)
+ A = torch.tensor(graph.A, dtype=torch.float32, requires_grad=False)
+ self.register_buffer('A', A)
+
+ # Networks.
+ spatial_kernel_size = A.size(0)
+ temporal_kernel_size = 9
+ kernel_size = (temporal_kernel_size, spatial_kernel_size)
+ kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}
+
+ self.data_bn = nn.BatchNorm1d(in_channels * A.size(1))
+ self.st_gcn_networks = nn.ModuleList((
+ st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0),
+ st_gcn(64, 64, kernel_size, 1, **kwargs),
+ st_gcn(64, 64, kernel_size, 1, **kwargs),
+ st_gcn(64, 64, kernel_size, 1, **kwargs),
+ st_gcn(64, 128, kernel_size, 2, **kwargs),
+ st_gcn(128, 128, kernel_size, 1, **kwargs),
+ st_gcn(128, 128, kernel_size, 1, **kwargs),
+ st_gcn(128, 256, kernel_size, 2, **kwargs),
+ st_gcn(256, 256, kernel_size, 1, **kwargs),
+ st_gcn(256, 256, kernel_size, 1, **kwargs)
+ ))
+
+ # initialize parameters for edge importance weighting.
+ if edge_importance_weighting:
+ self.edge_importance = nn.ParameterList([
+ nn.Parameter(torch.ones(A.size()))
+ for i in self.st_gcn_networks
+ ])
+ else:
+ self.edge_importance = [1] * len(self.st_gcn_networks)
+
+ if num_class is not None:
+ self.cls = nn.Conv2d(256, num_class, kernel_size=1)
+ else:
+ self.cls = lambda x: x
+
+ def forward(self, x):
+ # data normalization.
+ N, C, T, V = x.size()
+ x = x.permute(0, 3, 1, 2).contiguous() # (N, V, C, T)
+ x = x.view(N, V * C, T)
+ x = self.data_bn(x)
+ x = x.view(N, V, C, T)
+ x = x.permute(0, 2, 3, 1).contiguous()
+ x = x.view(N, C, T, V)
+
+ # forward.
+ for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
+ x = gcn(x, self.A * importance)
+
+ x = F.avg_pool2d(x, x.size()[2:])
+ x = self.cls(x)
+ x = x.view(x.size(0), -1)
+
+ return x
+
+
+class TwoStreamSpatialTemporalGraph(nn.Module):
+ """Two inputs spatial temporal graph convolutional networks.
+ Args:
+ - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class.
+ - num_class: (int) Number of class outputs.
+ - edge_importance_weighting: (bool) If `True`, adds a learnable importance
+ weighting to the edges of the graph.
+ - **kwargs: (optional) Other parameters for graph convolution units.
+ Shape:
+ - Input: tuple of :math:`((N, 3, T, V), (N, 2, T, V))`
+ for the points and motion streams, where
+ :math:`N` is the batch size,
+ :math:`in_channels` is the data channels (3 for (x, y, score), 2 for (mot_x, mot_y)),
+ :math:`T` is the length of the input sequence,
+ :math:`V` is the number of graph nodes,
+ - Output: :math:`(N, num_class)`
+ """
+ def __init__(self, graph_args, num_class, edge_importance_weighting=True,
+ **kwargs):
+ super().__init__()
+ self.pts_stream = StreamSpatialTemporalGraph(3, graph_args, None,
+ edge_importance_weighting,
+ **kwargs)
+ self.mot_stream = StreamSpatialTemporalGraph(2, graph_args, None,
+ edge_importance_weighting,
+ **kwargs)
+
+ self.fcn = nn.Linear(256 * 2, num_class)
+
+ def forward(self, inputs):
+ out1 = self.pts_stream(inputs[0])
+ out2 = self.mot_stream(inputs[1])
+
+ concat = torch.cat([out1, out2], dim=-1)
+ out = self.fcn(concat)
+
+ return torch.sigmoid(out)
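+
+# Shape sanity check (illustrative only, dummy sizes): the points stream expects
+# (N, 3, T, V), the motion stream (N, 2, T-1, V), and the output is (N, num_class).
+#
+#   net = TwoStreamSpatialTemporalGraph({'strategy': 'spatial'}, num_class=7)
+#   pts = torch.randn(2, 3, 30, 14)
+#   mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
+#   out = net((pts, mot))                                 # torch.Size([2, 7])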
diff --git a/StreamServer/src/analytic/action/Actionsrecognition/Utils.py b/StreamServer/src/analytic/action/Actionsrecognition/Utils.py
new file mode 100644
index 0000000..04a9b10
--- /dev/null
+++ b/StreamServer/src/analytic/action/Actionsrecognition/Utils.py
@@ -0,0 +1,123 @@
+### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py
+
+import os
+import torch
+import numpy as np
+
+
+class Graph:
+ """The Graph to model the skeletons extracted by the Alpha-Pose.
+ Args:
+ - strategy: (string) must be one of the follow candidates
+ - uniform: Uniform Labeling,
+ - distance: Distance Partitioning,
+ - spatial: Spatial Configuration,
+ For more information, please refer to the section 'Partition Strategies'
+ in our paper (https://arxiv.org/abs/1801.07455).
+ - layout: (string) must be one of the follow candidates
+ - coco_cut: Is COCO format but cut 4 joints (L-R ears, L-R eyes) out.
+ - max_hop: (int) the maximal distance between two connected nodes.
+ - dilation: (int) controls the spacing between the kernel points.
+ """
+ def __init__(self,
+ layout='coco_cut',
+ strategy='uniform',
+ max_hop=1,
+ dilation=1):
+ self.max_hop = max_hop
+ self.dilation = dilation
+
+ self.get_edge(layout)
+ self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop)
+ self.get_adjacency(strategy)
+
+ def get_edge(self, layout):
+ if layout == 'coco_cut':
+ self.num_node = 14
+ self_link = [(i, i) for i in range(self.num_node)]
+ neighbor_link = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10),
+ (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)]
+ self.edge = self_link + neighbor_link
+ self.center = 13
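+ # Node indexing appears to follow the keypoint order used in Data/create_dataset_2.py
+ # plus the added mid-shoulder center point:
+ # 0-Nose, 1-LShoulder, 2-RShoulder, 3-LElbow, 4-RElbow, 5-LWrist, 6-RWrist,
+ # 7-LHip, 8-RHip, 9-LKnee, 10-RKnee, 11-LAnkle, 12-RAnkle, 13-Center.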
+ else:
+ raise ValueError('This layout is not supported!')
+
+ def get_adjacency(self, strategy):
+ valid_hop = range(0, self.max_hop + 1, self.dilation)
+ adjacency = np.zeros((self.num_node, self.num_node))
+ for hop in valid_hop:
+ adjacency[self.hop_dis == hop] = 1
+ normalize_adjacency = normalize_digraph(adjacency)
+
+ if strategy == 'uniform':
+ A = np.zeros((1, self.num_node, self.num_node))
+ A[0] = normalize_adjacency
+ self.A = A
+ elif strategy == 'distance':
+ A = np.zeros((len(valid_hop), self.num_node, self.num_node))
+ for i, hop in enumerate(valid_hop):
+ A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis ==
+ hop]
+ self.A = A
+ elif strategy == 'spatial':
+ A = []
+ for hop in valid_hop:
+ a_root = np.zeros((self.num_node, self.num_node))
+ a_close = np.zeros((self.num_node, self.num_node))
+ a_further = np.zeros((self.num_node, self.num_node))
+ for i in range(self.num_node):
+ for j in range(self.num_node):
+ if self.hop_dis[j, i] == hop:
+ if self.hop_dis[j, self.center] == self.hop_dis[i, self.center]:
+ a_root[j, i] = normalize_adjacency[j, i]
+ elif self.hop_dis[j, self.center] > self.hop_dis[i, self.center]:
+ a_close[j, i] = normalize_adjacency[j, i]
+ else:
+ a_further[j, i] = normalize_adjacency[j, i]
+ if hop == 0:
+ A.append(a_root)
+ else:
+ A.append(a_root + a_close)
+ A.append(a_further)
+ A = np.stack(A)
+ self.A = A
+ #self.A = np.swapaxes(np.swapaxes(A, 0, 1), 1, 2)
+ else:
+ raise ValueError("This strategy is not supported!")
+
+
+def get_hop_distance(num_node, edge, max_hop=1):
+ A = np.zeros((num_node, num_node))
+ for i, j in edge:
+ A[j, i] = 1
+ A[i, j] = 1
+
+ # compute hop steps
+ hop_dis = np.zeros((num_node, num_node)) + np.inf
+ transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)]
+ arrive_mat = (np.stack(transfer_mat) > 0)
+ for d in range(max_hop, -1, -1):
+ hop_dis[arrive_mat[d]] = d
+ return hop_dis
+
+
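+# normalize_digraph column-normalizes the adjacency matrix, AD = A @ D^-1 (D is the diagonal
+# degree matrix), so each column sums to 1; normalize_undigraph below is the symmetric
+# variant D^-1/2 @ A @ D^-1/2.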
+def normalize_digraph(A):
+ Dl = np.sum(A, 0)
+ num_node = A.shape[0]
+ Dn = np.zeros((num_node, num_node))
+ for i in range(num_node):
+ if Dl[i] > 0:
+ Dn[i, i] = Dl[i]**(-1)
+ AD = np.dot(A, Dn)
+ return AD
+
+
+def normalize_undigraph(A):
+ Dl = np.sum(A, 0)
+ num_node = A.shape[0]
+ Dn = np.zeros((num_node, num_node))
+ for i in range(num_node):
+ if Dl[i] > 0:
+ Dn[i, i] = Dl[i]**(-0.5)
+ DAD = np.dot(np.dot(Dn, A), Dn)
+ return DAD
diff --git a/StreamServer/src/analytic/action/Actionsrecognition/train.py b/StreamServer/src/analytic/action/Actionsrecognition/train.py
new file mode 100644
index 0000000..818fade
--- /dev/null
+++ b/StreamServer/src/analytic/action/Actionsrecognition/train.py
@@ -0,0 +1,216 @@
+import os
+import time
+import torch
+import pickle
+import numpy as np
+import torch.nn.functional as F
+from shutil import copyfile
+from tqdm import tqdm
+from torch.utils import data
+from torch.optim.adadelta import Adadelta
+from sklearn.model_selection import train_test_split
+
+from .Models import *
+from Visualizer import plot_graphs, plot_confusion_metrix
+
+
+save_folder = 'saved/TSSTG(pts+mot)-01(cf+hm-hm)'
+
+device = 'cuda'
+epochs = 30
+batch_size = 32
+
+# DATA FILES.
+# Should be in the format of
+# inputs: (N_samples, time_steps, graph_node, channels),
+# labels: (N_samples, num_class)
+# with some normalization applied. The default data are created by:
+# Data.create_dataset_(1-3).py
+# where
+# time_steps: Number of frames in the input sequence, Default: 30
+# graph_node: Number of nodes in the skeleton, Default: 14
+# channels: Input data (x, y and score), Default: 3
+# num_class: Number of pose classes to train, Default: 7
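+#
+# Note: load_dataset() below permutes the inputs to (N_samples, channels, time_steps, graph_node),
+# i.e. the (N, C, T, V) layout the ST-GCN model expects.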
+
+data_files = ['../Data/Coffee_room_new-set(labelXscrw).pkl',
+ '../Data/Home_new-set(labelXscrw).pkl']
+class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
+ 'Stand up', 'Sit down', 'Fall Down']
+num_class = len(class_names)
+
+
+def load_dataset(data_files, batch_size, split_size=0):
+ """Load data files into torch DataLoader with/without spliting train-test.
+ """
+ features, labels = [], []
+ for fil in data_files:
+ with open(fil, 'rb') as f:
+ fts, lbs = pickle.load(f)
+ features.append(fts)
+ labels.append(lbs)
+ del fts, lbs
+ features = np.concatenate(features, axis=0)
+ labels = np.concatenate(labels, axis=0)
+
+ if split_size > 0:
+ x_train, x_valid, y_train, y_valid = train_test_split(features, labels, test_size=split_size,
+ random_state=9)
+ train_set = data.TensorDataset(torch.tensor(x_train, dtype=torch.float32).permute(0, 3, 1, 2),
+ torch.tensor(y_train, dtype=torch.float32))
+ valid_set = data.TensorDataset(torch.tensor(x_valid, dtype=torch.float32).permute(0, 3, 1, 2),
+ torch.tensor(y_valid, dtype=torch.float32))
+ train_loader = data.DataLoader(train_set, batch_size, shuffle=True)
+ valid_loader = data.DataLoader(valid_set, batch_size)
+ else:
+ train_set = data.TensorDataset(torch.tensor(features, dtype=torch.float32).permute(0, 3, 1, 2),
+ torch.tensor(labels, dtype=torch.float32))
+ train_loader = data.DataLoader(train_set, batch_size, shuffle=True)
+ valid_loader = None
+ return train_loader, valid_loader
+
+
+def accuracy_batch(y_pred, y_true):
+ return (y_pred.argmax(1) == y_true.argmax(1)).mean()
+
+
+def set_training(model, mode=True):
+ for p in model.parameters():
+ p.requires_grad = mode
+ model.train(mode)
+ return model
+
+
+if __name__ == '__main__':
+ save_folder = os.path.join(os.path.dirname(__file__), save_folder)
+ if not os.path.exists(save_folder):
+ os.makedirs(save_folder)
+
+ # DATA.
+ train_loader, _ = load_dataset(data_files[0:1], batch_size)
+ valid_loader, train_loader_ = load_dataset(data_files[1:2], batch_size, 0.2)
+
+ train_loader = data.DataLoader(data.ConcatDataset([train_loader.dataset, train_loader_.dataset]),
+ batch_size, shuffle=True)
+ dataloader = {'train': train_loader, 'valid': valid_loader}
+ del train_loader_
+
+ # MODEL.
+ graph_args = {'strategy': 'spatial'}
+ model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device)
+
+ #optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
+ optimizer = Adadelta(model.parameters())
+
+ losser = torch.nn.BCELoss()
+
+ # TRAINING.
+ loss_list = {'train': [], 'valid': []}
+ accu_list = {'train': [], 'valid': []}
+ for e in range(epochs):
+ print('Epoch {}/{}'.format(e, epochs - 1))
+ for phase in ['train', 'valid']:
+ if phase == 'train':
+ model = set_training(model, True)
+ else:
+ model = set_training(model, False)
+
+ run_loss = 0.0
+ run_accu = 0.0
+ with tqdm(dataloader[phase], desc=phase) as iterator:
+ for pts, lbs in iterator:
+ # Create the motion input from the displacement of each node's (x, y)
+ # between two consecutive frames.
+ mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
+
+ mot = mot.to(device)
+ pts = pts.to(device)
+ lbs = lbs.to(device)
+
+ # Forward.
+ out = model((pts, mot))
+ loss = losser(out, lbs)
+
+ if phase == 'train':
+ # Backward.
+ model.zero_grad()
+ loss.backward()
+ optimizer.step()
+
+ run_loss += loss.item()
+ accu = accuracy_batch(out.detach().cpu().numpy(),
+ lbs.detach().cpu().numpy())
+ run_accu += accu
+
+ iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
+ loss.item(), accu))
+ iterator.update()
+ #break
+ loss_list[phase].append(run_loss / len(iterator))
+ accu_list[phase].append(run_accu / len(iterator))
+ #break
+
+ print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:'
+ ' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1],
+ loss_list['valid'][-1], accu_list['valid'][-1]))
+
+ # SAVE.
+ torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model.pth'))
+
+ plot_graphs(list(loss_list.values()), list(loss_list.keys()),
+ 'Last Train: {:.2f}, Valid: {:.2f}'.format(
+ loss_list['train'][-1], loss_list['valid'][-1]
+ ), 'Loss', xlim=[0, epochs],
+ save=os.path.join(save_folder, 'loss_graph.png'))
+ plot_graphs(list(accu_list.values()), list(accu_list.keys()),
+ 'Last Train: {:.2f}, Valid: {:.2f}'.format(
+ accu_list['train'][-1], accu_list['valid'][-1]
+ ), 'Accu', xlim=[0, epochs],
+ save=os.path.join(save_folder, 'accu_graph.png'))
+
+ #break
+
+ del train_loader, valid_loader
+
+ model.load_state_dict(torch.load(os.path.join(save_folder, 'tsstg-model.pth')))
+
+ # EVALUATION.
+ model = set_training(model, False)
+ data_file = data_files[1]
+ eval_loader, _ = load_dataset([data_file], 32)
+
+ print('Evaluation.')
+ run_loss = 0.0
+ run_accu = 0.0
+ y_preds = []
+ y_trues = []
+ with tqdm(eval_loader, desc='eval') as iterator:
+ for pts, lbs in iterator:
+ mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
+ mot = mot.to(device)
+ pts = pts.to(device)
+ lbs = lbs.to(device)
+
+ out = model((pts, mot))
+ loss = losser(out, lbs)
+
+ run_loss += loss.item()
+ accu = accuracy_batch(out.detach().cpu().numpy(),
+ lbs.detach().cpu().numpy())
+ run_accu += accu
+
+ y_preds.extend(out.argmax(1).detach().cpu().numpy())
+ y_trues.extend(lbs.argmax(1).cpu().numpy())
+
+ iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
+ loss.item(), accu))
+ iterator.update()
+
+ run_loss = run_loss / len(iterator)
+ run_accu = run_accu / len(iterator)
+
+ plot_confusion_metrix(y_trues, y_preds, class_names, 'Eval on: {}\nLoss: {:.4f}, Accu: {:.4f}'.format(
+ os.path.basename(data_file), run_loss, run_accu
+ ), 'true', save=os.path.join(save_folder, '{}-confusion_matrix.png'.format(
+ os.path.basename(data_file).split('.')[0])))
+
+ print('Eval Loss: {:.4f}, Accu: {:.4f}'.format(run_loss, run_accu))
diff --git a/StreamServer/src/analytic/action/CameraLoader.py b/StreamServer/src/analytic/action/CameraLoader.py
new file mode 100644
index 0000000..cde320a
--- /dev/null
+++ b/StreamServer/src/analytic/action/CameraLoader.py
@@ -0,0 +1,204 @@
+import os
+import cv2
+import time
+import torch
+import numpy as np
+
+from queue import Queue
+from threading import Thread, Lock
+
+
+class CamLoader:
+ """Use threading to capture a frame from camera for faster frame load.
+ Recommend for camera or webcam.
+
+ Args:
+ camera: (int, str) Source of camera or video.,
+ preprocess: (Callable function) to process the frame before return.
+ """
+ def __init__(self, camera, preprocess=None, ori_return=False):
+ self.stream = cv2.VideoCapture(camera)
+ assert self.stream.isOpened(), 'Cannot read camera source!'
+ self.fps = self.stream.get(cv2.CAP_PROP_FPS)
+ self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
+ int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+
+ self.stopped = False
+ self.ret = False
+ self.frame = None
+ self.ori_frame = None
+ self.read_lock = Lock()
+ self.ori = ori_return
+
+ self.preprocess_fn = preprocess
+
+ def start(self):
+ self.t = Thread(target=self.update, args=()) # , daemon=True)
+ self.t.start()
+ c = 0
+ while not self.ret:
+ time.sleep(0.1)
+ c += 1
+ if c > 20:
+ self.stop()
+ raise TimeoutError('Cannot get a frame from the camera!')
+ return self
+
+ def update(self):
+ while not self.stopped:
+ ret, frame = self.stream.read()
+ self.read_lock.acquire()
+ self.ori_frame = frame.copy() if ret else self.ori_frame  # keep the last frame if the read failed.
+ if ret and self.preprocess_fn is not None:
+ frame = self.preprocess_fn(frame)
+
+ self.ret, self.frame = ret, frame
+ self.read_lock.release()
+
+ def grabbed(self):
+ """Return `True` if can read a frame."""
+ return self.ret
+
+ def getitem(self):
+ self.read_lock.acquire()
+ frame = self.frame.copy()
+ ori_frame = self.ori_frame.copy()
+ self.read_lock.release()
+ if self.ori:
+ return frame, ori_frame
+ else:
+ return frame
+
+ def stop(self):
+ if self.stopped:
+ return
+ self.stopped = True
+ if self.t.is_alive():
+ self.t.join()
+ self.stream.release()
+
+ def __del__(self):
+ if self.stream.isOpened():
+ self.stream.release()
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ if self.stream.isOpened():
+ self.stream.release()
+
+
+class CamLoader_Q:
+ """Use threading and queue to capture a frame and store to queue for pickup in sequence.
+ Recommend for video file.
+
+ Args:
+ camera: (int, str) Source of camera or video.,
+ batch_size: (int) Number of batch frame to store in queue. Default: 1,
+ queue_size: (int) Maximum queue size. Default: 256,
+ preprocess: (Callable function) to process the frame before return.
+ """
+ def __init__(self, camera, batch_size=1, queue_size=256, preprocess=None):
+ self.stream = cv2.VideoCapture(camera)
+ assert self.stream.isOpened(), 'Cannot read camera source!'
+ self.fps = self.stream.get(cv2.CAP_PROP_FPS)
+ self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
+ int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+
+ # Queue for storing frames.
+
+ self.stopped = False
+ self.batch_size = batch_size
+ self.Q = Queue(maxsize=queue_size)
+
+ self.preprocess_fn = preprocess
+
+ def start(self):
+ Thread(target=self.update, args=(), daemon=True).start()
+ c = 0
+ while not self.grabbed():
+ time.sleep(0.1)
+ c += 1
+ if c > 20:
+ self.stop()
+ raise TimeoutError('Cannot get a frame from the camera!')
+ return self
+
+ def update(self):
+ while not self.stopped:
+ if not self.Q.full():
+ frames = []
+ for k in range(self.batch_size):
+ ret, frame = self.stream.read()
+ if not ret:
+ self.stop()
+ return
+
+ if self.preprocess_fn is not None:
+ frame = self.preprocess_fn(frame)
+
+ frames.append(frame)
+ frames = np.stack(frames)
+ self.Q.put(frames)
+ else:
+ with self.Q.mutex:
+ self.Q.queue.clear()
+ # time.sleep(0.05)
+
+ def grabbed(self):
+ """Return `True` if can read a frame."""
+ return self.Q.qsize() > 0
+
+ def getitem(self):
+ return self.Q.get().squeeze()
+
+ def stop(self):
+ if self.stopped:
+ return
+ self.stopped = True
+ self.stream.release()
+
+ def __len__(self):
+ return self.Q.qsize()
+
+ def __del__(self):
+ if self.stream.isOpened():
+ self.stream.release()
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ if self.stream.isOpened():
+ self.stream.release()
+
+
+if __name__ == '__main__':
+ fps_time = 0
+
+ # Using threading.
+ cam = CamLoader(0).start()
+ while cam.grabbed():
+ frames = cam.getitem()
+
+ frames = cv2.putText(frames, 'FPS: %f' % (1.0 / (time.time() - fps_time)),
+ (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+ fps_time = time.time()
+ cv2.imshow('frame', frames)
+
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+ cam.stop()
+ cv2.destroyAllWindows()
+
+ # Normal video capture.
+ """cam = cv2.VideoCapture(0)
+ while True:
+ ret, frame = cam.read()
+ if ret:
+ #time.sleep(0.05)
+ #frame = (cv2.flip(frame, 1) / 255.).astype(np.float)
+
+ frame = cv2.putText(frame, 'FPS: %f' % (1.0 / (time.time() - fps_time)),
+ (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+ fps_time = time.time()
+ cv2.imshow('frame', frame)
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+ cam.release()
+ cv2.destroyAllWindows()"""
\ No newline at end of file
diff --git a/StreamServer/src/analytic/action/Data/create_dataset_1.py b/StreamServer/src/analytic/action/Data/create_dataset_1.py
new file mode 100644
index 0000000..b1b79d8
--- /dev/null
+++ b/StreamServer/src/analytic/action/Data/create_dataset_1.py
@@ -0,0 +1,85 @@
+"""
+This script creates a .csv frame-level action annotation file for the videos.
+
+- It plays a video frame by frame; control the flow with [a] and [d]
+ to step to the previous or next frame.
+- Open the annot_file (.csv) and label each frame of the video with the number
+ of its action class.
+"""
+
+import os
+import cv2
+import time
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
+ 'Stand up', 'Sit down', 'Fall Down'] # label.
+
+video_folder = '../Data/falldata/Home/Videos'
+annot_file = '../Data/Home_new.csv'
+
+index_video_to_play = 0 # Choose video to play.
+
+
+def create_csv(folder):
+ list_file = sorted(os.listdir(folder))
+ cols = ['video', 'frame', 'label']
+ df = pd.DataFrame(columns=cols)
+ for fil in list_file:
+ cap = cv2.VideoCapture(os.path.join(folder, fil))
+ frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+ video = np.array([fil] * frames_count)
+ frame = np.arange(1, frames_count + 1)
+ label = np.array([0] * frames_count)
+ rows = np.stack([video, frame, label], axis=1)
+ df = pd.concat([df, pd.DataFrame(rows, columns=cols)],
+ ignore_index=True)
+ cap.release()
+ df.to_csv(annot_file, index=False)
+
+
+if not os.path.exists(annot_file):
+ create_csv(video_folder)
+
+annot = pd.read_csv(annot_file)
+video_list = annot.iloc[:, 0].unique()
+video_file = os.path.join(video_folder, video_list[index_video_to_play])
+print(os.path.basename(video_file))
+
+annot = annot[annot['video'] == video_list[index_video_to_play]].reset_index(drop=True)
+frames_idx = annot.iloc[:, 1].tolist()
+
+cap = cv2.VideoCapture(video_file)
+frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+assert frames_count == len(frames_idx), 'frame count not equal! {} and {}'.format(
+ len(frames_idx), frames_count
+)
+
+i = 0
+while True:
+ cap.set(cv2.CAP_PROP_POS_FRAMES, i)
+ ret, frame = cap.read()
+ if ret:
+ cls_name = class_names[int(annot.iloc[i, -1]) - 1]
+ frame = cv2.resize(frame, (0, 0), fx=1.5, fy=1.5)
+ frame = cv2.putText(frame, 'Frame: {} Pose: {}'.format(i+1, cls_name),
+ (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+ cv2.imshow('frame', frame)
+
+ key = cv2.waitKey(0) & 0xFF
+ if key == ord('q'):
+ break
+ elif key == ord('d'):
+ i += 1
+ continue
+ elif key == ord('a'):
+ i -= 1
+ continue
+ else:
+ break
+
+cap.release()
+cv2.destroyAllWindows()
diff --git a/StreamServer/src/analytic/action/Data/create_dataset_2.py b/StreamServer/src/analytic/action/Data/create_dataset_2.py
new file mode 100644
index 0000000..1bf1684
--- /dev/null
+++ b/StreamServer/src/analytic/action/Data/create_dataset_2.py
@@ -0,0 +1,137 @@
+"""
+This script extracts skeleton joint positions and scores.
+
+- The 'annot_folder' holds the action class and bounding box for each frame that came with the dataset.
+ It should be in the format [frame_idx, action_cls, xmin, ymin, xmax, ymax]
+ and is used to crop a person for the pose estimation model.
+- If you have no annotation file, you can leave annot_folder = '' to use the Detector model to get the
+ bounding box instead.
+"""
+
+import os
+import cv2
+import time
+import torch
+import pandas as pd
+import numpy as np
+import torchvision.transforms as transforms
+
+from DetectorLoader import TinyYOLOv3_onecls
+from PoseEstimateLoader import SPPE_FastPose
+from fn import vis_frame_fast
+
+save_path = '../../Data/Home_new-pose+score.csv'
+
+annot_file = '../../Data/Home_new.csv' # from create_dataset_1.py
+video_folder = '../Data/falldata/Home/Videos'
+annot_folder = '../Data/falldata/Home/Annotation_files' # bounding box annotation for each frame.
+
+# DETECTION MODEL.
+detector = TinyYOLOv3_onecls()
+
+# POSE MODEL.
+inp_h = 320
+inp_w = 256
+pose_estimator = SPPE_FastPose(inp_h, inp_w)
+
+# with score.
+columns = ['video', 'frame', 'Nose_x', 'Nose_y', 'Nose_s', 'LShoulder_x', 'LShoulder_y', 'LShoulder_s',
+ 'RShoulder_x', 'RShoulder_y', 'RShoulder_s', 'LElbow_x', 'LElbow_y', 'LElbow_s', 'RElbow_x',
+ 'RElbow_y', 'RElbow_s', 'LWrist_x', 'LWrist_y', 'LWrist_s', 'RWrist_x', 'RWrist_y', 'RWrist_s',
+ 'LHip_x', 'LHip_y', 'LHip_s', 'RHip_x', 'RHip_y', 'RHip_s', 'LKnee_x', 'LKnee_y', 'LKnee_s',
+ 'RKnee_x', 'RKnee_y', 'RKnee_s', 'LAnkle_x', 'LAnkle_y', 'LAnkle_s', 'RAnkle_x', 'RAnkle_y',
+ 'RAnkle_s', 'label']
+
+
+def normalize_points_with_size(points_xy, width, height, flip=False):
+ points_xy[:, 0] /= width
+ points_xy[:, 1] /= height
+ if flip:
+ points_xy[:, 0] = 1 - points_xy[:, 0]
+ return points_xy
+
+
+annot = pd.read_csv(annot_file)
+vid_list = annot['video'].unique()
+for vid in vid_list:
+ print(f'Process on: {vid}')
+ df = pd.DataFrame(columns=columns)
+ cur_row = 0
+
+ # Pose Labels.
+ frames_label = annot[annot['video'] == vid].reset_index(drop=True)
+
+ cap = cv2.VideoCapture(os.path.join(video_folder, vid))
+ frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+ frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+ int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+
+ # Bounding Box Labels.
+ annot_file = os.path.join(annot_folder, vid.split('.')[0] + '.txt')
+ annot = None
+ if os.path.exists(annot_file):
+ annot = pd.read_csv(annot_file, header=None,
+ names=['frame_idx', 'class', 'xmin', 'ymin', 'xmax', 'ymax'])
+ annot = annot.dropna().reset_index(drop=True)
+
+ assert frames_count == len(annot), 'frame count not equal! {} and {}'.format(frames_count, len(annot))
+
+ fps_time = 0
+ i = 1
+ while True:
+ ret, frame = cap.read()
+ if ret:
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+ cls_idx = int(frames_label[frames_label['frame'] == i]['label'])
+
+ if annot is not None:
+ bb = np.array(annot.iloc[i-1, 2:].astype(int))
+ else:
+ bb = detector.detect(frame)[0, :4].numpy().astype(int)
+ bb[:2] = np.maximum(0, bb[:2] - 5)
+ bb[2:] = np.minimum(frame_size, bb[2:] + 5) if bb[2:].any() != 0 else bb[2:]
+
+ result = []
+ if bb.any() != 0:
+ result = pose_estimator.predict(frame, torch.tensor(bb[None, ...]),
+ torch.tensor([[1.0]]))
+
+ if len(result) > 0:
+ pt_norm = normalize_points_with_size(result[0]['keypoints'].numpy().copy(),
+ frame_size[0], frame_size[1])
+ pt_norm = np.concatenate((pt_norm, result[0]['kp_score']), axis=1)
+
+ #idx = result[0]['kp_score'] <= 0.05
+ #pt_norm[idx.squeeze()] = np.nan
+ row = [vid, i, *pt_norm.flatten().tolist(), cls_idx]
+ scr = result[0]['kp_score'].mean()
+ else:
+ row = [vid, i, *[np.nan] * (13 * 3), cls_idx]
+ scr = 0.0
+
+ df.loc[cur_row] = row
+ cur_row += 1
+
+ # VISUALIZE.
+ frame = vis_frame_fast(frame, result)
+ frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)
+ frame = cv2.putText(frame, 'Frame: {}, Pose: {}, Score: {:.4f}'.format(i, cls_idx, scr),
+ (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+ frame = frame[:, :, ::-1]
+ fps_time = time.time()
+ i += 1
+
+ cv2.imshow('frame', frame)
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+ else:
+ break
+
+ cap.release()
+ cv2.destroyAllWindows()
+
+ if os.path.exists(save_path):
+ df.to_csv(save_path, mode='a', header=False, index=False)
+ else:
+ df.to_csv(save_path, mode='w', index=False)
+
diff --git a/StreamServer/src/analytic/action/Data/create_dataset_3.py b/StreamServer/src/analytic/action/Data/create_dataset_3.py
new file mode 100644
index 0000000..a767bb8
--- /dev/null
+++ b/StreamServer/src/analytic/action/Data/create_dataset_3.py
@@ -0,0 +1,127 @@
+"""
+This script creates the dataset and labels by cleaning off some NaN values, normalizing,
+label smoothing, and weighting labels by scores.
+
+"""
+import os
+import pickle
+import numpy as np
+import pandas as pd
+
+
+class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
+ 'Stand up', 'Sit down', 'Fall Down']
+main_parts = ['LShoulder_x', 'LShoulder_y', 'RShoulder_x', 'RShoulder_y', 'LHip_x', 'LHip_y',
+ 'RHip_x', 'RHip_y']
+main_idx_parts = [1, 2, 7, 8, -1]  # shoulders, hips and the added center point; their scores are boosted x1.5 below.
+
+csv_pose_file = '../Data/Coffee_room_new-pose+score.csv'
+save_path = '../../Data/Coffee_room_new-set(labelXscrw).pkl'
+
+# Params.
+smooth_labels_step = 8
+n_frames = 30
+skip_frame = 1
+
+annot = pd.read_csv(csv_pose_file)
+
+# Remove NaN.
+idx = annot.iloc[:, 2:-1][main_parts].isna().sum(1) > 0
+idx = np.where(idx)[0]
+annot = annot.drop(idx)
+# One-Hot Labels.
+label_onehot = pd.get_dummies(annot['label'])
+annot = annot.drop('label', axis=1).join(label_onehot)
+cols = label_onehot.columns.values
+
+
+def scale_pose(xy):
+ """
+ Normalize pose points by scale with max/min value of each pose.
+ xy : (frames, parts, xy) or (parts, xy)
+ """
+ if xy.ndim == 2:
+ xy = np.expand_dims(xy, 0)
+ xy_min = np.nanmin(xy, axis=1)
+ xy_max = np.nanmax(xy, axis=1)
+ for i in range(xy.shape[0]):
+ xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1
+ return xy.squeeze()
+
+
+def seq_label_smoothing(labels, max_step=10):
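+ """Smooth labels over time: when the dominant class is about to change within the next
+ `max_step` frames, linearly cross-fade the current and upcoming class probabilities over
+ the frames leading up to the change."""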
+ steps = 0
+ remain_step = 0
+ target_label = 0
+ active_label = 0
+ start_change = 0
+ max_val = np.max(labels)
+ min_val = np.min(labels)
+ for i in range(labels.shape[0]):
+ if remain_step > 0:
+ if i >= start_change:
+ labels[i][active_label] = max_val * remain_step / steps
+ labels[i][target_label] = max_val * (steps - remain_step) / steps \
+ if max_val * (steps - remain_step) / steps else min_val
+ remain_step -= 1
+ continue
+
+ diff_index = np.where(np.argmax(labels[i:i+max_step], axis=1) - np.argmax(labels[i]) != 0)[0]
+ if len(diff_index) > 0:
+ start_change = i + remain_step // 2
+ steps = diff_index[0]
+ remain_step = steps
+ target_label = np.argmax(labels[i + remain_step])
+ active_label = np.argmax(labels[i])
+ return labels
+
+
+feature_set = np.empty((0, n_frames, 14, 3))
+labels_set = np.empty((0, len(cols)))
+vid_list = annot['video'].unique()
+for vid in vid_list:
+ print(f'Process on: {vid}')
+ data = annot[annot['video'] == vid].reset_index(drop=True).drop(columns='video')
+
+ # Label Smoothing.
+ esp = 0.1
+ data[cols] = data[cols] * (1 - esp) + (1 - data[cols]) * esp / (len(cols) - 1)
+ data[cols] = seq_label_smoothing(data[cols].values, smooth_labels_step)
+
+ # Separate continuous frames.
+ frames = data['frame'].values
+ frames_set = []
+ fs = [0]
+ for i in range(1, len(frames)):
+ if frames[i] < frames[i-1] + 10:
+ fs.append(i)
+ else:
+ frames_set.append(fs)
+ fs = [i]
+ frames_set.append(fs)
+
+ for fs in frames_set:
+ xys = data.iloc[fs, 1:-len(cols)].values.reshape(-1, 13, 3)
+ # Scale pose normalize.
+ xys[:, :, :2] = scale_pose(xys[:, :, :2])
+ # Add center point.
+ xys = np.concatenate((xys, np.expand_dims((xys[:, 1, :] + xys[:, 2, :]) / 2, 1)), axis=1)
+
+ # Weighting main parts score.
+ scr = xys[:, :, -1].copy()
+ scr[:, main_idx_parts] = np.minimum(scr[:, main_idx_parts] * 1.5, 1.0)
+ # Mean score.
+ scr = scr.mean(1)
+
+ # Targets.
+ lb = data.iloc[fs, -len(cols):].values
+ # Apply points score mean to all labels.
+ lb = lb * scr[:, None]
+
+ for i in range(xys.shape[0] - n_frames):
+ feature_set = np.append(feature_set, xys[i:i+n_frames][None, ...], axis=0)
+ labels_set = np.append(labels_set, lb[i:i+n_frames].mean(0)[None, ...], axis=0)
+
+
+"""with open(save_path, 'wb') as f:
+ pickle.dump((feature_set, labels_set), f)"""
diff --git a/StreamServer/src/analytic/action/Detection/Models.py b/StreamServer/src/analytic/action/Detection/Models.py
new file mode 100644
index 0000000..d12d5df
--- /dev/null
+++ b/StreamServer/src/analytic/action/Detection/Models.py
@@ -0,0 +1,348 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+
+from .Utils import build_targets, to_cpu, parse_model_config
+
+
+def create_modules(module_defs):
+ """
+ Constructs module list of layer blocks from module configuration in module_defs
+ """
+ hyperparams = module_defs.pop(0)
+ output_filters = [int(hyperparams["channels"])] # [3]
+ module_list = nn.ModuleList()
+ for module_i, module_def in enumerate(module_defs):
+ modules = nn.Sequential()
+
+ if module_def["type"] == "convolutional":
+ bn = int(module_def["batch_normalize"])
+ filters = int(module_def["filters"])
+ kernel_size = int(module_def["size"])
+ pad = (kernel_size - 1) // 2
+ modules.add_module(
+ f"conv_{module_i}",
+ nn.Conv2d(
+ in_channels=output_filters[-1],
+ out_channels=filters,
+ kernel_size=kernel_size,
+ stride=int(module_def["stride"]),
+ padding=pad,
+ bias=not bn,
+ ),
+ )
+ if bn:
+ modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
+ if module_def["activation"] == "leaky":
+ modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))
+
+ elif module_def["type"] == "maxpool":
+ kernel_size = int(module_def["size"])
+ stride = int(module_def["stride"])
+ if kernel_size == 2 and stride == 1:
+ modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
+ maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
+ modules.add_module(f"maxpool_{module_i}", maxpool)
+
+ elif module_def["type"] == "upsample":
+ upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
+ modules.add_module(f"upsample_{module_i}", upsample)
+
+ elif module_def["type"] == "route":
+ layers = [int(x) for x in module_def["layers"].split(",")]
+ filters = sum([output_filters[1:][i] for i in layers])
+ modules.add_module(f"route_{module_i}", EmptyLayer())
+
+ elif module_def["type"] == "shortcut":
+ filters = output_filters[1:][int(module_def["from"])]
+ modules.add_module(f"shortcut_{module_i}", EmptyLayer())
+
+ elif module_def["type"] == "yolo":
+ anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
+ # Extract anchors
+ anchors = [int(x) for x in module_def["anchors"].split(",")]
+ anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
+ anchors = [anchors[i] for i in anchor_idxs]
+ num_classes = int(module_def["classes"])
+ img_size = int(hyperparams["height"])
+ # Define detection layer
+ yolo_layer = YOLOLayer(anchors, num_classes, img_size)
+ modules.add_module(f"yolo_{module_i}", yolo_layer)
+ # Register module list and number of output filters
+ module_list.append(modules)
+ output_filters.append(filters)
+
+ return hyperparams, module_list
+
+
+class Upsample(nn.Module):
+ """ nn.Upsample is deprecated """
+ def __init__(self, scale_factor, mode="nearest"):
+ super(Upsample, self).__init__()
+ self.scale_factor = scale_factor
+ self.mode = mode
+
+ def forward(self, x):
+ x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
+ return x
+
+
+class EmptyLayer(nn.Module):
+ """Placeholder for 'route' and 'shortcut' layers"""
+ def __init__(self):
+ super(EmptyLayer, self).__init__()
+
+
+class YOLOLayer(nn.Module):
+ """Detection layer"""
+ def __init__(self, anchors, num_classes, img_dim=416):
+ super(YOLOLayer, self).__init__()
+ self.anchors = anchors
+ self.num_anchors = len(anchors)
+ self.num_classes = num_classes
+ self.ignore_thres = 0.5
+ self.mse_loss = nn.MSELoss()
+ self.bce_loss = nn.BCELoss()
+ self.obj_scale = 1
+ self.noobj_scale = 100
+ self.metrics = {}
+ self.img_dim = img_dim
+ self.grid_size = 0 # grid size
+
+ def compute_grid_offsets(self, grid_size, cuda=True):
+ self.grid_size = grid_size
+ g = self.grid_size
+ FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
+ self.stride = self.img_dim / self.grid_size
+ # Calculate offsets for each grid
+ self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor)
+ self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor)
+ self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors])
+ self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
+ self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
+
+ def forward(self, x, targets=None, img_dim=None):
+ # Tensors for cuda support
+ FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
+ LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
+ ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
+
+ self.img_dim = img_dim
+ num_samples = x.size(0)
+ grid_size = x.size(2)
+
+ prediction = (
+ x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
+ .permute(0, 1, 3, 4, 2)
+ .contiguous()
+ )
+
+ # Get outputs
+ x = torch.sigmoid(prediction[..., 0]) # Center x
+ y = torch.sigmoid(prediction[..., 1]) # Center y
+ w = prediction[..., 2] # Width
+ h = prediction[..., 3] # Height
+ pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
+ pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
+
+ # If grid size does not match current we compute new offsets
+ if grid_size != self.grid_size:
+ self.compute_grid_offsets(grid_size, cuda=x.is_cuda)
+
+ # Add offset and scale with anchors
+ pred_boxes = FloatTensor(prediction[..., :4].shape)
+ pred_boxes[..., 0] = x.data + self.grid_x
+ pred_boxes[..., 1] = y.data + self.grid_y
+ pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
+ pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
+
+ output = torch.cat(
+ (
+ pred_boxes.view(num_samples, -1, 4) * self.stride,
+ pred_conf.view(num_samples, -1, 1),
+ pred_cls.view(num_samples, -1, self.num_classes),
+ ),
+ -1,
+ )
+
+ if targets is None:
+ return output, 0
+ else:
+ iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
+ pred_boxes=pred_boxes,
+ pred_cls=pred_cls,
+ target=targets,
+ anchors=self.scaled_anchors,
+ ignore_thres=self.ignore_thres,
+ )
+
+ # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
+ loss_x = self.mse_loss(x[obj_mask.bool()], tx[obj_mask.bool()])
+ loss_y = self.mse_loss(y[obj_mask.bool()], ty[obj_mask.bool()])
+ loss_w = self.mse_loss(w[obj_mask.bool()], tw[obj_mask.bool()])
+ loss_h = self.mse_loss(h[obj_mask.bool()], th[obj_mask.bool()])
+ loss_conf_obj = self.bce_loss(pred_conf[obj_mask.bool()], tconf[obj_mask.bool()])
+ loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask.bool()], tconf[noobj_mask.bool()])
+ loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
+ loss_cls = self.bce_loss(pred_cls[obj_mask.bool()], tcls[obj_mask.bool()])
+ total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
+
+ # Metrics
+ cls_acc = 100 * class_mask[obj_mask.bool()].mean()
+ conf_obj = pred_conf[obj_mask.bool()].mean()
+ conf_noobj = pred_conf[noobj_mask.bool()].mean()
+ conf50 = (pred_conf > 0.5).float()
+ iou50 = (iou_scores > 0.5).float()
+ iou75 = (iou_scores > 0.75).float()
+ detected_mask = conf50 * class_mask * tconf
+ precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
+ recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
+ recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)
+
+ self.metrics = {
+ "loss": to_cpu(total_loss).item(),
+ "x": to_cpu(loss_x).item(),
+ "y": to_cpu(loss_y).item(),
+ "w": to_cpu(loss_w).item(),
+ "h": to_cpu(loss_h).item(),
+ "conf": to_cpu(loss_conf).item(),
+ "cls": to_cpu(loss_cls).item(),
+ "cls_acc": to_cpu(cls_acc).item(),
+ "recall50": to_cpu(recall50).item(),
+ "recall75": to_cpu(recall75).item(),
+ "precision": to_cpu(precision).item(),
+ "conf_obj": to_cpu(conf_obj).item(),
+ "conf_noobj": to_cpu(conf_noobj).item(),
+ "grid_size": grid_size,
+ }
+
+ return output, total_loss
+
+
+class Darknet(nn.Module):
+ """YOLOv3 object detection model"""
+ def __init__(self, config_path, img_size=416):
+ super(Darknet, self).__init__()
+ self.module_defs = parse_model_config(config_path)
+ self.hyperparams, self.module_list = create_modules(self.module_defs)
+ self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")]
+ self.img_size = img_size
+ self.seen = 0
+ self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
+
+ def forward(self, x, targets=None):
+ img_dim = x.shape[2]
+ loss = 0
+ layer_outputs, yolo_outputs = [], []
+ for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
+ if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
+ x = module(x)
+ elif module_def["type"] == "route":
+ x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1)
+ elif module_def["type"] == "shortcut":
+ layer_i = int(module_def["from"])
+ x = layer_outputs[-1] + layer_outputs[layer_i]
+ elif module_def["type"] == "yolo":
+ x, layer_loss = module[0](x, targets, img_dim)
+ loss += layer_loss
+ yolo_outputs.append(x)
+ layer_outputs.append(x)
+ yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
+ return yolo_outputs if targets is None else (loss, yolo_outputs)
+
+ def load_darknet_weights(self, weights_path):
+ """Parses and loads the weights stored in 'weights_path'"""
+ # Open the weights file
+ with open(weights_path, "rb") as f:
+ header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values
+ self.header_info = header # Needed to write header when saving weights
+ self.seen = header[3] # number of images seen during training
+ weights = np.fromfile(f, dtype=np.float32) # The rest are weights
+
+ # Establish cutoff for loading backbone weights
+ cutoff = None
+ if "darknet53.conv.74" in weights_path:
+ cutoff = 75
+
+ ptr = 0
+ for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
+ if i == cutoff:
+ break
+ if module_def["type"] == "convolutional":
+ conv_layer = module[0]
+ if module_def["batch_normalize"]:
+ # Load BN bias, weights, running mean and running variance
+ bn_layer = module[1]
+ num_b = bn_layer.bias.numel() # Number of biases
+ # Bias
+ bn_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.bias)
+ bn_layer.bias.data.copy_(bn_b)
+ ptr += num_b
+ # Weight
+ bn_w = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.weight)
+ bn_layer.weight.data.copy_(bn_w)
+ ptr += num_b
+ # Running Mean
+ bn_rm = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.running_mean)
+ bn_layer.running_mean.data.copy_(bn_rm)
+ ptr += num_b
+ # Running Var
+ bn_rv = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.running_var)
+ bn_layer.running_var.data.copy_(bn_rv)
+ ptr += num_b
+ else:
+ # Load conv. bias
+ num_b = conv_layer.bias.numel()
+ conv_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(conv_layer.bias)
+ conv_layer.bias.data.copy_(conv_b)
+ ptr += num_b
+ # Load conv. weights
+ num_w = conv_layer.weight.numel()
+ conv_w = torch.from_numpy(weights[ptr: ptr + num_w]).view_as(conv_layer.weight)
+ conv_layer.weight.data.copy_(conv_w)
+ ptr += num_w
+
+ def save_darknet_weights(self, path, cutoff=-1):
+ """
+ :param path: path of the new weights file
+ :param cutoff: save layers between 0 and cutoff (cutoff = -1 -> all are saved)
+ """
+ fp = open(path, "wb")
+ self.header_info[3] = self.seen
+ self.header_info.tofile(fp)
+
+ # Iterate through layers
+ for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
+ if module_def["type"] == "convolutional":
+ conv_layer = module[0]
+ # If batch norm, load bn first
+ if module_def["batch_normalize"]:
+ bn_layer = module[1]
+ bn_layer.bias.data.cpu().numpy().tofile(fp)
+ bn_layer.weight.data.cpu().numpy().tofile(fp)
+ bn_layer.running_mean.data.cpu().numpy().tofile(fp)
+ bn_layer.running_var.data.cpu().numpy().tofile(fp)
+ # Load conv bias
+ else:
+ conv_layer.bias.data.cpu().numpy().tofile(fp)
+ # Load conv weights
+ conv_layer.weight.data.cpu().numpy().tofile(fp)
+
+ fp.close()
+
+ def load_pretrain_to_custom_class(self, weights_pth_path):
+ state = torch.load(weights_pth_path)
+
+ own_state = self.state_dict()
+ for name, param in state.items():
+ if name not in own_state:
+ print(f'Model does not have this param: {name}!')
+ continue
+
+ if param.shape != own_state[name].shape:
+ print(f'Not loading param: {name} because its shape does not match: '
+ f'{param.shape} vs {own_state[name].shape}')
+ continue
+
+ own_state[name].copy_(param)
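+
+# Usage sketch (illustrative only; the cfg/weights paths are placeholders, not files shipped in
+# this diff): Darknet builds the network from a YOLOv3 .cfg file and loads either darknet-format
+# .weights or a PyTorch state dict.
+#
+#   net = Darknet('<path-to>.cfg', img_size=416).eval()
+#   net.load_darknet_weights('<path-to>.weights')         # or net.load_state_dict(torch.load('<path-to>.pth'))
+#   with torch.no_grad():
+#       detections = net(torch.zeros(1, 3, 416, 416))     # (1, n_boxes, 5 + num_classes)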
diff --git a/StreamServer/src/analytic/action/Detection/Utils.py b/StreamServer/src/analytic/action/Detection/Utils.py
new file mode 100644
index 0000000..5e9472a
--- /dev/null
+++ b/StreamServer/src/analytic/action/Detection/Utils.py
@@ -0,0 +1,415 @@
+import cv2
+import math
+import time
+import tqdm
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from torch.utils.data import DataLoader
+
+
+def to_cpu(tensor):
+ return tensor.detach().cpu()
+
+
+def load_classes(path):
+ """
+ Loads class labels at 'path'
+ """
+ fp = open(path, "r")
+ names = fp.read().split("\n")[:-1]
+ return names
+
+
+def weights_init_normal(m):
+ classname = m.__class__.__name__
+ if classname.find("Conv") != -1:
+ torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
+ elif classname.find("BatchNorm2d") != -1:
+ torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
+ torch.nn.init.constant_(m.bias.data, 0.0)
+
+
+def rescale_boxes(boxes, current_dim, original_shape):
+ """ Rescales bounding boxes to the original shape """
+ orig_h, orig_w = original_shape
+ # The amount of padding that was added
+ pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
+ pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
+ # Image height and width after padding is removed
+ unpad_h = current_dim - pad_y
+ unpad_w = current_dim - pad_x
+ # Rescale bounding boxes to dimension of original image
+ boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
+ boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
+ boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
+ boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
+ return boxes
+
+
+def xywh2xyxy(x):
+ y = x.new(x.shape)
+ y[..., 0] = x[..., 0] - x[..., 2] / 2
+ y[..., 1] = x[..., 1] - x[..., 3] / 2
+ y[..., 2] = x[..., 0] + x[..., 2] / 2
+ y[..., 3] = x[..., 1] + x[..., 3] / 2
+ return y
+
+
+def ap_per_class(tp, conf, pred_cls, target_cls):
+ """ Compute the average precision, given the recall and precision curves.
+ Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
+ # Arguments
+ tp: True positives (list).
+ conf: Objectness value from 0-1 (list).
+ pred_cls: Predicted object classes (list).
+ target_cls: True object classes (list).
+ # Returns
+ The average precision as computed in py-faster-rcnn.
+ """
+ # Sort by objectness
+ i = np.argsort(-conf)
+ tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
+
+ # Find unique classes
+ unique_classes = np.unique(target_cls)
+
+ # Create Precision-Recall curve and compute AP for each class
+ ap, p, r = [], [], []
+ for c in tqdm.tqdm(unique_classes, desc="Computing AP"):
+ i = pred_cls == c
+ n_gt = (target_cls == c).sum() # Number of ground truth objects
+ n_p = i.sum() # Number of predicted objects
+
+ if n_p == 0 and n_gt == 0:
+ continue
+ elif n_p == 0 or n_gt == 0:
+ ap.append(0)
+ r.append(0)
+ p.append(0)
+ else:
+ # Accumulate FPs and TPs
+ fpc = (1 - tp[i]).cumsum()
+ tpc = (tp[i]).cumsum()
+
+ # Recall
+ recall_curve = tpc / (n_gt + 1e-16)
+ r.append(recall_curve[-1])
+
+ # Precision
+ precision_curve = tpc / (tpc + fpc)
+ p.append(precision_curve[-1])
+
+ # AP from recall-precision curve
+ ap.append(compute_ap(recall_curve, precision_curve))
+
+ # Compute F1 score (harmonic mean of precision and recall)
+ p, r, ap = np.array(p), np.array(r), np.array(ap)
+ f1 = 2 * p * r / (p + r + 1e-16)
+
+ return p, r, ap, f1, unique_classes.astype("int32")
+
+
+def compute_ap(recall, precision):
+ """ Compute the average precision, given the recall and precision curves.
+ Code originally from https://github.com/rbgirshick/py-faster-rcnn.
+ # Arguments
+ recall: The recall curve (list).
+ precision: The precision curve (list).
+ # Returns
+ The average precision as computed in py-faster-rcnn.
+ """
+ # correct AP calculation
+ # first append sentinel values at the end
+ mrec = np.concatenate(([0.0], recall, [1.0]))
+ mpre = np.concatenate(([0.0], precision, [0.0]))
+
+ # compute the precision envelope
+ for i in range(mpre.size - 1, 0, -1):
+ mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
+
+ # to calculate area under PR curve, look for points
+ # where X axis (recall) changes value
+ i = np.where(mrec[1:] != mrec[:-1])[0]
+
+ # and sum (\Delta recall) * prec
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
+ return ap
+
+
+def get_batch_statistics(outputs, targets, iou_threshold):
+ """ Compute true positives, predicted scores and predicted labels per sample """
+ batch_metrics = []
+ for sample_i in range(len(outputs)):
+
+ if outputs[sample_i] is None:
+ continue
+
+ output = outputs[sample_i]
+ pred_boxes = output[:, :4]
+ pred_scores = output[:, 4]
+ pred_labels = output[:, -1]
+
+ true_positives = np.zeros(pred_boxes.shape[0])
+
+ annotations = targets[targets[:, 0] == sample_i][:, 1:]
+ target_labels = annotations[:, 0] if len(annotations) else []
+ if len(annotations):
+ detected_boxes = []
+ target_boxes = annotations[:, 1:]
+
+ for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
+
+ # If targets are found break
+ if len(detected_boxes) == len(annotations):
+ break
+
+ # Ignore if label is not one of the target labels
+ if pred_label not in target_labels:
+ continue
+
+ iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
+ if iou >= iou_threshold and box_index not in detected_boxes:
+ true_positives[pred_i] = 1
+ detected_boxes += [box_index]
+ batch_metrics.append([true_positives, pred_scores, pred_labels])
+ return batch_metrics
+
+
+def bbox_wh_iou(wh1, wh2):
+ wh2 = wh2.t()
+ w1, h1 = wh1[0], wh1[1]
+ w2, h2 = wh2[0], wh2[1]
+ inter_area = torch.min(w1, w2) * torch.min(h1, h2)
+ union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
+ return inter_area / union_area
+
+
+def bbox_iou(box1, box2, x1y1x2y2=True):
+ """
+ Returns the IoU of two bounding boxes
+ """
+ if not x1y1x2y2:
+ # Transform from center and width to exact coordinates
+ b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
+ b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
+ b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
+ b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
+ else:
+ # Get the coordinates of bounding boxes
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
+
+ # get the corrdinates of the intersection rectangle
+ inter_rect_x1 = torch.max(b1_x1, b2_x1)
+ inter_rect_y1 = torch.max(b1_y1, b2_y1)
+ inter_rect_x2 = torch.min(b1_x2, b2_x2)
+ inter_rect_y2 = torch.min(b1_y2, b2_y2)
+ # Intersection area
+ inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
+ inter_rect_y2 - inter_rect_y1 + 1, min=0
+ )
+ # Union Area
+ b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
+ b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
+
+ iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
+
+ return iou
+
+
+def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
+ """
+ Removes detections with lower object confidence score than 'conf_thres' and performs
+ Non-Maximum Suppression to further filter detections.
+ Returns detections with shape:
+ (x1, y1, x2, y2, object_conf, class_score, class_pred)
+ """
+ # From (center x, center y, width, height) to (x1, y1, x2, y2)
+ prediction[..., :4] = xywh2xyxy(prediction[..., :4])
+ output = [None for _ in range(len(prediction))]
+ for image_i, image_pred in enumerate(prediction):
+ # Filter out confidence scores below threshold
+ image_pred = image_pred[image_pred[:, 4] >= conf_thres]
+ # If none are remaining => process next image
+ if not image_pred.size(0):
+ continue
+ # Object confidence times class confidence
+ score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
+ # Sort by it
+ image_pred = image_pred[(-score).argsort()]
+ class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
+ detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
+ # Perform non-maximum suppression
+ keep_boxes = []
+ while detections.size(0):
+ large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
+ label_match = detections[0, -1] == detections[:, -1]
+ # Indices of boxes with lower confidence scores, large IOUs and matching labels
+ invalid = large_overlap & label_match
+ weights = detections[invalid, 4:5]
+ # Merge overlapping bboxes by order of confidence
+ detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
+ keep_boxes += [detections[0]]
+ detections = detections[~invalid]
+ if keep_boxes:
+ output[image_i] = torch.stack(keep_boxes)
+
+ return output
+
+
+def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):
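+    # Build YOLO training targets: for each ground-truth box, pick the anchor with the best
+    # width/height IoU, mark its grid cell in obj_mask, clear noobj_mask there (and wherever an
+    # anchor's IoU exceeds ignore_thres), then fill the regression targets (tx, ty, tw, th),
+    # the one-hot class targets and the IoU / class-correctness scores used by the loss.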
+ ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor
+ FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor
+
+ nB = pred_boxes.size(0)
+ nA = pred_boxes.size(1)
+ nC = pred_cls.size(-1)
+ nG = pred_boxes.size(2)
+
+ # Output tensors
+ obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)
+ noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)
+ class_mask = FloatTensor(nB, nA, nG, nG).fill_(0)
+ iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)
+ tx = FloatTensor(nB, nA, nG, nG).fill_(0)
+ ty = FloatTensor(nB, nA, nG, nG).fill_(0)
+ tw = FloatTensor(nB, nA, nG, nG).fill_(0)
+ th = FloatTensor(nB, nA, nG, nG).fill_(0)
+ tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0)
+
+ # Convert to position relative to box
+ target_boxes = target[:, 2:6] * nG
+ gxy = target_boxes[:, :2]
+ gwh = target_boxes[:, 2:]
+ # Get anchors with best iou
+ ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
+ best_ious, best_n = ious.max(0)
+ # Separate target values
+ b, target_labels = target[:, :2].long().t()
+ gx, gy = gxy.t()
+ gw, gh = gwh.t()
+ gi, gj = gxy.long().t()
+ # Set masks
+ obj_mask[b, best_n, gj, gi] = 1
+ noobj_mask[b, best_n, gj, gi] = 0
+
+ # Set noobj mask to zero where iou exceeds ignore threshold
+ for i, anchor_ious in enumerate(ious.t()):
+ noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0
+
+ # Coordinates
+ tx[b, best_n, gj, gi] = gx - gx.floor()
+ ty[b, best_n, gj, gi] = gy - gy.floor()
+ # Width and height
+ tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
+ th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
+ # One-hot encoding of label
+ tcls[b, best_n, gj, gi, target_labels] = 1
+ # Compute label correctness and iou at best anchor
+ class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
+ iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False)
+
+ tconf = obj_mask.float()
+ return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
+
+
+def parse_model_config(path):
+ """Parses the yolo-v3 layer configuration file and returns module definitions"""
+ file = open(path, 'r')
+ lines = file.read().split('\n')
+ lines = [x for x in lines if x and not x.startswith('#')]
+ lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
+ module_defs = []
+ for line in lines:
+ if line.startswith('['): # This marks the start of a new block
+ module_defs.append({})
+ module_defs[-1]['type'] = line[1:-1].rstrip()
+ if module_defs[-1]['type'] == 'convolutional':
+ module_defs[-1]['batch_normalize'] = 0
+ else:
+ key, value = line.split("=")
+ value = value.strip()
+ module_defs[-1][key.rstrip()] = value.strip()
+
+ return module_defs
+
+
+def parse_data_config(path):
+ """Parses the data configuration file"""
+ options = dict()
+ options['gpus'] = '0,1,2,3'
+ options['num_workers'] = '10'
+ with open(path, 'r') as fp:
+ lines = fp.readlines()
+ for line in lines:
+ line = line.strip()
+ if line == '' or line.startswith('#'):
+ continue
+ key, value = line.split('=')
+ options[key.strip()] = value.strip()
+ return options
+
+
+def ResizePadding(height, width):
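+    # Returns a transform that resizes an image to fit inside (height, width) while keeping its
+    # aspect ratio, then pads the borders with black up to the exact target size.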
+    desired_size = (height, width)
+
+    def resizePadding(image, **kwargs):
+        old_size = image.shape[:2]
+        max_size_idx = old_size.index(max(old_size))
+        ratio = float(desired_size[max_size_idx]) / max(old_size)
+        new_size = tuple([int(x * ratio) for x in old_size])
+
+        if new_size > desired_size:
+            min_size_idx = old_size.index(min(old_size))
+            ratio = float(desired_size[min_size_idx]) / min(old_size)
+            new_size = tuple([int(x * ratio) for x in old_size])
+
+        image = cv2.resize(image, (new_size[1], new_size[0]))
+        delta_w = desired_size[1] - new_size[1]
+        delta_h = desired_size[0] - new_size[0]
+ top, bottom = delta_h // 2, delta_h - (delta_h // 2)
+ left, right = delta_w // 2, delta_w - (delta_w // 2)
+
+ image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT)
+ return image
+ return resizePadding
+
+
+class AverageValueMeter(object):
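+    """Running mean and standard deviation tracker (Welford-style online update)."""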
+ def __init__(self):
+ self.reset()
+ self.val = 0
+
+ def add(self, value, n=1):
+ self.val = value
+ self.sum += value
+ self.var += value * value
+ self.n += n
+
+ if self.n == 0:
+ self.mean, self.std = np.nan, np.nan
+ elif self.n == 1:
+ self.mean = 0.0 + self.sum # This is to force a copy in torch/numpy
+ self.std = np.inf
+ self.mean_old = self.mean
+ self.m_s = 0.0
+ else:
+ self.mean = self.mean_old + (value - n * self.mean_old) / float(self.n)
+ self.m_s += (value - self.mean_old) * (value - self.mean)
+ self.mean_old = self.mean
+ self.std = np.sqrt(self.m_s / (self.n - 1.0))
+
+ def value(self):
+ return self.mean, self.std
+
+ def reset(self):
+ self.n = 0
+ self.sum = 0.0
+ self.var = 0.0
+ self.val = 0.0
+ self.mean = np.nan
+ self.mean_old = 0.0
+ self.m_s = 0.0
+ self.std = np.nan
diff --git a/StreamServer/src/analytic/action/DetectorLoader.py b/StreamServer/src/analytic/action/DetectorLoader.py
new file mode 100644
index 0000000..6c06bdb
--- /dev/null
+++ b/StreamServer/src/analytic/action/DetectorLoader.py
@@ -0,0 +1,117 @@
+import time
+import torch
+import numpy as np
+import torchvision.transforms as transforms
+
+from queue import Queue
+from threading import Thread
+
+from .Detection.Models import Darknet
+from .Detection.Utils import non_max_suppression, ResizePadding
+
+
+class TinyYOLOv3_onecls(object):
+ """Load trained Tiny-YOLOv3 one class (person) detection model.
+ Args:
+ input_size: (int) Size of input image must be divisible by 32. Default: 416,
+ config_file: (str) Path to Yolo model structure config file.,
+ weight_file: (str) Path to trained weights file.,
+ nms: (float) Non-Maximum Suppression overlap threshold.,
+ conf_thres: (float) Minimum Confidence threshold of predicted bboxs to cut off.,
+ device: (str) Device to load the model on 'cpu' or 'cuda'.
+ """
+ def __init__(self,
+ input_size=416,
+ config_file='Models/yolo-tiny-onecls/yolov3-tiny-onecls.cfg',
+ weight_file='Models/yolo-tiny-onecls/best-model.pth',
+ nms=0.2,
+ conf_thres=0.45,
+ device='cuda'):
+ self.input_size = input_size
+ self.model = Darknet(config_file).to(device)
+ self.model.load_state_dict(torch.load(weight_file))
+ self.model.eval()
+ self.device = device
+
+ self.nms = nms
+ self.conf_thres = conf_thres
+
+ self.resize_fn = ResizePadding(input_size, input_size)
+ self.transf_fn = transforms.ToTensor()
+
+ def detect(self, image, need_resize=True, expand_bb=5):
+ """Feed forward to the model.
+ Args:
+ image: (numpy array) Single RGB image to detect.,
+            need_resize: (bool) If `True`, resize the image to input_size before the forward
+                pass and scale the returned bboxes back to the original image size.,
+            expand_bb: (int) Number of pixels to expand each bbox boundary by.
+        Returns:
+            (torch.float32) One row per detected object containing
+                [top, left, bottom, right, bbox_score, class_score, class],
+                or `None` if nothing is detected.
+ """
+ image_size = (self.input_size, self.input_size)
+ if need_resize:
+ image_size = image.shape[:2]
+ image = self.resize_fn(image)
+
+ image = self.transf_fn(image)[None, ...]
+ scf = torch.min(self.input_size / torch.FloatTensor([image_size]), 1)[0]
+
+ detected = self.model(image.to(self.device))
+ detected = non_max_suppression(detected, self.conf_thres, self.nms)[0]
+ if detected is not None:
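+            # Map boxes from the letterboxed input_size square back to original image
+            # coordinates: undo the padding offset, then the resize scale, and finally
+            # expand and clip the boxes to the image bounds.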
+ detected[:, [0, 2]] -= (self.input_size - scf * image_size[1]) / 2
+ detected[:, [1, 3]] -= (self.input_size - scf * image_size[0]) / 2
+ detected[:, 0:4] /= scf
+
+ detected[:, 0:2] = np.maximum(0, detected[:, 0:2] - expand_bb)
+ detected[:, 2:4] = np.minimum(image_size[::-1], detected[:, 2:4] + expand_bb)
+
+ return detected
+
+
+class ThreadDetection(object):
+ def __init__(self,
+ dataloader,
+ model,
+ queue_size=256):
+ self.model = model
+
+ self.dataloader = dataloader
+ self.stopped = False
+ self.Q = Queue(maxsize=queue_size)
+
+ def start(self):
+        Thread(target=self.update, args=(), daemon=True).start()
+ return self
+
+ def update(self):
+ while True:
+ if self.stopped:
+ return
+
+ images = self.dataloader.getitem()
+
+ outputs = self.model.detect(images)
+
+ if self.Q.full():
+ time.sleep(2)
+ self.Q.put((images, outputs))
+
+ def getitem(self):
+ return self.Q.get()
+
+ def stop(self):
+ self.stopped = True
+
+ def __len__(self):
+ return self.Q.qsize()
+
+
+
+
+
+
+
diff --git a/StreamServer/src/analytic/action/Models/TSSTG/_.txt b/StreamServer/src/analytic/action/Models/TSSTG/_.txt
new file mode 100644
index 0000000..4ff1ded
--- /dev/null
+++ b/StreamServer/src/analytic/action/Models/TSSTG/_.txt
@@ -0,0 +1 @@
+tsstg-model.pth
\ No newline at end of file
diff --git a/StreamServer/src/analytic/action/Models/sppe/_.txt b/StreamServer/src/analytic/action/Models/sppe/_.txt
new file mode 100644
index 0000000..c9fe15c
--- /dev/null
+++ b/StreamServer/src/analytic/action/Models/sppe/_.txt
@@ -0,0 +1,2 @@
+fast_res50_256x192.pth
+fast_res101_320x256.pth
\ No newline at end of file
diff --git a/StreamServer/src/analytic/action/Models/yolo-tiny-onecls/_.txt b/StreamServer/src/analytic/action/Models/yolo-tiny-onecls/_.txt
new file mode 100644
index 0000000..1cf401e
--- /dev/null
+++ b/StreamServer/src/analytic/action/Models/yolo-tiny-onecls/_.txt
@@ -0,0 +1,2 @@
+best-model.pth
+yolov3-tiny-onecls.cfg
\ No newline at end of file
diff --git a/StreamServer/src/analytic/action/PoseEstimateLoader.py b/StreamServer/src/analytic/action/PoseEstimateLoader.py
new file mode 100644
index 0000000..b2cc285
--- /dev/null
+++ b/StreamServer/src/analytic/action/PoseEstimateLoader.py
@@ -0,0 +1,40 @@
+import os
+import cv2
+import torch
+
+from .SPPE.src.main_fast_inference import InferenNet_fast, InferenNet_fastRes50
+from .SPPE.src.utils.img import crop_dets
+from .pPose_nms import pose_nms
+from .SPPE.src.utils.eval import getPrediction
+
+
+class SPPE_FastPose(object):
+ def __init__(self,
+ backbone,
+ input_height=320,
+ input_width=256,
+ device='cuda',
+ path='./SPPE/models/sppe/'):
+        assert backbone in ['resnet50', 'resnet101'], '{} backbone is not supported yet!'.format(backbone)
+
+ self.inp_h = input_height
+ self.inp_w = input_width
+ self.device = device
+
+ if backbone == 'resnet101':
+ self.model = InferenNet_fast(path).to(device)
+ else:
+ self.model = InferenNet_fastRes50(path).to(device)
+ self.model.eval()
+
+ def predict(self, image, bboxs, bboxs_scores):
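+        # Crop each detected person from the frame, run the SPPE heatmap model, convert the
+        # heatmaps to keypoint coordinates in image space, then merge duplicates with pose NMS.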
+ inps, pt1, pt2 = crop_dets(image, bboxs, self.inp_h, self.inp_w)
+ pose_hm = self.model(inps.to(self.device)).cpu().data
+
+ # Cut eyes and ears.
+ pose_hm = torch.cat([pose_hm[:, :1, ...], pose_hm[:, 5:, ...]], dim=1)
+
+ xy_hm, xy_img, scores = getPrediction(pose_hm, pt1, pt2, self.inp_h, self.inp_w,
+ pose_hm.shape[-2], pose_hm.shape[-1])
+ result = pose_nms(bboxs, bboxs_scores, xy_img, scores)
+ return result
\ No newline at end of file
diff --git a/StreamServer/src/analytic/action/README.md b/StreamServer/src/analytic/action/README.md
new file mode 100644
index 0000000..66cdc06
--- /dev/null
+++ b/StreamServer/src/analytic/action/README.md
@@ -0,0 +1,51 @@
+
+
+This module uses a Tiny-YOLO oneclass model to detect each person in the frame,
+[AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to extract the skeleton pose, and an
+[ST-GCN](https://github.com/yysijie/st-gcn) model to predict the action from every 30 frames
+of each person's track.
+
+It currently supports 7 actions: Standing, Walking, Sitting, Lying Down, Stand up, Sit down, Fall Down.
+
+
+

+
+
+## Prerequisites
+
+- Python > 3.6
+- PyTorch > 1.3.1
+
+Originally tested on: i7-8750H CPU @ 2.20GHz x12, GeForce RTX 2070 8GB, CUDA 10.2
+
+## Data
+
+This project trained a new Tiny-YOLO oneclass model to detect only person objects and to reduce the
+model size. It was trained on a rotation-augmented [COCO](http://cocodataset.org/#home) person keypoints dataset
+for more robust person detection over a wider range of body angles and poses.
+
+For action recognition, data from the [Le2i](http://le2i.cnrs.fr/Fall-detection-Dataset?lang=fr)
+Fall Detection Dataset (Coffee room, Home) was used: skeleton poses were extracted with AlphaPose and each
+action frame was labeled by hand for training the ST-GCN model.
+
+## Pre-Trained Models
+
+- Tiny-YOLO oneclass - [.pth](https://drive.google.com/file/d/1obEbWBSm9bXeg10FriJ7R2cGLRsg-AfP/view?usp=sharing),
+ [.cfg](https://drive.google.com/file/d/19sPzBZjAjuJQ3emRteHybm2SG25w9Wn5/view?usp=sharing)
+- SPPE FastPose (AlphaPose) - [resnet101](https://drive.google.com/file/d/1N2MgE1Esq6CKYA6FyZVKpPwHRyOCrzA0/view?usp=sharing),
+ [resnet50](https://drive.google.com/file/d/1IPfCDRwCmQDnQy94nT1V-_NVtTEi4VmU/view?usp=sharing)
+- ST-GCN action recognition - [tsstg](https://drive.google.com/file/d/1mQQ4JHe58ylKbBqTjuKzpwN2nwKOWJ9u/view?usp=sharing)
+
+## Basic Use
+
+1. Download all pre-trained models into the ./Models folder.
+2. Run main.py:
+
+```
+ python main.py ${video file or camera source}
+```
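+
+For programmatic use inside the stream server, the three loaders in this folder can be wired
+together directly. The snippet below is a minimal sketch rather than the full main.py pipeline:
+it assumes the script sits next to these modules, the pre-trained weights are already under
+./Models, and a CUDA device is available; the frame path is a placeholder, and TSSTG only gives
+a meaningful prediction once roughly 30 frames of keypoints have been collected for a tracked person.
+
+```
+import cv2
+
+from DetectorLoader import TinyYOLOv3_onecls
+from PoseEstimateLoader import SPPE_FastPose
+from ActionsEstLoader import TSSTG
+
+detector = TinyYOLOv3_onecls(input_size=416, device='cuda')
+pose_model = SPPE_FastPose('resnet50', path='./Models/sppe/fast_res50_256x192.pth', device='cuda')
+action_model = TSSTG()
+
+frame = cv2.cvtColor(cv2.imread('frame.jpg'), cv2.COLOR_BGR2RGB)  # placeholder frame
+
+detected = detector.detect(frame, need_resize=True, expand_bb=5)
+if detected is not None:
+    # Detector output: boxes in columns 0-3, box scores in column 4.
+    poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
+    # Collect ~30 frames of keypoints per track, then:
+    # action_probs = action_model.predict(pts, frame.shape[:2])
+```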
+
+## Reference
+
+- AlphaPose : https://github.com/Amanbhandula/AlphaPose
+- ST-GCN : https://github.com/yysijie/st-gcn
diff --git a/StreamServer/src/analytic/action/SPPE/LICENSE b/StreamServer/src/analytic/action/SPPE/LICENSE
new file mode 100644
index 0000000..fedbdfd
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 Jeff-sjtu
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/StreamServer/src/analytic/action/SPPE/README.md b/StreamServer/src/analytic/action/SPPE/README.md
new file mode 100644
index 0000000..d23c179
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/README.md
@@ -0,0 +1 @@
+# pytorch-AlphaPose from: https://github.com/Amanbhandula/AlphaPose
diff --git a/StreamServer/src/analytic/action/SPPE/src/main_fast_inference.py b/StreamServer/src/analytic/action/SPPE/src/main_fast_inference.py
new file mode 100644
index 0000000..f000aad
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/main_fast_inference.py
@@ -0,0 +1,82 @@
+import torch
+import torch.nn as nn
+import torch.utils.data
+import torch.utils.data.distributed
+import torch.nn.functional as F
+import numpy as np
+from .utils.img import flip, shuffleLR
+from .utils.eval import getPrediction
+from .models.FastPose import FastPose
+
+import time
+import sys
+
+import torch._utils
+try:
+ torch._utils._rebuild_tensor_v2
+except AttributeError:
+ def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
+ tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
+ tensor.requires_grad = requires_grad
+ tensor._backward_hooks = backward_hooks
+ return tensor
+ torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
+
+
+class InferenNet(nn.Module):
+ def __init__(self, dataset, weights_file='./Models/sppe/fast_res101_320x256.pth'):
+ super().__init__()
+
+ self.pyranet = FastPose('resnet101').cuda()
+ print('Loading pose model from {}'.format(weights_file))
+ sys.stdout.flush()
+ self.pyranet.load_state_dict(torch.load(weights_file))
+ self.pyranet.eval()
+
+ self.dataset = dataset
+
+ def forward(self, x):
+ out = self.pyranet(x)
+ out = out.narrow(1, 0, 17)
+
+ flip_out = self.pyranet(flip(x))
+ flip_out = flip_out.narrow(1, 0, 17)
+
+ flip_out = flip(shuffleLR(
+ flip_out, self.dataset))
+
+ out = (flip_out + out) / 2
+
+ return out
+
+
+class InferenNet_fast(nn.Module):
+ def __init__(self, weights_file='./Models/sppe/fast_res101_320x256.pth'):
+ super().__init__()
+
+ self.pyranet = FastPose('resnet101').cuda()
+ print('Loading pose model from {}'.format(weights_file))
+ self.pyranet.load_state_dict(torch.load(weights_file))
+ self.pyranet.eval()
+
+ def forward(self, x):
+ out = self.pyranet(x)
+ out = out.narrow(1, 0, 17)
+
+ return out
+
+
+class InferenNet_fastRes50(nn.Module):
+ def __init__(self, weights_file='./Models/sppe/fast_res50_256x192.pth'):
+ super().__init__()
+
+ self.pyranet = FastPose('resnet50', 17).cuda()
+ print('Loading pose model from {}'.format(weights_file))
+ self.pyranet.load_state_dict(torch.load(weights_file))
+ self.pyranet.eval()
+
+ def forward(self, x):
+ out = self.pyranet(x)
+
+ return out
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/FastPose.py b/StreamServer/src/analytic/action/SPPE/src/models/FastPose.py
new file mode 100644
index 0000000..18ccccf
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/FastPose.py
@@ -0,0 +1,32 @@
+import torch.nn as nn
+from torch.autograd import Variable
+
+from .layers.SE_Resnet import SEResnet
+from .layers.DUC import DUC
+from ..opt import opt
+
+
+class FastPose(nn.Module):
+ DIM = 128
+
+ def __init__(self, backbone='resnet101', num_join=opt.nClasses):
+ super(FastPose, self).__init__()
+ assert backbone in ['resnet50', 'resnet101']
+
+ self.preact = SEResnet(backbone)
+
+ self.suffle1 = nn.PixelShuffle(2)
+ self.duc1 = DUC(512, 1024, upscale_factor=2)
+ self.duc2 = DUC(256, 512, upscale_factor=2)
+
+ self.conv_out = nn.Conv2d(
+ self.DIM, num_join, kernel_size=3, stride=1, padding=1)
+
+ def forward(self, x: Variable):
+ out = self.preact(x)
+ out = self.suffle1(out)
+ out = self.duc1(out)
+ out = self.duc2(out)
+
+ out = self.conv_out(out)
+ return out
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/__init__.py b/StreamServer/src/analytic/action/SPPE/src/models/__init__.py
new file mode 100644
index 0000000..b974282
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/__init__.py
@@ -0,0 +1 @@
+from . import *
\ No newline at end of file
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/hg-prm.py b/StreamServer/src/analytic/action/SPPE/src/models/hg-prm.py
new file mode 100644
index 0000000..6dd64b8
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/hg-prm.py
@@ -0,0 +1,126 @@
+import torch.nn as nn
+from .layers.PRM import Residual as ResidualPyramid
+from .layers.Residual import Residual as Residual
+from torch.autograd import Variable
+from SPPE.src.opt import opt
+from collections import defaultdict
+
+
+class Hourglass(nn.Module):
+ def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C):
+ super(Hourglass, self).__init__()
+
+ self.ResidualUp = ResidualPyramid if n >= 2 else Residual
+ self.ResidualDown = ResidualPyramid if n >= 3 else Residual
+
+ self.depth = n
+ self.nModules = nModules
+ self.nFeats = nFeats
+ self.net_type = net_type
+ self.B = B
+ self.C = C
+ self.inputResH = inputResH
+ self.inputResW = inputResW
+
+ self.up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW)
+ self.low1 = nn.Sequential(
+ nn.MaxPool2d(2),
+ self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
+ )
+ if n > 1:
+ self.low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C)
+ else:
+ self.low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
+
+ self.low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2)
+ self.up2 = nn.UpsamplingNearest2d(scale_factor=2)
+
+ self.upperBranch = self.up1
+ self.lowerBranch = nn.Sequential(
+ self.low1,
+ self.low2,
+ self.low3,
+ self.up2
+ )
+
+ def _make_residual(self, resBlock, useConv, inputResH, inputResW):
+ layer_list = []
+ for i in range(self.nModules):
+ layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW,
+ stride=1, net_type=self.net_type, useConv=useConv,
+ baseWidth=self.B, cardinality=self.C))
+ return nn.Sequential(*layer_list)
+
+ def forward(self, x: Variable):
+ up1 = self.upperBranch(x)
+ up2 = self.lowerBranch(x)
+ out = up1 + up2
+ return out
+
+
+class PyraNet(nn.Module):
+ def __init__(self):
+ super(PyraNet, self).__init__()
+
+ B, C = opt.baseWidth, opt.cardinality
+ self.inputResH = opt.inputResH / 4
+ self.inputResW = opt.inputResW / 4
+ self.nStack = opt.nStack
+
+ self.cnv1 = nn.Sequential(
+ nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
+ nn.BatchNorm2d(64),
+ nn.ReLU(True)
+ )
+ self.r1 = nn.Sequential(
+ ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
+ stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
+ nn.MaxPool2d(2)
+ )
+ self.r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
+ stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
+ self.r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
+ stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
+ self.preact = nn.Sequential(
+ self.cnv1,
+ self.r1,
+ self.r4,
+ self.r5
+ )
+ self.stack_layers = defaultdict(list)
+ for i in range(self.nStack):
+ hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C)
+ lin = nn.Sequential(
+ hg,
+ nn.BatchNorm2d(opt.nFeats),
+ nn.ReLU(True),
+ nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0),
+ nn.BatchNorm2d(opt.nFeats),
+ nn.ReLU(True)
+ )
+ tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0)
+ self.stack_layers['lin'].append(lin)
+ self.stack_layers['out'].append(tmpOut)
+ if i < self.nStack - 1:
+ lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
+ tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0)
+ self.stack_layers['lin_'].append(lin_)
+ self.stack_layers['out_'].append(tmpOut_)
+
+ def forward(self, x: Variable):
+ out = []
+ inter = self.preact(x)
+ for i in range(self.nStack):
+ lin = self.stack_layers['lin'][i](inter)
+ tmpOut = self.stack_layers['out'][i](lin)
+ out.append(tmpOut)
+ if i < self.nStack - 1:
+ lin_ = self.stack_layers['lin_'][i](lin)
+ tmpOut_ = self.stack_layers['out_'][i](tmpOut)
+ inter = inter + lin_ + tmpOut_
+ return out
+
+
+def createModel(**kw):
+ model = PyraNet()
+ return model
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/hgPRM.py b/StreamServer/src/analytic/action/SPPE/src/models/hgPRM.py
new file mode 100644
index 0000000..24382ba
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/hgPRM.py
@@ -0,0 +1,236 @@
+import torch.nn as nn
+from .layers.PRM import Residual as ResidualPyramid
+from .layers.Residual import Residual as Residual
+from torch.autograd import Variable
+import torch
+from ..opt import opt
+import math
+
+
+class Hourglass(nn.Module):
+ def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C):
+ super(Hourglass, self).__init__()
+
+ self.ResidualUp = ResidualPyramid if n >= 2 else Residual
+ self.ResidualDown = ResidualPyramid if n >= 3 else Residual
+
+ self.depth = n
+ self.nModules = nModules
+ self.nFeats = nFeats
+ self.net_type = net_type
+ self.B = B
+ self.C = C
+ self.inputResH = inputResH
+ self.inputResW = inputResW
+
+ up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW)
+ low1 = nn.Sequential(
+ nn.MaxPool2d(2),
+ self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
+ )
+ if n > 1:
+ low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C)
+ else:
+ low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
+
+ low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2)
+ up2 = nn.UpsamplingNearest2d(scale_factor=2)
+
+ self.upperBranch = up1
+ self.lowerBranch = nn.Sequential(
+ low1,
+ low2,
+ low3,
+ up2
+ )
+
+ def _make_residual(self, resBlock, useConv, inputResH, inputResW):
+ layer_list = []
+ for i in range(self.nModules):
+ layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW,
+ stride=1, net_type=self.net_type, useConv=useConv,
+ baseWidth=self.B, cardinality=self.C))
+ return nn.Sequential(*layer_list)
+
+ def forward(self, x: Variable):
+ up1 = self.upperBranch(x)
+ up2 = self.lowerBranch(x)
+ # out = up1 + up2
+ out = torch.add(up1, up2)
+ return out
+
+
+class PyraNet(nn.Module):
+ def __init__(self):
+ super(PyraNet, self).__init__()
+
+ B, C = opt.baseWidth, opt.cardinality
+ self.inputResH = opt.inputResH / 4
+ self.inputResW = opt.inputResW / 4
+ self.nStack = opt.nStack
+
+ conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
+ if opt.init:
+ nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3))
+
+ cnv1 = nn.Sequential(
+ conv1,
+ nn.BatchNorm2d(64),
+ nn.ReLU(True)
+ )
+
+ r1 = nn.Sequential(
+ ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
+ stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
+ nn.MaxPool2d(2)
+ )
+ r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
+ stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
+ r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
+ stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
+ self.preact = nn.Sequential(
+ cnv1,
+ r1,
+ r4,
+ r5
+ )
+
+ self.stack_lin = nn.ModuleList()
+ self.stack_out = nn.ModuleList()
+ self.stack_lin_ = nn.ModuleList()
+ self.stack_out_ = nn.ModuleList()
+
+ for i in range(self.nStack):
+ hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C)
+ conv1 = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
+ if opt.init:
+ nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
+ lin = nn.Sequential(
+ hg,
+ nn.BatchNorm2d(opt.nFeats),
+ nn.ReLU(True),
+ conv1,
+ nn.BatchNorm2d(opt.nFeats),
+ nn.ReLU(True)
+ )
+ tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0)
+ if opt.init:
+ nn.init.xavier_normal(tmpOut.weight)
+ self.stack_lin.append(lin)
+ self.stack_out.append(tmpOut)
+ if i < self.nStack - 1:
+ lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
+ tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0)
+ if opt.init:
+ nn.init.xavier_normal(lin_.weight)
+ nn.init.xavier_normal(tmpOut_.weight)
+ self.stack_lin_.append(lin_)
+ self.stack_out_.append(tmpOut_)
+
+ def forward(self, x: Variable):
+ out = []
+ inter = self.preact(x)
+ for i in range(self.nStack):
+ lin = self.stack_lin[i](inter)
+ tmpOut = self.stack_out[i](lin)
+ out.append(tmpOut)
+ if i < self.nStack - 1:
+ lin_ = self.stack_lin_[i](lin)
+ tmpOut_ = self.stack_out_[i](tmpOut)
+ inter = inter + lin_ + tmpOut_
+ return out
+
+
+class PyraNet_Inference(nn.Module):
+ def __init__(self):
+ super(PyraNet_Inference, self).__init__()
+
+ B, C = opt.baseWidth, opt.cardinality
+ self.inputResH = opt.inputResH / 4
+ self.inputResW = opt.inputResW / 4
+ self.nStack = opt.nStack
+
+ conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
+ if opt.init:
+ nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3))
+
+ cnv1 = nn.Sequential(
+ conv1,
+ nn.BatchNorm2d(64),
+ nn.ReLU(True)
+ )
+
+ r1 = nn.Sequential(
+ ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
+ stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
+ nn.MaxPool2d(2)
+ )
+ r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
+ stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
+ r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
+ stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
+ self.preact = nn.Sequential(
+ cnv1,
+ r1,
+ r4,
+ r5
+ )
+
+ self.stack_lin = nn.ModuleList()
+ self.stack_out = nn.ModuleList()
+ self.stack_lin_ = nn.ModuleList()
+ self.stack_out_ = nn.ModuleList()
+
+ for i in range(self.nStack):
+ hg = Hourglass(4, opt.nFeats, opt.nResidual,
+ self.inputResH, self.inputResW, 'preact', B, C)
+ conv1 = nn.Conv2d(opt.nFeats, opt.nFeats,
+ kernel_size=1, stride=1, padding=0)
+ if opt.init:
+ nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
+ lin = nn.Sequential(
+ hg,
+ nn.BatchNorm2d(opt.nFeats),
+ nn.ReLU(True),
+ conv1,
+ nn.BatchNorm2d(opt.nFeats),
+ nn.ReLU(True)
+ )
+ tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses,
+ kernel_size=1, stride=1, padding=0)
+ if opt.init:
+ nn.init.xavier_normal(tmpOut.weight)
+ self.stack_lin.append(lin)
+ self.stack_out.append(tmpOut)
+ if i < self.nStack - 1:
+ lin_ = nn.Conv2d(opt.nFeats, opt.nFeats,
+ kernel_size=1, stride=1, padding=0)
+ tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats,
+ kernel_size=1, stride=1, padding=0)
+ if opt.init:
+ nn.init.xavier_normal(lin_.weight)
+ nn.init.xavier_normal(tmpOut_.weight)
+ self.stack_lin_.append(lin_)
+ self.stack_out_.append(tmpOut_)
+
+ def forward(self, x: Variable):
+ inter = self.preact(x)
+ for i in range(self.nStack):
+ lin = self.stack_lin[i](inter)
+ tmpOut = self.stack_out[i](lin)
+ out = tmpOut
+ if i < self.nStack - 1:
+ lin_ = self.stack_lin_[i](lin)
+ tmpOut_ = self.stack_out_[i](tmpOut)
+ inter = inter + lin_ + tmpOut_
+ return out
+
+
+def createModel(**kw):
+ model = PyraNet()
+ return model
+
+
+def createModel_Inference(**kw):
+ model = PyraNet_Inference()
+ return model
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/DUC.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/DUC.py
new file mode 100644
index 0000000..f6b5ee0
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/DUC.py
@@ -0,0 +1,23 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class DUC(nn.Module):
+ """
+ INPUT: inplanes, planes, upscale_factor
+ OUTPUT: (planes // 4)* ht * wd
+ """
+ def __init__(self, inplanes, planes, upscale_factor=2):
+ super(DUC, self).__init__()
+ self.conv = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1, bias=False)
+ self.bn = nn.BatchNorm2d(planes)
+ self.relu = nn.ReLU()
+
+ self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ x = self.relu(x)
+ x = self.pixel_shuffle(x)
+ return x
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/PRM.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/PRM.py
new file mode 100644
index 0000000..375bea4
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/PRM.py
@@ -0,0 +1,135 @@
+import torch.nn as nn
+from .util_models import ConcatTable, CaddTable, Identity
+import math
+from ...opt import opt
+
+
+class Residual(nn.Module):
+ def __init__(self, numIn, numOut, inputResH, inputResW, stride=1,
+ net_type='preact', useConv=False, baseWidth=9, cardinality=4):
+ super(Residual, self).__init__()
+
+ self.con = ConcatTable([convBlock(numIn, numOut, inputResH,
+ inputResW, net_type, baseWidth, cardinality, stride),
+ skipLayer(numIn, numOut, stride, useConv)])
+ self.cadd = CaddTable(True)
+
+ def forward(self, x):
+ out = self.con(x)
+ out = self.cadd(out)
+ return out
+
+
+def convBlock(numIn, numOut, inputResH, inputResW, net_type, baseWidth, cardinality, stride):
+ numIn = int(numIn)
+ numOut = int(numOut)
+
+ addTable = ConcatTable()
+ s_list = []
+ if net_type != 'no_preact':
+ s_list.append(nn.BatchNorm2d(numIn))
+ s_list.append(nn.ReLU(True))
+
+ conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1)
+ if opt.init:
+ nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
+ s_list.append(conv1)
+
+ s_list.append(nn.BatchNorm2d(numOut // 2))
+ s_list.append(nn.ReLU(True))
+
+ conv2 = nn.Conv2d(numOut // 2, numOut // 2,
+ kernel_size=3, stride=stride, padding=1)
+ if opt.init:
+ nn.init.xavier_normal(conv2.weight)
+ s_list.append(conv2)
+
+ s = nn.Sequential(*s_list)
+ addTable.add(s)
+
+ D = math.floor(numOut // baseWidth)
+ C = cardinality
+ s_list = []
+
+ if net_type != 'no_preact':
+ s_list.append(nn.BatchNorm2d(numIn))
+ s_list.append(nn.ReLU(True))
+
+ conv1 = nn.Conv2d(numIn, D, kernel_size=1, stride=stride)
+ if opt.init:
+ nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / C))
+
+ s_list.append(conv1)
+ s_list.append(nn.BatchNorm2d(D))
+ s_list.append(nn.ReLU(True))
+ s_list.append(pyramid(D, C, inputResH, inputResW))
+ s_list.append(nn.BatchNorm2d(D))
+ s_list.append(nn.ReLU(True))
+
+ a = nn.Conv2d(D, numOut // 2, kernel_size=1)
+ a.nBranchIn = C
+ if opt.init:
+ nn.init.xavier_normal(a.weight, gain=math.sqrt(1 / C))
+ s_list.append(a)
+
+ s = nn.Sequential(*s_list)
+ addTable.add(s)
+
+ elewiswAdd = nn.Sequential(
+ addTable,
+ CaddTable(False)
+ )
+ conv2 = nn.Conv2d(numOut // 2, numOut, kernel_size=1)
+ if opt.init:
+ nn.init.xavier_normal(conv2.weight, gain=math.sqrt(1 / 2))
+ model = nn.Sequential(
+ elewiswAdd,
+ nn.BatchNorm2d(numOut // 2),
+ nn.ReLU(True),
+ conv2
+ )
+ return model
+
+
+def pyramid(D, C, inputResH, inputResW):
+ pyraTable = ConcatTable()
+ sc = math.pow(2, 1 / C)
+ for i in range(C):
+ scaled = 1 / math.pow(sc, i + 1)
+ conv1 = nn.Conv2d(D, D, kernel_size=3, stride=1, padding=1)
+ if opt.init:
+ nn.init.xavier_normal(conv1.weight)
+ s = nn.Sequential(
+ nn.FractionalMaxPool2d(2, output_ratio=(scaled, scaled)),
+ conv1,
+ nn.UpsamplingBilinear2d(size=(int(inputResH), int(inputResW))))
+ pyraTable.add(s)
+ pyra = nn.Sequential(
+ pyraTable,
+ CaddTable(False)
+ )
+ return pyra
+
+
+class skipLayer(nn.Module):
+ def __init__(self, numIn, numOut, stride, useConv):
+ super(skipLayer, self).__init__()
+ self.identity = False
+
+ if numIn == numOut and stride == 1 and not useConv:
+ self.identity = True
+ else:
+ conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride)
+ if opt.init:
+ nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
+ self.m = nn.Sequential(
+ nn.BatchNorm2d(numIn),
+ nn.ReLU(True),
+ conv1
+ )
+
+ def forward(self, x):
+ if self.identity:
+ return x
+ else:
+ return self.m(x)
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/Residual.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/Residual.py
new file mode 100644
index 0000000..1449a41
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/Residual.py
@@ -0,0 +1,54 @@
+import torch.nn as nn
+import math
+from .util_models import ConcatTable, CaddTable, Identity
+from ...opt import opt
+
+
+def Residual(numIn, numOut, *arg, stride=1, net_type='preact', useConv=False, **kw):
+ con = ConcatTable([convBlock(numIn, numOut, stride, net_type),
+ skipLayer(numIn, numOut, stride, useConv)])
+ cadd = CaddTable(True)
+ return nn.Sequential(con, cadd)
+
+
+def convBlock(numIn, numOut, stride, net_type):
+ s_list = []
+ if net_type != 'no_preact':
+ s_list.append(nn.BatchNorm2d(numIn))
+ s_list.append(nn.ReLU(True))
+
+ conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1)
+ if opt.init:
+ nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
+ s_list.append(conv1)
+
+ s_list.append(nn.BatchNorm2d(numOut // 2))
+ s_list.append(nn.ReLU(True))
+
+ conv2 = nn.Conv2d(numOut // 2, numOut // 2, kernel_size=3, stride=stride, padding=1)
+ if opt.init:
+ nn.init.xavier_normal(conv2.weight)
+ s_list.append(conv2)
+ s_list.append(nn.BatchNorm2d(numOut // 2))
+ s_list.append(nn.ReLU(True))
+
+ conv3 = nn.Conv2d(numOut // 2, numOut, kernel_size=1)
+ if opt.init:
+ nn.init.xavier_normal(conv3.weight)
+ s_list.append(conv3)
+
+ return nn.Sequential(*s_list)
+
+
+def skipLayer(numIn, numOut, stride, useConv):
+ if numIn == numOut and stride == 1 and not useConv:
+ return Identity()
+ else:
+ conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride)
+ if opt.init:
+ nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
+ return nn.Sequential(
+ nn.BatchNorm2d(numIn),
+ nn.ReLU(True),
+ conv1
+ )
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/Resnet.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/Resnet.py
new file mode 100644
index 0000000..72f07db
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/Resnet.py
@@ -0,0 +1,82 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Bottleneck(nn.Module):
+ expansion = 4
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
+ super(Bottleneck, self).__init__()
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(planes)
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(planes)
+ self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, stride=1, bias=False)
+ self.bn3 = nn.BatchNorm2d(planes * 4)
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = F.relu(self.bn1(self.conv1(x)), inplace=True)
+ out = F.relu(self.bn2(self.conv2(out)), inplace=True)
+ out = self.bn3(self.conv3(out))
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = F.relu(out, inplace=True)
+
+ return out
+
+
+class ResNet(nn.Module):
+ """ Resnet """
+ def __init__(self, architecture):
+ super(ResNet, self).__init__()
+ assert architecture in ["resnet50", "resnet101"]
+ self.inplanes = 64
+ self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3]
+ self.block = Bottleneck
+
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
+ self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True)
+ self.relu = nn.ReLU(inplace=True)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
+
+ self.layer1 = self.make_layer(self.block, 64, self.layers[0])
+ self.layer2 = self.make_layer(self.block, 128, self.layers[1], stride=2)
+ self.layer3 = self.make_layer(self.block, 256, self.layers[2], stride=2)
+
+ self.layer4 = self.make_layer(
+ self.block, 512, self.layers[3], stride=2)
+
+ def forward(self, x):
+ x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
+ x = self.layer1(x)
+ x = self.layer2(x)
+ x = self.layer3(x)
+ x = self.layer4(x)
+ return x
+
+ def stages(self):
+ return [self.layer1, self.layer2, self.layer3, self.layer4]
+
+ def make_layer(self, block, planes, blocks, stride=1):
+ downsample = None
+ if stride != 1 or self.inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ nn.Conv2d(self.inplanes, planes * block.expansion,
+ kernel_size=1, stride=stride, bias=False),
+ nn.BatchNorm2d(planes * block.expansion),
+ )
+
+ layers = []
+ layers.append(block(self.inplanes, planes, stride, downsample))
+ self.inplanes = planes * block.expansion
+ for i in range(1, blocks):
+ layers.append(block(self.inplanes, planes))
+
+ return nn.Sequential(*layers)
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_Resnet.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_Resnet.py
new file mode 100644
index 0000000..caecaa3
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_Resnet.py
@@ -0,0 +1,99 @@
+import torch.nn as nn
+from .SE_module import SELayer
+import torch.nn.functional as F
+
+
+class Bottleneck(nn.Module):
+ expansion = 4
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=False):
+ super(Bottleneck, self).__init__()
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(planes)
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
+ padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(planes)
+ self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
+ self.bn3 = nn.BatchNorm2d(planes * 4)
+ if reduction:
+ self.se = SELayer(planes * 4)
+
+ self.reduc = reduction
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = F.relu(self.bn1(self.conv1(x)), inplace=True)
+ out = F.relu(self.bn2(self.conv2(out)), inplace=True)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+ if self.reduc:
+ out = self.se(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = F.relu(out)
+
+ return out
+
+
+class SEResnet(nn.Module):
+ """ SEResnet """
+
+ def __init__(self, architecture):
+ super(SEResnet, self).__init__()
+ assert architecture in ["resnet50", "resnet101"]
+ self.inplanes = 64
+ self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3]
+ self.block = Bottleneck
+
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=7,
+ stride=2, padding=3, bias=False)
+ self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True)
+ self.relu = nn.ReLU(inplace=True)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ self.layer1 = self.make_layer(self.block, 64, self.layers[0])
+ self.layer2 = self.make_layer(
+ self.block, 128, self.layers[1], stride=2)
+ self.layer3 = self.make_layer(
+ self.block, 256, self.layers[2], stride=2)
+
+ self.layer4 = self.make_layer(
+ self.block, 512, self.layers[3], stride=2)
+
+ def forward(self, x):
+ x = self.maxpool(self.relu(self.bn1(self.conv1(x)))) # 64 * h/4 * w/4
+ x = self.layer1(x) # 256 * h/4 * w/4
+ x = self.layer2(x) # 512 * h/8 * w/8
+ x = self.layer3(x) # 1024 * h/16 * w/16
+ x = self.layer4(x) # 2048 * h/32 * w/32
+ return x
+
+ def stages(self):
+ return [self.layer1, self.layer2, self.layer3, self.layer4]
+
+ def make_layer(self, block, planes, blocks, stride=1):
+ downsample = None
+ if stride != 1 or self.inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ nn.Conv2d(self.inplanes, planes * block.expansion,
+ kernel_size=1, stride=stride, bias=False),
+ nn.BatchNorm2d(planes * block.expansion),
+ )
+
+ layers = []
+ if downsample is not None:
+ layers.append(block(self.inplanes, planes, stride, downsample, reduction=True))
+ else:
+ layers.append(block(self.inplanes, planes, stride, downsample))
+ self.inplanes = planes * block.expansion
+ for i in range(1, blocks):
+ layers.append(block(self.inplanes, planes))
+
+ return nn.Sequential(*layers)
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_module.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_module.py
new file mode 100644
index 0000000..ab8aefe
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_module.py
@@ -0,0 +1,19 @@
+from torch import nn
+
+
+class SELayer(nn.Module):
+ def __init__(self, channel, reduction=1):
+ super(SELayer, self).__init__()
+ self.avg_pool = nn.AdaptiveAvgPool2d(1)
+ self.fc = nn.Sequential(
+ nn.Linear(channel, channel // reduction),
+ nn.ReLU(inplace=True),
+ nn.Linear(channel // reduction, channel),
+ nn.Sigmoid()
+ )
+
+ def forward(self, x):
+ b, c, _, _ = x.size()
+ y = self.avg_pool(x).view(b, c)
+ y = self.fc(y).view(b, c, 1, 1)
+ return x * y
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/__init__.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/__init__.py
new file mode 100644
index 0000000..b6e690f
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/__init__.py
@@ -0,0 +1 @@
+from . import *
diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/util_models.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/util_models.py
new file mode 100644
index 0000000..52d60d8
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/util_models.py
@@ -0,0 +1,37 @@
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+
+
+class ConcatTable(nn.Module):
+ def __init__(self, module_list=None):
+ super(ConcatTable, self).__init__()
+
+ self.modules_list = nn.ModuleList(module_list)
+
+ def forward(self, x: Variable):
+ y = []
+ for i in range(len(self.modules_list)):
+ y.append(self.modules_list[i](x))
+ return y
+
+ def add(self, module):
+ self.modules_list.append(module)
+
+
+class CaddTable(nn.Module):
+ def __init__(self, inplace=False):
+ super(CaddTable, self).__init__()
+ self.inplace = inplace
+
+ def forward(self, x: Variable or list):
+ return torch.stack(x, 0).sum(0)
+
+
+class Identity(nn.Module):
+ def __init__(self, params=None):
+ super(Identity, self).__init__()
+ self.params = nn.ParameterList(params)
+
+ def forward(self, x: Variable or list):
+ return x
diff --git a/StreamServer/src/analytic/action/SPPE/src/opt.py b/StreamServer/src/analytic/action/SPPE/src/opt.py
new file mode 100644
index 0000000..2a43bcc
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/opt.py
@@ -0,0 +1,115 @@
+"""import argparse
+import torch
+
+parser = argparse.ArgumentParser(description='PyTorch AlphaPose Training')
+parser.add_argument("--return_counts", type=bool, default=True)
+parser.add_argument("--mode", default='client')
+parser.add_argument("--port", default=52162)
+
+"----------------------------- General options -----------------------------"
+parser.add_argument('--expID', default='default', type=str,
+ help='Experiment ID')
+parser.add_argument('--dataset', default='coco', type=str,
+ help='Dataset choice: mpii | coco')
+parser.add_argument('--nThreads', default=30, type=int,
+ help='Number of data loading threads')
+parser.add_argument('--debug', default=False, type=bool,
+ help='Print the debug information')
+parser.add_argument('--snapshot', default=1, type=int,
+ help='How often to take a snapshot of the model (0 = never)')
+
+"----------------------------- AlphaPose options -----------------------------"
+parser.add_argument('--addDPG', default=False, type=bool,
+ help='Train with data augmentation')
+
+"----------------------------- Model options -----------------------------"
+parser.add_argument('--netType', default='hgPRM', type=str,
+ help='Options: hgPRM | resnext')
+parser.add_argument('--loadModel', default=None, type=str,
+ help='Provide full path to a previously trained model')
+parser.add_argument('--Continue', default=False, type=bool,
+ help='Pick up where an experiment left off')
+parser.add_argument('--nFeats', default=256, type=int,
+ help='Number of features in the hourglass')
+parser.add_argument('--nClasses', default=33, type=int,
+ help='Number of output channel')
+parser.add_argument('--nStack', default=8, type=int,
+ help='Number of hourglasses to stack')
+
+"----------------------------- Hyperparameter options -----------------------------"
+parser.add_argument('--LR', default=2.5e-4, type=float,
+ help='Learning rate')
+parser.add_argument('--momentum', default=0, type=float,
+ help='Momentum')
+parser.add_argument('--weightDecay', default=0, type=float,
+ help='Weight decay')
+parser.add_argument('--crit', default='MSE', type=str,
+ help='Criterion type')
+parser.add_argument('--optMethod', default='rmsprop', type=str,
+ help='Optimization method: rmsprop | sgd | nag | adadelta')
+
+
+"----------------------------- Training options -----------------------------"
+parser.add_argument('--nEpochs', default=50, type=int,
+ help='Number of hourglasses to stack')
+parser.add_argument('--epoch', default=0, type=int,
+ help='Current epoch')
+parser.add_argument('--trainBatch', default=40, type=int,
+ help='Train-batch size')
+parser.add_argument('--validBatch', default=20, type=int,
+ help='Valid-batch size')
+parser.add_argument('--trainIters', default=0, type=int,
+ help='Total train iters')
+parser.add_argument('--valIters', default=0, type=int,
+ help='Total valid iters')
+parser.add_argument('--init', default=None, type=str,
+ help='Initialization')
+"----------------------------- Data options -----------------------------"
+parser.add_argument('--inputResH', default=384, type=int,
+ help='Input image height')
+parser.add_argument('--inputResW', default=320, type=int,
+ help='Input image width')
+parser.add_argument('--outputResH', default=96, type=int,
+ help='Output heatmap height')
+parser.add_argument('--outputResW', default=80, type=int,
+ help='Output heatmap width')
+parser.add_argument('--scale', default=0.25, type=float,
+ help='Degree of scale augmentation')
+parser.add_argument('--rotate', default=30, type=float,
+ help='Degree of rotation augmentation')
+parser.add_argument('--hmGauss', default=1, type=int,
+ help='Heatmap gaussian size')
+
+"----------------------------- PyraNet options -----------------------------"
+parser.add_argument('--baseWidth', default=9, type=int,
+ help='Heatmap gaussian size')
+parser.add_argument('--cardinality', default=5, type=int,
+ help='Heatmap gaussian size')
+parser.add_argument('--nResidual', default=1, type=int,
+ help='Number of residual modules at each location in the pyranet')
+
+"----------------------------- Distribution options -----------------------------"
+parser.add_argument('--dist', dest='dist', type=int, default=1,
+ help='distributed training or not')
+parser.add_argument('--backend', dest='backend', type=str, default='gloo',
+ help='backend for distributed training')
+parser.add_argument('--port', dest='port',
+ help='port of server')
+opt = parser.parse_args()"""
+
+"""if opt.Continue:
+ opt = torch.load("../exp/{}/{}/option.pkl".format(opt.dataset, opt.expID))
+ opt.Continue = True
+ opt.nEpochs = 50
+ print("--- Continue ---")"""
+
+
+class opt:
+ nClasses = 33
+ inputResH = 384
+ inputResW = 320
+ outputResH = 96
+ outputResW = 80
+ scale = 0.25
+ rotate = 30
+ hmGauss = 1
diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/__init__.py b/StreamServer/src/analytic/action/SPPE/src/utils/__init__.py
new file mode 100644
index 0000000..b6e690f
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/utils/__init__.py
@@ -0,0 +1 @@
+from . import *
diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/dataset/__init__.py b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/dataset/coco.py b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/coco.py
new file mode 100644
index 0000000..e1f2646
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/coco.py
@@ -0,0 +1,85 @@
+import os
+import h5py
+from functools import reduce
+
+import torch.utils.data as data
+from ..pose import generateSampleBox
+from ...opt import opt
+
+
+class Mscoco(data.Dataset):
+ def __init__(self, train=True, sigma=1,
+ scale_factor=(0.2, 0.3), rot_factor=40, label_type='Gaussian'):
+ self.img_folder = '../data/coco/images' # root image folders
+ self.is_train = train # training set or test set
+ self.inputResH = opt.inputResH
+ self.inputResW = opt.inputResW
+ self.outputResH = opt.outputResH
+ self.outputResW = opt.outputResW
+ self.sigma = sigma
+ self.scale_factor = scale_factor
+ self.rot_factor = rot_factor
+ self.label_type = label_type
+
+ self.nJoints_coco = 17
+ self.nJoints_mpii = 16
+ self.nJoints = 33
+
+ self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16, 17)
+ self.flipRef = ((2, 3), (4, 5), (6, 7),
+ (8, 9), (10, 11), (12, 13),
+ (14, 15), (16, 17))
+
+ # create train/val split
+ with h5py.File('../data/coco/annot_clean.h5', 'r') as annot:
+ # train
+ self.imgname_coco_train = annot['imgname'][:-5887]
+ self.bndbox_coco_train = annot['bndbox'][:-5887]
+ self.part_coco_train = annot['part'][:-5887]
+ # val
+ self.imgname_coco_val = annot['imgname'][-5887:]
+ self.bndbox_coco_val = annot['bndbox'][-5887:]
+ self.part_coco_val = annot['part'][-5887:]
+
+ self.size_train = self.imgname_coco_train.shape[0]
+ self.size_val = self.imgname_coco_val.shape[0]
+
+ def __getitem__(self, index):
+ sf = self.scale_factor
+
+ if self.is_train:
+ part = self.part_coco_train[index]
+ bndbox = self.bndbox_coco_train[index]
+ imgname = self.imgname_coco_train[index]
+ else:
+ part = self.part_coco_val[index]
+ bndbox = self.bndbox_coco_val[index]
+ imgname = self.imgname_coco_val[index]
+
+ imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))
+ img_path = os.path.join(self.img_folder, imgname)
+
+ metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
+ 'coco', sf, self, train=self.is_train)
+
+ inp, out_bigcircle, out_smallcircle, out, setMask = metaData
+
+ label = []
+ for i in range(opt.nStack):
+ if i < 2:
+ # label.append(out_bigcircle.clone())
+ label.append(out.clone())
+ elif i < 4:
+ # label.append(out_smallcircle.clone())
+ label.append(out.clone())
+ else:
+ label.append(out.clone())
+
+ return inp, label, setMask, 'coco'
+
+ def __len__(self):
+ if self.is_train:
+ return self.size_train
+ else:
+ return self.size_val
diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/dataset/fuse.py b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/fuse.py
new file mode 100644
index 0000000..db3e04a
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/fuse.py
@@ -0,0 +1,122 @@
+import os
+import h5py
+from functools import reduce
+
+import torch.utils.data as data
+from ..pose import generateSampleBox
+from ...opt import opt
+
+
+class Mscoco(data.Dataset):
+ def __init__(self, train=True, sigma=1,
+ scale_factor=0.25, rot_factor=30, label_type='Gaussian'):
+ self.img_folder = '../data/' # root image folders
+ self.is_train = train # training set or test set
+ self.inputResH = 320
+ self.inputResW = 256
+ self.outputResH = 80
+ self.outputResW = 64
+ self.sigma = sigma
+ self.scale_factor = (0.2, 0.3)
+ self.rot_factor = rot_factor
+ self.label_type = label_type
+
+ self.nJoints_coco = 17
+ self.nJoints_mpii = 16
+ self.nJoints = 33
+
+ self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8, # COCO
+ 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, # MPII
+ 28, 29, 32, 33)
+
+ self.flipRef = ((2, 3), (4, 5), (6, 7), # COCO
+ (8, 9), (10, 11), (12, 13),
+ (14, 15), (16, 17),
+ (18, 23), (19, 22), (20, 21), # MPII
+ (28, 33), (29, 32), (30, 31))
+
+ '''
+ Create train/val split
+ '''
+ # COCO
+ with h5py.File('../data/coco/annot_clean.h5', 'r') as annot:
+ # train
+ self.imgname_coco_train = annot['imgname'][:-5887]
+ self.bndbox_coco_train = annot['bndbox'][:-5887]
+ self.part_coco_train = annot['part'][:-5887]
+ # val
+ self.imgname_coco_val = annot['imgname'][-5887:]
+ self.bndbox_coco_val = annot['bndbox'][-5887:]
+ self.part_coco_val = annot['part'][-5887:]
+ # MPII
+ with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot:
+ # train
+ self.imgname_mpii_train = annot['imgname'][:-1358]
+ self.bndbox_mpii_train = annot['bndbox'][:-1358]
+ self.part_mpii_train = annot['part'][:-1358]
+ # val
+ self.imgname_mpii_val = annot['imgname'][-1358:]
+ self.bndbox_mpii_val = annot['bndbox'][-1358:]
+ self.part_mpii_val = annot['part'][-1358:]
+
+ self.size_coco_train = self.imgname_coco_train.shape[0]
+ self.size_coco_val = self.imgname_coco_val.shape[0]
+ self.size_train = self.imgname_coco_train.shape[0] + self.imgname_mpii_train.shape[0]
+ self.size_val = self.imgname_coco_val.shape[0] + self.imgname_mpii_val.shape[0]
+ self.train, self.valid = [], []
+
+ def __getitem__(self, index):
+ sf = self.scale_factor
+
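+        # Indices below size_coco_train map into the COCO split; the remainder are offset into the MPII split.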
+ if self.is_train and index < self.size_coco_train: # COCO
+ part = self.part_coco_train[index]
+ bndbox = self.bndbox_coco_train[index]
+ imgname = self.imgname_coco_train[index]
+ imgset = 'coco'
+ elif self.is_train: # MPII
+ part = self.part_mpii_train[index - self.size_coco_train]
+ bndbox = self.bndbox_mpii_train[index - self.size_coco_train]
+ imgname = self.imgname_mpii_train[index - self.size_coco_train]
+ imgset = 'mpii'
+ elif index < self.size_coco_val:
+ part = self.part_coco_val[index]
+ bndbox = self.bndbox_coco_val[index]
+ imgname = self.imgname_coco_val[index]
+ imgset = 'coco'
+ else:
+ part = self.part_mpii_val[index - self.size_coco_val]
+ bndbox = self.bndbox_mpii_val[index - self.size_coco_val]
+ imgname = self.imgname_mpii_val[index - self.size_coco_val]
+ imgset = 'mpii'
+
+ if imgset == 'coco':
+ imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))
+ else:
+ imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13]
+
+ img_path = os.path.join(self.img_folder, imgset, 'images', imgname)
+
+ metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
+ imgset, sf, self, train=self.is_train)
+
+ inp, out_bigcircle, out_smallcircle, out, setMask = metaData
+
+ label = []
+ for i in range(opt.nStack):
+ if i < 2:
+ # label.append(out_bigcircle.clone())
+ label.append(out.clone())
+ elif i < 4:
+ # label.append(out_smallcircle.clone())
+ label.append(out.clone())
+ else:
+ label.append(out.clone())
+
+ return inp, label, setMask, imgset
+
+ def __len__(self):
+ if self.is_train:
+ return self.size_train
+ else:
+ return self.size_val
diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/dataset/mpii.py b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/mpii.py
new file mode 100644
index 0000000..eae0dd8
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/mpii.py
@@ -0,0 +1,84 @@
+import os
+import h5py
+from functools import reduce
+
+import torch.utils.data as data
+from ..pose import generateSampleBox
+from opt import opt
+
+
+class Mpii(data.Dataset):
+ def __init__(self, train=True, sigma=1,
+ scale_factor=0.25, rot_factor=30, label_type='Gaussian'):
+ self.img_folder = '../data/mpii/images' # root image folders
+ self.is_train = train # training set or test set
+ self.inputResH = 320
+ self.inputResW = 256
+ self.outputResH = 80
+ self.outputResW = 64
+ self.sigma = sigma
+ self.scale_factor = (0.2, 0.3)
+ self.rot_factor = rot_factor
+ self.label_type = label_type
+
+ self.nJoints_mpii = 16
+ self.nJoints = 16
+
+ self.accIdxs = (1, 2, 3, 4, 5, 6,
+ 11, 12, 15, 16)
+ self.flipRef = ((1, 6), (2, 5), (3, 4),
+ (11, 16), (12, 15), (13, 14))
+
+ # create train/val split
+ with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot:
+ # train
+ self.imgname_mpii_train = annot['imgname'][:-1358]
+ self.bndbox_mpii_train = annot['bndbox'][:-1358]
+ self.part_mpii_train = annot['part'][:-1358]
+ # val
+ self.imgname_mpii_val = annot['imgname'][-1358:]
+ self.bndbox_mpii_val = annot['bndbox'][-1358:]
+ self.part_mpii_val = annot['part'][-1358:]
+
+ self.size_train = self.imgname_mpii_train.shape[0]
+ self.size_val = self.imgname_mpii_val.shape[0]
+ self.train, self.valid = [], []
+
+ def __getitem__(self, index):
+ sf = self.scale_factor
+
+ if self.is_train:
+ part = self.part_mpii_train[index]
+ bndbox = self.bndbox_mpii_train[index]
+ imgname = self.imgname_mpii_train[index]
+ else:
+ part = self.part_mpii_val[index]
+ bndbox = self.bndbox_mpii_val[index]
+ imgname = self.imgname_mpii_val[index]
+
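+        # MPII image names are stored as fixed-width character-code arrays
+        # ('#########.jpg' is 13 characters), so trailing padding is dropped.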
+ imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13]
+ img_path = os.path.join(self.img_folder, imgname)
+
+ metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
+ 'mpii', sf, self, train=self.is_train)
+
+ inp, out_bigcircle, out_smallcircle, out, setMask = metaData
+
+ label = []
+ for i in range(opt.nStack):
+ if i < 2:
+ #label.append(out_bigcircle.clone())
+ label.append(out.clone())
+ elif i < 4:
+ #label.append(out_smallcircle.clone())
+ label.append(out.clone())
+ else:
+ label.append(out.clone())
+
+ return inp, label, setMask
+
+ def __len__(self):
+ if self.is_train:
+ return self.size_train
+ else:
+ return self.size_val
diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/eval.py b/StreamServer/src/analytic/action/SPPE/src/utils/eval.py
new file mode 100644
index 0000000..b1659b4
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/utils/eval.py
@@ -0,0 +1,216 @@
+from ..opt import opt
+try:
+ from utils import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks
+except ImportError:
+ from .img import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks
+import torch
+
+
+class DataLogger(object):
+ def __init__(self):
+ self.clear()
+
+ def clear(self):
+ self.value = 0
+ self.sum = 0
+ self.cnt = 0
+ self.avg = 0
+
+ def update(self, value, n=1):
+ self.value = value
+ self.sum += value * n
+ self.cnt += n
+ self._cal_avg()
+
+ def _cal_avg(self):
+ self.avg = self.sum / self.cnt
+
+
+def accuracy(output, label, dataset):
+ if type(output) == list:
+ return accuracy(output[opt.nStack - 1], label[opt.nStack - 1], dataset)
+ else:
+ return heatmapAccuracy(output.cpu().data, label.cpu().data, dataset.accIdxs)
+
+
+def heatmapAccuracy(output, label, idxs):
+ preds = getPreds(output)
+ gt = getPreds(label)
+
+ norm = torch.ones(preds.size(0)) * opt.outputResH / 10
+ dists = calc_dists(preds, gt, norm)
+ #print(dists)
+ acc = torch.zeros(len(idxs) + 1)
+ avg_acc = 0
+ cnt = 0
+ for i in range(len(idxs)):
+ acc[i + 1] = dist_acc(dists[idxs[i] - 1])
+ if acc[i + 1] >= 0:
+ avg_acc = avg_acc + acc[i + 1]
+ cnt += 1
+ if cnt != 0:
+ acc[0] = avg_acc / cnt
+ return acc
+
+
+def getPreds(hm):
+ """ get predictions from score maps in torch Tensor
+ return type: torch.LongTensor
+ """
+ assert hm.dim() == 4, 'Score maps should be 4-dim'
+ maxval, idx = torch.max(hm.view(hm.size(0), hm.size(1), -1), 2)
+
+ maxval = maxval.view(hm.size(0), hm.size(1), 1)
+ idx = idx.view(hm.size(0), hm.size(1), 1) + 1
+
+ preds = idx.repeat(1, 1, 2).float()
+
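+    # Convert the flattened argmax index to (x, y) = (column, row) grid coordinates.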
+ preds[:, :, 0] = (preds[:, :, 0] - 1) % hm.size(3)
+ preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hm.size(3))
+
+ # pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
+ # preds *= pred_mask
+ return preds
+
+
+def calc_dists(preds, target, normalize):
+ preds = preds.float().clone()
+ target = target.float().clone()
+ dists = torch.zeros(preds.size(1), preds.size(0))
+ for n in range(preds.size(0)):
+ for c in range(preds.size(1)):
+ if target[n, c, 0] > 0 and target[n, c, 1] > 0:
+ dists[c, n] = torch.dist(
+ preds[n, c, :], target[n, c, :]) / normalize[n]
+ else:
+ dists[c, n] = -1
+ return dists
+
+
+def dist_acc(dists, thr=0.5):
+ """ Return percentage below threshold while ignoring values with a -1 """
+ if dists.ne(-1).sum() > 0:
+ return dists.le(thr).eq(dists.ne(-1)).float().sum() * 1.0 / dists.ne(-1).float().sum()
+ else:
+        return -1
+
+
+def postprocess(output):
+ p = getPreds(output)
+
+ for i in range(p.size(0)):
+ for j in range(p.size(1)):
+ hm = output[i][j]
+ pX, pY = int(round(p[i][j][0])), int(round(p[i][j][1]))
+ if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1:
+ diff = torch.Tensor((hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX]))
+ p[i][j] += diff.sign() * 0.25
+ p -= 0.5
+
+ return p
+
+
+def getPrediction(hms, pt1, pt2, inpH, inpW, resH, resW):
+ """
+ Get keypoint location from heatmaps
+ """
+ assert hms.dim() == 4, 'Score maps should be 4-dim'
+ maxval, idx = torch.max(hms.view(hms.size(0), hms.size(1), -1), 2)
+
+ maxval = maxval.view(hms.size(0), hms.size(1), 1)
+ idx = idx.view(hms.size(0), hms.size(1), 1) + 1
+
+ preds = idx.repeat(1, 1, 2).float()
+
+ preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3)
+ preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3))
+
+ pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
+ preds *= pred_mask
+
+ # Very simple post-processing step to improve performance at tight PCK thresholds
+ """for i in range(preds.size(0)):
+ for j in range(preds.size(1)):
+ hm = hms[i][j]
+ pX, pY = int(round(float(preds[i][j][0]))), int(round(float(preds[i][j][1])))
+ if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1:
+ diff = torch.Tensor(
+ (hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX]))
+ preds[i][j] += diff.sign() * 0.25
+ preds += 0.2"""
+
+ preds_tf = torch.zeros(preds.size())
+ preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW)
+ return preds, preds_tf, maxval
+
+
+def getMultiPeakPrediction(hms, pt1, pt2, inpH, inpW, resH, resW):
+
+ assert hms.dim() == 4, 'Score maps should be 4-dim'
+
+ preds_img = {}
+ hms = hms.numpy()
+ for n in range(hms.shape[0]): # Number of samples
+ preds_img[n] = {} # Result of sample: n
+ for k in range(hms.shape[1]): # Number of keypoints
+ preds_img[n][k] = [] # Result of keypoint: k
+ hm = hms[n][k]
+
+ candidate_points = findPeak(hm)
+
+ res_pt = processPeaks(candidate_points, hm,
+ pt1[n], pt2[n], inpH, inpW, resH, resW)
+
+ preds_img[n][k] = res_pt
+
+ return preds_img
+
+
+def getPrediction_batch(hms, pt1, pt2, inpH, inpW, resH, resW):
+ """
+ Get keypoint location from heatmaps
+ pt1, pt2: [n, 2]
+ OUTPUT:
+ preds: [n, 17, 2]
+ """
+
+ assert hms.dim() == 4, 'Score maps should be 4-dim'
+ flat_hms = hms.view(hms.size(0), hms.size(1), -1)
+ maxval, idx = torch.max(flat_hms, 2)
+
+ maxval = maxval.view(hms.size(0), hms.size(1), 1)
+ idx = idx.view(hms.size(0), hms.size(1), 1) + 1
+
+ preds = idx.repeat(1, 1, 2).float()
+
+ preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3)
+ preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3))
+
+ pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
+ preds *= pred_mask
+
+ # Very simple post-processing step to improve performance at tight PCK thresholds
+ idx_up = (idx - hms.size(3)).clamp(0, flat_hms.size(2) - 1)
+ idx_down = (idx + hms.size(3)).clamp(0, flat_hms.size(2) - 1)
+ idx_left = (idx - 1).clamp(0, flat_hms.size(2) - 1)
+ idx_right = (idx + 1).clamp(0, flat_hms.size(2) - 1)
+
+ maxval_up = flat_hms.gather(2, idx_up)
+ maxval_down = flat_hms.gather(2, idx_down)
+ maxval_left = flat_hms.gather(2, idx_left)
+ maxval_right = flat_hms.gather(2, idx_right)
+
+ diff1 = (maxval_right - maxval_left).sign() * 0.25
+ diff2 = (maxval_down - maxval_up).sign() * 0.25
+ diff1[idx_up <= hms.size(3)] = 0
+ diff1[idx_down / hms.size(3) >= (hms.size(3) - 1)] = 0
+ diff2[(idx_left % hms.size(3)) == 0] = 0
+ diff2[(idx_left % hms.size(3)) == (hms.size(3) - 1)] = 0
+
+ preds[:, :, 0] += diff1.squeeze(-1)
+ preds[:, :, 1] += diff2.squeeze(-1)
+
+ preds_tf = torch.zeros(preds.size())
+ preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW)
+
+ return preds, preds_tf, maxval
diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/img.py b/StreamServer/src/analytic/action/SPPE/src/utils/img.py
new file mode 100644
index 0000000..24df2ee
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/utils/img.py
@@ -0,0 +1,534 @@
+import numpy as np
+import cv2
+import torch
+from torchvision import transforms
+import torch.nn.functional as F
+from scipy.ndimage import maximum_filter
+
+from PIL import Image
+from copy import deepcopy
+import matplotlib
+#matplotlib.use('agg')
+import matplotlib.pyplot as plt
+
+
+def im_to_torch(img):
+ img = np.array(img)
+ img = np.transpose(img, (2, 0, 1)) # C*H*W
+ img = to_torch(img).float()
+ if img.max() > 1:
+ img /= 255
+ return img
+
+
+def torch_to_im(img):
+ img = to_numpy(img)
+ img = np.transpose(img, (1, 2, 0)) # C*H*W
+ return img
+
+
+def load_image(img_path):
+    # H x W x C => C x H x W
+    # scipy.misc.imread was removed from SciPy; read with OpenCV and convert BGR -> RGB instead.
+    img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
+    return im_to_torch(img)
+
+
+def to_numpy(tensor):
+ if torch.is_tensor(tensor):
+ return tensor.cpu().numpy()
+ elif type(tensor).__module__ != 'numpy':
+ raise ValueError("Cannot convert {} to numpy array"
+ .format(type(tensor)))
+ return tensor
+
+
+def to_torch(ndarray):
+ if type(ndarray).__module__ == 'numpy':
+ return torch.from_numpy(ndarray)
+ elif not torch.is_tensor(ndarray):
+ raise ValueError("Cannot convert {} to torch tensor"
+ .format(type(ndarray)))
+ return ndarray
+
+
+def drawCircle(img, pt, sigma):
+ img = to_numpy(img)
+ tmpSize = 3 * sigma
+ # Check that any part of the gaussian is in-bounds
+ ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
+ br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]
+
+ if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
+ br[0] < 0 or br[1] < 0):
+ # If not, just return the image as is
+ return to_torch(img)
+
+ # Generate gaussian
+ size = 2 * tmpSize + 1
+ x = np.arange(0, size, 1, float)
+ y = x[:, np.newaxis]
+ x0 = y0 = size // 2
+ sigma = size / 4.0
+ # The gaussian is not normalized, we want the center value to equal 1
+ g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
+ g[g > 0] = 1
+ # Usable gaussian range
+ g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
+ g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
+ # Image range
+ img_x = max(0, ul[0]), min(br[0], img.shape[1])
+ img_y = max(0, ul[1]), min(br[1], img.shape[0])
+
+ img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+ return to_torch(img)
+
+
+def drawGaussian(img, pt, sigma):
+ img = to_numpy(img)
+ tmpSize = 3 * sigma
+ # Check that any part of the gaussian is in-bounds
+ ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
+ br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]
+
+ if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
+ br[0] < 0 or br[1] < 0):
+ # If not, just return the image as is
+ return to_torch(img)
+
+ # Generate gaussian
+ size = 2 * tmpSize + 1
+ x = np.arange(0, size, 1, float)
+ y = x[:, np.newaxis]
+ x0 = y0 = size // 2
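+    # Note: sigma is re-derived from the patch size below, so the `sigma` argument
+    # mainly controls the patch extent (tmpSize = 3 * sigma).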
+ sigma = size / 4.0
+ # The gaussian is not normalized, we want the center value to equal 1
+ g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
+
+ # Usable gaussian range
+ g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
+ g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
+ # Image range
+ img_x = max(0, ul[0]), min(br[0], img.shape[1])
+ img_y = max(0, ul[1]), min(br[1], img.shape[0])
+
+ img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+ return to_torch(img)
+
+
+def drawBigCircle(img, pt, sigma):
+ img = to_numpy(img)
+ tmpSize = 3 * sigma
+ # Check that any part of the gaussian is in-bounds
+ ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
+ br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]
+
+ if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
+ br[0] < 0 or br[1] < 0):
+ # If not, just return the image as is
+ return to_torch(img)
+
+ # Generate gaussian
+ size = 2 * tmpSize + 1
+ x = np.arange(0, size, 1, float)
+ y = x[:, np.newaxis]
+ x0 = y0 = size // 2
+ sigma = size / 4.0
+ # The gaussian is not normalized, we want the center value to equal 1
+ g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
+ g[g > 0.4] = 1
+ # Usable gaussian range
+ g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
+ g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
+ # Image range
+ img_x = max(0, ul[0]), min(br[0], img.shape[1])
+ img_y = max(0, ul[1]), min(br[1], img.shape[0])
+
+ img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+ return to_torch(img)
+
+
+def drawSmallCircle(img, pt, sigma):
+ img = to_numpy(img)
+ tmpSize = 3 * sigma
+ # Check that any part of the gaussian is in-bounds
+ ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)]
+ br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)]
+
+ if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or
+ br[0] < 0 or br[1] < 0):
+ # If not, just return the image as is
+ return to_torch(img)
+
+ # Generate gaussian
+ size = 2 * tmpSize + 1
+ x = np.arange(0, size, 1, float)
+ y = x[:, np.newaxis]
+ x0 = y0 = size // 2
+ sigma = size / 4.0
+ # The gaussian is not normalized, we want the center value to equal 1
+ g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
+ g[g > 0.5] = 1
+ # Usable gaussian range
+ g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
+ g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
+ # Image range
+ img_x = max(0, ul[0]), min(br[0], img.shape[1])
+ img_y = max(0, ul[1]), min(br[1], img.shape[0])
+
+ img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+ return to_torch(img)
+
+
+def transformBox(pt, ul, br, inpH, inpW, resH, resW):
+ center = torch.zeros(2)
+ center[0] = (br[0] - 1 - ul[0]) / 2
+ center[1] = (br[1] - 1 - ul[1]) / 2
+
+ lenH = max(br[1] - ul[1], (br[0] - ul[0]) * inpH / inpW)
+ lenW = lenH * inpW / inpH
+
+ _pt = torch.zeros(2)
+ _pt[0] = pt[0] - ul[0]
+ _pt[1] = pt[1] - ul[1]
+ # Move to center
+ _pt[0] = _pt[0] + max(0, (lenW - 1) / 2 - center[0])
+ _pt[1] = _pt[1] + max(0, (lenH - 1) / 2 - center[1])
+ pt = (_pt * resH) / lenH
+ pt[0] = round(float(pt[0]))
+ pt[1] = round(float(pt[1]))
+ return pt.int()
+
+
+def transformBoxInvert(pt, ul, br, inpH, inpW, resH, resW):
+ center = np.zeros(2)
+ center[0] = (br[0] - 1 - ul[0]) / 2
+ center[1] = (br[1] - 1 - ul[1]) / 2
+
+ lenH = max(br[1] - ul[1], (br[0] - ul[0]) * inpH / inpW)
+ lenW = lenH * inpW / inpH
+
+ _pt = (pt * lenH) / resH
+ _pt[0] = _pt[0] - max(0, (lenW - 1) / 2 - center[0])
+ _pt[1] = _pt[1] - max(0, (lenH - 1) / 2 - center[1])
+
+ new_point = np.zeros(2)
+ new_point[0] = _pt[0] + ul[0]
+ new_point[1] = _pt[1] + ul[1]
+ return new_point
+
+
+def transformBoxInvert_batch(pt, ul, br, inpH, inpW, resH, resW):
+ """
+ pt: [n, 17, 2]
+ ul: [n, 2]
+ br: [n, 2]
+ """
+ num_pt = pt.shape[1]
+ center = (br - 1 - ul) / 2
+
+ size = br - ul
+ size[:, 0] *= (inpH / inpW)
+
+ lenH, _ = torch.max(size, dim=1) # [n,]
+ lenW = lenH * (inpW / inpH)
+
+ _pt = (pt * lenH[:, np.newaxis, np.newaxis]) / resH
+ _pt[:, :, 0] = _pt[:, :, 0] - ((lenW[:, np.newaxis].repeat(1, num_pt) - 1) /
+ 2 - center[:, 0].unsqueeze(-1).repeat(1, num_pt)).clamp(min=0)
+ _pt[:, :, 1] = _pt[:, :, 1] - ((lenH[:, np.newaxis].repeat(1, num_pt) - 1) /
+ 2 - center[:, 1].unsqueeze(-1).repeat(1, num_pt)).clamp(min=0)
+
+ new_point = torch.zeros(pt.size())
+ new_point[:, :, 0] = _pt[:, :, 0] + ul[:, 0].unsqueeze(-1).repeat(1, num_pt)
+ new_point[:, :, 1] = _pt[:, :, 1] + ul[:, 1].unsqueeze(-1).repeat(1, num_pt)
+ return new_point
+
+
+def cropBox(img, ul, br, resH, resW):
+ ul = ul.int()
+ br = (br - 1).int()
+ # br = br.int()
+ lenH = max((br[1] - ul[1]).item(), (br[0] - ul[0]).item() * resH / resW)
+ lenW = lenH * resW / resH
+ if img.dim() == 2:
+ img = img[np.newaxis, :]
+
+ box_shape = [(br[1] - ul[1]).item(), (br[0] - ul[0]).item()]
+ pad_size = [(lenH - box_shape[0]) // 2, (lenW - box_shape[1]) // 2]
+ # Padding Zeros
+ if ul[1] > 0:
+ img[:, :ul[1], :] = 0
+ if ul[0] > 0:
+ img[:, :, :ul[0]] = 0
+ if br[1] < img.shape[1] - 1:
+ img[:, br[1] + 1:, :] = 0
+ if br[0] < img.shape[2] - 1:
+ img[:, :, br[0] + 1:] = 0
+
+ src = np.zeros((3, 2), dtype=np.float32)
+ dst = np.zeros((3, 2), dtype=np.float32)
+
+ src[0, :] = np.array(
+ [ul[0] - pad_size[1], ul[1] - pad_size[0]], np.float32)
+ src[1, :] = np.array(
+ [br[0] + pad_size[1], br[1] + pad_size[0]], np.float32)
+ dst[0, :] = 0
+ dst[1, :] = np.array([resW - 1, resH - 1], np.float32)
+
+ src[2:, :] = get_3rd_point(src[0, :], src[1, :])
+ dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
+
+ trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
+
+ dst_img = cv2.warpAffine(torch_to_im(img), trans,
+ (resW, resH), flags=cv2.INTER_LINEAR)
+
+ return im_to_torch(torch.Tensor(dst_img))
+
+
+def cv_rotate(img, rot, resW, resH):
+ center = np.array((resW - 1, resH - 1)) / 2
+ rot_rad = np.pi * rot / 180
+
+ src_dir = get_dir([0, (resH - 1) * -0.5], rot_rad)
+ dst_dir = np.array([0, (resH - 1) * -0.5], np.float32)
+
+ src = np.zeros((3, 2), dtype=np.float32)
+ dst = np.zeros((3, 2), dtype=np.float32)
+
+ src[0, :] = center
+ src[1, :] = center + src_dir
+ dst[0, :] = [(resW - 1) * 0.5, (resH - 1) * 0.5]
+ dst[1, :] = np.array([(resW - 1) * 0.5, (resH - 1) * 0.5]) + dst_dir
+
+ src[2:, :] = get_3rd_point(src[0, :], src[1, :])
+ dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
+
+ trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
+
+ dst_img = cv2.warpAffine(torch_to_im(img), trans,
+ (resW, resH), flags=cv2.INTER_LINEAR)
+
+ return im_to_torch(torch.Tensor(dst_img))
+
+
+def flip(x):
+ assert (x.dim() == 3 or x.dim() == 4)
+ dim = x.dim() - 1
+    # torch.Tensor.flip is available from 0.4.1 onward; keep the numpy path only
+    # as a fallback for older versions.
+    if hasattr(x, 'flip'):
+        return x.flip(dims=(dim,))
+ else:
+ is_cuda = False
+ if x.is_cuda:
+ is_cuda = True
+ x = x.cpu()
+ x = x.numpy().copy()
+ if x.ndim == 3:
+ x = np.transpose(np.fliplr(np.transpose(x, (0, 2, 1))), (0, 2, 1))
+ elif x.ndim == 4:
+ for i in range(x.shape[0]):
+ x[i] = np.transpose(
+ np.fliplr(np.transpose(x[i], (0, 2, 1))), (0, 2, 1))
+ # x = x.swapaxes(dim, 0)
+ # x = x[::-1, ...]
+ # x = x.swapaxes(0, dim)
+
+ x = torch.from_numpy(x.copy())
+ if is_cuda:
+ x = x.cuda()
+ return x
+
+
+def shuffleLR(x, dataset):
+ flipRef = dataset.flipRef
+ assert (x.dim() == 3 or x.dim() == 4)
+ for pair in flipRef:
+ dim0, dim1 = pair
+ dim0 -= 1
+ dim1 -= 1
+ if x.dim() == 4:
+ tmp = x[:, dim1].clone()
+ x[:, dim1] = x[:, dim0].clone()
+ x[:, dim0] = tmp.clone()
+ #x[:, dim0], x[:, dim1] = deepcopy((x[:, dim1], x[:, dim0]))
+ else:
+ tmp = x[dim1].clone()
+ x[dim1] = x[dim0].clone()
+ x[dim0] = tmp.clone()
+ #x[dim0], x[dim1] = deepcopy((x[dim1], x[dim0]))
+ return x
+
+
+def drawMPII(inps, preds):
+ assert inps.dim() == 4
+    p_color = ['r', 'r', 'r', 'b', 'b', 'b',
+               'black', 'black', 'black', 'black',
+               'y', 'y', 'white', 'white', 'g', 'g']
+
+ nImg = inps.size(0)
+ imgs = []
+ for n in range(nImg):
+ img = to_numpy(inps[n])
+ img = np.transpose(img, (1, 2, 0))
+ imgs.append(img)
+
+ fig = plt.figure()
+ plt.imshow(imgs[0])
+ ax = fig.add_subplot(1, 1, 1)
+ #print(preds.shape)
+ for p in range(16):
+ x, y = preds[0][p]
+ cor = (round(x), round(y)), 10
+ ax.add_patch(plt.Circle(*cor, color=p_color[p]))
+ plt.axis('off')
+
+ plt.show()
+
+ return imgs
+
+
+def drawCOCO(inps, preds, scores):
+ assert inps.dim() == 4
+ p_color = ['g', 'b', 'purple', 'b', 'purple',
+ 'y', 'orange', 'y', 'orange', 'y', 'orange',
+ 'pink', 'r', 'pink', 'r', 'pink', 'r']
+
+ nImg = inps.size(0)
+ imgs = []
+ for n in range(nImg):
+ img = to_numpy(inps[n])
+ img = np.transpose(img, (1, 2, 0))
+ imgs.append(img)
+
+ fig = plt.figure()
+ plt.imshow(imgs[0])
+ ax = fig.add_subplot(1, 1, 1)
+ #print(preds.shape)
+ for p in range(17):
+ if scores[0][p][0] < 0.2:
+ continue
+ x, y = preds[0][p]
+ cor = (round(x), round(y)), 3
+ ax.add_patch(plt.Circle(*cor, color=p_color[p]))
+ plt.axis('off')
+
+ plt.show()
+
+ return imgs
+
+
+def get_3rd_point(a, b):
+ direct = a - b
+ return b + np.array([-direct[1], direct[0]], dtype=np.float32)
+
+
+def get_dir(src_point, rot_rad):
+ sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+
+ src_result = [0, 0]
+ src_result[0] = src_point[0] * cs - src_point[1] * sn
+ src_result[1] = src_point[0] * sn + src_point[1] * cs
+
+ return src_result
+
+
+def findPeak(hm):
+ mx = maximum_filter(hm, size=5)
+ idx = zip(*np.where((mx == hm) * (hm > 0.1)))
+ candidate_points = []
+ for (y, x) in idx:
+ candidate_points.append([x, y, hm[y][x]])
+ if len(candidate_points) == 0:
+ return torch.zeros(0)
+ candidate_points = np.array(candidate_points)
+ candidate_points = candidate_points[np.lexsort(-candidate_points.T)]
+ return torch.Tensor(candidate_points)
+
+
+def processPeaks(candidate_points, hm, pt1, pt2, inpH, inpW, resH, resW):
+ # type: (Tensor, Tensor, Tensor, Tensor, float, float, float, float) -> List[Tensor]
+
+ if candidate_points.shape[0] == 0: # Low Response
+ maxval = np.max(hm.reshape(1, -1), 1)
+ idx = np.argmax(hm.reshape(1, -1), 1)
+
+ x = idx % resW
+ y = int(idx / resW)
+
+ candidate_points = np.zeros((1, 3))
+ candidate_points[0, 0:1] = x
+ candidate_points[0, 1:2] = y
+ candidate_points[0, 2:3] = maxval
+
+ res_pts = []
+ for i in range(candidate_points.shape[0]):
+ x, y, maxval = candidate_points[i][0], candidate_points[i][1], candidate_points[i][2]
+
+ if bool(maxval < 0.05) and len(res_pts) > 0:
+ pass
+ else:
+ if bool(x > 0) and bool(x < resW - 2):
+ if bool(hm[int(y)][int(x) + 1] - hm[int(y)][int(x) - 1] > 0):
+ x += 0.25
+ elif bool(hm[int(y)][int(x) + 1] - hm[int(y)][int(x) - 1] < 0):
+ x -= 0.25
+ if bool(y > 0) and bool(y < resH - 2):
+ if bool(hm[int(y) + 1][int(x)] - hm[int(y) - 1][int(x)] > 0):
+ y += (0.25 * inpH / inpW)
+ elif bool(hm[int(y) + 1][int(x)] - hm[int(y) - 1][int(x)] < 0):
+ y -= (0.25 * inpH / inpW)
+
+ #pt = torch.zeros(2)
+ pt = np.zeros(2)
+ pt[0] = x + 0.2
+ pt[1] = y + 0.2
+
+ pt = transformBoxInvert(pt, pt1, pt2, inpH, inpW, resH, resW)
+
+ res_pt = np.zeros(3)
+ res_pt[:2] = pt
+ res_pt[2] = maxval
+
+ res_pts.append(res_pt)
+
+ if maxval < 0.05:
+ break
+ return res_pts
+
+
+def crop_dets(img, boxes, height, width):
+ img = im_to_torch(img)
+ img_h = img.size(1)
+ img_w = img.size(2)
+ img[0].add_(-0.406)
+ img[1].add_(-0.457)
+ img[2].add_(-0.480)
+
+ inps = torch.zeros(len(boxes), 3, height, width)
+ pt1 = torch.zeros(len(boxes), 2)
+ pt2 = torch.zeros(len(boxes), 2)
+ for i, box in enumerate(boxes):
+ upLeft = torch.Tensor((float(box[0]), float(box[1])))
+ bottomRight = torch.Tensor((float(box[2]), float(box[3])))
+
+ h = bottomRight[1] - upLeft[1]
+ w = bottomRight[0] - upLeft[0]
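+        # Expand the box before cropping: narrow boxes (width <= 100 px) get a
+        # larger margin (30%) than wide ones (20%).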
+ if w > 100:
+ scaleRate = 0.2
+ else:
+ scaleRate = 0.3
+
+ upLeft[0] = max(0, upLeft[0] - w * scaleRate / 2)
+ upLeft[1] = max(0, upLeft[1] - h * scaleRate / 2)
+ bottomRight[0] = max(min(img_w - 1, bottomRight[0] + w * scaleRate / 2), upLeft[0] + 5)
+ bottomRight[1] = max(min(img_h - 1, bottomRight[1] + h * scaleRate / 2), upLeft[1] + 5)
+
+ inps[i] = cropBox(img.clone(), upLeft, bottomRight, height, width)
+ pt1[i] = upLeft
+ pt2[i] = bottomRight
+
+ return inps, pt1, pt2
+
diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/pose.py b/StreamServer/src/analytic/action/SPPE/src/utils/pose.py
new file mode 100644
index 0000000..60836f0
--- /dev/null
+++ b/StreamServer/src/analytic/action/SPPE/src/utils/pose.py
@@ -0,0 +1,169 @@
+# Relative imports so this module resolves when loaded as part of the SPPE package.
+from .img import (load_image, drawGaussian, drawBigCircle, drawSmallCircle, cv_rotate,
+                  cropBox, transformBox, flip, shuffleLR, drawCOCO)
+from .eval import getPrediction
+import torch
+import numpy as np
+import random
+from ..opt import opt
+
+
+def rnd(x):
+ return max(-2 * x, min(2 * x, np.random.randn(1)[0] * x))
+
+
+def generateSampleBox(img_path, bndbox, part, nJoints, imgset, scale_factor, dataset, train=True):
+
+ nJoints_coco = 17
+ nJoints_mpii = 16
+ img = load_image(img_path)
+ if train:
+ img[0].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
+ img[1].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
+ img[2].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
+
+ ori_img = img.clone()
+ img[0].add_(-0.406)
+ img[1].add_(-0.457)
+ img[2].add_(-0.480)
+
+ upLeft = torch.Tensor((int(bndbox[0][0]), int(bndbox[0][1])))
+ bottomRight = torch.Tensor((int(bndbox[0][2]), int(bndbox[0][3])))
+ ht = bottomRight[1] - upLeft[1]
+ width = bottomRight[0] - upLeft[0]
+ imght = img.shape[1]
+ imgwidth = img.shape[2]
+ scaleRate = random.uniform(*scale_factor)
+
+ upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2)
+ upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2)
+ bottomRight[0] = min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2)
+ bottomRight[1] = min(imght - 1, bottomRight[1] + ht * scaleRate / 2)
+
+ # Doing Random Sample
+ if opt.addDPG:
+ PatchScale = random.uniform(0, 1)
+ if PatchScale > 0.85:
+ ratio = ht / width
+ if width < ht:
+ patchWidth = PatchScale * width
+ patchHt = patchWidth * ratio
+ else:
+ patchHt = PatchScale * ht
+ patchWidth = patchHt / ratio
+
+ xmin = upLeft[0] + random.uniform(0, 1) * (width - patchWidth)
+ ymin = upLeft[1] + random.uniform(0, 1) * (ht - patchHt)
+
+ xmax = xmin + patchWidth + 1
+ ymax = ymin + patchHt + 1
+ else:
+ xmin = max(1, min(upLeft[0] + np.random.normal(-0.0142, 0.1158) * width, imgwidth - 3))
+ ymin = max(1, min(upLeft[1] + np.random.normal(0.0043, 0.068) * ht, imght - 3))
+ xmax = min(max(xmin + 2, bottomRight[0] + np.random.normal(0.0154, 0.1337) * width), imgwidth - 3)
+ ymax = min(max(ymin + 2, bottomRight[1] + np.random.normal(-0.0013, 0.0711) * ht), imght - 3)
+
+ upLeft[0] = xmin
+ upLeft[1] = ymin
+ bottomRight[0] = xmax
+ bottomRight[1] = ymax
+
+ # Counting Joints number
+ jointNum = 0
+ if imgset == 'coco':
+ for i in range(17):
+ if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
+ and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
+ jointNum += 1
+ else:
+ for i in range(16):
+ if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
+ and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
+ jointNum += 1
+
+ # Doing Random Crop
+ if opt.addDPG:
+ if jointNum > 13 and train:
+ switch = random.uniform(0, 1)
+ if switch > 0.96:
+ bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
+ bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
+ elif switch > 0.92:
+ upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
+ bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
+ elif switch > 0.88:
+ upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
+ bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
+ elif switch > 0.84:
+ upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
+ upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
+ elif switch > 0.80:
+ bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
+ elif switch > 0.76:
+ upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
+ elif switch > 0.72:
+ bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
+ elif switch > 0.68:
+ upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
+
+ ori_inp = cropBox(ori_img, upLeft, bottomRight, opt.inputResH, opt.inputResW)
+ inp = cropBox(img, upLeft, bottomRight, opt.inputResH, opt.inputResW)
+ if jointNum == 0:
+ inp = torch.zeros(3, opt.inputResH, opt.inputResW)
+
+ out_bigcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
+ out_smallcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
+ out = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
+ setMask = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
+
+ # Draw Label
+ if imgset == 'coco':
+ for i in range(nJoints_coco):
+ if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
+ and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
+ out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
+ out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
+ out[i] = drawGaussian(out[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
+ setMask[i].add_(1)
+ elif imgset == 'mpii':
+ for i in range(nJoints_coco, nJoints_coco + nJoints_mpii):
+ if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \
+ and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]:
+ out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
+ out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
+ out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
+ setMask[i].add_(1)
+ else:
+ for i in range(nJoints_coco, nJoints_coco + nJoints_mpii):
+ if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \
+ and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]:
+ out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
+ out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
+ out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
+ if i != 6 + nJoints_coco and i != 7 + nJoints_coco:
+ setMask[i].add_(1)
+
+ if opt.debug:
+ preds_hm, preds_img, preds_scores = getPrediction(out.unsqueeze(0), upLeft.unsqueeze(0), bottomRight.unsqueeze(0), opt.inputResH,
+ opt.inputResW, opt.outputResH, opt.outputResW)
+ tmp_preds = preds_hm.mul(opt.inputResH / opt.outputResH)
+ drawCOCO(ori_inp.unsqueeze(0), tmp_preds, preds_scores)
+
+ if train:
+ # Flip
+ if random.uniform(0, 1) < 0.5:
+ inp = flip(inp)
+ ori_inp = flip(ori_inp)
+ out_bigcircle = shuffleLR(flip(out_bigcircle), dataset)
+ out_smallcircle = shuffleLR(flip(out_smallcircle), dataset)
+ out = shuffleLR(flip(out), dataset)
+ # Rotate
+ r = rnd(opt.rotate)
+ if random.uniform(0, 1) < 0.6:
+ r = 0
+ if r != 0:
+ inp = cv_rotate(inp, r, opt.inputResW, opt.inputResH)
+ out_bigcircle = cv_rotate(out_bigcircle, r, opt.outputResW, opt.outputResH)
+ out_smallcircle = cv_rotate(out_smallcircle, r, opt.outputResW, opt.outputResH)
+ out = cv_rotate(out, r, opt.outputResW, opt.outputResH)
+
+ return inp, out_bigcircle, out_smallcircle, out, setMask
diff --git a/StreamServer/src/analytic/action/Track/Tracker.py b/StreamServer/src/analytic/action/Track/Tracker.py
new file mode 100644
index 0000000..324deeb
--- /dev/null
+++ b/StreamServer/src/analytic/action/Track/Tracker.py
@@ -0,0 +1,192 @@
+import time
+import numpy as np
+from collections import deque
+
+from .linear_assignment import min_cost_matching, matching_cascade
+from .kalman_filter import KalmanFilter
+from .iou_matching import iou_cost
+
+
+class TrackState:
+ """Enumeration type for the single target track state. Newly created tracks are
+ classified as `tentative` until enough evidence has been collected. Then,
+ the track state is changed to `confirmed`. Tracks that are no longer alive
+ are classified as `deleted` to mark them for removal from the set of active
+ tracks.
+ """
+ Tentative = 1
+ Confirmed = 2
+ Deleted = 3
+
+
+class Detection(object):
+    """Represents the bounding box, keypoints, and confidence score of a person
+    detected in a single image.
+
+    Args:
+        tlbr: (float array) Bounding box as `(x_min, y_min, x_max, y_max)`.
+        keypoints: (float array) Keypoints of shape `(num_node, pts)`.
+        confidence: (float) Confidence score of the detection.
+    """
+ def __init__(self, tlbr, keypoints, confidence):
+ self.tlbr = tlbr
+ self.keypoints = keypoints
+ self.confidence = confidence
+
+ def to_tlwh(self):
+ """Get (top, left, width, height).
+ """
+ ret = self.tlbr.copy()
+ ret[2:] = ret[2:] - ret[:2]
+ return ret
+
+ def to_xyah(self):
+ """Get (x_center, y_center, aspect ratio, height).
+ """
+ ret = self.to_tlwh()
+ ret[:2] += ret[2:] / 2
+ ret[2] /= ret[3]
+ return ret
+
+
+class Track:
+ def __init__(self, mean, covariance, track_id, n_init, max_age=30, buffer=30):
+ self.mean = mean
+ self.covariance = covariance
+ self.track_id = track_id
+ self.hist = 1
+ self.age = 1
+ self.time_since_update = 0
+ self.n_init = n_init
+ self.max_age = max_age
+
+ # keypoints list for use in Actions prediction.
+ self.keypoints_list = deque(maxlen=buffer)
+
+ self.state = TrackState.Tentative
+
+ def to_tlwh(self):
+ ret = self.mean[:4].copy()
+ ret[2] *= ret[3]
+ ret[:2] -= ret[2:] / 2
+ return ret
+
+ def to_tlbr(self):
+ ret = self.to_tlwh()
+ ret[2:] = ret[:2] + ret[2:]
+ return ret
+
+ def get_center(self):
+ return self.mean[:2].copy()
+
+ def predict(self, kf):
+ """Propagate the state distribution to the current time step using a
+ Kalman filter prediction step.
+ """
+ self.mean, self.covariance = kf.predict(self.mean, self.covariance)
+ self.age += 1
+ self.time_since_update += 1
+
+ def update(self, kf, detection):
+ """Perform Kalman filter measurement update step.
+ """
+ self.mean, self.covariance = kf.update(self.mean, self.covariance,
+ detection.to_xyah())
+ self.keypoints_list.append(detection.keypoints)
+
+ self.hist += 1
+ self.time_since_update = 0
+ if self.state == TrackState.Tentative and self.hist >= self.n_init:
+ self.state = TrackState.Confirmed
+
+ def mark_missed(self):
+ """Mark this track as missed (no association at the current time step).
+ """
+ if self.state == TrackState.Tentative:
+ self.state = TrackState.Deleted
+ elif self.time_since_update > self.max_age:
+ self.state = TrackState.Deleted
+
+ def is_tentative(self):
+ return self.state == TrackState.Tentative
+
+ def is_confirmed(self):
+ return self.state == TrackState.Confirmed
+
+ def is_deleted(self):
+ return self.state == TrackState.Deleted
+
+
+class Tracker:
+ def __init__(self, max_iou_distance=0.7, max_age=30, n_init=5):
+ self.max_iou_dist = max_iou_distance
+ self.max_age = max_age
+ self.n_init = n_init
+
+ self.kf = KalmanFilter()
+ self.tracks = []
+ self._next_id = 1
+
+ def predict(self):
+ """Propagate track state distributions one time step forward.
+ This function should be called once every time step, before `update`.
+ """
+ for track in self.tracks:
+ track.predict(self.kf)
+
+ def update(self, detections):
+ """Perform measurement update and track management.
+ Parameters
+ ----------
+ detections : List[deep_sort.detection.Detection]
+ A list of detections at the current time step.
+ """
+ # Run matching cascade.
+ matches, unmatched_tracks, unmatched_detections = self._match(detections)
+
+        # Update matched tracks with their assigned detections.
+ for track_idx, detection_idx in matches:
+ self.tracks[track_idx].update(self.kf, detections[detection_idx])
+        # Mark unmatched tracks as missed.
+ for track_idx in unmatched_tracks:
+ self.tracks[track_idx].mark_missed()
+        # Initiate new tracks from unmatched detections.
+ for detection_idx in unmatched_detections:
+ self._initiate_track(detections[detection_idx])
+
+ # Remove deleted tracks.
+ self.tracks = [t for t in self.tracks if not t.is_deleted()]
+
+ def _match(self, detections):
+ confirmed_tracks, unconfirmed_tracks = [], []
+ for i, t in enumerate(self.tracks):
+ if t.is_confirmed():
+ confirmed_tracks.append(i)
+ else:
+ unconfirmed_tracks.append(i)
+
+ matches_a, unmatched_tracks_a, unmatched_detections = matching_cascade(
+ iou_cost, self.max_iou_dist, self.max_age, self.tracks, detections, confirmed_tracks
+ )
+
+ track_candidates = unconfirmed_tracks + [
+ k for k in unmatched_tracks_a if self.tracks[k].time_since_update == 1]
+ unmatched_tracks_a = [
+ k for k in unmatched_tracks_a if self.tracks[k].time_since_update != 1]
+
+ matches_b, unmatched_tracks_b, unmatched_detections = min_cost_matching(
+ iou_cost, self.max_iou_dist, self.tracks, detections, track_candidates, unmatched_detections
+ )
+
+ matches = matches_a + matches_b
+ unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
+ return matches, unmatched_tracks, unmatched_detections
+
+ def _initiate_track(self, detection):
+ if detection.confidence < 0.4:
+ return
+ mean, covariance = self.kf.initiate(detection.to_xyah())
+ self.tracks.append(Track(mean, covariance, self._next_id, self.n_init, self.max_age))
+ self._next_id += 1
+
+
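+# Rough per-frame usage (a sketch, not executed here; names follow this module):
+#     tracker = Tracker(max_age=30, n_init=3)
+#     for each frame:
+#         detections = [Detection(tlbr, keypoints, score), ...]
+#         tracker.predict()
+#         tracker.update(detections)
+#         confirmed = [t for t in tracker.tracks if t.is_confirmed()]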
diff --git a/StreamServer/src/analytic/action/Track/iou_matching.py b/StreamServer/src/analytic/action/Track/iou_matching.py
new file mode 100644
index 0000000..843268f
--- /dev/null
+++ b/StreamServer/src/analytic/action/Track/iou_matching.py
@@ -0,0 +1,78 @@
+import numpy as np
+
+INFTY_COST = 1e+5
+
+
+def iou(bbox, candidates):
+ """Compute intersection over union.
+ Parameters
+ ----------
+ bbox : ndarray
+ A bounding box in format `(xmin, ymin, xmax, ymax)`.
+ candidates : ndarray
+ A matrix of candidate bounding boxes (one per row) in the same format
+ as `bbox`.
+
+ Returns
+ -------
+ ndarray
+ The intersection over union in [0, 1] between the `bbox` and each
+ candidate. A higher score means a larger fraction of the `bbox` is
+ occluded by the candidate.
+ """
+ #bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
+ bbox_tl, bbox_br = bbox[:2], bbox[2:]
+ candidates_tl = candidates[:, :2]
+ candidates_br = candidates[:, 2:] # + candidates[:, :2]
+
+ tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
+ np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
+ br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
+ np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
+ wh = np.maximum(0., br - tl)
+
+ area_intersection = wh.prod(axis=1)
+ area_bbox = (bbox[2:] - bbox[:2]).prod()
+ area_candidates = (candidates[:, 2:] - candidates[:, :2]).prod(axis=1)
+ return area_intersection / (area_bbox + area_candidates - area_intersection)
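+    # Quick sanity check (hypothetical boxes):
+    #   iou(np.array([0., 0., 10., 10.]), np.array([[5., 5., 15., 15.]])) -> ~0.1429 (25 / 175)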
+
+
+def iou_cost(tracks, detections, track_indices=None, detection_indices=None):
+ """An intersection over union distance metric.
+ Parameters
+ ----------
+ tracks : List[Track]
+ A list of tracks.
+ detections : List[Detection]
+ A list of detections.
+ track_indices : Optional[List[int]]
+ A list of indices to tracks that should be matched. Defaults to
+ all `tracks`.
+ detection_indices : Optional[List[int]]
+ A list of indices to detections that should be matched. Defaults
+ to all `detections`.
+
+ Returns
+ -------
+ ndarray
+ Returns a cost matrix of shape
+ len(track_indices), len(detection_indices) where entry (i, j) is
+ `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
+
+ """
+ if track_indices is None:
+ track_indices = np.arange(len(tracks))
+ if detection_indices is None:
+ detection_indices = np.arange(len(detections))
+
+ cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
+ for row, track_idx in enumerate(track_indices):
+ #if tracks[track_idx].time_since_update > 1:
+ # cost_matrix[row, :] = INFTY_COST
+ # continue
+
+ bbox = tracks[track_idx].to_tlbr()
+ candidates = np.asarray([detections[i].tlbr for i in detection_indices])
+ cost_matrix[row, :] = 1. - iou(bbox, candidates)
+
+ return cost_matrix
diff --git a/StreamServer/src/analytic/action/Track/kalman_filter.py b/StreamServer/src/analytic/action/Track/kalman_filter.py
new file mode 100644
index 0000000..9e038e1
--- /dev/null
+++ b/StreamServer/src/analytic/action/Track/kalman_filter.py
@@ -0,0 +1,198 @@
+# vim: expandtab:ts=4:sw=4
+import numpy as np
+import scipy.linalg
+
+
+class KalmanFilter(object):
+ """A simple Kalman filter for tracking bounding boxes in image space.
+
+ The 8-dimensional state space
+ x, y, a, h, vx, vy, va, vh
+
+ contains the bounding box center position (x, y), aspect ratio a, height h,
+ and their respective velocities.
+
+ Object motion follows a constant velocity model. The bounding box location
+ (x, y, a, h) is taken as direct observation of the state space (linear
+ observation model).
+ """
+ def __init__(self):
+ ndim, dt = 4, 1.
+
+ # Create Kalman filter model matrices.
+ self._motion_mat = np.eye(2 * ndim, 2 * ndim)
+ for i in range(ndim):
+ self._motion_mat[i, ndim + i] = dt
+ self._update_mat = np.eye(ndim, 2 * ndim)
+
+ # Motion and observation uncertainty are chosen relative to the current
+ # state estimate. These weights control the amount of uncertainty in
+ # the model. This is a bit hacky.
+ self._std_weight_position = 1. / 20
+ self._std_weight_velocity = 1. / 160
+
+ def initiate(self, measurement):
+ """Create track from unassociated measurement.
+ Parameters
+ ----------
+ measurement : ndarray
+ Bounding box coordinates (x, y, a, h) with center position (x, y),
+ aspect ratio a, and height h.
+
+ Returns
+ -------
+ (ndarray, ndarray)
+ Returns the mean vector (8 dimensional) and covariance matrix (8x8
+ dimensional) of the new track. Unobserved velocities are initialized
+ to 0 mean.
+ """
+ mean_pos = measurement
+ mean_vel = np.zeros_like(mean_pos)
+ mean = np.r_[mean_pos, mean_vel]
+
+ std = [
+ 2 * self._std_weight_position * measurement[3],
+ 2 * self._std_weight_position * measurement[3],
+ 1e-2,
+ 2 * self._std_weight_position * measurement[3],
+ 10 * self._std_weight_velocity * measurement[3],
+ 10 * self._std_weight_velocity * measurement[3],
+ 1e-5,
+ 10 * self._std_weight_velocity * measurement[3]]
+ covariance = np.diag(np.square(std))
+ return mean, covariance
+
+ def predict(self, mean, covariance):
+ """Run Kalman filter prediction step.
+ Parameters
+ ----------
+ mean : ndarray
+ The 8 dimensional mean vector of the object state at the previous
+ time step.
+ covariance : ndarray
+ The 8x8 dimensional covariance matrix of the object state at the
+ previous time step.
+
+ Returns
+ -------
+ (ndarray, ndarray)
+ Returns the mean vector and covariance matrix of the predicted
+ state. Unobserved velocities are initialized to 0 mean.
+ """
+ std_pos = [
+ self._std_weight_position * mean[3],
+ self._std_weight_position * mean[3],
+ 1e-2,
+ self._std_weight_position * mean[3]]
+ std_vel = [
+ self._std_weight_velocity * mean[3],
+ self._std_weight_velocity * mean[3],
+ 1e-5,
+ self._std_weight_velocity * mean[3]]
+ motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
+
+ mean = np.dot(self._motion_mat, mean)
+ covariance = np.linalg.multi_dot((
+ self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
+
+ return mean, covariance
+
+ def project(self, mean, covariance):
+ """Project state distribution to measurement space.
+ Parameters
+ ----------
+ mean : ndarray
+ The state's mean vector (8 dimensional array).
+ covariance : ndarray
+ The state's covariance matrix (8x8 dimensional).
+
+ Returns
+ -------
+ (ndarray, ndarray)
+ Returns the projected mean and covariance matrix of the given state
+ estimate.
+ """
+ std = [
+ self._std_weight_position * mean[3],
+ self._std_weight_position * mean[3],
+ 1e-1,
+ self._std_weight_position * mean[3]]
+ innovation_cov = np.diag(np.square(std))
+
+ mean = np.dot(self._update_mat, mean)
+ covariance = np.linalg.multi_dot((
+ self._update_mat, covariance, self._update_mat.T))
+ return mean, covariance + innovation_cov
+
+ def update(self, mean, covariance, measurement):
+ """Run Kalman filter correction step.
+ Parameters
+ ----------
+ mean : ndarray
+ The predicted state's mean vector (8 dimensional).
+ covariance : ndarray
+ The state's covariance matrix (8x8 dimensional).
+ measurement : ndarray
+ The 4 dimensional measurement vector (x, y, a, h), where (x, y)
+ is the center position, a the aspect ratio, and h the height of the
+ bounding box.
+
+ Returns
+ -------
+ (ndarray, ndarray)
+ Returns the measurement-corrected state distribution.
+ """
+ projected_mean, projected_cov = self.project(mean, covariance)
+
+ chol_factor, lower = scipy.linalg.cho_factor(
+ projected_cov, lower=True, check_finite=False)
+ kalman_gain = scipy.linalg.cho_solve(
+ (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
+ check_finite=False).T
+ innovation = measurement - projected_mean
+
+ new_mean = mean + np.dot(innovation, kalman_gain.T)
+ new_covariance = covariance - np.linalg.multi_dot((
+ kalman_gain, projected_cov, kalman_gain.T))
+ return new_mean, new_covariance
+
+ def gating_distance(self, mean, covariance, measurements,
+ only_position=False):
+ """Compute gating distance between state distribution and measurements.
+ A suitable distance threshold can be obtained from `chi2inv95`. If
+ `only_position` is False, the chi-square distribution has 4 degrees of
+ freedom, otherwise 2.
+
+ Parameters
+ ----------
+ mean : ndarray
+ Mean vector over the state distribution (8 dimensional).
+ covariance : ndarray
+ Covariance of the state distribution (8x8 dimensional).
+ measurements : ndarray
+ An Nx4 dimensional matrix of N measurements, each in
+ format (x, y, a, h) where (x, y) is the bounding box center
+ position, a the aspect ratio, and h the height.
+ only_position : Optional[bool]
+ If True, distance computation is done with respect to the bounding
+ box center position only.
+
+ Returns
+ -------
+ ndarray
+ Returns an array of length N, where the i-th element contains the
+ squared Mahalanobis distance between (mean, covariance) and
+ `measurements[i]`.
+ """
+ mean, covariance = self.project(mean, covariance)
+ if only_position:
+ mean, covariance = mean[:2], covariance[:2, :2]
+ measurements = measurements[:, :2]
+
+ cholesky_factor = np.linalg.cholesky(covariance)
+ d = measurements - mean
+ z = scipy.linalg.solve_triangular(
+ cholesky_factor, d.T, lower=True, check_finite=False,
+ overwrite_b=True)
+ squared_maha = np.sum(z * z, axis=0)
+ return squared_maha
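+
+
+# Minimal self-check (not used by the tracker): one initiate -> predict -> update
+# cycle on a hypothetical (x, y, a, h) box. Run this file directly to execute it.
+if __name__ == '__main__':
+    kf = KalmanFilter()
+    box = np.array([320., 240., 0.5, 180.])  # center x, center y, aspect ratio, height
+    mean, cov = kf.initiate(box)
+    mean, cov = kf.predict(mean, cov)       # propagate one frame ahead
+    mean, cov = kf.update(mean, cov, box)   # correct with the same observation
+    print('posterior mean:', np.round(mean, 3))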
diff --git a/StreamServer/src/analytic/action/Track/linear_assignment.py b/StreamServer/src/analytic/action/Track/linear_assignment.py
new file mode 100644
index 0000000..ea76e81
--- /dev/null
+++ b/StreamServer/src/analytic/action/Track/linear_assignment.py
@@ -0,0 +1,191 @@
+import numpy as np
+#from sklearn.utils.linear_assignment_ import linear_assignment
+from scipy.optimize import linear_sum_assignment
+
+"""
+Table for the 0.95 quantile of the chi-square distribution with N degrees of
+freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
+function and used as Mahalanobis gating threshold.
+"""
+chi2inv95 = {
+ 1: 3.8415,
+ 2: 5.9915,
+ 3: 7.8147,
+ 4: 9.4877,
+ 5: 11.070,
+ 6: 12.592,
+ 7: 14.067,
+ 8: 15.507,
+ 9: 16.919}
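+# Example: chi2inv95[4] (~9.49) is the gating threshold used by gate_cost_matrix
+# when the full (x, y, a, h) measurement is gated; chi2inv95[2] when only_position=True.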
+INFTY_COST = 1e+5
+
+
+def min_cost_matching(distance_metric, max_distance, tracks, detections,
+ track_indices=None, detection_indices=None):
+ """Solve linear assignment problem.
+ Parameters
+ ----------
+    distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
+ The distance metric is given a list of tracks and detections as well as
+ a list of N track indices and M detection indices. The metric should
+ return the NxM dimensional cost matrix, where element (i, j) is the
+ association cost between the i-th track in the given track indices and
+ the j-th detection in the given detection_indices.
+ max_distance : float
+ Gating threshold. Associations with cost larger than this value are
+ disregarded.
+ tracks : List[Track]
+ A list of predicted tracks at the current time step.
+ detections : List[Detection]
+ A list of detections at the current time step.
+ track_indices : List[int]
+ List of track indices that maps rows in `cost_matrix` to tracks in
+ `tracks` (see description above).
+ detection_indices : List[int]
+ List of detection indices that maps columns in `cost_matrix` to
+ detections in `detections` (see description above).
+
+ Returns
+ -------
+ (List[(int, int)], List[int], List[int])
+ Returns a tuple with the following three entries:
+ * A list of matched track and detection indices.
+ * A list of unmatched track indices.
+ * A list of unmatched detection indices.
+ """
+ if track_indices is None:
+ track_indices = np.arange(len(tracks))
+ if detection_indices is None:
+ detection_indices = np.arange(len(detections))
+
+ if len(detection_indices) == 0 or len(track_indices) == 0:
+ return [], track_indices, detection_indices # Nothing to match.
+
+ cost_matrix = distance_metric(tracks, detections, track_indices, detection_indices)
+ cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
+ indices = linear_sum_assignment(cost_matrix)
+ indices = np.array(indices).transpose()
+
+ matches, unmatched_tracks, unmatched_detections = [], [], []
+ for col, detection_idx in enumerate(detection_indices):
+ if col not in indices[:, 1]:
+ unmatched_detections.append(detection_idx)
+ for row, track_idx in enumerate(track_indices):
+ if row not in indices[:, 0]:
+ unmatched_tracks.append(track_idx)
+ for row, col in indices:
+ track_idx = track_indices[row]
+ detection_idx = detection_indices[col]
+ if cost_matrix[row, col] > max_distance:
+ unmatched_tracks.append(track_idx)
+ unmatched_detections.append(detection_idx)
+ else:
+ matches.append((track_idx, detection_idx))
+
+ return matches, unmatched_tracks, unmatched_detections
+
+
+def matching_cascade(distance_metric, max_distance, cascade_depth, tracks, detections,
+ track_indices=None, detection_indices=None):
+ """Run matching cascade.
+ Parameters
+ ----------
+    distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
+ The distance metric is given a list of tracks and detections as well as
+ a list of N track indices and M detection indices. The metric should
+ return the NxM dimensional cost matrix, where element (i, j) is the
+ association cost between the i-th track in the given track indices and
+ the j-th detection in the given detection indices.
+ max_distance : float
+ Gating threshold. Associations with cost larger than this value are
+ disregarded.
+ cascade_depth: int
+        The cascade depth; this should be set to the maximum track age.
+ tracks : List[Track]
+ A list of predicted tracks at the current time step.
+ detections : List[Detection]
+ A list of detections at the current time step.
+ track_indices : Optional[List[int]]
+ List of track indices that maps rows in `cost_matrix` to tracks in
+ `tracks` (see description above). Defaults to all tracks.
+ detection_indices : Optional[List[int]]
+ List of detection indices that maps columns in `cost_matrix` to
+ detections in `detections` (see description above). Defaults to all
+ detections.
+
+ Returns
+ -------
+ (List[(int, int)], List[int], List[int])
+ Returns a tuple with the following three entries:
+ * A list of matched track and detection indices.
+ * A list of unmatched track indices.
+ * A list of unmatched detection indices.
+ """
+ if track_indices is None:
+ track_indices = list(range(len(tracks)))
+ if detection_indices is None:
+ detection_indices = list(range(len(detections)))
+
+ unmatched_detections = detection_indices
+ matches = []
+ for level in range(cascade_depth):
+ if len(unmatched_detections) == 0: # No detections left
+ break
+
+ track_indices_l = [k for k in track_indices
+ if tracks[k].time_since_update == 1 + level]
+ if len(track_indices_l) == 0: # Nothing to match at this level
+ continue
+
+ matches_l, _, unmatched_detections = min_cost_matching(
+ distance_metric, max_distance, tracks, detections, track_indices_l, unmatched_detections)
+ matches += matches_l
+
+ unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
+ return matches, unmatched_tracks, unmatched_detections
+
+
+def gate_cost_matrix(kf, cost_matrix, tracks, detections, track_indices, detection_indices,
+ gated_cost=INFTY_COST, only_position=False):
+ """Invalidate infeasible entries in cost matrix based on the state
+ distributions obtained by Kalman filtering.
+ Parameters
+ ----------
+ kf : The Kalman filter.
+ cost_matrix : ndarray
+ The NxM dimensional cost matrix, where N is the number of track indices
+ and M is the number of detection indices, such that entry (i, j) is the
+ association cost between `tracks[track_indices[i]]` and
+ `detections[detection_indices[j]]`.
+ tracks : List[Track]
+ A list of predicted tracks at the current time step.
+ detections : List[Detection]
+ A list of detections at the current time step.
+ track_indices : List[int]
+ List of track indices that maps rows in `cost_matrix` to tracks in
+ `tracks` (see description above).
+ detection_indices : List[int]
+ List of detection indices that maps columns in `cost_matrix` to
+ detections in `detections` (see description above).
+ gated_cost : Optional[float]
+        Entries in the cost matrix corresponding to infeasible associations are
+        set to this value. Defaults to a very large value.
+ only_position : Optional[bool]
+ If True, only the x, y position of the state distribution is considered
+ during gating. Defaults to False.
+
+ Returns
+ -------
+ ndarray
+ Returns the modified cost matrix.
+ """
+ gating_dim = 2 if only_position else 4
+ gating_threshold = chi2inv95[gating_dim]
+ measurements = np.asarray([detections[i].to_xyah() for i in detection_indices])
+ for row, track_idx in enumerate(track_indices):
+ track = tracks[track_idx]
+ gating_distance = kf.gating_distance(track.mean, track.covariance,
+ measurements, only_position)
+ cost_matrix[row, gating_distance > gating_threshold] = gated_cost
+
+ return cost_matrix
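+
+# Hedged sketch of how gating usually composes with the matcher: the supplied
+# metric first builds an appearance cost matrix, and gate_cost_matrix() then
+# overwrites entries whose Mahalanobis distance to the Kalman-predicted state
+# exceeds the chi-square threshold, so min_cost_matching() can never select
+# them. `appearance_cost` and `kf` are placeholders for the caller's objects.
+#
+#   def gated_metric(tracks, dets, track_indices, detection_indices):
+#       cost_matrix = appearance_cost(tracks, dets, track_indices, detection_indices)
+#       return gate_cost_matrix(kf, cost_matrix, tracks, dets,
+#                               track_indices, detection_indices)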
diff --git a/StreamServer/src/analytic/action/__init__.py b/StreamServer/src/analytic/action/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/StreamServer/src/analytic/action/action_model.py b/StreamServer/src/analytic/action/action_model.py
new file mode 100644
index 0000000..d7f9858
--- /dev/null
+++ b/StreamServer/src/analytic/action/action_model.py
@@ -0,0 +1,155 @@
+import os
+import cv2
+import time
+from fastapi import HTTPException
+import torch
+import argparse
+import numpy as np
+
+from .Detection.Utils import ResizePadding
+from .CameraLoader import CamLoader, CamLoader_Q
+from .DetectorLoader import TinyYOLOv3_onecls
+
+from .PoseEstimateLoader import SPPE_FastPose
+from .fn import draw_single
+
+from .Track.Tracker import Detection, Tracker
+from .ActionsEstLoader import TSSTG
+
+from config import CONFIG_FILE, YOLO_WEIGHT_FILE, SPPE_WEIGHT_FILE, TSSTG_WEIGHT_FILE
+
+INP_DETS = 384
+INP_POSE = (224, 160)
+POSE_BACKBONE = 'resnet50'
+SHOW_DETECTED = False
+SHOW_SKELETON = True
+DEVICE = 'cuda'
+
+resize_fn = ResizePadding(INP_DETS, INP_DETS)
+
+def preproc(image):
+ """preprocess function for CameraLoader.
+ """
+ image = resize_fn(image)
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ return image
+
+
+def kpt2bbox(kpt, ex=20):
+ """Get bbox that hold on all of the keypoints (x,y)
+ kpt: array of shape `(N, 2)`,
+ ex: (int) expand bounding box,
+ """
+ return np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex,
+ kpt[:, 0].max() + ex, kpt[:, 1].max() + ex))
+
+
+def generate_action_model_frame(source):
+ CAM_SOURCE = source
+
+ # Model initialization
+ detect_model = TinyYOLOv3_onecls(INP_DETS, device=DEVICE, config_file=CONFIG_FILE,
+ weight_file=YOLO_WEIGHT_FILE)
+ pose_model = SPPE_FastPose(POSE_BACKBONE, INP_POSE[0], INP_POSE[1], device=DEVICE, path=SPPE_WEIGHT_FILE)
+ action_model = TSSTG(weight_file=TSSTG_WEIGHT_FILE) # action model
+
+ # Tracker.
+ max_age = 30
+ tracker = Tracker(max_age=max_age, n_init=3)
+
+ cam = CamLoader(int(CAM_SOURCE) if CAM_SOURCE.isdigit() else CAM_SOURCE,
+ preprocess=preproc).start()
+
+ fps_time = 0
+ f = 0
+ while cam.grabbed():
+ f += 1
+ frame = cam.getitem()
+ image = frame.copy()
+
+        # Detect human bounding boxes in the frame with the detector model.
+ detected = detect_model.detect(frame, need_resize=False, expand_bb=10)
+
+        # Predict each track's bbox in the current frame from previous frames' information with the Kalman filter.
+ tracker.predict()
+        # Merge the two sources of predicted bboxes together.
+ for track in tracker.tracks:
+ det = torch.tensor([track.to_tlbr().tolist() + [0.5, 1.0, 0.0]], dtype=torch.float32)
+ detected = torch.cat([detected, det], dim=0) if detected is not None else det
+
+ detections = [] # List of Detections object for tracking.
+ if detected is not None:
+ #detected = non_max_suppression(detected[None, :], 0.45, 0.2)[0]
+            # Predict the skeleton pose within each bbox.
+ poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
+
+ # Create Detections object.
+ detections = [Detection(kpt2bbox(ps['keypoints'].numpy()),
+ np.concatenate((ps['keypoints'].numpy(),
+ ps['kp_score'].numpy()), axis=1),
+ ps['kp_score'].mean().numpy()) for ps in poses]
+
+ # VISUALIZE.
+ if SHOW_DETECTED:
+ for bb in detected[:, 0:5]:
+                frame = cv2.rectangle(frame, (int(bb[0]), int(bb[1])), (int(bb[2]), int(bb[3])), (0, 0, 255), 1)
+
+        # Update tracks by matching track information between the current and previous frames,
+        # or create a new track if no match is found.
+ tracker.update(detections)
+
+ # Predict Actions of each track.
+ for i, track in enumerate(tracker.tracks):
+ if not track.is_confirmed():
+ continue
+
+ track_id = track.track_id
+ bbox = track.to_tlbr().astype(int)
+ center = track.get_center().astype(int)
+
+ action = 'pending'
+ clr = (0, 255, 0)
+            # Use a 30-frame time window for the action prediction.
+ if len(track.keypoints_list) == 30:
+ pts = np.array(track.keypoints_list, dtype=np.float32)
+ out = action_model.predict(pts, frame.shape[:2])
+ action_name = action_model.class_names[out[0].argmax()]
+ action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100)
+ if action_name == 'Fall Down':
+ clr = (255, 0, 0)
+ elif action_name == 'Lying Down':
+ clr = (255, 200, 0)
+
+ # VISUALIZE.
+ if track.time_since_update == 0:
+ if SHOW_SKELETON:
+ frame = draw_single(frame, track.keypoints_list[-1])
+ frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1)
+ frame = cv2.putText(frame, str(track_id), (center[0], center[1]), cv2.FONT_HERSHEY_COMPLEX,
+ 0.4, (255, 0, 0), 2)
+ frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15), cv2.FONT_HERSHEY_COMPLEX,
+ 0.4, clr, 1)
+
+ # Show Frame.
+ frame = cv2.resize(frame, (0, 0), fx=2., fy=2.)
+ frame = cv2.putText(frame, '%d, FPS: %f' % (f, 1.0 / (time.time() - fps_time)),
+ (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
+ frame = frame[:, :, ::-1]
+ fps_time = time.time()
+
+        # Encode the frame and yield it as an MJPEG part for video streaming.
+ ret, buffer = cv2.imencode('.jpg', frame)
+ if not ret:
+ # If encoding fails, raise an error to stop the streaming
+ raise HTTPException(status_code=500, detail="Frame encoding failed")
+ yield (b'--frame\r\n'
+ b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')
+
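+# Hypothetical wiring sketch (the actual route lives elsewhere in the stream
+# server; `app` and the path are assumptions): since the generator yields
+# multipart JPEG chunks, it is typically served as an MJPEG stream.
+#
+#   from fastapi.responses import StreamingResponse
+#
+#   @app.get('/action-stream')
+#   def action_stream(source: str):
+#       return StreamingResponse(
+#           generate_action_model_frame(source),
+#           media_type='multipart/x-mixed-replace; boundary=frame')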
+
+def output_action_detection():
+ pass
\ No newline at end of file
diff --git a/StreamServer/src/analytic/action/fn.py b/StreamServer/src/analytic/action/fn.py
new file mode 100644
index 0000000..7231a88
--- /dev/null
+++ b/StreamServer/src/analytic/action/fn.py
@@ -0,0 +1,234 @@
+import re
+import cv2
+import time
+import math
+import collections.abc
+import torch
+import numpy as np
+
+RED = (0, 0, 255)
+GREEN = (0, 255, 0)
+BLUE = (255, 0, 0)
+CYAN = (255, 255, 0)
+YELLOW = (0, 255, 255)
+ORANGE = (0, 165, 255)
+PURPLE = (255, 0, 255)
+
+"""COCO_PAIR = [(0, 1), (0, 2), (1, 3), (2, 4), # Head
+ (5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
+ (17, 11), (17, 12), # Body
+ (11, 13), (12, 14), (13, 15), (14, 16)]"""
+COCO_PAIR = [(0, 13), (1, 2), (1, 3), (3, 5), (2, 4), (4, 6), (13, 7), (13, 8), # Body
+ (7, 9), (8, 10), (9, 11), (10, 12)]
+POINT_COLORS = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), # Nose, LEye, REye, LEar, REar
+ (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77), # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist
+ (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), (0, 255, 255)] # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck
+LINE_COLORS = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), (77, 255, 222),
+ (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), (77, 222, 255),
+ (255, 156, 127), (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36)]
+
+MPII_PAIR = [(8, 9), (11, 12), (11, 10), (2, 1), (1, 0), (13, 14), (14, 15), (3, 4), (4, 5),
+ (8, 7), (7, 6), (6, 2), (6, 3), (8, 12), (8, 13)]
+
+numpy_type_map = {
+ 'float64': torch.DoubleTensor,
+ 'float32': torch.FloatTensor,
+ 'float16': torch.HalfTensor,
+ 'int64': torch.LongTensor,
+ 'int32': torch.IntTensor,
+ 'int16': torch.ShortTensor,
+ 'int8': torch.CharTensor,
+ 'uint8': torch.ByteTensor,
+}
+
+_use_shared_memory = True
+
+
+def collate_fn(batch):
+ r"""Puts each data field into a tensor with outer dimension batch size"""
+
+ error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
+ elem_type = type(batch[0])
+
+ if isinstance(batch[0], torch.Tensor):
+ out = None
+ if _use_shared_memory:
+ # If we're in a background process, concatenate directly into a
+ # shared memory tensor to avoid an extra copy
+ numel = sum([x.numel() for x in batch])
+ storage = batch[0].storage()._new_shared(numel)
+ out = batch[0].new(storage)
+ return torch.stack(batch, 0, out=out)
+ elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
+ and elem_type.__name__ != 'string_':
+ elem = batch[0]
+ if elem_type.__name__ == 'ndarray':
+ # array of string classes and object
+ if re.search('[SaUO]', elem.dtype.str) is not None:
+ raise TypeError(error_msg.format(elem.dtype))
+
+ return torch.stack([torch.from_numpy(b) for b in batch], 0)
+ if elem.shape == (): # scalars
+ py_type = float if elem.dtype.name.startswith('float') else int
+ return numpy_type_map[elem.dtype.name](list(map(py_type, batch)))
+ elif isinstance(batch[0], int):
+ return torch.LongTensor(batch)
+ elif isinstance(batch[0], float):
+ return torch.DoubleTensor(batch)
+ elif isinstance(batch[0], (str, bytes)):
+ return batch
+    elif isinstance(batch[0], collections.abc.Mapping):
+        return {key: collate_fn([d[key] for d in batch]) for key in batch[0]}
+    elif isinstance(batch[0], collections.abc.Sequence):
+ transposed = zip(*batch)
+ return [collate_fn(samples) for samples in transposed]
+
+ raise TypeError((error_msg.format(type(batch[0]))))
+
+
+def collate_fn_list(batch):
+ img, inp, im_name = zip(*batch)
+ img = collate_fn(img)
+ im_name = collate_fn(im_name)
+
+ return img, inp, im_name
+
+
+def draw_single(frame, pts, joint_format='coco'):
+ if joint_format == 'coco':
+ l_pair = COCO_PAIR
+ p_color = POINT_COLORS
+ line_color = LINE_COLORS
+ elif joint_format == 'mpii':
+ l_pair = MPII_PAIR
+ p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED,BLUE,BLUE]
+ else:
+        raise NotImplementedError
+
+ part_line = {}
+ pts = np.concatenate((pts, np.expand_dims((pts[1, :] + pts[2, :]) / 2, 0)), axis=0)
+ for n in range(pts.shape[0]):
+ if pts[n, 2] <= 0.05:
+ continue
+ cor_x, cor_y = int(pts[n, 0]), int(pts[n, 1])
+ part_line[n] = (cor_x, cor_y)
+ cv2.circle(frame, (cor_x, cor_y), 3, p_color[n], -1)
+
+ for i, (start_p, end_p) in enumerate(l_pair):
+ if start_p in part_line and end_p in part_line:
+ start_xy = part_line[start_p]
+ end_xy = part_line[end_p]
+ cv2.line(frame, start_xy, end_xy, line_color[i], int(1*(pts[start_p, 2] + pts[end_p, 2]) + 1))
+ return frame
+
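+# Usage note (inferred from the call sites, not authoritative): draw_single
+# expects a (13, 3) array of [x, y, score] rows in this project's reduced COCO
+# order; a 14th neck point is appended inside the function as the midpoint of
+# points 1 and 2 (the shoulders) before the COCO_PAIR limbs are drawn.
+#
+#   frame = draw_single(frame, pts)   # pts: np.ndarray of shape (13, 3)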
+
+def vis_frame_fast(frame, im_res, joint_format='coco'):
+ """
+ frame: frame image
+    im_res: list of per-person prediction results
+    joint_format: coco or mpii
+
+ return rendered image
+ """
+ if joint_format == 'coco':
+ l_pair = COCO_PAIR
+ p_color = POINT_COLORS
+ line_color = LINE_COLORS
+ elif joint_format == 'mpii':
+ l_pair = MPII_PAIR
+ p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED,BLUE,BLUE]
+ else:
+        raise NotImplementedError
+
+ #im_name = im_res['imgname'].split('/')[-1]
+ img = frame
+ for human in im_res: # ['result']:
+ part_line = {}
+ kp_preds = human['keypoints']
+ kp_scores = human['kp_score']
+ kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[1, :]+kp_preds[2, :]) / 2, 0)))
+ kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[1, :]+kp_scores[2, :]) / 2, 0)))
+ # Draw keypoints
+ for n in range(kp_scores.shape[0]):
+ if kp_scores[n] <= 0.05:
+ continue
+ cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1])
+ part_line[n] = (cor_x, cor_y)
+ cv2.circle(img, (cor_x, cor_y), 4, p_color[n], -1)
+ # Draw limbs
+ for i, (start_p, end_p) in enumerate(l_pair):
+ if start_p in part_line and end_p in part_line:
+ start_xy = part_line[start_p]
+ end_xy = part_line[end_p]
+                cv2.line(img, start_xy, end_xy, line_color[i], int(2 * (kp_scores[start_p] + kp_scores[end_p]) + 1))
+ return img
+
+
+def vis_frame(frame, im_res, joint_format='coco'):
+ """
+ frame: frame image
+    im_res: prediction results dict (with 'imgname' and 'result' entries)
+    joint_format: coco or mpii
+
+ return rendered image
+ """
+ if joint_format == 'coco':
+ l_pair = COCO_PAIR
+ p_color = POINT_COLORS
+ line_color = LINE_COLORS
+ elif joint_format == 'mpii':
+ l_pair = MPII_PAIR
+ p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
+ line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
+ else:
+ raise NotImplementedError
+
+ im_name = im_res['imgname'].split('/')[-1]
+ img = frame
+ height, width = img.shape[:2]
+ img = cv2.resize(img, (int(width/2), int(height/2)))
+ for human in im_res['result']:
+ part_line = {}
+ kp_preds = human['keypoints']
+ kp_scores = human['kp_score']
+ kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[5, :]+kp_preds[6, :]) / 2, 0)))
+ kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[5, :]+kp_scores[6, :]) / 2, 0)))
+ # Draw keypoints
+ for n in range(kp_scores.shape[0]):
+ if kp_scores[n] <= 0.05:
+ continue
+ cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1])
+ part_line[n] = (int(cor_x/2), int(cor_y/2))
+ bg = img.copy()
+ cv2.circle(bg, (int(cor_x/2), int(cor_y/2)), 2, p_color[n], -1)
+            # Blend the keypoint circle into the image with score-weighted transparency.
+            transparency = float(max(0, min(1, kp_scores[n])))
+ img = cv2.addWeighted(bg, transparency, img, 1-transparency, 0)
+ # Draw limbs
+ for i, (start_p, end_p) in enumerate(l_pair):
+ if start_p in part_line and end_p in part_line:
+ start_xy = part_line[start_p]
+ end_xy = part_line[end_p]
+ bg = img.copy()
+
+ X = (start_xy[0], end_xy[0])
+ Y = (start_xy[1], end_xy[1])
+ mX = np.mean(X)
+ mY = np.mean(Y)
+ length = ((Y[0] - Y[1]) ** 2 + (X[0] - X[1]) ** 2) ** 0.5
+ angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1]))
+                stickwidth = int((kp_scores[start_p] + kp_scores[end_p]) + 1)
+                polygon = cv2.ellipse2Poly((int(mX), int(mY)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+ cv2.fillConvexPoly(bg, polygon, line_color[i])
+ #cv2.line(bg, start_xy, end_xy, line_color[i], (2 * (kp_scores[start_p] + kp_scores[end_p])) + 1)
+                transparency = float(max(0, min(1, 0.5 * (kp_scores[start_p] + kp_scores[end_p]))))
+ img = cv2.addWeighted(bg, transparency, img, 1-transparency, 0)
+ img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC)
+ return img
+
+
+def getTime(time1=0):
+ if not time1:
+ return time.time()
+ else:
+ interval = time.time() - time1
+ return time.time(), interval
\ No newline at end of file
diff --git a/StreamServer/src/analytic/action/pPose_nms.py b/StreamServer/src/analytic/action/pPose_nms.py
new file mode 100644
index 0000000..7867a42
--- /dev/null
+++ b/StreamServer/src/analytic/action/pPose_nms.py
@@ -0,0 +1,284 @@
+# -*- coding: utf-8 -*-
+import torch
+import json
+import os
+import zipfile
+import time
+from multiprocessing.dummy import Pool as ThreadPool
+import numpy as np
+
+''' Constant Configuration '''
+delta1 = 1
+mu = 1.7
+delta2 = 2.65
+gamma = 22.48
+scoreThreds = 0.3
+matchThreds = 5
+areaThres = 0 # 40 * 40.5
+alpha = 0.1
+#pool = ThreadPool(4)
+
+
+def pose_nms(bboxes, bbox_scores, pose_preds, pose_scores):
+ """
+ Parametric Pose NMS algorithm
+ bboxes: bbox locations list (n, 4)
+ bbox_scores: bbox scores list (n,)
+ pose_preds: pose locations list (n, 17, 2)
+ pose_scores: pose scores list (n, 17, 1)
+ """
+ global ori_pose_preds, ori_pose_scores, ref_dists
+
+ pose_scores[pose_scores == 0] = 1e-5
+
+ final_result = []
+
+ ori_bboxes = bboxes.clone()
+ ori_bbox_scores = bbox_scores.clone()
+ ori_pose_preds = pose_preds.clone()
+ ori_pose_scores = pose_scores.clone()
+
+ xmax = bboxes[:, 2]
+ xmin = bboxes[:, 0]
+ ymax = bboxes[:, 3]
+ ymin = bboxes[:, 1]
+
+ widths = xmax - xmin
+ heights = ymax - ymin
+ ref_dists = alpha * np.maximum(widths, heights)
+
+ nsamples = bboxes.shape[0]
+ human_scores = pose_scores.mean(dim=1)
+
+ human_ids = np.arange(nsamples)
+ # Do pPose-NMS
+ pick = []
+ merge_ids = []
+ while human_scores.shape[0] != 0:
+ # Pick the one with highest score
+ pick_id = torch.argmax(human_scores)
+ pick.append(human_ids[pick_id])
+ # num_visPart = torch.sum(pose_scores[pick_id] > 0.2)
+
+ # Get numbers of match keypoints by calling PCK_match
+ ref_dist = ref_dists[human_ids[pick_id]]
+ simi = get_parametric_distance(pick_id, pose_preds, pose_scores, ref_dist)
+ num_match_keypoints = PCK_match(pose_preds[pick_id], pose_preds, ref_dist)
+
+        # Delete humans that have at least matchThreds overlapping keypoints or a high similarity score.
+ delete_ids = torch.from_numpy(np.arange(human_scores.shape[0]))[
+ (simi > gamma) | (num_match_keypoints >= matchThreds)]
+
+ if delete_ids.shape[0] == 0:
+ delete_ids = pick_id
+ #else:
+ # delete_ids = torch.from_numpy(delete_ids)
+
+ merge_ids.append(human_ids[delete_ids])
+ pose_preds = np.delete(pose_preds, delete_ids, axis=0)
+ pose_scores = np.delete(pose_scores, delete_ids, axis=0)
+ human_ids = np.delete(human_ids, delete_ids)
+ human_scores = np.delete(human_scores, delete_ids, axis=0)
+ bbox_scores = np.delete(bbox_scores, delete_ids, axis=0)
+
+ assert len(merge_ids) == len(pick)
+ bboxs_pick = ori_bboxes[pick]
+ preds_pick = ori_pose_preds[pick]
+ scores_pick = ori_pose_scores[pick]
+ bbox_scores_pick = ori_bbox_scores[pick]
+ #final_result = pool.map(filter_result, zip(scores_pick, merge_ids, preds_pick, pick, bbox_scores_pick))
+ #final_result = [item for item in final_result if item is not None]
+
+ for j in range(len(pick)):
+ ids = np.arange(pose_preds.shape[1])
+ max_score = torch.max(scores_pick[j, ids, 0])
+
+ if max_score < scoreThreds:
+ continue
+
+ # Merge poses
+ merge_id = merge_ids[j]
+ merge_pose, merge_score = p_merge_fast(
+ preds_pick[j], ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick[j]])
+
+ max_score = torch.max(merge_score[ids])
+ if max_score < scoreThreds:
+ continue
+
+ xmax = max(merge_pose[:, 0])
+ xmin = min(merge_pose[:, 0])
+ ymax = max(merge_pose[:, 1])
+ ymin = min(merge_pose[:, 1])
+
+ if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < areaThres:
+ continue
+
+ final_result.append({
+ 'bbox': bboxs_pick[j],
+ 'bbox_score': bbox_scores_pick[j],
+ 'keypoints': merge_pose - 0.3,
+ 'kp_score': merge_score,
+ 'proposal_score': torch.mean(merge_score) + bbox_scores_pick[j] + 1.25 * max(merge_score)
+ })
+
+ return final_result
+
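+# Hypothetical call sketch (placeholder values, shapes only): inputs are torch
+# tensors and the output is one dict per surviving person.
+#
+#   bboxes      = torch.rand(n, 4)       # (x1, y1, x2, y2)
+#   bbox_scores = torch.rand(n)
+#   pose_preds  = torch.rand(n, 17, 2)   # 17 keypoints per person
+#   pose_scores = torch.rand(n, 17, 1)
+#   people = pose_nms(bboxes, bbox_scores, pose_preds, pose_scores)
+#   # each item: {'bbox', 'bbox_score', 'keypoints', 'kp_score', 'proposal_score'}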
+
+def filter_result(args):
+ score_pick, merge_id, pred_pick, pick, bbox_score_pick = args
+ global ori_pose_preds, ori_pose_scores, ref_dists
+ ids = np.arange(17)
+ max_score = torch.max(score_pick[ids, 0])
+
+ if max_score < scoreThreds:
+ return None
+
+ # Merge poses
+ merge_pose, merge_score = p_merge_fast(
+ pred_pick, ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick])
+
+ max_score = torch.max(merge_score[ids])
+ if max_score < scoreThreds:
+ return None
+
+ xmax = max(merge_pose[:, 0])
+ xmin = min(merge_pose[:, 0])
+ ymax = max(merge_pose[:, 1])
+ ymin = min(merge_pose[:, 1])
+
+ if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < 40 * 40.5:
+ return None
+
+ return {
+ 'keypoints': merge_pose - 0.3,
+ 'kp_score': merge_score,
+ 'proposal_score': torch.mean(merge_score) + bbox_score_pick + 1.25 * max(merge_score)
+ }
+
+
+def p_merge(ref_pose, cluster_preds, cluster_scores, ref_dist):
+ """
+ Score-weighted pose merging
+ INPUT:
+ ref_pose: reference pose -- [17, 2]
+ cluster_preds: redundant poses -- [n, 17, 2]
+ cluster_scores: redundant poses score -- [n, 17, 1]
+ ref_dist: reference scale -- Constant
+ OUTPUT:
+ final_pose: merged pose -- [17, 2]
+ final_score: merged score -- [17]
+ """
+ dist = torch.sqrt(torch.sum(
+ torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2),
+ dim=2
+ )) # [n, 17]
+
+ kp_num = 17
+ ref_dist = min(ref_dist, 15)
+
+ mask = (dist <= ref_dist)
+ final_pose = torch.zeros(kp_num, 2)
+ final_score = torch.zeros(kp_num)
+
+ if cluster_preds.dim() == 2:
+ cluster_preds.unsqueeze_(0)
+ cluster_scores.unsqueeze_(0)
+ if mask.dim() == 1:
+ mask.unsqueeze_(0)
+
+ for i in range(kp_num):
+ cluster_joint_scores = cluster_scores[:, i][mask[:, i]] # [k, 1]
+ cluster_joint_location = cluster_preds[:, i, :][mask[:, i].unsqueeze(
+ -1).repeat(1, 2)].view((torch.sum(mask[:, i]), -1))
+
+        # Get a normalized score
+ normed_scores = cluster_joint_scores / torch.sum(cluster_joint_scores)
+
+ # Merge poses by a weighted sum
+ final_pose[i, 0] = torch.dot(cluster_joint_location[:, 0], normed_scores.squeeze(-1))
+ final_pose[i, 1] = torch.dot(cluster_joint_location[:, 1], normed_scores.squeeze(-1))
+
+ final_score[i] = torch.dot(cluster_joint_scores.transpose(0, 1).squeeze(0), normed_scores.squeeze(-1))
+
+ return final_pose, final_score
+
+
+def p_merge_fast(ref_pose, cluster_preds, cluster_scores, ref_dist):
+ """
+ Score-weighted pose merging
+ INPUT:
+ ref_pose: reference pose -- [17, 2]
+ cluster_preds: redundant poses -- [n, 17, 2]
+ cluster_scores: redundant poses score -- [n, 17, 1]
+ ref_dist: reference scale -- Constant
+ OUTPUT:
+ final_pose: merged pose -- [17, 2]
+ final_score: merged score -- [17]
+ """
+ dist = torch.sqrt(torch.sum(
+ torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2),
+ dim=2
+ ))
+
+ kp_num = 17
+ ref_dist = min(ref_dist, 15)
+
+ mask = (dist <= ref_dist)
+ final_pose = torch.zeros(kp_num, 2)
+ final_score = torch.zeros(kp_num)
+
+ if cluster_preds.dim() == 2:
+ cluster_preds.unsqueeze_(0)
+ cluster_scores.unsqueeze_(0)
+ if mask.dim() == 1:
+ mask.unsqueeze_(0)
+
+ # Weighted Merge
+ masked_scores = cluster_scores.mul(mask.float().unsqueeze(-1))
+ normed_scores = masked_scores / torch.sum(masked_scores, dim=0)
+
+ final_pose = torch.mul(cluster_preds, normed_scores.repeat(1, 1, 2)).sum(dim=0)
+ final_score = torch.mul(masked_scores, normed_scores).sum(dim=0)
+ return final_pose, final_score
+
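+# In effect, each joint j of the merged pose is a score-weighted average of the
+# candidate poses whose joint j lies within ref_dist of the reference joint:
+#   final_pose[j] = sum_k w[k, j] * cluster_preds[k, j],
+#   w[k, j] = masked_score[k, j] / sum_k masked_score[k, j]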
+
+def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist):
+ pick_preds = all_preds[i]
+ pred_scores = keypoint_scores[i]
+ dist = torch.sqrt(torch.sum(
+ torch.pow(pick_preds[np.newaxis, :] - all_preds, 2),
+ dim=2
+ ))
+ mask = (dist <= 1)
+
+ # Define a keypoints distance
+ score_dists = torch.zeros(all_preds.shape[0], all_preds.shape[1])
+ keypoint_scores.squeeze_()
+ if keypoint_scores.dim() == 1:
+ keypoint_scores.unsqueeze_(0)
+ if pred_scores.dim() == 1:
+ pred_scores.unsqueeze_(1)
+    # Repeat the predicted scores so they broadcast against all candidate poses.
+ pred_scores = pred_scores.repeat(1, all_preds.shape[0]).transpose(0, 1)
+
+ score_dists[mask] = torch.tanh(pred_scores[mask] / delta1) *\
+ torch.tanh(keypoint_scores[mask] / delta1)
+
+ point_dist = torch.exp((-1) * dist / delta2)
+ final_dist = torch.sum(score_dists, dim=1) + mu * torch.sum(point_dist, dim=1)
+
+ return final_dist
+
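+# What the code above computes, written out (delta1, delta2 and mu are the
+# module-level constants): for each candidate pose p,
+#   final_dist(p) = sum_j tanh(s_pick[j] / delta1) * tanh(s_p[j] / delta1) * 1[d[j] <= 1]
+#                   + mu * sum_j exp(-d[j] / delta2)
+# where d[j] is the distance between corresponding joints and s are keypoint scores.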
+
+def PCK_match(pick_pred, all_preds, ref_dist):
+ dist = torch.sqrt(torch.sum(
+ torch.pow(pick_pred[np.newaxis, :] - all_preds, 2),
+ dim=2
+ ))
+ ref_dist = min(ref_dist, 7)
+ num_match_keypoints = torch.sum(
+ dist / ref_dist <= 1,
+ dim=1
+ )
+
+ return num_match_keypoints
diff --git a/StreamServer/src/analytic/action/pose_utils.py b/StreamServer/src/analytic/action/pose_utils.py
new file mode 100644
index 0000000..934cef6
--- /dev/null
+++ b/StreamServer/src/analytic/action/pose_utils.py
@@ -0,0 +1,27 @@
+import numpy as np
+
+
+def normalize_points_with_size(xy, width, height, flip=False):
+ """Normalize scale points in image with size of image to (0-1).
+ xy : (frames, parts, xy) or (parts, xy)
+ """
+ if xy.ndim == 2:
+ xy = np.expand_dims(xy, 0)
+ xy[:, :, 0] /= width
+ xy[:, :, 1] /= height
+ if flip:
+ xy[:, :, 0] = 1 - xy[:, :, 0]
+ return xy
+
+
+def scale_pose(xy):
+ """Normalize pose points by scale with max/min value of each pose.
+ xy : (frames, parts, xy) or (parts, xy)
+ """
+ if xy.ndim == 2:
+ xy = np.expand_dims(xy, 0)
+ xy_min = np.nanmin(xy, axis=1)
+ xy_max = np.nanmax(xy, axis=1)
+ for i in range(xy.shape[0]):
+ xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1
+ return xy.squeeze()
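+
+# A small, self-contained sketch of how these two helpers are chained before a
+# skeleton sequence is fed to the action model (toy values; a 640x480 frame is
+# assumed):
+#
+#   pts = np.array([[[320., 240.], [300., 200.], [340., 200.]]])  # (frames, parts, xy)
+#   pts = normalize_points_with_size(pts, 640, 480)               # values in (0, 1)
+#   pts = scale_pose(pts)                                         # values in [-1, 1]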