diff --git a/.gitignore b/.gitignore index ef76d3c..2b1a207 100644 --- a/.gitignore +++ b/.gitignore @@ -186,4 +186,5 @@ dist-ssr config.json -ActionDetector/ +*.pth +*.cfg \ No newline at end of file diff --git a/StreamServer/src/analytic/action/ActionsEstLoader.py b/StreamServer/src/analytic/action/ActionsEstLoader.py new file mode 100644 index 0000000..90d1818 --- /dev/null +++ b/StreamServer/src/analytic/action/ActionsEstLoader.py @@ -0,0 +1,52 @@ +import os +import torch +import numpy as np + +from .Actionsrecognition.Models import TwoStreamSpatialTemporalGraph +from .pose_utils import normalize_points_with_size, scale_pose + + +class TSSTG(object): + """Two-Stream Spatial Temporal Graph Model Loader. + Args: + weight_file: (str) Path to trained weights file. + device: (str) Device to load the model on 'cpu' or 'cuda'. + """ + def __init__(self, + weight_file='./Models/TSSTG/tsstg-model.pth', + device='cuda'): + self.graph_args = {'strategy': 'spatial'} + self.class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', + 'Stand up', 'Sit down', 'Fall Down'] + self.num_class = len(self.class_names) + self.device = device + + self.model = TwoStreamSpatialTemporalGraph(self.graph_args, self.num_class).to(self.device) + self.model.load_state_dict(torch.load(weight_file)) + self.model.eval() + + def predict(self, pts, image_size): + """Predict actions from single person skeleton points and score in time sequence. + Args: + pts: (numpy array) points and score in shape `(t, v, c)` where + t : inputs sequence (time steps)., + v : number of graph node (body parts)., + c : channel (x, y, score)., + image_size: (tuple of int) width, height of image frame. + Returns: + (numpy array) Probability of each class actions. + """ + pts[:, :, :2] = normalize_points_with_size(pts[:, :, :2], image_size[0], image_size[1]) + pts[:, :, :2] = scale_pose(pts[:, :, :2]) + pts = np.concatenate((pts, np.expand_dims((pts[:, 1, :] + pts[:, 2, :]) / 2, 1)), axis=1) + + pts = torch.tensor(pts, dtype=torch.float32) + pts = pts.permute(2, 0, 1)[None, :] + + mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :] + mot = mot.to(self.device) + pts = pts.to(self.device) + + out = self.model((pts, mot)) + + return out.detach().cpu().numpy() diff --git a/StreamServer/src/analytic/action/Actionsrecognition/Models.py b/StreamServer/src/analytic/action/Actionsrecognition/Models.py new file mode 100644 index 0000000..62b66e4 --- /dev/null +++ b/StreamServer/src/analytic/action/Actionsrecognition/Models.py @@ -0,0 +1,244 @@ +### Reference from: https://github.com/yysijie/st-gcn/tree/master/net + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from .Utils import Graph + + +class GraphConvolution(nn.Module): + """The basic module for applying a graph convolution. + Args: + - in_channel: (int) Number of channels in the input sequence data. + - out_channels: (int) Number of channels produced by the convolution. + - kernel_size: (int) Size of the graph convolving kernel. + - t_kernel_size: (int) Size of the temporal convolving kernel. + - t_stride: (int, optional) Stride of the temporal convolution. Default: 1 + - t_padding: (int, optional) Temporal zero-padding added to both sides of + the input. Default: 0 + - t_dilation: (int, optional) Spacing between temporal kernel elements. Default: 1 + - bias: (bool, optional) If `True`, adds a learnable bias to the output. 
+ Default: `True` + Shape: + - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`, + A: Graph adjacency matrix in :math:`(K, V, V)`, + - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)` + + where + :math:`N` is a batch size, + :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, + :math:`T_{in}/T_{out}` is a length of input/output sequence, + :math:`V` is the number of graph nodes. + + """ + def __init__(self, in_channels, out_channels, kernel_size, + t_kernel_size=1, + t_stride=1, + t_padding=0, + t_dilation=1, + bias=True): + super().__init__() + + self.kernel_size = kernel_size + self.conv = nn.Conv2d(in_channels, + out_channels * kernel_size, + kernel_size=(t_kernel_size, 1), + padding=(t_padding, 0), + stride=(t_stride, 1), + dilation=(t_dilation, 1), + bias=bias) + + def forward(self, x, A): + x = self.conv(x) + n, kc, t, v = x.size() + x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v) + x = torch.einsum('nkctv,kvw->nctw', (x, A)) + + return x.contiguous() + + +class st_gcn(nn.Module): + """Applies a spatial temporal graph convolution over an input graph sequence. + Args: + - in_channels: (int) Number of channels in the input sequence data. + - out_channels: (int) Number of channels produced by the convolution. + - kernel_size: (tuple) Size of the temporal convolving kernel and + graph convolving kernel. + - stride: (int, optional) Stride of the temporal convolution. Default: 1 + - dropout: (int, optional) Dropout rate of the final output. Default: 0 + - residual: (bool, optional) If `True`, applies a residual mechanism. + Default: `True` + Shape: + - Inputs x: Graph sequence in :math: `(N, in_channels, T_{in}, V)`, + A: Graph Adjecency matrix in :math: `(K, V, V)`, + - Output: Graph sequence out in :math: `(N, out_channels, T_{out}, V)` + where + :math:`N` is a batch size, + :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, + :math:`T_{in}/T_{out}` is a length of input/output sequence, + :math:`V` is the number of graph nodes. + """ + def __init__(self, in_channels, out_channels, kernel_size, + stride=1, + dropout=0, + residual=True): + super().__init__() + assert len(kernel_size) == 2 + assert kernel_size[0] % 2 == 1 + + padding = ((kernel_size[0] - 1) // 2, 0) + + self.gcn = GraphConvolution(in_channels, out_channels, kernel_size[1]) + self.tcn = nn.Sequential(nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), + nn.Conv2d(out_channels, + out_channels, + (kernel_size[0], 1), + (stride, 1), + padding), + nn.BatchNorm2d(out_channels), + nn.Dropout(dropout, inplace=True) + ) + + if not residual: + self.residual = lambda x: 0 + elif (in_channels == out_channels) and (stride == 1): + self.residual = lambda x: x + else: + self.residual = nn.Sequential(nn.Conv2d(in_channels, + out_channels, + kernel_size=1, + stride=(stride, 1)), + nn.BatchNorm2d(out_channels) + ) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x, A): + res = self.residual(x) + x = self.gcn(x, A) + x = self.tcn(x) + res + + return self.relu(x) + + +class StreamSpatialTemporalGraph(nn.Module): + """Spatial temporal graph convolutional networks. + Args: + - in_channels: (int) Number of input channels. + - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class. + - num_class: (int) Number of class outputs. If `None` return pooling features of + the last st-gcn layer instead. + - edge_importance_weighting: (bool) If `True`, adds a learnable importance + weighting to the edges of the graph. 
+ - **kwargs: (optional) Other parameters for graph convolution units. + Shape: + - Input: :math:`(N, in_channels, T_{in}, V_{in})` + - Output: :math:`(N, num_class)` where + :math:`N` is a batch size, + :math:`T_{in}` is a length of input sequence, + :math:`V_{in}` is the number of graph nodes, + or If num_class is `None`: `(N, out_channels)` + :math:`out_channels` is number of out_channels of the last layer. + """ + def __init__(self, in_channels, graph_args, num_class=None, + edge_importance_weighting=True, **kwargs): + super().__init__() + # Load graph. + graph = Graph(**graph_args) + A = torch.tensor(graph.A, dtype=torch.float32, requires_grad=False) + self.register_buffer('A', A) + + # Networks. + spatial_kernel_size = A.size(0) + temporal_kernel_size = 9 + kernel_size = (temporal_kernel_size, spatial_kernel_size) + kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'} + + self.data_bn = nn.BatchNorm1d(in_channels * A.size(1)) + self.st_gcn_networks = nn.ModuleList(( + st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0), + st_gcn(64, 64, kernel_size, 1, **kwargs), + st_gcn(64, 64, kernel_size, 1, **kwargs), + st_gcn(64, 64, kernel_size, 1, **kwargs), + st_gcn(64, 128, kernel_size, 2, **kwargs), + st_gcn(128, 128, kernel_size, 1, **kwargs), + st_gcn(128, 128, kernel_size, 1, **kwargs), + st_gcn(128, 256, kernel_size, 2, **kwargs), + st_gcn(256, 256, kernel_size, 1, **kwargs), + st_gcn(256, 256, kernel_size, 1, **kwargs) + )) + + # initialize parameters for edge importance weighting. + if edge_importance_weighting: + self.edge_importance = nn.ParameterList([ + nn.Parameter(torch.ones(A.size())) + for i in self.st_gcn_networks + ]) + else: + self.edge_importance = [1] * len(self.st_gcn_networks) + + if num_class is not None: + self.cls = nn.Conv2d(256, num_class, kernel_size=1) + else: + self.cls = lambda x: x + + def forward(self, x): + # data normalization. + N, C, T, V = x.size() + x = x.permute(0, 3, 1, 2).contiguous() # (N, V, C, T) + x = x.view(N, V * C, T) + x = self.data_bn(x) + x = x.view(N, V, C, T) + x = x.permute(0, 2, 3, 1).contiguous() + x = x.view(N, C, T, V) + + # forward. + for gcn, importance in zip(self.st_gcn_networks, self.edge_importance): + x = gcn(x, self.A * importance) + + x = F.avg_pool2d(x, x.size()[2:]) + x = self.cls(x) + x = x.view(x.size(0), -1) + + return x + + +class TwoStreamSpatialTemporalGraph(nn.Module): + """Two inputs spatial temporal graph convolutional networks. + Args: + - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class. + - num_class: (int) Number of class outputs. + - edge_importance_weighting: (bool) If `True`, adds a learnable importance + weighting to the edges of the graph. + - **kwargs: (optional) Other parameters for graph convolution units. + Shape: + - Input: :tuple of math:`((N, 3, T, V), (N, 2, T, V))` + for points and motions stream where. 
+ :math:`N` is a batch size, + :math:`in_channels` is data channels (3 is (x, y, score)), (2 is (mot_x, mot_y)) + :math:`T` is a length of input sequence, + :math:`V` is the number of graph nodes, + - Output: :math:`(N, num_class)` + """ + def __init__(self, graph_args, num_class, edge_importance_weighting=True, + **kwargs): + super().__init__() + self.pts_stream = StreamSpatialTemporalGraph(3, graph_args, None, + edge_importance_weighting, + **kwargs) + self.mot_stream = StreamSpatialTemporalGraph(2, graph_args, None, + edge_importance_weighting, + **kwargs) + + self.fcn = nn.Linear(256 * 2, num_class) + + def forward(self, inputs): + out1 = self.pts_stream(inputs[0]) + out2 = self.mot_stream(inputs[1]) + + concat = torch.cat([out1, out2], dim=-1) + out = self.fcn(concat) + + return torch.sigmoid(out) diff --git a/StreamServer/src/analytic/action/Actionsrecognition/Utils.py b/StreamServer/src/analytic/action/Actionsrecognition/Utils.py new file mode 100644 index 0000000..04a9b10 --- /dev/null +++ b/StreamServer/src/analytic/action/Actionsrecognition/Utils.py @@ -0,0 +1,123 @@ +### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py + +import os +import torch +import numpy as np + + +class Graph: + """The Graph to model the skeletons extracted by the Alpha-Pose. + Args: + - strategy: (string) must be one of the follow candidates + - uniform: Uniform Labeling, + - distance: Distance Partitioning, + - spatial: Spatial Configuration, + For more information, please refer to the section 'Partition Strategies' + in our paper (https://arxiv.org/abs/1801.07455). + - layout: (string) must be one of the follow candidates + - coco_cut: Is COCO format but cut 4 joints (L-R ears, L-R eyes) out. + - max_hop: (int) the maximal distance between two connected nodes. + - dilation: (int) controls the spacing between the kernel points. 
+ """ + def __init__(self, + layout='coco_cut', + strategy='uniform', + max_hop=1, + dilation=1): + self.max_hop = max_hop + self.dilation = dilation + + self.get_edge(layout) + self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop) + self.get_adjacency(strategy) + + def get_edge(self, layout): + if layout == 'coco_cut': + self.num_node = 14 + self_link = [(i, i) for i in range(self.num_node)] + neighbor_link = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10), + (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)] + self.edge = self_link + neighbor_link + self.center = 13 + else: + raise ValueError('This layout is not supported!') + + def get_adjacency(self, strategy): + valid_hop = range(0, self.max_hop + 1, self.dilation) + adjacency = np.zeros((self.num_node, self.num_node)) + for hop in valid_hop: + adjacency[self.hop_dis == hop] = 1 + normalize_adjacency = normalize_digraph(adjacency) + + if strategy == 'uniform': + A = np.zeros((1, self.num_node, self.num_node)) + A[0] = normalize_adjacency + self.A = A + elif strategy == 'distance': + A = np.zeros((len(valid_hop), self.num_node, self.num_node)) + for i, hop in enumerate(valid_hop): + A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis == + hop] + self.A = A + elif strategy == 'spatial': + A = [] + for hop in valid_hop: + a_root = np.zeros((self.num_node, self.num_node)) + a_close = np.zeros((self.num_node, self.num_node)) + a_further = np.zeros((self.num_node, self.num_node)) + for i in range(self.num_node): + for j in range(self.num_node): + if self.hop_dis[j, i] == hop: + if self.hop_dis[j, self.center] == self.hop_dis[i, self.center]: + a_root[j, i] = normalize_adjacency[j, i] + elif self.hop_dis[j, self.center] > self.hop_dis[i, self.center]: + a_close[j, i] = normalize_adjacency[j, i] + else: + a_further[j, i] = normalize_adjacency[j, i] + if hop == 0: + A.append(a_root) + else: + A.append(a_root + a_close) + A.append(a_further) + A = np.stack(A) + self.A = A + #self.A = np.swapaxes(np.swapaxes(A, 0, 1), 1, 2) + else: + raise ValueError("This strategy is not supported!") + + +def get_hop_distance(num_node, edge, max_hop=1): + A = np.zeros((num_node, num_node)) + for i, j in edge: + A[j, i] = 1 + A[i, j] = 1 + + # compute hop steps + hop_dis = np.zeros((num_node, num_node)) + np.inf + transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)] + arrive_mat = (np.stack(transfer_mat) > 0) + for d in range(max_hop, -1, -1): + hop_dis[arrive_mat[d]] = d + return hop_dis + + +def normalize_digraph(A): + Dl = np.sum(A, 0) + num_node = A.shape[0] + Dn = np.zeros((num_node, num_node)) + for i in range(num_node): + if Dl[i] > 0: + Dn[i, i] = Dl[i]**(-1) + AD = np.dot(A, Dn) + return AD + + +def normalize_undigraph(A): + Dl = np.sum(A, 0) + num_node = A.shape[0] + Dn = np.zeros((num_node, num_node)) + for i in range(num_node): + if Dl[i] > 0: + Dn[i, i] = Dl[i]**(-0.5) + DAD = np.dot(np.dot(Dn, A), Dn) + return DAD diff --git a/StreamServer/src/analytic/action/Actionsrecognition/train.py b/StreamServer/src/analytic/action/Actionsrecognition/train.py new file mode 100644 index 0000000..818fade --- /dev/null +++ b/StreamServer/src/analytic/action/Actionsrecognition/train.py @@ -0,0 +1,216 @@ +import os +import time +import torch +import pickle +import numpy as np +import torch.nn.functional as F +from shutil import copyfile +from tqdm import tqdm +from torch.utils import data +from torch.optim.adadelta import Adadelta +from sklearn.model_selection import train_test_split + +from .Models import * 
+from Visualizer import plot_graphs, plot_confusion_metrix + + +save_folder = 'saved/TSSTG(pts+mot)-01(cf+hm-hm)' + +device = 'cuda' +epochs = 30 +batch_size = 32 + +# DATA FILES. +# Should be in format of +# inputs: (N_samples, time_steps, graph_node, channels), +# labels: (N_samples, num_class) +# and do some of normalizations on it. Default data create from: +# Data.create_dataset_(1-3).py +# where +# time_steps: Number of frame input sequence, Default: 30 +# graph_node: Number of node in skeleton, Default: 14 +# channels: Inputs data (x, y and scores), Default: 3 +# num_class: Number of pose class to train, Default: 7 + +data_files = ['../Data/Coffee_room_new-set(labelXscrw).pkl', + '../Data/Home_new-set(labelXscrw).pkl'] +class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', + 'Stand up', 'Sit down', 'Fall Down'] +num_class = len(class_names) + + +def load_dataset(data_files, batch_size, split_size=0): + """Load data files into torch DataLoader with/without spliting train-test. + """ + features, labels = [], [] + for fil in data_files: + with open(fil, 'rb') as f: + fts, lbs = pickle.load(f) + features.append(fts) + labels.append(lbs) + del fts, lbs + features = np.concatenate(features, axis=0) + labels = np.concatenate(labels, axis=0) + + if split_size > 0: + x_train, x_valid, y_train, y_valid = train_test_split(features, labels, test_size=split_size, + random_state=9) + train_set = data.TensorDataset(torch.tensor(x_train, dtype=torch.float32).permute(0, 3, 1, 2), + torch.tensor(y_train, dtype=torch.float32)) + valid_set = data.TensorDataset(torch.tensor(x_valid, dtype=torch.float32).permute(0, 3, 1, 2), + torch.tensor(y_valid, dtype=torch.float32)) + train_loader = data.DataLoader(train_set, batch_size, shuffle=True) + valid_loader = data.DataLoader(valid_set, batch_size) + else: + train_set = data.TensorDataset(torch.tensor(features, dtype=torch.float32).permute(0, 3, 1, 2), + torch.tensor(labels, dtype=torch.float32)) + train_loader = data.DataLoader(train_set, batch_size, shuffle=True) + valid_loader = None + return train_loader, valid_loader + + +def accuracy_batch(y_pred, y_true): + return (y_pred.argmax(1) == y_true.argmax(1)).mean() + + +def set_training(model, mode=True): + for p in model.parameters(): + p.requires_grad = mode + model.train(mode) + return model + + +if __name__ == '__main__': + save_folder = os.path.join(os.path.dirname(__file__), save_folder) + if not os.path.exists(save_folder): + os.makedirs(save_folder) + + # DATA. + train_loader, _ = load_dataset(data_files[0:1], batch_size) + valid_loader, train_loader_ = load_dataset(data_files[1:2], batch_size, 0.2) + + train_loader = data.DataLoader(data.ConcatDataset([train_loader.dataset, train_loader_.dataset]), + batch_size, shuffle=True) + dataloader = {'train': train_loader, 'valid': valid_loader} + del train_loader_ + + # MODEL. + graph_args = {'strategy': 'spatial'} + model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device) + + #optimizer = torch.optim.Adam(model.parameters(), lr=0.001) + optimizer = Adadelta(model.parameters()) + + losser = torch.nn.BCELoss() + + # TRAINING. 
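+    # The loop below trains on a tuple of two streams:
+    #   pts: pose tensor in (N, C=3, T, V) order (x, y, score per joint), as
+    #        produced by the permute(0, 3, 1, 2) in load_dataset(),
+    #   mot: motion stream in (N, 2, T-1, V), the frame-to-frame difference of
+    #        the (x, y) channels, computed on the fly inside the loop.
+    # The model outputs per-class sigmoid probabilities, hence BCELoss against
+    # the smoothed, score-weighted targets built by Data/create_dataset_3.py.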
+ loss_list = {'train': [], 'valid': []} + accu_list = {'train': [], 'valid': []} + for e in range(epochs): + print('Epoch {}/{}'.format(e, epochs - 1)) + for phase in ['train', 'valid']: + if phase == 'train': + model = set_training(model, True) + else: + model = set_training(model, False) + + run_loss = 0.0 + run_accu = 0.0 + with tqdm(dataloader[phase], desc=phase) as iterator: + for pts, lbs in iterator: + # Create motion input by distance of points (x, y) of the same node + # in two frames. + mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :] + + mot = mot.to(device) + pts = pts.to(device) + lbs = lbs.to(device) + + # Forward. + out = model((pts, mot)) + loss = losser(out, lbs) + + if phase == 'train': + # Backward. + model.zero_grad() + loss.backward() + optimizer.step() + + run_loss += loss.item() + accu = accuracy_batch(out.detach().cpu().numpy(), + lbs.detach().cpu().numpy()) + run_accu += accu + + iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format( + loss.item(), accu)) + iterator.update() + #break + loss_list[phase].append(run_loss / len(iterator)) + accu_list[phase].append(run_accu / len(iterator)) + #break + + print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:' + ' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1], + loss_list['valid'][-1], accu_list['valid'][-1])) + + # SAVE. + torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model.pth')) + + plot_graphs(list(loss_list.values()), list(loss_list.keys()), + 'Last Train: {:.2f}, Valid: {:.2f}'.format( + loss_list['train'][-1], loss_list['valid'][-1] + ), 'Loss', xlim=[0, epochs], + save=os.path.join(save_folder, 'loss_graph.png')) + plot_graphs(list(accu_list.values()), list(accu_list.keys()), + 'Last Train: {:.2f}, Valid: {:.2f}'.format( + accu_list['train'][-1], accu_list['valid'][-1] + ), 'Accu', xlim=[0, epochs], + save=os.path.join(save_folder, 'accu_graph.png')) + + #break + + del train_loader, valid_loader + + model.load_state_dict(torch.load(os.path.join(save_folder, 'tsstg-model.pth'))) + + # EVALUATION. 
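+    # Evaluation pass: the weights just reloaded from 'tsstg-model.pth' are run
+    # once over data_files[1], with set_training(model, False) putting the
+    # network in eval mode and freezing all parameters. Loss and accuracy are
+    # accumulated and a per-class confusion matrix is saved next to the model.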
+ model = set_training(model, False) + data_file = data_files[1] + eval_loader, _ = load_dataset([data_file], 32) + + print('Evaluation.') + run_loss = 0.0 + run_accu = 0.0 + y_preds = [] + y_trues = [] + with tqdm(eval_loader, desc='eval') as iterator: + for pts, lbs in iterator: + mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :] + mot = mot.to(device) + pts = pts.to(device) + lbs = lbs.to(device) + + out = model((pts, mot)) + loss = losser(out, lbs) + + run_loss += loss.item() + accu = accuracy_batch(out.detach().cpu().numpy(), + lbs.detach().cpu().numpy()) + run_accu += accu + + y_preds.extend(out.argmax(1).detach().cpu().numpy()) + y_trues.extend(lbs.argmax(1).cpu().numpy()) + + iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format( + loss.item(), accu)) + iterator.update() + + run_loss = run_loss / len(iterator) + run_accu = run_accu / len(iterator) + + plot_confusion_metrix(y_trues, y_preds, class_names, 'Eval on: {}\nLoss: {:.4f}, Accu{:.4f}'.format( + os.path.basename(data_file), run_loss, run_accu + ), 'true', save=os.path.join(save_folder, '{}-confusion_matrix.png'.format( + os.path.basename(data_file).split('.')[0]))) + + print('Eval Loss: {:.4f}, Accu: {:.4f}'.format(run_loss, run_accu)) diff --git a/StreamServer/src/analytic/action/CameraLoader.py b/StreamServer/src/analytic/action/CameraLoader.py new file mode 100644 index 0000000..cde320a --- /dev/null +++ b/StreamServer/src/analytic/action/CameraLoader.py @@ -0,0 +1,204 @@ +import os +import cv2 +import time +import torch +import numpy as np + +from queue import Queue +from threading import Thread, Lock + + +class CamLoader: + """Use threading to capture a frame from camera for faster frame load. + Recommend for camera or webcam. + + Args: + camera: (int, str) Source of camera or video., + preprocess: (Callable function) to process the frame before return. + """ + def __init__(self, camera, preprocess=None, ori_return=False): + self.stream = cv2.VideoCapture(camera) + assert self.stream.isOpened(), 'Cannot read camera source!' 
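+        # Note: `camera` is passed straight to cv2.VideoCapture, so it can be a
+        # device index or a video file/stream path. This loader keeps only the
+        # most recent frame (update() overwrites self.frame on every iteration),
+        # and getitem() returns copies taken under `read_lock`; set
+        # ori_return=True to also get the unprocessed frame back.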
+ self.fps = self.stream.get(cv2.CAP_PROP_FPS) + self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)), + int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))) + + self.stopped = False + self.ret = False + self.frame = None + self.ori_frame = None + self.read_lock = Lock() + self.ori = ori_return + + self.preprocess_fn = preprocess + + def start(self): + self.t = Thread(target=self.update, args=()) # , daemon=True) + self.t.start() + c = 0 + while not self.ret: + time.sleep(0.1) + c += 1 + if c > 20: + self.stop() + raise TimeoutError('Can not get a frame from camera!!!') + return self + + def update(self): + while not self.stopped: + ret, frame = self.stream.read() + self.read_lock.acquire() + self.ori_frame = frame.copy() + if ret and self.preprocess_fn is not None: + frame = self.preprocess_fn(frame) + + self.ret, self.frame = ret, frame + self.read_lock.release() + + def grabbed(self): + """Return `True` if can read a frame.""" + return self.ret + + def getitem(self): + self.read_lock.acquire() + frame = self.frame.copy() + ori_frame = self.ori_frame.copy() + self.read_lock.release() + if self.ori: + return frame, ori_frame + else: + return frame + + def stop(self): + if self.stopped: + return + self.stopped = True + if self.t.is_alive(): + self.t.join() + self.stream.release() + + def __del__(self): + if self.stream.isOpened(): + self.stream.release() + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.stream.isOpened(): + self.stream.release() + + +class CamLoader_Q: + """Use threading and queue to capture a frame and store to queue for pickup in sequence. + Recommend for video file. + + Args: + camera: (int, str) Source of camera or video., + batch_size: (int) Number of batch frame to store in queue. Default: 1, + queue_size: (int) Maximum queue size. Default: 256, + preprocess: (Callable function) to process the frame before return. + """ + def __init__(self, camera, batch_size=1, queue_size=256, preprocess=None): + self.stream = cv2.VideoCapture(camera) + assert self.stream.isOpened(), 'Cannot read camera source!' + self.fps = self.stream.get(cv2.CAP_PROP_FPS) + self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)), + int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))) + + # Queue for storing each frames. 
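+        # update() reads `batch_size` frames at a time, applies `preprocess`,
+        # stacks them with np.stack and puts the batch on the queue; getitem()
+        # pops one batch and squeezes the batch dimension. If the consumer falls
+        # behind and the queue fills up, the whole queue is cleared.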
+ + self.stopped = False + self.batch_size = batch_size + self.Q = Queue(maxsize=queue_size) + + self.preprocess_fn = preprocess + + def start(self): + t = Thread(target=self.update, args=(), daemon=True).start() + c = 0 + while not self.grabbed(): + time.sleep(0.1) + c += 1 + if c > 20: + self.stop() + raise TimeoutError('Can not get a frame from camera!!!') + return self + + def update(self): + while not self.stopped: + if not self.Q.full(): + frames = [] + for k in range(self.batch_size): + ret, frame = self.stream.read() + if not ret: + self.stop() + return + + if self.preprocess_fn is not None: + frame = self.preprocess_fn(frame) + + frames.append(frame) + frames = np.stack(frames) + self.Q.put(frames) + else: + with self.Q.mutex: + self.Q.queue.clear() + # time.sleep(0.05) + + def grabbed(self): + """Return `True` if can read a frame.""" + return self.Q.qsize() > 0 + + def getitem(self): + return self.Q.get().squeeze() + + def stop(self): + if self.stopped: + return + self.stopped = True + self.stream.release() + + def __len__(self): + return self.Q.qsize() + + def __del__(self): + if self.stream.isOpened(): + self.stream.release() + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.stream.isOpened(): + self.stream.release() + + +if __name__ == '__main__': + fps_time = 0 + + # Using threading. + cam = CamLoader(0).start() + while cam.grabbed(): + frames = cam.getitem() + + frames = cv2.putText(frames, 'FPS: %f' % (1.0 / (time.time() - fps_time)), + (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + fps_time = time.time() + cv2.imshow('frame', frames) + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + cam.stop() + cv2.destroyAllWindows() + + # Normal video capture. + """cam = cv2.VideoCapture(0) + while True: + ret, frame = cam.read() + if ret: + #time.sleep(0.05) + #frame = (cv2.flip(frame, 1) / 255.).astype(np.float) + + frame = cv2.putText(frame, 'FPS: %f' % (1.0 / (time.time() - fps_time)), + (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + fps_time = time.time() + cv2.imshow('frame', frame) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + cam.release() + cv2.destroyAllWindows()""" \ No newline at end of file diff --git a/StreamServer/src/analytic/action/Data/create_dataset_1.py b/StreamServer/src/analytic/action/Data/create_dataset_1.py new file mode 100644 index 0000000..b1b79d8 --- /dev/null +++ b/StreamServer/src/analytic/action/Data/create_dataset_1.py @@ -0,0 +1,85 @@ +""" +This script to create .csv videos frames action annotation file. + +- It will play a video frame by frame control the flow by [a] and [d] + to play previos or next frame. +- Open the annot_file (.csv) and label each frame of video with number + of action class. +""" + +import os +import cv2 +import time +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', + 'Stand up', 'Sit down', 'Fall Down'] # label. + +video_folder = '../Data/falldata/Home/Videos' +annot_file = '../Data/Home_new.csv' + +index_video_to_play = 0 # Choose video to play. 
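+# The annotation file written by create_csv() has one row per video frame with
+# columns [video, frame, label]. `label` is initialised to 0 and is meant to be
+# filled in by hand; the playback loop below displays it as
+# class_names[label - 1], i.e. labels are treated as 1-based indices into
+# `class_names` (1 = Standing ... 7 = Fall Down).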
+ + +def create_csv(folder): + list_file = sorted(os.listdir(folder)) + cols = ['video', 'frame', 'label'] + df = pd.DataFrame(columns=cols) + for fil in list_file: + cap = cv2.VideoCapture(os.path.join(folder, fil)) + frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + video = np.array([fil] * frames_count) + frame = np.arange(1, frames_count + 1) + label = np.array([0] * frames_count) + rows = np.stack([video, frame, label], axis=1) + df = df.append(pd.DataFrame(rows, columns=cols), + ignore_index=True) + cap.release() + df.to_csv(annot_file, index=False) + + +if not os.path.exists(annot_file): + create_csv(video_folder) + +annot = pd.read_csv(annot_file) +video_list = annot.iloc[:, 0].unique() +video_file = os.path.join(video_folder, video_list[index_video_to_play]) +print(os.path.basename(video_file)) + +annot = annot[annot['video'] == video_list[index_video_to_play]].reset_index(drop=True) +frames_idx = annot.iloc[:, 1].tolist() + +cap = cv2.VideoCapture(video_file) +frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + +assert frames_count == len(frames_idx), 'frame count not equal! {} and {}'.format( + len(frames_idx), frames_count +) + +i = 0 +while True: + cap.set(cv2.CAP_PROP_POS_FRAMES, i) + ret, frame = cap.read() + if ret: + cls_name = class_names[int(annot.iloc[i, -1]) - 1] + frame = cv2.resize(frame, (0, 0), fx=1.5, fy=1.5) + frame = cv2.putText(frame, 'Frame: {} Pose: {}'.format(i+1, cls_name), + (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + cv2.imshow('frame', frame) + + key = cv2.waitKey(0) & 0xFF + if key == ord('q'): + break + elif key == ord('d'): + i += 1 + continue + elif key == ord('a'): + i -= 1 + continue + else: + break + +cap.release() +cv2.destroyAllWindows() diff --git a/StreamServer/src/analytic/action/Data/create_dataset_2.py b/StreamServer/src/analytic/action/Data/create_dataset_2.py new file mode 100644 index 0000000..1bf1684 --- /dev/null +++ b/StreamServer/src/analytic/action/Data/create_dataset_2.py @@ -0,0 +1,137 @@ +""" +This script to extract skeleton joints position and score. + +- This 'annot_folder' is a action class and bounding box for each frames that came with dataset. + Should be in format of [frame_idx, action_cls, xmin, ymin, xmax, ymax] + Use for crop a person to use in pose estimation model. +- If have no annotation file you can leave annot_folder = '' for use Detector model to get the + bounding box. +""" + +import os +import cv2 +import time +import torch +import pandas as pd +import numpy as np +import torchvision.transforms as transforms + +from DetectorLoader import TinyYOLOv3_onecls +from PoseEstimateLoader import SPPE_FastPose +from fn import vis_frame_fast + +save_path = '../../Data/Home_new-pose+score.csv' + +annot_file = '../../Data/Home_new.csv' # from create_dataset_1.py +video_folder = '../Data/falldata/Home/Videos' +annot_folder = '../Data/falldata/Home/Annotation_files' # bounding box annotation for each frame. + +# DETECTION MODEL. +detector = TinyYOLOv3_onecls() + +# POSE MODEL. +inp_h = 320 +inp_w = 256 +pose_estimator = SPPE_FastPose(inp_h, inp_w) + +# with score. 
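+# Output layout: 2 id columns (video, frame) + 13 COCO joints x (x, y, score)
+# + 1 action label = 42 columns per frame. The four face joints (eyes/ears) of
+# full COCO are not kept ('coco_cut' layout); a 14th node (the mid-point of the
+# shoulders) is added later in create_dataset_3.py / ActionsEstLoader.py.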
+columns = ['video', 'frame', 'Nose_x', 'Nose_y', 'Nose_s', 'LShoulder_x', 'LShoulder_y', 'LShoulder_s', + 'RShoulder_x', 'RShoulder_y', 'RShoulder_s', 'LElbow_x', 'LElbow_y', 'LElbow_s', 'RElbow_x', + 'RElbow_y', 'RElbow_s', 'LWrist_x', 'LWrist_y', 'LWrist_s', 'RWrist_x', 'RWrist_y', 'RWrist_s', + 'LHip_x', 'LHip_y', 'LHip_s', 'RHip_x', 'RHip_y', 'RHip_s', 'LKnee_x', 'LKnee_y', 'LKnee_s', + 'RKnee_x', 'RKnee_y', 'RKnee_s', 'LAnkle_x', 'LAnkle_y', 'LAnkle_s', 'RAnkle_x', 'RAnkle_y', + 'RAnkle_s', 'label'] + + +def normalize_points_with_size(points_xy, width, height, flip=False): + points_xy[:, 0] /= width + points_xy[:, 1] /= height + if flip: + points_xy[:, 0] = 1 - points_xy[:, 0] + return points_xy + + +annot = pd.read_csv(annot_file) +vid_list = annot['video'].unique() +for vid in vid_list: + print(f'Process on: {vid}') + df = pd.DataFrame(columns=columns) + cur_row = 0 + + # Pose Labels. + frames_label = annot[annot['video'] == vid].reset_index(drop=True) + + cap = cv2.VideoCapture(os.path.join(video_folder, vid)) + frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), + int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) + + # Bounding Boxs Labels. + annot_file = os.path.join(annot_folder, vid.split('.')[0], '.txt') + annot = None + if os.path.exists(annot_file): + annot = pd.read_csv(annot_file, header=None, + names=['frame_idx', 'class', 'xmin', 'ymin', 'xmax', 'ymax']) + annot = annot.dropna().reset_index(drop=True) + + assert frames_count == len(annot), 'frame count not equal! {} and {}'.format(frames_count, len(annot)) + + fps_time = 0 + i = 1 + while True: + ret, frame = cap.read() + if ret: + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + cls_idx = int(frames_label[frames_label['frame'] == i]['label']) + + if annot: + bb = np.array(annot.iloc[i-1, 2:].astype(int)) + else: + bb = detector.detect(frame)[0, :4].numpy().astype(int) + bb[:2] = np.maximum(0, bb[:2] - 5) + bb[2:] = np.minimum(frame_size, bb[2:] + 5) if bb[2:].any() != 0 else bb[2:] + + result = [] + if bb.any() != 0: + result = pose_estimator.predict(frame, torch.tensor(bb[None, ...]), + torch.tensor([[1.0]])) + + if len(result) > 0: + pt_norm = normalize_points_with_size(result[0]['keypoints'].numpy().copy(), + frame_size[0], frame_size[1]) + pt_norm = np.concatenate((pt_norm, result[0]['kp_score']), axis=1) + + #idx = result[0]['kp_score'] <= 0.05 + #pt_norm[idx.squeeze()] = np.nan + row = [vid, i, *pt_norm.flatten().tolist(), cls_idx] + scr = result[0]['kp_score'].mean() + else: + row = [vid, i, *[np.nan] * (13 * 3), cls_idx] + scr = 0.0 + + df.loc[cur_row] = row + cur_row += 1 + + # VISUALIZE. 
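+            # Draw the detected skeleton and bounding box, overlay the frame
+            # index, pose label and mean keypoint score, then flip the RGB
+            # frame (converted earlier for the pose model) back to BGR with
+            # frame[:, :, ::-1] before cv2.imshow.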
+ frame = vis_frame_fast(frame, result) + frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2) + frame = cv2.putText(frame, 'Frame: {}, Pose: {}, Score: {:.4f}'.format(i, cls_idx, scr), + (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + frame = frame[:, :, ::-1] + fps_time = time.time() + i += 1 + + cv2.imshow('frame', frame) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + else: + break + + cap.release() + cv2.destroyAllWindows() + + if os.path.exists(save_path): + df.to_csv(save_path, mode='a', header=False, index=False) + else: + df.to_csv(save_path, mode='w', index=False) + diff --git a/StreamServer/src/analytic/action/Data/create_dataset_3.py b/StreamServer/src/analytic/action/Data/create_dataset_3.py new file mode 100644 index 0000000..a767bb8 --- /dev/null +++ b/StreamServer/src/analytic/action/Data/create_dataset_3.py @@ -0,0 +1,127 @@ +""" +This script to create dataset and labels by clean off some NaN, do a normalization, +label smoothing and label weights by scores. + +""" +import os +import pickle +import numpy as np +import pandas as pd + + +class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', + 'Stand up', 'Sit down', 'Fall Down'] +main_parts = ['LShoulder_x', 'LShoulder_y', 'RShoulder_x', 'RShoulder_y', 'LHip_x', 'LHip_y', + 'RHip_x', 'RHip_y'] +main_idx_parts = [1, 2, 7, 8, -1] # 1.5 + +csv_pose_file = '../Data/Coffee_room_new-pose+score.csv' +save_path = '../../Data/Coffee_room_new-set(labelXscrw).pkl' + +# Params. +smooth_labels_step = 8 +n_frames = 30 +skip_frame = 1 + +annot = pd.read_csv(csv_pose_file) + +# Remove NaN. +idx = annot.iloc[:, 2:-1][main_parts].isna().sum(1) > 0 +idx = np.where(idx)[0] +annot = annot.drop(idx) +# One-Hot Labels. +label_onehot = pd.get_dummies(annot['label']) +annot = annot.drop('label', axis=1).join(label_onehot) +cols = label_onehot.columns.values + + +def scale_pose(xy): + """ + Normalize pose points by scale with max/min value of each pose. + xy : (frames, parts, xy) or (parts, xy) + """ + if xy.ndim == 2: + xy = np.expand_dims(xy, 0) + xy_min = np.nanmin(xy, axis=1) + xy_max = np.nanmax(xy, axis=1) + for i in range(xy.shape[0]): + xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1 + return xy.squeeze() + + +def seq_label_smoothing(labels, max_step=10): + steps = 0 + remain_step = 0 + target_label = 0 + active_label = 0 + start_change = 0 + max_val = np.max(labels) + min_val = np.min(labels) + for i in range(labels.shape[0]): + if remain_step > 0: + if i >= start_change: + labels[i][active_label] = max_val * remain_step / steps + labels[i][target_label] = max_val * (steps - remain_step) / steps \ + if max_val * (steps - remain_step) / steps else min_val + remain_step -= 1 + continue + + diff_index = np.where(np.argmax(labels[i:i+max_step], axis=1) - np.argmax(labels[i]) != 0)[0] + if len(diff_index) > 0: + start_change = i + remain_step // 2 + steps = diff_index[0] + remain_step = steps + target_label = np.argmax(labels[i + remain_step]) + active_label = np.argmax(labels[i]) + return labels + + +feature_set = np.empty((0, n_frames, 14, 3)) +labels_set = np.empty((0, len(cols))) +vid_list = annot['video'].unique() +for vid in vid_list: + print(f'Process on: {vid}') + data = annot[annot['video'] == vid].reset_index(drop=True).drop(columns='video') + + # Label Smoothing. + esp = 0.1 + data[cols] = data[cols] * (1 - esp) + (1 - data[cols]) * esp / (len(cols) - 1) + data[cols] = seq_label_smoothing(data[cols].values, smooth_labels_step) + + # Separate continuous frames. 
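+    # Rows with NaN key joints were dropped above, so frame numbers can jump.
+    # Split each video into runs of (nearly) consecutive frames: a new run
+    # starts whenever the frame index jumps by 10 or more. Each run is later
+    # cut into sliding windows of n_frames (30) steps to form the samples.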
+ frames = data['frame'].values + frames_set = [] + fs = [0] + for i in range(1, len(frames)): + if frames[i] < frames[i-1] + 10: + fs.append(i) + else: + frames_set.append(fs) + fs = [i] + frames_set.append(fs) + + for fs in frames_set: + xys = data.iloc[fs, 1:-len(cols)].values.reshape(-1, 13, 3) + # Scale pose normalize. + xys[:, :, :2] = scale_pose(xys[:, :, :2]) + # Add center point. + xys = np.concatenate((xys, np.expand_dims((xys[:, 1, :] + xys[:, 2, :]) / 2, 1)), axis=1) + + # Weighting main parts score. + scr = xys[:, :, -1].copy() + scr[:, main_idx_parts] = np.minimum(scr[:, main_idx_parts] * 1.5, 1.0) + # Mean score. + scr = scr.mean(1) + + # Targets. + lb = data.iloc[fs, -len(cols):].values + # Apply points score mean to all labels. + lb = lb * scr[:, None] + + for i in range(xys.shape[0] - n_frames): + feature_set = np.append(feature_set, xys[i:i+n_frames][None, ...], axis=0) + labels_set = np.append(labels_set, lb[i:i+n_frames].mean(0)[None, ...], axis=0) + + +"""with open(save_path, 'wb') as f: + pickle.dump((feature_set, labels_set), f)""" diff --git a/StreamServer/src/analytic/action/Detection/Models.py b/StreamServer/src/analytic/action/Detection/Models.py new file mode 100644 index 0000000..d12d5df --- /dev/null +++ b/StreamServer/src/analytic/action/Detection/Models.py @@ -0,0 +1,348 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from .Utils import build_targets, to_cpu, parse_model_config + + +def create_modules(module_defs): + """ + Constructs module list of layer blocks from module configuration in module_defs + """ + hyperparams = module_defs.pop(0) + output_filters = [int(hyperparams["channels"])] # [3] + module_list = nn.ModuleList() + for module_i, module_def in enumerate(module_defs): + modules = nn.Sequential() + + if module_def["type"] == "convolutional": + bn = int(module_def["batch_normalize"]) + filters = int(module_def["filters"]) + kernel_size = int(module_def["size"]) + pad = (kernel_size - 1) // 2 + modules.add_module( + f"conv_{module_i}", + nn.Conv2d( + in_channels=output_filters[-1], + out_channels=filters, + kernel_size=kernel_size, + stride=int(module_def["stride"]), + padding=pad, + bias=not bn, + ), + ) + if bn: + modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)) + if module_def["activation"] == "leaky": + modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1)) + + elif module_def["type"] == "maxpool": + kernel_size = int(module_def["size"]) + stride = int(module_def["stride"]) + if kernel_size == 2 and stride == 1: + modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1))) + maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) + modules.add_module(f"maxpool_{module_i}", maxpool) + + elif module_def["type"] == "upsample": + upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest") + modules.add_module(f"upsample_{module_i}", upsample) + + elif module_def["type"] == "route": + layers = [int(x) for x in module_def["layers"].split(",")] + filters = sum([output_filters[1:][i] for i in layers]) + modules.add_module(f"route_{module_i}", EmptyLayer()) + + elif module_def["type"] == "shortcut": + filters = output_filters[1:][int(module_def["from"])] + modules.add_module(f"shortcut_{module_i}", EmptyLayer()) + + elif module_def["type"] == "yolo": + anchor_idxs = [int(x) for x in module_def["mask"].split(",")] + # Extract anchors + anchors = [int(x) for x in 
module_def["anchors"].split(",")] + anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] + anchors = [anchors[i] for i in anchor_idxs] + num_classes = int(module_def["classes"]) + img_size = int(hyperparams["height"]) + # Define detection layer + yolo_layer = YOLOLayer(anchors, num_classes, img_size) + modules.add_module(f"yolo_{module_i}", yolo_layer) + # Register module list and number of output filters + module_list.append(modules) + output_filters.append(filters) + + return hyperparams, module_list + + +class Upsample(nn.Module): + """ nn.Upsample is deprecated """ + def __init__(self, scale_factor, mode="nearest"): + super(Upsample, self).__init__() + self.scale_factor = scale_factor + self.mode = mode + + def forward(self, x): + x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) + return x + + +class EmptyLayer(nn.Module): + """Placeholder for 'route' and 'shortcut' layers""" + def __init__(self): + super(EmptyLayer, self).__init__() + + +class YOLOLayer(nn.Module): + """Detection layer""" + def __init__(self, anchors, num_classes, img_dim=416): + super(YOLOLayer, self).__init__() + self.anchors = anchors + self.num_anchors = len(anchors) + self.num_classes = num_classes + self.ignore_thres = 0.5 + self.mse_loss = nn.MSELoss() + self.bce_loss = nn.BCELoss() + self.obj_scale = 1 + self.noobj_scale = 100 + self.metrics = {} + self.img_dim = img_dim + self.grid_size = 0 # grid size + + def compute_grid_offsets(self, grid_size, cuda=True): + self.grid_size = grid_size + g = self.grid_size + FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor + self.stride = self.img_dim / self.grid_size + # Calculate offsets for each grid + self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor) + self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor) + self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]) + self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) + self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) + + def forward(self, x, targets=None, img_dim=None): + # Tensors for cuda support + FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor + LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor + ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor + + self.img_dim = img_dim + num_samples = x.size(0) + grid_size = x.size(2) + + prediction = ( + x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size) + .permute(0, 1, 3, 4, 2) + .contiguous() + ) + + # Get outputs + x = torch.sigmoid(prediction[..., 0]) # Center x + y = torch.sigmoid(prediction[..., 1]) # Center y + w = prediction[..., 2] # Width + h = prediction[..., 3] # Height + pred_conf = torch.sigmoid(prediction[..., 4]) # Conf + pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. 
+ + # If grid size does not match current we compute new offsets + if grid_size != self.grid_size: + self.compute_grid_offsets(grid_size, cuda=x.is_cuda) + + # Add offset and scale with anchors + pred_boxes = FloatTensor(prediction[..., :4].shape) + pred_boxes[..., 0] = x.data + self.grid_x + pred_boxes[..., 1] = y.data + self.grid_y + pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w + pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h + + output = torch.cat( + ( + pred_boxes.view(num_samples, -1, 4) * self.stride, + pred_conf.view(num_samples, -1, 1), + pred_cls.view(num_samples, -1, self.num_classes), + ), + -1, + ) + + if targets is None: + return output, 0 + else: + iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets( + pred_boxes=pred_boxes, + pred_cls=pred_cls, + target=targets, + anchors=self.scaled_anchors, + ignore_thres=self.ignore_thres, + ) + + # Loss : Mask outputs to ignore non-existing objects (except with conf. loss) + loss_x = self.mse_loss(x[obj_mask.bool()], tx[obj_mask.bool()]) + loss_y = self.mse_loss(y[obj_mask.bool()], ty[obj_mask.bool()]) + loss_w = self.mse_loss(w[obj_mask.bool()], tw[obj_mask.bool()]) + loss_h = self.mse_loss(h[obj_mask.bool()], th[obj_mask.bool()]) + loss_conf_obj = self.bce_loss(pred_conf[obj_mask.bool()], tconf[obj_mask.bool()]) + loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask.bool()], tconf[noobj_mask.bool()]) + loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj + loss_cls = self.bce_loss(pred_cls[obj_mask.bool()], tcls[obj_mask.bool()]) + total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls + + # Metrics + cls_acc = 100 * class_mask[obj_mask.bool()].mean() + conf_obj = pred_conf[obj_mask.bool()].mean() + conf_noobj = pred_conf[noobj_mask.bool()].mean() + conf50 = (pred_conf > 0.5).float() + iou50 = (iou_scores > 0.5).float() + iou75 = (iou_scores > 0.75).float() + detected_mask = conf50 * class_mask * tconf + precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) + recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) + recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) + + self.metrics = { + "loss": to_cpu(total_loss).item(), + "x": to_cpu(loss_x).item(), + "y": to_cpu(loss_y).item(), + "w": to_cpu(loss_w).item(), + "h": to_cpu(loss_h).item(), + "conf": to_cpu(loss_conf).item(), + "cls": to_cpu(loss_cls).item(), + "cls_acc": to_cpu(cls_acc).item(), + "recall50": to_cpu(recall50).item(), + "recall75": to_cpu(recall75).item(), + "precision": to_cpu(precision).item(), + "conf_obj": to_cpu(conf_obj).item(), + "conf_noobj": to_cpu(conf_noobj).item(), + "grid_size": grid_size, + } + + return output, total_loss + + +class Darknet(nn.Module): + """YOLOv3 object detection model""" + def __init__(self, config_path, img_size=416): + super(Darknet, self).__init__() + self.module_defs = parse_model_config(config_path) + self.hyperparams, self.module_list = create_modules(self.module_defs) + self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")] + self.img_size = img_size + self.seen = 0 + self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32) + + def forward(self, x, targets=None): + img_dim = x.shape[2] + loss = 0 + layer_outputs, yolo_outputs = [], [] + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): + if module_def["type"] in ["convolutional", "upsample", "maxpool"]: + x = module(x) + elif module_def["type"] == 
"route": + x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1) + elif module_def["type"] == "shortcut": + layer_i = int(module_def["from"]) + x = layer_outputs[-1] + layer_outputs[layer_i] + elif module_def["type"] == "yolo": + x, layer_loss = module[0](x, targets, img_dim) + loss += layer_loss + yolo_outputs.append(x) + layer_outputs.append(x) + yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1)) + return yolo_outputs if targets is None else (loss, yolo_outputs) + + def load_darknet_weights(self, weights_path): + """Parses and loads the weights stored in 'weights_path'""" + # Open the weights file + with open(weights_path, "rb") as f: + header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values + self.header_info = header # Needed to write header when saving weights + self.seen = header[3] # number of images seen during training + weights = np.fromfile(f, dtype=np.float32) # The rest are weights + + # Establish cutoff for loading backbone weights + cutoff = None + if "darknet53.conv.74" in weights_path: + cutoff = 75 + + ptr = 0 + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): + if i == cutoff: + break + if module_def["type"] == "convolutional": + conv_layer = module[0] + if module_def["batch_normalize"]: + # Load BN bias, weights, running mean and running variance + bn_layer = module[1] + num_b = bn_layer.bias.numel() # Number of biases + # Bias + bn_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.bias) + bn_layer.bias.data.copy_(bn_b) + ptr += num_b + # Weight + bn_w = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.weight) + bn_layer.weight.data.copy_(bn_w) + ptr += num_b + # Running Mean + bn_rm = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.running_mean) + bn_layer.running_mean.data.copy_(bn_rm) + ptr += num_b + # Running Var + bn_rv = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn_layer.running_var) + bn_layer.running_var.data.copy_(bn_rv) + ptr += num_b + else: + # Load conv. bias + num_b = conv_layer.bias.numel() + conv_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(conv_layer.bias) + conv_layer.bias.data.copy_(conv_b) + ptr += num_b + # Load conv. 
weights + num_w = conv_layer.weight.numel() + conv_w = torch.from_numpy(weights[ptr: ptr + num_w]).view_as(conv_layer.weight) + conv_layer.weight.data.copy_(conv_w) + ptr += num_w + + def save_darknet_weights(self, path, cutoff=-1): + """ + @:param path - path of the new weights file + @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved) + """ + fp = open(path, "wb") + self.header_info[3] = self.seen + self.header_info.tofile(fp) + + # Iterate through layers + for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): + if module_def["type"] == "convolutional": + conv_layer = module[0] + # If batch norm, load bn first + if module_def["batch_normalize"]: + bn_layer = module[1] + bn_layer.bias.data.cpu().numpy().tofile(fp) + bn_layer.weight.data.cpu().numpy().tofile(fp) + bn_layer.running_mean.data.cpu().numpy().tofile(fp) + bn_layer.running_var.data.cpu().numpy().tofile(fp) + # Load conv bias + else: + conv_layer.bias.data.cpu().numpy().tofile(fp) + # Load conv weights + conv_layer.weight.data.cpu().numpy().tofile(fp) + + fp.close() + + def load_pretrain_to_custom_class(self, weights_pth_path): + state = torch.load(weights_pth_path) + + own_state = self.state_dict() + for name, param in state.items(): + if name not in own_state: + print(f'Model does not have this param: {name}!') + continue + + if param.shape != own_state[name].shape: + print(f'Do not load this param: {name} cause it shape not equal! : ' + f'{param.shape} into {own_state[name].shape}') + continue + + own_state[name].copy_(param) diff --git a/StreamServer/src/analytic/action/Detection/Utils.py b/StreamServer/src/analytic/action/Detection/Utils.py new file mode 100644 index 0000000..5e9472a --- /dev/null +++ b/StreamServer/src/analytic/action/Detection/Utils.py @@ -0,0 +1,415 @@ +import cv2 +import math +import time +import tqdm +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from torch.utils.data import DataLoader + + +def to_cpu(tensor): + return tensor.detach().cpu() + + +def load_classes(path): + """ + Loads class labels at 'path' + """ + fp = open(path, "r") + names = fp.read().split("\n")[:-1] + return names + + +def weights_init_normal(m): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + torch.nn.init.normal_(m.weight.data, 0.0, 0.02) + elif classname.find("BatchNorm2d") != -1: + torch.nn.init.normal_(m.weight.data, 1.0, 0.02) + torch.nn.init.constant_(m.bias.data, 0.0) + + +def rescale_boxes(boxes, current_dim, original_shape): + """ Rescales bounding boxes to the original shape """ + orig_h, orig_w = original_shape + # The amount of padding that was added + pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) + pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) + # Image height and width after padding is removed + unpad_h = current_dim - pad_y + unpad_w = current_dim - pad_x + # Rescale bounding boxes to dimension of original image + boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h + boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h + return boxes + + +def xywh2xyxy(x): + y = x.new(x.shape) + y[..., 0] = x[..., 0] - x[..., 2] / 2 + y[..., 1] = x[..., 1] - x[..., 3] / 2 + y[..., 2] = x[..., 0] + x[..., 2] / 2 + y[..., 3] = x[..., 1] + x[..., 3] / 2 + return y + + +def ap_per_class(tp, conf, 
pred_cls, target_cls): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (list). + conf: Objectness value from 0-1 (list). + pred_cls: Predicted object classes (list). + target_cls: True object classes (list). + # Returns + The average precision as computed in py-faster-rcnn. + """ + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + + # Create Precision-Recall curve and compute AP for each class + ap, p, r = [], [], [] + for c in tqdm.tqdm(unique_classes, desc="Computing AP"): + i = pred_cls == c + n_gt = (target_cls == c).sum() # Number of ground truth objects + n_p = i.sum() # Number of predicted objects + + if n_p == 0 and n_gt == 0: + continue + elif n_p == 0 or n_gt == 0: + ap.append(0) + r.append(0) + p.append(0) + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum() + tpc = (tp[i]).cumsum() + + # Recall + recall_curve = tpc / (n_gt + 1e-16) + r.append(recall_curve[-1]) + + # Precision + precision_curve = tpc / (tpc + fpc) + p.append(precision_curve[-1]) + + # AP from recall-precision curve + ap.append(compute_ap(recall_curve, precision_curve)) + + # Compute F1 score (harmonic mean of precision and recall) + p, r, ap = np.array(p), np.array(r), np.array(ap) + f1 = 2 * p * r / (p + r + 1e-16) + + return p, r, ap, f1, unique_classes.astype("int32") + + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves. + Code originally from https://github.com/rbgirshick/py-faster-rcnn. + # Arguments + recall: The recall curve (list). + precision: The precision curve (list). + # Returns + The average precision as computed in py-faster-rcnn. 
+ """ + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([0.0], precision, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def get_batch_statistics(outputs, targets, iou_threshold): + """ Compute true positives, predicted scores and predicted labels per sample """ + batch_metrics = [] + for sample_i in range(len(outputs)): + + if outputs[sample_i] is None: + continue + + output = outputs[sample_i] + pred_boxes = output[:, :4] + pred_scores = output[:, 4] + pred_labels = output[:, -1] + + true_positives = np.zeros(pred_boxes.shape[0]) + + annotations = targets[targets[:, 0] == sample_i][:, 1:] + target_labels = annotations[:, 0] if len(annotations) else [] + if len(annotations): + detected_boxes = [] + target_boxes = annotations[:, 1:] + + for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): + + # If targets are found break + if len(detected_boxes) == len(annotations): + break + + # Ignore if label is not one of the target labels + if pred_label not in target_labels: + continue + + iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) + if iou >= iou_threshold and box_index not in detected_boxes: + true_positives[pred_i] = 1 + detected_boxes += [box_index] + batch_metrics.append([true_positives, pred_scores, pred_labels]) + return batch_metrics + + +def bbox_wh_iou(wh1, wh2): + wh2 = wh2.t() + w1, h1 = wh1[0], wh1[1] + w2, h2 = wh2[0], wh2[1] + inter_area = torch.min(w1, w2) * torch.min(h1, h2) + union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area + return inter_area / union_area + + +def bbox_iou(box1, box2, x1y1x2y2=True): + """ + Returns the IoU of two bounding boxes + """ + if not x1y1x2y2: + # Transform from center and width to exact coordinates + b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 + b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 + b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 + b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 + else: + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] + + # get the corrdinates of the intersection rectangle + inter_rect_x1 = torch.max(b1_x1, b2_x1) + inter_rect_y1 = torch.max(b1_y1, b2_y1) + inter_rect_x2 = torch.min(b1_x2, b2_x2) + inter_rect_y2 = torch.min(b1_y2, b2_y2) + # Intersection area + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( + inter_rect_y2 - inter_rect_y1 + 1, min=0 + ) + # Union Area + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) + + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) + + return iou + + +def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): + """ + Removes detections with lower object confidence score than 'conf_thres' and performs + Non-Maximum Suppression to further filter detections. 
+ Returns detections with shape: + (x1, y1, x2, y2, object_conf, class_score, class_pred) + """ + # From (center x, center y, width, height) to (x1, y1, x2, y2) + prediction[..., :4] = xywh2xyxy(prediction[..., :4]) + output = [None for _ in range(len(prediction))] + for image_i, image_pred in enumerate(prediction): + # Filter out confidence scores below threshold + image_pred = image_pred[image_pred[:, 4] >= conf_thres] + # If none are remaining => process next image + if not image_pred.size(0): + continue + # Object confidence times class confidence + score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0] + # Sort by it + image_pred = image_pred[(-score).argsort()] + class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True) + detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1) + # Perform non-maximum suppression + keep_boxes = [] + while detections.size(0): + large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres + label_match = detections[0, -1] == detections[:, -1] + # Indices of boxes with lower confidence scores, large IOUs and matching labels + invalid = large_overlap & label_match + weights = detections[invalid, 4:5] + # Merge overlapping bboxes by order of confidence + detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum() + keep_boxes += [detections[0]] + detections = detections[~invalid] + if keep_boxes: + output[image_i] = torch.stack(keep_boxes) + + return output + + +def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres): + ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor + FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor + + nB = pred_boxes.size(0) + nA = pred_boxes.size(1) + nC = pred_cls.size(-1) + nG = pred_boxes.size(2) + + # Output tensors + obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0) + noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1) + class_mask = FloatTensor(nB, nA, nG, nG).fill_(0) + iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0) + tx = FloatTensor(nB, nA, nG, nG).fill_(0) + ty = FloatTensor(nB, nA, nG, nG).fill_(0) + tw = FloatTensor(nB, nA, nG, nG).fill_(0) + th = FloatTensor(nB, nA, nG, nG).fill_(0) + tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0) + + # Convert to position relative to box + target_boxes = target[:, 2:6] * nG + gxy = target_boxes[:, :2] + gwh = target_boxes[:, 2:] + # Get anchors with best iou + ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors]) + best_ious, best_n = ious.max(0) + # Separate target values + b, target_labels = target[:, :2].long().t() + gx, gy = gxy.t() + gw, gh = gwh.t() + gi, gj = gxy.long().t() + # Set masks + obj_mask[b, best_n, gj, gi] = 1 + noobj_mask[b, best_n, gj, gi] = 0 + + # Set noobj mask to zero where iou exceeds ignore threshold + for i, anchor_ious in enumerate(ious.t()): + noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0 + + # Coordinates + tx[b, best_n, gj, gi] = gx - gx.floor() + ty[b, best_n, gj, gi] = gy - gy.floor() + # Width and height + tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) + th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16) + # One-hot encoding of label + tcls[b, best_n, gj, gi, target_labels] = 1 + # Compute label correctness and iou at best anchor + class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() + iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, 
x1y1x2y2=False) + + tconf = obj_mask.float() + return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf + + +def parse_model_config(path): + """Parses the yolo-v3 layer configuration file and returns module definitions""" + file = open(path, 'r') + lines = file.read().split('\n') + lines = [x for x in lines if x and not x.startswith('#')] + lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces + module_defs = [] + for line in lines: + if line.startswith('['): # This marks the start of a new block + module_defs.append({}) + module_defs[-1]['type'] = line[1:-1].rstrip() + if module_defs[-1]['type'] == 'convolutional': + module_defs[-1]['batch_normalize'] = 0 + else: + key, value = line.split("=") + value = value.strip() + module_defs[-1][key.rstrip()] = value.strip() + + return module_defs + + +def parse_data_config(path): + """Parses the data configuration file""" + options = dict() + options['gpus'] = '0,1,2,3' + options['num_workers'] = '10' + with open(path, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.strip() + if line == '' or line.startswith('#'): + continue + key, value = line.split('=') + options[key.strip()] = value.strip() + return options + + +def ResizePadding(height, width): + desized_size = (height, width) + + def resizePadding(image, **kwargs): + old_size = image.shape[:2] + max_size_idx = old_size.index(max(old_size)) + ratio = float(desized_size[max_size_idx]) / max(old_size) + new_size = tuple([int(x * ratio) for x in old_size]) + + if new_size > desized_size: + min_size_idx = old_size.index(min(old_size)) + ratio = float(desized_size[min_size_idx]) / min(old_size) + new_size = tuple([int(x * ratio) for x in old_size]) + + image = cv2.resize(image, (new_size[1], new_size[0])) + delta_w = desized_size[1] - new_size[1] + delta_h = desized_size[0] - new_size[0] + top, bottom = delta_h // 2, delta_h - (delta_h // 2) + left, right = delta_w // 2, delta_w - (delta_w // 2) + + image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT) + return image + return resizePadding + + +class AverageValueMeter(object): + def __init__(self): + self.reset() + self.val = 0 + + def add(self, value, n=1): + self.val = value + self.sum += value + self.var += value * value + self.n += n + + if self.n == 0: + self.mean, self.std = np.nan, np.nan + elif self.n == 1: + self.mean = 0.0 + self.sum # This is to force a copy in torch/numpy + self.std = np.inf + self.mean_old = self.mean + self.m_s = 0.0 + else: + self.mean = self.mean_old + (value - n * self.mean_old) / float(self.n) + self.m_s += (value - self.mean_old) * (value - self.mean) + self.mean_old = self.mean + self.std = np.sqrt(self.m_s / (self.n - 1.0)) + + def value(self): + return self.mean, self.std + + def reset(self): + self.n = 0 + self.sum = 0.0 + self.var = 0.0 + self.val = 0.0 + self.mean = np.nan + self.mean_old = 0.0 + self.m_s = 0.0 + self.std = np.nan diff --git a/StreamServer/src/analytic/action/DetectorLoader.py b/StreamServer/src/analytic/action/DetectorLoader.py new file mode 100644 index 0000000..6c06bdb --- /dev/null +++ b/StreamServer/src/analytic/action/DetectorLoader.py @@ -0,0 +1,117 @@ +import time +import torch +import numpy as np +import torchvision.transforms as transforms + +from queue import Queue +from threading import Thread + +from .Detection.Models import Darknet +from .Detection.Utils import non_max_suppression, ResizePadding + + +class TinyYOLOv3_onecls(object): + """Load trained Tiny-YOLOv3 one class (person) 
detection model. + Args: + input_size: (int) Size of input image must be divisible by 32. Default: 416, + config_file: (str) Path to Yolo model structure config file., + weight_file: (str) Path to trained weights file., + nms: (float) Non-Maximum Suppression overlap threshold., + conf_thres: (float) Minimum Confidence threshold of predicted bboxs to cut off., + device: (str) Device to load the model on 'cpu' or 'cuda'. + """ + def __init__(self, + input_size=416, + config_file='Models/yolo-tiny-onecls/yolov3-tiny-onecls.cfg', + weight_file='Models/yolo-tiny-onecls/best-model.pth', + nms=0.2, + conf_thres=0.45, + device='cuda'): + self.input_size = input_size + self.model = Darknet(config_file).to(device) + self.model.load_state_dict(torch.load(weight_file)) + self.model.eval() + self.device = device + + self.nms = nms + self.conf_thres = conf_thres + + self.resize_fn = ResizePadding(input_size, input_size) + self.transf_fn = transforms.ToTensor() + + def detect(self, image, need_resize=True, expand_bb=5): + """Feed forward to the model. + Args: + image: (numpy array) Single RGB image to detect., + need_resize: (bool) Resize to input_size before feed and will return bboxs + with scale to image original size., + expand_bb: (int) Expand boundary of the boxs. + Returns: + (torch.float32) Of each detected object contain a + [top, left, bottom, right, bbox_score, class_score, class] + return `None` if no detected. + """ + image_size = (self.input_size, self.input_size) + if need_resize: + image_size = image.shape[:2] + image = self.resize_fn(image) + + image = self.transf_fn(image)[None, ...] + scf = torch.min(self.input_size / torch.FloatTensor([image_size]), 1)[0] + + detected = self.model(image.to(self.device)) + detected = non_max_suppression(detected, self.conf_thres, self.nms)[0] + if detected is not None: + detected[:, [0, 2]] -= (self.input_size - scf * image_size[1]) / 2 + detected[:, [1, 3]] -= (self.input_size - scf * image_size[0]) / 2 + detected[:, 0:4] /= scf + + detected[:, 0:2] = np.maximum(0, detected[:, 0:2] - expand_bb) + detected[:, 2:4] = np.minimum(image_size[::-1], detected[:, 2:4] + expand_bb) + + return detected + + +class ThreadDetection(object): + def __init__(self, + dataloader, + model, + queue_size=256): + self.model = model + + self.dataloader = dataloader + self.stopped = False + self.Q = Queue(maxsize=queue_size) + + def start(self): + t = Thread(target=self.update, args=(), daemon=True).start() + return self + + def update(self): + while True: + if self.stopped: + return + + images = self.dataloader.getitem() + + outputs = self.model.detect(images) + + if self.Q.full(): + time.sleep(2) + self.Q.put((images, outputs)) + + def getitem(self): + return self.Q.get() + + def stop(self): + self.stopped = True + + def __len__(self): + return self.Q.qsize() + + + + + + + diff --git a/StreamServer/src/analytic/action/Models/TSSTG/_.txt b/StreamServer/src/analytic/action/Models/TSSTG/_.txt new file mode 100644 index 0000000..4ff1ded --- /dev/null +++ b/StreamServer/src/analytic/action/Models/TSSTG/_.txt @@ -0,0 +1 @@ +tsstg-model.pth \ No newline at end of file diff --git a/StreamServer/src/analytic/action/Models/sppe/_.txt b/StreamServer/src/analytic/action/Models/sppe/_.txt new file mode 100644 index 0000000..c9fe15c --- /dev/null +++ b/StreamServer/src/analytic/action/Models/sppe/_.txt @@ -0,0 +1,2 @@ +fast_res50_256x192.pth +fast_res101_320x256.pth \ No newline at end of file diff --git a/StreamServer/src/analytic/action/Models/yolo-tiny-onecls/_.txt 
b/StreamServer/src/analytic/action/Models/yolo-tiny-onecls/_.txt new file mode 100644 index 0000000..1cf401e --- /dev/null +++ b/StreamServer/src/analytic/action/Models/yolo-tiny-onecls/_.txt @@ -0,0 +1,2 @@ +best-model.pth +yolov3-tiny-onecls.cfg \ No newline at end of file diff --git a/StreamServer/src/analytic/action/PoseEstimateLoader.py b/StreamServer/src/analytic/action/PoseEstimateLoader.py new file mode 100644 index 0000000..b2cc285 --- /dev/null +++ b/StreamServer/src/analytic/action/PoseEstimateLoader.py @@ -0,0 +1,40 @@ +import os +import cv2 +import torch + +from .SPPE.src.main_fast_inference import InferenNet_fast, InferenNet_fastRes50 +from .SPPE.src.utils.img import crop_dets +from .pPose_nms import pose_nms +from .SPPE.src.utils.eval import getPrediction + + +class SPPE_FastPose(object): + def __init__(self, + backbone, + input_height=320, + input_width=256, + device='cuda', + path='./SPPE/models/sppe/'): + assert backbone in ['resnet50', 'resnet101'], '{} backbone is not support yet!'.format(backbone) + + self.inp_h = input_height + self.inp_w = input_width + self.device = device + + if backbone == 'resnet101': + self.model = InferenNet_fast(path).to(device) + else: + self.model = InferenNet_fastRes50(path).to(device) + self.model.eval() + + def predict(self, image, bboxs, bboxs_scores): + inps, pt1, pt2 = crop_dets(image, bboxs, self.inp_h, self.inp_w) + pose_hm = self.model(inps.to(self.device)).cpu().data + + # Cut eyes and ears. + pose_hm = torch.cat([pose_hm[:, :1, ...], pose_hm[:, 5:, ...]], dim=1) + + xy_hm, xy_img, scores = getPrediction(pose_hm, pt1, pt2, self.inp_h, self.inp_w, + pose_hm.shape[-2], pose_hm.shape[-1]) + result = pose_nms(bboxs, bboxs_scores, xy_img, scores) + return result \ No newline at end of file diff --git a/StreamServer/src/analytic/action/README.md b/StreamServer/src/analytic/action/README.md new file mode 100644 index 0000000..66cdc06 --- /dev/null +++ b/StreamServer/src/analytic/action/README.md @@ -0,0 +1,51 @@ +

+# Human Falling Detection and Tracking from: https://github.com/GajuuzZ/Human-Falling-Detect-Tracks

+ +This module uses a Tiny-YOLO oneclass detector to find each person in the frame, +[AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to extract the skeleton pose of each detection, and an +[ST-GCN](https://github.com/yysijie/st-gcn) model to predict an action from every 30 frames +of each person's track. + +It currently supports 7 actions: Standing, Walking, Sitting, Lying Down, Stand up, Sit down, Fall Down. + +
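The intended flow, end to end, looks roughly like the sketch below. This is a minimal outline, not the project's actual entry point: it assumes the loader classes added in this patch (`TinyYOLOv3_onecls`, `SPPE_FastPose`, `TSSTG`) are importable as flat modules (in this repo they live under `StreamServer/src/analytic/action/` and use relative imports) and that the pre-trained weights listed below are already in `./Models`. Person tracking and the 30-frame buffering are only indicated in comments.

```
import cv2

from DetectorLoader import TinyYOLOv3_onecls
from PoseEstimateLoader import SPPE_FastPose
from ActionsEstLoader import TSSTG

detect_model = TinyYOLOv3_onecls(device='cuda')        # person detector
pose_model = SPPE_FastPose('resnet50', device='cuda')  # skeleton estimator
action_model = TSSTG(device='cuda')                    # 30-frame action classifier

# Read one frame (the detector expects an RGB image).
frame = cv2.cvtColor(cv2.imread('frame.jpg'), cv2.COLOR_BGR2RGB)

# 1) Detect persons: rows of (x1, y1, x2, y2, bbox_score, class_score, class), or None.
detected = detect_model.detect(frame)
if detected is not None:
    # 2) Estimate a skeleton for each detected box.
    poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
    # 3) Track each person, buffer the last 30 frames of 13 (x, y, score) keypoints,
    #    then classify the sequence, e.g.:
    #    out = action_model.predict(pts, (frame_width, frame_height))  # pts: (30, 13, 3) array
    #    action_name = action_model.class_names[out[0].argmax()]
```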
+ +
+ +## Prerequisites + +- Python > 3.6 +- PyTorch > 1.3.1 + +Original test run on: i7-8750H CPU @ 2.20GHz x12, GeForce RTX 2070 8GB, CUDA 10.2 + +## Data + +This project trained a new Tiny-YOLO oneclass model to detect only person objects and to reduce the +model size. It was trained on a rotation-augmented [COCO](http://cocodataset.org/#home) person keypoints dataset +for more robust person detection across a wider range of pose angles. + +For action recognition, data from the [Le2i](http://le2i.cnrs.fr/Fall-detection-Dataset?lang=fr) +Fall Detection Dataset (Coffee room, Home) was used: skeleton poses were extracted with AlphaPose and each action +frame was labeled by hand to train the ST-GCN model. + +## Pre-Trained Models + +- Tiny-YOLO oneclass - [.pth](https://drive.google.com/file/d/1obEbWBSm9bXeg10FriJ7R2cGLRsg-AfP/view?usp=sharing), + [.cfg](https://drive.google.com/file/d/19sPzBZjAjuJQ3emRteHybm2SG25w9Wn5/view?usp=sharing) +- SPPE FastPose (AlphaPose) - [resnet101](https://drive.google.com/file/d/1N2MgE1Esq6CKYA6FyZVKpPwHRyOCrzA0/view?usp=sharing), + [resnet50](https://drive.google.com/file/d/1IPfCDRwCmQDnQy94nT1V-_NVtTEi4VmU/view?usp=sharing) +- ST-GCN action recognition - [tsstg](https://drive.google.com/file/d/1mQQ4JHe58ylKbBqTjuKzpwN2nwKOWJ9u/view?usp=sharing) + +## Basic Use + +1. Download all pre-trained models into the ./Models folder. +2. Run main.py + +``` + python main.py ${video file or camera source} +``` + +## Reference + +- AlphaPose : https://github.com/Amanbhandula/AlphaPose +- ST-GCN : https://github.com/yysijie/st-gcn diff --git a/StreamServer/src/analytic/action/SPPE/LICENSE b/StreamServer/src/analytic/action/SPPE/LICENSE new file mode 100644 index 0000000..fedbdfd --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Jeff-sjtu + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/StreamServer/src/analytic/action/SPPE/README.md b/StreamServer/src/analytic/action/SPPE/README.md new file mode 100644 index 0000000..d23c179 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/README.md @@ -0,0 +1 @@ +# pytorch-AlphaPose from: https://github.com/Amanbhandula/AlphaPose diff --git a/StreamServer/src/analytic/action/SPPE/src/main_fast_inference.py b/StreamServer/src/analytic/action/SPPE/src/main_fast_inference.py new file mode 100644 index 0000000..f000aad --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/main_fast_inference.py @@ -0,0 +1,82 @@ +import torch +import torch.nn as nn +import torch.utils.data +import torch.utils.data.distributed +import torch.nn.functional as F +import numpy as np +from .utils.img import flip, shuffleLR +from .utils.eval import getPrediction +from .models.FastPose import FastPose + +import time +import sys + +import torch._utils +try: + torch._utils._rebuild_tensor_v2 +except AttributeError: + def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks): + tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride) + tensor.requires_grad = requires_grad + tensor._backward_hooks = backward_hooks + return tensor + torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2 + + +class InferenNet(nn.Module): + def __init__(self, dataset, weights_file='./Models/sppe/fast_res101_320x256.pth'): + super().__init__() + + self.pyranet = FastPose('resnet101').cuda() + print('Loading pose model from {}'.format(weights_file)) + sys.stdout.flush() + self.pyranet.load_state_dict(torch.load(weights_file)) + self.pyranet.eval() + self.pyranet = model + + self.dataset = dataset + + def forward(self, x): + out = self.pyranet(x) + out = out.narrow(1, 0, 17) + + flip_out = self.pyranet(flip(x)) + flip_out = flip_out.narrow(1, 0, 17) + + flip_out = flip(shuffleLR( + flip_out, self.dataset)) + + out = (flip_out + out) / 2 + + return out + + +class InferenNet_fast(nn.Module): + def __init__(self, weights_file='./Models/sppe/fast_res101_320x256.pth'): + super().__init__() + + self.pyranet = FastPose('resnet101').cuda() + print('Loading pose model from {}'.format(weights_file)) + self.pyranet.load_state_dict(torch.load(weights_file)) + self.pyranet.eval() + + def forward(self, x): + out = self.pyranet(x) + out = out.narrow(1, 0, 17) + + return out + + +class InferenNet_fastRes50(nn.Module): + def __init__(self, weights_file='./Models/sppe/fast_res50_256x192.pth'): + super().__init__() + + self.pyranet = FastPose('resnet50', 17).cuda() + print('Loading pose model from {}'.format(weights_file)) + self.pyranet.load_state_dict(torch.load(weights_file)) + self.pyranet.eval() + + def forward(self, x): + out = self.pyranet(x) + + return out diff --git a/StreamServer/src/analytic/action/SPPE/src/models/FastPose.py b/StreamServer/src/analytic/action/SPPE/src/models/FastPose.py new file mode 100644 index 0000000..18ccccf --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/FastPose.py @@ -0,0 +1,32 @@ +import torch.nn as nn +from torch.autograd import Variable + +from .layers.SE_Resnet import SEResnet +from .layers.DUC import DUC +from ..opt import opt + + +class FastPose(nn.Module): + DIM = 128 + + def __init__(self, backbone='resnet101', num_join=opt.nClasses): + super(FastPose, self).__init__() + assert backbone in ['resnet50', 'resnet101'] + + self.preact = SEResnet(backbone) + + self.suffle1 = nn.PixelShuffle(2) + self.duc1 = DUC(512, 1024, upscale_factor=2) + 
self.duc2 = DUC(256, 512, upscale_factor=2) + + self.conv_out = nn.Conv2d( + self.DIM, num_join, kernel_size=3, stride=1, padding=1) + + def forward(self, x: Variable): + out = self.preact(x) + out = self.suffle1(out) + out = self.duc1(out) + out = self.duc2(out) + + out = self.conv_out(out) + return out diff --git a/StreamServer/src/analytic/action/SPPE/src/models/__init__.py b/StreamServer/src/analytic/action/SPPE/src/models/__init__.py new file mode 100644 index 0000000..b974282 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/__init__.py @@ -0,0 +1 @@ +from . import * \ No newline at end of file diff --git a/StreamServer/src/analytic/action/SPPE/src/models/hg-prm.py b/StreamServer/src/analytic/action/SPPE/src/models/hg-prm.py new file mode 100644 index 0000000..6dd64b8 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/hg-prm.py @@ -0,0 +1,126 @@ +import torch.nn as nn +from .layers.PRM import Residual as ResidualPyramid +from .layers.Residual import Residual as Residual +from torch.autograd import Variable +from SPPE.src.opt import opt +from collections import defaultdict + + +class Hourglass(nn.Module): + def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C): + super(Hourglass, self).__init__() + + self.ResidualUp = ResidualPyramid if n >= 2 else Residual + self.ResidualDown = ResidualPyramid if n >= 3 else Residual + + self.depth = n + self.nModules = nModules + self.nFeats = nFeats + self.net_type = net_type + self.B = B + self.C = C + self.inputResH = inputResH + self.inputResW = inputResW + + self.up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW) + self.low1 = nn.Sequential( + nn.MaxPool2d(2), + self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) + ) + if n > 1: + self.low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C) + else: + self.low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) + + self.low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2) + self.up2 = nn.UpsamplingNearest2d(scale_factor=2) + + self.upperBranch = self.up1 + self.lowerBranch = nn.Sequential( + self.low1, + self.low2, + self.low3, + self.up2 + ) + + def _make_residual(self, resBlock, useConv, inputResH, inputResW): + layer_list = [] + for i in range(self.nModules): + layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW, + stride=1, net_type=self.net_type, useConv=useConv, + baseWidth=self.B, cardinality=self.C)) + return nn.Sequential(*layer_list) + + def forward(self, x: Variable): + up1 = self.upperBranch(x) + up2 = self.lowerBranch(x) + out = up1 + up2 + return out + + +class PyraNet(nn.Module): + def __init__(self): + super(PyraNet, self).__init__() + + B, C = opt.baseWidth, opt.cardinality + self.inputResH = opt.inputResH / 4 + self.inputResW = opt.inputResW / 4 + self.nStack = opt.nStack + + self.cnv1 = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), + nn.BatchNorm2d(64), + nn.ReLU(True) + ) + self.r1 = nn.Sequential( + ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2, + stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C), + nn.MaxPool2d(2) + ) + self.r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW, + stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) + self.r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW, + stride=1, net_type='preact', useConv=False, baseWidth=B, 
cardinality=C) + self.preact = nn.Sequential( + self.cnv1, + self.r1, + self.r4, + self.r5 + ) + self.stack_layers = defaultdict(list) + for i in range(self.nStack): + hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C) + lin = nn.Sequential( + hg, + nn.BatchNorm2d(opt.nFeats), + nn.ReLU(True), + nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0), + nn.BatchNorm2d(opt.nFeats), + nn.ReLU(True) + ) + tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0) + self.stack_layers['lin'].append(lin) + self.stack_layers['out'].append(tmpOut) + if i < self.nStack - 1: + lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0) + tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0) + self.stack_layers['lin_'].append(lin_) + self.stack_layers['out_'].append(tmpOut_) + + def forward(self, x: Variable): + out = [] + inter = self.preact(x) + for i in range(self.nStack): + lin = self.stack_layers['lin'][i](inter) + tmpOut = self.stack_layers['out'][i](lin) + out.append(tmpOut) + if i < self.nStack - 1: + lin_ = self.stack_layers['lin_'][i](lin) + tmpOut_ = self.stack_layers['out_'][i](tmpOut) + inter = inter + lin_ + tmpOut_ + return out + + +def createModel(**kw): + model = PyraNet() + return model diff --git a/StreamServer/src/analytic/action/SPPE/src/models/hgPRM.py b/StreamServer/src/analytic/action/SPPE/src/models/hgPRM.py new file mode 100644 index 0000000..24382ba --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/hgPRM.py @@ -0,0 +1,236 @@ +import torch.nn as nn +from .layers.PRM import Residual as ResidualPyramid +from .layers.Residual import Residual as Residual +from torch.autograd import Variable +import torch +from SPPE.src.opt import opt +import math + + +class Hourglass(nn.Module): + def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C): + super(Hourglass, self).__init__() + + self.ResidualUp = ResidualPyramid if n >= 2 else Residual + self.ResidualDown = ResidualPyramid if n >= 3 else Residual + + self.depth = n + self.nModules = nModules + self.nFeats = nFeats + self.net_type = net_type + self.B = B + self.C = C + self.inputResH = inputResH + self.inputResW = inputResW + + up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW) + low1 = nn.Sequential( + nn.MaxPool2d(2), + self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) + ) + if n > 1: + low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C) + else: + low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) + + low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2) + up2 = nn.UpsamplingNearest2d(scale_factor=2) + + self.upperBranch = up1 + self.lowerBranch = nn.Sequential( + low1, + low2, + low3, + up2 + ) + + def _make_residual(self, resBlock, useConv, inputResH, inputResW): + layer_list = [] + for i in range(self.nModules): + layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW, + stride=1, net_type=self.net_type, useConv=useConv, + baseWidth=self.B, cardinality=self.C)) + return nn.Sequential(*layer_list) + + def forward(self, x: Variable): + up1 = self.upperBranch(x) + up2 = self.lowerBranch(x) + # out = up1 + up2 + out = torch.add(up1, up2) + return out + + +class PyraNet(nn.Module): + def __init__(self): + super(PyraNet, self).__init__() + + B, C = opt.baseWidth, opt.cardinality + self.inputResH = opt.inputResH / 
4 + self.inputResW = opt.inputResW / 4 + self.nStack = opt.nStack + + conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) + if opt.init: + nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3)) + + cnv1 = nn.Sequential( + conv1, + nn.BatchNorm2d(64), + nn.ReLU(True) + ) + + r1 = nn.Sequential( + ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2, + stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C), + nn.MaxPool2d(2) + ) + r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW, + stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) + r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW, + stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) + self.preact = nn.Sequential( + cnv1, + r1, + r4, + r5 + ) + + self.stack_lin = nn.ModuleList() + self.stack_out = nn.ModuleList() + self.stack_lin_ = nn.ModuleList() + self.stack_out_ = nn.ModuleList() + + for i in range(self.nStack): + hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C) + conv1 = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0) + if opt.init: + nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) + lin = nn.Sequential( + hg, + nn.BatchNorm2d(opt.nFeats), + nn.ReLU(True), + conv1, + nn.BatchNorm2d(opt.nFeats), + nn.ReLU(True) + ) + tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0) + if opt.init: + nn.init.xavier_normal(tmpOut.weight) + self.stack_lin.append(lin) + self.stack_out.append(tmpOut) + if i < self.nStack - 1: + lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0) + tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0) + if opt.init: + nn.init.xavier_normal(lin_.weight) + nn.init.xavier_normal(tmpOut_.weight) + self.stack_lin_.append(lin_) + self.stack_out_.append(tmpOut_) + + def forward(self, x: Variable): + out = [] + inter = self.preact(x) + for i in range(self.nStack): + lin = self.stack_lin[i](inter) + tmpOut = self.stack_out[i](lin) + out.append(tmpOut) + if i < self.nStack - 1: + lin_ = self.stack_lin_[i](lin) + tmpOut_ = self.stack_out_[i](tmpOut) + inter = inter + lin_ + tmpOut_ + return out + + +class PyraNet_Inference(nn.Module): + def __init__(self): + super(PyraNet_Inference, self).__init__() + + B, C = opt.baseWidth, opt.cardinality + self.inputResH = opt.inputResH / 4 + self.inputResW = opt.inputResW / 4 + self.nStack = opt.nStack + + conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) + if opt.init: + nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3)) + + cnv1 = nn.Sequential( + conv1, + nn.BatchNorm2d(64), + nn.ReLU(True) + ) + + r1 = nn.Sequential( + ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2, + stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C), + nn.MaxPool2d(2) + ) + r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW, + stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) + r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW, + stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) + self.preact = nn.Sequential( + cnv1, + r1, + r4, + r5 + ) + + self.stack_lin = nn.ModuleList() + self.stack_out = nn.ModuleList() + self.stack_lin_ = nn.ModuleList() + self.stack_out_ = nn.ModuleList() + + for i in range(self.nStack): + hg = Hourglass(4, opt.nFeats, opt.nResidual, + self.inputResH, self.inputResW, 'preact', B, C) + conv1 = 
nn.Conv2d(opt.nFeats, opt.nFeats, + kernel_size=1, stride=1, padding=0) + if opt.init: + nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) + lin = nn.Sequential( + hg, + nn.BatchNorm2d(opt.nFeats), + nn.ReLU(True), + conv1, + nn.BatchNorm2d(opt.nFeats), + nn.ReLU(True) + ) + tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, + kernel_size=1, stride=1, padding=0) + if opt.init: + nn.init.xavier_normal(tmpOut.weight) + self.stack_lin.append(lin) + self.stack_out.append(tmpOut) + if i < self.nStack - 1: + lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, + kernel_size=1, stride=1, padding=0) + tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, + kernel_size=1, stride=1, padding=0) + if opt.init: + nn.init.xavier_normal(lin_.weight) + nn.init.xavier_normal(tmpOut_.weight) + self.stack_lin_.append(lin_) + self.stack_out_.append(tmpOut_) + + def forward(self, x: Variable): + inter = self.preact(x) + for i in range(self.nStack): + lin = self.stack_lin[i](inter) + tmpOut = self.stack_out[i](lin) + out = tmpOut + if i < self.nStack - 1: + lin_ = self.stack_lin_[i](lin) + tmpOut_ = self.stack_out_[i](tmpOut) + inter = inter + lin_ + tmpOut_ + return out + + +def createModel(**kw): + model = PyraNet() + return model + + +def createModel_Inference(**kw): + model = PyraNet_Inference() + return model diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/DUC.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/DUC.py new file mode 100644 index 0000000..f6b5ee0 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/DUC.py @@ -0,0 +1,23 @@ +import torch.nn as nn +import torch.nn.functional as F + + +class DUC(nn.Module): + """ + INPUT: inplanes, planes, upscale_factor + OUTPUT: (planes // 4)* ht * wd + """ + def __init__(self, inplanes, planes, upscale_factor=2): + super(DUC, self).__init__() + self.conv = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1, bias=False) + self.bn = nn.BatchNorm2d(planes) + self.relu = nn.ReLU() + + self.pixel_shuffle = nn.PixelShuffle(upscale_factor) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + x = self.pixel_shuffle(x) + return x diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/PRM.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/PRM.py new file mode 100644 index 0000000..375bea4 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/PRM.py @@ -0,0 +1,135 @@ +import torch.nn as nn +from .util_models import ConcatTable, CaddTable, Identity +import math +from opt import opt + + +class Residual(nn.Module): + def __init__(self, numIn, numOut, inputResH, inputResW, stride=1, + net_type='preact', useConv=False, baseWidth=9, cardinality=4): + super(Residual, self).__init__() + + self.con = ConcatTable([convBlock(numIn, numOut, inputResH, + inputResW, net_type, baseWidth, cardinality, stride), + skipLayer(numIn, numOut, stride, useConv)]) + self.cadd = CaddTable(True) + + def forward(self, x): + out = self.con(x) + out = self.cadd(out) + return out + + +def convBlock(numIn, numOut, inputResH, inputResW, net_type, baseWidth, cardinality, stride): + numIn = int(numIn) + numOut = int(numOut) + + addTable = ConcatTable() + s_list = [] + if net_type != 'no_preact': + s_list.append(nn.BatchNorm2d(numIn)) + s_list.append(nn.ReLU(True)) + + conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1) + if opt.init: + nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) + s_list.append(conv1) + + s_list.append(nn.BatchNorm2d(numOut // 2)) + 
s_list.append(nn.ReLU(True)) + + conv2 = nn.Conv2d(numOut // 2, numOut // 2, + kernel_size=3, stride=stride, padding=1) + if opt.init: + nn.init.xavier_normal(conv2.weight) + s_list.append(conv2) + + s = nn.Sequential(*s_list) + addTable.add(s) + + D = math.floor(numOut // baseWidth) + C = cardinality + s_list = [] + + if net_type != 'no_preact': + s_list.append(nn.BatchNorm2d(numIn)) + s_list.append(nn.ReLU(True)) + + conv1 = nn.Conv2d(numIn, D, kernel_size=1, stride=stride) + if opt.init: + nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / C)) + + s_list.append(conv1) + s_list.append(nn.BatchNorm2d(D)) + s_list.append(nn.ReLU(True)) + s_list.append(pyramid(D, C, inputResH, inputResW)) + s_list.append(nn.BatchNorm2d(D)) + s_list.append(nn.ReLU(True)) + + a = nn.Conv2d(D, numOut // 2, kernel_size=1) + a.nBranchIn = C + if opt.init: + nn.init.xavier_normal(a.weight, gain=math.sqrt(1 / C)) + s_list.append(a) + + s = nn.Sequential(*s_list) + addTable.add(s) + + elewiswAdd = nn.Sequential( + addTable, + CaddTable(False) + ) + conv2 = nn.Conv2d(numOut // 2, numOut, kernel_size=1) + if opt.init: + nn.init.xavier_normal(conv2.weight, gain=math.sqrt(1 / 2)) + model = nn.Sequential( + elewiswAdd, + nn.BatchNorm2d(numOut // 2), + nn.ReLU(True), + conv2 + ) + return model + + +def pyramid(D, C, inputResH, inputResW): + pyraTable = ConcatTable() + sc = math.pow(2, 1 / C) + for i in range(C): + scaled = 1 / math.pow(sc, i + 1) + conv1 = nn.Conv2d(D, D, kernel_size=3, stride=1, padding=1) + if opt.init: + nn.init.xavier_normal(conv1.weight) + s = nn.Sequential( + nn.FractionalMaxPool2d(2, output_ratio=(scaled, scaled)), + conv1, + nn.UpsamplingBilinear2d(size=(int(inputResH), int(inputResW)))) + pyraTable.add(s) + pyra = nn.Sequential( + pyraTable, + CaddTable(False) + ) + return pyra + + +class skipLayer(nn.Module): + def __init__(self, numIn, numOut, stride, useConv): + super(skipLayer, self).__init__() + self.identity = False + + if numIn == numOut and stride == 1 and not useConv: + self.identity = True + else: + conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride) + if opt.init: + nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) + self.m = nn.Sequential( + nn.BatchNorm2d(numIn), + nn.ReLU(True), + conv1 + ) + + def forward(self, x): + if self.identity: + return x + else: + return self.m(x) diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/Residual.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/Residual.py new file mode 100644 index 0000000..1449a41 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/Residual.py @@ -0,0 +1,54 @@ +import torch.nn as nn +import math +from .util_models import ConcatTable, CaddTable, Identity +from opt import opt + + +def Residual(numIn, numOut, *arg, stride=1, net_type='preact', useConv=False, **kw): + con = ConcatTable([convBlock(numIn, numOut, stride, net_type), + skipLayer(numIn, numOut, stride, useConv)]) + cadd = CaddTable(True) + return nn.Sequential(con, cadd) + + +def convBlock(numIn, numOut, stride, net_type): + s_list = [] + if net_type != 'no_preact': + s_list.append(nn.BatchNorm2d(numIn)) + s_list.append(nn.ReLU(True)) + + conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1) + if opt.init: + nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) + s_list.append(conv1) + + s_list.append(nn.BatchNorm2d(numOut // 2)) + s_list.append(nn.ReLU(True)) + + conv2 = nn.Conv2d(numOut // 2, numOut // 2, kernel_size=3, stride=stride, padding=1) + if opt.init: + 
nn.init.xavier_normal(conv2.weight) + s_list.append(conv2) + s_list.append(nn.BatchNorm2d(numOut // 2)) + s_list.append(nn.ReLU(True)) + + conv3 = nn.Conv2d(numOut // 2, numOut, kernel_size=1) + if opt.init: + nn.init.xavier_normal(conv3.weight) + s_list.append(conv3) + + return nn.Sequential(*s_list) + + +def skipLayer(numIn, numOut, stride, useConv): + if numIn == numOut and stride == 1 and not useConv: + return Identity() + else: + conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride) + if opt.init: + nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) + return nn.Sequential( + nn.BatchNorm2d(numIn), + nn.ReLU(True), + conv1 + ) diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/Resnet.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/Resnet.py new file mode 100644 index 0000000..72f07db --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/Resnet.py @@ -0,0 +1,82 @@ +import torch.nn as nn +import torch.nn.functional as F + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, stride=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = F.relu(self.bn1(self.conv1(x)), inplace=True) + out = F.relu(self.bn2(self.conv2(out)), inplace=True) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = F.relu(out, inplace=True) + + return out + + +class ResNet(nn.Module): + """ Resnet """ + def __init__(self, architecture): + super(ResNet, self).__init__() + assert architecture in ["resnet50", "resnet101"] + self.inplanes = 64 + self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3] + self.block = Bottleneck + + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2) + + self.layer1 = self.make_layer(self.block, 64, self.layers[0]) + self.layer2 = self.make_layer(self.block, 128, self.layers[1], stride=2) + self.layer3 = self.make_layer(self.block, 256, self.layers[2], stride=2) + + self.layer4 = self.make_layer( + self.block, 512, self.layers[3], stride=2) + + def forward(self, x): + x = self.maxpool(self.relu(self.bn1(self.conv1(x)))) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + return x + + def stages(self): + return [self.layer1, self.layer2, self.layer3, self.layer4] + + def make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) diff 
--git a/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_Resnet.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_Resnet.py new file mode 100644 index 0000000..caecaa3 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_Resnet.py @@ -0,0 +1,99 @@ +import torch.nn as nn +from .SE_module import SELayer +import torch.nn.functional as F + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=False): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + if reduction: + self.se = SELayer(planes * 4) + + self.reduc = reduction + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = F.relu(self.bn1(self.conv1(x)), inplace=True) + out = F.relu(self.bn2(self.conv2(out)), inplace=True) + + out = self.conv3(out) + out = self.bn3(out) + if self.reduc: + out = self.se(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = F.relu(out) + + return out + + +class SEResnet(nn.Module): + """ SEResnet """ + + def __init__(self, architecture): + super(SEResnet, self).__init__() + assert architecture in ["resnet50", "resnet101"] + self.inplanes = 64 + self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3] + self.block = Bottleneck + + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, + stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.layer1 = self.make_layer(self.block, 64, self.layers[0]) + self.layer2 = self.make_layer( + self.block, 128, self.layers[1], stride=2) + self.layer3 = self.make_layer( + self.block, 256, self.layers[2], stride=2) + + self.layer4 = self.make_layer( + self.block, 512, self.layers[3], stride=2) + + def forward(self, x): + x = self.maxpool(self.relu(self.bn1(self.conv1(x)))) # 64 * h/4 * w/4 + x = self.layer1(x) # 256 * h/4 * w/4 + x = self.layer2(x) # 512 * h/8 * w/8 + x = self.layer3(x) # 1024 * h/16 * w/16 + x = self.layer4(x) # 2048 * h/32 * w/32 + return x + + def stages(self): + return [self.layer1, self.layer2, self.layer3, self.layer4] + + def make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + if downsample is not None: + layers.append(block(self.inplanes, planes, stride, downsample, reduction=True)) + else: + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_module.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_module.py new file mode 100644 index 0000000..ab8aefe --- /dev/null +++ 
b/StreamServer/src/analytic/action/SPPE/src/models/layers/SE_module.py @@ -0,0 +1,19 @@ +from torch import nn + + +class SELayer(nn.Module): + def __init__(self, channel, reduction=1): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), + nn.Sigmoid() + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return x * y diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/__init__.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/__init__.py new file mode 100644 index 0000000..b6e690f --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/__init__.py @@ -0,0 +1 @@ +from . import * diff --git a/StreamServer/src/analytic/action/SPPE/src/models/layers/util_models.py b/StreamServer/src/analytic/action/SPPE/src/models/layers/util_models.py new file mode 100644 index 0000000..52d60d8 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/models/layers/util_models.py @@ -0,0 +1,37 @@ +import torch +import torch.nn as nn +from torch.autograd import Variable + + +class ConcatTable(nn.Module): + def __init__(self, module_list=None): + super(ConcatTable, self).__init__() + + self.modules_list = nn.ModuleList(module_list) + + def forward(self, x: Variable): + y = [] + for i in range(len(self.modules_list)): + y.append(self.modules_list[i](x)) + return y + + def add(self, module): + self.modules_list.append(module) + + +class CaddTable(nn.Module): + def __init__(self, inplace=False): + super(CaddTable, self).__init__() + self.inplace = inplace + + def forward(self, x: Variable or list): + return torch.stack(x, 0).sum(0) + + +class Identity(nn.Module): + def __init__(self, params=None): + super(Identity, self).__init__() + self.params = nn.ParameterList(params) + + def forward(self, x: Variable or list): + return x diff --git a/StreamServer/src/analytic/action/SPPE/src/opt.py b/StreamServer/src/analytic/action/SPPE/src/opt.py new file mode 100644 index 0000000..2a43bcc --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/opt.py @@ -0,0 +1,115 @@ +"""import argparse +import torch + +parser = argparse.ArgumentParser(description='PyTorch AlphaPose Training') +parser.add_argument("--return_counts", type=bool, default=True) +parser.add_argument("--mode", default='client') +parser.add_argument("--port", default=52162) + +"----------------------------- General options -----------------------------" +parser.add_argument('--expID', default='default', type=str, + help='Experiment ID') +parser.add_argument('--dataset', default='coco', type=str, + help='Dataset choice: mpii | coco') +parser.add_argument('--nThreads', default=30, type=int, + help='Number of data loading threads') +parser.add_argument('--debug', default=False, type=bool, + help='Print the debug information') +parser.add_argument('--snapshot', default=1, type=int, + help='How often to take a snapshot of the model (0 = never)') + +"----------------------------- AlphaPose options -----------------------------" +parser.add_argument('--addDPG', default=False, type=bool, + help='Train with data augmentation') + +"----------------------------- Model options -----------------------------" +parser.add_argument('--netType', default='hgPRM', type=str, + help='Options: hgPRM | resnext') +parser.add_argument('--loadModel', default=None, type=str, + help='Provide full path 
to a previously trained model') +parser.add_argument('--Continue', default=False, type=bool, + help='Pick up where an experiment left off') +parser.add_argument('--nFeats', default=256, type=int, + help='Number of features in the hourglass') +parser.add_argument('--nClasses', default=33, type=int, + help='Number of output channel') +parser.add_argument('--nStack', default=8, type=int, + help='Number of hourglasses to stack') + +"----------------------------- Hyperparameter options -----------------------------" +parser.add_argument('--LR', default=2.5e-4, type=float, + help='Learning rate') +parser.add_argument('--momentum', default=0, type=float, + help='Momentum') +parser.add_argument('--weightDecay', default=0, type=float, + help='Weight decay') +parser.add_argument('--crit', default='MSE', type=str, + help='Criterion type') +parser.add_argument('--optMethod', default='rmsprop', type=str, + help='Optimization method: rmsprop | sgd | nag | adadelta') + + +"----------------------------- Training options -----------------------------" +parser.add_argument('--nEpochs', default=50, type=int, + help='Number of hourglasses to stack') +parser.add_argument('--epoch', default=0, type=int, + help='Current epoch') +parser.add_argument('--trainBatch', default=40, type=int, + help='Train-batch size') +parser.add_argument('--validBatch', default=20, type=int, + help='Valid-batch size') +parser.add_argument('--trainIters', default=0, type=int, + help='Total train iters') +parser.add_argument('--valIters', default=0, type=int, + help='Total valid iters') +parser.add_argument('--init', default=None, type=str, + help='Initialization') +"----------------------------- Data options -----------------------------" +parser.add_argument('--inputResH', default=384, type=int, + help='Input image height') +parser.add_argument('--inputResW', default=320, type=int, + help='Input image width') +parser.add_argument('--outputResH', default=96, type=int, + help='Output heatmap height') +parser.add_argument('--outputResW', default=80, type=int, + help='Output heatmap width') +parser.add_argument('--scale', default=0.25, type=float, + help='Degree of scale augmentation') +parser.add_argument('--rotate', default=30, type=float, + help='Degree of rotation augmentation') +parser.add_argument('--hmGauss', default=1, type=int, + help='Heatmap gaussian size') + +"----------------------------- PyraNet options -----------------------------" +parser.add_argument('--baseWidth', default=9, type=int, + help='Heatmap gaussian size') +parser.add_argument('--cardinality', default=5, type=int, + help='Heatmap gaussian size') +parser.add_argument('--nResidual', default=1, type=int, + help='Number of residual modules at each location in the pyranet') + +"----------------------------- Distribution options -----------------------------" +parser.add_argument('--dist', dest='dist', type=int, default=1, + help='distributed training or not') +parser.add_argument('--backend', dest='backend', type=str, default='gloo', + help='backend for distributed training') +parser.add_argument('--port', dest='port', + help='port of server') +opt = parser.parse_args()""" + +"""if opt.Continue: + opt = torch.load("../exp/{}/{}/option.pkl".format(opt.dataset, opt.expID)) + opt.Continue = True + opt.nEpochs = 50 + print("--- Continue ---")""" + + +class opt: + nClasses = 33 + inputResH = 384 + inputResW = 320 + outputResH = 96 + outputResW = 80 + scale = 0.25 + rotate = 30 + hmGauss = 1 diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/__init__.py 
b/StreamServer/src/analytic/action/SPPE/src/utils/__init__.py new file mode 100644 index 0000000..b6e690f --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/utils/__init__.py @@ -0,0 +1 @@ +from . import * diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/dataset/__init__.py b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/dataset/coco.py b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/coco.py new file mode 100644 index 0000000..e1f2646 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/coco.py @@ -0,0 +1,85 @@ +import os +import h5py +from functools import reduce + +import torch.utils.data as data +from ..pose import generateSampleBox +from opt import opt + + +class Mscoco(data.Dataset): + def __init__(self, train=True, sigma=1, + scale_factor=(0.2, 0.3), rot_factor=40, label_type='Gaussian'): + self.img_folder = '../data/coco/images' # root image folders + self.is_train = train # training set or test set + self.inputResH = opt.inputResH + self.inputResW = opt.inputResW + self.outputResH = opt.outputResH + self.outputResW = opt.outputResW + self.sigma = sigma + self.scale_factor = scale_factor + self.rot_factor = rot_factor + self.label_type = label_type + + self.nJoints_coco = 17 + self.nJoints_mpii = 16 + self.nJoints = 33 + + self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, 17) + self.flipRef = ((2, 3), (4, 5), (6, 7), + (8, 9), (10, 11), (12, 13), + (14, 15), (16, 17)) + + # create train/val split + with h5py.File('../data/coco/annot_clean.h5', 'r') as annot: + # train + self.imgname_coco_train = annot['imgname'][:-5887] + self.bndbox_coco_train = annot['bndbox'][:-5887] + self.part_coco_train = annot['part'][:-5887] + # val + self.imgname_coco_val = annot['imgname'][-5887:] + self.bndbox_coco_val = annot['bndbox'][-5887:] + self.part_coco_val = annot['part'][-5887:] + + self.size_train = self.imgname_coco_train.shape[0] + self.size_val = self.imgname_coco_val.shape[0] + + def __getitem__(self, index): + sf = self.scale_factor + + if self.is_train: + part = self.part_coco_train[index] + bndbox = self.bndbox_coco_train[index] + imgname = self.imgname_coco_train[index] + else: + part = self.part_coco_val[index] + bndbox = self.bndbox_coco_val[index] + imgname = self.imgname_coco_val[index] + + imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname)) + img_path = os.path.join(self.img_folder, imgname) + + metaData = generateSampleBox(img_path, bndbox, part, self.nJoints, + 'coco', sf, self, train=self.is_train) + + inp, out_bigcircle, out_smallcircle, out, setMask = metaData + + label = [] + for i in range(opt.nStack): + if i < 2: + # label.append(out_bigcircle.clone()) + label.append(out.clone()) + elif i < 4: + # label.append(out_smallcircle.clone()) + label.append(out.clone()) + else: + label.append(out.clone()) + + return inp, label, setMask, 'coco' + + def __len__(self): + if self.is_train: + return self.size_train + else: + return self.size_val diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/dataset/fuse.py b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/fuse.py new file mode 100644 index 0000000..db3e04a --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/fuse.py @@ -0,0 +1,122 @@ +import os +import h5py +from functools import reduce + +import torch.utils.data as data +from ..pose import generateSampleBox +from opt 
import opt + + +class Mscoco(data.Dataset): + def __init__(self, train=True, sigma=1, + scale_factor=0.25, rot_factor=30, label_type='Gaussian'): + self.img_folder = '../data/' # root image folders + self.is_train = train # training set or test set + self.inputResH = 320 + self.inputResW = 256 + self.outputResH = 80 + self.outputResW = 64 + self.sigma = sigma + self.scale_factor = (0.2, 0.3) + self.rot_factor = rot_factor + self.label_type = label_type + + self.nJoints_coco = 17 + self.nJoints_mpii = 16 + self.nJoints = 33 + + self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8, # COCO + 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, # MPII + 28, 29, 32, 33) + + self.flipRef = ((2, 3), (4, 5), (6, 7), # COCO + (8, 9), (10, 11), (12, 13), + (14, 15), (16, 17), + (18, 23), (19, 22), (20, 21), # MPII + (28, 33), (29, 32), (30, 31)) + + ''' + Create train/val split + ''' + # COCO + with h5py.File('../data/coco/annot_clean.h5', 'r') as annot: + # train + self.imgname_coco_train = annot['imgname'][:-5887] + self.bndbox_coco_train = annot['bndbox'][:-5887] + self.part_coco_train = annot['part'][:-5887] + # val + self.imgname_coco_val = annot['imgname'][-5887:] + self.bndbox_coco_val = annot['bndbox'][-5887:] + self.part_coco_val = annot['part'][-5887:] + # MPII + with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot: + # train + self.imgname_mpii_train = annot['imgname'][:-1358] + self.bndbox_mpii_train = annot['bndbox'][:-1358] + self.part_mpii_train = annot['part'][:-1358] + # val + self.imgname_mpii_val = annot['imgname'][-1358:] + self.bndbox_mpii_val = annot['bndbox'][-1358:] + self.part_mpii_val = annot['part'][-1358:] + + self.size_coco_train = self.imgname_coco_train.shape[0] + self.size_coco_val = self.imgname_coco_val.shape[0] + self.size_train = self.imgname_coco_train.shape[0] + self.imgname_mpii_train.shape[0] + self.size_val = self.imgname_coco_val.shape[0] + self.imgname_mpii_val.shape[0] + self.train, self.valid = [], [] + + def __getitem__(self, index): + sf = self.scale_factor + + if self.is_train and index < self.size_coco_train: # COCO + part = self.part_coco_train[index] + bndbox = self.bndbox_coco_train[index] + imgname = self.imgname_coco_train[index] + imgset = 'coco' + elif self.is_train: # MPII + part = self.part_mpii_train[index - self.size_coco_train] + bndbox = self.bndbox_mpii_train[index - self.size_coco_train] + imgname = self.imgname_mpii_train[index - self.size_coco_train] + imgset = 'mpii' + elif index < self.size_coco_val: + part = self.part_coco_val[index] + bndbox = self.bndbox_coco_val[index] + imgname = self.imgname_coco_val[index] + imgset = 'coco' + else: + part = self.part_mpii_val[index - self.size_coco_val] + bndbox = self.bndbox_mpii_val[index - self.size_coco_val] + imgname = self.imgname_mpii_val[index - self.size_coco_val] + imgset = 'mpii' + + if imgset == 'coco': + imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname)) + else: + imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13] + + img_path = os.path.join(self.img_folder, imgset, 'images', imgname) + + metaData = generateSampleBox(img_path, bndbox, part, self.nJoints, + imgset, sf, self, train=self.is_train) + + inp, out_bigcircle, out_smallcircle, out, setMask = metaData + + label = [] + for i in range(opt.nStack): + if i < 2: + # label.append(out_bigcircle.clone()) + label.append(out.clone()) + elif i < 4: + # label.append(out_smallcircle.clone()) + label.append(out.clone()) + else: + label.append(out.clone()) + + return inp, label, setMask, 
imgset + + def __len__(self): + if self.is_train: + return self.size_train + else: + return self.size_val diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/dataset/mpii.py b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/mpii.py new file mode 100644 index 0000000..eae0dd8 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/utils/dataset/mpii.py @@ -0,0 +1,84 @@ +import os +import h5py +from functools import reduce + +import torch.utils.data as data +from ..pose import generateSampleBox +from opt import opt + + +class Mpii(data.Dataset): + def __init__(self, train=True, sigma=1, + scale_factor=0.25, rot_factor=30, label_type='Gaussian'): + self.img_folder = '../data/mpii/images' # root image folders + self.is_train = train # training set or test set + self.inputResH = 320 + self.inputResW = 256 + self.outputResH = 80 + self.outputResW = 64 + self.sigma = sigma + self.scale_factor = (0.2, 0.3) + self.rot_factor = rot_factor + self.label_type = label_type + + self.nJoints_mpii = 16 + self.nJoints = 16 + + self.accIdxs = (1, 2, 3, 4, 5, 6, + 11, 12, 15, 16) + self.flipRef = ((1, 6), (2, 5), (3, 4), + (11, 16), (12, 15), (13, 14)) + + # create train/val split + with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot: + # train + self.imgname_mpii_train = annot['imgname'][:-1358] + self.bndbox_mpii_train = annot['bndbox'][:-1358] + self.part_mpii_train = annot['part'][:-1358] + # val + self.imgname_mpii_val = annot['imgname'][-1358:] + self.bndbox_mpii_val = annot['bndbox'][-1358:] + self.part_mpii_val = annot['part'][-1358:] + + self.size_train = self.imgname_mpii_train.shape[0] + self.size_val = self.imgname_mpii_val.shape[0] + self.train, self.valid = [], [] + + def __getitem__(self, index): + sf = self.scale_factor + + if self.is_train: + part = self.part_mpii_train[index] + bndbox = self.bndbox_mpii_train[index] + imgname = self.imgname_mpii_train[index] + else: + part = self.part_mpii_val[index] + bndbox = self.bndbox_mpii_val[index] + imgname = self.imgname_mpii_val[index] + + imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13] + img_path = os.path.join(self.img_folder, imgname) + + metaData = generateSampleBox(img_path, bndbox, part, self.nJoints, + 'mpii', sf, self, train=self.is_train) + + inp, out_bigcircle, out_smallcircle, out, setMask = metaData + + label = [] + for i in range(opt.nStack): + if i < 2: + #label.append(out_bigcircle.clone()) + label.append(out.clone()) + elif i < 4: + #label.append(out_smallcircle.clone()) + label.append(out.clone()) + else: + label.append(out.clone()) + + return inp, label, setMask + + def __len__(self): + if self.is_train: + return self.size_train + else: + return self.size_val diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/eval.py b/StreamServer/src/analytic/action/SPPE/src/utils/eval.py new file mode 100644 index 0000000..b1659b4 --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/utils/eval.py @@ -0,0 +1,216 @@ +from ..opt import opt +try: + from utils import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks +except ImportError: + from .img import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks +import torch + + +class DataLogger(object): + def __init__(self): + self.clear() + + def clear(self): + self.value = 0 + self.sum = 0 + self.cnt = 0 + self.avg = 0 + + def update(self, value, n=1): + self.value = value + self.sum += value * n + self.cnt += n + self._cal_avg() + + def _cal_avg(self): + self.avg = self.sum / self.cnt 
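+    # Example usage (illustrative sketch): DataLogger keeps a running
+    # weighted average, e.g. of per-batch accuracy during training:
+    #   logger = DataLogger()
+    #   logger.update(0.80, n=32)  # batch accuracy 0.80 over 32 samples
+    #   logger.update(0.60, n=16)
+    #   logger.avg                 # -> (0.80*32 + 0.60*16) / 48 ~= 0.733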
+ + +def accuracy(output, label, dataset): + if type(output) == list: + return accuracy(output[opt.nStack - 1], label[opt.nStack - 1], dataset) + else: + return heatmapAccuracy(output.cpu().data, label.cpu().data, dataset.accIdxs) + + +def heatmapAccuracy(output, label, idxs): + preds = getPreds(output) + gt = getPreds(label) + + norm = torch.ones(preds.size(0)) * opt.outputResH / 10 + dists = calc_dists(preds, gt, norm) + #print(dists) + acc = torch.zeros(len(idxs) + 1) + avg_acc = 0 + cnt = 0 + for i in range(len(idxs)): + acc[i + 1] = dist_acc(dists[idxs[i] - 1]) + if acc[i + 1] >= 0: + avg_acc = avg_acc + acc[i + 1] + cnt += 1 + if cnt != 0: + acc[0] = avg_acc / cnt + return acc + + +def getPreds(hm): + """ get predictions from score maps in torch Tensor + return type: torch.LongTensor + """ + assert hm.dim() == 4, 'Score maps should be 4-dim' + maxval, idx = torch.max(hm.view(hm.size(0), hm.size(1), -1), 2) + + maxval = maxval.view(hm.size(0), hm.size(1), 1) + idx = idx.view(hm.size(0), hm.size(1), 1) + 1 + + preds = idx.repeat(1, 1, 2).float() + + preds[:, :, 0] = (preds[:, :, 0] - 1) % hm.size(3) + preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hm.size(3)) + + # pred_mask = maxval.gt(0).repeat(1, 1, 2).float() + # preds *= pred_mask + return preds + + +def calc_dists(preds, target, normalize): + preds = preds.float().clone() + target = target.float().clone() + dists = torch.zeros(preds.size(1), preds.size(0)) + for n in range(preds.size(0)): + for c in range(preds.size(1)): + if target[n, c, 0] > 0 and target[n, c, 1] > 0: + dists[c, n] = torch.dist( + preds[n, c, :], target[n, c, :]) / normalize[n] + else: + dists[c, n] = -1 + return dists + + +def dist_acc(dists, thr=0.5): + """ Return percentage below threshold while ignoring values with a -1 """ + if dists.ne(-1).sum() > 0: + return dists.le(thr).eq(dists.ne(-1)).float().sum() * 1.0 / dists.ne(-1).float().sum() + else: + return - 1 + + +def postprocess(output): + p = getPreds(output) + + for i in range(p.size(0)): + for j in range(p.size(1)): + hm = output[i][j] + pX, pY = int(round(p[i][j][0])), int(round(p[i][j][1])) + if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1: + diff = torch.Tensor((hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX])) + p[i][j] += diff.sign() * 0.25 + p -= 0.5 + + return p + + +def getPrediction(hms, pt1, pt2, inpH, inpW, resH, resW): + """ + Get keypoint location from heatmaps + """ + assert hms.dim() == 4, 'Score maps should be 4-dim' + maxval, idx = torch.max(hms.view(hms.size(0), hms.size(1), -1), 2) + + maxval = maxval.view(hms.size(0), hms.size(1), 1) + idx = idx.view(hms.size(0), hms.size(1), 1) + 1 + + preds = idx.repeat(1, 1, 2).float() + + preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3) + preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3)) + + pred_mask = maxval.gt(0).repeat(1, 1, 2).float() + preds *= pred_mask + + # Very simple post-processing step to improve performance at tight PCK thresholds + """for i in range(preds.size(0)): + for j in range(preds.size(1)): + hm = hms[i][j] + pX, pY = int(round(float(preds[i][j][0]))), int(round(float(preds[i][j][1]))) + if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1: + diff = torch.Tensor( + (hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX])) + preds[i][j] += diff.sign() * 0.25 + preds += 0.2""" + + preds_tf = torch.zeros(preds.size()) + preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW) + return preds, preds_tf, maxval + + +def 
getMultiPeakPrediction(hms, pt1, pt2, inpH, inpW, resH, resW): + + assert hms.dim() == 4, 'Score maps should be 4-dim' + + preds_img = {} + hms = hms.numpy() + for n in range(hms.shape[0]): # Number of samples + preds_img[n] = {} # Result of sample: n + for k in range(hms.shape[1]): # Number of keypoints + preds_img[n][k] = [] # Result of keypoint: k + hm = hms[n][k] + + candidate_points = findPeak(hm) + + res_pt = processPeaks(candidate_points, hm, + pt1[n], pt2[n], inpH, inpW, resH, resW) + + preds_img[n][k] = res_pt + + return preds_img + + +def getPrediction_batch(hms, pt1, pt2, inpH, inpW, resH, resW): + """ + Get keypoint location from heatmaps + pt1, pt2: [n, 2] + OUTPUT: + preds: [n, 17, 2] + """ + + assert hms.dim() == 4, 'Score maps should be 4-dim' + flat_hms = hms.view(hms.size(0), hms.size(1), -1) + maxval, idx = torch.max(flat_hms, 2) + + maxval = maxval.view(hms.size(0), hms.size(1), 1) + idx = idx.view(hms.size(0), hms.size(1), 1) + 1 + + preds = idx.repeat(1, 1, 2).float() + + preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3) + preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3)) + + pred_mask = maxval.gt(0).repeat(1, 1, 2).float() + preds *= pred_mask + + # Very simple post-processing step to improve performance at tight PCK thresholds + idx_up = (idx - hms.size(3)).clamp(0, flat_hms.size(2) - 1) + idx_down = (idx + hms.size(3)).clamp(0, flat_hms.size(2) - 1) + idx_left = (idx - 1).clamp(0, flat_hms.size(2) - 1) + idx_right = (idx + 1).clamp(0, flat_hms.size(2) - 1) + + maxval_up = flat_hms.gather(2, idx_up) + maxval_down = flat_hms.gather(2, idx_down) + maxval_left = flat_hms.gather(2, idx_left) + maxval_right = flat_hms.gather(2, idx_right) + + diff1 = (maxval_right - maxval_left).sign() * 0.25 + diff2 = (maxval_down - maxval_up).sign() * 0.25 + diff1[idx_up <= hms.size(3)] = 0 + diff1[idx_down / hms.size(3) >= (hms.size(3) - 1)] = 0 + diff2[(idx_left % hms.size(3)) == 0] = 0 + diff2[(idx_left % hms.size(3)) == (hms.size(3) - 1)] = 0 + + preds[:, :, 0] += diff1.squeeze(-1) + preds[:, :, 1] += diff2.squeeze(-1) + + preds_tf = torch.zeros(preds.size()) + preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW) + + return preds, preds_tf, maxval diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/img.py b/StreamServer/src/analytic/action/SPPE/src/utils/img.py new file mode 100644 index 0000000..24df2ee --- /dev/null +++ b/StreamServer/src/analytic/action/SPPE/src/utils/img.py @@ -0,0 +1,534 @@ +import numpy as np +import cv2 +import torch +import scipy.misc +from torchvision import transforms +import torch.nn.functional as F +from scipy.ndimage import maximum_filter + +from PIL import Image +from copy import deepcopy +import matplotlib +#matplotlib.use('agg') +import matplotlib.pyplot as plt + + +def im_to_torch(img): + img = np.array(img) + img = np.transpose(img, (2, 0, 1)) # C*H*W + img = to_torch(img).float() + if img.max() > 1: + img /= 255 + return img + + +def torch_to_im(img): + img = to_numpy(img) + img = np.transpose(img, (1, 2, 0)) # C*H*W + return img + + +def load_image(img_path): + # H x W x C => C x H x W + return im_to_torch(scipy.misc.imread(img_path, mode='RGB')) + + +def to_numpy(tensor): + if torch.is_tensor(tensor): + return tensor.cpu().numpy() + elif type(tensor).__module__ != 'numpy': + raise ValueError("Cannot convert {} to numpy array" + .format(type(tensor))) + return tensor + + +def to_torch(ndarray): + if type(ndarray).__module__ == 'numpy': + return torch.from_numpy(ndarray) + elif not 
torch.is_tensor(ndarray): + raise ValueError("Cannot convert {} to torch tensor" + .format(type(ndarray))) + return ndarray + + +def drawCircle(img, pt, sigma): + img = to_numpy(img) + tmpSize = 3 * sigma + # Check that any part of the gaussian is in-bounds + ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)] + br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)] + + if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or + br[0] < 0 or br[1] < 0): + # If not, just return the image as is + return to_torch(img) + + # Generate gaussian + size = 2 * tmpSize + 1 + x = np.arange(0, size, 1, float) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + sigma = size / 4.0 + # The gaussian is not normalized, we want the center value to equal 1 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) + g[g > 0] = 1 + # Usable gaussian range + g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1] + # Image range + img_x = max(0, ul[0]), min(br[0], img.shape[1]) + img_y = max(0, ul[1]), min(br[1], img.shape[0]) + + img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]] + return to_torch(img) + + +def drawGaussian(img, pt, sigma): + img = to_numpy(img) + tmpSize = 3 * sigma + # Check that any part of the gaussian is in-bounds + ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)] + br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)] + + if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or + br[0] < 0 or br[1] < 0): + # If not, just return the image as is + return to_torch(img) + + # Generate gaussian + size = 2 * tmpSize + 1 + x = np.arange(0, size, 1, float) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + sigma = size / 4.0 + # The gaussian is not normalized, we want the center value to equal 1 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) + + # Usable gaussian range + g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1] + # Image range + img_x = max(0, ul[0]), min(br[0], img.shape[1]) + img_y = max(0, ul[1]), min(br[1], img.shape[0]) + + img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]] + return to_torch(img) + + +def drawBigCircle(img, pt, sigma): + img = to_numpy(img) + tmpSize = 3 * sigma + # Check that any part of the gaussian is in-bounds + ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)] + br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)] + + if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or + br[0] < 0 or br[1] < 0): + # If not, just return the image as is + return to_torch(img) + + # Generate gaussian + size = 2 * tmpSize + 1 + x = np.arange(0, size, 1, float) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + sigma = size / 4.0 + # The gaussian is not normalized, we want the center value to equal 1 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) + g[g > 0.4] = 1 + # Usable gaussian range + g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1] + # Image range + img_x = max(0, ul[0]), min(br[0], img.shape[1]) + img_y = max(0, ul[1]), min(br[1], img.shape[0]) + + img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]] + return to_torch(img) + + +def drawSmallCircle(img, pt, sigma): + img = to_numpy(img) + tmpSize = 3 * sigma + # Check that any part of the gaussian is in-bounds + ul = [int(pt[0] - tmpSize), int(pt[1] - tmpSize)] + br = [int(pt[0] + tmpSize + 1), int(pt[1] + tmpSize + 1)] + + if (ul[0] 
>= img.shape[1] or ul[1] >= img.shape[0] or + br[0] < 0 or br[1] < 0): + # If not, just return the image as is + return to_torch(img) + + # Generate gaussian + size = 2 * tmpSize + 1 + x = np.arange(0, size, 1, float) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + sigma = size / 4.0 + # The gaussian is not normalized, we want the center value to equal 1 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) + g[g > 0.5] = 1 + # Usable gaussian range + g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1] + # Image range + img_x = max(0, ul[0]), min(br[0], img.shape[1]) + img_y = max(0, ul[1]), min(br[1], img.shape[0]) + + img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]] + return to_torch(img) + + +def transformBox(pt, ul, br, inpH, inpW, resH, resW): + center = torch.zeros(2) + center[0] = (br[0] - 1 - ul[0]) / 2 + center[1] = (br[1] - 1 - ul[1]) / 2 + + lenH = max(br[1] - ul[1], (br[0] - ul[0]) * inpH / inpW) + lenW = lenH * inpW / inpH + + _pt = torch.zeros(2) + _pt[0] = pt[0] - ul[0] + _pt[1] = pt[1] - ul[1] + # Move to center + _pt[0] = _pt[0] + max(0, (lenW - 1) / 2 - center[0]) + _pt[1] = _pt[1] + max(0, (lenH - 1) / 2 - center[1]) + pt = (_pt * resH) / lenH + pt[0] = round(float(pt[0])) + pt[1] = round(float(pt[1])) + return pt.int() + + +def transformBoxInvert(pt, ul, br, inpH, inpW, resH, resW): + center = np.zeros(2) + center[0] = (br[0] - 1 - ul[0]) / 2 + center[1] = (br[1] - 1 - ul[1]) / 2 + + lenH = max(br[1] - ul[1], (br[0] - ul[0]) * inpH / inpW) + lenW = lenH * inpW / inpH + + _pt = (pt * lenH) / resH + _pt[0] = _pt[0] - max(0, (lenW - 1) / 2 - center[0]) + _pt[1] = _pt[1] - max(0, (lenH - 1) / 2 - center[1]) + + new_point = np.zeros(2) + new_point[0] = _pt[0] + ul[0] + new_point[1] = _pt[1] + ul[1] + return new_point + + +def transformBoxInvert_batch(pt, ul, br, inpH, inpW, resH, resW): + """ + pt: [n, 17, 2] + ul: [n, 2] + br: [n, 2] + """ + num_pt = pt.shape[1] + center = (br - 1 - ul) / 2 + + size = br - ul + size[:, 0] *= (inpH / inpW) + + lenH, _ = torch.max(size, dim=1) # [n,] + lenW = lenH * (inpW / inpH) + + _pt = (pt * lenH[:, np.newaxis, np.newaxis]) / resH + _pt[:, :, 0] = _pt[:, :, 0] - ((lenW[:, np.newaxis].repeat(1, num_pt) - 1) / + 2 - center[:, 0].unsqueeze(-1).repeat(1, num_pt)).clamp(min=0) + _pt[:, :, 1] = _pt[:, :, 1] - ((lenH[:, np.newaxis].repeat(1, num_pt) - 1) / + 2 - center[:, 1].unsqueeze(-1).repeat(1, num_pt)).clamp(min=0) + + new_point = torch.zeros(pt.size()) + new_point[:, :, 0] = _pt[:, :, 0] + ul[:, 0].unsqueeze(-1).repeat(1, num_pt) + new_point[:, :, 1] = _pt[:, :, 1] + ul[:, 1].unsqueeze(-1).repeat(1, num_pt) + return new_point + + +def cropBox(img, ul, br, resH, resW): + ul = ul.int() + br = (br - 1).int() + # br = br.int() + lenH = max((br[1] - ul[1]).item(), (br[0] - ul[0]).item() * resH / resW) + lenW = lenH * resW / resH + if img.dim() == 2: + img = img[np.newaxis, :] + + box_shape = [(br[1] - ul[1]).item(), (br[0] - ul[0]).item()] + pad_size = [(lenH - box_shape[0]) // 2, (lenW - box_shape[1]) // 2] + # Padding Zeros + if ul[1] > 0: + img[:, :ul[1], :] = 0 + if ul[0] > 0: + img[:, :, :ul[0]] = 0 + if br[1] < img.shape[1] - 1: + img[:, br[1] + 1:, :] = 0 + if br[0] < img.shape[2] - 1: + img[:, :, br[0] + 1:] = 0 + + src = np.zeros((3, 2), dtype=np.float32) + dst = np.zeros((3, 2), dtype=np.float32) + + src[0, :] = np.array( + [ul[0] - pad_size[1], ul[1] - pad_size[0]], np.float32) + src[1, :] = np.array( + [br[0] + pad_size[1], br[1] + 
pad_size[0]], np.float32) + dst[0, :] = 0 + dst[1, :] = np.array([resW - 1, resH - 1], np.float32) + + src[2:, :] = get_3rd_point(src[0, :], src[1, :]) + dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) + + trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + + dst_img = cv2.warpAffine(torch_to_im(img), trans, + (resW, resH), flags=cv2.INTER_LINEAR) + + return im_to_torch(torch.Tensor(dst_img)) + + +def cv_rotate(img, rot, resW, resH): + center = np.array((resW - 1, resH - 1)) / 2 + rot_rad = np.pi * rot / 180 + + src_dir = get_dir([0, (resH - 1) * -0.5], rot_rad) + dst_dir = np.array([0, (resH - 1) * -0.5], np.float32) + + src = np.zeros((3, 2), dtype=np.float32) + dst = np.zeros((3, 2), dtype=np.float32) + + src[0, :] = center + src[1, :] = center + src_dir + dst[0, :] = [(resW - 1) * 0.5, (resH - 1) * 0.5] + dst[1, :] = np.array([(resW - 1) * 0.5, (resH - 1) * 0.5]) + dst_dir + + src[2:, :] = get_3rd_point(src[0, :], src[1, :]) + dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) + + trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + + dst_img = cv2.warpAffine(torch_to_im(img), trans, + (resW, resH), flags=cv2.INTER_LINEAR) + + return im_to_torch(torch.Tensor(dst_img)) + + +def flip(x): + assert (x.dim() == 3 or x.dim() == 4) + dim = x.dim() - 1 + if '0.4.1' in torch.__version__ or '1.0' in torch.__version__: + return x.flip(dims=(dim,)) + else: + is_cuda = False + if x.is_cuda: + is_cuda = True + x = x.cpu() + x = x.numpy().copy() + if x.ndim == 3: + x = np.transpose(np.fliplr(np.transpose(x, (0, 2, 1))), (0, 2, 1)) + elif x.ndim == 4: + for i in range(x.shape[0]): + x[i] = np.transpose( + np.fliplr(np.transpose(x[i], (0, 2, 1))), (0, 2, 1)) + # x = x.swapaxes(dim, 0) + # x = x[::-1, ...] + # x = x.swapaxes(0, dim) + + x = torch.from_numpy(x.copy()) + if is_cuda: + x = x.cuda() + return x + + +def shuffleLR(x, dataset): + flipRef = dataset.flipRef + assert (x.dim() == 3 or x.dim() == 4) + for pair in flipRef: + dim0, dim1 = pair + dim0 -= 1 + dim1 -= 1 + if x.dim() == 4: + tmp = x[:, dim1].clone() + x[:, dim1] = x[:, dim0].clone() + x[:, dim0] = tmp.clone() + #x[:, dim0], x[:, dim1] = deepcopy((x[:, dim1], x[:, dim0])) + else: + tmp = x[dim1].clone() + x[dim1] = x[dim0].clone() + x[dim0] = tmp.clone() + #x[dim0], x[dim1] = deepcopy((x[dim1], x[dim0])) + return x + + +def drawMPII(inps, preds): + assert inps.dim() == 4 + p_color = ['g', 'b', 'purple', 'b', 'purple', + 'y', 'o', 'y', 'o', 'y', 'o', + 'pink', 'r', 'pink', 'r', 'pink', 'r'] + p_color = ['r', 'r', 'r', 'b', 'b', 'b', + 'black', 'black', 'black', 'black', + 'y', 'y', 'white', 'white', 'g', 'g'] + + nImg = inps.size(0) + imgs = [] + for n in range(nImg): + img = to_numpy(inps[n]) + img = np.transpose(img, (1, 2, 0)) + imgs.append(img) + + fig = plt.figure() + plt.imshow(imgs[0]) + ax = fig.add_subplot(1, 1, 1) + #print(preds.shape) + for p in range(16): + x, y = preds[0][p] + cor = (round(x), round(y)), 10 + ax.add_patch(plt.Circle(*cor, color=p_color[p])) + plt.axis('off') + + plt.show() + + return imgs + + +def drawCOCO(inps, preds, scores): + assert inps.dim() == 4 + p_color = ['g', 'b', 'purple', 'b', 'purple', + 'y', 'orange', 'y', 'orange', 'y', 'orange', + 'pink', 'r', 'pink', 'r', 'pink', 'r'] + + nImg = inps.size(0) + imgs = [] + for n in range(nImg): + img = to_numpy(inps[n]) + img = np.transpose(img, (1, 2, 0)) + imgs.append(img) + + fig = plt.figure() + plt.imshow(imgs[0]) + ax = fig.add_subplot(1, 1, 1) + #print(preds.shape) + for p in range(17): + if scores[0][p][0] < 0.2: + continue 
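+        # Keypoints with confidence >= 0.2 are drawn as small colored circles
+        # at their predicted (x, y) locations on the first image of the batch.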
+ x, y = preds[0][p] + cor = (round(x), round(y)), 3 + ax.add_patch(plt.Circle(*cor, color=p_color[p])) + plt.axis('off') + + plt.show() + + return imgs + + +def get_3rd_point(a, b): + direct = a - b + return b + np.array([-direct[1], direct[0]], dtype=np.float32) + + +def get_dir(src_point, rot_rad): + sn, cs = np.sin(rot_rad), np.cos(rot_rad) + + src_result = [0, 0] + src_result[0] = src_point[0] * cs - src_point[1] * sn + src_result[1] = src_point[0] * sn + src_point[1] * cs + + return src_result + + +def findPeak(hm): + mx = maximum_filter(hm, size=5) + idx = zip(*np.where((mx == hm) * (hm > 0.1))) + candidate_points = [] + for (y, x) in idx: + candidate_points.append([x, y, hm[y][x]]) + if len(candidate_points) == 0: + return torch.zeros(0) + candidate_points = np.array(candidate_points) + candidate_points = candidate_points[np.lexsort(-candidate_points.T)] + return torch.Tensor(candidate_points) + + +def processPeaks(candidate_points, hm, pt1, pt2, inpH, inpW, resH, resW): + # type: (Tensor, Tensor, Tensor, Tensor, float, float, float, float) -> List[Tensor] + + if candidate_points.shape[0] == 0: # Low Response + maxval = np.max(hm.reshape(1, -1), 1) + idx = np.argmax(hm.reshape(1, -1), 1) + + x = idx % resW + y = int(idx / resW) + + candidate_points = np.zeros((1, 3)) + candidate_points[0, 0:1] = x + candidate_points[0, 1:2] = y + candidate_points[0, 2:3] = maxval + + res_pts = [] + for i in range(candidate_points.shape[0]): + x, y, maxval = candidate_points[i][0], candidate_points[i][1], candidate_points[i][2] + + if bool(maxval < 0.05) and len(res_pts) > 0: + pass + else: + if bool(x > 0) and bool(x < resW - 2): + if bool(hm[int(y)][int(x) + 1] - hm[int(y)][int(x) - 1] > 0): + x += 0.25 + elif bool(hm[int(y)][int(x) + 1] - hm[int(y)][int(x) - 1] < 0): + x -= 0.25 + if bool(y > 0) and bool(y < resH - 2): + if bool(hm[int(y) + 1][int(x)] - hm[int(y) - 1][int(x)] > 0): + y += (0.25 * inpH / inpW) + elif bool(hm[int(y) + 1][int(x)] - hm[int(y) - 1][int(x)] < 0): + y -= (0.25 * inpH / inpW) + + #pt = torch.zeros(2) + pt = np.zeros(2) + pt[0] = x + 0.2 + pt[1] = y + 0.2 + + pt = transformBoxInvert(pt, pt1, pt2, inpH, inpW, resH, resW) + + res_pt = np.zeros(3) + res_pt[:2] = pt + res_pt[2] = maxval + + res_pts.append(res_pt) + + if maxval < 0.05: + break + return res_pts + + +def crop_dets(img, boxes, height, width): + img = im_to_torch(img) + img_h = img.size(1) + img_w = img.size(2) + img[0].add_(-0.406) + img[1].add_(-0.457) + img[2].add_(-0.480) + + inps = torch.zeros(len(boxes), 3, height, width) + pt1 = torch.zeros(len(boxes), 2) + pt2 = torch.zeros(len(boxes), 2) + for i, box in enumerate(boxes): + upLeft = torch.Tensor((float(box[0]), float(box[1]))) + bottomRight = torch.Tensor((float(box[2]), float(box[3]))) + + h = bottomRight[1] - upLeft[1] + w = bottomRight[0] - upLeft[0] + if w > 100: + scaleRate = 0.2 + else: + scaleRate = 0.3 + + upLeft[0] = max(0, upLeft[0] - w * scaleRate / 2) + upLeft[1] = max(0, upLeft[1] - h * scaleRate / 2) + bottomRight[0] = max(min(img_w - 1, bottomRight[0] + w * scaleRate / 2), upLeft[0] + 5) + bottomRight[1] = max(min(img_h - 1, bottomRight[1] + h * scaleRate / 2), upLeft[1] + 5) + + inps[i] = cropBox(img.clone(), upLeft, bottomRight, height, width) + pt1[i] = upLeft + pt2[i] = bottomRight + + return inps, pt1, pt2 + diff --git a/StreamServer/src/analytic/action/SPPE/src/utils/pose.py b/StreamServer/src/analytic/action/SPPE/src/utils/pose.py new file mode 100644 index 0000000..60836f0 --- /dev/null +++ 
b/StreamServer/src/analytic/action/SPPE/src/utils/pose.py @@ -0,0 +1,169 @@ +from utils import (load_image, drawGaussian, drawBigCircle, drawSmallCircle, cv_rotate, + cropBox, transformBox, flip, shuffleLR, drawCOCO) +from utils import getPrediction +import torch +import numpy as np +import random +from SPPE.src.opt import opt + + +def rnd(x): + return max(-2 * x, min(2 * x, np.random.randn(1)[0] * x)) + + +def generateSampleBox(img_path, bndbox, part, nJoints, imgset, scale_factor, dataset, train=True): + + nJoints_coco = 17 + nJoints_mpii = 16 + img = load_image(img_path) + if train: + img[0].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1) + img[1].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1) + img[2].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1) + + ori_img = img.clone() + img[0].add_(-0.406) + img[1].add_(-0.457) + img[2].add_(-0.480) + + upLeft = torch.Tensor((int(bndbox[0][0]), int(bndbox[0][1]))) + bottomRight = torch.Tensor((int(bndbox[0][2]), int(bndbox[0][3]))) + ht = bottomRight[1] - upLeft[1] + width = bottomRight[0] - upLeft[0] + imght = img.shape[1] + imgwidth = img.shape[2] + scaleRate = random.uniform(*scale_factor) + + upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2) + upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2) + bottomRight[0] = min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2) + bottomRight[1] = min(imght - 1, bottomRight[1] + ht * scaleRate / 2) + + # Doing Random Sample + if opt.addDPG: + PatchScale = random.uniform(0, 1) + if PatchScale > 0.85: + ratio = ht / width + if width < ht: + patchWidth = PatchScale * width + patchHt = patchWidth * ratio + else: + patchHt = PatchScale * ht + patchWidth = patchHt / ratio + + xmin = upLeft[0] + random.uniform(0, 1) * (width - patchWidth) + ymin = upLeft[1] + random.uniform(0, 1) * (ht - patchHt) + + xmax = xmin + patchWidth + 1 + ymax = ymin + patchHt + 1 + else: + xmin = max(1, min(upLeft[0] + np.random.normal(-0.0142, 0.1158) * width, imgwidth - 3)) + ymin = max(1, min(upLeft[1] + np.random.normal(0.0043, 0.068) * ht, imght - 3)) + xmax = min(max(xmin + 2, bottomRight[0] + np.random.normal(0.0154, 0.1337) * width), imgwidth - 3) + ymax = min(max(ymin + 2, bottomRight[1] + np.random.normal(-0.0013, 0.0711) * ht), imght - 3) + + upLeft[0] = xmin + upLeft[1] = ymin + bottomRight[0] = xmax + bottomRight[1] = ymax + + # Counting Joints number + jointNum = 0 + if imgset == 'coco': + for i in range(17): + if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \ + and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]: + jointNum += 1 + else: + for i in range(16): + if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \ + and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]: + jointNum += 1 + + # Doing Random Crop + if opt.addDPG: + if jointNum > 13 and train: + switch = random.uniform(0, 1) + if switch > 0.96: + bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2 + bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2 + elif switch > 0.92: + upLeft[0] = (upLeft[0] + bottomRight[0]) / 2 + bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2 + elif switch > 0.88: + upLeft[1] = (upLeft[1] + bottomRight[1]) / 2 + bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2 + elif switch > 0.84: + upLeft[0] = (upLeft[0] + bottomRight[0]) / 2 + upLeft[1] = (upLeft[1] + bottomRight[1]) / 2 + elif switch > 0.80: + bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2 + elif switch > 0.76: + upLeft[0] = (upLeft[0] + bottomRight[0]) / 2 + elif switch > 0.72: + bottomRight[1] = (upLeft[1] + 
bottomRight[1]) / 2 + elif switch > 0.68: + upLeft[1] = (upLeft[1] + bottomRight[1]) / 2 + + ori_inp = cropBox(ori_img, upLeft, bottomRight, opt.inputResH, opt.inputResW) + inp = cropBox(img, upLeft, bottomRight, opt.inputResH, opt.inputResW) + if jointNum == 0: + inp = torch.zeros(3, opt.inputResH, opt.inputResW) + + out_bigcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW) + out_smallcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW) + out = torch.zeros(nJoints, opt.outputResH, opt.outputResW) + setMask = torch.zeros(nJoints, opt.outputResH, opt.outputResW) + + # Draw Label + if imgset == 'coco': + for i in range(nJoints_coco): + if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \ + and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]: + out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2) + out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) + out[i] = drawGaussian(out[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) + setMask[i].add_(1) + elif imgset == 'mpii': + for i in range(nJoints_coco, nJoints_coco + nJoints_mpii): + if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \ + and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]: + out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2) + out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) + out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) + setMask[i].add_(1) + else: + for i in range(nJoints_coco, nJoints_coco + nJoints_mpii): + if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \ + and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]: + out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2) + out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) + out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) + if i != 6 + nJoints_coco and i != 7 + nJoints_coco: + setMask[i].add_(1) + + if opt.debug: + preds_hm, preds_img, preds_scores = getPrediction(out.unsqueeze(0), upLeft.unsqueeze(0), bottomRight.unsqueeze(0), opt.inputResH, + opt.inputResW, opt.outputResH, opt.outputResW) + tmp_preds = preds_hm.mul(opt.inputResH / opt.outputResH) + drawCOCO(ori_inp.unsqueeze(0), tmp_preds, preds_scores) + + if train: + # Flip + if random.uniform(0, 1) < 0.5: + inp = flip(inp) + ori_inp = flip(ori_inp) + out_bigcircle = 
shuffleLR(flip(out_bigcircle), dataset) + out_smallcircle = shuffleLR(flip(out_smallcircle), dataset) + out = shuffleLR(flip(out), dataset) + # Rotate + r = rnd(opt.rotate) + if random.uniform(0, 1) < 0.6: + r = 0 + if r != 0: + inp = cv_rotate(inp, r, opt.inputResW, opt.inputResH) + out_bigcircle = cv_rotate(out_bigcircle, r, opt.outputResW, opt.outputResH) + out_smallcircle = cv_rotate(out_smallcircle, r, opt.outputResW, opt.outputResH) + out = cv_rotate(out, r, opt.outputResW, opt.outputResH) + + return inp, out_bigcircle, out_smallcircle, out, setMask diff --git a/StreamServer/src/analytic/action/Track/Tracker.py b/StreamServer/src/analytic/action/Track/Tracker.py new file mode 100644 index 0000000..324deeb --- /dev/null +++ b/StreamServer/src/analytic/action/Track/Tracker.py @@ -0,0 +1,192 @@ +import time +import numpy as np +from collections import deque + +from .linear_assignment import min_cost_matching, matching_cascade +from .kalman_filter import KalmanFilter +from .iou_matching import iou_cost + + +class TrackState: + """Enumeration type for the single target track state. Newly created tracks are + classified as `tentative` until enough evidence has been collected. Then, + the track state is changed to `confirmed`. Tracks that are no longer alive + are classified as `deleted` to mark them for removal from the set of active + tracks. + """ + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Detection(object): + """This class represents a bounding box, keypoints, score of person detected + in a single image. + + Args: + tlbr: (float array) Of shape [top, left, bottom, right]., + keypoints: (float array) Of shape [node, pts]., + confidence: (float) Confidence score of detection. + """ + def __init__(self, tlbr, keypoints, confidence): + self.tlbr = tlbr + self.keypoints = keypoints + self.confidence = confidence + + def to_tlwh(self): + """Get (top, left, width, height). + """ + ret = self.tlbr.copy() + ret[2:] = ret[2:] - ret[:2] + return ret + + def to_xyah(self): + """Get (x_center, y_center, aspect ratio, height). + """ + ret = self.to_tlwh() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + +class Track: + def __init__(self, mean, covariance, track_id, n_init, max_age=30, buffer=30): + self.mean = mean + self.covariance = covariance + self.track_id = track_id + self.hist = 1 + self.age = 1 + self.time_since_update = 0 + self.n_init = n_init + self.max_age = max_age + + # keypoints list for use in Actions prediction. + self.keypoints_list = deque(maxlen=buffer) + + self.state = TrackState.Tentative + + def to_tlwh(self): + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def get_center(self): + return self.mean[:2].copy() + + def predict(self, kf): + """Propagate the state distribution to the current time step using a + Kalman filter prediction step. + """ + self.mean, self.covariance = kf.predict(self.mean, self.covariance) + self.age += 1 + self.time_since_update += 1 + + def update(self, kf, detection): + """Perform Kalman filter measurement update step. 
+ """ + self.mean, self.covariance = kf.update(self.mean, self.covariance, + detection.to_xyah()) + self.keypoints_list.append(detection.keypoints) + + self.hist += 1 + self.time_since_update = 0 + if self.state == TrackState.Tentative and self.hist >= self.n_init: + self.state = TrackState.Confirmed + + def mark_missed(self): + """Mark this track as missed (no association at the current time step). + """ + if self.state == TrackState.Tentative: + self.state = TrackState.Deleted + elif self.time_since_update > self.max_age: + self.state = TrackState.Deleted + + def is_tentative(self): + return self.state == TrackState.Tentative + + def is_confirmed(self): + return self.state == TrackState.Confirmed + + def is_deleted(self): + return self.state == TrackState.Deleted + + +class Tracker: + def __init__(self, max_iou_distance=0.7, max_age=30, n_init=5): + self.max_iou_dist = max_iou_distance + self.max_age = max_age + self.n_init = n_init + + self.kf = KalmanFilter() + self.tracks = [] + self._next_id = 1 + + def predict(self): + """Propagate track state distributions one time step forward. + This function should be called once every time step, before `update`. + """ + for track in self.tracks: + track.predict(self.kf) + + def update(self, detections): + """Perform measurement update and track management. + Parameters + ---------- + detections : List[deep_sort.detection.Detection] + A list of detections at the current time step. + """ + # Run matching cascade. + matches, unmatched_tracks, unmatched_detections = self._match(detections) + + # Update matched tracks set. + for track_idx, detection_idx in matches: + self.tracks[track_idx].update(self.kf, detections[detection_idx]) + # Update tracks that missing. + for track_idx in unmatched_tracks: + self.tracks[track_idx].mark_missed() + # Create new detections track. + for detection_idx in unmatched_detections: + self._initiate_track(detections[detection_idx]) + + # Remove deleted tracks. 
+ self.tracks = [t for t in self.tracks if not t.is_deleted()] + + def _match(self, detections): + confirmed_tracks, unconfirmed_tracks = [], [] + for i, t in enumerate(self.tracks): + if t.is_confirmed(): + confirmed_tracks.append(i) + else: + unconfirmed_tracks.append(i) + + matches_a, unmatched_tracks_a, unmatched_detections = matching_cascade( + iou_cost, self.max_iou_dist, self.max_age, self.tracks, detections, confirmed_tracks + ) + + track_candidates = unconfirmed_tracks + [ + k for k in unmatched_tracks_a if self.tracks[k].time_since_update == 1] + unmatched_tracks_a = [ + k for k in unmatched_tracks_a if self.tracks[k].time_since_update != 1] + + matches_b, unmatched_tracks_b, unmatched_detections = min_cost_matching( + iou_cost, self.max_iou_dist, self.tracks, detections, track_candidates, unmatched_detections + ) + + matches = matches_a + matches_b + unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) + return matches, unmatched_tracks, unmatched_detections + + def _initiate_track(self, detection): + if detection.confidence < 0.4: + return + mean, covariance = self.kf.initiate(detection.to_xyah()) + self.tracks.append(Track(mean, covariance, self._next_id, self.n_init, self.max_age)) + self._next_id += 1 + + diff --git a/StreamServer/src/analytic/action/Track/iou_matching.py b/StreamServer/src/analytic/action/Track/iou_matching.py new file mode 100644 index 0000000..843268f --- /dev/null +++ b/StreamServer/src/analytic/action/Track/iou_matching.py @@ -0,0 +1,78 @@ +import numpy as np + +INFTY_COST = 1e+5 + + +def iou(bbox, candidates): + """Compute intersection over union. + Parameters + ---------- + bbox : ndarray + A bounding box in format `(xmin, ymin, xmax, ymax)`. + candidates : ndarray + A matrix of candidate bounding boxes (one per row) in the same format + as `bbox`. + + Returns + ------- + ndarray + The intersection over union in [0, 1] between the `bbox` and each + candidate. A higher score means a larger fraction of the `bbox` is + occluded by the candidate. + """ + #bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] + bbox_tl, bbox_br = bbox[:2], bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, 2:] # + candidates[:, :2] + + tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] + br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] + wh = np.maximum(0., br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = (bbox[2:] - bbox[:2]).prod() + area_candidates = (candidates[:, 2:] - candidates[:, :2]).prod(axis=1) + return area_intersection / (area_bbox + area_candidates - area_intersection) + + +def iou_cost(tracks, detections, track_indices=None, detection_indices=None): + """An intersection over union distance metric. + Parameters + ---------- + tracks : List[Track] + A list of tracks. + detections : List[Detection] + A list of detections. + track_indices : Optional[List[int]] + A list of indices to tracks that should be matched. Defaults to + all `tracks`. + detection_indices : Optional[List[int]] + A list of indices to detections that should be matched. Defaults + to all `detections`. + + Returns + ------- + ndarray + Returns a cost matrix of shape + len(track_indices), len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 
+ + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + #if tracks[track_idx].time_since_update > 1: + # cost_matrix[row, :] = INFTY_COST + # continue + + bbox = tracks[track_idx].to_tlbr() + candidates = np.asarray([detections[i].tlbr for i in detection_indices]) + cost_matrix[row, :] = 1. - iou(bbox, candidates) + + return cost_matrix diff --git a/StreamServer/src/analytic/action/Track/kalman_filter.py b/StreamServer/src/analytic/action/Track/kalman_filter.py new file mode 100644 index 0000000..9e038e1 --- /dev/null +++ b/StreamServer/src/analytic/action/Track/kalman_filter.py @@ -0,0 +1,198 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + + +class KalmanFilter(object): + """A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + """ + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. 
+ """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + mean = np.dot(self._motion_mat, mean) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. 
+ """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + cholesky_factor = np.linalg.cholesky(covariance) + d = measurements - mean + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha diff --git a/StreamServer/src/analytic/action/Track/linear_assignment.py b/StreamServer/src/analytic/action/Track/linear_assignment.py new file mode 100644 index 0000000..ea76e81 --- /dev/null +++ b/StreamServer/src/analytic/action/Track/linear_assignment.py @@ -0,0 +1,191 @@ +import numpy as np +#from sklearn.utils.linear_assignment_ import linear_assignment +from scipy.optimize import linear_sum_assignment + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} +INFTY_COST = 1e+5 + + +def min_cost_matching(distance_metric, max_distance, tracks, detections, + track_indices=None, detection_indices=None): + """Solve linear assignment problem. + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection_indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + tracks : List[Track] + A list of predicted tracks at the current time step. + detections : List[Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. 
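+    # Build the track-vs-detection cost matrix, clip entries above the gating
+    # threshold, and solve the assignment with scipy.optimize.linear_sum_assignment;
+    # pairs whose cost still exceeds max_distance are rejected below and
+    # reported as unmatched.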
+ + cost_matrix = distance_metric(tracks, detections, track_indices, detection_indices) + cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 + indices = linear_sum_assignment(cost_matrix) + indices = np.array(indices).transpose() + + matches, unmatched_tracks, unmatched_detections = [], [], [] + for col, detection_idx in enumerate(detection_indices): + if col not in indices[:, 1]: + unmatched_detections.append(detection_idx) + for row, track_idx in enumerate(track_indices): + if row not in indices[:, 0]: + unmatched_tracks.append(track_idx) + for row, col in indices: + track_idx = track_indices[row] + detection_idx = detection_indices[col] + if cost_matrix[row, col] > max_distance: + unmatched_tracks.append(track_idx) + unmatched_detections.append(detection_idx) + else: + matches.append((track_idx, detection_idx)) + + return matches, unmatched_tracks, unmatched_detections + + +def matching_cascade(distance_metric, max_distance, cascade_depth, tracks, detections, + track_indices=None, detection_indices=None): + """Run matching cascade. + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + cascade_depth: int + The cascade depth, should be se to the maximum track age. + tracks : List[Track] + A list of predicted tracks at the current time step. + detections : List[Detection] + A list of detections at the current time step. + track_indices : Optional[List[int]] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). Defaults to all tracks. + detection_indices : Optional[List[int]] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). Defaults to all + detections. + + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + """ + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + unmatched_detections = detection_indices + matches = [] + for level in range(cascade_depth): + if len(unmatched_detections) == 0: # No detections left + break + + track_indices_l = [k for k in track_indices + if tracks[k].time_since_update == 1 + level] + if len(track_indices_l) == 0: # Nothing to match at this level + continue + + matches_l, _, unmatched_detections = min_cost_matching( + distance_metric, max_distance, tracks, detections, track_indices_l, unmatched_detections) + matches += matches_l + + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) + return matches, unmatched_tracks, unmatched_detections + + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, track_indices, detection_indices, + gated_cost=INFTY_COST, only_position=False): + """Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. 
+ Parameters + ---------- + kf : The Kalman filter. + cost_matrix : ndarray + The NxM dimensional cost matrix, where N is the number of track indices + and M is the number of detection indices, such that entry (i, j) is the + association cost between `tracks[track_indices[i]]` and + `detections[detection_indices[j]]`. + tracks : List[Track] + A list of predicted tracks at the current time step. + detections : List[Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + gated_cost : Optional[float] + Entries in the cost matrix corresponding to infeasible associations are + set this value. Defaults to a very large value. + only_position : Optional[bool] + If True, only the x, y position of the state distribution is considered + during gating. Defaults to False. + + Returns + ------- + ndarray + Returns the modified cost matrix. + """ + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + measurements = np.asarray([detections[i].to_xyah() for i in detection_indices]) + for row, track_idx in enumerate(track_indices): + track = tracks[track_idx] + gating_distance = kf.gating_distance(track.mean, track.covariance, + measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = gated_cost + + return cost_matrix diff --git a/StreamServer/src/analytic/action/__init__.py b/StreamServer/src/analytic/action/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/StreamServer/src/analytic/action/action_model.py b/StreamServer/src/analytic/action/action_model.py new file mode 100644 index 0000000..d7f9858 --- /dev/null +++ b/StreamServer/src/analytic/action/action_model.py @@ -0,0 +1,155 @@ +import os +import cv2 +import time +from fastapi import HTTPException +import torch +import argparse +import numpy as np + +from .Detection.Utils import ResizePadding +from .CameraLoader import CamLoader, CamLoader_Q +from .DetectorLoader import TinyYOLOv3_onecls + +from .PoseEstimateLoader import SPPE_FastPose +from .fn import draw_single + +from .Track.Tracker import Detection, Tracker +from .ActionsEstLoader import TSSTG + +from config import CONFIG_FILE, YOLO_WEIGHT_FILE, SPPE_WEIGHT_FILE, TSSTG_WEIGHT_FILE + +CONFIG_FILE = CONFIG_FILE +YOLO_WEIGHT_FILE = YOLO_WEIGHT_FILE +SPPE_WEIGHT_FILE = SPPE_WEIGHT_FILE +TSSTG_WEIGHT_FILE = TSSTG_WEIGHT_FILE + +INP_DETS = 384 +INP_POSE = (224, 160) +POSE_BACKBONE = 'resnet50' +SHOW_DETECTED = False +SHOW_SKELETON = True +DEVICE = 'cuda' + +resize_fn = ResizePadding(INP_DETS, INP_DETS) + +def preproc(image): + """preprocess function for CameraLoader. 
+ """ + image = resize_fn(image) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + return image + + +def kpt2bbox(kpt, ex=20): + """Get a bbox that holds all of the keypoints (x, y). + kpt: array of shape `(N, 2)`, + ex: (int) margin (pixels) by which to expand the bounding box, + """ + return np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex, + kpt[:, 0].max() + ex, kpt[:, 1].max() + ex)) + + +def generate_action_model_frame(source): + CAM_SOURCE = source + + # Model initialization + detect_model = TinyYOLOv3_onecls(INP_DETS, device=DEVICE, config_file=CONFIG_FILE, + weight_file=YOLO_WEIGHT_FILE) + pose_model = SPPE_FastPose(POSE_BACKBONE, INP_POSE[0], INP_POSE[1], device=DEVICE, path=SPPE_WEIGHT_FILE) + action_model = TSSTG(weight_file=TSSTG_WEIGHT_FILE) # action model + + # Tracker. + max_age = 30 + tracker = Tracker(max_age=max_age, n_init=3) + + cam = CamLoader(int(CAM_SOURCE) if CAM_SOURCE.isdigit() else CAM_SOURCE, + preprocess=preproc).start() + + fps_time = 0 + f = 0 + while cam.grabbed(): + f += 1 + frame = cam.getitem() + image = frame.copy() + + # Detect human bounding boxes in the frame with the detector model. + detected = detect_model.detect(frame, need_resize=False, expand_bb=10) + + # Predict each track's bbox for the current frame from previous frames with the Kalman filter. + tracker.predict() + # Merge the two sources of predicted bboxes together. + for track in tracker.tracks: + det = torch.tensor([track.to_tlbr().tolist() + [0.5, 1.0, 0.0]], dtype=torch.float32) + detected = torch.cat([detected, det], dim=0) if detected is not None else det + + detections = [] # List of Detection objects for tracking. + if detected is not None: + #detected = non_max_suppression(detected[None, :], 0.45, 0.2)[0] + # Predict the skeleton pose of each bbox. + poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4]) + + # Create Detection objects. + detections = [Detection(kpt2bbox(ps['keypoints'].numpy()), + np.concatenate((ps['keypoints'].numpy(), + ps['kp_score'].numpy()), axis=1), + ps['kp_score'].mean().numpy()) for ps in poses] + + # VISUALIZE. + if SHOW_DETECTED: + for bb in detected[:, 0:5]: + frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 1) + + # Update tracks by matching current detections against existing tracks, or + # create a new track if none matched. + tracker.update(detections) + + # Predict the action of each track. + for i, track in enumerate(tracker.tracks): + if not track.is_confirmed(): + continue + + track_id = track.track_id + bbox = track.to_tlbr().astype(int) + center = track.get_center().astype(int) + + action = 'pending' + clr = (0, 255, 0) + # Use a 30-frame time window for prediction. + if len(track.keypoints_list) == 30: + pts = np.array(track.keypoints_list, dtype=np.float32) + out = action_model.predict(pts, frame.shape[:2]) + action_name = action_model.class_names[out[0].argmax()] + action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100) + if action_name == 'Fall Down': + clr = (255, 0, 0) + elif action_name == 'Lying Down': + clr = (255, 200, 0) + + # VISUALIZE. + if track.time_since_update == 0: + if SHOW_SKELETON: + frame = draw_single(frame, track.keypoints_list[-1]) + frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1) + frame = cv2.putText(frame, str(track_id), (center[0], center[1]), cv2.FONT_HERSHEY_COMPLEX, + 0.4, (255, 0, 0), 2) + frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15), cv2.FONT_HERSHEY_COMPLEX, + 0.4, clr, 1) + + # Show Frame. + frame = cv2.resize(frame, (0, 0), fx=2., fy=2.)
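+        # Overlay the frame counter and instantaneous FPS, flip RGB back to BGR,
+        # JPEG-encode the frame, and yield it as a multipart chunk suitable for an
+        # MJPEG (multipart/x-mixed-replace) video stream.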
+ frame = cv2.putText(frame, '%d, FPS: %f' % (f, 1.0 / (time.time() - fps_time)), + (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) + frame = frame[:, :, ::-1] + fps_time = time.time() + + # return frame for video streaming + ret, buffer = cv2.imencode('.jpg', frame) + if not ret: + # If encoding fails, raise an error to stop the streaming + raise HTTPException(status_code=500, detail="Frame encoding failed") + yield (b'--frame\r\n' + b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n') + + +def output_action_detection(): + pass \ No newline at end of file diff --git a/StreamServer/src/analytic/action/fn.py b/StreamServer/src/analytic/action/fn.py new file mode 100644 index 0000000..7231a88 --- /dev/null +++ b/StreamServer/src/analytic/action/fn.py @@ -0,0 +1,234 @@ +import re +import cv2 +import time +import math +import torch +import numpy as np + +RED = (0, 0, 255) +GREEN = (0, 255, 0) +BLUE = (255, 0, 0) +CYAN = (255, 255, 0) +YELLOW = (0, 255, 255) +ORANGE = (0, 165, 255) +PURPLE = (255, 0, 255) + +"""COCO_PAIR = [(0, 1), (0, 2), (1, 3), (2, 4), # Head + (5, 6), (5, 7), (7, 9), (6, 8), (8, 10), + (17, 11), (17, 12), # Body + (11, 13), (12, 14), (13, 15), (14, 16)]""" +COCO_PAIR = [(0, 13), (1, 2), (1, 3), (3, 5), (2, 4), (4, 6), (13, 7), (13, 8), # Body + (7, 9), (8, 10), (9, 11), (10, 12)] +POINT_COLORS = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), # Nose, LEye, REye, LEar, REar + (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77), # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist + (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), (0, 255, 255)] # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck +LINE_COLORS = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), (77, 255, 222), + (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), (77, 222, 255), + (255, 156, 127), (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36)] + +MPII_PAIR = [(8, 9), (11, 12), (11, 10), (2, 1), (1, 0), (13, 14), (14, 15), (3, 4), (4, 5), + (8, 7), (7, 6), (6, 2), (6, 3), (8, 12), (8, 13)] + +numpy_type_map = { + 'float64': torch.DoubleTensor, + 'float32': torch.FloatTensor, + 'float16': torch.HalfTensor, + 'int64': torch.LongTensor, + 'int32': torch.IntTensor, + 'int16': torch.ShortTensor, + 'int8': torch.CharTensor, + 'uint8': torch.ByteTensor, +} + +_use_shared_memory = True + + +def collate_fn(batch): + r"""Puts each data field into a tensor with outer dimension batch size""" + + error_msg = "batch must contain tensors, numbers, dicts or lists; found {}" + elem_type = type(batch[0]) + + if isinstance(batch[0], torch.Tensor): + out = None + if _use_shared_memory: + # If we're in a background process, concatenate directly into a + # shared memory tensor to avoid an extra copy + numel = sum([x.numel() for x in batch]) + storage = batch[0].storage()._new_shared(numel) + out = batch[0].new(storage) + return torch.stack(batch, 0, out=out) + elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ + and elem_type.__name__ != 'string_': + elem = batch[0] + if elem_type.__name__ == 'ndarray': + # array of string classes and object + if re.search('[SaUO]', elem.dtype.str) is not None: + raise TypeError(error_msg.format(elem.dtype)) + + return torch.stack([torch.from_numpy(b) for b in batch], 0) + if elem.shape == (): # scalars + py_type = float if elem.dtype.name.startswith('float') else int + return 
numpy_type_map[elem.dtype.name](list(map(py_type, batch)))
+    elif isinstance(batch[0], int):
+        return torch.LongTensor(batch)
+    elif isinstance(batch[0], float):
+        return torch.DoubleTensor(batch)
+    elif isinstance(batch[0], (str, bytes)):
+        return batch
+    elif isinstance(batch[0], dict):
+        return {key: collate_fn([d[key] for d in batch]) for key in batch[0]}
+    elif isinstance(batch[0], (list, tuple)):
+        transposed = zip(*batch)
+        return [collate_fn(samples) for samples in transposed]
+
+    raise TypeError((error_msg.format(type(batch[0]))))
+
+
+def collate_fn_list(batch):
+    img, inp, im_name = zip(*batch)
+    img = collate_fn(img)
+    im_name = collate_fn(im_name)
+
+    return img, inp, im_name
+
+
+def draw_single(frame, pts, joint_format='coco'):
+    if joint_format == 'coco':
+        l_pair = COCO_PAIR
+        p_color = POINT_COLORS
+        line_color = LINE_COLORS
+    elif joint_format == 'mpii':
+        l_pair = MPII_PAIR
+        p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
+        line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
+    else:
+        raise NotImplementedError
+
+    part_line = {}
+    # Append the mid-point of the two shoulder keypoints as an extra 'neck' node.
+    pts = np.concatenate((pts, np.expand_dims((pts[1, :] + pts[2, :]) / 2, 0)), axis=0)
+    for n in range(pts.shape[0]):
+        if pts[n, 2] <= 0.05:
+            continue
+        cor_x, cor_y = int(pts[n, 0]), int(pts[n, 1])
+        part_line[n] = (cor_x, cor_y)
+        cv2.circle(frame, (cor_x, cor_y), 3, p_color[n], -1)
+
+    for i, (start_p, end_p) in enumerate(l_pair):
+        if start_p in part_line and end_p in part_line:
+            start_xy = part_line[start_p]
+            end_xy = part_line[end_p]
+            cv2.line(frame, start_xy, end_xy, line_color[i], int(1*(pts[start_p, 2] + pts[end_p, 2]) + 1))
+    return frame
+
+
+def vis_frame_fast(frame, im_res, joint_format='coco'):
+    """
+    frame: frame image
+    im_res: list of pose prediction dicts
+    joint_format: 'coco' or 'mpii'
+
+    return rendered image
+    """
+    if joint_format == 'coco':
+        l_pair = COCO_PAIR
+        p_color = POINT_COLORS
+        line_color = LINE_COLORS
+    elif joint_format == 'mpii':
+        l_pair = MPII_PAIR
+        p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
+        line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
+    else:
+        raise NotImplementedError
+
+    #im_name = im_res['imgname'].split('/')[-1]
+    img = frame
+    for human in im_res:  # ['result']:
+        part_line = {}
+        kp_preds = human['keypoints']
+        kp_scores = human['kp_score']
+        kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[1, :]+kp_preds[2, :]) / 2, 0)))
+        kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[1, :]+kp_scores[2, :]) / 2, 0)))
+        # Draw keypoints
+        for n in range(kp_scores.shape[0]):
+            if kp_scores[n] <= 0.05:
+                continue
+            cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1])
+            part_line[n] = (cor_x, cor_y)
+            cv2.circle(img, (cor_x, cor_y), 4, p_color[n], -1)
+        # Draw limbs
+        for i, (start_p, end_p) in enumerate(l_pair):
+            if start_p in part_line and end_p in part_line:
+                start_xy = part_line[start_p]
+                end_xy = part_line[end_p]
+                cv2.line(img, start_xy, end_xy, line_color[i], int(2*(kp_scores[start_p] + kp_scores[end_p]) + 1))
+    return img
+
+
+def vis_frame(frame, im_res, joint_format='coco'):
+    """
+    frame: frame image
+    im_res: dict of pose prediction results (with 'imgname' and 'result')
+    joint_format: 'coco' or 'mpii'
+
+    return rendered image
+    """
+    if joint_format == 'coco':
+        l_pair = COCO_PAIR
+        p_color = POINT_COLORS
+        line_color = LINE_COLORS
+    elif joint_format == 'mpii':
+        l_pair = MPII_PAIR
+        p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
+        line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
+    else:
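+        # Only the 'coco' and 'mpii' joint formats are supported.
+        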
raise NotImplementedError + + im_name = im_res['imgname'].split('/')[-1] + img = frame + height, width = img.shape[:2] + img = cv2.resize(img, (int(width/2), int(height/2))) + for human in im_res['result']: + part_line = {} + kp_preds = human['keypoints'] + kp_scores = human['kp_score'] + kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[5, :]+kp_preds[6, :]) / 2, 0))) + kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[5, :]+kp_scores[6, :]) / 2, 0))) + # Draw keypoints + for n in range(kp_scores.shape[0]): + if kp_scores[n] <= 0.05: + continue + cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1]) + part_line[n] = (int(cor_x/2), int(cor_y/2)) + bg = img.copy() + cv2.circle(bg, (int(cor_x/2), int(cor_y/2)), 2, p_color[n], -1) + # Now create a mask of logo and create its inverse mask also + transparency = max(0, min(1, kp_scores[n])) + img = cv2.addWeighted(bg, transparency, img, 1-transparency, 0) + # Draw limbs + for i, (start_p, end_p) in enumerate(l_pair): + if start_p in part_line and end_p in part_line: + start_xy = part_line[start_p] + end_xy = part_line[end_p] + bg = img.copy() + + X = (start_xy[0], end_xy[0]) + Y = (start_xy[1], end_xy[1]) + mX = np.mean(X) + mY = np.mean(Y) + length = ((Y[0] - Y[1]) ** 2 + (X[0] - X[1]) ** 2) ** 0.5 + angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1])) + stickwidth = (kp_scores[start_p] + kp_scores[end_p]) + 1 + polygon = cv2.ellipse2Poly((int(mX),int(mY)), (int(length/2), stickwidth), int(angle), 0, 360, 1) + cv2.fillConvexPoly(bg, polygon, line_color[i]) + #cv2.line(bg, start_xy, end_xy, line_color[i], (2 * (kp_scores[start_p] + kp_scores[end_p])) + 1) + transparency = max(0, min(1, 0.5*(kp_scores[start_p] + kp_scores[end_p]))) + img = cv2.addWeighted(bg, transparency, img, 1-transparency, 0) + img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC) + return img + + +def getTime(time1=0): + if not time1: + return time.time() + else: + interval = time.time() - time1 + return time.time(), interval \ No newline at end of file diff --git a/StreamServer/src/analytic/action/pPose_nms.py b/StreamServer/src/analytic/action/pPose_nms.py new file mode 100644 index 0000000..7867a42 --- /dev/null +++ b/StreamServer/src/analytic/action/pPose_nms.py @@ -0,0 +1,284 @@ +# -*- coding: utf-8 -*- +import torch +import json +import os +import zipfile +import time +from multiprocessing.dummy import Pool as ThreadPool +import numpy as np + +''' Constant Configuration ''' +delta1 = 1 +mu = 1.7 +delta2 = 2.65 +gamma = 22.48 +scoreThreds = 0.3 +matchThreds = 5 +areaThres = 0 # 40 * 40.5 +alpha = 0.1 +#pool = ThreadPool(4) + + +def pose_nms(bboxes, bbox_scores, pose_preds, pose_scores): + """ + Parametric Pose NMS algorithm + bboxes: bbox locations list (n, 4) + bbox_scores: bbox scores list (n,) + pose_preds: pose locations list (n, 17, 2) + pose_scores: pose scores list (n, 17, 1) + """ + global ori_pose_preds, ori_pose_scores, ref_dists + + pose_scores[pose_scores == 0] = 1e-5 + + final_result = [] + + ori_bboxes = bboxes.clone() + ori_bbox_scores = bbox_scores.clone() + ori_pose_preds = pose_preds.clone() + ori_pose_scores = pose_scores.clone() + + xmax = bboxes[:, 2] + xmin = bboxes[:, 0] + ymax = bboxes[:, 3] + ymin = bboxes[:, 1] + + widths = xmax - xmin + heights = ymax - ymin + ref_dists = alpha * np.maximum(widths, heights) + + nsamples = bboxes.shape[0] + human_scores = pose_scores.mean(dim=1) + + human_ids = np.arange(nsamples) + # Do pPose-NMS + pick = [] + merge_ids = [] + while human_scores.shape[0] != 0: + # 
Pick the one with highest score + pick_id = torch.argmax(human_scores) + pick.append(human_ids[pick_id]) + # num_visPart = torch.sum(pose_scores[pick_id] > 0.2) + + # Get numbers of match keypoints by calling PCK_match + ref_dist = ref_dists[human_ids[pick_id]] + simi = get_parametric_distance(pick_id, pose_preds, pose_scores, ref_dist) + num_match_keypoints = PCK_match(pose_preds[pick_id], pose_preds, ref_dist) + + # Delete humans who have more than matchThreds keypoints overlap and high similarity + delete_ids = torch.from_numpy(np.arange(human_scores.shape[0]))[ + (simi > gamma) | (num_match_keypoints >= matchThreds)] + + if delete_ids.shape[0] == 0: + delete_ids = pick_id + #else: + # delete_ids = torch.from_numpy(delete_ids) + + merge_ids.append(human_ids[delete_ids]) + pose_preds = np.delete(pose_preds, delete_ids, axis=0) + pose_scores = np.delete(pose_scores, delete_ids, axis=0) + human_ids = np.delete(human_ids, delete_ids) + human_scores = np.delete(human_scores, delete_ids, axis=0) + bbox_scores = np.delete(bbox_scores, delete_ids, axis=0) + + assert len(merge_ids) == len(pick) + bboxs_pick = ori_bboxes[pick] + preds_pick = ori_pose_preds[pick] + scores_pick = ori_pose_scores[pick] + bbox_scores_pick = ori_bbox_scores[pick] + #final_result = pool.map(filter_result, zip(scores_pick, merge_ids, preds_pick, pick, bbox_scores_pick)) + #final_result = [item for item in final_result if item is not None] + + for j in range(len(pick)): + ids = np.arange(pose_preds.shape[1]) + max_score = torch.max(scores_pick[j, ids, 0]) + + if max_score < scoreThreds: + continue + + # Merge poses + merge_id = merge_ids[j] + merge_pose, merge_score = p_merge_fast( + preds_pick[j], ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick[j]]) + + max_score = torch.max(merge_score[ids]) + if max_score < scoreThreds: + continue + + xmax = max(merge_pose[:, 0]) + xmin = min(merge_pose[:, 0]) + ymax = max(merge_pose[:, 1]) + ymin = min(merge_pose[:, 1]) + + if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < areaThres: + continue + + final_result.append({ + 'bbox': bboxs_pick[j], + 'bbox_score': bbox_scores_pick[j], + 'keypoints': merge_pose - 0.3, + 'kp_score': merge_score, + 'proposal_score': torch.mean(merge_score) + bbox_scores_pick[j] + 1.25 * max(merge_score) + }) + + return final_result + + +def filter_result(args): + score_pick, merge_id, pred_pick, pick, bbox_score_pick = args + global ori_pose_preds, ori_pose_scores, ref_dists + ids = np.arange(17) + max_score = torch.max(score_pick[ids, 0]) + + if max_score < scoreThreds: + return None + + # Merge poses + merge_pose, merge_score = p_merge_fast( + pred_pick, ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick]) + + max_score = torch.max(merge_score[ids]) + if max_score < scoreThreds: + return None + + xmax = max(merge_pose[:, 0]) + xmin = min(merge_pose[:, 0]) + ymax = max(merge_pose[:, 1]) + ymin = min(merge_pose[:, 1]) + + if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < 40 * 40.5: + return None + + return { + 'keypoints': merge_pose - 0.3, + 'kp_score': merge_score, + 'proposal_score': torch.mean(merge_score) + bbox_score_pick + 1.25 * max(merge_score) + } + + +def p_merge(ref_pose, cluster_preds, cluster_scores, ref_dist): + """ + Score-weighted pose merging + INPUT: + ref_pose: reference pose -- [17, 2] + cluster_preds: redundant poses -- [n, 17, 2] + cluster_scores: redundant poses score -- [n, 17, 1] + ref_dist: reference scale -- Constant + OUTPUT: + final_pose: merged pose -- [17, 2] + final_score: merged score -- [17] + 
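    NOTE:
+        Only keypoints whose distance to the reference pose is within
+        min(ref_dist, 15) contribute to the merge; their confidence scores
+        are renormalized to sum to 1 and used as the averaging weights.
+    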
""" + dist = torch.sqrt(torch.sum( + torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2), + dim=2 + )) # [n, 17] + + kp_num = 17 + ref_dist = min(ref_dist, 15) + + mask = (dist <= ref_dist) + final_pose = torch.zeros(kp_num, 2) + final_score = torch.zeros(kp_num) + + if cluster_preds.dim() == 2: + cluster_preds.unsqueeze_(0) + cluster_scores.unsqueeze_(0) + if mask.dim() == 1: + mask.unsqueeze_(0) + + for i in range(kp_num): + cluster_joint_scores = cluster_scores[:, i][mask[:, i]] # [k, 1] + cluster_joint_location = cluster_preds[:, i, :][mask[:, i].unsqueeze( + -1).repeat(1, 2)].view((torch.sum(mask[:, i]), -1)) + + # Get an normalized score + normed_scores = cluster_joint_scores / torch.sum(cluster_joint_scores) + + # Merge poses by a weighted sum + final_pose[i, 0] = torch.dot(cluster_joint_location[:, 0], normed_scores.squeeze(-1)) + final_pose[i, 1] = torch.dot(cluster_joint_location[:, 1], normed_scores.squeeze(-1)) + + final_score[i] = torch.dot(cluster_joint_scores.transpose(0, 1).squeeze(0), normed_scores.squeeze(-1)) + + return final_pose, final_score + + +def p_merge_fast(ref_pose, cluster_preds, cluster_scores, ref_dist): + """ + Score-weighted pose merging + INPUT: + ref_pose: reference pose -- [17, 2] + cluster_preds: redundant poses -- [n, 17, 2] + cluster_scores: redundant poses score -- [n, 17, 1] + ref_dist: reference scale -- Constant + OUTPUT: + final_pose: merged pose -- [17, 2] + final_score: merged score -- [17] + """ + dist = torch.sqrt(torch.sum( + torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2), + dim=2 + )) + + kp_num = 17 + ref_dist = min(ref_dist, 15) + + mask = (dist <= ref_dist) + final_pose = torch.zeros(kp_num, 2) + final_score = torch.zeros(kp_num) + + if cluster_preds.dim() == 2: + cluster_preds.unsqueeze_(0) + cluster_scores.unsqueeze_(0) + if mask.dim() == 1: + mask.unsqueeze_(0) + + # Weighted Merge + masked_scores = cluster_scores.mul(mask.float().unsqueeze(-1)) + normed_scores = masked_scores / torch.sum(masked_scores, dim=0) + + final_pose = torch.mul(cluster_preds, normed_scores.repeat(1, 1, 2)).sum(dim=0) + final_score = torch.mul(masked_scores, normed_scores).sum(dim=0) + return final_pose, final_score + + +def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist): + pick_preds = all_preds[i] + pred_scores = keypoint_scores[i] + dist = torch.sqrt(torch.sum( + torch.pow(pick_preds[np.newaxis, :] - all_preds, 2), + dim=2 + )) + mask = (dist <= 1) + + # Define a keypoints distance + score_dists = torch.zeros(all_preds.shape[0], all_preds.shape[1]) + keypoint_scores.squeeze_() + if keypoint_scores.dim() == 1: + keypoint_scores.unsqueeze_(0) + if pred_scores.dim() == 1: + pred_scores.unsqueeze_(1) + # The predicted scores are repeated up to do broadcast + pred_scores = pred_scores.repeat(1, all_preds.shape[0]).transpose(0, 1) + + score_dists[mask] = torch.tanh(pred_scores[mask] / delta1) *\ + torch.tanh(keypoint_scores[mask] / delta1) + + point_dist = torch.exp((-1) * dist / delta2) + final_dist = torch.sum(score_dists, dim=1) + mu * torch.sum(point_dist, dim=1) + + return final_dist + + +def PCK_match(pick_pred, all_preds, ref_dist): + dist = torch.sqrt(torch.sum( + torch.pow(pick_pred[np.newaxis, :] - all_preds, 2), + dim=2 + )) + ref_dist = min(ref_dist, 7) + num_match_keypoints = torch.sum( + dist / ref_dist <= 1, + dim=1 + ) + + return num_match_keypoints diff --git a/StreamServer/src/analytic/action/pose_utils.py b/StreamServer/src/analytic/action/pose_utils.py new file mode 100644 index 0000000..934cef6 --- /dev/null +++ 
b/StreamServer/src/analytic/action/pose_utils.py
@@ -0,0 +1,27 @@
+import numpy as np
+
+
+def normalize_points_with_size(xy, width, height, flip=False):
+    """Normalize point coordinates to the 0-1 range using the image size.
+    xy : (frames, parts, xy) or (parts, xy)
+    """
+    if xy.ndim == 2:
+        xy = np.expand_dims(xy, 0)
+    xy[:, :, 0] /= width
+    xy[:, :, 1] /= height
+    if flip:
+        xy[:, :, 0] = 1 - xy[:, :, 0]
+    return xy
+
+
+def scale_pose(xy):
+    """Rescale each pose to the -1..1 range using the min/max of its own keypoints.
+    xy : (frames, parts, xy) or (parts, xy)
+    """
+    if xy.ndim == 2:
+        xy = np.expand_dims(xy, 0)
+    xy_min = np.nanmin(xy, axis=1)
+    xy_max = np.nanmax(xy, axis=1)
+    for i in range(xy.shape[0]):
+        xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1
+    return xy.squeeze()
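For context, `generate_action_model_frame` yields multipart JPEG chunks, so it is meant to be wrapped in a streaming HTTP response. A minimal sketch of how it could be mounted in FastAPI follows; the `app` object, route path, query parameter name, and import path are assumptions for illustration and are not defined by this patch.

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

# Assumed import path relative to StreamServer/src; adjust to the project's actual layout.
from analytic.action.action_model import generate_action_model_frame

app = FastAPI()


@app.get("/action/stream")
def action_stream(source: str = "0"):
    # The boundary name must match the b'--frame' marker yielded by the generator.
    return StreamingResponse(generate_action_model_frame(source),
                             media_type="multipart/x-mixed-replace; boundary=frame")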