Mask-RCNN网络有两个主要部分。
第一个是候选区域生成网络,每个图像生成大约300个候选区域。在训练期间,这些候选区域(ROI)中的每一个都通过第二部分,即目标检测和掩模预测网络,如上所示。注意,由于掩模预测分支与分类框预测分支并行运行,因此对于每个给定的ROI,网络预测可能会获得属于任何类别的掩模。
在推理期间,候选区域会先用非极大值抑制(NMS)方法进行筛选,并且掩模预测分支仅处理得分最高的100个检测框。因此,对于100个ROI和90个对象类,网络的掩模预测部分输出尺寸为100x90x15x15的4D张量,其中每个掩模的大小为15×15。
对于上面显示的绵羊图像,网络检测到两个对象。对于每个对象,它输出一个数组,其中包含预测的类分数(表示对象属于预测类的概率),检测到的对象的边界框的左,上,右和下位置。从掩码预测分支的输出中提取相应分类的掩码。检测到的两个对象的掩码如下所示:
与Faster R-CNN一样,Mask R-CNN所用架构网络也很灵活。我们之所以选择InceptionV2是因为速度更快,但正如Mask R-CNN论文的作者所指出的那样,人们可以通过ResNeXt-101这样的更好的架构获得更好的结果。
与其他目标检测器(如YOLOv3)相比,Mask-RCNN网络可在更大的图像上运行。网络调整输入图像的大小,使得较小的边是800像素。下面我们将详细介绍获取实例分割结果所需的步骤。为了简化和清晰可视化,我们使用相同的颜色来表示同一类的对象。
Mask R-CNN简要理解见:
// Input selector: 0 - image, 1 - video file, anything else - camera
int read_file = 0;

// Load the class names, one name per line
string classesFile = "./model/mscoco_labels.names";
ifstream ifs(classesFile.c_str());
string line;
while (getline(ifs, line))
{
    classes.push_back(line);
}

// Load the colors: every line holds three numbers
string colorsFile = "./model/colors.txt";
ifstream colorFptr(colorsFile.c_str());
while (getline(colorFptr, line))
{
    char *pEnd = nullptr;
    // Each strtod call must resume where the previous one stopped; parsing
    // the third value from the same position as the second one would make
    // the third channel a copy of the second.
    const double r = strtod(line.c_str(), &pEnd);
    const double g = strtod(pEnd, &pEnd);
    const double b = strtod(pEnd, nullptr);
    colors.push_back(Scalar(r, g, b, 255.0));
}

// Give the configuration and weight files for the model
String textGraph = "./model/mask_rcnn_inception_v2_coco.pbtxt";
String modelWeights = "./model/mask_rcnn_inception_v2_coco.pb";

// Load the network and run it with the OpenCV backend on the CPU
Net net = readNetFromTensorflow(modelWeights, textGraph);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);

// Open a video file or an image file or a camera stream.
string str, outputFile;
VideoCapture cap;
VideoWriter video;
Mat frame, blob;
// Becomes true only once the selected input has really been opened
bool inputOpened = false;
try
{
    // Output file; a video unless an image is processed
    outputFile = "mask_rcnn_out_cpp.avi";
    if (read_file == 0)
    {
        // Open the image file; an empty Mat means it could not be read
        str = "image/cars.jpg";
        frame = imread(str);
        inputOpened = !frame.empty();
        str.replace(str.end() - 4, str.end(), "_mask_rcnn_out.jpg");
        outputFile = str;
    }
    else if (read_file == 1)
    {
        // Open the video file
        str = "./image/cars.mp4";
        inputOpened = cap.open(str);
        str.replace(str.end() - 4, str.end(), "_mask_rcnn_out.avi");
        outputFile = str;
    }
    else
    {
        // Open the webcam
        inputOpened = cap.open(0);
    }
}
catch (...)
{
    // Any exception raised while opening is reported as a failed open below
    inputOpened = false;
}
if (!inputOpened)
{
    cout << "Could not open the input image/video stream" << endl;
    return 0;
}

// Get the video writer initialized to save the output video (not needed for a single image)
if (read_file != 0)
{
    const Size frameSize(static_cast<int>(cap.get(CAP_PROP_FRAME_WIDTH)),
                         static_cast<int>(cap.get(CAP_PROP_FRAME_HEIGHT)));
    video.open(outputFile, VideoWriter::fourcc('M', 'J', 'P', 'G'), 28, frameSize);
}

// Title of the display window
static const string kWinName = "Deep learning object detection in OpenCV";

// Process frames until a key is pressed or the input runs out
while (waitKey(1) < 0)
{
    // For video/camera input fetch the next frame; an image is already loaded
    if (read_file != 0)
    {
        cap >> frame;
    }
    // Stop the program if reached end of video
    if (frame.empty())
    {
        cout << "Done processing !!!" << endl;
        cout << "Output file is stored as " << outputFile << endl;
        waitKey(0);
        break;
    }

    // Create a 4D blob from a frame (original size, R and B swapped, no crop)
    blobFromImage(frame, blob, 1.0, Size(frame.cols, frame.rows), Scalar(), true, false);
    // Sets the input to the network
    net.setInput(blob);

    // Runs the forward pass to get output from the output layers
    std::vector<String> outNames(2);
    outNames[0] = "detection_out_final";
    outNames[1] = "detection_masks";
    vector<Mat> outs;
    net.forward(outs, outNames);

    // Extract the bounding box and mask for each of the detected objects
    postprocess(frame, outs);

    // Put efficiency information. The function getPerfProfile returns the overall time
    // for inference(t) and the timings for each of the layers(in layersTimes)
    vector<double> layersTimes;
    double freq = getTickFrequency() / 1000;
    double t = net.getPerfProfile(layersTimes) / freq;
    string label = format("Mask-RCNN Inference time for a frame : %0.0f ms", t);
    putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));

    // Show the frame with the detection boxes
    Mat detectedFrame;
    frame.convertTo(detectedFrame, CV_8U);
    namedWindow(kWinName, WINDOW_NORMAL);
    imshow(kWinName, frame);
    // ESC (key code 27) stops processing; note this also skips saving the result
    if (waitKey(1000) == 27)
    {
        break;
    }
    // Save the result: a single image ends the loop, video frames are appended
    if (read_file == 0)
    {
        imwrite(outputFile, detectedFrame);
        break;
    }
    else
    {
        video.write(detectedFrame);
    }
}
/**
 * @brief For each frame, extract the bounding box and mask of every detected
 *        object whose score exceeds confThreshold and draw it with drawBox().
 *
 * @param frame Image the detections refer to; it is drawn on in place.
 * @param outs  Network outputs: outs[0] holds the detections (7 floats per
 *              detection: [1] class id, [2] score, [3..6] relative
 *              left/top/right/bottom), outs[1] the masks as N x C x H x W.
 */
void postprocess(Mat &frame, const vector<Mat> &outs)
{
    // Both outputs are indexed below, so make sure they exist and have the
    // dimensions that are read from them.
    CV_Assert(outs.size() >= 2);
    Mat outDetections = outs[0];
    Mat outMasks = outs[1];
    CV_Assert(outDetections.dims >= 3 && outMasks.dims == 4);

    // Output size of masks is NxCxHxW where
    // N - number of detected boxes
    // C - number of classes (excluding background)
    // HxW - segmentation shape
    const int numDetections = outDetections.size[2];
    const int numClasses = outMasks.size[1];

    // View the detections as one row of 7 values per detection
    outDetections = outDetections.reshape(1, static_cast<int>(outDetections.total() / 7));

    for (int i = 0; i < numDetections; ++i)
    {
        const float score = outDetections.at<float>(i, 2);
        // Keep only detections above the confidence threshold
        if (!(score > confThreshold))
        {
            continue;
        }

        const int classId = static_cast<int>(outDetections.at<float>(i, 1));
        // The class id selects a mask plane; an id outside the tensor would
        // read memory that does not belong to it.
        if (classId < 0 || classId >= numClasses)
        {
            continue;
        }

        // Scale the relative coordinates to pixels
        int left = static_cast<int>(frame.cols * outDetections.at<float>(i, 3));
        int top = static_cast<int>(frame.rows * outDetections.at<float>(i, 4));
        int right = static_cast<int>(frame.cols * outDetections.at<float>(i, 5));
        int bottom = static_cast<int>(frame.rows * outDetections.at<float>(i, 6));

        // Keep the box inside the image
        left = max(0, min(left, frame.cols - 1));
        top = max(0, min(top, frame.rows - 1));
        right = max(0, min(right, frame.cols - 1));
        bottom = max(0, min(bottom, frame.rows - 1));

        // An inverted box has no area to resize the mask to
        if (right < left || bottom < top)
        {
            continue;
        }
        Rect box = Rect(left, top, right - left + 1, bottom - top + 1);

        // Extract the mask for the object: a header over the (i, classId) plane, no copy
        Mat objectMask(outMasks.size[2], outMasks.size[3], CV_32F, outMasks.ptr<float>(i, classId));

        // Draw bounding box, colorize and show the mask on the image
        drawBox(frame, classId, score, box, objectMask);
    }
}
/**
 * @brief Draw the predicted bounding box, its label and the colorized mask
 *        on the image.
 *
 * @param frame      Image to draw on (modified in place).
 * @param classId    Class index; used to look up the label in `classes` and
 *                   the color in `colors`.
 * @param conf       Confidence printed in the label.
 * @param box        Bounding box in pixels; it is used as a region of
 *                   `frame`, so it has to lie inside the image.
 * @param objectMask Mask of the object; it is resized in place to the box size.
 */
void drawBox(Mat &frame, int classId, float conf, Rect box, Mat &objectMask)
{
    // Draw a rectangle displaying the bounding box
    rectangle(frame, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), Scalar(255, 178, 50), 3);

    // Get the label for the class name and its confidence
    string label = format("%.2f", conf);
    if (!classes.empty())
    {
        CV_Assert(classId >= 0 && classId < static_cast<int>(classes.size()));
        label = classes[classId] + ":" + label;
    }

    // Display the label at the top of the bounding box
    int baseLine = 0;
    // Arguments: text, font, font scale, thickness, baseline offset (output)
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    // Only the label is pushed down when the box touches the top edge. The box
    // itself must stay untouched because it is used below as the mask region;
    // moving it would misplace the mask and could push it outside the image.
    const int labelTop = max(box.y, labelSize.height);
    rectangle(frame,
              Point(box.x, labelTop - static_cast<int>(round(1.5 * labelSize.height))),
              Point(box.x + static_cast<int>(round(1.5 * labelSize.width)), labelTop + baseLine),
              Scalar(255, 255, 255), FILLED);
    putText(frame, label, Point(box.x, labelTop), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 0, 0), 1);

    // Fill color for the class; fall back to the box color when no colors
    // were loaded (a modulo by an empty list would be a division by zero)
    const Scalar color = colors.empty()
                             ? Scalar(255, 178, 50)
                             : colors[static_cast<size_t>(classId) % colors.size()];

    // Resize the mask, threshold, color and apply it on the image
    resize(objectMask, objectMask, Size(box.width, box.height));
    Mat mask = (objectMask > maskThreshold);
    // Blend 30% of the class color with 70% of the image under the box
    Mat coloredRoi = (0.3 * color + 0.7 * frame(box));
    coloredRoi.convertTo(coloredRoi, CV_8UC3);

    // Draw the contours on the image
    vector<Mat> contours;
    Mat hierarchy;
    mask.convertTo(mask, CV_8U);
    findContours(mask, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE);
    drawContours(coloredRoi, contours, -1, color, 5, LINE_8, hierarchy, 100);
    // Copy the blended pixels back, but only where the mask is set
    coloredRoi.copyTo(frame(box), mask);
}
检测结果图像:
// Mask R-CNN in OpenCV.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//
#include "pch.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <iostream>
#include <string.h>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace dnn;
using namespace std;
// Initialize the parameters
// Confidence threshold: postprocess() only keeps detections scoring above it
float confThreshold = 0.5;
// Mask threshold: drawBox() treats mask values above it as object pixels
float maskThreshold = 0.3;
// Class names, filled by main() from the class names file (one per line)
vector<string> classes;
// Mask colors, filled by main() from the colors file
vector<Scalar> colors;
// Draw the predicted bounding box
void drawBox(Mat &frame, int classId, float conf, Rect box, Mat &objectMask);
// Postprocess the neural network's output for each frame
void postprocess(Mat &frame, const vector<Mat> &outs);
int main()
{
//0-image,1-video,2-camera
int read_file = 0;
// Load names of classes 导入分类名文件
string classesFile = "./model/mscoco_labels.names";
ifstream ifs(classesFile.c_str());
string line;
while (getline(ifs, line))
{
classes.push_back(line);
}
// Load the colors 导入颜色类文件
string colorsFile = "./model/colors.txt";
ifstream colorFptr(colorsFile.c_str());
while (getline(colorFptr, line))
{
char *pEnd;
double r, g, b;
//字符串转换成浮点数
r = strtod(line.c_str(), &pEnd);
g = strtod(pEnd, NULL);
b = strtod(pEnd, NULL);
Scalar color = Scalar(r, g, b, 255.0);
colors.push_back(Scalar(r, g, b, 255.0));
}
// Give the configuration and weight files for the model
String textGraph = "./model/mask_rcnn_inception_v2_coco.pbtxt";
String modelWeights = "./model/mask_rcnn_inception_v2_coco.pb";
// Load the network 导入网络
Net net = readNetFromTensorflow(modelWeights, textGraph);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
//只使用CPU
net.setPreferableTarget(DNN_TARGET_CPU);
// Open a video file or an image file or a camera stream.
string str, outputFile;
VideoCapture cap;
VideoWriter video;
Mat frame, blob;
try
{
//输出文件,默认是视频
outputFile = "mask_rcnn_out_cpp.avi";
if (read_file == 0)
{
// Open the image file 打开图像文件
str = "image/cars.jpg";
//cout << "Image file input : " << str << endl;
ifstream ifile(str);
if (!ifile)
{
throw("error");
}
frame = imread(str);
str.replace(str.end() - 4, str.end(), "_mask_rcnn_out.jpg");
outputFile = str;
}
else if (read_file == 1)
{
// Open the video file 打开视频文件
str = "./image/cars.mp4";
ifstream ifile(str);
if (!ifile)
{
throw("error");
}
cap.open(str);
str.replace(str.end() - 4, str.end(), "_mask_rcnn_out.avi");
outputFile = str;
}
// Open the webcam 打开摄像头
else
{
cap.open(0);
}
}
catch (...)
{
cout << "Could not open the input image/video stream" << endl;
return 0;
}
// Get the video writer initialized to save the output video 如果读入的不是图像,生成输出视频
if (read_file != 0)
{
video.open(outputFile, VideoWriter::fourcc('M', 'J', 'P', 'G'), 28,
Size(cap.get(CAP_PROP_FRAME_WIDTH), cap.get(CAP_PROP_FRAME_HEIGHT)));
}
// Create a window 显示窗口
static const string kWinName = "Deep learning object detection in OpenCV";
//Process frames 处理图像
while (waitKey(1) < 0)
{
//如果是视频
if (read_file != 0)
{
// get frame from the video 获取单帧图像
cap >> frame;
}
// Stop the program if reached end of video 如果图像不存在
if (frame.empty())
{
cout << "Done processing !!!" << endl;
cout << "Output file is stored as " << outputFile << endl;
waitKey(0);
break;
}
// Create a 4D blob from a frame 获得深度学习的输入图像
blobFromImage(frame, blob, 1.0, Size(frame.cols, frame.rows), Scalar(), true, false);
//blobFromImage(frame, blob);
//Sets the input to the network 设置输入
net.setInput(blob);
// Runs the forward pass to get output from the output layers 获得输出层
std::vector<String> outNames(2);
outNames[0] = "detection_out_final";
outNames[1] = "detection_masks";
vector<Mat> outs;
net.forward(outs, outNames);
// Extract the bounding box and mask for each of the detected objects 提取预测框和掩模
postprocess(frame, outs);
// Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
string label = format("Mask-RCNN Inference time for a frame : %0.0f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
// Write the frame with the detection boxes 保存结果
Mat detectedFrame;
frame.convertTo(detectedFrame, CV_8U);
namedWindow(kWinName, WINDOW_NORMAL);
imshow(kWinName, frame);
//enter退出
if (waitKey(1000) == 27)
{
break;
}
if (read_file == 0)
{
imwrite(outputFile, detectedFrame);
break;
}
else
{
video.write(detectedFrame);
}
}
cap.release();
//释放生成的视频
if (read_file != 0)
{
video.release();
}
return 0;
}
/**
 * @brief For each frame, extract the bounding box and mask of every detected
 *        object whose score exceeds confThreshold and draw it with drawBox().
 *
 * @param frame Image the detections refer to; it is drawn on in place.
 * @param outs  Network outputs: outs[0] holds the detections (7 floats per
 *              detection: [1] class id, [2] score, [3..6] relative
 *              left/top/right/bottom), outs[1] the masks as N x C x H x W.
 */
void postprocess(Mat &frame, const vector<Mat> &outs)
{
    // Both outputs are indexed below, so make sure they exist and have the
    // dimensions that are read from them.
    CV_Assert(outs.size() >= 2);
    Mat outDetections = outs[0];
    Mat outMasks = outs[1];
    CV_Assert(outDetections.dims >= 3 && outMasks.dims == 4);

    // Output size of masks is NxCxHxW where
    // N - number of detected boxes
    // C - number of classes (excluding background)
    // HxW - segmentation shape
    const int numDetections = outDetections.size[2];
    const int numClasses = outMasks.size[1];

    // View the detections as one row of 7 values per detection
    outDetections = outDetections.reshape(1, static_cast<int>(outDetections.total() / 7));

    for (int i = 0; i < numDetections; ++i)
    {
        const float score = outDetections.at<float>(i, 2);
        // Keep only detections above the confidence threshold
        if (!(score > confThreshold))
        {
            continue;
        }

        const int classId = static_cast<int>(outDetections.at<float>(i, 1));
        // The class id selects a mask plane; an id outside the tensor would
        // read memory that does not belong to it.
        if (classId < 0 || classId >= numClasses)
        {
            continue;
        }

        // Scale the relative coordinates to pixels
        int left = static_cast<int>(frame.cols * outDetections.at<float>(i, 3));
        int top = static_cast<int>(frame.rows * outDetections.at<float>(i, 4));
        int right = static_cast<int>(frame.cols * outDetections.at<float>(i, 5));
        int bottom = static_cast<int>(frame.rows * outDetections.at<float>(i, 6));

        // Keep the box inside the image
        left = max(0, min(left, frame.cols - 1));
        top = max(0, min(top, frame.rows - 1));
        right = max(0, min(right, frame.cols - 1));
        bottom = max(0, min(bottom, frame.rows - 1));

        // An inverted box has no area to resize the mask to
        if (right < left || bottom < top)
        {
            continue;
        }
        Rect box = Rect(left, top, right - left + 1, bottom - top + 1);

        // Extract the mask for the object: a header over the (i, classId) plane, no copy
        Mat objectMask(outMasks.size[2], outMasks.size[3], CV_32F, outMasks.ptr<float>(i, classId));

        // Draw bounding box, colorize and show the mask on the image
        drawBox(frame, classId, score, box, objectMask);
    }
}
/**
 * @brief Draw the predicted bounding box, its label and the colorized mask
 *        on the image.
 *
 * @param frame      Image to draw on (modified in place).
 * @param classId    Class index; used to look up the label in `classes` and
 *                   the color in `colors`.
 * @param conf       Confidence printed in the label.
 * @param box        Bounding box in pixels; it is used as a region of
 *                   `frame`, so it has to lie inside the image.
 * @param objectMask Mask of the object; it is resized in place to the box size.
 */
void drawBox(Mat &frame, int classId, float conf, Rect box, Mat &objectMask)
{
    // Draw a rectangle displaying the bounding box
    rectangle(frame, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), Scalar(255, 178, 50), 3);

    // Get the label for the class name and its confidence
    string label = format("%.2f", conf);
    if (!classes.empty())
    {
        CV_Assert(classId >= 0 && classId < static_cast<int>(classes.size()));
        label = classes[classId] + ":" + label;
    }

    // Display the label at the top of the bounding box
    int baseLine = 0;
    // Arguments: text, font, font scale, thickness, baseline offset (output)
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    // Only the label is pushed down when the box touches the top edge. The box
    // itself must stay untouched because it is used below as the mask region;
    // moving it would misplace the mask and could push it outside the image.
    const int labelTop = max(box.y, labelSize.height);
    rectangle(frame,
              Point(box.x, labelTop - static_cast<int>(round(1.5 * labelSize.height))),
              Point(box.x + static_cast<int>(round(1.5 * labelSize.width)), labelTop + baseLine),
              Scalar(255, 255, 255), FILLED);
    putText(frame, label, Point(box.x, labelTop), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 0, 0), 1);

    // Fill color for the class; fall back to the box color when no colors
    // were loaded (a modulo by an empty list would be a division by zero)
    const Scalar color = colors.empty()
                             ? Scalar(255, 178, 50)
                             : colors[static_cast<size_t>(classId) % colors.size()];

    // Resize the mask, threshold, color and apply it on the image
    resize(objectMask, objectMask, Size(box.width, box.height));
    Mat mask = (objectMask > maskThreshold);
    // Blend 30% of the class color with 70% of the image under the box
    Mat coloredRoi = (0.3 * color + 0.7 * frame(box));
    coloredRoi.convertTo(coloredRoi, CV_8UC3);

    // Draw the contours on the image
    vector<Mat> contours;
    Mat hierarchy;
    mask.convertTo(mask, CV_8U);
    findContours(mask, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE);
    drawContours(coloredRoi, contours, -1, color, 5, LINE_8, hierarchy, 100);
    // Copy the blended pixels back, but only where the mask is set
    coloredRoi.copyTo(frame(box), mask);
}
Python版本代码:
import cv2 as cv
import numpy as np
import os.path
import sys
import random
# Initialize the parameters
confThreshold = 0.5 # Confidence threshold: postprocess() keeps detections scoring above it
maskThreshold = 0.3 # Mask threshold: drawBox() treats mask values above it as object pixels
# Draw the predicted bounding box, colorize and show the mask on the image
def drawBox(frame, classId, conf, left, top, right, bottom, classMask):
    """Draw one detection (box, label, tinted mask and contours) on ``frame``.

    frame     -- image to draw on, modified in place
    classId   -- class index, used to look up the label in ``classes``
    conf      -- confidence printed in the label
    left, top, right, bottom -- box corners in pixels (inclusive)
    classMask -- mask of the object; resized to the box size before use
    """
    # Draw a bounding box.
    cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)

    # Print a label of class.
    label = '%.2f' % conf
    if classes:
        assert(classId < len(classes))
        label = '%s:%s' % (classes[classId], label)

    # Display the label at the top of the bounding box. Only the label is
    # pushed down when the box touches the top edge; `top` itself must stay
    # untouched because it also defines the region the mask is applied to.
    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    labelTop = max(top, labelSize[1])
    cv.rectangle(frame, (left, labelTop - round(1.5*labelSize[1])), (left + round(1.5*labelSize[0]), labelTop + baseLine), (255, 255, 255), cv.FILLED)
    cv.putText(frame, label, (left, labelTop), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)

    # Resize the mask, threshold, color and apply it on the image
    classMask = cv.resize(classMask, (right - left + 1, bottom - top + 1))
    mask = (classMask > maskThreshold)
    roi = frame[top:bottom+1, left:right+1][mask]

    # A random color is picked for every instance. Use
    # `color = colors[classId % len(colors)]` instead to get one color per class.
    colorIndex = random.randint(0, len(colors)-1)
    color = colors[colorIndex]

    # Blend 30% of the color with 70% of the image where the mask is set
    frame[top:bottom+1, left:right+1][mask] = ([0.3*color[0], 0.3*color[1], 0.3*color[2]] + 0.7 * roi).astype(np.uint8)

    # Draw the contours on the image
    mask = mask.astype(np.uint8)
    # findContours returns (image, contours, hierarchy) in OpenCV 3.x and
    # (contours, hierarchy) in 4.x; the last two items work for both.
    contours, hierarchy = cv.findContours(mask, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)[-2:]
    cv.drawContours(frame[top:bottom+1, left:right+1], contours, -1, color, 3, cv.LINE_8, hierarchy, 100)
# For each frame, extract the bounding box and mask for each detected object
def postprocess(boxes, masks):
    """Draw every detection scoring above ``confThreshold`` on the global ``frame``.

    boxes -- detections indexed as boxes[0, 0, i]; each holds 7 values:
             [1] class id, [2] score, [3..6] relative left/top/right/bottom
    masks -- masks indexed as masks[i, classId]

    Reads the module-level ``frame`` (it is not a parameter) and calls
    drawBox() for every detection that is kept.
    """
    # Output size of masks is NxCxHxW where
    # N - number of detected boxes
    # C - number of classes (excluding background)
    # HxW - segmentation shape
    numClasses = masks.shape[1]
    numDetections = boxes.shape[2]

    frameH = frame.shape[0]
    frameW = frame.shape[1]

    for i in range(numDetections):
        box = boxes[0, 0, i]
        score = box[2]
        if not score > confThreshold:
            continue

        classId = int(box[1])
        # The class id selects the mask; ignore ids the mask output does not cover
        if not 0 <= classId < numClasses:
            continue

        # Extract the bounding box, scaled to pixels and kept inside the image
        left = int(frameW * box[3])
        top = int(frameH * box[4])
        right = int(frameW * box[5])
        bottom = int(frameH * box[6])

        left = max(0, min(left, frameW - 1))
        top = max(0, min(top, frameH - 1))
        right = max(0, min(right, frameW - 1))
        bottom = max(0, min(bottom, frameH - 1))

        # An inverted box has no area to resize the mask to
        if right < left or bottom < top:
            continue

        # Extract the mask for the object
        classMask = masks[i, classId]

        # Draw bounding box, colorize and show the mask on the image
        drawBox(frame, classId, score, left, top, right, bottom, classMask)
# Load names of classes, one name per line
classesFile = "./model/mscoco_labels.names"
classes = None
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Give the textGraph and weight files for the model
textGraph = "./model/mask_rcnn_inception_v2_coco.pbtxt"
modelWeights = "./model/mask_rcnn_inception_v2_coco.pb"

# Load the network and run it with the OpenCV backend on the CPU
net = cv.dnn.readNetFromTensorflow(modelWeights, textGraph)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

# Load the colors: every line holds three space-separated numbers
colorsFile = "./model/colors.txt"
with open(colorsFile, 'rt') as f:
    colorsStr = f.read().rstrip('\n').split('\n')
colors = []
for colorLine in colorsStr:
    rgb = colorLine.split(' ')
    color = np.array([float(rgb[0]), float(rgb[1]), float(rgb[2])])
    colors.append(color)

winName = 'Mask-RCNN Object detection and Segmentation in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

# Input selector: "image", "video", anything else means the webcam
input_file = "image"
input_file_name = "./image/cars.jpg"

outputFile = "mask_rcnn_out_py.avi"
# Strings are compared with == / !=: `is` tests object identity, which is not
# guaranteed for equal strings.
if input_file == "image":
    # Open the image file
    if not os.path.isfile(input_file_name):
        print("Input image file ", input_file_name, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(input_file_name)
    outputFile = input_file_name[:-4]+'_mask_rcnn_out_py.jpg'
elif input_file == "video":
    # Open the video file
    if not os.path.isfile(input_file_name):
        print("Input video file ", input_file_name, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(input_file_name)
    outputFile = input_file_name[:-4]+'_mask_rcnn_out_py.avi'
else:
    # Webcam input
    cap = cv.VideoCapture(0)

# Get the video writer initialized to save the output video
if input_file != "image":
    vid_writer = cv.VideoWriter(outputFile, cv.VideoWriter_fourcc('M','J','P','G'), 28, (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))

while cv.waitKey(1) < 0:
    # Get frame from the video
    hasFrame, frame = cap.read()

    # Stop the program if reached end of video
    if not hasFrame:
        print("Done processing !!!")
        print("Output file is stored as ", outputFile)
        cv.waitKey(3000)
        break

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, swapRB=True, crop=False)

    # Set the input to the network
    net.setInput(blob)

    # Run the forward pass to get output from the output layers
    boxes, masks = net.forward(['detection_out_final', 'detection_masks'])

    # Extract the bounding box and mask for each of the detected objects
    postprocess(boxes, masks)

    # Put efficiency information.
    t, _ = net.getPerfProfile()
    label = 'Inference time for a frame : %0.0f ms' % abs(t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    # Write the frame with the detection boxes
    if input_file == "image":
        cv.imwrite(outputFile, frame.astype(np.uint8))
    else:
        vid_writer.write(frame.astype(np.uint8))

    cv.imshow(winName, frame)
我正在学习如何使用Nokogiri,根据这段代码我遇到了一些问题:require'rubygems'require'mechanize'post_agent=WWW::Mechanize.newpost_page=post_agent.get('http://www.vbulletin.org/forum/showthread.php?t=230708')puts"\nabsolutepathwithtbodygivesnil"putspost_page.parser.xpath('/html/body/div/div/div/div/div/table/tbody/tr/td/div
我有一个Ruby程序,它使用rubyzip压缩XML文件的目录树。gem。我的问题是文件开始变得很重,我想提高压缩级别,因为压缩时间不是问题。我在rubyzipdocumentation中找不到一种为创建的ZIP文件指定压缩级别的方法。有人知道如何更改此设置吗?是否有另一个允许指定压缩级别的Ruby库? 最佳答案 这是我通过查看rubyzip内部创建的代码。level=Zlib::BEST_COMPRESSIONZip::ZipOutputStream.open(zip_file)do|zip|Dir.glob("**/*")d
类classAprivatedeffooputs:fooendpublicdefbarputs:barendprivatedefzimputs:zimendprotecteddefdibputs:dibendendA的实例a=A.new测试a.foorescueputs:faila.barrescueputs:faila.zimrescueputs:faila.dibrescueputs:faila.gazrescueputs:fail测试输出failbarfailfailfail.发送测试[:foo,:bar,:zim,:dib,:gaz].each{|m|a.send(m)resc
很好奇,就使用rubyonrails自动化单元测试而言,你们正在做什么?您是否创建了一个脚本来在cron中运行rake作业并将结果邮寄给您?git中的预提交Hook?只是手动调用?我完全理解测试,但想知道在错误发生之前捕获错误的最佳实践是什么。让我们理所当然地认为测试本身是完美无缺的,并且可以正常工作。下一步是什么以确保他们在正确的时间将可能有害的结果传达给您? 最佳答案 不确定您到底想听什么,但是有几个级别的自动代码库控制:在处理某项功能时,您可以使用类似autotest的内容获得关于哪些有效,哪些无效的即时反馈。要确保您的提
假设我做了一个模块如下:m=Module.newdoclassCendend三个问题:除了对m的引用之外,还有什么方法可以访问C和m中的其他内容?我可以在创建匿名模块后为其命名吗(就像我输入“module...”一样)?如何在使用完匿名模块后将其删除,使其定义的常量不再存在? 最佳答案 三个答案:是的,使用ObjectSpace.此代码使c引用你的类(class)C不引用m:c=nilObjectSpace.each_object{|obj|c=objif(Class===objandobj.name=~/::C$/)}当然这取决于
我正在尝试使用ruby和Savon来使用网络服务。测试服务为http://www.webservicex.net/WS/WSDetails.aspx?WSID=9&CATID=2require'rubygems'require'savon'client=Savon::Client.new"http://www.webservicex.net/stockquote.asmx?WSDL"client.get_quotedo|soap|soap.body={:symbol=>"AAPL"}end返回SOAP异常。检查soap信封,在我看来soap请求没有正确的命名空间。任何人都可以建议我
关闭。这个问题是opinion-based.它目前不接受答案。想要改进这个问题?更新问题,以便editingthispost可以用事实和引用来回答它.关闭4年前。Improvethisquestion我想在固定时间创建一系列低音和高音调的哔哔声。例如:在150毫秒时发出高音调的蜂鸣声在151毫秒时发出低音调的蜂鸣声200毫秒时发出低音调的蜂鸣声250毫秒的高音调蜂鸣声有没有办法在Ruby或Python中做到这一点?我真的不在乎输出编码是什么(.wav、.mp3、.ogg等等),但我确实想创建一个输出文件。
在控制台中反复尝试之后,我想到了这种方法,可以按发生日期对类似activerecord的(Mongoid)对象进行分组。我不确定这是完成此任务的最佳方法,但它确实有效。有没有人有更好的建议,或者这是一个很好的方法?#eventsisanarrayofactiverecord-likeobjectsthatincludeatimeattributeevents.map{|event|#converteventsarrayintoanarrayofhasheswiththedayofthemonthandtheevent{:number=>event.time.day,:event=>ev
我在我的项目目录中完成了compasscreate.和compassinitrails。几个问题:我已将我的.sass文件放在public/stylesheets中。这是放置它们的正确位置吗?当我运行compasswatch时,它不会自动编译这些.sass文件。我必须手动指定文件:compasswatchpublic/stylesheets/myfile.sass等。如何让它自动运行?文件ie.css、print.css和screen.css已放在stylesheets/compiled。如何在编译后不让它们重新出现的情况下删除它们?我自己编译的.sass文件编译成compiled/t
我想将html转换为纯文本。不过,我不想只删除标签,我想智能地保留尽可能多的格式。为插入换行符标签,检测段落并格式化它们等。输入非常简单,通常是格式良好的html(不是整个文档,只是一堆内容,通常没有anchor或图像)。我可以将几个正则表达式放在一起,让我达到80%,但我认为可能有一些现有的解决方案更智能。 最佳答案 首先,不要尝试为此使用正则表达式。很有可能你会想出一个脆弱/脆弱的解决方案,它会随着HTML的变化而崩溃,或者很难管理和维护。您可以使用Nokogiri快速解析HTML并提取文本:require'nokogiri'h