Detecting an object in a video stream

I am trying to detect a white object on a black-and-white road so that an autonomous RC car can drive around it. At the moment I am detecting everything on the road except the white box.

My attempt can be seen in the code sample below:

import copy
import cv2

# input = one video stream frame, 320x240
frame = copy.deepcopy(input)
grayFrame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
threshGray = cv2.adaptiveThreshold(
    grayFrame, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, blockSize=123, C=-19)
contours, _ = cv2.findContours(threshGray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

filteredContours = []
for cnt in contours:

    # some filtering needs to be done
    #
    # after filtering, append the contour

    filteredContours.append(cnt)
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(frame, (x, y), (x + w, y + h), (3, 244, 244), 1)

cv2.drawContours(frame, filteredContours, -1, (255, 0, 255), 1)
cv2.imshow("with contours", frame)
cv2.imshow("adaptiveThreshold", threshGray)
cv2.imshow("input", input)

I am looking for a way to draw a bounding box around the obstacle. The problem is that I don't know how to separate this box from everything else. It is probably because the contour of the box and the line on the right are connected, which is why the bounding box ends up so large. If anyone knows a way to do this, that would be great.

Click here to see the results


First: the input image

Second: after adaptiveThreshold

Third: with contours (pink) and bounding box (yellow)

tonghai0709's answer: Detecting an object in a video stream

At this point you have several candidates for white regions.

You need to add code at #some filtering needs to be done to drop from the candidate list everything that is NOT the bounding box you are looking for.

I suggest comparing each candidate against a sufficiently large, roughly square box.

All of the contours that are not the BOX you want to find on the road will fail that square-box condition; see the sketch below.
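
A minimal sketch of such a filter, assuming the box shows up in threshGray as a roughly square blob above some minimum area; the thresholds (MIN_AREA, MAX_ASPECT_DEVIATION, the extent cutoff) are assumptions that have to be tuned for the 320x240 frames:

MIN_AREA = 400              # assumed: ignore tiny specks
MAX_ASPECT_DEVIATION = 0.4  # assumed: how far from a perfect square is still acceptable

filteredContours = []
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    area = cv2.contourArea(cnt)
    if area < MIN_AREA:
        continue                          # too small to be the box
    if abs(1.0 - w / float(h)) > MAX_ASPECT_DEVIATION:
        continue                          # bounding rect is not roughly square
    if area / float(w * h) < 0.5:
        continue                          # thin, line-like contours fill little of their rect
    filteredContours.append(cnt)
    cv2.rectangle(frame, (x, y), (x + w, y + h), (3, 244, 244), 1)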


I think what you are looking for is a triangular mask; the lane markings are also visible in your input image. Have you tried using a lane detector, so that everything outside the lane is masked out and only the space inside the lane is processed?

Below, I tried a lane detector using HoughLinesP and added contours on top. Give it a try; I have not tested this code, but I don't see any obvious problems with it.


#! /usr/bin/env python3
"""
Lane detector using the Hough transform method
"""
import cv2 as cv
import numpy as np
# import matplotlib.pyplot as plt
import random as rng

rng.seed(369)

def do_canny(frame):
    # Converts frame to grayscale because we only need the luminance channel for detecting edges - less computationally expensive
    gray = cv.cvtColor(frame,cv.COLOR_RGB2GRAY)
    # Applies a 5x5 gaussian blur with deviation of 0 to frame - not mandatory since Canny will do this for us
    blur = cv.GaussianBlur(gray,(5,5),0)
    # Applies Canny edge detector with minVal of 50 and maxVal of 150
    canny = cv.Canny(blur,50,150)
    return canny

def do_segment(frame):
    # Since an image is a multi-directional array containing the relative intensities of each pixel in the image,we can use frame.shape to return a tuple: [number of rows,number of columns,number of channels] of the dimensions of the frame
    # frame.shape[0] give us the number of rows of pixels the frame has. Since height begins from 0 at the top,the y-coordinate of the bottom of the frame is its height
    height = frame.shape[0]
    # Creates a triangular polygon for the mask defined by three (x,y) coordinates
    polygons = np.array([
                            [(0,height),(800,height),(380,290)]
                        ])
    # Creates an image filled with zero intensities with the same dimensions as the frame
    mask = np.zeros_like(frame)
    # Allows the mask to be filled with values of 1 and the other areas to be filled with values of 0
    cv.fillPoly(mask,polygons,255)
    # A bitwise and operation between the mask and frame keeps only the triangular area of the frame
    segment = cv.bitwise_and(frame,mask)
    return segment

def calculate_lines(frame,lines):
    # Empty arrays to store the coordinates of the left and right lines
    left = []
    right = []
    # Loops through every detected line
    for line in lines:
        # Reshapes line from 2D array to 1D array
        x1,y1,x2,y2 = line.reshape(4)
        # Fits a linear polynomial to the x and y coordinates and returns a vector of coefficients which describe the slope and y-intercept
        parameters = np.polyfit((x1,x2),(y1,y2),1)
        slope = parameters[0]
        y_intercept = parameters[1]
        # If slope is negative,the line is to the left of the lane,and otherwise,the line is to the right of the lane
        if slope < 0:
            left.append((slope,y_intercept))
        else:
            right.append((slope,y_intercept))
    # Averages out all the values for left and right into a single slope and y-intercept value for each line
    left_avg = np.average(left,axis = 0)
    right_avg = np.average(right,axis = 0)
    # Calculates the x1,y1,x2,y2 coordinates for the left and right lines
    left_line = calculate_coordinates(frame,left_avg)
    right_line = calculate_coordinates(frame,right_avg)
    return np.array([left_line,right_line])

def calculate_coordinates(frame,parameters):
    slope,intercept = parameters
    # Sets initial y-coordinate as height from top down (bottom of the frame)
    y1 = frame.shape[0]
    # Sets final y-coordinate as 150 above the bottom of the frame
    y2 = int(y1 - 150)
    # Sets initial x-coordinate as (y1 - b) / m since y1 = mx1 + b
    x1 = int((y1 - intercept) / slope)
    # Sets final x-coordinate as (y2 - b) / m since y2 = mx2 + b
    x2 = int((y2 - intercept) / slope)
    return np.array([x1,y1,x2,y2])

def visualize_lines(frame,lines):
    # Creates an image filled with zero intensities with the same dimensions as the frame
    lines_visualize = np.zeros_like(frame)
    # Checks if any lines are detected
    if lines is not None:
        for x1,y1,x2,y2 in lines:
            # Draws lines between two coordinates with green color and 5 thickness
            cv.line(lines_visualize,(x1,y1),(x2,y2),(0,255,0),5)
    return lines_visualize

# The video feed is read in as a VideoCapture object
cap = cv.VideoCapture(1)
while (cap.isOpened()):
    # ret = a boolean return value from getting the frame,frame = the current frame being projected in the video
    ret,frame = cap.read()
    canny = do_canny(frame)
    cv.imshow("canny",canny)
    # plt.imshow(frame)
    # plt.show()
    segment = do_segment(canny)
    hough = cv.HoughLinesP(segment,2,np.pi / 180,100,np.array([]),minLineLength = 100,maxLineGap = 50)
    # Averages multiple detected lines from hough into one line for left border of lane and one line for right border of lane
    lines = calculate_lines(frame,hough)
    # Visualizes the lines
    lines_visualize = visualize_lines(frame,lines)
    cv.imshow("hough",lines_visualize)
    # Overlays lines on frame by taking their weighted sums and adding an arbitrary scalar value of 1 as the gamma argument
    output = cv.addWeighted(frame,0.9,lines_visualize,1,1)
    # findContours expects a single-channel image, so convert the overlaid output to grayscale first
    # (thresholding the grayscale result may give cleaner contours)
    output_gray = cv.cvtColor(output,cv.COLOR_BGR2GRAY)
    contours,_ = cv.findContours(output_gray,cv.RETR_TREE,cv.CHAIN_APPROX_SIMPLE)

    contours_poly = [None]*len(contours)
    boundRect = [None]*len(contours)
    centers = [None]*len(contours)
    radius = [None]*len(contours)
    for i,c in enumerate(contours):
        contours_poly[i] = cv.approxPolyDP(c,3,True)
        boundRect[i] = cv.boundingRect(contours_poly[i])
        centers[i],radius[i] = cv.minEnclosingCircle(contours_poly[i])
    ## [allthework]

    ## [zeroMat]
    drawing = np.zeros((output.shape[0],output.shape[1],3),dtype=np.uint8)
    ## [zeroMat]

    ## [forContour]
    # Draw polygonal contours + bounding rects
    for i in range(len(contours)):
        color = (rng.randint(0,256),rng.randint(0,256),rng.randint(0,256))
        cv.drawContours(drawing,contours_poly,i,color)
        cv.rectangle(drawing,(int(boundRect[i][0]),int(boundRect[i][1])),\
          (int(boundRect[i][0]+boundRect[i][2]),int(boundRect[i][1]+boundRect[i][3])),color,2)

    # Opens a new window and displays the output frame
    cv.imshow('Contours',drawing)
    # Frames are read by intervals of 10 milliseconds. The programs breaks out of the while loop when the user presses the 'q' key
    if cv.waitKey(10) & 0xFF == ord('q'):
        break
# The following frees up resources and closes all windows
cap.release()
cv.destroyAllWindows()

Also try different values for the Canny thresholds.
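
For example, do_canny above hard-codes 50 and 150 as minVal and maxVal; a sketch with the thresholds exposed as parameters makes them easy to experiment with (the 30/100 defaults below are only an illustrative guess, not tested values):

def do_canny(frame, min_val=30, max_val=100):
    # Same steps as the do_canny above, with the Canny thresholds exposed for tuning
    gray = cv.cvtColor(frame, cv.COLOR_RGB2GRAY)
    blur = cv.GaussianBlur(gray, (5, 5), 0)
    return cv.Canny(blur, min_val, max_val)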
