提取文本OpenCV
我正在尝试在图像中查找文本的边界框,并且正在使用这种方法:
// calculate the local variances of the grayscale image Mat t_mean, t_mean_2; Mat grayF; outImg_gray.convertTo(grayF, CV_32F); int winSize = 35; blur(grayF, t_mean, cv::Size(winSize,winSize)); blur(grayF.mul(grayF), t_mean_2, cv::Size(winSize,winSize)); Mat varMat = t_mean_2 - t_mean.mul(t_mean); varMat.convertTo(varMat, CV_8U); // threshold the high variance regions Mat varMatRegions = varMat > 100;
当给这样一个图像:
然后当我显示varMatRegions
我得到这个图像:
正如你所看到的,它将左边的文本块和卡片的头部结合起来,对于大多数卡片来说,这种方法效果很好,但是在繁忙的卡片上可能会导致问题。
这些轮廓连接不好的原因是它使得轮廓的边界框几乎占据整个卡。
任何人都可以提出一个不同的方式,我可以找到文本,以确保正确的文本检测?
200分以上的人可以在卡上面找到这两个文字。
您可以通过查找边缘元素(从LPD中获得启发)来检测文本:
#include "opencv2/opencv.hpp" std::vector<cv::Rect> detectLetters(cv::Mat img) { std::vector<cv::Rect> boundRect; cv::Mat img_gray, img_sobel, img_threshold, element; cvtColor(img, img_gray, CV_BGR2GRAY); cv::Sobel(img_gray, img_sobel, CV_8U, 1, 0, 3, 1, 0, cv::BORDER_DEFAULT); cv::threshold(img_sobel, img_threshold, 0, 255, CV_THRESH_OTSU+CV_THRESH_BINARY); element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) ); cv::morphologyEx(img_threshold, img_threshold, CV_MOP_CLOSE, element); //Does the trick std::vector< std::vector< cv::Point> > contours; cv::findContours(img_threshold, contours, 0, 1); std::vector<std::vector<cv::Point> > contours_poly( contours.size() ); for( int i = 0; i < contours.size(); i++ ) if (contours[i].size()>100) { cv::approxPolyDP( cv::Mat(contours[i]), contours_poly[i], 3, true ); cv::Rect appRect( boundingRect( cv::Mat(contours_poly[i]) )); if (appRect.width>appRect.height) boundRect.push_back(appRect); } return boundRect; }
用法:
int main(int argc,char** argv) { //Read cv::Mat img1=cv::imread("side_1.jpg"); cv::Mat img2=cv::imread("side_2.jpg"); //Detect std::vector<cv::Rect> letterBBoxes1=detectLetters(img1); std::vector<cv::Rect> letterBBoxes2=detectLetters(img2); //Display for(int i=0; i< letterBBoxes1.size(); i++) cv::rectangle(img1,letterBBoxes1[i],cv::Scalar(0,255,0),3,8,0); cv::imwrite( "imgOut1.jpg", img1); for(int i=0; i< letterBBoxes2.size(); i++) cv::rectangle(img2,letterBBoxes2[i],cv::Scalar(0,255,0),3,8,0); cv::imwrite( "imgOut2.jpg", img2); return 0; }
结果:
一个。 element = getStructuringElement(cv :: MORPH_RECT,cv :: Size(17,3));
湾 element = getStructuringElement(cv :: MORPH_RECT,cv :: Size(30,30));
结果与所提到的其他图像相似。
我在下面的程序中使用了基于渐变的方法。 添加结果图像。 请注意,我正在使用缩小的图像版本进行处理。
c ++版本
The MIT License (MIT) Copyright (c) 2014 Dhanushka Dangampola Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "stdafx.h" #include <opencv2/core/core.hpp> #include <opencv2/highgui/highgui.hpp> #include <opencv2/imgproc/imgproc.hpp> #include <iostream> using namespace cv; using namespace std; #define INPUT_FILE "1.jpg" #define OUTPUT_FOLDER_PATH string("") int _tmain(int argc, _TCHAR* argv[]) { Mat large = imread(INPUT_FILE); Mat rgb; // downsample and use it for processing pyrDown(large, rgb); Mat small; cvtColor(rgb, small, CV_BGR2GRAY); // morphological gradient Mat grad; Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3)); morphologyEx(small, grad, MORPH_GRADIENT, morphKernel); // binarize Mat bw; threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU); // connect horizontally oriented regions Mat connected; morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1)); morphologyEx(bw, connected, MORPH_CLOSE, morphKernel); // find contours Mat mask = Mat::zeros(bw.size(), CV_8UC1); vector<vector<Point>> contours; vector<Vec4i> hierarchy; findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0)); // filter contours for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]) { Rect rect = boundingRect(contours[idx]); Mat maskROI(mask, rect); maskROI = Scalar(0, 0, 0); // fill the contour drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED); // ratio of non-zero pixels in the filled region double r = (double)countNonZero(maskROI)/(rect.width*rect.height); if (r > .45 /* assume at least 45% of the area is filled if it contains text */ && (rect.height > 8 && rect.width > 8) /* constraints on region size */ /* these two conditions alone are not very robust. better to use something like the number of significant peaks in a horizontal projection as a third condition */ ) { rectangle(rgb, rect, Scalar(0, 255, 0), 2); } } imwrite(OUTPUT_FOLDER_PATH + string("rgb.jpg"), rgb); return 0; }
蟒蛇版本
The MIT License (MIT) Copyright (c) 2017 Dhanushka Dangampola Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import cv2 import numpy as np large = cv2.imread('1.jpg') rgb = cv2.pyrDown(large) small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY) kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel) _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU) kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1)) connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel) # using RETR_EXTERNAL instead of RETR_CCOMP contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) mask = np.zeros(bw.shape, dtype=np.uint8) for idx in range(len(contours)): x, y, w, h = cv2.boundingRect(contours[idx]) mask[y:y+h, x:x+w] = 0 cv2.drawContours(mask, contours, idx, (255, 255, 255), -1) r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h) if r > 0.45 and w > 8 and h > 8: cv2.rectangle(rgb, (x, y), (x+w-1, y+h-1), (0, 255, 0), 2) cv2.imshow('rects', rgb)
这是我用来检测文本块的另一种方法:
- 将图像转换为灰度
- 应用的阈值 (简单的二进制阈值,以150的精选值作为阈值)
- 应用膨胀来增强图像中的线条,从而导致更紧凑的对象和更少的空白空间碎片。 对迭代次数使用较高的值,因此扩张非常重(13次迭代,也为最佳结果而精心挑选)。
- 使用opencv findContours函数识别结果图像中的对象轮廓。
- 画了一个包围每个轮廓对象的边框 (矩形) – 每个边框都构成一个文本块。
- 根据上面的算法也可以找到相交或嵌套对象(比如第一张牌的整个顶部区域),可选地丢弃的区域不可能是您正在搜索的对象(例如文本块),其中一些可能是对你的目的不感兴趣。
下面是python与pyopencv编写的代码,它应该很容易移植到C ++。
import cv2 image = cv2.imread("card.png") gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY) # grayscale _,thresh = cv2.threshold(gray,150,255,cv2.THRESH_BINARY_INV) # threshold kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3)) dilated = cv2.dilate(thresh,kernel,iterations = 13) # dilate _, contours, hierarchy = cv2.findContours(dilated,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE) # get contours # for each contour found, draw a rectangle around it on original image for contour in contours: # get rectangle bounding contour [x,y,w,h] = cv2.boundingRect(contour) # discard areas that are too large if h>300 and w>300: continue # discard areas that are too small if h<40 or w<40: continue # draw rectangle around contour on original image cv2.rectangle(image,(x,y),(x+w,y+h),(255,0,255),2) # write original image with added contours to disk cv2.imwrite("contoured.jpg", image)
原始图片是您帖子中的第一张图片。
经过预处理(灰度,阈值和扩张 – 所以在步骤3之后),图像看起来像这样:
下面是结果图片(最后一行是“contoured.jpg”); 图像中对象的最终边界框如下所示:
您可以看到左侧的文本块被检测为单独的块,与其周围分隔。
使用相同的脚本具有相同的参数(除了像下面描述的第二个图像更改的阈值类型),以下是其他2张卡片的结果:
调整参数
参数(阈值,膨胀参数)针对该图像和该任务(寻找文本块)进行了优化,并且可以根据需要对其他卡图像或其他类型的对象进行调整。
对于阈值(步骤2),我使用了黑色阈值。 对于文本比背景更cv2.THRESH_BINARY
图片,比如文章中的第二张图片,应该使用白色的阈值,所以用cv2.THRESH_BINARY
替换。 对于第二幅图像,我也使用了一个稍微高一些的阈值(180)。 改变阈值的参数和扩张的迭代次数将导致图像中定界对象的不同程度的灵敏度。
查找其他对象类型:
例如,将第一张图像中的膨胀减少为5次迭代,可以更好地定义图像中的对象,大致查找图像中的所有单词 (而不是文本块):
知道一个单词的粗略大小,在这里我放弃了太小(小于20像素宽度或高度)或太大(大于100像素宽度或高度)忽略不太可能是单词的对象的区域,以得到结果上面的图片。
你可以尝试一下由蔡琦毅和田英丽开发的这个方法 。
他们还共享一个可以使用的软件(基于Opencv-1.0,它应该在Windows平台下运行)(尽管没有可用的源代码)。 它会生成图像中的所有文本边界框(以阴影显示)。 通过应用您的示例图像,您将得到以下结果:
注意:为了使结果更加健壮,可以进一步合并相邻的框。
更新:如果您的最终目标是识别图像中的文字,您可以进一步检查gttext ,这是一个OCR免费软件和地面实验工具,用于彩色图像文本。 源代码也是可用的。
有了这个,你可以得到如下认可的文字:
@ dhanushka的做法显示了最大的承诺,但我想在Python中玩耍,所以继续前进,翻译它的乐趣:
import cv2 import numpy as np from cv2 import boundingRect, countNonZero, cvtColor, drawContours, findContours, getStructuringElement, imread, morphologyEx, pyrDown, rectangle, threshold large = imread(image_path) # downsample and use it for processing rgb = pyrDown(large) # apply grayscale small = cvtColor(rgb, cv2.COLOR_BGR2GRAY) # morphological gradient morph_kernel = getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) grad = morphologyEx(small, cv2.MORPH_GRADIENT, morph_kernel) # binarize _, bw = threshold(src=grad, thresh=0, maxval=255, type=cv2.THRESH_BINARY+cv2.THRESH_OTSU) morph_kernel = getStructuringElement(cv2.MORPH_RECT, (9, 1)) # connect horizontally oriented regions connected = morphologyEx(bw, cv2.MORPH_CLOSE, morph_kernel) mask = np.zeros(bw.shape, np.uint8) # find contours im2, contours, hierarchy = findContours(connected, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) # filter contours for idx in range(0, len(hierarchy[0])): rect = x, y, rect_width, rect_height = boundingRect(contours[idx]) # fill the contour mask = drawContours(mask, contours, idx, (255, 255, 2555), cv2.FILLED) # ratio of non-zero pixels in the filled region r = float(countNonZero(mask)) / (rect_width * rect_height) if r > 0.45 and rect_height > 8 and rect_width > 8: rgb = rectangle(rgb, (x, y+rect_height), (x+rect_width, y), (0,255,0),3)
现在显示图像:
from PIL import Image Image.fromarray(rgb).show()
不是最剧烈的脚本,但我试图尽可能接近原始的C ++代码,以便读者遵循。
它几乎和原来的一样。 我会很乐意阅读如何改进/修正以完全类似于原始结果的建议。
以上代码JAVA版本:谢谢@William
public static List<Rect> detectLetters(Mat img){ List<Rect> boundRect=new ArrayList<>(); Mat img_gray =new Mat(), img_sobel=new Mat(), img_threshold=new Mat(), element=new Mat(); Imgproc.cvtColor(img, img_gray, Imgproc.COLOR_RGB2GRAY); Imgproc.Sobel(img_gray, img_sobel, CvType.CV_8U, 1, 0, 3, 1, 0, Core.BORDER_DEFAULT); //at src, Mat dst, double thresh, double maxval, int type Imgproc.threshold(img_sobel, img_threshold, 0, 255, 8); element=Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(15,5)); Imgproc.morphologyEx(img_threshold, img_threshold, Imgproc.MORPH_CLOSE, element); List<MatOfPoint> contours = new ArrayList<MatOfPoint>(); Mat hierarchy = new Mat(); Imgproc.findContours(img_threshold, contours,hierarchy, 0, 1); List<MatOfPoint> contours_poly = new ArrayList<MatOfPoint>(contours.size()); for( int i = 0; i < contours.size(); i++ ){ MatOfPoint2f mMOP2f1=new MatOfPoint2f(); MatOfPoint2f mMOP2f2=new MatOfPoint2f(); contours.get(i).convertTo(mMOP2f1, CvType.CV_32FC2); Imgproc.approxPolyDP(mMOP2f1, mMOP2f2, 2, true); mMOP2f2.convertTo(contours.get(i), CvType.CV_32S); Rect appRect = Imgproc.boundingRect(contours.get(i)); if (appRect.width>appRect.height) { boundRect.add(appRect); } } return boundRect; }
在实践中使用这个代码:
System.loadLibrary(Core.NATIVE_LIBRARY_NAME); Mat img1=Imgcodecs.imread("abc.png"); List<Rect> letterBBoxes1=Utils.detectLetters(img1); for(int i=0; i< letterBBoxes1.size(); i++) Imgproc.rectangle(img1,letterBBoxes1.get(i).br(), letterBBoxes1.get(i).tl(),new Scalar(0,255,0),3,8,0); Imgcodecs.imwrite("abc1.png", img1);
@ dhanushka解决方案的Python实现:
def process_rgb(rgb): hasText = 0 gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY); morphKernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3)) grad = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, morphKernel) # binarize _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU) # connect horizontally oriented regions morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1)) connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, morphKernel) # find contours mask = np.zeros(bw.shape[:2], dtype="uint8"); _,contours, hierarchy = cv2.findContours(connected, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) # filter contours idx = 0 while idx >= 0: x,y,w,h = cv2.boundingRect(contours[idx]); # fill the contour cv2.drawContours(mask, contours, idx, (255, 255, 255), cv2.FILLED); # ratio of non-zero pixels in the filled region r = cv2.contourArea(contours[idx])/(w*h) if(r > 0.45 and h > 5 and w > 5 and w > h): cv2.rectangle(rgb, (x,y), (x+w,y+h), (0, 255, 0), 2) hasText = 1 idx = hierarchy[0][idx][0] return hasText, rgb