1、登录有图片验证码时,使用tesseract-ocr解析图片
2、安装
Windows:本地调用,先安装 tesseract-ocr 包。
Linux:执行 yum install icu 52.1 libicu-devel pango 1.22.0 libgnomeui-devel libtiff-devel libjpeg-devel libpng-devel -y
安装 leptonica-1.78.0.tar.gz:1、cd leptonica-1.78.0 2、./autogen.sh 3、./configure --prefix=/usr/local/ 4、make 5、make install
安装 tesseract-4.0.0.tar.gz:1、cd tesseract-4.0.0 2、./autogen.sh 3、./configure(注意是否有 warning 提示,如果有提示,需要先把缺少的插件安装完成) 4、make 5、make install 6、ldconfig
3、配置环境变量:vim /etc/profile,添加:
PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/lib64/pkgconfig
export PKG_CONFIG_PATH
CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/local/include/
export CPLUS_INCLUDE_PATH
C_INCLUDE_PATH=$C_INCLUDE_PATH:/usr/local/include/leptonica
export C_INCLUDE_PATH
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64
export LD_LIBRARY_PATH
LIBRARY_PATH=$LIBRARY_PATH:/usr/local/lib64
export LIBRARY_PATH
保存后执行:source /etc/profile
4、pom引入:
<dependency> <groupId>net.sourceforge.tess4j</groupId> <artifactId>tess4j</artifactId> <version>3.2.1</version> </dependency>
5、代码:先通过接口/getSecurityCode?time=获取图片,并保存到本地,开始解析图片
package com.yiliao.utils;

import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.lang.StringUtils;

import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;

/**
 * Pre-processes a captcha image (brighten, grayscale, Otsu binarization) and
 * runs Tesseract OCR on it.
 */
public class OCRHelper {

    /** Root directory under which the captcha image and the tessdata folder are looked up. */
    private static final String path = System.getProperty("user.dir") + "/src/main/resources/";

    /** Directory that holds the captcha image; also used as the output directory for cleaning. */
    private static final String UPLOAD_DIR = path + "uploadFile/";

    /** Captcha image that {@link #analysisCode()} cleans and recognizes. */
    private static final String CAPTCHA_FILE = UPLOAD_DIR + "securityCode.jpg";

    /** Number of distinct gray levels (one histogram bin per level). */
    private static final int GRAY_LEVELS = 256;

    /** Gamma used for the gamma-weighted grayscale conversion. */
    private static final double GAMMA = 2.2;

    public static void main(String[] args) throws IOException {
    }

    /**
     * Denoises an image: brightens it, converts it to grayscale, binarizes it with
     * an Otsu threshold and writes the result as a JPEG named like the source file
     * into {@code destDir}.
     *
     * <p>Failures are reported on standard error and the method returns without
     * writing anything; no exception is thrown for an unreadable source image.
     *
     * @param sfile   image to denoise
     * @param destDir directory the denoised image is written to (created if missing)
     */
    public static void cleanImage(File sfile, String destDir) {
        File destF = new File(destDir);
        if (!destF.exists()) {
            destF.mkdirs();
        }

        BufferedImage source;
        try {
            source = ImageIO.read(sfile);
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }
        // ImageIO.read returns null when no registered reader understands the file.
        if (source == null) {
            System.err.println("cleanImage: cannot decode image " + sfile);
            return;
        }

        int w = source.getWidth();
        int h = source.getHeight();
        int[][] gray = toGray(source, w, h);
        BufferedImage binary = binarize(gray, w, h, ostu(gray, w, h));

        File target = new File(destDir, sfile.getName());
        try {
            // ImageIO.write returns false when no writer for the format is available.
            if (!ImageIO.write(binary, "jpg", target)) {
                System.err.println("cleanImage: no image writer available for " + target);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Brightens every pixel and converts it to a gamma-weighted gray level, indexed as [x][y]. */
    private static int[][] toGray(BufferedImage image, int w, int h) {
        int[][] gray = new int[w][h];
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < h; y++) {
                int argb = image.getRGB(x, y);
                int r = brighten((argb >> 16) & 0xFF);
                int g = brighten((argb >> 8) & 0xFF);
                int b = brighten(argb & 0xFF);
                double weighted = Math.pow(r, GAMMA) * 0.2973
                        + Math.pow(g, GAMMA) * 0.6274
                        + Math.pow(b, GAMMA) * 0.0753;
                gray[x][y] = (int) Math.pow(weighted, 1 / GAMMA);
            }
        }
        return gray;
    }

    /** Scales one color channel by 1.1, adds 30 and caps the result at 255. */
    private static int brighten(int channel) {
        return Math.min(255, (int) (channel * 1.1 + 30));
    }

    /** Builds a two-color image: pixels above the threshold become white, all others black. */
    private static BufferedImage binarize(int[][] gray, int w, int h, int threshold) {
        BufferedImage binary = new BufferedImage(w, h, BufferedImage.TYPE_BYTE_BINARY);
        int white = Color.WHITE.getRGB();
        int black = Color.BLACK.getRGB();
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < h; y++) {
                binary.setRGB(x, y, gray[x][y] > threshold ? white : black);
            }
        }
        return binary;
    }

    /**
     * Computes a binarization threshold with Otsu's method: the gray level that
     * maximizes the between-class variance of background and foreground.
     *
     * @param gray gray levels indexed as {@code gray[x][y]}; only the low 8 bits of
     *             each value are used
     * @param w    number of columns to read from {@code gray}
     * @param h    number of rows to read from {@code gray}
     * @return the selected threshold in 0..255; 0 when there are no pixels or all
     *         pixels share one gray level
     */
    public static int ostu(int[][] gray, int w, int h) {
        // One bin per gray level; the bin count must not depend on the image size.
        int[] histData = new int[GRAY_LEVELS];
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < h; y++) {
                histData[0xFF & gray[x][y]]++;
            }
        }

        int total = w * h;
        long sum = 0;
        for (int t = 0; t < GRAY_LEVELS; t++) {
            sum += (long) t * histData[t];
        }

        long sumB = 0;      // weighted sum of the background class
        int wB = 0;         // pixel count of the background class
        double varMax = 0;
        int threshold = 0;
        for (int t = 0; t < GRAY_LEVELS; t++) {
            wB += histData[t];
            if (wB == 0) {
                continue;   // no background pixels yet
            }
            int wF = total - wB;
            if (wF == 0) {
                break;      // every pixel is background from here on
            }
            sumB += (long) t * histData[t];
            double mB = (double) sumB / wB;
            double mF = (double) (sum - sumB) / wF;
            double varBetween = (double) wB * wF * (mB - mF) * (mB - mF);
            if (varBetween > varMax) {
                varMax = varBetween;
                threshold = t;
            }
        }
        return threshold;
    }

    /**
     * Cleans the stored captcha image and recognizes its text with Tesseract.
     *
     * <p>The cleaned image is written to the same directory under the same file
     * name, so the captcha file is replaced before OCR runs on it.
     *
     * @return the recognized text with all whitespace removed, or {@code null} when
     *         OCR fails or yields no result
     */
    public static String analysisCode() {
        File captcha = new File(CAPTCHA_FILE);
        cleanImage(captcha, UPLOAD_DIR);

        Tesseract tesseract = new Tesseract();
        tesseract.setLanguage("eng");
        tesseract.setDatapath(path + "tessdata");
        try {
            String result = tesseract.doOCR(captcha);
            return result == null ? null : result.replaceAll("\\s+", "");
        } catch (TesseractException e) {
            e.printStackTrace();
            return null;
        }
    }
}