搜索
您的当前位置:首页正文

Windows下Java调用OCR进行图片识别

来源:知库网
Windows下Java调用OCR进行图片识别

使用Java语言,通过Tesseract-OCR对图片进行识别。

1. Tesseract-OCR

下载Windows版本并安装。

2. 程序如下:

a. ImageIOHelper类

package OCR;

import java.awt.image.BufferedImage;import java.io.File;

import java.io.IOException;import java.util.Iterator;import java.util.Locale;

import javax.imageio.IIOImage;import javax.imageio.ImageIO;

import javax.imageio.ImageReader;

import javax.imageio.ImageWriteParam;import javax.imageio.ImageWriter;

import javax.imageio.metadata.IIOMetadata;import javax.imageio.stream.ImageInputStream;import javax.imageio.stream.ImageOutputStream;

import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;

public class ImageIOHelper { /**

* 图⽚⽂件转换为tif格式

* @param imageFile ⽂件路径

* @param imageFormat ⽂件扩展名 * @return */

public static File createImage(File imageFile, String imageFormat) { File tempFile = null; try {

Iterator readers = ImageIO.getImageReadersByFormatName(imageFormat); ImageReader reader = readers.next();

ImageInputStream iis = ImageIO.createImageInputStream(imageFile); reader.setInput(iis);

//Read the stream metadata

IIOMetadata streamMetadata = reader.getStreamMetadata();

//Set up the writeParam

TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.CHINESE); tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

//Get tif writer and set output to file

Iterator writers = ImageIO.getImageWritersByFormatName(\"tiff\"); ImageWriter writer = writers.next();

BufferedImage bi = reader.read(0);

IIOImage image = new IIOImage(bi,null,reader.getImageMetadata(0)); tempFile = tempImageFile(imageFile);

ImageOutputStream ios = ImageIO.createImageOutputStream(tempFile); writer.setOutput(ios);

writer.write(streamMetadata, image, tiffWriteParam); ios.close();

writer.dispose(); reader.dispose();

} catch (IOException e) { e.printStackTrace(); }

return tempFile; }

private static File tempImageFile(File imageFile) { String path = imageFile.getPath();

StringBuffer strB = new StringBuffer(path); strB.insert(path.lastIndexOf('.'),0);

return new File(strB.toString().replaceFirst(\"(?<=//.)(//w+)$\ } }

b. OCR核心类

package OCR;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.jdesktop.swingx.util.OS;

public class OCR {

private final String LANG_OPTION = \"-l\"; //英⽂字母⼩写l,并⾮数字1 private final String EOL = System.getProperty(\"line.separator\"); private String tessPath = \"C://Program Files//Tesseract-OCR\";

//private String tessPath = new File(\"tesseract\").getAbsolutePath();

public String recognizeText(File imageFile,String imageFormat)throws Exception{ File tempImage = ImageIOHelper.createImage(imageFile,imageFormat); File outputFile = new File(imageFile.getParentFile(),\"output\"); StringBuffer strB = new StringBuffer();

List cmd = new ArrayList(); if(OS.isWindowsXP()){

cmd.add(tessPath+\"//tesseract\"); }else if(OS.isLinux()){ cmd.add(\"tesseract\"); }else{

cmd.add(tessPath+\"//tesseract\"); }

cmd.add(\"\");

cmd.add(outputFile.getName()); //cmd.add(LANG_OPTION); //cmd.add(\"chi_sim\"); //cmd.add(\"eng\");

ProcessBuilder pb = new ProcessBuilder(); pb.directory(imageFile.getParentFile());

cmd.set(1, tempImage.getName()); pb.command(cmd);

pb.redirectErrorStream(true);

Process process = pb.start(); //tesseract.exe 1.jpg 1 -l chi_sim int w = process.waitFor();

//删除临时正在⼯作⽂件 tempImage.delete();

if(w==0){

BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath()+\".txt\"),\"UTF-8\"));

String str;

while((str = in.readLine())!=null){ strB.append(str).append(EOL); }

in.close(); }else{

String msg; switch(w){ case 1:

msg = \"Errors accessing files.There may be spaces in your image's filename.\"; break; case 29:

msg = \"Cannot recongnize the image or its selected region.\";

break; case 31:

msg = \"Unsupported image format.\"; break; default:

msg = \"Errors occurred.\"; }

tempImage.delete();

//throw new RuntimeException(msg); }

new File(outputFile.getAbsolutePath()+\".txt\").delete(); return strB.toString(); }}

c.main

package OCR;import java.io.File;

import java.io.IOException;

public class TestOcr { /**

* @param args */

public static void main(String[] args) { //输⼊图⽚地址

String path = \"d://test//test.bmp\"; try {

String valCode = new OCR().recognizeText(new File(path), \"bmp\"); System.out.println(valCode); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } } }

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。

因篇幅问题不能全部显示,请点此查看更多更全内容

Top