使用Java语言,通过Tesseract-OCR对图片进行识别。
1. Tesseract-OCR:下载Windows版本并安装。
2. 程序如下:a. ImageIOHelper类
package OCR;
import java.awt.image.BufferedImage;import java.io.File;
import java.io.IOException;import java.util.Iterator;import java.util.Locale;
import javax.imageio.IIOImage;import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.ImageWriteParam;import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;import javax.imageio.stream.ImageInputStream;import javax.imageio.stream.ImageOutputStream;
import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;
public class ImageIOHelper { /**
* 图⽚⽂件转换为tif格式
* @param imageFile ⽂件路径
* @param imageFormat ⽂件扩展名 * @return */
public static File createImage(File imageFile, String imageFormat) { File tempFile = null; try {
Iterator ImageInputStream iis = ImageIO.createImageInputStream(imageFile); reader.setInput(iis); //Read the stream metadata IIOMetadata streamMetadata = reader.getStreamMetadata(); //Set up the writeParam TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.CHINESE); tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); //Get tif writer and set output to file Iterator BufferedImage bi = reader.read(0); IIOImage image = new IIOImage(bi,null,reader.getImageMetadata(0)); tempFile = tempImageFile(imageFile); ImageOutputStream ios = ImageIO.createImageOutputStream(tempFile); writer.setOutput(ios); writer.write(streamMetadata, image, tiffWriteParam); ios.close(); writer.dispose(); reader.dispose(); } catch (IOException e) { e.printStackTrace(); } return tempFile; } private static File tempImageFile(File imageFile) { String path = imageFile.getPath(); StringBuffer strB = new StringBuffer(path); strB.insert(path.lastIndexOf('.'),0); return new File(strB.toString().replaceFirst(\"(?<=//.)(//w+)$\ } } b.OCR核⼼类 package OCR; import java.io.BufferedReader;import java.io.File; import java.io.FileInputStream;import java.io.InputStreamReader;import java.util.ArrayList;import java.util.List; import org.jdesktop.swingx.util.OS; public class OCR { private final String LANG_OPTION = \"-l\"; //英⽂字母⼩写l,并⾮数字1 private final String EOL = System.getProperty(\"line.separator\"); private String tessPath = \"C://Program Files//Tesseract-OCR\"; //private String tessPath = new File(\"tesseract\").getAbsolutePath(); public String recognizeText(File imageFile,String imageFormat)throws Exception{ File tempImage = ImageIOHelper.createImage(imageFile,imageFormat); File outputFile = new File(imageFile.getParentFile(),\"output\"); StringBuffer strB = new StringBuffer(); List cmd.add(tessPath+\"//tesseract\"); }else if(OS.isLinux()){ cmd.add(\"tesseract\"); }else{ cmd.add(tessPath+\"//tesseract\"); } cmd.add(\"\"); cmd.add(outputFile.getName()); //cmd.add(LANG_OPTION); 
//cmd.add(\"chi_sim\"); //cmd.add(\"eng\"); ProcessBuilder pb = new ProcessBuilder(); pb.directory(imageFile.getParentFile()); cmd.set(1, tempImage.getName()); pb.command(cmd); pb.redirectErrorStream(true); Process process = pb.start(); //tesseract.exe 1.jpg 1 -l chi_sim int w = process.waitFor(); //删除临时正在⼯作⽂件 tempImage.delete(); if(w==0){ BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath()+\".txt\"),\"UTF-8\")); String str; while((str = in.readLine())!=null){ strB.append(str).append(EOL); } in.close(); }else{ String msg; switch(w){ case 1: msg = \"Errors accessing files.There may be spaces in your image's filename.\"; break; case 29: msg = \"Cannot recongnize the image or its selected region.\"; break; case 31: msg = \"Unsupported image format.\"; break; default: msg = \"Errors occurred.\"; } tempImage.delete(); //throw new RuntimeException(msg); } new File(outputFile.getAbsolutePath()+\".txt\").delete(); return strB.toString(); }} c.main package OCR;import java.io.File; import java.io.IOException; public class TestOcr { /** * @param args */ public static void main(String[] args) { //输⼊图⽚地址 String path = \"d://test//test.bmp\"; try { String valCode = new OCR().recognizeText(new File(path), \"bmp\"); System.out.println(valCode); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } } } 以上就是本⽂的全部内容,希望对⼤家的学习有所帮助,也希望⼤家多多⽀持。 因篇幅问题不能全部显示,请点此查看更多更全内容