public interface ITesseract
| Modifier and Type | Interface and Description |
|---|---|
| static class | ITesseract.RenderedFormat — Rendered formats supported by Tesseract. |
| Modifier and Type | Field and Description |
|---|---|
| static java.lang.String | DOCUMENT_TITLE |
| static java.lang.String | htmlBeginTag |
| static java.lang.String | htmlEndTag |
| static java.lang.String | PAGE_SEPARATOR |
| Modifier and Type | Method and Description |
|---|---|
void |
createDocuments(java.lang.String[] filenames,
java.lang.String[] outputbases,
java.util.List<ITesseract.RenderedFormat> formats)
Creates documents for given renderers.
|
void |
createDocuments(java.lang.String filename,
java.lang.String outputbase,
java.util.List<ITesseract.RenderedFormat> formats)
Creates documents for given renderers.
|
java.util.List<OCRResult> |
createDocumentsWithResults(java.awt.image.BufferedImage[] bis,
java.lang.String[] filenames,
java.lang.String[] outputbases,
java.util.List<ITesseract.RenderedFormat> formats,
int pageIteratorLevel)
Creates documents with OCR results for given renderers at specified page
iterator level.
|
OCRResult |
createDocumentsWithResults(java.awt.image.BufferedImage bi,
java.lang.String filename,
java.lang.String outputbase,
java.util.List<ITesseract.RenderedFormat> formats,
int pageIteratorLevel)
Creates documents with OCR result for given renderers at specified page
iterator level.
|
java.util.List<OCRResult> |
createDocumentsWithResults(java.lang.String[] filenames,
java.lang.String[] outputbases,
java.util.List<ITesseract.RenderedFormat> formats,
int pageIteratorLevel)
Creates documents with OCR results for given renderers at specified page
iterator level.
|
OCRResult |
createDocumentsWithResults(java.lang.String filename,
java.lang.String outputbase,
java.util.List<ITesseract.RenderedFormat> formats,
int pageIteratorLevel)
Creates documents with OCR result for given renderers at specified page
iterator level.
|
java.lang.String |
doOCR(java.awt.image.BufferedImage bi)
Performs OCR operation.
|
java.lang.String |
doOCR(java.awt.image.BufferedImage bi,
java.awt.Rectangle rect)
Deprecated.
|
java.lang.String |
doOCR(java.awt.image.BufferedImage bi,
java.lang.String filename,
java.util.List<java.awt.Rectangle> rects)
Performs OCR operation.
|
java.lang.String |
doOCR(java.io.File imageFile)
Performs OCR operation.
|
java.lang.String |
doOCR(java.io.File imageFile,
java.util.List<java.awt.Rectangle> rects)
Performs OCR operation.
|
java.lang.String |
doOCR(java.io.File imageFile,
java.awt.Rectangle rect)
Deprecated.
|
java.lang.String |
doOCR(int xsize,
int ysize,
java.nio.ByteBuffer buf,
int bpp,
java.lang.String filename,
java.util.List<java.awt.Rectangle> rects)
Performs OCR operation.
|
java.lang.String |
doOCR(int xsize,
int ysize,
java.nio.ByteBuffer buf,
java.awt.Rectangle rect,
int bpp)
Deprecated.
|
java.lang.String |
doOCR(int xsize,
int ysize,
java.nio.ByteBuffer buf,
java.lang.String filename,
java.awt.Rectangle rect,
int bpp)
Deprecated.
|
java.lang.String |
doOCR(java.util.List<javax.imageio.IIOImage> imageList,
java.awt.Rectangle rect)
Deprecated.
|
java.lang.String |
doOCR(java.util.List<javax.imageio.IIOImage> imageList,
java.lang.String filename,
java.util.List<java.util.List<java.awt.Rectangle>> roiss)
Performs OCR operation.
|
java.lang.String |
doOCR(java.util.List<javax.imageio.IIOImage> imageList,
java.lang.String filename,
java.awt.Rectangle rect)
Deprecated.
|
java.util.List<java.awt.Rectangle> |
getSegmentedRegions(java.awt.image.BufferedImage bi,
int pageIteratorLevel)
Gets segmented regions at specified page iterator level.
|
java.util.List<Word> |
getWords(java.awt.image.BufferedImage bi,
int pageIteratorLevel)
Gets recognized words at specified page iterator level.
|
java.util.List<Word> |
getWords(java.util.List<java.awt.image.BufferedImage> biList,
int pageIteratorLevel)
Gets recognized words at specified page iterator level.
|
void |
setConfigs(java.util.List<java.lang.String> configs)
Sets configs to be passed to Tesseract's
Init method. |
void |
setDatapath(java.lang.String datapath)
Sets tessdata path.
|
void |
setLanguage(java.lang.String language)
Sets language for OCR.
|
void |
setOcrEngineMode(int ocrEngineMode)
Sets OCR engine mode.
|
void |
setPageSegMode(int mode)
Sets page segmentation mode.
|
void |
setTessVariable(java.lang.String key,
java.lang.String value)
Deprecated.
Use
setVariable(String key, String value) instead. |
void |
setVariable(java.lang.String key,
java.lang.String value)
Sets the value of Tesseract's internal parameter.
|
static final java.lang.String htmlBeginTag
static final java.lang.String htmlEndTag
static final java.lang.String PAGE_SEPARATOR
static final java.lang.String DOCUMENT_TITLE
java.lang.String doOCR(java.io.File imageFile)
throws TesseractException
imageFile - an image file
Throws: TesseractException

@Deprecated
java.lang.String doOCR(java.io.File imageFile,
java.awt.Rectangle rect)
throws TesseractException
imageFile - an image file
rect - the bounding rectangle defines the region of the image to be
recognized. A rectangle of zero dimension or null indicates
the whole image.
Throws: TesseractException

java.lang.String doOCR(java.io.File imageFile,
java.util.List<java.awt.Rectangle> rects)
throws TesseractException
imageFile - an image file
rects - list of the bounding rectangles defines the regions of the
image to be recognized. A rectangle of zero dimension or
null indicates the whole image.
Throws: TesseractException

java.lang.String doOCR(java.awt.image.BufferedImage bi)
throws TesseractException
bi - a buffered image
Throws: TesseractException

@Deprecated
java.lang.String doOCR(java.awt.image.BufferedImage bi,
java.awt.Rectangle rect)
throws TesseractException
bi - a buffered image
rect - the bounding rectangle defines the region of the image to be
recognized. A rectangle of zero dimension or null indicates
the whole image.
Throws: TesseractException

java.lang.String doOCR(java.awt.image.BufferedImage bi,
java.lang.String filename,
java.util.List<java.awt.Rectangle> rects)
throws TesseractException
bi - a buffered image
filename - input file name. Needed only for training and reading a
UNLV zone file.
rects - list of the bounding rectangles defines the regions of the
image to be recognized. A rectangle of zero dimension or
null indicates the whole image.
Throws: TesseractException

@Deprecated
java.lang.String doOCR(java.util.List<javax.imageio.IIOImage> imageList,
java.awt.Rectangle rect)
throws TesseractException
imageList - a list of IIOImage objects
rect - the bounding rectangle defines the region of the image to be
recognized. A rectangle of zero dimension or null indicates
the whole image.
Throws: TesseractException

@Deprecated
java.lang.String doOCR(java.util.List<javax.imageio.IIOImage> imageList,
java.lang.String filename,
java.awt.Rectangle rect)
throws TesseractException
imageList - a list of IIOImage objects
filename - input file name. Needed only for training and reading a
UNLV zone file.
rect - the bounding rectangle defines the region of the image to be
recognized. A rectangle of zero dimension or null indicates
the whole image.
Throws: TesseractException

java.lang.String doOCR(java.util.List<javax.imageio.IIOImage> imageList,
java.lang.String filename,
java.util.List<java.util.List<java.awt.Rectangle>> roiss)
throws TesseractException
imageList - a list of IIOImage objects
filename - input file name. Needed only for training and reading a
UNLV zone file.
roiss - list of list of the bounding rectangles defines the regions
of the images to be recognized. A rectangle of zero dimension or
null indicates the whole image.
Throws: TesseractException

@Deprecated
java.lang.String doOCR(int xsize,
int ysize,
java.nio.ByteBuffer buf,
java.awt.Rectangle rect,
int bpp)
throws TesseractException
Performs OCR operation. Use SetImage, (optionally)
SetRectangle, and one or more of the Get*Text
functions.
xsize - width of image
ysize - height of image
buf - pixel data
rect - the bounding rectangle defines the region of the image to be
recognized. A rectangle of zero dimension or null indicates
the whole image.
bpp - bits per pixel, represents the bit depth of the image, with 1
for binary bitmap, 8 for gray, and 24 for color RGB.
Throws: TesseractException

@Deprecated
java.lang.String doOCR(int xsize,
int ysize,
java.nio.ByteBuffer buf,
java.lang.String filename,
java.awt.Rectangle rect,
int bpp)
throws TesseractException
Performs OCR operation. Use SetImage, (optionally)
SetRectangle, and one or more of the Get*Text
functions.
xsize - width of image
ysize - height of image
buf - pixel data
filename - input file name. Needed only for training and reading a
UNLV zone file.
rect - the bounding rectangle defines the region of the image to be
recognized. A rectangle of zero dimension or null indicates
the whole image.
bpp - bits per pixel, represents the bit depth of the image, with 1
for binary bitmap, 8 for gray, and 24 for color RGB.
Throws: TesseractException

java.lang.String doOCR(int xsize,
int ysize,
java.nio.ByteBuffer buf,
int bpp,
java.lang.String filename,
java.util.List<java.awt.Rectangle> rects)
throws TesseractException
Performs OCR operation. Use SetImage, (optionally)
SetRectangle, and one or more of the Get*Text
functions.
xsize - width of image
ysize - height of image
buf - pixel data
bpp - bits per pixel, represents the bit depth of the image, with 1
for binary bitmap, 8 for gray, and 24 for color RGB.
filename - input file name. Needed only for training and reading a
UNLV zone file.
rects - list of the bounding rectangles defines the regions of the
image to be recognized. A rectangle of zero dimension or
null indicates the whole image.
Throws: TesseractException

void setDatapath(java.lang.String datapath)
datapath - the tessdata path to set

void setLanguage(java.lang.String language)
language - the language code, which follows ISO 639-3 standard.

void setOcrEngineMode(int ocrEngineMode)
ocrEngineMode - the OcrEngineMode to set

void setPageSegMode(int mode)
mode - the page segmentation mode to set

@Deprecated
void setTessVariable(java.lang.String key,
java.lang.String value)
Deprecated. Use setVariable(String key, String value) instead.
key - variable name, e.g., tessedit_create_hocr,
tessedit_char_whitelist, etc.
value - value for corresponding variable, e.g., "1", "0",
"0123456789", etc.

void setVariable(java.lang.String key,
java.lang.String value)
key - variable name, e.g., tessedit_create_hocr,
tessedit_char_whitelist, etc.
value - value for corresponding variable, e.g., "1", "0",
"0123456789", etc.

void setConfigs(java.util.List<java.lang.String> configs)
Sets configs to be passed to Tesseract's Init method.
configs - list of config filenames, e.g., "digits", "bazaar",
"quiet"

void createDocuments(java.lang.String filename,
java.lang.String outputbase,
java.util.List<ITesseract.RenderedFormat> formats)
throws TesseractException
filename - input image
outputbase - output filename without extension
formats - types of renderers
Throws: TesseractException

void createDocuments(java.lang.String[] filenames,
java.lang.String[] outputbases,
java.util.List<ITesseract.RenderedFormat> formats)
throws TesseractException
filenames - array of input files
outputbases - array of output filenames without extension
formats - types of renderers
Throws: TesseractException

OCRResult createDocumentsWithResults(java.awt.image.BufferedImage bi, java.lang.String filename, java.lang.String outputbase, java.util.List<ITesseract.RenderedFormat> formats, int pageIteratorLevel) throws TesseractException
bi - input buffered image
filename - filename (optional)
outputbase - output filenames without extension
formats - types of renderer
pageIteratorLevel - TessPageIteratorLevel enum
Throws: TesseractException

java.util.List<OCRResult> createDocumentsWithResults(java.awt.image.BufferedImage[] bis, java.lang.String[] filenames, java.lang.String[] outputbases, java.util.List<ITesseract.RenderedFormat> formats, int pageIteratorLevel) throws TesseractException
bis - array of input buffered images
filenames - array of filenames
outputbases - array of output filenames without extension
formats - types of renderer
pageIteratorLevel - TessPageIteratorLevel enum
Throws: TesseractException

OCRResult createDocumentsWithResults(java.lang.String filename, java.lang.String outputbase, java.util.List<ITesseract.RenderedFormat> formats, int pageIteratorLevel) throws TesseractException
filename - input file
outputbase - output filenames without extension
formats - types of renderer
pageIteratorLevel - TessPageIteratorLevel enum
Throws: TesseractException

java.util.List<OCRResult> createDocumentsWithResults(java.lang.String[] filenames, java.lang.String[] outputbases, java.util.List<ITesseract.RenderedFormat> formats, int pageIteratorLevel) throws TesseractException
filenames - array of input files
outputbases - array of output filenames without extension
formats - types of renderer
pageIteratorLevel - TessPageIteratorLevel enum
Throws: TesseractException

java.util.List<java.awt.Rectangle> getSegmentedRegions(java.awt.image.BufferedImage bi,
int pageIteratorLevel)
throws TesseractException
bi - input buffered image
pageIteratorLevel - TessPageIteratorLevel enum
Returns: list of Rectangle
Throws: TesseractException

java.util.List<Word> getWords(java.awt.image.BufferedImage bi, int pageIteratorLevel)
bi - input buffered image
pageIteratorLevel - TessPageIteratorLevel enum
Returns: list of Word

java.util.List<Word> getWords(java.util.List<java.awt.image.BufferedImage> biList, int pageIteratorLevel)
biList - list of input buffered images
pageIteratorLevel - TessPageIteratorLevel enum
Returns: list of Word

Copyright © 2024 Tess4J. All rights reserved.