Class TessAPI1

java.lang.Object
net.sourceforge.tess4j.TessAPI1
All Implemented Interfaces:
com.sun.jna.Library, ITessAPI
Direct Known Subclasses:
Tesseract1

public class TessAPI1 extends Object implements com.sun.jna.Library, ITessAPI
A Java wrapper for Tesseract OCR 5.4.1 API using JNA Direct Mapping.
  • Constructor Details

    • TessAPI1

      public TessAPI1()
  • Method Details

    • TessVersion

      public static String TessVersion()
      Gets the version identifier.
      Returns:
      the version identifier
    • TessDeleteText

      public static void TessDeleteText(com.sun.jna.Pointer text)
      Deallocates the memory block occupied by text.
      Parameters:
      text - the pointer to text
    • TessDeleteTextArray

      public static void TessDeleteTextArray(com.sun.jna.ptr.PointerByReference arr)
      Deallocates the memory block occupied by text array.
      Parameters:
      arr - text array pointer reference
    • TessDeleteIntArray

      public static void TessDeleteIntArray(IntBuffer arr)
      Deallocates the memory block occupied by integer array.
      Parameters:
      arr - int array
    • TessTextRendererCreate

      public static ITessAPI.TessResultRenderer TessTextRendererCreate(String outputbase)
    • TessHOcrRendererCreate

      public static ITessAPI.TessResultRenderer TessHOcrRendererCreate(String outputbase)
    • TessHOcrRendererCreate2

      public static ITessAPI.TessResultRenderer TessHOcrRendererCreate2(String outputbase, int font_info)
    • TessAltoRendererCreate

      public static ITessAPI.TessResultRenderer TessAltoRendererCreate(String outputbase)
    • TessPAGERendererCreate

      public static ITessAPI.TessResultRenderer TessPAGERendererCreate(String outputbase)
    • TessTsvRendererCreate

      public static ITessAPI.TessResultRenderer TessTsvRendererCreate(String outputbase)
    • TessPDFRendererCreate

      public static ITessAPI.TessResultRenderer TessPDFRendererCreate(String outputbase, String datadir, int textonly)
    • TessUnlvRendererCreate

      public static ITessAPI.TessResultRenderer TessUnlvRendererCreate(String outputbase)
    • TessBoxTextRendererCreate

      public static ITessAPI.TessResultRenderer TessBoxTextRendererCreate(String outputbase)
    • TessLSTMBoxRendererCreate

      public static ITessAPI.TessResultRenderer TessLSTMBoxRendererCreate(String outputbase)
    • TessWordStrBoxRendererCreate

      public static ITessAPI.TessResultRenderer TessWordStrBoxRendererCreate(String outputbase)
    • TessDeleteResultRenderer

      public static void TessDeleteResultRenderer(ITessAPI.TessResultRenderer renderer)
    • TessResultRendererInsert

      public static void TessResultRendererInsert(ITessAPI.TessResultRenderer renderer, ITessAPI.TessResultRenderer next)
    • TessResultRendererNext

      public static ITessAPI.TessResultRenderer TessResultRendererNext(ITessAPI.TessResultRenderer renderer)
    • TessResultRendererBeginDocument

      public static int TessResultRendererBeginDocument(ITessAPI.TessResultRenderer renderer, String title)
    • TessResultRendererAddImage

      public static int TessResultRendererAddImage(ITessAPI.TessResultRenderer renderer, com.sun.jna.ptr.PointerByReference api)
    • TessResultRendererEndDocument

      public static int TessResultRendererEndDocument(ITessAPI.TessResultRenderer renderer)
    • TessResultRendererExtention

      public static com.sun.jna.Pointer TessResultRendererExtention(ITessAPI.TessResultRenderer renderer)
    • TessResultRendererTitle

      public static com.sun.jna.Pointer TessResultRendererTitle(ITessAPI.TessResultRenderer renderer)
    • TessResultRendererImageNum

      public static int TessResultRendererImageNum(ITessAPI.TessResultRenderer renderer)
    • TessBaseAPICreate

      public static ITessAPI.TessBaseAPI TessBaseAPICreate()
      Creates an instance of the base class for all Tesseract APIs.
      Returns:
      the TesseractAPI instance
    • TessBaseAPIDelete

      public static void TessBaseAPIDelete(ITessAPI.TessBaseAPI handle)
      Disposes the TesseractAPI instance.
      Parameters:
      handle - the TesseractAPI instance
    • TessBaseAPISetInputName

      public static void TessBaseAPISetInputName(ITessAPI.TessBaseAPI handle, String name)
      Set the name of the input file. Needed only for training and reading a UNLV zone file, and for searchable PDF output.
      Parameters:
      handle - the TesseractAPI instance
      name - name of the input file
    • TessBaseAPIGetInputName

      public static String TessBaseAPIGetInputName(ITessAPI.TessBaseAPI handle)
      These functions are required for searchable PDF output. We need our hands on the input file so that we can include it in the PDF without transcoding. If that is not possible, we need the original image. Finally, resolution metadata is stored in the PDF so we need that as well.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      input file name
    • TessBaseAPISetInputImage

      public static void TessBaseAPISetInputImage(ITessAPI.TessBaseAPI handle, net.sourceforge.lept4j.Pix pix)
    • TessBaseAPIGetInputImage

      public static net.sourceforge.lept4j.Pix TessBaseAPIGetInputImage(ITessAPI.TessBaseAPI handle)
    • TessBaseAPIGetSourceYResolution

      public static int TessBaseAPIGetSourceYResolution(ITessAPI.TessBaseAPI handle)
    • TessBaseAPIGetDatapath

      public static String TessBaseAPIGetDatapath(ITessAPI.TessBaseAPI handle)
    • TessBaseAPISetOutputName

      public static void TessBaseAPISetOutputName(ITessAPI.TessBaseAPI handle, String name)
      Set the name of the bonus output files. Needed only for debugging.
      Parameters:
      handle - the TesseractAPI instance
      name - name of the output file
    • TessBaseAPISetVariable

      public static int TessBaseAPISetVariable(ITessAPI.TessBaseAPI handle, String name, String value)
      Set the value of an internal "parameter." Supply the name of the parameter and the value as a string, just as you would in a config file. Returns false if the name lookup failed. E.g., SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. SetVariable may be used before Init, but settings will revert to defaults on End().

      Note: Must be called after Init(). Only works for non-init variables (init variables should be passed to Init()).
      Parameters:
      handle - the TesseractAPI instance
      name - name of the parameter
      value - variable value
      Returns:
      1 on success
    • TessBaseAPIGetIntVariable

      public static int TessBaseAPIGetIntVariable(ITessAPI.TessBaseAPI handle, String name, IntBuffer value)
      Get the value of an internal int parameter.
      Parameters:
      handle - the TesseractAPI instance
      name - name of the parameter
      value - pass the int buffer value
      Returns:
      1 on success
    • TessBaseAPIGetBoolVariable

      public static int TessBaseAPIGetBoolVariable(ITessAPI.TessBaseAPI handle, String name, IntBuffer value)
      Get the value of an internal bool parameter.
      Parameters:
      handle - the TesseractAPI instance
      name - pass the name of the variable
      value - pass the int buffer value
      Returns:
      1 on success
    • TessBaseAPIGetDoubleVariable

      public static int TessBaseAPIGetDoubleVariable(ITessAPI.TessBaseAPI handle, String name, DoubleBuffer value)
      Get the value of an internal double parameter.
      Parameters:
      handle - the TesseractAPI instance
      name - pass the name of the variable
      value - pass the double buffer value
      Returns:
      1 on success
    • TessBaseAPIGetStringVariable

      public static String TessBaseAPIGetStringVariable(ITessAPI.TessBaseAPI handle, String name)
      Get the value of an internal string parameter.
      Parameters:
      handle - the TesseractAPI instance
      name - pass the name of the variable
      Returns:
      the string value
    • TessBaseAPIPrintVariablesToFile

      public static void TessBaseAPIPrintVariablesToFile(ITessAPI.TessBaseAPI handle, String filename)
      Print Tesseract parameters to the given file.

      Note: Must not be the first method called after instance creation.
      Parameters:
      handle - the TesseractAPI instance
      filename - name of the file where the variables will be persisted
    • TessBaseAPIInit1

      public static int TessBaseAPIInit1(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem, com.sun.jna.ptr.PointerByReference configs, int configs_size)
      Instances are now mostly thread-safe and totally independent, but some global parameters remain. Basically it is safe to use multiple TessBaseAPIs in different threads in parallel, UNLESS you use SetVariable on some of the Params in classify and textord. If you do, then the effect will be to change it for all your instances.

      Start tesseract. Returns zero on success and -1 on failure. NOTE that the only members that may be called before Init are those listed above here in the class definition.

      It is entirely safe (and eventually will be efficient too) to call Init multiple times on the same instance to change language, or just to reset the classifier. Languages may specify internally that they want to be loaded with one or more other languages, so the ~ sign is available to override that. E.g., if hin were set to load eng by default, then hin+~eng would force loading only hin. The number of loaded languages is limited only by memory, with the caveat that loading additional languages will impact both speed and accuracy, as there is more work to do to decide on the applicable language, and there is more chance of hallucinating incorrect words. WARNING: On changing languages, all Tesseract parameters are reset back to their default values. (Which may vary between languages.) If you have a rare need to set a Variable that controls initialization for a second call to Init you should explicitly call End() and then use SetVariable before Init.
      This is only a very rare use case, since there are very few uses that require any parameters to be set before Init.

      If set_only_non_debug_params is true, only params that do not contain "debug" in the name will be set.
      Parameters:
      handle - the TesseractAPI instance
      datapath - The datapath must be the name of the data directory or some other file in which the data directory resides (for instance argv[0].)
      language - The language is (usually) an ISO 639-3 string or NULL will default to eng. The language may be a string of the form [~]<lang>[+[~]<lang>] indicating that multiple languages are to be loaded. E.g., hin+eng will load Hindi and English.
      oem - ocr engine mode
      configs - pointer configuration
      configs_size - pointer configuration size
      Returns:
      0 on success and -1 on initialization failure
    • TessBaseAPIInit2

      public static int TessBaseAPIInit2(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem)
      Parameters:
      handle - the TesseractAPI instance
      datapath - The datapath must be the name of the data directory or some other file in which the data directory resides (for instance argv[0].)
      language - The language is (usually) an ISO 639-3 string or NULL will default to eng. The language may be a string of the form [~]<lang>[+[~]<lang>] indicating that multiple languages are to be loaded. E.g., hin+eng will load Hindi and English.
      oem - ocr engine mode
      Returns:
      0 on success and -1 on initialization failure
    • TessBaseAPIInit3

      public static int TessBaseAPIInit3(ITessAPI.TessBaseAPI handle, String datapath, String language)
      Parameters:
      handle - the TesseractAPI instance
      datapath - The datapath must be the name of the data directory or some other file in which the data directory resides (for instance argv[0].)
      language - The language is (usually) an ISO 639-3 string or NULL will default to eng. The language may be a string of the form [~]<lang>[+[~]<lang>] indicating that multiple languages are to be loaded. E.g., hin+eng will load Hindi and English.
      Returns:
      0 on success and -1 on initialization failure
    • TessBaseAPIInit4

      public static int TessBaseAPIInit4(ITessAPI.TessBaseAPI handle, String datapath, String language, int oem, com.sun.jna.ptr.PointerByReference configs, int configs_size, com.sun.jna.ptr.PointerByReference vars_vec, com.sun.jna.ptr.PointerByReference vars_values, com.ochafik.lang.jnaerator.runtime.NativeSize vars_vec_size, int set_only_non_debug_params)
      Parameters:
      handle - the TesseractAPI instance
      datapath - The datapath must be the name of the data directory or some other file in which the data directory resides (for instance argv[0].)
      language - The language is (usually) an ISO 639-3 string or NULL will default to eng. The language may be a string of the form [~]<lang>[+[~]<lang>] indicating that multiple languages are to be loaded. E.g., hin+eng will load Hindi and English.
      oem - ocr engine mode
      configs - pointer configuration
      configs_size - pointer configuration size
      vars_vec -
      vars_values -
      vars_vec_size -
      set_only_non_debug_params - if true, only params that do not contain "debug" in the name will be set
      Returns:
      0 on success and -1 on initialization failure
    • TessBaseAPIInit5

      public static int TessBaseAPIInit5(ITessAPI.TessBaseAPI handle, String data, int data_size, String language, int oem, com.sun.jna.ptr.PointerByReference configs, int configs_size, com.sun.jna.ptr.PointerByReference vars_vec, com.sun.jna.ptr.PointerByReference vars_values, com.ochafik.lang.jnaerator.runtime.NativeSize vars_vec_size, int set_only_non_debug_params)
      Parameters:
      handle - the TesseractAPI instance
      data - In-memory version reads the traineddata file directly from the given data[data_size] array. Also implements the version with a datapath in data, flagged by data_size = 0.
      data_size - size of the data array; 0 indicates that data holds a datapath instead
      language - The language is (usually) an ISO 639-3 string or NULL will default to eng. The language may be a string of the form [~]<lang>[+[~]<lang>] indicating that multiple languages are to be loaded. E.g., hin+eng will load Hindi and English.
      oem - ocr engine mode
      configs - pointer configuration
      configs_size - pointer configuration size
      vars_vec -
      vars_values -
      vars_vec_size -
      set_only_non_debug_params - if true, only params that do not contain "debug" in the name will be set
      Returns:
      0 on success and -1 on initialization failure
    • TessBaseAPIGetInitLanguagesAsString

      public static String TessBaseAPIGetInitLanguagesAsString(ITessAPI.TessBaseAPI handle)
      Returns the languages string used in the last valid initialization. If the last initialization specified "deu+hin" then that will be returned. If hin loaded eng automatically as well, then that will not be included in this list. To find the languages actually loaded, use GetLoadedLanguagesAsVector. The returned string should NOT be deleted.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      languages as string
    • TessBaseAPIGetLoadedLanguagesAsVector

      public static com.sun.jna.ptr.PointerByReference TessBaseAPIGetLoadedLanguagesAsVector(ITessAPI.TessBaseAPI handle)
      Returns the loaded languages in the vector of STRINGs. Includes all languages loaded by the last Init, including those loaded as dependencies of other loaded languages.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      loaded languages as vector
    • TessBaseAPIGetAvailableLanguagesAsVector

      public static com.sun.jna.ptr.PointerByReference TessBaseAPIGetAvailableLanguagesAsVector(ITessAPI.TessBaseAPI handle)
      Returns the available languages in the vector of STRINGs.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      available languages as vector
    • TessBaseAPIInitForAnalysePage

      public static void TessBaseAPIInitForAnalysePage(ITessAPI.TessBaseAPI handle)
      Init only for page layout analysis. Use only for calls to SetImage and AnalysePage. Calls that attempt recognition will generate an error.
      Parameters:
      handle - the TesseractAPI instance
    • TessBaseAPIReadConfigFile

      public static void TessBaseAPIReadConfigFile(ITessAPI.TessBaseAPI handle, String filename, int init_only)
      Read a "config" file containing a set of param, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name. Note: only non-init params will be set (init params are set by Init()).
      Parameters:
      handle - the TesseractAPI instance
      filename - relative or absolute path for the "config" file containing a set of param and value pairs
      init_only -
    • TessBaseAPISetPageSegMode

      public static void TessBaseAPISetPageSegMode(ITessAPI.TessBaseAPI handle, int mode)
      Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
      Parameters:
      handle - the TesseractAPI instance
      mode - tesseract page segment mode
    • TessBaseAPIGetPageSegMode

      public static int TessBaseAPIGetPageSegMode(ITessAPI.TessBaseAPI handle)
      Return the current page segmentation mode.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      page segment mode value
    • TessBaseAPIRect

      public static com.sun.jna.Pointer TessBaseAPIRect(ITessAPI.TessBaseAPI handle, ByteBuffer imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
      Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a 1 represents WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

      Note that TesseractRect is the simplified convenience interface. For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, and one or more of the Get*Text functions below.
      Parameters:
      handle - the TesseractAPI instance
      imagedata - image byte buffer
      bytes_per_pixel - bytes per pixel
      bytes_per_line - bytes per line
      left - image left
      top - image top
      width - image width
      height - image height
      Returns:
      the pointer to recognized text
    • TessBaseAPIClearAdaptiveClassifier

      public static void TessBaseAPIClearAdaptiveClassifier(ITessAPI.TessBaseAPI handle)
      Call between pages or documents etc to free up memory and forget adaptive data.
      Parameters:
      handle - the TesseractAPI instance
    • TessBaseAPISetImage

      public static void TessBaseAPISetImage(ITessAPI.TessBaseAPI handle, ByteBuffer imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
      Provide an image for Tesseract to recognize. Format is as TesseractRect above. Does not copy the image buffer, or take ownership. The source image may be destroyed after Recognize is called, either explicitly or implicitly via one of the Get*Text functions. SetImage clears all recognition results, and sets the rectangle to the full image, so it may be followed immediately by a GetUTF8Text, and it will automatically perform recognition.
      Parameters:
      handle - the TesseractAPI instance
      imagedata - image byte buffer
      width - image width
      height - image height
      bytes_per_pixel - bytes per pixel
      bytes_per_line - bytes per line
    • TessBaseAPISetImage2

      public static void TessBaseAPISetImage2(ITessAPI.TessBaseAPI handle, net.sourceforge.lept4j.Pix pix)
      Provide an image for Tesseract to recognize. As with SetImage above, Tesseract doesn't take a copy or ownership or pixDestroy the image, so it must persist until after Recognize. Pix vs raw, which to use? Use Pix where possible. A future version of Tesseract may choose to use Pix as its internal representation and discard IMAGE altogether. Because of that, an implementation that sources and targets Pix may end up with fewer copies than an implementation that does not.
      Parameters:
      handle - the TesseractAPI instance
      pix -
    • TessBaseAPISetSourceResolution

      public static void TessBaseAPISetSourceResolution(ITessAPI.TessBaseAPI handle, int ppi)
      Set the resolution of the source image in pixels per inch so font size information can be calculated in results. Call this after SetImage().
      Parameters:
      handle - the TesseractAPI instance
      ppi - source resolution value
    • TessBaseAPISetRectangle

      public static void TessBaseAPISetRectangle(ITessAPI.TessBaseAPI handle, int left, int top, int width, int height)
      Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results so multiple rectangles can be recognized with the same image.
      Parameters:
      handle - the TesseractAPI instance
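      left - left coordinate of the sub-rectangle
      top - top coordinate of the sub-rectangle
      width - width of the sub-rectangle
      height - height of the sub-rectangle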
    • TessBaseAPIGetThresholdedImage

      public static net.sourceforge.lept4j.Pix TessBaseAPIGetThresholdedImage(ITessAPI.TessBaseAPI handle)
      ONLY available after SetImage if you have Leptonica installed. Get a copy of the internal thresholded image from Tesseract.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      internal thresholded image
    • TessBaseAPIGetGradient

      public static float TessBaseAPIGetGradient(ITessAPI.TessBaseAPI handle)
      Return average gradient of lines on page.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      average gradient (angle) of lines on page
    • TessBaseAPIGetRegions

      public static net.sourceforge.lept4j.Boxa TessBaseAPIGetRegions(ITessAPI.TessBaseAPI handle, com.sun.jna.ptr.PointerByReference pixa)
      Get the result of page layout analysis as a Leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.
      Parameters:
      handle - the TesseractAPI instance
      pixa - array of Pix
      Returns:
      array of Box
    • TessBaseAPIGetTextlines

      public static net.sourceforge.lept4j.Boxa TessBaseAPIGetTextlines(ITessAPI.TessBaseAPI handle, com.sun.jna.ptr.PointerByReference pixa, com.sun.jna.ptr.PointerByReference blockids)
      Get the textlines as a Leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not NULL, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.
      Helper method to extract from the thresholded image (most common usage).
      Parameters:
      handle - the TesseractAPI instance
      pixa - array of Pix
      blockids -
      Returns:
      array of Box
    • TessBaseAPIGetTextlines1

      public static net.sourceforge.lept4j.Boxa TessBaseAPIGetTextlines1(ITessAPI.TessBaseAPI handle, int raw_image, int raw_padding, com.sun.jna.ptr.PointerByReference pixa, com.sun.jna.ptr.PointerByReference blockids, com.sun.jna.ptr.PointerByReference paraids)
      Get the textlines as a Leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not NULL, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.
      Parameters:
      handle - the TesseractAPI instance
      raw_image -
      raw_padding -
      pixa - array of Pix
      blockids -
      paraids -
      Returns:
      array of Box
    • TessBaseAPIGetStrips

      public static net.sourceforge.lept4j.Boxa TessBaseAPIGetStrips(ITessAPI.TessBaseAPI handle, com.sun.jna.ptr.PointerByReference pixa, com.sun.jna.ptr.PointerByReference blockids)
      Get textlines and strips of image regions as a Leptonica-style Boxa, Pixa pair, in reading order. Enables downstream handling of non-rectangular regions. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use.
      Parameters:
      handle - the TesseractAPI instance
      pixa - array of Pix
      blockids -
      Returns:
      array of Box
    • TessBaseAPIGetWords

      public static net.sourceforge.lept4j.Boxa TessBaseAPIGetWords(ITessAPI.TessBaseAPI handle, com.sun.jna.ptr.PointerByReference pixa)
      Get the words as a Leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.
      Parameters:
      handle - the TesseractAPI instance
      pixa - array of Pix
      Returns:
      array of Box
    • TessBaseAPIGetConnectedComponents

      public static net.sourceforge.lept4j.Boxa TessBaseAPIGetConnectedComponents(ITessAPI.TessBaseAPI handle, com.sun.jna.ptr.PointerByReference cc)
      Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a Leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.
      Parameters:
      handle - the TesseractAPI instance
      cc - array of Pix
      Returns:
      array of Box
    • TessBaseAPIGetComponentImages

      public static net.sourceforge.lept4j.Boxa TessBaseAPIGetComponentImages(ITessAPI.TessBaseAPI handle, int level, int text_only, com.sun.jna.ptr.PointerByReference pixa, com.sun.jna.ptr.PointerByReference blockids)
      Get the given level kind of components (block, textline, word etc.) as a Leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each component is also returned as an array of one element per component. delete [] after use. If text_only is true, then only text components are returned. Helper function to get binary images with no padding (most common usage).
      Parameters:
      handle - the TesseractAPI instance
      level - PageIteratorLevel
      text_only -
      pixa - array of Pix
      blockids -
      Returns:
      array of Box
    • TessBaseAPIGetComponentImages1

      public static net.sourceforge.lept4j.Boxa TessBaseAPIGetComponentImages1(ITessAPI.TessBaseAPI handle, int level, int text_only, int raw_image, int raw_padding, com.sun.jna.ptr.PointerByReference pixa, com.sun.jna.ptr.PointerByReference blockids, com.sun.jna.ptr.PointerByReference paraids)
      Get the given level kind of components (block, textline, word etc.) as a Leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each component is also returned as an array of one element per component. delete [] after use. If paraids is not NULL, the paragraph-id of each component within its block is also returned as an array of one element per component. delete [] after use. If raw_image is true, then portions of the original image are extracted instead of the thresholded image and padded with raw_padding. If text_only is true, then only text components are returned.
      Parameters:
      handle - the TesseractAPI instance
      level - PageIteratorLevel
      text_only -
      raw_image -
      raw_padding -
      pixa - array of Pix
      blockids -
      paraids -
      Returns:
      array of Box
    • TessBaseAPIGetThresholdedImageScaleFactor

      public static int TessBaseAPIGetThresholdedImageScaleFactor(ITessAPI.TessBaseAPI handle)
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      Scale factor from original image.
    • TessBaseAPIAnalyseLayout

      public static ITessAPI.TessPageIterator TessBaseAPIAnalyseLayout(ITessAPI.TessBaseAPI handle)
      Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. Returns NULL on error. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      returns an iterator to the results. Returns NULL on error. The returned iterator must be deleted after use.
    • TessBaseAPIRecognize

      public static int TessBaseAPIRecognize(ITessAPI.TessBaseAPI handle, ITessAPI.ETEXT_DESC monitor)
      Recognize the image from SetAndThresholdImage, generating Tesseract internal structures. Returns 0 on success. Optional. The Get*Text functions below will call Recognize if needed. After Recognize, the output is kept internally until the next SetImage.
      Parameters:
      handle - the TesseractAPI instance
      monitor - the result as Tesseract internal structures
      Returns:
      0 on success
    • TessBaseAPIGetIterator

      public static ITessAPI.TessResultIterator TessBaseAPIGetIterator(ITessAPI.TessBaseAPI handle)
      Get a reading-order iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      the result iterator
    • TessBaseAPIGetMutableIterator

      public static ITessAPI.TessMutableIterator TessBaseAPIGetMutableIterator(ITessAPI.TessBaseAPI handle)
      Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      the mutable iterator
    • TessBaseAPIProcessPages

      public static int TessBaseAPIProcessPages(ITessAPI.TessBaseAPI handle, String filename, String retry_config, int timeout_millisec, ITessAPI.TessResultRenderer renderer)
      Recognizes all the pages in the named file, as a multi-page tiff or list of filenames, or single image, and gets the appropriate kind of text according to parameters: tessedit_create_boxfile, tessedit_make_boxes_from_boxes, tessedit_write_unlv, tessedit_create_hocr. Calls ProcessPage on each page in the input file, which may be a multi-page tiff, single-page other file format, or a plain text list of images to read. If tessedit_page_number is non-negative, processing begins at that page of a multi-page tiff file, or filelist. The text is returned in text_out. Returns false on error. If timeout_millisec is non-zero, processing of a single page is terminated after the timeout. If retry_config is non-NULL and non-empty, and some page fails for some reason, the page is reprocessed with the retry_config config file. Useful for interactively debugging a bad page.
      Parameters:
      handle - the TesseractAPI instance
      filename - multi-page tiff or list of filenames
      retry_config - retry config values
      timeout_millisec - timeout value
      renderer - result renderer
      Returns:
      the status
    • TessBaseAPIProcessPage

      public static int TessBaseAPIProcessPage(ITessAPI.TessBaseAPI handle, net.sourceforge.lept4j.Pix pix, int page_index, String filename, String retry_config, int timeout_millisec, ITessAPI.TessResultRenderer renderer)
    • TessBaseAPIGetUTF8Text

      public static com.sun.jna.Pointer TessBaseAPIGetUTF8Text(ITessAPI.TessBaseAPI handle)
      The recognized text is returned as a char* which is coded as UTF-8 and must be freed with the delete [] operator.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      the pointer to output text
    • TessBaseAPIGetHOCRText

      public static com.sun.jna.Pointer TessBaseAPIGetHOCRText(ITessAPI.TessBaseAPI handle, int page_number)
      Make a HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based.
      Parameters:
      handle - the TesseractAPI instance
      page_number - page number
      Returns:
      the pointer to hOCR text
    • TessBaseAPIGetAltoText

      public static com.sun.jna.Pointer TessBaseAPIGetAltoText(ITessAPI.TessBaseAPI handle, int page_number)
      Make an XML-formatted string with Alto markup from the internal data structures.
      Parameters:
      handle - the TesseractAPI instance
      page_number - page number
      Returns:
      the pointer to Alto text
    • TessBaseAPIGetPAGEText

      public static com.sun.jna.Pointer TessBaseAPIGetPAGEText(ITessAPI.TessBaseAPI handle, int page_number)
      Make an XML-formatted string with PAGE markup from the internal data structures.
      Parameters:
      handle - the TesseractAPI instance
      page_number - page number
      Returns:
      the pointer to PAGE text
    • TessBaseAPIGetTsvText

      public static com.sun.jna.Pointer TessBaseAPIGetTsvText(ITessAPI.TessBaseAPI handle, int page_number)
      Make a TSV-formatted string from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.
      Parameters:
      handle - the TesseractAPI instance
      page_number - page number
      Returns:
      the pointer to TSV text
    • TessBaseAPIGetBoxText

      public static com.sun.jna.Pointer TessBaseAPIGetBoxText(ITessAPI.TessBaseAPI handle, int page_number)
      The recognized text is returned as a char* which is coded as a UTF8 box file and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the box file.
      Parameters:
      handle - the TesseractAPI instance
      page_number - number of the page
      Returns:
      the pointer to box text
    • TessBaseAPIGetLSTMBoxText

      public static com.sun.jna.Pointer TessBaseAPIGetLSTMBoxText(ITessAPI.TessBaseAPI handle, int page_number)
      Create a UTF8 box file for LSTM training from the internal data structures. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.
      Parameters:
      handle - the TesseractAPI instance
      page_number - page number
      Returns:
      the pointer to LSTM Box text
    • TessBaseAPIGetWordStrBoxText

      public static com.sun.jna.Pointer TessBaseAPIGetWordStrBoxText(ITessAPI.TessBaseAPI handle, int page_number)
      Create a UTF8 box file with WordStr strings from the internal data structures. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.
      Parameters:
      handle - the TesseractAPI instance
      page_number - page number
      Returns:
      the pointer to WordStr Box text
    • TessBaseAPIGetUNLVText

      public static com.sun.jna.Pointer TessBaseAPIGetUNLVText(ITessAPI.TessBaseAPI handle)
      The recognized text is returned as a char* which is coded as UNLV format Latin-1 with specific reject and suspect codes and must be freed with the delete [] operator.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      the pointer to UNLV text
    • TessBaseAPIMeanTextConf

      public static int TessBaseAPIMeanTextConf(ITessAPI.TessBaseAPI handle)
      Returns the average word confidence for Tesseract page result.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      the (average) confidence value between 0 and 100.
    • TessBaseAPIAllWordConfidences

      public static com.sun.jna.ptr.IntByReference TessBaseAPIAllWordConfidences(ITessAPI.TessBaseAPI handle)
      Returns an array of all word confidences, terminated by -1. The calling function must delete [] after use. The number of confidences should correspond to the number of space-delimited words in GetUTF8Text.
      Parameters:
      handle - the TesseractAPI instance
      Returns:
      all word confidences (between 0 and 100) in an array, terminated by -1
    • TessBaseAPIAdaptToWordStr

      public static int TessBaseAPIAdaptToWordStr(ITessAPI.TessBaseAPI handle, int mode, String wordstr)
      Applies the given word to the adaptive classifier if possible. The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the boundaries of the graphemes. Assumes that SetImage/SetRectangle have been used to set the image to the given word. The mode arg should be PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control layout analysis. The currently set PageSegMode is preserved.
      Parameters:
      handle - the TesseractAPI instance
      mode - tesseract page segment mode
      wordstr - The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the boundaries of the graphemes.
      Returns:
      false if adaption was not possible for some reason.
    • TessBaseAPIClear

      public static void TessBaseAPIClear(ITessAPI.TessBaseAPI handle)
      Free up recognition results and any stored image data, without actually freeing any recognition data that would be time-consuming to reload. Afterwards, you must call SetImage or TesseractRect before doing any Recognize or Get* operation.
      Parameters:
      handle - the TesseractAPI instance
    • TessBaseAPIEnd

      public static void TessBaseAPIEnd(ITessAPI.TessBaseAPI handle)
      Close down tesseract and free up all memory. End() is equivalent to destructing and reconstructing your TessBaseAPI. Once End() has been used, none of the other API functions may be used other than Init and anything declared above it in the class definition.
      Parameters:
      handle - the TesseractAPI instance
    • TessBaseAPIIsValidWord

      public static int TessBaseAPIIsValidWord(ITessAPI.TessBaseAPI handle, String word)
      Check whether a word is valid according to Tesseract's language model.
      Parameters:
      handle - the TesseractAPI instance
      word - word value
      Returns:
      0 if the word is invalid, non-zero if valid
    • TessBaseAPIGetTextDirection

      public static int TessBaseAPIGetTextDirection(ITessAPI.TessBaseAPI handle, IntBuffer out_offset, FloatBuffer out_slope)
      Gets text direction.
      Parameters:
      handle - the TesseractAPI instance
      out_offset - offset
      out_slope - slope
      Returns:
      TRUE if text direction is valid
    • TessBaseAPIClearPersistentCache

      public static void TessBaseAPIClearPersistentCache(ITessAPI.TessBaseAPI handle)
      Clear any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) that are cached globally -- surviving the Init() and End() of individual TessBaseAPI's. This function allows the clearing of these caches.
      Parameters:
      handle - the TesseractAPI instance
    • TessBaseAPIDetectOrientationScript

      public static int TessBaseAPIDetectOrientationScript(ITessAPI.TessBaseAPI handle, IntBuffer orient_deg, FloatBuffer orient_conf, com.sun.jna.ptr.PointerByReference script_name, FloatBuffer script_conf)
      Detect the orientation of the input image and apparent script (alphabet). orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270); orient_conf is the confidence (15.0 is reasonably confident); script_name is an ASCII string, the name of the script, e.g. "Latin"; script_conf is confidence level in the script.
      Parameters:
      handle - the TesseractAPI instance
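      orient_deg - the detected clockwise rotation of the input image in degrees (0, 90, 180, 270)
      orient_conf - the orientation confidence (15.0 is reasonably confident)
      script_name - an ASCII string, the name of the script, e.g. "Latin"
      script_conf - the confidence level in the script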
      Returns:
      TRUE on success and writes values to each parameter as an output
    • TessBaseAPIGetUnichar

      public static String TessBaseAPIGetUnichar(ITessAPI.TessBaseAPI handle, int unichar_id)
      Gets the string of the specified unichar.
      Parameters:
      handle - the TesseractAPI instance
      unichar_id - the unichar id
      Returns:
      the string form of the specified unichar.
    • TessPageIteratorDelete

      public static void TessPageIteratorDelete(ITessAPI.TessPageIterator handle)
      Deletes the specified PageIterator instance.
      Parameters:
      handle - the TessPageIterator instance
    • TessPageIteratorCopy

      public static ITessAPI.TessPageIterator TessPageIteratorCopy(ITessAPI.TessPageIterator handle)
      Creates a copy of the specified PageIterator instance.
      Parameters:
      handle - the TessPageIterator instance
      Returns:
      page iterator copy
    • TessPageIteratorBegin

      public static void TessPageIteratorBegin(ITessAPI.TessPageIterator handle)
      Resets the iterator to point to the start of the page.
      Parameters:
      handle - the TessPageIterator instance
    • TessPageIteratorNext

      public static int TessPageIteratorNext(ITessAPI.TessPageIterator handle, int level)
      Moves to the start of the next object at the given level in the page hierarchy, and returns false if the end of the page was reached. NOTE (CHANGED!) that ALL PageIteratorLevel level values will visit each non-text block at least once.
      Think of non-text blocks as containing a single para, with at least one line, with a single imaginary word, containing a single symbol. The bounding boxes mark out any polygonal nature of the block, and PTIsTextType(BlockType()) is false for non-text blocks.
      Calls to Next with different levels may be freely intermixed. This function iterates words in right-to-left scripts correctly, if the appropriate language has been loaded into Tesseract.
      Parameters:
      handle - the TessPageIterator instance
      level - tesseract page level
      Returns:
      FALSE if the end of the page was reached
    • TessPageIteratorIsAtBeginningOf

      public static int TessPageIteratorIsAtBeginningOf(ITessAPI.TessPageIterator handle, int level)
      Returns TRUE if the iterator is at the start of an object at the given level. Possible uses include determining if a call to Next(RIL_WORD) moved to the start of a RIL_PARA.
      Parameters:
      handle - the TessPageIterator instance
      level - tesseract page level
      Returns:
      1 if true
    • TessPageIteratorIsAtFinalElement

      public static int TessPageIteratorIsAtFinalElement(ITessAPI.TessPageIterator handle, int level, int element)
      Returns whether the iterator is positioned at the last element in a given level (e.g. the last word in a line, the last line in a block).
      Parameters:
      handle - the TessPageIterator instance
      level - tesseract page level
      element - page iterator level
      Returns:
      1 if true
    • TessPageIteratorBoundingBox

      public static int TessPageIteratorBoundingBox(ITessAPI.TessPageIterator handle, int level, IntBuffer left, IntBuffer top, IntBuffer right, IntBuffer bottom)
      Returns the bounding rectangle of the current object at the given level in coordinates of the original image.
      Parameters:
      handle - the TessPageIterator instance
      level - tesseract page level
      left - int buffer position
      top - int buffer position
      right - int buffer position
      bottom - int buffer position
      Returns:
      FALSE if there is no such object at the current position
    • TessPageIteratorBlockType

      public static int TessPageIteratorBlockType(ITessAPI.TessPageIterator handle)
      Returns the type of the current block.
      Parameters:
      handle - the TessPageIterator instance
      Returns:
      TessPolyBlockType value
    • TessPageIteratorGetBinaryImage

      public static net.sourceforge.lept4j.Pix TessPageIteratorGetBinaryImage(ITessAPI.TessPageIterator handle, int level)
      Returns a binary image of the current object at the given level. The position and size match the return from BoundingBoxInternal, and so this could be upscaled with respect to the original input image. Use pixDestroy to delete the image after use. The following methods are used to generate the images: RIL_BLOCK: mask the page image with the block polygon. RIL_TEXTLINE: Clip the rectangle of the line box from the page image. TODO(rays) fix this to generate and use a line polygon. RIL_WORD: Clip the rectangle of the word box from the page image. RIL_SYMBOL: Render the symbol outline to an image for cblobs (prior to recognition) or the bounding box otherwise. A reconstruction of the original image (using xor to check for double representation) should be reasonably accurate, apart from removed noise, at the block level. Below the block level, the reconstruction will be missing images and line separators. At the symbol level, kerned characters will invade the bounding box if rendered after recognition, making an xor reconstruction inaccurate, but an or construction better. Before recognition, symbol-level reconstruction should be good, even with xor, since the images come from the connected components.
      Parameters:
      handle - the TessPageIterator instance
      level - PageIteratorLevel
      Returns:
      the binary image of the current object at the given level
    • TessPageIteratorGetImage

      public static net.sourceforge.lept4j.Pix TessPageIteratorGetImage(ITessAPI.TessPageIterator handle, int level, int padding, net.sourceforge.lept4j.Pix original_image, IntBuffer left, IntBuffer top)
      Returns an image of the current object at the given level in greyscale if available in the input. To guarantee a binary image use BinaryImage. NOTE that in order to give the best possible image, the bounds are expanded slightly over the binary connected component, by the supplied padding, so the top-left position of the returned image is returned in (left,top). These will most likely not match the coordinates returned by BoundingBox. If you do not supply an original image, you will get a binary one. Use pixDestroy to delete the image after use.
      Parameters:
      handle - the TessPageIterator instance
      level - PageIteratorLevel
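      padding - the amount by which the bounds are expanded over the binary connected component
      original_image - the original image; if not supplied, a binary image is returned
      left - receives the left coordinate of the top-left position of the returned image
      top - receives the top coordinate of the top-left position of the returned image
      Returns:
      the image of the current object at the given level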
    • TessPageIteratorBaseline

      public static int TessPageIteratorBaseline(ITessAPI.TessPageIterator handle, int level, IntBuffer x1, IntBuffer y1, IntBuffer x2, IntBuffer y2)
      Returns the baseline of the current object at the given level. The baseline is the line that passes through (x1, y1) and (x2, y2).
      WARNING: with vertical text, baselines may be vertical!
      Parameters:
      handle - the TessPageIterator instance
      level - PageIteratorLevel
      x1 - int buffer position
      y1 - int buffer position
      x2 - int buffer position
      y2 - int buffer position
      Returns:
      TRUE if the baseline is valid
    • TessPageIteratorOrientation

      public static void TessPageIteratorOrientation(ITessAPI.TessPageIterator handle, IntBuffer orientation, IntBuffer writing_direction, IntBuffer textline_order, FloatBuffer deskew_angle)
      Returns the orientation.
      Parameters:
      handle - the TessPageIterator instance
      orientation - orientation value
      writing_direction - writing direction value
      textline_order - text line order
      deskew_angle - deskew angle
    • TessPageIteratorParagraphInfo

      public static void TessPageIteratorParagraphInfo(ITessAPI.TessPageIterator handle, IntBuffer justification, IntBuffer is_list_item, IntBuffer is_crown, IntBuffer first_line_indent)
      Gets paragraph information.
      Parameters:
      handle - the TessPageIterator instance
      justification - justification type
      is_list_item - list item
      is_crown - very first or continuation
      first_line_indent - first line indentation
    • TessResultIteratorDelete

      public static void TessResultIteratorDelete(ITessAPI.TessResultIterator handle)
      Deletes the specified ResultIterator handle.
      Parameters:
      handle - the TessResultIterator instance
    • TessResultIteratorCopy

      public static ITessAPI.TessResultIterator TessResultIteratorCopy(ITessAPI.TessResultIterator handle)
      Creates a copy of the specified ResultIterator instance.
      Parameters:
      handle - the TessResultIterator instance
      Returns:
      the copy object
    • TessResultIteratorGetPageIterator

      public static ITessAPI.TessPageIterator TessResultIteratorGetPageIterator(ITessAPI.TessResultIterator handle)
      Gets the PageIterator of the specified ResultIterator instance.
      Parameters:
      handle - the TessResultIterator instance
      Returns:
      the page iterator
    • TessResultIteratorGetPageIteratorConst

      public static ITessAPI.TessPageIterator TessResultIteratorGetPageIteratorConst(ITessAPI.TessResultIterator handle)
      Gets the PageIterator of the specified ResultIterator instance.
      Parameters:
      handle - the TessResultIterator instance
      Returns:
      the page iterator constant
    • TessResultIteratorNext

      public static int TessResultIteratorNext(ITessAPI.TessResultIterator handle, int level)
    • TessResultIteratorGetUTF8Text

      public static com.sun.jna.Pointer TessResultIteratorGetUTF8Text(ITessAPI.TessResultIterator handle, int level)
      Returns the null-terminated UTF-8 encoded text string for the current object at the given level. Use delete [] to free after use.
      Parameters:
      handle - the TessResultIterator instance
      level - tesseract page level
      Returns:
      the pointer to recognized text
    • TessResultIteratorConfidence

      public static float TessResultIteratorConfidence(ITessAPI.TessResultIterator handle, int level)
      Returns the mean confidence of the current object at the given level. The number should be interpreted as a percent probability (0.0f-100.0f).
      Parameters:
      handle - the TessResultIterator instance
      level - tesseract page level
      Returns:
      confidence value
    • TessResultIteratorWordRecognitionLanguage

      public static String TessResultIteratorWordRecognitionLanguage(ITessAPI.TessResultIterator handle)
    • TessResultIteratorWordFontAttributes

      public static String TessResultIteratorWordFontAttributes(ITessAPI.TessResultIterator handle, IntBuffer is_bold, IntBuffer is_italic, IntBuffer is_underlined, IntBuffer is_monospace, IntBuffer is_serif, IntBuffer is_smallcaps, IntBuffer pointsize, IntBuffer font_id)
      Returns the font attributes of the current word. If iterating at a higher level object than words, e.g., textlines, then this will return the attributes of the first word in that textline. The actual return value is a string representing a font name. It points to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as the iterator itself, i.e., rendered invalid by various members of TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. Pointsize is returned in printer's points (1/72 inch).
      Parameters:
      handle - the TessResultIterator instance
      is_bold - font attribute
      is_italic - font attribute
      is_underlined - font attribute
      is_monospace - font attribute
      is_serif - font attribute
      is_smallcaps - font attribute
      pointsize - font attribute
      font_id - font attribute
      Returns:
      font name
    • TessResultIteratorWordIsFromDictionary

      public static int TessResultIteratorWordIsFromDictionary(ITessAPI.TessResultIterator handle)
      Returns TRUE if the current word was found in a dictionary.
      Parameters:
      handle - the TessResultIterator instance
      Returns:
      1 if word is from dictionary
    • TessResultIteratorWordIsNumeric

      public static int TessResultIteratorWordIsNumeric(ITessAPI.TessResultIterator handle)
      Returns TRUE if the current word is numeric.
      Parameters:
      handle - the TessResultIterator instance
      Returns:
      1 if word is numeric
    • TessResultIteratorSymbolIsSuperscript

      public static int TessResultIteratorSymbolIsSuperscript(ITessAPI.TessResultIterator handle)
      Returns TRUE if the current symbol is a superscript. If iterating at a higher level object than symbols, e.g., words, then this will return the attributes of the first symbol in that word.
      Parameters:
      handle - the TessResultIterator instance
      Returns:
      1 if symbol is superscript
    • TessResultIteratorSymbolIsSubscript

      public static int TessResultIteratorSymbolIsSubscript(ITessAPI.TessResultIterator handle)
      Returns TRUE if the current symbol is a subscript. If iterating at a higher level object than symbols, e.g., words, then this will return the attributes of the first symbol in that word.
      Parameters:
      handle - the TessResultIterator instance
      Returns:
      1 if symbol is subscript
    • TessResultIteratorSymbolIsDropcap

      public static int TessResultIteratorSymbolIsDropcap(ITessAPI.TessResultIterator handle)
      Returns TRUE if the current symbol is a dropcap. If iterating at a higher level object than symbols, e.g., words, then this will return the attributes of the first symbol in that word.
      Parameters:
      handle - the TessResultIterator instance
      Returns:
      1 if symbol is dropcap
    • TessResultIteratorGetChoiceIterator

      public static ITessAPI.TessChoiceIterator TessResultIteratorGetChoiceIterator(ITessAPI.TessResultIterator handle)
    • TessChoiceIteratorDelete

      public static void TessChoiceIteratorDelete(ITessAPI.TessChoiceIterator handle)
    • TessChoiceIteratorNext

      public static int TessChoiceIteratorNext(ITessAPI.TessChoiceIterator handle)
    • TessChoiceIteratorGetUTF8Text

      public static String TessChoiceIteratorGetUTF8Text(ITessAPI.TessChoiceIterator handle)
    • TessChoiceIteratorConfidence

      public static float TessChoiceIteratorConfidence(ITessAPI.TessChoiceIterator handle)
    • TessMonitorCreate

      public static ITessAPI.ETEXT_DESC TessMonitorCreate()
    • TessMonitorDelete

      public static void TessMonitorDelete(ITessAPI.ETEXT_DESC monitor)
    • TessMonitorSetCancelFunc

      public static void TessMonitorSetCancelFunc(ITessAPI.ETEXT_DESC monitor, ITessAPI.TessCancelFunc cancelFunc)
    • TessMonitorSetCancelThis

      public static void TessMonitorSetCancelThis(ITessAPI.ETEXT_DESC monitor, com.sun.jna.Pointer cancelThis)
    • TessMonitorGetCancelThis

      public static com.sun.jna.Pointer TessMonitorGetCancelThis(ITessAPI.ETEXT_DESC monitor)
    • TessMonitorSetProgressFunc

      public static void TessMonitorSetProgressFunc(ITessAPI.ETEXT_DESC monitor, ITessAPI.TessProgressFunc progressFunc)
    • TessMonitorGetProgress

      public static int TessMonitorGetProgress(ITessAPI.ETEXT_DESC monitor)
    • TessMonitorSetDeadlineMSecs

      public static void TessMonitorSetDeadlineMSecs(ITessAPI.ETEXT_DESC monitor, int deadline)