2019-09-04 14:37:02 -04:00
/ * *
* @ author mshwed [ m @ ttshwed . com ]
* @ copyright Crown Copyright 2019
* @ license Apache - 2.0
* /
import Operation from "../Operation.mjs" ;
import OperationError from "../errors/OperationError.mjs" ;
import { isImage } from "../lib/FileType.mjs" ;
import { isWorkerEnvironment } from "../Utils.mjs" ;
import jimp from "jimp" ;
import Tesseract from "tesseract.js" ;
const { TesseractWorker } = Tesseract ;
/ * *
* OCR operation
* /
class OCR extends Operation {
/ * *
* OCR constructor
* /
constructor ( ) {
super ( ) ;
this . name = "OCR" ;
this . module = "Default" ;
this . description = "Optical character recognition or optical character reader (OCR) is the mechanical or electronic conversion of images of typed, handwritten or printed text into machine-encoded text." ;
this . infoURL = "https://en.wikipedia.org/wiki/Optical_character_recognition" ;
this . inputType = "ArrayBuffer" ;
this . outputType = "string" ;
2019-09-05 09:20:59 -04:00
this . args = [ ] ;
2019-09-04 14:37:02 -04:00
}
/ * *
* @ param { ArrayBuffer } input
* @ param { Object [ ] } args
2019-09-05 09:20:59 -04:00
* @ returns { string }
2019-09-04 14:37:02 -04:00
* /
async run ( input , args ) {
if ( ! isImage ( input ) ) {
throw new OperationError ( "Invalid File Type" ) ;
}
try {
if ( isWorkerEnvironment ( ) )
self . sendStatusMessage ( "Performing OCR on image..." ) ;
let image ;
try {
image = await jimp . read ( input ) ;
image = await image . getBase64Async ( jimp . AUTO ) ;
} catch ( err ) {
throw new OperationError ( ` Error loading image. ( ${ err } ) ` ) ;
}
const worker = new TesseractWorker ( ) ;
const result = await worker . recognize ( image )
. progress ( progress => {
2019-09-05 09:20:59 -04:00
if ( isWorkerEnvironment ( ) ) self . sendStatusMessage ( ` ${ progress . status } - ${ ( parseFloat ( progress . progress ) * 100 ) . toFixed ( 2 ) } % ` ) ;
2019-09-04 14:37:02 -04:00
} ) ;
return result . text ;
} catch ( err ) {
throw new OperationError ( ` Error performing OCR on image. ( ${ err } ) ` ) ;
}
}
}
export default OCR ;