We are going to reuse the tesseract OCR code. Create a new tesseract_ocr() helper and use it. Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Message-Id: <20201021105035.2477784-5-f4bug@amsat.org> Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
		
			
				
	
	
		
			47 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			47 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# ...
 | 
						|
#
 | 
						|
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
 | 
						|
#
 | 
						|
# This work is licensed under the terms of the GNU GPL, version 2 or
 | 
						|
# later. See the COPYING file in the top-level directory.
 | 
						|
 | 
						|
import re
 | 
						|
import logging
 | 
						|
 | 
						|
from avocado.utils import process
 | 
						|
from avocado.utils.path import find_command, CmdNotFoundError
 | 
						|
 | 
						|
def tesseract_available(expected_version):
    """Tell whether the installed tesseract has the expected major version.

    :param expected_version: major version number (int) to compare with
    :returns: True when a ``tesseract`` command is found and the major
              version parsed from ``tesseract --version`` equals
              ``expected_version``; False when the command is missing or
              when no version number can be parsed from its output
    """
    try:
        find_command('tesseract')
    except CmdNotFoundError:
        return False
    res = process.run('tesseract --version')
    # Look for the version banner on stdout first, then fall back to stderr.
    for output in (res.stdout_text, res.stderr_text):
        # Accept an optional 'v' prefix and multi-digit major versions.
        match = re.match(r'\s*tesseract\s+v?(\d+)', output)
        if match is not None:
            # the captured group is guaranteed to be made of digits
            return int(match.group(1)) == expected_version
    # Unparsable output is reported as "not available" instead of raising.
    return False
 | 
						|
 | 
						|
 | 
						|
def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
    """Run tesseract on an image and return the text lines it prints.

    :param image_path: path of the image file handed to tesseract
    :param tesseract_args: extra command line arguments, as one string
    :param tesseract_version: tesseract major version; when it is 4,
                              ``--oem 1`` is appended to the arguments
    :returns: list of the non-empty lines of tesseract's standard output,
              each stripped of leading and trailing whitespace
    """
    import shlex

    console_logger = logging.getLogger('tesseract')
    console_logger.debug(image_path)
    if tesseract_version == 4:
        tesseract_args += ' --oem 1'
    # Quote the path so that whitespace or special characters in it do not
    # break the command line apart (assumes process.run() splits the string
    # with shell-like rules -- see the avocado documentation).
    command = "tesseract {} {} stdout".format(tesseract_args,
                                              shlex.quote(str(image_path)))
    proc = process.run(command)
    lines = []
    for line in proc.stdout_text.split('\n'):
        sline = line.strip()
        if sline:
            console_logger.debug(sline)
            lines.append(sline)
    return lines
 |