# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.

import logging
import shlex
from subprocess import run


def tesseract_ocr(image_path, tesseract_args=''):
    """Run the ``tesseract`` command-line tool on an image and return its text.

    The recognised text is read from tesseract's standard output, split on
    newlines, stripped of surrounding whitespace, and blank lines dropped.
    Each kept line (and the image path) is logged at debug level on the
    'console' logger.

    :param image_path: path of the image file handed to ``tesseract``.
    :param tesseract_args: extra command-line options for ``tesseract``,
        either as a single shell-style string (e.g. ``'--psm 6 -l eng'``)
        or as a list/tuple of already-split arguments. Empty by default.
    :returns: list of non-empty, stripped text lines, or ``None`` when
        ``tesseract`` exits with a non-zero status.
    :raises FileNotFoundError: propagated from ``subprocess.run`` when no
        ``tesseract`` executable can be found on ``PATH``.
    """
    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)

    # Normalise the optional extra arguments into a list of tokens; they
    # used to be accepted but silently ignored.
    if not tesseract_args:
        extra_args = []
    elif isinstance(tesseract_args, str):
        extra_args = shlex.split(tesseract_args)
    else:
        extra_args = [str(arg) for arg in tesseract_args]

    # Synopsis: tesseract IMAGE OUTPUTBASE [OPTIONS]...; the special output
    # base 'stdout' makes tesseract print the text instead of writing a file.
    proc = run(['tesseract', image_path, 'stdout'] + extra_args,
               capture_output=True, encoding='utf8')
    if proc.returncode:
        # Keep the best-effort contract (None on failure) but leave a trace
        # of why the OCR run failed.
        console_logger.debug('tesseract exited with status %d: %s',
                             proc.returncode, (proc.stderr or '').strip())
        return None

    lines = []
    # Split on '\n' only: tesseract emits form feeds between pages, which
    # strip() removes as whitespace but splitlines() would treat as breaks.
    for line in proc.stdout.split('\n'):
        sline = line.strip()
        if sline:
            console_logger.debug(sline)
            lines.append(sline)
    return lines