# Source: tests/avocado/tesseract_utils.py
# (blob 72cd9ab798967537756906fbc514bf6e306b7ff7)
# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.

import re
import logging

from avocado.utils import process
from avocado.utils.path import find_command, CmdNotFoundError

def tesseract_available(expected_version):
    """Check that tesseract is installed with the expected major version.

    Runs ``tesseract --version`` and compares the major version number
    found in its output against *expected_version*.

    :param expected_version: required major version, as an integer
    :returns: True only when the ``tesseract`` command is found and the
              major version it reports equals *expected_version*; False
              when the command is missing or no version can be parsed
    """
    try:
        find_command('tesseract')
    except CmdNotFoundError:
        return False
    res = process.run('tesseract --version')
    # The version banner is looked for on stdout first and on stderr as a
    # fallback.  A regex is used rather than positional splitting so that
    # an unexpected banner yields False instead of an IndexError or
    # ValueError; an optional 'v' prefix before the number is tolerated.
    for output in (res.stdout_text, res.stderr_text):
        match = re.search(r'tesseract\s+v?(\d+)', output)
        if match is not None:
            # the captured group is guaranteed to be all digits
            return int(match.group(1)) == expected_version
    return False


def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
    """Run tesseract on an image and return the text lines it prints.

    The image path and every returned line are logged at debug level on
    the 'tesseract' logger.

    :param image_path: path of the image handed to tesseract
    :param tesseract_args: extra command line arguments for tesseract
    :param tesseract_version: tesseract major version; when it is 4,
                              ' --oem 1' is appended to the arguments
    :returns: list of the non-empty output lines, each stripped of
              surrounding whitespace
    """
    log = logging.getLogger('tesseract')
    log.debug(image_path)
    if tesseract_version == 4:
        tesseract_args += ' --oem 1'
    command = "tesseract {} {} stdout".format(tesseract_args, image_path)
    result = process.run(command)
    # Strip every output line and keep only those with content left
    stripped = (raw.strip() for raw in result.stdout_text.split('\n'))
    text_lines = [text for text in stripped if text]
    for text in text_lines:
        log.debug(text)
    return text_lines