aboutsummaryrefslogtreecommitdiff
path: root/doc/sphinx/kernel_include.py
blob: f523aa68a36b397b3c6f12998ec597c28e7ad69a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# pylint: disable=R0903, C0330, R0914, R0912, E0401

u"""
    kernel-include
    ~~~~~~~~~~~~~~

    Implementation of the ``kernel-include`` reST-directive.

    :copyright:  Copyright (C) 2016  Markus Heiser
    :license:    GPL Version 2, June 1991 see linux/COPYING for details.

    The ``kernel-include`` reST-directive is a replacement for the ``include``
    directive. The ``kernel-include`` directive expand environment variables in
    the path name and allows to include files from arbitrary locations.

    .. hint::

      Including files from arbitrary locations (e.g. from ``/etc``) is a
      security risk for builders. This is why the ``include`` directive from
      docutils *prohibit* pathnames pointing to locations *above* the filesystem
      tree where the reST document with the include directive is placed.

    Substrings of the form $name or ${name} are replaced by the value of
    environment variable name. Malformed variable names and references to
    non-existing variables are left unchanged.
"""

# ==============================================================================
# imports
# ==============================================================================

import os.path

from docutils import io, nodes, statemachine
from docutils.utils.error_reporting import SafeString, ErrorString
from docutils.parsers.rst import directives
from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
from docutils.parsers.rst.directives.misc import Include

__version__  = '1.0'

# ==============================================================================
def setup(app):
# ==============================================================================

    app.add_directive("kernel-include", KernelInclude)
    return dict(
        version = __version__,
        parallel_read_safe = True,
        parallel_write_safe = True
    )

# ==============================================================================
class KernelInclude(Include):
# ==============================================================================

    u"""KernelInclude (``kernel-include``) directive"""

    def run(self):
        path = os.path.realpath(
            os.path.expandvars(self.arguments[0]))

        # to get a bit security back, prohibit /etc:
        if path.startswith(os.sep + "etc"):
            raise self.severe(
                'Problems with "%s" directive, prohibited path: %s'
                % (self.name, path))

        self.arguments[0] = path

        #return super(KernelInclude, self).run() # won't work, see HINTs in _run()
        return self._run()

    def _run(self):
        """Include a file as part of the content of this reST file."""

        # HINT: I had to copy&paste the whole Include.run method. I'am not happy
        # with this, but due to security reasons, the Include.run method does
        # not allow absolute or relative pathnames pointing to locations *above*
        # the filesystem tree where the reST document is placed.

        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        source = self.state_machine.input_lines.source(
            self.lineno - self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        if path.startswith('<') and path.endswith('>'):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))

        # HINT: this is the only line I had to change / commented out:
        #path = utils.relative_path(None, path)

        path = nodes.reprunicode(path)
        encoding = self.options.get(
            'encoding', self.state.document.settings.input_encoding)
        e_handler=self.state.document.settings.input_encoding_error_handler
        tab_width = self.options.get(
            'tab-width', self.state.document.settings.tab_width)
        try:
            self.state.document.settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=e_handler)
        except UnicodeEncodeError as error:
            raise self.severe('Problems with "%s" directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' %
                              (self.name, SafeString(path)))
        except IOError as error:
            raise self.severe('Problems with "%s" directive path:\n%s.' %
                      (self.name, ErrorString(error)))
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            if startline or (endline is not None):
                lines = include_file.readlines()
                rawtext = ''.join(lines[startline:endline])
            else:
                rawtext = include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with "%s" directive:\n%s' %
                              (self.name, ErrorString(error)))
        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get('start-after', None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[after_index + len(after_text):]
        before_text = self.options.get('end-before', None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[:before_index]

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)
        if 'literal' in self.options:
            # Convert tabs to spaces, if `tab_width` is positive.
            if tab_width >= 0:
                text = rawtext.expandtabs(tab_width)
            else:
                text = rawtext
            literal_block = nodes.literal_block(rawtext, source=path,
                                    classes=self.options.get('class', []))
            literal_block.line = 1
            self.add_name(literal_block)
            if 'number-lines' in self.options:
                try:
                    startline = int(self.options['number-lines'] or 1)
                except ValueError:
                    raise self.error(':number-lines: with non-integer '
                                     'start value')
                endline = startline + len(include_lines)
                if text.endswith('\n'):
                    text = text[:-1]
                tokens = NumberLines([([], text)], startline, endline)
                for classes, value in tokens:
                    if classes:
                        literal_block += nodes.inline(value, value,
                                                      classes=classes)
                    else:
                        literal_block += nodes.Text(value, value)
            else:
                literal_block += nodes.Text(text, text)
            return [literal_block]
        if 'code' in self.options:
            self.options['source'] = path
            codeblock = CodeBlock(self.name,
                                  [self.options.pop('code')], # arguments
                                  self.options,
                                  include_lines, # content
                                  self.lineno,
                                  self.content_offset,
                                  self.block_text,
                                  self.state,
                                  self.state_machine)
            return codeblock.run()
        self.state_machine.insert_input(include_lines, path)
        return []