From b7a4c0793ceab4fcde64098e164c36f8fbd48b64 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 17 Aug 2020 15:24:52 +0200
Subject: arglist: optimize flush_pre_post

pre_flush_set and post_flush_set are almost always empty, so we can use
extend() instead of a for...in loop to add the previous elements of
self._container.

We can also skip the conversion from deque to list since pre_flush is
always appended on the right side.

On a QEMU build the time spent in flush_pre_post goes from 1.4 to 0.5
seconds.
---
 mesonbuild/arglist.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/mesonbuild/arglist.py b/mesonbuild/arglist.py
index fd4de96..4ab7d09 100644
--- a/mesonbuild/arglist.py
+++ b/mesonbuild/arglist.py
@@ -119,7 +119,7 @@ class CompilerArgs(collections.abc.MutableSequence):
     # This correctly deduplicates the entries after _can_dedup definition
     # Note: This function is designed to work without delete operations, as deletions are worsening the performance a lot.
     def flush_pre_post(self) -> None:
-        pre_flush = collections.deque()   # type: T.Deque[str]
+        new = list()                      # type: T.List[str]
         pre_flush_set = set()             # type: T.Set[str]
         post_flush = collections.deque()  # type: T.Deque[str]
         post_flush_set = set()            # type: T.Set[str]
@@ -128,7 +128,7 @@ class CompilerArgs(collections.abc.MutableSequence):
         for a in self.pre:
             dedup = self._can_dedup(a)
             if a not in pre_flush_set:
-                pre_flush.append(a)
+                new.append(a)
                 if dedup is Dedup.OVERRIDEN:
                     pre_flush_set.add(a)
         for a in reversed(self.post):
@@ -140,12 +140,15 @@ class CompilerArgs(collections.abc.MutableSequence):
 
         #pre and post will overwrite every element that is in the container
         #only copy over args that are in _container but not in the post flush or pre flush set
+        if pre_flush_set or post_flush_set:
+            for a in self._container:
+                if a not in post_flush_set and a not in pre_flush_set:
+                    new.append(a)
+        else:
+            new.extend(self._container)
+        new.extend(post_flush)
 
-        for a in self._container:
-            if a not in post_flush_set and a not in pre_flush_set:
-                pre_flush.append(a)
-
-        self._container = list(pre_flush) + list(post_flush)
+        self._container = new
         self.pre.clear()
         self.post.clear()
 
-- 
cgit v1.1


From 3d4fb02e2907b2532333fdb5eefe6335c7cd94c4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 17 Aug 2020 15:26:11 +0200
Subject: ninjabackend: optimize ninja_quote

Use regular expressions to quickly weed out strings that require quoting.
On a QEMU build the time spent in ninja_quote goes from 1.978s to 1.281s,
with str.replace being kicked completely out of the profile.
---
 mesonbuild/backend/ninjabackend.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/mesonbuild/backend/ninjabackend.py b/mesonbuild/backend/ninjabackend.py
index b4ebdc3..5f7f03a 100644
--- a/mesonbuild/backend/ninjabackend.py
+++ b/mesonbuild/backend/ninjabackend.py
@@ -114,13 +114,17 @@ rsp_threshold = get_rsp_threshold()
 # from, etc.), so it must not be shell quoted.
 raw_names = {'DEPFILE_UNQUOTED', 'DESC', 'pool', 'description', 'targetdep'}
 
+NINJA_QUOTE_BUILD_PAT = re.compile(r"[$ :\n]")
+NINJA_QUOTE_VAR_PAT = re.compile(r"[$ \n]")
+
 def ninja_quote(text, is_build_line=False):
     if is_build_line:
-        qcs = ('$', ' ', ':')
+        quote_re = NINJA_QUOTE_BUILD_PAT
     else:
-        qcs = ('$', ' ')
-    for char in qcs:
-        text = text.replace(char, '$' + char)
+        quote_re = NINJA_QUOTE_VAR_PAT
+    # Fast path for when no quoting is necessary
+    if not quote_re.search(text):
+        return text
     if '\n' in text:
         errmsg = '''Ninja does not support newlines in rules. The content was:
 
@@ -128,7 +132,7 @@ def ninja_quote(text, is_build_line=False):
 
 Please report this error with a test case to the Meson bug tracker.'''.format(text)
         raise MesonException(errmsg)
-    return text
+    return quote_re.sub(r'$\g<0>', text)
 
 @unique
 class Quoting(Enum):
-- 
cgit v1.1


From 372f420778f8f4b55b207f9a6be2c20e2bd22c38 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 17 Aug 2020 15:39:50 +0200
Subject: File: precompute hash

Most files are going to be looked up in a set or dictionary.  Precompute
the hash so that we only need to do so once and we can also use it to
quickly weed out unequal objects.

On a QEMU build, the time spent in __eq__ and __hash__ goes respectively
from 3.110s to 2.162s and from 0.648s to 0.299s.  Even larger gains are
obtained by the next patch.
---
 mesonbuild/mesonlib.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mesonbuild/mesonlib.py b/mesonbuild/mesonlib.py
index 4b8cce8..760b235 100644
--- a/mesonbuild/mesonlib.py
+++ b/mesonbuild/mesonlib.py
@@ -242,6 +242,7 @@ class File:
         self.is_built = is_built
         self.subdir = subdir
         self.fname = fname
+        self.hash = hash((is_built, subdir, fname))
 
     def __str__(self) -> str:
         return self.relative_name()
@@ -291,10 +292,12 @@ class File:
     def __eq__(self, other) -> bool:
         if not isinstance(other, File):
             return NotImplemented
+        if self.hash != other.hash:
+            return False
         return (self.fname, self.subdir, self.is_built) == (other.fname, other.subdir, other.is_built)
 
     def __hash__(self) -> int:
-        return hash((self.fname, self.subdir, self.is_built))
+        return self.hash
 
     @lru_cache(maxsize=None)
     def relative_name(self) -> str:
-- 
cgit v1.1


From fcf9746232efd03970c963e9e5207e1ec8dc99fd Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 17 Aug 2020 15:27:13 +0200
Subject: build: optimize extract_objects

extract_objects is repeatedly looking up files in self.sources, which is a list.
Convert it to a set beforehand so that the lookup is O(1).

On a QEMU build, the time spent in extract_objects goes from 3.292s to 0.431s.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 mesonbuild/build.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mesonbuild/build.py b/mesonbuild/build.py
index 5e6db73..edd1506 100644
--- a/mesonbuild/build.py
+++ b/mesonbuild/build.py
@@ -774,6 +774,7 @@ class BuildTarget(Target):
 
     def extract_objects(self, srclist):
         obj_src = []
+        sources_set = set(self.sources)
         for src in srclist:
             if isinstance(src, str):
                 src = File(False, self.subdir, src)
@@ -782,7 +783,7 @@ class BuildTarget(Target):
             else:
                 raise MesonException('Object extraction arguments must be strings or Files.')
             # FIXME: It could be a generated source
-            if src not in self.sources:
+            if src not in sources_set:
                 raise MesonException('Tried to extract unknown source {}.'.format(src))
             obj_src.append(src)
         return ExtractedObjects(self, obj_src)
-- 
cgit v1.1


From b14601da7b42f2b68bb098fc04e267a390f201c2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 17 Aug 2020 15:33:12 +0200
Subject: ninjabackend: optimize length_estimate

Optimize the regular expression so that the variable expansion part
always ends up in group 1, and the trailer after the variable is discarded
in the same match.  Do not use re.sub to remove braces, and do not bother
building the expanded command, just adjust the estimated length on the fly.

functools.reduce is extremely slow, so I am keeping ' '.join(chunk).

On a QEMU build the time spent in the function goes from 1.072s to 0.757s.
---
 mesonbuild/backend/ninjabackend.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/mesonbuild/backend/ninjabackend.py b/mesonbuild/backend/ninjabackend.py
index 5f7f03a..fd8be00 100644
--- a/mesonbuild/backend/ninjabackend.py
+++ b/mesonbuild/backend/ninjabackend.py
@@ -265,18 +265,20 @@ class NinjaRule:
 
         # expand variables in command
         command = ' '.join([self._quoter(x) for x in self.command + self.args])
-        expanded_command = ''
-        for m in re.finditer(r'(\${\w*})|(\$\w*)|([^$]*)', command):
-            chunk = m.group()
-            if chunk.startswith('$'):
-                chunk = chunk[1:]
-                chunk = re.sub(r'{(.*)}', r'\1', chunk)
-                chunk = ninja_vars.get(chunk, [])  # undefined ninja variables are empty
-                chunk = ' '.join(chunk)
-            expanded_command += chunk
+        estimate = len(command)
+        for m in re.finditer(r'(\${\w*}|\$\w*)?[^$]*', command):
+            if m.start(1) != -1:
+                estimate -= m.end(1) - m.start(1) + 1
+                chunk = m.group(1)
+                if chunk[1] == '{':
+                    chunk = chunk[2:-1]
+                else:
+                    chunk = chunk[1:]
+                chunk = ninja_vars.get(chunk, []) # undefined ninja variables are empty
+                estimate += len(' '.join(chunk))
 
         # determine command length
-        return len(expanded_command)
+        return estimate
 
 class NinjaBuildElement:
     def __init__(self, all_outputs, outfilenames, rulename, infilenames, implicit_outs=None):
-- 
cgit v1.1


From cbde13850f1334ef3019288096ccd95dfcdaec6e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 17 Aug 2020 15:33:39 +0200
Subject: ninjabackend: avoid lambdas

The lambda in NinjaBuildElement.write is quite expensive, totalling 0.3s
just to do a couple of function calls.  Since it is used just once, simply
inline it.

On a QEMU build, the total time spent in write from this series goes from
5.321s to 3.238s, though part of it can be attributed to previous patches.
---
 mesonbuild/backend/ninjabackend.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mesonbuild/backend/ninjabackend.py b/mesonbuild/backend/ninjabackend.py
index fd8be00..3a5c102 100644
--- a/mesonbuild/backend/ninjabackend.py
+++ b/mesonbuild/backend/ninjabackend.py
@@ -386,10 +386,9 @@ class NinjaBuildElement:
             newelems = []
             for i in elems:
                 if not should_quote or i == '&&': # Hackety hack hack
-                    quoter = ninja_quote
+                    newelems.append(ninja_quote(i))
                 else:
-                    quoter = lambda x: ninja_quote(qf(x))
-                newelems.append(quoter(i))
+                    newelems.append(ninja_quote(qf(i)))
             line += ' '.join(newelems)
             line += '\n'
             outfile.write(line)
-- 
cgit v1.1


From 847bb4347039e8f52c661a6d9cddd411f42b41ed Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 17 Aug 2020 15:36:40 +0200
Subject: clike: optimize to_native

Look for group-able flags with a single regex match, since we are already using
regexes for .so files.  Also weed out flags other than -isystem very quickly
with a single startswith call.

On a QEMU build, the time spent in to_native goes from 2.279s to 1.322s.
---
 mesonbuild/compilers/mixins/clike.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/mesonbuild/compilers/mixins/clike.py b/mesonbuild/compilers/mixins/clike.py
index 95b9592..4311fa5 100644
--- a/mesonbuild/compilers/mixins/clike.py
+++ b/mesonbuild/compilers/mixins/clike.py
@@ -40,7 +40,9 @@ from .visualstudio import VisualStudioLikeCompiler
 if T.TYPE_CHECKING:
     from ...environment import Environment
 
-SOREGEX = re.compile(r'.*\.so(\.[0-9]+)?(\.[0-9]+)?(\.[0-9]+)?$')
+GROUP_FLAGS = re.compile(r'''\.so (?:\.[0-9]+)? (?:\.[0-9]+)? (?:\.[0-9]+)?$ |
+                             ^(?:-Wl,)?-l |
+                             \.a$''', re.X)
 
 class CLikeCompilerArgs(arglist.CompilerArgs):
     prepend_prefixes = ('-I', '-L')
@@ -69,8 +71,7 @@ class CLikeCompilerArgs(arglist.CompilerArgs):
             group_start = -1
             group_end = -1
             for i, each in enumerate(new):
-                if not each.startswith(('-Wl,-l', '-l')) and not each.endswith('.a') and \
-                   not SOREGEX.match(each):
+                if not GROUP_FLAGS.search(each):
                     continue
                 group_end = i
                 if group_start < 0:
@@ -85,6 +86,9 @@ class CLikeCompilerArgs(arglist.CompilerArgs):
             default_dirs = self.compiler.get_default_include_dirs()
             bad_idx_list = []  # type: T.List[int]
             for i, each in enumerate(new):
+                if not each.startswith('-isystem'):
+                    continue
+
                 # Remove the -isystem and the path if the path is a default path
                 if (each == '-isystem' and
                         i < (len(new) - 1) and
@@ -92,7 +96,7 @@ class CLikeCompilerArgs(arglist.CompilerArgs):
                     bad_idx_list += [i, i + 1]
                 elif each.startswith('-isystem=') and each[9:] in default_dirs:
                     bad_idx_list += [i]
-                elif each.startswith('-isystem') and each[8:] in default_dirs:
+                elif each[8:] in default_dirs:
                     bad_idx_list += [i]
             for i in reversed(bad_idx_list):
                 new.pop(i)
-- 
cgit v1.1