# Copyright (C) 2020 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Provide event logging in the git trace2 EVENT format.

The git trace2 EVENT format is defined at:
https://www.kernel.org/pub/software/scm/git/docs/technical/api-trace2.html#_event_format
https://git-scm.com/docs/api-trace2#_the_event_format_target

  Usage:

  git_trace_log = EventLog()
  git_trace_log.StartEvent()
  ...
  git_trace_log.ExitEvent()
  git_trace_log.Write()
"""


import datetime
import errno
import json
import os
import socket
import sys
import tempfile
import threading


# Timeout when sending events via socket (applies to connect, send)
SOCK_TIMEOUT = 0.5  # in seconds
# BaseEventLog __init__ Counter that is consistent within the same process
p_init_count = 0


class BaseEventLog:
    """Event log that records events that occurred during a repo invocation.

    Events are written to the log as a consecutive JSON entries, one per line.
    Entries follow the git trace2 EVENT format.

    Each entry contains the following common keys:
    - event: The event name
    - sid: session-id - Unique string to allow process instance to be
          identified.
    - thread: The thread name.
    - time: is the UTC time of the event.

    Valid 'event' names and event specific fields are documented here:
    https://git-scm.com/docs/api-trace2#_event_format
    """

    def __init__(
        self, env=None, repo_source_version=None, add_init_count=False
    ):
        """Initializes the event log."""
        global p_init_count
        p_init_count += 1
        self._log = []
        # Try to get session-id (sid) from environment (setup in repo launcher).
        KEY = "GIT_TRACE2_PARENT_SID"
        if env is None:
            env = os.environ

        self.start = datetime.datetime.now(datetime.timezone.utc)

        # Save both our sid component and the complete sid.
        # We use our sid component (self._sid) as the unique filename prefix and
        # the full sid (self._full_sid) in the log itself.
        self._sid = (
            f"repo-{self.start.strftime('%Y%m%dT%H%M%SZ')}-P{os.getpid():08x}"
        )

        if add_init_count:
            self._sid = f"{self._sid}-{p_init_count}"

        parent_sid = env.get(KEY)
        # Append our sid component to the parent sid (if it exists).
        if parent_sid is not None:
            self._full_sid = parent_sid + "/" + self._sid
        else:
            self._full_sid = self._sid

        # Set/update the environment variable.
        # Environment handling across systems is messy.
        try:
            env[KEY] = self._full_sid
        except UnicodeEncodeError:
            env[KEY] = self._full_sid.encode()

        if repo_source_version is not None:
            # Add a version event to front of the log.
            self._AddVersionEvent(repo_source_version)

    @property
    def full_sid(self):
        return self._full_sid

    def _AddVersionEvent(self, repo_source_version):
        """Adds a 'version' event at the beginning of current log."""
        version_event = self._CreateEventDict("version")
        version_event["evt"] = "2"
        version_event["exe"] = repo_source_version
        self._log.insert(0, version_event)

    def _CreateEventDict(self, event_name):
        """Returns a dictionary with common keys/values for git trace2 events.

        Args:
            event_name: The event name.

        Returns:
            Dictionary with the common event fields populated.
        """
        return {
            "event": event_name,
            "sid": self._full_sid,
            "thread": threading.current_thread().name,
            "time": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        }

    def StartEvent(self, argv):
        """Append a 'start' event to the current log."""
        start_event = self._CreateEventDict("start")
        start_event["argv"] = argv
        self._log.append(start_event)

    def ExitEvent(self, result):
        """Append an 'exit' event to the current log.

        Args:
            result: Exit code of the event
        """
        exit_event = self._CreateEventDict("exit")

        # Consider 'None' success (consistent with event_log result handling).
        if result is None:
            result = 0
        exit_event["code"] = result
        time_delta = datetime.datetime.now(datetime.timezone.utc) - self.start
        exit_event["t_abs"] = time_delta.total_seconds()
        self._log.append(exit_event)

    def CommandEvent(self, name, subcommands):
        """Append a 'command' event to the current log.

        Args:
            name: Name of the primary command (ex: repo, git)
            subcommands: List of the sub-commands (ex: version, init, sync)
        """
        command_event = self._CreateEventDict("cmd_name")
        name = f"{name}-"
        name += "-".join(subcommands)
        command_event["name"] = name
        command_event["hierarchy"] = name
        self._log.append(command_event)

    def LogConfigEvents(self, config, event_dict_name):
        """Append a |event_dict_name| event for each config key in |config|.

        Args:
            config: Configuration dictionary.
            event_dict_name: Name of the event dictionary for items to be logged
                under.
        """
        for param, value in config.items():
            event = self._CreateEventDict(event_dict_name)
            event["param"] = param
            event["value"] = value
            self._log.append(event)

    def DefParamRepoEvents(self, config):
        """Append 'def_param' events for repo config keys to the current log.

        This appends one event for each repo.* config key.

        Args:
            config: Repo configuration dictionary
        """
        # Only output the repo.* config parameters.
        repo_config = {k: v for k, v in config.items() if k.startswith("repo.")}
        self.LogConfigEvents(repo_config, "def_param")

    def GetDataEventName(self, value):
        """Returns 'data-json' if the value is an array else returns 'data'."""
        return "data-json" if value[0] == "[" and value[-1] == "]" else "data"

    def LogDataConfigEvents(self, config, prefix):
        """Append a 'data' event for each entry in |config| to the current log.

        For each keyX and valueX of the config, "key" field of the event is
        '|prefix|/keyX' and the "value" of the "key" field is valueX.

        Args:
            config: Configuration dictionary.
            prefix: Prefix for each key that is logged.
        """
        for key, value in config.items():
            event = self._CreateEventDict(self.GetDataEventName(value))
            event["key"] = f"{prefix}/{key}"
            event["value"] = value
            self._log.append(event)

    def ErrorEvent(self, msg, fmt=None):
        """Append a 'error' event to the current log."""
        error_event = self._CreateEventDict("error")
        if fmt is None:
            fmt = msg
        error_event["msg"] = f"RepoErrorEvent:{msg}"
        error_event["fmt"] = f"RepoErrorEvent:{fmt}"
        self._log.append(error_event)

    def _WriteLog(self, write_fn):
        """Writes the log out using a provided writer function.

        Generate compact JSON output for each item in the log, and write it
        using write_fn.

        Args:
            write_fn: A function that accepts byts and writes them to a
                destination.
        """

        for e in self._log:
            # Dump in compact encoding mode.
            # See 'Compact encoding' in Python docs:
            # https://docs.python.org/3/library/json.html#module-json
            write_fn(
                json.dumps(e, indent=None, separators=(",", ":")).encode(
                    "utf-8"
                )
                + b"\n"
            )

    def Write(self, path=None):
        """Writes the log out to a file or socket.

        Log is only written if 'path' or 'git config --get trace2.eventtarget'
        provide a valid path (or socket) to write logs to.

        Logging filename format follows the git trace2 style of being a unique
        (exclusive writable) file.

        Args:
            path: Path to where logs should be written. The path may have a
                prefix of the form "af_unix:[{stream|dgram}:]", in which case
                the path is treated as a Unix domain socket. See
                https://git-scm.com/docs/api-trace2#_enabling_a_target for
                details.

        Returns:
            log_path: Path to the log file or socket if log is written,
                otherwise None
        """
        log_path = None
        # If no logging path is specified, exit.
        if path is None:
            return None

        path_is_socket = False
        socket_type = None
        if isinstance(path, str):
            parts = path.split(":", 1)
            if parts[0] == "af_unix" and len(parts) == 2:
                path_is_socket = True
                path = parts[1]
                parts = path.split(":", 1)
                if parts[0] == "stream" and len(parts) == 2:
                    socket_type = socket.SOCK_STREAM
                    path = parts[1]
                elif parts[0] == "dgram" and len(parts) == 2:
                    socket_type = socket.SOCK_DGRAM
                    path = parts[1]
            else:
                # Get absolute path.
                path = os.path.abspath(os.path.expanduser(path))
        else:
            raise TypeError("path: str required but got %s." % type(path))

        # Git trace2 requires a directory to write log to.

        # TODO(https://crbug.com/gerrit/13706): Support file (append) mode also.
        if not (path_is_socket or os.path.isdir(path)):
            return None

        if path_is_socket:
            if socket_type == socket.SOCK_STREAM or socket_type is None:
                try:
                    with socket.socket(
                        socket.AF_UNIX, socket.SOCK_STREAM
                    ) as sock:
                        sock.settimeout(SOCK_TIMEOUT)
                        sock.connect(path)
                        self._WriteLog(sock.sendall)
                    return f"af_unix:stream:{path}"
                except OSError as err:
                    # If we tried to connect to a DGRAM socket using STREAM,
                    # ignore the attempt and continue to DGRAM below. Otherwise,
                    # issue a warning.
                    if err.errno != errno.EPROTOTYPE:
                        print(
                            f"repo: warning: git trace2 logging failed: {err}",
                            file=sys.stderr,
                        )
                        return None
            if socket_type == socket.SOCK_DGRAM or socket_type is None:
                try:
                    with socket.socket(
                        socket.AF_UNIX, socket.SOCK_DGRAM
                    ) as sock:
                        self._WriteLog(lambda bs: sock.sendto(bs, path))
                        return f"af_unix:dgram:{path}"
                except OSError as err:
                    print(
                        f"repo: warning: git trace2 logging failed: {err}",
                        file=sys.stderr,
                    )
                    return None
            # Tried to open a socket but couldn't connect (SOCK_STREAM) or write
            # (SOCK_DGRAM).
            print(
                "repo: warning: git trace2 logging failed: could not write to "
                "socket",
                file=sys.stderr,
            )
            return None

        # Path is an absolute path
        # Use NamedTemporaryFile to generate a unique filename as required by
        # git trace2.
        try:
            with tempfile.NamedTemporaryFile(
                mode="xb", prefix=self._sid, dir=path, delete=False
            ) as f:
                # TODO(https://crbug.com/gerrit/13706): Support writing events
                # as they occur.
                self._WriteLog(f.write)
                log_path = f.name
        except FileExistsError as err:
            print(
                "repo: warning: git trace2 logging failed: %r" % err,
                file=sys.stderr,
            )
            return None
        return log_path
