MOON
Server: Apache
System: Linux vps.erhabenn.com.br 3.10.0-1160.119.1.el7.tuxcare.els2.x86_64 #1 SMP Mon Jul 15 12:09:18 UTC 2024 x86_64
User: machen (1008)
PHP: 8.2.31
Disabled: NONE
Upload Files
File: //lib/python2.7/site-packages/azurelinuxagent/ga/collect_logs.py
# Microsoft Azure Linux Agent
#
# Copyright 2020 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
#

import datetime
import os
import sys
import threading
import time
from azurelinuxagent.common import cgroupconfigurator, logcollector

import azurelinuxagent.common.conf as conf
from azurelinuxagent.common import logger
from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation
from azurelinuxagent.common.future import subprocess_dev_null, ustr
from azurelinuxagent.common.interfaces import ThreadHandlerInterface
from azurelinuxagent.common.logcollector import COMPRESSED_ARCHIVE_PATH
from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator
from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.utils import shellutil
from azurelinuxagent.common.utils.shellutil import CommandError
from azurelinuxagent.common.version import PY_VERSION_MAJOR, PY_VERSION_MINOR, AGENT_NAME, CURRENT_VERSION

_INITIAL_LOG_COLLECTION_DELAY = 5 * 60 # Five minutes of delay

def get_collect_logs_handler():
    return CollectLogsHandler()


def is_log_collection_allowed():
    # There are three conditions that need to be met in order to allow periodic log collection:
    # 1) It should be enabled in the configuration.
    # 2) The system must be using cgroups to manage services. Needed for resource limiting of the log collection.
    # 3) The python version must be greater than 2.6 in order to support the ZipFile library used when collecting.
    conf_enabled = conf.get_collect_logs()
    cgroups_enabled = CGroupConfigurator.get_instance().enabled()
    supported_python = PY_VERSION_MINOR >= 6 if PY_VERSION_MAJOR == 2 else PY_VERSION_MAJOR == 3
    is_allowed = conf_enabled and cgroups_enabled and supported_python

    msg = "Checking if log collection is allowed at this time [{0}]. All three conditions must be met: " \
          "configuration enabled [{1}], cgroups enabled [{2}], python supported: [{3}]".format(is_allowed,
                                                                                               conf_enabled,
                                                                                               cgroups_enabled,
                                                                                               supported_python)
    logger.info(msg)
    add_event(
        name=AGENT_NAME,
        version=CURRENT_VERSION,
        op=WALAEventOperation.LogCollection,
        is_success=is_allowed,
        message=msg,
        log_event=False)

    return is_allowed


class CollectLogsHandler(ThreadHandlerInterface):
    """
    Periodically collects and uploads logs from the VM to the host.
    """

    _THREAD_NAME = "CollectLogsHandler"
    __CGROUPS_FLAG_ENV_VARIABLE = "_AZURE_GUEST_AGENT_LOG_COLLECTOR_MONITOR_CGROUPS_"

    @staticmethod
    def get_thread_name():
        return CollectLogsHandler._THREAD_NAME

    @staticmethod
    def enable_cgroups_validation():
        os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] = "1"

    @staticmethod
    def disable_cgroups_validation():
        if CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE in os.environ:
            del os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE]

    @staticmethod
    def should_validate_cgroups():
        if CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE in os.environ:
            return os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] == "1"
        return False

    def __init__(self):
        self.protocol = None
        self.protocol_util = None
        self.event_thread = None
        self.should_run = True
        self.last_state = None
        self.period = conf.get_collect_logs_period()

    def run(self):
        self.start()

    def keep_alive(self):
        return self.should_run

    def is_alive(self):
        return self.event_thread.is_alive()

    def start(self):
        self.event_thread = threading.Thread(target=self.daemon)
        self.event_thread.setDaemon(True)
        self.event_thread.setName(self.get_thread_name())
        self.event_thread.start()

    def join(self):
        self.event_thread.join()

    def stopped(self):
        return not self.should_run

    def stop(self):
        self.should_run = False
        if self.is_alive():
            self.join()

    def init_protocols(self):
        # The initialization of ProtocolUtil for the log collection thread should be done within the thread itself
        # rather than initializing it in the ExtHandler thread. This is done to avoid any concurrency issues as each
        # thread would now have its own ProtocolUtil object as per the SingletonPerThread model.
        self.protocol_util = get_protocol_util()
        self.protocol = self.protocol_util.get_protocol()

    def daemon(self):
        # Delay the first collector on start up to give short lived VMs (that might be dead before the second 
        # collection has a chance to run) an opportunity to do produce meaningful logs to collect.
        time.sleep(_INITIAL_LOG_COLLECTION_DELAY)

        try:
            CollectLogsHandler.enable_cgroups_validation()
            if self.protocol_util is None or self.protocol is None:
                self.init_protocols()

            while not self.stopped():
                try:
                    self.collect_and_send_logs()
                except Exception as e:
                    logger.error("An error occurred in the log collection thread main loop; "
                                 "will skip the current iteration.\n{0}", ustr(e))
                finally:
                    time.sleep(self.period)
        except Exception as e:
            logger.error("An error occurred in the log collection thread; will exit the thread.\n{0}", ustr(e))
        finally:
            CollectLogsHandler.disable_cgroups_validation()

    def collect_and_send_logs(self):
        if self._collect_logs():
            self._send_logs()

    def _collect_logs(self):
        logger.info("Starting log collection...")

        # Invoke the command line tool in the agent to collect logs, with resource limits on CPU and memory (RAM).

        systemd_cmd = [
            "systemd-run", "--unit={0}".format(logcollector.CGROUPS_UNIT),
            "--slice={0}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE), "--scope"
        ]

        # The log tool is invoked from the current agent's egg with the command line option
        collect_logs_cmd = [sys.executable, "-u", sys.argv[0], "-collect-logs"]
        final_command = systemd_cmd + collect_logs_cmd

        def exec_command(output_file):
            start_time = datetime.datetime.utcnow()
            success = False
            msg = None
            try:
                shellutil.run_command(final_command, log_error=False, stdout=output_file, stderr=output_file)
                duration = elapsed_milliseconds(start_time)
                archive_size = os.path.getsize(COMPRESSED_ARCHIVE_PATH)

                msg = "Successfully collected logs. Archive size: {0} b, elapsed time: {1} ms.".format(archive_size,
                                                                                                    duration)
                logger.info(msg)
                success = True

                return True
            except Exception as e:
                duration = elapsed_milliseconds(start_time)
                err_msg = ustr(e)

                if isinstance(e, CommandError):
                    # pylint has limited (i.e. no) awareness of control flow w.r.t. typing. we disable=no-member
                    # here because we know e must be a CommandError but pylint still considers the case where
                    # e is a different type of exception.
                    err_msg = ustr("Log Collector exited with code {0}").format(e.returncode) # pylint: disable=no-member

                    if e.returncode == logcollector.INVALID_CGROUPS_ERRCODE: # pylint: disable=no-member
                        logger.info("Disabling periodic log collection until service restart due to process error.")
                        self.stop()
                    
                    # When the OOM killer is invoked on the log collector process, this error code is
                    # returned. Stop the periodic operation because it seems to be persistent.
                    elif e.returncode == logcollector.FORCE_KILLED_ERRCODE: # pylint: disable=no-member
                        logger.info("Disabling periodic log collection until service restart due to OOM error.")
                        self.stop()
                    else:
                        logger.info(err_msg)

                msg = "Failed to collect logs. Elapsed time: {0} ms. Error: {1}".format(duration, err_msg)
                # No need to log to the local log since we logged stdout, stderr from the process.

                return False
            finally:
                add_event(
                    name=AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.LogCollection,
                    is_success=success,
                    message=msg,
                    log_event=False)
        
        try:
            logfile = open(conf.get_agent_log_file(), "a+")
        except Exception:
            with subprocess_dev_null() as DEVNULL:
                return exec_command(DEVNULL)
        else:
            return exec_command(logfile)
        finally:
            if logfile is not None:
                logfile.close()

    def _send_logs(self):
        msg = None
        success = False
        try:
            with open(COMPRESSED_ARCHIVE_PATH, "rb") as fh:
                archive_content = fh.read()
                self.protocol.upload_logs(archive_content)
                msg = "Successfully uploaded logs."
                logger.info(msg)

            success = True
        except Exception as e:
            msg = "Failed to upload logs. Error: {0}".format(ustr(e))
            logger.warn(msg)
        finally:
            add_event(
                name=AGENT_NAME,
                version=CURRENT_VERSION,
                op=WALAEventOperation.LogCollection,
                is_success=success,
                message=msg,
                log_event=False)