"""
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Copyright © 2019 Cloud Linux Software Inc.
This software is also available under ImunifyAV commercial license,
see <https://www.imunify360.com/legal/eula>
"""
import argparse
import asyncio
import gc
import logging
import os
import signal
import sys
import time
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager, suppress
from functools import partial
from pathlib import Path
from subprocess import CalledProcessError, check_output
from typing import Tuple
import daemon
from lockfile import AlreadyLocked
import daemon.pidfile
import psutil
import defence360agent.internals.logger
from defence360agent import files
from defence360agent.api import health, inactivity
from defence360agent.contracts.config import (
ConfigsValidator,
Core,
Merger,
Model,
SimpleRpc,
)
from defence360agent.contracts.hook_events import HookEvent
from defence360agent.contracts.license import LicenseCLN
from defence360agent.contracts.plugins import MessageSink, MessageSource
from defence360agent.internals.global_scope import g
from defence360agent.internals.iaid import IndependentAgentIDAPI
from defence360agent.internals.the_sink import TheSink
from defence360agent.model import instance, simplification, tls_check
from defence360agent.simple_rpc import (
NonRootRpcServer,
NonRootRpcServerAV,
RpcServer,
RpcServerAV,
is_running,
)
from defence360agent.subsys import systemd_notifier
from defence360agent.utils import (
Task,
create_task_and_log_exceptions,
is_root_user,
is_systemd_boot,
)
from defence360agent.utils.check_db import is_db_corrupted
from defence360agent.utils.cli import EXITCODE_GENERAL_ERROR
from defence360agent.utils.common import DAY, rate_limit
from defence360agent.sentry import flush_sentry
from imav.malwarelib.config import (
MalwareHitStatus,
MalwareScanResourceType,
)
from imav.malwarelib.model import MalwareHit
import sentry_sdk
# Increase recursion depth to allow malware scanner into deeply nested
# directories with absolute path length up to 4096 symbols.
# Applied with sys.setrecursionlimit() in start().
_MAX_RECURSION_DEPTH = 2100
# Marker file located at "<Model.PATH>.is_corrupted". _check_able_to_start()
# creates it the first time DB corruption is detected and removes it once the
# database passes the check again.
_DB_IS_CORRUPTED_FLAG = Path("%s.is_corrupted" % Model.PATH)
# Message logged by _check_able_to_start() when the database is corrupted
# (as an error on first detection, as a warning while the flag file exists).
_DB_IS_CORRUPTED_MSG = (
"Imunify360 database is corrupt. "
"Application cannot run with corrupt database. "
"Please, contact Imunify360 support team at "
"https://cloudlinux.zendesk.com"
)
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
# logger.error wrapped by rate_limit(period=DAY); presumably this limits the
# wrapped call to once per day -- confirm against rate_limit's implementation.
throttled_log_error = rate_limit(period=DAY)(logger.error)
class TaskFactory:
    """Event-loop task factory that keeps a reference to every live task.

    An instance is installed with ``loop.set_task_factory()``. Each created
    task is stored in :attr:`pool` until it is done, so a running task always
    has at least one strong reference.
    """

    def __init__(self):
        # Tasks that have been created and are not done yet.
        self.pool = set()

    def __call__(self, loop, coro, **kwargs):
        """Create a ``Task`` for *coro* on *loop* and track it in the pool.

        asyncio may call a task factory with extra keyword arguments
        (``context=`` since Python 3.11, every ``create_task()`` keyword on
        newer versions). They are forwarded to ``Task`` unchanged instead of
        failing with ``TypeError`` in the factory call itself.
        NOTE(review): assumes ``Task`` accepts the same keyword arguments as
        ``asyncio.Task`` -- confirm against defence360agent.utils.Task.
        """
        task = Task(coro, loop=loop, **kwargs)
        self.pool.add(task)
        # Drop the reference as soon as the task finishes.
        task.add_done_callback(self.pool.discard)
        return task
@contextmanager
def log_and_suppress_error(message):
    """Run the managed block; log and swallow any ``Exception`` it raises.

    The exception is logged together with *message* (a short description of
    what was being done) and is also passed to
    ``sentry_sdk.capture_exception()``. It is not re-raised.
    """
    try:
        yield
    except Exception as exc:
        logger.error("caught error %r on %s", exc, message)
        sentry_sdk.capture_exception(exc)
async def _shutdown_task(loop, the_sink, plugin_list):
    """Shut the agent down step by step and stop the event loop.

    Every step runs inside ``log_and_suppress_error()``, so a failure in one
    step is logged and the remaining steps still run.

    :param loop: event loop that is stopped at the end
    :param the_sink: object whose ``shutdown()`` coroutine is awaited
    :param plugin_list: plugins to shut down; each provides
        ``SHUTDOWN_PRIORITY`` and a ``shutdown()`` coroutine
    """
    with log_and_suppress_error("marking the start of the shutdown process"):
        # (there is SHUTDOWN_TIMEOUT)
        health.sensor.shutting_down(time.time())
    logger.info("shutdown task starting, pid=%s", os.getpid())

    with log_and_suppress_error(
        "preventing new messages (if any) processing to start"
    ):
        _tasks = []
        # Give the sensor server and the sink at most 10 seconds to close.
        async with asyncio.timeout(10):
            if "sensor_server" in g:
                g.sensor_server.close()
                _tasks.append(g.sensor_server.wait_closed())
            # note: first exception is propagated; tasks are not canceled
            _tasks.append(the_sink.shutdown())
            await asyncio.gather(*_tasks)

    # Plugins are shut down one at a time, in ascending SHUTDOWN_PRIORITY.
    for plugin in sorted(plugin_list, key=lambda p: p.SHUTDOWN_PRIORITY):
        with log_and_suppress_error(
            "This happened while shutting down a plugin!!"
        ):
            logger.info(
                "Shutting down %s.%s...",
                plugin.__class__.__module__,
                plugin.__class__.__name__,
            )
            # make shutting down running task be a responsibility
            # of a particular plugin but not of a universal shotgun
            await plugin.shutdown()

    with log_and_suppress_error("shutting down IAID API"):
        await IndependentAgentIDAPI.shutdown()

    # Wait for graceful web-server restart (if it was started before shutdown)
    if (restart_task := g.get("web_server_restart_task")) is not None:
        with log_and_suppress_error("waiting for web server restart"):
            # asyncio.wait_for() requires the timeout argument; without it the
            # call raised TypeError (suppressed above) and nothing was awaited.
            # timeout=None waits until the restart task completes.
            await asyncio.wait_for(restart_task, timeout=None)

    with log_and_suppress_error("stopping loop"):
        loop.stop()

    flush_sentry()
    logger.info("shutdown task finished, pid=%s", os.getpid())
def _daemonize(pidfilepath):
    """Enter a ``daemon.DaemonContext`` locked on the pid file *pidfilepath*.

    Exits the process with ``EXITCODE_GENERAL_ERROR`` when the pid file is
    already locked. The context is opened and never closed here.
    """
    logger.info("Run as daemon [pidfile = %s]", pidfilepath)

    context = daemon.DaemonContext()
    context.pidfile = daemon.pidfile.PIDLockFile(pidfilepath)
    context.prevent_core = False
    context.umask = Core.FILE_UMASK
    # Detach from the parent only when is_systemd_boot() reports false.
    context.detach_process = not is_systemd_boot()
    # Keep the logger's file descriptors open across daemonization.
    context.files_preserve = defence360agent.internals.logger.get_fds()

    try:
        context.open()
    except AlreadyLocked:
        logger.error("PID file already locked by another process")
        sys.exit(EXITCODE_GENERAL_ERROR)

    gc.collect()
    # quirk: somehow this is needed so that root logger messages do not
    # propagate to specialized loggers, e.g. 'perf', 'network'
    defence360agent.internals.logger.reconfigure()
async def _initial_files_update():
    """Update the files once at startup.

    Delegates to ``files.update_all_no_fail_if_files_exist()`` and discards
    its result.
    """
    await files.update_all_no_fail_if_files_exist()
def _tls_check_reset(loop):
    """Run ``tls_check.reset`` through ``simplification.run_in_executor()``.

    Blocks on *loop* until the call has completed.
    """
    # init thread id for simplification.run_in_executor() worker thread
    reset_in_worker = simplification.run_in_executor(loop, tls_check.reset)
    loop.run_until_complete(reset_in_worker)
    # mark current thread as "main_thread" for more informative error messages
    # PSSST! simplification.run_in_executor() is main thread now! :-X
    # tls_check.reset("main_thread")
def plugin_instances(objs, pclass):
    """Return the items of *objs* that are instances of *pclass*.

    The result is a new list that keeps the original order. *pclass* may be
    anything ``isinstance()`` accepts as its second argument.
    """
    matching = []
    for candidate in objs:
        if isinstance(candidate, pclass):
            matching.append(candidate)
    return matching
def _start_plugins(loop, plugin_classes) -> Tuple[TheSink, list, list]:
    """Instantiate all plugins and connect them through a ``TheSink``.

    Sinks are created first, then ``TheSink`` is built on top of them, then
    every source is created with that ``TheSink``; finally it is started.

    :param loop: event loop used to run the plugins' setup coroutines
    :param plugin_classes: classes to instantiate (called with no arguments)
    :return: ``(the_sink, sinks, sources)``
    """
    plugins = [make_plugin() for make_plugin in plugin_classes]

    # instantiate sinks
    sinks = plugin_instances(plugins, MessageSink)
    for sink in sinks:
        logger.info("Creating sink %r", sink)
        loop.run_until_complete(sink.create_sink(loop))

    # instantiate sources
    the_sink = TheSink(sinks, loop)
    sources = plugin_instances(plugins, MessageSource)
    for source in sources:
        logger.info("Creating source %r", source)
        loop.run_until_complete(source.create_source(loop, the_sink))

    the_sink.start()
    return the_sink, sinks, sources
def _start_rpc(loop, the_sink: TheSink):
    """Create the root and non-root RPC servers on *loop*.

    The ``*AV`` server classes are used when ``SimpleRpc.SOCKET_ACTIVATION``
    is truthy, the plain ones otherwise.
    """
    logger.info("Starting RpcServers...")
    rpc_servers = (
        (RpcServerAV, NonRootRpcServerAV)
        if SimpleRpc.SOCKET_ACTIVATION
        else (RpcServer, NonRootRpcServer)
    )
    for server_class in rpc_servers:
        loop.run_until_complete(server_class.create(loop, the_sink))
def _get_pids_open(*paths):
    """Return the de-duplicated pids that ``lsof +wt`` prints for *paths*.

    Output attached to a non-zero ``lsof`` exit is still parsed. An empty
    list is returned when ``lsof`` cannot be found or another I/O error
    occurs while running it.
    """
    command = ["lsof", "+wt", *paths]
    # os.environ is merged last, so an inherited PATH replaces the fallback.
    lsof_env = {"PATH": "/usr/sbin:/usr/bin", **os.environ}
    try:
        raw_output = check_output(command, env=lsof_env)
    except CalledProcessError as failure:
        raw_output = bytes(failure.output)
    except FileNotFoundError:
        logger.warning("There is no lsof in /usr/sbin:/usr/bin")
        return []
    except IOError:
        return []

    pids = []
    for line in raw_output.strip().split(b"\n"):
        if line:
            pids.append(int(line))
    return list(set(pids))
def _check_able_to_start(pidfile):
    """Exit the process when the agent must not be started.

    Two checks are made:

    * ``is_running()`` -- another instance is running. Diagnostics (parent
      process, processes holding the RPC sockets, contents of *pidfile*) are
      logged through ``throttled_log_error`` and the process exits.
    * ``is_db_corrupted()`` -- the database is corrupted. The problem is
      logged as an error the first time (a flag file is created) and as a
      warning afterwards; the process exits. When the database is fine, a
      leftover flag file is removed.

    :param pidfile: path of the pid file, read for diagnostics only
    :raises SystemExit: with ``EXITCODE_GENERAL_ERROR`` if either check fails
    """
    if is_running():
        # get parent process info
        ppid = os.getppid()
        # Default used when ppid is 0 or the parent has already exited; without
        # it the message below failed with UnboundLocalError for ppid == 0.
        parent = None
        if ppid != 0:
            with suppress(psutil.NoSuchProcess):
                parent = psutil.Process(ppid).name()

        pids_used_socket = _get_pids_open(
            SimpleRpc.SOCKET_PATH, SimpleRpc.NON_ROOT_SOCKET_PATH
        )
        process_used_socket = []
        for pid in pids_used_socket:
            try:
                _pr = psutil.Process(pid)
                _local_parent = _pr.parent()
                if _local_parent:
                    _parent_name = _local_parent.name()
                else:
                    _parent_name = "None"
                _name = _pr.name()
            except psutil.NoSuchProcess:
                # The process (or its parent) exited while being inspected.
                continue
            process_used_socket.append(
                (
                    pid,
                    _name,
                    "parent process = %s" % str(_parent_name),
                )
            )

        try:
            with open(pidfile) as file:
                written_pid = file.read()
        except OSError:
            written_pid = None

        throttled_log_error(
            "Instance of %s is already running. "
            'Parent process "%s" with pid "%s". '
            "Sockets are in use by %s. "
            "%s file contents %s pid"
            % (
                Core.SVC_NAME,
                parent,
                ppid,
                str(process_used_socket),
                pidfile,
                written_pid,
            )
        )
        sys.exit(EXITCODE_GENERAL_ERROR)

    if is_db_corrupted(db_path=Model.PATH):
        if not _DB_IS_CORRUPTED_FLAG.exists():
            # First detection: report as an error and remember it on disk.
            logger.error(_DB_IS_CORRUPTED_MSG)
            _DB_IS_CORRUPTED_FLAG.touch()
        else:
            logger.warning(_DB_IS_CORRUPTED_MSG)
        sys.exit(EXITCODE_GENERAL_ERROR)
    else:
        with suppress(FileNotFoundError):
            _DB_IS_CORRUPTED_FLAG.unlink()
def start(plugin_classes: list, init_actions) -> None:
    """Start the agent service and run it until the event loop is stopped.

    :param plugin_classes: classes implementing message processing plugins
    :param init_actions: callable returning an awaitable; it is run to
        completion before pending cleanups are reset and before RPC and
        message processing are started
    :raises SystemExit: with ``EXITCODE_GENERAL_ERROR`` when not run as root,
        when a start-up check fails, or when pending cleanups cannot be reset
    """
    if not is_root_user():
        logger.info("Imunify agent could be started by the root user only!")
        sys.exit(EXITCODE_GENERAL_ERROR)

    args = parse_cli()
    defence360agent.internals.logger.setLogLevel(args.verbose)
    # The command-line option takes precedence over the environment variable.
    log_config_path = args.log_config or os.environ.get(
        "IMUNIFY360_LOGGING_CONFIG_FILE"
    )
    if log_config_path:
        defence360agent.internals.logger.update_logging_config_from_file(
            log_config_path
        )

    sys.setrecursionlimit(_MAX_RECURSION_DEPTH)
    _check_able_to_start(args.pidfile)
    if args.daemon:
        _daemonize(args.pidfile)
    systemd_notifier.notify(systemd_notifier.AgentState.DAEMONIZED)

    health.sensor.starting(time.time())
    if not LicenseCLN.is_registered():
        health.sensor.unregistered()

    loop = asyncio.get_event_loop()
    cpu_count = os.cpu_count()
    # https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
    # same sizing as the Python 3.8 default
    worker_count = min(32, cpu_count + 4 if cpu_count else 5)
    loop.set_default_executor(ThreadPoolExecutor(max_workers=worker_count))
    loop.set_task_factory(TaskFactory())

    try:
        _tls_check_reset(loop)
        instance.db.init(Model.PATH)
        validate_configs_on_start(loop)
        Merger.update_merged_config()
        loop.run_until_complete(init_actions())

        try:
            _stop_pending_cleanup()
        except simplification.PeeweeException as e:
            # A database error here is logged and the agent refuses to
            # start; it may happen on package update or for other reasons.
            logger.error("Failed to stop pending cleanup. Reason: %s", repr(e))
            sys.exit(EXITCODE_GENERAL_ERROR)

        # On the first agent run ALL static files must be downloaded before
        # going on. On later runs the essential files are already present
        # and get updated asynchronously.
        essential_files_present = loop.run_until_complete(
            files.essential_files_exist()
        )
        if not essential_files_present:
            logger.info(
                "Essential files are missing. Performing initial files update."
            )
            loop.run_until_complete(_initial_files_update())

        inactivity.track.set_timeout(SimpleRpc.INACTIVITY_TIMEOUT)
        the_sink, sinks, sources = _start_plugins(loop, plugin_classes)
        _start_rpc(loop, the_sink)
        logger.info("Message Bus started")

        agent_started = HookEvent.AgentStarted(
            version=Core.VERSION, resident=False
        )
        create_task_and_log_exceptions(
            loop, the_sink.process_message, agent_started
        )

        # note: plugins are started before the shutdown task has been setup
        # therefore plugin.shutdown() won't be called before create_source()
        shutdown = partial(_shutdown_task, loop, the_sink, sinks + sources)
        _setup_signal_handlers(loop, shutdown)

        loop.run_forever()
        logger.info("loop stopped")
    finally:
        # closing the loop after loop.stop() cuts off pending tasks stacktraces
        loop.close()
def validate_configs_on_start(loop):
    """Validate the config layers; on failure run hooks, log and exit.

    When ``ConfigsValidator.validate_config_layers()`` raises, an
    ``AgentMisconfig`` hook event carrying ``repr()`` of the error is
    executed on *loop*, the error is logged as a warning and the process
    exits with ``EXITCODE_GENERAL_ERROR``.
    """
    try:
        ConfigsValidator.validate_config_layers()
    except Exception as exc:
        # Imported only on the failure path.
        from defence360agent.hooks.execute import execute_hooks

        misconfig_event = HookEvent.AgentMisconfig(error=repr(exc))
        loop.run_until_complete(execute_hooks(misconfig_event))
        logger.warning(str(exc))
        sys.exit(EXITCODE_GENERAL_ERROR)
def _setup_signal_handlers(loop, shutdowntask):
    """Install handlers for SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 on *loop*.

    The first signal received schedules *shutdowntask* through
    ``create_task_and_log_exceptions()``; every later signal is only logged.
    """
    # False until the first signal arrives, then True, then the created task
    # itself (the reference keeps the task alive, just in case).
    shutdown_started = False

    def _sighandler(loop, sig):
        nonlocal shutdown_started
        if shutdown_started:
            logger.info(
                "Caught %s. Shutdown task is already running, please wait.",
                sig,
            )
            return
        shutdown_started = True
        logger.info("Caught %s", sig)
        shutdown_started = create_task_and_log_exceptions(loop, shutdowntask)

    handled_signals = (
        signal.SIGINT,
        signal.SIGTERM,
        signal.SIGUSR1,
        signal.SIGUSR2,
    )
    for sig in handled_signals:
        loop.add_signal_handler(sig, _sighandler, loop, sig)
def parse_cli():
    """Parse the agent's command-line options from ``sys.argv``.

    :return: ``argparse.Namespace`` with ``verbose`` (number of ``-v``
        flags, 0 by default), ``daemon`` (bool), ``pidfile`` (path, default
        ``/var/run/imunify360.pid``) and ``log_config`` (path or ``None``)
    """
    parser = argparse.ArgumentParser(description="Run imunify agent")
    parser.add_argument(
        "-v",
        dest="verbose",
        action="count",
        default=0,
        # Adjacent literals are concatenated as-is, so each fragment carries
        # its own separating space; otherwise the help text runs together.
        help=(
            "Level of logging. Each value corresponds to: "
            "1 - console only log level, "
            "2 - previous plus add network log, "
            "3 - all previous plus add process message log, "
            "4 - all previous plus add debug log"
        ),
    )
    parser.add_argument("--daemon", action="store_true", help="run as daemon")
    parser.add_argument(
        "--pidfile",
        default="/var/run/imunify360.pid",
        help="use with --daemon",
    )
    parser.add_argument("--log-config", help="logging config filename")
    return parser.parse_args(sys.argv[1:])
def _stop_pending_cleanup():
    """Reset file malware hits stuck in CLEANUP_STARTED back to FOUND."""
    cleanup_started = MalwareHit.status == MalwareHitStatus.CLEANUP_STARTED
    is_file_hit = (
        MalwareHit.resource_type == MalwareScanResourceType.FILE.value
    )
    stuck_hits = MalwareHit.select().where(cleanup_started, is_file_hit)
    MalwareHit.set_status(stuck_hits, MalwareHitStatus.FOUND)