"""
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Copyright © 2019 Cloud Linux Software Inc.
This software is also available under ImunifyAV commercial license,
see <https://www.imunify360.com/legal/eula>
"""
import asyncio
import collections
import logging
import time
from abc import ABC, abstractmethod
from base64 import b64encode
from functools import cached_property
from glob import iglob
from os import fsencode
from pathlib import Path
from typing import ClassVar, Deque, List, Optional
from uuid import uuid4
from defence360agent.api import inactivity
from defence360agent.contracts.config import (
Malware as Config,
MalwareScanIntensity,
)
from defence360agent.internals.global_scope import g
from defence360agent.utils import (
antivirus_mode,
create_task_and_log_exceptions,
)
from defence360agent.utils.common import HOUR, rate_limit
from defence360agent.utils.serialization import serialize_attr, unserialize
from imav.malwarelib.config import (
MalwareScanResourceType,
MalwareScanType,
QueuedScanState,
)
from imav.malwarelib.scan.ai_bolit.detached import (
AiBolitDetachedScan,
)
from imav.malwarelib.scan.app_version_detector import (
AVDExecutionError,
AppVersionDetector,
)
from imav.malwarelib.scan.detached import DetachedScan
from imav.malwarelib.scan.mds.detached import MDSDetachedScan
from imav.malwarelib.scan.mds.scanner import MalwareDatabaseScanner
from imav.malwarelib.utils.user_list import panel_users
logger = logging.getLogger(__name__)
throttled_log_error = rate_limit(period=HOUR, on_drop=logger.warning)(
logger.error
)
INTENSITY_FALLBACK = {
"low": {
"intensity_cpu": 1,
"intensity_io": 1,
"intensity_ram": 1024,
},
"moderate": {
"intensity_cpu": 4,
"intensity_io": 4,
"intensity_ram": 2048,
},
"high": {
"intensity_cpu": 7,
"intensity_io": 7,
"intensity_ram": 4096,
},
}
class QueuedScanBase(ABC):
resource_type: ClassVar[MalwareScanResourceType]
detached_scan: DetachedScan
state = None
def __init__(
self,
path,
*,
scanid=None,
scan_type: str = MalwareScanType.ON_DEMAND,
created: int = None,
started: Optional[float] = None,
intensity=None,
home_dirs=None,
intensity_cpu=None,
intensity_io=None,
intensity_ram=None,
initiator=None,
state: Optional[str] = None,
**_,
):
self.path = path
if intensity:
intensity_fallback = INTENSITY_FALLBACK[intensity]
else:
intensity_fallback = {
"intensity_cpu": MalwareScanIntensity.CPU,
"intensity_io": MalwareScanIntensity.IO,
"intensity_ram": MalwareScanIntensity.RAM,
}
self.args = {
"intensity_cpu": intensity_cpu
or intensity_fallback["intensity_cpu"],
"intensity_io": intensity_io or intensity_fallback["intensity_io"],
"intensity_ram": intensity_ram
or intensity_fallback["intensity_ram"],
"initiator": initiator,
}
home_dirs = home_dirs or []
if scan_type == MalwareScanType.ON_DEMAND and Path(path) in home_dirs:
scan_type = MalwareScanType.USER
self.scanid = scanid or uuid4().hex
self.scan_type = scan_type
self.created = created or int(time.time())
self.state = (
QueuedScanState.queued if state is None else QueuedScanState(state)
)
self.started = started
self.scanner_task = None
@abstractmethod
async def start(self):
pass
@property
@abstractmethod
def total_resources(self):
pass
def stop(self):
if self.scanner_task:
self.scanner_task.cancel()
def status(self):
result = {
"status": self.state.value,
"path": self.path,
"scanid": self.scanid,
"started": self.started,
"created": self.created,
"scan_type": self.scan_type,
"resource_type": self.resource_type.value,
**self.args,
}
if self.state == QueuedScanState.running:
result["phase"] = self.detached_scan.phase
result["progress"] = self.detached_scan.progress
return result
def __getstate__(self):
state = self.__dict__.copy()
del state["detached_scan"]
del state["scanner_task"]
return state
def __setstate__(self, state):
self.__dict__.update(state)
self.scanner_task = None
def __eq__(self, other):
return (
self.resource_type == other.resource_type
and self.path == other.path
)
def __repr__(self):
return "<{}({!r}, scanid={})>".format(
self.__class__.__qualname__, self.path, self.scanid
)
# We don't need to hash queued scans
__hash__ = None # type: ignore
def to_dict(self):
return {
"path": self.path,
"scanid": self.scanid,
"scan_type": self.scan_type,
"created": self.created,
"started": self.started,
"args": self.args,
"resource_type": self.resource_type.value,
"state": self.state.value,
}
@classmethod
def from_dict(cls, kwargs):
for arg, value in kwargs.pop("args", {}).items():
kwargs[arg] = value
return cls(**kwargs)
class QueuedFileScan(QueuedScanBase):
resource_type = MalwareScanResourceType.FILE
def __init__(
self,
path,
*,
scanid=None,
scan_type: str = MalwareScanType.ON_DEMAND,
created: int = None,
started: Optional[float] = None,
intensity=None,
home_dirs=None,
intensity_cpu=None,
intensity_io=None,
intensity_ram=None,
file_patterns=None,
exclude_patterns=None,
follow_symlinks=None,
detect_elf=None,
initiator=None,
state=None,
**_,
):
super().__init__(
path,
scanid=scanid,
scan_type=scan_type,
created=created,
started=started,
intensity=intensity,
home_dirs=home_dirs,
intensity_cpu=intensity_cpu,
intensity_io=intensity_io,
intensity_ram=intensity_ram,
initiator=initiator,
state=state,
)
self.args.update(
file_patterns=file_patterns or None,
exclude_patterns=exclude_patterns or None,
follow_symlinks=follow_symlinks or False,
detect_elf=detect_elf,
)
self.detached_scan = AiBolitDetachedScan(self.scanid)
def __setstate__(self, state):
# WARNING: Avoid adding a new attribute to a serializable class.
# If an object deserializes after a package upgrade, it will lack it
super().__setstate__(state)
self.detached_scan = AiBolitDetachedScan(self.scanid)
async def start(self):
self.started = time.time()
from imav.malwarelib.scan.scanner import MalwareScanner
scanner = MalwareScanner(sink=g.sink, hooks=True)
self.scanner_task = scanner.start(
self.path,
scan_id=self.scanid,
scan_type=self.scan_type,
started=self.started,
**self.args,
)
scan_data = await scanner.async_wait()
if scan_data is None:
logger.info("Scan cancelled for %s", self.path)
self.state = QueuedScanState.stopped
else:
self.state = QueuedScanState.running
scan_data["initiator"] = (
# using `get` because there is no initiator before version 6.8
self.args.get("initiator")
# for compatibility reason: when `self.initiator` is available
or getattr(self, "initiator", "undefined")
)
return scan_data
def stop(self):
if self.scanner_task:
self.scanner_task.cancel()
@property
def total_resources(self):
return self.detached_scan.total_resources
class QueuedDbScan(QueuedScanBase):
resource_type = MalwareScanResourceType.DB
def __init__(
self,
path: str,
scanid: Optional[str] = None,
# FIXME: Use Enum instead of a class with str attributes.
scan_type: str = MalwareScanType.ON_DEMAND,
created: int = None,
started: Optional[float] = None,
intensity: Optional[str] = None,
home_dirs=None,
intensity_cpu=None,
intensity_io=None,
intensity_ram=None,
state=None,
**_,
):
super().__init__(
path=path,
scanid=scanid,
scan_type=scan_type,
created=created,
started=started,
intensity=intensity,
home_dirs=home_dirs,
intensity_cpu=intensity_cpu,
intensity_io=intensity_io,
intensity_ram=intensity_ram,
state=state,
)
self.detached_scan = MDSDetachedScan(self.scanid)
def __setstate__(self, state):
super().__setstate__(state)
self.detached_scan = MDSDetachedScan(self.scanid)
@property
def total_resources(self) -> int:
return self.detached_scan.total_resources
async def _scan(self):
# app-version-detector should recursive check all directories,
# no need to extract them explicitly.
# Used to make files and db scans idempotent (DEF-19264)
# MDS scanner (php) should alerady handle /path/* as /path/ (DEF-19096)
apps_path = (
self.path.rstrip("*") if self.path.endswith("/*") else self.path
)
unglobbed_paths = [b64encode(fsencode(d)) for d in iglob(apps_path)]
try:
with inactivity.track.task("AVD_scan"):
await AppVersionDetector().start(unglobbed_paths)
except AVDExecutionError as exc: # Exited with non-zero return code
await self.detached_scan.handle_aborted_process(
sink=g.sink,
scan_path=self.path,
scan_type=self.scan_type,
scan_started=self.started,
cmd=exc.command,
out=exc.out,
err=exc.err,
)
else:
await MalwareDatabaseScanner(
self.path,
[self.path], # FIXME: pass unglobbed_paths here
**self.args,
scan_type=self.scan_type,
scan_id=self.scanid,
).scan(self.started)
async def start(self) -> None:
self.started = time.time()
self.scanner_task = create_task_and_log_exceptions(
asyncio.get_event_loop(), self._scan
)
# For backward compatibility to deserialize an old queue
State = QueuedScanState
QueuedScan = QueuedFileScan
serialize_scans = serialize_attr(path=Config.SCANS_PATH, attr="_scans_info")
SCAN_TYPE_CLASSES = {
MalwareScanResourceType.FILE.value: QueuedFileScan,
MalwareScanResourceType.DB.value: QueuedDbScan,
}
class ScanQueue:
@property
def _scans_info(self):
return collections.deque(item.to_dict() for item in self._scans)
@cached_property
def _scans(self) -> Deque[QueuedScanBase]:
# it should be loaded once per instance
scans = collections.deque()
for scan_info in unserialize(
path=Config.SCANS_PATH, fallback=collections.deque
):
try:
cls = SCAN_TYPE_CLASSES[scan_info["resource_type"]]
scans.append(cls.from_dict(scan_info))
except Exception as exc:
# don't flood Sentry, send one error message
throttled_log_error(
"Can't get scan class for %s due to %s", scan_info, exc
)
return scans
@property
def current_scan(self):
return self.peek(0)
@serialize_scans
async def put(
self,
paths,
resource_type: MalwareScanResourceType,
prioritize=False,
**scan_args,
):
home_dirs = [Path(user["home"]) for user in await panel_users()]
if resource_type == MalwareScanResourceType.FILE:
scans_to_add: List[QueuedScanBase] = [
QueuedFileScan(path, home_dirs=home_dirs, **scan_args)
for path in paths
]
elif (
antivirus_mode.disabled
and resource_type == MalwareScanResourceType.DB
):
scans_to_add = db_scans(paths, home_dirs, scan_args)
else:
raise ValueError("Unknown resource_type: {}".format(resource_type))
if prioritize and self._scans:
running = self._scans.popleft()
self._scans.extendleft(reversed(scans_to_add))
self._scans.appendleft(running)
else:
self._scans.extend(scans_to_add)
@serialize_scans
def remove(self, scan=None):
if len(self) == 0:
return
scan = scan or self.current_scan
self._scans.remove(scan)
scan.stop()
logger.info("Scans pending: %d", len(self))
def peek(self, priority):
if -1 < priority < len(self):
return self._scans[priority]
def find_all(self, scan_ids):
return [scan for scan in self._scans if scan.scanid in scan_ids]
def find(self, **kwargs) -> Optional[QueuedScanBase]:
for scan in self._scans:
if all([getattr(scan, k) == v for k, v in kwargs.items()]):
return scan
return None
def update(self, scan_ids, status) -> None:
for scan in self._scans:
if scan.scanid in scan_ids:
scan.state = status
def get_scans_from_paths(self, paths):
for scan in self.scans:
if scan.path in paths:
yield scan, scan.state.value
def scan_summaries(self, scans=None):
scans = scans or self._scans
return collections.OrderedDict(
(
scan.scanid,
{
"path": scan.path,
"scan_status": scan.state.value,
"scan_type": scan.scan_type,
"started": scan.started,
"created": scan.created,
"error": None,
"total_resources": scan.total_resources,
"total_malicious": 0,
"resource_type": scan.resource_type.value,
},
)
for scan in scans
)
@property
def scans(self):
return list(self._scans)
def __bool__(self):
return len(self._scans) > 0
def __contains__(self, scan):
return scan in self._scans
def __len__(self):
return len(self._scans)
def db_scans(paths, home_dirs, scan_args):
return [
QueuedDbScan(path, home_dirs=home_dirs, **scan_args) for path in paths
]