"""
This module fetches and caches news from the Imunify blog.
"""
import asyncio
import os
import socket
import time
import urllib.request
from urllib.error import HTTPError
from xml.etree import ElementTree
from contextlib import suppress
from logging import getLogger
from defence360agent.simple_rpc.hosting_panel import HostingPanel
from defence360agent.utils import retry_on
# Module-level logger. Note: it is named after this file's path (__file__).
logger = getLogger(__file__)
# URL of the RSS feed that NewsFeed downloads and caches on disk.
RSS_FEED_REMOTE_URL = "https://blog.imunify360.com/rss.xml"
# Passed down to urllib.request.urlopen(timeout=...), i.e. a value in seconds.
_TIMEOUT = 300 # default timeout for network operations here
# Child tags of every RSS <item> that are copied into the returned news dicts.
TAGS_TO_READ = ["title", "pubDate", "guid", "link"]
# Public names of this module; HTTPError is re-exported from urllib.error.
__all__ = ["HTTPError", "NewsFeed"]
class NewsFeed:
    """Serve news items from the RSS feed through an on-disk cache.

    The class keeps no in-memory state: every method is a classmethod and
    the only state is the cache file at ``cache_file_path``.
    """

    cache_ttl = 60  # in minutes
    cache_file_path = "/var/imunify360/tmp/feed_cache.rss"

    @classmethod
    @retry_on(
        (ElementTree.ParseError, urllib.request.URLError),
        max_tries=10,
        on_error=lambda *args: NewsFeed.clear_cache(*args),
    )
    async def get(cls):
        """Return the news items relevant for the current hosting panel.

        The cache is refreshed first when it is missing or older than
        ``cache_ttl`` minutes.  Each returned dict maps the tags listed in
        ``TAGS_TO_READ`` (those present on the ``<item>``) to their text.

        NOTE(review): ``retry_on`` is a project helper; presumably it
        re-runs this coroutine on ParseError/URLError and invokes
        ``on_error`` (which drops the cache) between attempts -- confirm
        against ``defence360agent.utils.retry_on``.
        """
        if cls._expired():
            await cls._refresh()
        category_info = PanelCategory(HostingPanel().NAME)
        # The cache is written as raw bytes, so read it back as bytes and
        # let the XML parser apply the encoding from the XML declaration
        # instead of decoding with the process locale.
        with open(cls.cache_file_path, "rb") as cache_file:
            root = ElementTree.fromstring(cache_file.read())
        return [
            {
                child.tag: child.text
                for child in item
                if child.tag in TAGS_TO_READ
            }
            for item in root.iter("item")
            if category_info.is_allowed(item)
        ]

    @classmethod
    async def _refresh(cls):
        """Download the feed and overwrite the cache file with it."""
        logger.info("Refresh news cache")
        # Fetch BEFORE opening the file for writing: opening with "wb"
        # truncates it, so a failed download would otherwise wipe the
        # previous cache and leave an empty file with a fresh mtime.
        feed = await cls._fetch()
        cache_file_dir_path = os.path.dirname(cls.cache_file_path)
        if cache_file_dir_path:
            os.makedirs(cache_file_dir_path, exist_ok=True)
        with open(cls.cache_file_path, "wb") as cache_file:
            cache_file.write(feed)

    @classmethod
    def _expired(cls):
        """Return True when the cache file is missing or older than the TTL."""
        try:
            last_modified_time = os.path.getmtime(cls.cache_file_path)
        except OSError:
            # Missing (or unreadable) cache file counts as infinitely old.
            last_modified_time = 0
        cache_age = (time.time() - last_modified_time) / 60  # in minutes
        return cache_age > cls.cache_ttl

    @classmethod
    async def _fetch(cls, timeout=_TIMEOUT):
        """Download the feed in the default executor and return its bytes.

        The blocking urllib call is handed to ``run_in_executor`` so that
        the event loop is not blocked while waiting for the network.
        """
        return await asyncio.get_event_loop().run_in_executor(
            None, _fetch_url, RSS_FEED_REMOTE_URL, timeout
        )

    @classmethod
    async def clear_cache(cls, *args):
        """Delete the cache file; a missing file is not an error.

        ``args`` is only logged (it carries whatever the caller passes,
        e.g. the error that triggered the cleanup).
        """
        logger.warning("Clearing cache due to error: %s", args)
        with suppress(FileNotFoundError):
            os.unlink(cls.cache_file_path)
def _fetch_url(url, timeout):
    """Return the raw response body of *url* as bytes.

    This is a blocking call (plain urllib).

    :param url: URL to download
    :param timeout: timeout passed to ``urllib.request.urlopen``
    :raises TimeoutError: when the socket operation times out
    """
    # Cloudflare Browser Integrity Check blocks the default urllib
    # User-Agent. RSS feed URL was added to exceptions but they are
    # not free, so let's set a custom User-Agent anyway.
    headers = {"User-Agent": "imunify360-urllib/0.1"}
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as response:
            return response.read()
    except socket.timeout as err:
        # Keep the exception type callers expect, but preserve the cause
        # and say what timed out instead of raising an empty error.
        raise TimeoutError(
            "Timed out after {}s while fetching {}".format(timeout, url)
        ) from err
class PanelCategory:
    """Filter RSS items by the hosting panel they are targeted at.

    An item is hidden only when its categories mention some other panel
    ("competitor") without mentioning the current one.
    """

    # RSS news categories, value saved in xml category tag
    # categories are case-insensitive so lowercase it
    panel_categories = {"cpanel", "plesk", "directadmin"}
    no_panel_category = "standalone-imunify"

    def __init__(self, p_name):
        """Pick the category for panel *p_name* (matched case-insensitively).

        Any name that is not one of ``panel_categories`` is treated as
        ``no_panel_category``.
        """
        p_name = p_name.lower()
        self.current = (
            p_name
            if p_name in PanelCategory.panel_categories
            else PanelCategory.no_panel_category
        )
        # The parentheses are required: `-` binds tighter than `|`, so
        # without them the current panel would stay in the competitor set.
        self.competitors = (
            PanelCategory.panel_categories
            | {PanelCategory.no_panel_category}
        ) - {self.current}

    def is_allowed(self, item):
        """Return True when *item* should be shown for the current panel.

        :param item: an iterable of XML elements (the children of an RSS
            ``<item>``); only ``category`` children are inspected
        """
        # Skip empty <category/> tags: their text is None and would make
        # str.join() raise TypeError.
        item_categories = {
            child.text
            for child in item
            if child.tag == "category" and child.text
        }
        # category tag can include not only exact panel name, but also some
        # phrase for SEO purpose, so check it by `in` on joined string
        joined_category = "|||".join(item_categories).lower()
        current_in_category = self.current in joined_category
        competitors_in_category = any(
            com in joined_category for com in self.competitors
        )
        # If a competitor is mentioned in the categories but the current
        # panel is not -> don't add the item to the result.
        return not competitors_in_category or current_in_category