#! /usr/bin/python -tt
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2005 Duke University
# Copyright 2007 Red Hat
import os
import re
import time
import types
import urlparse
urlparse.uses_fragment.append("media")
import urllib
import Errors
from urlgrabber.grabber import URLGrabber
from urlgrabber.grabber import default_grabber
from urlgrabber.progress import format_number
import urlgrabber.mirror
from urlgrabber.grabber import URLGrabError
import repoMDObject
import packageSack
from repos import Repository
import parser
import sqlitecachec
import sqlitesack
from yum import config
from yum import misc
from yum import comps
from yum import _
from constants import *
import metalink
import logging
import logginglevels
import warnings
import glob
import shutil
import stat
import errno
import tempfile
# No longer used by this module; retained because it was a public global.
skip_old_DBMD_check = False

# xattr is optional.  The helpers below call xattr.get()/xattr.set(), so a
# module that lacks either of them is treated the same as a missing module.
try:
    import xattr
except ImportError:
    xattr = None
else:
    if not (hasattr(xattr, 'get') and hasattr(xattr, 'set')):
        xattr = None
# The problem we are trying to solve here is that:
#
# 1. We rarely want to be downloading MD/pkgs/etc.
# 2. We want to check those files are valid (match checksums) when we do
# download them.
# 3. We _really_ don't want to checksum all the files every time we
# run (100s of MBs).
# 4. We can continue to download files from bad mirrors, or retry files due to
# C-c etc.
#
# ...we used to solve this by just checking the file size, and assuming the
# files had been downloaded and checksummed as correct if that matched. But that
# was error prone on bad mirrors, so now we store the checksum in an
# xattr ... this does mean that if you can't store xattrs (e.g. NFS) you will
# re-checksum everything constantly.
def _xattr_get_chksum(filename, chktype):
    """Return the checksum cached in an xattr on *filename*, or None.

    The value is read from the ``user.yum.checksum.<chktype>`` attribute.
    None is returned when the xattr module is unavailable or when reading
    the attribute fails.
    """
    if not xattr:
        return None

    try:
        return xattr.get(filename, 'user.yum.checksum.' + chktype)
    except Exception:
        # Documented to be "EnvironmentError", but make sure: any ordinary
        # error means "no cached checksum".  Unlike a bare except, this
        # still lets KeyboardInterrupt/SystemExit propagate.
        return None
def _xattr_set_chksum(filename, chktype, chksum):
    """Cache *chksum* in the ``user.yum.checksum.<chktype>`` xattr.

    Returns None when the xattr module is unavailable, False when storing
    the attribute failed, and True on success.
    """
    if not xattr:
        return None

    try:
        xattr.set(filename, 'user.yum.checksum.' + chktype, chksum)
    except Exception:
        # Data too long. = IOError ... ignore every ordinary error, but do
        # not swallow KeyboardInterrupt/SystemExit as a bare except would.
        return False
    return True
# Silence YumFutureDeprecationWarning for everything that runs after this
# module has been imported.
warnings.simplefilter("ignore", Errors.YumFutureDeprecationWarning)
# Module-wide loggers: one for regular messages, one for verbose/debug output.
logger = logging.getLogger("yum.Repos")
verbose_logger = logging.getLogger("yum.verbose.Repos")
class YumPackageSack(packageSack.PackageSack):
    """imports/handles package objects from an mdcache dict object

    self.pc is the class used to build package objects.  self.added maps a
    repo to the list of metadata kinds ('metadata', 'filelists',
    'otherdata') that have already been imported for it.
    """

    # item -> (sqlite md type, xml md type,
    #          name of the repo method that fetches the XML,
    #          name of the cache handler method that converts the XML)
    _MD_ITEMS = {
        'metadata': ('primary_db', 'primary',
                     'getPrimaryXML', 'getPrimary'),
        'filelists': ('filelists_db', 'filelists',
                      'getFileListsXML', 'getFilelists'),
        'otherdata': ('other_db', 'other',
                      'getOtherXML', 'getOtherdata'),
    }

    def __init__(self, packageClass):
        packageSack.PackageSack.__init__(self)
        self.pc = packageClass
        self.added = {}

    def __del__(self):
        try:
            self.close()
        except Errors.RepoError as e:
            verbose_logger.debug("Exception %s %s in %s ignored" % (repr(e), str(e), self.__del__))

    def close(self):
        """Forget which metadata has been imported for which repo."""
        self.added = {}

    def addDict(self, repo, datatype, dataobj, callback=None):
        """Import *dataobj* (a mapping of pkgid -> package dict) for *repo*.

        datatype 'metadata' creates package objects and adds them to the
        sack; 'filelists' and 'otherdata' are merged into packages already
        known by id.  Any other datatype is ignored, as is a datatype that
        was already imported for this repo.  If *callback* is given, its
        progressbar(current, total, repo) is called once per package.

        Raises Errors.RepoError when 'filelists'/'otherdata' is imported
        before the repo's 'metadata'.
        """
        done = self.added.get(repo, ())
        if datatype in done:
            return

        total = len(dataobj)
        if datatype == 'metadata':
            for current, pkgid in enumerate(dataobj, 1):
                if callback:
                    callback.progressbar(current, total, repo)
                po = self.pc(repo, dataobj[pkgid])
                po.id = pkgid
                self._addToDictAsList(self.pkgsByID, pkgid, po)
                self.addPackage(po)

            self.added.setdefault(repo, []).append('metadata')
            # indexes will need to be rebuilt
            self.indexesBuilt = 0

        elif datatype in ('filelists', 'otherdata'):
            # Also covers a repo with nothing imported at all; that case
            # used to fall through and die with a KeyError further down.
            if 'metadata' not in done:
                raise Errors.RepoError('%s md for %s imported before primary'
                                       % (datatype, repo.ui_id))
            for current, pkgid in enumerate(dataobj, 1):
                if callback:
                    callback.progressbar(current, total, repo)
                pkgdict = dataobj[pkgid]
                if pkgid in self.pkgsByID:
                    for po in self.pkgsByID[pkgid]:
                        po.importFromDict(pkgdict)

            self.added[repo].append(datatype)
            # indexes will need to be rebuilt
            self.indexesBuilt = 0

    def _retrieve_async(self, repo, data):
        """ Just schedule the metadata downloads """
        for item in data:
            if item in self.added.get(repo, []):
                continue
            if item not in self._MD_ITEMS:
                continue
            mydbtype = self._MD_ITEMS[item][0]

            if not self._check_db_version(repo, mydbtype):
                continue
            if self._check_uncompressed_db_gen(repo, mydbtype):
                continue
            # NOTE: No failfunc.  The 'async' keyword goes through a dict
            # because it is a reserved word in newer Python versions.
            repo._retrieveMD(mydbtype, failfunc=None, **{'async': True})

    def populate(self, repo, mdtype='metadata', callback=None, cacheonly=0):
        """Load metadata of kind *mdtype* for *repo* into the sack.

        mdtype is 'metadata', 'filelists', 'otherdata' or 'all' (all three).
        When the repo's sqlite DB passes the version check it is used,
        otherwise the XML is fetched and converted.  With *cacheonly* set
        the data is prepared but not added to the sack.

        Raises Errors.RepoError when no usable DB / XML can be produced.
        """
        if mdtype == 'all':
            data = ['metadata', 'filelists', 'otherdata']
        else:
            data = [mdtype]

        if not hasattr(repo, 'cacheHandler'):
            repo.cacheHandler = sqlitecachec.RepodataParserSqlite(
                storedir=repo.cachedir,
                repoid=repo.id,
                callback=callback,
                )

        for item in data:
            if item in self.added.get(repo, ()):
                continue
            if item not in self._MD_ITEMS:
                continue

            mydbtype, mymdtype, get_name, cache_name = self._MD_ITEMS[item]
            repo_get_function = getattr(repo, get_name)
            repo_cache_function = getattr(repo.cacheHandler, cache_name)

            if self._check_db_version(repo, mydbtype):
                # Use gen decompression on DB files. Keeps exactly what we
                # downloaded in the download dir.
                # Backwards compat. ... try the old uncompressed version first.
                db_un_fn = self._check_uncompressed_db(repo, mydbtype)
                if not db_un_fn:
                    db_un_fn = self._check_uncompressed_db_gen(repo, mydbtype)
                if not db_un_fn:
                    db_fn = repo._retrieveMD(mydbtype)
                    if db_fn:
                        # unlink the decompressed file, we know it's not valid
                        misc.unlink_f(repo.cachedir + '/gen/%s.sqlite' % mydbtype)
                        db_un_fn = self._check_uncompressed_db_gen(repo,
                                                                   mydbtype)
                    if not db_un_fn:  # Shouldn't happen?
                        raise Errors.RepoError('%s: Check uncompressed DB failed' % repo)

                dobj = repo.cacheHandler.open_database(db_un_fn)

            else:
                repo._xml2sqlite_local = True
                # Download...
                xml = repo_get_function()

                # Use generated dir. and handle compression types metadata
                # parser doesn't understand.
                gen = mymdtype + '.xml'
                ret = misc.repo_gen_decompress(xml, gen, cached=repo.cache)
                if not ret:
                    raise Errors.RepoError('%s: Decompress DB failed' % repo)
                xml = ret
                # Convert XML => .sqlite
                xmldata = repo.repoXML.getData(mymdtype)
                (ctype, csum) = xmldata.checksum
                dobj = repo_cache_function(xml, csum)

            if not cacheonly:
                self.addDict(repo, item, dobj, callback)
            del dobj

        # get rid of all this stuff we don't need now
        del repo.cacheHandler

    def _check_uncompressed_db_gen(self, repo, mdtype):
        """return file name of db in gen/ dir if good, None if not"""
        mydbdata = repo.repoXML.getData(mdtype)
        (r_base, remote) = mydbdata.location
        compressed_fn = repo.cachedir + '/' + os.path.basename(remote)
        db_un_fn = mdtype + '.sqlite'

        if not repo._checkMD(compressed_fn, mdtype, data=mydbdata,
                             check_can_fail=True):
            return None

        ret = misc.repo_gen_decompress(compressed_fn, db_un_fn,
                                       cached=repo.cache)
        if ret:
            return self._check_uncompressed_db_fn(repo, mdtype, ret)
        return None

    def _check_uncompressed_db(self, repo, mdtype):
        """return file name of uncompressed db is good, None if not"""
        mydbdata = repo.repoXML.getData(mdtype)
        (r_base, remote) = mydbdata.location
        compressed_fn = repo.cachedir + '/' + os.path.basename(remote)
        db_un_fn = misc.decompress(compressed_fn, fn_only=True)
        return self._check_uncompressed_db_fn(repo, mdtype, db_un_fn)

    def _check_uncompressed_db_fn(self, repo, mdtype, db_un_fn):
        """Return *db_un_fn* if it exists and passes repo.checkMD, else None.

        A file that fails the check is removed unless the repo is in cache
        mode.
        """
        if not os.path.exists(db_un_fn):
            return None

        try:
            repo.checkMD(db_un_fn, mdtype, openchecksum=True)
        except URLGrabError:
            if not repo.cache:
                misc.unlink_f(db_un_fn)
            return None
        return db_un_fn

    def _check_db_version(self, repo, mdtype):
        """Delegate the DB version check for *mdtype* to the repo."""
        return repo._check_db_version(mdtype)
class YumRepository(Repository, config.RepoConf):
"""
This is an actual repository object
Configuration attributes are pulled in from config.RepoConf.
"""
def __init__(self, repoid):
    """Create a repository object with the id *repoid*.

    Initialises both base classes and then sets every runtime attribute to
    its empty/default value; real values are filled in later by the config
    code and by the setup methods.
    """
    config.RepoConf.__init__(self)
    Repository.__init__(self, repoid)

    # --- urls / metadata bookkeeping ---
    self.repofile = None
    self.mirrorurls = []
    self._urls = []
    self.enablegroups = 0
    # something some freaks might eventually want
    self.groupsfilename = 'yumgroups.xml'
    self.repoMDFile = 'repodata/repomd.xml'
    self._repoXML = None
    self._oldRepoMDData = {}
    self.cache = 0
    self._retry_no_cache = False
    self.mirrorlistparsed = 0
    # empty dict of yumvariables for $string replacement
    self.yumvar = {}
    self._proxy_dict = {}
    self.metadata_cookie_fn = 'cachecookie'
    self._metadataCurrent = None
    self._metalink = None
    self.groups_added = False
    self.http_headers = {}
    # if we're a repo not from a file then the config is very, very old
    self.repo_config_age = 0

    # --- stubs for things that will be set by the config class ---
    self.basecachedir = ""
    self.base_persistdir = ""
    self.cost = 1000
    self.copy_local = 0

    # --- holder for stuff we've grabbed ---
    self.retrieved = dict.fromkeys(('primary', 'filelists', 'other',
                                    'group', 'updateinfo', 'prestodelta'),
                                   0)
    self._preloaded_repomd = False

    # --- callbacks ---
    self.callback = None  # for the grabber
    self.multi_callback = None
    self.failure_obj = None
    self.mirror_failure_obj = None
    self.interrupt_callback = None
    self._callbacks_changed = False

    # callback function for handling media
    self.mediafunc = None

    # callbacks for gpg key importing and confirmation
    self.gpg_import_func = None
    self.gpgca_import_func = None
    self.confirm_func = None

    # The reason we want to turn this off are things like repoids
    # called "tmp" in repoquery --repofrompath and/or new1/old1 in repodiff.
    self.timestamp_check = True

    # --- lazily created helpers ---
    self._sack = None
    self._grabfunc = None
    self._grab = None
    self._async = False
def __cmp__(self, other):
""" Sort yum repos. by cost, and then by alphanumeric on their id. """
if other is None:
return 1
if hasattr(other, 'cost'):
ocost = other.cost
else:
ocost = 1000
ret = cmp(self.cost, ocost)
if ret:
return ret
return cmp(self.id, other.id)
def _getSack(self):
# FIXME: Note that having the repo hold the sack, which holds "repos"
# is not only confusing but creates a circular dep.
# Atm. we don't leak memory because RepoStorage.close() is called,
# which calls repo.close() which calls sack.close() which removes the
# repos from the sack ... thus. breaking the cycle.
if self._sack is None:
self._sack = sqlitesack.YumSqlitePackageSack(
sqlitesack.YumAvailablePackageSqlite)
return self._sack
sack = property(_getSack)
def _ui_id(self):
""" Show self.id, but include any $releasever/$basearch/etc. data. """
if hasattr(self, '__cached_ui_id'):
return getattr(self, '__cached_ui_id')
val = config._readRawRepoFile(self)
if not val:
val = ''
else:
ini, section_id = val
ini = ini[section_id]
if 'metalink' in ini:
val = ini['metalink']
elif 'mirrorlist' in ini:
val = ini['mirrorlist']
elif 'baseurl' in ini:
val = ini['baseurl']
else:
val = ''
ret = self.id
for var in self.ui_repoid_vars:
if '$'+var in val:
ret += '/'
ret += str(self.yumvar[var])
setattr(self, '__cached_ui_id', ret)
return ret
ui_id = property(_ui_id)
def close(self):
    """Close the package sack, if one was created, then the base repo."""
    sack_exists = self._sack is not None
    if sack_exists:
        self.sack.close()
    Repository.close(self)
def _resetSack(self):
self._sack = None
def __getProxyDict(self):
self.doProxyDict()
if self._proxy_dict:
return self._proxy_dict
return None
# consistent access to how proxy information should look (and ensuring
# that it's actually determined for the repo)
proxy_dict = property(__getProxyDict)
def getPackageSack(self):
"""Returns the instance of this repository's package sack."""
return self.sack
def ready(self):
    """Return True if this repository is set up and ready for use.

    That requires a metadata_cookie attribute and a repoXML that is not
    None.
    """
    return hasattr(self, 'metadata_cookie') and self.repoXML is not None
def getGroupLocation(self):
"""Returns the location of the group."""
if 'group_gz' in self.repoXML.fileTypes():
thisdata = self.repoXML.getData('group_gz')
else:
thisdata = self.repoXML.getData('group')
return thisdata.location
def __str__(self):
# Note: You might expect this to be .ui_id, except people got used to
# the fact that str(repo) == repo.id and used the former instead of
# the later when they wanted just the .id. So we have to live with it
# and use .ui_id explicitly.
return self.id
def _checksum(self, sumtype, file, CHUNK=2**16, checksum_can_fail=False,
              datasize=None):
    """Return the checksum of *file* as computed by misc.checksum().

    sumtype  = md5 or sha
    file     = /path/to/file
    CHUNK    = 65536 by default
    datasize = passed through to misc.checksum()

    When the checksum cannot be computed, None is returned if
    *checksum_can_fail* is set, otherwise Errors.RepoError is raised.
    """
    try:
        return misc.checksum(sumtype, file, CHUNK, datasize)
    except (Errors.MiscError, EnvironmentError) as e:
        if checksum_can_fail:
            return None
        # A FIPS error carries its own complete message.
        if isinstance(e, Errors.FIPSNonCompliantError):
            msg = str(e)
        else:
            msg = 'Error opening file for checksum: %s' % e
        raise Errors.RepoError(msg)
def dump(self):
    """Return the repo's public, non-method attributes as ini-style text.

    The text starts with a "[<id>]" header followed by one "name = value"
    line per attribute.  Lists are joined with ",\\n " and other false
    values (except booleans and ints) are shown as an empty string.
    """
    # we exclude all vars which start with _ or are in this list:
    excluded_vars = ('mediafunc', 'sack', 'metalink_data', 'grab',
                     'grabfunc', 'repoXML', 'cfg', 'retrieved',
                     'mirrorlistparsed', 'gpg_import_func',
                     'gpgca_import_func', 'failure_obj',
                     'callback', 'confirm_func', 'groups_added',
                     'interrupt_callback', 'id', 'mirror_failure_obj',
                     'repo_config_age', 'groupsfilename', 'copy_local',
                     'basecachedir', 'http_headers', 'metadata_cookie',
                     'metadata_cookie_fn', 'quick_enable_disable',
                     'repoMDFile', 'timestamp_check', 'urls', 'mirrorurls',
                     'yumvar', 'repofile', 'multi_callback')

    chunks = ['[%s]\n' % self.id]
    for attr in dir(self):
        if attr.startswith('_') or attr in excluded_vars:
            continue
        if isinstance(getattr(self, attr), types.MethodType):
            continue

        res = getattr(self, attr)
        if not res and type(res) not in (type(False), type(0)):
            res = ''
        if type(res) == types.ListType:
            res = ',\n '.join(res)
        chunks.append('%s = %s\n' % (attr, res))

    return ''.join(chunks)
def enablePersistent(self):
    """Persistently enables this repository.

    Enables the repo and then writes its 'enabled' setting with
    config.writeRawRepoFile().  An IOError with errno EACCES from that
    write is only logged as a warning; any other IOError is re-raised.
    """
    self.enable()
    try:
        config.writeRawRepoFile(self, only=['enabled'])
    except IOError as e:
        if e.errno != errno.EACCES:
            # Re-raise the original exception.  Wrapping it in a new
            # IOError(str(e)) threw away errno, filename and traceback.
            raise
        logger.warning(e)
def disablePersistent(self):
    """Persistently disables this repository.

    Disables the repo and then writes its 'enabled' setting with
    config.writeRawRepoFile().  An IOError with errno EACCES from that
    write is only logged as a warning; any other IOError is re-raised.
    """
    self.disable()
    try:
        config.writeRawRepoFile(self, only=['enabled'])
    except IOError as e:
        if e.errno != errno.EACCES:
            # Re-raise the original exception.  Wrapping it in a new
            # IOError(str(e)) threw away errno, filename and traceback.
            raise
        logger.warning(e)
def check(self):
"""self-check the repo information - if we don't have enough to move
on then raise a repo error"""
if len(self._urls) < 1 and not self.mediaid:
raise Errors.RepoError, \
'Cannot find a valid baseurl for repo: %s' % self.ui_id
def doProxyDict(self):
if self._proxy_dict:
return
self._proxy_dict = {} # zap it
proxy_string = None
empty = (None, '_none_', '')
if self.proxy in empty: # got 'proxy=_none_'
proxy_string = '' # this disables default proxies
elif self.proxy:
proxy_string = '%s' % self.proxy
if self.proxy_username not in empty:
auth = urllib.quote(self.proxy_username)
if self.proxy_password not in empty:
auth += ':' + urllib.quote(self.proxy_password)
proto, rest = re.match('(\w+://)(.+)', proxy_string).groups()
proxy_string = '%s%s@%s' % (proto, auth, rest)
if proxy_string is not None:
self._proxy_dict['http'] = proxy_string
self._proxy_dict['https'] = proxy_string
self._proxy_dict['ftp'] = proxy_string
def __headersListFromDict(self, cache=True):
"""Convert our dict of headers to a list of 2-tuples for urlgrabber."""
headers = []
for key in self.http_headers:
headers.append((key, self.http_headers[key]))
if not (cache or 'Pragma' in self.http_headers):
headers.append(('Pragma', 'no-cache'))
return headers
def setupGrab(self):
    """Deprecated public entry point; warns and calls _setupGrab()."""
    message = 'setupGrab() will go away in a future version of Yum.\n'
    warnings.warn(message, Errors.YumFutureDeprecationWarning, stacklevel=2)
    self._setupGrab()
def _setupGrab(self):
    """sets up the grabber functions with the already stocked in urls for
    the mirror groups

    The URLGrabber is stored in self._grabfunc and the mirror group built
    on top of it in self._grab.
    """
    if self.failovermethod == 'roundrobin':
        group_class = urlgrabber.mirror.MGRandomOrder
    else:
        group_class = urlgrabber.mirror.MirrorGroup

    grab_opts = self._default_grabopts()
    self._grabfunc = URLGrabber(progress_obj=self.callback,
                                multi_progress_obj=self.multi_callback,
                                failure_callback=self.failure_obj,
                                interrupt_callback=self.interrupt_callback,
                                copy_local=self.copy_local,
                                reget='simple',
                                **grab_opts)

    def add_mc(url):
        # Attach the metalink's per-host settings to a mirror url, if the
        # metalink data knows the host.
        host = urlparse.urlsplit(url).netloc.split('@')[-1]
        mc = self.metalink_data._host2mc.get(host)
        if not mc:
            return url
        return {
            'mirror': misc.to_utf8(url),
            'kwargs': {
                'max_connections': mc.max_connections,
                'preference': mc.preference,
                'private': mc.private,
            },
        }

    urls = self.urls
    if self.metalink:
        urls = [add_mc(mirror_url) for mirror_url in urls]

    def mirror_failure(obj):
        action = {}

        # timeout, refused connect, and HTTP 503 may retry
        err = obj.exception
        may_retry = (err.errno == 12 or
                     (err.errno == 14 and
                      getattr(err, 'code', 0) in (7, 503)))
        if may_retry:
            tries = getattr(obj, 'tries', self.retries)
            if tries <= self.retries - len(self.urls):
                # don't remove this mirror yet
                action['remove'] = False
        elif err.errno == -3:
            # unsupported checksum type, fail now
            action['fail'] = True

        # No known user of this callback, but just in case...
        cb = self.mirror_failure_obj
        if cb:
            # Either a bare callable or a (callable, args, kwargs) triple.
            if callable(cb):
                fun, arg, karg = cb, (), {}
            else:
                fun, arg, karg = cb
            action.update(fun(obj, *arg, **karg))

        return action

    self._grab = group_class(self._grabfunc, urls,
                             failure_callback=mirror_failure)
def _default_grabopts(self, cache=True):
    """Return the keyword options shared by every URLGrabber of this repo.

    *cache* is forwarded to the http header builder.  Proxy handling is
    either 'libproxy' (when self.proxy is exactly that string) or the
    repo's proxy dict.
    """
    opts = dict(
        keepalive=self.keepalive,
        bandwidth=self.bandwidth,
        retry=self.retries,
        throttle=self.throttle,
        timeout=self.timeout,
        minrate=self.minrate,
        ip_resolve=self.ip_resolve,
        http_headers=tuple(self.__headersListFromDict(cache=cache)),
        ssl_verify_peer=self.sslverify,
        ssl_verify_host=self.sslverify,
        ssl_ca_cert=self.sslcacert,
        ssl_cert=self.sslclientcert,
        ssl_key=self.sslclientkey,
        user_agent=default_grabber.opts.user_agent,
        username=self.username,
        password=self.password,
        ftp_disable_epsv=self.ftp_disable_epsv,
    )

    if self.proxy != 'libproxy':
        opts['proxies'] = self.proxy_dict
    else:
        opts['libproxy'] = True
    return opts
def _getgrabfunc(self):
if not self._grabfunc or self._callbacks_changed:
self._setupGrab()
self._callbacks_changed = False
return self._grabfunc
def _getgrab(self):
if not self._grab or self._callbacks_changed:
self._setupGrab()
self._callbacks_changed = False
return self._grab
grabfunc = property(lambda self: self._getgrabfunc())
grab = property(lambda self: self._getgrab())
def _dirSetupMkdir_p(self, dpath):
"""make the necessary directory path, if possible, raise on failure"""
if os.path.exists(dpath) and os.path.isdir(dpath):
return
try:
os.makedirs(dpath, mode=0755)
except OSError, e:
msg = "%s: %s %s: %s" % ("Error making cache directory",
dpath, "error was", e)
raise Errors.RepoError, msg
def dirSetup(self):
    """make the necessary dirs, if possible, raise on failure

    Records the cache, package, header, persist and gpg directory paths
    plus the metadata cookie path as '_dir_setup_*' attributes, creates
    the cache directories and then preloads basic metadata files from the
    system cache.
    """
    cachedir = os.path.join(self.basecachedir, self.id)
    persistdir = os.path.join(self.base_persistdir, self.id)
    for name, path in (('cachedir', cachedir),
                       ('pkgdir', os.path.join(cachedir, 'packages')),
                       ('hdrdir', os.path.join(cachedir, 'headers')),
                       ('persistdir', persistdir)):
        self.setAttribute('_dir_setup_' + name, path)

    # The gpg directories get a '-ro' suffix when not running as euid 0.
    suffix = ''
    if os.geteuid() != 0:
        suffix = '-ro'
    self.setAttribute('_dir_setup_gpgdir', persistdir + '/gpgdir' + suffix)
    self.setAttribute('_dir_setup_gpgcadir', persistdir + '/gpgcadir' + suffix)

    cookie = self.cachedir + '/' + self.metadata_cookie_fn
    self.setAttribute('_dir_setup_metadata_cookie', cookie)

    for path in [self.cachedir, self.cachedir + '/gen', self.pkgdir]:
        self._dirSetupMkdir_p(path)

    # persistdir is really root-only but try the make anyway and just
    # catch the exception
    persist_path = self.persistdir
    try:
        self._dirSetupMkdir_p(persist_path)
    except Errors.RepoError:
        pass

    # if we're using a cachedir that's not the system one, copy over these
    # basic items from the system one
    if self._preload_md_from_system_cache('repomd.xml'):
        self._preloaded_repomd = True
    for filename in ('cachecookie', 'mirrorlist.txt', 'metalink.xml'):
        self._preload_md_from_system_cache(filename)
def _dirGetAttr(self, attr):
""" Make the directory attributes call .dirSetup() if needed. """
attr = '_dir_setup_' + attr
if not hasattr(self, attr):
self.dirSetup()
return getattr(self, attr)
def _dirSetAttr(self, attr, val):
""" Make the directory attributes call .dirSetup() if needed. """
attr = '_dir_setup_' + attr
if not hasattr(self, attr):
self.dirSetup()
if attr == '_dir_setup_pkgdir':
if not hasattr(self, '_old_pkgdirs'):
self._old_pkgdirs = []
self._old_pkgdirs.append(getattr(self, attr))
ret = setattr(self, attr, val)
if attr in ('_dir_setup_pkgdir', ):
self._dirSetupMkdir_p(val)
return ret
cachedir = property(lambda self: self._dirGetAttr('cachedir'))
persistdir = property(lambda self: self._dirGetAttr('persistdir'))
pkgdir = property(lambda self: self._dirGetAttr('pkgdir'),
lambda self, x: self._dirSetAttr('pkgdir', x))
hdrdir = property(lambda self: self._dirGetAttr('hdrdir'),
lambda self, x: self._dirSetAttr('hdrdir', x))
gpgdir = property(lambda self: self._dirGetAttr('gpgdir'),
lambda self, x: self._dirSetAttr('gpgdir', x))
gpgcadir = property(lambda self: self._dirGetAttr('gpgcadir'),
lambda self, x: self._dirSetAttr('gpgcadir', x))
metadata_cookie = property(lambda self: self._dirGetAttr('metadata_cookie'))
def baseurlSetup(self):
    """Deprecated public entry point; warns and calls _baseurlSetup()."""
    message = 'baseurlSetup() will go away in a future version of Yum.\n'
    warnings.warn(message, Errors.YumFutureDeprecationWarning, stacklevel=2)
    self._baseurlSetup()
def _hack_mirrorlist_for_anaconda(self):
# Anaconda doesn't like having mirrorlist and metalink, so we allow
# mirrorlist to act like metalink. Except we'd really like to know which
# we have without parsing it ... and want to store it in the right
# place etc.
# So here is #1 hack: see if the metalin kis unset and the mirrorlist
# URL contains the string "metalink", if it does we copy it over.
if self.metalink:
return
if not self.mirrorlist:
return
if self.mirrorlist.find("metalink") == -1:
return
self.metalink = self.mirrorlist
def _baseurlSetup(self):
    """go through the baseurls and mirrorlists and populate self.urls
    with valid ones, run self.check() at the end to make sure it worked"""
    self.baseurl = self._replace_and_check_url(self.baseurl)
    # FIXME: We put all the mirrors in .baseurl as well as
    # .urls for backward compat. (see bottom of func). So we'll save this
    # out for repolist -v ... or anything else wants to know the baseurl
    self._orig_baseurl = self.baseurl

    found_mirrors = []
    self._hack_mirrorlist_for_anaconda()
    if self.metalink and not self.mirrorlistparsed:
        # FIXME: This is kind of lying to API callers
        found_mirrors.extend(list(self.metalink_data.urls()))
        self.mirrorlistparsed = True
    if self.mirrorlist and not self.mirrorlistparsed:
        found_mirrors.extend(self._getMirrorList())
        self.mirrorlistparsed = True

    self.mirrorurls = self._replace_and_check_url(found_mirrors)
    self._urls = self.baseurl + self.mirrorurls

    # if our mirrorlist is just screwed then make sure we unlink a
    # mirrorlist cache
    if (not self._urls
            and hasattr(self, 'mirrorlist_file')
            and os.path.exists(self.mirrorlist_file)
            and not self.cache):
        try:
            misc.unlink_f(self.mirrorlist_file)
        except (IOError, OSError) as e:
            logger.error('Could not delete bad mirrorlist file: %s - %s' % (self.mirrorlist_file, e))
        else:
            logger.warning('removing mirrorlist with no valid mirrors: %s' % self.mirrorlist_file)

    # store them all back in baseurl for compat purposes
    self.baseurl = self._urls
    self.check()
def _replace_and_check_url(self, url_list):
goodurls = []
skipped = None
for url in url_list:
# obvious bogons get ignored b/c, we could get more interesting checks but <shrug>
if url in ['', None]:
continue
url = parser.varReplace(url, self.yumvar)
if url[-1] != '/':
url= url + '/'
try:
# This started throwing ValueErrors, BZ 666826
(s,b,p,q,f,o) = urlparse.urlparse(url)
except (ValueError, IndexError, KeyError), e:
s = 'blah'
if s not in ['http', 'ftp', 'file', 'https']:
skipped = url
continue
else:
goodurls.append(url)
if skipped is not None:
# Caller cleans up for us.
if goodurls:
logger.warning('YumRepo Warning: Some mirror URLs are not using ftp, http[s] or file.\n Eg. %s' % misc.to_utf8(skipped))
else: # And raises in this case
logger.error('YumRepo Error: All mirror URLs are not using ftp, http[s] or file.\n Eg. %s' % misc.to_utf8(skipped))
return goodurls
def _geturls(self):
if not self._urls:
self._baseurlSetup()
return self._urls
urls = property(fget=lambda self: self._geturls(),
fset=lambda self, value: setattr(self, "_urls", value),
fdel=lambda self: setattr(self, "_urls", None))
def _getMetalink(self):
if not self._metalink:
self.metalink_filename = self.cachedir + '/' + 'metalink.xml'
local = self.metalink_filename + '.tmp'
if not self._metalinkCurrent():
url = misc.to_utf8(self.metalink)
ugopts = self._default_grabopts(cache=self.http_caching=='all')
try:
ug = URLGrabber(progress_obj = self.callback, **ugopts)
result = ug.urlgrab(url, local, text="%s/metalink" % self.ui_id)
except URLGrabError, e:
if not os.path.exists(self.metalink_filename):
msg = ("Cannot retrieve metalink for repository: %s. "
"Please verify its path and try again" % self.ui_id )
raise Errors.RepoError, msg
# Now, we have an old usable metalink, so we can't move to
# a newer repomd.xml ... or checksums won't match.
logger.error("Could not get metalink %s error was\n%s: %s" % (url, e.args[0], misc.to_unicode(e.args[1])))
self._metadataCurrent = True
if not self._metadataCurrent:
try:
self._metalink = metalink.MetaLinkRepoMD(result)
shutil.move(result, self.metalink_filename)
except metalink.MetaLinkRepoErrorParseFail, e:
# Downloaded file failed to parse, revert (dito. above):
logger.error("Could not parse metalink %s error was \n%s" % (url, e))
self._metadataCurrent = True
misc.unlink_f(result)
if self._metadataCurrent:
self._metalink = metalink.MetaLinkRepoMD(self.metalink_filename)
return self._metalink
metalink_data = property(fget=lambda self: self._getMetalink(),
fset=lambda self, value: setattr(self, "_metalink",
value),
fdel=lambda self: setattr(self, "_metalink", None))
def _all_urls_are_files(self, url):
if url:
return url.startswith("/") or url.startswith("file:")
if not self.urls: # WTF ... but whatever.
return False
# Not an explicit url ... so make sure all mirrors/etc. are file://
for url in self.urls:
if not self._all_urls_are_files(url):
return False
return True
def _getFile(self, url=None, relative=None, local=None, start=None, end=None,
copy_local=None, checkfunc=None, text=None, reget='simple',
cache=True, size=None, **kwargs):
"""retrieve file from the mirrorgroup for the repo
relative to local, optionally get range from
start to end, also optionally retrieve from a specific baseurl"""
# if local or relative is None: raise an exception b/c that shouldn't happen
# if url is not None - then do a grab from the complete url - not through
# the mirror, raise errors as need be
# if url is None do a grab via the mirror group/grab for the repo
# return the path to the local file
# if copylocal isn't specified pickup the repo-defined attr
if copy_local is None:
copy_local = self.copy_local
if local is None or relative is None:
raise Errors.RepoError, \
"get request for Repo %s, gave no source or dest" % self.ui_id
if self.cache == 1:
if os.path.exists(local): # FIXME - we should figure out a way
return local # to run the checkfunc from here
else: # ain't there - raise
raise Errors.RepoError, \
"Caching enabled but no local cache of %s from %s" % (local,
self.ui_id)
if url:
(scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)
if self.mediaid and self.mediafunc:
discnum = 1
if url:
if scheme == "media" and fragid:
discnum = int(fragid)
try:
# FIXME: we need to figure out what really matters to
# pass to the media grabber function here
result = self.mediafunc(local = local, checkfunc = checkfunc, relative = relative, text = text, copy_local = copy_local, url = url, mediaid = self.mediaid, name = self.name, discnum = discnum, range = (start, end))
return result
except Errors.MediaError, e:
verbose_logger.log(logginglevels.DEBUG_2, "Error getting package from media; falling back to url %s" %(e,))
if size and (copy_local or not self._all_urls_are_files(url)):
dirstat = os.statvfs(os.path.dirname(local))
avail = dirstat.f_bavail * dirstat.f_bsize
if avail < long(size):
raise Errors.RepoError, _('''\
Insufficient space in download directory %s
* free %s
* needed %s'''
) % (os.path.dirname(local), format_number(avail), format_number(long(size)))
if url and scheme != "media":
ugopts = self._default_grabopts(cache=cache)
ug = URLGrabber(progress_obj = self.callback,
copy_local = copy_local,
reget = reget,
failure_callback = self.failure_obj,
interrupt_callback=self.interrupt_callback,
checkfunc=checkfunc,
size=size,
retry_no_cache=self._retry_no_cache,
**ugopts)
remote = url + '/' + relative
try:
result = ug.urlgrab(misc.to_utf8(remote), local,
text=misc.to_utf8(text),
range=(start, end),
)
except URLGrabError, e:
self._del_dl_file(local, size)
errstr = "failed to retrieve %s from %s\nerror was %s" % (relative, self, e)
e = Errors.RepoError(errstr)
e.repo = self
raise e
else:
headers = tuple(self.__headersListFromDict(cache=cache))
try:
result = self.grab.urlgrab(misc.to_utf8(relative), local,
text = misc.to_utf8(text),
range = (start, end),
copy_local=copy_local,
reget = reget,
checkfunc=checkfunc,
http_headers=headers,
size=size,
retry_no_cache=self._retry_no_cache,
**kwargs
)
except URLGrabError, e:
self._del_dl_file(local, size)
errstr = "failure: %s from %s: %s" % (relative, self, e)
errors = getattr(e, 'errors', None)
e = Errors.NoMoreMirrorsRepoError(errstr, errors)
e.repo = self
raise e
return result
__get = _getFile
def getPackage(self, package, checkfunc=None, text=None, cache=True, **kwargs):
    """Download *package* and return the result of the download call.

    A copy preloaded from the system cache is returned directly when it
    verifies.  Without a *checkfunc* the download is checked with
    package.verifyLocalPkg().

    Raises Errors.RepoError when a non-async download finished but the
    local package does not verify.
    """
    remote = package.relativepath
    local = package.localPkg()
    basepath = package.basepath

    if self._preload_pkg_from_system_cache(package):
        if package.verifyLocalPkg():
            return local
        misc.unlink_f(local)

    if checkfunc is None:
        def checkfunc(obj):
            if package.verifyLocalPkg():
                return
            misc.unlink_f(local)
            raise URLGrabError(-1, _('Package does not match intended download.'))

    # We would normally pass this to _getFile() directly but that could
    # break backward compatibility with plugins that override _getFile()
    # (BZ 1360532).
    self._retry_no_cache = self.http_caching == 'lazy:packages'

    try:
        ret = self._getFile(url=basepath,
                            relative=remote,
                            local=local,
                            checkfunc=checkfunc,
                            text=text,
                            cache=cache,
                            size=package.size,
                            **kwargs
                            )
    finally:
        self._retry_no_cache = False

    is_async = kwargs.get('async')
    if not is_async and not package.verifyLocalPkg():
        # Don't return as "success" when bad.
        msg = "Downloaded package %s, from %s, but it was invalid."
        msg = msg % (package, package.repo.id)
        raise Errors.RepoError(msg)

    return ret
def getHeader(self, package, checkfunc = None, reget = 'simple',
cache = True):
remote = package.relativepath
local = package.localHdr()
start = package.hdrstart
end = package.hdrend
size = end-start
basepath = package.basepath
# yes, I know, don't ask
if not os.path.exists(self.hdrdir):
os.makedirs(self.hdrdir)
return self._getFile(url=basepath, relative=remote, local=local, start=start,
reget=None, end=end, checkfunc=checkfunc, copy_local=1,
cache=cache, size=size,
)
def metadataCurrent(self):
    """ Return whether the cached metadata is still current.

        The metadata_cookie must be within metadata_expire (as judged by
        withinCacheAge()), repomd.xml must exist in cachedir and, when a
        metalink is configured, metalink.xml must exist there too.
        A value already stored in self._metadataCurrent is returned as is;
        only a positive answer is stored by this method, so that
        metalink/repomd.xml are synchronized. """
    remembered = self._metadataCurrent
    if remembered is not None:
        return remembered

    cookie_ok = self.withinCacheAge(self.metadata_cookie, self.metadata_expire)
    if not cookie_ok: # Normal path...
        return cookie_ok

    # Edge cases, both repomd.xml and metalink (if used). Must exist.
    if not os.path.exists(self.cachedir + '/' + 'repomd.xml'):
        return False

    self._hack_mirrorlist_for_anaconda()
    if self.metalink:
        if not os.path.exists(self.cachedir + '/' + 'metalink.xml'):
            return False

    self._metadataCurrent = True
    return True
# The metalink _shouldn't_ be newer than the repomd.xml or the checksums
# will be off, but we only really care when we are downloading the
# repomd.xml ... so keep it in mind that they can be off on disk.
# Also see _getMetalink()
def _metalinkCurrent(self):
if self._metadataCurrent is not None:
return self._metadataCurrent
if self.cache and not os.path.exists(self.metalink_filename):
raise Errors.RepoError, 'Cannot find metalink.xml file for %s' %self
if self.cache:
self._metadataCurrent = True
elif not os.path.exists(self.metalink_filename):
self._metadataCurrent = False
elif self.withinCacheAge(self.metadata_cookie, self.metadata_expire):
self._metadataCurrent = True
else:
self._metadataCurrent = False
return self._metadataCurrent
def _matchExpireFilter(self):
"""Return whether cache_req matches metadata_expire_filter."""
# Never/write means we just skip this...
if (hasattr(self, '_metadata_cache_req') and
self._metadata_cache_req.startswith("read-only:") and
self.metadata_expire_filter.startswith("read-only:")):
cache_filt = self.metadata_expire_filter[len("read-only:"):]
cache_req = self._metadata_cache_req[len("read-only:"):]
if cache_filt == 'future':
assert cache_req in ('past', 'present', 'future')
return True
if cache_filt == 'present':
if cache_req in ('past', 'present'):
return True
if cache_filt == 'past':
if cache_req == 'past':
return True
return False
def withinCacheAge(self, myfile, expiration_time, expire_req_filter=True):
    """ Check whether myfile is younger than expiration_time seconds. Used
        for the cachecookie and the mirrorlist.

        Returns True if within the expiration time limit, False if the limit
        has expired or the file does not exist. An expiration_time of -1
        means "never expires" for an existing file; it is also forced when
        expire_req_filter is true and _matchExpireFilter() matches.

        When self.check_config_file_age is set, a file older than
        self.repo_config_age (the newest .repo or yum.conf file) is
        treated as expired as well. """
    if expire_req_filter and self._matchExpireFilter():
        expiration_time = -1

    if not os.path.exists(myfile):
        return False

    # -1 is special and should never get refreshed
    if expiration_time == -1:
        return True

    mtime = os.stat(myfile)[8] # index 8 of the stat tuple is st_mtime
    # NOTE: a file stamped in the future also passes this test for any
    # non-negative expiration_time.
    within = (mtime + expiration_time > time.time())

    if not self.check_config_file_age:
        return within

    # make sure none of our config files for this repo are newer than
    # us
    if mtime < int(self.repo_config_age):
        return False
    return within
def setMetadataCookie(self):
    """ Touch (create or truncate) the metadata_cookie file, if possible.

        Writability is checked on the cookie itself when it exists and on
        cachedir otherwise; nothing is done when that check fails. """
    if os.path.exists(self.metadata_cookie):
        probe = self.metadata_cookie
    else:
        probe = self.cachedir

    if not os.access(probe, os.W_OK):
        return

    # Opening with 'w+' creates the file or empties an existing one.
    open(self.metadata_cookie, 'w+').close()
def setup(self, cache, mediafunc = None, gpg_import_func=None, confirm_func=None, gpgca_import_func=None):
try:
self.cache = cache
self.mediafunc = mediafunc
self.gpg_import_func = gpg_import_func
self.gpgca_import_func = gpgca_import_func
self.confirm_func = confirm_func
except Errors.RepoError, e:
raise
if not self.mediafunc and self.mediaid and not self.mirrorlist and not self.baseurl:
verbose_logger.log(logginglevels.DEBUG_2, "Disabling media repo for non-media-aware frontend")
self.enabled = False
self.skip_if_unavailable = True
def _cachingRepoXML(self, local):
""" Should we cache the current repomd.xml """
if self.cache and not os.path.exists(local):
raise Errors.RepoError, 'Cannot find repomd.xml file for %s' % self.ui_id
if self.cache or self.metadataCurrent():
return True
return False
def _getFileRepoXML(self, local, text=None, grab_can_fail=None):
""" Call _getFile() for the repomd.xml file. """
checkfunc = (self._checkRepoXML, (), {})
if grab_can_fail is None:
grab_can_fail = 'old_repo_XML' in self._oldRepoMDData
tfname = ''
try:
# This is named so that "yum clean metadata" picks it up
tfname = tempfile.mktemp(prefix='repomd', suffix="tmp.xml",
dir=os.path.dirname(local))
result = self._getFile(relative=self.repoMDFile,
local=tfname,
copy_local=1,
text=text,
reget=None,
checkfunc=checkfunc,
cache=self.http_caching == 'all',
size=102400) # setting max size as 100K
except URLGrabError, e:
misc.unlink_f(tfname)
if grab_can_fail:
return None
raise Errors.RepoError, 'Error downloading file %s: %s' % (local, e)
except Errors.RepoError:
misc.unlink_f(tfname)
if grab_can_fail:
return None
raise
# This should always work...
try:
os.rename(result, local)
except:
# But in case it doesn't...
misc.unlink_f(tfname)
if grab_can_fail:
return None
raise Errors.RepoError, 'Error renaming file %s to %s' % (result,
local)
return local
def _parseRepoXML(self, local, parse_can_fail=None):
    """ Parse the repomd.xml file at local and return the RepoMD object.

        On Errors.RepoMDError, returns None when parse_can_fail is true
        (default: whether old repomd data has been saved in
        self._oldRepoMDData) and raises Errors.RepoError otherwise. """
    try:
        parsed = repoMDObject.RepoMD(self.id, local)
    except Errors.RepoMDError as e:
        tolerate = parse_can_fail
        if tolerate is None:
            tolerate = 'old_repo_XML' in self._oldRepoMDData
        if not tolerate:
            raise Errors.RepoError('Error importing repomd.xml from %s: %s' % (self.ui_id, e))
        return None
    return parsed
def _saveOldRepoXML(self, local):
""" If we have an older repomd.xml file available, save it out. """
# Cleanup old trash...
for fname in glob.glob(self.cachedir + "/*.old.tmp"):
misc.unlink_f(fname)
if os.path.exists(local):
old_local = local + '.old.tmp' # locked, so this is ok
shutil.copy2(local, old_local)
xml = self._parseRepoXML(old_local, True)
if xml is None:
return None
self._oldRepoMDData = {'old_repo_XML' : xml, 'local' : local,
'old_local' : old_local, 'new_MD_files' : []}
return xml
return None
def _revertOldRepoXML(self):
""" If we have older data available, revert to it. """
# If we can't do a timestamp check, then we can be looking at a
# completely different repo. from last time ... ergo. we can't revert.
# We still want the old data, so we don't download twice. So we
# pretend everything is good until the revert.
if not self.timestamp_check:
raise Errors.RepoError, "Can't download or revert repomd.xml for %s" % self.ui_id
if 'old_repo_XML' not in self._oldRepoMDData:
self._oldRepoMDData = {}
return
# Unique names mean the rename doesn't work anymore.
for fname in self._oldRepoMDData['new_MD_files']:
misc.unlink_f(fname)
old_data = self._oldRepoMDData
self._oldRepoMDData = {}
if 'old_local' in old_data:
os.rename(old_data['old_local'], old_data['local'])
self._repoXML = old_data['old_repo_XML']
if 'old_MD_files' not in old_data:
return
for revert in old_data['old_MD_files']:
os.rename(revert + '.old.tmp', revert)
def _doneOldRepoXML(self):
""" Done with old data, delete it. """
old_data = self._oldRepoMDData
self._oldRepoMDData = {}
if 'old_local' in old_data:
misc.unlink_f(old_data['old_local'])
if 'old_MD_files' not in old_data:
return
for revert in old_data['old_MD_files']:
misc.unlink_f(revert + '.old.tmp')
def _get_mdtype_data(self, mdtype, repoXML=None):
if repoXML is None:
repoXML = self.repoXML
if mdtype == 'group' and 'group_gz' in repoXML.fileTypes():
mdtype = 'group_gz'
if (mdtype in ['other', 'filelists', 'primary'] and
self._check_db_version(mdtype + '_db', repoXML=repoXML)):
mdtype += '_db'
return (mdtype, repoXML.repoData.get(mdtype))
def _get_mdtype_fname(self, data, compressed=False):
(r_base, remote) = data.location
local = self.cachedir + '/' + os.path.basename(remote)
if compressed: # DB file, we need the uncompressed version
local = misc.decompress(local, fn_only=True)
return local
def _groupCheckDataMDNewer(self):
""" We check the timestamps, if any of the timestamps for the
"new" data is older than what we have ... we revert. """
if 'old_repo_XML' not in self._oldRepoMDData:
return True
old_repo_XML = self._oldRepoMDData['old_repo_XML']
if (self.timestamp_check and
old_repo_XML.timestamp > self.repoXML.timestamp):
logger.warning("Not using downloaded %s/repomd.xml because it is "
"older than what we have:\n"
" Current : %s\n Downloaded: %s" %
(self.id,
time.ctime(old_repo_XML.timestamp),
time.ctime(self.repoXML.timestamp)))
return False
return True
@staticmethod
def _checkRepoXMLMetalink(repoXML, repomd):
""" Check parsed repomd.xml against metalink.repomd data. """
if repoXML.timestamp != repomd.timestamp:
return False
if repoXML.length != repomd.size:
return False
done = False
for checksum in repoXML.checksums:
if checksum not in repomd.chksums:
continue
if repoXML.checksums[checksum] != repomd.chksums[checksum]:
return False
# All checksums should be trusted, but if we have more than one
# then we might as well check them all ... paranoia is good.
done = True
return done
def _checkRepoMetalink(self, repoXML=None, metalink_data=None):
""" Check the repomd.xml against the metalink data, if we have it. """
if repoXML is None:
repoXML = self._repoXML
if metalink_data is None:
metalink_data = self.metalink_data
if self._checkRepoXMLMetalink(repoXML, metalink_data.repomd):
return True
# FIXME: We probably want to skip to the first mirror which has the
# latest repomd.xml, but say "if we can't find one, use the newest old
# repomd.xml" ... alas. that's not so easy to do in urlgrabber atm.
for repomd in self.metalink_data.old_repomds:
if self._checkRepoXMLMetalink(repoXML, repomd):
verbose_logger.log(logginglevels.DEBUG_2,
"Using older repomd.xml\n"
" Latest: %s\n"
" Using: %s" %
(time.ctime(metalink_data.repomd.timestamp),
time.ctime(repomd.timestamp)))
return True
return False
def _latestRepoXML(self, local):
""" Save the Old Repo XML, and if it exists check to see if it's the
latest available given the metalink data. """
oxml = self._saveOldRepoXML(local)
if not oxml: # No old repomd.xml data
return False
self._hack_mirrorlist_for_anaconda()
if not self.metalink: # Nothing to check it against
return False
# Get the latest metalink, and the latest repomd data from it
repomd = self.metalink_data.repomd
if self.timestamp_check and oxml.timestamp > repomd.timestamp:
# We have something "newer" than the latest, and have timestamp
# checking which will kill anything passing the metalink check.
return True
# Do we have the latest repomd already
return self._checkRepoXMLMetalink(oxml, repomd)
def _commonLoadRepoXML(self, text, mdtypes=None):
    """ Common LoadRepoXML for instant and group, returns False if you
        should just return. """
    # NOTE: the mdtypes argument is not used in this method.
    local = self.cachedir + '/repomd.xml'
    # Already loaded: nothing to do.
    if self._repoXML is not None:
        return False

    if self._cachingRepoXML(local):
        # The cached file is to be used as is.
        caching = True
        result = local
    else:
        caching = False
        if self._latestRepoXML(local):
            # The old file on disk is already the latest one, reuse the
            # object that _saveOldRepoXML() parsed.
            result = local
            old_data = self._oldRepoMDData
            self._repoXML = old_data['old_repo_XML']
        else:
            result = self._getFileRepoXML(local, text)
            if result is None:
                # The download failed but was allowed to fail.
                if (self.skip_if_unavailable and hasattr(self, '_metadata_cache_req')
                    and self._metadata_cache_req in ('write', 'read-only:future')):
                    # Since skip_if_unavailable=True, we can just disable this repo
                    raise Errors.RepoError, "Can't download repomd.xml for %s" % self.ui_id
                # Ignore this as we have a copy
                self._revertOldRepoXML()
                return False
        # if we have a 'fresh' repomd.xml then update the cookie
        self.setMetadataCookie()
    # Parse the file unless the old parsed object was reused above.
    if self._repoXML is None:
        self._repoXML = self._parseRepoXML(result)
        if self._repoXML is None:
            # Parsing failed but was allowed to fail: go back to old data.
            self._revertOldRepoXML()
            return False
    if caching:
        return False # Skip any work.
    # Refuse a downloaded repomd.xml that is older than the one we had.
    if not self._groupCheckDataMDNewer():
        self._revertOldRepoXML()
        return False
    return True
def _check_db_version(self, mdtype, repoXML=None):
if self.mddownloadpolicy == 'xml':
return False
if repoXML is None:
repoXML = self.repoXML
if mdtype in repoXML.repoData:
if DBVERSION == repoXML.repoData[mdtype].dbversion:
return True
return False
# mmdtype is unused, but in theory was == primary
# dbmdtype == primary_db etc.
def _groupCheckDataMDValid(self, data, dbmdtype, mmdtype, file_check=False):
""" Check that we already have this data, and that it's valid. Given
the DB mdtype and the main mdtype (no _db suffix). """
if data is None:
return None
if not file_check:
compressed = False
local = self._get_mdtype_fname(data)
else:
compressed = False
local = self._get_mdtype_fname(data)
if not os.path.exists(local):
local = misc.decompress(local, fn_only=True)
compressed = True
# If we can, make a copy of the system-wide-cache version of this file,
# note that we often don't get here. So we also do this in
# YumPackageSack.populate ... and we look for the uncompressed versions
# in retrieveMD.
self._preload_md_from_system_cache(os.path.basename(local))
if not self._checkMD(local, dbmdtype, openchecksum=compressed,
data=data, check_can_fail=True):
return None
return local
def _commonRetrieveDataMD(self, mdtypes=None):
""" Retrieve any listed mdtypes, and revert if there was a failure.
Also put any of the non-valid mdtype files from the old_repo_XML
into the delete list, this means metadata can change filename
without us leaking it. """
downloading = self._commonRetrieveDataMD_list(mdtypes)
for (ndata, nmdtype) in downloading:
if not self._retrieveMD(nmdtype, retrieve_can_fail=True):
self._revertOldRepoXML()
return False
self._commonRetrieveDataMD_done(downloading)
return True
def _commonRetrieveDataMD_list(self, mdtypes):
    """ Return a list of metadata to be retrieved """
    # The result is a list of (data, mdtype) tuples. As a side effect, valid
    # old metadata files that differ from the new ones are renamed to
    # "<name>.old.tmp" and recorded so they can be reverted or deleted later.
    def _mdtype_eq(omdtype, odata, nmdtype, ndata):
        """ Check if two returns from _get_mdtype_data() are equal. """
        if ndata is None:
            return False
        if omdtype != nmdtype:
            return False
        if odata.checksum != ndata.checksum:
            return False
        # If we turn --unique-md-filenames on without changing the data,
        # then we'll get different filenames, but the same checksum.
        # Atm. just say they are different, to make sure we delete the
        # old files.
        orname = os.path.basename(odata.location[1])
        nrname = os.path.basename(ndata.location[1])
        if orname != nrname:
            return False
        return True
    # Start from the types we already know about (Python 2: keys() is a
    # list, which is appended to below).
    all_mdtypes = self.retrieved.keys()
    # Add in any extra stuff we don't know about.
    for mdtype in self.repoXML.fileTypes():
        if mdtype in all_mdtypes:
            continue
        # These variants are selected via _get_mdtype_data() from their
        # base types, so they are not listed on their own.
        if mdtype in ('primary_db', 'filelists_db', 'other_db', 'group_gz'):
            continue
        all_mdtypes.append(mdtype)
    # No explicit request means "everything".
    if mdtypes is None:
        mdtypes = all_mdtypes
    reverts = []
    if 'old_repo_XML' not in self._oldRepoMDData:
        old_repo_XML = None
    else:
        old_repo_XML = self._oldRepoMDData['old_repo_XML']
        # Shared list: entries appended below are visible to the
        # revert/done code through self._oldRepoMDData.
        self._oldRepoMDData['old_MD_files'] = reverts
    # Inited twice atm. ... sue me
    newmdfiles = self._oldRepoMDData['new_MD_files'] = []
    downloading = []
    for mdtype in all_mdtypes:
        (nmdtype, ndata) = self._get_mdtype_data(mdtype)
        if old_repo_XML:
            (omdtype, odata) = self._get_mdtype_data(mdtype,
                                                     repoXML=old_repo_XML)
            local = self._groupCheckDataMDValid(odata, omdtype,mdtype,True)
            if local:
                if _mdtype_eq(omdtype, odata, nmdtype, ndata):
                    continue # If they are the same do nothing
                # Move this version, we _may_ get a new one.
                # We delete it on success, revert it back on failure.
                # We don't copy as we know it's bad due to above test.
                os.rename(local, local + '.old.tmp')
                reverts.append(local)
                # This is the super easy way. We just to see if a generated
                # file is there for all files, but it should always work.
                # And anyone who is giving us MD with blah and blah.sqlite
                # which are different types, can play a game I like to call
                # "come here, ouch".
                gen_local = local + '.sqlite'
                if os.path.exists(gen_local):
                    os.rename(gen_local, gen_local + '.old.tmp')
                    reverts.append(gen_local)
        if ndata is None: # Doesn't exist in this repo
            continue
        # Old files of unrequested types were still moved aside above, but
        # only requested types are downloaded.
        if mdtype not in mdtypes:
            continue
        # No old repomd data, but we might still have uncompressed MD
        if self._groupCheckDataMDValid(ndata, nmdtype, mdtype):
            continue
        downloading.append((ndata, nmdtype))
        # Remember the target name so a revert can delete the new file.
        newmdfiles.append(self._get_mdtype_fname(ndata, False))
    return downloading
def _commonRetrieveDataMD_done(self, downloading):
""" Uncompress the downloaded metadata """
for (ndata, nmdtype) in downloading:
local = self._get_mdtype_fname(ndata, False)
self._doneOldRepoXML()
def _groupLoadRepoXML(self, text=None, mdtypes=None):
""" Retrieve the new repomd.xml from the repository, then check it
and parse it. If it fails we revert to the old version and pretend
that is fine. If the new repomd.xml requires new version of files
that we have, like updateinfo.xml, we download those too and if any
of those fail, we again revert everything and pretend old data is
good. """
if self._commonLoadRepoXML(text):
self._commonRetrieveDataMD(mdtypes)
def _mdpolicy2mdtypes(self):
md_groups = {'instant' : ['__None__'],
'group:primary' : ['primary'],
'group:small' : ["primary", "updateinfo", "group", "pkgtags"],
'group:main' : ["primary", "updateinfo", "group", "pkgtags",
"filelists", "prestodelta"]}
mdtypes = set()
if type(self.mdpolicy) in types.StringTypes:
mdtypes.update(md_groups.get(self.mdpolicy, [self.mdpolicy]))
else:
for mdpolicy in self.mdpolicy:
mdtypes.update(md_groups.get(mdpolicy, [mdpolicy]))
if not mdtypes or 'group:all' in mdtypes:
mdtypes = None
else:
mdtypes.discard("__None__")
mdtypes = sorted(list(mdtypes))
return mdtypes
def _loadRepoXML(self, text=None):
"""retrieve/check/read in repomd.xml from the repository"""
try:
return self._groupLoadRepoXML(text, self._mdpolicy2mdtypes())
except KeyboardInterrupt:
self._revertOldRepoXML() # Undo metadata cookie?
raise
raise Errors.RepoError, 'Bad loadRepoXML policy (for %s): %s' % (self.ui_id, self.mdpolicy)
def _getRepoXML(self):
if self._repoXML:
return self._repoXML
self._loadRepoXML(text=self.ui_id)
return self._repoXML
repoXML = property(fget=lambda self: self._getRepoXML(),
fset=lambda self, val: setattr(self, "_repoXML", val),
fdel=lambda self: setattr(self, "_repoXML", None))
def _checkRepoXML(self, fo):
    # Check function for a downloaded repomd.xml: optionally verifies its
    # detached GPG signature, makes sure it parses, and checks it against
    # the metalink data when a metalink is configured.
    # Raises URLGrabError when any of these checks fails; returns nothing.
    # fo is either an old-style instance carrying .filename or the path
    # itself.
    if type(fo) is types.InstanceType:
        filepath = fo.filename
    else:
        filepath = fo
    if self.repo_gpgcheck and not self._override_sigchecks:
        if misc.gpgme is None:
            raise URLGrabError(-1, 'pygpgme is not working so repomd.xml can not be verified for %s' % (self))
        sigfile = self.cachedir + '/repomd.xml.asc'
        try:
            # Fetch the detached signature into the cache directory.
            result = self._getFile(relative='repodata/repomd.xml.asc',
                                   copy_local=1,
                                   local = sigfile,
                                   text='%s/signature' % self.ui_id,
                                   reget=None,
                                   checkfunc=None,
                                   cache=self.http_caching == 'all',
                                   size=102400)
        except URLGrabError, e:
            raise URLGrabError(-1, 'Error finding signature for repomd.xml for %s: %s' % (self, e))
        valid = misc.valid_detached_sig(result, filepath, self.gpgdir)
        if not valid and self.gpg_import_func:
            # Give the frontend a chance to import keys, then check the
            # signature once more.
            try:
                self.gpg_import_func(self, self.confirm_func)
            except Errors.YumBaseError, e:
                raise URLGrabError(-1, 'Gpg Keys not imported, cannot verify repomd.xml for repo %s' % (self))
            valid = misc.valid_detached_sig(result, filepath, self.gpgdir)
        if not valid:
            raise URLGrabError(-1, 'repomd.xml signature could not be verified for %s' % (self))
    # The file has to parse, whether or not the signature was checked.
    try:
        repoXML = repoMDObject.RepoMD(self.id, filepath)
    except Errors.RepoMDError, e:
        raise URLGrabError(-1, 'Error importing repomd.xml for %s: %s' % (self, e))
    self._hack_mirrorlist_for_anaconda()
    if self.metalink and not self._checkRepoMetalink(repoXML):
        raise URLGrabError(-1, 'repomd.xml does not match metalink for %s' %
                           self)
def _del_dl_file(self, local, size):
    """ Delete a downloaded file unless it is an incomplete download.

        Nothing happens when local does not exist, or when size is given
        and the file is still smaller than size. A file of the expected
        size or bigger is unlinked so that it is fetched again. """
    info = misc.stat_f(local)
    if not info: # File doesn't exist...
        return

    still_partial = size and info.st_size < size
    if still_partial:
        return # Still more to get...

    # Is the correct size, or too big ... delete it so we'll try again.
    misc.unlink_f(local)
def checkMD(self, fn, mdtype, openchecksum=False):
    """ Check the metadata file fn of type mdtype against its checksum.

        Public wrapper which passes its arguments to _checkMD() and returns
        its result. """
    verdict = self._checkMD(fn, mdtype, openchecksum)
    return verdict
def _checkMD(self, fn, mdtype, openchecksum=False,
             data=None, check_can_fail=False):
    """ Internal function, use .checkMD() from outside yum. """
    # Returns 1 when the file matches the expected checksum. On a mismatch
    # or a checksum error it returns None if check_can_fail is set and
    # raises URLGrabError otherwise.
    thisdata = data # So the argument name is nicer
    if thisdata is None:
        thisdata = self.repoXML.getData(mdtype)
    # Note openchecksum means do it after you've uncompressed the data.
    if openchecksum:
        (r_ctype, r_csum) = thisdata.openchecksum # get the remote checksum
        size = thisdata.opensize
    else:
        (r_ctype, r_csum) = thisdata.checksum # get the remote checksum
        size = thisdata.size
    if type(fn) == types.InstanceType: # this is an urlgrabber check
        file = fn.filename
    else:
        file = fn
    if size is not None:
        size = int(size)
    # Fast path: presumably a checksum cached in the file's extended
    # attributes (helper defined elsewhere in this file) -- TODO confirm.
    l_csum = _xattr_get_chksum(file, r_ctype)
    if l_csum:
        fsize = misc.stat_f(file)
        if fsize is not None: # We just got an xattr, so it should be there
            # No expected size known: accept any non-empty file whose
            # cached checksum matches.
            if size is None and l_csum == r_csum and fsize.st_size > 0:
                return 1
            if size == fsize.st_size and l_csum == r_csum:
                return 1
        # Anything goes wrong, run the checksums as normal...
    try: # get the local checksum
        l_csum = self._checksum(r_ctype, file, datasize=size)
    except Errors.RepoError, e:
        if check_can_fail:
            return None
        raise URLGrabError(-3, 'Error performing checksum: %s' % e)
    if l_csum == r_csum:
        # Hand the verified checksum to the xattr helper for later calls.
        _xattr_set_chksum(file, r_ctype, l_csum)
        return 1
    else:
        if check_can_fail:
            return None
        raise URLGrabError(-1, 'Metadata file does not match checksum')
def retrieveMD(self, mdtype):
    """ Base function to retrieve metadata files from the remote url.

        Returns the path to the local metadata file of a 'mdtype'.
        mdtype can be 'primary', 'filelists', 'other' or 'group'.
        Public wrapper around _retrieveMD(). """
    local_path = self._retrieveMD(mdtype)
    return local_path
def _retrieveMD(self, mdtype, retrieve_can_fail=False, **kwargs):
    """ Internal function, use .retrieveMD() from outside yum. """
    # Returns the local path of the metadata file. On failure it returns
    # None when retrieve_can_fail is set and raises Errors.RepoError
    # otherwise. Extra keyword arguments are passed on to _getFile().
    # Note that this can raise Errors.RepoMDError if mdtype doesn't exist
    # for this repo.
    # FIXME - maybe retrieveMD should call decompress() after we've checked
    # the checksum by default? since we're never acting on compressed MD
    thisdata = self.repoXML.getData(mdtype)
    (r_base, remote) = thisdata.location
    fname = os.path.basename(remote)
    local = self.cachedir + '/' + fname
    if self.retrieved.get(mdtype):
        # got it, move along
        return local
    # Having preloaded the repomd means we should first try preloading this
    # file as well (forcing it this way is only needed when dealing with
    # simple filenames).
    if self._preloaded_repomd:
        misc.unlink_f(local)
    # Use an existing (or freshly preloaded) local copy if it verifies.
    if (os.path.exists(local) or
        self._preload_md_from_system_cache(os.path.basename(local))):
        if self._checkMD(local, mdtype, check_can_fail=True):
            self.retrieved[mdtype] = 1
            return local # it's the same return the local one
    # In cache mode nothing is downloaded: fail here instead.
    if self.cache == 1:
        if retrieve_can_fail:
            return None
        if os.path.exists(local):
            msg = "Caching enabled and local cache: %s does not match checksum" % local
        else:
            msg = "Caching enabled but no local cache of %s from %s" % (local, self.ui_id)
        raise Errors.RepoError, msg
    # Given the file already exists, is it a partial download of thisdata
    # that we can try to reget? With unique filenames, that's always.
    # With simple filenames, use the old expected checksum to verify
    # (assuming the existing file or part represents the old data but it
    # usually does).
    partial = True
    orepomd = self._oldRepoMDData.get('old_repo_XML')
    if orepomd is not None:
        odata = orepomd.repoData.get(mdtype)
        if odata is not None:
            ofname = os.path.basename(odata.location[1])
            partial = (fname != ofname or
                       thisdata.checksum == odata.checksum)
    try:
        # Check function handed to _getFile(): verifies the checksum and
        # marks the mdtype as retrieved on success.
        def checkfunc(obj):
            try:
                self.checkMD(obj, mdtype)
            except URLGrabError:
                # Don't share MD among mirrors, in theory we could use:
                # self._del_dl_file(local, int(thisdata.size))
                # ...but this is safer.
                misc.unlink_f(obj.filename)
                raise
            self.retrieved[mdtype] = 1
        text = "%s/%s" % (self.ui_id, mdtype)
        if thisdata.size is None or not partial:
            reget = None
        else:
            reget = 'simple'
            # Remove a complete or oversized leftover so that only a real
            # partial download is resumed.
            self._del_dl_file(local, int(thisdata.size))
        local = self._getFile(relative=remote,
                              local=local,
                              copy_local=1,
                              reget=reget,
                              checkfunc=checkfunc,
                              text=text,
                              cache=self.http_caching == 'all',
                              size=thisdata.size,
                              **kwargs)
    except Errors.RepoError:
        if retrieve_can_fail:
            return None
        raise
    except URLGrabError, e:
        if retrieve_can_fail:
            return None
        raise Errors.RepoError, \
            "Could not retrieve %s matching remote checksum from %s" % (local, self.ui_id)
    else:
        return local
def getPrimaryXML(self):
    """ Return the path to the primary metadata file, retrieving it if we
        need a new one (delegates to retrieveMD('primary')). """
    primary_path = self.retrieveMD('primary')
    return primary_path
def getFileListsXML(self):
    """ Return the path to the filelists metadata file, retrieving it if we
        need a new one (delegates to retrieveMD('filelists')). """
    filelists_path = self.retrieveMD('filelists')
    return filelists_path
def getOtherXML(self):
    """ Return the path to the 'other' metadata file, as given by
        retrieveMD('other'). """
    other_path = self.retrieveMD('other')
    return other_path
def getGroups(self):
    """ Get the groups file for the repository and return its path; returns
        None if there is none or retrieve/decompress fails.

        'group_gz' is preferred when the repo lists it and is decompressed
        to comps.xml; otherwise the plain 'group' data is retrieved. """
    if 'group_gz' not in self.repoXML.fileTypes():
        return self._retrieveMD('group', retrieve_can_fail=True)

    packed = self._retrieveMD('group_gz', retrieve_can_fail=True)
    if not packed:
        return packed

    try:
        return misc.repo_gen_decompress(packed, 'comps.xml', cached=self.cache)
    except IOError as e:
        logger.warning(e)
        return None
def setCallback(self, callback, multi_callback=None):
    """ Store the progress callback objects and flag that the callbacks
        have changed. """
    self._callbacks_changed = True
    self.multi_callback = multi_callback
    self.callback = callback
def setFailureObj(self, failure_obj):
    """ Store failure_obj as self.failure_obj and flag that the callbacks
        have changed. """
    self._callbacks_changed = True
    self.failure_obj = failure_obj
def setMirrorFailureObj(self, failure_obj):
    """ Store failure_obj as self.mirror_failure_obj and flag that the
        callbacks have changed. """
    self._callbacks_changed = True
    self.mirror_failure_obj = failure_obj
def setInterruptCallback(self, callback):
    """ Store callback as self.interrupt_callback and flag that the
        callbacks have changed. """
    self._callbacks_changed = True
    self.interrupt_callback = callback
def _readMirrorList(self, fo, url=None):
""" read the mirror list from the specified file object """
returnlist = []
content = []
if fo is not None:
try:
content = fo.readlines()
except Exception, e:
if url is None: # Shouldn't happen
url = "<unknown>"
logger.error("Could not read mirrorlist %s, error was \n%s" % (url, e))
content = []
for line in content:
if not re.match('\w+://\S+\s*$', line):
continue
mirror = line.rstrip() # no more trailing \n's
mirror = mirror.replace('$ARCH', '$BASEARCH')
returnlist.append(mirror)
return (returnlist, content)
def _getMirrorList(self):
"""retrieve an up2date-style mirrorlist file from our mirrorlist url,
also save the file to the local repo dir and use that if cache expiry
not expired
we also s/$ARCH/$BASEARCH/ and move along
return the baseurls from the mirrorlist file
"""
self.mirrorlist_file = self.cachedir + '/' + 'mirrorlist.txt'
fo = None
cacheok = False
if self.withinCacheAge(self.mirrorlist_file, self.mirrorlist_expire,
expire_req_filter=False):
cacheok = True
fo = open(self.mirrorlist_file, 'r')
url = 'file://' + self.mirrorlist_file # just to keep self._readMirrorList(fo,url) happy
else:
url = self.mirrorlist
scheme = urlparse.urlparse(url)[0]
if scheme == '':
url = 'file://' + url
ugopts = self._default_grabopts()
try:
fo = urlgrabber.grabber.urlopen(url, **ugopts)
except URLGrabError, e:
logger.error("Could not retrieve mirrorlist %s error was\n%s: %s" % (url, e.args[0], misc.to_unicode(e.args[1])))
fo = None
(returnlist, content) = self._readMirrorList(fo, url)
if returnlist:
if not self.cache and not cacheok:
output = open(self.mirrorlist_file, 'w')
for line in content:
output.write(line)
output.close()
elif not cacheok and os.path.exists(self.mirrorlist_file):
# New mirror file failed, so use the old one (better than nothing)
os.utime(self.mirrorlist_file, None)
return self._readMirrorList(open(self.mirrorlist_file, 'r'))[0]
return returnlist
def _preload_file(self, fn, destfn):
"""attempts to copy the file, if possible"""
# don't copy it if the copy in our users dir is newer or equal
if not os.path.exists(fn):
return False
if os.path.exists(destfn):
if os.stat(fn)[stat.ST_CTIME] <= os.stat(destfn)[stat.ST_CTIME]:
return False
try:
# IOError is the main culprit, with mode=600. But ignore everything.
shutil.copy2(fn, destfn)
except:
return False
return True
def _preload_file_from_system_cache(self, filename, subdir='',
destfn=None):
"""attempts to copy the file from the system-wide cache,
if possible"""
if not hasattr(self, 'old_base_cache_dir'):
return False
if self.old_base_cache_dir == "":
return False
glob_repo_cache_dir=os.path.join(self.old_base_cache_dir, self.id)
if not os.path.exists(glob_repo_cache_dir):
return False
if os.path.normpath(glob_repo_cache_dir) == os.path.normpath(self.cachedir):
return False
# Try to copy whatever file it is
fn = glob_repo_cache_dir + '/' + subdir + os.path.basename(filename)
if destfn is None:
destfn = self.cachedir + '/' + subdir + os.path.basename(filename)
return self._preload_file(fn, destfn)
def _preload_md_from_system_cache(self, filename):
"""attempts to copy the metadata file from the system-wide cache,
if possible"""
return self._preload_file_from_system_cache(filename)
def _preload_pkg_from_system_cache(self, pkg):
"""attempts to copy the package from the system-wide cache,
if possible"""
pname = os.path.basename(pkg.localPkg())
destfn = os.path.join(self.pkgdir, pname)
if self._preload_file_from_system_cache(pkg.localPkg(),
subdir='packages/',
destfn=destfn):
return True
if not hasattr(self, '_old_pkgdirs'):
return False
for opkgdir in self._old_pkgdirs:
if self._preload_file(os.path.join(opkgdir, pname), destfn):
return True
return False
def _verify_md(self):
problems = []
print 'verifying md'
try:
md_types = self.repoXML.fileTypes()
except Errors.RepoError, e:
prb = RepoVerifyProblem(1, "failed to load repomd.xml", str(e))
problems.append(prb)
return problems
for md_type in md_types:
print 'verifying %s' % md_type
try:
self.retrieveMD(md_type)
except Errors.RepoError, e:
msg = "%s metadata missing or does not match checksum" % md_type
prb = RepoVerifyProblem(2, msg, str(e))
problems.append(prb)
return problems
def _verify_comps(self):
    """ Fetch the groups (comps) file of this repo, check that it parses
        and return a list of RepoVerifyProblem objects.

        A problem is recorded when the file cannot be added to a
        comps.Comps object and when it holds no groups at all. Progress is
        printed to stdout. """
    print('verifying comps')
    problems = []
    # grab the comps for this repo
    # run the xmllint on it
    # chuck it into a comps object
    # make sure it parses
    grpfile = self.getGroups()
    # open it up as a file object so iterparse can cope with our compressed file
    if grpfile is not None:
        grpfile = misc.decompress(grpfile)
    try:
        c = comps.Comps()
        c.add(grpfile)
    except (Errors.GroupsError, Errors.CompsException) as e:
        msg = "comps file failed to add"
        prb = RepoVerifyProblem(REPO_PROBLEM_COMPS, msg, str(e))
        # problems is a list: append(), a list has no add() method.
        problems.append(prb)
    else:
        if c.compscount == 0:
            msg = "no groups in comps"
            prb = RepoVerifyProblem(REPO_PROBLEM_COMPS, msg, "")
            problems.append(prb)
    return problems
def _verify_packages(self):
return []
def verify(self, items=['repodata', 'comps']):
    """ Download/verify the specified items and return the list of problems
        found.

        @items = ['repodata', 'comps'] can include: repodata, comps, packages
        The comps check only runs when self.enablegroups is set. """
    found = []

    if 'repodata' in items:
        found.extend(self._verify_md())

    if 'comps' in items and self.enablegroups:
        found.extend(self._verify_comps())

    if 'packages' in items:
        found.extend(self._verify_packages())

    # what else can we verify?
    return found
def getMirrorList(mirrorlist, pdict = None):
    """ Retrieve an up2date-style mirrorlist file from a url,
        we also s/$ARCH/$BASEARCH/ and move along.

        mirrorlist -- url or plain path (a path gets a file:// prefix).
        pdict      -- passed as proxies= to urlopen().

        Returns a list of the urls from that file; blank lines and lines
        starting with # are skipped. The list is empty when a URLGrabError
        occurs while opening (a message is printed in that case).

        Deprecated: a YumFutureDeprecationWarning is issued on every call. """
    warnings.warn('getMirrorList() will go away in a future version of Yum.\n',
                  Errors.YumFutureDeprecationWarning, stacklevel=2)

    returnlist = []
    if hasattr(urlgrabber.grabber, 'urlopen'):
        urlresolver = urlgrabber.grabber
    else:
        import urllib
        urlresolver = urllib

    scheme = urlparse.urlparse(mirrorlist)[0]
    if scheme == '':
        url = 'file://' + mirrorlist
    else:
        url = mirrorlist

    try:
        fo = urlresolver.urlopen(url, proxies=pdict)
    except URLGrabError as e:
        print("Could not retrieve mirrorlist %s error was\n%s: %s" % (url, e.args[0], misc.to_unicode(e.args[1])))
        fo = None

    if fo is None:
        return returnlist

    try:
        content = fo.readlines()
    finally:
        # Do not leak the opened file / connection.
        fo.close()

    for line in content:
        if re.match('\s*(#|$)', line):
            continue
        mirror = line.rstrip() # no more trailing \n's
        mirror = mirror.replace('$ARCH', '$BASEARCH')
        returnlist.append(mirror)

    return returnlist
class RepoVerifyProblem:
    """ Holder for each "problem" we find with a repo.verify().

        Attributes are taken from the constructor arguments: type (problem
        code), message (from msg), details and fake (default False). """
    def __init__(self, type, msg, details, fake=False):
        self.fake = fake
        self.details = details
        self.message = msg
        self.type = type