diff --git a/etc/crab-bash-completion.sh b/etc/crab-bash-completion.sh index 41650723..1f87a9ab 100644 --- a/etc/crab-bash-completion.sh +++ b/etc/crab-bash-completion.sh @@ -23,13 +23,13 @@ _UseCrab () "") case "$cur" in "") - COMPREPLY=( $(compgen -W '--version --help -h --quiet --debug status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy' -- $cur) ) + COMPREPLY=( $(compgen -W '--version --help -h --quiet --debug status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy setdatasetstatus setfilestatus' -- $cur) ) ;; -*) COMPREPLY=( $(compgen -W '--version --help -h --quiet --debug' -- $cur) ) ;; *) - COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy' -- $cur) ) + COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy setdatasetstatus setfilestatus' -- $cur) ) ;; esac ;; @@ -284,9 +284,29 @@ _UseCrab () esac ;; + "setdatasetstatus") + case "$cur" in + -*) + COMPREPLY=( $(compgen -W '--help -h --status --dataset' -- $cur) ) + ;; + *) + COMPREPLY=( $(compgen -f $cur) ) + esac + ;; + + "setfilestatus") + case "$cur" in + -*) + COMPREPLY=( $(compgen -W '--help -h --status --dataset --files' -- $cur) ) + ;; + *) + COMPREPLY=( $(compgen -f $cur) ) + esac + ;; + *) - COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername submit getoutput resubmit kill uploadlog remake report preparelocal' -- $cur) ) + COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername submit getoutput resubmit kill uploadlog remake report preparelocal setdatasetstatus setfilestatus' -- $cur) ) ;; esac diff --git 
a/src/python/CRABClient/ClientMapping.py b/src/python/CRABClient/ClientMapping.py index c6fad36a..d503e96a 100644 --- a/src/python/CRABClient/ClientMapping.py +++ b/src/python/CRABClient/ClientMapping.py @@ -135,6 +135,8 @@ 'checkusername' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': False, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False}, 'checkwrite' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': True, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': True, 'requiresLocalCache': False}, 'checkdataset' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': True, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False}, + 'setdatasetstatus' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': False, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False}, + 'setfilestatus' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': False, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False}, 'getlog' : {'acceptsArguments': False, 'requiresREST': True, 'requiresRucio': False, 'requiresDirOption': True, 'useCache': True, 'requiresProxyVOOptions': True, 'requiresLocalCache': True }, 'getoutput' : {'acceptsArguments': False, 'requiresREST': True, 'requiresRucio': True, 'requiresDirOption': True, 'useCache': True, 'requiresProxyVOOptions': True, 'requiresLocalCache': True }, 'kill' : {'acceptsArguments': False, 'requiresREST': True, 'requiresRucio': False, 'requiresDirOption': True, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': True }, diff --git a/src/python/CRABClient/Commands/setdatasetstatus.py b/src/python/CRABClient/Commands/setdatasetstatus.py new file mode 100644 index 00000000..9184d6b9 --- /dev/null +++ 
b/src/python/CRABClient/Commands/setdatasetstatus.py @@ -0,0 +1,167 @@ +# pylint: disable=consider-using-f-string, unspecified-encoding +""" +allow users to (in)validate their own DBS USER datasets +""" + +import sys +import json + +from CRABClient.Commands.SubCommand import SubCommand +from CRABClient.ClientExceptions import MissingOptionException, ConfigurationException, CommandFailedException +from CRABClient.ClientUtilities import colors +from CRABClient.CrabRestInterface import HTTPRequests + +if sys.version_info >= (3, 0): + from urllib.parse import urlencode # pylint: disable=E0611 +if sys.version_info < (3, 0): + from urllib import urlencode + +try: + from CRABClient import __version__ +except: # pylint: disable=bare-except + __version__ = '0.0.0' + + +def getDbsREST(instance=None, logger=None, cert=None, key=None, version=None): + """ + given a DBS instance (e.g. prod/phys03) returns a DBSReader and DBSWriter + client instances which communicate with DBS REST via curl + Arguments: + logger: a logger + cert, key : name of files, can use the path to X509_USER_PROXY for both + version: the CRAB Client version to put in the User Agent field of the query + """ + # if user supplied a simple prod/phys03 like instance, these two lines will do + # note that our HTTPRequests will add https:// + dbsReadUrl = "cmsweb.cern.ch:8443/dbs/" + instance + "/DBSReader/" + dbsWriteUrl = "cmsweb.cern.ch:8443/dbs/" + instance + "/DBSWriter/" + # a possible use case e.g. for testing is to use int instance of DBS. 
requires testbed CMSWEB + if instance.startswith('int'): + dbsReadUrl = dbsReadUrl.replace('cmsweb', 'cmsweb-testbed') + dbsWriteUrl = dbsWriteUrl.replace('cmsweb', 'cmsweb-testbed') + # if user knows better and provided a full URL, we'll take and adapt + # to have both Reader and Writer, + if instance.startswith("https://"): + url = instance[len("https://"):] # drop the scheme prefix (lstrip() strips a character set, not a prefix); HTTPRequests adds it back + if "DBSReader" in url: + dbsReadUrl = url + dbsWriteUrl = url.replace('DBSReader', 'DBSWriter') + elif 'DBSWriter' in url: + dbsWriteUrl = url + dbsReadUrl = url.replace('DBSWriter', 'DBSReader') + else: + raise ConfigurationException("bad instance value %s" % instance) + + logger.debug('Read Url = %s' % dbsReadUrl) + logger.debug('Write Url = %s' % dbsWriteUrl) + + dbsReader = HTTPRequests(hostname=dbsReadUrl, localcert=cert, localkey=key, + retry=2, logger=logger, verbose=False, contentType='application/json', + userAgent='CRABClient', version=version) + + dbsWriter = HTTPRequests(hostname=dbsWriteUrl, localcert=cert, localkey=key, + retry=2, logger=logger, verbose=False, contentType='application/json', + userAgent='CRABClient', version=version) + return dbsReader, dbsWriter + + +class setdatasetstatus(SubCommand): + """ + Set status of a USER dataset in phys03, + optionally invalidates/revalidates all files in it + meant to replace https://github.com/dmwm/DBS/blob/master/Client/utils/DataOpsScripts/DBS3SetDatasetStatus.py + and to work whenever CRAB is supported, i.e. 
with both python2 and python3 + """ + + name = 'setdatasetstatus' + + def __init__(self, logger, cmdargs=None): + SubCommand.__init__(self, logger, cmdargs) + + def __call__(self): + result = 'FAILED' # will change to 'SUCCESS' when all is OK + + instance = self.options.instance + dataset = self.options.dataset + status = self.options.status + recursive = self.options.recursive + self.logger.debug('instance = %s' % instance) + self.logger.debug('dataset = %s' % dataset) + self.logger.debug('status = %s' % status) + self.logger.debug('recursive = %s' % recursive) + + if recursive: + self.logger.warning("ATTENTION: recursive option is not implemented yet. Ignoring it") + + # from DBS instance, to DBS REST services + dbsReader, dbsWriter = getDbsREST(instance=instance, logger=self.logger, + cert=self.proxyfilename, key=self.proxyfilename, + version=__version__) + + self.logger.info("looking up Dataset %s in DBS %s" % (dataset, instance)) + datasetStatusQuery = {'dataset': dataset, 'dataset_access_type': '*', 'detail': True} + ds, rc, msg = dbsReader.get(uri="datasets", data=urlencode(datasetStatusQuery)) + self.logger.debug('exitcode= %s', rc) + if not ds: + self.logger.error("ERROR: dataset %s not found in DBS" % dataset) + raise ConfigurationException + self.logger.info("Dataset status in DBS is %s" % ds[0]['dataset_access_type']) + self.logger.info("Will set it to %s" % status) + data = {'dataset': dataset, 'dataset_access_type': status} + jdata = json.dumps(data) + out, rc, msg = dbsWriter.put(uri='datasets', data=jdata) + if rc == 200 and msg == 'OK': + self.logger.info("Dataset status changed successfully") + result = 'SUCCESS' + else: + msg = "Dataset status change failed: %s" % out + raise CommandFailedException(msg) + + ds, rc, msg = dbsReader.get(uri="datasets", data=urlencode(datasetStatusQuery)) + self.logger.debug('exitcode= %s', rc) + self.logger.info("Dataset status in DBS now is %s" % ds[0]['dataset_access_type']) + + return {'commandStatus': result} + 
+ def setOptions(self): + """ + __setOptions__ + + This allows to set specific command options + """ + self.parser.add_option('--instance', dest='instance', default='prod/phys03', + help="DBS instance. e.g. prod/phys03 (default) or int/phys03. Use at your own risk." + \ + " Unless you really know what you are doing, stay with the default" + ) + self.parser.add_option('--dataset', dest='dataset', default=None, + help='dataset name') + self.parser.add_option('--status', dest='status', default=None, + help="New status of the dataset: VALID/INVALID/DELETED/DEPRECATED", + choices=['VALID', 'INVALID', 'DELETED', 'DEPRECATED'] + ) + self.parser.add_option('--recursive', dest='recursive', default=False, action="store_true", + help="Apply status to children datasets and sets all files status in those" + \ + " to VALID if status=VALID, INVALID otherwise" + ) + + def validateOptions(self): + SubCommand.validateOptions(self) + + if self.options.dataset is None: + msg = "%sError%s: Please specify the dataset to check." % (colors.RED, colors.NORMAL) + msg += " Use the --dataset option." + ex = MissingOptionException(msg) + ex.missingOption = "dataset" + raise ex + if self.options.status is None: + msg = "%sError%s: Please specify the new dataset status." % (colors.RED, colors.NORMAL) + msg += " Use the --status option." + ex = MissingOptionException(msg) + ex.missingOption = "status" + raise ex + # minimal sanity check + instance = self.options.instance + if not '/' in instance or len(instance.split('/'))>2 and not instance.startswith('https://'): + msg = "Bad instance value %s. 
" % instance + msg += "Use either server/db format or full URL" + raise ConfigurationException(msg) diff --git a/src/python/CRABClient/Commands/setfilestatus.py b/src/python/CRABClient/Commands/setfilestatus.py new file mode 100644 index 00000000..3d5405c9 --- /dev/null +++ b/src/python/CRABClient/Commands/setfilestatus.py @@ -0,0 +1,156 @@ +# pylint: disable=consider-using-f-string, unspecified-encoding +""" +allow users to (in)validate some files in their USER datasets in phys03 +""" + +import json + +from CRABClient.Commands.SubCommand import SubCommand +from CRABClient.ClientExceptions import MissingOptionException, ConfigurationException, CommandFailedException +from CRABClient.ClientUtilities import colors +from CRABClient.Commands.setdatasetstatus import getDbsREST + +try: + from CRABClient import __version__ +except: # pylint: disable=bare-except + __version__ = '0.0.0' + + +class setfilestatus(SubCommand): + """ + Set status of a USER dataset in phys03, + optionally invalidates/revalidates all files in it + meant to replace https://github.com/dmwm/DBS/blob/master/Client/utils/DataOpsScripts/DBS3SetDatasetStatus.py + and to work whenever CRAB is supported, i.e. with both python2 and python3 + """ + + name = 'setfilestatus' + + def __init__(self, logger, cmdargs=None): + SubCommand.__init__(self, logger, cmdargs) + + def __call__(self): + + result = 'FAILED' # will change to 'SUCCESS' when all is OK + + # intitalize, and validate args + instance = self.options.instance + dataset = self.options.dataset + files = self.options.files + status = self.options.status + self.logger.debug('instance = %s' % instance) + self.logger.debug('dataset = %s' % dataset) + self.logger.debug('files = %s' % files) + self.logger.debug('status = %s' % status) + + statusToSet = 1 if status == 'VALID' else 0 + + filesToChange = None + if files: + # did the user specify the name of a file containing a list of LFN's ? 
+ try: + with open(files, 'r') as f: + flist = [lfn.strip() for lfn in f] + filesToChange = ','.join(flist) + except IOError: + # no. Assume we have a comma separated list of LFN's (a single LFN is also OK) + filesToChange = files.strip(",").strip() + finally: + # files and dataset options are mutually exclusive + dataset = None + if ',' in filesToChange: + raise NotImplementedError('list of LFNs is not supported yet') + + # from DBS instance, to DBS REST services + dbsReader, dbsWriter = getDbsREST(instance=instance, logger=self.logger, + cert=self.proxyfilename, key=self.proxyfilename, + version=__version__) + # we will need the dataset name + if dataset: + datasetName = dataset + else: + # get it from DBS + lfn = filesToChange.split(',')[0] + query = {'logical_file_name': lfn} + out, rc, msg = dbsReader.get(uri='datasets', data=query) + if not out: + self.logger.error("ERROR: file %s not found in DBS" % lfn) + raise ConfigurationException + datasetName = out[0]['dataset'] + self.logger.info('LFN to be changed belongs to dataset %s' % datasetName) + + # when acting on a list of LFN's, can't print status of all files before/after + # best we can do is to print the number of valid/invalid file in the dataset + # before/after. 
+ + self.logFilesTally(dataset=datasetName, dbs=dbsReader) + + if filesToChange: + data = {'logical_file_name': filesToChange, 'is_file_valid': statusToSet} + if dataset: + data = {'dataset': dataset, 'is_file_valid': statusToSet} + jdata = json.dumps(data) # PUT requires data in JSON format + out, rc, msg = dbsWriter.put(uri='files', data=jdata) + if rc == 200 and msg == 'OK': + self.logger.info("File(s) status changed successfully") + result = 'SUCCESS' + else: + msg = "File(s) status change failed: %s" % out + raise CommandFailedException(msg) + + self.logFilesTally(dataset=datasetName, dbs=dbsReader) + + return {'commandStatus': result} + + def logFilesTally(self, dataset=None, dbs=None): + """ prints total/valid/invalid files in dataset """ + query = {'dataset': dataset, 'validFileOnly': 1} + out, _, _ = dbs.get(uri='files', data=query) + valid = len(out) + query = {'dataset': dataset, 'validFileOnly': 0} + out, _, _ = dbs.get(uri='files', data=query) + total = len(out) + invalid = total - valid + self.logger.info("Dataset file count total/valid/invalid = %d/%d/%d" % (total, valid, invalid)) + + def setOptions(self): + """ + __setOptions__ + + This allows to set specific command options + """ + self.parser.add_option('-i', '--instance', dest='instance', default='prod/phys03', + help='DBS instance. e.g. prod/phys03 (default) or int/phys03' + ) + self.parser.add_option('-d', '--dataset', dest='dataset', default=None, + help='Will apply status to all files in this dataset.' + \ + ' Use either --files or --dataset', + metavar='') + self.parser.add_option('-s', '--status', dest='status', default=None, + help='New status of the file(s): VALID/INVALID', + choices=['VALID', 'INVALID'] + ) + self.parser.add_option('-f', '--files', dest='files', default=None, + help='List of files to be validated/invalidated.' + \ + ' Can be either a simple LFN or a file containing LFNs or' + \ + ' a comma separated list of LFNs. 
Use either --files or --dataset', + metavar="") + + def validateOptions(self): + SubCommand.validateOptions(self) + + if not self.options.files and not self.options.dataset: + msg = "%sError%s: Please specify the files to change." % (colors.RED, colors.NORMAL) + msg += " Use either the --files or the --dataset option." + ex = MissingOptionException(msg) + ex.missingOption = "files" + raise ex + if self.options.files and self.options.dataset: + msg = "%sError%s: You can not use both --files and --dataset at same time" % (colors.RED, colors.NORMAL) + raise ConfigurationException(msg) + if self.options.status is None: + msg = "%sError%s: Please specify the new file(s) status." % (colors.RED, colors.NORMAL) + msg += " Use the --status option." + ex = MissingOptionException(msg) + ex.missingOption = "status" + raise ex diff --git a/src/python/CRABClient/CrabRestInterface.py b/src/python/CRABClient/CrabRestInterface.py index 545ce982..b1e04e59 100644 --- a/src/python/CRABClient/CrabRestInterface.py +++ b/src/python/CRABClient/CrabRestInterface.py @@ -1,3 +1,4 @@ +# pylint: disable=consider-using-f-string """ Handles client interactions with remote REST interface """ @@ -84,11 +85,11 @@ def parseResponseHeader(response): class HTTPRequests(dict): """ This code forks a subprocess which executes curl to communicate - with CRAB REST. 
+ with CRAB or other REST servers which returns JSON """ - def __init__(self, hostname='localhost', localcert=None, localkey=None, version=__version__, - retry=0, logger=None, verbose=False, userAgent='CRAB?'): + def __init__(self, hostname='localhost', localcert=None, localkey=None, contentType=None, + retry=0, logger=None, version=__version__, verbose=False, userAgent='CRAB?'): """ Initialise an HTTP handler """ @@ -112,6 +113,7 @@ def __init__(self, hostname='localhost', localcert=None, localkey=None, version= self.setdefault("retry", retry) self.setdefault("verbose", verbose) self.setdefault("userAgent", userAgent) + self.setdefault("Content-type", contentType) self.logger = logger if logger else logging.getLogger() def get(self, uri=None, data=None): @@ -182,6 +184,8 @@ def makeRequest(self, uri=None, data=None, verb='GET'): command += '/cvmfs/cms.cern.ch/cmsmon/gocurl -verbose 2 -method {0}'.format(verb) command += ' -header "User-Agent: %s/%s"' % (self['userAgent'], self['version']) command += ' -header "Accept: */*"' + if self['Content-type']: + command += ' -header "Content-type: %s"' % self['Content-type'] if verb in ['POST', 'PUT']: command += ' -header "Content-Type: application/x-www-form-urlencoded"' command += ' -data "@%s"' % path @@ -193,6 +197,8 @@ def makeRequest(self, uri=None, data=None, verb='GET'): command += 'curl -v -X {0}'.format(verb) command += ' -H "User-Agent: %s/%s"' % (self['userAgent'], self['version']) command += ' -H "Accept: */*"' + if self['Content-type']: + command += ' -H "Content-type: %s"' % self['Content-type'] command += ' --data @%s' % path command += ' --cert "%s"' % self['cert'] command += ' --key "%s"' % self['key'] @@ -203,7 +209,8 @@ def makeRequest(self, uri=None, data=None, verb='GET'): # retries are counted AFTER 1st try, so call is made up to nRetries+1 times ! 
nRetries = max(2, self['retry']) for i in range(nRetries + 1): - stdout, stderr, curlExitCode = execute_command(command=command, logger=None) + curlLogger = self.logger if self['verbose'] else None + stdout, stderr, curlExitCode = execute_command(command=command, logger=curlLogger) http_code, http_reason = parseResponseHeader(stderr) if curlExitCode != 0 or http_code != 200: @@ -260,8 +267,9 @@ class CRABRest: def __init__(self, hostname='localhost', localcert=None, localkey=None, version=__version__, retry=0, logger=None, verbose=False, userAgent='CRAB?'): - self.server = HTTPRequests(hostname, localcert, localkey, version, - retry, logger, verbose, userAgent) + self.server = HTTPRequests(hostname=hostname, localcert=localcert, localkey=localkey, + version=version, + retry=retry, logger=logger, verbose=verbose, userAgent=userAgent) instance = 'prod' self.uriNoApi = '/crabserver/' + instance + '/'