Skip to content

Commit ded6c9b

Browse files
authored
Merge pull request #604 from 4dn-dcic/ajs_250707_skip_fq_runs_on_restricted_not_uploaded
Ajs 250707 skip fq runs on restricted not uploaded
2 parents 04fab9c + 8a1e82c commit ded6c9b

4 files changed

Lines changed: 41 additions & 9 deletions

File tree

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ foursight
77
Change Log
88
----------
99

10+
4.9.28
11+
==========
12+
* update fastq checks to not run pipelines on pre-release status files that have not been uploaded; this is OK because those files will become restricted
13+
* md5run_uploaded, fastqc, fastq_first_line
14+
15+
1016
4.9.27
1117
==========
1218
* modify missing raw file check to not add badge if files are restricted and missing as this is expected in some cases

chalicelib_fourfront/checks/helpers/wfr_utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2072,3 +2072,19 @@ def limit_number_of_runs(check, my_auth):
20722072
check.summary = f'Limiting the number of workflow runs to {n_runs_max} every 6h'
20732073
check.full_output = {}
20742074
return check, n_runs_available
2075+
2076+
2077+
def prereleased_and_not_uploaded(connection, filemeta):
    """Report whether a file headed for restricted release has no raw upload.

    Files that will end up with restricted status are sometimes never
    uploaded, because no processing pipeline will run on them. For such
    files the basic QC steps (md5, fastq_first_line, FastQC) can be skipped.
    Files that do go through a pipeline are uploaded and should still get
    the usual QC.

    Args:
        connection: object exposing ``ff_env``, used to build the s3Utils helper.
        filemeta: file metadata mapping; ``status`` and ``upload_key`` are read.

    Returns:
        True when the status is 'pre-release' or 'restricted' and the upload
        key is not found in the raw file bucket; False otherwise.
    """
    # Any other status is outside the scope of this exemption.
    if filemeta.get('status') not in ('pre-release', 'restricted'):
        return False

    s3 = s3Utils(env=connection.ff_env)
    # Absence from the raw file bucket is taken to mean "never uploaded".
    found_in_raw_bucket = s3.does_key_exist(
        filemeta.get('upload_key'), s3.raw_file_bucket, False
    )
    return not found_in_raw_bucket

chalicelib_fourfront/checks/wfr_checks.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,6 @@ def md5run_status(connection, **kwargs):
161161
check.brief_output.append('did not complete checking all')
162162
break
163163

164-
# cnt of files to be triggered at this iteration of loop
165-
n_runs_to_trigger = len(missing_md5_to_start + not_switched_status)
166164
# find bucket
167165
if 'FileProcessed' in a_file['@type']:
168166
my_bucket = out_bucket
@@ -256,12 +254,21 @@ def md5run_uploaded_files(connection, **kwargs):
256254
query = '/search/?type=File&md5sum=No+value' + ''.join(['&status=' + s for s in statuses])
257255

258256
files = {}
259-
files['uploaded_without_md5run'] = [f['accession'] for f in ff_utils.search_metadata(
260-
query + '&workflow_run_inputs.workflow.title%21=md5+0.2.6', key=my_auth)]
261-
files['uploaded_with_md5run'] = [f['accession'] for f in ff_utils.search_metadata(
262-
query + '&workflow_run_inputs.workflow.title=md5+0.2.6', key=my_auth)]
257+
no_md5_files = ff_utils.search_metadata(query, key=my_auth)
258+
no_md5_files = [f for f in no_md5_files if not wfr_utils.prereleased_and_not_uploaded(connection, f)]
259+
for f in no_md5_files:
260+
wfrs = f.get('workflow_run_inputs')
261+
has_md5run = False
262+
for wfr in wfrs:
263+
if wfr.get('workflow', {}).get('title') == 'md5+0.2.6':
264+
has_md5run = True
265+
break
266+
if has_md5run:
267+
files.setdefault('files_with_md5run', []).append(f['accession'])
268+
else:
269+
files.setdefault('files_without_md5run', []).append(f['accession'])
263270

264-
if files['uploaded_without_md5run'] or files['uploaded_with_md5run']:
271+
if files.get('uploaded_without_md5run') or files.get('uploaded_with_md5run'):
265272
check.status = 'WARN'
266273
check.summary = 'Some files need md5 run before release'
267274
check.description = 'Some files with status updloaded or higher are missing md5sum'
@@ -358,6 +365,9 @@ def fastqc_status(connection, **kwargs):
358365
query += '&lab.display_title=' + lab
359366
# The search
360367
res = ff_utils.search_metadata(query, key=my_auth)
368+
# check for pre-released status files that have not been uploaded (because they will become restricted)
369+
# and remove from res
370+
res = [f for f in res if not wfr_utils.prereleased_and_not_uploaded(connection, f)]
361371
if not res:
362372
check.summary = 'All Good!'
363373
return check
@@ -2074,7 +2084,7 @@ def fastq_first_line_status(connection, **kwargs):
20742084
# The search
20752085
print('About to query ES for files')
20762086
res = ff_utils.search_metadata(query, key=my_auth)
2077-
2087+
res = [f for f in res if not wfr_utils.prereleased_and_not_uploaded(connection, f)]
20782088
if not res:
20792089
check.summary = "All good!"
20802090
return check

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "foursight"
3-
version = "4.9.27"
3+
version = "4.9.28"
44
description = "Serverless Chalice Application for Monitoring"
55
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
66
license = "MIT"

0 commit comments

Comments
 (0)