Skip to content

Commit 96439cb

Browse files
committed
Implement fix from umccr/sash#19 (comment)
For MNV handling for PAVE
1 parent ce00931 commit 96439cb

File tree

2 files changed

+83
-1
lines changed

2 files changed

+83
-1
lines changed

bolt/common/constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
######################################
77
MAX_SOMATIC_VARIANTS = 500_000
88
MAX_SOMATIC_VARIANTS_GNOMAD_FILTER = 0.01
9+
MNV_COMPONENT_PROMOTION_AF_DELTA = 0.10
910

1011

1112
####################################
@@ -60,6 +61,7 @@ class VcfFilter(enum.Enum):
6061
PON = 'PON'
6162
ENCODE = 'ENCODE'
6263
GNOMAD_COMMON = 'gnomAD_common'
64+
MNV_COMBINED = 'mnv_combined'
6365

6466
@property
6567
def namespace(self):
@@ -186,6 +188,9 @@ def namespace(self):
186188
VcfFilter.GNOMAD_COMMON: {
187189
'Description': f'gnomAD AF >= {MAX_GNOMAD_AF}',
188190
},
191+
VcfFilter.MNV_COMBINED: {
192+
'Description': 'VARIANT IS AN MNV COMBINED CALL',
193+
},
189194

190195

191196
# INFO

bolt/workflows/smlv_somatic/annotate.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pathlib
2+
import collections
23

34

45
import click
@@ -72,6 +73,13 @@ def entry(ctx, **kwargs):
7273
output_dir,
7374
)
7475

76+
# Promote mnv_component variants to PASS when matching PASS MNV by MNVTAG and AF proximity
77+
promoted_fp = promote_mnv_components(
78+
pon_fp,
79+
kwargs['tumor_name'],
80+
output_dir,
81+
)
82+
7583
# Annotate with cancer-related and functional information from a range of sources using PCGR
7684
# - Select variants to process - there is an upper limit for PCGR of around 500k
7785
# - Set tumor and normal AF and DP in INFO for PCGR and remove all other annotations
@@ -86,7 +94,7 @@ def entry(ctx, **kwargs):
8694
# - Hits in PCAWG [INFO/PCGR_ICGC_PCAWG_COUNT]
8795
# Set selected data or full input
8896
selection_data = select_variants(
89-
pon_fp,
97+
promoted_fp,
9098
kwargs['tumor_name'],
9199
kwargs['cancer_genes_fp'],
92100
output_dir,
@@ -170,6 +178,75 @@ def panel_of_normal_annotations(input_fp, tumor_name, threads, pon_dir, output_d
170178
return output_fp
171179

172180

181+
def promote_mnv_components(input_fp, tumor_name, output_dir):
182+
"""Reconcile MNV combined and component calls using MNVTAG.
183+
https://github.com/umccr/sash/issues/19#issuecomment-3251447307
184+
185+
- Promote components: remove 'mnv_component' from FILTER; set PASS if no other filters
186+
- Demote combined: for non-component MNV-like records with the same MNVTAG, if tumor AF is
187+
within ±MNV_COMPONENT_PROMOTION_AF_DELTA of any component AF for that MNVTAG, set
188+
FILTER='mnv_combined' (only when the record is currently PASS)
189+
"""
190+
191+
# Collect component AFs per MNVTAG
192+
vcf = cyvcf2.VCF(input_fp)
193+
tumor_index = vcf.samples.index(tumor_name)
194+
component_afs_by_tag = collections.defaultdict(list)
195+
for record in vcf:
196+
mnvtag = record.INFO.get('MNVTAG')
197+
if not mnvtag:
198+
continue
199+
if 'mnv_component' in record.FILTERS:
200+
try:
201+
tumor_af = record.format('AF')[tumor_index, 0]
202+
except Exception:
203+
continue
204+
if tumor_af is None:
205+
continue
206+
component_afs_by_tag[mnvtag].append(float(tumor_af))
207+
208+
# Write out, applying promotions and combined demotion
209+
vcf = cyvcf2.VCF(input_fp)
210+
# Ensure header includes mnv_combined FILTER
211+
util.add_vcf_header_entry(vcf, constants.VcfFilter.MNV_COMBINED)
212+
213+
output_fp = output_dir / f'{tumor_name}.mnv_promoted.vcf.gz'
214+
output_fh = cyvcf2.Writer(output_fp, vcf, 'wz')
215+
216+
tumor_index = vcf.samples.index(tumor_name)
217+
af_delta = float(constants.MNV_COMPONENT_PROMOTION_AF_DELTA)
218+
for record in vcf:
219+
mnvtag = record.INFO.get('MNVTAG')
220+
if mnvtag and mnvtag in component_afs_by_tag:
221+
# Promote components: drop mnv_component; set PASS if no other filters
222+
if 'mnv_component' in record.FILTERS:
223+
other_filters = [f for f in record.FILTERS if f not in ('PASS', 'mnv_component')]
224+
record.FILTER = ';'.join(other_filters) if other_filters else 'PASS'
225+
else:
226+
# Demote combined PASS calls matching by MNVTAG and AF
227+
is_mnv_like = (len(record.REF) > 1) or any(len(a) > 1 for a in (record.ALT or []))
228+
if is_mnv_like:
229+
try:
230+
tumor_af = record.format('AF')[tumor_index, 0]
231+
except Exception:
232+
tumor_af = None
233+
if tumor_af is not None:
234+
tumor_af_value = round(float(tumor_af), 3)
235+
component_af_values = [round(v, 3) for v in component_afs_by_tag[mnvtag]]
236+
min_abs_af_diff = min(abs(tumor_af_value - v) for v in component_af_values)
237+
existing_non_pass = [f for f in record.FILTERS if f != 'PASS']
238+
if min_abs_af_diff <= af_delta and not existing_non_pass:
239+
record.FILTER = constants.VcfFilter.MNV_COMBINED.value
240+
241+
output_fh.write_record(record)
242+
243+
output_fh.close()
244+
245+
# Index output
246+
util.execute_command(f"bcftools index -t {output_fp}")
247+
return output_fp
248+
249+
173250
def select_variants(input_fp, tumor_name, cancer_genes_fp, output_dir):
174251
# Exclude variants until we hopefully move the needle below the threshold
175252

0 commit comments

Comments
 (0)