1
1
import pathlib
2
+ import collections
2
3
3
4
4
5
import click
@@ -72,6 +73,13 @@ def entry(ctx, **kwargs):
72
73
output_dir ,
73
74
)
74
75
76
+ # Promote mnv_component variants to PASS when matching PASS MNV by MNVTAG and AF proximity
77
+ promoted_fp = promote_mnv_components (
78
+ pon_fp ,
79
+ kwargs ['tumor_name' ],
80
+ output_dir ,
81
+ )
82
+
75
83
# Annotate with cancer-related and functional information from a range of sources using PCGR
76
84
# - Select variants to process - there is an upper limit for PCGR of around 500k
77
85
# - Set tumor and normal AF and DP in INFO for PCGR and remove all other annotations
@@ -86,7 +94,7 @@ def entry(ctx, **kwargs):
86
94
# - Hits in PCAWG [INFO/PCGR_ICGC_PCAWG_COUNT]
87
95
# Set selected data or full input
88
96
selection_data = select_variants (
89
- pon_fp ,
97
+ promoted_fp ,
90
98
kwargs ['tumor_name' ],
91
99
kwargs ['cancer_genes_fp' ],
92
100
output_dir ,
@@ -170,6 +178,75 @@ def panel_of_normal_annotations(input_fp, tumor_name, threads, pon_dir, output_d
170
178
return output_fp
171
179
172
180
181
+ def promote_mnv_components (input_fp , tumor_name , output_dir ):
182
+ """Reconcile MNV combined and component calls using MNVTAG.
183
+ https://github.com/umccr/sash/issues/19#issuecomment-3251447307
184
+
185
+ - Promote components: remove 'mnv_component' from FILTER; set PASS if no other filters
186
+ - Demote combined: for non-component MNV-like records with the same MNVTAG, if tumor AF is
187
+ within ±MNV_COMPONENT_PROMOTION_AF_DELTA of any component AF for that MNVTAG, set
188
+ FILTER='mnv_combined' (only when the record is currently PASS)
189
+ """
190
+
191
+ # Collect component AFs per MNVTAG
192
+ vcf = cyvcf2 .VCF (input_fp )
193
+ tumor_index = vcf .samples .index (tumor_name )
194
+ component_afs_by_tag = collections .defaultdict (list )
195
+ for record in vcf :
196
+ mnvtag = record .INFO .get ('MNVTAG' )
197
+ if not mnvtag :
198
+ continue
199
+ if 'mnv_component' in record .FILTERS :
200
+ try :
201
+ tumor_af = record .format ('AF' )[tumor_index , 0 ]
202
+ except Exception :
203
+ continue
204
+ if tumor_af is None :
205
+ continue
206
+ component_afs_by_tag [mnvtag ].append (float (tumor_af ))
207
+
208
+ # Write out, applying promotions and combined demotion
209
+ vcf = cyvcf2 .VCF (input_fp )
210
+ # Ensure header includes mnv_combined FILTER
211
+ util .add_vcf_header_entry (vcf , constants .VcfFilter .MNV_COMBINED )
212
+
213
+ output_fp = output_dir / f'{ tumor_name } .mnv_promoted.vcf.gz'
214
+ output_fh = cyvcf2 .Writer (output_fp , vcf , 'wz' )
215
+
216
+ tumor_index = vcf .samples .index (tumor_name )
217
+ af_delta = float (constants .MNV_COMPONENT_PROMOTION_AF_DELTA )
218
+ for record in vcf :
219
+ mnvtag = record .INFO .get ('MNVTAG' )
220
+ if mnvtag and mnvtag in component_afs_by_tag :
221
+ # Promote components: drop mnv_component; set PASS if no other filters
222
+ if 'mnv_component' in record .FILTERS :
223
+ other_filters = [f for f in record .FILTERS if f not in ('PASS' , 'mnv_component' )]
224
+ record .FILTER = ';' .join (other_filters ) if other_filters else 'PASS'
225
+ else :
226
+ # Demote combined PASS calls matching by MNVTAG and AF
227
+ is_mnv_like = (len (record .REF ) > 1 ) or any (len (a ) > 1 for a in (record .ALT or []))
228
+ if is_mnv_like :
229
+ try :
230
+ tumor_af = record .format ('AF' )[tumor_index , 0 ]
231
+ except Exception :
232
+ tumor_af = None
233
+ if tumor_af is not None :
234
+ tumor_af_value = round (float (tumor_af ), 3 )
235
+ component_af_values = [round (v , 3 ) for v in component_afs_by_tag [mnvtag ]]
236
+ min_abs_af_diff = min (abs (tumor_af_value - v ) for v in component_af_values )
237
+ existing_non_pass = [f for f in record .FILTERS if f != 'PASS' ]
238
+ if min_abs_af_diff <= af_delta and not existing_non_pass :
239
+ record .FILTER = constants .VcfFilter .MNV_COMBINED .value
240
+
241
+ output_fh .write_record (record )
242
+
243
+ output_fh .close ()
244
+
245
+ # Index output
246
+ util .execute_command (f"bcftools index -t { output_fp } " )
247
+ return output_fp
248
+
249
+
173
250
def select_variants (input_fp , tumor_name , cancer_genes_fp , output_dir ):
174
251
# Exclude variants until we hopefully move the needle below the threshold
175
252
0 commit comments