11#!/usr/bin/env python3
22
3- import xml .etree .ElementTree as ET
43import argparse
5- import pathlib
6- import json
74import hashlib
5+ import json
6+ import pathlib
7+ import xml .etree .ElementTree as ET
8+
89from ofac_scraper import OfacWebsiteScraper
910
# Label prefix of every digital-currency <FeatureType>; the asset name is
# appended directly after it (see feature_type_text()).
FEATURE_TYPE_TEXT = "Digital Currency Address - "

# Prefix -> URI mapping passed to the ElementTree find()/findall() calls.
NAMESPACE = {
    "sdn": (
        "https://sanctionslistservice.ofac.treas.gov"
        "/api/PublicationPreview/exports/ADVANCED_XML"
    ),
}

# List of implemented output formats
OUTPUT_FORMATS = ["TXT", "JSON"]

# Name of the SDN XML file; used as the default input path and quoted in the
# checksum file.
SDN_ADVANCED_FILE_PATH = "sdn_advanced.xml"
1721
22+
def parse_arguments():
    """Define the command-line interface and parse the process arguments.

    Returns:
        argparse.Namespace with these attributes:

        * ``assets``  - list of asset names given positionally; empty when
          none were passed.
        * ``sdn``     - the SDN XML file opened in binary mode.
        * ``format``  - a single format string when ``-f`` is omitted
          (the default is ``OUTPUT_FORMATS[0]``), otherwise a list of the
          chosen formats.
        * ``outpath`` - ``pathlib.Path`` of the output directory.
    """
    parser = argparse.ArgumentParser(
        description="Tool to extract sanctioned digital currency addresses from the "
        "OFAC special designated nationals XML file (sdn_advanced.xml)"
    )
    parser.add_argument(
        "assets",
        nargs="*",
        default=[],
        # An empty list makes main() fall back to every asset present in the
        # XML, so the help text must not promise a single-asset default.
        help="the asset(s) for which the sanctioned addresses should be extracted "
        "(default: all assets found in the SDN file)",
    )
    parser.add_argument(
        "-sdn",
        "--special-designated-nationals-list",
        dest="sdn",
        # String defaults are run through ``type`` as well, so the default
        # path is opened just like a user-supplied one.
        type=argparse.FileType("rb"),
        help="the path to the sdn_advanced.xml file (can be downloaded from "
        "https://www.treasury.gov/ofac/downloads/sanctions/1.0/sdn_advanced.xml)",
        default=SDN_ADVANCED_FILE_PATH,
    )
    parser.add_argument(
        "-f",
        "--output-format",
        dest="format",
        nargs="*",
        choices=OUTPUT_FORMATS,
        # Kept as a bare string for backward compatibility; callers already
        # handle both the str and the list form.
        default=OUTPUT_FORMATS[0],
        help="the output file format of the address list (default: TXT)",
    )
    parser.add_argument(
        "-path",
        "--output-path",
        dest="outpath",
        type=pathlib.Path,
        default=pathlib.Path("./"),
        help="the path where the lists should be written to (default: current working "
        'directory ("./"))',
    )
    return parser.parse_args()
3063
64+
def feature_type_text(asset):
    """Return the <FeatureType> label expected for ``asset``.

    The label is the fixed digital-currency prefix followed by the asset name.
    """
    prefix = "Digital Currency Address - "
    return prefix + asset
3468
69+
def get_possible_assets(root):
    """Return the digital currency assets listed in the parsed XML.

    Args:
        root: root element of the parsed SDN document.

    Returns:
        list of asset names, one per FeatureType whose text starts with the
        digital-currency prefix, in document order.
    """
    # Must match the label built by feature_type_text() exactly.
    prefix = "Digital Currency Address - "
    feature_types = root.findall(
        "sdn:ReferenceValueSets/sdn:FeatureTypeValues/sdn:FeatureType", NAMESPACE
    )
    return [
        # Slice instead of str.replace() so only the leading prefix is removed.
        feature_type.text[len(prefix):]
        for feature_type in feature_types
        # An element without text has ``text is None``; skip it rather than crash.
        if feature_type.text is not None and feature_type.text.startswith(prefix)
    ]
4683
84+
def get_address_id(root, asset):
    """Return the ``ID`` attribute of the FeatureType describing ``asset``.

    Args:
        root: root element of the parsed SDN document.
        asset: asset name appended to the digital-currency label prefix.

    Raises:
        LookupError: no child of FeatureTypeValues has the expected label as
            its text.
    """
    label = feature_type_text(asset)
    xpath = f"sdn:ReferenceValueSets/sdn:FeatureTypeValues/*[.='{label}']"
    match = root.find(xpath, NAMESPACE)
    if match is None:
        raise LookupError(f"No FeatureType with the name {label} found")
    return match.attrib["ID"]
5698
5799
def get_sanctioned_addresses(root, address_id):
    """Collect the addresses recorded for one feature type.

    Args:
        root: root element of the parsed SDN document.
        address_id: value matched against the ``FeatureTypeID`` attribute.

    Returns:
        list with the text of every VersionDetail element found below a
        matching element inside DistinctParties, in document order.
    """
    feature_xpath = f"sdn:DistinctParties//*[@FeatureTypeID='{address_id}']"
    return [
        detail.text
        for feature in root.findall(feature_xpath, NAMESPACE)
        for detail in feature.findall(".//sdn:VersionDetail", NAMESPACE)
    ]
@@ -72,14 +116,15 @@ def write_addresses(addresses, asset, output_formats, outpath):
72116
73117
def write_addresses_txt(addresses, asset, outpath):
    """Write ``addresses`` to ``sanctioned_addresses_<asset>.txt``, one per line.

    Args:
        addresses: iterable of address strings.
        asset: asset name used in the file name.
        outpath: directory (str or pathlib.Path) the file is created in.
    """
    # Join with pathlib rather than string formatting so separators are handled
    # for us, and pin the encoding so output does not depend on the locale.
    target = pathlib.Path(outpath) / f"sanctioned_addresses_{asset}.txt"
    with open(target, "w", encoding="utf-8") as out:
        out.writelines(address + "\n" for address in addresses)
78122
79123
def write_addresses_json(addresses, asset, outpath):
    """Write ``addresses`` to ``sanctioned_addresses_<asset>.json``.

    The file holds the JSON encoding of ``addresses`` indented by two spaces
    and ends with a newline.

    Args:
        addresses: JSON-serialisable collection of address strings.
        asset: asset name used in the file name.
        outpath: directory (str or pathlib.Path) the file is created in.
    """
    # Join with pathlib rather than string formatting, and pin the encoding so
    # output does not depend on the locale.
    target = pathlib.Path(outpath) / f"sanctioned_addresses_{asset}.json"
    with open(target, "w", encoding="utf-8") as out:
        json.dump(addresses, out, indent=2)
        out.write("\n")
127+
83128
84129def compute_sha256 (file_path ):
85130 sha256_hash = hashlib .sha256 ()
@@ -88,10 +133,12 @@ def compute_sha256(file_path):
88133 sha256_hash .update (chunk )
89134 return sha256_hash .hexdigest ()
90135
136+
def write_checksum_file(sha256, checksum_file_path):
    """Write a one-line ``SHA256(<sdn file name>) = <digest>`` record.

    Args:
        sha256: digest text to record.
        checksum_file_path: path of the file to create or overwrite.
    """
    record = f"SHA256({SDN_ADVANCED_FILE_PATH}) = {sha256}\n"
    with open(checksum_file_path, "w") as checksum_file:
        checksum_file.write(record)
94140
141+
95142def main ():
96143 args = parse_arguments ()
97144
@@ -109,17 +156,17 @@ def main():
109156 tree = ET .parse (args .sdn )
110157 root = tree .getroot ()
111158
112- assets = list ()
113- if type (args .assets ) == str :
159+ assets = []
160+ if isinstance (args .format , str ) :
114161 assets .append (args .assets )
115162 else :
116163 assets = args .assets
117164
118165 if len (assets ) == 0 :
119166 assets = get_possible_assets (root )
120167
121- output_formats = list ()
122- if type (args .format ) == str :
168+ output_formats = []
169+ if isinstance (args .format , str ) :
123170 output_formats .append (args .format )
124171 else :
125172 output_formats = args .format
@@ -150,5 +197,6 @@ def main():
150197
151198 write_checksum_file (sha256_checksum_from_site , "data/sdn_advanced_checksum.txt" )
152199
200+
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
0 commit comments