diff --git a/label_maker/images.py b/label_maker/images.py index 077fb9a..ac2ce09 100644 --- a/label_maker/images.py +++ b/label_maker/images.py @@ -1,6 +1,7 @@ # pylint: disable=unused-argument """Generate an .npz file containing arrays for training machine learning algorithms""" +import concurrent.futures from os import makedirs, path as op from random import shuffle @@ -8,7 +9,7 @@ from label_maker.utils import get_image_function -def download_images(dest_folder, classes, imagery, ml_type, background_ratio, imagery_offset=False, **kwargs): +def download_images(dest_folder, classes, imagery, ml_type, background_ratio, threadcount, imagery_offset=False, **kwargs): """Download satellite images specified by a URL and a label.npz file Parameters ------------ @@ -28,6 +29,8 @@ def download_images(dest_folder, classes, imagery, ml_type, background_ratio, im background_ratio: float Determines the number of background images to download in single class problems. Ex. A value of 1 will download an equal number of background images to class images. + threadcount: int + Number of threads to use for faster parallel image download imagery_offset: list An optional list of integers representing the number of pixels to offset imagery. Ex. [15, -5] will move the images 15 pixels right and 5 pixels up relative to the requested tile bounds @@ -72,5 +75,6 @@ def class_test(value): image_function = get_image_function(imagery) kwargs['imagery_offset'] = imagery_offset - for tile in tiles: - image_function(tile, imagery, tiles_dir, kwargs) + with concurrent.futures.ThreadPoolExecutor(max_workers=threadcount) as executor: + [executor.submit(image_function, tile, imagery, tiles_dir, kwargs) for tile in tiles] + executor.shutdown(wait=True) diff --git a/label_maker/main.py b/label_maker/main.py index b3769e5..8739b57 100644 --- a/label_maker/main.py +++ b/label_maker/main.py @@ -50,7 +50,7 @@ def parse_args(args): subparsers.add_parser('download', parents=[pparser], help='', formatter_class=dhf) l = subparsers.add_parser('labels', parents=[pparser], help='', formatter_class=dhf) p = subparsers.add_parser('preview', parents=[pparser], help='', formatter_class=dhf) - subparsers.add_parser('images', parents=[pparser], help='', formatter_class=dhf) + i = subparsers.add_parser('images', parents=[pparser], help='', formatter_class=dhf) subparsers.add_parser('package', parents=[pparser], help='', formatter_class=dhf) # labels has an optional parameter @@ -60,6 +60,10 @@ def parse_args(args): p.add_argument('-n', '--number', default=5, type=int, help='number of examples images to create per class') + #images has optional parameter + i.add_argument('-t', '--threadcount', default=10, type=int, + help= 'thread count to use') + # turn namespace into dictinary parsed_args = vars(parser.parse_args(args)) @@ -109,7 +113,8 @@ def cli(): number = args.get('number') preview(dest_folder=dest_folder, number=number, **config) elif cmd == 'images': - download_images(dest_folder=dest_folder, **config) + threadcount = args.get('threadcount') + download_images(dest_folder=dest_folder, threadcount=threadcount, **config) elif cmd == 'package': package_directory(dest_folder=dest_folder, **config)