From fed8c55890394ea6b81536f7e284b99223a16b01 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Thu, 13 Oct 2022 10:17:25 +0000 Subject: [PATCH 1/6] Use softmax in _get_gaussian_kernel1d --- torchvision/transforms/functional_tensor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 20b76fbf079..86bb89d8b7c 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -730,8 +730,7 @@ def _get_gaussian_kernel1d(kernel_size: int, sigma: float) -> Tensor: ksize_half = (kernel_size - 1) * 0.5 x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) - pdf = torch.exp(-0.5 * (x / sigma).pow(2)) - kernel1d = pdf / pdf.sum() + kernel1d = torch.softmax(-0.5 * (x / sigma).pow(2), dim=0) return kernel1d From 85e6fe0074c4f3a260481986439f239c6454770e Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Thu, 13 Oct 2022 12:49:32 +0000 Subject: [PATCH 2/6] Revert "Use softmax in _get_gaussian_kernel1d" This reverts commit eb8fba36302d2da9e06e6f40afaaf901b276a771. --- torchvision/transforms/functional_tensor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 86bb89d8b7c..20b76fbf079 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -730,7 +730,8 @@ def _get_gaussian_kernel1d(kernel_size: int, sigma: float) -> Tensor: ksize_half = (kernel_size - 1) * 0.5 x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) - kernel1d = torch.softmax(-0.5 * (x / sigma).pow(2), dim=0) + pdf = torch.exp(-0.5 * (x / sigma).pow(2)) + kernel1d = pdf / pdf.sum() return kernel1d From 127979dfbd4ba82b71ee5f2b5e17f8b0985a0506 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Thu, 13 Oct 2022 21:22:36 +0000 Subject: [PATCH 3/6] Code update --- .../prototype/transforms/functional/_misc.py | 30 ++++++++++++++++++- torchvision/transforms/functional_tensor.py | 12 ++++---- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/torchvision/prototype/transforms/functional/_misc.py b/torchvision/prototype/transforms/functional/_misc.py index 8fda24e17ec..abb002bd3a7 100644 --- a/torchvision/prototype/transforms/functional/_misc.py +++ b/torchvision/prototype/transforms/functional/_misc.py @@ -1,3 +1,4 @@ +import math from typing import List, Optional, Union import PIL.Image @@ -32,6 +33,22 @@ def normalize( return normalize_image_tensor(inpt, mean=mean, std=std, inplace=inplace) +def _get_gaussian_kernel1d(kernel_size: int, sigma: float) -> torch.Tensor: + lim = (kernel_size - 1) / (2 * math.sqrt(2) * sigma) + x = torch.linspace(-lim, lim, steps=kernel_size) + kernel1d = torch.softmax(-x.pow_(2), dim=0) + return kernel1d + + +def _get_gaussian_kernel2d( + kernel_size: List[int], sigma: List[float], dtype: torch.dtype, device: torch.device +) -> torch.Tensor: + kernel1d_x = _get_gaussian_kernel1d(kernel_size[0], sigma[0]).to(device, dtype=dtype) + kernel1d_y = _get_gaussian_kernel1d(kernel_size[1], sigma[1]).to(device, dtype=dtype) + kernel2d = kernel1d_y.unsqueeze(-1) * kernel1d_x + return kernel2d + + def gaussian_blur_image_tensor( image: torch.Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None ) -> torch.Tensor: @@ -70,7 +87,18 @@ def gaussian_blur_image_tensor( else: needs_unsquash = False - output = _FT.gaussian_blur(image, kernel_size, sigma) + dtype = image.dtype if torch.is_floating_point(image) else torch.float32 + kernel = _get_gaussian_kernel2d(kernel_size, sigma, dtype=dtype, device=image.device) + kernel = kernel.expand(image.shape[-3], 1, kernel.shape[0], kernel.shape[1]) + + image, need_cast, need_squeeze, out_dtype = _FT._cast_squeeze_in(image, [kernel.dtype]) + + # padding = (left, right, top, bottom) + padding = [kernel_size[0] // 2, kernel_size[0] // 2, kernel_size[1] // 2, kernel_size[1] // 2] + output = _FT.torch_pad(image, padding, mode="reflect") + output = _FT.conv2d(output, kernel, groups=output.shape[-3]) + + output = _FT._cast_squeeze_out(output, need_cast, need_squeeze, out_dtype) if needs_unsquash: output = output.view(shape) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 20b76fbf079..b16a9c7ea2e 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -1,3 +1,4 @@ +import math import warnings from typing import List, Optional, Tuple, Union @@ -726,13 +727,10 @@ def perspective( return _apply_grid_transform(img, grid, interpolation, fill=fill) -def _get_gaussian_kernel1d(kernel_size: int, sigma: float) -> Tensor: - ksize_half = (kernel_size - 1) * 0.5 - - x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) - pdf = torch.exp(-0.5 * (x / sigma).pow(2)) - kernel1d = pdf / pdf.sum() - +def _get_gaussian_kernel1d(kernel_size: int, sigma: float) -> torch.Tensor: + lim = (kernel_size - 1) / (2 * math.sqrt(2) * sigma) + x = torch.linspace(-lim, lim, steps=kernel_size) + kernel1d = torch.softmax(-x.pow_(2), dim=0) return kernel1d From c26baa9c0019ff61414147c57aa3660f7e0d7544 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Mon, 17 Oct 2022 09:16:05 +0000 Subject: [PATCH 4/6] Relaxed tolerance in consistency tests for GaussianBlur and ElasticTransform --- test/test_prototype_transforms_consistency.py | 4 ++++ torchvision/transforms/functional_tensor.py | 12 +++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/test/test_prototype_transforms_consistency.py b/test/test_prototype_transforms_consistency.py index 7d2f1d735ea..4e9d33467ca 100644 --- a/test/test_prototype_transforms_consistency.py +++ b/test/test_prototype_transforms_consistency.py @@ -323,6 +323,9 @@ def __init__( ], # ElasticTransform needs larger images to avoid the needed internal padding being larger than the actual image make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, sizes=[(163, 163), (72, 333), (313, 95)]), + # We updated gaussian blur kernel generation with a faster and numerically more stable version + # This brings float32 accumulation visible in elastic transform -> we need to relax consistency tolerance + closeness_kwargs={"rtol": 1e-1, "atol": 1}, ), ConsistencyConfig( prototype_transforms.GaussianBlur, @@ -333,6 +336,7 @@ def __init__( ArgsKwargs(kernel_size=3, sigma=0.7), ArgsKwargs(kernel_size=5, sigma=(0.3, 1.4)), ], + closeness_kwargs={"rtol": 1e-5, "atol": 1e-5}, ), ConsistencyConfig( prototype_transforms.RandomAffine, diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index b233e4119c7..4944c75fab8 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -1,4 +1,3 @@ -import math import warnings from typing import List, Optional, Tuple, Union @@ -727,10 +726,13 @@ def perspective( return _apply_grid_transform(img, grid, interpolation, fill=fill) -def _get_gaussian_kernel1d(kernel_size: int, sigma: float) -> torch.Tensor: - lim = (kernel_size - 1) / (2 * math.sqrt(2) * sigma) - x = torch.linspace(-lim, lim, steps=kernel_size) - kernel1d = torch.softmax(-x.pow_(2), dim=0) +def _get_gaussian_kernel1d(kernel_size: int, sigma: float) -> Tensor: + ksize_half = (kernel_size - 1) * 0.5 + + x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) + pdf = torch.exp(-0.5 * (x / sigma).pow(2)) + kernel1d = pdf / pdf.sum() + return kernel1d From 173e6f907d78d9b4ce1f6f2863fdc5e5048131c8 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Mon, 17 Oct 2022 13:21:56 +0000 Subject: [PATCH 5/6] Code review updates --- test/test_prototype_transforms_consistency.py | 42 ++++++++++--------- .../prototype/transforms/functional/_misc.py | 5 ++- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/test/test_prototype_transforms_consistency.py b/test/test_prototype_transforms_consistency.py index 4e9d33467ca..ae81a455dbd 100644 --- a/test/test_prototype_transforms_consistency.py +++ b/test/test_prototype_transforms_consistency.py @@ -308,25 +308,28 @@ def __init__( ArgsKwargs(brightness=0.1, contrast=0.4, saturation=0.7, hue=0.3), ], ), - ConsistencyConfig( - prototype_transforms.ElasticTransform, - legacy_transforms.ElasticTransform, - [ - ArgsKwargs(), - ArgsKwargs(alpha=20.0), - ArgsKwargs(alpha=(15.3, 27.2)), - ArgsKwargs(sigma=3.0), - ArgsKwargs(sigma=(2.5, 3.9)), - ArgsKwargs(interpolation=prototype_transforms.InterpolationMode.NEAREST), - ArgsKwargs(interpolation=prototype_transforms.InterpolationMode.BICUBIC), - ArgsKwargs(fill=1), - ], - # ElasticTransform needs larger images to avoid the needed internal padding being larger than the actual image - make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, sizes=[(163, 163), (72, 333), (313, 95)]), - # We updated gaussian blur kernel generation with a faster and numerically more stable version - # This brings float32 accumulation visible in elastic transform -> we need to relax consistency tolerance - closeness_kwargs={"rtol": 1e-1, "atol": 1}, - ), + *[ + ConsistencyConfig( + prototype_transforms.ElasticTransform, + legacy_transforms.ElasticTransform, + [ + ArgsKwargs(), + ArgsKwargs(alpha=20.0), + ArgsKwargs(alpha=(15.3, 27.2)), + ArgsKwargs(sigma=3.0), + ArgsKwargs(sigma=(2.5, 3.9)), + ArgsKwargs(interpolation=prototype_transforms.InterpolationMode.NEAREST), + ArgsKwargs(interpolation=prototype_transforms.InterpolationMode.BICUBIC), + ArgsKwargs(fill=1), + ], + # ElasticTransform needs larger images to avoid the needed internal padding being larger than the actual image + make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, sizes=[(163, 163), (72, 333), (313, 95)], dtypes=[dt]), + # We updated gaussian blur kernel generation with a faster and numerically more stable version + # This brings float32 accumulation visible in elastic transform -> we need to relax consistency tolerance + closeness_kwargs=ckw, + ) + for dt, ckw in [(torch.uint8, {"rtol": 1e-1, "atol": 1}), (torch.float32, {"rtol": 1e-3, "atol": 1e-5})] + ], ConsistencyConfig( prototype_transforms.GaussianBlur, legacy_transforms.GaussianBlur, @@ -510,7 +513,6 @@ def check_call_consistency( image_repr = f"[{tuple(image.shape)}, {str(image.dtype).rsplit('.')[-1]}]" image_tensor = torch.Tensor(image) - try: torch.manual_seed(0) output_legacy_tensor = legacy_transform(image_tensor) diff --git a/torchvision/prototype/transforms/functional/_misc.py b/torchvision/prototype/transforms/functional/_misc.py index 3f2446299c8..fa4a6e9be73 100644 --- a/torchvision/prototype/transforms/functional/_misc.py +++ b/torchvision/prototype/transforms/functional/_misc.py @@ -3,6 +3,7 @@ import PIL.Image import torch +from torch.nn.functional import conv2d, pad as torch_pad from torchvision.prototype import features from torchvision.transforms import functional_tensor as _FT from torchvision.transforms.functional import pil_to_tensor, to_pil_image @@ -95,8 +96,8 @@ def gaussian_blur_image_tensor( # padding = (left, right, top, bottom) padding = [kernel_size[0] // 2, kernel_size[0] // 2, kernel_size[1] // 2, kernel_size[1] // 2] - output = _FT.torch_pad(image, padding, mode="reflect") - output = _FT.conv2d(output, kernel, groups=output.shape[-3]) + output = torch_pad(image, padding, mode="reflect") + output = conv2d(output, kernel, groups=output.shape[-3]) output = _FT._cast_squeeze_out(output, need_cast, need_squeeze, out_dtype) From bb70a3e2bd553d1c80e2cc57ff18d4d0d4a46417 Mon Sep 17 00:00:00 2001 From: vfdev Date: Mon, 17 Oct 2022 15:59:56 +0200 Subject: [PATCH 6/6] Update test_prototype_transforms_consistency.py --- test/test_prototype_transforms_consistency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_prototype_transforms_consistency.py b/test/test_prototype_transforms_consistency.py index ae81a455dbd..212755068d9 100644 --- a/test/test_prototype_transforms_consistency.py +++ b/test/test_prototype_transforms_consistency.py @@ -328,7 +328,7 @@ def __init__( # This brings float32 accumulation visible in elastic transform -> we need to relax consistency tolerance closeness_kwargs=ckw, ) - for dt, ckw in [(torch.uint8, {"rtol": 1e-1, "atol": 1}), (torch.float32, {"rtol": 1e-3, "atol": 1e-5})] + for dt, ckw in [(torch.uint8, {"rtol": 1e-1, "atol": 1}), (torch.float32, {"rtol": 1e-2, "atol": 1e-3})] ], ConsistencyConfig( prototype_transforms.GaussianBlur,