Skip to content

Cleaning up Ops Boxes and Losses 🧹 #5979

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion torchvision/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
remove_small_boxes,
clip_boxes_to_image,
box_area,
box_convert,
box_iou,
generalized_box_iou,
distance_box_iou,
complete_box_iou,
masks_to_boxes,
)
from .boxes import box_convert
from .ciou_loss import complete_box_iou_loss
from .deform_conv import deform_conv2d, DeformConv2d
from .diou_loss import distance_box_iou_loss
Expand Down
30 changes: 30 additions & 0 deletions torchvision/ops/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,33 @@ def split_normalization_params(
else:
other_params.extend(p for p in module.parameters() if p.requires_grad)
return norm_params, other_params


def _upcast(t: Tensor) -> Tensor:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that previous code used different versions of upcast. More specifically boxes permitted integers while losses didn't.

# Protects from numerical overflows in multiplications by upcasting to the equivalent higher type
if t.is_floating_point():
return t if t.dtype in (torch.float32, torch.float64) else t.float()
else:
return t if t.dtype in (torch.int32, torch.int64) else t.int()


def _loss_inter_union(
boxes1: torch.Tensor,
boxes2: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:

x1, y1, x2, y2 = boxes1.unbind(dim=-1)
x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1)

# Intersection keypoints
xkis1 = torch.max(x1, x1g)
ykis1 = torch.max(y1, y1g)
xkis2 = torch.min(x2, x2g)
ykis2 = torch.min(y2, y2g)

intsctk = torch.zeros_like(x1)
mask = (ykis2 > ykis1) & (xkis2 > xkis1)
intsctk[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask])
unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsctk

return intsctk, unionk
35 changes: 5 additions & 30 deletions torchvision/ops/boxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from ..utils import _log_api_usage_once
from ._box_convert import _box_cxcywh_to_xyxy, _box_xyxy_to_cxcywh, _box_xywh_to_xyxy, _box_xyxy_to_xywh
from ._utils import _upcast


def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor:
Expand Down Expand Up @@ -215,14 +216,6 @@ def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor:
return boxes


def _upcast(t: Tensor) -> Tensor:
# Protects from numerical overflows in multiplications by upcasting to the equivalent higher type
if t.is_floating_point():
return t if t.dtype in (torch.float32, torch.float64) else t.float()
else:
return t if t.dtype in (torch.int32, torch.int64) else t.int()


def box_area(boxes: Tensor) -> Tensor:
"""
Computes the area of a set of bounding boxes, which are specified by their
Expand Down Expand Up @@ -327,25 +320,8 @@ def complete_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso
if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(complete_box_iou)

boxes1 = _upcast(boxes1)
boxes2 = _upcast(boxes2)

inter, union = _box_inter_union(boxes1, boxes2)
iou = inter / union

lti = torch.min(boxes1[:, None, :2], boxes2[:, None, :2])
rbi = torch.max(boxes1[:, None, 2:], boxes2[:, None, 2:])

whi = (rbi - lti).clamp(min=0) # [N,M,2]
diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps

# centers of boxes
x_p = (boxes1[:, 0] + boxes1[:, 2]) / 2
y_p = (boxes1[:, 1] + boxes1[:, 3]) / 2
x_g = (boxes2[:, 0] + boxes2[:, 2]) / 2
y_g = (boxes2[:, 1] + boxes2[:, 3]) / 2
# The distance between boxes' centers squared.
centers_distance_squared = (x_p - x_g) ** 2 + (y_p - y_g) ** 2
diou = distance_box_iou(boxes1, boxes2, eps)
iou = box_iou(boxes1, boxes2)

w_pred = boxes1[:, 2] - boxes1[:, 0]
h_pred = boxes1[:, 3] - boxes1[:, 1]
Expand All @@ -356,7 +332,7 @@ def complete_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso
v = (4 / (torch.pi ** 2)) * torch.pow((torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)), 2)
with torch.no_grad():
alpha = v / (1 - iou + v + eps)
return iou - (centers_distance_squared / diagonal_distance_squared) - alpha * v
return diou - alpha * v


def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor:
Expand All @@ -381,8 +357,7 @@ def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso
boxes1 = _upcast(boxes1)
boxes2 = _upcast(boxes2)

inter, union = _box_inter_union(boxes1, boxes2)
iou = inter / union
iou = box_iou(boxes1, boxes2)

lti = torch.min(boxes1[:, None, :2], boxes2[:, :2])
rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
Expand Down
49 changes: 14 additions & 35 deletions torchvision/ops/ciou_loss.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import torch

from ..utils import _log_api_usage_once
from .giou_loss import _upcast
from ._utils import _loss_inter_union
from .diou_loss import distance_box_iou_loss


def complete_box_iou_loss(
Expand All @@ -12,6 +13,9 @@ def complete_box_iou_loss(
) -> torch.Tensor:

"""
# Original Implementation from
https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/losses.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would leave this as a comment on the source. Long unrendered URLs are not particularly helpful for the documentation. We should make the same change on diou.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sphinx agrees with you 😃

image

Let me re-write

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeap. That looks ugly and it's on multiple places. If you want bring a separate quick PR that moves attributions on the main part of the methods to avoid the issue.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One option is to embed the link e.g.

Original Implementation from Detectron2

But the docstring should at least start by describing what the object is, even if it's very obvious from its name already. So I would suggest to write something like

"""Complete Box IoU Loss.

Implementation is adapted from `Detectron2 <https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/losses.py>`__.

...

Copy link
Contributor Author

@oke-aditya oke-aditya May 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image

That's how it's currently over main branch. I would suggest adding 3 words in end
implementation adapted from Detectron2.

Copy link
Member

@NicolasHug NicolasHug May 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a great description but the preview is a bit long @oke-aditya . It's best to skip a line after the first sentence (there's even a PEP for that) to keep the preview short and to-the-point.

Multi-line docstrings consist of a summary line just like a one-line docstring, followed by a blank line, followed by a more elaborate description. The summary line may be used by automatic indexing tools; it is important that it fits on one line and is separated from the rest of the docstring by a blank line

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will be tackling this in a separate PR anyway to unify all the stuff. I feel we need a bit more revamp for the Ops docs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should definitely provide attribution to other projects when we use code from them. Let's keep the reference on the code as a comment on the main body of the method similar to what we do in most places instead of placing a link. We can review this on the future and make changes in a coordinated manner.


Gradient-friendly IoU loss with an additional penalty that is non-zero when the
boxes do not overlap. This loss function considers important geometrical
factors such as overlap area, normalized central point distance and aspect ratio.
Expand All @@ -30,50 +34,25 @@ def complete_box_iou_loss(
``'sum'``: The output will be summed. Default: ``'none'``
eps : (float): small number to prevent division by zero. Default: 1e-7

Reference:
Returns:
Tensor: Loss tensor with the reduction option applied.

Complete Intersection over Union Loss (Zhaohui Zheng et. al)
https://arxiv.org/abs/1911.08287
Reference:
Zhaohui Zheng et. al: Complete Intersection over Union Loss:
https://arxiv.org/abs/1911.08287

"""

# Original Implementation : https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/losses.py

if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(complete_box_iou_loss)

boxes1 = _upcast(boxes1)
boxes2 = _upcast(boxes2)
diou_loss = distance_box_iou_loss(boxes1, boxes2, reduction="none", eps=eps)
intsct, union = _loss_inter_union(boxes1, boxes2)
iou = intsct / (union + eps)

x1, y1, x2, y2 = boxes1.unbind(dim=-1)
x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1)

# Intersection keypoints
xkis1 = torch.max(x1, x1g)
ykis1 = torch.max(y1, y1g)
xkis2 = torch.min(x2, x2g)
ykis2 = torch.min(y2, y2g)

intsct = torch.zeros_like(x1)
mask = (ykis2 > ykis1) & (xkis2 > xkis1)
intsct[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask])
union = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsct + eps
iou = intsct / union

# smallest enclosing box
xc1 = torch.min(x1, x1g)
yc1 = torch.min(y1, y1g)
xc2 = torch.max(x2, x2g)
yc2 = torch.max(y2, y2g)
diag_len = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps

# centers of boxes
x_p = (x2 + x1) / 2
y_p = (y2 + y1) / 2
x_g = (x1g + x2g) / 2
y_g = (y1g + y2g) / 2
distance = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2)

# width and height of boxes
w_pred = x2 - x1
h_pred = y2 - y1
Expand All @@ -83,7 +62,7 @@ def complete_box_iou_loss(
with torch.no_grad():
alpha = v / (1 - iou + v + eps)

loss = 1 - iou + (distance / diag_len) + alpha * v
loss = diou_loss + alpha * v
if reduction == "mean":
loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum()
elif reduction == "sum":
Expand Down
20 changes: 6 additions & 14 deletions torchvision/ops/diou_loss.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import torch

from ..utils import _log_api_usage_once
from .boxes import _upcast
from ._utils import _upcast, _loss_inter_union


def distance_box_iou_loss(
Expand All @@ -11,6 +11,9 @@ def distance_box_iou_loss(
eps: float = 1e-7,
) -> torch.Tensor:
"""
Original Implementation from
https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/losses.py

Gradient-friendly IoU loss with an additional penalty that is non-zero when the
distance between boxes' centers isn't zero. Indeed, for two exactly overlapping
boxes, the distance IoU is the same as the IoU loss.
Expand All @@ -37,8 +40,6 @@ def distance_box_iou_loss(
https://arxiv.org/abs/1911.08287
"""

# Original Implementation : https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/losses.py

if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(distance_box_iou_loss)

Expand All @@ -48,17 +49,8 @@ def distance_box_iou_loss(
x1, y1, x2, y2 = boxes1.unbind(dim=-1)
x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1)

# Intersection keypoints
xkis1 = torch.max(x1, x1g)
ykis1 = torch.max(y1, y1g)
xkis2 = torch.min(x2, x2g)
ykis2 = torch.min(y2, y2g)

intsct = torch.zeros_like(x1)
mask = (ykis2 > ykis1) & (xkis2 > xkis1)
intsct[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask])
union = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsct + eps
iou = intsct / union
intsct, union = _loss_inter_union(boxes1, boxes2)
iou = intsct / (union + eps)

# smallest enclosing box
xc1 = torch.min(x1, x1g)
Expand Down
23 changes: 5 additions & 18 deletions torchvision/ops/giou_loss.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,7 @@
import torch
from torch import Tensor

from ..utils import _log_api_usage_once


def _upcast(t: Tensor) -> Tensor:
# Protects from numerical overflows in multiplications by upcasting to the equivalent higher type
if t.dtype not in (torch.float32, torch.float64):
return t.float()
return t
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not the same _upcast as _utils. It doesn't support integers and converts everything to floats. Could you please review all the places where the giou_loss._upcast() was used and ensure the output will be a float? Basically box ops are OK to maintain things as integers but not losses.

Copy link
Contributor Author

@oke-aditya oke-aditya May 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I do understand that I have not used the _upcast_if_not_float. It's intentional, :) Since I would like to know why

box ops are OK to maintain things as integers but not losses.

Copy link
Contributor

@datumbox datumbox May 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _upcast on boxes was introduced for operators that estimate information related to a box. Things like the box area for instance. If you put integer boxes and you ask for the area of a box, you kind of expecting you receive the value in integers (that's debatable but that's how the operator worked). The issue was that if you used too small of a precision, the area estimation would overflow. So this method upcasts math operations to a space where it's safe to do multiplications without risking overflowing for most applications.

On the losses side, I'm not aware of any application that does things on integer space. Not only that but doing reduction == "mean" will break things. So we need to be careful to definitely not support ints in losses. Similar care might be needed for some box operators. Area might still make sense to return as integer but I'm not 100% sure if that's the case with all the IoU metrics we deal here.

So I think it's important prior merging this PR, to make an explicit decision of what has to support integers and what doesn't, handle it appropriately and add tests and xfails to ensure we are not breaking anything.

from ._utils import _upcast, _loss_inter_union


def generalized_box_iou_loss(
Expand Down Expand Up @@ -38,6 +31,9 @@ def generalized_box_iou_loss(
``'sum'``: The output will be summed. Default: ``'none'``
eps (float): small number to prevent division by zero. Default: 1e-7

Returns:
Tensor: Loss tensor with the reduction option applied.

Reference:
Hamid Rezatofighi et. al: Generalized Intersection over Union:
A Metric and A Loss for Bounding Box Regression:
Expand All @@ -51,16 +47,7 @@ def generalized_box_iou_loss(
x1, y1, x2, y2 = boxes1.unbind(dim=-1)
x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1)

# Intersection keypoints
xkis1 = torch.max(x1, x1g)
ykis1 = torch.max(y1, y1g)
xkis2 = torch.min(x2, x2g)
ykis2 = torch.min(y2, y2g)

intsctk = torch.zeros_like(x1)
mask = (ykis2 > ykis1) & (xkis2 > xkis1)
intsctk[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask])
unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsctk
intsctk, unionk = _loss_inter_union(boxes1, boxes2)
iouk = intsctk / (unionk + eps)

# smallest enclosing box
Expand Down