AP loss #1

Merged: 7 commits, Apr 2, 2020
24 changes: 4 additions & 20 deletions config/docker_run_config.yaml
@@ -1,23 +1,7 @@
peteflo-i7gtx:
  peteflo:
    path_to_data_directory: '/media/peteflo/3TBbackup/dense_correspondence'

paladin-23:
  manuelli:
    path_to_data_directory: '/home/manuelli/data'

paladin-44:
  manuelli:
    path_to_data_directory: '/media/hdd/data'

iiwa-2:
  manuelli:
    path_to_data_directory: '/home/manuelli/data'

wei:
  wei:
    path_to_data_directory: '/home/wei/data'

jakub-ThinkPad-T480:
  jakub:
    path_to_data_directory: '/home/jakub/data'

bigboy:
  tomasz:
    path_to_data_directory: '/home/tomasz/Workspace/general-dense-object-nets/data'
14 changes: 12 additions & 2 deletions dense_correspondence/dataset/spartan_dataset_masked.py
@@ -269,7 +269,7 @@ def get_scene_list(self, mode=None):
            scene_list.append(scene_name)

        return scene_list

    def get_list_of_objects(self):
        """
        Returns a list of object ids
@@ -643,6 +643,16 @@ def get_within_scene_data(self, scene_name, metadata, for_synthetic_multi_object
        image_a_depth_numpy = np.asarray(image_a_depth)
        image_b_depth_numpy = np.asarray(image_b_depth)

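        # workaround for RobotLocomotion/pytorch-dense-correspondence#204:
        # a few mask files are empty, so skip such training samples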
        image_a_mask_numpy = np.asarray(image_a_mask)
Owner: Shouldn't this be behind some flag? Like loss == contrastive-loss, etc.?

Author: This is a workaround for this issue: RobotLocomotion/pytorch-dense-correspondence#204. Short story: a few mask files are empty. If so, skip this training sample.

Owner: @gasiortomasz What I meant on Slack is that we should add the comment you wrote here, but in the code, above this section.

        image_b_mask_numpy = np.asarray(image_b_mask)
        img_size = np.size(image_a_mask_numpy)
        min_mask_size = 0.01 * img_size

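        # assuming binary {0, 1} masks, this requires at least 1% of the
        # image pixels to lie on the object mask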
        if (np.sum(image_a_mask_numpy) < min_mask_size) or (np.sum(image_b_mask_numpy) < min_mask_size):
            logging.info("not enough pixels in mask, skipping")
            image_a_rgb_tensor = self.rgb_image_to_tensor(image_a_rgb)
            return self.return_empty_data(image_a_rgb_tensor, image_a_rgb_tensor)

        if self.sample_matches_only_off_mask:
            correspondence_mask = np.asarray(image_a_mask)
        else:
@@ -1212,7 +1222,7 @@ def get_first_image_index(self, scene_name):
    @property
    def config(self):
        return self._config

    @staticmethod
    def merge_single_object_configs(config_list):
        """
140 changes: 140 additions & 0 deletions dense_correspondence/loss_functions/ap_loss.py
@@ -0,0 +1,140 @@
import numpy as np
Owner: Let's add a comment at the top of this file saying where this code comes from. This will give readers more context and might help with debugging. Also, please clarify what is taken directly from the R2D2 code and what you needed to change or add.

import torch
import torch.nn as nn
import torch.nn.functional as F

# this class is taken from https://github.com/naver/r2d2/blob/master/nets/ap_loss.py
Owner: @gasiortomasz Usually you would include such comments at the end of the class docstring, here after the line starting "Note: typically...".

Owner: This applies to all the comments you added here.

class APLoss(nn.Module):
    """ differentiable AP loss, through quantization.

        Input: (N, M)   values in [min, max]
        label: (N, M)   values in {0, 1}

        Returns: list of query AP (for each n in {1..N})
        Note: typically, you want to minimize 1 - mean(AP)
    """
    def __init__(self, nq=25, min=0, max=1):
        nn.Module.__init__(self)
        assert isinstance(nq, int) and 2 <= nq <= 100
        self.nq = nq
        self.min = min
        self.max = max
        gap = max - min
        assert gap > 0

        # init quantizer = non-learnable (fixed) convolution
        self.quantizer = q = nn.Conv1d(1, 2*nq, kernel_size=1, bias=True)
        a = (nq-1) / gap
        # 1st half = lines passing through (min+x, 1) and (min+x+1/a, 0) with x = {nq-1..0}*gap/(nq-1)
        q.weight.data[:nq] = -a
        q.bias.data[:nq] = torch.from_numpy(a*min + np.arange(nq, 0, -1))  # b = 1 + a*(min+x)
        # 2nd half = lines passing through (min+x, 1) and (min+x-1/a, 0) with x = {nq-1..0}*gap/(nq-1)
        q.weight.data[nq:] = a
        q.bias.data[nq:] = torch.from_numpy(np.arange(2-nq, 2, 1) - a*min)  # b = 1 - a*(min+x)
        # first and last bins are special: just a horizontal straight line
        q.weight.data[0] = q.weight.data[-1] = 0
        q.bias.data[0] = q.bias.data[-1] = 1
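        # together, the two halves implement tent-shaped soft bins: taking the
        # min of the two lines and clamping at 0 (done in compute_AP) gives a
        # triangular response that peaks at 1 on each bin center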

    def compute_AP(self, x, label):
        BS, N, M = x.shape

        # quantize all predictions
        # TODO: make this work over a batch. Currently only BS=1 is supported.
        q = self.quantizer(x.squeeze(0).unsqueeze(1))
        q = torch.min(q[:, :self.nq], q[:, self.nq:]).clamp(min=0)  # N x Q x M

        nbs = q.sum(dim=-1)  # number of samples per bin, N x Q
        rec = (q * label.view(N, 1, M).float()).sum(dim=-1)  # number of correct samples per bin, N x Q
        prec = rec.cumsum(dim=-1) / (1e-16 + nbs.cumsum(dim=-1))  # precision
        rec /= rec.sum(dim=-1).unsqueeze(1)  # normalize recall to [0, 1]

        ap = (prec * rec).sum(dim=-1)  # per-image AP
        return ap

    def forward(self, x, label):
        assert x.shape == label.shape  # N x M
        return self.compute_AP(x, label)
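
# For reference, a minimal sketch of how APLoss could be exercised on toy
# data (not part of the PR; the shapes and values are illustrative):
#
#     ap_criterion = APLoss(nq=25, min=0, max=1)
#     # BS=1 batch, N=1 query, M=5 candidate scores in [0, 1];
#     # the label marks which candidates are true matches
#     scores = torch.tensor([[[0.9, 0.1, 0.8, 0.3, 0.2]]])
#     labels = torch.tensor([[[1, 0, 1, 0, 0]]], dtype=torch.uint8)
#     per_query_ap = ap_criterion(scores, labels)  # shape (N,)
#     loss = 1 - per_query_ap.mean()  # as the docstring suggests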

# this class is inspired by PixelAPLoss from https://github.com/naver/r2d2/blob/master/nets/ap_loss.py
class PixelAPLoss(nn.Module):
    """
    Computes the pixel-wise AP loss
    """
    def __init__(self, nq=20, sampler=None):
        nn.Module.__init__(self)
        self.aploss = APLoss(nq, min=0, max=1)
        self.sampler = sampler

    def compute_scores(self, descriptors1, descriptors2, indices_1, indices_2):
        selected_descriptors_1 = descriptors1[:, indices_1, :]
        selected_descriptors_2 = descriptors2[:, indices_2, :]

        # crazily enough, if there is only one element to index_select into
        # above, then the first dimension is collapsed down, and we end up
        # with shape [D,] where we want [1, D]; this unsqueeze fixes that case
        if len(indices_1) == 1:
            selected_descriptors_1 = selected_descriptors_1.unsqueeze(0)
            selected_descriptors_2 = selected_descriptors_2.unsqueeze(0)

        cosine_similarity = (selected_descriptors_1 * selected_descriptors_2).sum(-1)
        return cosine_similarity

    def combine_scores(self, positive_scores, negative_scores):
        scores = torch.cat((positive_scores, negative_scores), dim=-1)
        ground_truth = scores.new_zeros(scores.shape, dtype=torch.uint8)
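        # positives were concatenated first, so the leading
        # positive_scores.shape[2] columns are the true matches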
        ground_truth[:, :, :positive_scores.shape[2]] = 1
        return scores, ground_truth


    def get_indices_from_points_and_offsets(self, matches, offsets):
        offset_points = matches[:, None] + offsets
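        # 480 * 640 is the hard-coded image resolution; clamping keeps the
        # offset indices inside the flattened image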
        return offset_points.clamp(0, 480 * 640 - 1)

    def forward(self, descriptors1, descriptors2, matches_1, matches_2):
        non_matches_2 = self.get_indices_from_points_and_offsets(matches_2, self.sampler.negative_offsets)
        matches_1 = matches_1.unsqueeze(-1)
        matches_2 = matches_2.unsqueeze(-1)

        descriptors1 = F.normalize(descriptors1, p=2, dim=-1)
        descriptors2 = F.normalize(descriptors2, p=2, dim=-1)

        positive_scores = self.compute_scores(descriptors1, descriptors2, matches_1, matches_2)
        negative_scores = self.compute_scores(descriptors1, descriptors2, matches_1, non_matches_2)
        scores, ground_truth = self.combine_scores(positive_scores, negative_scores)

        # compute pixel-wise AP
        ap_score = self.aploss(scores, ground_truth)

        # [WIP]
        # this line should be changed if you want to get more funky with the AP
        # loss, for instance if you want to add a reliability map, do something
        # like: 1 - ap_score * rel
        ap_loss = 1 - ap_score
        return ap_loss.mean()


# this class is inspired by Ngh2Sampler from https://github.com/naver/r2d2/blob/master/nets/sampler.py
class RingSampler(nn.Module):
    """
    Class for sampling non-correspondences.
    Points are drawn from a ring around the true match;
    the radii are defined in pixel units.
    """
    def __init__(self, inner_radius=10, outer_radius=12):
        nn.Module.__init__(self)
        self.inner_radius = inner_radius
        self.outer_radius = outer_radius
        self.sample_offsets()

    def sample_offsets(self, image_width=640):
        inner_r2 = self.inner_radius**2
        outer_r2 = self.outer_radius**2
        neg = []
        for j in range(-self.outer_radius - 1, self.outer_radius + 1):
            for i in range(-self.outer_radius - 1, self.outer_radius + 1):
                d2 = i*i + j*j
                if inner_r2 <= d2 <= outer_r2:
                    neg.append(i * image_width + j)

        self.register_buffer('negative_offsets', torch.LongTensor(neg))
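

# A minimal smoke test sketching how the pieces fit together (not part of the
# PR). The 640x480 resolution matches the hard-coded clamp above; the
# descriptor dimension, match count, and random data are illustrative
# assumptions.
if __name__ == '__main__':
    sampler = RingSampler(inner_radius=10, outer_radius=12)
    criterion = PixelAPLoss(nq=20, sampler=sampler)

    # dense descriptors flattened to (1, H*W, D); matches are flat pixel indices
    H, W, D = 480, 640, 16
    base = torch.randn(1, H * W, D)
    descriptors1 = (base + 0.05 * torch.randn_like(base)).requires_grad_()
    descriptors2 = (base + 0.05 * torch.randn_like(base)).requires_grad_()

    # pretend the two views are pixel-aligned, so true matches share indices
    matches_1 = torch.randint(0, H * W, (100,))
    matches_2 = matches_1.clone()

    loss = criterion(descriptors1, descriptors2, matches_1, matches_2)
    loss.backward()
    print('pixel-wise AP loss:', loss.item())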
30 changes: 15 additions & 15 deletions dense_correspondence/loss_functions/loss_composer.py
@@ -4,7 +4,7 @@
import torch
from torch.autograd import Variable

def get_loss(pixelwise_contrastive_loss, match_type,
             image_a_pred, image_b_pred,
             matches_a, matches_b,
             masked_non_matches_a, masked_non_matches_b,
@@ -85,16 +85,16 @@ def get_within_scene_loss(pixelwise_contrastive_loss, image_a_pred, image_b_pred

    if pcl._config["use_l2_pixel_loss_on_background_non_matches"]:
        background_non_match_loss, num_background_hard_negatives =\
            pixelwise_contrastive_loss.non_match_loss_with_l2_pixel_norm(image_a_pred, image_b_pred, matches_b,
                background_non_matches_a, background_non_matches_b, M_descriptor=pcl._config["M_background"])

    else:
        background_non_match_loss, num_background_hard_negatives =\
            pixelwise_contrastive_loss.non_match_loss_descriptor_only(image_a_pred, image_b_pred,
                background_non_matches_a, background_non_matches_b,
                M_descriptor=pcl._config["M_background"])

    blind_non_match_loss = zero_loss()
    num_blind_hard_negatives = 1
@@ -103,7 +103,7 @@ def get_within_scene_loss(pixelwise_contrastive_loss, image_a_pred, image_b_pred
        pixelwise_contrastive_loss.non_match_loss_descriptor_only(image_a_pred, image_b_pred,
            blind_non_matches_a, blind_non_matches_b,
            M_descriptor=pcl._config["M_masked"])

    total_num_hard_negatives = num_masked_hard_negatives + num_background_hard_negatives
@@ -138,7 +138,7 @@ def get_within_scene_loss(pixelwise_contrastive_loss, image_a_pred, image_b_pred
    loss = pcl._config["match_loss_weight"] * match_loss + \
           pcl._config["non_match_loss_weight"] * non_match_loss

    return loss, match_loss, masked_non_match_loss_scaled, background_non_match_loss_scaled, blind_non_match_loss_scaled

@@ -150,15 +150,15 @@ def get_within_scene_loss_triplet(pixelwise_contrastive_loss, image_a_pred, imag
"""
Simple wrapper for pixelwise_contrastive_loss functions. Args and return args documented above in get_loss()
"""

pcl = pixelwise_contrastive_loss

masked_triplet_loss =\
pixelwise_contrastive_loss.get_triplet_loss(image_a_pred, image_b_pred, matches_a,
pixelwise_contrastive_loss.get_triplet_loss(image_a_pred, image_b_pred, matches_a,
matches_b, masked_non_matches_a, masked_non_matches_b, pcl._config["alpha_triplet"])

background_triplet_loss =\
pixelwise_contrastive_loss.get_triplet_loss(image_a_pred, image_b_pred, matches_a,
pixelwise_contrastive_loss.get_triplet_loss(image_a_pred, image_b_pred, matches_a,
matches_b, background_non_matches_a, background_non_matches_b, pcl._config["alpha_triplet"])

total_loss = masked_triplet_loss + background_triplet_loss
Expand All @@ -180,7 +180,7 @@ def get_different_object_loss(pixelwise_contrastive_loss, image_a_pred, image_b_
        pixelwise_contrastive_loss.non_match_loss_descriptor_only(image_a_pred, image_b_pred,
            blind_non_matches_a, blind_non_matches_b,
            M_descriptor=M_descriptor)

    if scale_by_hard_negatives:
        scale_factor = max(num_hard_negatives, 1)
    else:
@@ -215,6 +215,6 @@ def zero_loss():
    return Variable(torch.FloatTensor([0]).cuda())

def is_zero_loss(loss):
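    # the loss may arrive as a plain Python int (e.g. 0) rather than a tensor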
    if isinstance(loss, int):
        return loss < 1e-20
    return loss.item() < 1e-20

