diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ae7e697 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*~ +*.swp +cachedir/* diff --git a/BatchProviderBase.lua b/BatchProviderBase.lua new file mode 100644 index 0000000..35e5ada --- /dev/null +++ b/BatchProviderBase.lua @@ -0,0 +1,149 @@ +local argcheck = require 'argcheck' + +local function createWindowBase(rec,i,j,is_bg) + local label = is_bg == true and 0+1 or rec.label[j]+1 + local window = {i,rec.boxes[j][1],rec.boxes[j][2], + rec.boxes[j][3],rec.boxes[j][4], + label} + return window +end + +local function createWindowAngle(rec,i,j,is_bg) + local label = is_bg == true and 0+1 or rec.label[j]+1 + --local ang = ( is_bg == false and rec.objects[rec.correspondance[j] ] ) and + -- rec.objects[rec.correspondance[j] ].viewpoint.azimuth or 0 + local ang + if is_bg == false and rec.objects[rec.correspondance[j] ] then + if rec.objects[rec.correspondance[j] ].viewpoint.distance == '0' then + ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth_coarse + else + ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth + end + else + ang = 0 + end + local window = {i,rec.boxes[j][1],rec.boxes[j][2], + rec.boxes[j][3],rec.boxes[j][4], + label,ang} + return window +end + +--[[ +local argcheck = require 'argcheck' +local initcheck = argcheck{ + pack=true, + noordered=true, + {name="dataset", + type="nnf.DataSetPascal", + help="A dataset class" + }, + {name="batch_size", + type="number", + default=128, + help="batch size"}, + {name="fg_fraction", + type="number", + default=0.25, + help="foreground fraction in batch" + }, + {name="fg_threshold", + type="number", + default=0.5, + help="foreground threshold" + }, + {name="bg_threshold", + type="table", + default={0,0.5}, + help="background threshold, in the form {LO,HI}" + }, + {name="createWindow", + type="function", + default=createWindowBase, + help="" + }, + {name="do_flip", + type="boolean", + default=true, + help="sample batches with random flips" + }, +} +--]] + +local BatchProviderBase = torch.class('nnf.BatchProviderBase') + +function BatchProviderBase:__init(...) + + self.dataset = nil + self.batch_size = 128 + self.fg_fraction = 0.25 + self.fg_threshold = 0.5 + self.bg_threshold = {0,0.5} + self.createWindow = createWindowBase + self.do_flip = true + + --local opts = initcheck(...) 
+ --for k,v in pairs(opts) do self[k] = v end + +end + +-- allow changing the way self.bboxes are formatted +function BatchProviderBase:setCreateWindow(createWindow) + self.createWindow = createWindow +end + +function BatchProviderBase:setupData() + local dataset = self.dataset + local bb = {} + local bbT = {} + + for i=0,dataset.num_classes do -- 0 because of background + bb[i] = {} + end + + for i=1,dataset.num_imgs do + bbT[i] = {} + end + + for i = 1,dataset.num_imgs do + if dataset.num_imgs > 10 then + xlua.progress(i,dataset.num_imgs) + end + + local rec = dataset:attachProposals(i) + + for j=1,rec:size() do + local id = rec.label[j] + local is_fg = (rec.overlap[j] >= self.fg_threshold) + local is_bg = (not is_fg) and (rec.overlap[j] >= self.bg_threshold[1] and + rec.overlap[j] < self.bg_threshold[2]) + if is_fg then + local window = self.createWindow(rec,i,j,is_bg) + table.insert(bb[1], window) -- could be id instead of 1 + elseif is_bg then + local window = self.createWindow(rec,i,j,is_bg) + table.insert(bb[0], window) + end + + end + + for j=0,dataset.num_classes do -- 0 because of background + if #bb[j] > 0 then + bbT[i][j] = torch.FloatTensor(bb[j]) + end + end + + bb = {} + for i=0,dataset.num_classes do -- 0 because of background + bb[i] = {} + end + collectgarbage() + end + self.bboxes = bbT + --return bbT +end + +function BatchProviderBase:getBatch() + error("You can't use BatchProviderBase") + return input,target +end + diff --git a/BatchProviderIC.lua b/BatchProviderIC.lua new file mode 100644 index 0000000..da34d77 --- /dev/null +++ b/BatchProviderIC.lua @@ -0,0 +1,164 @@ +local BatchProvider, parent = torch.class('nnf.BatchProviderIC','nnf.BatchProviderBase') + +local argcheck = require 'argcheck' + +local env = require 'argcheck.env' -- retrieve argcheck environement +-- this is the default type function +-- which can be overrided by the user +function env.istype(obj, typename) + if typename == 'DataSet' then + return obj and obj._isDataSet + end + if typename == 'FeatureProvider' then + return obj and obj._isFeatureProvider + end + return torch.type(obj) == typename +end + + +local initcheck = argcheck{ + pack=true, + noordered=true, + {name="dataset", + type="DataSet", + help="A dataset class" + }, + {name="feat_provider", + type="nnf.FRCNN", + help="A feat provider class" + }, + {name="batch_size", + type="number", + opt=true, + help="batch size"}, + {name="imgs_per_batch", + type="number", + default=2, + help="number of images to sample in a batch"}, + {name="fg_fraction", + type="number", + default=0.25, + help="foreground fraction in batch" + }, + {name="fg_threshold", + type="number", + default=0.5, + help="foreground threshold" + }, + {name="bg_threshold", + type="table", + default={0.1,0.5}, + help="background threshold, in the form {LO,HI}" + }, + {name="do_flip", + type="boolean", + default=true, + help="sample batches with random flips" + }, +} + +function BatchProvider:__init(...) + parent.__init(self) + + local opts = initcheck(...) 
+ for k,v in pairs(opts) do self[k] = v end +end + +-- setup is the same + +function BatchProvider:permuteIdx() + local total_img = self.dataset:size() + local imgs_per_batch = self.imgs_per_batch + + self._cur = self._cur or math.huge + + if self._cur + imgs_per_batch > total_img then + self._perm = torch.randperm(total_img) + self._cur = 1 + end + + local img_idx = self._perm[{{self._cur,self._cur + self.imgs_per_batch - 1}}] + self._cur = self._cur + self.imgs_per_batch + + local img_idx_end = imgs_per_batch + + local fg_windows = {} + local bg_windows = {} + for i=1,img_idx_end do + local curr_idx = img_idx[i] + bg_windows[i] = {} + if self.bboxes[curr_idx][0] then + for j=1,self.bboxes[curr_idx][0]:size(1) do + table.insert(bg_windows[i],{curr_idx,j}) + end + end + fg_windows[i] = {} + if self.bboxes[curr_idx][1] then + for j=1,self.bboxes[curr_idx][1]:size(1) do + table.insert(fg_windows[i],{curr_idx,j}) + end + end + end + local do_flip = torch.FloatTensor(imgs_per_batch):random(0,1) + local opts = {img_idx=img_idx,img_idx_end=img_idx_end,do_flip=do_flip} + return fg_windows,bg_windows,opts + +end + +function BatchProvider:selectBBoxes(fg_windows,bg_windows) + local fg_num_each = torch.round(self.fg_num_each/self.imgs_per_batch) + local bg_num_each = torch.round(self.bg_num_each/self.imgs_per_batch) + + local bboxes = {} + local labels = {} + for im=1,self.imgs_per_batch do + local window_idx = torch.randperm(#bg_windows[im]) + local end_idx = math.min(bg_num_each,#bg_windows[im]) + local bbox = {} + for i=1,end_idx do + local curr_idx = bg_windows[im][window_idx[i] ][1] + local position = bg_windows[im][window_idx[i] ][2] + local dd = self.bboxes[curr_idx][0][position][{{2,6}}] + table.insert(bbox,{dd[1],dd[2],dd[3],dd[4]}) + table.insert(labels,dd[5]) + end + + window_idx = torch.randperm(#fg_windows[im]) + local end_idx = math.min(fg_num_each,#fg_windows[im]) + for i=1,end_idx do + local curr_idx = fg_windows[im][window_idx[i] ][1] + local position = fg_windows[im][window_idx[i] ][2] + local dd = self.bboxes[curr_idx][1][position][{{2,6}}] + table.insert(bbox,{dd[1],dd[2],dd[3],dd[4]}) + table.insert(labels,dd[5]) + end + table.insert(bboxes,torch.FloatTensor(bbox)) + end + labels = torch.IntTensor(labels) + return bboxes, labels +end + +function BatchProvider:getBatch() + local dataset = self.dataset + + self.fg_num_each = self.fg_fraction * self.batch_size + self.bg_num_each = self.batch_size - self.fg_num_each + + local fg_windows,bg_windows,opts = self:permuteIdx() + + self.targets = self.targets or torch.FloatTensor() + + local batches = self.batches + local targets = self.targets + + local imgs = {} + for i=1,opts.img_idx:size(1) do + table.insert(imgs,dataset:getImage(opts.img_idx[i])) + end + local boxes,labels = self:selectBBoxes(fg_windows,bg_windows) + self.batches = self.feat_provider:getFeature(imgs,boxes,opts.do_flip) + + targets:resize(labels:size()):copy(labels) + + return self.batches, self.targets +end diff --git a/BatchProvider.lua b/BatchProviderRC.lua similarity index 57% rename from BatchProvider.lua rename to BatchProviderRC.lua index 977bdc7..2770036 100644 --- a/BatchProvider.lua +++ b/BatchProviderRC.lua @@ -1,109 +1,82 @@ -local BatchProvider = torch.class('nnf.BatchProvider') +local BatchProvider,parent = + torch.class('nnf.BatchProviderRC','nnf.BatchProviderBase') -local function createWindowBase(rec,i,j,is_bg) - local label = is_bg == true and 0+1 or rec.label[j]+1 - local window = {i,rec.boxes[j][1],rec.boxes[j][2], - 
rec.boxes[j][3],rec.boxes[j][4], - label} - return window -end -local function createWindowAngle(rec,i,j,is_bg) - local label = is_bg == true and 0+1 or rec.label[j]+1 - --local ang = ( is_bg == false and rec.objects[rec.correspondance[j] ] ) and - -- rec.objects[rec.correspondance[j] ].viewpoint.azimuth or 0 - local ang - if is_bg == false and rec.objects[rec.correspondance[j] ] then - if rec.objects[rec.correspondance[j] ].viewpoint.distance == '0' then - ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth_coarse - else - ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth - end - else - ang = 0 - end - local window = {i,rec.boxes[j][1],rec.boxes[j][2], - rec.boxes[j][3],rec.boxes[j][4], - label,ang} - return window -end - -function BatchProvider:__init(feat_provider) - self.dataset = feat_provider.dataset - self.feat_provider = feat_provider +local argcheck = require 'argcheck' - self.nTimesMoreData = 10 - self.iter_per_batch = 500 - - self.batch_size = 128 - self.fg_fraction = 0.25 - - self.fg_threshold = 0.5 - self.bg_threshold = {0.0,0.5} - - self.createWindow = createWindowBase--createWindowAngle - - self.batch_dim = {256*50} - self.target_dim = 1 - - self.do_flip = true - - --self:setupData() +local env = require 'argcheck.env' -- retrieve argcheck environement +-- this is the default type function +-- which can be overrided by the user +function env.istype(obj, typename) + if typename == 'DataSet' then + return obj and obj._isDataSet + end + if typename == 'FeatureProvider' then + return obj and obj._isFeatureProvider + end + return torch.type(obj) == typename end -function BatchProvider:setupData() - local dataset = self.dataset - local bb = {} - local bbT = {} +local initcheck = argcheck{ + pack=true, + noordered=true, + {name="dataset", + type="DataSet", + help="A dataset class" + }, + {name="feat_provider", + type="FeatureProvider", + help="A feat provider class" + }, + {name="batch_size", + type="number", + default=128, + help="batch size"}, + {name="iter_per_batch", + type="number", + default=10, + help=""}, + {name="nTimesMoreData", + type="number", + default=10, + help=""}, + {name="fg_fraction", + type="number", + default=0.25, + help="foreground fraction in batch" + }, + {name="fg_threshold", + type="number", + default=0.5, + help="foreground threshold" + }, + {name="bg_threshold", + type="table", + default={0.1,0.5}, + help="background threshold, in the form {LO,HI}" + }, + {name="target_dim", + type="number", + default=1, + help=""}, + {name="do_flip", + type="boolean", + default=true, + help="sample batches with random flips" + }, +} - for i=0,dataset.num_classes do -- 0 because of background - bb[i] = {} - end +function BatchProvider:__init(...) + parent.__init(self) - for i=1,dataset.num_imgs do - bbT[i] = {} - end + local opts = initcheck(...) 
+ for k,v in pairs(opts) do self[k] = v end - for i = 1,dataset.num_imgs do - if dataset.num_imgs > 10 then - xlua.progress(i,dataset.num_imgs) - end - - local rec = dataset:attachProposals(i) + self.batch_dim = self.feat_provider.output_size - for j=1,rec:size() do - local id = rec.label[j] - local is_fg = (rec.overlap[j] >= self.fg_threshold) - local is_bg = (not is_fg) and (rec.overlap[j] >= self.bg_threshold[1] and - rec.overlap[j] < self.bg_threshold[2]) - if is_fg then - local window = self.createWindow(rec,i,j,is_bg) - table.insert(bb[1], window) -- could be id instead of 1 - elseif is_bg then - local window = self.createWindow(rec,i,j,is_bg) - table.insert(bb[0], window) - end - - end - - for j=0,dataset.num_classes do -- 0 because of background - if #bb[j] > 0 then - bbT[i][j] = torch.FloatTensor(bb[j]) - end - end - - bb = {} - for i=0,dataset.num_classes do -- 0 because of background - bb[i] = {} - end - collectgarbage() - end - self.bboxes = bbT - --return bbT end - function BatchProvider:permuteIdx() local fg_num_each = self.fg_num_each local bg_num_each = self.bg_num_each @@ -183,21 +156,12 @@ function BatchProvider:selectBBoxes(fg_windows,bg_windows) return fg_w,bg_w end - --- specific for angle estimation -local function flip_angle(x) - return (-x)%360 -end - -- depends on the model -function BatchProvider:prepareFeatures(im_idx,bboxes,fg_data,bg_data,fg_label,bg_label) +function BatchProvider:prepareFeatures(im_idx,bboxes,fg_label,bg_label) local num_pos = bboxes[1] and #bboxes[1] or 0 local num_neg = bboxes[0] and #bboxes[0] or 0 - fg_data:resize(num_pos,unpack(self.batch_dim)) - bg_data:resize(num_neg,unpack(self.batch_dim)) - fg_label:resize(num_pos,self.target_dim) bg_label:resize(num_neg,self.target_dim) @@ -205,36 +169,29 @@ function BatchProvider:prepareFeatures(im_idx,bboxes,fg_data,bg_data,fg_label,bg if self.do_flip then flip = torch.random(0,1) == 0 end - --print(bboxes) + + local s_boxes = {} for i=1,num_pos do - --local bbox = bboxes[1][{i,{2,5}}] local bbox = {bboxes[1][i][2],bboxes[1][i][3],bboxes[1][i][4],bboxes[1][i][5]} - fg_data[i] = self.feat_provider:getFeature(im_idx,bbox,flip) + table.insert(s_boxes,bbox) fg_label[i][1] = bboxes[1][i][6] ---[[ if flip then - fg_label[i][2] = flip_angle(bboxes[1][i][7]) - else - fg_label[i][2] = bboxes[1][i][7] - end -]] end for i=1,num_neg do - --local bbox = bboxes[0][{i,{2,5}}] local bbox = {bboxes[0][i][2],bboxes[0][i][3],bboxes[0][i][4],bboxes[0][i][5]} - bg_data[i] = self.feat_provider:getFeature(im_idx,bbox,flip) + table.insert(s_boxes,bbox) bg_label[i][1] = bboxes[0][i][6] ---[[ if flip then - bg_label[i][2] = flip_angle(bboxes[0][i][7]) - else - bg_label[i][2] = bboxes[0][i][7] - end]] end - --- return fg_data,bg_data,fg_label,bg_label + + -- compute the features + local feats = self.feat_provider:getFeature(im_idx,s_boxes,flip) + local fg_data = num_pos > 0 and feats:narrow(1,1,num_pos) or nil + local bg_data = num_neg > 0 and feats:narrow(1,num_pos+1,num_neg) or nil + + return fg_data, bg_data end -function BatchProvider:getBatch(batches,targets) +function BatchProvider:prepareBatch(batches,targets) local dataset = self.dataset self.fg_num_each = self.fg_fraction * self.batch_size @@ -257,11 +214,11 @@ function BatchProvider:getBatch(batches,targets) local bg_counter = 0 local fg_data,bg_data,fg_label,bg_label - fg_data = torch.FloatTensor() - bg_data = torch.FloatTensor() fg_label = torch.IntTensor() bg_label = torch.IntTensor() + local pass_index = torch.type(self.feat_provider) == 'nnf.SPP' and true or 
false + print('==> Preparing Batch Data') for i=1,opts.img_idx_end do xlua.progress(i,opts.img_idx_end) @@ -278,7 +235,13 @@ function BatchProvider:getBatch(batches,targets) bboxes[0] = bg_w[curr_idx] bboxes[1] = fg_w[curr_idx] - self:prepareFeatures(curr_idx,bboxes,fg_data,bg_data,fg_label,bg_label) + local data + if pass_index then + data = curr_idx + else + data = dataset:getImage(curr_idx) + end + fg_data,bg_data = self:prepareFeatures(data,bboxes,fg_label,bg_label) for j=1,nbg do bg_counter = bg_counter + 1 @@ -297,6 +260,24 @@ function BatchProvider:getBatch(batches,targets) batches[b][s]:copy(fg_data[j]) targets[b][s]:copy(fg_label[j]) end + collectgarbage() end + collectgarbage() return batches,targets end + +function BatchProvider:getBatch() + self._cur = self._cur or math.huge + -- we have reached the end of our batch pool, need to recompute + if self._cur > self.iter_per_batch then + self._batches,self._targets = self:prepareBatch(self._batches,self._targets) + self._cur = 1 + end + + self.batches = self._batches[self._cur] + self.targets = self._targets[self._cur] + self._cur = self._cur + 1 + + return self.batches, self.targets + +end diff --git a/DataSetCOCO.lua b/DataSetCOCO.lua new file mode 100644 index 0000000..6b2a2e0 --- /dev/null +++ b/DataSetCOCO.lua @@ -0,0 +1,155 @@ +--local json = require 'dkjson' + +local DataSetCOCO,parent = torch.class('nnf.DataSetCOCO', 'nnf.DataSetDetection') + +function DataSetCOCO:__init(annFile) + self.image_set = nil + self.dataset_name = 'COCO' + + local timer = torch.Timer() + local localtimer = torch.Timer() + print('Preparing COCO dataset...') + --[[ + if type(annFile) == 'string' then + local f = io.open(annFile) + local str = f:read('*all') + f:close() + + self.data = json.decode(str) + + else + self.data = torch.load(annFile) + end + --]] + self.data = torch.load('coco_val.t7') + print((' Loaded annotations file in %.2fs'):format(localtimer:time().real)) + localtimer:reset() + + -- mapping images + local img_idx = {} + local img_idx_map = {} + for i = 1, #self.data.images do + table.insert(img_idx,self.data.images[i].id) + img_idx_map[self.data.images[i].id] = i + end + print((' Mapped images in %.4fs'):format(localtimer:time().real)) + localtimer:reset() + + -- mapping annotations + local ann = self.data.annotations + local o = {} + + for k, v in ipairs(ann) do + table.insert(o,v.image_id*1e10 + v.category_id) + end + o = torch.LongTensor(o) + local _,ox = o:sort() + local o_data = ox:data() + local temp_ann = {} + for i=1 , o:size(1) do + table.insert(temp_ann, ann[ox[i] ]) + end + self.data.annotations = temp_ann + + local ann_idx = {} + local ann_idx_map = {} + local ann_img_idx = {} + local img_ann_idx_map = {} + for k,v in ipairs(temp_ann) do + table.insert(ann_idx, v.id) + ann_idx_map[v.id] = k + table.insert(ann_img_idx, v.image_id) + if not img_ann_idx_map[v.image_id] then + img_ann_idx_map[v.image_id] = {} + end + table.insert(img_ann_idx_map[v.image_id],v.id) + end + + self.inds = {img_idx = img_idx, + img_idx_map = img_idx_map, + ann_idx = ann_idx, + ann_idx_map = ann_idx_map, + ann_img_idx = ann_img_idx, + img_ann_idx_map = img_ann_idx_map + } + print((' Mapped annotations in %.4fs'):format(localtimer:time().real)) + localtimer:reset() + + -- mapping classes + self.classes = {} + self.class_to_id = {} + self.class_cont = {} + self.class_cont_map = {} + self.num_classes = 0 + for k,v in ipairs(self.data.categories) do + self.classes[v.id] = v.name + self.class_to_id[v.name] = v.id + table.insert(self.class_cont,v.id) + 
self.class_cont_map[v.id] = k + self.num_classes = self.num_classes + 1 + end + + print((' Total elapsed time: %.4fs'):format(timer:time().real)) + +end + +function DataSetCOCO:getImage(i) + local file_name = self.images[i].file_name + return image.load(paths.concat(self.imgpath,file_name),3,'float') +end + +function DataSetCOCO:getAnnotation(i) + local ann = {object = {}} + local im_id = self.inds.img_idx[i] + local ann_id = self.inds.img_ann_idx_map[im_id] or {} + for k,v in ipairs(ann_id) do + local lann = self.data.annotations[self.inds.ann_idx_map[v] ] + local bbox = {xmin=lann.bbox[1]+1,ymin=lann.bbox[2]+1, + xmax=lann.bbox[1]+lann.bbox[3]+1, + ymax=lann.bbox[2]+lann.bbox[4]+1, + } + local obj = {bndbox=bbox, + class=lann.category_id, + difficult = '0', + name = self.classes[lann.category_id] + } + table.insert(ann.object,obj) + end + return ann +end + +function DataSetCOCO:getGTBoxes(i) + local anno = self:getAnnotation(i) + local valid_objects = {} + local gt_boxes = torch.IntTensor() + local gt_classes = {} + + if self.with_hard_samples then -- inversed with respect to RCNN code + for idx,obj in ipairs(anno.object) do + if self.class_to_id[obj.name] then -- to allow a subset of the classes + table.insert(valid_objects,idx) + end + end + else + for idx,obj in ipairs(anno.object) do + if obj.difficult == '0' and self.class_to_id[obj.name] then + table.insert(valid_objects,idx) + end + end + end + + gt_boxes:resize(#valid_objects,4) + for idx0,idx in ipairs(valid_objects) do + gt_boxes[idx0][1] = anno.object[idx].bndbox.xmin + gt_boxes[idx0][2] = anno.object[idx].bndbox.ymin + gt_boxes[idx0][3] = anno.object[idx].bndbox.xmax + gt_boxes[idx0][4] = anno.object[idx].bndbox.ymax + + table.insert(gt_classes,self.class_cont_map[anno.object[idx].class]) + end + + return gt_boxes,gt_classes,valid_objects,anno + +end + + diff --git a/DataSetDetection.lua b/DataSetDetection.lua new file mode 100644 index 0000000..a557ece --- /dev/null +++ b/DataSetDetection.lua @@ -0,0 +1,113 @@ +local utilities = paths.dofile('utils.lua') +local concat = utilities.concat +local boxoverlap = utilities.boxoverlap + +local DataSetDetection = torch.class('nnf.DataSetDetection') +DataSetDetection._isDataSet = true + +function DataSetDetection:__init() + self.classes = nil + self.num_classes = nil + self.image_set = nil + self.dataset_name = nil +end + +function DataSetDetection:getImage(i) +end + +function DataSetDetection:getAnnotation(i) +end + +function DataSetDetection:getROIBoxes(i) +end + +function DataSetDetection:getGTBoxes(i) +end + +function DataSetDetection:size() + return #self.img_ids +end + +function DataSetDetection:__tostring__() + local str = torch.type(self) + str = str .. '\n Dataset Name: ' .. self.dataset_name + str = str .. '\n ImageSet: '.. self.image_set + str = str .. '\n Number of images: '.. self:size() + str = str .. '\n Classes:' + for k,v in ipairs(self.classes) do + str = str .. 
'\n '..v + end + return str +end + +function DataSetDetection:bestOverlap(all_boxes, gt_boxes, gt_classes) + local num_total_boxes = all_boxes:size(1) + local num_gt_boxes = gt_boxes:dim() > 0 and gt_boxes:size(1) or 0 + local overlap_class = torch.FloatTensor(num_total_boxes,self.num_classes):zero() + local overlap = torch.FloatTensor(num_total_boxes,num_gt_boxes):zero() + for idx=1,num_gt_boxes do + local o = boxoverlap(all_boxes,gt_boxes[idx]) + local tmp = overlap_class[{{},gt_classes[idx]}] -- pointer copy + tmp[tmp:lt(o)] = o[tmp:lt(o)] + overlap[{{},idx}] = o + end + -- get max class overlap + --rec.overlap,rec.label = rec.overlap:max(2) + --rec.overlap = torch.squeeze(rec.overlap,2) + --rec.label = torch.squeeze(rec.label,2) + --rec.label[rec.overlap:eq(0)] = 0 + local correspondance + if num_gt_boxes > 0 then + overlap,correspondance = overlap:max(2) + overlap = torch.squeeze(overlap,2) + correspondance = torch.squeeze(correspondance,2) + correspondance[overlap:eq(0)] = 0 + else + overlap = torch.FloatTensor(num_total_boxes):zero() + correspondance = torch.LongTensor(num_total_boxes):zero() + end + return overlap, correspondance, overlap_class +end + +function DataSetDetection:attachProposals(i) + + local boxes = self:getROIBoxes(i) + local gt_boxes,gt_classes,valid_objects,anno = self:getGTBoxes(i) + + local all_boxes = concat(gt_boxes,boxes,1) + + local num_boxes = boxes:dim() > 0 and boxes:size(1) or 0 + local num_gt_boxes = #gt_classes + + local rec = {} + rec.gt = concat(torch.ByteTensor(num_gt_boxes):fill(1), + torch.ByteTensor(num_boxes):fill(0) ) + + rec.overlap, rec.correspondance, rec.overlap_class = + self:bestOverlap(all_boxes,gt_boxes,gt_classes) + rec.label = torch.IntTensor(num_boxes+num_gt_boxes):fill(0) + for idx=1,(num_boxes+num_gt_boxes) do + local corr = rec.correspondance[idx] + if corr > 0 then + rec.label[idx] = gt_classes[corr] + end + end + + rec.boxes = all_boxes + rec.class = concat(torch.CharTensor(gt_classes), + torch.CharTensor(num_boxes):fill(0)) + + if self.save_objs then + rec.objects = {} + for _,idx in pairs(valid_objects) do + table.insert(rec.objects,anno.object[idx]) + end + end + + function rec:size() + return (num_boxes+num_gt_boxes) + end + + return rec +end + diff --git a/DataSetPascal.lua b/DataSetPascal.lua index 365f93f..9e403df 100644 --- a/DataSetPascal.lua +++ b/DataSetPascal.lua @@ -1,10 +1,13 @@ local matio = require 'matio' -local argcheck = require 'argcheck' +local argcheck = dofile'argcheck.lua'--require 'argcheck' local xml = require 'xml' +local utilities = paths.dofile('utils.lua') +local concat = utilities.concat +local boxoverlap = utilities.boxoverlap matio.use_lua_strings = true -local DataSetPascal = torch.class('nnf.DataSetPascal') +local DataSetPascal,parent = torch.class('nnf.DataSetPascal', 'nnf.DataSetDetection') local function lines_from(file) -- get all lines from a file, returns an empty @@ -59,6 +62,7 @@ local initcheck = argcheck{ if type(v) ~= 'string' then print('classes can only be of string input'); out = false + break end end return out @@ -102,7 +106,7 @@ local initcheck = argcheck{ } function DataSetPascal:__init(...) - + parent.__init(self) local args = initcheck(...) 
print(args) for k,v in pairs(args) do self[k] = v end @@ -167,7 +171,7 @@ function DataSetPascal:size() end function DataSetPascal:getImage(i) - return image.load(string.format(self.imgpath,self.img_ids[i])) + return image.load(string.format(self.imgpath,self.img_ids[i]),3,'float') end @@ -247,34 +251,6 @@ function DataSetPascal:getROIBoxes(i) return self.roidb[i]--self.roidb[self.img2roidb[self.img_ids[i] ] ] end -local function boxoverlap(a,b) - local b = b.xmin and {b.xmin,b.ymin,b.xmax,b.ymax} or b - - local x1 = a:select(2,1):clone() - x1[x1:lt(b[1])] = b[1] - local y1 = a:select(2,2):clone() - y1[y1:lt(b[2])] = b[2] - local x2 = a:select(2,3):clone() - x2[x2:gt(b[3])] = b[3] - local y2 = a:select(2,4):clone() - y2[y2:gt(b[4])] = b[4] - - local w = x2-x1+1; - local h = y2-y1+1; - local inter = torch.cmul(w,h):float() - local aarea = torch.cmul((a:select(2,3)-a:select(2,1)+1) , - (a:select(2,4)-a:select(2,2)+1)):float() - local barea = (b[3]-b[1]+1) * (b[4]-b[2]+1); - - -- intersection over union overlap - local o = torch.cdiv(inter , (aarea+barea-inter)) - -- set invalid entries to 0 overlap - o[w:lt(0)] = 0 - o[h:lt(0)] = 0 - - return o -end - function DataSetPascal:getGTBoxes(i) local anno = self:getAnnotation(i) local valid_objects = {} @@ -309,113 +285,17 @@ function DataSetPascal:getGTBoxes(i) end -function DataSetPascal:attachProposals(i) - - if not self.roidb then - self:loadROIDB() - end - - local boxes = self:getROIBoxes(i) - local gt_boxes,gt_classes,valid_objects,anno = self:getGTBoxes(i) - - local all_boxes - if anno.object then - if #valid_objects > 0 and boxes:dim() > 0 then - all_boxes = torch.cat(gt_boxes,boxes,1) - elseif boxes:dim() == 0 then - all_boxes = gt_boxes - else - all_boxes = boxes - end - else - gt_boxes = torch.IntTensor(0,4) - all_boxes = boxes - end - - local num_boxes = boxes:dim() > 0 and boxes:size(1) or 0 - local num_gt_boxes = #gt_classes - - local rec = {} - if num_gt_boxes > 0 and num_boxes > 0 then - rec.gt = torch.cat(torch.ByteTensor(num_gt_boxes):fill(1), - torch.ByteTensor(num_boxes):fill(0) ) - elseif num_boxes > 0 then - rec.gt = torch.ByteTensor(num_boxes):fill(0) - elseif num_gt_boxes > 0 then - rec.gt = torch.ByteTensor(num_gt_boxes):fill(1) - else - rec.gt = torch.ByteTensor(0) - end - - rec.overlap_class = torch.FloatTensor(num_boxes+num_gt_boxes,self.num_classes):fill(0) - rec.overlap = torch.FloatTensor(num_boxes+num_gt_boxes,num_gt_boxes):fill(0) - for idx=1,num_gt_boxes do - local o = boxoverlap(all_boxes,gt_boxes[idx]) - local tmp = rec.overlap_class[{{},gt_classes[idx]}] -- pointer copy - tmp[tmp:lt(o)] = o[tmp:lt(o)] - rec.overlap[{{},idx}] = boxoverlap(all_boxes,gt_boxes[idx]) - end - -- get max class overlap - --rec.overlap,rec.label = rec.overlap:max(2) - --rec.overlap = torch.squeeze(rec.overlap,2) - --rec.label = torch.squeeze(rec.label,2) - --rec.label[rec.overlap:eq(0)] = 0 - - if num_gt_boxes > 0 then - rec.overlap,rec.correspondance = rec.overlap:max(2) - rec.overlap = torch.squeeze(rec.overlap,2) - rec.correspondance = torch.squeeze(rec.correspondance,2) - rec.correspondance[rec.overlap:eq(0)] = 0 - else - rec.overlap = torch.FloatTensor(num_boxes+num_gt_boxes):fill(0) - rec.correspondance = torch.LongTensor(num_boxes+num_gt_boxes):fill(0) - end - rec.label = torch.IntTensor(num_boxes+num_gt_boxes):fill(0) - for idx=1,(num_boxes+num_gt_boxes) do - local corr = rec.correspondance[idx] - if corr > 0 then - rec.label[idx] = self.class_to_id[anno.object[valid_objects[corr] ].name] - end - end - - rec.boxes = all_boxes - 
if num_gt_boxes > 0 and num_boxes > 0 then - rec.class = torch.cat(torch.CharTensor(gt_classes), - torch.CharTensor(num_boxes):fill(0)) - elseif num_boxes > 0 then - rec.class = torch.CharTensor(num_boxes):fill(0) - elseif num_gt_boxes > 0 then - rec.class = torch.CharTensor(gt_classes) - else - rec.class = torch.CharTensor(0) - end - - if self.save_objs then - rec.objects = {} - for _,idx in pairs(valid_objects) do - table.insert(rec.objects,anno.object[idx]) - end - else - rec.correspondance = nil - end - - function rec:size() - return (num_boxes+num_gt_boxes) - end - - return rec -end - function DataSetPascal:createROIs() if self.rois then return end self.rois = {} for i=1,self.num_imgs do - xlua.progress(i,self.num_imgs) table.insert(self.rois,self:attachProposals(i)) if i%500 == 0 then + xlua.progress(i,self.num_imgs) collectgarbage() end end + xlua.progress(self.num_imgs,self.num_imgs) end diff --git a/FRCNN.lua b/FRCNN.lua new file mode 100644 index 0000000..9947127 --- /dev/null +++ b/FRCNN.lua @@ -0,0 +1,185 @@ +local flipBoundingBoxes = paths.dofile('utils.lua').flipBoundingBoxes +local recursiveResizeAsCopyTyped = paths.dofile('utils.lua').recursiveResizeAsCopyTyped +local FRCNN = torch.class('nnf.FRCNN') +FRCNN._isFeatureProvider = true + +local argcheck = require 'argcheck' +local initcheck = argcheck{ + pack=true, + noordered=true, + {name="scale", + type="table", + default={600}, + help="image scales"}, + {name="max_size", + type="number", + default=1000, + help="maximum dimension of an image"}, + {name="inputArea", + type="number", + default=224^2, + help="input area of the bounding box"}, + {name="image_transformer", + type="nnf.ImageTransformer", + default=nnf.ImageTransformer{}, + help="Class to preprocess input images"}, +} + + +function FRCNN:__init(...) + + local opts = initcheck(...) 
+ for k,v in pairs(opts) do self[k] = v end + + self.train = true +end + +function FRCNN:training() + self.train = true +end + +function FRCNN:evaluate() + self.train = false +end + +function FRCNN:processImages(input_imgs,do_flip) + local output_imgs = self._feat[1] + local num_images + local im + if self.train then + num_images = #input_imgs + else + num_images = #self.scale + im = self.image_transformer:preprocess(input_imgs[1]) + end + + local imgs = {} + local im_sizes = {} + local im_scales = {} + + for i=1,num_images do + local scale + if self.train then + im = input_imgs[i] + im = self.image_transformer:preprocess(im) + scale = self.scale[math.random(1,#self.scale)] + else + scale = self.scale[i] + end + local flip = do_flip and (do_flip[i] == 1) or false + if flip then + im = image.hflip(im) + end + local im_size = im[1]:size() + local im_size_min = math.min(im_size[1],im_size[2]) + local im_size_max = math.max(im_size[1],im_size[2]) + local im_scale = scale/im_size_min + if torch.round(im_scale*im_size_max) > self.max_size then + im_scale = self.max_size/im_size_max + end + local im_s = {torch.round(im_size[1]*im_scale),torch.round(im_size[2]*im_scale)} + table.insert(imgs,image.scale(im,im_s[2],im_s[1])) + table.insert(im_sizes,im_s) + table.insert(im_scales,im_scale) + end + -- create single tensor with all images, padding with zero for different sizes + im_sizes = torch.IntTensor(im_sizes) + local max_shape = im_sizes:max(1)[1] + output_imgs:resize(num_images,3,max_shape[1],max_shape[2]):zero() + for i=1,num_images do + output_imgs[i][{{},{1,imgs[i]:size(2)},{1,imgs[i]:size(3)}}]:copy(imgs[i]) + end + return im_scales,im_sizes +end + +function FRCNN:projectImageROIs(im_rois,scales,do_flip,imgs_size) + local rois = self._feat[2] + -- we consider two cases: + -- During training, the scales are sampled randomly per image, so + -- in the same image all the bboxes have the same scale, and we only + -- need to take into account the different images that are provided. 
+ -- During testing, we consider that there is only one image at a time, + -- and the scale for each bbox is the one which makes its area closest + -- to self.inputArea + if self.train or #scales == 1 then + local total_bboxes = 0 + local cumul_bboxes = {0} + for i=1,#scales do + total_bboxes = total_bboxes + im_rois[i]:size(1) + table.insert(cumul_bboxes,total_bboxes) + end + rois:resize(total_bboxes,5) + for i=1,#scales do + local idx = {cumul_bboxes[i]+1,cumul_bboxes[i+1]} + rois[{idx,1}]:fill(i) + rois[{idx,{2,5}}]:copy(im_rois[i]):add(-1):mul(scales[i]):add(1) + if do_flip and do_flip[i] == 1 then + flipBoundingBoxes(rois[{idx,{2,5}}],imgs_size[{i,2}]) + end + end + else -- not yet tested + error('Multi-scale testing not yet tested') + local scales = torch.FloatTensor(scales) + im_rois = im_rois[1] + local widths = im_rois[{{},3}] - im_rois[{{},1}] + 1 + local heights = im_rois[{{},4}] - im_rois[{{}, 2}] + 1 + + local areas = widths * heights + local scaled_areas = areas:view(-1,1) * scales:view(1,-1):pow(2) + local diff_areas = scaled_areas:add(-1,self.inputArea):abs() -- no memory copy + local levels = select(2, diff_areas:min(2)) + + local num_boxes = im_rois:size(1) + rois:resize(num_boxes,5) + for i=1,num_boxes do + local s = levels[i] + rois[{i,{2,5}}]:copy(im_rois[i]):add(-1):mul(scales[s]):add(1) + rois[{i,1}] = s + end + end + return rois +end + +function FRCNN:getFeature(imgs,bboxes,flip) + self._feat = self._feat or {torch.FloatTensor(),torch.FloatTensor()} + + -- if it's in test mode, adapt inputs + if torch.isTensor(imgs) then + imgs = {imgs} + if type(bboxes) == 'table' then + bboxes = torch.FloatTensor(bboxes) + bboxes = bboxes:dim() == 1 and bboxes:view(1,-1) or bboxes + end + bboxes = {bboxes} + if flip == false then + flip = {0} + elseif flip == true then + flip = {1} + end + end + + local im_scales, im_sizes = self:processImages(imgs,flip) + self:projectImageROIs(bboxes,im_scales,flip,im_sizes) + + return self._feat +end + +-- do the bbox regression +function FRCNN:postProcess(im,boxes,output) + -- not implemented yet + return output,boxes +end + +function FRCNN:compute(model, inputs) + local ttype = model.output:type() -- fix when doing bbox regression + self.inputs,inputs = recursiveResizeAsCopyTyped(self.inputs,inputs,ttype) + return model:forward(self.inputs) +end + +function FRCNN:__tostring() + local str = torch.type(self) + str = str .. '\n Image scales: [' .. table.concat(self.scale,', ')..']' + str = str .. '\n Max image size: ' .. self.max_size + str = str .. '\n Input area: ' .. 
self.inputArea + return str +end diff --git a/ImageDetect.lua b/ImageDetect.lua new file mode 100644 index 0000000..d3140df --- /dev/null +++ b/ImageDetect.lua @@ -0,0 +1,22 @@ +local ImageDetect = torch.class('nnf.ImageDetect') +local recursiveResizeAsCopyTyped = paths.dofile('utils.lua').recursiveResizeAsCopyTyped + +function ImageDetect:__init(model, feat_provider) + self.model = model + self.feat_provider = feat_provider + --self.sm = nn.SoftMax():cuda() +end + +-- supposes boxes is in [x1,y1,x2,y2] format +function ImageDetect:detect(im,boxes) + local feat_provider = self.feat_provider + + local inputs = feat_provider:getFeature(im,boxes) + + local output0 = feat_provider:compute(self.model, inputs) + local output,boxes_p = feat_provider:postProcess(im,boxes,output0) + --self.sm:forward(output0) + + self.output,output = recursiveResizeAsCopyTyped(self.output,output,'torch.FloatTensor') + return self.output,boxes_p +end diff --git a/ImageTransformer.lua b/ImageTransformer.lua index d7b213b..3bdb175 100644 --- a/ImageTransformer.lua +++ b/ImageTransformer.lua @@ -37,3 +37,12 @@ function ImageTransformer:preprocess(I) return I end +function ImageTransformer:__tostring() + local str = torch.type(self) + if self.swap then + str = str .. '\n Channel swap: [' .. table.concat(self.swap,', ') .. ']' + end + str = str .. '\n Raw scale: '.. self.raw_scale + str = str .. '\n Mean pixel: [' .. table.concat(self.mean_pix,', ') .. ']' + return str +end diff --git a/RCNN.lua b/RCNN.lua index 03651d3..13b87a9 100644 --- a/RCNN.lua +++ b/RCNN.lua @@ -1,53 +1,53 @@ -local RCNN = torch.class('nnf.RCNN') +local flipBoundingBoxes = paths.dofile('utils.lua').flipBoundingBoxes -function RCNN:__init(dataset) - self.dataset = dataset - self.image_transformer = nnf.ImageTransformer{ - mean_pix={123.68/255,116.779/255,103.939/255}} - - self.crop_size = 227 - self.image_mean = nil - self.padding = 16 - self.use_square = false - -end +local argcheck = require 'argcheck' +local initcheck = argcheck{ + pack=true, + noordered=true, + {name="crop_size", + type="number", + default=227, + help="crop size"}, + {name="padding", + type="number", + default=16, + help="context padding"}, + {name="use_square", + type="boolean", + default=false, + help="force square crops"}, + {name="image_transformer", + type="nnf.ImageTransformer", + default=nnf.ImageTransformer{}, + help="Class to preprocess input images"}, + {name="max_batch_size", + type="number", + default=128, + help="maximum size of batches during evaluation"}, + {name="num_threads", + type="number", + default=8, + help="number of threads for bounding box cropping"}, + {name="iter_per_thread", + type="number", + default=8, + help="number of bbox croppings per thread"}, + {name="dataset", + type="nnf.DataSetPascal", -- change to allow other datasets + opt=true, + help="A dataset class"}, +} -function RCNN:getCrop(im_idx,bbox,flip) - -- suppose I is in BGR, as image_mean - -- [x1 y1 x2 y2] order - local flip = flip==nil and false or flip - - if self.curr_im_idx ~= im_idx or self.curr_doflip ~= flip then - self.curr_im_idx = im_idx - self.curr_im_feats = self.dataset:getImage(im_idx):float() - self.curr_im_feats = self.image_transformer:preprocess(self.curr_im_feats) - if flip then - self.curr_im_feats = image.hflip(self.curr_im_feats) - end - self.curr_doflip = flip - end - - local I = self.curr_im_feats - local bbox = bbox - - if flip then - local tt = bbox[1] - bbox[1] = I:size(3)-bbox[3]+1 - bbox[3] = I:size(3)-tt +1 - end - - local crop_size = self.crop_size - local 
image_mean = self.image_mean - local padding = self.padding - local use_square = self.use_square +local RCNN = torch.class('nnf.RCNN') +RCNN._isFeatureProvider = true + +local function RCNNCrop(output,I,box,crop_size,padding,use_square,crop_buffer) local pad_w = 0; local pad_h = 0; local crop_width = crop_size; local crop_height = crop_size; - - --local bbox = {bbox[2],bbox[1],bbox[4],bbox[3]} - + local bbox = {box[1],box[2],box[3],box[4]} ------ if padding > 0 or use_square then local scale = crop_size/(crop_size - padding*2) @@ -98,30 +98,177 @@ function RCNN:getCrop(im_idx,bbox,flip) end -- padding > 0 || square ------ - --local patch = image.crop(I,bbox[1],bbox[2],bbox[3],bbox[4]); - local patch = image.crop(I,bbox[1],bbox[2],bbox[3],bbox[4]):float(); - local tmp = image.scale(patch,crop_width,crop_height,'bilinear'); + local patch = I[{{},{bbox[2],bbox[4]},{bbox[1],bbox[3]}}] + crop_buffer:resize(3,crop_height,crop_width) + image.scale(crop_buffer,patch,'bilinear'); + + output[{{},{pad_h+1,pad_h+crop_height}, {pad_w+1,pad_w+crop_width}}] = crop_buffer - if image_mean then - tmp = tmp - image_mean[{{},{pad_h+1,pad_h+crop_height}, - {pad_w+1,pad_w+crop_width}}] +end + + +function RCNN:__init(...) + + local opts = initcheck(...) + for k,v in pairs(opts) do self[k] = v end + + self.output_size = {3,self.crop_size,self.crop_size} + self.train = true + + if self.num_threads > 1 then + local crop_size = self.crop_size + local threads = require 'threads' + threads.serialization('threads.sharedserialize') + self.donkeys = threads.Threads( + self.num_threads, + function() + require 'torch' + require 'image' + end, + function(idx) + RCNNCrop = RCNNCrop + torch.setheaptracking(true) + crop_buffer = torch.FloatTensor(3,crop_size,crop_size) + print(string.format('Starting RCNN thread with id: %d', idx)) + end + ) end +end + +function RCNN:training() + self.train = true +end + +function RCNN:evaluate() + self.train = false +end - --patch = torch.zeros(3,crop_size,crop_size):typeAs(I) - patch = torch.zeros(3,crop_size,crop_size):float() +function RCNN:getCrop(output,I,bbox) + -- [x1 y1 x2 y2] order + + local crop_size = self.crop_size + local padding = self.padding + local use_square = self.use_square - patch[{{},{pad_h+1,pad_h+crop_height}, {pad_w+1,pad_w+crop_width}}] = tmp + self._crop_buffer = self._crop_buffer or torch.FloatTensor(3,crop_size,crop_size) + RCNNCrop(output,I,bbox,crop_size,padding,use_square,self._crop_buffer) - return patch + return output end -function RCNN:getFeature(im_idx,bbox,flip) +function RCNN:getFeature(im,bbox,flip) local flip = flip==nil and false or flip + + if type(im) == 'number' then + assert(self.dataset, 'you must provide a dataset if using numeric indices') + im = self.dataset:getImage(im) + end + + if torch.type(im) ~= 'torch.FloatTensor' then + -- force image to be float + self._im = self._im or torch.FloatTensor() + self._im:resize(im:size()):copy(im) + im = self._im + end + + if type(bbox) == 'table' then + bbox = torch.FloatTensor(bbox) + elseif torch.isTensor(bbox) and flip then + -- creates a copy of the bboxes to avoid modifying the original + -- bboxes in the flipping + self._bbox = self._bbox or torch.FloatTensor() + self._bbox:resize(bbox:size()):copy(bbox) + bbox = self._bbox + end - local crop_feat = self:getCrop(im_idx,bbox,flip) + im = self.image_transformer:preprocess(im) + bbox = bbox:dim() == 1 and bbox:view(1,-1) or bbox + local num_boxes = bbox:size(1) + + if flip then + im = image.hflip(im) + flipBoundingBoxes(bbox,im:size(3)) + end + + 
self._feat = self._feat or torch.FloatTensor() + + self._feat:resize(num_boxes,table.unpack(self.output_size)):zero() + + -- use threads to speed up bbox processing + if self.num_threads > 1 and num_boxes > self.iter_per_thread then + local feat = self._feat + local img = im + local bndbox = bbox + local crop_size = self.crop_size + local padding = self.padding + local use_square = self.use_square + local iter_per_thread = self.iter_per_thread + local num_launches = math.ceil(num_boxes/iter_per_thread) + for i=1,num_launches do + local iter_per_thread_local + if i == num_launches then + -- last thread launches the remainder of the bboxes + iter_per_thread_local = (num_boxes-1)%iter_per_thread + 1 + else + iter_per_thread_local = iter_per_thread + end + self.donkeys:addjob( + function() + for j=1,iter_per_thread_local do + local f = feat[(i-1)*iter_per_thread+j] + local boundingbox = bndbox[(i-1)*iter_per_thread+j] + -- crop_buffer is global in each thread + RCNNCrop(f,img,boundingbox,crop_size,padding,use_square,crop_buffer) + end + --collectgarbage() + return + end + ) + end + self.donkeys:synchronize() + + else + for i=1,num_boxes do + self:getCrop(self._feat[i],im,bbox[i]) + end + end - return crop_feat + return self._feat +end + +-- don't do anything. could be the bbox regression or SVM, but I won't add it here +function RCNN:postProcess(im,bbox,output) + return output,bbox end +function RCNN:compute(model,inputs) + local inputs_s = inputs:split(self.max_batch_size,1) + self.output = self.output or inputs.new() + + local ttype = model.output:type() + self.inputs = self.inputs or torch.Tensor():type(ttype) + + for idx, f in ipairs(inputs_s) do + self.inputs:resize(f:size()):copy(f) + local output0 = model:forward(self.inputs) + local fs = f:size(1) + if idx == 1 then + local ss = output0[1]:size():totable() + self.output:resize(inputs:size(1),table.unpack(ss)) + end + self.output:narrow(1,(idx-1)*self.max_batch_size+1,fs):copy(output0) + end + return self.output +end + +function RCNN:__tostring() + local str = torch.type(self) + str = str .. '\n Crop size: ' .. self.crop_size + str = str .. '\n Context padding: ' .. self.padding + if self.use_square then + str = str .. '\n Use square: true' + end + return str +end diff --git a/README.md b/README.md index b1525db..eb80c08 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,209 @@ ## Object detection in torch -Implementation of some object detection frameworks in [torch](http://torch.ch). +This library aims to provide a simple architecture to easily perform object detection in [torch](http://torch.ch). +It currently contains code for training the following frameworks: [RCNN](http://arxiv.org/abs/1311.2524), [SPP](http://arxiv.org/abs/1406.4729) and [Fast-RCNN](http://arxiv.org/abs/1504.08083). + +It consists of 7 basic classes: + +* ImageTransformer: Preprocess an image before feeding it to the network +* DataSetDetection: Generic dataset class for object detection. + * DataSetPascal + * DataSetCOCO (not finished) +* [FeatureProvider](#feat_provider): Implements the necessary operations on images and bounding boxes + * [RCNN](#rcnn) + * [SPP](#spp) + * [Fast-RCNN](#frcnn) +* [BatchProvider](#batch_provider): Samples random patches + * [BatchProviderRC](#batch_provider_rc): ROI-Centric + * [BatchProviderIC](#batch_provider_ic): Image-Centric +* ImageDetect: Encapsulates a model and a feature provider to perform the detection +* Trainer: Simple class to perform the model training. +* Tester: Evaluate the detection using Pascal VOC approach. 
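+
+Schematically, the training-side classes compose as follows (a minimal sketch; constructor arguments are detailed in the sections below, and the `DataSetPascal` argument names are assumptions based on the default paths mentioned at the end of this README):
+```lua
+require 'nnf'
+-- dataset: images plus ground-truth and proposal boxes
+ds = nnf.DataSetPascal{image_set='trainval',
+                       datadir='datasets/VOCdevkit',
+                       roidbdir='data/selective_search_data'}
+-- feature provider: turns (image, boxes) into network inputs
+fp = nnf.RCNN{image_transformer=nnf.ImageTransformer{}}
+-- batch provider: samples training batches through the two objects above
+bp = nnf.BatchProviderRC{dataset=ds, feat_provider=fp}
+```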
+
+
+### Feature Provider
+The `FeatureProvider` class defines the way different algorithms process an image and a set of bounding boxes to feed them to the CNN.
+It implements a `getFeature(image, boxes [,flip])` function, which applies the necessary transformations to the input data (the optional `flip` argument horizontally flips the image and the bounding boxes accordingly), and a `postProcess()` function, which takes the output of the network plus the original inputs and post-processes them. This post-processing could be a bounding-box regression step, for example.
+Every Feature Provider constructor takes as input an `ImageTransformer` (and, where applicable, a `max_batch_size` used for evaluation).
+
+
+#### RCNN
+This is the first work that used CNNs for object detection with bounding box proposals.
+Its transformation is the simplest one: it crops the image at the positions given by the bounding boxes and rescales the crops to a fixed square size.
+The constructor has the following arguments:
+ * `crop_size`
+ * `padding`
+ * `use_square`
+ * `num_threads` number of parallel threads
+
+
+#### SPP
+Contrary to RCNN, SPP crops the images in the feature space (here, `conv5`). This allows computing the convolutional features once for the entire image, making it much more efficient.
+The constructor has the following arguments:
+ * `model`
+ * `pooling_scales`
+ * `num_feat_chns`
+ * `scales`: image scales
+ * `sz_conv_standard`
+ * `step_standard`
+ * `offset0`
+ * `offset`
+ * `inputArea`
+ * `use_cache`
+ * `cachedir`
+
+SPP allows faster training/testing by caching the convolutional feature maps. Instead of an image `I`, you can provide to `getFeature` an image index `i` (from a `DataSetDetection` object), which will load the corresponding feature map from disk (if already computed and if `use_cache` is set to `true`). To easily cache all the features of a dataset on disk, use the method `:saveConvCache()`.
+
+
+#### Fast-RCNN
+Similar to SPP, Fast-RCNN also crops the images in the feature space, but instead of keeping the convolutional layers fixed, it trains them together with the fully-connected layers.
+The constructor has the following arguments:
+ * `scale`
+ * `max_size`
+ * `inputArea`
+
+The output of `getFeature()` is a table with two entries: the preprocessed image(s) as the first element, and the projected bounding boxes as the second. An example of a CNN model structure which can be used with Fast-RCNN is as follows:
+```lua
+-- define features and classifier as you wish.
+-- Can use loadcaffe to read from a saved model, for example
+features = torch.load('alexnet_features.t7')
+classifier = torch.load('alexnet_classifier.t7')
+
+-- define the ROIPooling layer
+-- can use either inn.ROIPooling or nnf.ROIPooling (with CPU support)
+-- let's just use standard parameters from the Fast-RCNN paper
+local ROIPooling = inn.ROIPooling(6,6):setSpatialScale(1/16)
+
+-- create a parallel model which takes as input the images and
+-- the bounding boxes, passes the images through the convolutional
+-- features and simply copies the bounding boxes
+local prl = nn.ParallelTable()
+prl:add(features)
+prl:add(nn.Identity())
+
+-- this is the final model
+model = nn.Sequential()
+model:add(prl)
+model:add(ROIPooling)
+model:add(nn.View(-1):setNumInputDims(3))
+model:add(classifier)
+```
+
+
+### Batch Provider
+This class implements sampling strategies for training object detectors.
+Its constructor takes as arguments a `DataSetDetection` and a `FeatureProvider`.
+It implements a `getBatch` function, which samples from the `DataSet` using the `FeatureProvider`.
+The following arguments are present for all derived classes:
+ * `DataSetDetection`
+ * `FeatureProvider`
+ * `batch_size`
+ * `fg_fraction`
+ * `fg_threshold`
+ * `bg_threshold`
+ * `do_flip`
+
+
+#### BatchProviderRC
+ROI-centric batch provider: it samples patches uniformly over the whole pool of patches.
+To minimize the number of disk accesses, it reads the data for a specified number of batches and stores it in memory.
+The constructor takes the following optional arguments:
+ * `iter_per_batch`
+ * `nTimesMoreData`
+
+
+#### BatchProviderIC
+Image-centric batch provider: it first samples a set of images, and then samples a set of patches from those images.
+The constructor takes the following optional argument (a usage sketch follows this list):
+ * `imgs_per_batch`
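+
+A minimal sketch of the training-side flow with the image-centric provider (assuming `ds` is a dataset as in the sketch above, and `model` follows the Fast-RCNN structure shown earlier):
+```lua
+fp = nnf.FRCNN{image_transformer=nnf.ImageTransformer{}}
+bp = nnf.BatchProviderIC{dataset=ds, feat_provider=fp,
+                         batch_size=128, imgs_per_batch=2}
+bp:setupData()                 -- index fg/bg windows once over the dataset
+batch, targets = bp:getBatch() -- batch is {images, rois} for FRCNN
+output = model:forward(batch)  -- feed output/targets to a criterion as usual
+```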
+
+### Examples
+Here we show a simple example demonstrating how to perform object detection given an image and a set of bounding boxes.
+Run it using `qlua` for the visualization part. A pre-trained model for Fast-RCNN can be found [here](https://drive.google.com/file/d/0B-TTdm1WNtyba3I4Vm1hbFRSS2c/view?usp=sharing).
+```lua
+require 'nnf'
+require 'image'
+require 'cudnn'
+require 'inn'
+require 'nn'
+
+-- load the pre-trained Fast-RCNN model
+params = torch.load('cachedir/frcnn_alexnet.t7')
+loadModel = dofile 'models/frcnn_alexnet.lua'
+model = loadModel(params)
+
+model:add(nn.SoftMax())
+
+model:evaluate()
+model:cuda()
+
+-- prepare the detector
+image_transformer = nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+                                         raw_scale = 255,
+                                         swap = {3,2,1}}
+feat_provider = nnf.FRCNN{image_transformer=image_transformer}
+feat_provider:evaluate() -- testing mode
+
+detector = nnf.ImageDetect(model, feat_provider)
+
+-- load an image
+I = image.lena()
+-- generate some random bounding boxes
+torch.manualSeed(500) -- fix seed for reproducibility
+bboxes = torch.Tensor(100,4)
+bboxes:select(2,1):random(1,I:size(3)/2)
+bboxes:select(2,2):random(1,I:size(2)/2)
+bboxes:select(2,3):random(I:size(3)/2+1,I:size(3))
+bboxes:select(2,4):random(I:size(2)/2+1,I:size(2))
+
+-- detect!
+scores, bboxes = detector:detect(I, bboxes)
+
+-- visualization
+dofile 'visualize_detections.lua'
+threshold = 0.5
+-- classes from Pascal used for training the model
+cls = {'aeroplane','bicycle','bird','boat','bottle','bus','car',
+       'cat','chair','cow','diningtable','dog','horse','motorbike',
+       'person','pottedplant','sheep','sofa','train','tvmonitor'}
+
+w = visualize_detections(I,bboxes,scores,threshold,cls)
+
+```
+This outputs the following
+
+![Lena](examples/example_frcnn_lena.jpg)
+
+
+For an illustration of how to use this code to train a detector, or to evaluate it on Pascal VOC, see the [examples](http://github.com/fmassa/object-detection.torch/tree/refactoring/examples).
+
+#### Bounding box proposals
+Note that this repo doesn't contain code for generating bounding box proposals. For the moment, they are pre-computed and loaded at run time.
+
+#### Model definition
+All the detection frameworks implemented here assume that you already have a pre-trained classification network (trained, for example, on ImageNet). They reuse this pre-trained network as the initialization for the subsequent fine-tuning.
+
+In `models/` you will find the model definitions for several classic networks used in object detection.
+
+The zeiler pretrained model is available at [https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0](https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0).
+It is supposed to be at `data/models`.
+If you want to use your own model with the SPP framework, make sure that it follows the pattern
+```
+model = nn.Sequential()
+model:add(features)
+model:add(pooling_layer)
+model:add(classifier)
+```
+where `features` can be a `nn.Sequential` of several convolutions and `pooling_layer` is the last pooling, with reshaping of the data to feed it to the classifier. See `models/zeiler.lua` for an example.
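+
+Fine-tuning then follows the usual torch training loop; `nnf.Trainer` wraps it. A sketch, with `model` and the batch provider `bp` from the sketches above (the `Trainer` constructor and method names below are assumptions; check `Trainer.lua` and the examples for the exact interface):
+```lua
+criterion = nn.CrossEntropyCriterion():cuda()
+-- assumed signature: Trainer(model, criterion, batch_provider)
+trainer = nnf.Trainer(model, criterion, bp)
+for epoch = 1, 10 do
+  trainer:train(100) -- assumed: number of iterations per call
+end
+```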
### Dependencies

It requires the following packages

- - [xml](http://doc.lubyk.org/xml.html)
- - [matio-ffi.torch](https://github.com/soumith/matio-ffi.torch)
- - [hdf5](https://github.com/deepmind/torch-hdf5)
- - [inn](https://github.com/szagoruyko/imagine-nn)
+ - [xml](http://doc.lubyk.org/xml.html) (for `DataSetPascal`)
+ - [matio-ffi.torch](https://github.com/soumith/matio-ffi.torch) (for `DataSetPascal`)
+ - [hdf5](https://github.com/deepmind/torch-hdf5) (for `SPP`)
+ - [inn](https://github.com/szagoruyko/imagine-nn) (for `SPP`)

To install them all, do

@@ -28,6 +222,10 @@
luarocks install matio

To install `hdf5`, follow the instructions in [here](https://github.com/deepmind/torch-hdf5/blob/master/doc/usage.md)

+### Old code
+The old version of this repo can be found [here](https://github.com/fmassa/object-detection.torch/tree/legacy).
+
+
### Running this code

First, clone this repo
@@ -35,27 +233,5 @@
git clone https://github.com/fmassa/object-detection.torch.git
```

-The zeiler pretrained model is available at [https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0](https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0).
-It is supposed to be at `data/models`.
-If you want to use your own model in SPP framework, make sure that it follows the pattern
-```
-model = nn.Sequential()
-model:add(features)
-model:add(pooling_layer)
-model:add(classifier)
-```
-where `features` can be a `nn.Sequential` of several convolutions and `pooling_layer` is the last pooling with reshaping of the data to feed it to the classifer. See `models/zeiler.lua` for an example.
-
-To finetune the network for detection, simply run
-```
-th main.lua
-```
-
-To get an overview of the different parameters, do
-```
-th main.lua -h
-```
-
The default is to consider that the dataset is present in `datasets/VOCdevkit/VOC2007/`.
The default location of bounding boxes `.mat` files (in RCNN format) is supposed to be in `data/selective_search_data/`.
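+
+To evaluate a trained model over a whole dataset, the same `ImageDetect` object can be driven by the dataset accessors; `nnf.Tester` automates this together with the Pascal VOC evaluation (a sketch, with `ds` and `detector` built as in the sections above):
+```lua
+-- run the detector over every image of a dataset (sketch)
+for i = 1, ds:size() do
+  local I = ds:getImage(i)
+  local boxes = ds:getROIBoxes(i)
+  local scores, bboxes = detector:detect(I, boxes)
+  -- accumulate scores/bboxes here for the VOC evaluation
+end
+```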
- diff --git a/ROIPooling.lua b/ROIPooling.lua new file mode 100644 index 0000000..3ca6d82 --- /dev/null +++ b/ROIPooling.lua @@ -0,0 +1,86 @@ +local ROIPooling,parent = torch.class('nnf.ROIPooling','nn.Module') + +function ROIPooling:__init(W,H) + parent.__init(self) + self.W = W + self.H = H + self.pooler = {}--nn.SpatialAdaptiveMaxPooling(W,H) + self.spatial_scale = 1 + self.gradInput = {torch.Tensor()} +end + +function ROIPooling:setSpatialScale(scale) + self.spatial_scale = scale + return self +end + +function ROIPooling:updateOutput(input) + local data = input[1] + local rois = input[2] + + local num_rois = rois:size(1) + local s = data:size() + local ss = s:size(1) + self.output:resize(num_rois,s[ss-2],self.H,self.W) + + rois[{{},{2,5}}]:add(-1):mul(self.spatial_scale):add(1):round() + rois[{{},2}]:cmin(s[ss]) + rois[{{},3}]:cmin(s[ss-1]) + rois[{{},4}]:cmin(s[ss]) + rois[{{},5}]:cmin(s[ss-1]) + + -- element access is faster if not a cuda tensor + if rois:type() == 'torch.CudaTensor' then + self._rois = self._rois or torch.FloatTensor() + self._rois:resize(rois:size()):copy(rois) + rois = self._rois + end + + if not self._type then self._type = self.output:type() end + + if #self.pooler < num_rois then + local diff = num_rois - #self.pooler + for i=1,diff do + table.insert(self.pooler,nn.SpatialAdaptiveMaxPooling(self.W,self.H):type(self._type)) + end + end + + for i=1,num_rois do + local roi = rois[i] + local im_idx = roi[1] + local im = data[{im_idx,{},{roi[3],roi[5]},{roi[2],roi[4]}}] + self.output[i] = self.pooler[i]:updateOutput(im) + end + return self.output +end + +function ROIPooling:updateGradInput(input,gradOutput) + local data = input[1] + local rois = input[2] + if rois:type() == 'torch.CudaTensor' then + rois = self._rois + end + local num_rois = rois:size(1) + local s = data:size() + local ss = s:size(1) + self.gradInput[1]:resizeAs(data):zero() + + for i=1,num_rois do + local roi = rois[i] + local im_idx = roi[1] + local r = {im_idx,{},{roi[3],roi[5]},{roi[2],roi[4]}} + local im = data[r] + local g = self.pooler[i]:updateGradInput(im,gradOutput[i]) + self.gradInput[1][r]:add(g) + end + return self.gradInput +end + +function ROIPooling:type(type) + parent.type(self,type) + for i=1,#self.pooler do + self.pooler[i]:type(type) + end + self._type = type + return self +end diff --git a/SPP.lua b/SPP.lua index cfd67a1..4456c2c 100644 --- a/SPP.lua +++ b/SPP.lua @@ -1,18 +1,89 @@ local hdf5 = require 'hdf5' +local flipBoundingBoxes = paths.dofile('utils.lua').flipBoundingBoxes local SPP = torch.class('nnf.SPP') - ---TODO vectorize code ? 
-function SPP:__init(dataset,model) +SPP._isFeatureProvider = true + +-- argcheck crashes with that many arguments, and using unordered +-- doesn't seem practical + +local argcheck = paths.dofile('argcheck.lua')--require 'argcheck' +local initcheck = argcheck{ + pack=true, + {name="model", + type="nn.Sequential", + help="conv5 model"}, + {name="dataset", + type="nnf.DataSetPascal", -- change to allow other datasets + opt=true, + help="A dataset class"}, + {name="pooling_scales", + type="table", + default={{1,1},{2,2},{3,3},{6,6}}, + help="pooling scales"}, + {name="num_feat_chns", + type="number", + default=256, + help="number of feature channels to be pooled"}, + {name="scales", + type="table", + default={480,576,688,874,1200}, + help="image scales"}, + {name="sz_conv_standard", + type="number", + default=13, + help=""}, + {name="step_standard", + type="number", + default=16, + help=""}, + {name="offset0", + type="number", + default=21, + help=""}, + {name="offset", + type="number", + default=6.5, + help=""}, + {name="inputArea", + type="number", + default=224^2, + help="input area"}, + {name="image_transformer", + type="nnf.ImageTransformer", + default=nnf.ImageTransformer{}, + help="Class to preprocess input images"}, + {name="use_cache", + type="boolean", + default=true, + help=""}, + {name="cachedir", + type="string", + opt=true, + help=""}, +} + + + +function SPP:__init(...) self.dataset = dataset self.model = model - self.spp_pooler = inn.SpatialPyramidPooling({{1,1},{2,2},{3,3},{6,6}}):float() - self.image_transformer = nnf.ImageTransformer{} + local opts = initcheck(...) + for k,v in pairs(opts) do self[k] = v end + + --self.num_feat_chns = 256 + --self.pooling_scales = {{1,1},{2,2},{3,3},{6,6}} + local pyr = torch.Tensor(self.pooling_scales):t() + local pooled_size = pyr[1]:dot(pyr[2]) + self.output_size = {self.num_feat_chns*pooled_size} + + --self.spp_pooler = inn.SpatialPyramidPooling(self.pooling_scales):float() + --self.image_transformer = nnf.ImageTransformer{} +--[[ -- paper=864, their code=874 self.scales = {480,576,688,874,1200} -- 874 - self.randomscale = true self.sz_conv_standard = 13 self.step_standard = 16 @@ -24,11 +95,20 @@ function SPP:__init(dataset,model) self.use_cache = true self.cachedir = nil - + --]] + self.train = true end +function SPP:training() + self.train = true +end -function SPP:getCrop(im_idx,bbox,flip) +function SPP:evaluate() + self.train = false +end + +-- here just to check +function SPP:getCrop_old(im_idx,bbox,flip) local flip = flip or false if self.curr_im_idx ~= im_idx or self.curr_doflip ~= flip then @@ -36,52 +116,87 @@ function SPP:getCrop(im_idx,bbox,flip) self.curr_im_feats = self:getConv5(im_idx,flip) self.curr_doflip = flip end - - local bbox = bbox + if flip then - local tt = bbox[1] - bbox[1] = self.curr_im_feats.imSize[3]-bbox[3]+1 - bbox[3] = self.curr_im_feats.imSize[3]-tt +1 + flipBoundingBoxes(bbox,self.curr_im_feats.imSize[3]) end local bestScale,bestBbox = self:getBestSPPScale(bbox,self.curr_im_feats.imSize,self.curr_im_feats.scales) local box_norm = self:getResposeBoxes(bestBbox) local crop_feat = self:getCroppedFeat(self.curr_im_feats.rsp[bestScale],box_norm) + + return crop_feat +end + +function SPP:getCrop(im_idx,bbox,flip) + local flip = flip or false + + if self.curr_im_idx ~= im_idx or self.curr_doflip ~= flip then + self.curr_im_idx = im_idx + self.curr_im_feats = self:getConv5(im_idx,flip) + self.curr_doflip = flip + end + + if type(bbox) == 'table' then + bbox = torch.FloatTensor(bbox) + elseif 
torch.isTensor(bbox) and flip then + -- creates a copy of the bboxes to avoid modifying the original + -- bboxes in the flipping + self._bbox = self._bbox or torch.FloatTensor() + self._bbox:resize(bbox:size()):copy(bbox) + bbox = self._bbox + end + bbox = bbox:dim() == 1 and bbox:view(1,-1) or bbox + + if flip then + flipBoundingBoxes(bbox,self.curr_im_feats.imSize[3]) + end + + local feat = self.curr_im_feats + local bestScale,bestbboxes,bboxes_norm,projected_bb = + self:projectBoxes(feat, bbox, feat.scales) + + local crop_feat = {} + for i=1,bbox:size(1) do + local bbox_ = projected_bb[i] + local patch = feat.rsp[bestScale[i]][{{},{bbox_[2],bbox_[4]},{bbox_[1],bbox_[3]}}] + table.insert(crop_feat,patch) + end return crop_feat end -function SPP:getFeature(im_idx,bbox,flip) +-- here just to check +function SPP:getFeature_old(im_idx,bbox,flip) local flip = flip or false - local crop_feat = self:getCrop(im_idx,bbox,flip) + local crop_feat = self:getCrop_old(im_idx,bbox,flip) local feat = self.spp_pooler:forward(crop_feat) - return feat end -local function cleaningForward(input,model) - local currentOutput = model.modules[1]:updateOutput(input) - for i=2,#model.modules do - collectgarbage() - collectgarbage() - currentOutput = model.modules[i]:updateOutput(currentOutput) - model.modules[i-1].output:resize() - model.modules[i-1].gradInput:resize() - if model.modules[i-1].gradWeight then - model.modules[i-1].gradWeight:resize() - end - if model.modules[i-1].gradBias then - model.modules[i-1].gradBias:resize() - end +function SPP:getFeature(im_idx,bbox,flip) + local flip = flip or false + + local crop_feat = self:getCrop(im_idx,bbox,flip) + + self._feat = self._feat or torch.FloatTensor() + self._feat:resize(#crop_feat,table.unpack(self.output_size)) + for i=1,#crop_feat do + self._feat[i]:copy(self.spp_pooler:forward(crop_feat[i])) end - model.output = currentOutput - return currentOutput + + return self._feat end +-- SPP is meant to keep a cache of the conv5 features +-- for fast training. In this case, we suppose that +-- we provide the image index in the dataset. +-- We can also use an image as input, in which case it +-- won't save a conv5 cache. 
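+-- Usage sketch (assuming `spp` was constructed with a dataset and `img` is a +-- 3xHxW image tensor; the values are illustrative): +-- local f1 = spp:getConv5(42) -- by image index, cached on disk +-- local f2 = spp:getConv5(42, true) -- flipped features, cached separately +-- local f3 = spp:getConv5(img) -- image tensor input, cache disabled 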
function SPP:getConv5(im_idx,flip) local scales = self.scales local flip = flip or false @@ -93,8 +208,16 @@ function SPP:getConv5(im_idx,flip) if not cachedir then cachedir = '' end + + local im_name + if not self.dataset then + self.use_cache = false + im_name = '' + else + im_name = self.dataset.img_ids[im_idx] + end - local cachefile = paths.concat(self.cachedir,self.dataset.img_ids[im_idx]) + local cachefile = paths.concat(cachedir,im_name) if flip then cachefile = cachefile..'_flip' @@ -110,7 +233,12 @@ function SPP:getConv5(im_idx,flip) feats.rsp[tostring(i)] = nil end else - local I = self.dataset:getImage(im_idx):float() + local I + if type(im_idx) == 'number' and self.dataset then + I = self.dataset:getImage(im_idx):float() + elseif torch.isTensor(im_idx) then + I = im_idx + end I = self.image_transformer:preprocess(I) if flip then I = image.hflip(I) @@ -129,7 +257,6 @@ function SPP:getConv5(im_idx,flip) local Ir = image.scale(I,sc,sr):type(mtype) local f = self.model:forward(Ir) - --local f = cleaningForward(Ir,self.model) feats.rsp[i] = torch.FloatTensor(f:size()):copy(f) end @@ -180,7 +307,8 @@ function SPP:getBestSPPScale(bbox,imSize,scales) local bestScale - if self.randomscale then + if self.train then + -- in training, select the scales randomly bestScale = torch.random(1,num_scales) else local inputArea = self.inputArea @@ -253,6 +381,141 @@ function SPP:getCroppedFeat(feat,bbox) end + + +local function unique(bboxes) + local idx = {} + local is_unique = torch.ones(bboxes:size(1)) + for i=1,bboxes:size(1) do + local b = bboxes[i] + local n = b[1]..'_'..b[2]..'_'..b[3]..'_'..b[4]..'_'..b[5] + if idx[n] then + is_unique[i] = 0 + else + idx[n] = i + end + end + return is_unique +end + +-- given a table with the conv5 features at different scales and bboxes in +-- the original image, project the bboxes in the conv5 space +function SPP:projectBoxes(feat, bboxes, scales) + -- bboxes is a nx4 Tensor with candidate bounding boxes + -- in [x1, y1, x2, y2] format + local imSize = feat.imSize + + local scales = scales or self.scales + local min_dim = math.min(imSize[2],imSize[3]) + + local sz_conv_standard = self.sz_conv_standard + local step_standard = self.step_standard + + local nboxes = bboxes:size(1) + + -- get best SPP scale + local bestScale = torch.FloatTensor(nboxes) + + if self.train then + -- in training, select the scales randomly + bestScale:random(1,#scales) + else + local bboxArea = bboxes.new():resize(nboxes):zero() + bboxArea:map2(bboxes[{{},3}],bboxes[{{},1}],function(xx,xx2,xx1) return xx2-xx1+1 end) + bboxArea:map2(bboxes[{{},4}],bboxes[{{},2}],function(xx,xx2,xx1) return xx*(xx2-xx1+1) end) + + local expected_scale = bboxArea:float():pow(-0.5):mul(sz_conv_standard*step_standard*min_dim) + expected_scale:round() + + local nbboxDiffArea = torch.FloatTensor(#scales,nboxes) + + for i=1,#scales do + nbboxDiffArea[i]:copy(expected_scale):add(-scales[i]):abs() + end + + bestScale = select(2,nbboxDiffArea:min(1))[1] + end + + local mul_factor = torch.FloatTensor(nboxes,1):copy(bestScale) + local idx = 0 + mul_factor:apply(function(x) + idx = idx + 1 + return (scales[x]-1)/(min_dim-1) + end) + + local bestbboxes = torch.FloatTensor(nboxes,4):copy(bboxes) + bestbboxes:add(-1):cmul(mul_factor:expand(nboxes,4)):add(1) + + -- response boxes + + local offset0 = self.offset0 + local offset = self.offset + + local bboxes_norm = bestbboxes:clone() + bboxes_norm[{{},{1,2}}]:add(-offset0 + offset):div(step_standard):add( 0.5) + bboxes_norm[{{},{1,2}}]:floor():add(1) + bboxes_norm[{{},{3,4}}]:add(-offset0 - offset):div(step_standard):add(-0.5) + bboxes_norm[{{},{3,4}}]:ceil():add(1) + + local x0gtx1 = bboxes_norm[{{},1}]:gt(bboxes_norm[{{},3}]) + local y0gty1 = bboxes_norm[{{},2}]:gt(bboxes_norm[{{},4}]) + + bboxes_norm[{{},1}][x0gtx1] = bboxes_norm[{{},1}][x0gtx1]:add(bboxes_norm[{{},3}][x0gtx1]):div(2) + bboxes_norm[{{},3}][x0gtx1] = (bboxes_norm[{{},1}][x0gtx1]) + + bboxes_norm[{{},2}][y0gty1] = bboxes_norm[{{},2}][y0gty1]:add(bboxes_norm[{{},4}][y0gty1]):div(2) + bboxes_norm[{{},4}][y0gty1] = (bboxes_norm[{{},2}][y0gty1]) + + -- remove repeated projections + if self.dedup then + local is_unique = unique(torch.cat(bboxes_norm,bestScale:view(-1,1),2)) + local lin = torch.range(1,is_unique:size(1)):long() -- can also use cumsum instead + bboxes_norm = bboxes_norm:index(1,lin[is_unique]) + end + -- clamp on boundaries + + local projected_bb = bboxes_norm:clone() + + for i=1,#scales do + local this_scale = bestScale:eq(i) + if this_scale:numel() > 0 then + projected_bb[{{},2}][this_scale] = projected_bb[{{},2}][this_scale]:clamp(1,feat.rsp[i]:size(2)) + projected_bb[{{},4}][this_scale] = projected_bb[{{},4}][this_scale]:clamp(1,feat.rsp[i]:size(2)) + projected_bb[{{},1}][this_scale] = projected_bb[{{},1}][this_scale]:clamp(1,feat.rsp[i]:size(3)) + projected_bb[{{},3}][this_scale] = projected_bb[{{},3}][this_scale]:clamp(1,feat.rsp[i]:size(3)) + end + end + + --projected_bb:floor() + return bestScale,bestbboxes,bboxes_norm,projected_bb +end + +-- doesn't do anything by default. Could be the bbox regression or SVM, but I won't add it here +function SPP:postProcess(im,bbox,output) + return output,bbox +end + +function SPP:compute(model,inputs) + local inputs_s = inputs:split(self.max_batch_size,1) + + self.output = self.output or inputs.new() + + local ttype = model.output:type() + self.inputs = self.inputs or torch.Tensor():type(ttype) + + for idx, f in ipairs(inputs_s) do + self.inputs:resize(f:size()):copy(f) + local output0 = model:forward(self.inputs) + local fs = f:size(1) + if idx == 1 then + local ss = output0[1]:size():totable() + self.output:resize(inputs:size(1),table.unpack(ss)) + end + self.output:narrow(1,(idx-1)*self.max_batch_size+1,fs):copy(output0) + end + return self.output +end + function SPP:type(t_type) self._type = t_type --self.spp_pooler = self.spp_pooler:type(t_type) @@ -270,3 +533,38 @@ end function SPP:cuda() return self:type('torch.CudaTensor') end + +function SPP:saveConvCache() + assert(self.dataset, 'need to set a dataset to save the cache') + assert(self.use_cache, 'use_cache needs to be true') + assert(self.cachedir, 'cachedir needs to be set') + + local dataset = self.dataset + + print('Caching features for '..dataset.dataset_name..' ' + ..dataset.image_set) + local feat_cachedir = self.cachedir + for i=1,dataset:size() do + xlua.progress(i,dataset:size()) + local im_name = dataset.img_ids[i] + local cachefile = paths.concat(feat_cachedir,im_name) + if not paths.filep(cachefile..'.h5') then + local f = self:getConv5(i) + end + if not paths.filep(cachefile..'_flip.h5') then + local f = self:getConv5(i,true) + end + if i%50 == 0 then + collectgarbage() + collectgarbage() + end + end +end + +function SPP:__tostring() + local str = torch.type(self) + str = str .. '\n Image scales: [' .. table.concat(self.scales,', ')..']' + str = str .. '\n Input area: ' .. 
self.inputArea + return str +end + diff --git a/SVMTrainer.lua b/SVMTrainer.lua index 6f857b1..61f6597 100644 --- a/SVMTrainer.lua +++ b/SVMTrainer.lua @@ -1,7 +1,7 @@ local SVMTrainer = torch.class('nnf.SVMTrainer') function SVMTrainer:__init(module,feat_provider) - self.dataset = feat_provider.dataset + --self.dataset = dataset self.module = module self.feat_provider = feat_provider @@ -21,58 +21,54 @@ function SVMTrainer:__init(module,feat_provider) self.evict_thresh = -1.2 self.hard_thresh = -1.0001 - self.pos_feat_type = 'mixed' -- real, mixed, synthetic + self.pos_feat_type = 'real' -- real, mixed, synthetic self.synth_neg = true - self:getFeatureStats() + --self:getFeatureStats() end -function SVMTrainer:getFeatureStats(feat_provider,module) +function SVMTrainer:getFeatureStats(dataset,feat_provider,module) - if true then - self.mean_norm = 30.578503376687 + if false then + self.mean_norm = 19.848824140978--30.578503376687 return end local feat_provider = feat_provider or self.feat_provider local module = module or self.module - local dataset = feat_provider.dataset + local dataset = dataset local boxes_per_image = 200 local num_images = math.min(dataset:size(),200) local valid_idx = torch.randperm(dataset:size()) valid_idx = valid_idx[{{1,num_images}}] - - local fc5_feat = torch.FloatTensor() - local fc7_feat = torch.FloatTensor() local feat_cumsum = 0 local feat_n = 0 + local bboxes = torch.IntTensor(boxes_per_image,4) print('Getting feature stats') for i=1,num_images do xlua.progress(i,num_images) local img_idx = valid_idx[i] + local I = dataset:getImage(img_idx) local rec = dataset:attachProposals(img_idx) local num_bbox = math.min(boxes_per_image,rec:size()) - fc5_feat:resize(num_bbox,unpack(self.feat_dim)) - fc7_feat:resize(num_bbox,4096) - - local bbox_idx = torch.randperm(rec:size()) + local bbox_idx = torch.randperm(rec:size()):long() bbox_idx = bbox_idx[{{1,num_bbox}}] - for j=1,num_bbox do - local bbox_id = bbox_idx[j] - fc5_feat[j] = feat_provider:getFeature(img_idx,rec.boxes[bbox_id]) - end - fc7_feat:copy(module:forward(fc5_feat:cuda())) - feat_n = feat_n + num_bbox - feat_cumsum = feat_cumsum + fc7_feat:pow(2):sum(2):sqrt():sum() + bboxes:index(rec.boxes,1,bbox_idx) + + local feat = feat_provider:getFeature(I,bboxes) + local final_feat = feat_provider:compute(module, feat) + + feat_n = feat_n + num_bbox + feat_cumsum = feat_cumsum + final_feat:pow(2):sum(2):sqrt():sum() end self.mean_norm = feat_cumsum/feat_n end @@ -82,10 +78,10 @@ function SVMTrainer:scaleFeatures(feat) feat:mul(target_norm/self.mean_norm) end -function SVMTrainer:getPositiveFeatures(feat_provider,module) +function SVMTrainer:getPositiveFeatures(dataset,feat_provider,module) local feat_provider = feat_provider or self.feat_provider local module = module or self.module - local dataset = feat_provider.dataset + local dataset = dataset module:evaluate() local positive_data = {} for cl_idx,cl_name in pairs(dataset.classes) do @@ -98,6 +94,11 @@ function SVMTrainer:getPositiveFeatures(feat_provider,module) local not_done = torch.ByteTensor(dataset.num_classes):fill(1) for i=1,end_idx do xlua.progress(i,end_idx) + local I = dataset:getImage(i) + --local gt_boxes, gt_classes = dataset:getGTBoxes(i) + + + local rec = dataset:attachProposals(i) local overlap = rec.overlap_class local is_gt = rec.gt @@ -111,7 +112,10 @@ function SVMTrainer:getPositiveFeatures(feat_provider,module) for j=1,rec:size() do if overlap[j][cl_idx]==1 and is_gt[j]==1 then count = count + 1 - fc5_feat[count] = 
feat_provider:getFeature(i,rec.boxes[j]) + local fff = feat_provider:getFeature(I,rec.boxes[j])[1] + --print(fff:size()) + --print(fc5_feat:size()) + fc5_feat[count] = fff end end if num_pos > 0 then @@ -133,15 +137,16 @@ function SVMTrainer:getPositiveFeatures(feat_provider,module) return positive_data end -function SVMTrainer:sampleNegativeFeatures(ind,feat_provider,module) +function SVMTrainer:sampleNegativeFeatures(ind,dataset,feat_provider,module) local feat_provider = feat_provider or self.feat_provider - local dataset = feat_provider.dataset + local dataset = dataset local module = module or self.module module:evaluate() collectgarbage() local first_time = self.first_time + local I = dataset:getImage(ind) local rec = dataset:attachProposals(ind) local overlap = rec.overlap_class @@ -154,11 +159,9 @@ collectgarbage() caches[cl_name] = {X_neg = {},num_added = 0} end - fc5_feat:resize(rec:size(),unpack(self.feat_dim)) - for j=1,rec:size() do - fc5_feat[j] = feat_provider:getFeature(ind,rec.boxes[j]) - end - fc7_feat:resize(rec:size(),4096):copy(module:forward(fc5_feat:cuda())) + local feat = feat_provider:getFeature(I,rec.boxes) + local fc7_feat = feat_provider:compute(module, feat) + self:scaleFeatures(fc7_feat) if first_time then @@ -264,16 +267,16 @@ function SVMTrainer:addPositiveFeatures(feat_provider,module) end -function SVMTrainer:train() - local dataset = self.dataset +function SVMTrainer:train(dataset) + --local dataset = self.dataset - print('Experiment name: '..self.expname) + --print('Experiment name: '..self.expname) self.W = torch.Tensor(dataset.num_classes,4096) self.B = torch.Tensor(dataset.num_classes) --self:selectPositiveFeatures() - self:addPositiveFeatures() + --self:addPositiveFeatures() local caches = {} for cl_idx,cl_name in pairs(dataset.classes) do @@ -313,7 +316,7 @@ function SVMTrainer:train() X = self:sampleNegativeFeatures(i-num_synth) end else - X = self:sampleNegativeFeatures(i) + X = self:sampleNegativeFeatures(i,dataset) end for cl_idx,cl_name in pairs(dataset.classes) do @@ -396,7 +399,7 @@ function SVMTrainer:train() end first_time = false end - torch.save('/home/francisco/work/projects/cross_domain/cachedir/svm_models/svm_model,'..self.expname..'.t7',{W=self.W,B=self.B}) + --torch.save('/home/francisco/work/projects/cross_domain/cachedir/svm_models/svm_model,'..self.expname..'.t7',{W=self.W,B=self.B}) return caches--X_all end diff --git a/Tester.lua b/Tester.lua index 4c84ace..5ff2bc1 100644 --- a/Tester.lua +++ b/Tester.lua @@ -6,14 +6,11 @@ local VOCevaldet = utils.VOCevaldet local Tester = torch.class('nnf.Tester') -function Tester:__init(module,feat_provider) - self.dataset = feat_provider.dataset - self.module = module +function Tester:__init(module,feat_provider,dataset) + self.dataset = dataset self.feat_provider = feat_provider + self.module = module - self.feat_dim = {256*50} - self.max_batch_size = 4000 - self.cachefolder = nil self.cachename = nil self.suffix = '' @@ -58,30 +55,44 @@ function Tester:validate(criterion) return err/num_batches end +local function print_scores(dataset,res) + print('Results:') + -- print class names + io.write('|') + for i = 1, dataset.num_classes do + io.write(('%5s|'):format(dataset.classes[i])) + end + io.write('\n|') + -- print class scores + for i = 1, dataset.num_classes do + local l = #dataset.classes[i] < 5 and 5 or #dataset.classes[i] + local l = res[i] == res[i] and l-5 or l-3 + if l > 0 then + io.write(('%.3f%'..l..'s|'):format(res[i],' ')) + else + io.write(('%.3f|'):format(res[i])) + end + end + 
io.write('\n') + io.write(('mAP: %.4f\n'):format(res:mean(1)[1])) +end + + function Tester:test(iteration) local dataset = self.dataset local module = self.module local feat_provider = self.feat_provider - local pathfolder = paths.concat(self.cachefolder,'test_iter'..iteration) - paths.mkdir(pathfolder) - module:evaluate() + feat_provider:evaluate() dataset:loadROIDB() - local feats = torch.FloatTensor() - local feats_batched = {} - local feats_cuda = torch.CudaTensor() - - local output = torch.FloatTensor() - - local output_dim = module:get(module:size()) - - local softmax = nn.SoftMax():float() - + local detec = nnf.ImageDetect(module, feat_provider) local boxes - -- + local im + local output + local aboxes = {} for i=1,dataset.num_classes do table.insert(aboxes,{}) @@ -89,50 +100,41 @@ local max_per_set = 5*dataset:size() local max_per_image = 100 - local thresh = torch.ones(dataset.num_classes):mul(-1.5) + local thresh = torch.ones(dataset.num_classes):mul(0.05) local scored_boxes = torch.FloatTensor() local timer = torch.Timer() local timer2 = torch.Timer() local timer3 = torch.Timer() - + + -- SPP is more efficient if we cache the features. We treat it differently than + -- the other feature providers + local pass_index = torch.type(feat_provider) == 'nnf.SPP' and true or false + for i=1,dataset:size() do timer:reset() io.write(('test: (%s) %5d/%-5d '):format(dataset.dataset_name,i,dataset:size())); - boxes = dataset:getROIBoxes(i):float() - local num_boxes = boxes:size(1) - -- compute image feature maps - timer3:reset() - feats:resize(num_boxes,unpack(self.feat_dim)) - for idx=1,num_boxes do - feats[idx] = feat_provider:getFeature(i,boxes[idx]) + + if pass_index then + im = i + else + im = dataset:getImage(i) end - local tt = timer3:time().real - -- compute classification scores - torch.split(feats_batched,feats,self.max_batch_size,1) + boxes = dataset:getROIBoxes(i):float() + timer3:reset() - for idx,f in ipairs(feats_batched) do - local fs = f:size(1) - feats_cuda:resize(fs,unpack(self.feat_dim)):copy(f) - module:forward(feats_cuda) - if idx == 1 then - local out_size = module.output:size():totable() - table.remove(out_size,1) - output:resize(num_boxes,unpack(out_size)) - end - output:narrow(1,(idx-1)*self.max_batch_size+1,fs):copy(module.output) - end - local add_bg = 0 - if dataset.num_classes ~= output:size(2) then -- if there is no svm - output = softmax:forward(output) - add_bg = 1 - end - + output,boxes = detec:detect(im,boxes) + + local add_bg = 1 + local tt = 0 local tt2 = timer3:time().real timer2:reset() + -- do an NMS for each class, based on the scores from the classifier for j=1,dataset.num_classes do local scores = output:select(2,j+add_bg) + -- only select detections with a score greater than thresh + -- this avoids doing NMS on too many bboxes with low score local idx = torch.range(1,scores:numel()):long() local idx2 = scores:gt(thresh[j]) idx = idx[idx2] @@ -151,6 +153,7 @@ aboxes[j][i] = torch.FloatTensor() end + -- remove low scoring boxes and update threshold if i%1000 == 0 then aboxes[j],thresh[j] = keep_top_k(aboxes[j],max_per_set) end @@ -158,10 +161,11 @@ end io.write((' prepare feat time: %.3f, forward time: %.3f, select time: %.3fs, total time: %.3fs\n'):format(tt,tt2,timer2:time().real,timer:time().real)); - --collectgarbage() - --mattorch.save(paths.concat(pathfolder,dataset.img_ids[i]..'.mat'),output:double()) end + local pathfolder = 
paths.concat(self.cachefolder,'test_iter'..iteration) + paths.mkdir(pathfolder) + for i = 1,dataset.num_classes do -- go back through and prune out detections below the found threshold for j = 1,dataset:size() do @@ -174,10 +178,14 @@ function Tester:test(iteration) end end end - save_file = paths.concat(pathfolder, dataset.classes[i].. '_boxes_'.. - dataset.dataset_name..self.suffix) - torch.save(save_file, aboxes) + --save_file = paths.concat(pathfolder, dataset.classes[i].. '_boxes_'.. + -- dataset.dataset_name..self.suffix) + --torch.save(save_file, aboxes) end + save_file = paths.concat(pathfolder, 'boxes_'.. + dataset.dataset_name..self.suffix) + torch.save(save_file, aboxes) + local res = {} for i=1,dataset.num_classes do @@ -185,27 +193,11 @@ function Tester:test(iteration) res[i] = VOCevaldet(dataset,aboxes[i],cls) end res = torch.Tensor(res) - print('Results:') - -- print class names - io.write('|') - for i = 1, dataset.num_classes do - io.write(('%5s|'):format(dataset.classes[i])) - end - io.write('\n|') - -- print class scores - for i = 1, dataset.num_classes do - local l = #dataset.classes[i] < 5 and 5 or #dataset.classes[i] - local l = res[i] == res[i] and l-5 or l-3 - if l > 0 then - io.write(('%.3f%'..l..'s|'):format(res[i],' ')) - else - io.write(('%.3f|'):format(res[i])) - end - end - io.write('\n') - io.write(('mAP: %.4f\n'):format(res:mean(1)[1])) + + print_scores(dataset,res) -- clean roidb to free memory dataset.roidb = nil return res end + diff --git a/Trainer.lua b/Trainer.lua index 180b1eb..8ac9c47 100644 --- a/Trainer.lua +++ b/Trainer.lua @@ -1,18 +1,22 @@ require 'nn' require 'optim' require 'xlua' +local utils = paths.dofile('utils.lua') +local recursiveResizeAsCopyTyped = utils.recursiveResizeAsCopyTyped local Trainer = torch.class('nnf.Trainer') -function Trainer:__init(module,criterion) +function Trainer:__init(module,criterion,batch_provider,optimState) self.module = module self.criterion = criterion + self.batch_provider = batch_provider self.parameters,self.gradParameters = self.module:getParameters() - self.optimState = {learningRate = 1e-3, weightDecay = 0.0005, momentum = 0.9, - learningRateDecay = 0} + self.optimState = optimState or + {learningRate = 1e-3, weightDecay = 0.0005, momentum = 0.9, + learningRateDecay = 0, dampening = 0} self.epoch = 0 @@ -22,40 +26,39 @@ function Trainer:__init(module,criterion) end +function Trainer:train(maxIter) + local maxIter = maxIter or 20 + local ttype = self.parameters:type() -function Trainer:train(inputs,targets) - -- only for batches - assert(targets:dim()>2,'Trainer is only for batches') - self.module:training() - self._input = self._input or torch.CudaTensor() - self._target = self._target or torch.CudaTensor() local module = self.module + local batch_provider = self.batch_provider local parameters = self.parameters local gradParameters = self.gradParameters local criterion = self.criterion local optimState = self.optimState - local batchSize = inputs:size(2) - local maxIter = inputs:size(1) - if self.confusion then self.confusion:zero() end local err = 0 - self._input:resize(inputs[1]:size()) - self._target:resize(targets[1]:size()) - local input = self._input - local target = self._target - + local input + local target + for t=1,maxIter do xlua.progress(t,maxIter) - input:copy(inputs[t]) - target:copy(targets[t]) + -- get training batch + self.input0,self.target0 = batch_provider:getBatch() + + -- copy to ttype + self.input,self.input0 = recursiveResizeAsCopyTyped(self.input,self.input0,ttype) + 
self.target,self.target0 = recursiveResizeAsCopyTyped(self.target,self.target0,ttype) + input = self.input + target = self.target local feval = function(x) if x ~= parameters then @@ -70,11 +73,6 @@ module:backward(input,df_do) - if self.normalize then - gradParameters:div(batchSize) - f = f/batchSize - end - if self.confusion then self.confusion:batchAdd(outputs,target) end @@ -88,6 +86,6 @@ table.insert(self.fx,err/maxIter) - self.module:evaluate() + --self.module:evaluate() self.epoch = self.epoch + 1 end diff --git a/argcheck.lua b/argcheck.lua new file mode 100644 index 0000000..2ce4e3b --- /dev/null +++ b/argcheck.lua @@ -0,0 +1,73 @@ +local usage = require 'argcheck.usage' +local env = require 'argcheck.env' +-------------------------------------------------------------------------------- +-- Simple argument function with a similar interface to argcheck, but which +-- supports lots of default arguments for named rules. +-- Not as fast and elegant though. +-------------------------------------------------------------------------------- +local function argcheck(rules) + -- basic checks + assert(not (rules.noordered and rules.nonamed), 'rules must be at least ordered or named') + assert(rules.help == nil or type(rules.help) == 'string', 'rules help must be a string or nil') + assert(rules.doc == nil or type(rules.doc) == 'string', 'rules doc must be a string or nil') + assert(not rules.overload, 'rules overload not supported') + assert(not (rules.doc and rules.help), 'choose between doc or help, not both') + for _, rule in ipairs(rules) do + assert(rule.name, 'rule must have a name field') + assert(rule.type == nil or type(rule.type) == 'string', 'rule type must be a string or nil') + assert(rule.help == nil or type(rule.help) == 'string', 'rule help must be a string or nil') + assert(rule.doc == nil or type(rule.doc) == 'string', 'rule doc must be a string or nil') + assert(rule.check == nil or type(rule.check) == 'function', 'rule check must be a function or nil') + --assert(rule.defaulta == nil or type(rule.defaulta) == 'string', 'rule defaulta must be a string or nil') + --assert(rule.defaultf == nil or type(rule.defaultf) == 'function', 'rule defaultf must be a function or nil') + end + + if not (rules.pack == nil or rules.pack) then + error('pack needs to be true') + end + if rules.nonamed then + error('only named arguments are supported') + end + + local arginfo = {} + for k,v in ipairs(rules) do + arginfo[v.name] = k + end + + local function func(args) + + local iargs = {} + for _,rule in ipairs(rules) do + iargs[rule.name] = rule.default + if rule.default == nil and + args[rule.name] == nil and + rule.opt ~= true then + print(usage(rules)) + error('Missing argument: '..rule.name) + end + end + + for k,v in pairs(args) do + if not env.istype(v,rules[arginfo[k]].type) then + print(usage(rules)) + error('Wrong type: '.. 
k) + end + + if rules[arginfo[k]].check then + local c = rules[arginfo[k]].check(args[k]) + if not c then + print(usage(rules)) + error('check did not pass') + end + end + iargs[k] = args[k] + end + + return iargs + end + + return func + +end + +return argcheck diff --git a/config.lua b/config.lua new file mode 100644 index 0000000..0e0ea08 --- /dev/null +++ b/config.lua @@ -0,0 +1,112 @@ +require 'nnf' + +local configs = {} + +local image_transformer_params = { + mean_pix={102.9801,115.9465,122.7717}, + raw_scale = 255, + swap = {3,2,1} +} + +configs.image_transformer_params = image_transformer_params + +configs.datasetDir = 'datasets/VOCdevkit' +configs.roidbDir = 'data/selective_search_data' + +-------------------------------------------------------------------------------- +-- Training Parameters +-------------------------------------------------------------------------------- + +local train_params = { + batch_size = 16,--128, + fg_fraction = 0.25, + fg_threshold = 0.5, + bg_threshold = {0.0,0.5}, + do_flip = true, +} + +configs.train_params = train_params + +-------------------------------------------------------------------------------- +-- Feature Provider Parameters +-------------------------------------------------------------------------------- + +configs.algo = {} + +-------------------------------------------------------------------------------- +-- RCNN +-------------------------------------------------------------------------------- + +local fp_params = { + crop_size = 227, + padding = 16, + use_square = false, +} +local bp_params = { + iter_per_batch = 100, + nTimesMoreData = 10, +} + +local RCNN = { + fp_params=fp_params, + bp_params=bp_params, + bp = nnf.BatchProviderRC +} + +configs.algo.RCNN = RCNN + +-------------------------------------------------------------------------------- +-- SPP +-------------------------------------------------------------------------------- +-- +local num_chns = 256 +local pooling_scales = {{1,1},{2,2},{3,3},{6,6}} +local pyr = torch.Tensor(pooling_scales):t() +local pooled_size = pyr[1]:dot(pyr[2]) +local feat_dim = {num_chns*pooled_size} + +local fp_params = { + scales = {480,576,688,874,1200}, + sz_conv_standard = 13, + step_standard = 16, + offset0 = 21, + offset = 6.5, + inputArea = 224^2, + pooling_scales = pooling_scales, + num_feat_chns = num_chns, +} +local bp_params = { + iter_per_batch = 500, + nTimesMoreData = 10, +} + +local SPP = { + fp_params=fp_params, + bp_params=bp_params, + bp = nnf.BatchProviderRC +} + +configs.algo.SPP = SPP + +-------------------------------------------------------------------------------- +-- Fast-RCNN +-------------------------------------------------------------------------------- + +local fp_params = { + scale = {600}, + max_size = 1000, +} +local bp_params = { + imgs_per_batch = 2, +} + +local FRCNN = { + fp_params=fp_params, + bp_params=bp_params, + bp = nnf.BatchProviderIC +} + +configs.algo.FRCNN = FRCNN + + +return configs diff --git a/data.lua b/data.lua index 655deb5..59d3284 100644 --- a/data.lua +++ b/data.lua @@ -1,145 +1,65 @@ -------------------------------------------------------------------------------- -- Prepare data model -------------------------------------------------------------------------------- -paths.mkdir(opt.save) -trainCache = paths.concat(opt.save_base,'trainCache.t7') -testCache = paths.concat(opt.save_base,'testCache.t7') +local trainCache = paths.concat(rundir,'trainCache.t7') +--testCache = paths.concat(opt.save_base,'testCache.t7') -local pooler -local feat_dim 
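+-- Sketch of how the pieces below fit together (names as in config.lua): +-- local cfg = config.algo[opt.algo] -- e.g. opt.algo == 'RCNN' +-- feat_provider = nnf[opt.algo](cfg.fp_params) +-- batch_provider = cfg.bp(cfg.bp_params) -- e.g. nnf.BatchProviderRC 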
+local config = paths.dofile('config.lua') -if opt.algo == 'SPP' then - local conv_list = features:findModules(opt.backend..'.SpatialConvolution') - local num_chns = conv_list[#conv_list].nOutputPlane - pooler = model:get(2):clone():float() - local pyr = torch.Tensor(pooler.pyr):t() - local pooled_size = pyr[1]:dot(pyr[2]) - feat_dim = {num_chns*pooled_size} -elseif opt.algo == 'RCNN' then - feat_dim = {3,227,227} +image_transformer = nnf.ImageTransformer(config.image_transformer_params) + +local FP = nnf[opt.algo] +local fp_params = config.algo[opt.algo].fp_params +local bp_params = config.algo[opt.algo].bp_params +local BP = config.algo[opt.algo].bp + +local train_params = config.train_params + +-- add common parameters +fp_params.image_transformer = image_transformer +for k,v in pairs(train_params) do + bp_params[k] = v end -image_transformer = nnf.ImageTransformer{mean_pix=image_mean} +------------------------------------------------------------------------------- +-- Create structures +-------------------------------------------------------------------------------- + +ds_train = nnf.DataSetPascal{ + image_set='trainval', + year=2007,--opt.year, + datadir=config.datasetDir, + roidbdir=config.roidbDir +} + +feat_provider = FP(fp_params) +feat_provider:training() + +bp_params.dataset = ds_train +bp_params.feat_provider = feat_provider +batch_provider = BP(bp_params) if paths.filep(trainCache) then print('Loading train metadata from cache') - batch_provider = torch.load(trainCache) - feat_provider = batch_provider.feat_provider - ds_train = feat_provider.dataset - feat_provider.model = features + local metadata = torch.load(trainCache) + batch_provider.bboxes = metadata else - ds_train = nnf.DataSetPascal{image_set='trainval',classes=classes,year=opt.year, - datadir=opt.datadir,roidbdir=opt.roidbdir} - - if opt.algo == 'SPP' then - feat_provider = nnf.SPP(ds_train)-- remove features here to reduce cache size - feat_provider.cachedir = paths.concat(opt.cache,'features',opt.netType) - feat_provider.randomscale = true - feat_provider.scales = {600} - feat_provider.spp_pooler = pooler:clone() - feat_provider.image_transformer = image_transformer - elseif opt.algo == 'RCNN' then - feat_provider = nnf.RCNN(ds_train) - feat_provider.crop_size = feat_dim[2] - feat_provider.image_transformer = image_transformer - else - error(("Detection framework '%s' not available"):format(opt.algo)) - end - - print('==> Preparing BatchProvider for training') - batch_provider = nnf.BatchProvider(feat_provider) - batch_provider.iter_per_batch = opt.ipb - batch_provider.nTimesMoreData = opt.ntmd - batch_provider.fg_fraction = opt.fg_frac - batch_provider.bg_threshold = {0.0,0.5} - batch_provider.do_flip = true - batch_provider.batch_dim = feat_dim batch_provider:setupData() - - torch.save(trainCache,batch_provider) - feat_provider.model = features + torch.save(trainCache, batch_provider.bboxes) end -if paths.filep(testCache) then - print('Loading test metadata from cache') - batch_provider_test = torch.load(testCache) - feat_provider_test = batch_provider_test.feat_provider - ds_test = feat_provider_test.dataset - feat_provider_test.model = features -else - ds_test = nnf.DataSetPascal{image_set='test',classes=classes,year=opt.year, - datadir=opt.datadir,roidbdir=opt.roidbdir} - if opt.algo == 'SPP' then - feat_provider_test = nnf.SPP(ds_test) - feat_provider_test.randomscale = false - feat_provider_test.cachedir = paths.concat(opt.cache,'features',opt.netType) - feat_provider_test.scales = {600} - 
feat_provider_test.spp_pooler = pooler:clone() - feat_provider_test.image_transformer = image_transformer - elseif opt.algo == 'RCNN' then - feat_provider_test = nnf.RCNN(ds_test) - feat_provider_test.crop_size = feat_dim[2] - feat_provider_test.image_transformer = image_transformer - else - error(("Detection framework '%s' not available"):format(opt.algo)) - end - - print('==> Preparing BatchProvider for validation') - batch_provider_test = nnf.BatchProvider(feat_provider_test) - batch_provider_test.iter_per_batch = 500--opt.ipb - batch_provider_test.nTimesMoreData = 10--opt.ntmd - batch_provider_test.fg_fraction = opt.fg_frac - batch_provider_test.bg_threshold = {0.0,0.5} - batch_provider_test.do_flip = false - batch_provider_test.batch_dim = feat_dim - batch_provider_test:setupData() - - torch.save(testCache,batch_provider_test) - feat_provider_test.model = features -end - --------------------------------------------------------------------------------- --- Compute conv5 feature cache (for SPP) --------------------------------------------------------------------------------- -if opt.algo == 'SPP' then - print('Preparing conv5 features for '..ds_train.dataset_name..' ' - ..ds_train.image_set) - local feat_cachedir = feat_provider.cachedir - for i=1,ds_train:size() do - xlua.progress(i,ds_train:size()) - local im_name = ds_train.img_ids[i] - local cachefile = paths.concat(feat_cachedir,im_name) - if not paths.filep(cachefile..'.h5') then - local f = feat_provider:getConv5(i) - end - if not paths.filep(cachefile..'_flip.h5') then - local f = feat_provider:getConv5(i,true) - end - if i%50 == 0 then - collectgarbage() - collectgarbage() - end - end - - print('Preparing conv5 features for '..ds_test.dataset_name..' ' - ..ds_test.image_set) - local feat_cachedir = feat_provider_test.cachedir - for i=1,ds_test:size() do - xlua.progress(i,ds_test:size()) - local im_name = ds_test.img_ids[i] - local cachefile = paths.concat(feat_cachedir,im_name) - if not paths.filep(cachefile..'.h5') then - local f = feat_provider_test:getConv5(i) - end - if i%50 == 0 then - collectgarbage() - collectgarbage() - end - end -end +-- test +ds_test = nnf.DataSetPascal{ + image_set='test', + year=2007,--opt.year, + datadir=config.datasetDir, + roidbdir=config.roidbDir +} -features = nil -model = nil +-- only needed because of SPP +-- could be the same as the one for training +--feat_provider_test = FP(fp_params) +--feat_provider_test:evaluate() collectgarbage() diff --git a/examples/example_frcnn_lena.jpg b/examples/example_frcnn_lena.jpg new file mode 100644 index 0000000..e1919fa Binary files /dev/null and b/examples/example_frcnn_lena.jpg differ diff --git a/examples/train_test_rcnn.lua b/examples/train_test_rcnn.lua new file mode 100644 index 0000000..7701ad6 --- /dev/null +++ b/examples/train_test_rcnn.lua @@ -0,0 +1,190 @@ +require 'nnf' + +cmd = torch.CmdLine() +cmd:text('Example on how to train/test an RCNN-based object detector on Pascal') +cmd:text('') +cmd:text('Options:') +cmd:option('-name', 'rcnn-example', 'base name') +cmd:option('-modelpath', '', 'path to the pre-trained model') +cmd:option('-lr', 1e-3, 'learning rate') +cmd:option('-num_iter', 40000, 'number of iterations') +cmd:option('-disp_iter', 100, 'display every n iterations') +cmd:option('-lr_step', 30000, 'step for reducing the learning rate') +cmd:option('-save_step', 10000, 'step for saving the model') +cmd:option('-gpu', 1, 'gpu to use (0 for cpu mode)') +cmd:option('-seed', 1, 'fix random seed (if ~= 0)') +cmd:option('-numthreads',6, 'number of threads') + +opt = cmd:parse(arg or {}) + +assert(paths.filep(opt.modelpath), 'need to provide the path for the pre-trained model') + +exp_name = cmd:string(opt.name, opt, {name=true, gpu=true, numthreads=true, + modelpath=true}) + +rundir = '../cachedir/'..exp_name +paths.mkdir(rundir) + +cmd:log(paths.concat(rundir,'log'), opt) +cmd:addTime('RCNN Example') + +local tensor_type +if opt.gpu > 0 then + require 'cunn' + cutorch.setDevice(opt.gpu) + tensor_type = 'torch.CudaTensor' + print('Using GPU mode on device '..opt.gpu) +else + require 'nn' + tensor_type = 'torch.FloatTensor' + print('Using CPU mode') +end + +if opt.seed ~= 0 then + torch.manualSeed(opt.seed) + if opt.gpu > 0 then + cutorch.manualSeed(opt.seed) + end + print('Using fixed seed: '..opt.seed) +end + +torch.setnumthreads(opt.numthreads) + +-------------------------------------------------------------------------------- +-- define model and criterion +-------------------------------------------------------------------------------- +-- load pre-trained model for finetuning +-- should already have the right number of outputs in the last layer, +-- which can be done by removing the last layer and replacing it by a new one +-- for example: +-- pre_trained_model:remove() -- remove last layer +-- pre_trained_model:add(nn.Linear(4096,21)) -- add new layer +model = torch.load(opt.modelpath) + +criterion = nn.CrossEntropyCriterion() + +model:type(tensor_type) +criterion:type(tensor_type) + +print('Model:') +print(model) +print('Criterion:') +print(criterion) + +-- define the transformations to apply to the image before +-- passing it to the network +local image_transformer = nnf.ImageTransformer{ + mean_pix={102.9801,115.9465,122.7717}, + raw_scale = 255, + swap = {3,2,1} +} + +print(image_transformer) +-------------------------------------------------------------------------------- +-- define data for training +-------------------------------------------------------------------------------- + +-- this class holds all the necessary information regarding the dataset +ds = nnf.DataSetPascal{ + image_set='trainval', + datadir='datasets/VOCdevkit', + roidbdir='data/selective_search_data', + year=2007 +} +print('DataSet Training:') +print(ds) +-------------------------------------------------------------------------------- +-- define feature providers +-------------------------------------------------------------------------------- + +local crop_size = 224 + +-- the feature provider extracts the features for a given image + bounding box +fp = nnf.RCNN{ + image_transformer=image_transformer, + crop_size=crop_size, + num_threads=opt.numthreads +} +-- different frameworks can behave differently during training and testing +fp:training() + +print('Feature Provider:') +print(fp) + +-------------------------------------------------------------------------------- +-- define batch providers +-------------------------------------------------------------------------------- + +bp = nnf.BatchProviderRC{ + dataset=ds, + feat_provider=fp, + bg_threshold={0.0,0.5}, + nTimesMoreData=2, + iter_per_batch=10,--100, +} +bp:setupData() + +print('Batch Provider:') +print(bp) +-------------------------------------------------------------------------------- +-- train +-------------------------------------------------------------------------------- + +trainer = nnf.Trainer(model, criterion, bp) + +local num_iter = opt.num_iter/opt.disp_iter +local lr_step = opt.lr_step/opt.disp_iter +local save_step = opt.save_step/opt.disp_iter + 
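+-- with the default options this amounts to 40000/100 = 400 loop iterations, +-- with the learning rate divided by 10 after 300 of them and a snapshot every 100 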
+trainer.optimState.learningRate = opt.lr + +local lightModel = model:clone('weight','bias') + +-- main training loop +for i=1,num_iter do + if i % lr_step == 0 then + trainer.optimState.learningRate = trainer.optimState.learningRate/10 + end + print(('Iteration %3d/%-3d'):format(i,num_iter)) + trainer:train(opt.disp_iter) + print((' Training error: %.5f'):format(trainer.fx[i])) + + if i % save_step == 0 then + torch.save(paths.concat(rundir, 'model.t7'), lightModel) + end +end + +torch.save(paths.concat(rundir, 'model.t7'), lightModel) + +-------------------------------------------------------------------------------- +-- evaluation +-------------------------------------------------------------------------------- +-- add softmax to classifier, because we were using nn.CrossEntropyCriterion +local softmax = nn.SoftMax() +softmax:type(tensor_type) +model:add(softmax) + +-- dataset for evaluation +dsv = nnf.DataSetPascal{ + image_set='test', + datadir='datasets/VOCdevkit', + roidbdir='data/selective_search_data', + year=2007 +} +print('DataSet Evaluation:') +print(dsv) + +-- feature provider for evaluation +fpv = nnf.RCNN{ + image_transformer=image_transformer, + crop_size=crop_size, + num_threads=opt.numthreads +} +fpv:evaluate() +print('Feature Provider Evaluation:') +print(fpv) + +-- define the class to test the model on the full dataset +tester = nnf.Tester(model, fpv, dsv) +tester.cachefolder = rundir +tester:test(opt.num_iter) diff --git a/main.lua b/main.lua index 65a4b18..0a8705b 100644 --- a/main.lua +++ b/main.lua @@ -1,6 +1,7 @@ require 'nnf' -require 'cunn' +--require 'cunn' require 'optim' +require 'trepl' local opts = paths.dofile('opts.lua') opt = opts.parse(arg) @@ -8,116 +9,47 @@ print(opt) if opt.seed ~= 0 then torch.manualSeed(opt.seed) - cutorch.manualSeed(opt.seed) + if opt.gpu > 0 then + cutorch.manualSeed(opt.seed) + end end -cutorch.setDevice(opt.gpu) torch.setnumthreads(opt.numthreads) --------------------------------------------------------------------------------- --- Select target classes --------------------------------------------------------------------------------- - -if opt.classes == 'all' then - classes={'aeroplane','bicycle','bird','boat','bottle','bus','car', - 'cat','chair','cow','diningtable','dog','horse','motorbike', - 'person','pottedplant','sheep','sofa','train','tvmonitor'} +local tensor_type +if opt.gpu > 0 then + require 'cunn' + cutorch.setDevice(opt.gpu) + tensor_type = 'torch.CudaTensor' + print('Using GPU mode on device '..opt.gpu) else - classes = {opt.classes} + require 'nn' + tensor_type = 'torch.FloatTensor' + print('Using CPU mode') end -------------------------------------------------------------------------------- +model, criterion = paths.dofile('model.lua') +model:type(tensor_type) +criterion:type(tensor_type) -paths.dofile('model.lua') +-- prepare training and test data paths.dofile('data.lua') --------------------------------------------------------------------------------- --- Prepare training model --------------------------------------------------------------------------------- - -trainer = nnf.Trainer(classifier,criterion) -trainer.optimState.learningRate = opt.lr - -local conf_classes = {} -table.insert(conf_classes,'background') -for i=1,#classes do - table.insert(conf_classes,classes[i]) -end -trainer.confusion = optim.ConfusionMatrix(conf_classes) - -validator = nnf.Tester(classifier,feat_provider_test) -validator.cachefolder = opt.save_base -validator.cachename = 'validation_data.t7' -validator.batch_provider = 
batch_provider_test - -logger = optim.Logger(paths.concat(opt.save,'log.txt')) -val_err = {} -val_counter = 0 -reduc_counter = 0 - -inputs = torch.FloatTensor() -targets = torch.IntTensor() -for i=1,opt.num_iter do - - print('Iteration: '..i..'/'..opt.num_iter) - inputs,targets = batch_provider:getBatch(inputs,targets) - print('==> Training '..paths.basename(opt.save_base)) - trainer:train(inputs,targets) - print('==> Training Error: '..trainer.fx[i]) - print(trainer.confusion) - - collectgarbage() +-- Do training +paths.dofile('train.lua') - err = validator:validate(criterion) - print('==> Validation Error: '..err) - table.insert(val_err,err) - - logger:add{['train error (iters per batch='..batch_provider.iter_per_batch.. - ')']=trainer.fx[i],['val error']=err, - ['learning rate']=trainer.optimState.learningRate} - - val_counter = val_counter + 1 - - local val_err_t = torch.Tensor(val_err) - local _,lmin = val_err_t:min(1) - if val_counter-lmin[1] >= opt.nsmooth then - print('Reducing learning rate') - trainer.optimState.learningRate = trainer.optimState.learningRate/2 - if opt.nildfdx == true then - trainer.optimState.dfdx= nil - end - val_counter = 0 - val_err = {} - reduc_counter = reduc_counter + 1 - if reduc_counter >= opt.nred then - print('Stopping training at iteration '..i) - break - end - end - - collectgarbage() - collectgarbage() - --sanitize(model) - --torch.save(paths.concat(opt.save, 'model_' .. epoch .. '.t7'), classifier) - --torch.save(paths.concat(opt.save, 'optimState_' .. epoch .. '.t7'), trainer.optimState) -end - ---sanitize(classifier) -torch.save(paths.concat(opt.save, 'model.t7'), classifier) - -ds_train.roidb = nil -collectgarbage() -collectgarbage() - --------------------------------------------------------------------------------- --- Do full evaluation --------------------------------------------------------------------------------- - -print('==> Evaluation') -tester = nnf.Tester(classifier,feat_provider_test) -tester.cachefolder = paths.concat(opt.save,'evaluation',ds_test.dataset_name) +-- evaluation +print('==> Evaluating') +-- add softmax to classifier, because we were using nn.CrossEntropyCriterion +local softmax = nn.SoftMax() +softmax:type(tensor_type) +model:add(softmax) +feat_provider:evaluate() +-- define the class to test the model on the full dataset +tester = nnf.Tester(model, feat_provider, ds_test) +tester.cachefolder = rundir tester:test(opt.num_iter) - diff --git a/model.lua b/model.lua index 9700f0b..029c8a3 100644 --- a/model.lua +++ b/model.lua @@ -1,50 +1,26 @@ require 'nn' -require 'inn' -require 'cudnn' -local reshapeLastLinearLayer = paths.dofile('utils.lua').reshapeLastLinearLayer -local convertCaffeModelToTorch = paths.dofile('utils.lua').convertCaffeModelToTorch +--require 'inn' +--require 'cudnn' --- 1.1. Create Network -local config = opt.netType -local createModel = paths.dofile('models/' .. config .. '.lua') -print('=> Creating model from file: models/' .. config .. '.lua') -model = createModel(opt.backend) +local createModel = paths.dofile('models/' .. opt.netType .. '.lua') +print('=> Creating model from file: models/' .. opt.netType .. 
'.lua') +local model = createModel() --- convert to accept inputs in the range 0-1 RGB format -convertCaffeModelToTorch(model,{1,1}) +local criterion = nn.CrossEntropyCriterion() -reshapeLastLinearLayer(model,#classes+1) -image_mean = {128/255,128/255,128/255} - -if opt.algo == 'RCNN' then - classifier = model -elseif opt.algo == 'SPP' then - features = model:get(1) - classifier = model:get(3) -end - --- 2. Create Criterion -criterion = nn.CrossEntropyCriterion() - -print('=> Model') +print('Model:') print(model) - -print('=> Criterion') +print('Criterion:') print(criterion) --- 3. If preloading option is set, preload weights from existing models appropriately +-- If preloading option is set, preload weights from existing models appropriately if opt.retrain ~= 'none' then assert(paths.filep(opt.retrain), 'File not found: ' .. opt.retrain) print('Loading model from file: ' .. opt.retrain); - classifier = torch.load(opt.retrain) + model = torch.load(opt.retrain) end --- 4. Convert model to CUDA -print('==> Converting model to CUDA') -model = model:cuda() -criterion:cuda() - collectgarbage() - +return model, criterion diff --git a/models/frcnn_alexnet.lua b/models/frcnn_alexnet.lua new file mode 100644 index 0000000..c8b033d --- /dev/null +++ b/models/frcnn_alexnet.lua @@ -0,0 +1,62 @@ +local function loadModel(params,backend) + + backend = backend or cudnn + + local features = nn.Sequential() + local classifier = nn.Sequential() + + features:add(backend.SpatialConvolution(3,96,11,11,4,4,5,5,1)) + features:add(backend.ReLU(true)) + features:add(backend.SpatialMaxPooling(3,3,2,2,1,1)) + features:add(backend.SpatialCrossMapLRN(5,0.0001,0.75,1)) + + features:add(backend.SpatialConvolution(96,256,5,5,1,1,1,1,2)) + features:add(backend.ReLU(true)) + features:add(backend.SpatialMaxPooling(3,3,2,2,1,1)) + features:add(backend.SpatialCrossMapLRN(5,0.0001,0.75,1)) + + features:add(backend.SpatialConvolution(256,384,3,3,1,1,1,1,1)) + features:add(backend.ReLU(true)) + + features:add(backend.SpatialConvolution(384,384,3,3,1,1,1,1,2)) + features:add(backend.ReLU(true)) + + features:add(backend.SpatialConvolution(384,256,3,3,1,1,1,1,2)) + features:add(backend.ReLU(true)) + + classifier:add(nn.Linear(9216,4096)) + classifier:add(backend.ReLU(true)) + classifier:add(nn.Dropout(0.5)) + + classifier:add(nn.Linear(4096,4096)) + classifier:add(backend.ReLU(true)) + classifier:add(nn.Dropout(0.5)) + + classifier:add(nn.Linear(4096,21)) + + local prl = nn.ParallelTable() + prl:add(features) + prl:add(nn.Identity()) + + local ROIPooling = inn.ROIPooling(6,6):setSpatialScale(1/16) + + local model = nn.Sequential() + model:add(prl) + model:add(ROIPooling) + model:add(nn.View(-1):setNumInputDims(3)) + model:add(classifier) + + if params then + local lparams = model:parameters() + assert(#lparams == #params, 'provided parameters do not match') + + for k,v in ipairs(lparams) do + local p = params[k] + assert(p:numel() == v:numel(), 'wrong number of parameter elements!') + v:copy(p) + end + end + return model +end + +return loadModel diff --git a/nnf.lua b/nnf.lua index a2e7831..d9fd777 100644 --- a/nnf.lua +++ b/nnf.lua @@ -1,20 +1,30 @@ require 'nn' require 'image' -require 'inn' +--require 'inn' require 'xlua' nnf = {} +torch.include('nnf','ImageTransformer.lua') + +torch.include('nnf','DataSetDetection.lua') torch.include('nnf','DataSetPascal.lua') -torch.include('nnf','BatchProvider.lua') +torch.include('nnf','DataSetCOCO.lua') + +torch.include('nnf','BatchProviderBase.lua') 
+torch.include('nnf','BatchProviderIC.lua') +torch.include('nnf','BatchProviderRC.lua') torch.include('nnf','SPP.lua') torch.include('nnf','RCNN.lua') +torch.include('nnf','FRCNN.lua') +torch.include('nnf','ROIPooling.lua') torch.include('nnf','Trainer.lua') torch.include('nnf','Tester.lua') +--torch.include('nnf','Tester_FRCNN.lua') torch.include('nnf','SVMTrainer.lua') -torch.include('nnf','ImageTransformer.lua') +torch.include('nnf','ImageDetect.lua') --return nnf diff --git a/opts.lua b/opts.lua index f07d8dc..457b6f2 100644 --- a/opts.lua +++ b/opts.lua @@ -8,55 +8,29 @@ function M.parse(arg) cmd:text() cmd:text('Options:') - local curr_dir = paths.cwd() - local defaultDataSetDir = paths.concat(curr_dir,'datasets') - local defaultDataDir = paths.concat(defaultDataSetDir,'VOCdevkit/') - local defaultROIDBDir = paths.concat(curr_dir,'data','selective_search_data/') - - cmd:text('Folder parameters') - cmd:option('-cache',paths.concat(curr_dir,'cachedir'),'Cache dir') - cmd:option('-datadir',defaultDataDir,'Path to dataset') - cmd:option('-roidbdir',defaultROIDBDir,'Path to ROIDB') - cmd:text() - cmd:text('Model parameters') - cmd:option('-algo','SPP','Detection framework. Options: RCNN | SPP') - cmd:option('-netType','zeiler','Options: zeiler | vgg') - cmd:option('-backend','cudnn','Options: nn | cudnn') - cmd:text() - cmd:text('Data parameters') - cmd:option('-year',2007,'DataSet year (for Pascal)') - cmd:option('-ipb',500,'iter per batch') - cmd:option('-ntmd',10,'nTimesMoreData') - cmd:option('-fg_frac',0.25,'fg_fraction') - cmd:option('-classes','all','use all classes (all) or given class') - cmd:text() - cmd:text('Training parameters') - cmd:option('-lr',1e-2,'learning rate') - cmd:option('-num_iter',300,'number of iterations') - cmd:option('-nsmooth',40,'number of iterations before reducing learning rate') - cmd:option('-nred',4,'number of divisions by 2 before stopping learning') - cmd:option('-nildfdx',false,'erase memory of gradients when reducing learning rate') - cmd:text() - cmd:text('Others') - cmd:option('-gpu',1,'gpu device to use') - cmd:option('-numthreads',6,'number of threads to use') - cmd:option('-comment','','additional comment to the name') - cmd:option('-seed',0,'random seed (0 = no fixed seed)') - cmd:option('-retrain','none','modelpath for finetuning') - cmd:text() - + cmd:option('-name', 'obj-detect', 'base name') + cmd:option('-algo', 'RCNN', 'Detection framework. 
Options: RCNN | FRCNN') + cmd:option('-netType', 'alexnet', 'Options: alexnet') + cmd:option('-lr', 1e-3, 'learning rate') + cmd:option('-num_iter', 40000, 'number of iterations') + cmd:option('-disp_iter', 100, 'display every n iterations') + cmd:option('-lr_step', 30000, 'step for reducing the learning rate') + cmd:option('-save_step', 10000, 'step for saving the model') + cmd:option('-gpu', 1, 'gpu to use (0 for cpu mode)') + cmd:option('-conf_mat', false, 'Compute confusion matrix during training') + cmd:option('-seed', 1, 'fix random seed (if ~= 0)') + cmd:option('-numthreads',6, 'number of threads') + cmd:option('-retrain', 'none', 'modelpath for finetuning') local opt = cmd:parse(arg or {}) - -- add commandline specified options - opt.save = paths.concat(opt.cache, - cmd:string(opt.netType, opt, - {retrain=true, optimState=true, cache=true, - data=true, gpu=true, numthread=true, - netType=true})) - -- add date/time - opt.save_base = opt.save - local date_time = os.date():gsub(' ','') - opt.save = paths.concat(opt.save, date_time) + + local exp_name = cmd:string(opt.name, opt, {name=true, gpu=true, numthreads=true}) + + rundir = 'cachedir/'..exp_name + paths.mkdir(rundir) + + cmd:log(paths.concat(rundir,'log'), opt) + cmd:addTime('Object-Detection.Torch') return opt diff --git a/test_frcnn.lua b/test_frcnn.lua new file mode 100644 index 0000000..24bb23b --- /dev/null +++ b/test_frcnn.lua @@ -0,0 +1,282 @@ +require 'nnf' +require 'inn' +require 'cudnn' +require 'gnuplot' + +cutorch.setDevice(2) + +dt = torch.load('pascal_2007_train.t7') +if false then + ds = nnf.DataSetPascal{image_set='train', + datadir='/home/francisco/work/datasets/VOCdevkit', + roidbdir='/home/francisco/work/datasets/rcnn/selective_search_data' + } +else + ds = nnf.DataSetPascal{image_set='trainval', + datadir='datasets/VOCdevkit', + roidbdir='data/selective_search_data' + } +end + +if false then + ds.roidb = {} + for i=1,ds:size() do + ds.roidb[i] = torch.IntTensor(10,4):random(1,5) + ds.roidb[i][{{},{3,4}}]:add(6) + end +elseif false then + ds.roidb = dt.roidb +end + +local image_transformer= nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},--{103.939, 116.779, 123.68}, + raw_scale = 255, + swap = {3,2,1}} +if true then + bp = nnf.BatchProviderROI(ds) + bp.image_transformer = image_transformer + bp.bg_threshold = {0.1,0.5} + bp:setupData() +else + bp = nnf.BatchProviderROI(ds) + bp.image_transformer = image_transformer + local temp = torch.load('pascal_2007_train_bp.t7') + bp.bboxes = temp.bboxes +end + + +if false then + local mytest = nnf.ROIPooling(50,50):float() + function do_mytest() + local input0,target0 = bp:getBatch(input0,target0) + local o = mytest:forward(input0) + return input0,target0,o + end + --input0,target0,o = do_mytest() +end + +--------------------------------------------------------------------------------------- +-- model +--------------------------------------------------------------------------------------- +do + + model = nn.Sequential() + local features = nn.Sequential() + local classifier = nn.Sequential() + + if false then + features:add(nn.SpatialConvolutionMM(3,96,11,11,4,4,5,5)) + features:add(nn.ReLU(true)) + features:add(nn.SpatialConvolutionMM(96,128,5,5,2,2,2,2)) + features:add(nn.ReLU(true)) + features:add(nn.SpatialMaxPooling(2,2,2,2)) + + classifier:add(nn.Linear(128*7*7,1024)) + classifier:add(nn.ReLU(true)) + classifier:add(nn.Dropout(0.5)) + classifier:add(nn.Linear(1024,21)) + + elseif false then + require 'loadcaffe' +-- local rcnnfold = 
+
+optimState = {learningRate = 1,--1e-3,
+              weightDecay = 0.000, momentum = 0.9,
+              learningRateDecay = 0, dampening=0}
+
+--------------------------------------------------------------------------
+-- training
+--------------------------------------------------------------------------
+
+confusion_matrix = optim.ConfusionMatrix(21)
+
+
+model:training()
+
+savedModel = model:clone('weight','bias','running_mean','running_std')
+
+criterion = nn.CrossEntropyCriterion():cuda()
+--criterion.nll.sizeAverage = false
+
+--normalize = true
+
+display_iter = 20
+
+--inputs = {torch.CudaTensor(),torch.FloatTensor()}
+inputs = {torch.CudaTensor(),torch.CudaTensor()}
+target = torch.CudaTensor()
+
+learningRate = 1e-3
+
+function train()
+  local err = 0
+  for i=1,display_iter do
+    xlua.progress(i,display_iter)
+    inputs0,target0 = bp:getBatch(inputs0,target0)
+    inputs[1]:resize(inputs0[1]:size()):copy(inputs0[1])
+    inputs[2]:resize(inputs0[2]:size()):copy(inputs0[2])
+    target:resize(target0:size()):copy(target0)
+    local batchSize = target:size(1)
+
+    local feval = function(x)
+      if x ~= parameters then
+        parameters:copy(x)
+      end
+      gradParameters:zero()
+
+      local outputs = model:forward(inputs)
+
+      local f = criterion:forward(outputs,target)
+      local df_do = criterion:backward(outputs,target)
+
+      model:backward(inputs,df_do)
+
+      -- mimic different learning rates per layer
+      -- without the cost of having a huge tensor
+      updateGPlrwd(learningRate)
+
+      if normalize then
+        gradParameters:div(batchSize)
+        f = f/batchSize
+      end
+
+      confusion_matrix:batchAdd(outputs,target)
+
+      return f,gradParameters
+    end
+
+    local x,fx = optim.sgd(feval,parameters,optimState)
+    err = err + fx[1]
+  end
+  print('Training error: '..err/display_iter)
+  return err/display_iter
+end
+
+epoch_size = math.ceil(ds:size()/bp.imgs_per_batch)
+stepsize = 30000--30000
+print_step = 10
+num_iter = 40000--40000
+num_iter = num_iter/display_iter--3000
+
+confusion_matrix:zero()
+train_err = {}
+exp_name = 'frcnn_t11'
+
+paths.mkdir(paths.concat('cachedir',exp_name))
+--logger = optim.Logger(paths.concat('cachedir',exp_name,'train_err.log'))
+train_acc = {}
+for i=1,num_iter do
+
+  if i%(stepsize/display_iter) == 0 then
+    --optimState.learningRate = optimState.learningRate/10
+    learningRate = learningRate/10
+  end
+
+  --print(('Iteration: %d/%d, lr: %.5f'):format(i,num_iter,optimState.learningRate))
+  print(('Iteration: %d/%d, lr: %.5f'):format(i,num_iter,learningRate))
+
+  local t_err = train()
+  table.insert(train_err,t_err)
+
+
+  if i%print_step == 0 then
+    print(confusion_matrix)
+    table.insert(train_acc,confusion_matrix.averageUnionValid*100)
+    gnuplot.epsfigure(paths.concat('cachedir',exp_name,'train_err.eps'))
+    gnuplot.plot('train',torch.Tensor(train_acc),'-')
+    gnuplot.xlabel('Iterations (200 batch update)')
+    gnuplot.ylabel('Training accuracy')
+    gnuplot.grid('on')
+    gnuplot.plotflush()
+    gnuplot.closeall()
+
+    confusion_matrix:zero()
+  end
+
+  if i%100 == 0 then
+    torch.save(paths.concat('cachedir',exp_name..'.t7'),savedModel)
+  end
+end
+
+-- test
+dsv = nnf.DataSetPascal{image_set='test',
+                        datadir='datasets/VOCdevkit',
+                        roidbdir='data/selective_search_data'
+                       }
+
+
+local fpv = {dataset=dsv}
+tester = nnf.Tester_FRCNN(model,fpv)
+tester.cachefolder = 'cachedir/'..exp_name
+tester:test(num_iter)
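
The savedModel line above uses a light-checkpoint idiom that is easy to miss; a minimal standalone sketch (plain nn, hypothetical file name):

require 'nn'
local model = nn.Sequential():add(nn.Linear(10,2))
-- clone(...) deep-copies the network, then re-shares the named tensors with
-- the original, so the clone always reflects the current weights while
-- staying free of the gradient and activation buffers that training fills.
local savedModel = model:clone('weight','bias')
model:get(1).weight:fill(1)            -- stands in for a training update
print(savedModel:get(1).weight:sum())  -- 20: the clone sees the new weights
--torch.save('checkpoint.t7', savedModel)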
diff --git a/tests/test_full_frcnn.lua b/tests/test_full_frcnn.lua
new file mode 100644
index 0000000..c49c2c6
--- /dev/null
+++ b/tests/test_full_frcnn.lua
@@ -0,0 +1,124 @@
+require 'nnf'
+require 'inn'
+require 'cudnn'
+require 'loadcaffe'
+
+cutorch.setDevice(2)
+
+ds = nnf.DataSetPascal{image_set='trainval',
+                       datadir='datasets/VOCdevkit',
+                       roidbdir='data/selective_search_data'
+                      }
+local image_transformer= nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+                                              raw_scale = 255,
+                                              swap = {3,2,1}}
+
+fp = nnf.FRCNN{image_transformer=image_transformer}
+fp:training()
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp = nnf.BatchProviderROI{dataset=ds,feat_provider=fp,
+                          bg_threshold={0.1,0.5}
+                         }
+bp:setupData()
+
+--------------------------------------------------------------------------------
+-- define model
+--------------------------------------------------------------------------------
+model = nn.Sequential()
+do
+  --[[
+  local rcnnfold = '/home/francisco/work/projects/object-detection.torch/data/models/imagenet_models/'
+  local base_model = loadcaffe.load(
+              rcnnfold..'CaffeNet_train.prototxt',
+              rcnnfold..'CaffeNet.v2.caffemodel',
+              'cudnn')
+  for i=1,14 do
+    features:add(base_model:get(i):clone())
+  end
+  for i=17,22 do
+    classifier:add(base_model:get(i):clone())
+  end
+  local linear = nn.Linear(4096,21):cuda()
+  linear.weight:normal(0,0.01)
+  linear.bias:zero()
+  classifier:add(linear)
+  --]]
+  local features = nn.Sequential()
+  local classifier = nn.Sequential()
+  local fold = 'data/models/imagenet_models/alexnet/'
+  local m1 = torch.load(fold..'features.t7')
+  local m2 = torch.load(fold..'top.t7')
+  for i=1,14 do
+    features:add(m1:get(i):clone())
+  end
+  features:get(3).padW = 1
+  features:get(3).padH = 1
+  features:get(7).padW = 1
+  features:get(7).padH = 1
+  for i=2,7 do
+    classifier:add(m2:get(i):clone())
+  end
+  local linear = nn.Linear(4096,21):cuda()
+  linear.weight:normal(0,0.01)
+  linear.bias:zero()
+  classifier:add(linear)
+  collectgarbage()
+  local prl = nn.ParallelTable()
+  prl:add(features)
+  prl:add(nn.Identity())
+  model:add(prl)
+  --model:add(nnf.ROIPooling(6,6):setSpatialScale(1/16))
+  model:add(inn.ROIPooling(6,6):setSpatialScale(1/16))
+  model:add(nn.View(-1):setNumInputDims(3))
+  model:add(classifier)
+end
+model:cuda()
+
+--model = nil
+--collectgarbage()
+--model = torch.load('test_model.t7')
+--model:cuda()
+collectgarbage()
+--------------------------------------------------------------------------------
+-- train
+--------------------------------------------------------------------------------
+
+criterion = nn.CrossEntropyCriterion():cuda()
+
+trainer = nnf.Trainer(model,criterion,bp)
+
+savedModel = model:clone('weight','bias','running_mean','running_std')
+for i=1,400 do
+  if i == 300 then
+    trainer.optimState.learningRate = trainer.optimState.learningRate/10
+  end
+  print(('Iteration %3d/%-3d'):format(i,400))
+  trainer:train(100)
+  print(('  Train error: %g'):format(trainer.fx[i]))
+end
+
+--------------------------------------------------------------------------------
+-- evaluate
+--------------------------------------------------------------------------------
+
+-- add softmax to classifier
+model:add(nn.SoftMax():cuda())
+
+dsv = nnf.DataSetPascal{image_set='test',
+                        datadir='datasets/VOCdevkit',
+                        roidbdir='data/selective_search_data'
+                       }
+
+
+fpv = nnf.FRCNN{image_transformer=image_transformer}
+fpv:evaluate()
+exp_name = 'test2_frcnn'
+
+tester = nnf.Tester(model,fpv,dsv)
+tester.cachefolder = 'cachedir/'..exp_name
+tester:test(40000)
+
+torch.save(paths.concat(tester.cachefolder,'model.t7'),savedModel)
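
A minimal sketch of the input contract for the FRCNN-style model assembled above (assumes inn and cudnn are installed and a GPU is available; shapes are illustrative): the ParallelTable receives the full image batch plus an R x 5 tensor of rois, where column 1 is the image index within the batch and columns 2-5 are x1,y1,x2,y2 in image coordinates.

require 'inn'
require 'cudnn'
local conv = cudnn.SpatialConvolution(3, 8, 3, 3, 1, 1, 1, 1):cuda()
local pool = inn.ROIPooling(6, 6):setSpatialScale(1/16):cuda()
local images = torch.CudaTensor(2, 3, 224, 224):normal()
local rois = torch.CudaTensor{{1,  1,  1, 100, 100},
                              {2, 33, 17, 224, 224}}
local feat = conv:forward(images)       -- 2 x 8 x 224 x 224 feature map
local out = pool:forward({feat, rois})  -- one 8 x 6 x 6 output per roi
print(#out)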
diff --git a/tests/test_full_rcnn.lua b/tests/test_full_rcnn.lua
new file mode 100644
index 0000000..6abd6ab
--- /dev/null
+++ b/tests/test_full_rcnn.lua
@@ -0,0 +1,120 @@
+require 'nnf'
+require 'inn'
+require 'cudnn'
+require 'loadcaffe'
+
+cutorch.setDevice(2)
+
+ds = nnf.DataSetPascal{image_set='trainval',
+                       datadir='datasets/VOCdevkit',
+                       roidbdir='data/selective_search_data'
+                      }
+local image_transformer= nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+                                              raw_scale = 255,
+                                              swap = {3,2,1}}
+
+fp = nnf.RCNN{image_transformer=image_transformer,
+              crop_size=224}
+fp:training()
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp = nnf.BatchProvider{dataset=ds,feat_provider=fp,
+                       bg_threshold={0.0,0.5},
+                       nTimesMoreData=2,
+                       iter_per_batch=100,
+                      }
+bp:setupData()
+
+--------------------------------------------------------------------------------
+-- define model
+--------------------------------------------------------------------------------
+model = nn.Sequential()
+do
+  --[[
+  local rcnnfold = '/home/francisco/work/projects/object-detection.torch/data/models/imagenet_models/'
+  local base_model = loadcaffe.load(
+              rcnnfold..'CaffeNet_train.prototxt',
+              rcnnfold..'CaffeNet.v2.caffemodel',
+              'cudnn')
+  for i=1,14 do
+    features:add(base_model:get(i):clone())
+  end
+  for i=17,22 do
+    classifier:add(base_model:get(i):clone())
+  end
+  local linear = nn.Linear(4096,21):cuda()
+  linear.weight:normal(0,0.01)
+  linear.bias:zero()
+  classifier:add(linear)
+  --]]
+  local features = nn.Sequential()
+  local classifier = nn.Sequential()
+  local fold = 'data/models/imagenet_models/alexnet/'
+  local m1 = torch.load(fold..'features.t7')
+  local m2 = torch.load(fold..'top.t7')
+  for i=1,14 do
+    features:add(m1:get(i):clone())
+  end
+  features:get(3).padW = 1
+  features:get(3).padH = 1
+  features:get(7).padW = 1
+  features:get(7).padH = 1
+  for i=2,7 do
+    classifier:add(m2:get(i):clone())
+  end
+  local linear = nn.Linear(4096,21):cuda()
+  linear.weight:normal(0,0.01)
+  linear.bias:zero()
+  classifier:add(linear)
+  collectgarbage()
+  --local prl = nn.ParallelTable()
+  --prl:add(features)
+  --prl:add(nn.Identity())
+  --model:add(prl)
+  --model:add(nnf.ROIPooling(6,6):setSpatialScale(1/16))
+  --model:add(inn.ROIPooling(6,6):setSpatialScale(1/16))
+  model:add(features)
+  model:add(nn.SpatialAdaptiveMaxPooling(6,6))
+  model:add(nn.View(-1):setNumInputDims(3))
+  model:add(classifier)
+end
+model:cuda()
+--------------------------------------------------------------------------------
+-- train
+--------------------------------------------------------------------------------
+
+criterion = nn.CrossEntropyCriterion():cuda()
+
+trainer = nnf.Trainer(model,criterion,bp)
+
+for i=1,400 do
+  if i == 300 then
+    trainer.optimState.learningRate = trainer.optimState.learningRate/10
+  end
+  print(('Iteration %3d/%-3d'):format(i,400))
+  trainer:train(100)
+end
+
+--------------------------------------------------------------------------------
+-- evaluate
+--------------------------------------------------------------------------------
+
+-- add softmax to classifier
+model:add(nn.SoftMax():cuda())
+
+dsv = nnf.DataSetPascal{image_set='test',
+                        datadir='datasets/VOCdevkit',
+                        roidbdir='data/selective_search_data'
+                       }
+
+
+fpv = nnf.RCNN{image_transformer=image_transformer,
+               crop_size=224}
+fpv:evaluate()
+exp_name = 'test1_rcnn'
+
+tester = nnf.Tester(model,fpv,dsv)
+tester.cachefolder = 'cachedir/'..exp_name
+tester:test(40000)
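
The RCNN path above differs from the FRCNN one in how fixed-size features are obtained: each proposal is cropped and warped to crop_size before entering the network, so a plain nn.SpatialAdaptiveMaxPooling suffices to map the conv output to a fixed 6x6 grid, with no roi input needed. A tiny sketch:

require 'nn'
local pool = nn.SpatialAdaptiveMaxPooling(6, 6)
print(#pool:forward(torch.randn(256, 13, 13)))  -- 256 x 6 x 6
print(#pool:forward(torch.randn(256, 9, 11)))   -- 256 x 6 x 6, whatever the input size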
diff --git a/tests/test_imdetect.lua b/tests/test_imdetect.lua
new file mode 100644
index 0000000..9884cd7
--- /dev/null
+++ b/tests/test_imdetect.lua
@@ -0,0 +1,59 @@
+dofile 'tests/test_utils.lua'
+
+detect1 = nnf.ImageDetect(model1,fp1)
+detect = nnf.ImageDetect(model,fp2)
+
+
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp1 = nnf.BatchProvider{dataset=ds,feat_provider=fp1}
+bp1.nTimesMoreData = 2
+bp1.iter_per_batch = 10
+bp2 = nnf.BatchProviderROI{dataset=ds,feat_provider=fp2}
+
+bp1.bboxes = torch.load('tests/bproibox.t7')
+bp2.bboxes = torch.load('tests/bproibox.t7')
+
+print('test1')
+b,t = bp1:getBatch()
+print('test2')
+b,t = bp2:getBatch()
+
+-- mixing does not work for the moment, as FRCNN accepts a set of images as input,
+-- whereas RCNN and SPP assume that only one image is provided at a time
+--[[
+bp3 = nnf.BatchProviderROI(ds)
+bp3.bboxes = torch.load('tests/bproibox.t7')
+bp3.feat_provider = fp1
+print('test3')
+b,t = bp3:getBatch()
+--]]
+--------------------------------------------------------------------------------
+--
+--------------------------------------------------------------------------------
+
+idx = 100
+im = ds:getImage(idx)
+boxes = ds:getROIBoxes(idx)
+
+--output = detect1:detect(im,boxes)
+--output0 = detect:detect(im,boxes)
+
+--------------------------------------------------------------------------------
+-- compare old and new SPP implementations for the cropping
+--------------------------------------------------------------------------------
+--[[
+output_old = {}
+for i=1,boxes:size(1) do
+  tt0 = fp3:getCrop_old(im,boxes[i])
+  output_old[i] = tt0
+end
+
+output_new = fp3:getCrop(im,boxes) --[881]
+
+for i=1,boxes:size(1) do
+  assert(output_old[i]:eq(output_new[i]):all(),'error '..i)
+end
+--]]
diff --git a/tests/test_train.lua b/tests/test_train.lua
new file mode 100644
index 0000000..7f50819
--- /dev/null
+++ b/tests/test_train.lua
@@ -0,0 +1,26 @@
+dofile 'tests/test_utils.lua'
+
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp1 = nnf.BatchProvider{dataset=ds,feat_provider=fp1}
+bp1.nTimesMoreData = 2
+bp1.iter_per_batch = 10
+bp2 = nnf.BatchProviderROI{dataset=ds,feat_provider=fp2}
+
+bp1.bboxes = torch.load('tests/bproibox.t7')
+bp2.bboxes = torch.load('tests/bproibox.t7')
+
+--------------------------------------------------------------------------------
+--
+--------------------------------------------------------------------------------
+
+criterion = nn.CrossEntropyCriterion()
+
+trainer = nnf.Trainer(model1,criterion,bp1)
+
+for i=1,10 do
+  trainer:train(10)
+end
+
diff --git a/tests/test_utils.lua b/tests/test_utils.lua
new file mode 100644
index 0000000..e3d20dc
--- /dev/null
+++ b/tests/test_utils.lua
@@ -0,0 +1,49 @@
+require 'nnf'
+require 'nn'
+
+function getDS()
+  local dt = torch.load('pascal_2007_train.t7')
+  local ds = nnf.DataSetPascal{image_set='train',
+                               datadir='/home/francisco/work/datasets/VOCdevkit',
+                               roidbdir='/home/francisco/work/datasets/rcnn/selective_search_data'
+                              }
+  ds.roidb = dt.roidb
+  return ds
+end
+
+function getModel()
+  local features = nn.Sequential()
+  features:add(nn.SpatialConvolutionMM(3,16,11,11,16,16,5,5))
+  local classifier = nn.Sequential()
+  classifier:add(nn.Linear(7*7*16,21))
+  local model1 = nn.Sequential()
+  model1:add(features)
+  model1:add(nn.SpatialMaxPooling(2,2,2,2))
+  model1:add(nn.View(-1):setNumInputDims(3))
+  model1:add(classifier)
+  local model = nn.Sequential()
+  local prl = nn.ParallelTable()
+  prl:add(features)
+  prl:add(nn.Identity())
+  model:add(prl)
+  model:add(nnf.ROIPooling(7,7):setSpatialScale(1/16))
+  model:add(nn.View(-1):setNumInputDims(3))
+  model:add(classifier)
+  return model1, model, features, classifier
+end
+
+--------------------------------------------------------------------------------
+-- define dataset, models and feature providers
+--------------------------------------------------------------------------------
+
+ds = getDS()
+
+model1, model, features, classifier = getModel()
+
+fp1 = nnf.RCNN{}
+fp2 = nnf.FRCNN{}
+fp3 = nnf.SPP{model=features}
+fp3.use_cache = false
+fp3:evaluate()
+
+
diff --git a/tests/test_visualization.lua b/tests/test_visualization.lua
new file mode 100644
index 0000000..b5d727a
--- /dev/null
+++ b/tests/test_visualization.lua
@@ -0,0 +1,7 @@
+dofile 'tests/test_utils.lua'
+I = ds:getImage(1)
+boxes = ds:getROIBoxes(1)
+scores = torch.rand(boxes:size(1),21)
+dofile 'visualize_detections.lua'
+visualize_detections(I,boxes,scores,0.9)
+
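
One detail of getModel() in tests/test_utils.lua worth spelling out: model1 (the RCNN-style network) and model (the ROIPooling one) are assembled from the same features and classifier instances, so training either network updates the weights seen by both. A self-contained sketch of that sharing:

require 'nn'
local shared = nn.Linear(4, 2)
local a = nn.Sequential():add(shared)
local b = nn.Sequential():add(shared)
a:get(1).weight:fill(1)       -- update through one container...
print(b:get(1).weight:sum())  -- 8: ...and the other sees the same storage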
diff --git a/tests/test_visualization2.lua b/tests/test_visualization2.lua
new file mode 100644
index 0000000..415f86a
--- /dev/null
+++ b/tests/test_visualization2.lua
@@ -0,0 +1,42 @@
+require 'cutorch'
+require 'nnf'
+require 'cudnn'
+require 'inn'
+dofile 'visualize_detections.lua'
+
+cutorch.setDevice(2)
+
+--model = torch.load('cachedir/test2_frcnn/model.t7')
+model = torch.load('cachedir/model.t7')
+--model:add(nn.SoftMax():cuda())
+
+image_transformer= nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+                                        raw_scale = 255,
+                                        swap = {3,2,1}}
+
+
+ds = nnf.DataSetPascal{image_set='test',
+                       datadir='datasets/VOCdevkit',
+                       roidbdir='data/selective_search_data'
+                      }
+
+fp = nnf.FRCNN{image_transformer=image_transformer}
+fp:evaluate()
+model:evaluate()
+detect = nnf.ImageDetect(model,fp)
+
+im_idx = 700
+
+I = ds:getImage(im_idx)
+boxes = ds:getROIBoxes(im_idx)
+--boxes = ds:getGTBoxes(im_idx)
+
+scores,bb = detect:detect(I,boxes)
+
+w = visualize_detections(I,boxes,scores,0.5,ds.classes)
+
+Im = w:image()
+II = Im:toFloatTensor()
+
+image.save('example_frcnn.jpg',II)
+
diff --git a/train.lua b/train.lua
new file mode 100644
index 0000000..8184922
--- /dev/null
+++ b/train.lua
@@ -0,0 +1,49 @@
+trainer = nnf.Trainer(model, criterion, batch_provider)
+
+local num_iter = opt.num_iter/opt.disp_iter
+local lr_step = opt.lr_step/opt.disp_iter
+local save_step = opt.save_step/opt.disp_iter
+
+trainer.optimState.learningRate = opt.lr
+
+logger = optim.Logger(paths.concat(rundir,'train.log'))
+
+if opt.conf_mat then
+  local conf_classes = {'background'}
+  for k,v in ipairs(ds_train.classes) do
+    table.insert(conf_classes,v)
+  end
+  trainer.confusion = optim.ConfusionMatrix(conf_classes)
+end
+
+local lightModel = model:clone('weight','bias','running_mean','running_std')
+
+-- main training loop
+for i=1,num_iter do
+  if i % lr_step == 0 then
+    trainer.optimState.learningRate = trainer.optimState.learningRate/10
+  end
+  print(('Iteration %3d/%-3d'):format(i,num_iter))
+  trainer:train(opt.disp_iter)
+  print(('  Training error: %.5f'):format(trainer.fx[i]))
+
+  if opt.conf_mat then
+    print(trainer.confusion)
+    logger:add{
+      ['train error']=trainer.fx[i],
+      ['confusion matrix']=tostring(trainer.confusion),
+      ['learning rate']=trainer.optimState.learningRate
+    }
+  else
+    logger:add{
+      ['train error']=trainer.fx[i],
+      ['learning rate']=trainer.optimState.learningRate
+    }
+  end
+
+  if i % save_step == 0 then
+    torch.save(paths.concat(rundir, 'model.t7'), lightModel)
+  end
+end
+
+torch.save(paths.concat(rundir, 'model.t7'), lightModel)
diff --git a/utils.lua b/utils.lua
index 0255907..689c00f 100644
--- a/utils.lua
+++ b/utils.lua
@@ -2,6 +2,7 @@
 -- utility functions for the evaluation part
 --------------------------------------------------------------------------------
 
+-- can be replaced by the new torch.cat function
 local function joinTable(input,dim)
   local size = torch.LongStorage()
   local is_ok = false
@@ -29,6 +30,50 @@ local function joinTable(input,dim)
   return output
 end
 
+local function recursiveResizeAsCopyTyped(t1,t2,type)
+  if torch.type(t2) == 'table' then
+    t1 = (torch.type(t1) == 'table') and t1 or {t1}
+    for key,_ in pairs(t2) do
+      t1[key], t2[key] = recursiveResizeAsCopyTyped(t1[key], t2[key], type)
+    end
+  elseif torch.isTensor(t2) then
+    local type = type or t2:type()
+    t1 = torch.isTypeOf(t1,type) and t1 or torch.Tensor():type(type)
+    t1:resize(t2:size()):copy(t2)
+  else
+    error("expecting nested tensors or tables. Got "..
+          torch.type(t1).." and "..torch.type(t2).." instead")
+  end
+  return t1, t2
+end
+
+local function concat(t1,t2,dim)
+  local out
+  assert(t1:type() == t2:type(),'tensors should have the same type')
+  if t1:dim() > 0 and t2:dim() > 0 then
+    dim = dim or t1:dim()
+    out = torch.cat(t1,t2,dim)
+  elseif t1:dim() > 0 then
+    out = t1:clone()
+  else
+    out = t2:clone()
+  end
+  return out
+end
+
+-- modifies the bbox input in place
+local function flipBoundingBoxes(bbox, im_width)
+  if bbox:dim() == 1 then
+    local tt = bbox[1]
+    bbox[1] = im_width-bbox[3]+1
+    bbox[3] = im_width-tt+1
+  else
+    local tt = bbox[{{},1}]:clone()
+    bbox[{{},1}]:fill(im_width+1):add(-1,bbox[{{},3}])
+    bbox[{{},3}]:fill(im_width+1):add(-1,tt)
+  end
+end
+
 --------------------------------------------------------------------------------
 
 local function keep_top_k(boxes,top_k)
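
A quick sanity check (illustration only, relying on the utils table exported at the end of this file) of the flipping convention used by flipBoundingBoxes: boxes are 1-indexed and inclusive, so a horizontal flip maps x1 to W-x2+1 and x2 to W-x1+1, which preserves box widths.

local utils = dofile 'utils.lua'
local bbox = torch.Tensor{{11, 5, 30, 25}}
utils.flipBoundingBoxes(bbox, 100)  -- flip in place for an image of width 100
print(bbox)  -- 71  5  90  25: the width is 19 before and after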
instead") + end + return t1, t2 +end + +local function concat(t1,t2,dim) + local out + assert(t1:type() == t2:type(),'tensors should have the same type') + if t1:dim() > 0 and t2:dim() > 0 then + dim = dim or t1:dim() + out = torch.cat(t1,t2,dim) + elseif t1:dim() > 0 then + out = t1:clone() + else + out = t2:clone() + end + return out +end + +-- modify bbox input +local function flipBoundingBoxes(bbox, im_width) + if bbox:dim() == 1 then + local tt = bbox[1] + bbox[1] = im_width-bbox[3]+1 + bbox[3] = im_width-tt +1 + else + local tt = bbox[{{},1}]:clone() + bbox[{{},1}]:fill(im_width+1):add(-1,bbox[{{},3}]) + bbox[{{},3}]:fill(im_width+1):add(-1,tt) + end +end + -------------------------------------------------------------------------------- local function keep_top_k(boxes,top_k) @@ -80,7 +125,6 @@ end -------------------------------------------------------------------------------- local function boxoverlap(a,b) - --local b = anno.objects[j] local b = b.xmin and {b.xmin,b.ymin,b.xmax,b.ymax} or b local x1 = a:select(2,1):clone() @@ -267,6 +311,10 @@ utils.VOCap = VOCap utils.convertCaffeModelToTorch = convertCaffeModelToTorch utils.reshapeLastLinearLayer = reshapeLastLinearLayer utils.sanitize = sanitize +utils.recursiveResizeAsCopyTyped = recursiveResizeAsCopyTyped +utils.flipBoundingBoxes = flipBoundingBoxes +utils.concat = concat +utils.boxoverlap = boxoverlap return utils diff --git a/visualize_detections.lua b/visualize_detections.lua new file mode 100644 index 0000000..2381de4 --- /dev/null +++ b/visualize_detections.lua @@ -0,0 +1,62 @@ +local nms = dofile 'nms.lua' + +function visualize_detections(im,boxes,scores,thresh,cl_names) + local ok = pcall(require,'qt') + if not ok then + error('You need to run visualize_detections using qlua') + end + require 'qttorch' + require 'qtwidget' + + -- select best scoring boxes without background + local max_score,idx = scores[{{},{2,-1}}]:max(2) + + local idx_thresh = max_score:gt(thresh) + max_score = max_score[idx_thresh] + idx = idx[idx_thresh] + + local r = torch.range(1,boxes:size(1)):long() + local rr = r[idx_thresh] + if rr:numel() == 0 then + error('No detections with a score greater than the specified threshold') + end + local boxes_thresh = boxes:index(1,rr) + + local keep = nms(torch.cat(boxes_thresh:float(),max_score:float(),2),0.3) + + boxes_thresh = boxes_thresh:index(1,keep) + max_score = max_score:index(1,keep) + idx = idx:index(1,keep) + + local num_boxes = boxes_thresh:size(1) + local widths = boxes_thresh[{{},3}] - boxes_thresh[{{},1}] + local heights = boxes_thresh[{{},4}] - boxes_thresh[{{},2}] + + local x,y = im:size(3),im:size(2) + local w = qtwidget.newwindow(x,y,"Detections") + local qtimg = qt.QImage.fromTensor(im) + w:image(0,0,x,y,qtimg) + local fontsize = 15 + + for i=1,num_boxes do + local x,y = boxes_thresh[{i,1}],boxes_thresh[{i,2}] + local width,height = widths[i], heights[i] + + -- add bbox + w:rectangle(x,y,width,height) + + -- add score + w:moveto(x,y+fontsize) + w:setcolor("red") + w:setfont(qt.QFont{serif=true,italic=true,size=fontsize,bold=true}) + if cl_names then + w:show(string.format('%s: %.2f',cl_names[idx[i]],max_score[i])) + else + w:show(string.format('%d: %.2f',idx[i],max_score[i])) + end + end + w:setcolor("red") + w:setlinewidth(2) + w:stroke() + return w +end