diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ae7e697
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*~
+*.swp
+cachedir/*
diff --git a/BatchProviderBase.lua b/BatchProviderBase.lua
new file mode 100644
index 0000000..35e5ada
--- /dev/null
+++ b/BatchProviderBase.lua
@@ -0,0 +1,149 @@
+local argcheck = require 'argcheck'
+
+local function createWindowBase(rec,i,j,is_bg)
+ local label = is_bg == true and 0+1 or rec.label[j]+1
+ local window = {i,rec.boxes[j][1],rec.boxes[j][2],
+ rec.boxes[j][3],rec.boxes[j][4],
+ label}
+ return window
+end
+
+local function createWindowAngle(rec,i,j,is_bg)
+ local label = is_bg == true and 0+1 or rec.label[j]+1
+ --local ang = ( is_bg == false and rec.objects[rec.correspondance[j] ] ) and
+ -- rec.objects[rec.correspondance[j] ].viewpoint.azimuth or 0
+ local ang
+ if is_bg == false and rec.objects[rec.correspondance[j] ] then
+ if rec.objects[rec.correspondance[j] ].viewpoint.distance == '0' then
+ ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth_coarse
+ else
+ ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth
+ end
+ else
+ ang = 0
+ end
+ local window = {i,rec.boxes[j][1],rec.boxes[j][2],
+ rec.boxes[j][3],rec.boxes[j][4],
+ label,ang}
+ return window
+end
+
+--[[
+local argcheck = require 'argcheck'
+local initcheck = argcheck{
+ pack=true,
+ noordered=true,
+ {name="dataset",
+ type="nnf.DataSetPascal",
+ help="A dataset class"
+ },
+ {name="batch_size",
+ type="number",
+ default=128,
+ help="batch size"},
+ {name="fg_fraction",
+ type="number",
+ default=0.25,
+ help="foreground fraction in batch"
+ },
+ {name="fg_threshold",
+ type="number",
+ default=0.5,
+ help="foreground threshold"
+ },
+ {name="bg_threshold",
+ type="table",
+ default={0,0.5},
+ help="background threshold, in the form {LO,HI}"
+ },
+ {name="createWindow",
+ type="function",
+ default=createWindowBase,
+ help=""
+ },
+ {name="do_flip",
+ type="boolean",
+ default=true,
+ help="sample batches with random flips"
+ },
+}
+--]]
+
+local BatchProviderBase = torch.class('nnf.BatchProviderBase')
+
+function BatchProviderBase:__init(...)
+
+ self.dataset = nil
+ self.batch_size = 128
+ self.fg_fraction = 0.25
+ self.fg_threshold = 0.5
+ self.bg_threshold = {0,0.5}
+ self.createWindow = createWindowBase
+ self.do_flip = true
+
+ --local opts = initcheck(...)
+ --for k,v in pairs(opts) do self[k] = v end
+
+end
+
+-- allow changing the way self.bboxes are formatted
+function BatchProviderBase:setCreateWindow(createWindow)
+ self.createWindow = createWindow
+end
+
+function BatchProviderBase:setupData()
+ local dataset = self.dataset
+ local bb = {}
+ local bbT = {}
+
+ for i=0,dataset.num_classes do -- 0 because of background
+ bb[i] = {}
+ end
+
+ for i=1,dataset.num_imgs do
+ bbT[i] = {}
+ end
+
+ for i = 1,dataset.num_imgs do
+ if dataset.num_imgs > 10 then
+ xlua.progress(i,dataset.num_imgs)
+ end
+
+ local rec = dataset:attachProposals(i)
+
+ for j=1,rec:size() do
+ local id = rec.label[j]
+ local is_fg = (rec.overlap[j] >= self.fg_threshold)
+ local is_bg = (not is_fg) and (rec.overlap[j] >= self.bg_threshold[1] and
+ rec.overlap[j] < self.bg_threshold[2])
+ if is_fg then
+ local window = self.createWindow(rec,i,j,is_bg)
+ table.insert(bb[1], window) -- could be id instead of 1
+ elseif is_bg then
+ local window = self.createWindow(rec,i,j,is_bg)
+ table.insert(bb[0], window)
+ end
+
+ end
+
+ for j=0,dataset.num_classes do -- 0 because of background
+ if #bb[j] > 0 then
+ bbT[i][j] = torch.FloatTensor(bb[j])
+ end
+ end
+
+ bb = {}
+ for i=0,dataset.num_classes do -- 0 because of background
+ bb[i] = {}
+ end
+ collectgarbage()
+ end
+ self.bboxes = bbT
+ --return bbT
+end
+
+function BatchProviderBase:getBatch()
+  -- abstract method, implemented by the derived classes
+  -- (BatchProviderRC and BatchProviderIC)
+  error("BatchProviderBase is abstract and cannot be used directly")
+end
+
diff --git a/BatchProviderIC.lua b/BatchProviderIC.lua
new file mode 100644
index 0000000..da34d77
--- /dev/null
+++ b/BatchProviderIC.lua
@@ -0,0 +1,164 @@
+local BatchProvider, parent = torch.class('nnf.BatchProviderIC','nnf.BatchProviderBase')
+
+local argcheck = require 'argcheck'
+
+local env = require 'argcheck.env' -- retrieve argcheck environment
+-- this is the default type function,
+-- which can be overridden by the user
+function env.istype(obj, typename)
+ if typename == 'DataSet' then
+ return obj and obj._isDataSet
+ end
+ if typename == 'FeatureProvider' then
+ return obj and obj._isFeatureProvider
+ end
+ return torch.type(obj) == typename
+end
+
+
+local initcheck = argcheck{
+ pack=true,
+ noordered=true,
+ {name="dataset",
+ type="DataSet",
+ help="A dataset class"
+ },
+ {name="feat_provider",
+ type="nnf.FRCNN",
+ help="A feat provider class"
+ },
+ {name="batch_size",
+ type="number",
+ opt=true,
+ help="batch size"},
+ {name="imgs_per_batch",
+ type="number",
+ default=2,
+ help="number of images to sample in a batch"},
+ {name="fg_fraction",
+ type="number",
+ default=0.25,
+ help="foreground fraction in batch"
+ },
+ {name="fg_threshold",
+ type="number",
+ default=0.5,
+ help="foreground threshold"
+ },
+ {name="bg_threshold",
+ type="table",
+ default={0.1,0.5},
+ help="background threshold, in the form {LO,HI}"
+ },
+ {name="do_flip",
+ type="boolean",
+ default=true,
+ help="sample batches with random flips"
+ },
+}
+
+function BatchProvider:__init(...)
+ parent.__init(self)
+
+ local opts = initcheck(...)
+ for k,v in pairs(opts) do self[k] = v end
+end
+
+-- setupData is inherited from BatchProviderBase
+
+function BatchProvider:permuteIdx()
+ local total_img = self.dataset:size()
+ local imgs_per_batch = self.imgs_per_batch
+
+ self._cur = self._cur or math.huge
+
+ if self._cur + imgs_per_batch > total_img then
+ self._perm = torch.randperm(total_img)
+ self._cur = 1
+ end
+
+ local img_idx = self._perm[{{self._cur,self._cur + self.imgs_per_batch - 1}}]
+ self._cur = self._cur + self.imgs_per_batch
+
+ local img_idx_end = imgs_per_batch
+
+ local fg_windows = {}
+ local bg_windows = {}
+ for i=1,img_idx_end do
+ local curr_idx = img_idx[i]
+ bg_windows[i] = {}
+ if self.bboxes[curr_idx][0] then
+ for j=1,self.bboxes[curr_idx][0]:size(1) do
+ table.insert(bg_windows[i],{curr_idx,j})
+ end
+ end
+ fg_windows[i] = {}
+ if self.bboxes[curr_idx][1] then
+ for j=1,self.bboxes[curr_idx][1]:size(1) do
+ table.insert(fg_windows[i],{curr_idx,j})
+ end
+ end
+ end
+ local do_flip = torch.FloatTensor(imgs_per_batch):random(0,1)
+ local opts = {img_idx=img_idx,img_idx_end=img_idx_end,do_flip=do_flip}
+ return fg_windows,bg_windows,opts
+
+end
+
+function BatchProvider:selectBBoxes(fg_windows,bg_windows)
+ local fg_num_each = torch.round(self.fg_num_each/self.imgs_per_batch)
+ local bg_num_each = torch.round(self.bg_num_each/self.imgs_per_batch)
+
+ local bboxes = {}
+ local labels = {}
+ for im=1,self.imgs_per_batch do
+ local window_idx = torch.randperm(#bg_windows[im])
+ local end_idx = math.min(bg_num_each,#bg_windows[im])
+ local bbox = {}
+ for i=1,end_idx do
+ local curr_idx = bg_windows[im][window_idx[i] ][1]
+ local position = bg_windows[im][window_idx[i] ][2]
+ local dd = self.bboxes[curr_idx][0][position][{{2,6}}]
+ table.insert(bbox,{dd[1],dd[2],dd[3],dd[4]})
+ table.insert(labels,dd[5])
+ end
+
+ window_idx = torch.randperm(#fg_windows[im])
+ local end_idx = math.min(fg_num_each,#fg_windows[im])
+ for i=1,end_idx do
+ local curr_idx = fg_windows[im][window_idx[i] ][1]
+ local position = fg_windows[im][window_idx[i] ][2]
+ local dd = self.bboxes[curr_idx][1][position][{{2,6}}]
+ table.insert(bbox,{dd[1],dd[2],dd[3],dd[4]})
+ table.insert(labels,dd[5])
+ end
+ table.insert(bboxes,torch.FloatTensor(bbox))
+ end
+ labels = torch.IntTensor(labels)
+ return bboxes, labels
+end
+
+function BatchProvider:getBatch()
+ local dataset = self.dataset
+
+ self.fg_num_each = self.fg_fraction * self.batch_size
+ self.bg_num_each = self.batch_size - self.fg_num_each
+
+ local fg_windows,bg_windows,opts = self:permuteIdx()
+
+ self.targets = self.targets or torch.FloatTensor()
+
+ local batches = self.batches
+ local targets = self.targets
+
+ local imgs = {}
+ for i=1,opts.img_idx:size(1) do
+ table.insert(imgs,dataset:getImage(opts.img_idx[i]))
+ end
+ local boxes,labels = self:selectBBoxes(fg_windows,bg_windows)
+ self.batches = self.feat_provider:getFeature(imgs,boxes,opts.do_flip)
+
+ targets:resize(labels:size()):copy(labels)
+
+ return self.batches, self.targets
+end
diff --git a/BatchProvider.lua b/BatchProviderRC.lua
similarity index 57%
rename from BatchProvider.lua
rename to BatchProviderRC.lua
index 977bdc7..2770036 100644
--- a/BatchProvider.lua
+++ b/BatchProviderRC.lua
@@ -1,109 +1,82 @@
-local BatchProvider = torch.class('nnf.BatchProvider')
+local BatchProvider,parent =
+ torch.class('nnf.BatchProviderRC','nnf.BatchProviderBase')
-local function createWindowBase(rec,i,j,is_bg)
- local label = is_bg == true and 0+1 or rec.label[j]+1
- local window = {i,rec.boxes[j][1],rec.boxes[j][2],
- rec.boxes[j][3],rec.boxes[j][4],
- label}
- return window
-end
-local function createWindowAngle(rec,i,j,is_bg)
- local label = is_bg == true and 0+1 or rec.label[j]+1
- --local ang = ( is_bg == false and rec.objects[rec.correspondance[j] ] ) and
- -- rec.objects[rec.correspondance[j] ].viewpoint.azimuth or 0
- local ang
- if is_bg == false and rec.objects[rec.correspondance[j] ] then
- if rec.objects[rec.correspondance[j] ].viewpoint.distance == '0' then
- ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth_coarse
- else
- ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth
- end
- else
- ang = 0
- end
- local window = {i,rec.boxes[j][1],rec.boxes[j][2],
- rec.boxes[j][3],rec.boxes[j][4],
- label,ang}
- return window
-end
-
-function BatchProvider:__init(feat_provider)
- self.dataset = feat_provider.dataset
- self.feat_provider = feat_provider
+local argcheck = require 'argcheck'
- self.nTimesMoreData = 10
- self.iter_per_batch = 500
-
- self.batch_size = 128
- self.fg_fraction = 0.25
-
- self.fg_threshold = 0.5
- self.bg_threshold = {0.0,0.5}
-
- self.createWindow = createWindowBase--createWindowAngle
-
- self.batch_dim = {256*50}
- self.target_dim = 1
-
- self.do_flip = true
-
- --self:setupData()
+local env = require 'argcheck.env' -- retrieve argcheck environment
+-- this is the default type function,
+-- which can be overridden by the user
+function env.istype(obj, typename)
+ if typename == 'DataSet' then
+ return obj and obj._isDataSet
+ end
+ if typename == 'FeatureProvider' then
+ return obj and obj._isFeatureProvider
+ end
+ return torch.type(obj) == typename
end
-function BatchProvider:setupData()
- local dataset = self.dataset
- local bb = {}
- local bbT = {}
+local initcheck = argcheck{
+ pack=true,
+ noordered=true,
+ {name="dataset",
+ type="DataSet",
+ help="A dataset class"
+ },
+ {name="feat_provider",
+ type="FeatureProvider",
+ help="A feat provider class"
+ },
+ {name="batch_size",
+ type="number",
+ default=128,
+ help="batch size"},
+ {name="iter_per_batch",
+ type="number",
+ default=10,
+ help=""},
+ {name="nTimesMoreData",
+ type="number",
+ default=10,
+ help=""},
+ {name="fg_fraction",
+ type="number",
+ default=0.25,
+ help="foreground fraction in batch"
+ },
+ {name="fg_threshold",
+ type="number",
+ default=0.5,
+ help="foreground threshold"
+ },
+ {name="bg_threshold",
+ type="table",
+ default={0.1,0.5},
+ help="background threshold, in the form {LO,HI}"
+ },
+ {name="target_dim",
+ type="number",
+ default=1,
+ help=""},
+ {name="do_flip",
+ type="boolean",
+ default=true,
+ help="sample batches with random flips"
+ },
+}
- for i=0,dataset.num_classes do -- 0 because of background
- bb[i] = {}
- end
+function BatchProvider:__init(...)
+ parent.__init(self)
- for i=1,dataset.num_imgs do
- bbT[i] = {}
- end
+ local opts = initcheck(...)
+ for k,v in pairs(opts) do self[k] = v end
- for i = 1,dataset.num_imgs do
- if dataset.num_imgs > 10 then
- xlua.progress(i,dataset.num_imgs)
- end
-
- local rec = dataset:attachProposals(i)
+ self.batch_dim = self.feat_provider.output_size
- for j=1,rec:size() do
- local id = rec.label[j]
- local is_fg = (rec.overlap[j] >= self.fg_threshold)
- local is_bg = (not is_fg) and (rec.overlap[j] >= self.bg_threshold[1] and
- rec.overlap[j] < self.bg_threshold[2])
- if is_fg then
- local window = self.createWindow(rec,i,j,is_bg)
- table.insert(bb[1], window) -- could be id instead of 1
- elseif is_bg then
- local window = self.createWindow(rec,i,j,is_bg)
- table.insert(bb[0], window)
- end
-
- end
-
- for j=0,dataset.num_classes do -- 0 because of background
- if #bb[j] > 0 then
- bbT[i][j] = torch.FloatTensor(bb[j])
- end
- end
-
- bb = {}
- for i=0,dataset.num_classes do -- 0 because of background
- bb[i] = {}
- end
- collectgarbage()
- end
- self.bboxes = bbT
- --return bbT
end
-
function BatchProvider:permuteIdx()
local fg_num_each = self.fg_num_each
local bg_num_each = self.bg_num_each
@@ -183,21 +156,12 @@ function BatchProvider:selectBBoxes(fg_windows,bg_windows)
return fg_w,bg_w
end
-
--- specific for angle estimation
-local function flip_angle(x)
- return (-x)%360
-end
-
-- depends on the model
-function BatchProvider:prepareFeatures(im_idx,bboxes,fg_data,bg_data,fg_label,bg_label)
+function BatchProvider:prepareFeatures(im_idx,bboxes,fg_label,bg_label)
local num_pos = bboxes[1] and #bboxes[1] or 0
local num_neg = bboxes[0] and #bboxes[0] or 0
- fg_data:resize(num_pos,unpack(self.batch_dim))
- bg_data:resize(num_neg,unpack(self.batch_dim))
-
fg_label:resize(num_pos,self.target_dim)
bg_label:resize(num_neg,self.target_dim)
@@ -205,36 +169,29 @@ function BatchProvider:prepareFeatures(im_idx,bboxes,fg_data,bg_data,fg_label,bg
if self.do_flip then
flip = torch.random(0,1) == 0
end
- --print(bboxes)
+
+ local s_boxes = {}
for i=1,num_pos do
- --local bbox = bboxes[1][{i,{2,5}}]
local bbox = {bboxes[1][i][2],bboxes[1][i][3],bboxes[1][i][4],bboxes[1][i][5]}
- fg_data[i] = self.feat_provider:getFeature(im_idx,bbox,flip)
+ table.insert(s_boxes,bbox)
fg_label[i][1] = bboxes[1][i][6]
---[[ if flip then
- fg_label[i][2] = flip_angle(bboxes[1][i][7])
- else
- fg_label[i][2] = bboxes[1][i][7]
- end
-]]
end
for i=1,num_neg do
- --local bbox = bboxes[0][{i,{2,5}}]
local bbox = {bboxes[0][i][2],bboxes[0][i][3],bboxes[0][i][4],bboxes[0][i][5]}
- bg_data[i] = self.feat_provider:getFeature(im_idx,bbox,flip)
+ table.insert(s_boxes,bbox)
bg_label[i][1] = bboxes[0][i][6]
---[[ if flip then
- bg_label[i][2] = flip_angle(bboxes[0][i][7])
- else
- bg_label[i][2] = bboxes[0][i][7]
- end]]
end
-
--- return fg_data,bg_data,fg_label,bg_label
+
+ -- compute the features
+ local feats = self.feat_provider:getFeature(im_idx,s_boxes,flip)
+ local fg_data = num_pos > 0 and feats:narrow(1,1,num_pos) or nil
+ local bg_data = num_neg > 0 and feats:narrow(1,num_pos+1,num_neg) or nil
+
+ return fg_data, bg_data
end
-function BatchProvider:getBatch(batches,targets)
+function BatchProvider:prepareBatch(batches,targets)
local dataset = self.dataset
self.fg_num_each = self.fg_fraction * self.batch_size
@@ -257,11 +214,11 @@ function BatchProvider:getBatch(batches,targets)
local bg_counter = 0
local fg_data,bg_data,fg_label,bg_label
- fg_data = torch.FloatTensor()
- bg_data = torch.FloatTensor()
fg_label = torch.IntTensor()
bg_label = torch.IntTensor()
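+  -- SPP caches conv feature maps per image, so it receives the image
+  -- index instead of the decoded image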
+ local pass_index = torch.type(self.feat_provider) == 'nnf.SPP' and true or false
+
print('==> Preparing Batch Data')
for i=1,opts.img_idx_end do
xlua.progress(i,opts.img_idx_end)
@@ -278,7 +235,13 @@ function BatchProvider:getBatch(batches,targets)
bboxes[0] = bg_w[curr_idx]
bboxes[1] = fg_w[curr_idx]
- self:prepareFeatures(curr_idx,bboxes,fg_data,bg_data,fg_label,bg_label)
+ local data
+ if pass_index then
+ data = curr_idx
+ else
+ data = dataset:getImage(curr_idx)
+ end
+ fg_data,bg_data = self:prepareFeatures(data,bboxes,fg_label,bg_label)
for j=1,nbg do
bg_counter = bg_counter + 1
@@ -297,6 +260,24 @@ function BatchProvider:getBatch(batches,targets)
batches[b][s]:copy(fg_data[j])
targets[b][s]:copy(fg_label[j])
end
+ collectgarbage()
end
+ collectgarbage()
return batches,targets
end
+
+function BatchProvider:getBatch()
+ self._cur = self._cur or math.huge
+ -- we have reached the end of our batch pool, need to recompute
+ if self._cur > self.iter_per_batch then
+ self._batches,self._targets = self:prepareBatch(self._batches,self._targets)
+ self._cur = 1
+ end
+
+ self.batches = self._batches[self._cur]
+ self.targets = self._targets[self._cur]
+ self._cur = self._cur + 1
+
+ return self.batches, self.targets
+
+end
diff --git a/DataSetCOCO.lua b/DataSetCOCO.lua
new file mode 100644
index 0000000..6b2a2e0
--- /dev/null
+++ b/DataSetCOCO.lua
@@ -0,0 +1,155 @@
+--local json = require 'dkjson'
+
+local DataSetCOCO,parent = torch.class('nnf.DataSetCOCO', 'nnf.DataSetDetection')
+
+function DataSetCOCO:__init(annFile)
+ self.image_set = nil
+ self.dataset_name = 'COCO'
+
+ local timer = torch.Timer()
+ local localtimer = torch.Timer()
+ print('Preparing COCO dataset...')
+ --[[
+ if type(annFile) == 'string' then
+ local f = io.open(annFile)
+ local str = f:read('*all')
+ f:close()
+
+ self.data = json.decode(str)
+
+ else
+ self.data = torch.load(annFile)
+ end
+ --]]
+  self.data = torch.load('coco_val.t7') -- FIXME: should honor the annFile argument
+ print((' Loaded annotations file in %.2fs'):format(localtimer:time().real))
+ localtimer:reset()
+
+ -- mapping images
+ local img_idx = {}
+ local img_idx_map = {}
+ for i = 1, #self.data.images do
+ table.insert(img_idx,self.data.images[i].id)
+ img_idx_map[self.data.images[i].id] = i
+ end
+ print((' Mapped images in %.4fs'):format(localtimer:time().real))
+ localtimer:reset()
+
+ -- mapping annotations
+ local ann = self.data.annotations
+ local o = {}
+
+ for k, v in ipairs(ann) do
+ table.insert(o,v.image_id*1e10 + v.category_id)
+ end
+ o = torch.LongTensor(o)
+ local _,ox = o:sort()
+ local temp_ann = {}
+ for i=1 , o:size(1) do
+ table.insert(temp_ann, ann[ox[i] ])
+ end
+ self.data.annotations = temp_ann
+
+ local ann_idx = {}
+ local ann_idx_map = {}
+ local ann_img_idx = {}
+ local img_ann_idx_map = {}
+ for k,v in ipairs(temp_ann) do
+ table.insert(ann_idx, v.id)
+ ann_idx_map[v.id] = k
+ table.insert(ann_img_idx, v.image_id)
+ if not img_ann_idx_map[v.image_id] then
+ img_ann_idx_map[v.image_id] = {}
+ end
+ table.insert(img_ann_idx_map[v.image_id],v.id)
+ end
+
+ self.inds = {img_idx = img_idx,
+ img_idx_map = img_idx_map,
+ ann_idx = ann_idx,
+ ann_idx_map = ann_idx_map,
+ ann_img_idx = ann_img_idx,
+ img_ann_idx_map = img_ann_idx_map
+ }
+ print((' Mapped annotations in %.4fs'):format(localtimer:time().real))
+ localtimer:reset()
+
+ -- mapping classes
+ self.classes = {}
+ self.class_to_id = {}
+ self.class_cont = {}
+ self.class_cont_map = {}
+ self.num_classes = 0
+ for k,v in ipairs(self.data.categories) do
+ self.classes[v.id] = v.name
+ self.class_to_id[v.name] = v.id
+ table.insert(self.class_cont,v.id)
+ self.class_cont_map[v.id] = k
+ self.num_classes = self.num_classes + 1
+ end
+
+ print((' Total elapsed time: %.4fs'):format(timer:time().real))
+
+end
+
+function DataSetCOCO:getImage(i)
+  local file_name = self.data.images[i].file_name
+ return image.load(paths.concat(self.imgpath,file_name),3,'float')
+end
+
+function DataSetCOCO:getAnnotation(i)
+ local ann = {object = {}}
+ local im_id = self.inds.img_idx[i]
+ local ann_id = self.inds.img_ann_idx_map[im_id] or {}
+ for k,v in ipairs(ann_id) do
+ local lann = self.data.annotations[self.inds.ann_idx_map[v] ]
+ local bbox = {xmin=lann.bbox[1]+1,ymin=lann.bbox[2]+1,
+ xmax=lann.bbox[1]+lann.bbox[3]+1,
+ ymax=lann.bbox[2]+lann.bbox[4]+1,
+ }
+ local obj = {bndbox=bbox,
+ class=lann.category_id,
+ difficult = '0',
+ name = self.classes[lann.category_id]
+ }
+ table.insert(ann.object,obj)
+ end
+ return ann
+end
+
+function DataSetCOCO:getGTBoxes(i)
+ local anno = self:getAnnotation(i)
+ local valid_objects = {}
+ local gt_boxes = torch.IntTensor()
+ local gt_classes = {}
+
+  if self.with_hard_samples then -- inverted with respect to the RCNN code
+ for idx,obj in ipairs(anno.object) do
+ if self.class_to_id[obj.name] then -- to allow a subset of the classes
+ table.insert(valid_objects,idx)
+ end
+ end
+ else
+ for idx,obj in ipairs(anno.object) do
+ if obj.difficult == '0' and self.class_to_id[obj.name] then
+ table.insert(valid_objects,idx)
+ end
+ end
+ end
+
+ gt_boxes:resize(#valid_objects,4)
+ for idx0,idx in ipairs(valid_objects) do
+ gt_boxes[idx0][1] = anno.object[idx].bndbox.xmin
+ gt_boxes[idx0][2] = anno.object[idx].bndbox.ymin
+ gt_boxes[idx0][3] = anno.object[idx].bndbox.xmax
+ gt_boxes[idx0][4] = anno.object[idx].bndbox.ymax
+
+ table.insert(gt_classes,self.class_cont_map[anno.object[idx].class])
+ end
+
+ return gt_boxes,gt_classes,valid_objects,anno
+
+end
+
+
diff --git a/DataSetDetection.lua b/DataSetDetection.lua
new file mode 100644
index 0000000..a557ece
--- /dev/null
+++ b/DataSetDetection.lua
@@ -0,0 +1,113 @@
+local utilities = paths.dofile('utils.lua')
+local concat = utilities.concat
+local boxoverlap = utilities.boxoverlap
+
+local DataSetDetection = torch.class('nnf.DataSetDetection')
+DataSetDetection._isDataSet = true
+
+function DataSetDetection:__init()
+ self.classes = nil
+ self.num_classes = nil
+ self.image_set = nil
+ self.dataset_name = nil
+end
+
+function DataSetDetection:getImage(i)
+end
+
+function DataSetDetection:getAnnotation(i)
+end
+
+function DataSetDetection:getROIBoxes(i)
+end
+
+function DataSetDetection:getGTBoxes(i)
+end
+
+function DataSetDetection:size()
+ return #self.img_ids
+end
+
+function DataSetDetection:__tostring__()
+ local str = torch.type(self)
+ str = str .. '\n Dataset Name: ' .. self.dataset_name
+ str = str .. '\n ImageSet: '.. self.image_set
+ str = str .. '\n Number of images: '.. self:size()
+ str = str .. '\n Classes:'
+ for k,v in ipairs(self.classes) do
+ str = str .. '\n '..v
+ end
+ return str
+end
+
+function DataSetDetection:bestOverlap(all_boxes, gt_boxes, gt_classes)
+ local num_total_boxes = all_boxes:size(1)
+ local num_gt_boxes = gt_boxes:dim() > 0 and gt_boxes:size(1) or 0
+ local overlap_class = torch.FloatTensor(num_total_boxes,self.num_classes):zero()
+ local overlap = torch.FloatTensor(num_total_boxes,num_gt_boxes):zero()
+ for idx=1,num_gt_boxes do
+ local o = boxoverlap(all_boxes,gt_boxes[idx])
+ local tmp = overlap_class[{{},gt_classes[idx]}] -- pointer copy
+ tmp[tmp:lt(o)] = o[tmp:lt(o)]
+ overlap[{{},idx}] = o
+ end
+ -- get max class overlap
+ --rec.overlap,rec.label = rec.overlap:max(2)
+ --rec.overlap = torch.squeeze(rec.overlap,2)
+ --rec.label = torch.squeeze(rec.label,2)
+ --rec.label[rec.overlap:eq(0)] = 0
+ local correspondance
+ if num_gt_boxes > 0 then
+ overlap,correspondance = overlap:max(2)
+ overlap = torch.squeeze(overlap,2)
+ correspondance = torch.squeeze(correspondance,2)
+ correspondance[overlap:eq(0)] = 0
+ else
+ overlap = torch.FloatTensor(num_total_boxes):zero()
+ correspondance = torch.LongTensor(num_total_boxes):zero()
+ end
+ return overlap, correspondance, overlap_class
+end
+
+function DataSetDetection:attachProposals(i)
+
+ local boxes = self:getROIBoxes(i)
+ local gt_boxes,gt_classes,valid_objects,anno = self:getGTBoxes(i)
+
+ local all_boxes = concat(gt_boxes,boxes,1)
+
+ local num_boxes = boxes:dim() > 0 and boxes:size(1) or 0
+ local num_gt_boxes = #gt_classes
+
+ local rec = {}
+ rec.gt = concat(torch.ByteTensor(num_gt_boxes):fill(1),
+ torch.ByteTensor(num_boxes):fill(0) )
+
+ rec.overlap, rec.correspondance, rec.overlap_class =
+ self:bestOverlap(all_boxes,gt_boxes,gt_classes)
+ rec.label = torch.IntTensor(num_boxes+num_gt_boxes):fill(0)
+ for idx=1,(num_boxes+num_gt_boxes) do
+ local corr = rec.correspondance[idx]
+ if corr > 0 then
+ rec.label[idx] = gt_classes[corr]
+ end
+ end
+
+ rec.boxes = all_boxes
+ rec.class = concat(torch.CharTensor(gt_classes),
+ torch.CharTensor(num_boxes):fill(0))
+
+ if self.save_objs then
+ rec.objects = {}
+ for _,idx in pairs(valid_objects) do
+ table.insert(rec.objects,anno.object[idx])
+ end
+ end
+
+ function rec:size()
+ return (num_boxes+num_gt_boxes)
+ end
+
+ return rec
+end
+
diff --git a/DataSetPascal.lua b/DataSetPascal.lua
index 365f93f..9e403df 100644
--- a/DataSetPascal.lua
+++ b/DataSetPascal.lua
@@ -1,10 +1,13 @@
local matio = require 'matio'
-local argcheck = require 'argcheck'
+local argcheck = dofile'argcheck.lua'--require 'argcheck'
local xml = require 'xml'
+local utilities = paths.dofile('utils.lua')
+local concat = utilities.concat
+local boxoverlap = utilities.boxoverlap
matio.use_lua_strings = true
-local DataSetPascal = torch.class('nnf.DataSetPascal')
+local DataSetPascal,parent = torch.class('nnf.DataSetPascal', 'nnf.DataSetDetection')
local function lines_from(file)
-- get all lines from a file, returns an empty
@@ -59,6 +62,7 @@ local initcheck = argcheck{
if type(v) ~= 'string' then
print('classes can only be of string input');
out = false
+ break
end
end
return out
@@ -102,7 +106,7 @@ local initcheck = argcheck{
}
function DataSetPascal:__init(...)
-
+ parent.__init(self)
local args = initcheck(...)
print(args)
for k,v in pairs(args) do self[k] = v end
@@ -167,7 +171,7 @@ function DataSetPascal:size()
end
function DataSetPascal:getImage(i)
- return image.load(string.format(self.imgpath,self.img_ids[i]))
+ return image.load(string.format(self.imgpath,self.img_ids[i]),3,'float')
end
@@ -247,34 +251,6 @@ function DataSetPascal:getROIBoxes(i)
return self.roidb[i]--self.roidb[self.img2roidb[self.img_ids[i] ] ]
end
-local function boxoverlap(a,b)
- local b = b.xmin and {b.xmin,b.ymin,b.xmax,b.ymax} or b
-
- local x1 = a:select(2,1):clone()
- x1[x1:lt(b[1])] = b[1]
- local y1 = a:select(2,2):clone()
- y1[y1:lt(b[2])] = b[2]
- local x2 = a:select(2,3):clone()
- x2[x2:gt(b[3])] = b[3]
- local y2 = a:select(2,4):clone()
- y2[y2:gt(b[4])] = b[4]
-
- local w = x2-x1+1;
- local h = y2-y1+1;
- local inter = torch.cmul(w,h):float()
- local aarea = torch.cmul((a:select(2,3)-a:select(2,1)+1) ,
- (a:select(2,4)-a:select(2,2)+1)):float()
- local barea = (b[3]-b[1]+1) * (b[4]-b[2]+1);
-
- -- intersection over union overlap
- local o = torch.cdiv(inter , (aarea+barea-inter))
- -- set invalid entries to 0 overlap
- o[w:lt(0)] = 0
- o[h:lt(0)] = 0
-
- return o
-end
-
function DataSetPascal:getGTBoxes(i)
local anno = self:getAnnotation(i)
local valid_objects = {}
@@ -309,113 +285,17 @@ function DataSetPascal:getGTBoxes(i)
end
-function DataSetPascal:attachProposals(i)
-
- if not self.roidb then
- self:loadROIDB()
- end
-
- local boxes = self:getROIBoxes(i)
- local gt_boxes,gt_classes,valid_objects,anno = self:getGTBoxes(i)
-
- local all_boxes
- if anno.object then
- if #valid_objects > 0 and boxes:dim() > 0 then
- all_boxes = torch.cat(gt_boxes,boxes,1)
- elseif boxes:dim() == 0 then
- all_boxes = gt_boxes
- else
- all_boxes = boxes
- end
- else
- gt_boxes = torch.IntTensor(0,4)
- all_boxes = boxes
- end
-
- local num_boxes = boxes:dim() > 0 and boxes:size(1) or 0
- local num_gt_boxes = #gt_classes
-
- local rec = {}
- if num_gt_boxes > 0 and num_boxes > 0 then
- rec.gt = torch.cat(torch.ByteTensor(num_gt_boxes):fill(1),
- torch.ByteTensor(num_boxes):fill(0) )
- elseif num_boxes > 0 then
- rec.gt = torch.ByteTensor(num_boxes):fill(0)
- elseif num_gt_boxes > 0 then
- rec.gt = torch.ByteTensor(num_gt_boxes):fill(1)
- else
- rec.gt = torch.ByteTensor(0)
- end
-
- rec.overlap_class = torch.FloatTensor(num_boxes+num_gt_boxes,self.num_classes):fill(0)
- rec.overlap = torch.FloatTensor(num_boxes+num_gt_boxes,num_gt_boxes):fill(0)
- for idx=1,num_gt_boxes do
- local o = boxoverlap(all_boxes,gt_boxes[idx])
- local tmp = rec.overlap_class[{{},gt_classes[idx]}] -- pointer copy
- tmp[tmp:lt(o)] = o[tmp:lt(o)]
- rec.overlap[{{},idx}] = boxoverlap(all_boxes,gt_boxes[idx])
- end
- -- get max class overlap
- --rec.overlap,rec.label = rec.overlap:max(2)
- --rec.overlap = torch.squeeze(rec.overlap,2)
- --rec.label = torch.squeeze(rec.label,2)
- --rec.label[rec.overlap:eq(0)] = 0
-
- if num_gt_boxes > 0 then
- rec.overlap,rec.correspondance = rec.overlap:max(2)
- rec.overlap = torch.squeeze(rec.overlap,2)
- rec.correspondance = torch.squeeze(rec.correspondance,2)
- rec.correspondance[rec.overlap:eq(0)] = 0
- else
- rec.overlap = torch.FloatTensor(num_boxes+num_gt_boxes):fill(0)
- rec.correspondance = torch.LongTensor(num_boxes+num_gt_boxes):fill(0)
- end
- rec.label = torch.IntTensor(num_boxes+num_gt_boxes):fill(0)
- for idx=1,(num_boxes+num_gt_boxes) do
- local corr = rec.correspondance[idx]
- if corr > 0 then
- rec.label[idx] = self.class_to_id[anno.object[valid_objects[corr] ].name]
- end
- end
-
- rec.boxes = all_boxes
- if num_gt_boxes > 0 and num_boxes > 0 then
- rec.class = torch.cat(torch.CharTensor(gt_classes),
- torch.CharTensor(num_boxes):fill(0))
- elseif num_boxes > 0 then
- rec.class = torch.CharTensor(num_boxes):fill(0)
- elseif num_gt_boxes > 0 then
- rec.class = torch.CharTensor(gt_classes)
- else
- rec.class = torch.CharTensor(0)
- end
-
- if self.save_objs then
- rec.objects = {}
- for _,idx in pairs(valid_objects) do
- table.insert(rec.objects,anno.object[idx])
- end
- else
- rec.correspondance = nil
- end
-
- function rec:size()
- return (num_boxes+num_gt_boxes)
- end
-
- return rec
-end
-
function DataSetPascal:createROIs()
if self.rois then
return
end
self.rois = {}
for i=1,self.num_imgs do
- xlua.progress(i,self.num_imgs)
table.insert(self.rois,self:attachProposals(i))
if i%500 == 0 then
+ xlua.progress(i,self.num_imgs)
collectgarbage()
end
end
+ xlua.progress(self.num_imgs,self.num_imgs)
end
diff --git a/FRCNN.lua b/FRCNN.lua
new file mode 100644
index 0000000..9947127
--- /dev/null
+++ b/FRCNN.lua
@@ -0,0 +1,185 @@
+local flipBoundingBoxes = paths.dofile('utils.lua').flipBoundingBoxes
+local recursiveResizeAsCopyTyped = paths.dofile('utils.lua').recursiveResizeAsCopyTyped
+local FRCNN = torch.class('nnf.FRCNN')
+FRCNN._isFeatureProvider = true
+
+local argcheck = require 'argcheck'
+local initcheck = argcheck{
+ pack=true,
+ noordered=true,
+ {name="scale",
+ type="table",
+ default={600},
+ help="image scales"},
+ {name="max_size",
+ type="number",
+ default=1000,
+ help="maximum dimension of an image"},
+ {name="inputArea",
+ type="number",
+ default=224^2,
+ help="input area of the bounding box"},
+ {name="image_transformer",
+ type="nnf.ImageTransformer",
+ default=nnf.ImageTransformer{},
+ help="Class to preprocess input images"},
+}
+
+
+function FRCNN:__init(...)
+
+ local opts = initcheck(...)
+ for k,v in pairs(opts) do self[k] = v end
+
+ self.train = true
+end
+
+function FRCNN:training()
+ self.train = true
+end
+
+function FRCNN:evaluate()
+ self.train = false
+end
+
+function FRCNN:processImages(input_imgs,do_flip)
+ local output_imgs = self._feat[1]
+ local num_images
+ local im
+ if self.train then
+ num_images = #input_imgs
+ else
+ num_images = #self.scale
+ im = self.image_transformer:preprocess(input_imgs[1])
+ end
+
+ local imgs = {}
+ local im_sizes = {}
+ local im_scales = {}
+
+ for i=1,num_images do
+ local scale
+ if self.train then
+ im = input_imgs[i]
+ im = self.image_transformer:preprocess(im)
+ scale = self.scale[math.random(1,#self.scale)]
+ else
+ scale = self.scale[i]
+ end
+ local flip = do_flip and (do_flip[i] == 1) or false
+ if flip then
+ im = image.hflip(im)
+ end
+ local im_size = im[1]:size()
+ local im_size_min = math.min(im_size[1],im_size[2])
+ local im_size_max = math.max(im_size[1],im_size[2])
+ local im_scale = scale/im_size_min
+ if torch.round(im_scale*im_size_max) > self.max_size then
+ im_scale = self.max_size/im_size_max
+ end
+ local im_s = {torch.round(im_size[1]*im_scale),torch.round(im_size[2]*im_scale)}
+ table.insert(imgs,image.scale(im,im_s[2],im_s[1]))
+ table.insert(im_sizes,im_s)
+ table.insert(im_scales,im_scale)
+ end
+ -- create single tensor with all images, padding with zero for different sizes
+ im_sizes = torch.IntTensor(im_sizes)
+ local max_shape = im_sizes:max(1)[1]
+ output_imgs:resize(num_images,3,max_shape[1],max_shape[2]):zero()
+ for i=1,num_images do
+ output_imgs[i][{{},{1,imgs[i]:size(2)},{1,imgs[i]:size(3)}}]:copy(imgs[i])
+ end
+ return im_scales,im_sizes
+end
+
+function FRCNN:projectImageROIs(im_rois,scales,do_flip,imgs_size)
+ local rois = self._feat[2]
+ -- we consider two cases:
+ -- During training, the scales are sampled randomly per image, so
+ -- in the same image all the bboxes have the same scale, and we only
+ -- need to take into account the different images that are provided.
+ -- During testing, we consider that there is only one image at a time,
+ -- and the scale for each bbox is the one which makes its area closest
+ -- to self.inputArea
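+  -- (illustration: a box of area A under image scale factor s covers
+  -- roughly A*s^2 pixels; the level minimizing |A*s^2 - inputArea| is
+  -- selected for that box)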
+ if self.train or #scales == 1 then
+ local total_bboxes = 0
+ local cumul_bboxes = {0}
+ for i=1,#scales do
+ total_bboxes = total_bboxes + im_rois[i]:size(1)
+ table.insert(cumul_bboxes,total_bboxes)
+ end
+ rois:resize(total_bboxes,5)
+ for i=1,#scales do
+ local idx = {cumul_bboxes[i]+1,cumul_bboxes[i+1]}
+ rois[{idx,1}]:fill(i)
+ rois[{idx,{2,5}}]:copy(im_rois[i]):add(-1):mul(scales[i]):add(1)
+ if do_flip and do_flip[i] == 1 then
+ flipBoundingBoxes(rois[{idx,{2,5}}],imgs_size[{i,2}])
+ end
+ end
+ else -- not yet tested
+ error('Multi-scale testing not yet tested')
+ local scales = torch.FloatTensor(scales)
+ im_rois = im_rois[1]
+ local widths = im_rois[{{},3}] - im_rois[{{},1}] + 1
+ local heights = im_rois[{{},4}] - im_rois[{{}, 2}] + 1
+
+    local areas = torch.cmul(widths, heights)
+    local scaled_areas = areas:view(-1,1) * scales:view(1,-1):pow(2)
+    local diff_areas = scaled_areas:add(-self.inputArea):abs() -- in-place, no extra copy
+ local levels = select(2, diff_areas:min(2))
+
+ local num_boxes = im_rois:size(1)
+ rois:resize(num_boxes,5)
+ for i=1,num_boxes do
+ local s = levels[i]
+ rois[{i,{2,5}}]:copy(im_rois[i]):add(-1):mul(scales[s]):add(1)
+ rois[{i,1}] = s
+ end
+ end
+ return rois
+end
+
+function FRCNN:getFeature(imgs,bboxes,flip)
+ self._feat = self._feat or {torch.FloatTensor(),torch.FloatTensor()}
+
+ -- if it's in test mode, adapt inputs
+ if torch.isTensor(imgs) then
+ imgs = {imgs}
+ if type(bboxes) == 'table' then
+ bboxes = torch.FloatTensor(bboxes)
+ bboxes = bboxes:dim() == 1 and bboxes:view(1,-1) or bboxes
+ end
+ bboxes = {bboxes}
+ if flip == false then
+ flip = {0}
+ elseif flip == true then
+ flip = {1}
+ end
+ end
+
+ local im_scales, im_sizes = self:processImages(imgs,flip)
+ self:projectImageROIs(bboxes,im_scales,flip,im_sizes)
+
+ return self._feat
+end
+
+-- do the bbox regression
+function FRCNN:postProcess(im,boxes,output)
+ -- not implemented yet
+ return output,boxes
+end
+
+function FRCNN:compute(model, inputs)
+ local ttype = model.output:type() -- fix when doing bbox regression
+ self.inputs,inputs = recursiveResizeAsCopyTyped(self.inputs,inputs,ttype)
+ return model:forward(self.inputs)
+end
+
+function FRCNN:__tostring()
+ local str = torch.type(self)
+ str = str .. '\n Image scales: [' .. table.concat(self.scale,', ')..']'
+ str = str .. '\n Max image size: ' .. self.max_size
+ str = str .. '\n Input area: ' .. self.inputArea
+ return str
+end
diff --git a/ImageDetect.lua b/ImageDetect.lua
new file mode 100644
index 0000000..d3140df
--- /dev/null
+++ b/ImageDetect.lua
@@ -0,0 +1,22 @@
+local ImageDetect = torch.class('nnf.ImageDetect')
+local recursiveResizeAsCopyTyped = paths.dofile('utils.lua').recursiveResizeAsCopyTyped
+
+function ImageDetect:__init(model, feat_provider)
+ self.model = model
+ self.feat_provider = feat_provider
+ --self.sm = nn.SoftMax():cuda()
+end
+
+-- assumes boxes are in [x1,y1,x2,y2] format
+function ImageDetect:detect(im,boxes)
+ local feat_provider = self.feat_provider
+
+ local inputs = feat_provider:getFeature(im,boxes)
+
+ local output0 = feat_provider:compute(self.model, inputs)
+ local output,boxes_p = feat_provider:postProcess(im,boxes,output0)
+ --self.sm:forward(output0)
+
+ self.output,output = recursiveResizeAsCopyTyped(self.output,output,'torch.FloatTensor')
+ return self.output,boxes_p
+end
diff --git a/ImageTransformer.lua b/ImageTransformer.lua
index d7b213b..3bdb175 100644
--- a/ImageTransformer.lua
+++ b/ImageTransformer.lua
@@ -37,3 +37,12 @@ function ImageTransformer:preprocess(I)
return I
end
+function ImageTransformer:__tostring()
+ local str = torch.type(self)
+ if self.swap then
+ str = str .. '\n Channel swap: [' .. table.concat(self.swap,', ') .. ']'
+ end
+ str = str .. '\n Raw scale: '.. self.raw_scale
+ str = str .. '\n Mean pixel: [' .. table.concat(self.mean_pix,', ') .. ']'
+ return str
+end
diff --git a/RCNN.lua b/RCNN.lua
index 03651d3..13b87a9 100644
--- a/RCNN.lua
+++ b/RCNN.lua
@@ -1,53 +1,53 @@
-local RCNN = torch.class('nnf.RCNN')
+local flipBoundingBoxes = paths.dofile('utils.lua').flipBoundingBoxes
-function RCNN:__init(dataset)
- self.dataset = dataset
- self.image_transformer = nnf.ImageTransformer{
- mean_pix={123.68/255,116.779/255,103.939/255}}
-
- self.crop_size = 227
- self.image_mean = nil
- self.padding = 16
- self.use_square = false
-
-end
+local argcheck = require 'argcheck'
+local initcheck = argcheck{
+ pack=true,
+ noordered=true,
+ {name="crop_size",
+ type="number",
+ default=227,
+ help="crop size"},
+ {name="padding",
+ type="number",
+ default=16,
+ help="context padding"},
+ {name="use_square",
+ type="boolean",
+ default=false,
+ help="force square crops"},
+ {name="image_transformer",
+ type="nnf.ImageTransformer",
+ default=nnf.ImageTransformer{},
+ help="Class to preprocess input images"},
+ {name="max_batch_size",
+ type="number",
+ default=128,
+ help="maximum size of batches during evaluation"},
+ {name="num_threads",
+ type="number",
+ default=8,
+ help="number of threads for bounding box cropping"},
+ {name="iter_per_thread",
+ type="number",
+ default=8,
+ help="number of bbox croppings per thread"},
+ {name="dataset",
+ type="nnf.DataSetPascal", -- change to allow other datasets
+ opt=true,
+ help="A dataset class"},
+}
-function RCNN:getCrop(im_idx,bbox,flip)
- -- suppose I is in BGR, as image_mean
- -- [x1 y1 x2 y2] order
- local flip = flip==nil and false or flip
-
- if self.curr_im_idx ~= im_idx or self.curr_doflip ~= flip then
- self.curr_im_idx = im_idx
- self.curr_im_feats = self.dataset:getImage(im_idx):float()
- self.curr_im_feats = self.image_transformer:preprocess(self.curr_im_feats)
- if flip then
- self.curr_im_feats = image.hflip(self.curr_im_feats)
- end
- self.curr_doflip = flip
- end
-
- local I = self.curr_im_feats
- local bbox = bbox
-
- if flip then
- local tt = bbox[1]
- bbox[1] = I:size(3)-bbox[3]+1
- bbox[3] = I:size(3)-tt +1
- end
-
- local crop_size = self.crop_size
- local image_mean = self.image_mean
- local padding = self.padding
- local use_square = self.use_square
+local RCNN = torch.class('nnf.RCNN')
+RCNN._isFeatureProvider = true
+
+local function RCNNCrop(output,I,box,crop_size,padding,use_square,crop_buffer)
local pad_w = 0;
local pad_h = 0;
local crop_width = crop_size;
local crop_height = crop_size;
-
- --local bbox = {bbox[2],bbox[1],bbox[4],bbox[3]}
-
+ local bbox = {box[1],box[2],box[3],box[4]}
------
if padding > 0 or use_square then
local scale = crop_size/(crop_size - padding*2)
@@ -98,30 +98,177 @@ function RCNN:getCrop(im_idx,bbox,flip)
end -- padding > 0 || square
------
- --local patch = image.crop(I,bbox[1],bbox[2],bbox[3],bbox[4]);
- local patch = image.crop(I,bbox[1],bbox[2],bbox[3],bbox[4]):float();
- local tmp = image.scale(patch,crop_width,crop_height,'bilinear');
+ local patch = I[{{},{bbox[2],bbox[4]},{bbox[1],bbox[3]}}]
+ crop_buffer:resize(3,crop_height,crop_width)
+ image.scale(crop_buffer,patch,'bilinear');
+
+ output[{{},{pad_h+1,pad_h+crop_height}, {pad_w+1,pad_w+crop_width}}] = crop_buffer
- if image_mean then
- tmp = tmp - image_mean[{{},{pad_h+1,pad_h+crop_height},
- {pad_w+1,pad_w+crop_width}}]
+end
+
+
+function RCNN:__init(...)
+
+ local opts = initcheck(...)
+ for k,v in pairs(opts) do self[k] = v end
+
+ self.output_size = {3,self.crop_size,self.crop_size}
+ self.train = true
+
+ if self.num_threads > 1 then
+ local crop_size = self.crop_size
+ local threads = require 'threads'
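+    -- sharedserialize lets all the threads write into the same
+    -- output tensor without copying it between them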
+ threads.serialization('threads.sharedserialize')
+ self.donkeys = threads.Threads(
+ self.num_threads,
+ function()
+ require 'torch'
+ require 'image'
+ end,
+ function(idx)
+ RCNNCrop = RCNNCrop
+ torch.setheaptracking(true)
+ crop_buffer = torch.FloatTensor(3,crop_size,crop_size)
+ print(string.format('Starting RCNN thread with id: %d', idx))
+ end
+ )
end
+end
+
+function RCNN:training()
+ self.train = true
+end
+
+function RCNN:evaluate()
+ self.train = false
+end
- --patch = torch.zeros(3,crop_size,crop_size):typeAs(I)
- patch = torch.zeros(3,crop_size,crop_size):float()
+function RCNN:getCrop(output,I,bbox)
+ -- [x1 y1 x2 y2] order
+
+ local crop_size = self.crop_size
+ local padding = self.padding
+ local use_square = self.use_square
- patch[{{},{pad_h+1,pad_h+crop_height}, {pad_w+1,pad_w+crop_width}}] = tmp
+ self._crop_buffer = self._crop_buffer or torch.FloatTensor(3,crop_size,crop_size)
+ RCNNCrop(output,I,bbox,crop_size,padding,use_square,self._crop_buffer)
- return patch
+ return output
end
-function RCNN:getFeature(im_idx,bbox,flip)
+function RCNN:getFeature(im,bbox,flip)
local flip = flip==nil and false or flip
+
+ if type(im) == 'number' then
+ assert(self.dataset, 'you must provide a dataset if using numeric indices')
+ im = self.dataset:getImage(im)
+ end
+
+ if torch.type(im) ~= 'torch.FloatTensor' then
+ -- force image to be float
+ self._im = self._im or torch.FloatTensor()
+ self._im:resize(im:size()):copy(im)
+ im = self._im
+ end
+
+ if type(bbox) == 'table' then
+ bbox = torch.FloatTensor(bbox)
+ elseif torch.isTensor(bbox) and flip then
+ -- creates a copy of the bboxes to avoid modifying the original
+ -- bboxes in the flipping
+ self._bbox = self._bbox or torch.FloatTensor()
+ self._bbox:resize(bbox:size()):copy(bbox)
+ bbox = self._bbox
+ end
- local crop_feat = self:getCrop(im_idx,bbox,flip)
+ im = self.image_transformer:preprocess(im)
+ bbox = bbox:dim() == 1 and bbox:view(1,-1) or bbox
+ local num_boxes = bbox:size(1)
+
+ if flip then
+ im = image.hflip(im)
+ flipBoundingBoxes(bbox,im:size(3))
+ end
+
+ self._feat = self._feat or torch.FloatTensor()
+
+ self._feat:resize(num_boxes,table.unpack(self.output_size)):zero()
+
+ -- use threads to speed up bbox processing
+ if self.num_threads > 1 and num_boxes > self.iter_per_thread then
+ local feat = self._feat
+ local img = im
+ local bndbox = bbox
+ local crop_size = self.crop_size
+ local padding = self.padding
+ local use_square = self.use_square
+ local iter_per_thread = self.iter_per_thread
+ local num_launches = math.ceil(num_boxes/iter_per_thread)
+ for i=1,num_launches do
+ local iter_per_thread_local
+ if i == num_launches then
+ -- last thread launches the remainder of the bboxes
+ iter_per_thread_local = (num_boxes-1)%iter_per_thread + 1
+ else
+ iter_per_thread_local = iter_per_thread
+ end
+ self.donkeys:addjob(
+ function()
+ for j=1,iter_per_thread_local do
+ local f = feat[(i-1)*iter_per_thread+j]
+ local boundingbox = bndbox[(i-1)*iter_per_thread+j]
+ -- crop_buffer is global in each thread
+ RCNNCrop(f,img,boundingbox,crop_size,padding,use_square,crop_buffer)
+ end
+ --collectgarbage()
+ return
+ end
+ )
+ end
+ self.donkeys:synchronize()
+
+ else
+ for i=1,num_boxes do
+ self:getCrop(self._feat[i],im,bbox[i])
+ end
+ end
- return crop_feat
+ return self._feat
+end
+
+-- does nothing for now; bbox regression or SVM scoring could be plugged in here
+function RCNN:postProcess(im,bbox,output)
+ return output,bbox
end
+function RCNN:compute(model,inputs)
+ local inputs_s = inputs:split(self.max_batch_size,1)
+ self.output = self.output or inputs.new()
+
+ local ttype = model.output:type()
+ self.inputs = self.inputs or torch.Tensor():type(ttype)
+
+ for idx, f in ipairs(inputs_s) do
+ self.inputs:resize(f:size()):copy(f)
+ local output0 = model:forward(self.inputs)
+ local fs = f:size(1)
+ if idx == 1 then
+ local ss = output0[1]:size():totable()
+ self.output:resize(inputs:size(1),table.unpack(ss))
+ end
+ self.output:narrow(1,(idx-1)*self.max_batch_size+1,fs):copy(output0)
+ end
+ return self.output
+end
+
+function RCNN:__tostring()
+ local str = torch.type(self)
+ str = str .. '\n Crop size: ' .. self.crop_size
+ str = str .. '\n Context padding: ' .. self.padding
+ if self.use_square then
+ str = str .. '\n Use square: true'
+ end
+ return str
+end
diff --git a/README.md b/README.md
index b1525db..eb80c08 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,209 @@
## Object detection in torch
-Implementation of some object detection frameworks in [torch](http://torch.ch).
+This library provides a simple and modular architecture for performing object detection in [torch](http://torch.ch).
+It currently contains code for training the following frameworks: [RCNN](http://arxiv.org/abs/1311.2524), [SPP](http://arxiv.org/abs/1406.4729) and [Fast-RCNN](http://arxiv.org/abs/1504.08083).
+
+It consists of 7 basic classes:
+
+* ImageTransformer: Preprocesses an image before feeding it to the network
+* DataSetDetection: Generic dataset class for object detection.
+ * DataSetPascal
+ * DataSetCOCO (not finished)
+* [FeatureProvider](#feat_provider): Implements the necessary operations on images and bounding boxes
+ * [RCNN](#rcnn)
+ * [SPP](#spp)
+ * [Fast-RCNN](#frcnn)
+* [BatchProvider](#batch_provider): Samples random patches
+ * [BatchProviderRC](#batch_provider_rc): ROI-Centric
+ * [BatchProviderIC](#batch_provider_ic): Image-Centric
+* ImageDetect: Encapsulates a model and a feature provider to perform the detection
+* Trainer: Simple class to perform model training.
+* Tester: Evaluates the detections using the Pascal VOC approach.
+
+
+### Feature Provider
+The `FeatureProvider` class defines how each algorithm processes an image and a set of bounding boxes before feeding them to the CNN.
+It implements a `getFeature(image, boxes [,flip])` function, which computes the necessary transformations of the input data (the optional `flip` argument horizontally flips the image and the bounding boxes correspondingly), and a `postProcess()` function, which takes the output of the network plus the original inputs and post-processes them. This post-processing could be a bounding-box regression step, for example.
+Every feature provider constructor takes as input an `ImageTransformer` and a `max_batch_size` (used for evaluation).
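+
+For illustration, here is a minimal sketch of the protocol shared by all feature providers at test time. `I` is a 3xHxW image, `boxes` an Nx4 tensor of [x1,y1,x2,y2] proposals, and `model` a trained network; the names are placeholders:
+```lua
+local fp = nnf.RCNN{image_transformer=nnf.ImageTransformer{}}
+fp:evaluate()                             -- switch to test mode
+local input  = fp:getFeature(I, boxes)    -- optional third argument: flip
+local output = fp:compute(model, input)   -- forward pass through the CNN
+-- post-processing (e.g. bbox regression) of the raw network output
+local scores, boxes_p = fp:postProcess(I, boxes, output)
+```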
+
+
+#### RCNN
+RCNN was the first framework to use CNNs for object detection on top of bounding-box proposals.
+Its transformation is the simplest one: it crops the image at the positions given by the bounding boxes and rescales each crop to a fixed square size.
+The constructor has the following arguments:
+ * `crop_size`
+ * `padding`
+ * `use_square`
+ * `num_threads` number of parallel threads
+
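+A construction sketch using the defaults shown above (`I` and `boxes` as in the previous example):
+```lua
+local fp = nnf.RCNN{crop_size=227, padding=16, num_threads=8}
+-- returns an Nx3x227x227 FloatTensor, one warped crop per box
+local crops = fp:getFeature(I, boxes)
+```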
+
+#### SPP
+Contrary to RCNN, SPP crops the images in the feature space (here, `conv5`). This allows the convolutional features to be computed once for the entire image, making it much more efficient.
+The constructor has the following arguments:
+ * `model`
+ * `pooling_scales`
+ * `num_feat_chns`
+ * `scales`: image scales
+ * `sz_conv_standard`
+ * `step_standard`
+ * `offset0`
+ * `offset`
+ * `inputArea`
+ * `use_cache`
+ * `cache_dir`
+
+SPP allows faster training/testing by caching the convolutional feature maps. Instead of an image `I`, you can pass `getFeature` an image index `i` (from a `DataSetDetection` object), and the corresponding feature map will be loaded from disk (if it was already computed and `use_cache` is set to `true`). To cache all the features of a dataset on disk at once, use the method `:saveConvCache()`.
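+
+A sketch of the cached workflow, assuming `features` holds the convolutional part of the model and `ds` is a `DataSetPascal`; the cache directory name is arbitrary:
+```lua
+local fp = nnf.SPP{model=features, dataset=ds,
+                   use_cache=true, cachedir='cachedir/conv5'}
+fp:evaluate()
+-- an image index instead of an image: the conv5 feature map is
+-- read from (or saved to) the cache
+local feats = fp:getFeature(17, boxes)
+```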
+
+
+#### Fast-RCNN
+Similar to SPP, Fast-RCNN also crops the images in the feature space, but instead of keeping the convolutional layers fixed, it trains them jointly with the fully-connected layers.
+The constructor has the following arguments:
+ * `scale`
+ * `max_size`
+ * `inputArea`
+
+The output of `getFeature()` is a table with two entries: the preprocessed image(s) as the first element, and the projected bounding boxes as the second. An example of a CNN model structure which can be used with Fast-RCNN is as follows:
+```lua
+-- define features and classifier as you wish.
+-- Can use loadcaffe to read from a saved model, for example
+features = torch.load('alexnet_features.t7')
+classifier = torch.load('alexnet_classifier.t7')
+
+-- define the ROIPooling layer
+-- can use either inn.ROIPooling or nnf.ROIPooling (with CPU support)
+-- let's just use standard parameters from Fast-RCNN paper
+local ROIPooling = inn.ROIPooling(6,6):setSpatialScale(1/16)
+
+-- create parallel model which takes as input the images and
+-- bounding boxes, and pass the images through the convolutional
+-- features and simply copy the bounding boxes
+local prl = nn.ParallelTable()
+prl:add(features)
+prl:add(nn.Identity())
+
+-- this is the final model
+model = nn.Sequential()
+model:add(prl)
+model:add(ROIPooling)
+model:add(nn.View(-1):setNumInputDims(3))
+model:add(classifier)
+```
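+
+With such a model, the two-entry table produced by `getFeature()` can be fed directly to the network (a sketch; `I` and `boxes` as before):
+```lua
+-- inputs[1]: batch of images; inputs[2]: Nx5 rois (img_idx,x1,y1,x2,y2)
+local inputs = feat_provider:getFeature(I, boxes)
+local scores = feat_provider:compute(model, inputs)
+```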
+
+
+### Batch Provider
+This class implements sampling strategies for training object detectors.
+Its constructor takes as arguments a `DataSetDetection` and a `FeatureProvider`.
+It implements a `getBatch` function, which samples from the `DataSet` using the `FeatureProvider`; a usage sketch follows the argument list below.
+The following arguments are present in all derived classes:
+ * `DataSetDetection`
+ * `FeatureProvider`
+ * `batch_size`
+ * `fg_fraction`
+ * `fg_threshold`
+ * `bg_threshold`
+ * `do_flip`
+
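+A sketch of how a batch provider is used in a training loop (`bp` is any concrete batch provider, constructed as shown in the subsections below; `model`, `criterion` and `num_iterations` are placeholders):
+```lua
+for iter = 1, num_iterations do
+  local input, target = bp:getBatch()
+  local output = model:forward(input)
+  local err = criterion:forward(output, target)
+  -- backward pass and parameter update go here
+end
+```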
+
+#### BatchProviderRC
+ROI-centric batch provider: it samples patches randomly from the pool of all patches in the dataset.
+To minimize the number of disk accesses, it reads the data for a specified number of batches at once and stores it in memory.
+The constructor takes the following optional arguments:
+ * `iter_per_batch`
+ * `nTimesMoreData`
+
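+A construction sketch (`ds` and `fp` as in the earlier examples; the values are illustrative):
+```lua
+local bp = nnf.BatchProviderRC{dataset=ds, feat_provider=fp,
+                               batch_size=128, iter_per_batch=10}
+bp:setupData()  -- precompute the foreground/background window lists
+```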
+
+#### BatchProviderIC
+Image-centric batch provider: it first samples a set of images, and then samples patches from those images.
+The constructor takes the following optional argument:
+ * `imgs_per_batch`
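+
+A construction sketch (this provider expects an `nnf.FRCNN` feature provider):
+```lua
+local fp = nnf.FRCNN{image_transformer=nnf.ImageTransformer{}}
+local bp = nnf.BatchProviderIC{dataset=ds, feat_provider=fp,
+                               batch_size=128, imgs_per_batch=2}
+bp:setupData()
+local batches, targets = bp:getBatch()
+```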
+
+### Examples
+Here we show a simple example demonstrating how to perform object detection given an image and a set of bounding boxes.
+Run it using `qlua` for the visualization part. A pre-trained model for Fast-RCNN can be found [here](https://drive.google.com/file/d/0B-TTdm1WNtyba3I4Vm1hbFRSS2c/view?usp=sharing).
+```lua
+require 'nnf'
+require 'image'
+require 'cudnn'
+require 'inn'
+require 'nn'
+
+-- load pre-trained Fast-RCNN model
+params = torch.load('cachedir/frcnn_alexnet.t7')
+loadModel = dofile 'models/frcnn_alexnet.lua'
+model = loadModel(params)
+
+model:add(nn.SoftMax())
+
+model:evaluate()
+model:cuda()
+
+-- prepare detector
+image_transformer= nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+ raw_scale = 255,
+ swap = {3,2,1}}
+feat_provider = nnf.FRCNN{image_transformer=image_transformer}
+feat_provider:evaluate() -- testing mode
+
+detector = nnf.ImageDetect(model, feat_provider)
+
+-- Load an image
+I = image.lena()
+-- generate some random bounding boxes
+torch.manualSeed(500) -- fix seed for reproducibility
+bboxes = torch.Tensor(100,4)
+bboxes:select(2,1):random(1,I:size(3)/2)
+bboxes:select(2,2):random(1,I:size(2)/2)
+bboxes:select(2,3):random(I:size(3)/2+1,I:size(3))
+bboxes:select(2,4):random(I:size(2)/2+1,I:size(2))
+
+-- detect !
+scores, bboxes = detector:detect(I, bboxes)
+
+-- visualization
+dofile 'visualize_detections.lua'
+threshold = 0.5
+-- classes from Pascal used for training the model
+cls = {'aeroplane','bicycle','bird','boat','bottle','bus','car',
+ 'cat','chair','cow','diningtable','dog','horse','motorbike',
+ 'person','pottedplant','sheep','sofa','train','tvmonitor'}
+
+w = visualize_detections(I,bboxes,scores,threshold,cls)
+
+```
+This outputs a visualization of the detections above the threshold (image omitted here).
+
+For an illustration of how to use this code to train a detector, or to evaluate it on Pascal, see the [examples](http://github.com/fmassa/object-detection.torch/tree/refactoring/examples).
+
+#### Bounding box proposals
+Note that this repo doesn't contain code for generating bounding box proposals. For the moment, they are pre-computed and loaded at run time.
+
+#### Model definition
+All the detection frameworks implemented here assume that you already have a pre-trained classification network (trained, for example, on ImageNet). They reuse this pre-trained network as the initialization for the subsequent fine-tuning.
+
+In `models/` you will find the model definitions for several classic networks used in object detection.
+
+The pre-trained Zeiler model is available at [https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0](https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0).
+It is expected to be at `data/models`.
+If you want to use your own model with the SPP framework, make sure that it follows the pattern
+```
+model = nn.Sequential()
+model:add(features)
+model:add(pooling_layer)
+model:add(classifier)
+```
+where `features` can be a `nn.Sequential` of several convolutions and `pooling_layer` is the last pooling with reshaping of the data to feed it to the classifier. See `models/zeiler.lua` for an example.
### Dependencies
It requires the following packages
- - [xml](http://doc.lubyk.org/xml.html)
- - [matio-ffi.torch](https://github.com/soumith/matio-ffi.torch)
- - [hdf5](https://github.com/deepmind/torch-hdf5)
- - [inn](https://github.com/szagoruyko/imagine-nn)
+ - [xml](http://doc.lubyk.org/xml.html) (For `DataSetPascal`)
+ - [matio-ffi.torch](https://github.com/soumith/matio-ffi.torch) (For `DataSetPascal`)
+ - [hdf5](https://github.com/deepmind/torch-hdf5) (for `SPP`)
+ - [inn](https://github.com/szagoruyko/imagine-nn) (for `SPP`)
To install them all, do
@@ -28,6 +222,10 @@ luarocks install matio
To install `hdf5`, follow the instructions in [here](https://github.com/deepmind/torch-hdf5/blob/master/doc/usage.md)
+### Old code
+The old version of this repo can be found [here](https://github.com/fmassa/object-detection.torch/tree/legacy).
+
+
### Running this code
First, clone this repo
@@ -35,27 +233,5 @@ First, clone this repo
git clone https://github.com/fmassa/object-detection.torch.git
```
-The zeiler pretrained model is available at [https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0](https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0).
-It is supposed to be at `data/models`.
-If you want to use your own model in SPP framework, make sure that it follows the pattern
-```
-model = nn.Sequential()
-model:add(features)
-model:add(pooling_layer)
-model:add(classifier)
-```
-where `features` can be a `nn.Sequential` of several convolutions and `pooling_layer` is the last pooling with reshaping of the data to feed it to the classifer. See `models/zeiler.lua` for an example.
-
-To finetune the network for detection, simply run
-```
-th main.lua
-```
-
-To get an overview of the different parameters, do
-```
-th main.lua -h
-```
-
The default is to consider that the dataset is present in `datasets/VOCdevkit/VOC2007/`.
The default location of the bounding-box proposal `.mat` files (in RCNN format) is `data/selective_search_data/`.
-
diff --git a/ROIPooling.lua b/ROIPooling.lua
new file mode 100644
index 0000000..3ca6d82
--- /dev/null
+++ b/ROIPooling.lua
@@ -0,0 +1,86 @@
+local ROIPooling,parent = torch.class('nnf.ROIPooling','nn.Module')
+
+function ROIPooling:__init(W,H)
+ parent.__init(self)
+ self.W = W
+ self.H = H
+ self.pooler = {}--nn.SpatialAdaptiveMaxPooling(W,H)
+ self.spatial_scale = 1
+ self.gradInput = {torch.Tensor()}
+end
+
+function ROIPooling:setSpatialScale(scale)
+ self.spatial_scale = scale
+ return self
+end
+
+function ROIPooling:updateOutput(input)
+ local data = input[1]
+ local rois = input[2]
+
+ local num_rois = rois:size(1)
+ local s = data:size()
+ local ss = s:size(1)
+ self.output:resize(num_rois,s[ss-2],self.H,self.W)
+
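+  -- project the ROIs from input-image coordinates to feature-map
+  -- coordinates and clamp them to the feature-map extent
+  -- (note: this modifies the input rois tensor in place)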
+ rois[{{},{2,5}}]:add(-1):mul(self.spatial_scale):add(1):round()
+ rois[{{},2}]:cmin(s[ss])
+ rois[{{},3}]:cmin(s[ss-1])
+ rois[{{},4}]:cmin(s[ss])
+ rois[{{},5}]:cmin(s[ss-1])
+
+ -- element access is faster if not a cuda tensor
+ if rois:type() == 'torch.CudaTensor' then
+ self._rois = self._rois or torch.FloatTensor()
+ self._rois:resize(rois:size()):copy(rois)
+ rois = self._rois
+ end
+
+ if not self._type then self._type = self.output:type() end
+
+ if #self.pooler < num_rois then
+ local diff = num_rois - #self.pooler
+ for i=1,diff do
+ table.insert(self.pooler,nn.SpatialAdaptiveMaxPooling(self.W,self.H):type(self._type))
+ end
+ end
+
+ for i=1,num_rois do
+ local roi = rois[i]
+ local im_idx = roi[1]
+ local im = data[{im_idx,{},{roi[3],roi[5]},{roi[2],roi[4]}}]
+ self.output[i] = self.pooler[i]:updateOutput(im)
+ end
+ return self.output
+end
+
+function ROIPooling:updateGradInput(input,gradOutput)
+ local data = input[1]
+ local rois = input[2]
+ if rois:type() == 'torch.CudaTensor' then
+ rois = self._rois
+ end
+ local num_rois = rois:size(1)
+ local s = data:size()
+ local ss = s:size(1)
+ self.gradInput[1]:resizeAs(data):zero()
+
+ for i=1,num_rois do
+ local roi = rois[i]
+ local im_idx = roi[1]
+ local r = {im_idx,{},{roi[3],roi[5]},{roi[2],roi[4]}}
+ local im = data[r]
+ local g = self.pooler[i]:updateGradInput(im,gradOutput[i])
+ self.gradInput[1][r]:add(g)
+ end
+ return self.gradInput
+end
+
+function ROIPooling:type(type)
+ parent.type(self,type)
+ for i=1,#self.pooler do
+ self.pooler[i]:type(type)
+ end
+ self._type = type
+ return self
+end
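+
+-- A minimal usage sketch (illustrative shapes only):
+--   local pool = nnf.ROIPooling(6,6):setSpatialScale(1/16)
+--   local data = torch.FloatTensor(2,256,38,50)  -- conv features for 2 images
+--   local rois = torch.FloatTensor{{1,1,1,161,161},{2,17,17,321,241}}
+--   local out  = pool:forward{data,rois}         -- out is 2 x 256 x 6 x 6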
diff --git a/SPP.lua b/SPP.lua
index cfd67a1..4456c2c 100644
--- a/SPP.lua
+++ b/SPP.lua
@@ -1,18 +1,89 @@
local hdf5 = require 'hdf5'
+local flipBoundingBoxes = paths.dofile('utils.lua').flipBoundingBoxes
local SPP = torch.class('nnf.SPP')
-
---TODO vectorize code ?
-function SPP:__init(dataset,model)
+SPP._isFeatureProvider = true
+
+-- argcheck crashes with this many arguments, and using unordered
+-- arguments doesn't seem practical
+
+local argcheck = paths.dofile('argcheck.lua')--require 'argcheck'
+local initcheck = argcheck{
+ pack=true,
+ {name="model",
+ type="nn.Sequential",
+ help="conv5 model"},
+ {name="dataset",
+ type="nnf.DataSetPascal", -- change to allow other datasets
+ opt=true,
+ help="A dataset class"},
+ {name="pooling_scales",
+ type="table",
+ default={{1,1},{2,2},{3,3},{6,6}},
+ help="pooling scales"},
+ {name="num_feat_chns",
+ type="number",
+ default=256,
+ help="number of feature channels to be pooled"},
+ {name="scales",
+ type="table",
+ default={480,576,688,874,1200},
+ help="image scales"},
+ {name="sz_conv_standard",
+ type="number",
+ default=13,
+ help=""},
+ {name="step_standard",
+ type="number",
+ default=16,
+ help=""},
+ {name="offset0",
+ type="number",
+ default=21,
+ help=""},
+ {name="offset",
+ type="number",
+ default=6.5,
+ help=""},
+ {name="inputArea",
+ type="number",
+ default=224^2,
+ help="input area"},
+ {name="image_transformer",
+ type="nnf.ImageTransformer",
+ default=nnf.ImageTransformer{},
+ help="Class to preprocess input images"},
+ {name="use_cache",
+ type="boolean",
+ default=true,
+ help=""},
+ {name="cachedir",
+ type="string",
+ opt=true,
+ help=""},
+}
+
+
+
+function SPP:__init(...)
- self.dataset = dataset
- self.model = model
- self.spp_pooler = inn.SpatialPyramidPooling({{1,1},{2,2},{3,3},{6,6}}):float()
- self.image_transformer = nnf.ImageTransformer{}
+ local opts = initcheck(...)
+ for k,v in pairs(opts) do self[k] = v end
+
+ --self.num_feat_chns = 256
+ --self.pooling_scales = {{1,1},{2,2},{3,3},{6,6}}
+ local pyr = torch.Tensor(self.pooling_scales):t()
+ local pooled_size = pyr[1]:dot(pyr[2])
+ self.output_size = {self.num_feat_chns*pooled_size}
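+  -- e.g. with the default pooling_scales {{1,1},{2,2},{3,3},{6,6}}:
+  -- 1*1 + 2*2 + 3*3 + 6*6 = 50 bins, so output_size = {256*50} = {12800}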
+
+ --self.spp_pooler = inn.SpatialPyramidPooling(self.pooling_scales):float()
+ --self.image_transformer = nnf.ImageTransformer{}
+--[[
-- paper=864, their code=874
self.scales = {480,576,688,874,1200} -- 874
- self.randomscale = true
self.sz_conv_standard = 13
self.step_standard = 16
@@ -24,11 +95,20 @@ function SPP:__init(dataset,model)
self.use_cache = true
self.cachedir = nil
-
+ --]]
+ self.train = true
end
+function SPP:training()
+ self.train = true
+end
-function SPP:getCrop(im_idx,bbox,flip)
+function SPP:evaluate()
+ self.train = false
+end
+
+-- kept only to cross-check the vectorized implementation below
+function SPP:getCrop_old(im_idx,bbox,flip)
local flip = flip or false
if self.curr_im_idx ~= im_idx or self.curr_doflip ~= flip then
@@ -36,52 +116,87 @@ function SPP:getCrop(im_idx,bbox,flip)
self.curr_im_feats = self:getConv5(im_idx,flip)
self.curr_doflip = flip
end
-
- local bbox = bbox
+
if flip then
- local tt = bbox[1]
- bbox[1] = self.curr_im_feats.imSize[3]-bbox[3]+1
- bbox[3] = self.curr_im_feats.imSize[3]-tt +1
+ flipBoundingBoxes(bbox,self.curr_im_feats.imSize[3])
end
local bestScale,bestBbox = self:getBestSPPScale(bbox,self.curr_im_feats.imSize,self.curr_im_feats.scales)
local box_norm = self:getResposeBoxes(bestBbox)
local crop_feat = self:getCroppedFeat(self.curr_im_feats.rsp[bestScale],box_norm)
+
+ return crop_feat
+end
+
+function SPP:getCrop(im_idx,bbox,flip)
+ local flip = flip or false
+
+ if self.curr_im_idx ~= im_idx or self.curr_doflip ~= flip then
+ self.curr_im_idx = im_idx
+ self.curr_im_feats = self:getConv5(im_idx,flip)
+ self.curr_doflip = flip
+ end
+
+ if type(bbox) == 'table' then
+ bbox = torch.FloatTensor(bbox)
+ elseif torch.isTensor(bbox) and flip then
+ -- creates a copy of the bboxes to avoid modifying the original
+ -- bboxes in the flipping
+ self._bbox = self._bbox or torch.FloatTensor()
+ self._bbox:resize(bbox:size()):copy(bbox)
+ bbox = self._bbox
+ end
+ bbox = bbox:dim() == 1 and bbox:view(1,-1) or bbox
+
+ if flip then
+ flipBoundingBoxes(bbox,self.curr_im_feats.imSize[3])
+ end
+
+ local feat = self.curr_im_feats
+ local bestScale,bestbboxes,bboxes_norm,projected_bb =
+ self:projectBoxes(feat, bbox, feat.scales)
+
+ local crop_feat = {}
+ for i=1,bbox:size(1) do
+ local bbox_ = projected_bb[i]
+ local patch = feat.rsp[bestScale[i]][{{},{bbox_[2],bbox_[4]},{bbox_[1],bbox_[3]}}]
+ table.insert(crop_feat,patch)
+ end
return crop_feat
end
-function SPP:getFeature(im_idx,bbox,flip)
+-- kept only to cross-check the vectorized implementation
+function SPP:getFeature_old(im_idx,bbox,flip)
local flip = flip or false
- local crop_feat = self:getCrop(im_idx,bbox,flip)
+ local crop_feat = self:getCrop_old(im_idx,bbox,flip)
local feat = self.spp_pooler:forward(crop_feat)
-
return feat
end
-local function cleaningForward(input,model)
- local currentOutput = model.modules[1]:updateOutput(input)
- for i=2,#model.modules do
- collectgarbage()
- collectgarbage()
- currentOutput = model.modules[i]:updateOutput(currentOutput)
- model.modules[i-1].output:resize()
- model.modules[i-1].gradInput:resize()
- if model.modules[i-1].gradWeight then
- model.modules[i-1].gradWeight:resize()
- end
- if model.modules[i-1].gradBias then
- model.modules[i-1].gradBias:resize()
- end
+function SPP:getFeature(im_idx,bbox,flip)
+ local flip = flip or false
+
+ local crop_feat = self:getCrop(im_idx,bbox,flip)
+
+ self._feat = self._feat or torch.FloatTensor()
+ self._feat:resize(#crop_feat,table.unpack(self.output_size))
+ for i=1,#crop_feat do
+ self._feat[i]:copy(self.spp_pooler:forward(crop_feat[i]))
end
- model.output = currentOutput
- return currentOutput
+
+ return self._feat
end
+-- SPP is meant to keep a cache of the conv5 features
+-- for fast training. In that case, we assume that the
+-- image index in the dataset is provided.
+-- An image tensor can also be passed as input, in which
+-- case no conv5 cache is saved.
function SPP:getConv5(im_idx,flip)
local scales = self.scales
local flip = flip or false
@@ -93,8 +208,16 @@ function SPP:getConv5(im_idx,flip)
if not cachedir then
cachedir = ''
end
+
+ local im_name
+ if not self.dataset then
+ self.use_cache = false
+ im_name = ''
+ else
+ im_name = self.dataset.img_ids[im_idx]
+ end
- local cachefile = paths.concat(self.cachedir,self.dataset.img_ids[im_idx])
+ local cachefile = paths.concat(cachedir,im_name)
if flip then
cachefile = cachefile..'_flip'
@@ -110,7 +233,12 @@ function SPP:getConv5(im_idx,flip)
feats.rsp[tostring(i)] = nil
end
else
- local I = self.dataset:getImage(im_idx):float()
+ local I
+ if type(im_idx) == 'number' and self.dataset then
+ I = self.dataset:getImage(im_idx):float()
+ elseif torch.isTensor(im_idx) then
+ I = im_idx
+ end
I = self.image_transformer:preprocess(I)
if flip then
I = image.hflip(I)
@@ -129,7 +257,6 @@ function SPP:getConv5(im_idx,flip)
local Ir = image.scale(I,sc,sr):type(mtype)
local f = self.model:forward(Ir)
- --local f = cleaningForward(Ir,self.model)
feats.rsp[i] = torch.FloatTensor(f:size()):copy(f)
end
@@ -180,7 +307,8 @@ function SPP:getBestSPPScale(bbox,imSize,scales)
local bestScale
- if self.randomscale then
+ if self.train then
+ -- in training, select the scales randomly
bestScale = torch.random(1,num_scales)
else
local inputArea = self.inputArea
@@ -253,6 +381,141 @@ function SPP:getCroppedFeat(feat,bbox)
end
+
+
+local function unique(bboxes)
+ local idx = {}
+  local is_unique = torch.ByteTensor(bboxes:size(1)):fill(1) -- byte mask, needed for mask indexing
+ for i=1,bboxes:size(1) do
+ local b = bboxes[i]
+ local n = b[1]..'_'..b[2]..'_'..b[3]..'_'..b[4]..'_'..b[5]
+ if idx[n] then
+ is_unique[i] = 0
+ else
+ idx[n] = i
+ end
+ end
+ return is_unique
+end
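+
+-- e.g. unique(torch.FloatTensor{{1,1,2,2,1},{1,1,2,2,1}}) returns the
+-- byte mask {1,0}: the second row repeats the first row's key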
+
+-- given a table with the conv5 features at different scales and bboxes in
+-- the original image, project the bboxes in the conv5 space
+function SPP:projectBoxes(feat, bboxes, scales)
+ -- bboxes is a nx4 Tensor with candidate bounding boxes
+ -- in [x1, y1, x2, y2] format
+ local imSize = feat.imSize
+
+ local scales = scales or self.scales
+ local min_dim = math.min(imSize[2],imSize[3])
+
+ local sz_conv_standard = self.sz_conv_standard
+ local step_standard = self.step_standard
+
+ local nboxes = bboxes:size(1)
+
+ -- get best SPP scale
+ local bestScale = torch.FloatTensor(nboxes)
+
+ if self.train then
+ -- in training, select the scales randomly
+ bestScale:random(1,#scales)
+ else
+    local bboxArea = bboxes.new():resize(nboxes):zero()
+ bboxArea:map2(bboxes[{{},3}],bboxes[{{},1}],function(xx,xx2,xx1) return xx2-xx1+1 end)
+ bboxArea:map2(bboxes[{{},4}],bboxes[{{},2}],function(xx,xx2,xx1) return xx*(xx2-xx1+1) end)
+
+ local expected_scale = bboxArea:float():pow(-0.5):mul(sz_conv_standard*step_standard*min_dim)
+ expected_scale:round()
+
+ local nbboxDiffArea = torch.FloatTensor(#scales,nboxes)
+
+ for i=1,#scales do
+ nbboxDiffArea[i]:copy(expected_scale):add(-scales[i]):abs()
+ end
+
+ bestScale = select(2,nbboxDiffArea:min(1))[1]
+ end
+
+  local mul_factor = torch.FloatTensor(nboxes,1):copy(bestScale)
+  -- map each selected scale index to its image rescaling factor
+  mul_factor:apply(function(x)
+    return (scales[x]-1)/(min_dim-1)
+  end)
+
+ local bestbboxes = torch.FloatTensor(nboxes,4):copy(bboxes)
+ bestbboxes:add(-1):cmul(mul_factor:expand(nboxes,4)):add(1)
+
+ -- response boxes
+
+ local offset0 = self.offset0
+ local offset = self.offset
+
+ local bboxes_norm = bestbboxes:clone()
+ bboxes_norm[{{},{1,2}}]:add(-offset0 + offset):div(step_standard):add( 0.5)
+ bboxes_norm[{{},{1,2}}]:floor():add(1)
+ bboxes_norm[{{},{3,4}}]:add(-offset0 - offset):div(step_standard):add(-0.5)
+ bboxes_norm[{{},{3,4}}]:ceil():add(1)
+
+ local x0gtx1 = bboxes_norm[{{},1}]:gt(bboxes_norm[{{},3}])
+ local y0gty1 = bboxes_norm[{{},2}]:gt(bboxes_norm[{{},4}])
+
+ bboxes_norm[{{},1}][x0gtx1] = bboxes_norm[{{},1}][x0gtx1]:add(bboxes_norm[{{},3}][x0gtx1]):div(2)
+ bboxes_norm[{{},3}][x0gtx1] = (bboxes_norm[{{},1}][x0gtx1])
+
+ bboxes_norm[{{},2}][y0gty1] = bboxes_norm[{{},2}][y0gty1]:add(bboxes_norm[{{},4}][y0gty1]):div(2)
+ bboxes_norm[{{},4}][y0gty1] = (bboxes_norm[{{},2}][y0gty1])
+
+ -- remove repeated projections
+ if self.dedup then
+ local is_unique = unique(torch.cat(bboxes_norm,bestScale:view(-1,1),2))
+ local lin = torch.range(1,is_unique:size(1)):long() -- can also use cumsum instead
+ bboxes_norm = bboxes_norm:index(1,lin[is_unique])
+ end
+ -- clamp on boundaries
+
+ local projected_bb = bboxes_norm:clone()
+
+ for i=1,#scales do
+    local this_scale = bestScale:eq(i)
+    if this_scale:sum() > 0 then -- numel() is always nboxes; sum() tests if any box uses scale i
+ projected_bb[{{},2}][this_scale] = projected_bb[{{},2}][this_scale]:clamp(1,feat.rsp[i]:size(2))
+ projected_bb[{{},4}][this_scale] = projected_bb[{{},4}][this_scale]:clamp(1,feat.rsp[i]:size(2))
+ projected_bb[{{},1}][this_scale] = projected_bb[{{},1}][this_scale]:clamp(1,feat.rsp[i]:size(3))
+ projected_bb[{{},3}][this_scale] = projected_bb[{{},3}][this_scale]:clamp(1,feat.rsp[i]:size(3))
+ end
+ end
+
+ --projected_bb:floor()
+ return bestScale,bestbboxes,bboxes_norm,projected_bb
+end
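+
+-- Illustrative example (assumed values): with offset0=21, offset=6.5 and
+-- step_standard=16, a box rescaled to [165,165,484,484] projects to
+--   x1' = floor((165 - 21 + 6.5)/16 + 0.5) + 1 = floor(9.90625) + 1 = 10
+--   x2' = ceil((484 - 21 - 6.5)/16 - 0.5) + 1 = ceil(28.03125) + 1 = 30
+-- giving the window [10,10,30,30] on the conv5 feature map.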
+
+-- does nothing by default; this hook could apply bounding-box regression or an SVM, but neither is implemented here
+function SPP:postProcess(im,bbox,output)
+ return output,bbox
+end
+
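+-- forward `inputs` through `model` in chunks of max_batch_size rows and
+-- concatenate the outputs, to avoid allocating one huge batch at once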
+function SPP:compute(model,inputs)
+  -- max_batch_size is not set in __init; fall back to an assumed default
+  self.max_batch_size = self.max_batch_size or 128
+  local inputs_s = inputs:split(self.max_batch_size,1)
+
+ self.output = self.output or inputs.new()
+
+ local ttype = model.output:type()
+ self.inputs = self.inputs or torch.Tensor():type(ttype)
+
+ for idx, f in ipairs(inputs_s) do
+ self.inputs:resize(f:size()):copy(f)
+ local output0 = model:forward(self.inputs)
+ local fs = f:size(1)
+ if idx == 1 then
+ local ss = output0[1]:size():totable()
+ self.output:resize(inputs:size(1),table.unpack(ss))
+ end
+ self.output:narrow(1,(idx-1)*self.max_batch_size+1,fs):copy(output0)
+ end
+ return self.output
+end
+
function SPP:type(t_type)
self._type = t_type
--self.spp_pooler = self.spp_pooler:type(t_type)
@@ -270,3 +533,38 @@ end
function SPP:cuda()
return self:type('torch.CudaTensor')
end
+
+function SPP:saveConvCache()
+ assert(self.dataset, 'need to set a dataset to save the cache')
+ assert(self.use_cache, 'use_cache need to be true')
+ assert(self.cachedir, 'cachedir need to be set')
+
+ local dataset = self.dataset
+
+ print('Caching features for '..dataset.dataset_name..' '
+ ..dataset.image_set)
+ local feat_cachedir = self.cachedir
+ for i=1,dataset:size() do
+ xlua.progress(i,dataset:size())
+ local im_name = dataset.img_ids[i]
+ local cachefile = paths.concat(feat_cachedir,im_name)
+ if not paths.filep(cachefile..'.h5') then
+ local f = self:getConv5(i)
+ end
+ if not paths.filep(cachefile..'_flip.h5') then
+ local f = self:getConv5(i,true)
+ end
+ if i%50 == 0 then
+ collectgarbage()
+ collectgarbage()
+ end
+ end
+end
+
+function SPP:__tostring()
+ local str = torch.type(self)
+ str = str .. '\n Image scales: [' .. table.concat(self.scales,', ')..']'
+ str = str .. '\n Input area: ' .. self.inputArea
+ return str
+end
+
diff --git a/SVMTrainer.lua b/SVMTrainer.lua
index 6f857b1..61f6597 100644
--- a/SVMTrainer.lua
+++ b/SVMTrainer.lua
@@ -1,7 +1,7 @@
local SVMTrainer = torch.class('nnf.SVMTrainer')
function SVMTrainer:__init(module,feat_provider)
- self.dataset = feat_provider.dataset
+ --self.dataset = dataset
self.module = module
self.feat_provider = feat_provider
@@ -21,58 +21,54 @@ function SVMTrainer:__init(module,feat_provider)
self.evict_thresh = -1.2
self.hard_thresh = -1.0001
- self.pos_feat_type = 'mixed' -- real, mixed, synthetic
+ self.pos_feat_type = 'real' -- real, mixed, synthetic
self.synth_neg = true
- self:getFeatureStats()
+ --self:getFeatureStats()
end
-function SVMTrainer:getFeatureStats(feat_provider,module)
+function SVMTrainer:getFeatureStats(dataset,feat_provider,module)
- if true then
- self.mean_norm = 30.578503376687
+ if false then
+    self.mean_norm = 19.848824140978 -- previously 30.578503376687
return
end
local feat_provider = feat_provider or self.feat_provider
local module = module or self.module
- local dataset = feat_provider.dataset
+ local dataset = dataset
local boxes_per_image = 200
local num_images = math.min(dataset:size(),200)
local valid_idx = torch.randperm(dataset:size())
valid_idx = valid_idx[{{1,num_images}}]
-
- local fc5_feat = torch.FloatTensor()
- local fc7_feat = torch.FloatTensor()
local feat_cumsum = 0
local feat_n = 0
+ local bboxes = torch.IntTensor(boxes_per_image,4)
print('Getting feature stats')
for i=1,num_images do
xlua.progress(i,num_images)
local img_idx = valid_idx[i]
+ local I = dataset:getImage(img_idx)
local rec = dataset:attachProposals(img_idx)
local num_bbox = math.min(boxes_per_image,rec:size())
- fc5_feat:resize(num_bbox,unpack(self.feat_dim))
- fc7_feat:resize(num_bbox,4096)
-
- local bbox_idx = torch.randperm(rec:size())
+ local bbox_idx = torch.randperm(rec:size()):long()
bbox_idx = bbox_idx[{{1,num_bbox}}]
- for j=1,num_bbox do
- local bbox_id = bbox_idx[j]
- fc5_feat[j] = feat_provider:getFeature(img_idx,rec.boxes[bbox_id])
- end
- fc7_feat:copy(module:forward(fc5_feat:cuda()))
- feat_n = feat_n + num_bbox
- feat_cumsum = feat_cumsum + fc7_feat:pow(2):sum(2):sqrt():sum()
+ bboxes:index(rec.boxes,1,bbox_idx)
+
+ local feat = feat_provider:getFeature(I,bboxes)
+ local final_feat = feat_provider:compute(module, feat)
+
+ feat_n = feat_n + num_bbox
+ feat_cumsum = feat_cumsum + final_feat:pow(2):sum(2):sqrt():sum()
end
self.mean_norm = feat_cumsum/feat_n
end
@@ -82,10 +78,10 @@ function SVMTrainer:scaleFeatures(feat)
feat:mul(target_norm/self.mean_norm)
end
-function SVMTrainer:getPositiveFeatures(feat_provider,module)
+function SVMTrainer:getPositiveFeatures(dataset,feat_provider,module)
local feat_provider = feat_provider or self.feat_provider
local module = module or self.module
- local dataset = feat_provider.dataset
+ local dataset = dataset
module:evaluate()
local positive_data = {}
for cl_idx,cl_name in pairs(dataset.classes) do
@@ -98,6 +94,11 @@ function SVMTrainer:getPositiveFeatures(feat_provider,module)
local not_done = torch.ByteTensor(dataset.num_classes):fill(1)
for i=1,end_idx do
xlua.progress(i,end_idx)
+ local I = dataset:getImage(i)
+    --local gt_boxes, gt_classes = dataset:getGTBoxes(i)
+
local rec = dataset:attachProposals(i)
local overlap = rec.overlap_class
local is_gt = rec.gt
@@ -111,7 +112,10 @@ function SVMTrainer:getPositiveFeatures(feat_provider,module)
for j=1,rec:size() do
if overlap[j][cl_idx]==1 and is_gt[j]==1 then
count = count + 1
- fc5_feat[count] = feat_provider:getFeature(i,rec.boxes[j])
+ local fff = feat_provider:getFeature(I,rec.boxes[j])[1]
+ --print(fff:size())
+ --print(fc5_feat:size())
+ fc5_feat[count] = fff
end
end
if num_pos > 0 then
@@ -133,15 +137,16 @@ function SVMTrainer:getPositiveFeatures(feat_provider,module)
return positive_data
end
-function SVMTrainer:sampleNegativeFeatures(ind,feat_provider,module)
+function SVMTrainer:sampleNegativeFeatures(ind,dataset,feat_provider,module)
local feat_provider = feat_provider or self.feat_provider
- local dataset = feat_provider.dataset
+ local dataset = dataset
local module = module or self.module
module:evaluate()
collectgarbage()
local first_time = self.first_time
+ local I = dataset:getImage(ind)
local rec = dataset:attachProposals(ind)
local overlap = rec.overlap_class
@@ -154,11 +159,9 @@ collectgarbage()
caches[cl_name] = {X_neg = {},num_added = 0}
end
- fc5_feat:resize(rec:size(),unpack(self.feat_dim))
- for j=1,rec:size() do
- fc5_feat[j] = feat_provider:getFeature(ind,rec.boxes[j])
- end
- fc7_feat:resize(rec:size(),4096):copy(module:forward(fc5_feat:cuda()))
+ local feat = feat_provider:getFeature(I,rec.boxes)
+ local fc7_feat = feat_provider:compute(module, feat)
+
self:scaleFeatures(fc7_feat)
if first_time then
@@ -264,16 +267,16 @@ function SVMTrainer:addPositiveFeatures(feat_provider,module)
end
-function SVMTrainer:train()
- local dataset = self.dataset
+function SVMTrainer:train(dataset)
+ --local dataset = self.dataset
- print('Experiment name: '..self.expname)
+ --print('Experiment name: '..self.expname)
self.W = torch.Tensor(dataset.num_classes,4096)
self.B = torch.Tensor(dataset.num_classes)
--self:selectPositiveFeatures()
- self:addPositiveFeatures()
+ --self:addPositiveFeatures()
local caches = {}
for cl_idx,cl_name in pairs(dataset.classes) do
@@ -313,7 +316,7 @@ function SVMTrainer:train()
X = self:sampleNegativeFeatures(i-num_synth)
end
else
- X = self:sampleNegativeFeatures(i)
+ X = self:sampleNegativeFeatures(i,dataset)
end
for cl_idx,cl_name in pairs(dataset.classes) do
@@ -396,7 +399,7 @@ function SVMTrainer:train()
end
first_time = false
end
- torch.save('/home/francisco/work/projects/cross_domain/cachedir/svm_models/svm_model,'..self.expname..'.t7',{W=self.W,B=self.B})
+ --torch.save('/home/francisco/work/projects/cross_domain/cachedir/svm_models/svm_model,'..self.expname..'.t7',{W=self.W,B=self.B})
return caches--X_all
end
diff --git a/Tester.lua b/Tester.lua
index 4c84ace..5ff2bc1 100644
--- a/Tester.lua
+++ b/Tester.lua
@@ -6,14 +6,11 @@ local VOCevaldet = utils.VOCevaldet
local Tester = torch.class('nnf.Tester')
-function Tester:__init(module,feat_provider)
- self.dataset = feat_provider.dataset
- self.module = module
+function Tester:__init(module,feat_provider,dataset)
+ self.dataset = dataset
self.feat_provider = feat_provider
+ self.module = module
- self.feat_dim = {256*50}
- self.max_batch_size = 4000
-
self.cachefolder = nil
self.cachename = nil
self.suffix = ''
@@ -58,30 +55,44 @@ function Tester:validate(criterion)
return err/num_batches
end
+local function print_scores(dataset,res)
+ print('Results:')
+ -- print class names
+ io.write('|')
+ for i = 1, dataset.num_classes do
+ io.write(('%5s|'):format(dataset.classes[i]))
+ end
+ io.write('\n|')
+ -- print class scores
+ for i = 1, dataset.num_classes do
+ local l = #dataset.classes[i] < 5 and 5 or #dataset.classes[i]
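+      -- res[i] ~= res[i] is true only for NaN; '%.3f' prints 5 characters
+      -- for a score in [0,1] while 'nan' prints 3, so adjust the padding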
+ local l = res[i] == res[i] and l-5 or l-3
+ if l > 0 then
+ io.write(('%.3f%'..l..'s|'):format(res[i],' '))
+ else
+ io.write(('%.3f|'):format(res[i]))
+ end
+ end
+ io.write('\n')
+ io.write(('mAP: %.4f\n'):format(res:mean(1)[1]))
+end
+
+
function Tester:test(iteration)
local dataset = self.dataset
local module = self.module
local feat_provider = self.feat_provider
- local pathfolder = paths.concat(self.cachefolder,'test_iter'..iteration)
- paths.mkdir(pathfolder)
-
module:evaluate()
+ feat_provider:evaluate()
dataset:loadROIDB()
- local feats = torch.FloatTensor()
- local feats_batched = {}
- local feats_cuda = torch.CudaTensor()
-
- local output = torch.FloatTensor()
-
- local output_dim = module:get(module:size())
-
- local softmax = nn.SoftMax():float()
-
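+  -- ImageDetect wraps the feature provider and the model:
+  -- detec:detect(im, boxes) returns per-box class scores plus the boxes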
+ local detec = nnf.ImageDetect(module, feat_provider)
local boxes
- --
+ local im
+ local output
+
local aboxes = {}
for i=1,dataset.num_classes do
table.insert(aboxes,{})
@@ -89,50 +100,41 @@ function Tester:test(iteration)
local max_per_set = 5*dataset:size()
local max_per_image = 100
- local thresh = torch.ones(dataset.num_classes):mul(-1.5)
+ local thresh = torch.ones(dataset.num_classes):mul(0.05)
local scored_boxes = torch.FloatTensor()
local timer = torch.Timer()
local timer2 = torch.Timer()
local timer3 = torch.Timer()
-
+
+  -- SPP is more efficient if we cache the features, so we treat it
+  -- differently than the other feature providers
+  local pass_index = torch.type(feat_provider) == 'nnf.SPP'
+
for i=1,dataset:size() do
timer:reset()
io.write(('test: (%s) %5d/%-5d '):format(dataset.dataset_name,i,dataset:size()));
- boxes = dataset:getROIBoxes(i):float()
- local num_boxes = boxes:size(1)
- -- compute image feature maps
- timer3:reset()
- feats:resize(num_boxes,unpack(self.feat_dim))
- for idx=1,num_boxes do
- feats[idx] = feat_provider:getFeature(i,boxes[idx])
+
+ if pass_index then
+ im = i
+ else
+ im = dataset:getImage(i)
end
- local tt = timer3:time().real
- -- compute classification scores
- torch.split(feats_batched,feats,self.max_batch_size,1)
+ boxes = dataset:getROIBoxes(i):float()
+
timer3:reset()
- for idx,f in ipairs(feats_batched) do
- local fs = f:size(1)
- feats_cuda:resize(fs,unpack(self.feat_dim)):copy(f)
- module:forward(feats_cuda)
- if idx == 1 then
- local out_size = module.output:size():totable()
- table.remove(out_size,1)
- output:resize(num_boxes,unpack(out_size))
- end
- output:narrow(1,(idx-1)*self.max_batch_size+1,fs):copy(module.output)
- end
- local add_bg = 0
- if dataset.num_classes ~= output:size(2) then -- if there is no svm
- output = softmax:forward(output)
- add_bg = 1
- end
-
+ output,boxes = detec:detect(im,boxes)
+
+ local add_bg = 1
+ local tt = 0
local tt2 = timer3:time().real
timer2:reset()
+    -- run NMS for each class, based on the scores from the classifier
for j=1,dataset.num_classes do
local scores = output:select(2,j+add_bg)
+      -- only select detections with a score greater than thresh;
+      -- this avoids running NMS on too many low-scoring bboxes
local idx = torch.range(1,scores:numel()):long()
local idx2 = scores:gt(thresh[j])
idx = idx[idx2]
@@ -151,6 +153,7 @@ function Tester:test(iteration)
aboxes[j][i] = torch.FloatTensor()
end
+ -- remove low scoring boxes and update threshold
if i%1000 == 0 then
aboxes[j],thresh[j] = keep_top_k(aboxes[j],max_per_set)
end
@@ -158,10 +161,11 @@ function Tester:test(iteration)
end
io.write((' prepare feat time: %.3f, forward time: %.3f, select time: %.3fs, total time: %.3fs\n'):format(tt,tt2,timer2:time().real,timer:time().real));
- --collectgarbage()
- --mattorch.save(paths.concat(pathfolder,dataset.img_ids[i]..'.mat'),output:double())
end
+ local pathfolder = paths.concat(self.cachefolder,'test_iter'..iteration)
+ paths.mkdir(pathfolder)
+
for i = 1,dataset.num_classes do
-- go back through and prune out detections below the found threshold
for j = 1,dataset:size() do
@@ -174,10 +178,14 @@ function Tester:test(iteration)
end
end
end
- save_file = paths.concat(pathfolder, dataset.classes[i].. '_boxes_'..
- dataset.dataset_name..self.suffix)
- torch.save(save_file, aboxes)
+ --save_file = paths.concat(pathfolder, dataset.classes[i].. '_boxes_'..
+ -- dataset.dataset_name..self.suffix)
+ --torch.save(save_file, aboxes)
end
+ save_file = paths.concat(pathfolder, 'boxes_'..
+ dataset.dataset_name..self.suffix)
+ torch.save(save_file, aboxes)
+
local res = {}
for i=1,dataset.num_classes do
@@ -185,27 +193,11 @@ function Tester:test(iteration)
res[i] = VOCevaldet(dataset,aboxes[i],cls)
end
res = torch.Tensor(res)
- print('Results:')
- -- print class names
- io.write('|')
- for i = 1, dataset.num_classes do
- io.write(('%5s|'):format(dataset.classes[i]))
- end
- io.write('\n|')
- -- print class scores
- for i = 1, dataset.num_classes do
- local l = #dataset.classes[i] < 5 and 5 or #dataset.classes[i]
- local l = res[i] == res[i] and l-5 or l-3
- if l > 0 then
- io.write(('%.3f%'..l..'s|'):format(res[i],' '))
- else
- io.write(('%.3f|'):format(res[i]))
- end
- end
- io.write('\n')
- io.write(('mAP: %.4f\n'):format(res:mean(1)[1]))
+
+ print_scores(dataset,res)
-- clean roidb to free memory
dataset.roidb = nil
return res
end
+
diff --git a/Trainer.lua b/Trainer.lua
index 180b1eb..8ac9c47 100644
--- a/Trainer.lua
+++ b/Trainer.lua
@@ -1,18 +1,22 @@
require 'nn'
require 'optim'
require 'xlua'
+local utils = paths.dofile('utils.lua')
+local recursiveResizeAsCopyTyped = utils.recursiveResizeAsCopyTyped
local Trainer = torch.class('nnf.Trainer')
-function Trainer:__init(module,criterion)
+function Trainer:__init(module,criterion,batch_provider,optimState)
self.module = module
self.criterion = criterion
+ self.batch_provider = batch_provider
self.parameters,self.gradParameters = self.module:getParameters()
- self.optimState = {learningRate = 1e-3, weightDecay = 0.0005, momentum = 0.9,
- learningRateDecay = 0}
+ self.optimState = optimState or
+ {learningRate = 1e-3, weightDecay = 0.0005, momentum = 0.9,
+ learningRateDecay = 0, dampening = 0}
self.epoch = 0
@@ -22,40 +26,39 @@ function Trainer:__init(module,criterion)
end
+function Trainer:train(maxIter)
+ local maxIter = maxIter or 20
+ local ttype = self.parameters:type()
-function Trainer:train(inputs,targets)
- -- only for batches
- assert(targets:dim()>2,'Trainer is only for batches')
-
self.module:training()
- self._input = self._input or torch.CudaTensor()
- self._target = self._target or torch.CudaTensor()
local module = self.module
+ local batch_provider = self.batch_provider
local parameters = self.parameters
local gradParameters = self.gradParameters
local criterion = self.criterion
local optimState = self.optimState
- local batchSize = inputs:size(2)
- local maxIter = inputs:size(1)
-
if self.confusion then
self.confusion:zero()
end
local err = 0
- self._input:resize(inputs[1]:size())
- self._target:resize(targets[1]:size())
- local input = self._input
- local target = self._target
-
+ local input
+ local target
+
for t=1,maxIter do
xlua.progress(t,maxIter)
- input:copy(inputs[t])
- target:copy(targets[t])
+ -- get training batch
+ self.input0,self.target0 = batch_provider:getBatch()
+
+ -- copy to ttype
+ self.input,self.input0 = recursiveResizeAsCopyTyped(self.input,self.input0,ttype)
+ self.target,self.target0 = recursiveResizeAsCopyTyped(self.target,self.target0,ttype)
+ input = self.input
+ target = self.target
local feval = function(x)
if x ~= parameters then
@@ -70,11 +73,6 @@ function Trainer:train(inputs,targets)
module:backward(input,df_do)
- if self.normalize then
- gradParameters:div(batchSize)
- f = f/batchSize
- end
-
if self.confusion then
self.confusion:batchAdd(outputs,target)
end
@@ -88,6 +86,6 @@ function Trainer:train(inputs,targets)
table.insert(self.fx,err/maxIter)
- self.module:evaluate()
+ --self.module:evaluate()
self.epoch = self.epoch + 1
end
diff --git a/argcheck.lua b/argcheck.lua
new file mode 100644
index 0000000..2ce4e3b
--- /dev/null
+++ b/argcheck.lua
@@ -0,0 +1,73 @@
+local usage = require 'argcheck.usage'
+local env = require 'argcheck.env'
+--------------------------------------------------------------------------------
+-- Simple argument function with a similar interface to argcheck, but which
+-- supports lots of default arguments for named rules.
+-- Not as fast and elegant though.
+--------------------------------------------------------------------------------
+local function argcheck(rules)
+ -- basic checks
+ assert(not (rules.noordered and rules.nonamed), 'rules must be at least ordered or named')
+ assert(rules.help == nil or type(rules.help) == 'string', 'rules help must be a string or nil')
+ assert(rules.doc == nil or type(rules.doc) == 'string', 'rules doc must be a string or nil')
+ assert(not rules.overload, 'rules overload not supported')
+ assert(not (rules.doc and rules.help), 'choose between doc or help, not both')
+ for _, rule in ipairs(rules) do
+ assert(rule.name, 'rule must have a name field')
+ assert(rule.type == nil or type(rule.type) == 'string', 'rule type must be a string or nil')
+ assert(rule.help == nil or type(rule.help) == 'string', 'rule help must be a string or nil')
+ assert(rule.doc == nil or type(rule.doc) == 'string', 'rule doc must be a string or nil')
+ assert(rule.check == nil or type(rule.check) == 'function', 'rule check must be a function or nil')
+ --assert(rule.defaulta == nil or type(rule.defaulta) == 'string', 'rule defaulta must be a string or nil')
+ --assert(rule.defaultf == nil or type(rule.defaultf) == 'function', 'rule defaultf must be a function or nil')
+ end
+
+  if not (rules.pack == nil or rules.pack) then
+    error('pack needs to be true')
+  end
+  if rules.nonamed then
+    error('only named arguments are supported')
+  end
+
+ local arginfo = {}
+ for k,v in ipairs(rules) do
+ arginfo[v.name] = k
+ end
+
+ local function func(args)
+
+ local iargs = {}
+ for _,rule in ipairs(rules) do
+ iargs[rule.name] = rule.default
+ if rule.default == nil and
+ args[rule.name] == nil and
+ rule.opt ~= true then
+ print(usage(rules))
+ error('Missing argument: '..rule.name)
+ end
+ end
+
+    for k,v in pairs(args) do
+      local rule = rules[arginfo[k]]
+      if not rule then
+        print(usage(rules))
+        error('Unknown argument: '..k)
+      end
+      if not env.istype(v,rule.type) then
+        print(usage(rules))
+        error('Wrong type: '..k)
+      end
+
+      if rule.check and not rule.check(args[k]) then
+        print(usage(rules))
+        error('check did not pass for argument: '..k)
+      end
+      iargs[k] = args[k]
+ end
+
+ return iargs
+ end
+
+ return func
+
+end
+
+return argcheck
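+
+-- A minimal usage sketch (hypothetical rules; named arguments only):
+--   local check = argcheck{
+--     pack=true,
+--     {name="size", type="number", default=128, help="batch size"},
+--     {name="name", type="string", opt=true, help="optional name"},
+--   }
+--   local opts = check{size=64}  -- opts is {size=64}; 'name' stays nil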
diff --git a/config.lua b/config.lua
new file mode 100644
index 0000000..0e0ea08
--- /dev/null
+++ b/config.lua
@@ -0,0 +1,112 @@
+require 'nnf'
+
+local configs = {}
+
+local image_transformer_params = {
+ mean_pix={102.9801,115.9465,122.7717},
+ raw_scale = 255,
+ swap = {3,2,1}
+}
+
+configs.image_transformer_params = image_transformer_params
+
+configs.datasetDir = 'datasets/VOCdevkit'
+configs.roidbDir = 'data/selective_search_data'
+
+--------------------------------------------------------------------------------
+-- Training Parameters
+--------------------------------------------------------------------------------
+
+local train_params = {
+ batch_size = 16,--128,
+ fg_fraction = 0.25,
+ fg_threshold = 0.5,
+ bg_threshold = {0.0,0.5},
+ do_flip = true,
+}
+
+configs.train_params = train_params
+
+--------------------------------------------------------------------------------
+-- Feature Provider Parameters
+--------------------------------------------------------------------------------
+
+configs.algo = {}
+
+--------------------------------------------------------------------------------
+-- RCNN
+--------------------------------------------------------------------------------
+
+local fp_params = {
+ crop_size = 227,
+ padding = 16,
+ use_square = false,
+}
+local bp_params = {
+ iter_per_batch = 100,
+ nTimesMoreData = 10,
+}
+
+local RCNN = {
+ fp_params=fp_params,
+ bp_params=bp_params,
+ bp = nnf.BatchProviderRC
+}
+
+configs.algo.RCNN = RCNN
+
+--------------------------------------------------------------------------------
+-- SPP
+--------------------------------------------------------------------------------
+--
+local num_chns = 256
+local pooling_scales = {{1,1},{2,2},{3,3},{6,6}}
+local pyr = torch.Tensor(pooling_scales):t()
+local pooled_size = pyr[1]:dot(pyr[2])
+local feat_dim = {num_chns*pooled_size}
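+-- 1*1 + 2*2 + 3*3 + 6*6 = 50 pooled bins per channel, so feat_dim = {256*50} = {12800}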
+
+local fp_params = {
+ scales = {480,576,688,874,1200},
+ sz_conv_standard = 13,
+ step_standard = 16,
+ offset0 = 21,
+ offset = 6.5,
+ inputArea = 224^2,
+ pooling_scales = pooling_scales,
+ num_feat_chns = num_chns,
+}
+local bp_params = {
+ iter_per_batch = 500,
+ nTimesMoreData = 10,
+}
+
+local SPP = {
+ fp_params=fp_params,
+ bp_params=bp_params,
+ bp = nnf.BatchProviderRC
+}
+
+configs.algo.SPP = SPP
+
+--------------------------------------------------------------------------------
+-- Fast-RCNN
+--------------------------------------------------------------------------------
+
+local fp_params = {
+ scale = {600},
+ max_size = 1000,
+}
+local bp_params = {
+ imgs_per_batch = 2,
+}
+
+local FRCNN = {
+ fp_params=fp_params,
+ bp_params=bp_params,
+ bp = nnf.BatchProviderIC
+}
+
+configs.algo.FRCNN = FRCNN
+
+
+return configs
diff --git a/data.lua b/data.lua
index 655deb5..59d3284 100644
--- a/data.lua
+++ b/data.lua
@@ -1,145 +1,65 @@
--------------------------------------------------------------------------------
-- Prepare data model
--------------------------------------------------------------------------------
-paths.mkdir(opt.save)
-trainCache = paths.concat(opt.save_base,'trainCache.t7')
-testCache = paths.concat(opt.save_base,'testCache.t7')
+local trainCache = paths.concat(rundir,'trainCache.t7')
+--testCache = paths.concat(opt.save_base,'testCache.t7')
-local pooler
-local feat_dim
+local config = paths.dofile('config.lua')
-if opt.algo == 'SPP' then
- local conv_list = features:findModules(opt.backend..'.SpatialConvolution')
- local num_chns = conv_list[#conv_list].nOutputPlane
- pooler = model:get(2):clone():float()
- local pyr = torch.Tensor(pooler.pyr):t()
- local pooled_size = pyr[1]:dot(pyr[2])
- feat_dim = {num_chns*pooled_size}
-elseif opt.algo == 'RCNN' then
- feat_dim = {3,227,227}
+image_transformer = nnf.ImageTransformer(config.image_transformer_params)
+
+local FP = nnf[opt.algo]
+local fp_params = config.algo[opt.algo].fp_params
+local bp_params = config.algo[opt.algo].bp_params
+local BP = config.algo[opt.algo].bp
+
+local train_params = config.train_params
+
+-- add common parameters
+fp_params.image_transformer = image_transformer
+for k,v in pairs(train_params) do
+ bp_params[k] = v
end
-image_transformer = nnf.ImageTransformer{mean_pix=image_mean}
+-------------------------------------------------------------------------------
+-- Create structures
+--------------------------------------------------------------------------------
+
+ds_train = nnf.DataSetPascal{
+ image_set='trainval',
+ year=2007,--opt.year,
+ datadir=config.datasetDir,
+ roidbdir=config.roidbDir
+}
+
+feat_provider = FP(fp_params)
+feat_provider:training()
+
+bp_params.dataset = ds_train
+bp_params.feat_provider = feat_provider
+batch_provider = BP(bp_params)
if paths.filep(trainCache) then
print('Loading train metadata from cache')
- batch_provider = torch.load(trainCache)
- feat_provider = batch_provider.feat_provider
- ds_train = feat_provider.dataset
- feat_provider.model = features
+ local metadata = torch.load(trainCache)
+ batch_provider.bboxes = metadata
else
- ds_train = nnf.DataSetPascal{image_set='trainval',classes=classes,year=opt.year,
- datadir=opt.datadir,roidbdir=opt.roidbdir}
-
- if opt.algo == 'SPP' then
- feat_provider = nnf.SPP(ds_train)-- remove features here to reduce cache size
- feat_provider.cachedir = paths.concat(opt.cache,'features',opt.netType)
- feat_provider.randomscale = true
- feat_provider.scales = {600}
- feat_provider.spp_pooler = pooler:clone()
- feat_provider.image_transformer = image_transformer
- elseif opt.algo == 'RCNN' then
- feat_provider = nnf.RCNN(ds_train)
- feat_provider.crop_size = feat_dim[2]
- feat_provider.image_transformer = image_transformer
- else
- error(("Detection framework '%s' not available"):format(opt.algo))
- end
-
- print('==> Preparing BatchProvider for training')
- batch_provider = nnf.BatchProvider(feat_provider)
- batch_provider.iter_per_batch = opt.ipb
- batch_provider.nTimesMoreData = opt.ntmd
- batch_provider.fg_fraction = opt.fg_frac
- batch_provider.bg_threshold = {0.0,0.5}
- batch_provider.do_flip = true
- batch_provider.batch_dim = feat_dim
batch_provider:setupData()
-
- torch.save(trainCache,batch_provider)
- feat_provider.model = features
+ torch.save(trainCache, batch_provider.bboxes)
end
-if paths.filep(testCache) then
- print('Loading test metadata from cache')
- batch_provider_test = torch.load(testCache)
- feat_provider_test = batch_provider_test.feat_provider
- ds_test = feat_provider_test.dataset
- feat_provider_test.model = features
-else
- ds_test = nnf.DataSetPascal{image_set='test',classes=classes,year=opt.year,
- datadir=opt.datadir,roidbdir=opt.roidbdir}
- if opt.algo == 'SPP' then
- feat_provider_test = nnf.SPP(ds_test)
- feat_provider_test.randomscale = false
- feat_provider_test.cachedir = paths.concat(opt.cache,'features',opt.netType)
- feat_provider_test.scales = {600}
- feat_provider_test.spp_pooler = pooler:clone()
- feat_provider_test.image_transformer = image_transformer
- elseif opt.algo == 'RCNN' then
- feat_provider_test = nnf.RCNN(ds_test)
- feat_provider_test.crop_size = feat_dim[2]
- feat_provider_test.image_transformer = image_transformer
- else
- error(("Detection framework '%s' not available"):format(opt.algo))
- end
-
- print('==> Preparing BatchProvider for validation')
- batch_provider_test = nnf.BatchProvider(feat_provider_test)
- batch_provider_test.iter_per_batch = 500--opt.ipb
- batch_provider_test.nTimesMoreData = 10--opt.ntmd
- batch_provider_test.fg_fraction = opt.fg_frac
- batch_provider_test.bg_threshold = {0.0,0.5}
- batch_provider_test.do_flip = false
- batch_provider_test.batch_dim = feat_dim
- batch_provider_test:setupData()
-
- torch.save(testCache,batch_provider_test)
- feat_provider_test.model = features
-end
-
---------------------------------------------------------------------------------
--- Compute conv5 feature cache (for SPP)
---------------------------------------------------------------------------------
-if opt.algo == 'SPP' then
- print('Preparing conv5 features for '..ds_train.dataset_name..' '
- ..ds_train.image_set)
- local feat_cachedir = feat_provider.cachedir
- for i=1,ds_train:size() do
- xlua.progress(i,ds_train:size())
- local im_name = ds_train.img_ids[i]
- local cachefile = paths.concat(feat_cachedir,im_name)
- if not paths.filep(cachefile..'.h5') then
- local f = feat_provider:getConv5(i)
- end
- if not paths.filep(cachefile..'_flip.h5') then
- local f = feat_provider:getConv5(i,true)
- end
- if i%50 == 0 then
- collectgarbage()
- collectgarbage()
- end
- end
-
- print('Preparing conv5 features for '..ds_test.dataset_name..' '
- ..ds_test.image_set)
- local feat_cachedir = feat_provider_test.cachedir
- for i=1,ds_test:size() do
- xlua.progress(i,ds_test:size())
- local im_name = ds_test.img_ids[i]
- local cachefile = paths.concat(feat_cachedir,im_name)
- if not paths.filep(cachefile..'.h5') then
- local f = feat_provider_test:getConv5(i)
- end
- if i%50 == 0 then
- collectgarbage()
- collectgarbage()
- end
- end
-end
+-- test dataset
+ds_test = nnf.DataSetPascal{
+ image_set='test',
+ year=2007,--opt.year,
+ datadir=config.datasetDir,
+ roidbdir=config.roidbDir
+}
-features = nil
-model = nil
+-- only needed because of SPP
+-- could be the same as the one for training
+--feat_provider_test = FP(fp_params)
+--feat_provider_test:evaluate()
collectgarbage()
diff --git a/examples/example_frcnn_lena.jpg b/examples/example_frcnn_lena.jpg
new file mode 100644
index 0000000..e1919fa
Binary files /dev/null and b/examples/example_frcnn_lena.jpg differ
diff --git a/examples/train_test_rcnn.lua b/examples/train_test_rcnn.lua
new file mode 100644
index 0000000..7701ad6
--- /dev/null
+++ b/examples/train_test_rcnn.lua
@@ -0,0 +1,190 @@
+require 'nnf'
+
+cmd = torch.CmdLine()
+cmd:text('Example on how to train/test a RCNN based object detector on Pascal')
+cmd:text('')
+cmd:text('Options:')
+cmd:option('-name', 'rcnn-example', 'base name')
+cmd:option('-modelpath', '', 'path to the pre-trained model')
+cmd:option('-lr', 1e-3, 'learning rate')
+cmd:option('-num_iter', 40000, 'number of iterations')
+cmd:option('-disp_iter', 100, 'display every n iterations')
+cmd:option('-lr_step', 30000, 'step for reducing the learning rate')
+cmd:option('-save_step', 10000, 'step for saving the model')
+cmd:option('-gpu', 1, 'gpu to use (0 for cpu mode)')
+cmd:option('-seed', 1, 'fix random seed (if ~= 0)')
+cmd:option('-numthreads',6, 'number of threads')
+
+opt = cmd:parse(arg or {})
+
+assert(paths.filep(opt.modelpath), 'need to provide the path for the pre-trained model')
+
+exp_name = cmd:string(opt.name, opt, {name=true, gpu=true, numthreads=true,
+ modelpath=true})
+
+rundir = '../cachedir/'..exp_name
+paths.mkdir(rundir)
+
+cmd:log(paths.concat(rundir,'log'), opt)
+cmd:addTime('RCNN Example')
+
+local tensor_type
+if opt.gpu > 0 then
+ require 'cunn'
+ cutorch.setDevice(opt.gpu)
+ tensor_type = 'torch.CudaTensor'
+ print('Using GPU mode on device '..opt.gpu)
+else
+ require 'nn'
+ tensor_type = 'torch.FloatTensor'
+ print('Using CPU mode')
+end
+
+if opt.seed ~= 0 then
+ torch.manualSeed(opt.seed)
+ if opt.gpu > 0 then
+ cutorch.manualSeed(opt.seed)
+ end
+ print('Using fixed seed: '..opt.seed)
+end
+
+torch.setnumthreads(opt.numthreads)
+
+--------------------------------------------------------------------------------
+-- define model and criterion
+--------------------------------------------------------------------------------
+-- load pre-trained model for finetuning
+-- should already have the right number of outputs in the last layer,
+-- which can be done by removing the last layer and replacing it by a new one
+-- for example:
+-- pre_trained_model:remove() -- remove last layer
+-- pre_trained_model:add(nn.Linear(4096,21)) -- add new layer
+model = torch.load(opt.modelpath)
+
+criterion = nn.CrossEntropyCriterion()
+
+model:type(tensor_type)
+criterion:type(tensor_type)
+
+print('Model:')
+print(model)
+print('Criterion:')
+print(criterion)
+
+-- define the transformations to do in the image before
+-- passing it to the network
+local image_transformer= nnf.ImageTransformer{
+ mean_pix={102.9801,115.9465,122.7717},
+ raw_scale = 255,
+ swap = {3,2,1}
+}
+
+print(image_transformer)
+--------------------------------------------------------------------------------
+-- define data for training
+--------------------------------------------------------------------------------
+
+-- this class holds all the necessary information regarding the dataset
+ds = nnf.DataSetPascal{
+ image_set='trainval',
+ datadir='datasets/VOCdevkit',
+ roidbdir='data/selective_search_data',
+ year=2007
+}
+print('DataSet Training:')
+print(ds)
+--------------------------------------------------------------------------------
+-- define feature providers
+--------------------------------------------------------------------------------
+
+local crop_size = 224
+
+-- the feature provider extract the features for a given image + bounding box
+fp = nnf.RCNN{
+ image_transformer=image_transformer,
+ crop_size=crop_size,
+ num_threads=opt.numthreads
+}
+-- different frameworks can behave differently during training and testing
+fp:training()
+
+print('Feature Provider:')
+print(fp)
+
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp = nnf.BatchProviderRC{
+ dataset=ds,
+ feat_provider=fp,
+ bg_threshold={0.0,0.5},
+ nTimesMoreData=2,
+ iter_per_batch=10,--100,
+}
+bp:setupData()
+
+print('Batch Provider:')
+print(bp)
+--------------------------------------------------------------------------------
+-- train
+--------------------------------------------------------------------------------
+
+trainer = nnf.Trainer(model, criterion, bp)
+
+local num_iter = opt.num_iter/opt.disp_iter
+local lr_step = opt.lr_step/opt.disp_iter
+local save_step = opt.save_step/opt.disp_iter
+
+trainer.optimState.learningRate = opt.lr
+
+local lightModel = model:clone('weight','bias')
+
+-- main training loop
+for i=1,num_iter do
+ if i % lr_step == 0 then
+ trainer.optimState.learningRate = trainer.optimState.learningRate/10
+ end
+ print(('Iteration %3d/%-3d'):format(i,num_iter))
+ trainer:train(opt.disp_iter)
+ print((' Training error: %.5f'):format(trainer.fx[i]))
+
+ if i% save_step == 0 then
+ torch.save(paths.concat(rundir, 'model.t7'), lightModel)
+ end
+end
+
+torch.save(paths.concat(rundir, 'model.t7'), lightModel)
+
+--------------------------------------------------------------------------------
+-- evaluation
+--------------------------------------------------------------------------------
+-- add a softmax for evaluation: training used nn.CrossEntropyCriterion
+-- (which applies LogSoftMax internally), so the model outputs raw scores
+local softmax = nn.SoftMax()
+softmax:type(tensor_type)
+model:add(softmax)
+
+-- dataset for evaluation
+dsv = nnf.DataSetPascal{
+ image_set='test',
+ datadir='datasets/VOCdevkit',
+ roidbdir='data/selective_search_data',
+ year=2007
+}
+print('DataSet Evaluation:')
+print(dsv)
+
+-- feature provider for evaluation
+fpv = nnf.RCNN{
+ image_transformer=image_transformer,
+ crop_size=crop_size,
+ num_threads=opt.numthreads
+}
+fpv:evaluate()
+print('Feature Provider Evaluation:')
+print(fpv)
+
+-- define the class to test the model on the full dataset
+tester = nnf.Tester(model, fpv, dsv)
+tester.cachefolder = rundir
+tester:test(opt.num_iter)
diff --git a/main.lua b/main.lua
index 65a4b18..0a8705b 100644
--- a/main.lua
+++ b/main.lua
@@ -1,6 +1,7 @@
require 'nnf'
-require 'cunn'
+--require 'cunn'
require 'optim'
+require 'trepl'
local opts = paths.dofile('opts.lua')
opt = opts.parse(arg)
@@ -8,116 +9,47 @@ print(opt)
if opt.seed ~= 0 then
torch.manualSeed(opt.seed)
- cutorch.manualSeed(opt.seed)
+ if opt.gpu > 0 then
+ cutorch.manualSeed(opt.seed)
+ end
end
-cutorch.setDevice(opt.gpu)
torch.setnumthreads(opt.numthreads)
---------------------------------------------------------------------------------
--- Select target classes
---------------------------------------------------------------------------------
-
-if opt.classes == 'all' then
- classes={'aeroplane','bicycle','bird','boat','bottle','bus','car',
- 'cat','chair','cow','diningtable','dog','horse','motorbike',
- 'person','pottedplant','sheep','sofa','train','tvmonitor'}
+local tensor_type
+if opt.gpu > 0 then
+ require 'cunn'
+ cutorch.setDevice(opt.gpu)
+ tensor_type = 'torch.CudaTensor'
+ print('Using GPU mode on device '..opt.gpu)
else
- classes = {opt.classes}
+ require 'nn'
+ tensor_type = 'torch.FloatTensor'
+ print('Using CPU mode')
end
--------------------------------------------------------------------------------
+model, criterion = paths.dofile('model.lua')
+model:type(tensor_type)
+criterion:type(tensor_type)
-paths.dofile('model.lua')
+-- prepare training and test data
paths.dofile('data.lua')
---------------------------------------------------------------------------------
--- Prepare training model
---------------------------------------------------------------------------------
-
-trainer = nnf.Trainer(classifier,criterion)
-trainer.optimState.learningRate = opt.lr
-
-local conf_classes = {}
-table.insert(conf_classes,'background')
-for i=1,#classes do
- table.insert(conf_classes,classes[i])
-end
-trainer.confusion = optim.ConfusionMatrix(conf_classes)
-
-validator = nnf.Tester(classifier,feat_provider_test)
-validator.cachefolder = opt.save_base
-validator.cachename = 'validation_data.t7'
-validator.batch_provider = batch_provider_test
-
-logger = optim.Logger(paths.concat(opt.save,'log.txt'))
-val_err = {}
-val_counter = 0
-reduc_counter = 0
-
-inputs = torch.FloatTensor()
-targets = torch.IntTensor()
-for i=1,opt.num_iter do
-
- print('Iteration: '..i..'/'..opt.num_iter)
- inputs,targets = batch_provider:getBatch(inputs,targets)
- print('==> Training '..paths.basename(opt.save_base))
- trainer:train(inputs,targets)
- print('==> Training Error: '..trainer.fx[i])
- print(trainer.confusion)
-
- collectgarbage()
+-- Do training
+paths.dofile('train.lua')
- err = validator:validate(criterion)
- print('==> Validation Error: '..err)
- table.insert(val_err,err)
-
- logger:add{['train error (iters per batch='..batch_provider.iter_per_batch..
- ')']=trainer.fx[i],['val error']=err,
- ['learning rate']=trainer.optimState.learningRate}
-
- val_counter = val_counter + 1
-
- local val_err_t = torch.Tensor(val_err)
- local _,lmin = val_err_t:min(1)
- if val_counter-lmin[1] >= opt.nsmooth then
- print('Reducing learning rate')
- trainer.optimState.learningRate = trainer.optimState.learningRate/2
- if opt.nildfdx == true then
- trainer.optimState.dfdx= nil
- end
- val_counter = 0
- val_err = {}
- reduc_counter = reduc_counter + 1
- if reduc_counter >= opt.nred then
- print('Stopping training at iteration '..i)
- break
- end
- end
-
- collectgarbage()
- collectgarbage()
- --sanitize(model)
- --torch.save(paths.concat(opt.save, 'model_' .. epoch .. '.t7'), classifier)
- --torch.save(paths.concat(opt.save, 'optimState_' .. epoch .. '.t7'), trainer.optimState)
-end
-
---sanitize(classifier)
-torch.save(paths.concat(opt.save, 'model.t7'), classifier)
-
-ds_train.roidb = nil
-collectgarbage()
-collectgarbage()
-
---------------------------------------------------------------------------------
--- Do full evaluation
---------------------------------------------------------------------------------
-
-print('==> Evaluation')
-tester = nnf.Tester(classifier,feat_provider_test)
-tester.cachefolder = paths.concat(opt.save,'evaluation',ds_test.dataset_name)
+-- evaluation
+print('==> Evaluating')
+-- add a softmax for evaluation, since training used nn.CrossEntropyCriterion
+-- and the saved model therefore outputs raw scores
+local softmax = nn.SoftMax()
+softmax:type(tensor_type)
+model:add(softmax)
+feat_provider:evaluate()
+-- define the class to test the model on the full dataset
+tester = nnf.Tester(model, feat_provider, ds_test)
+tester.cachefolder = rundir
tester:test(opt.num_iter)
-
diff --git a/model.lua b/model.lua
index 9700f0b..029c8a3 100644
--- a/model.lua
+++ b/model.lua
@@ -1,50 +1,26 @@
require 'nn'
-require 'inn'
-require 'cudnn'
-local reshapeLastLinearLayer = paths.dofile('utils.lua').reshapeLastLinearLayer
-local convertCaffeModelToTorch = paths.dofile('utils.lua').convertCaffeModelToTorch
+--require 'inn'
+--require 'cudnn'
--- 1.1. Create Network
-local config = opt.netType
-local createModel = paths.dofile('models/' .. config .. '.lua')
-print('=> Creating model from file: models/' .. config .. '.lua')
-model = createModel(opt.backend)
+local createModel = paths.dofile('models/' .. opt.netType .. '.lua')
+print('=> Creating model from file: models/' .. opt.netType .. '.lua')
+local model = createModel()
--- convert to accept inputs in the range 0-1 RGB format
-convertCaffeModelToTorch(model,{1,1})
+local criterion = nn.CrossEntropyCriterion()
-reshapeLastLinearLayer(model,#classes+1)
-image_mean = {128/255,128/255,128/255}
-
-if opt.algo == 'RCNN' then
- classifier = model
-elseif opt.algo == 'SPP' then
- features = model:get(1)
- classifier = model:get(3)
-end
-
--- 2. Create Criterion
-criterion = nn.CrossEntropyCriterion()
-
-print('=> Model')
+print('Model:')
print(model)
-
-print('=> Criterion')
+print('Criterion:')
print(criterion)
--- 3. If preloading option is set, preload weights from existing models appropriately
+-- If preloading option is set, preload weights from existing models appropriately
if opt.retrain ~= 'none' then
assert(paths.filep(opt.retrain), 'File not found: ' .. opt.retrain)
print('Loading model from file: ' .. opt.retrain);
- classifier = torch.load(opt.retrain)
+ model = torch.load(opt.retrain)
end
--- 4. Convert model to CUDA
-print('==> Converting model to CUDA')
-model = model:cuda()
-criterion:cuda()
-
collectgarbage()
-
+return model, criterion
diff --git a/models/frcnn_alexnet.lua b/models/frcnn_alexnet.lua
new file mode 100644
index 0000000..c8b033d
--- /dev/null
+++ b/models/frcnn_alexnet.lua
@@ -0,0 +1,62 @@
+local function loadModel(params,backend)
+
+ backend = backend or cudnn
+
+ local features = nn.Sequential()
+ local classifier = nn.Sequential()
+
+ features:add(backend.SpatialConvolution(3,96,11,11,4,4,5,5,1))
+ features:add(backend.ReLU(true))
+ features:add(backend.SpatialMaxPooling(3,3,2,2,1,1))
+ features:add(backend.SpatialCrossMapLRN(5,0.0001,0.75,1))
+
+ features:add(backend.SpatialConvolution(96,256,5,5,1,1,1,1,2))
+ features:add(backend.ReLU(true))
+ features:add(backend.SpatialMaxPooling(3,3,2,2,1,1))
+ features:add(backend.SpatialCrossMapLRN(5,0.0001,0.75,1))
+
+ features:add(backend.SpatialConvolution(256,384,3,3,1,1,1,1,1))
+ features:add(backend.ReLU(true))
+
+ features:add(backend.SpatialConvolution(384,384,3,3,1,1,1,1,2))
+ features:add(backend.ReLU(true))
+
+ features:add(backend.SpatialConvolution(384,256,3,3,1,1,1,1,2))
+ features:add(backend.ReLU(true))
+
+ classifier:add(nn.Linear(9216,4096))
+ classifier:add(backend.ReLU(true))
+ classifier:add(nn.Dropout(0.5))
+
+ classifier:add(nn.Linear(4096,4096))
+ classifier:add(backend.ReLU(true))
+ classifier:add(nn.Dropout(0.5))
+
+ classifier:add(nn.Linear(4096,21))
+
+ local prl = nn.ParallelTable()
+ prl:add(features)
+ prl:add(nn.Identity())
+
+ local ROIPooling = inn.ROIPooling(6,6):setSpatialScale(1/16)
+
+ local model = nn.Sequential()
+ model:add(prl)
+ model:add(ROIPooling)
+ model:add(nn.View(-1):setNumInputDims(3))
+ model:add(classifier)
+
+ if params then
+ local lparams = model:parameters()
+    assert(#lparams == #params, 'number of provided parameter tensors does not match the model')
+
+ for k,v in ipairs(lparams) do
+ local p = params[k]
+      assert(p:numel() == v:numel(), 'wrong number of parameter elements!')
+ v:copy(p)
+ end
+ end
+ return model
+end
+
+return loadModel
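+
+-- Usage sketch (assumes inn and cudnn have been required; params is an
+-- optional table of pre-trained parameter tensors):
+--   local loadModel = paths.dofile('models/frcnn_alexnet.lua')
+--   local model = loadModel(nil, cudnn)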
diff --git a/nnf.lua b/nnf.lua
index a2e7831..d9fd777 100644
--- a/nnf.lua
+++ b/nnf.lua
@@ -1,20 +1,30 @@
require 'nn'
require 'image'
-require 'inn'
+--require 'inn'
require 'xlua'
nnf = {}
+torch.include('nnf','ImageTransformer.lua')
+
+torch.include('nnf','DataSetDetection.lua')
torch.include('nnf','DataSetPascal.lua')
-torch.include('nnf','BatchProvider.lua')
+torch.include('nnf','DataSetCOCO.lua')
+
+torch.include('nnf','BatchProviderBase.lua')
+torch.include('nnf','BatchProviderIC.lua')
+torch.include('nnf','BatchProviderRC.lua')
torch.include('nnf','SPP.lua')
torch.include('nnf','RCNN.lua')
+torch.include('nnf','FRCNN.lua')
+torch.include('nnf','ROIPooling.lua')
torch.include('nnf','Trainer.lua')
torch.include('nnf','Tester.lua')
+--torch.include('nnf','Tester_FRCNN.lua')
torch.include('nnf','SVMTrainer.lua')
-torch.include('nnf','ImageTransformer.lua')
+torch.include('nnf','ImageDetect.lua')
--return nnf
diff --git a/opts.lua b/opts.lua
index f07d8dc..457b6f2 100644
--- a/opts.lua
+++ b/opts.lua
@@ -8,55 +8,29 @@ function M.parse(arg)
cmd:text()
cmd:text('Options:')
- local curr_dir = paths.cwd()
- local defaultDataSetDir = paths.concat(curr_dir,'datasets')
- local defaultDataDir = paths.concat(defaultDataSetDir,'VOCdevkit/')
- local defaultROIDBDir = paths.concat(curr_dir,'data','selective_search_data/')
-
- cmd:text('Folder parameters')
- cmd:option('-cache',paths.concat(curr_dir,'cachedir'),'Cache dir')
- cmd:option('-datadir',defaultDataDir,'Path to dataset')
- cmd:option('-roidbdir',defaultROIDBDir,'Path to ROIDB')
- cmd:text()
- cmd:text('Model parameters')
- cmd:option('-algo','SPP','Detection framework. Options: RCNN | SPP')
- cmd:option('-netType','zeiler','Options: zeiler | vgg')
- cmd:option('-backend','cudnn','Options: nn | cudnn')
- cmd:text()
- cmd:text('Data parameters')
- cmd:option('-year',2007,'DataSet year (for Pascal)')
- cmd:option('-ipb',500,'iter per batch')
- cmd:option('-ntmd',10,'nTimesMoreData')
- cmd:option('-fg_frac',0.25,'fg_fraction')
- cmd:option('-classes','all','use all classes (all) or given class')
- cmd:text()
- cmd:text('Training parameters')
- cmd:option('-lr',1e-2,'learning rate')
- cmd:option('-num_iter',300,'number of iterations')
- cmd:option('-nsmooth',40,'number of iterations before reducing learning rate')
- cmd:option('-nred',4,'number of divisions by 2 before stopping learning')
- cmd:option('-nildfdx',false,'erase memory of gradients when reducing learning rate')
- cmd:text()
- cmd:text('Others')
- cmd:option('-gpu',1,'gpu device to use')
- cmd:option('-numthreads',6,'number of threads to use')
- cmd:option('-comment','','additional comment to the name')
- cmd:option('-seed',0,'random seed (0 = no fixed seed)')
- cmd:option('-retrain','none','modelpath for finetuning')
- cmd:text()
-
+ cmd:option('-name', 'obj-detect', 'base name')
+ cmd:option('-algo', 'RCNN', 'Detection framework. Options: RCNN | FRCNN')
+ cmd:option('-netType', 'alexnet', 'Options: alexnet')
+ cmd:option('-lr', 1e-3, 'learning rate')
+ cmd:option('-num_iter', 40000, 'number of iterations')
+ cmd:option('-disp_iter', 100, 'display every n iterations')
+ cmd:option('-lr_step', 30000, 'step for reducing the learning rate')
+ cmd:option('-save_step', 10000, 'step for saving the model')
+ cmd:option('-gpu', 1, 'gpu to use (0 for cpu mode)')
+ cmd:option('-conf_mat', false, 'Compute confusion matrix during training')
+ cmd:option('-seed', 1, 'fix random seed (if ~= 0)')
+ cmd:option('-numthreads',6, 'number of threads')
+ cmd:option('-retrain', 'none', 'modelpath for finetuning')
local opt = cmd:parse(arg or {})
- -- add commandline specified options
- opt.save = paths.concat(opt.cache,
- cmd:string(opt.netType, opt,
- {retrain=true, optimState=true, cache=true,
- data=true, gpu=true, numthread=true,
- netType=true}))
- -- add date/time
- opt.save_base = opt.save
- local date_time = os.date():gsub(' ','')
- opt.save = paths.concat(opt.save, date_time)
+
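+ -- cmd:string encodes the non-default options into the experiment name,
+ -- skipping the keys listed in the ignore table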
+ local exp_name = cmd:string(opt.name, opt, {name=true, gpu=true, numthreads=true})
+
+ rundir = 'cachedir/'..exp_name -- deliberately global: train.lua logs and saves into it
+ paths.mkdir(rundir)
+
+ cmd:log(paths.concat(rundir,'log'), opt)
+ cmd:addTime('Object-Detection.Torch')
return opt
diff --git a/test_frcnn.lua b/test_frcnn.lua
new file mode 100644
index 0000000..24bb23b
--- /dev/null
+++ b/test_frcnn.lua
@@ -0,0 +1,282 @@
+require 'nnf'
+require 'inn'
+require 'cudnn'
+require 'gnuplot'
+
+cutorch.setDevice(2)
+
+dt = torch.load('pascal_2007_train.t7')
+if false then
+ ds = nnf.DataSetPascal{image_set='train',
+ datadir='/home/francisco/work/datasets/VOCdevkit',
+ roidbdir='/home/francisco/work/datasets/rcnn/selective_search_data'
+ }
+else
+ ds = nnf.DataSetPascal{image_set='trainval',
+ datadir='datasets/VOCdevkit',
+ roidbdir='data/selective_search_data'
+ }
+end
+
+if false then
+ ds.roidb = {}
+ for i=1,ds:size() do
+ ds.roidb[i] = torch.IntTensor(10,4):random(1,5)
+ ds.roidb[i][{{},{3,4}}]:add(6)
+ end
+elseif false then
+ ds.roidb = dt.roidb
+end
+
+local image_transformer = nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},--{103.939, 116.779, 123.68},
+ raw_scale = 255,
+ swap = {3,2,1}}
+if true then
+ bp = nnf.BatchProviderROI(ds)
+ bp.image_transformer = image_transformer
+ bp.bg_threshold = {0.1,0.5}
+ bp:setupData()
+else
+ bp = nnf.BatchProviderROI(ds)
+ bp.image_transformer = image_transformer
+ local temp = torch.load('pascal_2007_train_bp.t7')
+ bp.bboxes = temp.bboxes
+end
+
+
+if false then
+ local mytest = nnf.ROIPooling(50,50):float()
+ function do_mytest()
+ input0,target0 = bp:getBatch(input0,target0) -- no 'local': reuse the buffers across calls
+ local o = mytest:forward(input0)
+ return input0,target0,o
+ end
+ --input0,target0,o = do_mytest()
+end
+
+---------------------------------------------------------------------------------------
+-- model
+---------------------------------------------------------------------------------------
+do
+
+ model = nn.Sequential()
+ local features = nn.Sequential()
+ local classifier = nn.Sequential()
+
+ if false then
+ features:add(nn.SpatialConvolutionMM(3,96,11,11,4,4,5,5))
+ features:add(nn.ReLU(true))
+ features:add(nn.SpatialConvolutionMM(96,128,5,5,2,2,2,2))
+ features:add(nn.ReLU(true))
+ features:add(nn.SpatialMaxPooling(2,2,2,2))
+
+ classifier:add(nn.Linear(128*7*7,1024))
+ classifier:add(nn.ReLU(true))
+ classifier:add(nn.Dropout(0.5))
+ classifier:add(nn.Linear(1024,21))
+
+ elseif false then
+ require 'loadcaffe'
+-- local rcnnfold = '/home/francisco/work/libraries/rcnn/'
+-- local base_model = loadcaffe.load(
+-- rcnnfold..'model-defs/pascal_finetune_deploy.prototxt',
+-- rcnnfold..'data/caffe_nets/finetune_voc_2012_train_iter_70k',
+-- 'cudnn')
+
+ local rcnnfold = '/home/francisco/work/libraries/caffe/examples/imagenet/'
+ local base_model = loadcaffe.load(
+ rcnnfold..'imagenet_deploy.prototxt',
+ rcnnfold..'caffe_reference_imagenet_model',
+ 'cudnn')
+
+
+ for i=1,14 do
+ features:add(base_model:get(i):clone())
+ end
+ for i=17,22 do
+ classifier:add(base_model:get(i):clone())
+ end
+ classifier:add(nn.Linear(4096,21):cuda())
+
+ collectgarbage()
+
+ else
+ local fold = 'data/models/imagenet_models/alexnet/'
+ local m1 = torch.load(fold..'features.t7')
+ local m2 = torch.load(fold..'top.t7')
+
+ for i=1,14 do
+ features:add(m1:get(i):clone())
+ end
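+ -- presumably get(3)/get(7) are the first two max-poolings of the converted
+ -- AlexNet: padding them by 1 makes output sizes match Caffe's ceil-mode pooling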
+ features:get(3).padW = 1
+ features:get(3).padH = 1
+ features:get(7).padW = 1
+ features:get(7).padH = 1
+
+ for i=2,7 do
+ classifier:add(m2:get(i):clone())
+ end
+ local linear = nn.Linear(4096,21):cuda()
+ linear.weight:normal(0,0.01)
+ linear.bias:zero()
+ classifier:add(linear)
+ end
+ collectgarbage()
+
+ local prl = nn.ParallelTable()
+ prl:add(features)
+ prl:add(nn.Identity())
+ model:add(prl)
+ --model:add(nnf.ROIPooling(6,6):setSpatialScale(1/16))
+ model:add(inn.ROIPooling(6,6):setSpatialScale(1/16))
+ model:add(nn.View(-1):setNumInputDims(3))
+ model:add(classifier)
+
+end
+print(model)
+
+model:cuda()
+parameters,gradParameters = model:getParameters()
+
+parameters2,gradParameters2 = model:parameters()
+
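+-- per-tensor multipliers for model:parameters(), alternating weight/bias per
+-- layer: 0,0 freezes the first conv layer; biases get 2x the learning rate and
+-- no weight decay, mimicking the usual Caffe per-layer settings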
+lr = {0,0,1,2,1,2,1,2,1,2,1,2,1,2,1,2}
+wd = {0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}
+
+local function updateGPlrwd(clr)
+ clr = clr or 1
+ for i,p in pairs(gradParameters2) do
+ p:add(wd[i]*0.0005,parameters2[i])
+ p:mul(lr[i]*clr)
+ end
+end
+
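+-- sgd's learningRate stays at 1: the effective step size is already baked into
+-- the gradients by updateGPlrwd (grad <- (grad + 0.0005*wd[i]*param) * lr[i]*clr)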
+optimState = {learningRate = 1,--1e-3,
+ weightDecay = 0.000, momentum = 0.9,
+ learningRateDecay = 0, dampening=0}
+
+--------------------------------------------------------------------------
+-- training
+--------------------------------------------------------------------------
+
+confusion_matrix = optim.ConfusionMatrix(21)
+
+
+model:training()
+
+savedModel = model:clone('weight','bias','running_mean','running_std')
+
+criterion = nn.CrossEntropyCriterion():cuda()
+--criterion.nll.sizeAverage = false
+
+--normalize = true
+
+display_iter = 20
+
+--inputs = {torch.CudaTensor(),torch.FloatTensor()}
+inputs = {torch.CudaTensor(),torch.CudaTensor()}
+target = torch.CudaTensor()
+
+learningRate = 1e-3
+
+function train()
+ local err = 0
+ for i=1,display_iter do
+ xlua.progress(i,display_iter)
+ inputs0,target0 = bp:getBatch(inputs0,target0)
+ inputs[1]:resize(inputs0[1]:size()):copy(inputs0[1])
+ inputs[2]:resize(inputs0[2]:size()):copy(inputs0[2])
+ target:resize(target0:size()):copy(target0)
+ local batchSize = target:size(1)
+
+ local feval = function(x)
+ if x ~= parameters then
+ parameters:copy(x)
+ end
+ gradParameters:zero()
+
+ local outputs = model:forward(inputs)
+
+ local f = criterion:forward(outputs,target)
+ local df_do = criterion:backward(outputs,target)
+
+ model:backward(inputs,df_do)
+
+ -- mimic different learning rates per layer
+ -- without the cost of having a huge tensor
+ updateGPlrwd(learningRate)
+
+ if normalize then
+ gradParameters:div(batchSize)
+ f = f/batchSize
+ end
+
+ confusion_matrix:batchAdd(outputs,target)
+
+ return f,gradParameters
+ end
+
+ local x,fx = optim.sgd(feval,parameters,optimState)
+ err = err + fx[1]
+ end
+ print('Training error: '..err/display_iter)
+ return err/display_iter
+end
+
+epoch_size = math.ceil(ds:size()/bp.imgs_per_batch)
+stepsize = 30000
+print_step = 10
+num_iter = 40000
+num_iter = num_iter/display_iter -- 2000 outer iterations of display_iter batches each
+
+confusion_matrix:zero()
+train_err = {}
+exp_name = 'frcnn_t11'
+
+paths.mkdir(paths.concat('cachedir',exp_name))
+--logger = optim.Logger(paths.concat('cachedir',exp_name,'train_err.log'))
+train_acc = {}
+for i=1,num_iter do
+
+ if i%(stepsize/display_iter) == 0 then
+ --optimState.learningRate = optimState.learningRate/10
+ learningRate = learningRate/10
+ end
+
+ --print(('Iteration: %d/%d, lr: %.5f'):format(i,num_iter,optimState.learningRate))
+ print(('Iteration: %d/%d, lr: %.5f'):format(i,num_iter,learningRate))
+
+ local t_err = train()
+ table.insert(train_err,t_err)
+
+
+ if i%print_step == 0 then
+ print(confusion_matrix)
+ table.insert(train_acc,confusion_matrix.averageUnionValid*100)
+ gnuplot.epsfigure(paths.concat('cachedir',exp_name,'train_err.eps'))
+ gnuplot.plot('train',torch.Tensor(train_acc),'-')
+ gnuplot.xlabel('Iterations (one point per 200 batches)')
+ gnuplot.ylabel('Training accuracy')
+ gnuplot.grid('on')
+ gnuplot.plotflush()
+ gnuplot.closeall()
+
+ confusion_matrix:zero()
+ end
+
+ if i%100 == 0 then
+ torch.save(paths.concat('cachedir',exp_name..'.t7'),savedModel)
+ end
+end
+
+-- test
+dsv = nnf.DataSetPascal{image_set='test',
+ datadir='datasets/VOCdevkit',
+ roidbdir='data/selective_search_data'
+ }
+
+
+local fpv = {dataset=dsv}
+tester = nnf.Tester_FRCNN(model,fpv)
+tester.cachefolder = 'cachedir/'..exp_name
+tester:test(num_iter)
diff --git a/tests/test_full_frcnn.lua b/tests/test_full_frcnn.lua
new file mode 100644
index 0000000..c49c2c6
--- /dev/null
+++ b/tests/test_full_frcnn.lua
@@ -0,0 +1,124 @@
+require 'nnf'
+require 'inn'
+require 'cudnn'
+require 'loadcaffe'
+
+cutorch.setDevice(2)
+
+ds = nnf.DataSetPascal{image_set='trainval',
+ datadir='datasets/VOCdevkit',
+ roidbdir='data/selective_search_data'
+ }
+local image_transformer = nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+ raw_scale = 255,
+ swap = {3,2,1}}
+
+fp = nnf.FRCNN{image_transformer=image_transformer}
+fp:training()
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp = nnf.BatchProviderROI{dataset=ds,feat_provider=fp,
+ bg_threshold={0.1,0.5}
+ }
+bp:setupData()
+
+--------------------------------------------------------------------------------
+-- define model
+--------------------------------------------------------------------------------
+model = nn.Sequential()
+do
+ --[[
+ local rcnnfold = '/home/francisco/work/projects/object-detection.torch/data/models/imagenet_models/'
+ local base_model = loadcaffe.load(
+ rcnnfold..'CaffeNet_train.prototxt',
+ rcnnfold..'CaffeNet.v2.caffemodel',
+ 'cudnn')
+ for i=1,14 do
+ features:add(base_model:get(i):clone())
+ end
+ for i=17,22 do
+ classifier:add(base_model:get(i):clone())
+ end
+ local linear = nn.Linear(4096,21):cuda()
+ linear.weight:normal(0,0.01)
+ linear.bias:zero()
+ classifier:add(linear)
+ --]]
+ local features = nn.Sequential()
+ local classifier = nn.Sequential()
+ local fold = 'data/models/imagenet_models/alexnet/'
+ local m1 = torch.load(fold..'features.t7')
+ local m2 = torch.load(fold..'top.t7')
+ for i=1,14 do
+ features:add(m1:get(i):clone())
+ end
+ features:get(3).padW = 1
+ features:get(3).padH = 1
+ features:get(7).padW = 1
+ features:get(7).padH = 1
+ for i=2,7 do
+ classifier:add(m2:get(i):clone())
+ end
+ local linear = nn.Linear(4096,21):cuda()
+ linear.weight:normal(0,0.01)
+ linear.bias:zero()
+ classifier:add(linear)
+ collectgarbage()
+ local prl = nn.ParallelTable()
+ prl:add(features)
+ prl:add(nn.Identity())
+ model:add(prl)
+ --model:add(nnf.ROIPooling(6,6):setSpatialScale(1/16))
+ model:add(inn.ROIPooling(6,6):setSpatialScale(1/16))
+ model:add(nn.View(-1):setNumInputDims(3))
+ model:add(classifier)
+end
+model:cuda()
+
+--model = nil
+--collectgarbage()
+--model = torch.load('test_model.t7')
+--model:cuda()
+collectgarbage()
+--------------------------------------------------------------------------------
+-- train
+--------------------------------------------------------------------------------
+
+criterion = nn.CrossEntropyCriterion():cuda()
+
+trainer = nnf.Trainer(model,criterion,bp)
+
+savedModel = model:clone('weight','bias','running_mean','running_std')
+for i=1,400 do
+ if i == 300 then
+ trainer.optimState.learningRate = trainer.optimState.learningRate/10
+ end
+ print(('Iteration %3d/%-3d'):format(i,400))
+ trainer:train(100)
+ print((' Train error: %g'):format(trainer.fx[i]))
+end
+
+--------------------------------------------------------------------------------
+-- evaluate
+--------------------------------------------------------------------------------
+
+-- add softmax to classifier
+model:add(nn.SoftMax():cuda())
+
+dsv = nnf.DataSetPascal{image_set='test',
+ datadir='datasets/VOCdevkit',
+ roidbdir='data/selective_search_data'
+ }
+
+
+fpv = nnf.FRCNN{image_transformer=image_transformer}
+fpv:evaluate()
+exp_name = 'test2_frcnn'
+
+tester = nnf.Tester(model,fpv,dsv)
+tester.cachefolder = 'cachedir/'..exp_name
+tester:test(40000)
+
+torch.save(paths.concat(tester.cachefolder,'model.t7'),savedModel)
diff --git a/tests/test_full_rcnn.lua b/tests/test_full_rcnn.lua
new file mode 100644
index 0000000..6abd6ab
--- /dev/null
+++ b/tests/test_full_rcnn.lua
@@ -0,0 +1,120 @@
+require 'nnf'
+require 'inn'
+require 'cudnn'
+require 'loadcaffe'
+
+cutorch.setDevice(2)
+
+ds = nnf.DataSetPascal{image_set='trainval',
+ datadir='datasets/VOCdevkit',
+ roidbdir='data/selective_search_data'
+ }
+local image_transformer = nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+ raw_scale = 255,
+ swap = {3,2,1}}
+
+fp = nnf.RCNN{image_transformer=image_transformer,
+ crop_size=224}
+fp:training()
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp = nnf.BatchProvider{dataset=ds,feat_provider=fp,
+ bg_threshold={0.0,0.5},
+ nTimesMoreData=2,
+ iter_per_batch=100,
+ }
+bp:setupData()
+
+--------------------------------------------------------------------------------
+-- define model
+--------------------------------------------------------------------------------
+model = nn.Sequential()
+do
+ --[[
+ local rcnnfold = '/home/francisco/work/projects/object-detection.torch/data/models/imagenet_models/'
+ local base_model = loadcaffe.load(
+ rcnnfold..'CaffeNet_train.prototxt',
+ rcnnfold..'CaffeNet.v2.caffemodel',
+ 'cudnn')
+ for i=1,14 do
+ features:add(base_model:get(i):clone())
+ end
+ for i=17,22 do
+ classifier:add(base_model:get(i):clone())
+ end
+ local linear = nn.Linear(4096,21):cuda()
+ linear.weight:normal(0,0.01)
+ linear.bias:zero()
+ classifier:add(linear)
+ --]]
+ local features = nn.Sequential()
+ local classifier = nn.Sequential()
+ local fold = 'data/models/imagenet_models/alexnet/'
+ local m1 = torch.load(fold..'features.t7')
+ local m2 = torch.load(fold..'top.t7')
+ for i=1,14 do
+ features:add(m1:get(i):clone())
+ end
+ features:get(3).padW = 1
+ features:get(3).padH = 1
+ features:get(7).padW = 1
+ features:get(7).padH = 1
+ for i=2,7 do
+ classifier:add(m2:get(i):clone())
+ end
+ local linear = nn.Linear(4096,21):cuda()
+ linear.weight:normal(0,0.01)
+ linear.bias:zero()
+ classifier:add(linear)
+ collectgarbage()
+ --local prl = nn.ParallelTable()
+ --prl:add(features)
+ --prl:add(nn.Identity())
+ --model:add(prl)
+ --model:add(nnf.ROIPooling(6,6):setSpatialScale(1/16))
+ --model:add(inn.ROIPooling(6,6):setSpatialScale(1/16))
+ model:add(features)
+ model:add(nn.SpatialAdaptiveMaxPooling(6,6))
+ model:add(nn.View(-1):setNumInputDims(3))
+ model:add(classifier)
+end
+model:cuda()
+--------------------------------------------------------------------------------
+-- train
+--------------------------------------------------------------------------------
+
+criterion = nn.CrossEntropyCriterion():cuda()
+
+trainer = nnf.Trainer(model,criterion,bp)
+
+for i=1,400 do
+ if i == 300 then
+ trainer.optimState.learningRate = trainer.optimState.learningRate/10
+ end
+ print(('Iteration %3d/%-3d'):format(i,400))
+ trainer:train(100)
+end
+
+--------------------------------------------------------------------------------
+-- evaluate
+--------------------------------------------------------------------------------
+
+-- add softmax to classifier
+model:add(nn.SoftMax():cuda())
+
+dsv = nnf.DataSetPascal{image_set='test',
+ datadir='datasets/VOCdevkit',
+ roidbdir='data/selective_search_data'
+ }
+
+
+fpv = nnf.RCNN{image_transformer=image_transformer,
+ crop_size=224}
+fpv:evaluate()
+exp_name = 'test1_rcnn'
+
+tester = nnf.Tester(model,fpv,dsv)
+tester.cachefolder = 'cachedir/'..exp_name
+tester:test(40000)
diff --git a/tests/test_imdetect.lua b/tests/test_imdetect.lua
new file mode 100644
index 0000000..9884cd7
--- /dev/null
+++ b/tests/test_imdetect.lua
@@ -0,0 +1,59 @@
+dofile 'test_utils.lua'
+
+detect1 = nnf.ImageDetect(model1,fp1)
+detect = nnf.ImageDetect(model,fp2)
+
+
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp1 = nnf.BatchProvider{dataset=ds,feat_provider=fp1}
+bp1.nTimesMoreData = 2
+bp1.iter_per_batch = 10
+bp2 = nnf.BatchProviderROI{dataset=ds,feat_provider=fp2}
+
+bp1.bboxes = torch.load('tests/bproibox.t7')
+bp2.bboxes = torch.load('tests/bproibox.t7')
+
+print('test1')
+b,t = bp1:getBatch()
+print('test2')
+b,t = bp2:getBatch()
+
+-- mixing does not work for the moment, as FRCNN accepts a set of images as input
+-- whereas RCNN and SPP assume that only one image is provided at a time
+--[[
+bp3 = nnf.BatchProviderROI(ds)
+bp3.bboxes = torch.load('tests/bproibox.t7')
+bp3.feat_provider = fp1
+print('test3')
+b,t = bp3:getBatch()
+--]]
+--------------------------------------------------------------------------------
+--
+--------------------------------------------------------------------------------
+
+idx = 100
+im = ds:getImage(idx)
+boxes = ds:getROIBoxes(idx)
+
+--output = detect1:detect(im,boxes)
+--output0 = detect:detect(im,boxes)
+
+--------------------------------------------------------------------------------
+-- compare old and new SPP implementations for the cropping
+--------------------------------------------------------------------------------
+--[[
+output_old = {}
+for i=1,boxes:size(1) do
+ tt0 = fp3:getCrop_old(im,boxes[i])
+ output_old[i] = tt0
+end
+
+output_new = fp3:getCrop(im,boxes) --[881]
+
+for i=1,boxes:size(1) do
+ assert(output_old[i]:eq(output_new[i]):all(),'error '..i)
+end
+--]]
diff --git a/tests/test_train.lua b/tests/test_train.lua
new file mode 100644
index 0000000..7f50819
--- /dev/null
+++ b/tests/test_train.lua
@@ -0,0 +1,26 @@
+dofile 'tests/test_utils.lua'
+
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp1 = nnf.BatchProvider{dataset=ds,feat_provider=fp1}
+bp1.nTimesMoreData = 2
+bp1.iter_per_batch = 10
+bp2 = nnf.BatchProviderROI{dataset=ds,feat_provider=fp2}
+
+bp1.bboxes = torch.load('tests/bproibox.t7')
+bp2.bboxes = torch.load('tests/bproibox.t7')
+
+--------------------------------------------------------------------------------
+--
+--------------------------------------------------------------------------------
+
+criterion = nn.CrossEntropyCriterion()
+
+trainer = nnf.Trainer(model1,criterion,bp1)
+
+for i=1,10 do
+ trainer:train(10)
+end
+
diff --git a/tests/test_utils.lua b/tests/test_utils.lua
new file mode 100644
index 0000000..e3d20dc
--- /dev/null
+++ b/tests/test_utils.lua
@@ -0,0 +1,49 @@
+require 'nnf'
+require 'nn'
+
+function getDS()
+ local dt = torch.load('pascal_2007_train.t7')
+ local ds = nnf.DataSetPascal{image_set='train',
+ datadir='/home/francisco/work/datasets/VOCdevkit',
+ roidbdir='/home/francisco/work/datasets/rcnn/selective_search_data'
+ }
+ ds.roidb = dt.roidb
+ return ds
+end
+
+function getModel()
+ local features = nn.Sequential()
+ features:add(nn.SpatialConvolutionMM(3,16,11,11,16,16,5,5))
+ local classifier = nn.Sequential()
+ classifier:add(nn.Linear(7*7*16,21))
+ local model1 = nn.Sequential()
+ model1:add(features)
+ model1:add(nn.SpatialMaxPooling(2,2,2,2))
+ model1:add(nn.View(-1):setNumInputDims(3))
+ model1:add(classifier)
+ local model = nn.Sequential()
+ local prl = nn.ParallelTable()
+ prl:add(features)
+ prl:add(nn.Identity())
+ model:add(prl)
+ model:add(nnf.ROIPooling(7,7):setSpatialScale(1/16))
+ model:add(nn.View(-1):setNumInputDims(3))
+ model:add(classifier)
+ return model1, model, features, classifier
+end
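+
+-- model1 consumes fixed-size crops (RCNN-style); model takes {image, rois} and
+-- reuses the same features/classifier containers, so the two share parameters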
+
+--------------------------------------------------------------------------------
+-- define dataset, models and feature providers
+--------------------------------------------------------------------------------
+
+ds = getDS()
+
+model1, model, features, classifier = getModel()
+
+fp1 = nnf.RCNN{}
+fp2 = nnf.FRCNN{}
+fp3 = nnf.SPP{model=features}
+fp3.use_cache = false
+fp3:evaluate()
+
+
diff --git a/tests/test_visualization.lua b/tests/test_visualization.lua
new file mode 100644
index 0000000..b5d727a
--- /dev/null
+++ b/tests/test_visualization.lua
@@ -0,0 +1,7 @@
+dofile 'tests/test_utils.lua'
+I = ds:getImage(1)
+boxes = ds:getROIBoxes(1)
+scores = torch.rand(boxes:size(1),21)
+dofile 'visualize_detections.lua'
+visualize_detections(I,boxes,scores,0.9)
+
diff --git a/tests/test_visualization2.lua b/tests/test_visualization2.lua
new file mode 100644
index 0000000..415f86a
--- /dev/null
+++ b/tests/test_visualization2.lua
@@ -0,0 +1,42 @@
+require 'cutorch'
+require 'nnf'
+require 'cudnn'
+require 'inn'
+dofile 'visualize_detections.lua'
+
+cutorch.setDevice(2)
+
+--model = torch.load('cachedir/test2_frcnn/model.t7')
+model = torch.load('cachedir/model.t7')
+--model:add(nn.SoftMax():cuda())
+
+image_transformer = nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+ raw_scale = 255,
+ swap = {3,2,1}}
+
+
+ds = nnf.DataSetPascal{image_set='test',
+ datadir='datasets/VOCdevkit',
+ roidbdir='data/selective_search_data'
+ }
+
+fp = nnf.FRCNN{image_transformer=image_transformer}
+fp:evaluate()
+model:evaluate()
+detect = nnf.ImageDetect(model,fp)
+
+im_idx = 700
+
+I = ds:getImage(im_idx)
+boxes = ds:getROIBoxes(im_idx)
+--boxes = ds:getGTBoxes(im_idx)
+
+scores,bb = detect:detect(I,boxes)
+
+w = visualize_detections(I,boxes,scores,0.5,ds.classes)
+
+Im = w:image()
+II = Im:toFloatTensor()
+
+image.save('example_frcnn.jpg',II)
+
diff --git a/train.lua b/train.lua
new file mode 100644
index 0000000..8184922
--- /dev/null
+++ b/train.lua
@@ -0,0 +1,49 @@
+trainer = nnf.Trainer(model, criterion, batch_provider)
+
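+-- opt.num_iter/lr_step/save_step are given in batch iterations, but each loop
+-- step below runs opt.disp_iter batches, so rescale the schedules accordingly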
+local num_iter = opt.num_iter/opt.disp_iter
+local lr_step = opt.lr_step/opt.disp_iter
+local save_step = opt.save_step/opt.disp_iter
+
+trainer.optimState.learningRate = opt.lr
+
+logger = optim.Logger(paths.concat(rundir,'train.log'))
+
+if opt.conf_mat then
+ local conf_classes = {'background'}
+ for k,v in ipairs(ds_train.classes) do
+ table.insert(conf_classes,v)
+ end
+ trainer.confusion = optim.ConfusionMatrix(conf_classes)
+end
+
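+-- the clone shares the listed tensors with the live model, so saving it always
+-- captures the current weights without duplicating them in memory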
+local lightModel = model:clone('weight','bias','running_mean','running_std')
+
+-- main training loop
+for i=1,num_iter do
+ if i % lr_step == 0 then
+ trainer.optimState.learningRate = trainer.optimState.learningRate/10
+ end
+ print(('Iteration %3d/%-3d'):format(i,num_iter))
+ trainer:train(opt.disp_iter)
+ print((' Training error: %.5f'):format(trainer.fx[i]))
+
+ if opt.conf_mat then
+ print(trainer.confusion)
+ logger:add{
+ ['train error']=trainer.fx[i],
+ ['confusion matrix']=tostring(trainer.confusion),
+ ['learning rate']=trainer.optimState.learningRate
+ }
+ else
+ logger:add{
+ ['train error']=trainer.fx[i],
+ ['learning rate']=trainer.optimState.learningRate
+ }
+ end
+
+ if i % save_step == 0 then
+ torch.save(paths.concat(rundir, 'model.t7'), lightModel)
+ end
+end
+
+torch.save(paths.concat(rundir, 'model.t7'), lightModel)
diff --git a/utils.lua b/utils.lua
index 0255907..689c00f 100644
--- a/utils.lua
+++ b/utils.lua
@@ -2,6 +2,7 @@
-- utility functions for the evaluation part
--------------------------------------------------------------------------------
+-- can be replaced by the new torch.cat function
local function joinTable(input,dim)
local size = torch.LongStorage()
local is_ok = false
@@ -29,6 +30,50 @@ local function joinTable(input,dim)
return output
end
+local function recursiveResizeAsCopyTyped(t1,t2,type)
+ if torch.type(t2) == 'table' then
+ t1 = (torch.type(t1) == 'table') and t1 or {t1}
+ for key,_ in pairs(t2) do
+ t1[key], t2[key] = recursiveResizeAsCopyTyped(t1[key], t2[key], type)
+ end
+ elseif torch.isTensor(t2) then
+ local type = type or t2:type()
+ t1 = torch.isTypeOf(t1,type) and t1 or torch.Tensor():type(type)
+ t1:resize(t2:size()):copy(t2)
+ else
+ error("expecting nested tensors or tables. Got "..
+ torch.type(t1).." and "..torch.type(t2).." instead")
+ end
+ return t1, t2
+end
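+-- usage sketch (names hypothetical): grow CUDA buffers to match a CPU batch,
+-- e.g. inputs, batch = recursiveResizeAsCopyTyped(inputs, batch, 'torch.CudaTensor')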
+
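+-- like torch.cat, but tolerates an empty tensor on either side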
+local function concat(t1,t2,dim)
+ local out
+ assert(t1:type() == t2:type(),'tensors should have the same type')
+ if t1:dim() > 0 and t2:dim() > 0 then
+ dim = dim or t1:dim()
+ out = torch.cat(t1,t2,dim)
+ elseif t1:dim() > 0 then
+ out = t1:clone()
+ else
+ out = t2:clone()
+ end
+ return out
+end
+
+-- horizontally flips bounding boxes in place (modifies the bbox argument)
+local function flipBoundingBoxes(bbox, im_width)
+ if bbox:dim() == 1 then
+ local tt = bbox[1]
+ bbox[1] = im_width-bbox[3]+1
+ bbox[3] = im_width-tt +1
+ else
+ local tt = bbox[{{},1}]:clone()
+ bbox[{{},1}]:fill(im_width+1):add(-1,bbox[{{},3}])
+ bbox[{{},3}]:fill(im_width+1):add(-1,tt)
+ end
+end
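+-- e.g. with im_width=100, a box with x1=10, x2=30 becomes x1=71, x2=91
+-- (x1' = W - x2 + 1, x2' = W - x1 + 1, 1-indexed)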
+
--------------------------------------------------------------------------------
local function keep_top_k(boxes,top_k)
@@ -80,7 +125,6 @@ end
--------------------------------------------------------------------------------
local function boxoverlap(a,b)
- --local b = anno.objects[j]
local b = b.xmin and {b.xmin,b.ymin,b.xmax,b.ymax} or b
local x1 = a:select(2,1):clone()
@@ -267,6 +311,10 @@ utils.VOCap = VOCap
utils.convertCaffeModelToTorch = convertCaffeModelToTorch
utils.reshapeLastLinearLayer = reshapeLastLinearLayer
utils.sanitize = sanitize
+utils.recursiveResizeAsCopyTyped = recursiveResizeAsCopyTyped
+utils.flipBoundingBoxes = flipBoundingBoxes
+utils.concat = concat
+utils.boxoverlap = boxoverlap
return utils
diff --git a/visualize_detections.lua b/visualize_detections.lua
new file mode 100644
index 0000000..2381de4
--- /dev/null
+++ b/visualize_detections.lua
@@ -0,0 +1,62 @@
+local nms = dofile 'nms.lua'
+
+function visualize_detections(im,boxes,scores,thresh,cl_names)
+ local ok = pcall(require,'qt')
+ if not ok then
+ error('You need to run visualize_detections using qlua')
+ end
+ require 'qttorch'
+ require 'qtwidget'
+
+ -- select best scoring boxes without background
+ local max_score,idx = scores[{{},{2,-1}}]:max(2)
+
+ local idx_thresh = max_score:gt(thresh)
+ max_score = max_score[idx_thresh]
+ idx = idx[idx_thresh]
+
+ local r = torch.range(1,boxes:size(1)):long()
+ local rr = r[idx_thresh]
+ if rr:numel() == 0 then
+ error('No detections with a score greater than the specified threshold')
+ end
+ local boxes_thresh = boxes:index(1,rr)
+
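+ -- nms on [x1,y1,x2,y2,score] rows with IoU threshold 0.3 removes near-duplicates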
+ local keep = nms(torch.cat(boxes_thresh:float(),max_score:float(),2),0.3)
+
+ boxes_thresh = boxes_thresh:index(1,keep)
+ max_score = max_score:index(1,keep)
+ idx = idx:index(1,keep)
+
+ local num_boxes = boxes_thresh:size(1)
+ local widths = boxes_thresh[{{},3}] - boxes_thresh[{{},1}]
+ local heights = boxes_thresh[{{},4}] - boxes_thresh[{{},2}]
+
+ local x,y = im:size(3),im:size(2)
+ local w = qtwidget.newwindow(x,y,"Detections")
+ local qtimg = qt.QImage.fromTensor(im)
+ w:image(0,0,x,y,qtimg)
+ local fontsize = 15
+
+ for i=1,num_boxes do
+ local x,y = boxes_thresh[{i,1}],boxes_thresh[{i,2}]
+ local width,height = widths[i], heights[i]
+
+ -- add bbox
+ w:rectangle(x,y,width,height)
+
+ -- add score
+ w:moveto(x,y+fontsize)
+ w:setcolor("red")
+ w:setfont(qt.QFont{serif=true,italic=true,size=fontsize,bold=true})
+ if cl_names then
+ w:show(string.format('%s: %.2f',cl_names[idx[i]],max_score[i]))
+ else
+ w:show(string.format('%d: %.2f',idx[i],max_score[i]))
+ end
+ end
+ w:setcolor("red")
+ w:setlinewidth(2)
+ w:stroke()
+ return w
+end