diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ae7e697 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*~ +*.swp +cachedir/* diff --git a/BatchProviderBase.lua b/BatchProviderBase.lua new file mode 100644 index 0000000..35e5ada --- /dev/null +++ b/BatchProviderBase.lua @@ -0,0 +1,149 @@ +local argcheck = require 'argcheck' + +local function createWindowBase(rec,i,j,is_bg) + local label = is_bg == true and 0+1 or rec.label[j]+1 + local window = {i,rec.boxes[j][1],rec.boxes[j][2], + rec.boxes[j][3],rec.boxes[j][4], + label} + return window +end + +local function createWindowAngle(rec,i,j,is_bg) + local label = is_bg == true and 0+1 or rec.label[j]+1 + --local ang = ( is_bg == false and rec.objects[rec.correspondance[j] ] ) and + -- rec.objects[rec.correspondance[j] ].viewpoint.azimuth or 0 + local ang + if is_bg == false and rec.objects[rec.correspondance[j] ] then + if rec.objects[rec.correspondance[j] ].viewpoint.distance == '0' then + ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth_coarse + else + ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth + end + else + ang = 0 + end + local window = {i,rec.boxes[j][1],rec.boxes[j][2], + rec.boxes[j][3],rec.boxes[j][4], + label,ang} + return window +end + +--[[ +local argcheck = require 'argcheck' +local initcheck = argcheck{ + pack=true, + noordered=true, + {name="dataset", + type="nnf.DataSetPascal", + help="A dataset class" + }, + {name="batch_size", + type="number", + default=128, + help="batch size"}, + {name="fg_fraction", + type="number", + default=0.25, + help="foreground fraction in batch" + }, + {name="fg_threshold", + type="number", + default=0.5, + help="foreground threshold" + }, + {name="bg_threshold", + type="table", + default={0,0.5}, + help="background threshold, in the form {LO,HI}" + }, + {name="createWindow", + type="function", + default=createWindowBase, + help="" + }, + {name="do_flip", + type="boolean", + default=true, + help="sample batches with random flips" + }, +} +--]] + +local BatchProviderBase = torch.class('nnf.BatchProviderBase') + +function BatchProviderBase:__init(...) + + self.dataset = nil + self.batch_size = 128 + self.fg_fraction = 0.25 + self.fg_threshold = 0.5 + self.bg_threshold = {0,0.5} + self.createWindow = createWindowBase + self.do_flip = true + + --local opts = initcheck(...) 
+ --for k,v in pairs(opts) do self[k] = v end + +end + +-- allow changing the way self.bboxes are formatted +function BatchProviderBase:setCreateWindow(createWindow) + self.createWindow = createWindow +end + +function BatchProviderBase:setupData() + local dataset = self.dataset + local bb = {} + local bbT = {} + + for i=0,dataset.num_classes do -- 0 because of background + bb[i] = {} + end + + for i=1,dataset.num_imgs do + bbT[i] = {} + end + + for i = 1,dataset.num_imgs do + if dataset.num_imgs > 10 then + xlua.progress(i,dataset.num_imgs) + end + + local rec = dataset:attachProposals(i) + + for j=1,rec:size() do + local id = rec.label[j] + local is_fg = (rec.overlap[j] >= self.fg_threshold) + local is_bg = (not is_fg) and (rec.overlap[j] >= self.bg_threshold[1] and + rec.overlap[j] < self.bg_threshold[2]) + if is_fg then + local window = self.createWindow(rec,i,j,is_bg) + table.insert(bb[1], window) -- could be id instead of 1 + elseif is_bg then + local window = self.createWindow(rec,i,j,is_bg) + table.insert(bb[0], window) + end + + end + + for j=0,dataset.num_classes do -- 0 because of background + if #bb[j] > 0 then + bbT[i][j] = torch.FloatTensor(bb[j]) + end + end + + bb = {} + for i=0,dataset.num_classes do -- 0 because of background + bb[i] = {} + end + collectgarbage() + end + self.bboxes = bbT + --return bbT +end + +function BatchProviderBase:getBatch() + error("You can't use BatchProviderBase") + return input,target +end + diff --git a/BatchProviderIC.lua b/BatchProviderIC.lua new file mode 100644 index 0000000..da34d77 --- /dev/null +++ b/BatchProviderIC.lua @@ -0,0 +1,164 @@ +local BatchProvider, parent = torch.class('nnf.BatchProviderIC','nnf.BatchProviderBase') + +local argcheck = require 'argcheck' + +local env = require 'argcheck.env' -- retrieve argcheck environement +-- this is the default type function +-- which can be overrided by the user +function env.istype(obj, typename) + if typename == 'DataSet' then + return obj and obj._isDataSet + end + if typename == 'FeatureProvider' then + return obj and obj._isFeatureProvider + end + return torch.type(obj) == typename +end + + +local initcheck = argcheck{ + pack=true, + noordered=true, + {name="dataset", + type="DataSet", + help="A dataset class" + }, + {name="feat_provider", + type="nnf.FRCNN", + help="A feat provider class" + }, + {name="batch_size", + type="number", + opt=true, + help="batch size"}, + {name="imgs_per_batch", + type="number", + default=2, + help="number of images to sample in a batch"}, + {name="fg_fraction", + type="number", + default=0.25, + help="foreground fraction in batch" + }, + {name="fg_threshold", + type="number", + default=0.5, + help="foreground threshold" + }, + {name="bg_threshold", + type="table", + default={0.1,0.5}, + help="background threshold, in the form {LO,HI}" + }, + {name="do_flip", + type="boolean", + default=true, + help="sample batches with random flips" + }, +} + +function BatchProvider:__init(...) + parent.__init(self) + + local opts = initcheck(...) 
+ for k,v in pairs(opts) do self[k] = v end +end + +-- setup is the same + +function BatchProvider:permuteIdx() + local total_img = self.dataset:size() + local imgs_per_batch = self.imgs_per_batch + + self._cur = self._cur or math.huge + + if self._cur + imgs_per_batch > total_img then + self._perm = torch.randperm(total_img) + self._cur = 1 + end + + local img_idx = self._perm[{{self._cur,self._cur + self.imgs_per_batch - 1}}] + self._cur = self._cur + self.imgs_per_batch + + local img_idx_end = imgs_per_batch + + local fg_windows = {} + local bg_windows = {} + for i=1,img_idx_end do + local curr_idx = img_idx[i] + bg_windows[i] = {} + if self.bboxes[curr_idx][0] then + for j=1,self.bboxes[curr_idx][0]:size(1) do + table.insert(bg_windows[i],{curr_idx,j}) + end + end + fg_windows[i] = {} + if self.bboxes[curr_idx][1] then + for j=1,self.bboxes[curr_idx][1]:size(1) do + table.insert(fg_windows[i],{curr_idx,j}) + end + end + end + local do_flip = torch.FloatTensor(imgs_per_batch):random(0,1) + local opts = {img_idx=img_idx,img_idx_end=img_idx_end,do_flip=do_flip} + return fg_windows,bg_windows,opts + +end + +function BatchProvider:selectBBoxes(fg_windows,bg_windows) + local fg_num_each = torch.round(self.fg_num_each/self.imgs_per_batch) + local bg_num_each = torch.round(self.bg_num_each/self.imgs_per_batch) + + local bboxes = {} + local labels = {} + for im=1,self.imgs_per_batch do + local window_idx = torch.randperm(#bg_windows[im]) + local end_idx = math.min(bg_num_each,#bg_windows[im]) + local bbox = {} + for i=1,end_idx do + local curr_idx = bg_windows[im][window_idx[i] ][1] + local position = bg_windows[im][window_idx[i] ][2] + local dd = self.bboxes[curr_idx][0][position][{{2,6}}] + table.insert(bbox,{dd[1],dd[2],dd[3],dd[4]}) + table.insert(labels,dd[5]) + end + + window_idx = torch.randperm(#fg_windows[im]) + local end_idx = math.min(fg_num_each,#fg_windows[im]) + for i=1,end_idx do + local curr_idx = fg_windows[im][window_idx[i] ][1] + local position = fg_windows[im][window_idx[i] ][2] + local dd = self.bboxes[curr_idx][1][position][{{2,6}}] + table.insert(bbox,{dd[1],dd[2],dd[3],dd[4]}) + table.insert(labels,dd[5]) + end + table.insert(bboxes,torch.FloatTensor(bbox)) + end + labels = torch.IntTensor(labels) + return bboxes, labels +end + +function BatchProvider:getBatch() + local dataset = self.dataset + + self.fg_num_each = self.fg_fraction * self.batch_size + self.bg_num_each = self.batch_size - self.fg_num_each + + local fg_windows,bg_windows,opts = self:permuteIdx() + + self.targets = self.targets or torch.FloatTensor() + + local batches = self.batches + local targets = self.targets + + local imgs = {} + for i=1,opts.img_idx:size(1) do + table.insert(imgs,dataset:getImage(opts.img_idx[i])) + end + local boxes,labels = self:selectBBoxes(fg_windows,bg_windows) + self.batches = self.feat_provider:getFeature(imgs,boxes,opts.do_flip) + + targets:resize(labels:size()):copy(labels) + + return self.batches, self.targets +end diff --git a/BatchProvider.lua b/BatchProviderRC.lua similarity index 57% rename from BatchProvider.lua rename to BatchProviderRC.lua index 977bdc7..2770036 100644 --- a/BatchProvider.lua +++ b/BatchProviderRC.lua @@ -1,109 +1,82 @@ -local BatchProvider = torch.class('nnf.BatchProvider') +local BatchProvider,parent = + torch.class('nnf.BatchProviderRC','nnf.BatchProviderBase') -local function createWindowBase(rec,i,j,is_bg) - local label = is_bg == true and 0+1 or rec.label[j]+1 - local window = {i,rec.boxes[j][1],rec.boxes[j][2], - 
rec.boxes[j][3],rec.boxes[j][4], - label} - return window -end -local function createWindowAngle(rec,i,j,is_bg) - local label = is_bg == true and 0+1 or rec.label[j]+1 - --local ang = ( is_bg == false and rec.objects[rec.correspondance[j] ] ) and - -- rec.objects[rec.correspondance[j] ].viewpoint.azimuth or 0 - local ang - if is_bg == false and rec.objects[rec.correspondance[j] ] then - if rec.objects[rec.correspondance[j] ].viewpoint.distance == '0' then - ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth_coarse - else - ang = rec.objects[rec.correspondance[j] ].viewpoint.azimuth - end - else - ang = 0 - end - local window = {i,rec.boxes[j][1],rec.boxes[j][2], - rec.boxes[j][3],rec.boxes[j][4], - label,ang} - return window -end - -function BatchProvider:__init(feat_provider) - self.dataset = feat_provider.dataset - self.feat_provider = feat_provider +local argcheck = require 'argcheck' - self.nTimesMoreData = 10 - self.iter_per_batch = 500 - - self.batch_size = 128 - self.fg_fraction = 0.25 - - self.fg_threshold = 0.5 - self.bg_threshold = {0.0,0.5} - - self.createWindow = createWindowBase--createWindowAngle - - self.batch_dim = {256*50} - self.target_dim = 1 - - self.do_flip = true - - --self:setupData() +local env = require 'argcheck.env' -- retrieve argcheck environement +-- this is the default type function +-- which can be overrided by the user +function env.istype(obj, typename) + if typename == 'DataSet' then + return obj and obj._isDataSet + end + if typename == 'FeatureProvider' then + return obj and obj._isFeatureProvider + end + return torch.type(obj) == typename end -function BatchProvider:setupData() - local dataset = self.dataset - local bb = {} - local bbT = {} +local initcheck = argcheck{ + pack=true, + noordered=true, + {name="dataset", + type="DataSet", + help="A dataset class" + }, + {name="feat_provider", + type="FeatureProvider", + help="A feat provider class" + }, + {name="batch_size", + type="number", + default=128, + help="batch size"}, + {name="iter_per_batch", + type="number", + default=10, + help=""}, + {name="nTimesMoreData", + type="number", + default=10, + help=""}, + {name="fg_fraction", + type="number", + default=0.25, + help="foreground fraction in batch" + }, + {name="fg_threshold", + type="number", + default=0.5, + help="foreground threshold" + }, + {name="bg_threshold", + type="table", + default={0.1,0.5}, + help="background threshold, in the form {LO,HI}" + }, + {name="target_dim", + type="number", + default=1, + help=""}, + {name="do_flip", + type="boolean", + default=true, + help="sample batches with random flips" + }, +} - for i=0,dataset.num_classes do -- 0 because of background - bb[i] = {} - end +function BatchProvider:__init(...) + parent.__init(self) - for i=1,dataset.num_imgs do - bbT[i] = {} - end + local opts = initcheck(...) 
+ for k,v in pairs(opts) do self[k] = v end - for i = 1,dataset.num_imgs do - if dataset.num_imgs > 10 then - xlua.progress(i,dataset.num_imgs) - end - - local rec = dataset:attachProposals(i) + self.batch_dim = self.feat_provider.output_size - for j=1,rec:size() do - local id = rec.label[j] - local is_fg = (rec.overlap[j] >= self.fg_threshold) - local is_bg = (not is_fg) and (rec.overlap[j] >= self.bg_threshold[1] and - rec.overlap[j] < self.bg_threshold[2]) - if is_fg then - local window = self.createWindow(rec,i,j,is_bg) - table.insert(bb[1], window) -- could be id instead of 1 - elseif is_bg then - local window = self.createWindow(rec,i,j,is_bg) - table.insert(bb[0], window) - end - - end - - for j=0,dataset.num_classes do -- 0 because of background - if #bb[j] > 0 then - bbT[i][j] = torch.FloatTensor(bb[j]) - end - end - - bb = {} - for i=0,dataset.num_classes do -- 0 because of background - bb[i] = {} - end - collectgarbage() - end - self.bboxes = bbT - --return bbT end - function BatchProvider:permuteIdx() local fg_num_each = self.fg_num_each local bg_num_each = self.bg_num_each @@ -183,21 +156,12 @@ function BatchProvider:selectBBoxes(fg_windows,bg_windows) return fg_w,bg_w end - --- specific for angle estimation -local function flip_angle(x) - return (-x)%360 -end - -- depends on the model -function BatchProvider:prepareFeatures(im_idx,bboxes,fg_data,bg_data,fg_label,bg_label) +function BatchProvider:prepareFeatures(im_idx,bboxes,fg_label,bg_label) local num_pos = bboxes[1] and #bboxes[1] or 0 local num_neg = bboxes[0] and #bboxes[0] or 0 - fg_data:resize(num_pos,unpack(self.batch_dim)) - bg_data:resize(num_neg,unpack(self.batch_dim)) - fg_label:resize(num_pos,self.target_dim) bg_label:resize(num_neg,self.target_dim) @@ -205,36 +169,29 @@ function BatchProvider:prepareFeatures(im_idx,bboxes,fg_data,bg_data,fg_label,bg if self.do_flip then flip = torch.random(0,1) == 0 end - --print(bboxes) + + local s_boxes = {} for i=1,num_pos do - --local bbox = bboxes[1][{i,{2,5}}] local bbox = {bboxes[1][i][2],bboxes[1][i][3],bboxes[1][i][4],bboxes[1][i][5]} - fg_data[i] = self.feat_provider:getFeature(im_idx,bbox,flip) + table.insert(s_boxes,bbox) fg_label[i][1] = bboxes[1][i][6] ---[[ if flip then - fg_label[i][2] = flip_angle(bboxes[1][i][7]) - else - fg_label[i][2] = bboxes[1][i][7] - end -]] end for i=1,num_neg do - --local bbox = bboxes[0][{i,{2,5}}] local bbox = {bboxes[0][i][2],bboxes[0][i][3],bboxes[0][i][4],bboxes[0][i][5]} - bg_data[i] = self.feat_provider:getFeature(im_idx,bbox,flip) + table.insert(s_boxes,bbox) bg_label[i][1] = bboxes[0][i][6] ---[[ if flip then - bg_label[i][2] = flip_angle(bboxes[0][i][7]) - else - bg_label[i][2] = bboxes[0][i][7] - end]] end - --- return fg_data,bg_data,fg_label,bg_label + + -- compute the features + local feats = self.feat_provider:getFeature(im_idx,s_boxes,flip) + local fg_data = num_pos > 0 and feats:narrow(1,1,num_pos) or nil + local bg_data = num_neg > 0 and feats:narrow(1,num_pos+1,num_neg) or nil + + return fg_data, bg_data end -function BatchProvider:getBatch(batches,targets) +function BatchProvider:prepareBatch(batches,targets) local dataset = self.dataset self.fg_num_each = self.fg_fraction * self.batch_size @@ -257,11 +214,11 @@ function BatchProvider:getBatch(batches,targets) local bg_counter = 0 local fg_data,bg_data,fg_label,bg_label - fg_data = torch.FloatTensor() - bg_data = torch.FloatTensor() fg_label = torch.IntTensor() bg_label = torch.IntTensor() + local pass_index = torch.type(self.feat_provider) == 'nnf.SPP' and true or 
false + print('==> Preparing Batch Data') for i=1,opts.img_idx_end do xlua.progress(i,opts.img_idx_end) @@ -278,7 +235,13 @@ function BatchProvider:getBatch(batches,targets) bboxes[0] = bg_w[curr_idx] bboxes[1] = fg_w[curr_idx] - self:prepareFeatures(curr_idx,bboxes,fg_data,bg_data,fg_label,bg_label) + local data + if pass_index then + data = curr_idx + else + data = dataset:getImage(curr_idx) + end + fg_data,bg_data = self:prepareFeatures(data,bboxes,fg_label,bg_label) for j=1,nbg do bg_counter = bg_counter + 1 @@ -297,6 +260,24 @@ function BatchProvider:getBatch(batches,targets) batches[b][s]:copy(fg_data[j]) targets[b][s]:copy(fg_label[j]) end + collectgarbage() end + collectgarbage() return batches,targets end + +function BatchProvider:getBatch() + self._cur = self._cur or math.huge + -- we have reached the end of our batch pool, need to recompute + if self._cur > self.iter_per_batch then + self._batches,self._targets = self:prepareBatch(self._batches,self._targets) + self._cur = 1 + end + + self.batches = self._batches[self._cur] + self.targets = self._targets[self._cur] + self._cur = self._cur + 1 + + return self.batches, self.targets + +end diff --git a/DataSetCOCO.lua b/DataSetCOCO.lua new file mode 100644 index 0000000..6b2a2e0 --- /dev/null +++ b/DataSetCOCO.lua @@ -0,0 +1,155 @@ +--local json = require 'dkjson' + +local DataSetCOCO,parent = torch.class('nnf.DataSetCOCO', 'nnf.DataSetDetection') + +function DataSetCOCO:__init(annFile) + self.image_set = nil + self.dataset_name = 'COCO' + + local timer = torch.Timer() + local localtimer = torch.Timer() + print('Preparing COCO dataset...') + --[[ + if type(annFile) == 'string' then + local f = io.open(annFile) + local str = f:read('*all') + f:close() + + self.data = json.decode(str) + + else + self.data = torch.load(annFile) + end + --]] + self.data = torch.load('coco_val.t7') + print((' Loaded annotations file in %.2fs'):format(localtimer:time().real)) + localtimer:reset() + + -- mapping images + local img_idx = {} + local img_idx_map = {} + for i = 1, #self.data.images do + table.insert(img_idx,self.data.images[i].id) + img_idx_map[self.data.images[i].id] = i + end + print((' Mapped images in %.4fs'):format(localtimer:time().real)) + localtimer:reset() + + -- mapping annotations + local ann = self.data.annotations + local o = {} + + for k, v in ipairs(ann) do + table.insert(o,v.image_id*1e10 + v.category_id) + end + o = torch.LongTensor(o) + local _,ox = o:sort() + local o_data = ox:data() + local temp_ann = {} + for i=1 , o:size(1) do + table.insert(temp_ann, ann[ox[i] ]) + end + self.data.annotations = temp_ann + + local ann_idx = {} + local ann_idx_map = {} + local ann_img_idx = {} + local img_ann_idx_map = {} + for k,v in ipairs(temp_ann) do + table.insert(ann_idx, v.id) + ann_idx_map[v.id] = k + table.insert(ann_img_idx, v.image_id) + if not img_ann_idx_map[v.image_id] then + img_ann_idx_map[v.image_id] = {} + end + table.insert(img_ann_idx_map[v.image_id],v.id) + end + + self.inds = {img_idx = img_idx, + img_idx_map = img_idx_map, + ann_idx = ann_idx, + ann_idx_map = ann_idx_map, + ann_img_idx = ann_img_idx, + img_ann_idx_map = img_ann_idx_map + } + print((' Mapped annotations in %.4fs'):format(localtimer:time().real)) + localtimer:reset() + + -- mapping classes + self.classes = {} + self.class_to_id = {} + self.class_cont = {} + self.class_cont_map = {} + self.num_classes = 0 + for k,v in ipairs(self.data.categories) do + self.classes[v.id] = v.name + self.class_to_id[v.name] = v.id + table.insert(self.class_cont,v.id) + 
self.class_cont_map[v.id] = k + self.num_classes = self.num_classes + 1 + end + + print((' Total elapsed time: %.4fs'):format(timer:time().real)) + +end + +function DataSetCOCO:getImage(i) + local file_name = self.images[i].file_name + return image.load(paths.concat(self.imgpath,file_name),3,'float') +end + +function DataSetCOCO:getAnnotation(i) + local ann = {object = {}} + local im_id = self.inds.img_idx[i] + local ann_id = self.inds.img_ann_idx_map[im_id] or {} + for k,v in ipairs(ann_id) do + local lann = self.data.annotations[self.inds.ann_idx_map[v] ] + local bbox = {xmin=lann.bbox[1]+1,ymin=lann.bbox[2]+1, + xmax=lann.bbox[1]+lann.bbox[3]+1, + ymax=lann.bbox[2]+lann.bbox[4]+1, + } + local obj = {bndbox=bbox, + class=lann.category_id, + difficult = '0', + name = self.classes[lann.category_id] + } + table.insert(ann.object,obj) + end + return ann +end + +function DataSetCOCO:getGTBoxes(i) + local anno = self:getAnnotation(i) + local valid_objects = {} + local gt_boxes = torch.IntTensor() + local gt_classes = {} + + if self.with_hard_samples then -- inversed with respect to RCNN code + for idx,obj in ipairs(anno.object) do + if self.class_to_id[obj.name] then -- to allow a subset of the classes + table.insert(valid_objects,idx) + end + end + else + for idx,obj in ipairs(anno.object) do + if obj.difficult == '0' and self.class_to_id[obj.name] then + table.insert(valid_objects,idx) + end + end + end + + gt_boxes:resize(#valid_objects,4) + for idx0,idx in ipairs(valid_objects) do + gt_boxes[idx0][1] = anno.object[idx].bndbox.xmin + gt_boxes[idx0][2] = anno.object[idx].bndbox.ymin + gt_boxes[idx0][3] = anno.object[idx].bndbox.xmax + gt_boxes[idx0][4] = anno.object[idx].bndbox.ymax + + table.insert(gt_classes,self.class_cont_map[anno.object[idx].class]) + end + + return gt_boxes,gt_classes,valid_objects,anno + +end + + diff --git a/DataSetDetection.lua b/DataSetDetection.lua new file mode 100644 index 0000000..a557ece --- /dev/null +++ b/DataSetDetection.lua @@ -0,0 +1,113 @@ +local utilities = paths.dofile('utils.lua') +local concat = utilities.concat +local boxoverlap = utilities.boxoverlap + +local DataSetDetection = torch.class('nnf.DataSetDetection') +DataSetDetection._isDataSet = true + +function DataSetDetection:__init() + self.classes = nil + self.num_classes = nil + self.image_set = nil + self.dataset_name = nil +end + +function DataSetDetection:getImage(i) +end + +function DataSetDetection:getAnnotation(i) +end + +function DataSetDetection:getROIBoxes(i) +end + +function DataSetDetection:getGTBoxes(i) +end + +function DataSetDetection:size() + return #self.img_ids +end + +function DataSetDetection:__tostring__() + local str = torch.type(self) + str = str .. '\n Dataset Name: ' .. self.dataset_name + str = str .. '\n ImageSet: '.. self.image_set + str = str .. '\n Number of images: '.. self:size() + str = str .. '\n Classes:' + for k,v in ipairs(self.classes) do + str = str .. 
'\n '..v + end + return str +end + +function DataSetDetection:bestOverlap(all_boxes, gt_boxes, gt_classes) + local num_total_boxes = all_boxes:size(1) + local num_gt_boxes = gt_boxes:dim() > 0 and gt_boxes:size(1) or 0 + local overlap_class = torch.FloatTensor(num_total_boxes,self.num_classes):zero() + local overlap = torch.FloatTensor(num_total_boxes,num_gt_boxes):zero() + for idx=1,num_gt_boxes do + local o = boxoverlap(all_boxes,gt_boxes[idx]) + local tmp = overlap_class[{{},gt_classes[idx]}] -- pointer copy + tmp[tmp:lt(o)] = o[tmp:lt(o)] + overlap[{{},idx}] = o + end + -- get max class overlap + --rec.overlap,rec.label = rec.overlap:max(2) + --rec.overlap = torch.squeeze(rec.overlap,2) + --rec.label = torch.squeeze(rec.label,2) + --rec.label[rec.overlap:eq(0)] = 0 + local correspondance + if num_gt_boxes > 0 then + overlap,correspondance = overlap:max(2) + overlap = torch.squeeze(overlap,2) + correspondance = torch.squeeze(correspondance,2) + correspondance[overlap:eq(0)] = 0 + else + overlap = torch.FloatTensor(num_total_boxes):zero() + correspondance = torch.LongTensor(num_total_boxes):zero() + end + return overlap, correspondance, overlap_class +end + +function DataSetDetection:attachProposals(i) + + local boxes = self:getROIBoxes(i) + local gt_boxes,gt_classes,valid_objects,anno = self:getGTBoxes(i) + + local all_boxes = concat(gt_boxes,boxes,1) + + local num_boxes = boxes:dim() > 0 and boxes:size(1) or 0 + local num_gt_boxes = #gt_classes + + local rec = {} + rec.gt = concat(torch.ByteTensor(num_gt_boxes):fill(1), + torch.ByteTensor(num_boxes):fill(0) ) + + rec.overlap, rec.correspondance, rec.overlap_class = + self:bestOverlap(all_boxes,gt_boxes,gt_classes) + rec.label = torch.IntTensor(num_boxes+num_gt_boxes):fill(0) + for idx=1,(num_boxes+num_gt_boxes) do + local corr = rec.correspondance[idx] + if corr > 0 then + rec.label[idx] = gt_classes[corr] + end + end + + rec.boxes = all_boxes + rec.class = concat(torch.CharTensor(gt_classes), + torch.CharTensor(num_boxes):fill(0)) + + if self.save_objs then + rec.objects = {} + for _,idx in pairs(valid_objects) do + table.insert(rec.objects,anno.object[idx]) + end + end + + function rec:size() + return (num_boxes+num_gt_boxes) + end + + return rec +end + diff --git a/DataSetPascal.lua b/DataSetPascal.lua index 365f93f..9e403df 100644 --- a/DataSetPascal.lua +++ b/DataSetPascal.lua @@ -1,10 +1,13 @@ local matio = require 'matio' -local argcheck = require 'argcheck' +local argcheck = dofile'argcheck.lua'--require 'argcheck' local xml = require 'xml' +local utilities = paths.dofile('utils.lua') +local concat = utilities.concat +local boxoverlap = utilities.boxoverlap matio.use_lua_strings = true -local DataSetPascal = torch.class('nnf.DataSetPascal') +local DataSetPascal,parent = torch.class('nnf.DataSetPascal', 'nnf.DataSetDetection') local function lines_from(file) -- get all lines from a file, returns an empty @@ -59,6 +62,7 @@ local initcheck = argcheck{ if type(v) ~= 'string' then print('classes can only be of string input'); out = false + break end end return out @@ -102,7 +106,7 @@ local initcheck = argcheck{ } function DataSetPascal:__init(...) - + parent.__init(self) local args = initcheck(...) 
print(args) for k,v in pairs(args) do self[k] = v end @@ -167,7 +171,7 @@ function DataSetPascal:size() end function DataSetPascal:getImage(i) - return image.load(string.format(self.imgpath,self.img_ids[i])) + return image.load(string.format(self.imgpath,self.img_ids[i]),3,'float') end @@ -247,34 +251,6 @@ function DataSetPascal:getROIBoxes(i) return self.roidb[i]--self.roidb[self.img2roidb[self.img_ids[i] ] ] end -local function boxoverlap(a,b) - local b = b.xmin and {b.xmin,b.ymin,b.xmax,b.ymax} or b - - local x1 = a:select(2,1):clone() - x1[x1:lt(b[1])] = b[1] - local y1 = a:select(2,2):clone() - y1[y1:lt(b[2])] = b[2] - local x2 = a:select(2,3):clone() - x2[x2:gt(b[3])] = b[3] - local y2 = a:select(2,4):clone() - y2[y2:gt(b[4])] = b[4] - - local w = x2-x1+1; - local h = y2-y1+1; - local inter = torch.cmul(w,h):float() - local aarea = torch.cmul((a:select(2,3)-a:select(2,1)+1) , - (a:select(2,4)-a:select(2,2)+1)):float() - local barea = (b[3]-b[1]+1) * (b[4]-b[2]+1); - - -- intersection over union overlap - local o = torch.cdiv(inter , (aarea+barea-inter)) - -- set invalid entries to 0 overlap - o[w:lt(0)] = 0 - o[h:lt(0)] = 0 - - return o -end - function DataSetPascal:getGTBoxes(i) local anno = self:getAnnotation(i) local valid_objects = {} @@ -309,113 +285,17 @@ function DataSetPascal:getGTBoxes(i) end -function DataSetPascal:attachProposals(i) - - if not self.roidb then - self:loadROIDB() - end - - local boxes = self:getROIBoxes(i) - local gt_boxes,gt_classes,valid_objects,anno = self:getGTBoxes(i) - - local all_boxes - if anno.object then - if #valid_objects > 0 and boxes:dim() > 0 then - all_boxes = torch.cat(gt_boxes,boxes,1) - elseif boxes:dim() == 0 then - all_boxes = gt_boxes - else - all_boxes = boxes - end - else - gt_boxes = torch.IntTensor(0,4) - all_boxes = boxes - end - - local num_boxes = boxes:dim() > 0 and boxes:size(1) or 0 - local num_gt_boxes = #gt_classes - - local rec = {} - if num_gt_boxes > 0 and num_boxes > 0 then - rec.gt = torch.cat(torch.ByteTensor(num_gt_boxes):fill(1), - torch.ByteTensor(num_boxes):fill(0) ) - elseif num_boxes > 0 then - rec.gt = torch.ByteTensor(num_boxes):fill(0) - elseif num_gt_boxes > 0 then - rec.gt = torch.ByteTensor(num_gt_boxes):fill(1) - else - rec.gt = torch.ByteTensor(0) - end - - rec.overlap_class = torch.FloatTensor(num_boxes+num_gt_boxes,self.num_classes):fill(0) - rec.overlap = torch.FloatTensor(num_boxes+num_gt_boxes,num_gt_boxes):fill(0) - for idx=1,num_gt_boxes do - local o = boxoverlap(all_boxes,gt_boxes[idx]) - local tmp = rec.overlap_class[{{},gt_classes[idx]}] -- pointer copy - tmp[tmp:lt(o)] = o[tmp:lt(o)] - rec.overlap[{{},idx}] = boxoverlap(all_boxes,gt_boxes[idx]) - end - -- get max class overlap - --rec.overlap,rec.label = rec.overlap:max(2) - --rec.overlap = torch.squeeze(rec.overlap,2) - --rec.label = torch.squeeze(rec.label,2) - --rec.label[rec.overlap:eq(0)] = 0 - - if num_gt_boxes > 0 then - rec.overlap,rec.correspondance = rec.overlap:max(2) - rec.overlap = torch.squeeze(rec.overlap,2) - rec.correspondance = torch.squeeze(rec.correspondance,2) - rec.correspondance[rec.overlap:eq(0)] = 0 - else - rec.overlap = torch.FloatTensor(num_boxes+num_gt_boxes):fill(0) - rec.correspondance = torch.LongTensor(num_boxes+num_gt_boxes):fill(0) - end - rec.label = torch.IntTensor(num_boxes+num_gt_boxes):fill(0) - for idx=1,(num_boxes+num_gt_boxes) do - local corr = rec.correspondance[idx] - if corr > 0 then - rec.label[idx] = self.class_to_id[anno.object[valid_objects[corr] ].name] - end - end - - rec.boxes = all_boxes - 
if num_gt_boxes > 0 and num_boxes > 0 then - rec.class = torch.cat(torch.CharTensor(gt_classes), - torch.CharTensor(num_boxes):fill(0)) - elseif num_boxes > 0 then - rec.class = torch.CharTensor(num_boxes):fill(0) - elseif num_gt_boxes > 0 then - rec.class = torch.CharTensor(gt_classes) - else - rec.class = torch.CharTensor(0) - end - - if self.save_objs then - rec.objects = {} - for _,idx in pairs(valid_objects) do - table.insert(rec.objects,anno.object[idx]) - end - else - rec.correspondance = nil - end - - function rec:size() - return (num_boxes+num_gt_boxes) - end - - return rec -end - function DataSetPascal:createROIs() if self.rois then return end self.rois = {} for i=1,self.num_imgs do - xlua.progress(i,self.num_imgs) table.insert(self.rois,self:attachProposals(i)) if i%500 == 0 then + xlua.progress(i,self.num_imgs) collectgarbage() end end + xlua.progress(self.num_imgs,self.num_imgs) end diff --git a/FRCNN.lua b/FRCNN.lua new file mode 100644 index 0000000..9947127 --- /dev/null +++ b/FRCNN.lua @@ -0,0 +1,185 @@ +local flipBoundingBoxes = paths.dofile('utils.lua').flipBoundingBoxes +local recursiveResizeAsCopyTyped = paths.dofile('utils.lua').recursiveResizeAsCopyTyped +local FRCNN = torch.class('nnf.FRCNN') +FRCNN._isFeatureProvider = true + +local argcheck = require 'argcheck' +local initcheck = argcheck{ + pack=true, + noordered=true, + {name="scale", + type="table", + default={600}, + help="image scales"}, + {name="max_size", + type="number", + default=1000, + help="maximum dimension of an image"}, + {name="inputArea", + type="number", + default=224^2, + help="input area of the bounding box"}, + {name="image_transformer", + type="nnf.ImageTransformer", + default=nnf.ImageTransformer{}, + help="Class to preprocess input images"}, +} + + +function FRCNN:__init(...) + + local opts = initcheck(...) 
+ for k,v in pairs(opts) do self[k] = v end + + self.train = true +end + +function FRCNN:training() + self.train = true +end + +function FRCNN:evaluate() + self.train = false +end + +function FRCNN:processImages(input_imgs,do_flip) + local output_imgs = self._feat[1] + local num_images + local im + if self.train then + num_images = #input_imgs + else + num_images = #self.scale + im = self.image_transformer:preprocess(input_imgs[1]) + end + + local imgs = {} + local im_sizes = {} + local im_scales = {} + + for i=1,num_images do + local scale + if self.train then + im = input_imgs[i] + im = self.image_transformer:preprocess(im) + scale = self.scale[math.random(1,#self.scale)] + else + scale = self.scale[i] + end + local flip = do_flip and (do_flip[i] == 1) or false + if flip then + im = image.hflip(im) + end + local im_size = im[1]:size() + local im_size_min = math.min(im_size[1],im_size[2]) + local im_size_max = math.max(im_size[1],im_size[2]) + local im_scale = scale/im_size_min + if torch.round(im_scale*im_size_max) > self.max_size then + im_scale = self.max_size/im_size_max + end + local im_s = {torch.round(im_size[1]*im_scale),torch.round(im_size[2]*im_scale)} + table.insert(imgs,image.scale(im,im_s[2],im_s[1])) + table.insert(im_sizes,im_s) + table.insert(im_scales,im_scale) + end + -- create single tensor with all images, padding with zero for different sizes + im_sizes = torch.IntTensor(im_sizes) + local max_shape = im_sizes:max(1)[1] + output_imgs:resize(num_images,3,max_shape[1],max_shape[2]):zero() + for i=1,num_images do + output_imgs[i][{{},{1,imgs[i]:size(2)},{1,imgs[i]:size(3)}}]:copy(imgs[i]) + end + return im_scales,im_sizes +end + +function FRCNN:projectImageROIs(im_rois,scales,do_flip,imgs_size) + local rois = self._feat[2] + -- we consider two cases: + -- During training, the scales are sampled randomly per image, so + -- in the same image all the bboxes have the same scale, and we only + -- need to take into account the different images that are provided. 
+ -- During testing, we consider that there is only one image at a time, + -- and the scale for each bbox is the one which makes its area closest + -- to self.inputArea + if self.train or #scales == 1 then + local total_bboxes = 0 + local cumul_bboxes = {0} + for i=1,#scales do + total_bboxes = total_bboxes + im_rois[i]:size(1) + table.insert(cumul_bboxes,total_bboxes) + end + rois:resize(total_bboxes,5) + for i=1,#scales do + local idx = {cumul_bboxes[i]+1,cumul_bboxes[i+1]} + rois[{idx,1}]:fill(i) + rois[{idx,{2,5}}]:copy(im_rois[i]):add(-1):mul(scales[i]):add(1) + if do_flip and do_flip[i] == 1 then + flipBoundingBoxes(rois[{idx,{2,5}}],imgs_size[{i,2}]) + end + end + else -- not yet tested + error('Multi-scale testing not yet tested') + local scales = torch.FloatTensor(scales) + im_rois = im_rois[1] + local widths = im_rois[{{},3}] - im_rois[{{},1}] + 1 + local heights = im_rois[{{},4}] - im_rois[{{}, 2}] + 1 + + local areas = widths * heights + local scaled_areas = areas:view(-1,1) * scales:view(1,-1):pow(2) + local diff_areas = scaled_areas:add(-1,self.inputArea):abs() -- no memory copy + local levels = select(2, diff_areas:min(2)) + + local num_boxes = im_rois:size(1) + rois:resize(num_boxes,5) + for i=1,num_boxes do + local s = levels[i] + rois[{i,{2,5}}]:copy(im_rois[i]):add(-1):mul(scales[s]):add(1) + rois[{i,1}] = s + end + end + return rois +end + +function FRCNN:getFeature(imgs,bboxes,flip) + self._feat = self._feat or {torch.FloatTensor(),torch.FloatTensor()} + + -- if it's in test mode, adapt inputs + if torch.isTensor(imgs) then + imgs = {imgs} + if type(bboxes) == 'table' then + bboxes = torch.FloatTensor(bboxes) + bboxes = bboxes:dim() == 1 and bboxes:view(1,-1) or bboxes + end + bboxes = {bboxes} + if flip == false then + flip = {0} + elseif flip == true then + flip = {1} + end + end + + local im_scales, im_sizes = self:processImages(imgs,flip) + self:projectImageROIs(bboxes,im_scales,flip,im_sizes) + + return self._feat +end + +-- do the bbox regression +function FRCNN:postProcess(im,boxes,output) + -- not implemented yet + return output,boxes +end + +function FRCNN:compute(model, inputs) + local ttype = model.output:type() -- fix when doing bbox regression + self.inputs,inputs = recursiveResizeAsCopyTyped(self.inputs,inputs,ttype) + return model:forward(self.inputs) +end + +function FRCNN:__tostring() + local str = torch.type(self) + str = str .. '\n Image scales: [' .. table.concat(self.scale,', ')..']' + str = str .. '\n Max image size: ' .. self.max_size + str = str .. '\n Input area: ' .. 
self.inputArea + return str +end diff --git a/ImageDetect.lua b/ImageDetect.lua new file mode 100644 index 0000000..d3140df --- /dev/null +++ b/ImageDetect.lua @@ -0,0 +1,22 @@ +local ImageDetect = torch.class('nnf.ImageDetect') +local recursiveResizeAsCopyTyped = paths.dofile('utils.lua').recursiveResizeAsCopyTyped + +function ImageDetect:__init(model, feat_provider) + self.model = model + self.feat_provider = feat_provider + --self.sm = nn.SoftMax():cuda() +end + +-- supposes boxes is in [x1,y1,x2,y2] format +function ImageDetect:detect(im,boxes) + local feat_provider = self.feat_provider + + local inputs = feat_provider:getFeature(im,boxes) + + local output0 = feat_provider:compute(self.model, inputs) + local output,boxes_p = feat_provider:postProcess(im,boxes,output0) + --self.sm:forward(output0) + + self.output,output = recursiveResizeAsCopyTyped(self.output,output,'torch.FloatTensor') + return self.output,boxes_p +end diff --git a/ImageTransformer.lua b/ImageTransformer.lua index d7b213b..3bdb175 100644 --- a/ImageTransformer.lua +++ b/ImageTransformer.lua @@ -37,3 +37,12 @@ function ImageTransformer:preprocess(I) return I end +function ImageTransformer:__tostring() + local str = torch.type(self) + if self.swap then + str = str .. '\n Channel swap: [' .. table.concat(self.swap,', ') .. ']' + end + str = str .. '\n Raw scale: '.. self.raw_scale + str = str .. '\n Mean pixel: [' .. table.concat(self.mean_pix,', ') .. ']' + return str +end diff --git a/RCNN.lua b/RCNN.lua index 03651d3..13b87a9 100644 --- a/RCNN.lua +++ b/RCNN.lua @@ -1,53 +1,53 @@ -local RCNN = torch.class('nnf.RCNN') +local flipBoundingBoxes = paths.dofile('utils.lua').flipBoundingBoxes -function RCNN:__init(dataset) - self.dataset = dataset - self.image_transformer = nnf.ImageTransformer{ - mean_pix={123.68/255,116.779/255,103.939/255}} - - self.crop_size = 227 - self.image_mean = nil - self.padding = 16 - self.use_square = false - -end +local argcheck = require 'argcheck' +local initcheck = argcheck{ + pack=true, + noordered=true, + {name="crop_size", + type="number", + default=227, + help="crop size"}, + {name="padding", + type="number", + default=16, + help="context padding"}, + {name="use_square", + type="boolean", + default=false, + help="force square crops"}, + {name="image_transformer", + type="nnf.ImageTransformer", + default=nnf.ImageTransformer{}, + help="Class to preprocess input images"}, + {name="max_batch_size", + type="number", + default=128, + help="maximum size of batches during evaluation"}, + {name="num_threads", + type="number", + default=8, + help="number of threads for bounding box cropping"}, + {name="iter_per_thread", + type="number", + default=8, + help="number of bbox croppings per thread"}, + {name="dataset", + type="nnf.DataSetPascal", -- change to allow other datasets + opt=true, + help="A dataset class"}, +} -function RCNN:getCrop(im_idx,bbox,flip) - -- suppose I is in BGR, as image_mean - -- [x1 y1 x2 y2] order - local flip = flip==nil and false or flip - - if self.curr_im_idx ~= im_idx or self.curr_doflip ~= flip then - self.curr_im_idx = im_idx - self.curr_im_feats = self.dataset:getImage(im_idx):float() - self.curr_im_feats = self.image_transformer:preprocess(self.curr_im_feats) - if flip then - self.curr_im_feats = image.hflip(self.curr_im_feats) - end - self.curr_doflip = flip - end - - local I = self.curr_im_feats - local bbox = bbox - - if flip then - local tt = bbox[1] - bbox[1] = I:size(3)-bbox[3]+1 - bbox[3] = I:size(3)-tt +1 - end - - local crop_size = self.crop_size - local 
image_mean = self.image_mean - local padding = self.padding - local use_square = self.use_square +local RCNN = torch.class('nnf.RCNN') +RCNN._isFeatureProvider = true + +local function RCNNCrop(output,I,box,crop_size,padding,use_square,crop_buffer) local pad_w = 0; local pad_h = 0; local crop_width = crop_size; local crop_height = crop_size; - - --local bbox = {bbox[2],bbox[1],bbox[4],bbox[3]} - + local bbox = {box[1],box[2],box[3],box[4]} ------ if padding > 0 or use_square then local scale = crop_size/(crop_size - padding*2) @@ -98,30 +98,177 @@ function RCNN:getCrop(im_idx,bbox,flip) end -- padding > 0 || square ------ - --local patch = image.crop(I,bbox[1],bbox[2],bbox[3],bbox[4]); - local patch = image.crop(I,bbox[1],bbox[2],bbox[3],bbox[4]):float(); - local tmp = image.scale(patch,crop_width,crop_height,'bilinear'); + local patch = I[{{},{bbox[2],bbox[4]},{bbox[1],bbox[3]}}] + crop_buffer:resize(3,crop_height,crop_width) + image.scale(crop_buffer,patch,'bilinear'); + + output[{{},{pad_h+1,pad_h+crop_height}, {pad_w+1,pad_w+crop_width}}] = crop_buffer - if image_mean then - tmp = tmp - image_mean[{{},{pad_h+1,pad_h+crop_height}, - {pad_w+1,pad_w+crop_width}}] +end + + +function RCNN:__init(...) + + local opts = initcheck(...) + for k,v in pairs(opts) do self[k] = v end + + self.output_size = {3,self.crop_size,self.crop_size} + self.train = true + + if self.num_threads > 1 then + local crop_size = self.crop_size + local threads = require 'threads' + threads.serialization('threads.sharedserialize') + self.donkeys = threads.Threads( + self.num_threads, + function() + require 'torch' + require 'image' + end, + function(idx) + RCNNCrop = RCNNCrop + torch.setheaptracking(true) + crop_buffer = torch.FloatTensor(3,crop_size,crop_size) + print(string.format('Starting RCNN thread with id: %d', idx)) + end + ) end +end + +function RCNN:training() + self.train = true +end + +function RCNN:evaluate() + self.train = false +end - --patch = torch.zeros(3,crop_size,crop_size):typeAs(I) - patch = torch.zeros(3,crop_size,crop_size):float() +function RCNN:getCrop(output,I,bbox) + -- [x1 y1 x2 y2] order + + local crop_size = self.crop_size + local padding = self.padding + local use_square = self.use_square - patch[{{},{pad_h+1,pad_h+crop_height}, {pad_w+1,pad_w+crop_width}}] = tmp + self._crop_buffer = self._crop_buffer or torch.FloatTensor(3,crop_size,crop_size) + RCNNCrop(output,I,bbox,crop_size,padding,use_square,self._crop_buffer) - return patch + return output end -function RCNN:getFeature(im_idx,bbox,flip) +function RCNN:getFeature(im,bbox,flip) local flip = flip==nil and false or flip + + if type(im) == 'number' then + assert(self.dataset, 'you must provide a dataset if using numeric indices') + im = self.dataset:getImage(im) + end + + if torch.type(im) ~= 'torch.FloatTensor' then + -- force image to be float + self._im = self._im or torch.FloatTensor() + self._im:resize(im:size()):copy(im) + im = self._im + end + + if type(bbox) == 'table' then + bbox = torch.FloatTensor(bbox) + elseif torch.isTensor(bbox) and flip then + -- creates a copy of the bboxes to avoid modifying the original + -- bboxes in the flipping + self._bbox = self._bbox or torch.FloatTensor() + self._bbox:resize(bbox:size()):copy(bbox) + bbox = self._bbox + end - local crop_feat = self:getCrop(im_idx,bbox,flip) + im = self.image_transformer:preprocess(im) + bbox = bbox:dim() == 1 and bbox:view(1,-1) or bbox + local num_boxes = bbox:size(1) + + if flip then + im = image.hflip(im) + flipBoundingBoxes(bbox,im:size(3)) + end + + 
self._feat = self._feat or torch.FloatTensor() + + self._feat:resize(num_boxes,table.unpack(self.output_size)):zero() + + -- use threads to speed up bbox processing + if self.num_threads > 1 and num_boxes > self.iter_per_thread then + local feat = self._feat + local img = im + local bndbox = bbox + local crop_size = self.crop_size + local padding = self.padding + local use_square = self.use_square + local iter_per_thread = self.iter_per_thread + local num_launches = math.ceil(num_boxes/iter_per_thread) + for i=1,num_launches do + local iter_per_thread_local + if i == num_launches then + -- last thread launches the remainder of the bboxes + iter_per_thread_local = (num_boxes-1)%iter_per_thread + 1 + else + iter_per_thread_local = iter_per_thread + end + self.donkeys:addjob( + function() + for j=1,iter_per_thread_local do + local f = feat[(i-1)*iter_per_thread+j] + local boundingbox = bndbox[(i-1)*iter_per_thread+j] + -- crop_buffer is global in each thread + RCNNCrop(f,img,boundingbox,crop_size,padding,use_square,crop_buffer) + end + --collectgarbage() + return + end + ) + end + self.donkeys:synchronize() + + else + for i=1,num_boxes do + self:getCrop(self._feat[i],im,bbox[i]) + end + end - return crop_feat + return self._feat +end + +-- don't do anything. could be the bbox regression or SVM, but I won't add it here +function RCNN:postProcess(im,bbox,output) + return output,bbox end +function RCNN:compute(model,inputs) + local inputs_s = inputs:split(self.max_batch_size,1) + self.output = self.output or inputs.new() + + local ttype = model.output:type() + self.inputs = self.inputs or torch.Tensor():type(ttype) + + for idx, f in ipairs(inputs_s) do + self.inputs:resize(f:size()):copy(f) + local output0 = model:forward(self.inputs) + local fs = f:size(1) + if idx == 1 then + local ss = output0[1]:size():totable() + self.output:resize(inputs:size(1),table.unpack(ss)) + end + self.output:narrow(1,(idx-1)*self.max_batch_size+1,fs):copy(output0) + end + return self.output +end + +function RCNN:__tostring() + local str = torch.type(self) + str = str .. '\n Crop size: ' .. self.crop_size + str = str .. '\n Context padding: ' .. self.padding + if self.use_square then + str = str .. '\n Use square: true' + end + return str +end diff --git a/README.md b/README.md index b1525db..eb80c08 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,209 @@ ## Object detection in torch -Implementation of some object detection frameworks in [torch](http://torch.ch). +This library aims to provide a simple architecture to easily perform object detection in [torch](http://torch.ch). +It currently contains code for training the following frameworks: [RCNN](http://arxiv.org/abs/1311.2524), [SPP](http://arxiv.org/abs/1406.4729) and [Fast-RCNN](http://arxiv.org/abs/1504.08083). + +It consists of 7 basic classes: + +* ImageTransformer: Preprocess an image before feeding it to the network +* DataSetDetection: Generic dataset class for object detection. + * DataSetPascal + * DataSetCOCO (not finished) +* [FeatureProvider](#feat_provider): Implements the necessary operations on images and bounding boxes + * [RCNN](#rcnn) + * [SPP](#spp) + * [Fast-RCNN](#frcnn) +* [BatchProvider](#batch_provider): Samples random patches + * [BatchProviderRC](#batch_provider_rc): ROI-Centric + * [BatchProviderIC](#batch_provider_ic): Image-Centric +* ImageDetect: Encapsulates a model and a feature provider to perform the detection +* Trainer: Simple class to perform the model training. +* Tester: Evaluate the detection using Pascal VOC approach. 
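+
+Schematically, the training-side classes compose as follows (a minimal sketch; constructor arguments are detailed in the sections below, and the `DataSetPascal` argument names are assumptions based on the default paths mentioned at the end of this README):
+```lua
+require 'nnf'
+-- dataset: images plus ground-truth and proposal boxes
+ds = nnf.DataSetPascal{image_set='trainval',
+                       datadir='datasets/VOCdevkit',
+                       roidbdir='data/selective_search_data'}
+-- feature provider: turns (image, boxes) into network inputs
+fp = nnf.RCNN{image_transformer=nnf.ImageTransformer{}}
+-- batch provider: samples training batches through the two objects above
+bp = nnf.BatchProviderRC{dataset=ds, feat_provider=fp}
+```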
+
+
+### Feature Provider
+The `FeatureProvider` class defines the way different algorithms process an image and a set of bounding boxes to feed them to the CNN.
+It implements a `getFeature(image, boxes [,flip])` function, which applies the necessary transformations to the input data (the optional `flip` argument horizontally flips the image and the bounding boxes accordingly), and a `postProcess()` function, which takes the output of the network plus the original inputs and post-processes them. This post-processing could be a bounding-box regression step, for example.
+Every Feature Provider constructor takes as input an `ImageTransformer` (and, where applicable, a `max_batch_size` used for evaluation).
+
+
+#### RCNN
+This is the first work that used CNNs for object detection with bounding box proposals.
+Its transformation is the simplest one: it crops the image at the positions given by the bounding boxes and rescales the crops to a fixed square size.
+The constructor has the following arguments:
+ * `crop_size`
+ * `padding`
+ * `use_square`
+ * `num_threads` number of parallel threads
+
+
+#### SPP
+Contrary to RCNN, SPP crops the images in the feature space (here, `conv5`). This allows computing the convolutional features once for the entire image, making it much more efficient.
+The constructor has the following arguments:
+ * `model`
+ * `pooling_scales`
+ * `num_feat_chns`
+ * `scales`: image scales
+ * `sz_conv_standard`
+ * `step_standard`
+ * `offset0`
+ * `offset`
+ * `inputArea`
+ * `use_cache`
+ * `cachedir`
+
+SPP allows faster training/testing by caching the convolutional feature maps. Instead of an image `I`, you can provide to `getFeature` an image index `i` (from a `DataSetDetection` object), which will load the corresponding feature map from disk (if already computed and if `use_cache` is set to `true`). To easily cache all the features of a dataset on disk, use the method `:saveConvCache()`.
+
+
+#### Fast-RCNN
+Similar to SPP, Fast-RCNN also crops the images in the feature space, but instead of keeping the convolutional layers fixed, it trains them together with the fully-connected layers.
+The constructor has the following arguments:
+ * `scale`
+ * `max_size`
+ * `inputArea`
+
+The output of `getFeature()` is a table with two entries: the preprocessed image(s) as the first element, and the projected bounding boxes as the second. An example of a CNN model structure which can be used with Fast-RCNN is as follows:
+```lua
+-- define features and classifier as you wish.
+-- Can use loadcaffe to read from a saved model, for example
+features = torch.load('alexnet_features.t7')
+classifier = torch.load('alexnet_classifier.t7')
+
+-- define the ROIPooling layer
+-- can use either inn.ROIPooling or nnf.ROIPooling (with CPU support)
+-- let's just use standard parameters from the Fast-RCNN paper
+local ROIPooling = inn.ROIPooling(6,6):setSpatialScale(1/16)
+
+-- create a parallel model which takes as input the images and
+-- the bounding boxes, passes the images through the convolutional
+-- features and simply copies the bounding boxes
+local prl = nn.ParallelTable()
+prl:add(features)
+prl:add(nn.Identity())
+
+-- this is the final model
+model = nn.Sequential()
+model:add(prl)
+model:add(ROIPooling)
+model:add(nn.View(-1):setNumInputDims(3))
+model:add(classifier)
+```
+
+
+### Batch Provider
+This class implements sampling strategies for training object detectors.
+Its constructor takes as arguments a `DataSetDetection` and a `FeatureProvider`.
+It implements a `getBatch` function, which samples from the `DataSet` using the `FeatureProvider`.
+The following arguments are present for all derived classes:
+ * `DataSetDetection`
+ * `FeatureProvider`
+ * `batch_size`
+ * `fg_fraction`
+ * `fg_threshold`
+ * `bg_threshold`
+ * `do_flip`
+
+
+#### BatchProviderRC
+ROI-centric batch provider: it samples patches uniformly over the whole pool of patches.
+To minimize the number of disk accesses, it reads the data for a specified number of batches and stores it in memory.
+The constructor takes the following optional arguments:
+ * `iter_per_batch`
+ * `nTimesMoreData`
+
+
+#### BatchProviderIC
+Image-centric batch provider: it first samples a set of images, and then samples a set of patches from those images.
+The constructor takes the following optional argument (a usage sketch follows this list):
+ * `imgs_per_batch`
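+
+A minimal sketch of the training-side flow with the image-centric provider (assuming `ds` is a dataset as in the sketch above, and `model` follows the Fast-RCNN structure shown earlier):
+```lua
+fp = nnf.FRCNN{image_transformer=nnf.ImageTransformer{}}
+bp = nnf.BatchProviderIC{dataset=ds, feat_provider=fp,
+                         batch_size=128, imgs_per_batch=2}
+bp:setupData()                 -- index fg/bg windows once over the dataset
+batch, targets = bp:getBatch() -- batch is {images, rois} for FRCNN
+output = model:forward(batch)  -- feed output/targets to a criterion as usual
+```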
+
+### Examples
+Here we show a simple example demonstrating how to perform object detection given an image and a set of bounding boxes.
+Run it using `qlua` for the visualization part. A pre-trained model for Fast-RCNN can be found [here](https://drive.google.com/file/d/0B-TTdm1WNtyba3I4Vm1hbFRSS2c/view?usp=sharing).
+```lua
+require 'nnf'
+require 'image'
+require 'cudnn'
+require 'inn'
+require 'nn'
+
+-- load the pre-trained Fast-RCNN model
+params = torch.load('cachedir/frcnn_alexnet.t7')
+loadModel = dofile 'models/frcnn_alexnet.lua'
+model = loadModel(params)
+
+model:add(nn.SoftMax())
+
+model:evaluate()
+model:cuda()
+
+-- prepare the detector
+image_transformer = nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+                                         raw_scale = 255,
+                                         swap = {3,2,1}}
+feat_provider = nnf.FRCNN{image_transformer=image_transformer}
+feat_provider:evaluate() -- testing mode
+
+detector = nnf.ImageDetect(model, feat_provider)
+
+-- load an image
+I = image.lena()
+-- generate some random bounding boxes
+torch.manualSeed(500) -- fix seed for reproducibility
+bboxes = torch.Tensor(100,4)
+bboxes:select(2,1):random(1,I:size(3)/2)
+bboxes:select(2,2):random(1,I:size(2)/2)
+bboxes:select(2,3):random(I:size(3)/2+1,I:size(3))
+bboxes:select(2,4):random(I:size(2)/2+1,I:size(2))
+
+-- detect!
+scores, bboxes = detector:detect(I, bboxes)
+
+-- visualization
+dofile 'visualize_detections.lua'
+threshold = 0.5
+-- classes from Pascal used for training the model
+cls = {'aeroplane','bicycle','bird','boat','bottle','bus','car',
+       'cat','chair','cow','diningtable','dog','horse','motorbike',
+       'person','pottedplant','sheep','sofa','train','tvmonitor'}
+
+w = visualize_detections(I,bboxes,scores,threshold,cls)
+
+```
+This outputs the following
+
+![Lena](examples/example_frcnn_lena.jpg)
+
+
+For an illustration of how to use this code to train a detector, or to evaluate it on Pascal VOC, see the [examples](http://github.com/fmassa/object-detection.torch/tree/refactoring/examples).
+
+#### Bounding box proposals
+Note that this repo doesn't contain code for generating bounding box proposals. For the moment, they are pre-computed and loaded at run time.
+
+#### Model definition
+All the detection frameworks implemented here assume that you already have a pre-trained classification network (trained, for example, on ImageNet). They reuse this pre-trained network as the initialization for the subsequent fine-tuning.
+
+In `models/` you will find the model definitions for several classic networks used in object detection.
+
+The zeiler pretrained model is available at [https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0](https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0).
+It is supposed to be at `data/models`.
+If you want to use your own model with the SPP framework, make sure that it follows the pattern
+```
+model = nn.Sequential()
+model:add(features)
+model:add(pooling_layer)
+model:add(classifier)
+```
+where `features` can be a `nn.Sequential` of several convolutions and `pooling_layer` is the last pooling, with reshaping of the data to feed it to the classifier. See `models/zeiler.lua` for an example.
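+
+Fine-tuning then follows the usual torch training loop; `nnf.Trainer` wraps it. A sketch, with `model` and the batch provider `bp` from the sketches above (the `Trainer` constructor and method names below are assumptions; check `Trainer.lua` and the examples for the exact interface):
+```lua
+criterion = nn.CrossEntropyCriterion():cuda()
+-- assumed signature: Trainer(model, criterion, batch_provider)
+trainer = nnf.Trainer(model, criterion, bp)
+for epoch = 1, 10 do
+  trainer:train(100) -- assumed: number of iterations per call
+end
+```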
### Dependencies

It requires the following packages

- - [xml](http://doc.lubyk.org/xml.html)
- - [matio-ffi.torch](https://github.com/soumith/matio-ffi.torch)
- - [hdf5](https://github.com/deepmind/torch-hdf5)
- - [inn](https://github.com/szagoruyko/imagine-nn)
+ - [xml](http://doc.lubyk.org/xml.html) (for `DataSetPascal`)
+ - [matio-ffi.torch](https://github.com/soumith/matio-ffi.torch) (for `DataSetPascal`)
+ - [hdf5](https://github.com/deepmind/torch-hdf5) (for `SPP`)
+ - [inn](https://github.com/szagoruyko/imagine-nn) (for `SPP`)

To install them all, do

@@ -28,6 +222,10 @@
luarocks install matio

To install `hdf5`, follow the instructions in [here](https://github.com/deepmind/torch-hdf5/blob/master/doc/usage.md)

+### Old code
+The old version of this repo can be found [here](https://github.com/fmassa/object-detection.torch/tree/legacy).
+
+
### Running this code

First, clone this repo
@@ -35,27 +233,5 @@
git clone https://github.com/fmassa/object-detection.torch.git
```

-The zeiler pretrained model is available at [https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0](https://drive.google.com/open?id=0B-TTdm1WNtybdzdMUHhLc05PSE0&authuser=0).
-It is supposed to be at `data/models`.
-If you want to use your own model in SPP framework, make sure that it follows the pattern
-```
-model = nn.Sequential()
-model:add(features)
-model:add(pooling_layer)
-model:add(classifier)
-```
-where `features` can be a `nn.Sequential` of several convolutions and `pooling_layer` is the last pooling with reshaping of the data to feed it to the classifer. See `models/zeiler.lua` for an example.
-
-To finetune the network for detection, simply run
-```
-th main.lua
-```
-
-To get an overview of the different parameters, do
-```
-th main.lua -h
-```
-
The default is to consider that the dataset is present in `datasets/VOCdevkit/VOC2007/`.
The default location of bounding boxes `.mat` files (in RCNN format) is supposed to be in `data/selective_search_data/`.
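+
+To evaluate a trained model over a whole dataset, the same `ImageDetect` object can be driven by the dataset accessors; `nnf.Tester` automates this together with the Pascal VOC evaluation (a sketch, with `ds` and `detector` built as in the sections above):
+```lua
+-- run the detector over every image of a dataset (sketch)
+for i = 1, ds:size() do
+  local I = ds:getImage(i)
+  local boxes = ds:getROIBoxes(i)
+  local scores, bboxes = detector:detect(I, boxes)
+  -- accumulate scores/bboxes here for the VOC evaluation
+end
+```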
- diff --git a/ROIPooling.lua b/ROIPooling.lua new file mode 100644 index 0000000..3ca6d82 --- /dev/null +++ b/ROIPooling.lua @@ -0,0 +1,86 @@ +local ROIPooling,parent = torch.class('nnf.ROIPooling','nn.Module') + +function ROIPooling:__init(W,H) + parent.__init(self) + self.W = W + self.H = H + self.pooler = {}--nn.SpatialAdaptiveMaxPooling(W,H) + self.spatial_scale = 1 + self.gradInput = {torch.Tensor()} +end + +function ROIPooling:setSpatialScale(scale) + self.spatial_scale = scale + return self +end + +function ROIPooling:updateOutput(input) + local data = input[1] + local rois = input[2] + + local num_rois = rois:size(1) + local s = data:size() + local ss = s:size(1) + self.output:resize(num_rois,s[ss-2],self.H,self.W) + + rois[{{},{2,5}}]:add(-1):mul(self.spatial_scale):add(1):round() + rois[{{},2}]:cmin(s[ss]) + rois[{{},3}]:cmin(s[ss-1]) + rois[{{},4}]:cmin(s[ss]) + rois[{{},5}]:cmin(s[ss-1]) + + -- element access is faster if not a cuda tensor + if rois:type() == 'torch.CudaTensor' then + self._rois = self._rois or torch.FloatTensor() + self._rois:resize(rois:size()):copy(rois) + rois = self._rois + end + + if not self._type then self._type = self.output:type() end + + if #self.pooler < num_rois then + local diff = num_rois - #self.pooler + for i=1,diff do + table.insert(self.pooler,nn.SpatialAdaptiveMaxPooling(self.W,self.H):type(self._type)) + end + end + + for i=1,num_rois do + local roi = rois[i] + local im_idx = roi[1] + local im = data[{im_idx,{},{roi[3],roi[5]},{roi[2],roi[4]}}] + self.output[i] = self.pooler[i]:updateOutput(im) + end + return self.output +end + +function ROIPooling:updateGradInput(input,gradOutput) + local data = input[1] + local rois = input[2] + if rois:type() == 'torch.CudaTensor' then + rois = self._rois + end + local num_rois = rois:size(1) + local s = data:size() + local ss = s:size(1) + self.gradInput[1]:resizeAs(data):zero() + + for i=1,num_rois do + local roi = rois[i] + local im_idx = roi[1] + local r = {im_idx,{},{roi[3],roi[5]},{roi[2],roi[4]}} + local im = data[r] + local g = self.pooler[i]:updateGradInput(im,gradOutput[i]) + self.gradInput[1][r]:add(g) + end + return self.gradInput +end + +function ROIPooling:type(type) + parent.type(self,type) + for i=1,#self.pooler do + self.pooler[i]:type(type) + end + self._type = type + return self +end diff --git a/SPP.lua b/SPP.lua index cfd67a1..4456c2c 100644 --- a/SPP.lua +++ b/SPP.lua @@ -1,18 +1,89 @@ local hdf5 = require 'hdf5' +local flipBoundingBoxes = paths.dofile('utils.lua').flipBoundingBoxes local SPP = torch.class('nnf.SPP') - ---TODO vectorize code ? 
-function SPP:__init(dataset,model) +SPP._isFeatureProvider = true + +-- argcheck crashes with that many arguments, and using unordered +-- doesn't seem practical + +local argcheck = paths.dofile('argcheck.lua')--require 'argcheck' +local initcheck = argcheck{ + pack=true, + {name="model", + type="nn.Sequential", + help="conv5 model"}, + {name="dataset", + type="nnf.DataSetPascal", -- change to allow other datasets + opt=true, + help="A dataset class"}, + {name="pooling_scales", + type="table", + default={{1,1},{2,2},{3,3},{6,6}}, + help="pooling scales"}, + {name="num_feat_chns", + type="number", + default=256, + help="number of feature channels to be pooled"}, + {name="scales", + type="table", + default={480,576,688,874,1200}, + help="image scales"}, + {name="sz_conv_standard", + type="number", + default=13, + help=""}, + {name="step_standard", + type="number", + default=16, + help=""}, + {name="offset0", + type="number", + default=21, + help=""}, + {name="offset", + type="number", + default=6.5, + help=""}, + {name="inputArea", + type="number", + default=224^2, + help="input area"}, + {name="image_transformer", + type="nnf.ImageTransformer", + default=nnf.ImageTransformer{}, + help="Class to preprocess input images"}, + {name="use_cache", + type="boolean", + default=true, + help=""}, + {name="cachedir", + type="string", + opt=true, + help=""}, +} + + + +function SPP:__init(...) self.dataset = dataset self.model = model - self.spp_pooler = inn.SpatialPyramidPooling({{1,1},{2,2},{3,3},{6,6}}):float() - self.image_transformer = nnf.ImageTransformer{} + local opts = initcheck(...) + for k,v in pairs(opts) do self[k] = v end + + --self.num_feat_chns = 256 + --self.pooling_scales = {{1,1},{2,2},{3,3},{6,6}} + local pyr = torch.Tensor(self.pooling_scales):t() + local pooled_size = pyr[1]:dot(pyr[2]) + self.output_size = {self.num_feat_chns*pooled_size} + + --self.spp_pooler = inn.SpatialPyramidPooling(self.pooling_scales):float() + --self.image_transformer = nnf.ImageTransformer{} +--[[ -- paper=864, their code=874 self.scales = {480,576,688,874,1200} -- 874 - self.randomscale = true self.sz_conv_standard = 13 self.step_standard = 16 @@ -24,11 +95,20 @@ function SPP:__init(dataset,model) self.use_cache = true self.cachedir = nil - + --]] + self.train = true end +function SPP:training() + self.train = true +end -function SPP:getCrop(im_idx,bbox,flip) +function SPP:evaluate() + self.train = false +end + +-- here just to check +function SPP:getCrop_old(im_idx,bbox,flip) local flip = flip or false if self.curr_im_idx ~= im_idx or self.curr_doflip ~= flip then @@ -36,52 +116,87 @@ function SPP:getCrop(im_idx,bbox,flip) self.curr_im_feats = self:getConv5(im_idx,flip) self.curr_doflip = flip end - - local bbox = bbox + if flip then - local tt = bbox[1] - bbox[1] = self.curr_im_feats.imSize[3]-bbox[3]+1 - bbox[3] = self.curr_im_feats.imSize[3]-tt +1 + flipBoundingBoxes(bbox,self.curr_im_feats.imSize[3]) end local bestScale,bestBbox = self:getBestSPPScale(bbox,self.curr_im_feats.imSize,self.curr_im_feats.scales) local box_norm = self:getResposeBoxes(bestBbox) local crop_feat = self:getCroppedFeat(self.curr_im_feats.rsp[bestScale],box_norm) + + return crop_feat +end + +function SPP:getCrop(im_idx,bbox,flip) + local flip = flip or false + + if self.curr_im_idx ~= im_idx or self.curr_doflip ~= flip then + self.curr_im_idx = im_idx + self.curr_im_feats = self:getConv5(im_idx,flip) + self.curr_doflip = flip + end + + if type(bbox) == 'table' then + bbox = torch.FloatTensor(bbox) + elseif 
torch.isTensor(bbox) and flip then + -- creates a copy of the bboxes to avoid modifying the original + -- bboxes in the flipping + self._bbox = self._bbox or torch.FloatTensor() + self._bbox:resize(bbox:size()):copy(bbox) + bbox = self._bbox + end + bbox = bbox:dim() == 1 and bbox:view(1,-1) or bbox + + if flip then + flipBoundingBoxes(bbox,self.curr_im_feats.imSize[3]) + end + + local feat = self.curr_im_feats + local bestScale,bestbboxes,bboxes_norm,projected_bb = + self:projectBoxes(feat, bbox, feat.scales) + + local crop_feat = {} + for i=1,bbox:size(1) do + local bbox_ = projected_bb[i] + local patch = feat.rsp[bestScale[i]][{{},{bbox_[2],bbox_[4]},{bbox_[1],bbox_[3]}}] + table.insert(crop_feat,patch) + end return crop_feat end -function SPP:getFeature(im_idx,bbox,flip) +-- here just to check +function SPP:getFeature_old(im_idx,bbox,flip) local flip = flip or false - local crop_feat = self:getCrop(im_idx,bbox,flip) + local crop_feat = self:getCrop_old(im_idx,bbox,flip) local feat = self.spp_pooler:forward(crop_feat) - return feat end -local function cleaningForward(input,model) - local currentOutput = model.modules[1]:updateOutput(input) - for i=2,#model.modules do - collectgarbage() - collectgarbage() - currentOutput = model.modules[i]:updateOutput(currentOutput) - model.modules[i-1].output:resize() - model.modules[i-1].gradInput:resize() - if model.modules[i-1].gradWeight then - model.modules[i-1].gradWeight:resize() - end - if model.modules[i-1].gradBias then - model.modules[i-1].gradBias:resize() - end +function SPP:getFeature(im_idx,bbox,flip) + local flip = flip or false + + local crop_feat = self:getCrop(im_idx,bbox,flip) + + self._feat = self._feat or torch.FloatTensor() + self._feat:resize(#crop_feat,table.unpack(self.output_size)) + for i=1,#crop_feat do + self._feat[i]:copy(self.spp_pooler:forward(crop_feat[i])) end - model.output = currentOutput - return currentOutput + + return self._feat end +-- SPP is meant to keep a cache of the conv5 features +-- for fast training. In this case, we suppose that +-- we provide the image index in the dataset. +-- We can also use an image as input, in which case it +-- won't save a conv5 cache. 
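+-- Usage sketch (assuming `spp` was constructed with a dataset and `img` is a +-- 3xHxW image tensor; the values are illustrative): +-- local f1 = spp:getConv5(42) -- by image index, cached on disk +-- local f2 = spp:getConv5(42, true) -- flipped features, cached separately +-- local f3 = spp:getConv5(img) -- image tensor input, cache disabled 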
function SPP:getConv5(im_idx,flip) local scales = self.scales local flip = flip or false @@ -93,8 +208,16 @@ function SPP:getConv5(im_idx,flip) if not cachedir then cachedir = '' end + + local im_name + if not self.dataset then + self.use_cache = false + im_name = '' + else + im_name = self.dataset.img_ids[im_idx] + end - local cachefile = paths.concat(self.cachedir,self.dataset.img_ids[im_idx]) + local cachefile = paths.concat(cachedir,im_name) if flip then cachefile = cachefile..'_flip' @@ -110,7 +233,12 @@ function SPP:getConv5(im_idx,flip) feats.rsp[tostring(i)] = nil end else - local I = self.dataset:getImage(im_idx):float() + local I + if type(im_idx) == 'number' and self.dataset then + I = self.dataset:getImage(im_idx):float() + elseif torch.isTensor(im_idx) then + I = im_idx + end I = self.image_transformer:preprocess(I) if flip then I = image.hflip(I) @@ -129,7 +257,6 @@ function SPP:getConv5(im_idx,flip) local Ir = image.scale(I,sc,sr):type(mtype) local f = self.model:forward(Ir) - --local f = cleaningForward(Ir,self.model) feats.rsp[i] = torch.FloatTensor(f:size()):copy(f) end @@ -180,7 +307,8 @@ function SPP:getBestSPPScale(bbox,imSize,scales) local bestScale - if self.randomscale then + if self.train then + -- in training, select the scales randomly bestScale = torch.random(1,num_scales) else local inputArea = self.inputArea @@ -253,6 +381,141 @@ function SPP:getCroppedFeat(feat,bbox) end + + +local function unique(bboxes) + local idx = {} + local is_unique = torch.ones(bboxes:size(1)) + for i=1,bboxes:size(1) do + local b = bboxes[i] + local n = b[1]..'_'..b[2]..'_'..b[3]..'_'..b[4]..'_'..b[5] + if idx[n] then + is_unique[i] = 0 + else + idx[n] = i + end + end + return is_unique +end + +-- given a table with the conv5 features at different scales and bboxes in +-- the original image, project the bboxes in the conv5 space +function SPP:projectBoxes(feat, bboxes, scales) + -- bboxes is a nx4 Tensor with candidate bounding boxes + -- in [x1, y1, x2, y2] format + local imSize = feat.imSize + + local scales = scales or self.scales + local min_dim = math.min(imSize[2],imSize[3]) + + local sz_conv_standard = self.sz_conv_standard + local step_standard = self.step_standard + + local nboxes = bboxes:size(1) + + -- get best SPP scale + local bestScale = torch.FloatTensor(nboxes) + + if self.train then + -- in training, select the scales randomly + bestScale:random(1,#scales) + else + local bboxArea = bboxes.new():resize(nboxes):zero() + bboxArea:map2(bboxes[{{},3}],bboxes[{{},1}],function(xx,xx2,xx1) return xx2-xx1+1 end) + bboxArea:map2(bboxes[{{},4}],bboxes[{{},2}],function(xx,xx2,xx1) return xx*(xx2-xx1+1) end) + + local expected_scale = bboxArea:float():pow(-0.5):mul(sz_conv_standard*step_standard*min_dim) + expected_scale:round() + + local nbboxDiffArea = torch.FloatTensor(#scales,nboxes) + + for i=1,#scales do + nbboxDiffArea[i]:copy(expected_scale):add(-scales[i]):abs() + end + + bestScale = select(2,nbboxDiffArea:min(1))[1] + end + + local mul_factor = torch.FloatTensor(nboxes,1):copy(bestScale) + local idx = 0 + mul_factor:apply(function(x) + idx = idx + 1 + return (scales[x]-1)/(min_dim-1) + end) + + local bestbboxes = torch.FloatTensor(nboxes,4):copy(bboxes) + bestbboxes:add(-1):cmul(mul_factor:expand(nboxes,4)):add(1) + + -- response boxes + + local offset0 = self.offset0 + local offset = self.offset + + local bboxes_norm = bestbboxes:clone() + bboxes_norm[{{},{1,2}}]:add(-offset0 + offset):div(step_standard):add( 0.5) + bboxes_norm[{{},{1,2}}]:floor():add(1) + bboxes_norm[{{},{3,4}}]:add(-offset0 - offset):div(step_standard):add(-0.5) + bboxes_norm[{{},{3,4}}]:ceil():add(1) + + local x0gtx1 = bboxes_norm[{{},1}]:gt(bboxes_norm[{{},3}]) + local y0gty1 = bboxes_norm[{{},2}]:gt(bboxes_norm[{{},4}]) + + bboxes_norm[{{},1}][x0gtx1] = bboxes_norm[{{},1}][x0gtx1]:add(bboxes_norm[{{},3}][x0gtx1]):div(2) + bboxes_norm[{{},3}][x0gtx1] = (bboxes_norm[{{},1}][x0gtx1]) + + bboxes_norm[{{},2}][y0gty1] = bboxes_norm[{{},2}][y0gty1]:add(bboxes_norm[{{},4}][y0gty1]):div(2) + bboxes_norm[{{},4}][y0gty1] = (bboxes_norm[{{},2}][y0gty1]) + + -- remove repeated projections + if self.dedup then + local is_unique = unique(torch.cat(bboxes_norm,bestScale:view(-1,1),2)) + local lin = torch.range(1,is_unique:size(1)):long() -- can also use cumsum instead + bboxes_norm = bboxes_norm:index(1,lin[is_unique]) + end + -- clamp on boundaries + + local projected_bb = bboxes_norm:clone() + + for i=1,#scales do + local this_scale = bestScale:eq(i) + if this_scale:numel() > 0 then + projected_bb[{{},2}][this_scale] = projected_bb[{{},2}][this_scale]:clamp(1,feat.rsp[i]:size(2)) + projected_bb[{{},4}][this_scale] = projected_bb[{{},4}][this_scale]:clamp(1,feat.rsp[i]:size(2)) + projected_bb[{{},1}][this_scale] = projected_bb[{{},1}][this_scale]:clamp(1,feat.rsp[i]:size(3)) + projected_bb[{{},3}][this_scale] = projected_bb[{{},3}][this_scale]:clamp(1,feat.rsp[i]:size(3)) + end + end + + --projected_bb:floor() + return bestScale,bestbboxes,bboxes_norm,projected_bb +end + +-- doesn't do anything by default. Could be the bbox regression or SVM, but I won't add it here +function SPP:postProcess(im,bbox,output) + return output,bbox +end + +function SPP:compute(model,inputs) + local inputs_s = inputs:split(self.max_batch_size,1) + + self.output = self.output or inputs.new() + + local ttype = model.output:type() + self.inputs = self.inputs or torch.Tensor():type(ttype) + + for idx, f in ipairs(inputs_s) do + self.inputs:resize(f:size()):copy(f) + local output0 = model:forward(self.inputs) + local fs = f:size(1) + if idx == 1 then + local ss = output0[1]:size():totable() + self.output:resize(inputs:size(1),table.unpack(ss)) + end + self.output:narrow(1,(idx-1)*self.max_batch_size+1,fs):copy(output0) + end + return self.output +end + function SPP:type(t_type) self._type = t_type --self.spp_pooler = self.spp_pooler:type(t_type) @@ -270,3 +533,38 @@ end function SPP:cuda() return self:type('torch.CudaTensor') end + +function SPP:saveConvCache() + assert(self.dataset, 'need to set a dataset to save the cache') + assert(self.use_cache, 'use_cache needs to be true') + assert(self.cachedir, 'cachedir needs to be set') + + local dataset = self.dataset + + print('Caching features for '..dataset.dataset_name..' ' + ..dataset.image_set) + local feat_cachedir = self.cachedir + for i=1,dataset:size() do + xlua.progress(i,dataset:size()) + local im_name = dataset.img_ids[i] + local cachefile = paths.concat(feat_cachedir,im_name) + if not paths.filep(cachefile..'.h5') then + local f = self:getConv5(i) + end + if not paths.filep(cachefile..'_flip.h5') then + local f = self:getConv5(i,true) + end + if i%50 == 0 then + collectgarbage() + collectgarbage() + end + end +end + +function SPP:__tostring() + local str = torch.type(self) + str = str .. '\n Image scales: [' .. table.concat(self.scales,', ')..']' + str = str .. '\n Input area: ' .. 
self.inputArea + return str +end + diff --git a/SVMTrainer.lua b/SVMTrainer.lua index 6f857b1..61f6597 100644 --- a/SVMTrainer.lua +++ b/SVMTrainer.lua @@ -1,7 +1,7 @@ local SVMTrainer = torch.class('nnf.SVMTrainer') function SVMTrainer:__init(module,feat_provider) - self.dataset = feat_provider.dataset + --self.dataset = dataset self.module = module self.feat_provider = feat_provider @@ -21,58 +21,54 @@ function SVMTrainer:__init(module,feat_provider) self.evict_thresh = -1.2 self.hard_thresh = -1.0001 - self.pos_feat_type = 'mixed' -- real, mixed, synthetic + self.pos_feat_type = 'real' -- real, mixed, synthetic self.synth_neg = true - self:getFeatureStats() + --self:getFeatureStats() end -function SVMTrainer:getFeatureStats(feat_provider,module) +function SVMTrainer:getFeatureStats(dataset,feat_provider,module) - if true then - self.mean_norm = 30.578503376687 + if false then + self.mean_norm = 19.848824140978--30.578503376687 return end local feat_provider = feat_provider or self.feat_provider local module = module or self.module - local dataset = feat_provider.dataset + local dataset = dataset local boxes_per_image = 200 local num_images = math.min(dataset:size(),200) local valid_idx = torch.randperm(dataset:size()) valid_idx = valid_idx[{{1,num_images}}] - - local fc5_feat = torch.FloatTensor() - local fc7_feat = torch.FloatTensor() local feat_cumsum = 0 local feat_n = 0 + local bboxes = torch.IntTensor(boxes_per_image,4) print('Getting feature stats') for i=1,num_images do xlua.progress(i,num_images) local img_idx = valid_idx[i] + local I = dataset:getImage(img_idx) local rec = dataset:attachProposals(img_idx) local num_bbox = math.min(boxes_per_image,rec:size()) - fc5_feat:resize(num_bbox,unpack(self.feat_dim)) - fc7_feat:resize(num_bbox,4096) - - local bbox_idx = torch.randperm(rec:size()) + local bbox_idx = torch.randperm(rec:size()):long() bbox_idx = bbox_idx[{{1,num_bbox}}] - for j=1,num_bbox do - local bbox_id = bbox_idx[j] - fc5_feat[j] = feat_provider:getFeature(img_idx,rec.boxes[bbox_id]) - end - fc7_feat:copy(module:forward(fc5_feat:cuda())) - feat_n = feat_n + num_bbox - feat_cumsum = feat_cumsum + fc7_feat:pow(2):sum(2):sqrt():sum() + bboxes:index(rec.boxes,1,bbox_idx) + + local feat = feat_provider:getFeature(I,bboxes) + local final_feat = feat_provider:compute(module, feat) + + feat_n = feat_n + num_bbox + feat_cumsum = feat_cumsum + final_feat:pow(2):sum(2):sqrt():sum() end self.mean_norm = feat_cumsum/feat_n end @@ -82,10 +78,10 @@ function SVMTrainer:scaleFeatures(feat) feat:mul(target_norm/self.mean_norm) end -function SVMTrainer:getPositiveFeatures(feat_provider,module) +function SVMTrainer:getPositiveFeatures(dataset,feat_provider,module) local feat_provider = feat_provider or self.feat_provider local module = module or self.module - local dataset = feat_provider.dataset + local dataset = dataset module:evaluate() local positive_data = {} for cl_idx,cl_name in pairs(dataset.classes) do @@ -98,6 +94,11 @@ function SVMTrainer:getPositiveFeatures(feat_provider,module) local not_done = torch.ByteTensor(dataset.num_classes):fill(1) for i=1,end_idx do xlua.progress(i,end_idx) + local I = dataset:getImage(i) + --local gt_boxes, gt_classes = dataset:getGTBoxes(i) + + + local rec = dataset:attachProposals(i) local overlap = rec.overlap_class local is_gt = rec.gt @@ -111,7 +112,10 @@ function SVMTrainer:getPositiveFeatures(feat_provider,module) for j=1,rec:size() do if overlap[j][cl_idx]==1 and is_gt[j]==1 then count = count + 1 - fc5_feat[count] = 
feat_provider:getFeature(i,rec.boxes[j]) + local fff = feat_provider:getFeature(I,rec.boxes[j])[1] + --print(fff:size()) + --print(fc5_feat:size()) + fc5_feat[count] = fff end end if num_pos > 0 then @@ -133,15 +137,16 @@ function SVMTrainer:getPositiveFeatures(feat_provider,module) return positive_data end -function SVMTrainer:sampleNegativeFeatures(ind,feat_provider,module) +function SVMTrainer:sampleNegativeFeatures(ind,dataset,feat_provider,module) local feat_provider = feat_provider or self.feat_provider - local dataset = feat_provider.dataset + local dataset = dataset local module = module or self.module module:evaluate() collectgarbage() local first_time = self.first_time + local I = dataset:getImage(ind) local rec = dataset:attachProposals(ind) local overlap = rec.overlap_class @@ -154,11 +159,9 @@ collectgarbage() caches[cl_name] = {X_neg = {},num_added = 0} end - fc5_feat:resize(rec:size(),unpack(self.feat_dim)) - for j=1,rec:size() do - fc5_feat[j] = feat_provider:getFeature(ind,rec.boxes[j]) - end - fc7_feat:resize(rec:size(),4096):copy(module:forward(fc5_feat:cuda())) + local feat = feat_provider:getFeature(I,rec.boxes) + local fc7_feat = feat_provider:compute(module, feat) + self:scaleFeatures(fc7_feat) if first_time then @@ -264,16 +267,16 @@ function SVMTrainer:addPositiveFeatures(feat_provider,module) end -function SVMTrainer:train() - local dataset = self.dataset +function SVMTrainer:train(dataset) + --local dataset = self.dataset - print('Experiment name: '..self.expname) + --print('Experiment name: '..self.expname) self.W = torch.Tensor(dataset.num_classes,4096) self.B = torch.Tensor(dataset.num_classes) --self:selectPositiveFeatures() - self:addPositiveFeatures() + --self:addPositiveFeatures() local caches = {} for cl_idx,cl_name in pairs(dataset.classes) do @@ -313,7 +316,7 @@ function SVMTrainer:train() X = self:sampleNegativeFeatures(i-num_synth) end else - X = self:sampleNegativeFeatures(i) + X = self:sampleNegativeFeatures(i,dataset) end for cl_idx,cl_name in pairs(dataset.classes) do @@ -396,7 +399,7 @@ function SVMTrainer:train() end first_time = false end - torch.save('/home/francisco/work/projects/cross_domain/cachedir/svm_models/svm_model,'..self.expname..'.t7',{W=self.W,B=self.B}) + --torch.save('/home/francisco/work/projects/cross_domain/cachedir/svm_models/svm_model,'..self.expname..'.t7',{W=self.W,B=self.B}) return caches--X_all end diff --git a/Tester.lua b/Tester.lua index 4c84ace..5ff2bc1 100644 --- a/Tester.lua +++ b/Tester.lua @@ -6,14 +6,11 @@ local VOCevaldet = utils.VOCevaldet local Tester = torch.class('nnf.Tester') -function Tester:__init(module,feat_provider) - self.dataset = feat_provider.dataset - self.module = module +function Tester:__init(module,feat_provider,dataset) + self.dataset = dataset self.feat_provider = feat_provider + self.module = module - self.feat_dim = {256*50} - self.max_batch_size = 4000 - self.cachefolder = nil self.cachename = nil self.suffix = '' @@ -58,30 +55,44 @@ function Tester:validate(criterion) return err/num_batches end +local function print_scores(dataset,res) + print('Results:') + -- print class names + io.write('|') + for i = 1, dataset.num_classes do + io.write(('%5s|'):format(dataset.classes[i])) + end + io.write('\n|') + -- print class scores + for i = 1, dataset.num_classes do + local l = #dataset.classes[i] < 5 and 5 or #dataset.classes[i] + local l = res[i] == res[i] and l-5 or l-3 + if l > 0 then + io.write(('%.3f%'..l..'s|'):format(res[i],' ')) + else + io.write(('%.3f|'):format(res[i])) + end + end + 
io.write('\n') + io.write(('mAP: %.4f\n'):format(res:mean(1)[1])) +end + + function Tester:test(iteration) local dataset = self.dataset local module = self.module local feat_provider = self.feat_provider - local pathfolder = paths.concat(self.cachefolder,'test_iter'..iteration) - paths.mkdir(pathfolder) - module:evaluate() + feat_provider:evaluate() dataset:loadROIDB() - local feats = torch.FloatTensor() - local feats_batched = {} - local feats_cuda = torch.CudaTensor() - - local output = torch.FloatTensor() - - local output_dim = module:get(module:size()) - - local softmax = nn.SoftMax():float() - + local detec = nnf.ImageDetect(module, feat_provider) local boxes - -- + local im + local output + local aboxes = {} for i=1,dataset.num_classes do table.insert(aboxes,{}) @@ -89,50 +100,41 @@ local max_per_set = 5*dataset:size() local max_per_image = 100 - local thresh = torch.ones(dataset.num_classes):mul(-1.5) + local thresh = torch.ones(dataset.num_classes):mul(0.05) local scored_boxes = torch.FloatTensor() local timer = torch.Timer() local timer2 = torch.Timer() local timer3 = torch.Timer() - + + -- SPP is more efficient if we cache the features. We treat it differently than + -- the other feature providers + local pass_index = torch.type(feat_provider) == 'nnf.SPP' and true or false + for i=1,dataset:size() do timer:reset() io.write(('test: (%s) %5d/%-5d '):format(dataset.dataset_name,i,dataset:size())); - boxes = dataset:getROIBoxes(i):float() - local num_boxes = boxes:size(1) - -- compute image feature maps - timer3:reset() - feats:resize(num_boxes,unpack(self.feat_dim)) - for idx=1,num_boxes do - feats[idx] = feat_provider:getFeature(i,boxes[idx]) + + if pass_index then + im = i + else + im = dataset:getImage(i) end - local tt = timer3:time().real - -- compute classification scores - torch.split(feats_batched,feats,self.max_batch_size,1) + boxes = dataset:getROIBoxes(i):float() + timer3:reset() - for idx,f in ipairs(feats_batched) do - local fs = f:size(1) - feats_cuda:resize(fs,unpack(self.feat_dim)):copy(f) - module:forward(feats_cuda) - if idx == 1 then - local out_size = module.output:size():totable() - table.remove(out_size,1) - output:resize(num_boxes,unpack(out_size)) - end - output:narrow(1,(idx-1)*self.max_batch_size+1,fs):copy(module.output) - end - local add_bg = 0 - if dataset.num_classes ~= output:size(2) then -- if there is no svm - output = softmax:forward(output) - add_bg = 1 - end - + output,boxes = detec:detect(im,boxes) + + local add_bg = 1 + local tt = 0 local tt2 = timer3:time().real timer2:reset() + -- do an NMS for each class, based on the scores from the classifier for j=1,dataset.num_classes do local scores = output:select(2,j+add_bg) + -- only select detections with a score greater than thresh + -- this avoids doing NMS on too many bboxes with low score local idx = torch.range(1,scores:numel()):long() local idx2 = scores:gt(thresh[j]) idx = idx[idx2] @@ -151,6 +153,7 @@ aboxes[j][i] = torch.FloatTensor() end + -- remove low scoring boxes and update threshold if i%1000 == 0 then aboxes[j],thresh[j] = keep_top_k(aboxes[j],max_per_set) end @@ -158,10 +161,11 @@ end io.write((' prepare feat time: %.3f, forward time: %.3f, select time: %.3fs, total time: %.3fs\n'):format(tt,tt2,timer2:time().real,timer:time().real)); - --collectgarbage() - --mattorch.save(paths.concat(pathfolder,dataset.img_ids[i]..'.mat'),output:double()) end + local pathfolder = 
paths.concat(self.cachefolder,'test_iter'..iteration) + paths.mkdir(pathfolder) + for i = 1,dataset.num_classes do -- go back through and prune out detections below the found threshold for j = 1,dataset:size() do @@ -174,10 +178,14 @@ function Tester:test(iteration) end end end - save_file = paths.concat(pathfolder, dataset.classes[i].. '_boxes_'.. - dataset.dataset_name..self.suffix) - torch.save(save_file, aboxes) + --save_file = paths.concat(pathfolder, dataset.classes[i].. '_boxes_'.. + -- dataset.dataset_name..self.suffix) + --torch.save(save_file, aboxes) end + save_file = paths.concat(pathfolder, 'boxes_'.. + dataset.dataset_name..self.suffix) + torch.save(save_file, aboxes) + local res = {} for i=1,dataset.num_classes do @@ -185,27 +193,11 @@ function Tester:test(iteration) res[i] = VOCevaldet(dataset,aboxes[i],cls) end res = torch.Tensor(res) - print('Results:') - -- print class names - io.write('|') - for i = 1, dataset.num_classes do - io.write(('%5s|'):format(dataset.classes[i])) - end - io.write('\n|') - -- print class scores - for i = 1, dataset.num_classes do - local l = #dataset.classes[i] < 5 and 5 or #dataset.classes[i] - local l = res[i] == res[i] and l-5 or l-3 - if l > 0 then - io.write(('%.3f%'..l..'s|'):format(res[i],' ')) - else - io.write(('%.3f|'):format(res[i])) - end - end - io.write('\n') - io.write(('mAP: %.4f\n'):format(res:mean(1)[1])) + + print_scores(dataset,res) -- clean roidb to free memory dataset.roidb = nil return res end + diff --git a/Trainer.lua b/Trainer.lua index 180b1eb..8ac9c47 100644 --- a/Trainer.lua +++ b/Trainer.lua @@ -1,18 +1,22 @@ require 'nn' require 'optim' require 'xlua' +local utils = paths.dofile('utils.lua') +local recursiveResizeAsCopyTyped = utils.recursiveResizeAsCopyTyped local Trainer = torch.class('nnf.Trainer') -function Trainer:__init(module,criterion) +function Trainer:__init(module,criterion,batch_provider,optimState) self.module = module self.criterion = criterion + self.batch_provider = batch_provider self.parameters,self.gradParameters = self.module:getParameters() - self.optimState = {learningRate = 1e-3, weightDecay = 0.0005, momentum = 0.9, - learningRateDecay = 0} + self.optimState = optimState or + {learningRate = 1e-3, weightDecay = 0.0005, momentum = 0.9, + learningRateDecay = 0, dampening = 0} self.epoch = 0 @@ -22,40 +26,39 @@ function Trainer:__init(module,criterion) end +function Trainer:train(maxIter) + local maxIter = maxIter or 20 + local ttype = self.parameters:type() -function Trainer:train(inputs,targets) - -- only for batches - assert(targets:dim()>2,'Trainer is only for batches') - self.module:training() - self._input = self._input or torch.CudaTensor() - self._target = self._target or torch.CudaTensor() local module = self.module + local batch_provider = self.batch_provider local parameters = self.parameters local gradParameters = self.gradParameters local criterion = self.criterion local optimState = self.optimState - local batchSize = inputs:size(2) - local maxIter = inputs:size(1) - if self.confusion then self.confusion:zero() end local err = 0 - self._input:resize(inputs[1]:size()) - self._target:resize(targets[1]:size()) - local input = self._input - local target = self._target - + local input + local target + for t=1,maxIter do xlua.progress(t,maxIter) - input:copy(inputs[t]) - target:copy(targets[t]) + -- get training batch + self.input0,self.target0 = batch_provider:getBatch() + + -- copy to ttype + self.input,self.input0 = recursiveResizeAsCopyTyped(self.input,self.input0,ttype) + 
self.target,self.target0 = recursiveResizeAsCopyTyped(self.target,self.target0,ttype) + input = self.input + target = self.target local feval = function(x) if x ~= parameters then @@ -70,11 +73,6 @@ module:backward(input,df_do) - if self.normalize then - gradParameters:div(batchSize) - f = f/batchSize - end - if self.confusion then self.confusion:batchAdd(outputs,target) end @@ -88,6 +86,6 @@ table.insert(self.fx,err/maxIter) - self.module:evaluate() + --self.module:evaluate() self.epoch = self.epoch + 1 end diff --git a/argcheck.lua b/argcheck.lua new file mode 100644 index 0000000..2ce4e3b --- /dev/null +++ b/argcheck.lua @@ -0,0 +1,73 @@ +local usage = require 'argcheck.usage' +local env = require 'argcheck.env' +-------------------------------------------------------------------------------- +-- Simple argument function with a similar interface to argcheck, but which +-- supports lots of default arguments for named rules. +-- Not as fast and elegant though. +-------------------------------------------------------------------------------- +local function argcheck(rules) + -- basic checks + assert(not (rules.noordered and rules.nonamed), 'rules must be at least ordered or named') + assert(rules.help == nil or type(rules.help) == 'string', 'rules help must be a string or nil') + assert(rules.doc == nil or type(rules.doc) == 'string', 'rules doc must be a string or nil') + assert(not rules.overload, 'rules overload not supported') + assert(not (rules.doc and rules.help), 'choose between doc or help, not both') + for _, rule in ipairs(rules) do + assert(rule.name, 'rule must have a name field') + assert(rule.type == nil or type(rule.type) == 'string', 'rule type must be a string or nil') + assert(rule.help == nil or type(rule.help) == 'string', 'rule help must be a string or nil') + assert(rule.doc == nil or type(rule.doc) == 'string', 'rule doc must be a string or nil') + assert(rule.check == nil or type(rule.check) == 'function', 'rule check must be a function or nil') + --assert(rule.defaulta == nil or type(rule.defaulta) == 'string', 'rule defaulta must be a string or nil') + --assert(rule.defaultf == nil or type(rule.defaultf) == 'function', 'rule defaultf must be a function or nil') + end + + if not (rules.pack == nil or rules.pack) then + error('pack needs to be true') + end + if rules.nonamed then + error('only named arguments are supported') + end + + local arginfo = {} + for k,v in ipairs(rules) do + arginfo[v.name] = k + end + + local function func(args) + + local iargs = {} + for _,rule in ipairs(rules) do + iargs[rule.name] = rule.default + if rule.default == nil and + args[rule.name] == nil and + rule.opt ~= true then + print(usage(rules)) + error('Missing argument: '..rule.name) + end + end + + for k,v in pairs(args) do + if not env.istype(v,rules[arginfo[k]].type) then + print(usage(rules)) + error('Wrong type: '.. 
k) + end + + if rules[arginfo[k]].check then + local c = rules[arginfo[k]].check(args[k]) + if not c then + print(usage(rules)) + error('check did not pass') + end + end + iargs[k] = args[k] + end + + return iargs + end + + return func + +end + +return argcheck diff --git a/config.lua b/config.lua new file mode 100644 index 0000000..0e0ea08 --- /dev/null +++ b/config.lua @@ -0,0 +1,112 @@ +require 'nnf' + +local configs = {} + +local image_transformer_params = { + mean_pix={102.9801,115.9465,122.7717}, + raw_scale = 255, + swap = {3,2,1} +} + +configs.image_transformer_params = image_transformer_params + +configs.datasetDir = 'datasets/VOCdevkit' +configs.roidbDir = 'data/selective_search_data' + +-------------------------------------------------------------------------------- +-- Training Parameters +-------------------------------------------------------------------------------- + +local train_params = { + batch_size = 16,--128, + fg_fraction = 0.25, + fg_threshold = 0.5, + bg_threshold = {0.0,0.5}, + do_flip = true, +} + +configs.train_params = train_params + +-------------------------------------------------------------------------------- +-- Feature Provider Parameters +-------------------------------------------------------------------------------- + +configs.algo = {} + +-------------------------------------------------------------------------------- +-- RCNN +-------------------------------------------------------------------------------- + +local fp_params = { + crop_size = 227, + padding = 16, + use_square = false, +} +local bp_params = { + iter_per_batch = 100, + nTimesMoreData = 10, +} + +local RCNN = { + fp_params=fp_params, + bp_params=bp_params, + bp = nnf.BatchProviderRC +} + +configs.algo.RCNN = RCNN + +-------------------------------------------------------------------------------- +-- SPP +-------------------------------------------------------------------------------- +-- +local num_chns = 256 +local pooling_scales = {{1,1},{2,2},{3,3},{6,6}} +local pyr = torch.Tensor(pooling_scales):t() +local pooled_size = pyr[1]:dot(pyr[2]) +local feat_dim = {num_chns*pooled_size} + +local fp_params = { + scales = {480,576,688,874,1200}, + sz_conv_standard = 13, + step_standard = 16, + offset0 = 21, + offset = 6.5, + inputArea = 224^2, + pooling_scales = pooling_scales, + num_feat_chns = num_chns, +} +local bp_params = { + iter_per_batch = 500, + nTimesMoreData = 10, +} + +local SPP = { + fp_params=fp_params, + bp_params=bp_params, + bp = nnf.BatchProviderRC +} + +configs.algo.SPP = SPP + +-------------------------------------------------------------------------------- +-- Fast-RCNN +-------------------------------------------------------------------------------- + +local fp_params = { + scale = {600}, + max_size = 1000, +} +local bp_params = { + imgs_per_batch = 2, +} + +local FRCNN = { + fp_params=fp_params, + bp_params=bp_params, + bp = nnf.BatchProviderIC +} + +configs.algo.FRCNN = FRCNN + + +return configs diff --git a/data.lua b/data.lua index 655deb5..59d3284 100644 --- a/data.lua +++ b/data.lua @@ -1,145 +1,65 @@ -------------------------------------------------------------------------------- -- Prepare data model -------------------------------------------------------------------------------- -paths.mkdir(opt.save) -trainCache = paths.concat(opt.save_base,'trainCache.t7') -testCache = paths.concat(opt.save_base,'testCache.t7') +local trainCache = paths.concat(rundir,'trainCache.t7') +--testCache = paths.concat(opt.save_base,'testCache.t7') -local pooler -local feat_dim 
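+-- Sketch of how the pieces below fit together (names as in config.lua): +-- local cfg = config.algo[opt.algo] -- e.g. opt.algo == 'RCNN' +-- feat_provider = nnf[opt.algo](cfg.fp_params) +-- batch_provider = cfg.bp(cfg.bp_params) -- e.g. nnf.BatchProviderRC 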
+local config = paths.dofile('config.lua') -if opt.algo == 'SPP' then - local conv_list = features:findModules(opt.backend..'.SpatialConvolution') - local num_chns = conv_list[#conv_list].nOutputPlane - pooler = model:get(2):clone():float() - local pyr = torch.Tensor(pooler.pyr):t() - local pooled_size = pyr[1]:dot(pyr[2]) - feat_dim = {num_chns*pooled_size} -elseif opt.algo == 'RCNN' then - feat_dim = {3,227,227} +image_transformer = nnf.ImageTransformer(config.image_transformer_params) + +local FP = nnf[opt.algo] +local fp_params = config.algo[opt.algo].fp_params +local bp_params = config.algo[opt.algo].bp_params +local BP = config.algo[opt.algo].bp + +local train_params = config.train_params + +-- add common parameters +fp_params.image_transformer = image_transformer +for k,v in pairs(train_params) do + bp_params[k] = v end -image_transformer = nnf.ImageTransformer{mean_pix=image_mean} +------------------------------------------------------------------------------- +-- Create structures +-------------------------------------------------------------------------------- + +ds_train = nnf.DataSetPascal{ + image_set='trainval', + year=2007,--opt.year, + datadir=config.datasetDir, + roidbdir=config.roidbDir +} + +feat_provider = FP(fp_params) +feat_provider:training() + +bp_params.dataset = ds_train +bp_params.feat_provider = feat_provider +batch_provider = BP(bp_params) if paths.filep(trainCache) then print('Loading train metadata from cache') - batch_provider = torch.load(trainCache) - feat_provider = batch_provider.feat_provider - ds_train = feat_provider.dataset - feat_provider.model = features + local metadata = torch.load(trainCache) + batch_provider.bboxes = metadata else - ds_train = nnf.DataSetPascal{image_set='trainval',classes=classes,year=opt.year, - datadir=opt.datadir,roidbdir=opt.roidbdir} - - if opt.algo == 'SPP' then - feat_provider = nnf.SPP(ds_train)-- remove features here to reduce cache size - feat_provider.cachedir = paths.concat(opt.cache,'features',opt.netType) - feat_provider.randomscale = true - feat_provider.scales = {600} - feat_provider.spp_pooler = pooler:clone() - feat_provider.image_transformer = image_transformer - elseif opt.algo == 'RCNN' then - feat_provider = nnf.RCNN(ds_train) - feat_provider.crop_size = feat_dim[2] - feat_provider.image_transformer = image_transformer - else - error(("Detection framework '%s' not available"):format(opt.algo)) - end - - print('==> Preparing BatchProvider for training') - batch_provider = nnf.BatchProvider(feat_provider) - batch_provider.iter_per_batch = opt.ipb - batch_provider.nTimesMoreData = opt.ntmd - batch_provider.fg_fraction = opt.fg_frac - batch_provider.bg_threshold = {0.0,0.5} - batch_provider.do_flip = true - batch_provider.batch_dim = feat_dim batch_provider:setupData() - - torch.save(trainCache,batch_provider) - feat_provider.model = features + torch.save(trainCache, batch_provider.bboxes) end -if paths.filep(testCache) then - print('Loading test metadata from cache') - batch_provider_test = torch.load(testCache) - feat_provider_test = batch_provider_test.feat_provider - ds_test = feat_provider_test.dataset - feat_provider_test.model = features -else - ds_test = nnf.DataSetPascal{image_set='test',classes=classes,year=opt.year, - datadir=opt.datadir,roidbdir=opt.roidbdir} - if opt.algo == 'SPP' then - feat_provider_test = nnf.SPP(ds_test) - feat_provider_test.randomscale = false - feat_provider_test.cachedir = paths.concat(opt.cache,'features',opt.netType) - feat_provider_test.scales = {600} - 
feat_provider_test.spp_pooler = pooler:clone() - feat_provider_test.image_transformer = image_transformer - elseif opt.algo == 'RCNN' then - feat_provider_test = nnf.RCNN(ds_test) - feat_provider_test.crop_size = feat_dim[2] - feat_provider_test.image_transformer = image_transformer - else - error(("Detection framework '%s' not available"):format(opt.algo)) - end - - print('==> Preparing BatchProvider for validation') - batch_provider_test = nnf.BatchProvider(feat_provider_test) - batch_provider_test.iter_per_batch = 500--opt.ipb - batch_provider_test.nTimesMoreData = 10--opt.ntmd - batch_provider_test.fg_fraction = opt.fg_frac - batch_provider_test.bg_threshold = {0.0,0.5} - batch_provider_test.do_flip = false - batch_provider_test.batch_dim = feat_dim - batch_provider_test:setupData() - - torch.save(testCache,batch_provider_test) - feat_provider_test.model = features -end - --------------------------------------------------------------------------------- --- Compute conv5 feature cache (for SPP) --------------------------------------------------------------------------------- -if opt.algo == 'SPP' then - print('Preparing conv5 features for '..ds_train.dataset_name..' ' - ..ds_train.image_set) - local feat_cachedir = feat_provider.cachedir - for i=1,ds_train:size() do - xlua.progress(i,ds_train:size()) - local im_name = ds_train.img_ids[i] - local cachefile = paths.concat(feat_cachedir,im_name) - if not paths.filep(cachefile..'.h5') then - local f = feat_provider:getConv5(i) - end - if not paths.filep(cachefile..'_flip.h5') then - local f = feat_provider:getConv5(i,true) - end - if i%50 == 0 then - collectgarbage() - collectgarbage() - end - end - - print('Preparing conv5 features for '..ds_test.dataset_name..' ' - ..ds_test.image_set) - local feat_cachedir = feat_provider_test.cachedir - for i=1,ds_test:size() do - xlua.progress(i,ds_test:size()) - local im_name = ds_test.img_ids[i] - local cachefile = paths.concat(feat_cachedir,im_name) - if not paths.filep(cachefile..'.h5') then - local f = feat_provider_test:getConv5(i) - end - if i%50 == 0 then - collectgarbage() - collectgarbage() - end - end -end +-- test +ds_test = nnf.DataSetPascal{ + image_set='test', + year=2007,--opt.year, + datadir=config.datasetDir, + roidbdir=config.roidbDir +} -features = nil -model = nil +-- only needed because of SPP +-- could be the same as the one for training +--feat_provider_test = FP(fp_params) +--feat_provider_test:evaluate() collectgarbage() diff --git a/examples/example_frcnn_lena.jpg b/examples/example_frcnn_lena.jpg new file mode 100644 index 0000000..e1919fa Binary files /dev/null and b/examples/example_frcnn_lena.jpg differ diff --git a/examples/train_test_rcnn.lua b/examples/train_test_rcnn.lua new file mode 100644 index 0000000..7701ad6 --- /dev/null +++ b/examples/train_test_rcnn.lua @@ -0,0 +1,190 @@ +require 'nnf' + +cmd = torch.CmdLine() +cmd:text('Example on how to train/test an RCNN-based object detector on Pascal') +cmd:text('') +cmd:text('Options:') +cmd:option('-name', 'rcnn-example', 'base name') +cmd:option('-modelpath', '', 'path to the pre-trained model') +cmd:option('-lr', 1e-3, 'learning rate') +cmd:option('-num_iter', 40000, 'number of iterations') +cmd:option('-disp_iter', 100, 'display every n iterations') +cmd:option('-lr_step', 30000, 'step for reducing the learning rate') +cmd:option('-save_step', 10000, 'step for saving the model') +cmd:option('-gpu', 1, 'gpu to use (0 for cpu mode)') +cmd:option('-seed', 1, 'fix random seed (if ~= 0)') +cmd:option('-numthreads',6, 'number of threads') + +opt = cmd:parse(arg or {}) + +assert(paths.filep(opt.modelpath), 'need to provide the path for the pre-trained model') + +exp_name = cmd:string(opt.name, opt, {name=true, gpu=true, numthreads=true, + modelpath=true}) + +rundir = '../cachedir/'..exp_name +paths.mkdir(rundir) + +cmd:log(paths.concat(rundir,'log'), opt) +cmd:addTime('RCNN Example') + +local tensor_type +if opt.gpu > 0 then + require 'cunn' + cutorch.setDevice(opt.gpu) + tensor_type = 'torch.CudaTensor' + print('Using GPU mode on device '..opt.gpu) +else + require 'nn' + tensor_type = 'torch.FloatTensor' + print('Using CPU mode') +end + +if opt.seed ~= 0 then + torch.manualSeed(opt.seed) + if opt.gpu > 0 then + cutorch.manualSeed(opt.seed) + end + print('Using fixed seed: '..opt.seed) +end + +torch.setnumthreads(opt.numthreads) + +-------------------------------------------------------------------------------- +-- define model and criterion +-------------------------------------------------------------------------------- +-- load pre-trained model for finetuning +-- should already have the right number of outputs in the last layer, +-- which can be done by removing the last layer and replacing it by a new one +-- for example: +-- pre_trained_model:remove() -- remove last layer +-- pre_trained_model:add(nn.Linear(4096,21)) -- add new layer +model = torch.load(opt.modelpath) + +criterion = nn.CrossEntropyCriterion() + +model:type(tensor_type) +criterion:type(tensor_type) + +print('Model:') +print(model) +print('Criterion:') +print(criterion) + +-- define the transformations to apply to the image before +-- passing it to the network +local image_transformer = nnf.ImageTransformer{ + mean_pix={102.9801,115.9465,122.7717}, + raw_scale = 255, + swap = {3,2,1} +} + +print(image_transformer) +-------------------------------------------------------------------------------- +-- define data for training +-------------------------------------------------------------------------------- + +-- this class holds all the necessary information regarding the dataset +ds = nnf.DataSetPascal{ + image_set='trainval', + datadir='datasets/VOCdevkit', + roidbdir='data/selective_search_data', + year=2007 +} +print('DataSet Training:') +print(ds) +-------------------------------------------------------------------------------- +-- define feature providers +-------------------------------------------------------------------------------- + +local crop_size = 224 + +-- the feature provider extracts the features for a given image + bounding box +fp = nnf.RCNN{ + image_transformer=image_transformer, + crop_size=crop_size, + num_threads=opt.numthreads +} +-- different frameworks can behave differently during training and testing +fp:training() + +print('Feature Provider:') +print(fp) + +-------------------------------------------------------------------------------- +-- define batch providers +-------------------------------------------------------------------------------- + +bp = nnf.BatchProviderRC{ + dataset=ds, + feat_provider=fp, + bg_threshold={0.0,0.5}, + nTimesMoreData=2, + iter_per_batch=10,--100, +} +bp:setupData() + +print('Batch Provider:') +print(bp) +-------------------------------------------------------------------------------- +-- train +-------------------------------------------------------------------------------- + +trainer = nnf.Trainer(model, criterion, bp) + +local num_iter = opt.num_iter/opt.disp_iter +local lr_step = opt.lr_step/opt.disp_iter +local save_step = opt.save_step/opt.disp_iter + 
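+-- with the default options this amounts to 40000/100 = 400 loop iterations, +-- with the learning rate divided by 10 after 300 of them and a snapshot every 100 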
+trainer.optimState.learningRate = opt.lr + +local lightModel = model:clone('weight','bias') + +-- main training loop +for i=1,num_iter do + if i % lr_step == 0 then + trainer.optimState.learningRate = trainer.optimState.learningRate/10 + end + print(('Iteration %3d/%-3d'):format(i,num_iter)) + trainer:train(opt.disp_iter) + print((' Training error: %.5f'):format(trainer.fx[i])) + + if i % save_step == 0 then + torch.save(paths.concat(rundir, 'model.t7'), lightModel) + end +end + +torch.save(paths.concat(rundir, 'model.t7'), lightModel) + +-------------------------------------------------------------------------------- +-- evaluation +-------------------------------------------------------------------------------- +-- add softmax to classifier, because we were using nn.CrossEntropyCriterion +local softmax = nn.SoftMax() +softmax:type(tensor_type) +model:add(softmax) + +-- dataset for evaluation +dsv = nnf.DataSetPascal{ + image_set='test', + datadir='datasets/VOCdevkit', + roidbdir='data/selective_search_data', + year=2007 +} +print('DataSet Evaluation:') +print(dsv) + +-- feature provider for evaluation +fpv = nnf.RCNN{ + image_transformer=image_transformer, + crop_size=crop_size, + num_threads=opt.numthreads +} +fpv:evaluate() +print('Feature Provider Evaluation:') +print(fpv) + +-- define the class to test the model on the full dataset +tester = nnf.Tester(model, fpv, dsv) +tester.cachefolder = rundir +tester:test(opt.num_iter) diff --git a/main.lua b/main.lua index 65a4b18..0a8705b 100644 --- a/main.lua +++ b/main.lua @@ -1,6 +1,7 @@ require 'nnf' -require 'cunn' +--require 'cunn' require 'optim' +require 'trepl' local opts = paths.dofile('opts.lua') opt = opts.parse(arg) @@ -8,116 +9,47 @@ print(opt) if opt.seed ~= 0 then torch.manualSeed(opt.seed) - cutorch.manualSeed(opt.seed) + if opt.gpu > 0 then + cutorch.manualSeed(opt.seed) + end end -cutorch.setDevice(opt.gpu) torch.setnumthreads(opt.numthreads) --------------------------------------------------------------------------------- --- Select target classes --------------------------------------------------------------------------------- - -if opt.classes == 'all' then - classes={'aeroplane','bicycle','bird','boat','bottle','bus','car', - 'cat','chair','cow','diningtable','dog','horse','motorbike', - 'person','pottedplant','sheep','sofa','train','tvmonitor'} +local tensor_type +if opt.gpu > 0 then + require 'cunn' + cutorch.setDevice(opt.gpu) + tensor_type = 'torch.CudaTensor' + print('Using GPU mode on device '..opt.gpu) else - classes = {opt.classes} + require 'nn' + tensor_type = 'torch.FloatTensor' + print('Using CPU mode') end -------------------------------------------------------------------------------- +model, criterion = paths.dofile('model.lua') +model:type(tensor_type) +criterion:type(tensor_type) -paths.dofile('model.lua') +-- prepare training and test data paths.dofile('data.lua') --------------------------------------------------------------------------------- --- Prepare training model --------------------------------------------------------------------------------- - -trainer = nnf.Trainer(classifier,criterion) -trainer.optimState.learningRate = opt.lr - -local conf_classes = {} -table.insert(conf_classes,'background') -for i=1,#classes do - table.insert(conf_classes,classes[i]) -end -trainer.confusion = optim.ConfusionMatrix(conf_classes) - -validator = nnf.Tester(classifier,feat_provider_test) -validator.cachefolder = opt.save_base -validator.cachename = 'validation_data.t7' -validator.batch_provider = 
batch_provider_test - -logger = optim.Logger(paths.concat(opt.save,'log.txt')) -val_err = {} -val_counter = 0 -reduc_counter = 0 - -inputs = torch.FloatTensor() -targets = torch.IntTensor() -for i=1,opt.num_iter do - - print('Iteration: '..i..'/'..opt.num_iter) - inputs,targets = batch_provider:getBatch(inputs,targets) - print('==> Training '..paths.basename(opt.save_base)) - trainer:train(inputs,targets) - print('==> Training Error: '..trainer.fx[i]) - print(trainer.confusion) - - collectgarbage() +-- Do training +paths.dofile('train.lua') - err = validator:validate(criterion) - print('==> Validation Error: '..err) - table.insert(val_err,err) - - logger:add{['train error (iters per batch='..batch_provider.iter_per_batch.. - ')']=trainer.fx[i],['val error']=err, - ['learning rate']=trainer.optimState.learningRate} - - val_counter = val_counter + 1 - - local val_err_t = torch.Tensor(val_err) - local _,lmin = val_err_t:min(1) - if val_counter-lmin[1] >= opt.nsmooth then - print('Reducing learning rate') - trainer.optimState.learningRate = trainer.optimState.learningRate/2 - if opt.nildfdx == true then - trainer.optimState.dfdx= nil - end - val_counter = 0 - val_err = {} - reduc_counter = reduc_counter + 1 - if reduc_counter >= opt.nred then - print('Stopping training at iteration '..i) - break - end - end - - collectgarbage() - collectgarbage() - --sanitize(model) - --torch.save(paths.concat(opt.save, 'model_' .. epoch .. '.t7'), classifier) - --torch.save(paths.concat(opt.save, 'optimState_' .. epoch .. '.t7'), trainer.optimState) -end - ---sanitize(classifier) -torch.save(paths.concat(opt.save, 'model.t7'), classifier) - -ds_train.roidb = nil -collectgarbage() -collectgarbage() - --------------------------------------------------------------------------------- --- Do full evaluation --------------------------------------------------------------------------------- - -print('==> Evaluation') -tester = nnf.Tester(classifier,feat_provider_test) -tester.cachefolder = paths.concat(opt.save,'evaluation',ds_test.dataset_name) +-- evaluation +print('==> Evaluating') +-- add softmax to classifier, because we were using nn.CrossEntropyCriterion +local softmax = nn.SoftMax() +softmax:type(tensor_type) +model:add(softmax) +feat_provider:evaluate() +-- define the class to test the model on the full dataset +tester = nnf.Tester(model, feat_provider, ds_test) +tester.cachefolder = rundir tester:test(opt.num_iter) - diff --git a/model.lua b/model.lua index 9700f0b..029c8a3 100644 --- a/model.lua +++ b/model.lua @@ -1,50 +1,26 @@ require 'nn' -require 'inn' -require 'cudnn' -local reshapeLastLinearLayer = paths.dofile('utils.lua').reshapeLastLinearLayer -local convertCaffeModelToTorch = paths.dofile('utils.lua').convertCaffeModelToTorch +--require 'inn' +--require 'cudnn' --- 1.1. Create Network -local config = opt.netType -local createModel = paths.dofile('models/' .. config .. '.lua') -print('=> Creating model from file: models/' .. config .. '.lua') -model = createModel(opt.backend) +local createModel = paths.dofile('models/' .. opt.netType .. '.lua') +print('=> Creating model from file: models/' .. opt.netType .. 
'.lua') +local model = createModel() --- convert to accept inputs in the range 0-1 RGB format -convertCaffeModelToTorch(model,{1,1}) +local criterion = nn.CrossEntropyCriterion() -reshapeLastLinearLayer(model,#classes+1) -image_mean = {128/255,128/255,128/255} - -if opt.algo == 'RCNN' then - classifier = model -elseif opt.algo == 'SPP' then - features = model:get(1) - classifier = model:get(3) -end - --- 2. Create Criterion -criterion = nn.CrossEntropyCriterion() - -print('=> Model') +print('Model:') print(model) - -print('=> Criterion') +print('Criterion:') print(criterion) --- 3. If preloading option is set, preload weights from existing models appropriately +-- If preloading option is set, preload weights from existing models appropriately if opt.retrain ~= 'none' then assert(paths.filep(opt.retrain), 'File not found: ' .. opt.retrain) print('Loading model from file: ' .. opt.retrain); - classifier = torch.load(opt.retrain) + model = torch.load(opt.retrain) end --- 4. Convert model to CUDA -print('==> Converting model to CUDA') -model = model:cuda() -criterion:cuda() - collectgarbage() - +return model, criterion diff --git a/models/frcnn_alexnet.lua b/models/frcnn_alexnet.lua new file mode 100644 index 0000000..c8b033d --- /dev/null +++ b/models/frcnn_alexnet.lua @@ -0,0 +1,62 @@ +local function loadModel(params,backend) + + backend = backend or cudnn + + local features = nn.Sequential() + local classifier = nn.Sequential() + + features:add(backend.SpatialConvolution(3,96,11,11,4,4,5,5,1)) + features:add(backend.ReLU(true)) + features:add(backend.SpatialMaxPooling(3,3,2,2,1,1)) + features:add(backend.SpatialCrossMapLRN(5,0.0001,0.75,1)) + + features:add(backend.SpatialConvolution(96,256,5,5,1,1,1,1,2)) + features:add(backend.ReLU(true)) + features:add(backend.SpatialMaxPooling(3,3,2,2,1,1)) + features:add(backend.SpatialCrossMapLRN(5,0.0001,0.75,1)) + + features:add(backend.SpatialConvolution(256,384,3,3,1,1,1,1,1)) + features:add(backend.ReLU(true)) + + features:add(backend.SpatialConvolution(384,384,3,3,1,1,1,1,2)) + features:add(backend.ReLU(true)) + + features:add(backend.SpatialConvolution(384,256,3,3,1,1,1,1,2)) + features:add(backend.ReLU(true)) + + classifier:add(nn.Linear(9216,4096)) + classifier:add(backend.ReLU(true)) + classifier:add(nn.Dropout(0.5)) + + classifier:add(nn.Linear(4096,4096)) + classifier:add(backend.ReLU(true)) + classifier:add(nn.Dropout(0.5)) + + classifier:add(nn.Linear(4096,21)) + + local prl = nn.ParallelTable() + prl:add(features) + prl:add(nn.Identity()) + + local ROIPooling = inn.ROIPooling(6,6):setSpatialScale(1/16) + + local model = nn.Sequential() + model:add(prl) + model:add(ROIPooling) + model:add(nn.View(-1):setNumInputDims(3)) + model:add(classifier) + + if params then + local lparams = model:parameters() + assert(#lparams == #params, 'provided parameters do not match') + + for k,v in ipairs(lparams) do + local p = params[k] + assert(p:numel() == v:numel(), 'wrong number of parameter elements!') + v:copy(p) + end + end + return model +end + +return loadModel diff --git a/nnf.lua b/nnf.lua index a2e7831..d9fd777 100644 --- a/nnf.lua +++ b/nnf.lua @@ -1,20 +1,30 @@ require 'nn' require 'image' -require 'inn' +--require 'inn' require 'xlua' nnf = {} +torch.include('nnf','ImageTransformer.lua') + +torch.include('nnf','DataSetDetection.lua') torch.include('nnf','DataSetPascal.lua') -torch.include('nnf','BatchProvider.lua') +torch.include('nnf','DataSetCOCO.lua') + +torch.include('nnf','BatchProviderBase.lua') 
+torch.include('nnf','BatchProviderIC.lua') +torch.include('nnf','BatchProviderRC.lua') torch.include('nnf','SPP.lua') torch.include('nnf','RCNN.lua') +torch.include('nnf','FRCNN.lua') +torch.include('nnf','ROIPooling.lua') torch.include('nnf','Trainer.lua') torch.include('nnf','Tester.lua') +--torch.include('nnf','Tester_FRCNN.lua') torch.include('nnf','SVMTrainer.lua') -torch.include('nnf','ImageTransformer.lua') +torch.include('nnf','ImageDetect.lua') --return nnf diff --git a/opts.lua b/opts.lua index f07d8dc..457b6f2 100644 --- a/opts.lua +++ b/opts.lua @@ -8,55 +8,29 @@ function M.parse(arg) cmd:text() cmd:text('Options:') - local curr_dir = paths.cwd() - local defaultDataSetDir = paths.concat(curr_dir,'datasets') - local defaultDataDir = paths.concat(defaultDataSetDir,'VOCdevkit/') - local defaultROIDBDir = paths.concat(curr_dir,'data','selective_search_data/') - - cmd:text('Folder parameters') - cmd:option('-cache',paths.concat(curr_dir,'cachedir'),'Cache dir') - cmd:option('-datadir',defaultDataDir,'Path to dataset') - cmd:option('-roidbdir',defaultROIDBDir,'Path to ROIDB') - cmd:text() - cmd:text('Model parameters') - cmd:option('-algo','SPP','Detection framework. Options: RCNN | SPP') - cmd:option('-netType','zeiler','Options: zeiler | vgg') - cmd:option('-backend','cudnn','Options: nn | cudnn') - cmd:text() - cmd:text('Data parameters') - cmd:option('-year',2007,'DataSet year (for Pascal)') - cmd:option('-ipb',500,'iter per batch') - cmd:option('-ntmd',10,'nTimesMoreData') - cmd:option('-fg_frac',0.25,'fg_fraction') - cmd:option('-classes','all','use all classes (all) or given class') - cmd:text() - cmd:text('Training parameters') - cmd:option('-lr',1e-2,'learning rate') - cmd:option('-num_iter',300,'number of iterations') - cmd:option('-nsmooth',40,'number of iterations before reducing learning rate') - cmd:option('-nred',4,'number of divisions by 2 before stopping learning') - cmd:option('-nildfdx',false,'erase memory of gradients when reducing learning rate') - cmd:text() - cmd:text('Others') - cmd:option('-gpu',1,'gpu device to use') - cmd:option('-numthreads',6,'number of threads to use') - cmd:option('-comment','','additional comment to the name') - cmd:option('-seed',0,'random seed (0 = no fixed seed)') - cmd:option('-retrain','none','modelpath for finetuning') - cmd:text() - + cmd:option('-name', 'obj-detect', 'base name') + cmd:option('-algo', 'RCNN', 'Detection framework. 
Options: RCNN | FRCNN') + cmd:option('-netType', 'alexnet', 'Options: alexnet') + cmd:option('-lr', 1e-3, 'learning rate') + cmd:option('-num_iter', 40000, 'number of iterations') + cmd:option('-disp_iter', 100, 'display every n iterations') + cmd:option('-lr_step', 30000, 'step for reducing the learning rate') + cmd:option('-save_step', 10000, 'step for saving the model') + cmd:option('-gpu', 1, 'gpu to use (0 for cpu mode)') + cmd:option('-conf_mat', false, 'Compute confusion matrix during training') + cmd:option('-seed', 1, 'fix random seed (if ~= 0)') + cmd:option('-numthreads',6, 'number of threads') + cmd:option('-retrain', 'none', 'modelpath for finetuning') local opt = cmd:parse(arg or {}) - -- add commandline specified options - opt.save = paths.concat(opt.cache, - cmd:string(opt.netType, opt, - {retrain=true, optimState=true, cache=true, - data=true, gpu=true, numthread=true, - netType=true})) - -- add date/time - opt.save_base = opt.save - local date_time = os.date():gsub(' ','') - opt.save = paths.concat(opt.save, date_time) + + local exp_name = cmd:string(opt.name, opt, {name=true, gpu=true, numthreads=true}) + + rundir = 'cachedir/'..exp_name + paths.mkdir(rundir) + + cmd:log(paths.concat(rundir,'log'), opt) + cmd:addTime('Object-Detection.Torch') return opt diff --git a/test_frcnn.lua b/test_frcnn.lua new file mode 100644 index 0000000..24bb23b --- /dev/null +++ b/test_frcnn.lua @@ -0,0 +1,282 @@ +require 'nnf' +require 'inn' +require 'cudnn' +require 'gnuplot' + +cutorch.setDevice(2) + +dt = torch.load('pascal_2007_train.t7') +if false then + ds = nnf.DataSetPascal{image_set='train', + datadir='/home/francisco/work/datasets/VOCdevkit', + roidbdir='/home/francisco/work/datasets/rcnn/selective_search_data' + } +else + ds = nnf.DataSetPascal{image_set='trainval', + datadir='datasets/VOCdevkit', + roidbdir='data/selective_search_data' + } +end + +if false then + ds.roidb = {} + for i=1,ds:size() do + ds.roidb[i] = torch.IntTensor(10,4):random(1,5) + ds.roidb[i][{{},{3,4}}]:add(6) + end +elseif false then + ds.roidb = dt.roidb +end + +local image_transformer= nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},--{103.939, 116.779, 123.68}, + raw_scale = 255, + swap = {3,2,1}} +if true then + bp = nnf.BatchProviderROI(ds) + bp.image_transformer = image_transformer + bp.bg_threshold = {0.1,0.5} + bp:setupData() +else + bp = nnf.BatchProviderROI(ds) + bp.image_transformer = image_transformer + local temp = torch.load('pascal_2007_train_bp.t7') + bp.bboxes = temp.bboxes +end + + +if false then + local mytest = nnf.ROIPooling(50,50):float() + function do_mytest() + local input0,target0 = bp:getBatch(input0,target0) + local o = mytest:forward(input0) + return input0,target0,o + end + --input0,target0,o = do_mytest() +end + +--------------------------------------------------------------------------------------- +-- model +--------------------------------------------------------------------------------------- +do + + model = nn.Sequential() + local features = nn.Sequential() + local classifier = nn.Sequential() + + if false then + features:add(nn.SpatialConvolutionMM(3,96,11,11,4,4,5,5)) + features:add(nn.ReLU(true)) + features:add(nn.SpatialConvolutionMM(96,128,5,5,2,2,2,2)) + features:add(nn.ReLU(true)) + features:add(nn.SpatialMaxPooling(2,2,2,2)) + + classifier:add(nn.Linear(128*7*7,1024)) + classifier:add(nn.ReLU(true)) + classifier:add(nn.Dropout(0.5)) + classifier:add(nn.Linear(1024,21)) + + elseif false then + require 'loadcaffe' +-- local rcnnfold = 
+
+optimState = {learningRate = 1,--1e-3,
+              weightDecay = 0.000, momentum = 0.9,
+              learningRateDecay = 0, dampening=0}
+
+--------------------------------------------------------------------------
+-- training
+--------------------------------------------------------------------------
+
+confusion_matrix = optim.ConfusionMatrix(21)
+
+
+model:training()
+
+savedModel = model:clone('weight','bias','running_mean','running_std')
+
+criterion = nn.CrossEntropyCriterion():cuda()
+--criterion.nll.sizeAverage = false
+
+--normalize = true
+
+display_iter = 20
+
+--inputs = {torch.CudaTensor(),torch.FloatTensor()}
+inputs = {torch.CudaTensor(),torch.CudaTensor()}
+target = torch.CudaTensor()
+
+learningRate = 1e-3
+
+function train()
+  local err = 0
+  for i=1,display_iter do
+    xlua.progress(i,display_iter)
+    inputs0,target0 = bp:getBatch(inputs0,target0)
+    inputs[1]:resize(inputs0[1]:size()):copy(inputs0[1])
+    inputs[2]:resize(inputs0[2]:size()):copy(inputs0[2])
+    target:resize(target0:size()):copy(target0)
+    local batchSize = target:size(1)
+
+    local feval = function(x)
+      if x ~= parameters then
+        parameters:copy(x)
+      end
+      gradParameters:zero()
+
+      local outputs = model:forward(inputs)
+
+      local f = criterion:forward(outputs,target)
+      local df_do = criterion:backward(outputs,target)
+
+      model:backward(inputs,df_do)
+
+      -- mimic different learning rates per layer
+      -- without the cost of having a huge tensor
+      updateGPlrwd(learningRate)
+
+      if normalize then
+        gradParameters:div(batchSize)
+        f = f/batchSize
+      end
+
+      confusion_matrix:batchAdd(outputs,target)
+
+      return f,gradParameters
+    end
+
+    local x,fx = optim.sgd(feval,parameters,optimState)
+    err = err + fx[1]
+  end
+  print('Training error: '..err/display_iter)
+  return err/display_iter
+end
+
+epoch_size = math.ceil(ds:size()/bp.imgs_per_batch)
+stepsize = 30000--30000
+print_step = 10
+num_iter = 40000--40000
+num_iter = num_iter/display_iter--3000
+
+confusion_matrix:zero()
+train_err = {}
+exp_name = 'frcnn_t11'
+
+paths.mkdir(paths.concat('cachedir',exp_name))
+--logger = optim.Logger(paths.concat('cachedir',exp_name,'train_err.log'))
+train_acc = {}
+for i=1,num_iter do
+
+  if i%(stepsize/display_iter) == 0 then
+    --optimState.learningRate = optimState.learningRate/10
+    learningRate = learningRate/10
+  end
+
+  --print(('Iteration: %d/%d, lr: %.5f'):format(i,num_iter,optimState.learningRate))
+  print(('Iteration: %d/%d, lr: %.5f'):format(i,num_iter,learningRate))
+
+  local t_err = train()
+  table.insert(train_err,t_err)
+
+
+  if i%print_step == 0 then
+    print(confusion_matrix)
+    table.insert(train_acc,confusion_matrix.averageUnionValid*100)
+    gnuplot.epsfigure(paths.concat('cachedir',exp_name,'train_err.eps'))
+    gnuplot.plot('train',torch.Tensor(train_acc),'-')
+    gnuplot.xlabel('Iterations (200 batch update)')
+    gnuplot.ylabel('Training accuracy')
+    gnuplot.grid('on')
+    gnuplot.plotflush()
+    gnuplot.closeall()
+
+    confusion_matrix:zero()
+  end
+
+  if i%100 == 0 then
+    torch.save(paths.concat('cachedir',exp_name..'.t7'),savedModel)
+  end
+end
+
+-- test
+dsv = nnf.DataSetPascal{image_set='test',
+                        datadir='datasets/VOCdevkit',
+                        roidbdir='data/selective_search_data'
+                       }
+
+
+local fpv = {dataset=dsv}
+tester = nnf.Tester_FRCNN(model,fpv)
+tester.cachefolder = 'cachedir/'..exp_name
+tester:test(num_iter)
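
The savedModel line above uses a light-checkpoint idiom that is easy to miss; a minimal standalone sketch (plain nn, hypothetical file name):

require 'nn'
local model = nn.Sequential():add(nn.Linear(10,2))
-- clone(...) deep-copies the network, then re-shares the named tensors with
-- the original, so the clone always reflects the current weights while
-- staying free of the gradient and activation buffers that training fills.
local savedModel = model:clone('weight','bias')
model:get(1).weight:fill(1)            -- stands in for a training update
print(savedModel:get(1).weight:sum())  -- 20: the clone sees the new weights
--torch.save('checkpoint.t7', savedModel)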
diff --git a/tests/test_full_frcnn.lua b/tests/test_full_frcnn.lua
new file mode 100644
index 0000000..c49c2c6
--- /dev/null
+++ b/tests/test_full_frcnn.lua
@@ -0,0 +1,124 @@
+require 'nnf'
+require 'inn'
+require 'cudnn'
+require 'loadcaffe'
+
+cutorch.setDevice(2)
+
+ds = nnf.DataSetPascal{image_set='trainval',
+                       datadir='datasets/VOCdevkit',
+                       roidbdir='data/selective_search_data'
+                      }
+local image_transformer= nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+                                              raw_scale = 255,
+                                              swap = {3,2,1}}
+
+fp = nnf.FRCNN{image_transformer=image_transformer}
+fp:training()
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp = nnf.BatchProviderROI{dataset=ds,feat_provider=fp,
+                          bg_threshold={0.1,0.5}
+                         }
+bp:setupData()
+
+--------------------------------------------------------------------------------
+-- define model
+--------------------------------------------------------------------------------
+model = nn.Sequential()
+do
+  --[[
+  local rcnnfold = '/home/francisco/work/projects/object-detection.torch/data/models/imagenet_models/'
+  local base_model = loadcaffe.load(
+              rcnnfold..'CaffeNet_train.prototxt',
+              rcnnfold..'CaffeNet.v2.caffemodel',
+              'cudnn')
+  for i=1,14 do
+    features:add(base_model:get(i):clone())
+  end
+  for i=17,22 do
+    classifier:add(base_model:get(i):clone())
+  end
+  local linear = nn.Linear(4096,21):cuda()
+  linear.weight:normal(0,0.01)
+  linear.bias:zero()
+  classifier:add(linear)
+  --]]
+  local features = nn.Sequential()
+  local classifier = nn.Sequential()
+  local fold = 'data/models/imagenet_models/alexnet/'
+  local m1 = torch.load(fold..'features.t7')
+  local m2 = torch.load(fold..'top.t7')
+  for i=1,14 do
+    features:add(m1:get(i):clone())
+  end
+  features:get(3).padW = 1
+  features:get(3).padH = 1
+  features:get(7).padW = 1
+  features:get(7).padH = 1
+  for i=2,7 do
+    classifier:add(m2:get(i):clone())
+  end
+  local linear = nn.Linear(4096,21):cuda()
+  linear.weight:normal(0,0.01)
+  linear.bias:zero()
+  classifier:add(linear)
+  collectgarbage()
+  local prl = nn.ParallelTable()
+  prl:add(features)
+  prl:add(nn.Identity())
+  model:add(prl)
+  --model:add(nnf.ROIPooling(6,6):setSpatialScale(1/16))
+  model:add(inn.ROIPooling(6,6):setSpatialScale(1/16))
+  model:add(nn.View(-1):setNumInputDims(3))
+  model:add(classifier)
+end
+model:cuda()
+
+--model = nil
+--collectgarbage()
+--model = torch.load('test_model.t7')
+--model:cuda()
+collectgarbage()
+--------------------------------------------------------------------------------
+-- train
+--------------------------------------------------------------------------------
+
+criterion = nn.CrossEntropyCriterion():cuda()
+
+trainer = nnf.Trainer(model,criterion,bp)
+
+savedModel = model:clone('weight','bias','running_mean','running_std')
+for i=1,400 do
+  if i == 300 then
+    trainer.optimState.learningRate = trainer.optimState.learningRate/10
+  end
+  print(('Iteration %3d/%-3d'):format(i,400))
+  trainer:train(100)
+  print(('  Train error: %g'):format(trainer.fx[i]))
+end
+
+--------------------------------------------------------------------------------
+-- evaluate
+--------------------------------------------------------------------------------
+
+-- add softmax to classifier
+model:add(nn.SoftMax():cuda())
+
+dsv = nnf.DataSetPascal{image_set='test',
+                        datadir='datasets/VOCdevkit',
+                        roidbdir='data/selective_search_data'
+                       }
+
+
+fpv = nnf.FRCNN{image_transformer=image_transformer}
+fpv:evaluate()
+exp_name = 'test2_frcnn'
+
+tester = nnf.Tester(model,fpv,dsv)
+tester.cachefolder = 'cachedir/'..exp_name
+tester:test(40000)
+
+torch.save(paths.concat(tester.cachefolder,'model.t7'),savedModel)
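
A minimal sketch of the input contract for the FRCNN-style model assembled above (assumes inn and cudnn are installed and a GPU is available; shapes are illustrative): the ParallelTable receives the full image batch plus an R x 5 tensor of rois, where column 1 is the image index within the batch and columns 2-5 are x1,y1,x2,y2 in image coordinates.

require 'inn'
require 'cudnn'
local conv = cudnn.SpatialConvolution(3, 8, 3, 3, 1, 1, 1, 1):cuda()
local pool = inn.ROIPooling(6, 6):setSpatialScale(1/16):cuda()
local images = torch.CudaTensor(2, 3, 224, 224):normal()
local rois = torch.CudaTensor{{1,  1,  1, 100, 100},
                              {2, 33, 17, 224, 224}}
local feat = conv:forward(images)       -- 2 x 8 x 224 x 224 feature map
local out = pool:forward({feat, rois})  -- one 8 x 6 x 6 output per roi
print(#out)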
diff --git a/tests/test_full_rcnn.lua b/tests/test_full_rcnn.lua
new file mode 100644
index 0000000..6abd6ab
--- /dev/null
+++ b/tests/test_full_rcnn.lua
@@ -0,0 +1,120 @@
+require 'nnf'
+require 'inn'
+require 'cudnn'
+require 'loadcaffe'
+
+cutorch.setDevice(2)
+
+ds = nnf.DataSetPascal{image_set='trainval',
+                       datadir='datasets/VOCdevkit',
+                       roidbdir='data/selective_search_data'
+                      }
+local image_transformer= nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+                                              raw_scale = 255,
+                                              swap = {3,2,1}}
+
+fp = nnf.RCNN{image_transformer=image_transformer,
+              crop_size=224}
+fp:training()
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp = nnf.BatchProvider{dataset=ds,feat_provider=fp,
+                       bg_threshold={0.0,0.5},
+                       nTimesMoreData=2,
+                       iter_per_batch=100,
+                      }
+bp:setupData()
+
+--------------------------------------------------------------------------------
+-- define model
+--------------------------------------------------------------------------------
+model = nn.Sequential()
+do
+  --[[
+  local rcnnfold = '/home/francisco/work/projects/object-detection.torch/data/models/imagenet_models/'
+  local base_model = loadcaffe.load(
+              rcnnfold..'CaffeNet_train.prototxt',
+              rcnnfold..'CaffeNet.v2.caffemodel',
+              'cudnn')
+  for i=1,14 do
+    features:add(base_model:get(i):clone())
+  end
+  for i=17,22 do
+    classifier:add(base_model:get(i):clone())
+  end
+  local linear = nn.Linear(4096,21):cuda()
+  linear.weight:normal(0,0.01)
+  linear.bias:zero()
+  classifier:add(linear)
+  --]]
+  local features = nn.Sequential()
+  local classifier = nn.Sequential()
+  local fold = 'data/models/imagenet_models/alexnet/'
+  local m1 = torch.load(fold..'features.t7')
+  local m2 = torch.load(fold..'top.t7')
+  for i=1,14 do
+    features:add(m1:get(i):clone())
+  end
+  features:get(3).padW = 1
+  features:get(3).padH = 1
+  features:get(7).padW = 1
+  features:get(7).padH = 1
+  for i=2,7 do
+    classifier:add(m2:get(i):clone())
+  end
+  local linear = nn.Linear(4096,21):cuda()
+  linear.weight:normal(0,0.01)
+  linear.bias:zero()
+  classifier:add(linear)
+  collectgarbage()
+  --local prl = nn.ParallelTable()
+  --prl:add(features)
+  --prl:add(nn.Identity())
+  --model:add(prl)
+  --model:add(nnf.ROIPooling(6,6):setSpatialScale(1/16))
+  --model:add(inn.ROIPooling(6,6):setSpatialScale(1/16))
+  model:add(features)
+  model:add(nn.SpatialAdaptiveMaxPooling(6,6))
+  model:add(nn.View(-1):setNumInputDims(3))
+  model:add(classifier)
+end
+model:cuda()
+--------------------------------------------------------------------------------
+-- train
+--------------------------------------------------------------------------------
+
+criterion = nn.CrossEntropyCriterion():cuda()
+
+trainer = nnf.Trainer(model,criterion,bp)
+
+for i=1,400 do
+  if i == 300 then
+    trainer.optimState.learningRate = trainer.optimState.learningRate/10
+  end
+  print(('Iteration %3d/%-3d'):format(i,400))
+  trainer:train(100)
+end
+
+--------------------------------------------------------------------------------
+-- evaluate
+--------------------------------------------------------------------------------
+
+-- add softmax to classifier
+model:add(nn.SoftMax():cuda())
+
+dsv = nnf.DataSetPascal{image_set='test',
+                        datadir='datasets/VOCdevkit',
+                        roidbdir='data/selective_search_data'
+                       }
+
+
+fpv = nnf.RCNN{image_transformer=image_transformer,
+               crop_size=224}
+fpv:evaluate()
+exp_name = 'test1_rcnn'
+
+tester = nnf.Tester(model,fpv,dsv)
+tester.cachefolder = 'cachedir/'..exp_name
+tester:test(40000)
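
The RCNN path above differs from the FRCNN one in how fixed-size features are obtained: each proposal is cropped and warped to crop_size before entering the network, so a plain nn.SpatialAdaptiveMaxPooling suffices to map the conv output to a fixed 6x6 grid, with no roi input needed. A tiny sketch:

require 'nn'
local pool = nn.SpatialAdaptiveMaxPooling(6, 6)
print(#pool:forward(torch.randn(256, 13, 13)))  -- 256 x 6 x 6
print(#pool:forward(torch.randn(256, 9, 11)))   -- 256 x 6 x 6, whatever the input size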
diff --git a/tests/test_imdetect.lua b/tests/test_imdetect.lua
new file mode 100644
index 0000000..9884cd7
--- /dev/null
+++ b/tests/test_imdetect.lua
@@ -0,0 +1,59 @@
+dofile 'tests/test_utils.lua'
+
+detect1 = nnf.ImageDetect(model1,fp1)
+detect = nnf.ImageDetect(model,fp2)
+
+
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp1 = nnf.BatchProvider{dataset=ds,feat_provider=fp1}
+bp1.nTimesMoreData = 2
+bp1.iter_per_batch = 10
+bp2 = nnf.BatchProviderROI{dataset=ds,feat_provider=fp2}
+
+bp1.bboxes = torch.load('tests/bproibox.t7')
+bp2.bboxes = torch.load('tests/bproibox.t7')
+
+print('test1')
+b,t = bp1:getBatch()
+print('test2')
+b,t = bp2:getBatch()
+
+-- mixing does not work for the moment, as FRCNN accepts a set of images as input,
+-- whereas RCNN and SPP assume that only one image is provided at a time
+--[[
+bp3 = nnf.BatchProviderROI(ds)
+bp3.bboxes = torch.load('tests/bproibox.t7')
+bp3.feat_provider = fp1
+print('test3')
+b,t = bp3:getBatch()
+--]]
+--------------------------------------------------------------------------------
+--
+--------------------------------------------------------------------------------
+
+idx = 100
+im = ds:getImage(idx)
+boxes = ds:getROIBoxes(idx)
+
+--output = detect1:detect(im,boxes)
+--output0 = detect:detect(im,boxes)
+
+--------------------------------------------------------------------------------
+-- compare old and new SPP implementations for the cropping
+--------------------------------------------------------------------------------
+--[[
+output_old = {}
+for i=1,boxes:size(1) do
+  tt0 = fp3:getCrop_old(im,boxes[i])
+  output_old[i] = tt0
+end
+
+output_new = fp3:getCrop(im,boxes) --[881]
+
+for i=1,boxes:size(1) do
+  assert(output_old[i]:eq(output_new[i]):all(),'error '..i)
+end
+--]]
diff --git a/tests/test_train.lua b/tests/test_train.lua
new file mode 100644
index 0000000..7f50819
--- /dev/null
+++ b/tests/test_train.lua
@@ -0,0 +1,26 @@
+dofile 'tests/test_utils.lua'
+
+--------------------------------------------------------------------------------
+-- define batch providers
+--------------------------------------------------------------------------------
+
+bp1 = nnf.BatchProvider{dataset=ds,feat_provider=fp1}
+bp1.nTimesMoreData = 2
+bp1.iter_per_batch = 10
+bp2 = nnf.BatchProviderROI{dataset=ds,feat_provider=fp2}
+
+bp1.bboxes = torch.load('tests/bproibox.t7')
+bp2.bboxes = torch.load('tests/bproibox.t7')
+
+--------------------------------------------------------------------------------
+--
+--------------------------------------------------------------------------------
+
+criterion = nn.CrossEntropyCriterion()
+
+trainer = nnf.Trainer(model1,criterion,bp1)
+
+for i=1,10 do
+  trainer:train(10)
+end
+
diff --git a/tests/test_utils.lua b/tests/test_utils.lua
new file mode 100644
index 0000000..e3d20dc
--- /dev/null
+++ b/tests/test_utils.lua
@@ -0,0 +1,49 @@
+require 'nnf'
+require 'nn'
+
+function getDS()
+  local dt = torch.load('pascal_2007_train.t7')
+  local ds = nnf.DataSetPascal{image_set='train',
+                               datadir='/home/francisco/work/datasets/VOCdevkit',
+                               roidbdir='/home/francisco/work/datasets/rcnn/selective_search_data'
+                              }
+  ds.roidb = dt.roidb
+  return ds
+end
+
+function getModel()
+  local features = nn.Sequential()
+  features:add(nn.SpatialConvolutionMM(3,16,11,11,16,16,5,5))
+  local classifier = nn.Sequential()
+  classifier:add(nn.Linear(7*7*16,21))
+  local model1 = nn.Sequential()
+  model1:add(features)
+  model1:add(nn.SpatialMaxPooling(2,2,2,2))
+  model1:add(nn.View(-1):setNumInputDims(3))
+  model1:add(classifier)
+  local model = nn.Sequential()
+  local prl = nn.ParallelTable()
+  prl:add(features)
+  prl:add(nn.Identity())
+  model:add(prl)
+  model:add(nnf.ROIPooling(7,7):setSpatialScale(1/16))
+  model:add(nn.View(-1):setNumInputDims(3))
+  model:add(classifier)
+  return model1, model, features, classifier
+end
+
+--------------------------------------------------------------------------------
+-- define dataset, models and feature providers
+--------------------------------------------------------------------------------
+
+ds = getDS()
+
+model1, model, features, classifier = getModel()
+
+fp1 = nnf.RCNN{}
+fp2 = nnf.FRCNN{}
+fp3 = nnf.SPP{model=features}
+fp3.use_cache = false
+fp3:evaluate()
+
+
diff --git a/tests/test_visualization.lua b/tests/test_visualization.lua
new file mode 100644
index 0000000..b5d727a
--- /dev/null
+++ b/tests/test_visualization.lua
@@ -0,0 +1,7 @@
+dofile 'tests/test_utils.lua'
+I = ds:getImage(1)
+boxes = ds:getROIBoxes(1)
+scores = torch.rand(boxes:size(1),21)
+dofile 'visualize_detections.lua'
+visualize_detections(I,boxes,scores,0.9)
+
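
One detail of getModel() in tests/test_utils.lua worth spelling out: model1 (the RCNN-style network) and model (the ROIPooling one) are assembled from the same features and classifier instances, so training either network updates the weights seen by both. A self-contained sketch of that sharing:

require 'nn'
local shared = nn.Linear(4, 2)
local a = nn.Sequential():add(shared)
local b = nn.Sequential():add(shared)
a:get(1).weight:fill(1)       -- update through one container...
print(b:get(1).weight:sum())  -- 8: ...and the other sees the same storage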
diff --git a/tests/test_visualization2.lua b/tests/test_visualization2.lua
new file mode 100644
index 0000000..415f86a
--- /dev/null
+++ b/tests/test_visualization2.lua
@@ -0,0 +1,42 @@
+require 'cutorch'
+require 'nnf'
+require 'cudnn'
+require 'inn'
+dofile 'visualize_detections.lua'
+
+cutorch.setDevice(2)
+
+--model = torch.load('cachedir/test2_frcnn/model.t7')
+model = torch.load('cachedir/model.t7')
+--model:add(nn.SoftMax():cuda())
+
+image_transformer= nnf.ImageTransformer{mean_pix={102.9801,115.9465,122.7717},
+                                        raw_scale = 255,
+                                        swap = {3,2,1}}
+
+
+ds = nnf.DataSetPascal{image_set='test',
+                       datadir='datasets/VOCdevkit',
+                       roidbdir='data/selective_search_data'
+                      }
+
+fp = nnf.FRCNN{image_transformer=image_transformer}
+fp:evaluate()
+model:evaluate()
+detect = nnf.ImageDetect(model,fp)
+
+im_idx = 700
+
+I = ds:getImage(im_idx)
+boxes = ds:getROIBoxes(im_idx)
+--boxes = ds:getGTBoxes(im_idx)
+
+scores,bb = detect:detect(I,boxes)
+
+w = visualize_detections(I,boxes,scores,0.5,ds.classes)
+
+Im = w:image()
+II = Im:toFloatTensor()
+
+image.save('example_frcnn.jpg',II)
+
diff --git a/train.lua b/train.lua
new file mode 100644
index 0000000..8184922
--- /dev/null
+++ b/train.lua
@@ -0,0 +1,49 @@
+trainer = nnf.Trainer(model, criterion, batch_provider)
+
+local num_iter = opt.num_iter/opt.disp_iter
+local lr_step = opt.lr_step/opt.disp_iter
+local save_step = opt.save_step/opt.disp_iter
+
+trainer.optimState.learningRate = opt.lr
+
+logger = optim.Logger(paths.concat(rundir,'train.log'))
+
+if opt.conf_mat then
+  local conf_classes = {'background'}
+  for k,v in ipairs(ds_train.classes) do
+    table.insert(conf_classes,v)
+  end
+  trainer.confusion = optim.ConfusionMatrix(conf_classes)
+end
+
+local lightModel = model:clone('weight','bias','running_mean','running_std')
+
+-- main training loop
+for i=1,num_iter do
+  if i % lr_step == 0 then
+    trainer.optimState.learningRate = trainer.optimState.learningRate/10
+  end
+  print(('Iteration %3d/%-3d'):format(i,num_iter))
+  trainer:train(opt.disp_iter)
+  print(('  Training error: %.5f'):format(trainer.fx[i]))
+
+  if opt.conf_mat then
+    print(trainer.confusion)
+    logger:add{
+      ['train error']=trainer.fx[i],
+      ['confusion matrix']=tostring(trainer.confusion),
+      ['learning rate']=trainer.optimState.learningRate
+    }
+  else
+    logger:add{
+      ['train error']=trainer.fx[i],
+      ['learning rate']=trainer.optimState.learningRate
+    }
+  end
+
+  if i % save_step == 0 then
+    torch.save(paths.concat(rundir, 'model.t7'), lightModel)
+  end
+end
+
+torch.save(paths.concat(rundir, 'model.t7'), lightModel)
diff --git a/utils.lua b/utils.lua
index 0255907..689c00f 100644
--- a/utils.lua
+++ b/utils.lua
@@ -2,6 +2,7 @@
 -- utility functions for the evaluation part
 --------------------------------------------------------------------------------
 
+-- can be replaced by the new torch.cat function
 local function joinTable(input,dim)
   local size = torch.LongStorage()
   local is_ok = false
@@ -29,6 +30,50 @@ local function joinTable(input,dim)
   return output
 end
 
+local function recursiveResizeAsCopyTyped(t1,t2,type)
+  if torch.type(t2) == 'table' then
+    t1 = (torch.type(t1) == 'table') and t1 or {t1}
+    for key,_ in pairs(t2) do
+      t1[key], t2[key] = recursiveResizeAsCopyTyped(t1[key], t2[key], type)
+    end
+  elseif torch.isTensor(t2) then
+    local type = type or t2:type()
+    t1 = torch.isTypeOf(t1,type) and t1 or torch.Tensor():type(type)
+    t1:resize(t2:size()):copy(t2)
+  else
+    error("expecting nested tensors or tables. Got "..
+          torch.type(t1).." and "..torch.type(t2).." instead")
+  end
+  return t1, t2
+end
+
+local function concat(t1,t2,dim)
+  local out
+  assert(t1:type() == t2:type(),'tensors should have the same type')
+  if t1:dim() > 0 and t2:dim() > 0 then
+    dim = dim or t1:dim()
+    out = torch.cat(t1,t2,dim)
+  elseif t1:dim() > 0 then
+    out = t1:clone()
+  else
+    out = t2:clone()
+  end
+  return out
+end
+
+-- modifies the bbox input in place
+local function flipBoundingBoxes(bbox, im_width)
+  if bbox:dim() == 1 then
+    local tt = bbox[1]
+    bbox[1] = im_width-bbox[3]+1
+    bbox[3] = im_width-tt+1
+  else
+    local tt = bbox[{{},1}]:clone()
+    bbox[{{},1}]:fill(im_width+1):add(-1,bbox[{{},3}])
+    bbox[{{},3}]:fill(im_width+1):add(-1,tt)
+  end
+end
+
 --------------------------------------------------------------------------------
 
 local function keep_top_k(boxes,top_k)
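
A quick sanity check (illustration only, relying on the utils table exported at the end of this file) of the flipping convention used by flipBoundingBoxes: boxes are 1-indexed and inclusive, so a horizontal flip maps x1 to W-x2+1 and x2 to W-x1+1, which preserves box widths.

local utils = dofile 'utils.lua'
local bbox = torch.Tensor{{11, 5, 30, 25}}
utils.flipBoundingBoxes(bbox, 100)  -- flip in place for an image of width 100
print(bbox)  -- 71  5  90  25: the width is 19 before and after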
instead") + end + return t1, t2 +end + +local function concat(t1,t2,dim) + local out + assert(t1:type() == t2:type(),'tensors should have the same type') + if t1:dim() > 0 and t2:dim() > 0 then + dim = dim or t1:dim() + out = torch.cat(t1,t2,dim) + elseif t1:dim() > 0 then + out = t1:clone() + else + out = t2:clone() + end + return out +end + +-- modify bbox input +local function flipBoundingBoxes(bbox, im_width) + if bbox:dim() == 1 then + local tt = bbox[1] + bbox[1] = im_width-bbox[3]+1 + bbox[3] = im_width-tt +1 + else + local tt = bbox[{{},1}]:clone() + bbox[{{},1}]:fill(im_width+1):add(-1,bbox[{{},3}]) + bbox[{{},3}]:fill(im_width+1):add(-1,tt) + end +end + -------------------------------------------------------------------------------- local function keep_top_k(boxes,top_k) @@ -80,7 +125,6 @@ end -------------------------------------------------------------------------------- local function boxoverlap(a,b) - --local b = anno.objects[j] local b = b.xmin and {b.xmin,b.ymin,b.xmax,b.ymax} or b local x1 = a:select(2,1):clone() @@ -267,6 +311,10 @@ utils.VOCap = VOCap utils.convertCaffeModelToTorch = convertCaffeModelToTorch utils.reshapeLastLinearLayer = reshapeLastLinearLayer utils.sanitize = sanitize +utils.recursiveResizeAsCopyTyped = recursiveResizeAsCopyTyped +utils.flipBoundingBoxes = flipBoundingBoxes +utils.concat = concat +utils.boxoverlap = boxoverlap return utils diff --git a/visualize_detections.lua b/visualize_detections.lua new file mode 100644 index 0000000..2381de4 --- /dev/null +++ b/visualize_detections.lua @@ -0,0 +1,62 @@ +local nms = dofile 'nms.lua' + +function visualize_detections(im,boxes,scores,thresh,cl_names) + local ok = pcall(require,'qt') + if not ok then + error('You need to run visualize_detections using qlua') + end + require 'qttorch' + require 'qtwidget' + + -- select best scoring boxes without background + local max_score,idx = scores[{{},{2,-1}}]:max(2) + + local idx_thresh = max_score:gt(thresh) + max_score = max_score[idx_thresh] + idx = idx[idx_thresh] + + local r = torch.range(1,boxes:size(1)):long() + local rr = r[idx_thresh] + if rr:numel() == 0 then + error('No detections with a score greater than the specified threshold') + end + local boxes_thresh = boxes:index(1,rr) + + local keep = nms(torch.cat(boxes_thresh:float(),max_score:float(),2),0.3) + + boxes_thresh = boxes_thresh:index(1,keep) + max_score = max_score:index(1,keep) + idx = idx:index(1,keep) + + local num_boxes = boxes_thresh:size(1) + local widths = boxes_thresh[{{},3}] - boxes_thresh[{{},1}] + local heights = boxes_thresh[{{},4}] - boxes_thresh[{{},2}] + + local x,y = im:size(3),im:size(2) + local w = qtwidget.newwindow(x,y,"Detections") + local qtimg = qt.QImage.fromTensor(im) + w:image(0,0,x,y,qtimg) + local fontsize = 15 + + for i=1,num_boxes do + local x,y = boxes_thresh[{i,1}],boxes_thresh[{i,2}] + local width,height = widths[i], heights[i] + + -- add bbox + w:rectangle(x,y,width,height) + + -- add score + w:moveto(x,y+fontsize) + w:setcolor("red") + w:setfont(qt.QFont{serif=true,italic=true,size=fontsize,bold=true}) + if cl_names then + w:show(string.format('%s: %.2f',cl_names[idx[i]],max_score[i])) + else + w:show(string.format('%d: %.2f',idx[i],max_score[i])) + end + end + w:setcolor("red") + w:setlinewidth(2) + w:stroke() + return w +end