koreader / koreader-base

I was thinking about writing a module that would allow users to double-tap a panel in a manga page to zoom to that panel (basically, cut out that part of the image and display it).
Base image:

Cut out panels:

To get this result I used the OpenCV library and a short Python script: https://pastebin.com/hKKGgzNw. My question is whether it's reasonable to include OpenCV, and if so, how would I go about it?

OpenCV seems like it might be rather excessively large for our purposes, even setting aside some potential doubts about its usability on a poor little Kindle or Kobo.

But perhaps more importantly, Leptonica should do roughly the same things faster, and we already have it. I implicitly mentioned it in koreader/koreader#6408 (comment), although I didn't go into the underlying technology. In fact, the current zoom in page flipping mode already works mostly the way you sketched. ;-)

Some examples:

koreader-base/ffi/koptcontext.lua

Lines 232 to 258 in 0e64066

    
           --[[
           -- Detect blocks on the source page bitmap and cache them on the context.
           --
           -- Does nothing when self.src.data is unset. Otherwise the page is binarized
           -- (threshold 128) and leptonica.pixGetRegionsBinary is asked for its
           -- text-block mask. When that call reports success (status 0) this sets:
           --   self.nboxa       boxes from pixSplitIntoBoxa on the mask, each passed
           --                    through boxAdjustSides(box, box, -1, 0, -1, 0)
           --   self.rboxa       boxaCombineOverlaps(self.nboxa)
           --   self.page_width  width of the binarized page
           --   self.page_height height of the binarized page
           -- On failure none of these fields are touched.
           --
           -- NOTE(review): a previous self.nboxa / self.rboxa is overwritten without
           -- being destroyed here -- confirm the owner frees them before re-running.
           --]]
           function KOPTContext_mt.__index:findPageBlocks()
               if not self.src.data then return end

               local pixs = k2pdfopt.bitmap2pix(self.src,
                   0, 0, self.src.width, self.src.height)
               local pixr = leptonica.pixThresholdToBinary(pixs, 128)
               -- only the binarized copy is needed from here on
               leptonica.pixDestroy(ffi.new('PIX *[1]', pixs))

               local pixtb = ffi.new("PIX *[1]")
               local status = leptonica.pixGetRegionsBinary(pixr, nil, nil, pixtb, nil)
               if status == 0 then
                   self.nboxa = leptonica.pixSplitIntoBoxa(pixtb[0], 5, 10, 20, 80, 10, 0)
                   for i = 0, leptonica.boxaGetCount(self.nboxa) - 1 do
                       local box = leptonica.boxaGetBox(self.nboxa, i, C.L_CLONE)
                       -- adjusted in place (boxd == boxs); presumably so that touching
                       -- boxes overlap and get merged by boxaCombineOverlaps below
                       leptonica.boxAdjustSides(box, box, -1, 0, -1, 0)
                       -- L_CLONE gave us an extra reference to the box: drop it again,
                       -- otherwise the box outlives its boxa and leaks
                       leptonica.boxDestroy(ffi.new('BOX *[1]', box))
                   end
                   self.rboxa = leptonica.boxaCombineOverlaps(self.nboxa)
                   self.page_width = leptonica.pixGetWidth(pixr)
                   self.page_height = leptonica.pixGetHeight(pixr)

                   -- uncomment this to show text blocks in situ
                   --leptonica.pixWritePng("textblock-mask.png", pixtb[0], 0.0)

                   -- pixtb already is a PIX *[1], so it can be handed over directly
                   leptonica.pixDestroy(pixtb)
               end
               leptonica.pixDestroy(ffi.new('PIX *[1]', pixr))
           end

koreader-base/ffi/koptcontext.lua

Lines 260 to 335 in 0e64066

    
           --[[
           -- Get the page block at location (x_rel, y_rel).
           --
           -- @param x_rel horizontal position in range [0, 1] relative to page width
           -- @param y_rel vertical position in range [0, 1] relative to page height
           -- @return nil when self.src.data, self.nboxa or self.rboxa is missing (the
           --         latter two are set by findPageBlocks) or when no block matches;
           --         otherwise a table { x0, y0, x1, y1 } holding the block edges,
           --         again relative to the page width and height
           --]]
           function KOPTContext_mt.__index:getPageBlock(x_rel, y_rel)
               local block = nil
               if self.src.data and self.nboxa ~= nil and self.rboxa ~= nil then
                   local w, h = self:getPageDim()

                   -- clip the boxes to a 2px high, full-width strip at the requested height
                   local tbox = leptonica.boxCreate(0, y_rel * h, w, 2)
                   local boxa = leptonica.boxaClipToBox(self.nboxa, tbox)
                   leptonica.boxDestroy(ffi.new('BOX *[1]', tbox))
                   for i = 0, leptonica.boxaGetCount(boxa) - 1 do
                       local box = leptonica.boxaGetBox(boxa, i, C.L_CLONE)
                       leptonica.boxAdjustSides(box, box, -1, 0, -1, 0)
                       -- release the reference taken by L_CLONE, otherwise the box
                       -- survives boxaDestroy below and leaks
                       leptonica.boxDestroy(ffi.new('BOX *[1]', box))
                   end
                   local boxatb = leptonica.boxaCombineOverlaps(boxa)
                   leptonica.boxaDestroy(ffi.new('BOXA *[1]', boxa))

                   local clipped_box, unclipped_box
                   -- first merged strip box spanning x_rel, stretched to full page height
                   for i = 0, leptonica.boxaGetCount(boxatb) - 1 do
                       local box = leptonica.boxaGetBox(boxatb, i, C.L_CLONE)
                       if box.x / w <= x_rel and (box.x + box.w) / w >= x_rel then
                           clipped_box = leptonica.boxCreate(box.x, 0, box.w, h)
                       end
                       leptonica.boxDestroy(ffi.new('BOX *[1]', box))
                       if clipped_box ~= nil then break end
                   end
                   -- first merged page box containing the point in both directions
                   for i = 0, leptonica.boxaGetCount(self.rboxa) - 1 do
                       local box = leptonica.boxaGetBox(self.rboxa, i, C.L_CLONE)
                       if box.x / w <= x_rel and (box.x + box.w) / w >= x_rel
                           and box.y / h <= y_rel and (box.y + box.h) / h >= y_rel then
                           unclipped_box = leptonica.boxCreate(box.x, box.y, box.w, box.h)
                       end
                       leptonica.boxDestroy(ffi.new('BOX *[1]', box))
                       if unclipped_box ~= nil then break end
                   end

                   -- the result is the overlap of both candidates
                   if clipped_box ~= nil and unclipped_box ~= nil then
                       local box = leptonica.boxOverlapRegion(clipped_box, unclipped_box)
                       if box ~= nil then
                           block = {
                               x0 = box.x / w, y0 = box.y / h,
                               x1 = (box.x + box.w) / w,
                               y1 = (box.y + box.h) / h,
                           }
                           leptonica.boxDestroy(ffi.new('BOX *[1]', box))
                       end
                   end
                   if clipped_box ~= nil then
                       leptonica.boxDestroy(ffi.new('BOX *[1]', clipped_box))
                   end
                   if unclipped_box ~= nil then
                       leptonica.boxDestroy(ffi.new('BOX *[1]', unclipped_box))
                   end

                   -- uncomment this to show text blocks in situ
                   --[[
                   if block then
                       local w, h = self.src.width, self.src.height
                       local box = leptonica.boxCreate(block.x0*w, block.y0*h,
                           (block.x1-block.x0)*w, (block.y1-block.y0)*h)
                       local boxa = leptonica.boxaCreate(1)
                       leptonica.boxaAddBox(boxa, box, C.L_COPY)
                       local pixs = k2pdfopt.bitmap2pix(self.src,
                           0, 0, self.src.width, self.src.height)
                       local pixc = leptonica.pixDrawBoxaRandom(pixs, boxa, 8)
                       leptonica.pixWritePng("textblock.png", pixc, 0.0)
                       leptonica.pixDestroy(ffi.new('PIX *[1]', pixc))
                       leptonica.boxaDestroy(ffi.new('BOXA *[1]', boxa))
                       leptonica.boxDestroy(ffi.new('BOX *[1]', box))
                   end
                   --]]

                   leptonica.boxaDestroy(ffi.new('BOXA *[1]', boxatb))
               end
               return block
           end

Clearly the fact that it's hidden in page flipping mode means almost no one knows it exists. So there are multiple issues.

The zoom to box feature is great as a starting point and it's neat most of the time but it doesn't always work out. You want much freer zoom.
Some details on things we might want in free zoom here: koreader/koreader#5524
Ideally all this would somehow be available in the main reader mode easily without having to trigger a mostly hidden special mode.
I'm not sure page flipping mode still has much purpose with the greatly improved skim widget.

Basically what @Frenzie said ;).

(OpenCV is humongous, and I don't think it has any arm-specific codepaths).

Thanks. Just like I thought. I'll take a look at Leptonica, but to be fair, there don't seem to be many resources to learn from, so I'll probably give up on it.

	--[[
	-- Detect blocks on the source page bitmap and cache them on the context.
	--
	-- Does nothing when self.src.data is unset. Otherwise the page is binarized
	-- (threshold 128) and leptonica.pixGetRegionsBinary is asked for its
	-- text-block mask. When that call reports success (status 0) this sets:
	--   self.nboxa       boxes from pixSplitIntoBoxa on the mask, each passed
	--                    through boxAdjustSides(box, box, -1, 0, -1, 0)
	--   self.rboxa       boxaCombineOverlaps(self.nboxa)
	--   self.page_width  width of the binarized page
	--   self.page_height height of the binarized page
	-- On failure none of these fields are touched.
	--
	-- NOTE(review): a previous self.nboxa / self.rboxa is overwritten without
	-- being destroyed here -- confirm the owner frees them before re-running.
	--]]
	function KOPTContext_mt.__index:findPageBlocks()
	    if not self.src.data then return end

	    local pixs = k2pdfopt.bitmap2pix(self.src,
	        0, 0, self.src.width, self.src.height)
	    local pixr = leptonica.pixThresholdToBinary(pixs, 128)
	    -- only the binarized copy is needed from here on
	    leptonica.pixDestroy(ffi.new('PIX *[1]', pixs))

	    local pixtb = ffi.new("PIX *[1]")
	    local status = leptonica.pixGetRegionsBinary(pixr, nil, nil, pixtb, nil)
	    if status == 0 then
	        self.nboxa = leptonica.pixSplitIntoBoxa(pixtb[0], 5, 10, 20, 80, 10, 0)
	        for i = 0, leptonica.boxaGetCount(self.nboxa) - 1 do
	            local box = leptonica.boxaGetBox(self.nboxa, i, C.L_CLONE)
	            -- adjusted in place (boxd == boxs); presumably so that touching
	            -- boxes overlap and get merged by boxaCombineOverlaps below
	            leptonica.boxAdjustSides(box, box, -1, 0, -1, 0)
	            -- L_CLONE gave us an extra reference to the box: drop it again,
	            -- otherwise the box outlives its boxa and leaks
	            leptonica.boxDestroy(ffi.new('BOX *[1]', box))
	        end
	        self.rboxa = leptonica.boxaCombineOverlaps(self.nboxa)
	        self.page_width = leptonica.pixGetWidth(pixr)
	        self.page_height = leptonica.pixGetHeight(pixr)

	        -- uncomment this to show text blocks in situ
	        --leptonica.pixWritePng("textblock-mask.png", pixtb[0], 0.0)

	        -- pixtb already is a PIX *[1], so it can be handed over directly
	        leptonica.pixDestroy(pixtb)
	    end
	    leptonica.pixDestroy(ffi.new('PIX *[1]', pixr))
	end

	--[[
	-- Get the page block at location (x_rel, y_rel).
	--
	-- @param x_rel horizontal position in range [0, 1] relative to page width
	-- @param y_rel vertical position in range [0, 1] relative to page height
	-- @return nil when self.src.data, self.nboxa or self.rboxa is missing (the
	--         latter two are set by findPageBlocks) or when no block matches;
	--         otherwise a table { x0, y0, x1, y1 } holding the block edges,
	--         again relative to the page width and height
	--]]
	function KOPTContext_mt.__index:getPageBlock(x_rel, y_rel)
	    local block = nil
	    if self.src.data and self.nboxa ~= nil and self.rboxa ~= nil then
	        local w, h = self:getPageDim()

	        -- clip the boxes to a 2px high, full-width strip at the requested height
	        local tbox = leptonica.boxCreate(0, y_rel * h, w, 2)
	        local boxa = leptonica.boxaClipToBox(self.nboxa, tbox)
	        leptonica.boxDestroy(ffi.new('BOX *[1]', tbox))
	        for i = 0, leptonica.boxaGetCount(boxa) - 1 do
	            local box = leptonica.boxaGetBox(boxa, i, C.L_CLONE)
	            leptonica.boxAdjustSides(box, box, -1, 0, -1, 0)
	            -- release the reference taken by L_CLONE, otherwise the box
	            -- survives boxaDestroy below and leaks
	            leptonica.boxDestroy(ffi.new('BOX *[1]', box))
	        end
	        local boxatb = leptonica.boxaCombineOverlaps(boxa)
	        leptonica.boxaDestroy(ffi.new('BOXA *[1]', boxa))

	        local clipped_box, unclipped_box
	        -- first merged strip box spanning x_rel, stretched to full page height
	        for i = 0, leptonica.boxaGetCount(boxatb) - 1 do
	            local box = leptonica.boxaGetBox(boxatb, i, C.L_CLONE)
	            if box.x / w <= x_rel and (box.x + box.w) / w >= x_rel then
	                clipped_box = leptonica.boxCreate(box.x, 0, box.w, h)
	            end
	            leptonica.boxDestroy(ffi.new('BOX *[1]', box))
	            if clipped_box ~= nil then break end
	        end
	        -- first merged page box containing the point in both directions
	        for i = 0, leptonica.boxaGetCount(self.rboxa) - 1 do
	            local box = leptonica.boxaGetBox(self.rboxa, i, C.L_CLONE)
	            if box.x / w <= x_rel and (box.x + box.w) / w >= x_rel
	                and box.y / h <= y_rel and (box.y + box.h) / h >= y_rel then
	                unclipped_box = leptonica.boxCreate(box.x, box.y, box.w, box.h)
	            end
	            leptonica.boxDestroy(ffi.new('BOX *[1]', box))
	            if unclipped_box ~= nil then break end
	        end

	        -- the result is the overlap of both candidates
	        if clipped_box ~= nil and unclipped_box ~= nil then
	            local box = leptonica.boxOverlapRegion(clipped_box, unclipped_box)
	            if box ~= nil then
	                block = {
	                    x0 = box.x / w, y0 = box.y / h,
	                    x1 = (box.x + box.w) / w,
	                    y1 = (box.y + box.h) / h,
	                }
	                leptonica.boxDestroy(ffi.new('BOX *[1]', box))
	            end
	        end
	        if clipped_box ~= nil then
	            leptonica.boxDestroy(ffi.new('BOX *[1]', clipped_box))
	        end
	        if unclipped_box ~= nil then
	            leptonica.boxDestroy(ffi.new('BOX *[1]', unclipped_box))
	        end

	        -- uncomment this to show text blocks in situ
	        --[[
	        if block then
	            local w, h = self.src.width, self.src.height
	            local box = leptonica.boxCreate(block.x0*w, block.y0*h,
	                (block.x1-block.x0)*w, (block.y1-block.y0)*h)
	            local boxa = leptonica.boxaCreate(1)
	            leptonica.boxaAddBox(boxa, box, C.L_COPY)
	            local pixs = k2pdfopt.bitmap2pix(self.src,
	                0, 0, self.src.width, self.src.height)
	            local pixc = leptonica.pixDrawBoxaRandom(pixs, boxa, 8)
	            leptonica.pixWritePng("textblock.png", pixc, 0.0)
	            leptonica.pixDestroy(ffi.new('PIX *[1]', pixc))
	            leptonica.boxaDestroy(ffi.new('BOXA *[1]', boxa))
	            leptonica.boxDestroy(ffi.new('BOX *[1]', box))
	        end
	        --]]

	        leptonica.boxaDestroy(ffi.new('BOXA *[1]', boxatb))
	    end

	    return block
	end

Is it doable/reasonable to add opencv?