davisking / dlib

A toolkit for making real world machine learning and data analysis applications in C++

Home Page:http://dlib.net

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Should one pull request an optional feature

VisionEp1 opened this issue · comments

Hi @davisking,
From time to time I write some small functions to adjust or improve dlib for my needs.
For example, extract_image_chips scales linearly (obviously) with the number of crops. On my system it can take around 8 ms to create a single crop, and with 8 crops per image that adds up.
So I built a small "extract_image_chips_parallel" which uses the nice dlib::parallel_for for the two main loops.

My question is: should I create a pull request for trivial functions like this:

// Multi-threaded variant of extract_image_chips(): extracts one chip per entry
// of chip_locations from img and stores them in chips (chips is resized to
// chip_locations.size(), chips[i] corresponds to chip_locations[i]).
//
// Both per-chip loops are dispatched through dlib::parallel_for:
//   1. the pass that determines the bounding box of all requested chips and how
//      deep the image pyramid has to be, and
//   2. the pass that actually resamples each chip.
//
// Requirements (checked only when ENABLE_ASSERTS is defined):
//   - chip_locations[i].size() != 0
//   - chip_locations[i].rect.is_empty() == false
//
// When interpolation_type is dlib::interpolate_nearest_neighbor no image pyramid
// is built and every chip is sampled directly from img.
//
// NOTE(review): interp, img and the pyramid levels are read concurrently from
// several worker threads; this assumes transform_image() and the interpolation
// object are safe to use that way -- confirm for custom interpolation types.
template <
	typename image_type1,
	typename image_type2,
	typename interpolation_type
>
void extract_image_chips_parallel(
	const image_type1& img,
	const std::vector<dlib::chip_details>& chip_locations,
	dlib::array<image_type2>& chips,
	const interpolation_type& interp
)
{
	// make sure requires clause is not broken
#ifdef ENABLE_ASSERTS
	for (unsigned long i = 0; i < chip_locations.size(); ++i)
	{
		DLIB_CASSERT(chip_locations[i].size() != 0 &&
			chip_locations[i].rect.is_empty() == false,
			"\t void extract_image_chips_parallel()"
			<< "\n\t Invalid inputs were given to this function."
			<< "\n\t chip_locations[" << i << "].size():            " << chip_locations[i].size()
			<< "\n\t chip_locations[" << i << "].rect.is_empty(): " << chip_locations[i].rect.is_empty()
		);
	}
#endif

	// If nearest-neighbor interpolation is wanted, then don't use an image pyramid.
	// interpolation_type is deduced from a `const T&` parameter, so it is already
	// free of const/reference qualifiers and can be compared directly.
	constexpr bool image_pyramid_enabled =
		!std::is_same<interpolation_type, dlib::interpolate_nearest_neighbor>::value;

	const long num_chips = static_cast<long>(chip_locations.size());

	dlib::pyramid_down<2> pyr;

	// If the chip is supposed to be much smaller than the source subwindow then you
	// can't just extract it using bilinear interpolation since at a high enough
	// downsampling amount it would effectively turn into nearest neighbor
	// interpolation.  So we use an image pyramid to make sure the interpolation is
	// fast but also high quality.  The first thing we do is figure out how deep the
	// image pyramid needs to be.
	//
	// bounding_box and max_depth are accumulated by all worker threads and must
	// only be touched while accumulator_mutex is held.
	dlib::rectangle bounding_box;
	long max_depth = 0;
	dlib::mutex accumulator_mutex;
	dlib::parallel_for(0, num_chips, [&](long i)
		{
			const dlib::chip_details& loc = chip_locations[i];

			long depth = 0;
			double grow = 2;
			dlib::drectangle rect = pyr.rect_down(loc.rect);
			while (image_pyramid_enabled && rect.area() > loc.size())
			{
				rect = pyr.rect_down(rect);
				++depth;
				// We drop the image size by a factor of 2 each iteration and then assume a
				// border of 2 pixels is needed to avoid any border effects of the crop.
				grow = grow * 2 + 2;
			}

			// Axis-aligned box that contains the (possibly rotated) source rectangle.
			dlib::drectangle rot_rect;
			const dlib::vector<double, 2> cent = center(loc.rect);
			rot_rect += dlib::rotate_point<double>(cent, loc.rect.tl_corner(), loc.angle);
			rot_rect += dlib::rotate_point<double>(cent, loc.rect.tr_corner(), loc.angle);
			rot_rect += dlib::rotate_point<double>(cent, loc.rect.bl_corner(), loc.angle);
			rot_rect += dlib::rotate_point<double>(cent, loc.rect.br_corner(), loc.angle);

			// Everything that only depends on this chip is computed before taking the
			// lock, so the critical section is just the two accumulator updates.
			const auto clipped = grow_rect(rot_rect, grow).intersect(get_rect(img));

			dlib::auto_mutex lock(accumulator_mutex);
			bounding_box += clipped;
			max_depth = std::max(depth, max_depth);
		});

	// now make an image pyramid
	dlib::array<dlib::array2d<typename dlib::image_traits<image_type1>::pixel_type> > levels(max_depth);
	if (levels.size() != 0)
		pyr(sub_image(img, bounding_box), levels[0]);
	for (unsigned long i = 1; i < levels.size(); ++i)
		pyr(levels[i - 1], levels[i]);

	// now pull out the chips.  Each iteration writes only to chips[i], so no
	// locking is needed here.
	chips.resize(chip_locations.size());
	dlib::parallel_for(0, num_chips, [&](long i)
		{
			const dlib::chip_details& loc = chip_locations[i];

			// If the chip doesn't have any rotation or scaling then use the basic version
			// of chip extraction that just does a fast copy.
			if (loc.angle == 0 &&
				loc.rows == loc.rect.height() &&
				loc.cols == loc.rect.width())
			{
				dlib::impl::basic_extract_image_chip(img, loc.rect, chips[i]);
				return;
			}

			set_image_size(chips[i], loc.rows, loc.cols);

			// figure out which level in the pyramid to use to extract the chip
			// (-1 means: sample straight from the cropped input image)
			int level = -1;
			dlib::drectangle rect = translate_rect(loc.rect, -bounding_box.tl_corner());
			while (image_pyramid_enabled && pyr.rect_down(rect).area() > loc.size())
			{
				++level;
				rect = pyr.rect_down(rect);
			}

			// find the appropriate transformation that maps from the chip to the input
			// image.  Three corner correspondences are used to fit the affine map.
			const auto chip_rect = get_rect(chips[i]);
			const dlib::vector<double, 2> mid = center(rect);
			std::vector<dlib::vector<double, 2> > from, to;
			from.push_back(chip_rect.tl_corner());
			to.push_back(dlib::rotate_point<double>(mid, rect.tl_corner(), loc.angle));
			from.push_back(chip_rect.tr_corner());
			to.push_back(dlib::rotate_point<double>(mid, rect.tr_corner(), loc.angle));
			from.push_back(chip_rect.bl_corner());
			to.push_back(dlib::rotate_point<double>(mid, rect.bl_corner(), loc.angle));
			dlib::point_transform_affine trns = find_affine_transform(from, to);

			// now extract the actual chip
			if (level == -1)
				transform_image(sub_image(img, bounding_box), chips[i], interp, trns);
			else
				transform_image(levels[level], chips[i], interp, trns);
		});
}
 

Or would this make dlib too bloated, so that users should just create and store their own functions somewhere?

I run into this question a couple of times a year, and obviously it's a case-by-case thing, but I am asking with this example so I can get a feeling for what would be wanted and what would not.

Yeah IDK. It depends. This kind of thing is special case enough that I think different people will want to do it differently (like I have done it differently) so I wouldn't bother adding it to dlib. Sometimes though, if it's like "yeah, that's definitely something a bunch of people will want", then it's worth a pull request.

yeah that makes sense, thanks!