demo_caffe in C++ instead of Python

Question

demo_caffe in C++ instead of Python

liwei46 opened this issue 6 years ago · comments

liwei46 commented 6 years ago

Hi all,

I am trying to run the converted Caffe model from C++ code, but I cannot get the same results as with demo_caffe.py.

Has anyone tried this? Thanks.

The following is my code, adapted from ssd_detect.cpp:

// This is a demo code for using a SSD model to do detection on a single image.
// The code is modified from examples/cpp_classification/classification.cpp.
// Usage:
// ssd_detect model_file weights_file in_file out_dir
//
// where model_file is the .prototxt file defining the network architecture,
// weights_file is the .caffemodel file containing the network parameters,
// in_file is the image to run detection on, for example
// folder/img1.JPEG
// and out_dir is the directory that receives the annotated image, written as
// out_dir/<base name of in_file>_mnet2_cf.jpg
//
#include <caffe/caffe.hpp>
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
// using namespace cv;
#endif // USE_OPENCV
#include <algorithm>
#include <iomanip>
#include <iosfwd>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
// using namespace std;

using namespace caffe; // NOLINT(build/namespaces)

class Detector {
public:
Detector(const string& model_file,
const string& weights_file);

std::vector<vector > Detect(const cv::Mat& img);

private:
void SetMean(const string& mean_file, const string& mean_value);

void WrapInputLayer(std::vectorcv::Mat* input_channels);

void Preprocess(const cv::Mat& img,
std::vectorcv::Mat* input_channels);

private:
shared_ptr<Net > net_;
cv::Size input_geometry_;
int num_channels_;
// cv::Mat mean_;
};

/// Builds the detector: selects CPU or GPU mode at compile time, loads the
/// network in TEST phase, copies the trained weights, and caches the input
/// blob's channel count and spatial size.
///
/// Aborts (via CHECK) unless the network has exactly one input and one output
/// blob and the input has 1 or 3 channels.
Detector::Detector(const std::string& model_file,
                   const std::string& weights_file) {
#ifdef CPU_ONLY
  Caffe::set_mode(Caffe::CPU);
#else
  Caffe::set_mode(Caffe::GPU);
#endif

  /* Load the network. */
  net_.reset(new Net<float>(model_file, TEST));
  net_->CopyTrainedLayersFrom(weights_file);

  CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
  CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

  Blob<float>* input_layer = net_->input_blobs()[0];
  num_channels_ = input_layer->channels();
  CHECK(num_channels_ == 3 || num_channels_ == 1)
      << "Input layer should have 1 or 3 channels.";
  input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
}

/// Runs one forward pass on `img` and collects the detections.
///
/// The output blob is read as `height()` consecutive rows of 7 floats. Rows
/// whose first value is -1 are treated as invalid and skipped.
///
/// @return one 7-element vector per valid detection, in network output order.
std::vector<std::vector<float> > Detector::Detect(const cv::Mat& img) {
  // Number of floats the network emits per detection row.
  const int kDetectionSize = 7;

  Blob<float>* input_layer = net_->input_blobs()[0];
  input_layer->Reshape(1, num_channels_,
                       input_geometry_.height, input_geometry_.width);
  /* Forward dimension change to all layers. */
  net_->Reshape();

  std::vector<cv::Mat> input_channels;
  WrapInputLayer(&input_channels);

  Preprocess(img, &input_channels);

  net_->Forward();

  /* Copy the output layer to a std::vector */
  Blob<float>* result_blob = net_->output_blobs()[0];
  const float* result = result_blob->cpu_data();
  const int num_det = result_blob->height();

  std::vector<std::vector<float> > detections;
  detections.reserve(num_det);
  for (int k = 0; k < num_det; ++k, result += kDetectionSize) {
    if (result[0] == -1) {
      // Skip invalid detection.
      continue;
    }
    detections.push_back(
        std::vector<float>(result, result + kDetectionSize));
  }
  return detections;
}

/* Wrap the input layer of the network in separate cv::Mat objects

(one per channel). This way we save one memcpy operation and we
don't need to rely on cudaMemcpy2D. The last preprocessing
operation will write the separate channels directly to the input
layer. /
void Detector::WrapInputLayer(std::vectorcv::Mat input_channels) {
Blob* input_layer = net_->input_blobs()[0];

int width = input_layer->width();
int height = input_layer->height();
float* input_data = input_layer->mutable_cpu_data();
for (int i = 0; i < input_layer->channels(); ++i) {
cv::Mat channel(height, width, CV_32FC1, input_data);
input_channels->push_back(channel);
input_data += width * height;
}
}

/// Converts `img` into the network's input format and writes it into the
/// planes wrapped by `input_channels`.
///
/// Steps: convert to the network's channel count, resize to the input
/// geometry, convert to float, normalise every value as (v - 127.5) / 127.5,
/// then split the planes into `input_channels`.
///
/// Aborts (via CHECK) if the channel count cannot be matched to the network
/// or if the planes no longer alias the network input blob afterwards.
void Detector::Preprocess(const cv::Mat& img,
                          std::vector<cv::Mat>* input_channels) {
  /* Convert the input image to the input image format of the network. */
  cv::Mat sample;
  if (img.channels() == 3 && num_channels_ == 1)
    cv::cvtColor(img, sample, cv::COLOR_BGR2GRAY);
  else if (img.channels() == 4 && num_channels_ == 1)
    cv::cvtColor(img, sample, cv::COLOR_BGRA2GRAY);
  else if (img.channels() == 4 && num_channels_ == 3)
    cv::cvtColor(img, sample, cv::COLOR_BGRA2BGR);
  else if (img.channels() == 1 && num_channels_ == 3)
    cv::cvtColor(img, sample, cv::COLOR_GRAY2BGR);
  else
    sample = img;

  // None of the branches above handles e.g. a 2-channel image; fail loudly
  // instead of normalising past the end of the pixel buffer below.
  CHECK_EQ(sample.channels(), num_channels_)
      << "Unsupported number of image channels.";

  cv::Mat sample_resized;
  if (sample.size() != input_geometry_)
    cv::resize(sample, sample_resized, input_geometry_);
  else
    sample_resized = sample;

  // cv::cvtColor(sample_resized, sample_resized, CV_BGR2RGB);

  cv::Mat sample_float;
  if (num_channels_ == 3)
    sample_resized.convertTo(sample_float, CV_32FC3);
  else
    sample_resized.convertTo(sample_float, CV_32FC1);

  // cv::Mat sample_normalized;
  // cv::subtract(sample_float, mean_, sample_normalized);
  //
  // normalize image

  // The flat walk below requires the pixels to be stored without row padding.
  CHECK(sample_float.isContinuous()) << "Expected a continuous float image.";
  float* im_data = sample_float.ptr<float>();
  const size_t num_values = sample_float.total() * sample_float.channels();
  for (size_t i = 0; i < num_values; ++i) {
    im_data[i] -= 127.5;
    im_data[i] /= 127.5;
  }

  /* This operation will write the separate BGR planes directly to the
   * input layer of the network because it is wrapped by the cv::Mat
   * objects in input_channels. */
  cv::split(sample_float, *input_channels);

  CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
        == net_->input_blobs()[0]->cpu_data())
      << "Input channels are not wrapping the input layer of the network.";
}

// Command-line flag definitions (gflags). Each DEFINE_* creates a FLAGS_<name>
// variable holding the default given here.
// Of these, only FLAGS_confidence_threshold is read by the code shown in this
// listing; the other four flags are defined but not referenced here.

// Path to a mean file; empty by default.
DEFINE_string(mean_file, "",
"The mean file used to subtract from the input image.");
// Mean value(s) as a comma-separated string; defaults to "127.5".
DEFINE_string(mean_value, "127.5", // 104,117,123
"If specified, can be one value or can be same as image channels"
" - would subtract from the corresponding channel). Separated by ','."
"Either mean_file or mean_value should be provided, not both.");
// Kind of input listed in list_file; defaults to "image".
DEFINE_string(file_type, "image",
"The file type in the list_file. Currently support image and video.");
// Optional path for storing detection results; empty by default.
DEFINE_string(out_file, "",
"If provided, store the detection results in the out_file.");
// Minimum score a detection needs in order to be reported; defaults to 0.01.
DEFINE_double(confidence_threshold, 0.01,
"Only store detections with score higher than the threshold.");

/// Returns the file name of `str` without its directory part and without its
/// last extension, e.g. "folder/img1.JPEG" -> "img1".
///
/// Both '/' and '\\' are accepted as directory separators. A path with no
/// separator is treated as a bare file name, and a name with no '.' is
/// returned unchanged.
static std::string SplitFilename(const std::string& str) {
  // When no separator exists find_last_of returns npos, and npos + 1 wraps to
  // 0, so the whole string is kept as the file name.
  const std::size_t sep = str.find_last_of("/\\");
  const std::string fname = str.substr(sep + 1);

  // substr(0, npos) yields the entire name when there is no extension.
  const std::size_t dot = fname.find_last_of('.');
  return fname.substr(0, dot);
}

/// Builds the output image path "<dir>/<fname>_mnet2_cf.jpg".
///
/// @param dir    output directory, without a trailing separator.
/// @param fname  base file name, without extension.
static std::string gen_name(const std::string& dir, const std::string& fname) {
  return dir + "/" + fname + "_mnet2_cf.jpg";
}

int main(int argc, char** argv)
{
const string& model_file = argv[1];
const string& weights_file = argv[2];
const string& in_file = argv[3];

string fname = SplitFilename(in_file);    
const string& out_dir = argv[4];
string out_file = gen_name(out_dir, fname); 
const float confidence_threshold = FLAGS_confidence_threshold;

// Initialize the network.
Detector detector ( model_file, weights_file );

cv::Mat img = cv::imread ( in_file, -1 );
CHECK ( !img.empty() ) << "Unable to decode image " << in_file;
std::vector<vector<float> > detections = detector.Detect ( img );

/* Print the detection results. */
for ( int i = 0; i < detections.size(); ++i ) {
    const vector<float>& d = detections[i];
    // Detection format: [image_id, label, score, xmin, ymin, xmax, ymax].
    CHECK_EQ ( d.size(), 7 );
    const float score = d[2];
    if ( score >= confidence_threshold ) {
        std::cout << in_file << " ";
        std::cout << static_cast<int> ( d[1] ) << " ";
        std::cout << score << " ";
        std::cout << static_cast<int> ( d[3] * img.cols ) << " ";
        std::cout << static_cast<int> ( d[4] * img.rows ) << " ";
        std::cout << static_cast<int> ( d[5] * img.cols ) << " ";
        std::cout << static_cast<int> ( d[6] * img.rows ) << std::endl;

        int x = int ( d[3] * img.cols );
        int y = int ( d[4] * img.rows );
        int w = int ( d[5] * img.cols ) - x + 1;
        int h = int ( d[6] * img.cols ) - y + 1;

        cv::Rect rt ( x, y, w, h );
        cv::Scalar clr = cv::Scalar ( 0, 255, 0 );
        cv::rectangle ( img, rt, clr );
        cv::putText ( img, std::to_string ( d[1] ), cv::Point ( x-10,y-10 ), cv::FONT_HERSHEY_SIMPLEX, 1, clr, 2 );
    }
}
imwrite (out_file,img);
return 0;

}

liwei46 · Answer 1 · Tue Sep 25 2018 14:09:09 GMT+0800 (China Standard Time)

It works now. The code above has been updated.

simmonchan · Answer 2 · Sun Jan 27 2019 00:42:07 GMT+0800 (China Standard Time)

How about the speed?