nesting thread pool not work
wyfSunflower opened this issue · comments
Describe the new feature
I want to parallel a series code ,it has two for loop as following, where rows is a vector<vector<size_t>>.
// tmr.start();
for (i = 0; i < num_rows; i++)
{
double h = 0.0;
for (ci = rows[i].begin(); ci != rows[i].end(); ci++)
{
double h_v = (num_outgoing[*ci])
? 1.0 / static_cast<double>(num_outgoing[*ci])
: 0.0;
if (num_iterations == 0 && trace)
{
cout << "h[" << i << "," << *ci << "]=" << h_v << endl;
}
h += h_v * old_pr[*ci];
}
h *= alpha;
pr[i] = h + one_Av + one_Iv;
diff += fabs(pr[i] - old_pr[i]);
}
Code example
I create two independent BS::thread_pool_light variable, here is my parallel code:
auto loopDiff = [&](const size_t start, const size_t end)
{
for (size_t k = start; k < end; k++)
{
for (size_t j = 0; j < vectors.at(k).size(); j++)
{
size_t i = vector_size * k + j;
if (i >= num_rows)
break;
double h = 0.0;
size_t nb_cnt = rows.at(i).size();
size_t vector_size_nb = nb_cnt / nproc + 1;
vector<double> vectorH(nb_cnt);
auto loopH = [&](const size_t startH, const size_t endH)
{
for (size_t iH = startH; iH < endH; iH++)
{
for (size_t jH = 0; jH < vector_size_nb; jH++)
{
size_t idxH = vector_size_nb * iH + jH;
if (idxH >= nb_cnt)
break;
auto outEdge = rows.at(i).at(idxH);
double h_v = (num_outgoing.at(outEdge))
? 1.0 / static_cast<double>(num_outgoing.at(outEdge))
: 0.0;
if (num_iterations == 0 && trace)
cout << "h[" << i << "," << outEdge << "]=" << h_v << endl;
vectorH.at(iH) += h_v * old_pr.at(outEdge);
}
}
};
pool_neighbor.push_loop(nproc, loopH);
pool_neighbor.wait_for_tasks();
for (auto hh : vectorH)
h += hh;
h *= alpha;
pr[i] = h + one_Av + one_Iv;
vectorDiff.at(i) = fabs(pr[i] - old_pr[i]);
}
}
};
pool.push_loop(nproc, loopDiff);
pool.wait_for_tasks();
for (auto d : vectorDiff)
diff += d;
Additional information
It does not have parallel acceleration, could you please give me a little advice, thank you .
It looks like you are parallelizing a loop where each block in the loop then itself parallelizes and waits for another loop using a separate thread pool. Instead you should only have one thread pool, and parallelize only one loop at a time. If you have any further questions, please post on StackOverflow or StackExchange Code Review. Good luck!