Skip to content
Snippets Groups Projects
Commit ed9ed49e authored by Olivier BICHLER's avatar Olivier BICHLER
Browse files

Improved Scheduler diagnostic

parent f43905b6
No related branches found
No related tags found
No related merge requests found
......@@ -82,6 +82,14 @@ protected:
std::chrono::time_point<std::chrono::high_resolution_clock> end; /** Actual end time of execution */
};
public:
/**
 * @brief Describes where the data for a node input was looked up.
 * @details Written by getNbAvailableData() through its `status` output
 * argument, so that the scheduler can report why an input has no (or not
 * enough) available data.
 */
enum class AvailableDataStatus {
/** The input has a parent Node connected to it. */
Connected,
/** The input is also an input of the upper node, but that upper node input has no parent. */
UpperNodeInputFound,
/** The input is also an input of the upper node, and that upper node input has a parent Node. */
UpperNodeInputConnected,
/** The input has no parent but its operator holds a raw input tensor for it. */
ValidTensor,
/** None of the above: no parent and no raw input tensor was found. */
NotConnected
};
/**
* @struct PriorProducersConsumers
* @brief Manages producer-consumer relationships for nodes.
......@@ -179,7 +187,7 @@ protected:
*/
std::set<std::shared_ptr<Node>> getConsumers(const std::set<std::shared_ptr<Node>>& producers) const;
Elts_t getNbAvailableData(const std::shared_ptr<Node>& node, const IOIndex_t inputIdx) const;
Elts_t getNbAvailableData(const std::shared_ptr<Node>& node, const IOIndex_t inputIdx, AvailableDataStatus& status) const;
/**
* @brief Get the prior producers and consumers for a node.
......@@ -233,4 +241,23 @@ protected:
};
} // namespace Aidge
namespace Aidge {
/**
 * @brief Formatting hook turning an AvailableDataStatus into a readable sentence.
 * @details Returns a pointer to a string literal describing @p status, so the
 * enum can be passed directly as a formatting argument (it is used that way
 * in the scheduler diagnostics).
 * @param status Status value to describe.
 * @return Human-readable description, or "UNKNOWN STATUS." for a value that
 * does not match any known enumerator.
 */
inline auto format_as(Scheduler::AvailableDataStatus status) {
    using Status = Scheduler::AvailableDataStatus;

    // Fallback text, kept if `status` holds a value outside the known enumerators.
    const char* description = "UNKNOWN STATUS.";

    switch (status) {
        case Status::Connected:
            description = "The input is connected to a Node.";
            break;
        case Status::UpperNodeInputFound:
            description = "The input is an upper node input, but is not connected in any GraphView.";
            break;
        case Status::UpperNodeInputConnected:
            description = "The input is an upper node input and is connected to a Node.";
            break;
        case Status::ValidTensor:
            description = "The input is not connected in the current GraphView but has a valid tensor assigned.";
            break;
        case Status::NotConnected:
            description = "The input is not connected in the current GraphView.";
            break;
        default:
            break;
    }

    return description;
}
}
#endif /* AIDGE_CORE_SCHEDULER_SCHEDULER_H_ */
......@@ -82,6 +82,8 @@ std::vector<Aidge::Scheduler::StaticSchedulingElement*> Aidge::Scheduler::genera
// the requiredProducers list.
std::set<std::shared_ptr<Node>> consumers = mGraphView->outputNodes();
std::set<std::shared_ptr<Node>> producers;
std::string level1Diagnostic;
std::string level2Diagnostic;
do {
// 2) From the current consumers list, check if any prior consumer node
......@@ -144,22 +146,37 @@ std::vector<Aidge::Scheduler::StaticSchedulingElement*> Aidge::Scheduler::genera
// there are multiple successive priors for example).
std::set<std::shared_ptr<Node>> runnableConsumers;
Log::debug("Updated list of consumers:");
level1Diagnostic.clear();
level2Diagnostic.clear();
for (const auto& consumer : consumers) {
summarizeConsumerState(consumer, namePtrTable.at(consumer)); // debug print
bool isRunnable = true;
for (IOIndex_t inputIdx = 0; inputIdx < consumer->nbInputs(); ++inputIdx) {
AIDGE_LOG_CONTEXT("Consumer node {} input #{}", namePtrTable.at(consumer), inputIdx);
AvailableDataStatus status;
if ((consumer->getOperator()->getNbConsumedData(inputIdx) + consumer->getOperator()->getNbRequiredData(inputIdx)) >
getNbAvailableData(consumer, inputIdx)) {
getNbAvailableData(consumer, inputIdx, status)) {
Log::debug(" not runnable: C{} + R{} > P{} for input #{}",
consumer->getOperator()->getNbConsumedData(inputIdx),
consumer->getOperator()->getNbRequiredData(inputIdx),
getNbAvailableData(consumer, inputIdx), inputIdx);
getNbAvailableData(consumer, inputIdx, status), inputIdx);
// not enough data to run
isRunnable = false;
if (status == Scheduler::AvailableDataStatus::UpperNodeInputFound
|| status == Scheduler::AvailableDataStatus::NotConnected)
{
level1Diagnostic += fmt::format("- No data available for node {} input #{}. {}\n", namePtrTable.at(consumer), inputIdx, fmt::styled(status, fmt::fg(fmt::color::red)));
}
else {
level2Diagnostic += fmt::format("- No data available for node {} input #{}. {}\n", namePtrTable.at(consumer), inputIdx, fmt::styled(status, fmt::fg(fmt::color::green)));
level2Diagnostic += fmt::format(" ↳ Available data is {}, but {} was already consummed and {} more is required.\n",
getNbAvailableData(consumer, inputIdx, status),
consumer->getOperator()->getNbConsumedData(inputIdx),
consumer->getOperator()->getNbRequiredData(inputIdx));
}
break;
}
}
......@@ -204,12 +221,13 @@ std::vector<Aidge::Scheduler::StaticSchedulingElement*> Aidge::Scheduler::genera
for (IOIndex_t inputIdx = 0; inputIdx < consumer->nbInputs(); ++inputIdx) {
if (consumer->inputCategory(inputIdx) == InputCategory::Data) {
AIDGE_LOG_CONTEXT("Consumer node {} input #{}", namePtrTable.at(consumer), inputIdx);
AvailableDataStatus status;
if (consumer->getOperator()->getNbConsumedData(inputIdx) <
getNbAvailableData(consumer, inputIdx)) {
getNbAvailableData(consumer, inputIdx, status)) {
Log::debug(" still consumer: C{} < P{} for input #{}",
consumer->getOperator()->getNbConsumedData(inputIdx),
getNbAvailableData(consumer, inputIdx), inputIdx);
getNbAvailableData(consumer, inputIdx, status), inputIdx);
// there is still data to consume
isStillConsumer = true;
......@@ -293,7 +311,15 @@ std::vector<Aidge::Scheduler::StaticSchedulingElement*> Aidge::Scheduler::genera
std::back_inserter(consumersName),
[&namePtrTable](auto val){ return namePtrTable.at(val); });
Log::warn("Remaining consumers: {}. Possible dead-lock.", consumersName);
Log::warn("Remaining consumers: {}.", consumersName);
Log::info("Reasons:");
if (!level1Diagnostic.empty()) {
Log::info(level1Diagnostic);
}
else {
Log::info(level2Diagnostic);
}
}
return schedule;
......@@ -650,23 +676,27 @@ std::set<std::shared_ptr<Aidge::Node>> Aidge::Scheduler::getConsumers(
return consumers;
}
Aidge::Elts_t Aidge::Scheduler::getNbAvailableData(const std::shared_ptr<Node>& node, const IOIndex_t inputIdx) const {
Aidge::Elts_t Aidge::Scheduler::getNbAvailableData(const std::shared_ptr<Node>& node, const IOIndex_t inputIdx, AvailableDataStatus& status) const {
const auto parent = node->inputs()[inputIdx];
if (parent.first) {
// Parent is connected, everything is fine!
status = AvailableDataStatus::Connected;
return parent.first->getOperator()->getNbProducedData(parent.second);
}
else if (std::shared_ptr<Node> upperNode = mUpperNode.lock()) {
// We are inside an upper operator (for instance a MetaOperator)
// We need to connect the "local" producer-consumer model to the upper
// one, by mapping local node inputs to the upper node inputs.
// We are inside an upper operator (for instance a MetaOperator).
// Check if the node input is also an upper node input...
IOIndex_t upperInputIdx = 0;
for (const auto& input : mGraphView->getOrderedInputs()) {
if (input.first == node && input.second == inputIdx) {
// Current node is an input
// Current node is an input!
// We need to connect the "local" producer-consumer model to the upper
// one, by mapping local node inputs to the upper node inputs.
status = AvailableDataStatus::UpperNodeInputFound;
const auto upperInput = upperNode->inputs()[upperInputIdx];
if (upperInput.first) {
status = AvailableDataStatus::UpperNodeInputConnected;
return upperInput.first->getOperator()->getNbProducedData(upperInput.second);
}
}
......@@ -678,6 +708,7 @@ Aidge::Elts_t Aidge::Scheduler::getNbAvailableData(const std::shared_ptr<Node>&
// - There is no data, it is assumed to be an optional input
// - A valid tensor exists:
if (node->getOperator()->getRawInput(inputIdx)) {
status = AvailableDataStatus::ValidTensor;
// => This means data was fed manually to the input, without a Producer
// In this case, we assume a single-use data (unlike a Producer, which
// keeps producing the data each time it is needed).
......@@ -685,6 +716,7 @@ Aidge::Elts_t Aidge::Scheduler::getNbAvailableData(const std::shared_ptr<Node>&
return Elts_t::DataElts(std::static_pointer_cast<Tensor>(node->getOperator()->getRawInput(inputIdx))->size());
}
status = AvailableDataStatus::NotConnected;
return Elts_t::NoneElts();
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment