33 template <
typename Archiver>
34 auto save(Archiver& ar)
const {
35 return ar(name, type, beg, end);
38 template <
typename Archiver>
39 auto load(Archiver& ar) {
40 return ar(name, type, beg, end);
47 ) : name {n}, type {t}, beg {b}, end {e} {
63 std::vector<std::vector<std::vector<Segment>>> segments;
67 Timeline(
const Timeline& rhs) =
delete;
68 Timeline(Timeline&& rhs) =
default;
70 Timeline& operator = (
const Timeline& rhs) =
delete;
71 Timeline& operator = (Timeline&& rhs) =
default;
73 template <
typename Archiver>
74 auto save(Archiver& ar)
const {
75 return ar(uid, origin, segments);
78 template <
typename Archiver>
79 auto load(Archiver& ar) {
80 return ar(uid, origin, segments);
89 std::vector<Timeline> timelines;
91 ProfileData() =
default;
93 ProfileData(
const ProfileData& rhs) =
delete;
94 ProfileData(ProfileData&& rhs) =
default;
96 ProfileData& operator = (
const ProfileData& rhs) =
delete;
97 ProfileData& operator = (ProfileData&&) =
default;
99 template <
typename Archiver>
100 auto save(Archiver& ar)
const {
101 return ar(timelines);
104 template <
typename Archiver>
105 auto load(Archiver& ar) {
106 return ar(timelines);
182 virtual void set_up(
size_t num_workers) = 0;
231 friend class Executor;
242 const std::string& n,
251 std::vector<std::vector<Segment>> segments;
252 std::vector<std::stack<observer_stamp_t>> stacks;
261 void dump(std::ostream& ostream)
const;
266 inline std::string
dump()
const;
280 inline void set_up(
size_t num_workers)
override final;
288inline ChromeObserver::Segment::Segment(
291 name {n}, beg {b}, end {e} {
295inline void ChromeObserver::set_up(
size_t num_workers) {
296 _timeline.segments.resize(num_workers);
297 _timeline.stacks.resize(num_workers);
299 for(
size_t w=0; w<num_workers; ++w) {
300 _timeline.segments[w].reserve(32);
303 _timeline.origin = observer_stamp_t::clock::now();
308 _timeline.stacks[wv.id()].push(observer_stamp_t::clock::now());
316 assert(!_timeline.stacks[w].empty());
318 auto beg = _timeline.stacks[w].top();
319 _timeline.stacks[w].pop();
321 _timeline.segments[w].emplace_back(
322 tv.name(), beg, observer_stamp_t::clock::now()
328 for(
size_t w=0; w<_timeline.segments.size(); ++w) {
329 _timeline.segments[w].clear();
330 while(!_timeline.stacks[w].empty()) {
331 _timeline.stacks[w].pop();
339 using namespace std::chrono;
343 for(first = 0; first<_timeline.segments.size(); ++first) {
344 if(_timeline.segments[first].size() > 0) {
351 for(
size_t w=first; w<_timeline.segments.size(); w++) {
353 if(w != first && _timeline.segments[w].size() > 0) {
357 for(
size_t i=0; i<_timeline.segments[w].size(); i++) {
359 os <<
'{'<<
"\"cat\":\"ChromeObserver\",";
363 if(_timeline.segments[w][i].name.empty()) {
367 os << _timeline.segments[w][i].name;
372 os <<
"\"ph\":\"X\","
374 <<
"\"tid\":" << w <<
','
375 <<
"\"ts\":" << duration_cast<microseconds>(
376 _timeline.segments[w][i].beg - _timeline.origin
378 <<
"\"dur\":" << duration_cast<microseconds>(
379 _timeline.segments[w][i].end - _timeline.segments[w][i].beg
382 if(i != _timeline.segments[w].size() - 1) {
395 std::ostringstream oss;
402 return std::accumulate(
403 _timeline.segments.begin(), _timeline.segments.end(),
size_t{0},
404 [](
size_t sum,
const auto& exe){
405 return sum + exe.size();
443 friend class Executor;
444 friend class TFProfManager;
451 size_t total_span {0};
455 float avg_span()
const {
return total_span * 1.0f / count; }
461 struct WorkerSummary {
466 size_t total_span {0};
470 std::array<TaskSummary, TASK_TYPES.size()> tsum;
472 float avg_span()
const {
return total_span * 1.0f / count; }
480 std::array<TaskSummary, TASK_TYPES.size()> tsum;
481 std::vector<WorkerSummary> wsum;
483 void dump_tsum(std::ostream&)
const;
484 void dump_wsum(std::ostream&)
const;
485 void dump(std::ostream&)
const;
494 void dump(std::ostream& ostream)
const;
499 std::string
dump()
const;
504 void summary(std::ostream& ostream)
const;
530 std::vector<std::stack<observer_stamp_t>> _stacks;
532 inline void set_up(
size_t num_workers)
override final;
539inline void TFProfObserver::Summary::dump_tsum(std::ostream& os)
const {
542 size_t type_w{10}, count_w{5}, time_w{9}, avg_w{8}, min_w{8}, max_w{8};
544 std::for_each(tsum.begin(), tsum.end(), [&](
const auto& i){
545 if(i.count == 0) return;
546 count_w = (std::max)(count_w, std::to_string(i.count).size());
549 std::for_each(tsum.begin(), tsum.end(), [&](
const auto& i){
550 if(i.count == 0) return;
551 time_w = (std::max)(time_w, std::to_string(i.total_span).size());
554 std::for_each(tsum.begin(), tsum.end(), [&](
const auto& i){
555 if(i.count == 0) return;
556 avg_w = (std::max)(time_w, std::to_string(i.avg_span()).size());
559 std::for_each(tsum.begin(), tsum.end(), [&](
const auto& i){
560 if(i.count == 0) return;
561 min_w = (std::max)(min_w, std::to_string(i.min_span).size());
564 std::for_each(tsum.begin(), tsum.end(), [&](
const auto& i){
565 if(i.count == 0) return;
566 max_w = (std::max)(max_w, std::to_string(i.max_span).size());
569 os << std::setw(type_w) <<
"-Task-"
570 << std::setw(count_w+2) <<
"Count"
571 << std::setw(time_w+2) <<
"Time (us)"
572 << std::setw(avg_w+2) <<
"Avg (us)"
573 << std::setw(min_w+2) <<
"Min (us)"
574 << std::setw(max_w+2) <<
"Max (us)"
577 for(
size_t i=0; i<TASK_TYPES.size(); i++) {
578 if(tsum[i].count == 0) {
581 os << std::setw(type_w) <<
to_string(TASK_TYPES[i])
582 << std::setw(count_w+2) << tsum[i].count
583 << std::setw(time_w+2) << tsum[i].total_span
584 << std::setw(avg_w+2) << std::to_string(tsum[i].avg_span())
585 << std::setw(min_w+2) << tsum[i].min_span
586 << std::setw(max_w+2) << tsum[i].max_span
592inline void TFProfObserver::Summary::dump_wsum(std::ostream& os)
const {
595 size_t w_w{10}, t_w{10}, l_w{5}, c_w{5}, d_w{9}, avg_w{8}, min_w{8}, max_w{8};
597 std::for_each(wsum.begin(), wsum.end(), [&](
const auto& i){
598 if(i.count == 0) return;
599 l_w = (std::max)(l_w, std::to_string(i.level).size());
602 std::for_each(wsum.begin(), wsum.end(), [&](
const auto& i){
603 if(i.count == 0) return;
604 c_w = (std::max)(c_w, std::to_string(i.count).size());
607 std::for_each(wsum.begin(), wsum.end(), [&](
const auto& i){
608 if(i.count == 0) return;
609 d_w = (std::max)(d_w, std::to_string(i.total_span).size());
612 std::for_each(wsum.begin(), wsum.end(), [&](
const auto& i){
613 if(i.count == 0) return;
614 avg_w = (std::max)(avg_w, std::to_string(i.avg_span()).size());
617 std::for_each(wsum.begin(), wsum.end(), [&](
const auto& i){
618 if(i.count == 0) return;
619 min_w = (std::max)(min_w, std::to_string(i.min_span).size());
622 std::for_each(wsum.begin(), wsum.end(), [&](
const auto& i){
623 if(i.count == 0) return;
624 max_w = (std::max)(max_w, std::to_string(i.max_span).size());
627 os << std::setw(w_w) <<
"-Worker-"
628 << std::setw(l_w+2) <<
"Level"
629 << std::setw(t_w) <<
"Task"
630 << std::setw(c_w+2) <<
"Count"
631 << std::setw(d_w+2) <<
"Time (us)"
632 << std::setw(avg_w+2) <<
"Avg (us)"
633 << std::setw(min_w+2) <<
"Min (us)"
634 << std::setw(max_w+2) <<
"Max (us)"
637 for(
const auto& ws : wsum) {
643 os << std::setw(w_w) << ws.id
644 << std::setw(l_w+2) << ws.level;
647 for(
size_t i=0; i<TASK_TYPES.size(); i++) {
649 if(ws.tsum[i].count == 0) {
653 os << (first ? std::setw(t_w) : std::setw(w_w + l_w + 2 + t_w));
657 << std::setw(c_w+2) << ws.tsum[i].count
658 << std::setw(d_w+2) << ws.tsum[i].total_span
659 << std::setw(avg_w+2) << std::to_string(ws.tsum[i].avg_span())
660 << std::setw(min_w+2) << ws.tsum[i].min_span
661 << std::setw(max_w+2) << ws.tsum[i].max_span
666 os << std::setw(w_w + l_w + t_w + c_w + 4) << ws.count
667 << std::setw(d_w+2) << ws.total_span
668 << std::setw(avg_w+2) << std::to_string(ws.avg_span())
669 << std::setw(min_w+2) << ws.min_span
670 << std::setw(max_w+2) << ws.max_span
680inline void TFProfObserver::Summary::dump(std::ostream& os)
const {
687inline void TFProfObserver::set_up(
size_t num_workers) {
689 _timeline.origin = observer_stamp_t::clock::now();
695inline void TFProfObserver::on_entry(WorkerView wv, TaskView) {
696 _stacks[wv.id()].push(observer_stamp_t::clock::now());
700inline void TFProfObserver::on_exit(WorkerView wv, TaskView tv) {
704 assert(!_stacks[w].empty());
706 if(_stacks[w].size() > _timeline.segments[w].size()) {
707 _timeline.segments[w].resize(_stacks[w].size());
710 auto beg = _stacks[w].top();
713 _timeline.segments[w][_stacks[w].size()].emplace_back(
714 tv.name(), tv.type(), beg, observer_stamp_t::clock::now()
720 for(
size_t w=0; w<_timeline.segments.size(); ++w) {
721 for(
size_t l=0; l<_timeline.segments[w].size(); ++l) {
722 _timeline.segments[w][l].clear();
724 while(!_stacks[w].empty()) {
733 using namespace std::chrono;
737 for(first = 0; first<_timeline.segments.size(); ++first) {
738 if(_timeline.segments[first].size() > 0) {
744 if(first == _timeline.segments.size()) {
749 os <<
"{\"executor\":\"" << _timeline.uid <<
"\",\"data\":[";
753 for(
size_t w=first; w<_timeline.segments.size(); w++) {
754 for(
size_t l=0; l<_timeline.segments[w].size(); l++) {
756 if(_timeline.segments[w][l].empty()) {
767 os <<
"{\"worker\":" << w <<
",\"level\":" << l <<
",\"data\":[";
768 for(
size_t i=0; i<_timeline.segments[w][l].size(); ++i) {
770 const auto& s = _timeline.segments[w][l][i];
776 << duration_cast<microseconds>(s.beg - _timeline.origin).count()
778 << duration_cast<microseconds>(s.end - _timeline.origin).count()
792 os <<
"\"type\":\"" <<
to_string(s.type) <<
"\"";
805 std::ostringstream oss;
813 using namespace std::chrono;
816 std::optional<observer_stamp_t> view_beg, view_end;
820 for(first = 0; first<_timeline.segments.size(); ++first) {
821 if(_timeline.segments[first].size() > 0) {
827 if(first == _timeline.segments.size()) {
831 for(
size_t w=first; w<_timeline.segments.size(); w++) {
832 for(
size_t l=0; l<_timeline.segments[w].size(); l++) {
834 if(_timeline.segments[w][l].empty()) {
842 ws.count = _timeline.segments[w][l].size();
845 for(
size_t i=0; i<_timeline.segments[w][l].size(); ++i) {
848 auto& s = _timeline.segments[w][l][i];
849 view_beg = view_beg ? (std::min)(*view_beg, s.beg) : s.beg;
850 view_end = view_end ? (std::max)(*view_end, s.end) : s.end;
853 size_t t = duration_cast<microseconds>(s.end - s.beg).count();
855 auto& x =
summary.tsum[
static_cast<int>(s.type)];
858 x.min_span = (x.count == 1) ? t : (std::min)(t, x.min_span);
859 x.max_span = (x.count == 1) ? t : (std::max)(t, x.max_span);
863 ws.min_span = (i == 0) ? t : (std::min)(t, ws.min_span);
864 ws.max_span = (i == 0) ? t : (std::max)(t, ws.max_span);
866 auto&y = ws.tsum[
static_cast<int>(s.type)];
869 y.min_span = (y.count == 1) ? t : (std::min)(t, y.min_span);
870 y.max_span = (y.count == 1) ? t : (std::max)(t, y.max_span);
889 if(view_beg && view_end) {
890 view = duration_cast<microseconds>(*view_end - *view_beg).count();
893 os <<
"==Observer " << _timeline.uid <<
": "
903 std::ostringstream oss;
911 for(
size_t w=0; w<_timeline.segments.size(); ++w) {
912 for(
size_t l=0; l<_timeline.segments[w].size(); ++l) {
913 s += _timeline.segments[w][l].size();
922 for(
size_t i=0; i<_timeline.segments.size(); ++i) {
923 w += (!_timeline.segments[i].empty());
944 TFProfManager(
const TFProfManager&) =
delete;
945 TFProfManager& operator=(
const TFProfManager&) =
delete;
947 static TFProfManager& get();
949 void dump(std::ostream& ostream)
const;
953 const std::string _fpath;
956 std::vector<std::shared_ptr<TFProfObserver>> _observers;
960 void _manage(std::shared_ptr<TFProfObserver> observer);
964inline TFProfManager::TFProfManager() :
965 _fpath {
get_env(TF_ENABLE_PROFILER)} {
970inline void TFProfManager::_manage(std::shared_ptr<TFProfObserver> observer) {
971 std::lock_guard lock(_mutex);
972 _observers.push_back(std::move(observer));
976inline void TFProfManager::dump(std::ostream& os)
const {
977 for(
size_t i=0; i<_observers.size(); ++i) {
979 _observers[i]->dump(os);
984inline TFProfManager::~TFProfManager() {
985 std::ofstream ofs(_fpath);
988 if(_fpath.rfind(
".tfp") != std::string::npos) {
990 data.timelines.reserve(_observers.size());
991 for(
size_t i=0; i<_observers.size(); ++i) {
992 data.timelines.push_back(std::move(_observers[i]->_timeline));
994 Serializer<std::ofstream> serializer(ofs);
1000 for(
size_t i=0; i<_observers.size(); ++i) {
1002 _observers[i]->dump(ofs);
1009 std::ostringstream oss;
1010 for(
size_t i=0; i<_observers.size(); ++i) {
1011 _observers[i]->summary(oss);
1013 fprintf(stderr,
"%s", oss.str().c_str());
1018inline TFProfManager& TFProfManager::get() {
1019 static TFProfManager mgr;
1043 case ObserverType::TFPROF:
return "tfprof";
1044 case ObserverType::CHROME:
return "chrome";
1045 default:
return "undefined";
class to create an observer based on Chrome tracing format
Definition observer.hpp:229
void clear()
clears the timeline data
Definition observer.hpp:327
std::string dump() const
dumps the timelines into a Chrome Tracing format
Definition observer.hpp:394
size_t num_tasks() const
queries the number of tasks observed
Definition observer.hpp:401
class to create an executor
Definition executor.hpp:62
class to derive an executor observer
Definition observer.hpp:169
virtual void set_up(size_t num_workers)=0
constructor-like method to call when the executor observer is fully created
virtual void on_entry(WorkerView wv, TaskView task_view)=0
method to call before a worker thread executes a closure
virtual void on_exit(WorkerView wv, TaskView task_view)=0
method to call after a worker thread executed a closure
virtual ~ObserverInterface()=default
virtual destructor
class to create an observer based on the built-in taskflow profiler format
Definition observer.hpp:441
std::string dump() const
dumps the timelines into a JSON string
Definition observer.hpp:804
void clear()
clears the timeline data
Definition observer.hpp:719
void summary(std::ostream &ostream) const
shows the summary report through an output stream
Definition observer.hpp:811
size_t num_workers() const
queries the number of observed workers
Definition observer.hpp:920
std::string summary() const
returns the summary report in a string
Definition observer.hpp:902
size_t num_tasks() const
queries the number of tasks observed
Definition observer.hpp:909
class to access task information from the observer interface
Definition task.hpp:1235
class to create an immutable view of a worker
Definition worker.hpp:114
taskflow namespace
Definition small_vector.hpp:20
T unique_id()
generates a program-wide unique ID of the given type in a thread-safe manner
Definition math.hpp:182
TaskType
enumeration of all task types
Definition task.hpp:21
@ UNDEFINED
undefined task type (for internal use only)
Definition task.hpp:37
const char * to_string(TaskType type)
converts a task type to a human-readable string
Definition task.hpp:66
ObserverType
enumeration of all observer types
Definition observer.hpp:1032
std::chrono::time_point< std::chrono::steady_clock > observer_stamp_t
default time point type of observers
Definition observer.hpp:20
std::string get_env(const std::string &str)
retrieves the value of an environment variable
Definition os.hpp:183