forked from halide/Halide
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHalideTraceViz.cpp
1415 lines (1238 loc) · 53.2 KB
/
HalideTraceViz.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <io.h>
#ifndef STDIN_FILENO
#define STDIN_FILENO 0
#endif
#ifndef STDOUT_FILENO
#define STDOUT_FILENO 1
#endif
#else
#include <unistd.h>
#endif
#include "inconsolata.h"
#include "HalideRuntime.h"
#include "halide_trace_config.h"
using namespace Halide;
using namespace Halide::Trace;
namespace {
// -------------------------------------------------------------
bool verbose = false;
// Log informational output to stderr, but only in verbose mode.
// Text is accumulated through operator<< and written when the temporary
// is destroyed; a newline is appended if the text does not end in one.
struct info {
    std::ostringstream msg;

    template<typename T>
    info &operator<<(const T &x) {
        if (verbose) {
            msg << x;
        }
        return *this;
    }

    ~info() {
        if (!verbose) {
            return;
        }
        // Take one copy of the text. back() on an empty string is undefined
        // behavior, so emptiness has to be checked first.
        std::string text = msg.str();
        if (text.empty() || text.back() != '\n') {
            text += '\n';
        }
        std::cerr << text;
    }
};
// Log warnings to stderr.
// Text is accumulated through operator<< and written, prefixed with
// "Warning: ", when the temporary is destroyed; a newline is appended
// if the text does not end in one.
struct warn {
    std::ostringstream msg;

    template<typename T>
    warn &operator<<(const T &x) {
        msg << x;
        return *this;
    }

    ~warn() {
        // Take one copy of the text. back() on an empty string is undefined
        // behavior, so emptiness has to be checked first.
        std::string text = msg.str();
        if (text.empty() || text.back() != '\n') {
            text += '\n';
        }
        std::cerr << "Warning: " << text;
    }
};
// Log unrecoverable errors to stderr, then exit.
// Text is accumulated through operator<<; when the temporary is destroyed
// the text is written (with a newline appended if it does not end in one)
// and the process exits with status 1, so the destructor never returns.
struct fail {
    std::ostringstream msg;

    template<typename T>
    fail &operator<<(const T &x) {
        msg << x;
        return *this;
    }

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4722) // destructor never returns, potential memory leak
#endif
    ~fail() {
        // Take one copy of the text. back() on an empty string is undefined
        // behavior, so emptiness has to be checked first.
        std::string text = msg.str();
        if (text.empty() || text.back() != '\n') {
            text += '\n';
        }
        std::cerr << text;
        exit(1);
    }
#ifdef _MSC_VER
#pragma warning(pop)
#endif
};
// -------------------------------------------------------------
// Fold a type code and a bit width into one int: the code is shifted left
// by 8 and OR'ed with 'bits'. This lets a single switch dispatch on both.
// It has to be constexpr so the result can appear in case labels.
inline static constexpr int halide_type_code(halide_type_code_t code, int bits) {
    return bits | (static_cast<int>(code) << 8);
}
// Read the member of 'value' selected by 'type' (code + bit width) and
// convert it to T. Types not listed in the switch are a fatal error.
template<typename T>
T value_as(const halide_type_t &type, const halide_scalar_value_t &value) {
    switch (halide_type_code((halide_type_code_t) type.code, type.bits)) {
    case halide_type_code(halide_type_int, 8): return (T) value.u.i8;
    case halide_type_code(halide_type_int, 16): return (T) value.u.i16;
    case halide_type_code(halide_type_int, 32): return (T) value.u.i32;
    case halide_type_code(halide_type_int, 64): return (T) value.u.i64;
    case halide_type_code(halide_type_uint, 1): return (T) value.u.b;
    case halide_type_code(halide_type_uint, 8): return (T) value.u.u8;
    case halide_type_code(halide_type_uint, 16): return (T) value.u.u16;
    case halide_type_code(halide_type_uint, 32): return (T) value.u.u32;
    case halide_type_code(halide_type_uint, 64): return (T) value.u.u64;
    case halide_type_code(halide_type_float, 32): return (T) value.u.f32;
    case halide_type_code(halide_type_float, 64): return (T) value.u.f64;
    default:
        // Both fields are cast to int so they print as numbers even if they
        // are stored in a character-sized integer type.
        fail() << "Can't convert packet with type: " << (int) type.code << " bits: " << (int) type.bits;
        // Not reached (fail() exits); keeps every path returning a value.
        return (T) 0;
    }
}
template<typename T>
T get_value_as(const halide_trace_packet_t &p, int idx) {
const uint8_t *val = (const uint8_t *)(p.value()) + idx * p.type.bytes();
// 'val' may not be aligned: memcpy it to an aligned local
// so that value_as<>() won't complain under sanitizers.
halide_scalar_value_t aligned_value;
// Only copy the number of bytes in the type: the stream isn't guaranteed
// to be padded to sizeof(halide_scalar_value_t).
memcpy(&aligned_value, val, p.type.bits / 8);
return value_as<double>(p.type, aligned_value);
}
// A trace packet header followed by inline storage for its payload,
// with helpers that fill both from stdin.
struct PacketAndPayload : public halide_trace_packet_t {
    uint8_t payload[4096];

    // Read exactly 'count' bytes from stdin into 'buf', looping over short
    // reads. Returns false if EOF is reached before 'count' bytes have
    // arrived; a read error is fatal.
    static bool read_or_die(void *buf, size_t count) {
        char *cursor = (char *) buf;
        char *const limit = cursor + count;
        while (cursor < limit) {
            const int64_t got = ::read(STDIN_FILENO, cursor, limit - cursor);
            if (got < 0) {
                fail() << "Unable to read packet";
            }
            if (got == 0) {
                return false;  // EOF
            }
            cursor += got;
        }
        assert(cursor == limit);
        return true;
    }

    // Read one packet (header, then payload) from stdin.
    // Returns false on EOF while reading the header. A payload that is
    // larger than the inline buffer, or that cannot be read in full, is fatal.
    bool read() {
        constexpr size_t header_size = sizeof(halide_trace_packet_t);
        const bool got_header = read_or_die(this, header_size);
        if (!got_header) {
            return false;  // EOF
        }
        const size_t payload_size = this->size - header_size;
        const bool fits = payload_size <= sizeof(this->payload);
        if (!fits || !read_or_die(this->payload, payload_size)) {
            // Shouldn't ever get EOF here
            fail() << "Unable to read packet payload of size " << payload_size;
        }
        return true;
    }
};
// -------------------------------------------------------------
// A struct specifying how a single Func will get visualized.
struct FuncInfo {
    // Info about Funcs type and touched-extent, emitted
    // by the tracing code.
    FuncTypeAndDim type_and_dim;
    bool type_and_dim_valid = false;
    int layout_order = -1;

    // Configuration for how the func should be drawn
    FuncConfig config;
    bool config_valid = false;

    // Information about actual observed values gathered while parsing the trace
    struct Observed {
        std::string qualified_name;
        int first_draw_time = -1, first_packet_idx = -1;
        // Smallest and largest value seen in any load or store.
        double min_value = 0.0, max_value = 0.0;
        // Per-dimension half-open range [min_coord, max_coord) of touched
        // coordinates; only the first 16 dimensions are tracked.
        int min_coord[16];
        int max_coord[16];
        int num_realizations = 0, num_productions = 0;
        // Counted in lanes, not packets.
        uint64_t stores = 0, loads = 0;

        Observed() {
            memset(min_coord, 0, sizeof(min_coord));
            memset(max_coord, 0, sizeof(max_coord));
        }

        // Record a load packet. The bookkeeping must run before the counter
        // is bumped, because it uses loads + stores == 0 to detect the first
        // packet.
        void observe_load(const halide_trace_packet_t &p) {
            observe_load_or_store(p);
            loads += p.type.lanes;
        }

        // Record a store packet; same ordering requirement as observe_load().
        void observe_store(const halide_trace_packet_t &p) {
            observe_load_or_store(p);
            stores += p.type.lanes;
        }

        // Widen the coordinate and value ranges to include everything in 'p'.
        void observe_load_or_store(const halide_trace_packet_t &p) {
            const bool first_packet = (loads + stores == 0);
            const int lanes = p.type.lanes;
            const int *coords = p.coordinates();
            // Coordinates are indexed as [dimension * lanes + lane].
            for (int i = 0; i < std::min(16, p.dimensions / lanes); i++) {
                for (int lane = 0; lane < lanes; lane++) {
                    int coord = coords[i * lanes + lane];
                    if (first_packet && lane == 0) {
                        min_coord[i] = coord;
                        max_coord[i] = coord + 1;
                    } else {
                        min_coord[i] = std::min(min_coord[i], coord);
                        max_coord[i] = std::max(max_coord[i], coord + 1);
                    }
                }
            }
            for (int i = 0; i < lanes; i++) {
                double value = get_value_as<double>(p, i);
                // Seed the range from lane 0 of the first packet only.
                // Re-seeding on every lane would discard all but the last
                // lane of that packet.
                if (first_packet && i == 0) {
                    min_value = value;
                    max_value = value;
                } else {
                    min_value = std::min(min_value, value);
                    max_value = std::max(max_value, value);
                }
            }
        }

        // Emit a summary of the gathered statistics via info().
        void report() {
            std::ostringstream o;
            for (int i = 0; i < 16; i++) {
                // A [0, 0) range is treated as "no further dimensions".
                if (min_coord[i] == 0 && max_coord[i] == 0) {
                    break;
                }
                if (i > 0) {
                    o << " x ";
                }
                o << "[" << min_coord[i] << ", " << max_coord[i] << ")";
            }
            info()
                << "Func " << qualified_name << ":\n"
                << o.str() << "\n"
                << " range of values: [" << min_value << ", " << max_value << "]\n"
                << " number of realizations: " << num_realizations << "\n"
                << " number of productions: " << num_productions << "\n"
                << " number of loads: " << loads << "\n"
                << " number of stores: " << stores << "\n";
        }
    } stats;
};
// All state for one run: the global settings plus one FuncInfo per Func,
// keyed by the Func name given on the command line.
struct VizState {
GlobalConfig globals;
std::map<std::string, FuncInfo> funcs;
};
// -------------------------------------------------------------
// -------------------------------------------------------------
std::string usage() {
return
R"USAGE(
HalideTraceViz accepts Halide-generated binary tracing packets from
stdin, and outputs them as raw 8-bit rgba32 pixel values to
stdout. You should pipe the output of HalideTraceViz into a video
encoder or player.
E.g. to encode a video:
HL_TARGET=host-trace_all <command to make pipeline> && \
HL_TRACE_FILE=/dev/stdout <command to run pipeline> | \
HalideTraceViz -s 1920 1080 -t 10000 <the -f args> | \
avconv -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 output.avi
To just watch the trace instead of encoding a video replace the last
line with something like:
mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
The arguments to HalideTraceViz specify how to lay out and render the
Funcs of interest. It acts like a stateful drawing API. The following
parameters should be set zero or one times:
--size width height: The size of the output frames. Defaults to
1920x1080.
--timestep timestep: How many Halide computations should be covered
by each frame. Defaults to 10000.
--decay A B: How quickly should the yellow and blue highlights decay
over time. This is a two-stage exponential decay with a knee in
it. A controls the rate at which they decay while a value is in
the process of being computed, and B controls the rate at which
they decay over time after the corresponding value has finished
being computed. 1 means never decay, 2 means halve in opacity
every frame, and 256 or larger means instant decay. The default
values for A and B are 1 and 2 respectively, which means that the
highlight holds while the value is being computed, and then
decays slowly.
--hold frames: How many frames to output after the end of the
trace. Defaults to 250.
The following parameters can be set once per Func. With the exception
of label, they continue to take effect for all subsequently defined
Funcs.
--min: The minimum value taken on by a Func. Maps to black.
--max: The maximum value taken on by a Func. Maps to white.
--rgb dim: Render Funcs as rgb, with the dimension dim indexing the
color channels.
--gray: Render Funcs as grayscale.
--blank: Specify that the output occupied by a Func should be set to
black on its end-realization event.
--no-blank: The opposite of --blank. Leaves the Func's values on the
screen. This is the default
--zoom factor: Each value of a Func will draw as a factor x factor
box in the output. Fractional values are allowed.
--load time: Each load from a Func costs the given number of ticks.
--store time: Each store to a Func costs the given number of ticks.
--move x y: Sets the position on the screen corresponding to the
Func's 0, 0 coordinate.
--left dx: Moves the currently set position leftward by the given
amount.
--right dx: Moves the currently set position rightward by the given
amount.
--up dy: Moves the currently set position upward by the given amount.
--down dy: Moves the currently set position downward by the given
amount.
--push: Copies the currently set position onto a stack of positions.
--pop: Sets the current position to the value most-recently pushed,
and removes it from the stack.
--strides ... : Specifies the matrix that maps the coordinates of the
Func to screen pixels. Specified column major. For example,
--strides 1 0 0 1 0 0 specifies that the Func has three
dimensions where the first one maps to screen-space x
coordinates, the second one maps to screen-space y coordinates,
and the third one does not affect screen-space coordinates.
--uninit r g b : Specifies the on-screen color corresponding to
uninitialized memory. Defaults to black.
--func name: Mark a Func to be visualized. Uses the currently set
values of the parameters above to specify how.
--label func label n: When the named Func is first touched, the label
appears with its bottom left corner at the current coordinates
and fades in over n frames.
--rlabel func label dx dy n: Like "--label", but relative to the Func's
position, using dx and dy as an offset.
))USAGE";
}
// Calculate the maximum 2d rendered size for a given Box and stride, assuming
// a zoom factor of 1. This uses the same recursive approach as fill_realization()
// for simplicity.
void calc_2d_size(const std::vector<Range> &dims, const std::vector<Point> &strides, Range *x, Range *y,
int current_dimension = 0, int x_off = 0, int y_off = 0) {
if (current_dimension == 0) {
x->min = 2147483647;
x->extent = -2147483647;
y->min = 2147483647;
y->extent = -2147483647;
}
if (current_dimension == (int)dims.size()) {
x->min = std::min(x->min, x_off);
x->extent = std::max(x->extent, x_off);
y->min = std::min(y->min, y_off);
y->extent = std::max(y->extent, y_off);
} else {
const auto &m = dims.at(current_dimension);
const Point &stride = strides.at(current_dimension);
x_off += stride.x * m.min;
y_off += stride.y * m.min;
for (int i = 0; i < m.extent; i++) {
calc_2d_size(dims, strides, x, y, current_dimension + 1, x_off, y_off);
x_off += stride.x;
y_off += stride.y;
}
}
if (current_dimension == 0) {
x->extent = std::max(1, x->extent - x->min + 1);
y->extent = std::max(1, y->extent - y->min + 1);
}
}
// -------------------------------------------------------------
// Replace fi.config with a fully-specified configuration: start from a set
// of 'safe' defaults, then merge the existing config over them.
void finalize_func_config_values(const GlobalConfig &globals, FuncInfo &fi) {
    // Make a FuncConfig with 'safe' defaults for everything,
    // then merge the existing cfg into it.
    FuncConfig safe;
    safe.zoom = 1.f;
    safe.load_cost = 0;
    safe.store_cost = 1;
    safe.pos = {0, 0};
    safe.strides = { {1, 0}, {0, 1} };
    safe.color_dim = -1;
    safe.min = 0.0;
    safe.max = 1.0;
    safe.labels = {};
    safe.blank_on_end_realization = 0;
    safe.uninitialized_memory_color = globals.default_uninitialized_memory_color;

    if (fi.type_and_dim_valid) {
        // Try to choose better values for min and max based on type.
        // TODO: only considers the first type given; in general,
        // HTV doesn't deal with Tuple-valued Funcs very well.
        const halide_type_t &type = fi.type_and_dim.types.at(0);
        // The powers of two are computed in floating point: (1 << bits)
        // overflows int for 32- and 64-bit types.
        if (type.code == halide_type_uint) {
            safe.max = std::ldexp(1.0, type.bits) - 1.0;
        } else if (type.code == halide_type_int) {
            const double d = std::ldexp(1.0, type.bits - 1);
            safe.max = d - 1;
            // safe.min = -d;
            // In practice, assuming a min of zero (rather than -INT_MIN)
            // for signed types produces less-weird results.
            safe.min = 0.0;
        }
    }

    safe.merge_from(fi.config);
    // Force the uninitialized-memory color to be fully opaque.
    safe.uninitialized_memory_color |= 0xff000000;
    fi.config = safe;
}
// Given a FuncConfig, check each field for "use some reasonable default"
// value and fill in something reasonable.
void finalize_func_config_values(const GlobalConfig &globals, std::map<std::string, FuncInfo> &funcs) {
for (auto &p : funcs) {
auto &fi = p.second;
finalize_func_config_values(globals, fi);
}
}
// Fill in the layout-related fields of fi.config that are still at their
// "unspecified" sentinel values (color_dim < -1, zoom < 0, pos.x and pos.y
// both < 0, no labels), placing the Func in the grid cell selected by
// fi.layout_order. Fields that already hold a value are left alone.
// The steps are order-dependent: strides chosen while picking color_dim
// feed the zoom calculation, and the zoom feeds the default label.
// Requires fi.type_and_dim_valid; sets fi.config_valid on exit.
void do_auto_layout(const GlobalConfig &globals, const std::string &func_name, FuncInfo &fi) {
assert(fi.type_and_dim_valid);
const Point &pad = globals.auto_layout_pad;
// Each cell is the frame divided evenly by the grid dimensions.
Point cell_size = {
globals.frame_size.x / globals.auto_layout_grid.x,
globals.frame_size.y / globals.auto_layout_grid.y
};
info() << "cell_size is " << cell_size << "\n";
info() << "auto_layout_pad is " << pad << "\n";
// Cells are assigned in row-major order from layout_order.
int row = fi.layout_order / globals.auto_layout_grid.x;
int col = fi.layout_order % globals.auto_layout_grid.x;
if (fi.config.color_dim < -1) {
// If color_dim is unspecified and it looks like a 2d RGB Func, make it one
const auto &dims = fi.type_and_dim.dims;
if (dims.size() == 3) {
if ((dims[2].extent == 3 || dims[2].extent == 4)) {
fi.config.color_dim = 2;
} else if ((dims[0].extent == 3 || dims[0].extent == 4)) {
fi.config.color_dim = 0;
// Channel-first layout: dimension 0 gets stride (0, 0) and the
// next two dimensions map to screen x and y.
if (fi.config.strides.empty()) {
fi.config.strides = { {0, 0}, {1, 0}, {0, 1} };
}
}
}
}
if (fi.config.zoom < 0.f) {
// Ensure that all of the FuncInfos have strides that match
// the number of dimensions expected by FuncTypeAndDim, adding
// zero-stride pairs as needed (this simplifies rendering checks
// later on)
if (fi.config.strides.empty()) {
fi.config.strides = { {1, 0}, {0, 1} };
}
while (fi.config.strides.size() < fi.type_and_dim.dims.size()) {
fi.config.strides.push_back({0, 0});
}
// Calc the 2d size that this would render at (including stride-stretching) for zoom=1
Range xr, yr;
calc_2d_size(fi.type_and_dim.dims, fi.config.strides, &xr, &yr);
info() << "calc_2d_size for " << func_name << " is " << xr << ", " << yr << "\n";
// Use that size to calculate the zoom we need -- this chooses
// a zoom that maximizes the size within the cell.
float zoom_x = (float) (cell_size.x - pad.x) / (float) xr.extent;
float zoom_y = (float) (cell_size.y - pad.y) / (float) yr.extent;
fi.config.zoom = std::min(zoom_x, zoom_y);
// Try to choose an even-multiple zoom for better display
// and just less weirdness.
// Zooms above 1 are rounded down (to a multiple of 100, 10 or 0.5);
// zooms below 1 are rounded up to a multiple of 0.05.
if (fi.config.zoom > 100.f) {
// Zooms this large are usually for things like input matrices.
// Perhaps clamp at something smaller?
fi.config.zoom = floor(fi.config.zoom / 100.f) * 100.f;
} else if (fi.config.zoom > 10.f) {
fi.config.zoom = floor(fi.config.zoom / 10.f) * 10.f;
} else if (fi.config.zoom > 1.f) {
fi.config.zoom = floor(fi.config.zoom * 2.f) / 2.f;
} else if (fi.config.zoom < 1.f) {
fi.config.zoom = ceil(fi.config.zoom * 20.f) / 20.f;
}
info() << "zoom for " << func_name << " is " << zoom_x << " " << zoom_y << " -> " << fi.config.zoom << "\n";
}
// Put the image at the top-left of the cell. (Should we try to
// center within the cell?)
if (fi.config.pos.x < 0 && fi.config.pos.y < 0) {
fi.config.pos.x = col * cell_size.x + pad.x;
fi.config.pos.y = row * cell_size.y + pad.y;
}
info() << "pos for " << func_name << " is " << fi.config.pos.x << " " << fi.config.pos.y << "\n";
if (fi.config.labels.empty()) {
// Default label: the Func name plus the zoom as a percentage.
std::string label = func_name + " (" + std::to_string((int) (fi.config.zoom * 100)) + "%)";
const int label_width = label.size() * inconsolata_char_width;
const int label_space = cell_size.x - pad.x*2;
float h_scale = 1.f;
// If the label is wider than the cell, squeeze it horizontally,
// but never below a quarter of its natural width.
if (label_width > label_space) {
h_scale = std::max(0.25f, std::min(1.f, (float) label_space / (float) label_width));
info() << "h_scale for label (" << label << " is " << h_scale << "\n";
}
fi.config.labels.push_back({label, {0, 0}, 10, h_scale});
}
fi.config_valid = true;
}
// Apply per-Func auto layout to every Func in 'state'.
// Does nothing unless state.globals.auto_layout is set.
void do_auto_layout(VizState &state) {
    if (state.globals.auto_layout) {
        for (auto &entry : state.funcs) {
            do_auto_layout(state.globals, entry.first, entry.second);
        }
    }
}
// Candidate side length for square cells such that at least min_cells of
// them fit into a width x height area. The count across the width is
// ceil(sqrt(min_cells * aspect_ratio)). If that many columns combined with
// the whole rows that fit falls short of min_cells, the side is derived
// from the height instead of the width.
float calc_side_length(int min_cells, int width, int height) {
    const float aspect = (float) width / (float) height;
    const float across = ceil(sqrt(min_cells * aspect));
    const float down = across / aspect;
    if (floor(down) * across < min_cells) {
        const float side = height / ceil(down);
        return side;
    }
    const float side = width / across;
    return side;
}
// Calculate the 'best' cell size such that we can fit at least min_cells
// into the given width x height. The result is always square: the side is
// the larger of the two candidates obtained by treating the area in both
// orientations, rounded down. Square cells are OK but a little wasteful
// (eg for min_cells=20 and size 1920x1080, it calculates a grid of 7x4
// which wastes 8 cells). We could probably do better if we just tried to
// keep the cells 'nearly' square (aspect ratio <= 1.25).
Point best_cell_size(int min_cells, int width, int height) {
    const float landscape = calc_side_length(min_cells, width, height);
    const float portrait = calc_side_length(min_cells, height, width);
    const float larger = (landscape < portrait) ? portrait : landscape;
    const int edge = floor(larger);
    return {edge, edge};
}
// -------------------------------------------------------------
void process_args(int argc, char **argv, VizState *state) {
GlobalConfig &globals = state->globals;
std::map<std::string, FuncInfo> &funcs = state->funcs;
// The struct's default values are what we want
FuncConfig config;
std::vector<Point> pos_stack;
std::set<std::string> labels_seen;
// If the condition is false, print usage and exit with error.
const auto expect = [](bool cond, int i) {
if (!cond) {
if (i) {
fail() << "Argument parsing failed at argument " << i << "\n" << usage();
} else {
fail() << usage();
}
}
};
const auto parse_int = [](const char *str) -> int {
char *endptr = nullptr;
errno = 0;
long result = strtol(str, &endptr, 0);
if (errno == ERANGE || str == endptr) {
fail() << "Unable to parse '" << str << "' as an int\n" << usage();
}
return (int) result;
};
const auto parse_float = [](const char *str) -> float {
char *endptr = nullptr;
errno = 0;
float result = strtof(str, &endptr);
if (errno == ERANGE || str == endptr) {
fail() << "Unable to parse '" << str << "' as a float\n" << usage();
}
return result;
};
const auto parse_double = [](const char *str) -> double {
char *endptr = nullptr;
errno = 0;
double result = strtod(str, &endptr);
if (errno == ERANGE || str == endptr) {
fail() << "Unable to parse '" << str << "' as a double\n" << usage();
}
return result;
};
// Parse command line args
int i = 1;
while (i < argc) {
std::string next = argv[i];
if (next == "--size") {
expect(i + 2 < argc, i);
globals.frame_size.x = parse_int(argv[++i]);
globals.frame_size.y = parse_int(argv[++i]);
} else if (next == "--func") {
expect(i + 1 < argc, i);
const char *func = argv[++i];
FuncInfo &fi = funcs[func];
fi.config.merge_from(config);
fi.config_valid = true;
} else if (next == "--min") {
expect(i + 1 < argc, i);
config.min = parse_double(argv[++i]);
} else if (next == "--max") {
expect(i + 1 < argc, i);
config.max = parse_double(argv[++i]);
} else if (next == "--move") {
expect(i + 2 < argc, i);
config.pos.x = parse_int(argv[++i]);
config.pos.y = parse_int(argv[++i]);
} else if (next == "--left") {
expect(i + 1 < argc, i);
config.pos.x -= parse_int(argv[++i]);
} else if (next == "--right") {
expect(i + 1 < argc, i);
config.pos.x += parse_int(argv[++i]);
} else if (next == "--up") {
expect(i + 1 < argc, i);
config.pos.y -= parse_int(argv[++i]);
} else if (next == "--down") {
expect(i + 1 < argc, i);
config.pos.y += parse_int(argv[++i]);
} else if (next == "--push") {
pos_stack.push_back(config.pos);
} else if (next == "--pop") {
expect(!pos_stack.empty(), i);
config.pos = pos_stack.back();
pos_stack.pop_back();
} else if (next == "--rgb") {
expect(i + 1 < argc, i);
config.color_dim = parse_int(argv[++i]);
} else if (next == "--gray") {
config.color_dim = -1;
} else if (next == "--blank") {
config.blank_on_end_realization = 1;
} else if (next == "--no-blank") {
config.blank_on_end_realization = 0;
} else if (next == "--zoom") {
expect(i + 1 < argc, i);
config.zoom = parse_float(argv[++i]);
} else if (next == "--load") {
expect(i + 1 < argc, i);
config.load_cost = parse_int(argv[++i]);
} else if (next == "--store") {
expect(i + 1 < argc, i);
config.store_cost = parse_int(argv[++i]);
} else if (next == "--strides") {
config.strides.clear();
while (i + 1 < argc) {
const char *next_arg = argv[i + 1];
if (next_arg[0] == '-' &&
next_arg[1] == '-') {
break;
}
expect(i + 2 < argc, i);
int x = parse_int(argv[++i]);
int y = parse_int(argv[++i]);
config.strides.push_back({x, y});
}
} else if (next == "--label") {
expect(i + 3 < argc, i);
char *func = argv[++i];
char *text = argv[++i];
int n = parse_int(argv[++i]);
FuncInfo &fi = funcs[func];
// A Label's position is relative to its Func's position;
// the --label flag has always expected an absolute position,
// so convert it to an offset.
Point offset = { config.pos.x - fi.config.pos.x, config.pos.y - fi.config.pos.y };
if (!labels_seen.count(func)) {
// If there is at least one --label specified for a Func,
// it overrides the entire previous std::set of labels, rather
// than simply appending.
fi.config.labels.clear();
labels_seen.insert(func);
}
fi.config.labels.push_back({text, offset, n});
} else if (next == "--rlabel") {
expect(i + 5 < argc, i);
char *func = argv[++i];
char *text = argv[++i];
int dx = parse_int(argv[++i]);
int dy = parse_int(argv[++i]);
int n = parse_int(argv[++i]);
FuncInfo &fi = funcs[func];
Point offset = { dx, dy };
if (!labels_seen.count(func)) {
// If there is at least one --label specified for a Func,
// it overrides the entire previous std::set of labels, rather
// than simply appending.
fi.config.labels.clear();
labels_seen.insert(func);
}
fi.config.labels.push_back({text, offset, n});
} else if (next == "--timestep") {
expect(i + 1 < argc, i);
globals.timestep = parse_int(argv[++i]);
} else if (next == "--decay") {
expect(i + 2 < argc, i);
globals.decay_factor_during_compute = parse_int(argv[++i]);
globals.decay_factor_after_compute = parse_int(argv[++i]);
} else if (next == "--hold") {
expect(i + 1 < argc, i);
globals.hold_frames = parse_int(argv[++i]);
} else if (next == "--uninit") {
expect(i + 3 < argc, i);
int r = parse_int(argv[++i]);
int g = parse_int(argv[++i]);
int b = parse_int(argv[++i]);
config.uninitialized_memory_color = ((b & 255) << 16) | ((g & 255) << 8) | (r & 255);
} else if (next == "--auto_layout") {
globals.auto_layout = true;
} else if (next == "--no-auto_layout") {
globals.auto_layout = false;
} else if (next == "--auto_layout_grid") {
expect(i + 2 < argc, i);
globals.auto_layout_grid.x = parse_int(argv[++i]);
globals.auto_layout_grid.y = parse_int(argv[++i]);
} else if (next == "--uninit_default") {
expect(i + 3 < argc, i);
int r = parse_int(argv[++i]);
int g = parse_int(argv[++i]);
int b = parse_int(argv[++i]);
globals.default_uninitialized_memory_color = ((b & 255) << 16) | ((g & 255) << 8) | (r & 255);
} else if (next == "--ignore_tags" || next == "--no-ignore_tags") {
// Already processed, just continue
} else if (next == "--verbose" || next == "--no-verbose") {
// Already processed, just continue
} else {
expect(false, i);
}
i++;
}
}
// There are three layers - image data, an animation on top of
// it, and text labels. These layers get composited.
struct Surface {
const Point frame_size;
std::vector<uint32_t> image, anim, anim_decay, text_buf, blend;
// Alpha-blend one pixel of 'over' on top of one pixel of 'under' and store
// the result in *dst. dst may alias either input: each channel is read
// before it is written, and the alpha of 'over' is captured up front.
// Channels are addressed bytewise, with alpha taken from the top 8 bits of
// the 32-bit value and from byte 3 of 'under'.
static void composite_one(const uint32_t *under, const uint32_t *over, uint32_t *dst) {
    const uint32_t top = *over;
    const uint8_t alpha = top >> 24;

    // Fast paths: alpha is almost always 0 or 255.
    if (alpha == 255) {
        *dst = top;
        return;
    }
    if (alpha == 0) {
        *dst = *under;
        return;
    }

    // TODO: this could be done using 64-bit ops more simply
    const uint8_t *bottom_bytes = (const uint8_t *) under;
    const uint8_t *top_bytes = (const uint8_t *) over;
    uint8_t *out = (uint8_t *) dst;
    const int inv_alpha = 255 - alpha;
    for (int c = 0; c < 3; c++) {
        out[c] = (alpha * top_bytes[c] + inv_alpha * bottom_bytes[c]) / 255;
    }
    // Resulting alpha: 1 - (1 - under.alpha) * (1 - over.alpha), in 0..255.
    out[3] = 255 - (((255 - bottom_bytes[3]) * inv_alpha) / 255);
}
// Divide the alpha of every pixel in the frame-sized buffer 'dst' by
// decay_factor, leaving the color bits untouched. A factor of 1 is a no-op.
// The division is a fixed-point multiply by (1 << 24) / decay_factor whose
// top byte becomes the new alpha; factors below 1 are treated as 1 when
// computing that multiplier.
void do_decay(int decay_factor, uint32_t *dst) {
    if (decay_factor == 1) {
        return;
    }
    const uint32_t scale = (1 << 24) / std::max(1, decay_factor);
    const size_t count = frame_elems();
    for (size_t i = 0; i < count; i++) {
        const uint32_t px = dst[i];
        const uint32_t decayed_alpha = ((px >> 24) * scale) & 0xff000000;
        dst[i] = decayed_alpha | (px & 0x00ffffff);
    }
}
// Draw one Func value as a ceil(zoom) x ceil(zoom) square of 'color' whose
// top-left corner is at (x, y). dst is assumed to point to the start of a
// frame_size buffer. The square is clipped against the frame, so positions
// partly or wholly off-screen never write outside the buffer.
void do_draw_pixel(const float zoom, const int x, const int y, const uint32_t color, uint32_t *dst) {
    const int izoom = (int) ceil(zoom);
    // The far edges are computed in 64 bits so x + izoom cannot overflow.
    const long long x_begin = std::max(x, 0);
    const long long x_end = std::min<long long>((long long) x + izoom, frame_size.x);
    const long long y_begin = std::max(y, 0);
    const long long y_end = std::min<long long>((long long) y + izoom, frame_size.y);
    for (long long py = y_begin; py < y_end; py++) {
        uint32_t *row = dst + py * frame_size.x;
        for (long long px = x_begin; px < x_end; px++) {
            row[px] = color;
        }
    }
}
// Paint the rectangle (left, top, width, height) into dst with 'color',
// clipped to the frame.
// Opaque RGB(1,1,1), i.e. 0xff010101, is a "magic" color: instead of a flat
// fill it produces a checkerboard of 16x16 squares.
// dst is assumed to point to the start of a frame_size buffer.
void fill_rect(int left, int top, int width, int height, uint32_t color, uint32_t *dst) {
    const int x_lo = std::max(left, 0);
    const int x_hi = std::min(left + width, frame_size.x);
    const int y_lo = std::max(top, 0);
    const int y_hi = std::min(top + height, frame_size.y);
    const bool checkerboard = (color == 0xff010101);

    for (int y = y_lo; y < y_hi; y++) {
        uint32_t *row = dst + y * frame_size.x;
        for (int x = x_lo; x < x_hi; x++) {
            if (checkerboard) {
                const int check = ((x / 16) % 2) ^ ((y / 16) % 2);
                row[x] = check ? 0xff808080 : 0xffffffff;
            } else {
                row[x] = color;
            }
        }
    }
}
// Set all boxes corresponding to positions in a Func's allocation to
// the given color. Recursive to handle arbitrary
// dimensionalities. Used by begin and end realization events.
void do_fill_realization(uint32_t *dst, uint32_t color,
const FuncInfo &fi, const halide_trace_packet_t &p,
int current_dimension = 0, int x_off = 0, int y_off = 0) {
if (2 * current_dimension == p.dimensions) {
const int x_min = x_off * fi.config.zoom + fi.config.pos.x;
const int y_min = y_off * fi.config.zoom + fi.config.pos.y;
const int izoom = (int) ceil(fi.config.zoom);
fill_rect(x_min, y_min, izoom, izoom, color, dst);
} else {
const int *coords = p.coordinates();
const int min = coords[current_dimension * 2 + 0];
const int extent = coords[current_dimension * 2 + 1];
// If we don't have enough strides, assume subsequent dimensions have stride (0, 0)
const Point pt = current_dimension < (int)fi.config.strides.size() ? fi.config.strides.at(current_dimension) : Point{0, 0};
x_off += pt.x * min;
y_off += pt.y * min;
for (int i = 0; i < extent; i++) {
do_fill_realization(dst, color, fi, p, current_dimension + 1, x_off, y_off);
x_off += pt.x;
y_off += pt.y;
}
}
}
public:
// Create a surface for frames of size 'fs'. Each of the five buffers is
// allocated with frame_elems() elements. frame_elems() reads frame_size,
// which is initialized first in the list below.
Surface(const Point &fs)
: frame_size(fs),
image(frame_elems()),
anim(frame_elems()),
anim_decay(frame_elems()),
text_buf(frame_elems()),
blend(frame_elems()) {}
// Copy construction and copy assignment are disabled.
Surface(const Surface &) = delete;
void operator=(const Surface &) = delete;
// Number of pixels in one frame (width * height).
size_t frame_elems() const {
    // Widen before multiplying so the product is not computed in int.
    return (size_t) frame_size.x * (size_t) frame_size.y;
}
// Pointer to the first pixel of the 'blend' buffer, which composite()
// writes its final result into.
const uint32_t *frame_data() const {
return this->blend.data();
}
// Return the pixel at (x, y) of the image layer (row-major, frame_size.x
// pixels per row). NOTE(review): x and y are not bounds-checked here;
// callers presumably pass in-frame coordinates -- confirm.
uint32_t get_image_pixel(const int x, const int y) const {
return image[frame_size.x * y + x];
}
// Render 'text' into the text layer using the built-in bitmap font.
// pos is the bottom-left corner of the first character. Each glyph value
// becomes the pixel's alpha, combined with the RGB of 'color'. h_scale
// (clamped to at most 1) squeezes the text horizontally. Pixels that fall
// outside the frame are skipped.
void draw_text(const std::string &text, const Point &pos, uint32_t color, float h_scale = 1.0f) {
    uint32_t *dst = text_buf.data();
    // Drop any alpha component of color
    color &= 0xffffff;
    // Horizontal scale as an 8.8 fixed-point factor; it does not depend on
    // the character, so compute it once.
    const int h_scale_numerator = std::ceil(std::min(1.f, h_scale) * 256);
    const int glyph_size = inconsolata_char_width * inconsolata_char_height;

    int c = -1;
    for (int chr : text) {
        ++c;
        // We only handle a subset of ascii
        if (chr < 32 || chr >= 32 + inconsolata_char_count) {
            chr = 32;
        }
        chr -= 32;
        const uint8_t *font_ptr = inconsolata_raw + chr * glyph_size;
        for (int fy = 0; fy < inconsolata_char_height; fy++) {
            const int py = pos.y - inconsolata_char_height + fy + 1;
            if (py < 0 || py >= frame_size.y) continue;
            for (int fx = 0; fx < inconsolata_char_width; fx++) {
                const int px = pos.x + (((inconsolata_char_width * c + fx) * h_scale_numerator) >> 8);
                if (px < 0 || px >= frame_size.x) continue;
                // Widen to uint32_t before shifting: a uint8_t promotes to
                // int, and shifting values >= 128 left by 24 would overflow
                // into the sign bit.
                const uint32_t alpha = (uint32_t) font_ptr[fy * inconsolata_char_width + fx] << 24;
                dst[py * frame_size.x + px] = alpha | color;
            }
        }
    }
}
// Draw one zoomed pixel of 'color' at (x, y) into the animation layer.
void draw_anim_pixel(const float zoom, int x, int y, uint32_t color) {
do_draw_pixel(zoom, x, y, color, anim.data());
}
// Draw one zoomed pixel of 'color' at (x, y) into the image layer.
void draw_image_pixel(const float zoom, int x, int y, uint32_t color) {
do_draw_pixel(zoom, x, y, color, image.data());
}
// Fill the image-layer area covered by the realization described by
// packet 'p' with 'color', using fi's layout configuration.
void fill_realization(uint32_t color, const FuncInfo &fi, const halide_trace_packet_t &p) {
do_fill_realization(image.data(), color, fi, p);
}
void composite() {
// Composite text over anim over image
uint32_t *anim_decay_px = anim_decay.data();
uint32_t *anim_px = anim.data();
uint32_t *image_px = image.data();
uint32_t *text_px = text_buf.data();
uint32_t *blend_px = blend.data();
for (size_t i = 0; i < image.size(); i++) {
// anim over anim_decay -> anim_decay
composite_one(anim_decay_px, anim_px, anim_decay_px);
// anim_decay over image -> blend
composite_one(image_px, anim_decay_px, blend_px);
// text over blend -> blend
composite_one(blend_px, text_px, blend_px);
anim_decay_px++;
anim_px++;
image_px++;
text_px++;
blend_px++;
}
}