-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDSWP_3.cpp
229 lines (200 loc) · 7.64 KB
/
DSWP_3.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
//3rd step: thread partition
#include "DSWP.h"
using namespace llvm;
using namespace std;
void DSWP::threadPartition(Loop *L) {
//1. get total latency and latency for
assigned.clear();
sccLatency.clear();
int totalLatency = 0;
for (int i = 0; i < sccNum; i++)
sccLatency.push_back(0);
for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) {
BasicBlock *BB = *bi;
for (BasicBlock::iterator ii = BB->begin(); ii != BB->end(); ii++) {
Instruction *inst = &(*ii);
int latency = getLatency(inst);
sccLatency[sccId[inst]] += latency;
totalLatency += latency;
}
}
int averLatency = totalLatency / MAX_THREAD;
cout << "latency info:" << totalLatency << " " << averLatency << endl;
for (int i = 0; i < sccNum; i++)
assigned.push_back(-2);
//-2: not assigned, and not in queue
//-1: not assigned, but in queue
//0 ~ MAX_THREAD, already been assigned, not in queue
int estLatency[MAX_THREAD] = {};
//TODO NOT SURE HERE IS RIGHT! header vs. preheader
int start = sccId[L->getHeader()->getFirstNonPHI()];
priority_queue<QNode> Q;
Q.push(QNode(start, sccLatency[start]));
for (int i = 0; i < MAX_THREAD; i++) {
while (!Q.empty()) {
QNode top = Q.top(); Q.pop();
assigned[top.u] = i;
estLatency[i] += top.latency;
//update the list
for (unsigned j = 0; j < dag[top.u]->size(); j++) {
int v = dag[top.u]->at(j);
if (assigned[v] > -2)
continue;
assigned[v] = -1;
Q.push(QNode(v, sccLatency[v]));
}
//check load balance
if (estLatency[i] >= averLatency) {
// cout << estLatency[i] << endl;
break;
}
}
}
//following is impossible since every time I let it bigger that aver
// if (!Q.empty()) {
// error("queue should be empty!");
// }
while (!Q.empty()) {
assigned[Q.top().u] = MAX_THREAD - 1;
Q.pop();
}
for (int i = 0; i < MAX_THREAD; i++)
part[i].clear();
for (int i = 0; i < sccNum; i++) {
// cout << i << " " << assigned[i] << endl;
part[assigned[i]].push_back(i);
}
for (int i = 0; i < MAX_THREAD; i++) {
cout << part[i].size() << endl;
}
}
int DSWP::getLatency(Instruction *I) { //Instruction Latency for Core 2, 65nm
int opcode = I->getOpcode();
int cost;
switch (opcode) {
// Terminator instructions
case 1: cost=1; break; // Return
case 2: cost=0; break; // Branc
case 3: cost=0; break; // Switch
// Standard binary operators
case 8: cost=1; break; // Add
case 9: cost=4; break; // FAdd
case 10: cost=1; break; // Sub
case 11: cost=4; break; // FSub
case 12: cost=3; break; // Mul
case 13: cost=4; break; // FMul
case 14: cost=17; break; // UDiv
case 15: cost=17; break; // SDiv
case 16: cost=24; break; // FDiv
case 17: cost=17; break; // URem
case 18: cost=17; break; // SRem
case 19: cost=24; break; // FRem
// logical operators (integer operands)
case 20: cost=7; break; // Shift left
case 21: cost=7; break; // Shift right
case 22: cost=7; break; // Shifr right (arithmetic)
case 23: cost=1; break; // And
case 24: cost=1; break; // Or
case 25: cost=1; break; // Xor
// Memory ops
case 26: cost=2; break; // Alloca
case 27: cost=2; break; // Load
case 28: cost=2; break; // Store
case 29: cost=1; break; // GetElementPtr
// Cast operators
case 30: cost=1; break; // Truncate integers
case 31: cost=1; break; // Zero extend integers
case 32: cost=1; break; // Sign extend integers
case 33: cost=4; break; // FPtoUI
case 34: cost=4; break; // FPtoSI
case 35: cost=4; break; // UItoFP
case 36: cost=4; break; // SItoFP
case 37: cost=4; break; // FPTrunc
case 38: cost=4; break; // FPExt
case 39: cost=2; break; // PtrToInt
case 40: cost=2; break; // IntToPtr
case 41: cost=1; break; // Type cast
// Other
case 42: cost=1; break; // Integer compare
case 43: cost=1; break; // Float compare
case 44: cost=1; break; // PHI node
case 45: cost=50; break; // Call function (this one is really variable)
default: cost=1;
}
return (cost);
}
//int DSWP::getLatency(Instruction *I) {
// int opcode = I->getOpcode();
//
// //copy from dswp583 google project site TODO this table is obviously not right!
// int cost;
// // These are instruction latencies for an AMD K10
// // (Well, most of them are, some I just made up)
// switch (opcode) {
// // Terminator instructions
// case 1: cost=2; break; // Return
// case 2: cost=3; break; // Branch //TODO NOT RIGHT
// case 3: cost=3; break; // Switch
//
// // Standard binary operators
// case 8: cost=1; break; // Add
// case 9: cost=4; break; // FAdd
// case 10: cost=1; break; // Sub
// case 11: cost=4; break; // FSub
// case 12: cost=3; break; // Mul
// case 13: cost=4; break; // FMul
// case 14: cost=17; break; // UDiv
// case 15: cost=17; break; // SDiv
// case 16: cost=24; break; // FDiv
// case 17: cost=17; break; // URem
// case 18: cost=17; break; // SRem
// case 19: cost=24; break; // FRem
//
// // logical operators (integer operands)
// case 20: cost=7; break; // Shift left
// case 21: cost=7; break; // Shift right
// case 22: cost=7; break; // Shifr right (arithmetic)
// case 23: cost=1; break; // And
// case 24: cost=1; break; // Or
// case 25: cost=1; break; // Xor
//
// // Memory ops
// case 26: cost=2; break; // Alloca
// case 27: cost=2; break; // Load
// case 28: cost=2; break; // Store
// case 29: cost=1; break; // GetElementPtr
//
// // Cast operators
// case 30: cost=1; break; // Truncate integers
// case 31: cost=1; break; // Zero extend integers
// case 32: cost=1; break; // Sign extend integers
// case 33: cost=4; break; // FPtoUI
// case 34: cost=4; break; // FPtoSI
// case 35: cost=4; break; // UItoFP
// case 36: cost=4; break; // SItoFP
// case 37: cost=4; break; // FPTrunc
// case 38: cost=4; break; // FPExt
// case 39: cost=2; break; // PtrToInt
// case 40: cost=2; break; // IntToPtr
// case 41: cost=1; break; // Type cast
//
// // Other
// case 42: cost=1; break; // Integer compare
// case 43: cost=1; break; // Float compare
// case 44: cost=1; break; // PHI node
// case 45: cost=50; break; // Call function (this one is really variable)
//
// default: cost=1;
// }
//
// return (cost);
//}
int DSWP::getInstAssigned(Value *inst) {
return assigned[sccId[dyn_cast<Instruction>(inst)]];
}
int DSWP::getNewInstAssigned(Value *inst) {
if (isa<TerminatorInst>(inst)) {
error("cannot be term");
}
return getInstAssigned(newToOld[inst]);
}