1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
|
# mach: bfin
// The assembly program uses two instructions to speed up the decoder inner loop:
// R6 = VIT_MAX( R5 , R4 ) (ASR);
// R2.H = R2.L = SIGN(R0.H) * R1.H + SIGN(R0.L) * R1.L;
// VIT_MAX is a 2-way parallel comparison of four updated path metrics, resulting
// in 2 new path metrics as well as a 2-bit field indicating the selection
// results. This 2-bit field is shifted into accumulator A0. This instruction
// implements the selections of a complete butterfly for a rate 1/n system.
// The sum-on-sign (SIGN) instruction is used to compute the branch metric used
// by each butterfly. It takes as input a pair of values representing the
// received symbol, and another pair of values which are +1 or -1. The latter
// come from a pre-computed table that holds all the branch metric information
// for a specific set of polynomials. As all symbols are assumed to be binary,
// distance metrics between a received symbol and a branch metric are computed
// by adding and subtracting the values of the symbol according to the
// transition of a branch.
.include "testutils.inc"
start
// ---- Pointer and modifier setup ------------------------------------------
// P2 points to decision history, where outputs are stored
// (one 32-bit decision word per decoded symbol).
loadsym P2, DecisionHistory
// P4 holds address of APMFrom (path metrics read for the current symbol)
loadsym P4, APMFrom;
// P5 holds address of APMTo (path metrics written for the current symbol)
loadsym P5, APMTo;
// I0 points to precomputed d's; the decoder reloads it for every symbol
loadsym I0, BranchStorage;
// M2 = 32 bytes = 16 halfword path metrics. It is added to I2 so that I2
// addresses APM[From+16]. Both halves are written so the offset does not
// depend on whatever M2.H happened to hold on entry.
M2.L = 32;
M2.H = 0;
// P0 points to the received symbols; one 32-bit pair is consumed per symbol
loadsym P0, InputData;
// storage for all precomputed branch metrics
loadsym P1, BranchStorage;
// Fill the 16-entry branch-metric sign table at [ P1 ] (BranchStorage).
// Every entry is one of four (high, low) pairs of +1 / -1, so each pair is
// built once in its own register and then stored at every slot that uses it.
// R0-R3 are scratch here: the decoder reloads all of them before reading them.
R6 = 0; // inits
R0.L = 0x0001; R0.H = 0x0001; // R0 = (+1, +1)
R1.L = 0xffff; R1.H = 0xffff; // R1 = (-1, -1)
R2.L = 0xffff; R2.H = 0x0001; // R2 = (+1, -1)
R3.L = 0x0001; R3.H = 0xffff; // R3 = (-1, +1)
// entries 0..3
[ P1 + 0 ] = R0;
[ P1 + 4 ] = R1;
[ P1 + 8 ] = R2;
[ P1 + 12 ] = R3;
// entries 4..7
[ P1 + 16 ] = R2;
[ P1 + 20 ] = R3;
[ P1 + 24 ] = R0;
[ P1 + 28 ] = R1;
// entries 8..11
[ P1 + 32 ] = R3;
[ P1 + 36 ] = R2;
[ P1 + 40 ] = R1;
[ P1 + 44 ] = R0;
// entries 12..15
[ P1 + 48 ] = R1;
[ P1 + 52 ] = R0;
[ P1 + 56 ] = R3;
[ P1 + 60 ] = R2;
// ---- Decoder ---------------------------------------------------------------
// Outer hardware loop (LC0): 18 passes, one per received symbol. Each pass
// performs 16 butterflies on the path metrics addressed by P4, writes the new
// metrics through P5, stores one 32-bit decision word at [ P2 ], and then
// swaps P4 and P5.
// NOTE(review): InputData defines only 20 halfwords (10 symbols), yet P0
// advances 4 bytes on each of the 18 passes, so later passes read whatever
// follows InputData in .data -- confirm this is intended.
// NOTE(review): the I0-I3 post-increment accesses presumably rely on L0-L3
// being zero (no circular buffering); nothing in this file sets them.
P1 = 18;
LSETUP ( L$0 , L$0end ) LC0 = P1; // SymNo loop start
L$0:
// Get a symbol and leave it resident in R1
R1 = [ P0 ]; // R1=(InputData[SymNo*2+1] InputData[SymNo*2])
P0 += 4;
A0 = 0; // decision bits for this symbol are collected in A0 from scratch
// I0 points to precomputed D1, D0
loadsym I0, BranchStorage;
I1 = P4; // I1 points to APM[From]
I2 = P4;
I2 += M2; // I2 points to APM[From+16]
I3 = P5; // I3 points to APM[To]
// Inner hardware loop (LC1) runs 16 - 1 = 15 times. The loop is software
// pipelined: the loads and sum-on-sign for the first butterfly are issued
// before L$1, and the last butterfly is completed after L$1end.
P1 = 16;
P1 += -1;
LSETUP ( L$1 , L$1end ) LC1 = P1;
// Original note: APMFrom and APMTo are in alternate memory banks.
// NOTE(review): in this file both arrays are defined in the same .data
// section.
R0 = [ I0 ++ ]; // load R0 = (D1 D0)
R3.L = W [ I1 ++ ]; // load RL3 = PM0
// (R1 holds current symbol)
R2.H = R2.L = SIGN(R0.H) * R1.H + SIGN(R0.L) * R1.L; // apply sum-on-sign instruction
R3.H = W [ I2 ++ ]; // now, R3 = (PM1 PM0)
L$1:
// Add/subtract the branch metric (both halves of R2) to/from the two old
// path metrics in R3: R5 = (R3.H + R2.H, R3.L - R2.L),
// R4 = (R3.H - R2.H, R3.L + R2.L).
R5 = R3 +|- R2 , R4 = R3 -|+ R2 || R0 = [ I0 ++ ] || NOP;
// R5 = (PM11 PM01) R4 = (PM10 PM00)
// and load next (D1 D0)
R6 = VIT_MAX( R5 , R4 ) (ASR) || R3.L = W [ I1 ++ ] || NOP;
// do 2 ACS in parallel
// R6 = (nPM1 nPM0) and update to A0
// The same issue slot also fetches the low half (PM0) of the next butterfly.
L$1end:
// Last instruction of the inner loop: compute the branch metric for the next
// butterfly from the R0 loaded above, fetch its PM1, and store this
// butterfly's result.
R2.H = R2.L = SIGN(R0.H) * R1.H + SIGN(R0.L) * R1.L || R3.H = W [ I2 ++ ] || [ I3 ++ ] = R6;
// store new path metrics in
// two consecutive locations
// Pipeline drain: finish the 16th butterfly with the operands prepared by the
// final loop iteration.
R5 = R3 +|- R2 , R4 = R3 -|+ R2;
R6 = VIT_MAX( R5 , R4 ) (ASR);
[ I3 ++ ] = R6;
// 16 VIT_MAX operations at 2 decision bits each fill the 32 bits of A0.w.
R7 = A0.w;
[ P2 ] = R7;
P2 += 4; // store history
// FP is used only as a temporary for the pointer exchange.
FP = P4; // swap pointers From <--> To
P4 = P5;
L$0end:
// Last instruction of the outer loop: completes the From/To swap.
P5 = FP;
// check results
// Compare the first 11 stored decision words (22 halfwords) with their
// reference values. Each 32-bit word is fetched in one load; the halfword at
// the lower address lands in R0.L and is checked first, then R0.H.
loadsym I0, DecisionHistory
// decision word 0
R0 = [ I0 ++ ];
DBGA ( R0.L , 0x6ff2 );
DBGA ( R0.H , 0xf99f );
// decision word 1
R0 = [ I0 ++ ];
DBGA ( R0.L , 0x9909 );
DBGA ( R0.H , 0x6666 );
// decision word 2
R0 = [ I0 ++ ];
DBGA ( R0.L , 0x0096 );
DBGA ( R0.H , 0x6996 );
// decision word 3
R0 = [ I0 ++ ];
DBGA ( R0.L , 0x9309 );
DBGA ( R0.H , 0x0000 );
// decision word 4
R0 = [ I0 ++ ];
DBGA ( R0.L , 0xffff );
DBGA ( R0.H , 0xffff );
// decision word 5
R0 = [ I0 ++ ];
DBGA ( R0.L , 0xf0ff );
DBGA ( R0.H , 0xcf00 );
// decision word 6
R0 = [ I0 ++ ];
DBGA ( R0.L , 0x9009 );
DBGA ( R0.H , 0x07f6 );
// decision word 7
R0 = [ I0 ++ ];
DBGA ( R0.L , 0x6004 );
DBGA ( R0.H , 0x6996 );
// decision word 8
R0 = [ I0 ++ ];
DBGA ( R0.L , 0x8338 );
DBGA ( R0.H , 0x3443 );
// decision word 9
R0 = [ I0 ++ ];
DBGA ( R0.L , 0x6bd6 );
DBGA ( R0.H , 0x6197 );
// decision word 10
R0 = [ I0 ++ ];
DBGA ( R0.L , 0x6c26 );
DBGA ( R0.H , 0x0990 );
pass
.data
.align 8
InputData:
.dw 0x0001
.dw 0x0001
.dw 0xffff
.dw 0xfffb
.dw 0x0005
.dw 0x0001
.dw 0xfffd
.dw 0xfffd
.dw 0x0005
.dw 0x0001
.dw 0x0001
.dw 0x0001
.dw 0xffff
.dw 0xfffb
.dw 0x0005
.dw 0x0001
.dw 0xfffd
.dw 0xfffd
.dw 0x0005
.dw 0x0001
.align 8
APMFrom:
.dw 0xc000
.dw 0x0
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.align 8
APMTo:
.space (32*8)
.align 8
BranchStorage:
.space (32*8)
.align 8
DecisionHistory:
.space (18*4)
|