39{
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185 char * one;
186 Int i1, j1, m1, mn, n1, size;
188
189
190
191
192 if( ( M <= 0 ) || ( N <= 0 ) ) return;
193
195 {
197 {
199 mn =
MAX( 0, -IOFFD );
200
201 if( ( n1 =
MIN( mn, N ) ) > 0 )
202 {
204 &LDA, BR, &LDBR, one, CC, &LDCC );
206 &LDBC, A, &LDA, one, CR, &LDCR );
207 }
208 n1 = M - IOFFD;
209 if( ( n1 =
MIN( n1, N ) - mn ) > 0 )
210 {
211 i1 = ( j1 = mn ) + IOFFD;
213 Mptr( A, i1, j1, LDA, size ), &LDA,
Mptr( BC, i1, 0,
214 LDBC, size ), &LDBC, one,
Mptr( CC, i1, 0, LDCC,
215 size ), &LDCC );
216 if( ( m1 = M - mn - n1 - IOFFD ) > 0 )
217 {
218 i1 += n1;
220 Mptr( A, i1, j1, LDA, size ), &LDA,
Mptr( BR, 0, j1, LDBR,
221 size ), &LDBR, one,
Mptr( CC, i1, 0, LDCC, size ), &LDCC );
223 Mptr( BC, i1, 0, LDBC, size ), &LDBC,
Mptr( A, i1, j1, LDA,
224 size ), &LDA, one,
Mptr( CR, 0, j1, LDCR, size ), &LDCR );
225 }
226 }
227 }
229 {
231 mn =
MIN( M - IOFFD, N );
232
233 if( ( n1 = mn -
MAX( 0, -IOFFD ) ) > 0 )
234 {
235 j1 = mn - n1;
236 if( ( m1 =
MAX( 0, IOFFD ) ) > 0 )
237 {
239 ALPHA, A, &LDA, BR, &LDBR, one, CC, &LDCC );
241 ALPHA, BC, &LDBC, A, &LDA, one, CR, &LDCR );
242 }
244 Mptr( A, m1, j1, LDA, size ), &LDA,
245 Mptr( BC, m1, 0, LDBC, size ), &LDBC, one,
246 Mptr( CC, m1, 0, LDCC, size ), &LDCC );
247 }
248 if( ( n1 = N -
MAX( 0, mn ) ) > 0 )
249 {
250 j1 = N - n1;
252 ALPHA,
Mptr( A, 0, j1, LDA, size ), &LDA,
Mptr( BR, 0,
253 j1, LDBR, size ), &LDBR, one, CC, &LDCC );
255 ALPHA, BC, &LDBC,
Mptr( A, 0, j1, LDA, size ), &LDA,
256 one,
Mptr( CR, 0, j1, LDCR, size ), &LDCR );
257 }
258 }
259 else
260 {
261 one =
TYPE->one; gemm =
TYPE->Fgemm;
263 BR, &LDBR, one, CC, &LDCC );
265 &LDBC, A, &LDA, one, CR, &LDCR );
266 }
267 }
268 else
269 {
271 {
273 mn =
MAX( 0, -IOFFD );
274 if( ( n1 =
MIN( mn, N ) ) > 0 )
275 {
277 &LDA, BR, &LDBR, one, CC, &LDCC );
279 &LDBC, A, &LDA, one, CR, &LDCR );
280 }
281 n1 = M - IOFFD;
282 if( ( n1 =
MIN( n1, N ) - mn ) > 0 )
283 {
284 i1 = ( j1 = mn ) + IOFFD;
286 Mptr( A, i1, j1, LDA, size ), &LDA,
287 Mptr( BR, 0, j1, LDBR, size ), &LDBR, one,
288 Mptr( CR, 0, j1, LDCR, size ), &LDCR );
289 if( ( m1 = M - mn - n1 - IOFFD ) > 0 )
290 {
291 i1 += n1;
293 Mptr( A, i1, j1, LDA, size ), &LDA,
Mptr( BR, 0, j1, LDBR,
294 size ), &LDBR, one,
Mptr( CC, i1, 0, LDCC, size ), &LDCC );
296 Mptr( BC, i1, 0, LDBC, size ), &LDBC,
Mptr( A, i1, j1, LDA,
297 size ), &LDA, one,
Mptr( CR, 0, j1, LDCR, size ), &LDCR );
298 }
299 }
300 }
302 {
304 mn =
MIN( M - IOFFD, N );
305 if( ( n1 = mn -
MAX( 0, -IOFFD ) ) > 0 )
306 {
307 j1 = mn - n1;
308 if( ( m1 =
MAX( 0, IOFFD ) ) > 0 )
309 {
311 A, &LDA, BR, &LDBR, one, CC, &LDCC );
313 BC, &LDBC, A, &LDA, one, CR, &LDCR );
314 }
316 Mptr( A, m1, j1, LDA, size ), &LDA,
Mptr( BR, 0, j1,
317 LDBR, size ), &LDBR, one,
Mptr( CR, 0, j1, LDCR,
318 size ), &LDCR );
319 }
320 if( ( n1 = N -
MAX( 0, mn ) ) > 0 )
321 {
322 j1 = N - n1;
324 Mptr( A, 0, j1, LDA, size ), &LDA,
Mptr( BR, 0, j1, LDBR,
325 size ), &LDBR, one, CC, &LDCC );
327 &LDBC,
Mptr( A, 0, j1, LDA, size ), &LDA, one,
Mptr( CR, 0,
328 j1, LDCR, size ), &LDCR );
329 }
330 }
331 else
332 {
333 one =
TYPE->one; gemm =
TYPE->Fgemm;
335 BR, &LDBR, one, CC, &LDCC );
337 &LDBC, A, &LDA, one, CR, &LDCR );
338 }
339 }
340
341
342
343}