1 https://github.com/xianyi/OpenBLAS/commit/f703846ad9400a8ea175cb8dd43e18c152aeab93
2 https://github.com/xianyi/OpenBLAS/commit/515cf269291bec0d43651fe7bf99a71fb074a0ad
3 https://github.com/xianyi/OpenBLAS/commit/91110f92d218492d0efbdc1fdf34277ca45f4b36
4 https://github.com/xianyi/OpenBLAS/commit/9402df5604e69f86f58953e3883f33f98c930baf
5 https://github.com/xianyi/OpenBLAS/commit/101a2c77c3f3610933f450cefca3e312edab2186
6 https://src.fedoraproject.org/rpms/openblas/c/5f27d51cebe1c1bb6598d38326ece8dc0ac71ec7?branch=rawhide
7
8 From f703846ad9400a8ea175cb8dd43e18c152aeab93 Mon Sep 17 00:00:00 2001
9 From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
10 Date: Sat, 13 Aug 2022 11:38:27 +0200
11 Subject: [PATCH] Add function prototypes
12
13 --- a/exports/gensymbol
14 +++ b/exports/gensymbol
15 @@ -4000,6 +4000,22 @@ case "$p1" in
16 no_underscore_objs="$no_underscore_objs $misc_common_objs"
17
18 printf 'int main(void){\n'
19 + for obj in $underscore_objs; do
20 + [ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \
21 + "$symbolprefix" "$obj" "$bu" "$symbolsuffix"
22 + done
23 +
24 + for obj in $need_2underscore_objs; do
25 + printf 'extern void %s%s%s%s%s();\n' \
26 + "$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix"
27 + done
28 +
29 + for obj in $no_underscore_objs; do
30 + printf 'extern void %s%s%s();\n' \
31 + "$symbolprefix" "$obj" "$symbolsuffix"
32 + done
33 +
34 + printf '\n'
35 for obj in $underscore_objs; do
36 [ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \
37 "$symbolprefix" "$obj" "$bu" "$symbolsuffix"
38 --- a/exports/gensymbol.pl
39 +++ b/exports/gensymbol.pl
40 @@ -3955,6 +3955,18 @@
41 @no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
42
43 print "int main(void){\n";
44 + foreach $objs (@underscore_objs) {
45 + print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
46 + }
47 +
48 + foreach $objs (@need_2underscore_objs) {
49 + print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n";
50 + }
51 +
52 + foreach $objs (@no_underscore_objs) {
53 + print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n";
54 + }
55 +
56 foreach $objs (@underscore_objs) {
57 print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
58 }
59
60 From 515cf269291bec0d43651fe7bf99a71fb074a0ad Mon Sep 17 00:00:00 2001
61 From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
62 Date: Wed, 14 Sep 2022 11:48:36 +0200
63 Subject: [PATCH] Fix pointer/integer argument mismatch in calls to pow()
64
65 --- a/lapack-netlib/SRC/claed0.c
66 +++ b/lapack-netlib/SRC/claed0.c
67 @@ -796,10 +796,10 @@ f"> */
68
69 temp = log((real) (*n)) / log(2.f);
70 lgn = (integer) temp;
71 - if (pow_ii(&c__2, &lgn) < *n) {
72 + if (pow_ii(c__2, lgn) < *n) {
73 ++lgn;
74 }
75 - if (pow_ii(&c__2, &lgn) < *n) {
76 + if (pow_ii(c__2, lgn) < *n) {
77 ++lgn;
78 }
79 iprmpt = indxq + *n + 1;
80 --- a/lapack-netlib/SRC/claed7.c
81 +++ b/lapack-netlib/SRC/claed7.c
82 @@ -864,11 +864,11 @@ f"> */
83 /* Form the z-vector which consists of the last row of Q_1 and the */
84 /* first row of Q_2. */
85
86 - ptr = pow_ii(&c__2, tlvls) + 1;
87 + ptr = pow_ii(c__2, *tlvls) + 1;
88 i__1 = *curlvl - 1;
89 for (i__ = 1; i__ <= i__1; ++i__) {
90 i__2 = *tlvls - i__;
91 - ptr += pow_ii(&c__2, &i__2);
92 + ptr += pow_ii(c__2, i__2);
93 /* L10: */
94 }
95 curr = ptr + *curpbm;
96 --- a/lapack-netlib/SRC/clalsa.c
97 +++ b/lapack-netlib/SRC/clalsa.c
98 @@ -1051,7 +1051,7 @@ f"> */
99 /* Finally go through the left singular vector matrices of all */
100 /* the other subproblems bottom-up on the tree. */
101
102 - j = pow_ii(&c__2, &nlvl);
103 + j = pow_ii(c__2, nlvl);
104 sqre = 0;
105
106 for (lvl = nlvl; lvl >= 1; --lvl) {
107 @@ -1065,7 +1065,7 @@ f"> */
108 ll = 1;
109 } else {
110 i__1 = lvl - 1;
111 - lf = pow_ii(&c__2, &i__1);
112 + lf = pow_ii(c__2, i__1);
113 ll = (lf << 1) - 1;
114 }
115 i__1 = ll;
116 @@ -1110,7 +1110,7 @@ f"> */
117 ll = 1;
118 } else {
119 i__2 = lvl - 1;
120 - lf = pow_ii(&c__2, &i__2);
121 + lf = pow_ii(c__2, i__2);
122 ll = (lf << 1) - 1;
123 }
124 i__2 = lf;
125 --- a/lapack-netlib/SRC/cstedc.c
126 +++ b/lapack-netlib/SRC/cstedc.c
127 @@ -836,10 +836,10 @@ f"> */
128 lrwmin = *n - 1 << 1;
129 } else if (icompz == 1) {
130 lgn = (integer) (log((real) (*n)) / log(2.f));
131 - if (pow_ii(&c__2, &lgn) < *n) {
132 + if (pow_ii(c__2, lgn) < *n) {
133 ++lgn;
134 }
135 - if (pow_ii(&c__2, &lgn) < *n) {
136 + if (pow_ii(c__2, lgn) < *n) {
137 ++lgn;
138 }
139 lwmin = *n * *n;
140 --- a/lapack-netlib/SRC/dlaed0.c
141 +++ b/lapack-netlib/SRC/dlaed0.c
142 @@ -827,10 +827,10 @@ f"> */
143
144 temp = log((doublereal) (*n)) / log(2.);
145 lgn = (integer) temp;
146 - if (pow_ii(&c__2, &lgn) < *n) {
147 + if (pow_ii(c__2, lgn) < *n) {
148 ++lgn;
149 }
150 - if (pow_ii(&c__2, &lgn) < *n) {
151 + if (pow_ii(c__2, lgn) < *n) {
152 ++lgn;
153 }
154 iprmpt = indxq + *n + 1;
155 --- a/lapack-netlib/SRC/dlaed7.c
156 +++ b/lapack-netlib/SRC/dlaed7.c
157 @@ -885,11 +885,11 @@ f"> */
158 /* Form the z-vector which consists of the last row of Q_1 and the */
159 /* first row of Q_2. */
160
161 - ptr = pow_ii(&c__2, tlvls) + 1;
162 + ptr = pow_ii(c__2, *tlvls) + 1;
163 i__1 = *curlvl - 1;
164 for (i__ = 1; i__ <= i__1; ++i__) {
165 i__2 = *tlvls - i__;
166 - ptr += pow_ii(&c__2, &i__2);
167 + ptr += pow_ii(c__2, i__2);
168 /* L10: */
169 }
170 curr = ptr + *curpbm;
171 --- a/lapack-netlib/SRC/dlaeda.c
172 +++ b/lapack-netlib/SRC/dlaeda.c
173 @@ -754,7 +754,7 @@ f"> */
174 /* scheme */
175
176 i__1 = *curlvl - 1;
177 - curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
178 + curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;
179
180 /* Determine size of these matrices. We add HALF to the value of */
181 /* the SQRT in case the machine underestimates one of these square */
182 @@ -781,12 +781,12 @@ f"> */
183 /* rotations and permutation and then multiplying the center matrices */
184 /* against the current Z. */
185
186 - ptr = pow_ii(&c__2, tlvls) + 1;
187 + ptr = pow_ii(c__2, *tlvls) + 1;
188 i__1 = *curlvl - 1;
189 for (k = 1; k <= i__1; ++k) {
190 i__2 = *curlvl - k;
191 i__3 = *curlvl - k - 1;
192 - curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
193 + curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
194 1;
195 psiz1 = prmptr[curr + 1] - prmptr[curr];
196 psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
197 @@ -847,7 +847,7 @@ f"> */
198 c__1);
199
200 i__2 = *tlvls - k;
201 - ptr += pow_ii(&c__2, &i__2);
202 + ptr += pow_ii(c__2, i__2);
203 /* L70: */
204 }
205
206 --- a/lapack-netlib/SRC/dlalsa.c
207 +++ b/lapack-netlib/SRC/dlalsa.c
208 @@ -951,7 +951,7 @@ f"> */
209 /* Finally go through the left singular vector matrices of all */
210 /* the other subproblems bottom-up on the tree. */
211
212 - j = pow_ii(&c__2, &nlvl);
213 + j = pow_ii(c__2, nlvl);
214 sqre = 0;
215
216 for (lvl = nlvl; lvl >= 1; --lvl) {
217 @@ -965,7 +965,7 @@ f"> */
218 ll = 1;
219 } else {
220 i__1 = lvl - 1;
221 - lf = pow_ii(&c__2, &i__1);
222 + lf = pow_ii(c__2, i__1);
223 ll = (lf << 1) - 1;
224 }
225 i__1 = ll;
226 @@ -1010,7 +1010,7 @@ f"> */
227 ll = 1;
228 } else {
229 i__2 = lvl - 1;
230 - lf = pow_ii(&c__2, &i__2);
231 + lf = pow_ii(c__2, i__2);
232 ll = (lf << 1) - 1;
233 }
234 i__2 = lf;
235 --- a/lapack-netlib/SRC/dlasd0.c
236 +++ b/lapack-netlib/SRC/dlasd0.c
237 @@ -824,7 +824,7 @@ f"> */
238 ll = 1;
239 } else {
240 i__1 = lvl - 1;
241 - lf = pow_ii(&c__2, &i__1);
242 + lf = pow_ii(c__2, i__1);
243 ll = (lf << 1) - 1;
244 }
245 i__1 = ll;
246 --- a/lapack-netlib/SRC/dlasda.c
247 +++ b/lapack-netlib/SRC/dlasda.c
248 @@ -1027,7 +1027,7 @@ f"> */
249
250 /* Now conquer each subproblem bottom-up. */
251
252 - j = pow_ii(&c__2, &nlvl);
253 + j = pow_ii(c__2, nlvl);
254 for (lvl = nlvl; lvl >= 1; --lvl) {
255 lvl2 = (lvl << 1) - 1;
256
257 @@ -1039,7 +1039,7 @@ f"> */
258 ll = 1;
259 } else {
260 i__1 = lvl - 1;
261 - lf = pow_ii(&c__2, &i__1);
262 + lf = pow_ii(c__2, i__1);
263 ll = (lf << 1) - 1;
264 }
265 i__1 = ll;
266 --- a/lapack-netlib/SRC/dstedc.c
267 +++ b/lapack-netlib/SRC/dstedc.c
268 @@ -806,10 +806,10 @@ f"> */
269 lwmin = *n - 1 << 1;
270 } else {
271 lgn = (integer) (log((doublereal) (*n)) / log(2.));
272 - if (pow_ii(&c__2, &lgn) < *n) {
273 + if (pow_ii(c__2, lgn) < *n) {
274 ++lgn;
275 }
276 - if (pow_ii(&c__2, &lgn) < *n) {
277 + if (pow_ii(c__2, lgn) < *n) {
278 ++lgn;
279 }
280 if (icompz == 1) {
281 --- a/lapack-netlib/SRC/slaed0.c
282 +++ b/lapack-netlib/SRC/slaed0.c
283 @@ -823,10 +823,10 @@ f"> */
284
285 temp = log((real) (*n)) / log(2.f);
286 lgn = (integer) temp;
287 - if (pow_ii(&c__2, &lgn) < *n) {
288 + if (pow_ii(c__2, lgn) < *n) {
289 ++lgn;
290 }
291 - if (pow_ii(&c__2, &lgn) < *n) {
292 + if (pow_ii(c__2, lgn) < *n) {
293 ++lgn;
294 }
295 iprmpt = indxq + *n + 1;
296 --- a/lapack-netlib/SRC/slaed7.c
297 +++ b/lapack-netlib/SRC/slaed7.c
298 @@ -883,11 +883,11 @@ f"> */
299 /* Form the z-vector which consists of the last row of Q_1 and the */
300 /* first row of Q_2. */
301
302 - ptr = pow_ii(&c__2, tlvls) + 1;
303 + ptr = pow_ii(c__2, *tlvls) + 1;
304 i__1 = *curlvl - 1;
305 for (i__ = 1; i__ <= i__1; ++i__) {
306 i__2 = *tlvls - i__;
307 - ptr += pow_ii(&c__2, &i__2);
308 + ptr += pow_ii(c__2, i__2);
309 /* L10: */
310 }
311 curr = ptr + *curpbm;
312 --- a/lapack-netlib/SRC/slaeda.c
313 +++ b/lapack-netlib/SRC/slaeda.c
314 @@ -753,7 +753,7 @@ f"> */
315 /* scheme */
316
317 i__1 = *curlvl - 1;
318 - curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
319 + curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;
320
321 /* Determine size of these matrices. We add HALF to the value of */
322 /* the SQRT in case the machine underestimates one of these square */
323 @@ -779,12 +779,12 @@ f"> */
324 /* rotations and permutation and then multiplying the center matrices */
325 /* against the current Z. */
326
327 - ptr = pow_ii(&c__2, tlvls) + 1;
328 + ptr = pow_ii(c__2, *tlvls) + 1;
329 i__1 = *curlvl - 1;
330 for (k = 1; k <= i__1; ++k) {
331 i__2 = *curlvl - k;
332 i__3 = *curlvl - k - 1;
333 - curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
334 + curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
335 1;
336 psiz1 = prmptr[curr + 1] - prmptr[curr];
337 psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
338 @@ -844,7 +844,7 @@ f"> */
339 c__1);
340
341 i__2 = *tlvls - k;
342 - ptr += pow_ii(&c__2, &i__2);
343 + ptr += pow_ii(c__2, i__2);
344 /* L70: */
345 }
346
347 --- a/lapack-netlib/SRC/slalsa.c
348 +++ b/lapack-netlib/SRC/slalsa.c
349 @@ -946,7 +946,7 @@ f"> */
350 /* Finally go through the left singular vector matrices of all */
351 /* the other subproblems bottom-up on the tree. */
352
353 - j = pow_ii(&c__2, &nlvl);
354 + j = pow_ii(c__2, nlvl);
355 sqre = 0;
356
357 for (lvl = nlvl; lvl >= 1; --lvl) {
358 @@ -960,7 +960,7 @@ f"> */
359 ll = 1;
360 } else {
361 i__1 = lvl - 1;
362 - lf = pow_ii(&c__2, &i__1);
363 + lf = pow_ii(c__2, i__1);
364 ll = (lf << 1) - 1;
365 }
366 i__1 = ll;
367 @@ -1005,7 +1005,7 @@ f"> */
368 ll = 1;
369 } else {
370 i__2 = lvl - 1;
371 - lf = pow_ii(&c__2, &i__2);
372 + lf = pow_ii(c__2, i__2);
373 ll = (lf << 1) - 1;
374 }
375 i__2 = lf;
376 --- a/lapack-netlib/SRC/slasd0.c
377 +++ b/lapack-netlib/SRC/slasd0.c
378 @@ -821,7 +821,7 @@ f"> */
379 ll = 1;
380 } else {
381 i__1 = lvl - 1;
382 - lf = pow_ii(&c__2, &i__1);
383 + lf = pow_ii(c__2, i__1);
384 ll = (lf << 1) - 1;
385 }
386 i__1 = ll;
387 --- a/lapack-netlib/SRC/slasda.c
388 +++ b/lapack-netlib/SRC/slasda.c
389 @@ -1023,7 +1023,7 @@ f"> */
390
391 /* Now conquer each subproblem bottom-up. */
392
393 - j = pow_ii(&c__2, &nlvl);
394 + j = pow_ii(c__2, nlvl);
395 for (lvl = nlvl; lvl >= 1; --lvl) {
396 lvl2 = (lvl << 1) - 1;
397
398 @@ -1035,7 +1035,7 @@ f"> */
399 ll = 1;
400 } else {
401 i__1 = lvl - 1;
402 - lf = pow_ii(&c__2, &i__1);
403 + lf = pow_ii(c__2, i__1);
404 ll = (lf << 1) - 1;
405 }
406 i__1 = ll;
407 --- a/lapack-netlib/SRC/sstedc.c
408 +++ b/lapack-netlib/SRC/sstedc.c
409 @@ -804,10 +804,10 @@ f"> */
410 lwmin = *n - 1 << 1;
411 } else {
412 lgn = (integer) (log((real) (*n)) / log(2.f));
413 - if (pow_ii(&c__2, &lgn) < *n) {
414 + if (pow_ii(c__2, lgn) < *n) {
415 ++lgn;
416 }
417 - if (pow_ii(&c__2, &lgn) < *n) {
418 + if (pow_ii(c__2, lgn) < *n) {
419 ++lgn;
420 }
421 if (icompz == 1) {
422 --- a/lapack-netlib/SRC/zlaed0.c
423 +++ b/lapack-netlib/SRC/zlaed0.c
424 @@ -793,10 +793,10 @@ f"> */
425
426 temp = log((doublereal) (*n)) / log(2.);
427 lgn = (integer) temp;
428 - if (pow_ii(&c__2, &lgn) < *n) {
429 + if (pow_ii(c__2, lgn) < *n) {
430 ++lgn;
431 }
432 - if (pow_ii(&c__2, &lgn) < *n) {
433 + if (pow_ii(c__2, lgn) < *n) {
434 ++lgn;
435 }
436 iprmpt = indxq + *n + 1;
437 --- a/lapack-netlib/SRC/zlaed7.c
438 +++ b/lapack-netlib/SRC/zlaed7.c
439 @@ -864,11 +864,11 @@ f"> */
440 /* Form the z-vector which consists of the last row of Q_1 and the */
441 /* first row of Q_2. */
442
443 - ptr = pow_ii(&c__2, tlvls) + 1;
444 + ptr = pow_ii(c__2, *tlvls) + 1;
445 i__1 = *curlvl - 1;
446 for (i__ = 1; i__ <= i__1; ++i__) {
447 i__2 = *tlvls - i__;
448 - ptr += pow_ii(&c__2, &i__2);
449 + ptr += pow_ii(c__2, i__2);
450 /* L10: */
451 }
452 curr = ptr + *curpbm;
453 --- a/lapack-netlib/SRC/zlalsa.c
454 +++ b/lapack-netlib/SRC/zlalsa.c
455 @@ -1051,7 +1051,7 @@ f"> */
456 /* Finally go through the left singular vector matrices of all */
457 /* the other subproblems bottom-up on the tree. */
458
459 - j = pow_ii(&c__2, &nlvl);
460 + j = pow_ii(c__2, nlvl);
461 sqre = 0;
462
463 for (lvl = nlvl; lvl >= 1; --lvl) {
464 @@ -1065,7 +1065,7 @@ f"> */
465 ll = 1;
466 } else {
467 i__1 = lvl - 1;
468 - lf = pow_ii(&c__2, &i__1);
469 + lf = pow_ii(c__2, i__1);
470 ll = (lf << 1) - 1;
471 }
472 i__1 = ll;
473 @@ -1110,7 +1110,7 @@ f"> */
474 ll = 1;
475 } else {
476 i__2 = lvl - 1;
477 - lf = pow_ii(&c__2, &i__2);
478 + lf = pow_ii(c__2, i__2);
479 ll = (lf << 1) - 1;
480 }
481 i__2 = lf;
482 --- a/lapack-netlib/SRC/zstedc.c
483 +++ b/lapack-netlib/SRC/zstedc.c
484 @@ -836,10 +836,10 @@ f"> */
485 lrwmin = *n - 1 << 1;
486 } else if (icompz == 1) {
487 lgn = (integer) (log((doublereal) (*n)) / log(2.));
488 - if (pow_ii(&c__2, &lgn) < *n) {
489 + if (pow_ii(c__2, lgn) < *n) {
490 ++lgn;
491 }
492 - if (pow_ii(&c__2, &lgn) < *n) {
493 + if (pow_ii(c__2, lgn) < *n) {
494 ++lgn;
495 }
496 lwmin = *n * *n;
497
498 From 91110f92d218492d0efbdc1fdf34277ca45f4b36 Mon Sep 17 00:00:00 2001
499 From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
500 Date: Wed, 14 Sep 2022 14:03:31 +0200
501 Subject: [PATCH] fix missing return type in function declaration
502
503 --- a/ctest/c_sblat1c.c
504 +++ b/ctest/c_sblat1c.c
505 @@ -969,7 +969,7 @@ real *sfac;
506 1.17 };
507
508 /* Local variables */
509 - extern /* Subroutine */ srottest_();
510 + extern /* Subroutine */ void srottest_();
511 static integer i__, k, ksize;
512 extern /* Subroutine */ int stest_(), srotmtest_();
513 static integer ki, kn;
514
515 From 9402df5604e69f86f58953e3883f33f98c930baf Mon Sep 17 00:00:00 2001
516 From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
517 Date: Wed, 14 Sep 2022 21:44:34 +0200
518 Subject: [PATCH] Fix missing external declaration
519
520 --- a/driver/others/blas_server_omp.c
521 +++ b/driver/others/blas_server_omp.c
522 @@ -69,6 +69,8 @@
523
524 int blas_server_avail = 0;
525
526 +extern int openblas_omp_adaptive_env();
527 +
528 static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
529 #ifdef HAVE_C11
530 static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
531
532 From 101a2c77c3f3610933f450cefca3e312edab2186 Mon Sep 17 00:00:00 2001
533 From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
534 Date: Thu, 15 Sep 2022 09:19:19 +0200
535 Subject: [PATCH] Fix warnings
536
537 --- a/kernel/x86_64/dgemm_ncopy_8_skylakex.c
538 +++ b/kernel/x86_64/dgemm_ncopy_8_skylakex.c
539 @@ -52,18 +52,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT * __restrict a, BLASLONG lda, FLOAT * __
540 FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
541 FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
542 FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
543 - FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
544 - FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
545 - FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
546 - FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
547 - FLOAT ctemp33, ctemp34, ctemp35, ctemp36;
548 - FLOAT ctemp37, ctemp38, ctemp39, ctemp40;
549 - FLOAT ctemp41, ctemp42, ctemp43, ctemp44;
550 - FLOAT ctemp45, ctemp46, ctemp47, ctemp48;
551 - FLOAT ctemp49, ctemp50, ctemp51, ctemp52;
552 - FLOAT ctemp53, ctemp54, ctemp55, ctemp56;
553 - FLOAT ctemp57, ctemp58, ctemp59, ctemp60;
554 - FLOAT ctemp61, ctemp62, ctemp63, ctemp64;
555 + FLOAT ctemp17 /*, ctemp18, ctemp19, ctemp20*/ ;
556 + FLOAT /*ctemp21, ctemp22,*/ ctemp23, ctemp24;
557 + FLOAT ctemp25 /*, ctemp26, ctemp27, ctemp28*/ ;
558 + FLOAT /*ctemp29, ctemp30,*/ ctemp31, ctemp32;
559 + FLOAT ctemp33 /*, ctemp34, ctemp35, ctemp36*/ ;
560 + FLOAT /*ctemp37, ctemp38,*/ ctemp39, ctemp40;
561 + FLOAT ctemp41 /*, ctemp42, ctemp43, ctemp44*/ ;
562 + FLOAT /*ctemp45, ctemp46,*/ ctemp47, ctemp48;
563 + FLOAT ctemp49 /*, ctemp50, ctemp51, ctemp52*/ ;
564 + FLOAT /*ctemp53, ctemp54,*/ ctemp55, ctemp56;
565 + FLOAT ctemp57 /*, ctemp58, ctemp59, ctemp60*/ ;
566 + FLOAT /*ctemp61, ctemp62,*/ ctemp63, ctemp64;
567
568
569 aoffset = a;
570 --- a/kernel/x86_64/omatcopy_rt.c
571 +++ b/kernel/x86_64/omatcopy_rt.c
572 @@ -142,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
573 ,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15");\
574 }
575 int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb){
576 - float *src, *dst, *dst_tmp, *src_base, *dst_base;
577 + float *src, *dst, *dst_tmp=0, *src_base, *dst_base;
578 uint64_t src_ld_bytes = (uint64_t)lda * sizeof(float), dst_ld_bytes = (uint64_t)ldb * sizeof(float), num_rows = 0;
579 BLASLONG cols_left, rows_done; float ALPHA = alpha;
580 if(ALPHA==0.0){
|