This is the mail archive of the
egcs-bugs@egcs.cygnus.com
mailing list for the EGCS project.
Re: Bugreport on g77-2.95 19990629 (prerelease)
- To: egcs-bugs@egcs.cygnus.com
- Subject: Re: Bugreport on g77-2.95 19990629 (prerelease)
- From: toon@moene.indiv.nluug.nl
- Date: Mon, 5 Jul 1999 23:24:15 +0200
I was able to reproduce Mathias' findings as follows:
- I compiled all of LAPACK with -O2 -g
- This passes the test
- Then I compiled sgeqpf.f with -O2 -g -march=i686 - the test hangs
=======================================================
cat sgeqpf.f
SUBROUTINE SGEQPF( M, N, A, LDA, JPVT, TAU, WORK, INFO )
*
* -- LAPACK test routine (version 2.0) --
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
* Courant Institute, Argonne National Lab, and Rice University
* March 31, 1993
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
INTEGER JPVT( * )
REAL A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* Purpose
* =======
*
* SGEQPF computes a QR factorization with column pivoting of a
* real M-by-N matrix A: A*P = Q*R.
*
* Arguments
* =========
*
* M (input) INTEGER
* The number of rows of the matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns of the matrix A. N >= 0.
*
* A (input/output) REAL array, dimension (LDA,N)
* On entry, the M-by-N matrix A.
* On exit, the upper triangle of the array contains the
* min(M,N)-by-N upper triangular matrix R; the elements
* below the diagonal, together with the array TAU,
* represent the orthogonal matrix Q as a product of
* min(m,n) elementary reflectors.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* JPVT (input/output) INTEGER array, dimension (N)
* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
* to the front of A*P (a leading column); if JPVT(i) = 0,
* the i-th column of A is a free column.
* On exit, if JPVT(i) = k, then the i-th column of A*P
* was the k-th column of A.
*
* TAU (output) REAL array, dimension (min(M,N))
* The scalar factors of the elementary reflectors.
*
* WORK (workspace) REAL array, dimension (3*N)
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
*
* Further Details
* ===============
*
* The matrix Q is represented as a product of elementary reflectors
*
* Q = H(1) H(2) . . . H(n)
*
* Each H(i) has the form
*
* H = I - tau * v * v'
*
* where tau is a real scalar, and v is a real vector with
* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i).
*
* The matrix P is represented in jpvt as follows: If
* jpvt(j) = i
* then the jth column of P is the ith canonical unit vector.
*
* =====================================================================
*
* .. Parameters ..
REAL ZERO, ONE
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
* ..
* .. Local Scalars ..
INTEGER I, ITEMP, J, MA, MN, PVT
REAL AII, TEMP, TEMP2
* ..
* .. External Subroutines ..
EXTERNAL SGEQR2, SLARF, SLARFG, SORM2R, SSWAP, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN, SQRT
* ..
* .. External Functions ..
INTEGER ISAMAX
REAL SNRM2
EXTERNAL ISAMAX, SNRM2
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'SGEQPF', -INFO )
RETURN
END IF
*
MN = MIN( M, N )
*
* Move initial columns (JPVT(I).NE.0) up front; ITEMP counts them
*
ITEMP = 1
DO 10 I = 1, N
IF( JPVT( I ).NE.0 ) THEN
IF( I.NE.ITEMP ) THEN
CALL SSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 )
JPVT( I ) = JPVT( ITEMP )
JPVT( ITEMP ) = I
ELSE
JPVT( I ) = I
END IF
ITEMP = ITEMP + 1
ELSE
JPVT( I ) = I
END IF
10 CONTINUE
ITEMP = ITEMP - 1
*
* QR-factor the first MA columns and update the other N-MA columns
*
IF( ITEMP.GT.0 ) THEN
MA = MIN( ITEMP, M )
CALL SGEQR2( M, MA, A, LDA, TAU, WORK, INFO )
IF( MA.LT.N ) THEN
CALL SORM2R( 'Left', 'Transpose', M, N-MA, MA, A, LDA, TAU,
$ A( 1, MA+1 ), LDA, WORK, INFO )
END IF
END IF
*
IF( ITEMP.LT.MN ) THEN
* Initialize the column norms of the free columns. WORK(I) is
* the running norm that loop 30 below scales down; WORK(N+I)
* keeps the last recomputed value. WORK(2*N+1) onward is
* handed to SLARF.
DO 20 I = ITEMP + 1, N
WORK( I ) = SNRM2( M-ITEMP, A( ITEMP+1, I ), 1 )
WORK( N+I ) = WORK( I )
20 CONTINUE
*
* Compute factorization
*
DO 40 I = ITEMP + 1, MN
*
* Determine ith pivot column and swap if necessary
*
PVT = ( I-1 ) + ISAMAX( N-I+1, WORK( I ), 1 )
* (ITEMP is swap scratch below; the DO 40 bounds are already set)
IF( PVT.NE.I ) THEN
CALL SSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 )
ITEMP = JPVT( PVT )
JPVT( PVT ) = JPVT( I )
JPVT( I ) = ITEMP
WORK( PVT ) = WORK( I )
WORK( N+PVT ) = WORK( N+I )
END IF
*
* Generate elementary reflector H(i)
*
IF( I.LT.M ) THEN
CALL SLARFG( M-I+1, A( I, I ), A( I+1, I ), 1, TAU( I ) )
ELSE
CALL SLARFG( 1, A( M, M ), A( M, M ), 1, TAU( M ) )
END IF
*
IF( I.LT.N ) THEN
*
* Apply H(i) to A(i:m,i+1:n) from the left
*
AII = A( I, I )
A( I, I ) = ONE
CALL SLARF( 'LEFT', M-I+1, N-I, A( I, I ), 1, TAU( I ),
$ A( I, I+1 ), LDA, WORK( 2*N+1 ) )
A( I, I ) = AII
END IF
* Update partial column norms. NOTE(review): TEMP2.EQ.ONE below
* is an exact REAL comparison, so its outcome may depend on the
* precision at which TEMP2 is evaluated - confirm.
DO 30 J = I + 1, N
IF( WORK( J ).NE.ZERO ) THEN
TEMP = ONE - ( ABS( A( I, J ) ) / WORK( J ) )**2
TEMP = MAX( TEMP, ZERO )
TEMP2 = ONE + 0.05*TEMP*( WORK( J ) / WORK( N+J ) )**2
IF( TEMP2.EQ.ONE ) THEN
IF( M-I.GT.0 ) THEN
WORK( J ) = SNRM2( M-I, A( I+1, J ), 1 )
WORK( N+J ) = WORK( J )
ELSE
WORK( J ) = ZERO
WORK( N+J ) = ZERO
END IF
ELSE
WORK( J ) = WORK( J )*SQRT( TEMP )
END IF
END IF
30 CONTINUE
*
40 CONTINUE
END IF
RETURN
*
* End of SGEQPF
*
END
=======================================================
diff sgeqpf.s.noi686 sgeqpf.s.i686
93,96c93
< movl (%eax),%edx
< leal 0(,%edx,4),%ecx
< movl %ecx,-56(%ebp)
< .stabn 68,0,102,.LM2-sgeqpf_
---
> .stabn 68,0,101,.LM2-sgeqpf_
98,100d94
< movl 8(%ebp),%ecx
< .stabn 68,0,101,.LM3-sgeqpf_
< .LM3:
102c96,98
< movl $0,(%ebx)
---
> .stabn 68,0,1,.LM3-sgeqpf_
> .LM3:
> movl (%eax),%ecx
105,108c101,102
< movl (%ecx),%eax
< testl %eax,%eax
< jge .L3
< .stabn 68,0,103,.LM5-sgeqpf_
---
> movl 8(%ebp),%eax
> .stabn 68,0,1,.LM5-sgeqpf_
110,111c104,106
< movl $-1,(%ebx)
< .stabn 68,0,104,.LM6-sgeqpf_
---
> leal 0(,%ecx,4),%edx
> movl %edx,-56(%ebp)
> .stabn 68,0,101,.LM6-sgeqpf_
112a108,118
> movl $0,(%ebx)
> .stabn 68,0,102,.LM7-sgeqpf_
> .LM7:
> movl (%eax),%edx
> testl %edx,%edx
> jge .L3
> .stabn 68,0,103,.LM8-sgeqpf_
> .LM8:
> movl $-1,(%ebx)
> .stabn 68,0,104,.LM9-sgeqpf_
> .LM9:
119,120c125,126
< .stabn 68,0,105,.LM7-sgeqpf_
< .LM7:
---
> .stabn 68,0,105,.LM10-sgeqpf_
> .LM10:
123,124c129,130
< .stabn 68,0,106,.LM8-sgeqpf_
< .LM8:
---
> .stabn 68,0,106,.LM11-sgeqpf_
> .LM11:
128,129d133
< cmpl $1,%eax
< jge .L8
131,132c135,137
< .L8:
< cmpl %eax,%edx
---
> testl %edx,%edx
> cmovg %edx,%eax
> cmpl %eax,%ecx
134,135c139,140
< .stabn 68,0,107,.LM9-sgeqpf_
< .LM9:
---
> .stabn 68,0,107,.LM12-sgeqpf_
> .LM12:
139,140c144,145
< .stabn 68,0,109,.LM10-sgeqpf_
< .LM10:
---
> .stabn 68,0,109,.LM13-sgeqpf_
> .LM13:
145,146c150,151
< .stabn 68,0,110,.LM11-sgeqpf_
< .LM11:
---
> .stabn 68,0,110,.LM14-sgeqpf_
> .LM14:
155,156c160,161
< .stabn 68,0,111,.LM12-sgeqpf_
< .LM12:
---
> .stabn 68,0,111,.LM15-sgeqpf_
> .LM15:
160,161c165,167
< .stabn 68,0,114,.LM13-sgeqpf_
< .LM13:
---
> .stabn 68,0,114,.LM16-sgeqpf_
> .LM16:
> .LBB3:
165d170
< movl %ebx,-68(%ebp)
167,173c172,176
< cmpl %eax,%ebx
< jle .L10
< movl %eax,-68(%ebp)
< .L10:
< .stabn 68,0,118,.LM14-sgeqpf_
< .LM14:
< .LBB3:
---
> cmpl %eax,-68(%ebp)
> cmovg %eax,%ebx
> movl %ebx,-68(%ebp)
> .stabn 68,0,118,.LM17-sgeqpf_
> .LM17:
175,176c178,179
< .stabn 68,0,119,.LM15-sgeqpf_
< .LM15:
---
> .stabn 68,0,119,.LM18-sgeqpf_
> .LM18:
187,188c190,191
< .stabn 68,0,120,.LM16-sgeqpf_
< .LM16:
---
> .stabn 68,0,120,.LM19-sgeqpf_
> .LM19:
191,192c194,195
< .stabn 68,0,121,.LM17-sgeqpf_
< .LM17:
---
> .stabn 68,0,121,.LM20-sgeqpf_
> .LM20:
196,197c199,200
< .stabn 68,0,122,.LM18-sgeqpf_
< .LM18:
---
> .stabn 68,0,122,.LM21-sgeqpf_
> .LM21:
207,208c210,211
< .stabn 68,0,123,.LM19-sgeqpf_
< .LM19:
---
> .stabn 68,0,123,.LM22-sgeqpf_
> .LM22:
211,212c214,215
< .stabn 68,0,124,.LM20-sgeqpf_
< .LM20:
---
> .stabn 68,0,124,.LM23-sgeqpf_
> .LM23:
215,216c218,219
< .stabn 68,0,125,.LM21-sgeqpf_
< .LM21:
---
> .stabn 68,0,125,.LM24-sgeqpf_
> .LM24:
221,222c224,225
< .stabn 68,0,126,.LM22-sgeqpf_
< .LM22:
---
> .stabn 68,0,126,.LM25-sgeqpf_
> .LM25:
226,227c229,230
< .stabn 68,0,128,.LM23-sgeqpf_
< .LM23:
---
> .stabn 68,0,128,.LM26-sgeqpf_
> .LM26:
232,233c235,236
< .stabn 68,0,129,.LM24-sgeqpf_
< .LM24:
---
> .stabn 68,0,129,.LM27-sgeqpf_
> .LM27:
237,238c240,241
< .stabn 68,0,130,.LM25-sgeqpf_
< .LM25:
---
> .stabn 68,0,130,.LM28-sgeqpf_
> .LM28:
241,242c244,245
< .stabn 68,0,132,.LM26-sgeqpf_
< .LM26:
---
> .stabn 68,0,132,.LM29-sgeqpf_
> .LM29:
251,252c254,255
< .stabn 68,0,133,.LM27-sgeqpf_
< .LM27:
---
> .stabn 68,0,133,.LM30-sgeqpf_
> .LM30:
254,255c257,258
< .stabn 68,0,137,.LM28-sgeqpf_
< .LM28:
---
> .stabn 68,0,137,.LM31-sgeqpf_
> .LM31:
258,259c261,262
< .stabn 68,0,138,.LM29-sgeqpf_
< .LM29:
---
> .stabn 68,0,138,.LM32-sgeqpf_
> .LM32:
268,269c271,272
< .stabn 68,0,139,.LM30-sgeqpf_
< .LM30:
---
> .stabn 68,0,139,.LM33-sgeqpf_
> .LM33:
286,287c289,290
< .stabn 68,0,140,.LM31-sgeqpf_
< .LM31:
---
> .stabn 68,0,140,.LM34-sgeqpf_
> .LM34:
294,295c297,298
< .stabn 68,0,141,.LM32-sgeqpf_
< .LM32:
---
> .stabn 68,0,141,.LM35-sgeqpf_
> .LM35:
325,326c328,329
< .stabn 68,0,143,.LM33-sgeqpf_
< .LM33:
---
> .stabn 68,0,143,.LM36-sgeqpf_
> .LM36:
329,330c332,333
< .stabn 68,0,146,.LM34-sgeqpf_
< .LM34:
---
> .stabn 68,0,146,.LM37-sgeqpf_
> .LM37:
334,335c337,338
< .stabn 68,0,151,.LM35-sgeqpf_
< .LM35:
---
> .stabn 68,0,151,.LM38-sgeqpf_
> .LM38:
362,363c365,366
< .stabn 68,0,152,.LM36-sgeqpf_
< .LM36:
---
> .stabn 68,0,152,.LM39-sgeqpf_
> .LM39:
377,378c380,381
< .stabn 68,0,153,.LM37-sgeqpf_
< .LM37:
---
> .stabn 68,0,153,.LM40-sgeqpf_
> .LM40:
383,384c386,387
< .stabn 68,0,154,.LM38-sgeqpf_
< .LM38:
---
> .stabn 68,0,154,.LM41-sgeqpf_
> .LM41:
386,387c389,390
< .stabn 68,0,153,.LM39-sgeqpf_
< .LM39:
---
> .stabn 68,0,153,.LM42-sgeqpf_
> .LM42:
390,391c393,394
< .stabn 68,0,154,.LM40-sgeqpf_
< .LM40:
---
> .stabn 68,0,154,.LM43-sgeqpf_
> .LM43:
398,399c401,402
< .stabn 68,0,158,.LM41-sgeqpf_
< .LM41:
---
> .stabn 68,0,158,.LM44-sgeqpf_
> .LM44:
427,428c430,431
< .stabn 68,0,162,.LM42-sgeqpf_
< .LM42:
---
> .stabn 68,0,162,.LM45-sgeqpf_
> .LM45:
447,448c450,451
< .stabn 68,0,164,.LM43-sgeqpf_
< .LM43:
---
> .stabn 68,0,164,.LM46-sgeqpf_
> .LM46:
453,454c456,457
< .stabn 68,0,165,.LM44-sgeqpf_
< .LM44:
---
> .stabn 68,0,165,.LM47-sgeqpf_
> .LM47:
470,471c473,474
< .stabn 68,0,166,.LM45-sgeqpf_
< .LM45:
---
> .stabn 68,0,166,.LM48-sgeqpf_
> .LM48:
473,474c476,477
< .stabn 68,0,167,.LM46-sgeqpf_
< .LM46:
---
> .stabn 68,0,167,.LM49-sgeqpf_
> .LM49:
476,477c479,480
< .stabn 68,0,166,.LM47-sgeqpf_
< .LM47:
---
> .stabn 68,0,166,.LM50-sgeqpf_
> .LM50:
481,482c484,485
< .stabn 68,0,167,.LM48-sgeqpf_
< .LM48:
---
> .stabn 68,0,167,.LM51-sgeqpf_
> .LM51:
485,486c488,489
< .stabn 68,0,168,.LM49-sgeqpf_
< .LM49:
---
> .stabn 68,0,168,.LM52-sgeqpf_
> .LM52:
489,490c492,493
< .stabn 68,0,169,.LM50-sgeqpf_
< .LM50:
---
> .stabn 68,0,169,.LM53-sgeqpf_
> .LM53:
492,493c495,496
< .stabn 68,0,171,.LM51-sgeqpf_
< .LM51:
---
> .stabn 68,0,171,.LM54-sgeqpf_
> .LM54:
495,496c498,499
< .stabn 68,0,170,.LM52-sgeqpf_
< .LM52:
---
> .stabn 68,0,170,.LM55-sgeqpf_
> .LM55:
498,499c501,502
< .stabn 68,0,169,.LM53-sgeqpf_
< .LM53:
---
> .stabn 68,0,169,.LM56-sgeqpf_
> .LM56:
502,503c505,506
< .stabn 68,0,170,.LM54-sgeqpf_
< .LM54:
---
> .stabn 68,0,170,.LM57-sgeqpf_
> .LM57:
512,513c515,516
< .stabn 68,0,175,.LM55-sgeqpf_
< .LM55:
---
> .stabn 68,0,175,.LM58-sgeqpf_
> .LM58:
518,519c521,522
< .stabn 68,0,176,.LM56-sgeqpf_
< .LM56:
---
> .stabn 68,0,176,.LM59-sgeqpf_
> .LM59:
538,539c541,542
< .stabn 68,0,177,.LM57-sgeqpf_
< .LM57:
---
> .stabn 68,0,177,.LM60-sgeqpf_
> .LM60:
543,544c546,547
< .stabn 68,0,178,.LM58-sgeqpf_
< .LM58:
---
> .stabn 68,0,178,.LM61-sgeqpf_
> .LM61:
560,561c563,564
< .stabn 68,0,179,.LM59-sgeqpf_
< .LM59:
---
> .stabn 68,0,179,.LM62-sgeqpf_
> .LM62:
563,564c566,567
< .stabn 68,0,181,.LM60-sgeqpf_
< .LM60:
---
> .stabn 68,0,181,.LM63-sgeqpf_
> .LM63:
569,570c572,573
< .stabn 68,0,185,.LM61-sgeqpf_
< .LM61:
---
> .stabn 68,0,185,.LM64-sgeqpf_
> .LM64:
574,575c577,578
< .stabn 68,0,187,.LM62-sgeqpf_
< .LM62:
---
> .stabn 68,0,187,.LM65-sgeqpf_
> .LM65:
578,579c581,582
< .stabn 68,0,185,.LM63-sgeqpf_
< .LM63:
---
> .stabn 68,0,185,.LM66-sgeqpf_
> .LM66:
582,583c585,586
< .stabn 68,0,186,.LM64-sgeqpf_
< .LM64:
---
> .stabn 68,0,186,.LM67-sgeqpf_
> .LM67:
585,586c588,589
< .stabn 68,0,187,.LM65-sgeqpf_
< .LM65:
---
> .stabn 68,0,187,.LM68-sgeqpf_
> .LM68:
622,623c625,626
< .stabn 68,0,189,.LM66-sgeqpf_
< .LM66:
---
> .stabn 68,0,189,.LM69-sgeqpf_
> .LM69:
627,628c630,631
< .stabn 68,0,190,.LM67-sgeqpf_
< .LM67:
---
> .stabn 68,0,190,.LM70-sgeqpf_
> .LM70:
631,632c634,635
< .stabn 68,0,194,.LM68-sgeqpf_
< .LM68:
---
> .stabn 68,0,194,.LM71-sgeqpf_
> .LM71:
671,672c674,675
< .stabn 68,0,195,.LM69-sgeqpf_
< .LM69:
---
> .stabn 68,0,195,.LM72-sgeqpf_
> .LM72:
674,677c677,680
< fucom %st(1)
< fnstsw %ax
< andb $69,%ah
< cmpb $64,%ah
---
> fucomi %st(1),%st
> setne %al
> setp %ah
> orb %al,%ah
679,680c682,683
< .stabn 68,0,196,.LM70-sgeqpf_
< .LM70:
---
> .stabn 68,0,196,.LM73-sgeqpf_
> .LM73:
690,695c693,696
< .stabn 68,0,197,.LM71-sgeqpf_
< .LM71:
< fcom %st(3)
< fnstsw %ax
< andb $5,%ah
< je .L44
---
> .stabn 68,0,197,.LM74-sgeqpf_
> .LM74:
> fcomi %st(3),%st
> jae .L44
700,701c701,702
< .stabn 68,0,198,.LM72-sgeqpf_
< .LM72:
---
> .stabn 68,0,198,.LM75-sgeqpf_
> .LM75:
713,718c714,720
< .stabn 68,0,199,.LM73-sgeqpf_
< .LM73:
< fucompp
< fnstsw %ax
< andb $68,%ah
< xorb $64,%ah
---
> .stabn 68,0,199,.LM76-sgeqpf_
> .LM76:
> fucomip %st(1),%st
> fstp %st(0)
> setne %al
> setp %ah
> orb %al,%ah
721,722c723,724
< .stabn 68,0,200,.LM74-sgeqpf_
< .LM74:
---
> .stabn 68,0,200,.LM77-sgeqpf_
> .LM77:
728,729c730,731
< .stabn 68,0,201,.LM75-sgeqpf_
< .LM75:
---
> .stabn 68,0,201,.LM78-sgeqpf_
> .LM78:
745,746c747,748
< .stabn 68,0,202,.LM76-sgeqpf_
< .LM76:
---
> .stabn 68,0,202,.LM79-sgeqpf_
> .LM79:
750,751c752,753
< .stabn 68,0,203,.LM77-sgeqpf_
< .LM77:
---
> .stabn 68,0,203,.LM80-sgeqpf_
> .LM80:
753,754c755,756
< .stabn 68,0,202,.LM78-sgeqpf_
< .LM78:
---
> .stabn 68,0,202,.LM81-sgeqpf_
> .LM81:
757,758c759,760
< .stabn 68,0,203,.LM79-sgeqpf_
< .LM79:
---
> .stabn 68,0,203,.LM82-sgeqpf_
> .LM82:
763,764c765,766
< .stabn 68,0,204,.LM80-sgeqpf_
< .LM80:
---
> .stabn 68,0,204,.LM83-sgeqpf_
> .LM83:
766,767c768,769
< .stabn 68,0,205,.LM81-sgeqpf_
< .LM81:
---
> .stabn 68,0,205,.LM84-sgeqpf_
> .LM84:
773,774c775,776
< .stabn 68,0,207,.LM82-sgeqpf_
< .LM82:
---
> .stabn 68,0,207,.LM85-sgeqpf_
> .LM85:
778,779c780,781
< .stabn 68,0,208,.LM83-sgeqpf_
< .LM83:
---
> .stabn 68,0,208,.LM86-sgeqpf_
> .LM86:
785,786c787,788
< .stabn 68,0,211,.LM84-sgeqpf_
< .LM84:
---
> .stabn 68,0,211,.LM87-sgeqpf_
> .LM87:
801,802c803,804
< .stabn 68,0,213,.LM85-sgeqpf_
< .LM85:
---
> .stabn 68,0,213,.LM88-sgeqpf_
> .LM88:
811,812c813,814
< .stabn 68,0,215,.LM86-sgeqpf_
< .LM86:
---
> .stabn 68,0,215,.LM89-sgeqpf_
> .LM89:
=======================================================
Can any Intel insider see what is wrong? ... Toon.