This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

libgcc patch applied to SH port


Tested for sh64-elf and sh-elf.

-- 
--------------------------
SuperH
2430 Aztec West / Almondsbury / BRISTOL / BS32 4AQ
T:+44 1454 462330
Mon Jun 24 21:05:09 2002  J"orn Rennecke <joern.rennecke@superh.com>

	* lib1funcs.asm (sdivsi3): Add optimized SH64 implementations.
	(udivsi3): Likewise.  Rewrite SH1 implementation.
	(udivdi3, divdi3, umoddi3, moddi3): New SHmedia functions.
	* sh.md (R20_REG, R21_REG, R22_REG, R23_REG, FR23_REG): New constants.
	(udivsi3_i1_media, divsi3_i1_media): Fix clobber list.
	* config/sh/t-sh64 (LIB1ASMFUNCS): Add _udivdi3, _divdi3, _umoddi3
	and _moddi3.

	* lib1funcs.asm (ic_invalidate): Add data cache line writeback.

Index: config/sh/lib1funcs.asm
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/lib1funcs.asm,v
retrieving revision 1.22
diff -p -r1.22 lib1funcs.asm
*** config/sh/lib1funcs.asm	17 May 2002 21:05:32 -0000	1.22
--- config/sh/lib1funcs.asm	24 Jun 2002 17:52:46 -0000
*************** GLOBAL(sdivsi3_i4):
*** 930,935 ****
--- 930,936 ----
  	.text
  #endif
  	.align	2
+ #if 0
  /* The assembly code that follows is a hand-optimized version of the C
     code that follows.  Note that the registers that are modified are
     exactly those listed as clobbered in the patterns divsi3_i1 and
*************** LOCAL(sdivsi3_dontadd):
*** 987,993 ****
  	muls.l	r0, r2, r0
  	add.l	r0, r63, r0
  	blink	tr0, r63
! #else
  GLOBAL(sdivsi3):
  	mov	r4,r1
  	mov	r5,r0
--- 988,1087 ----
  	muls.l	r0, r2, r0
  	add.l	r0, r63, r0
  	blink	tr0, r63
! #else /* ! 0 */
!  // inputs: r4,r5
!  // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
!  // result in r0
! GLOBAL(sdivsi3):
!  // can create absolute value without extra latency,
!  // but dependent on proper sign extension of inputs:
!  // shari.l r5,31,r2
!  // xor r5,r2,r20
!  // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
!  shari.l r5,31,r2 // r2 = 0 or -1, following the sign of the divisor r5
!  ori r2,1,r2 // r2 = +1 or -1
!  muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
!  movi 0xffffffffffffbb0c,r19 // shift count equiv 76
!  shari.l r4,31,r3 // r3 = 0 or -1, following the sign of the dividend r4
!  nsb r20,r0
!  shlld r20,r0,r25
!  shlri r25,48,r25
!  sub r19,r25,r1
!  mmulfx.w r1,r1,r2
!  mshflo.w r1,r63,r1
!  // If r4 was to be used in-place instead of r21, could use this sequence
!  // to compute absolute:
!  // sub r63,r4,r19 // compute absolute value of r4
!  // shlri r4,32,r3 // into lower 32 bit of r4, keeping
!  // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
!  ori r3,1,r3 // r3 = +1 or -1
!  mmulfx.w r25,r2,r2
!  sub r19,r0,r0
!  muls.l r4,r3,r21 // r21 = r4 * r3, i.e. the absolute value of the dividend
!  msub.w r1,r2,r2
!  addi r2,-2,r1
!  mulu.l r21,r1,r19
!  mmulfx.w r2,r2,r2
!  shlli r1,15,r1
!  shlrd r19,r0,r19
!  mulu.l r19,r20,r3
!  mmacnfx.wl r25,r2,r1
!  ptabs r18,tr0 // tr0 = return target from r18, read before r18 is reused below
!  sub r21,r3,r25
! 
!  mulu.l r25,r1,r2
!  addi r0,14,r0
!  xor r4,r5,r18 // bit 31 of r18 = sign of the quotient
!  shlrd r2,r0,r2
!  mulu.l r2,r20,r3
!  add r19,r2,r19
!  shari.l r18,31,r18 // r18 = 0 or -1: sign mask of the quotient
!  sub r25,r3,r25
! 
!  mulu.l r25,r1,r2
!  sub r25,r20,r25
!  add r19,r18,r19 // adds -1 for a negative quotient; pairs with the final xor
!  shlrd r2,r0,r2
!  mulu.l r2,r20,r3
!  addi r25,1,r25
!  add r19,r2,r19
! 
!  cmpgt r25,r3,r25
!  add.l r19,r25,r0
!  xor r0,r18,r0 // with the earlier add of r18: ~(q - 1) = -q when r18 is -1
!  blink tr0,r63 // return
! #endif
! #elif defined __SHMEDIA__
! /* m5compact-nofpu */
!  // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
! 	.mode	SHmedia
! 	.section	.text..SHmedia32,"ax"
! 	.align	2
! GLOBAL(sdivsi3):
! 	pt/l LOCAL(sdivsi3_dontsub), tr0
! 	pt/l LOCAL(sdivsi3_loop), tr1
! 	ptabs/l r18,tr2 // tr2 = return target from r18; r18 is reused as scratch below
! 	shari.l r4,31,r18 // r18 = 0 or -1: sign mask of the dividend
! 	shari.l r5,31,r19 // r19 = 0 or -1: sign mask of the divisor
! 	xor r4,r18,r20
! 	xor r5,r19,r21
! 	sub.l r20,r18,r20 // r20 = absolute value of r4
! 	sub.l r21,r19,r21 // r21 = absolute value of r5
! 	xor r18,r19,r19 // r19 = 0 or -1: sign mask of the quotient
! 	shlli r21,32,r25 // r25 = divisor << 32; its zero low word serves as loop counter
! 	addi r25,-1,r21 // r21 = (divisor << 32) - 1
! 	addz.l r20,r63,r20 // zero-extend the dividend magnitude
! LOCAL(sdivsi3_loop):
! 	shlli r20,1,r20
! 	bgeu/u r21,r20,tr0 // skip the subtraction while r21 >= r20 (unsigned)
! 	sub r20,r21,r20 // removes divisor << 32 and adds 1, i.e. sets a quotient bit in the low word
! LOCAL(sdivsi3_dontsub):
! 	addi.l r25,-1,r25 // 32-bit decrement: r25 = -1, -2, ...
! 	bnei r25,-32,tr1 // repeat until the counter reaches -32 (32 passes)
! 	xor r20,r19,r20
! 	sub.l r20,r19,r0 // r0 = low word of r20 with the quotient sign applied
! 	blink tr2,r63 // return
! #else /* ! __SHMEDIA__ */
  GLOBAL(sdivsi3):
  	mov	r4,r1
  	mov	r5,r0
*************** L1:
*** 1187,1197 ****
  /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
     sh3e code.  */
  #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
- !!
- !! Steve Chamberlain
- !! sac@cygnus.com
- !!
- !!
  
  !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
  	.global	GLOBAL(udivsi3)
--- 1281,1286 ----
*************** L1:
*** 1203,1208 ****
--- 1292,1298 ----
  	.text
  #endif
  	.align	2
+ #if 0
  /* The assembly code that follows is a hand-optimized version of the C
     code that follows.  Note that the registers that are modified are
     exactly those listed as clobbered in the patterns udivsi3_i1 and
*************** LOCAL(udivsi3_dontadd):
*** 1248,1303 ****
  	blink	tr0, r63
  #else
  GLOBAL(udivsi3):
! longway:
! 	mov	#0,r0
! 	div0u
! 	! get one bit from the msb of the numerator into the T
! 	! bit and divide it by whats in r5.  Put the answer bit
! 	! into the T bit so it can come out again at the bottom
! 
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! shortway:
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 
! vshortway:
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4 ; div1 r5,r0
! 	rotcl	r4
! ret:	rts
! 	mov	r4,r0
  
  #endif /* ! __SHMEDIA__ */
  #endif /* __SH4__ */
! #endif
  #ifdef L_set_fpscr
  #if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
  #ifdef __SH5__
--- 1338,1772 ----
  	blink	tr0, r63
  #else
  GLOBAL(udivsi3):
!  // inputs: r4,r5
!  // clobbered: r18,r19,r20,r21,r22,r25,tr0
!  // result in r0.
!  addz.l r5,r63,r22 // r22 = divisor r5 zero-extended from 32 bits
!  nsb r22,r0
!  shlld r22,r0,r25
!  shlri r25,48,r25
!  movi 0xffffffffffffbb0c,r20 // shift count equiv 76
!  sub r20,r25,r21
!  mmulfx.w r21,r21,r19
!  mshflo.w r21,r63,r21
!  ptabs r18,tr0 // tr0 = return target from r18, read before r18 is reused below
!  mmulfx.w r25,r19,r19
!  sub r20,r0,r0
!  /* bubble */
!  msub.w r21,r19,r19
!  addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
! 		    before the msub.w, but we need a different value for
! 		    r19 to keep errors under control.  */
!  mulu.l r4,r21,r18
!  mmulfx.w r19,r19,r19
!  shlli r21,15,r21
!  shlrd r18,r0,r18
!  mulu.l r18,r22,r20
!  mmacnfx.wl r25,r19,r21
!  /* bubble */
!  sub r4,r20,r25
! 
!  mulu.l r25,r21,r19
!  addi r0,14,r0
!  /* bubble */
!  shlrd r19,r0,r19
!  mulu.l r19,r22,r20
!  add r18,r19,r18 // r18 accumulates the partial quotients
!  /* bubble */
!  sub.l r25,r20,r25
! 
!  mulu.l r25,r21,r19
!  addz.l r25,r63,r25
!  sub r25,r22,r25
!  shlrd r19,r0,r19
!  mulu.l r19,r22,r20
!  addi r25,1,r25
!  add r18,r19,r18
! 
!  cmpgt r25,r20,r25 // r25 = 0 or 1: final correction term
!  add.l r18,r25,r0 // r0 = accumulated quotient plus correction
!  blink tr0,r63 // return
! #endif
! #elif defined (__SHMEDIA__)
! /* m5compact-nofpu - more emphasis on code size than on speed, but don't
!    ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
!    So use a short shmedia loop.  */
!  // clobbered: r20,r21,r25,tr0,tr1,tr2
! 	.mode	SHmedia
! 	.section	.text..SHmedia32,"ax"
! 	.align	2
! GLOBAL(udivsi3):
!  pt/l LOCAL(udivsi3_dontsub), tr0
!  pt/l LOCAL(udivsi3_loop), tr1
!  ptabs/l r18,tr2 // tr2 = return target from r18
!  shlli r5,32,r25 // r25 = divisor << 32; its zero low word serves as loop counter
!  addi r25,-1,r21 // r21 = (divisor << 32) - 1
!  addz.l r4,r63,r20 // r20 = dividend zero-extended from 32 bits
! LOCAL(udivsi3_loop):
!  shlli r20,1,r20
!  bgeu/u r21,r20,tr0 // skip the subtraction while r21 >= r20 (unsigned)
!  sub r20,r21,r20 // removes divisor << 32 and adds 1, i.e. sets a quotient bit in the low word
! LOCAL(udivsi3_dontsub):
!  addi.l r25,-1,r25 // 32-bit decrement: r25 = -1, -2, ...
!  bnei r25,-32,tr1 // repeat until the counter reaches -32 (32 passes)
!  add.l r20,r63,r0 // r0 = low 32 bits of r20, which hold the quotient bits
!  blink tr2,r63 // return
! #else /* ! defined (__SHMEDIA__) */
! LOCAL(div8):
!  div1 r5,r4 ! div8 = this step plus the seven steps of div7
! LOCAL(div7):
!  div1 r5,r4; div1 r5,r4; div1 r5,r4
!  div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 ! seventh div1 sits in the rts delay slot
! 
! LOCAL(divx4):
!  div1 r5,r0; rotcl r4
!  div1 r5,r0; rotcl r4
!  div1 r5,r0; rotcl r4
!  rts; div1 r5,r0 ! fourth div1 sits in the rts delay slot
! 
! GLOBAL(udivsi3):
!  sts.l pr,@-r15 ! push pr, which the bsr calls below overwrite
!  extu.w r5,r0
!  cmp/eq r5,r0 ! T = 1 when the divisor fits in 16 bits
! #ifdef __sh1__
!  bf LOCAL(large_divisor)
! #else
!  bf/s LOCAL(large_divisor)
! #endif
!  div0u ! delay slot of bf/s; a taken plain bf skips it, hence the __sh1__ div0u below
!  swap.w r4,r0
!  shlr16 r4
!  bsr LOCAL(div8)
!  shll16 r5 ! bsr delay slot: divisor moved into the upper 16 bits
!  bsr LOCAL(div7)
!  div1 r5,r4 ! bsr delay slot: together with div7 this gives eight more steps
!  xtrct r4,r0
!  xtrct r0,r4
!  bsr LOCAL(div8)
!  swap.w r4,r4 ! bsr delay slot
!  bsr LOCAL(div7)
!  div1 r5,r4 ! bsr delay slot: together with div7 this gives eight more steps
!  lds.l @r15+,pr ! pop pr
!  xtrct r4,r0
!  swap.w r0,r0
!  rts
!  rotcl r0 ! rts delay slot: rotate T into bit 0 of the result
! 
! LOCAL(large_divisor):
! #ifdef __sh1__
!  div0u
! #endif
!  mov #0,r0
!  xtrct r4,r0
!  xtrct r0,r4
!  bsr LOCAL(divx4) ! four calls of four div1 steps each
!  rotcl r4 ! bsr delay slot
!  bsr LOCAL(divx4)
!  rotcl r4 ! bsr delay slot
!  bsr LOCAL(divx4)
!  rotcl r4 ! bsr delay slot
!  bsr LOCAL(divx4)
!  rotcl r4 ! bsr delay slot
!  lds.l @r15+,pr ! pop pr
!  rts
!  rotcl r0 ! rts delay slot: rotate T into bit 0 of the result
  
  #endif /* ! __SHMEDIA__ */
  #endif /* __SH4__ */
! #endif /* L_udivsi3 */
! 
! #ifdef L_udivdi3
! #ifdef __SHMEDIA__
! 	.mode	SHmedia
! 	.section	.text..SHmedia32,"ax"
! 	.align	2
! 	.global	GLOBAL(udivdi3)
! GLOBAL(udivdi3):
! 	shlri r3,1,r4
! 	nsb r4,r22
! 	shlld r3,r22,r6
! 	shlri r6,49,r5
! 	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
! 	sub r21,r5,r1
! 	mmulfx.w r1,r1,r4
! 	mshflo.w r1,r63,r1
! 	sub r63,r22,r20 // r63 == 64 % 64
! 	mmulfx.w r5,r4,r4
! 	pta LOCAL(large_divisor),tr0 // prepare the branch taken by the bgt/u below
! 	addi r20,32,r9
! 	msub.w r1,r4,r1
! 	madd.w r1,r1,r1
! 	mmulfx.w r1,r1,r4
! 	shlri r6,32,r7
! 	bgt/u r9,r63,tr0 // large_divisor
! 	mmulfx.w r5,r4,r4
! 	shlri r2,32,r19 // r19 = upper 32 bits of r2
! 	addi r20,14-1,r0
! 	msub.w r1,r4,r1
! 
! 	mulu.l r1,r7,r4
! 	addi r1,-3,r5
! 	mulu.l r5,r19,r5
! 	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
! 	                 the case may be, %0000000000000000 000.11111111111, still */
! 	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
! 	shlrd r5,r0,r8
! 	mulu.l r8,r3,r5
! 	mshalds.l r1,r21,r1
! 	shari r4,26,r4
! 	shlli r5,32,r5
! 	sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
! 	sub r2,r5,r2
! 	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
! 
! 	shlri r2,22,r21
! 	mulu.l r21,r1,r21
! 	addi r20,30-22,r0
! 	shlli r8,32,r8
! 	shlrd r21,r0,r21
! 	mulu.l r21,r3,r5
! 	add r8,r21,r8 // r8 accumulates the partial quotients
! 	mcmpeq.l r21,r63,r21 // See Note 1
! 	addi r20,30,r0
! 	mshfhi.l r63,r21,r21
! 	sub r2,r5,r2
! 	andc r2,r21,r2
! 
! 	/* small divisor: need a third divide step */
! 	mulu.l r2,r1,r7
! 	ptabs r18,tr0 // tr0 = return target from r18
! 	addi r2,1,r2
! 	shlrd r7,r0,r7
! 	mulu.l r7,r3,r5
! 	add r8,r7,r8
! 	sub r2,r3,r2
! 	cmpgt r2,r5,r5 // r5 = 0 or 1: final correction term
! 	add r8,r5,r2 // r2 = accumulated quotient plus correction (the result)
! 	/* could test r3 here to check for divide by zero.  */
! 	blink tr0,r63 // return
! 
! LOCAL(large_divisor):
! 	mmulfx.w r5,r4,r4
! 	shlrd r2,r9,r25
! 	shlri r25,32,r8
! 	msub.w r1,r4,r1
! 
! 	mulu.l r1,r7,r4
! 	addi r1,-3,r5
! 	mulu.l r5,r8,r5
! 	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
! 	                 the case may be, %0000000000000000 000.11111111111, still */
! 	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
! 	shlri r5,14-1+32,r8
! 	mulu.l r8,r7,r5
! 	mshalds.l r1,r21,r1
! 	shari r4,26,r4
! 	sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
! 	sub r25,r5,r25
! 	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
! 
! 	shlri r25,22,r21
! 	mulu.l r21,r1,r21
! 	pta LOCAL(no_lo_adj),tr0 // prepare the branch taken by the bgtu/u below
! 	addi r22,32,r0
! 	shlri r21,40,r21
! 	mulu.l r21,r7,r5
! 	add r8,r21,r8
! 	shlld r2,r0,r2
! 	sub r25,r5,r25
! 	mextr4 r2,r25,r2
! 	bgtu/u r6,r2,tr0 // no_lo_adj
! 	addi r8,1,r8
! 	sub r2,r6,r2
! LOCAL(no_lo_adj):
! 
! 	/* large_divisor: only needs a few adjustments.  */
! 	mulu.l r8,r6,r5
! 	ptabs r18,tr0 // tr0 = return target from r18
! 	/* bubble */
! 	cmpgtu r5,r2,r5 // r5 = 0 or 1: final correction term
! 	sub r8,r5,r2 // r2 = quotient estimate minus correction (the result)
! 	blink tr0,r63 // return
! /* Note 1: To shift the result of the second divide stage so that the result
!    always fits into 32 bits, yet we still reduce the rest sufficiently
!    would require a lot of instructions to do the shifts just right.  Using
!    the full 64 bit shift result to multiply with the divisor would require
!    four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
!    Fortunately, if the upper 32 bits of the shift result are non-zero, we
!    know that the rest after taking this partial result into account will
!    fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
!    upper 32 bits of the partial result are non-zero.  */
! #endif /* __SHMEDIA__ */
! #endif /* L_udivdi3 */
! 
! #ifdef L_divdi3
! #ifdef __SHMEDIA__
! 	.mode	SHmedia
! 	.section	.text..SHmedia32,"ax"
! 	.align	2
! 	.global	GLOBAL(divdi3)
! GLOBAL(divdi3):
! 	pta GLOBAL(udivdi3),tr0 // tr0 = address of the unsigned divide
! 	shari r2,63,r22 // r22 = 0 or -1: sign mask of r2
! 	shari r3,63,r23 // r23 = 0 or -1: sign mask of r3
! 	xor r2,r22,r2
! 	xor r3,r23,r3
! 	sub r2,r22,r2 // r2 = absolute value of the original r2
! 	sub r3,r23,r3 // r3 = absolute value of the original r3
! 	beq/u r22,r23,tr0 // equal signs: jump to udivdi3, which then returns to our caller via r18
! 	ptabs r18,tr1 // signs differ: keep our own return target in tr1
! 	blink tr0,r18 // call udivdi3 with the link address in r18
! 	sub r63,r2,r2 // negate the unsigned quotient
! 	blink tr1,r63 // return
! #endif /* __SHMEDIA__ */
! #endif /* L_divdi3 */
! 
! #ifdef L_umoddi3
! #ifdef __SHMEDIA__
! 	.mode	SHmedia
! 	.section	.text..SHmedia32,"ax"
! 	.align	2
! 	.global	GLOBAL(umoddi3)
! GLOBAL(umoddi3):
! 	shlri r3,1,r4
! 	nsb r4,r22
! 	shlld r3,r22,r6
! 	shlri r6,49,r5
! 	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
! 	sub r21,r5,r1
! 	mmulfx.w r1,r1,r4
! 	mshflo.w r1,r63,r1
! 	sub r63,r22,r20 // r63 == 64 % 64
! 	mmulfx.w r5,r4,r4
! 	pta LOCAL(large_divisor),tr0 // prepare the branch taken by the bgt/u below
! 	addi r20,32,r9
! 	msub.w r1,r4,r1
! 	madd.w r1,r1,r1
! 	mmulfx.w r1,r1,r4
! 	shlri r6,32,r7
! 	bgt/u r9,r63,tr0 // large_divisor
! 	mmulfx.w r5,r4,r4
! 	shlri r2,32,r19 // r19 = upper 32 bits of r2
! 	addi r20,14-1,r0
! 	msub.w r1,r4,r1
! 
! 	mulu.l r1,r7,r4
! 	addi r1,-3,r5
! 	mulu.l r5,r19,r5
! 	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
! 	                 the case may be, %0000000000000000 000.11111111111, still */
! 	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
! 	shlrd r5,r0,r8
! 	mulu.l r8,r3,r5
! 	mshalds.l r1,r21,r1
! 	shari r4,26,r4
! 	shlli r5,32,r5
! 	sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
! 	sub r2,r5,r2
! 	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
! 
! 	shlri r2,22,r21
! 	mulu.l r21,r1,r21
! 	addi r20,30-22,r0
! 	/* bubble */ /* could test r3 here to check for divide by zero.  */
! 	shlrd r21,r0,r21
! 	mulu.l r21,r3,r5
! 	mcmpeq.l r21,r63,r21 // See Note 1
! 	addi r20,30,r0
! 	mshfhi.l r63,r21,r21
! 	sub r2,r5,r2
! 	andc r2,r21,r2
! 
! 	/* small divisor: need a third divide step */
! 	mulu.l r2,r1,r7
! 	ptabs r18,tr0 // tr0 = return target from r18
! 	sub r2,r3,r8 /* re-use r8 here for rest - r3 */
! 	shlrd r7,r0,r7
! 	mulu.l r7,r3,r5
! 	/* bubble */
! 	addi r8,1,r7
! 	cmpgt r7,r5,r7 // r7 = 0 or 1: selects whether one more r3 comes off
! 	cmvne r7,r8,r2 // if r7 is non-zero, r2 = r8 (rest - r3)
! 	sub r2,r5,r2 // r2 = value returned to the caller
! 	blink tr0,r63 // return
! 
! LOCAL(large_divisor):
! 	mmulfx.w r5,r4,r4
! 	shlrd r2,r9,r25
! 	shlri r25,32,r8
! 	msub.w r1,r4,r1
! 
! 	mulu.l r1,r7,r4
! 	addi r1,-3,r5
! 	mulu.l r5,r8,r5
! 	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
! 	                 the case may be, %0000000000000000 000.11111111111, still */
! 	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
! 	shlri r5,14-1+32,r8
! 	mulu.l r8,r7,r5
! 	mshalds.l r1,r21,r1
! 	shari r4,26,r4
! 	sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
! 	sub r25,r5,r25
! 	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
! 
! 	shlri r25,22,r21
! 	mulu.l r21,r1,r21
! 	pta LOCAL(no_lo_adj),tr0 // prepare the branch taken by the bgtu/u below
! 	addi r22,32,r0
! 	shlri r21,40,r21
! 	mulu.l r21,r7,r5
! 	add r8,r21,r8
! 	shlld r2,r0,r2
! 	sub r25,r5,r25
! 	mextr4 r2,r25,r2
! 	bgtu/u r6,r2,tr0 // no_lo_adj
! 	addi r8,1,r8
! 	sub r2,r6,r2
! LOCAL(no_lo_adj):
! 
! 	/* large_divisor: only needs a few adjustments.  */
! 	mulu.l r8,r6,r5
! 	ptabs r18,tr0 // tr0 = return target from r18
! 	add r2,r3,r7
! 	cmpgtu r5,r2,r8 // r8 = 0 or 1: selects the adjusted value r7 below
! 	cmvne r8,r7,r2 // if r8 is non-zero, r2 = r7 (r2 + r3)
! 	sub r2,r5,r2 // r2 = value returned to the caller
! 	blink tr0,r63 // return
! /* Note 1: To shift the result of the second divide stage so that the result
!    always fits into 32 bits, yet we still reduce the rest sufficiently
!    would require a lot of instructions to do the shifts just right.  Using
!    the full 64 bit shift result to multiply with the divisor would require
!    four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
!    Fortunately, if the upper 32 bits of the shift result are non-zero, we
!    know that the rest after taking this partial result into account will
!    fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
!    upper 32 bits of the partial result are non-zero.  */
! #endif /* __SHMEDIA__ */
! #endif /* L_umoddi3 */
! 
! #ifdef L_moddi3
! #ifdef __SHMEDIA__
! 	.mode	SHmedia
! 	.section	.text..SHmedia32,"ax"
! 	.align	2
! 	.global	GLOBAL(moddi3)
! GLOBAL(moddi3):
! 	pta GLOBAL(umoddi3),tr0 // tr0 = address of the unsigned remainder routine
! 	shari r2,63,r22 // r22 = 0 or -1: sign mask of r2
! 	shari r3,63,r23 // r23 = 0 or -1: sign mask of r3
! 	xor r2,r22,r2
! 	xor r3,r23,r3
! 	sub r2,r22,r2 // r2 = absolute value of the original r2
! 	sub r3,r23,r3 // r3 = absolute value of the original r3
! 	beq/u r22,r63,tr0 // r2 was non-negative: jump to umoddi3, which then returns to our caller via r18
! 	ptabs r18,tr1 // r2 was negative: keep our own return target in tr1
! 	blink tr0,r18 // call umoddi3 with the link address in r18
! 	sub r63,r2,r2 // negate the unsigned remainder, so the result takes the sign of the original r2
! 	blink tr1,r63 // return
! #endif /* __SHMEDIA__ */
! #endif /* L_moddi3 */
! 
  #ifdef L_set_fpscr
  #if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
  #ifdef __SH5__
*************** LOCAL(set_fpscr_L1):
*** 1350,1355 ****
--- 1819,1826 ----
  	.align	2
  	.global	GLOBAL(ic_invalidate)
  GLOBAL(ic_invalidate):
+ 	ocbwb	r0,0
+ 	synco
  	icbi	r0, 0
  	ptabs	r18, tr0
  	synci
Index: config/sh/sh.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh.md,v
retrieving revision 1.105
diff -p -r1.105 sh.md
*** config/sh/sh.md	17 Jun 2002 16:28:29 -0000	1.105
--- config/sh/sh.md	24 Jun 2002 17:52:46 -0000
***************
*** 99,108 ****
--- 99,113 ----
    (R8_REG	8)
    (R9_REG	9)
    (R10_REG	10)
+   (R20_REG	20)
+   (R21_REG	21)
+   (R22_REG	22)
+   (R23_REG	23)
  
    (DR0_REG	64)
    (DR2_REG	66)
    (DR4_REG	68)
+   (FR23_REG	87)
  
    (TR0_REG	128)
    (TR1_REG	129)
***************
*** 1281,1292 ****
    [(set_attr "type" "sfunc")
     (set_attr "needs_delay_slot" "yes")])
  
  (define_insn "udivsi3_i1_media"
    [(set (match_operand:SI 0 "register_operand" "=z")
  	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
     (clobber (reg:SI T_MEDIA_REG))
     (clobber (reg:SI PR_MEDIA_REG))
!    (clobber (reg:SI R4_REG))
     (clobber (reg:DI TR0_REG))
     (clobber (reg:DI TR1_REG))
     (clobber (reg:DI TR2_REG))
--- 1286,1305 ----
    [(set_attr "type" "sfunc")
     (set_attr "needs_delay_slot" "yes")])
  
+ ; Since shmedia-nofpu code could be linked against shcompact code, and
+ ; the udivsi3 libcall has the same name, we must consider all registers
+ ; clobbered that are in the union of the registers clobbered by the
+ ; shmedia and the shcompact implementation.  Note, if the shcompact
+ ; implementation actually used shcompact code, we'd need to clobber
+ ; also r23 and fr23.
  (define_insn "udivsi3_i1_media"
    [(set (match_operand:SI 0 "register_operand" "=z")
  	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
     (clobber (reg:SI T_MEDIA_REG))
     (clobber (reg:SI PR_MEDIA_REG))
!    (clobber (reg:SI R20_REG))
!    (clobber (reg:SI R21_REG))
!    (clobber (reg:SI R22_REG))
     (clobber (reg:DI TR0_REG))
     (clobber (reg:DI TR1_REG))
     (clobber (reg:DI TR2_REG))
***************
*** 1430,1435 ****
--- 1443,1454 ----
    [(set_attr "type" "sfunc")
     (set_attr "needs_delay_slot" "yes")])
  
+ ; Since shmedia-nofpu code could be linked against shcompact code, and
+ ; the sdivsi3 libcall has the same name, we must consider all registers
+ ; clobbered that are in the union of the registers clobbered by the
+ ; shmedia and the shcompact implementation.  Note, if the shcompact
+ ; implementation actually used shcompact code, we'd need to clobber
+ ; also r22, r23 and fr23.
  (define_insn "divsi3_i1_media"
    [(set (match_operand:SI 0 "register_operand" "=z")
  	(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
***************
*** 1438,1443 ****
--- 1457,1464 ----
     (clobber (reg:SI R1_REG))
     (clobber (reg:SI R2_REG))
     (clobber (reg:SI R3_REG))
+    (clobber (reg:SI R20_REG))
+    (clobber (reg:SI R21_REG))
     (clobber (reg:DI TR0_REG))
     (clobber (reg:DI TR1_REG))
     (clobber (reg:DI TR2_REG))
Index: config/sh/t-sh64
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/t-sh64,v
retrieving revision 1.5
diff -p -r1.5 t-sh64
*** config/sh/t-sh64	18 Jun 2002 19:56:54 -0000	1.5
--- config/sh/t-sh64	24 Jun 2002 17:52:46 -0000
*************** LIB1ASMFUNCS = \
*** 4,10 ****
    _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
    _shcompact_call_trampoline _shcompact_return_trampoline \
    _shcompact_incoming_args _ic_invalidate _nested_trampoline \
!   _push_pop_shmedia_regs
  
  MULTILIB_OPTIONS = $(MULTILIB_ENDIAN) m5-32media-nofpu/m5-compact/m5-compact-nofpu/m5-64media/m5-64media-nofpu
  MULTILIB_DIRNAMES= $(MULTILIB_ENDIAN) nofpu compact nofpu/compact media64 nofpu/media64
--- 4,11 ----
    _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
    _shcompact_call_trampoline _shcompact_return_trampoline \
    _shcompact_incoming_args _ic_invalidate _nested_trampoline \
!   _push_pop_shmedia_regs \
!   _udivdi3 _divdi3 _umoddi3 _moddi3
  
  MULTILIB_OPTIONS = $(MULTILIB_ENDIAN) m5-32media-nofpu/m5-compact/m5-compact-nofpu/m5-64media/m5-64media-nofpu
  MULTILIB_DIRNAMES= $(MULTILIB_ENDIAN) nofpu compact nofpu/compact media64 nofpu/media64

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]