[committed] Fix PR target/20754: ACATS cxg1005 fails at runtime on hppa-linux

John David Anglin dave@hiauly1.hia.nrc.ca
Wed Jan 11 01:15:00 GMT 2006


The enclosed change fixes target/20754.  The failure of cxg1005 was a
regression introduced in 4.0.  However, the bug has existed in the PA
target since SECONDARY_MEMORY_NEEDED_RTX was defined.  There may be a
problem on 3.4 but there's no evidence of a regression at the moment.

The 32-bit port uses a location in the frame marker to copy values between
general and floating-point registers.  We are limited in our selection of
memory locations for this operation as floating-point loads and stores
in PA 1.x only support 5-bit offsets.  It would take two additional
instructions for the copy if we allowed an arbitrary offset.  So, we use
a fixed location in the frame marker which has a small offset from the
stack pointer for these copies.

The problems start when we define SECONDARY_MEMORY_NEEDED_RTX to be
this memory location.  Postreload scheduling can separate the two
instructions used in the copy.  Unfortunately, we still have patterns
which assume this location is available during code generation.  Further,
we may do a copy in a pure leaf function without a frame.  This is the
circumstance in the PR.  Finally, it appears possible that a dynamic
stack allocation could occur between the store and load used in the
copies, and we aren't copying this location in the frame marker.

Because of these issues, I decided that it would be best if we hid
the copy from the compiler.  Thus, I've removed the define for
SECONDARY_MEMORY_NEEDED_RTX and added the alternatives to the 32-bit
move patterns to do copies between general and floating-point registers.
We lose the scheduling benefit that we might have had.  However,
PA-RISC processors do quite a bit of internal scheduling, and we
don't have to force leaf functions that do a copy between general and
floating-point registers to have a frame.  We also gain in being able
to use this location in any insn that needs a scratch memory location.

Tested on hppa-unknown-linux-gnu on 4.0, 4.1 and trunk.  Tested on
hppa2.0w-hp-hpux11.11 on 4.1 and trunk.  Tested on hppa64-hp-hpux11.11
on 4.1.  Committed to 4.0, 4.1 and trunk.  No regressions were observed.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)

2006-01-10  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>

	PR target/20754
	* pa.md: Create separate 32 and 64-bit move patterns for SI, DI, SF
	and DF modes.  Add alternatives to copy between general and floating
	point registers to the 32-bit patterns.
	* pa-64.h (SECONDARY_MEMORY_NEEDED_RTX): Delete undefine.
	* pa.h (SECONDARY_MEMORY_NEEDED_RTX): Delete define.
	(SECONDARY_MEMORY_NEEDED): Secondary memory is only needed when
	generating 64-bit code.
	* pa.c (output_move_double): Handle copies between general and
	floating registers.

Index: config/pa/pa.md
===================================================================
--- config/pa/pa.md	(revision 109455)
+++ config/pa/pa.md	(working copy)
@@ -2307,12 +2307,41 @@
 
 (define_insn ""
   [(set (match_operand:SI 0 "move_dest_operand"
+			  "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T,r,f")
+	(match_operand:SI 1 "move_src_operand"
+			  "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f,f,r"))]
+  "(register_operand (operands[0], SImode)
+    || reg_or_0_operand (operands[1], SImode))
+   && !TARGET_SOFT_FLOAT
+   && !TARGET_64BIT"
+  "@
+   ldw RT'%A1,%0
+   copy %1,%0
+   ldi %1,%0
+   ldil L'%1,%0
+   {zdepi|depwi,z} %Z1,%0
+   ldw%M1 %1,%0
+   stw%M0 %r1,%0
+   mtsar %r1
+   {mfctl|mfctl,w} %%sar,%0
+   fcpy,sgl %f1,%0
+   fldw%F1 %1,%0
+   fstw%F0 %1,%0
+   {fstws|fstw} %1,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0
+   {stws|stw} %1,-16(%%sp)\n\t{fldws|fldw} -16(%%sp),%0"
+  [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore,move,move")
+   (set_attr "pa_combine_type" "addmove")
+   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8,8")])
+
+(define_insn ""
+  [(set (match_operand:SI 0 "move_dest_operand"
 			  "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T")
 	(match_operand:SI 1 "move_src_operand"
 			  "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f"))]
   "(register_operand (operands[0], SImode)
     || reg_or_0_operand (operands[1], SImode))
-   && !TARGET_SOFT_FLOAT"
+   && !TARGET_SOFT_FLOAT
+   && TARGET_64BIT"
   "@
    ldw RT'%A1,%0
    copy %1,%0
@@ -3840,9 +3869,9 @@
 
 (define_insn ""
   [(set (match_operand:DF 0 "move_dest_operand"
-			  "=f,*r,Q,?o,?Q,f,*r,*r")
+			  "=f,*r,Q,?o,?Q,f,*r,*r,r,f")
 	(match_operand:DF 1 "reg_or_0_or_nonsymb_mem_operand"
-			  "fG,*rG,f,*r,*r,RQ,o,RQ"))]
+			  "fG,*rG,f,*r,*r,RQ,o,RQ,f,r"))]
   "(register_operand (operands[0], DFmode)
     || reg_or_0_operand (operands[1], DFmode))
    && !(GET_CODE (operands[1]) == CONST_DOUBLE
@@ -3851,13 +3880,15 @@
    && !TARGET_SOFT_FLOAT"
   "*
 {
-  if (FP_REG_P (operands[0]) || FP_REG_P (operands[1])
-      || operands[1] == CONST0_RTX (DFmode))
+  if ((FP_REG_P (operands[0]) || FP_REG_P (operands[1])
+       || operands[1] == CONST0_RTX (DFmode))
+      && !(REG_P (operands[0]) && REG_P (operands[1])
+	   && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1])))
     return output_fp_move_double (operands);
   return output_move_double (operands);
 }"
-  [(set_attr "type" "fpalu,move,fpstore,store,store,fpload,load,load")
-   (set_attr "length" "4,8,4,8,16,4,8,16")])
+  [(set_attr "type" "fpalu,move,fpstore,store,store,fpload,load,load,move,move")
+   (set_attr "length" "4,8,4,8,16,4,8,16,12,12")])
 
 (define_insn ""
   [(set (match_operand:DF 0 "indexed_memory_operand" "=R")
@@ -4012,9 +4043,9 @@
 
 (define_insn ""
   [(set (match_operand:DF 0 "move_dest_operand"
-			  "=r,?o,?Q,r,r")
+			  "=r,?o,?Q,r,r,r,f")
 	(match_operand:DF 1 "reg_or_0_or_nonsymb_mem_operand"
-			  "rG,r,r,o,RQ"))]
+			  "rG,r,r,o,RQ,f,r"))]
   "(register_operand (operands[0], DFmode)
     || reg_or_0_operand (operands[1], DFmode))
    && !TARGET_64BIT
@@ -4023,8 +4054,8 @@
 {
   return output_move_double (operands);
 }"
-  [(set_attr "type" "move,store,store,load,load")
-   (set_attr "length" "8,8,16,8,16")])
+  [(set_attr "type" "move,store,store,load,load,move,move")
+   (set_attr "length" "8,8,16,8,16,12,12")])
 
 (define_insn ""
   [(set (match_operand:DF 0 "move_dest_operand"
@@ -4154,22 +4185,25 @@
 
 (define_insn ""
   [(set (match_operand:DI 0 "move_dest_operand"
-			  "=r,o,Q,r,r,r,*f,*f,T")
+			  "=r,o,Q,r,r,r,*f,*f,T,r,f")
 	(match_operand:DI 1 "general_operand"
-			  "rM,r,r,o*R,Q,i,*fM,RT,*f"))]
+			  "rM,r,r,o*R,Q,i,*fM,RT,*f,f,r"))]
   "(register_operand (operands[0], DImode)
     || reg_or_0_operand (operands[1], DImode))
    && !TARGET_64BIT
    && !TARGET_SOFT_FLOAT"
   "*
 {
-  if (FP_REG_P (operands[0]) || FP_REG_P (operands[1])
-      || (operands[1] == CONST0_RTX (DImode)))
+  if ((FP_REG_P (operands[0]) || FP_REG_P (operands[1])
+       || operands[1] == CONST0_RTX (DFmode))
+      && !(REG_P (operands[0]) && REG_P (operands[1])
+	   && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1])))
     return output_fp_move_double (operands);
   return output_move_double (operands);
 }"
-  [(set_attr "type" "move,store,store,load,load,multi,fpalu,fpload,fpstore")
-   (set_attr "length" "8,8,16,8,16,16,4,4,4")])
+  [(set_attr "type"
+    "move,store,store,load,load,multi,fpalu,fpload,fpstore,move,move")
+   (set_attr "length" "8,8,16,8,16,16,4,4,4,12,12")])
 
 (define_insn ""
   [(set (match_operand:DI 0 "move_dest_operand"
@@ -4380,12 +4414,35 @@
 
 (define_insn ""
   [(set (match_operand:SF 0 "move_dest_operand"
+			  "=f,!*r,f,*r,Q,Q,r,f")
+	(match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand"
+			  "fG,!*rG,RQ,RQ,f,*rG,f,r"))]
+  "(register_operand (operands[0], SFmode)
+    || reg_or_0_operand (operands[1], SFmode))
+   && !TARGET_SOFT_FLOAT
+   && !TARGET_64BIT"
+  "@
+   fcpy,sgl %f1,%0
+   copy %r1,%0
+   fldw%F1 %1,%0
+   ldw%M1 %1,%0
+   fstw%F0 %1,%0
+   stw%M0 %r1,%0
+   {fstws|fstw} %1,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0
+   {stws|stw} %1,-16(%%sp)\n\t{fldws|fldw} -16(%%sp),%0"
+  [(set_attr "type" "fpalu,move,fpload,load,fpstore,store,move,move")
+   (set_attr "pa_combine_type" "addmove")
+   (set_attr "length" "4,4,4,4,4,4,8,8")])
+
+(define_insn ""
+  [(set (match_operand:SF 0 "move_dest_operand"
 			  "=f,!*r,f,*r,Q,Q")
 	(match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand"
 			  "fG,!*rG,RQ,RQ,f,*rG"))]
   "(register_operand (operands[0], SFmode)
     || reg_or_0_operand (operands[1], SFmode))
-   && !TARGET_SOFT_FLOAT"
+   && !TARGET_SOFT_FLOAT
+   && TARGET_64BIT"
   "@
    fcpy,sgl %f1,%0
    copy %r1,%0
Index: config/pa/pa-64.h
===================================================================
--- config/pa/pa-64.h	(revision 109455)
+++ config/pa/pa-64.h	(working copy)
@@ -70,10 +70,6 @@
    relocs which appear in stabs.  */
 #undef DBX_DEBUGGING_INFO
 
-/* We want the compiler to select a suitable secondary memory location.
-   ?!? This may not work reliably.  Keep an eye out for problems.  */
-#undef SECONDARY_MEMORY_NEEDED_RTX
-
 /* ?!? This needs to be made compile-time selectable.
 
    The PA64 runtime model has arguments that grow to higher addresses
Index: config/pa/pa.c
===================================================================
--- config/pa/pa.c	(revision 109455)
+++ config/pa/pa.c	(working copy)
@@ -2209,6 +2209,25 @@
      supposed to allow to happen.  */
   gcc_assert (optype0 == REGOP || optype1 == REGOP);
 
+  /* Handle copies between general and floating registers.  */
+
+  if (optype0 == REGOP && optype1 == REGOP
+      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
+    {
+      if (FP_REG_P (operands[0]))
+	{
+	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
+	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
+	  return "{fldds|fldd} -16(%%sp),%0";
+	}
+      else
+	{
+	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
+	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
+	  return "{ldws|ldw} -12(%%sp),%R0";
+	}
+    }
+
    /* Handle auto decrementing and incrementing loads and stores
      specifically, since the structure of the function doesn't work
      for them without major modification.  Do it better when we learn
Index: config/pa/pa.h
===================================================================
--- config/pa/pa.h	(revision 109455)
+++ config/pa/pa.h	(working copy)
@@ -531,14 +531,15 @@
   reg_classes_intersect_p ((CLASS), FP_REGS)
 
 /* On the PA it is not possible to directly move data between
-   GENERAL_REGS and FP_REGS.  */
-#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE)		\
-  (MAYBE_FP_REG_CLASS_P (CLASS1) != FP_REG_CLASS_P (CLASS2)	\
-   || MAYBE_FP_REG_CLASS_P (CLASS2) != FP_REG_CLASS_P (CLASS1))
-
-/* Return the stack location to use for secondary memory needed reloads.  */
-#define SECONDARY_MEMORY_NEEDED_RTX(MODE) \
-  gen_rtx_MEM (MODE, gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-16)))
+   GENERAL_REGS and FP_REGS.  On the 32-bit port, we use the
+   location at SP-16.  We don't expose this location in the RTL to
+   avoid scheduling related problems.  For example, the store and
+   load could be separated by a call to a pure or const function
+   which has no frame and uses SP-16.  */
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE)			\
+  (TARGET_64BIT								\
+   && (MAYBE_FP_REG_CLASS_P (CLASS1) != FP_REG_CLASS_P (CLASS2)		\
+       || MAYBE_FP_REG_CLASS_P (CLASS2) != FP_REG_CLASS_P (CLASS1)))
 
 
 /* Stack layout; function entry, exit and calling.  */



More information about the Gcc-patches mailing list