This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

fix target/21101


So the test case here is that we have an HImode quantity that we want to
duplicate across a V4HImode vector.  We have an instruction to do that,
provided we can get the HImode value into an MMX register.  The catch
is that MMX has no HImode load, so we can't allow HImode values in MMX regs.

So vec_dupv4hi is modeled with an SImode input that gets truncated in
the pattern.  Now, when we have a real HImode value to give to this
instruction, we wind up generating a paradoxical subreg so that the
modes line up.

So far so good.

The problem comes in push_reload, which attempts to get rid of subregs
(paradoxical and otherwise) in order to make things easier for itself.
It determines that it can throw away the SImode subreg, leaving us with
a bare HImode register to allocate to an MMX input -- except that we've
rightly declared that illegal, which leads directly to the abort.

Fixed by telling the middle-end that MMX/SSE registers can't hold HImode
and QImode values even if they're hidden inside subregs.  This forces
reload to spill the value to a stack slot, and we reload the full SImode
quantity from there.


r~


        * config/i386/i386.h (CANNOT_CHANGE_MODE_CLASS): Move guts to ...
        * config/i386/i386.c (ix86_cannot_change_mode_class): ... here.
        Deny modes smaller than 4 bytes.
        * config/i386/i386-protos.h: Update.

Index: config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.131
diff -u -p -r1.131 i386-protos.h
--- config/i386/i386-protos.h	2 Feb 2005 00:30:25 -0000	1.131
+++ config/i386/i386-protos.h	24 Apr 2005 07:52:37 -0000
@@ -186,6 +186,8 @@ extern int ix86_register_move_cost (enum
 				    enum reg_class);
 extern int ix86_secondary_memory_needed (enum reg_class, enum reg_class,
 					 enum machine_mode, int);
+extern bool ix86_cannot_change_mode_class (enum machine_mode,
+					   enum machine_mode, enum reg_class);
 extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
 extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
 extern void emit_i387_cw_initialization (rtx, rtx, int);
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.795.6.5
diff -u -p -r1.795.6.5 i386.c
--- config/i386/i386.c	23 Apr 2005 17:13:08 -0000	1.795.6.5
+++ config/i386/i386.c	24 Apr 2005 07:52:39 -0000
@@ -14999,6 +14999,41 @@ ix86_secondary_memory_needed (enum reg_c
   return false;
 }
 
+/* Return true if the registers in CLASS cannot represent the change from
+   modes FROM to TO.  */
+
+bool
+ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+			       enum reg_class class)
+{
+  if (from == to)
+    return false;
+
+  /* x87 registers can't do subreg at all, as all values are reformatted
+     to extended precision.  */
+  if (MAYBE_FLOAT_CLASS_P (class))
+    return true;
+
+  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
+    {
+      /* Vector registers do not support QI or HImode loads.  If we don't
+	 disallow a change to these modes, reload will assume it's ok to
+	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
+	 the vec_dupv4hi pattern.  */
+      if (GET_MODE_SIZE (from) < 4)
+	return true;
+
+      /* Vector registers do not support subreg with nonzero offsets, which
+	 are otherwise valid for integer registers.  Since we can't see 
+	 whether we have a nonzero offset from here, prohibit all
+         nonparadoxical subregs changing size.  */
+      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
+	return true;
+    }
+
+  return false;
+}
+
 /* Return the cost of moving data from a register in class CLASS1 to
    one in class CLASS2.
 
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.421.6.1
diff -u -p -r1.421.6.1 i386.h
--- config/i386/i386.h	5 Apr 2005 22:59:21 -0000	1.421.6.1
+++ config/i386/i386.h	24 Apr 2005 07:52:39 -0000
@@ -1589,19 +1589,10 @@ enum reg_class
    || ((CLASS) == FP_TOP_REG)						\
    || ((CLASS) == FP_SECOND_REG))
 
-/* Return a class of registers that cannot change FROM mode to TO mode.
+/* Return a class of registers that cannot change FROM mode to TO mode.  */
 
-   x87 registers can't do subreg as all values are reformated to extended
-   precision.  XMM registers does not support with nonzero offsets equal
-   to 4, 8 and 12 otherwise valid for integer registers. Since we can't
-   determine these, prohibit all nonparadoxical subregs changing size.  */
-
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)	\
-  (GET_MODE_SIZE (TO) < GET_MODE_SIZE (FROM)		\
-   ? reg_classes_intersect_p (FLOAT_SSE_REGS, (CLASS))	\
-     || MAYBE_MMX_CLASS_P (CLASS) 			\
-   : GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)		\
-   ? reg_classes_intersect_p (FLOAT_REGS, (CLASS)) : 0)
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+  ix86_cannot_change_mode_class (FROM, TO, CLASS)
 
 /* Stack layout; function entry, exit and calling.  */
 
Index: testsuite/gcc.target/i386/pr21101.c
===================================================================
RCS file: testsuite/gcc.target/i386/pr21101.c
diff -N testsuite/gcc.target/i386/pr21101.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.target/i386/pr21101.c	24 Apr 2005 07:52:39 -0000
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-loops -march=nocona" } */
+
+#include <mmintrin.h>
+
+int W;
+void f()
+{
+  int j;
+  int B, C;
+  unsigned char* S;
+  __m64 *T = (__m64 *) &W;
+
+  for (j = 0; j < 16; j++, T++)
+  {
+    T[0] = T[1] = _mm_set1_pi8(*S);
+    S += W;
+  }
+
+  C = 3 * B;
+
+  __m64 E = _mm_set_pi16(3 * B, 3 * B, 3 * B, 5 * B);
+  __m64 G = _mm_set1_pi16(3 * B);
+
+  for (j = 0; j < 16; j++)
+  {
+    __m64 R = _mm_set1_pi16(B + j * C);
+    R = _m_paddw(R, E);
+    R = _m_paddw(R, G);
+    T[0] = _mm_srai_pi16(R, 3);
+  }
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]