This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [AVX]: More AVX updates


On Sat, May 24, 2008 at 10:19:06AM -0700, H.J. Lu wrote:
> On Fri, May 23, 2008 at 02:48:45PM -0700, H.J. Lu wrote:
> > Hi Uros,
> > 
> > How about this patch?
> > 
> 
> Here is the updated patch. I added %v as prefix and %d to
> print_reg. We can add AVX support to most SSE patterns directly.
> 
> 
> H.J.
> ---
> 2008-05-24  H.J. Lu  <hongjiu.lu@intel.com>
> 
> 	* config/i386/gas.h (ASM_OUTPUT_OPCODE): Undefine before
> 	define.  Use ASM_OUTPUT_AVX_PREFIX.
> 
> 	* config/i386/i386.c (print_reg): Handle 'd' to duplicate
> 	the operand.
> 	(print_operand): Handle 'd'.
> 
> 	* config/i386/i386.h (ASM_OUTPUT_AVX_PREFIX): New.
> 	(ASM_OUTPUT_OPCODE): Likewise.
> 
> 	* config/i386/i386.md (*movdi_2): Support AVX.
> 	(*movdf_nointeger): Likewise.
> 
> 	* config/i386/mmx.md (*mov<mode>_internal_rex64_avx): Removed.
> 	(*mov<mode>_internal_rex64): Support AVX.
> 
> 	* config/i386/sse.md (*avx_storehps): Removed.
> 	(sse_storehps): Support AVX.
> 	(*vec_dupv2df): Remove AVX support.
> 

We should print the duplicated register operand only for AVX instructions.
Here is the updated patch.


H.J.
---
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 135851)
+++ config/i386/i386.h	(working copy)
@@ -2246,6 +2246,29 @@ do {									\
 #define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
   ix86_output_addr_diff_elt ((FILE), (VALUE), (REL))
 
+/* When we see %v, we will print the 'v' prefix if TARGET_AVX is
+   true.  */
+
+#define ASM_OUTPUT_AVX_PREFIX(STREAM, PTR)	\
+{						\
+  if ((PTR)[0] == '%' && (PTR)[1] == 'v')	\
+    {						\
+      if (TARGET_AVX)				\
+	(PTR) += 1;				\
+      else					\
+	(PTR) += 2;				\
+    }						\
+}
+
+/* A C statement or statements which output an assembler instruction
+   opcode to the stdio stream STREAM.  The macro-operand PTR is a
+   variable of type `char *' which points to the opcode name in
+   its "internal" form--the form that is written in the machine
+   description.  */
+
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+  ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR))
+
 /* Under some conditions we need jump tables in the text section,
    because the assembler cannot handle label differences between
    sections.  This is the case for x86_64 on Mach-O for example.  */
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 135851)
+++ config/i386/i386.md	(working copy)
@@ -2295,15 +2295,19 @@
    pxor\t%0, %0
    movq\t{%1, %0|%0, %1}
    movq\t{%1, %0|%0, %1}
-   pxor\t%0, %0
-   movq\t{%1, %0|%0, %1}
-   movdqa\t{%1, %0|%0, %1}
-   movq\t{%1, %0|%0, %1}
+   %vpxor\t%0, %d0
+   %vmovq\t{%1, %0|%0, %1}
+   %vmovdqa\t{%1, %0|%0, %1}
+   %vmovq\t{%1, %0|%0, %1}
    xorps\t%0, %0
    movlps\t{%1, %0|%0, %1}
    movaps\t{%1, %0|%0, %1}
    movlps\t{%1, %0|%0, %1}"
   [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8")
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
 
 (define_split
@@ -2928,11 +2932,11 @@
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return "xorps\t%0, %0";
+	  return "%vxorps\t%0, %d0";
 	case MODE_V2DF:
-	  return "xorpd\t%0, %0";
+	  return "%vxorpd\t%0, %d0";
 	case MODE_TI:
-	  return "pxor\t%0, %0";
+	  return "%vpxor\t%0, %d0";
 	default:
 	  gcc_unreachable ();
 	}
@@ -2942,19 +2946,43 @@
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return "movaps\t{%1, %0|%0, %1}";
+	  return "%vmovaps\t{%1, %0|%0, %1}";
 	case MODE_V2DF:
-	  return "movapd\t{%1, %0|%0, %1}";
+	  return "%vmovapd\t{%1, %0|%0, %1}";
 	case MODE_TI:
-	  return "movdqa\t{%1, %0|%0, %1}";
+	  return "%vmovdqa\t{%1, %0|%0, %1}";
 	case MODE_DI:
-	  return "movq\t{%1, %0|%0, %1}";
+	  return "%vmovq\t{%1, %0|%0, %1}";
 	case MODE_DF:
-	  return "movsd\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]) && REG_P (operands[1]))
+		return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovsd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movsd\t{%1, %0|%0, %1}";
 	case MODE_V1DF:
-	  return "movlpd\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]))
+		return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovlpd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movlpd\t{%1, %0|%0, %1}";
 	case MODE_V2SF:
-	  return "movlps\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]))
+		return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovlps\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movlps\t{%1, %0|%0, %1}";
 	default:
 	  gcc_unreachable ();
 	}
@@ -2964,6 +2992,10 @@
     }
 }
   [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+       (const_string "orig")
+       (const_string "maybe_vex")))
    (set (attr "mode")
         (cond [(eq_attr "alternative" "0,1,2")
 		 (const_string "DF")
Index: config/i386/mmx.md
===================================================================
--- config/i386/mmx.md	(revision 135851)
+++ config/i386/mmx.md	(working copy)
@@ -63,12 +63,12 @@
   DONE;
 })
 
-(define_insn "*mov<mode>_internal_rex64_avx"
+(define_insn "*mov<mode>_internal_rex64"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
 				"=rm,r,!?y,!?y ,m  ,!y,Y2,x,x ,m,r,x")
 	(match_operand:MMXMODEI8 1 "vector_move_operand"
 				"Cr ,m,C  ,!?ym,!?y,Y2,!y,C,xm,x,x,r"))]
-  "TARGET_64BIT && TARGET_AVX
+  "TARGET_64BIT && TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
     mov{q}\t{%1, %0|%0, %1}
@@ -78,43 +78,19 @@
     movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}
-    vpxor\t%0, %0, %0
-    vmovq\t{%1, %0|%0, %1}
-    vmovq\t{%1, %0|%0, %1}
-    vmovq\t{%1, %0|%0, %1}
-    vmovq\t{%1, %0|%0, %1}"
+    %vpxor\t%0, %d0
+    %vmovq\t{%1, %0|%0, %1}
+    %vmovq\t{%1, %0|%0, %1}
+    %vmovq\t{%1, %0|%0, %1}
+    %vmovq\t{%1, %0|%0, %1}"
   [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
    (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
    (set (attr "prefix")
      (if_then_else (eq_attr "alternative" "7,8,9,10,11")
-       (const_string "vex")
+       (const_string "maybe_vex")
        (const_string "orig")))
    (set_attr "mode" "DI")])
 
-(define_insn "*mov<mode>_internal_rex64"
-  [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
-				"=rm,r,!?y,!?y ,m  ,!y,Y2,x,x ,m,r,x")
-	(match_operand:MMXMODEI8 1 "vector_move_operand"
-				"Cr ,m,C  ,!?ym,!?y,Y2,!y,C,xm,x,x,r"))]
-  "TARGET_64BIT && TARGET_MMX
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-    mov{q}\t{%1, %0|%0, %1}
-    mov{q}\t{%1, %0|%0, %1}
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movdq2q\t{%1, %0|%0, %1}
-    movq2dq\t{%1, %0|%0, %1}
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
-   (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
-   (set_attr "mode" "DI")])
-
 (define_insn "*mov<mode>_internal_avx"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
 			"=!?y,!?y,m  ,!y ,*Y2,*Y2,*Y2 ,m  ,r  ,m")
Index: config/i386/gas.h
===================================================================
--- config/i386/gas.h	(revision 135851)
+++ config/i386/gas.h	(working copy)
@@ -86,6 +86,7 @@ along with GCC; see the file COPYING3.  
    GAS version 1.38.1 doesn't understand the `repz' opcode mnemonic.
    So use `repe' instead.  */
 
+#undef ASM_OUTPUT_OPCODE
 #define ASM_OUTPUT_OPCODE(STREAM, PTR)	\
 {									\
   if ((PTR)[0] == 'r'							\
@@ -103,6 +104,8 @@ along with GCC; see the file COPYING3.  
 	  (PTR) += 5;							\
 	}								\
     }									\
+  else									\
+    ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR));				\
 }
 
 /* Define macro used to output shift-double opcodes when the shift
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 135851)
+++ config/i386/sse.md	(working copy)
@@ -3295,20 +3295,6 @@
   [(set_attr "type" "sselog")
    (set_attr "mode" "V4SF")])
 
-(define_insn "*avx_storehps"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
-	(vec_select:V2SF
-	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
-	  (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_AVX"
-  "@
-   vmovhps\t{%1, %0|%0, %1}
-   vmovhlps\t{%1, %0, %0|%0, %0, %1}
-   vmovlps\t{%H1, %0, %0|%0, %0, %H1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "V2SF,V4SF,V2SF")])
-
 (define_insn "sse_storehps"
   [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
 	(vec_select:V2SF
@@ -3316,10 +3302,11 @@
 	  (parallel [(const_int 2) (const_int 3)])))]
   "TARGET_SSE"
   "@
-   movhps\t{%1, %0|%0, %1}
-   movhlps\t{%1, %0|%0, %1}
-   movlps\t{%H1, %0|%0, %H1}"
+   %vmovhps\t{%1, %0|%0, %1}
+   %vmovhlps\t{%1, %d0|%d0, %1}
+   %vmovlps\t{%H1, %d0|%d0, %H1}"
   [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2SF,V4SF,V2SF")])
 
 (define_expand "sse_loadhps_exp"
@@ -4425,10 +4412,8 @@
 	(vec_duplicate:V2DF
 	  (match_operand:DF 1 "register_operand" "0")))]
   "TARGET_SSE2"
-  "* return TARGET_AVX ? \"vunpcklpd\t%0, %0, %0\"
-                       : \"unpcklpd\t%0, %0\";"
+  "unpcklpd\t%0, %0"
   [(set_attr "type" "sselog1")
-   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2DF")])
 
 (define_insn "*vec_concatv2df_sse3"
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 135851)
+++ config/i386/i386.c	(working copy)
@@ -8890,11 +8890,16 @@ put_condition_code (enum rtx_code code, 
    If CODE is 'x', pretend the mode is V4SFmode.
    If CODE is 't', pretend the mode is V8SFmode.
    If CODE is 'h', pretend the reg is the 'high' byte register.
-   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
+   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
+   If CODE is 'd', duplicate the operand for AVX instruction.
+ */
 
 void
 print_reg (rtx x, int code, FILE *file)
 {
+  const char *reg;
+  bool duplicated = code == 'd' && TARGET_AVX;
+
   gcc_assert (x == pc_rtx
 	      || (REGNO (x) != ARG_POINTER_REGNUM
 		  && REGNO (x) != FRAME_POINTER_REGNUM
@@ -8959,12 +8964,14 @@ print_reg (rtx x, int code, FILE *file)
 	}
       return;
     }
+
+  reg = NULL;
   switch (code)
     {
     case 3:
       if (STACK_TOP_P (x))
 	{
-	  fputs ("st(0)", file);
+	  reg = "st(0)";
 	  break;
 	}
       /* FALLTHRU */
@@ -8977,28 +8984,39 @@ print_reg (rtx x, int code, FILE *file)
     case 16:
     case 2:
     normal:
-      fputs (hi_reg_name[REGNO (x)], file);
+      reg = hi_reg_name[REGNO (x)];
       break;
     case 1:
       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
 	goto normal;
-      fputs (qi_reg_name[REGNO (x)], file);
+      reg = qi_reg_name[REGNO (x)];
       break;
     case 0:
       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
 	goto normal;
-      fputs (qi_high_reg_name[REGNO (x)], file);
+      reg = qi_high_reg_name[REGNO (x)];
       break;
     case 32:
       if (SSE_REG_P (x))
 	{
+	  gcc_assert (!duplicated);
 	  putc ('y', file);
 	  fputs (hi_reg_name[REGNO (x)] + 1, file);
+	  return;
 	}
       break;
     default:
       gcc_unreachable ();
     }
+
+  fputs (reg, file);
+  if (duplicated)
+    {
+      if (ASSEMBLER_DIALECT == ASM_ATT)
+	fprintf (file, ", %%%s", reg);
+      else
+	fprintf (file, ", %s", reg);
+    }
 }
 
 /* Locate some local-dynamic symbol still in use by this function
@@ -9059,6 +9077,7 @@ get_some_local_dynamic_name (void)
    t --  likewise, print the V8SFmode name of the register.
    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
    y -- print "st(0)" instead of "st" as a register.
+   d -- print duplicated register operand for AVX instruction.
    D -- print condition for SSE cmp instruction.
    P -- if PIC, print an @PLT suffix.
    X -- don't print any sort of PIC '@' suffix for a symbol.
@@ -9204,6 +9223,7 @@ print_operand (FILE *file, rtx x, int co
 	      gcc_unreachable ();
 	    }
 
+	case 'd':
 	case 'b':
 	case 'w':
 	case 'k':


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]