[PATCH] Fix regression introduced by r12-5536.

Uros Bizjak ubizjak@gmail.com
Mon Nov 29 21:20:56 GMT 2021


On Mon, Nov 29, 2021 at 10:48 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > > There're several failures reported in [1]:
> > > 1.  unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
> > > %vpextrw should be used in output templates.
> > > 2. ICE in get_attr_memory for movhi_internal since some alternatives
> > > are marked as TYPE_SSELOG.
> > > Explicitly set memory_attr for those alternatives.
> > >
> > > Also this patch fixs a typo and some latent bugs which are related to
> > > moving HImode from/to sse register w/o TARGET_AVX512FP16.

Here are some more fixes:

i386: Fix and improve movhi_internal and movhf_internal some more.

An (*v,C) alternative can be added to movhi_internal to directly load
HImode constant 0 to xmm register. Also, V4SFmode moves can be used
for xmm->xmm moves instead of TImode moves when optimizing for size.
Fix invalid %vpinsrw insn template, which needs to duplicate %xmm
register for AVX targets.

Optimize GPR moves in movhf_internal in the same way as in movhi_internal.
Fix pinsrw and pextrw templates for AVX targets. Use sselog1
instead of sselog type.  Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY
and TARGET_SSE_SPLIT_REGS targets.

2021-11-29  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

    PR target/102811
    * config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative.
    Do not allocate non-GPR registers.  Optimize xmm->xmm moves when
    optimizing for size.  Fix vpinsrw insn template.
    (*movhf_internal): Fix pinsrw and pextrw insn templates for
    AVX targets. Use sselog1 type instead of sselog.  Optimize GPR moves.
    Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY
    and TARGET_SSE_SPLIT_REGS targets.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} w/ and
w/o -mf16c.

Pushed to master.

Uros.
-------------- next part --------------
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a384dae23e2..c88374c9d2b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2495,11 +2495,12 @@
 	   (symbol_ref "true")))])
 
 (define_insn "*movhi_internal"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k,?r,?v,*v,*v,*m")
-	(match_operand:HI 1 "general_operand"      "r ,rn,rm,rn,*r,*km,*k,*k,CBC,v, r, v, m, v"))]
+  [(set (match_operand:HI 0 "nonimmediate_operand"
+    "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m")
+	(match_operand:HI 1 "general_operand"
+    "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r  ,C ,*v,m ,*v"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && ix86_hardreg_mov_ok (operands[0], operands[1])"
-
 {
   switch (get_attr_type (insn))
     {
@@ -2526,10 +2527,13 @@
       return ix86_output_ssemov (insn, operands);
 
     case TYPE_SSELOG1:
+      if (satisfies_constraint_C (operands[1]))
+	return standard_sse_constant_opcode (insn, operands);
+
       if (SSE_REG_P (operands[0]))
 	return MEM_P (operands[1])
-	  ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
-	  : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+	  ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
+	  : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
       else
 	return MEM_P (operands[0])
 	  ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
@@ -2550,23 +2554,25 @@
     }
 }
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "9,10,11,12")
+	(cond [(eq_attr "alternative" "9,10,11,12,13")
 		  (const_string "sse2")
-	       (eq_attr "alternative" "13")
+	       (eq_attr "alternative" "14")
 		  (const_string "sse4")
 	       ]
 	       (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "9,10,12,13")
+     (cond [(eq_attr "alternative" "4,5,6,7")
+	      (const_string "mskmov")
+	    (eq_attr "alternative" "8")
+	      (const_string "msklog")
+	    (eq_attr "alternative" "9,10,13,14")
 	      (if_then_else (match_test "TARGET_AVX512FP16")
 		(const_string "ssemov")
 		(const_string "sselog1"))
-	    (eq_attr "alternative" "4,5,6,7")
-	      (const_string "mskmov")
 	    (eq_attr "alternative" "11")
+	      (const_string "sselog1")
+	    (eq_attr "alternative" "12")
 	      (const_string "ssemov")
-	    (eq_attr "alternative" "8")
-	      (const_string "msklog")
 	    (match_test "optimize_function_for_size_p (cfun)")
 	      (const_string "imov")
 	    (and (eq_attr "alternative" "0")
@@ -2581,33 +2587,54 @@
 	      (const_string "imovx")
 	   ]
 	   (const_string "imov")))
-    (set (attr "prefix")
-	 (cond [(eq_attr "alternative" "9,10,11,12,13")
-		  (const_string "maybe_evex")
-		(eq_attr "alternative" "4,5,6,7,8")
-		  (const_string "vex")
-	       ]
-	       (const_string "orig")))
-    (set (attr "mode")
-      (cond [(eq_attr "type" "imovx")
-	       (const_string "SI")
-	     (eq_attr "alternative" "9,10,12,13")
-	       (if_then_else (match_test "TARGET_AVX512FP16")
-		 (const_string "HI")
-		 (const_string "TI"))
-	     (eq_attr "alternative" "11")
-	       (if_then_else (match_test "TARGET_AVX512FP16")
-		 (const_string "HF")
-		 (const_string "SF"))
-	     (and (eq_attr "alternative" "1,2")
-		  (match_operand:HI 1 "aligned_operand"))
-	       (const_string "SI")
-	     (and (eq_attr "alternative" "0")
-		  (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
-		       (not (match_test "TARGET_HIMODE_MATH"))))
-	       (const_string "SI")
+   (set (attr "prefix")
+	(cond [(eq_attr "alternative" "4,5,6,7,8")
+		 (const_string "vex")
+	       (eq_attr "alternative" "9,10,11,12,13,14")
+		 (const_string "maybe_evex")
+	      ]
+	      (const_string "orig")))
+   (set (attr "mode")
+     (cond [(eq_attr "alternative" "9,10,13,14")
+	      (if_then_else (match_test "TARGET_AVX512FP16")
+		(const_string "HI")
+		(const_string "TI"))
+	    (eq_attr "alternative" "11")
+	      (cond [(match_test "TARGET_AVX")
+		       (const_string "TI")
+		     (ior (not (match_test "TARGET_SSE2"))
+			  (match_test "optimize_function_for_size_p (cfun)"))
+		       (const_string "V4SF")
+		    ]
+		    (const_string "TI"))
+	    (eq_attr "alternative" "12")
+	      (cond [(match_test "TARGET_AVX512FP16")
+		       (const_string "HI")
+		     (match_test "TARGET_AVX")
+		       (const_string "TI")
+		     (ior (not (match_test "TARGET_SSE2"))
+			  (match_test "optimize_function_for_size_p (cfun)"))
+		       (const_string "V4SF")
+		    ]
+		    (const_string "TI"))
+	    (eq_attr "type" "imovx")
+	      (const_string "SI")
+	    (and (eq_attr "alternative" "1,2")
+		 (match_operand:HI 1 "aligned_operand"))
+	      (const_string "SI")
+	    (and (eq_attr "alternative" "0")
+		 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+		      (not (match_test "TARGET_HIMODE_MATH"))))
+	      (const_string "SI")
 	    ]
-	    (const_string "HI")))])
+	    (const_string "HI")))
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "9")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
+	    (eq_attr "alternative" "10")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+	   ]
+	   (symbol_ref "true")))])
 
 ;; Situation is quite tricky about when to choose full sized (SImode) move
 ;; over QImode moves.  For Q_REG -> Q_REG move we use full size only for
@@ -3774,92 +3801,110 @@
 
 (define_insn "*movhf_internal"
  [(set (match_operand:HF 0 "nonimmediate_operand"
-	 "=?r,?m,v,v,?r,m,?v,v")
+	 "=?r,?r,?r,?m,v,v,?r,m,?v,v")
        (match_operand:HF 1 "general_operand"
-	 "rmF,rF,C,v, v,v, r,m"))]
+	 "r  ,F ,m ,rF,C,v, v,v,r ,m"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress
       || reload_completed
       || !CONST_DOUBLE_P (operands[1])
-      || (TARGET_SSE && TARGET_SSE_MATH
+      || (TARGET_SSE2
 	  && standard_sse_constant_p (operands[1], HFmode) == 1)
       || memory_operand (operands[0], HFmode))"
 {
   switch (get_attr_type (insn))
     {
-    case TYPE_IMOV:
-      return "mov{w}\t{%1, %0|%0, %1}";
-
-    case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands);
+    case TYPE_IMOVX:
+      /* movzwl is faster than movw on p2 due to partial word stalls,
+	 though not as fast as an aligned movl.  */
+      return "movz{wl|x}\t{%1, %k0|%k0, %1}";
 
     case TYPE_SSEMOV:
       return ix86_output_ssemov (insn, operands);
 
-    case TYPE_SSELOG:
+    case TYPE_SSELOG1:
+      if (satisfies_constraint_C (operands[1]))
+	return standard_sse_constant_opcode (insn, operands);
+
       if (SSE_REG_P (operands[0]))
 	return MEM_P (operands[1])
-	       ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
-	       : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+	       ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
+	       : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
       else
 	return MEM_P (operands[0])
-	       ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
-	       : "pextrw\t{$0, %1, %k0|%k0, %1, 0}";
+	       ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
+	       : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
 
     default:
-      gcc_unreachable ();
+      if (get_attr_mode (insn) == MODE_SI)
+	return "mov{l}\t{%k1, %k0|%k0, %k1}";
+      else
+	return "mov{w}\t{%1, %0|%0, %1}";
     }
 }
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "2,3,4,6,7")
+	(cond [(eq_attr "alternative" "4,5,6,8,9")
 		 (const_string "sse2")
-	       (eq_attr "alternative" "5")
+	       (eq_attr "alternative" "7")
 		 (const_string "sse4")
 	      ]
 	      (const_string "*")))
    (set (attr "type")
-	(cond [(eq_attr "alternative" "0,1")
-		 (const_string "imov")
-	       (eq_attr "alternative" "2")
+	(cond [(eq_attr "alternative" "4")
 		 (const_string "sselog1")
-	       (eq_attr "alternative" "4,5,6,7")
+	       (eq_attr "alternative" "5")
+		 (const_string "ssemov")
+	       (eq_attr "alternative" "6,7,8,9")
 		 (if_then_else
 		   (match_test ("TARGET_AVX512FP16"))
 		   (const_string "ssemov")
-		   (const_string "sselog"))
-	      ]
-	      (const_string "ssemov")))
-   (set (attr "memory")
-	(cond [(eq_attr "alternative" "4,6")
-		 (const_string "none")
-	       (eq_attr "alternative" "5")
-		 (const_string "store")
-	       (eq_attr "alternative" "7")
-		 (const_string "load")
-	      ]
-	      (const_string "*")))
+		   (const_string "sselog1"))
+	       (match_test "optimize_function_for_size_p (cfun)")
+		 (const_string "imov")
+	       (and (eq_attr "alternative" "0")
+		    (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+			 (not (match_test "TARGET_HIMODE_MATH"))))
+		 (const_string "imov")
+	       (and (eq_attr "alternative" "1,2")
+		    (match_operand:HI 1 "aligned_operand"))
+		 (const_string "imov")
+	       (and (match_test "TARGET_MOVX")
+		    (eq_attr "alternative" "0,2"))
+		 (const_string "imovx")
+		 ]
+	      (const_string "imov")))
    (set (attr "prefix")
-	(cond [(eq_attr "alternative" "0,1")
-		 (const_string "orig")
+	(cond [(eq_attr "alternative" "4,5,6,7,8,9")
+		 (const_string "maybe_vex")
 	      ]
-	      (const_string "maybe_vex")))
+	      (const_string "orig")))
    (set (attr "mode")
-	(cond [(eq_attr "alternative" "0,1")
-		 (const_string "HI")
-	       (eq_attr "alternative" "2")
+	(cond [(eq_attr "alternative" "4")
 		 (const_string "V4SF")
-	       (eq_attr "alternative" "4,5,6,7")
+	       (eq_attr "alternative" "6,7,8,9")
 		 (if_then_else
 		   (match_test "TARGET_AVX512FP16")
 		   (const_string "HI")
 		   (const_string "TI"))
-	       (eq_attr "alternative" "3")
-		 (if_then_else
-		   (match_test "TARGET_AVX512FP16")
-		   (const_string "HF")
-		   (const_string "SF"))
+	       (eq_attr "alternative" "5")
+		 (cond [(match_test "TARGET_AVX512FP16")
+			  (const_string "HF")
+			(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+			     (match_test "TARGET_SSE_SPLIT_REGS"))
+			  (const_string "V4SF")
+		       ]
+		       (const_string "SF"))
+	       (eq_attr "type" "imovx")
+		 (const_string "SI")
+	       (and (eq_attr "alternative" "1,2")
+		    (match_operand:HI 1 "aligned_operand"))
+		 (const_string "SI")
+	       (and (eq_attr "alternative" "0")
+		    (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+			 (not (match_test "TARGET_HIMODE_MATH"))))
+		 (const_string "SI")
 	      ]
-	      (const_string "*")))])
+	      (const_string "HI")))])
 
 (define_split
   [(set (match_operand 0 "any_fp_register_operand")


More information about the Gcc-patches mailing list