[PATCH, i386]: Fix PR 89071, AVX vcvtsd2ss lets us avoid PXOR dependency breaking for scalar float<->double and other scalar xmm,xmm instructions

Uros Bizjak ubizjak@gmail.com
Thu Jan 31 11:32:00 GMT 2019


Hello!

The attached patch (partially) avoids emitting the XOR dependency-breaking
insn by removing the SSE register dependency in the AVX instructions
themselves.

2019-01-31  Uroš Bizjak  <ubizjak@gmail.com>

    PR target/89071
    * config/i386/i386.md (*extendsfdf2): Split out reg->reg
    alternative to avoid partial SSE register stall for TARGET_AVX.
    (truncdfsf2): Ditto.
    (sse4_1_round<mode>2): Ditto.

Bootstrapped on x86_64-linux-gnu {,-m32}; regression testing is in progress.

Uros.
-------------- next part --------------
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d085e88bc61d..744f155fca6f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4370,9 +4370,9 @@
 })
 
 (define_insn "*extendsfdf2"
-  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
         (float_extend:DF
-	  (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
+	  (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
   "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
 {
   switch (which_alternative)
@@ -4382,15 +4382,17 @@
       return output_387_reg_move (insn, operands);
 
     case 2:
+      return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
+    case 3:
       return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
 
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "type" "fmov,fmov,ssecvt")
-   (set_attr "prefix" "orig,orig,maybe_vex")
-   (set_attr "mode" "SF,XF,DF")
+  [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
+   (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
+   (set_attr "mode" "SF,XF,DF,DF")
    (set (attr "enabled")
      (if_then_else
        (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
@@ -4481,7 +4483,7 @@
   "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
    && optimize_function_for_speed_p (cfun)
    && (!REG_P (operands[1])
-       || REGNO (operands[0]) != REGNO (operands[1]))
+       || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
    && (!EXT_REX_SSE_REG_P (operands[0])
        || TARGET_AVX512VL)"
   [(set (match_dup 0)
@@ -4534,9 +4536,9 @@
 ;; Conversion from DFmode to SFmode.
 
 (define_insn "truncdfsf2"
-  [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v")
+  [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
 	(float_truncate:SF
-	  (match_operand:DF 1 "register_ssemem_operand" "f,f,vm")))]
+	  (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
   "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
 {
   switch (which_alternative)
@@ -4546,13 +4548,15 @@
       return output_387_reg_move (insn, operands);
 
     case 2:
+      return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
+    case 3:
       return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
 
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "type" "fmov,fmov,ssecvt")
+  [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
    (set_attr "mode" "SF")
    (set (attr "enabled")
      (if_then_else
@@ -4639,7 +4643,7 @@
   "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
    && optimize_function_for_speed_p (cfun)
    && (!REG_P (operands[1])
-       || REGNO (operands[0]) != REGNO (operands[1]))
+       || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
    && (!EXT_REX_SSE_REG_P (operands[0])
        || TARGET_AVX512VL)"
   [(set (match_dup 0)
@@ -16171,19 +16175,20 @@
 

 
 (define_insn "sse4_1_round<mode>2"
-  [(set (match_operand:MODEF 0 "register_operand" "=x,v")
-	(unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "xm,vm")
-		       (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v")
+	(unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "x,m,vm")
+		       (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
 		      UNSPEC_ROUND))]
   "TARGET_SSE4_1"
   "@
+   %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
    %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
    vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
   [(set_attr "type" "ssecvt")
-   (set_attr "prefix_extra" "1,*")
-   (set_attr "length_immediate" "*,1")
-   (set_attr "prefix" "maybe_vex,evex")
-   (set_attr "isa" "noavx512f,avx512f")
+   (set_attr "prefix_extra" "1,1,*")
+   (set_attr "length_immediate" "*,*,1")
+   (set_attr "prefix" "maybe_vex,maybe_vex,evex")
+   (set_attr "isa" "noavx512f,noavx512f,avx512f")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "rintxf2"


More information about the Gcc-patches mailing list