[Bug target/101185] pr96814 failed after r12-1669 on non-avx512 platform

crazylht at gmail dot com gcc-bugzilla@gcc.gnu.org
Thu Jun 24 01:44:50 GMT 2021


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101185

--- Comment #1 from Hongtao.liu <crazylht at gmail dot com> ---
Alloc order is just another kind of cost, which can be compensated for by
increasing the cost of mask->integer and integer->mask moves.

With the patch below, pr96814 wouldn't generate any mask instructions except for

        kmovd   %eax, %k1
        vpcmpeqd        %ymm1, %ymm1, %ymm1
        vmovdqu8        %ymm1, %ymm0{%k1}{z}

which is what we want.


modified   gcc/config/i386/i386.md
@@ -1335,7 +1335,7 @@
 (define_insn "*cmp<mode>_ccz_1"
   [(set (reg FLAGS_REG)
        (compare (match_operand:SWI1248_AVX512BWDQ_64 0
-                       "nonimmediate_operand" "<r>,?m<r>,$k")
+                       "nonimmediate_operand" "<r>,?m<r>,*k")
                 (match_operand:SWI1248_AVX512BWDQ_64 1 "const0_operand")))]
   "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
   "@
modified   gcc/config/i386/x86-tune-costs.h
@@ -2768,7 +2768,7 @@ struct processor_costs intel_cost = {
   {6, 6, 6, 6, 6},                     /* cost of storing SSE registers
                                           in 32,64,128,256 and 512-bit */
   4, 4,                                /* SSE->integer and integer->SSE moves
*/
-  4, 4,                                /* mask->integer and integer->mask
moves */
+  6, 6,                                /* mask->integer and integer->mask
moves */
   {4, 4, 4},                           /* cost of loading mask register
                                           in QImode, HImode, SImode.  */
   {6, 6, 6},                           /* cost if storing mask register
@@ -2882,7 +2882,7 @@ struct processor_costs generic_cost = {
   {6, 6, 6, 10, 15},                   /* cost of storing SSE registers
                                           in 32,64,128,256 and 512-bit */
   6, 6,                                /* SSE->integer and integer->SSE moves
*/
-  6, 6,                                /* mask->integer and integer->mask
moves */
+  8, 8,                                /* mask->integer and integer->mask
moves */
   {6, 6, 6},                           /* cost of loading mask register
                                           in QImode, HImode, SImode.  */
   {6, 6, 6},                   /* cost if storing mask register


So would the solution of increasing the cost of mask->integer and
integer->mask moves by one more unit (or maybe more), as compensation for the
changed alloc order, be acceptable to you? Or do you insist on reverting the
x86_order_regs_for_local_alloc part?


More information about the Gcc-bugs mailing list