[Bug tree-optimization/91201] [7/8/9/10 Regression] SIMD not generated for horizontal sum of bytes in array
jakub at gcc dot gnu.org
gcc-bugzilla@gcc.gnu.org
Tue Jul 30 10:18:00 GMT 2019
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91201
--- Comment #7 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Untested patch to add the reduc_plus_scal_v{16,32,64}qi expanders.
I wonder whether we also need a reduc_plus_scal_v8qi expander for
TARGET_MMX_WITH_SSE.
--- gcc/config/i386/sse.md.jj 2019-07-28 17:29:41.488143221 +0200
+++ gcc/config/i386/sse.md 2019-07-30 12:05:34.249034097 +0200
@@ -2728,9 +2728,30 @@ (define_expand "reduc_plus_scal_<mode>"
DONE;
})
+(define_expand "reduc_plus_scal_v16qi"
+ [(plus:V16QI
+ (match_operand:QI 0 "register_operand")
+ (match_operand:V16QI 1 "register_operand"))]
+ "TARGET_SSE2"
+{ /* Reduce the V16QI in operand 1 to a single QI value stored in operand 0.  */
+ rtx tmp = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
+ GEN_INT (64))); /* tmp = operand 1 viewed as one 128-bit value, shifted right by 64 bits.  */
+ rtx tmp2 = gen_reg_rtx (V16QImode);
+ emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp))); /* tmp2 = operand 1 + shifted copy, added as V16QI.  */
+ rtx tmp3 = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp3, CONST0_RTX (V16QImode)); /* tmp3 = all-zero V16QI vector.  */
+ rtx tmp4 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3)); /* PSADBW against zero; presumably this sums the bytes of each 64-bit half -- confirm against the PSADBW description.  */
+ tmp4 = gen_lowpart (V16QImode, tmp4); /* Reinterpret the V2DI result as V16QI.  */
+ emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx)); /* Element 0 becomes the QImode result.  */
+ DONE;
+})
+
(define_mode_iterator REDUC_PLUS_MODE
[(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
- (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
+ (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
(define_expand "reduc_plus_scal_<mode>"
[(plus:REDUC_PLUS_MODE
@@ -2741,8 +2762,8 @@ (define_expand "reduc_plus_scal_<mode>"
rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_add<ssehalfvecmodelower>3
- (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
+ rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
+ emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
DONE;
})
More information about the Gcc-bugs
mailing list