[PATCH, rs6000] Optimization for vec_xl_sext
HAO CHEN GUI
guihaoc@linux.ibm.com
Fri Sep 10 05:45:08 GMT 2021
Bill,
Thanks so much for your advice.
I refined the patch and passed the bootstrap and regression test.
Just one thing, the test case becomes unsupported on P9 if I set "{
dg-require-effective-target power10_ok }". I just want the test case to
be compiled and check its assembly. Do we need set "power10_ok"?
I tried to disable line wrap in my email editor. Please let me know
if you still see line wrap. Thanks.
ChangeLog
2021-09-10 Haochen Gui <guihaoc@linux.ibm.com>
gcc/
* config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
Modify the expansion for sign extension. All extentions are done
within VSX resgisters.
* gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
gcc/testsuite/
* gcc.target/powerpc/p10_vec_xl_sext.c: New test.
patch.diff
diff --git a/gcc/config/rs6000/rs6000-call.c
b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
tree exp, rtx target, bool bl
if (sign_extend)
{
- rtx discratch = gen_reg_rtx (DImode);
+ rtx discratch = gen_reg_rtx (V2DImode);
rtx tiscratch = gen_reg_rtx (TImode);
/* Emit the lxvr*x insn. */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
icode, tree exp, rtx target, bool bl
return 0;
emit_insn (pat);
- /* Emit a sign extension from QI,HI,WI to double (DI). */
- rtx scratch = gen_lowpart (smode, tiscratch);
+ /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
+ rtx temp1, temp2;
if (icode == CODE_FOR_vsx_lxvrbx)
- emit_insn (gen_extendqidi2 (discratch, scratch));
+ {
+ temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+ }
else if (icode == CODE_FOR_vsx_lxvrhx)
- emit_insn (gen_extendhidi2 (discratch, scratch));
+ {
+ temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+ }
else if (icode == CODE_FOR_vsx_lxvrwx)
- emit_insn (gen_extendsidi2 (discratch, scratch));
- /* Assign discratch directly if scratch is already DI. */
- if (icode == CODE_FOR_vsx_lxvrdx)
- discratch = scratch;
+ {
+ temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+ }
+ else if (icode == CODE_FOR_vsx_lxvrdx)
+ discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+ else
+ gcc_unreachable ();
- /* Emit the sign extension from DI (double) to TI (quad). */
- emit_insn (gen_extendditi2 (target, discratch));
+ /* Emit the sign extension from V2DI (double) to TI (quad). */
+ temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+ emit_insn (gen_extendditi2_vector (target, temp2));
return target;
}
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
"vextsh2<wd> %0,%1"
[(set_attr "type" "vecexts")])
-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
[(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
UNSPEC_VSX_SIGN_EXTEND))]
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..fcecc542d07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
On 10/9/2021 上午 4:49, Bill Schmidt wrote:
> Hi Haochen,
>
> This patch was sent with "format=flowed", so it doesn't apply. That
> makes it harder to review. Can you please make sure you disable line
> wrap from your patch submissions, at least in the patch part?
>
> On 9/6/21 1:18 AM, HAO CHEN GUI wrote:
>> Hi,
>>
>> The patch optimized the code generation for vec_xl_sext builtin.
>> Now
>> all the sign extensions are done on VSX registers directly.
>>
>> Bootstrapped and tested on powerpc64le-linux with no
>> regressions. Is
>> this okay for trunk? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>>
>> 2021-09-06 Haochen Gui <guihaoc@linux.ibm.com>
>>
>> gcc/
>> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
>> Modify the expansion for sign extension.
>> * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
>>
>> gcc/testsuite/
>> * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>>
>>
>> patch.diff
>>
>> diff --git a/gcc/config/rs6000/rs6000-call.c
>> b/gcc/config/rs6000/rs6000-call.c
>> index b4e13af4dc6..54fdaf47be3 100644
>> --- a/gcc/config/rs6000/rs6000-call.c
>> +++ b/gcc/config/rs6000/rs6000-call.c
>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
>> tree exp, rtx target, bool bl
>
> Above is the first wrapped line that causes the patch to not apply.
>>
>> if (sign_extend)
>> {
>> - rtx discratch = gen_reg_rtx (DImode);
>> + rtx discratch = gen_reg_rtx (V2DImode);
>> rtx tiscratch = gen_reg_rtx (TImode);
>>
>> /* Emit the lxvr*x insn. */
>> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
>> icode, tree exp, rtx target, bool bl
>> return 0;
>> emit_insn (pat);
>>
>> - /* Emit a sign extension from QI,HI,WI to double (DI). */
>> - rtx scratch = gen_lowpart (smode, tiscratch);
>> + /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
>> + rtx temp1, temp2;
>> if (icode == CODE_FOR_vsx_lxvrbx)
>> - emit_insn (gen_extendqidi2 (discratch, scratch));
>> + {
>> + temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
>> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
>> + }
>> else if (icode == CODE_FOR_vsx_lxvrhx)
>> - emit_insn (gen_extendhidi2 (discratch, scratch));
>> + {
>> + temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
>> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
>> + }
>> else if (icode == CODE_FOR_vsx_lxvrwx)
>> - emit_insn (gen_extendsidi2 (discratch, scratch));
>> - /* Assign discratch directly if scratch is already DI. */
>> - if (icode == CODE_FOR_vsx_lxvrdx)
>> - discratch = scratch;
>> + {
>> + temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
>> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
>> + }
>> + else if (icode == CODE_FOR_vsx_lxvrdx)
>> + discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
>> + else
>> + return 0;
>
>
> Please call gcc_unreachable () here, instead of returning 0. If we
> call this function with an unexpected insn_code, we want to ICE right
> away to make it easier to debug the problem.
>
>>
>> - /* Emit the sign extension from DI (double) to TI (quad). */
>> - emit_insn (gen_extendditi2 (target, discratch));
>> + /* Emit the sign extension from V2DI (double) to TI (quad). */
>> + temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
>> + emit_insn (gen_extendditi2_vector (target, temp2));
>>
>> return target;
>> }
>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>> index bcb92be2f5c..987f21bbc22 100644
>> --- a/gcc/config/rs6000/vsx.md
>> +++ b/gcc/config/rs6000/vsx.md
>> @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
>> "vextsh2<wd> %0,%1"
>> [(set_attr "type" "vecexts")])
>>
>> -(define_insn "*vsx_sign_extend_si_v2di"
>> +(define_insn "vsx_sign_extend_si_v2di"
>> [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
>> (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
>> UNSPEC_VSX_SIGN_EXTEND))]
>> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>> new file mode 100644
>> index 00000000000..e5b84e6167a
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>> @@ -0,0 +1,32 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
>
> You need a { dg-require-effective-target int128 } directive. Also {
> dg-require-effective-target power10_ok }.
>> +
>> +#include <altivec.h>
>> +
>> +vector signed __int128
>> +foo1 (signed long a, signed char *b)
>> +{
>> + return vec_xl_sext (a, b);
>> +}
>> +
>> +vector signed __int128
>> +foo2 (signed long a, signed short *b)
>> +{
>> + return vec_xl_sext (a, b);
>> +}
>> +
>> +vector signed __int128
>> +foo3 (signed long a, signed int *b)
>> +{
>> + return vec_xl_sext (a, b);
>> +}
>> +
>> +vector signed __int128
>> +foo4 (signed long a, signed long *b)
>> +{
>> + return vec_xl_sext (a, b);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
>> +/* { dg-final { scan-assembler-times
>> {\mvextsh2d\M|\mvextsw2d\M|\mvextsb2d\M} 3 } } */
>
> I'd rather see the exact number of times each instruction is expected,
> since this would match if all of foo1, foo2, and foo3 emitted
> vextsb2d, for example.
>
> I don't see any additional problems...over to Segher.
>
> Thanks for the patch!
> Bill
>
>> +/* { dg-final { scan-assembler-not "mtvsrdd" } } */
>>
More information about the Gcc-patches
mailing list