+2018-10-11 Will Schmidt <will_schmidt@vnet.ibm.com>
+
+ * gcc.target/powerpc/fold-vec-insert-char-p8.c: New.
+ * gcc.target/powerpc/fold-vec-insert-char-p9.c: New.
+ * gcc.target/powerpc/fold-vec-insert-double.c: New.
+ * gcc.target/powerpc/fold-vec-insert-float-p8.c: New.
+ * gcc.target/powerpc/fold-vec-insert-float-p9.c: New.
+ * gcc.target/powerpc/fold-vec-insert-int-p8.c: New.
+ * gcc.target/powerpc/fold-vec-insert-int-p9.c: New.
+ * gcc.target/powerpc/fold-vec-insert-longlong.c: New.
+ * gcc.target/powerpc/fold-vec-insert-short-p8.c: New.
+ * gcc.target/powerpc/fold-vec-insert-short-p9.c: New.
+
2018-10-11 Will Schmidt <will_schmidt@vnet.ibm.com>
* gcc.target/powerpc/fold-vec-extract-char.p7.c: New.
* gcc.dg/sinatan-2.c: New test.
* gcc.dg/sinatan-3.c: New test.
-2018-10-11 Will Schmidt <will_schmidt@vnet.ibm.com>
-
- * gcc.target/powerpc/fold-vec-select-char.c: New.
- * gcc.target/powerpc/fold-vec-select-double.c: New.
- * gcc.target/powerpc/fold-vec-select-float.c: New.
- * gcc.target/powerpc/fold-vec-select-int.c: New.
- * gcc.target/powerpc/fold-vec-select-longlong.c: New.
- * gcc.target/powerpc/fold-vec-select-short.c: New.
-
2018-10-11 Will Schmidt <will_schmidt@vnet.ibm.com>
* gcc.target/powerpc/fold-vec-mergeeo-floatdouble.c: New.
--- /dev/null
+/* Verify that overloaded built-ins for vec_insert () with char
+ inputs produce the right codegen. Power8 variant. */
+
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+/* The below contains vec_insert () calls with both variable and constant
+ values. Only the constant value calls are early-gimple folded, but all
+ are tested for coverage. */
+
+#include <altivec.h>
+/* Function names encode the operand types: the first letter after "test"
+   is the signedness of the scalar X (s = signed, u = unsigned) and the
+   second is the vector kind (b = bool, s = signed, u = unsigned).  The
+   _var functions receive the element index I as a run-time argument; the
+   _cst functions pass the literal index 12.  */
+vector bool char testub_var (unsigned char x, vector bool char v, signed int i)
+{
+ return vec_insert (x, v, i);
+}
+vector signed char testss_var (signed char x, vector signed char v, signed int i)
+{
+ return vec_insert (x, v, i);
+}
+vector unsigned char testsu_var (signed char x, vector unsigned char v, signed int i)
+{
+ return vec_insert (x, v, i);
+}
+vector unsigned char testuu_var (unsigned char x, vector unsigned char v, signed int i)
+{
+ return vec_insert (x, v, i);
+}
+vector bool char testub_cst (unsigned char x, vector bool char v)
+{
+ return vec_insert (x, v, 12);
+}
+vector signed char testss_cst (signed char x, vector signed char v)
+{
+ return vec_insert (x, v, 12);
+}
+vector unsigned char testsu_cst (signed char x, vector unsigned char v)
+{
+ return vec_insert (x, v, 12);
+}
+vector unsigned char testuu_cst (unsigned char x, vector unsigned char v)
+{
+ return vec_insert (x, v, 12);
+}
+
+/* one store per _var test */
+/* { dg-final { scan-assembler-times {\mstvx\M|\mstxvw4x\M} 4 } } */
+/* one store-byte per test */
+/* { dg-final { scan-assembler-times {\mstb\M} 8 } } */
+/* one load per test */
+/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 8 } } */
+
+/* one lvebx per _cst test.*/
+/* { dg-final { scan-assembler-times {\mlvebx\M} 4 } } */
+/* one vperm per _cst test.*/
+/* { dg-final { scan-assembler-times {\mvperm\M} 4 } } */
+
--- /dev/null
+/* Verify that overloaded built-ins for vec_insert () with char
+ inputs produce the right codegen. Power9 variant. */
+
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-O2 -mcpu=power9" } */
+
+/* The below contains vec_insert () calls with both variable and constant
+ values. Only the constant value calls are early-gimple folded, but all
+ are tested for coverage. */
+
+#include <altivec.h>
+/* Function names encode the operand types: the first letter after "test"
+   is the signedness of the scalar X (s = signed, u = unsigned) and the
+   second is the vector kind (b = bool, s = signed, u = unsigned).  The
+   _var functions receive the element index I as a run-time argument; the
+   _cst functions pass the literal index 12.  */
+vector bool char testub_var (unsigned char x, vector bool char v, signed int i)
+{
+ return vec_insert (x, v, i);
+}
+vector signed char testss_var (signed char x, vector signed char v, signed int i)
+{
+ return vec_insert (x, v, i);
+}
+vector unsigned char testsu_var (signed char x, vector unsigned char v, signed int i)
+{
+ return vec_insert (x, v, i);
+}
+vector unsigned char testuu_var (unsigned char x, vector unsigned char v, signed int i)
+{
+ return vec_insert (x, v, i);
+}
+vector bool char testub_cst (unsigned char x, vector bool char v)
+{
+ return vec_insert (x, v, 12);
+}
+vector signed char testss_cst (signed char x, vector signed char v)
+{
+ return vec_insert (x, v, 12);
+}
+vector unsigned char testsu_cst (signed char x, vector unsigned char v)
+{
+ return vec_insert (x, v, 12);
+}
+vector unsigned char testuu_cst (unsigned char x, vector unsigned char v)
+{
+ return vec_insert (x, v, 12);
+}
+
+/* Each variable test: load immediate, add, store, stb, load. */
+/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 4 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mstb\M} 4 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mlvebx\M|\mlxv\M|\mlvx\M} 4 { target lp64 } } } */
+/* an insert and a move per constant test. */
+/* { dg-final { scan-assembler-times {\mmtvsrwz\M} 4 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mvinsertb\M} 4 { target lp64 } } } */
+
+/* -m32 codegen. */
+/* { dg-final { scan-assembler-times {\mrlwinm\M} 4 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\madd\M} 4 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mstxv\M} 4 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mstb\M} 8 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mlxv\M} 8 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mlvebx\M} 4 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mvperm\M} 4 { target ilp32 } } } */
--- /dev/null
+/* Verify that overloaded built-ins for vec_insert with
+ double inputs produce the right codegen. */
+
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mvsx -O2" } */
+
+#include <altivec.h>
+/* testd_var inserts D into VD at the run-time element index SI;
+   testd_cst inserts D into VD at the constant element index 1.  */
+vector double
+testd_var (double d, vector double vd, signed int si)
+{
+ return vec_insert (d, vd, si);
+}
+
+vector double
+testd_cst (double d, vector double vd)
+{
+ return vec_insert (d, vd, 1);
+}
+/* The number of xxpermdi instructions varies between
+ P7,P8,P9, ensure at least one hit. */
+/* { dg-final { scan-assembler {\mxxpermdi\M} } } */
+
+/* { dg-final { scan-assembler-times {\mrldic\M|\mrlwinm\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M|\mstvx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 1 } } */
+
--- /dev/null
+/* Verify that overloaded built-ins for vec_insert with float
+ inputs produce the right codegen. Power8 variant. */
+
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+#include <altivec.h>
+/* testf_var inserts F into VF at the run-time element index I;
+   testf_cst inserts F at the literal index 12.
+   NOTE(review): 12 looks larger than the number of float elements in
+   the vector; vec_insert presumably reduces the index modulo the
+   element count -- confirm against the built-in's documentation.  */
+vector float
+testf_var (float f, vector float vf, signed int i)
+{
+ return vec_insert (f, vf, i);
+}
+
+vector float
+testf_cst (float f, vector float vf)
+{
+ return vec_insert (f, vf, 12);
+}
+
+/* { dg-final { scan-assembler-times {\mstvx\M|\mstxv\M|\mstxvd2x\M} 1 } } */
+/* The cst test has stfs instead of stfsx. */
+/* { dg-final { scan-assembler-times {\mstfs\M|\mstfsx\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlvx\M|\mlxv\M|\mlxvd2x\M|\mlxvw4x\M} 2 } } */
+
+/* cst test has a lvewx,vperm combo */
+/* { dg-final { scan-assembler-times {\mlvewx\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvperm\M} 1 } } */
+
--- /dev/null
+/* Verify that overloaded built-ins for vec_insert with float
+ inputs produce the right codegen. Power9 variant. */
+
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-O2 -mcpu=power9" } */
+
+#include <altivec.h>
+/* testf_var inserts F into VF at the run-time element index I;
+   testf_cst inserts F at the literal index 12.
+   NOTE(review): 12 looks larger than the number of float elements in
+   the vector; vec_insert presumably reduces the index modulo the
+   element count -- confirm against the built-in's documentation.  */
+vector float
+testf_var (float f, vector float vf, signed int i)
+{
+ return vec_insert (f, vf, i);
+}
+
+vector float
+testf_cst (float f, vector float vf)
+{
+ return vec_insert (f, vf, 12);
+}
+
+/* var test has a load and store. */
+/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mstfsx\M} 1 { target lp64 } } } */
+
+/* The cst test has an xscvdpspn, xxextractuw, xxinsertw combo. */
+/* { dg-final { scan-assembler-times {\mxscvdpspn\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mxxextractuw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mxxinsertw\M} 1 { target lp64 } } } */
+
+/* { dg-final { scan-assembler-times {\mstfs\M} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mlxv\M} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mlvewx\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mvperm\M} 1 { target ilp32 } } } */
--- /dev/null
+/* Verify that overloaded built-ins for vec_insert() with int
+ inputs produce the right codegen. Power8 variant. */
+
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+#include <altivec.h>
+/* Coverage: testbi_* use a bool int vector, testsi_* a signed int vector,
+   and testui1_*/testui2_* an unsigned int vector with a signed and an
+   unsigned scalar X respectively.  The _var functions receive the element
+   index I at run time; the _cst functions pass the literal index 12.
+   NOTE(review): 12 looks larger than the number of int elements in the
+   vector; vec_insert presumably reduces the index modulo the element
+   count -- confirm against the built-in's documentation.  */
+vector bool int
+testbi_var(unsigned int x, vector bool int v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector signed int
+testsi_var(signed int x, vector signed int v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector unsigned int
+testui1_var(signed int x, vector unsigned int v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector unsigned int
+testui2_var(unsigned int x, vector unsigned int v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector bool int
+testbi_cst(unsigned int x, vector bool int v)
+{
+ return vec_insert(x, v, 12);
+}
+vector signed int
+testsi_cst(signed int x, vector signed int v)
+{
+ return vec_insert(x, v, 12);
+}
+vector unsigned int
+testui1_cst(signed int x, vector unsigned int v)
+{
+ return vec_insert(x, v, 12);
+}
+vector unsigned int
+testui2_cst(unsigned int x, vector unsigned int v)
+{
+ return vec_insert(x, v, 12);
+}
+
+/* Each test has lvx (8). cst tests have additional lvewx. (4) */
+/* var tests have both stwx (4) and stvx (4). cst tests have stw (4).*/
+/* { dg-final { scan-assembler-times {\mstvx\M|\mstwx\M|\mstw\M|\mstxvw4x\M} 12 } } */
+/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 8 } } */
+
+/* { dg-final { scan-assembler-times {\mlvewx\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvperm\M} 4 } } */
--- /dev/null
+/* Verify that overloaded built-ins for vec_insert() with int
+ inputs produce the right codegen. Power9 variant. */
+
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-O2 -mcpu=power9" } */
+
+#include <altivec.h>
+/* Coverage: testbi_* use a bool int vector, testsi_* a signed int vector,
+   and testui1_*/testui2_* an unsigned int vector with a signed and an
+   unsigned scalar X respectively.  The _var functions receive the element
+   index I at run time; the _cst functions pass the literal index 12.
+   NOTE(review): 12 looks larger than the number of int elements in the
+   vector; vec_insert presumably reduces the index modulo the element
+   count -- confirm against the built-in's documentation.  */
+vector bool int
+testbi_var(unsigned int x, vector bool int v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector signed int
+testsi_var(signed int x, vector signed int v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector unsigned int
+testui1_var(signed int x, vector unsigned int v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector unsigned int
+testui2_var(unsigned int x, vector unsigned int v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector bool int
+testbi_cst(unsigned int x, vector bool int v)
+{
+ return vec_insert(x, v, 12);
+}
+vector signed int
+testsi_cst(signed int x, vector signed int v)
+{
+ return vec_insert(x, v, 12);
+}
+vector unsigned int
+testui1_cst(signed int x, vector unsigned int v)
+{
+ return vec_insert(x, v, 12);
+}
+vector unsigned int
+testui2_cst(unsigned int x, vector unsigned int v)
+{
+ return vec_insert(x, v, 12);
+}
+
+
+/* Each variable test: load immediate, add, store, stwx, load. */
+/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mstwx\M} 4 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 4 { target lp64 } } } */
+
+/* an insert and a move per constant test. */
+/* { dg-final { scan-assembler-times {\mmtvsrwz\M} 4 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mxxinsertw\M} 4 { target lp64 } } } */
+
+
+/* { dg-final { scan-assembler-times {\maddi\M} 12 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mstw\M} 8 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mlxv\M} 8 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mlvewx\M} 4 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mvperm\M} 4 { target ilp32 } } } */
--- /dev/null
+/* Verify that overloaded built-ins for vec_insert() with long long
+ inputs produce the right codegen. */
+
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+#include <altivec.h>
+/* Coverage: testbl_* use a bool long long vector, testsl_* a signed long
+   long vector, and testul1_*/testul2_* an unsigned long long vector with
+   a signed and an unsigned scalar X respectively.  The _var functions
+   receive the element index I at run time; the _cst functions pass the
+   literal index 12.
+   NOTE(review): 12 looks larger than the number of long long elements in
+   the vector; vec_insert presumably reduces the index modulo the element
+   count -- confirm against the built-in's documentation.  */
+vector bool long long
+testbl_var(unsigned long long x, vector bool long long v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+
+vector signed long long
+testsl_var(signed long long x, vector signed long long v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+
+vector unsigned long long
+testul1_var(signed long long x, vector unsigned long long v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+
+vector unsigned long long
+testul2_var(unsigned long long x, vector unsigned long long v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+
+vector bool long long
+testbl_cst(unsigned long long x, vector bool long long v)
+{
+ return vec_insert(x, v, 12);
+}
+
+vector signed long long
+testsl_cst(signed long long x, vector signed long long v)
+{
+ return vec_insert(x, v, 12);
+}
+
+vector unsigned long long
+testul1_cst(signed long long x, vector unsigned long long v)
+{
+ return vec_insert(x, v, 12);
+}
+
+vector unsigned long long
+testul2_cst(unsigned long long x, vector unsigned long long v)
+{
+ return vec_insert(x, v, 12);
+}
+
+/* Number of xxpermdi insns varies between power targets. Ensure at least one. */
+/* { dg-final { scan-assembler {\mxxpermdi\M} } } */
+
+/* { dg-final { scan-assembler-times {\mrldic\M|\mrlwinm\M} 4 } } */
+
+/* The number of addi instructions decreases on newer systems. Measured as 8 on
+ power7 and power8 targets, and drops to 4 on power9 targets that use the
+ newer stxv,lxv instructions. For this test ensure we get at least one. */
+/* { dg-final { scan-assembler {\maddi\M} } } */
+/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstvx\M|\mstxv\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mstdx\M} 4 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mstw\M} 8 { target ilp32 } } } */
+
+/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 4 } } */
+
--- /dev/null
+/* Verify that overloaded built-ins for vec_insert() with short
+ inputs produce the right codegen. Power8 variant. */
+
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+#include <altivec.h>
+/* Coverage: testbs_* use a bool short vector, testss_* a signed short
+   vector, and testus1_*/testus2_* an unsigned short vector with a signed
+   and an unsigned scalar X respectively.  The _var functions receive the
+   element index I at run time; the _cst functions pass the literal
+   index 12.
+   NOTE(review): testbs_cst takes a signed short X while testbs_var takes
+   an unsigned short X -- confirm whether the difference is intentional.
+   NOTE(review): 12 looks larger than the number of short elements in the
+   vector; vec_insert presumably reduces the index modulo the element
+   count -- confirm against the built-in's documentation.  */
+vector bool short
+testbs_var(unsigned short x, vector bool short v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector signed short
+testss_var(signed short x, vector signed short v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector unsigned short
+testus1_var(signed short x, vector unsigned short v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector unsigned short
+testus2_var(unsigned short x, vector unsigned short v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector bool short
+testbs_cst(signed short x, vector bool short v)
+{
+ return vec_insert(x, v, 12);
+}
+vector signed short
+testss_cst(signed short x, vector signed short v)
+{
+ return vec_insert(x, v, 12);
+}
+vector unsigned short
+testus1_cst(signed short x, vector unsigned short v)
+{
+ return vec_insert(x, v, 12);
+}
+vector unsigned short
+testus2_cst(unsigned short x, vector unsigned short v)
+{
+ return vec_insert(x, v, 12);
+}
+
+/* { dg-final { scan-assembler-times {\mlhz\M|\mlvx\M|\mlxv\M|\mlxvw4x\M} 8 } } */
+/* Stores: two per variable test, one per cst test. */
+/* { dg-final { scan-assembler-times {\msthx\M|\mstvx\M|\msth\M|\mstxvw4x\M} 12 } } */
+
+/* { dg-final { scan-assembler-times {\mlvehx\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvperm\M} 4 } } */
+
--- /dev/null
+/* Verify that overloaded built-ins for vec_insert() with short
+ inputs produce the right codegen. Power9 variant. */
+
+/* { dg-do compile { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-O2 -mcpu=power9" } */
+
+#include <altivec.h>
+/* Coverage: testbs_* use a bool short vector, testss_* a signed short
+   vector, and testus1_*/testus2_* an unsigned short vector with a signed
+   and an unsigned scalar X respectively.  The _var functions receive the
+   element index I at run time; the _cst functions pass the literal
+   index 12.
+   NOTE(review): testbs_cst takes a signed short X while testbs_var takes
+   an unsigned short X -- confirm whether the difference is intentional.
+   NOTE(review): 12 looks larger than the number of short elements in the
+   vector; vec_insert presumably reduces the index modulo the element
+   count -- confirm against the built-in's documentation.  */
+vector bool short
+testbs_var(unsigned short x, vector bool short v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector signed short
+testss_var(signed short x, vector signed short v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector unsigned short
+testus1_var(signed short x, vector unsigned short v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector unsigned short
+testus2_var(unsigned short x, vector unsigned short v, signed int i)
+{
+ return vec_insert(x, v, i);
+}
+vector bool short
+testbs_cst(signed short x, vector bool short v)
+{
+ return vec_insert(x, v, 12);
+}
+vector signed short
+testss_cst(signed short x, vector signed short v)
+{
+ return vec_insert(x, v, 12);
+}
+vector unsigned short
+testus1_cst(signed short x, vector unsigned short v)
+{
+ return vec_insert(x, v, 12);
+}
+vector unsigned short
+testus2_cst(unsigned short x, vector unsigned short v)
+{
+ return vec_insert(x, v, 12);
+}
+
+/* { dg-final { scan-assembler-times {\mmtvsrwz\M} 4 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mvinserth\M} 4 { target lp64 } } } */
+
+/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 4 { target lp64 }} } */
+
+/* -m32 uses sth/lvehx as part of the sequence. */
+/* { dg-final { scan-assembler-times {\msth\M} 8 { target ilp32 }} } */
+/* { dg-final { scan-assembler-times {\mlvehx\M} 4 { target ilp32 }} } */
+/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 8 { target ilp32 }} } */
+