This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PATCH: PR target/35757: [4.4 Regression] Incorrect constraint on sse4_1_blendp<ssemodesuffixf2c>


On Sat, Mar 29, 2008 at 10:11 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> This patch restores proper checking of the third argument on blendpd
> and blendps.  It also adds 2 tests, including pblendw.  Tested on
> Linux/Intel64. OK to install?

The gcc.target/i386/sse4_1-blendps-2.c test randomly fails because
src3 is used uninitialized.

Richard.

> Thanks.
>
> H.J.
> ---
> gcc/
>
> 2008-03-29  H.J. Lu  <hongjiu.lu@intel.com>
>
>         PR target/35757
>         * config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Issue
>         proper error message for the third argument on blendpd and
>         blendps.
>
>         * config/i386/sse.md (blendbits): New.
>         (sse4_1_blendp<ssemodesuffixf2c>): Use it.
>
> gcc/testsuite/
>
> 2008-03-29  H.J. Lu  <hongjiu.lu@intel.com>
>
>         PR target/35757
>         * gcc.target/i386/sse4_1-blendps-2.c: New.
>         * gcc.target/i386/sse4_1-pblendw-2.c: Likewise.
>
> --- gcc/config/i386/i386.c.imm ?2008-03-29 07:29:40.000000000 -0700
> +++ gcc/config/i386/i386.c ? ? ?2008-03-29 13:55:36.000000000 -0700
> @@ -19791,9 +19791,14 @@ ix86_expand_sse_4_operands_builtin (enum
>
> ? ? ? case CODE_FOR_sse4_1_roundsd:
> ? ? ? case CODE_FOR_sse4_1_roundss:
> + ? ? ?case CODE_FOR_sse4_1_blendps:
> ? ? ? ?error ("the third argument must be a 4-bit immediate");
> ? ? ? ?return const0_rtx;
>
> + ? ? ?case CODE_FOR_sse4_1_blendpd:
> + ? ? ? error ("the third argument must be a 2-bit immediate");
> + ? ? ? return const0_rtx;
> +
> ? ? ? default:
> ? ? ? ?error ("the third argument must be an 8-bit immediate");
> ? ? ? ?return const0_rtx;
> --- gcc/config/i386/sse.md.imm ?2008-03-29 07:29:40.000000000 -0700
> +++ gcc/config/i386/sse.md ? ? ?2008-03-29 14:01:10.000000000 -0700
> @@ -53,6 +53,9 @@
> ?;; Mapping of vector modes back to the scalar modes
> ?(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
>
> +;; Mapping of immediate bits for blend instructions
> +(define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
> +
> ?;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
>
> ?;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> @@ -6306,7 +6309,7 @@
> ? ? ? ?(vec_merge:SSEMODEF2P
> ? ? ? ? ?(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
> ? ? ? ? ?(match_operand:SSEMODEF2P 1 "register_operand" "0")
> - ? ? ? ? (match_operand:SI 3 "const_0_to_3_operand" "n")))]
> + ? ? ? ? (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
> ? "TARGET_SSE4_1"
> ? "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
> ? [(set_attr "type" "ssemov")
> --- gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c.imm ? ? ? ?2008-03-29 09:54:08.000000000 -0700
> +++ gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c ? ?2008-03-29 09:57:35.000000000 -0700
> @@ -0,0 +1,77 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target sse4 } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +#include "sse4_1-check.h"
> +
> +#include <smmintrin.h>
> +#include <string.h>
> +
> +#define NUM 20
> +
> +#undef MASK
> +#define MASK 0xe
> +
> +static void
> +init_blendps (float *src1, float *src2)
> +{
> + ?int i, sign = 1;
> +
> + ?for (i = 0; i < NUM * 4; i++)
> + ? ?{
> + ? ? ?src1[i] = i * i * sign;
> + ? ? ?src2[i] = (i + 20) * sign;
> + ? ? ?sign = -sign;
> + ? ?}
> +}
> +
> +static int
> +check_blendps (__m128 *dst, float *src1, float *src2)
> +{
> + ?float tmp[4];
> + ?int j;
> +
> + ?memcpy (&tmp[0], src1, sizeof (tmp));
> + ?for (j = 0; j < 4; j++)
> + ? ?if ((MASK & (1 << j)))
> + ? ? ?tmp[j] = src2[j];
> +
> + ?return memcmp (dst, &tmp[0], sizeof (tmp));
> +}
> +
> +static void
> +sse4_1_test (void)
> +{
> + ?__m128 x, y;
> + ?union
> + ? ?{
> + ? ? ?__m128 x[NUM];
> + ? ? ?float f[NUM * 4];
> + ? ?} dst, src1, src2;
> + ?union
> + ? ?{
> + ? ? ?__m128 x;
> + ? ? ?float f[4];
> + ? ?} src3;
> + ?int i;
> +
> + ?init_blendps (src1.f, src2.f);
> +
> + ?/* Check blendps imm8, m128, xmm */
> + ?for (i = 0; i < NUM; i++)
> + ? ?{
> + ? ? ?dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK);
> + ? ? ?if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4]))
> + ? ? ? abort ();
> + ? ?}
> +
> + ? /* Check blendps imm8, xmm, xmm */
> + ?x = _mm_blend_ps (dst.x[2], src3.x, MASK);
> + ?y = _mm_blend_ps (src3.x, dst.x[2], MASK);
> +
> + ?if (check_blendps (&x, &dst.f[8], &src3.f[0]))
> + ? ?abort ();
> +
> + ?if (check_blendps (&y, &src3.f[0], &dst.f[8]))
> + ? ?abort ();
> +}
> --- gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c.imm ? ? ? ?2008-03-29 09:55:29.000000000 -0700
> +++ gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c ? ?2008-03-29 09:57:25.000000000 -0700
> @@ -0,0 +1,79 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target sse4 } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +#include "sse4_1-check.h"
> +
> +#include <smmintrin.h>
> +#include <string.h>
> +
> +#define NUM 20
> +
> +#undef MASK
> +#define MASK 0xfe
> +
> +static void
> +init_pblendw (short *src1, short *src2)
> +{
> + ?int i, sign = 1;
> +
> + ?for (i = 0; i < NUM * 8; i++)
> + ? ?{
> + ? ? ?src1[i] = i * i * sign;
> + ? ? ?src2[i] = (i + 20) * sign;
> + ? ? ?sign = -sign;
> + ? ?}
> +}
> +
> +static int
> +check_pblendw (__m128i *dst, short *src1, short *src2)
> +{
> + ?short tmp[8];
> + ?int j;
> +
> + ?memcpy (&tmp[0], src1, sizeof (tmp));
> + ?for (j = 0; j < 8; j++)
> + ? ?if ((MASK & (1 << j)))
> + ? ? ?tmp[j] = src2[j];
> +
> + ?return memcmp (dst, &tmp[0], sizeof (tmp));
> +}
> +
> +static void
> +sse4_1_test (void)
> +{
> + ?__m128i x, y;
> + ?union
> + ? ?{
> + ? ? ?__m128i x[NUM];
> + ? ? ?short s[NUM * 8];
> + ? ?} dst, src1, src2;
> + ?union
> + ? ?{
> + ? ? ?__m128i x;
> + ? ? ?short s[8];
> + ? ?} src3;
> + ?int i;
> +
> + ?init_pblendw (src1.s, src2.s);
> +
> + ?/* Check pblendw imm8, m128, xmm */
> + ?for (i = 0; i < NUM; i++)
> + ? ?{
> + ? ? ?dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK);
> + ? ? ?if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8]))
> + ? ? ? abort ();
> + ? ?}
> +
> + ? /* Check pblendw imm8, xmm, xmm */
> + ?src3.x = _mm_setzero_si128 ();
> +
> + ?x = _mm_blend_epi16 (dst.x[2], src3.x, MASK);
> + ?y = _mm_blend_epi16 (src3.x, dst.x[2], MASK);
> +
> + ?if (check_pblendw (&x, &dst.s[16], &src3.s[0]))
> + ? ?abort ();
> +
> + ?if (check_pblendw (&y, &src3.s[0], &dst.s[16]))
> + ? ?abort ();
> +}
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]