This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH: PR target/35757: [4.4 Regression] Incorrect constraint on sse4_1_blendp<ssemodesuffixf2c>
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sat, 29 Mar 2008 14:11:43 -0700
- Subject: PATCH: PR target/35757: [4.4 Regression] Incorrect constraint on sse4_1_blendp<ssemodesuffixf2c>
This patch restores proper checking of the third argument on blendpd and
blendps. It also adds 2 tests, including one for pblendw. Tested on
Linux/Intel64. OK to install?
Thanks.
H.J.
---
gcc/
2008-03-29 H.J. Lu <hongjiu.lu@intel.com>
PR target/35757
* config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Issue
proper error message for the third argument on blendpd and
blendps.
* config/i386/sse.md (blendbits): New.
(sse4_1_blendp<ssemodesuffixf2c>): Use it.
gcc/testsuite/
2008-03-29 H.J. Lu <hongjiu.lu@intel.com>
PR target/35757
* gcc.target/i386/sse4_1-blendps-2.c: New.
* gcc.target/i386/sse4_1-pblendw-2.c: Likewise.
--- gcc/config/i386/i386.c.imm 2008-03-29 07:29:40.000000000 -0700
+++ gcc/config/i386/i386.c 2008-03-29 13:55:36.000000000 -0700
@@ -19791,9 +19791,14 @@ ix86_expand_sse_4_operands_builtin (enum
case CODE_FOR_sse4_1_roundsd:
case CODE_FOR_sse4_1_roundss:
+ case CODE_FOR_sse4_1_blendps:
error ("the third argument must be a 4-bit immediate");
return const0_rtx;
+ case CODE_FOR_sse4_1_blendpd:
+ error ("the third argument must be a 2-bit immediate");
+ return const0_rtx;
+
default:
error ("the third argument must be an 8-bit immediate");
return const0_rtx;
--- gcc/config/i386/sse.md.imm 2008-03-29 07:29:40.000000000 -0700
+++ gcc/config/i386/sse.md 2008-03-29 14:01:10.000000000 -0700
@@ -53,6 +53,9 @@
;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
+;; Mapping of immediate bits for blend instructions
+(define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
+
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -6306,7 +6309,7 @@
(vec_merge:SSEMODEF2P
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
(match_operand:SSEMODEF2P 1 "register_operand" "0")
- (match_operand:SI 3 "const_0_to_3_operand" "n")))]
+ (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
"TARGET_SSE4_1"
"blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemov")
--- gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c.imm 2008-03-29 09:54:08.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c 2008-03-29 09:57:35.000000000 -0700
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#include "sse4_1-check.h"
+
+#include <smmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+#undef MASK
+#define MASK 0xe
+
+static void
+init_blendps (float *src1, float *src2)
+{
+ int i, sign = 1;
+
+ for (i = 0; i < NUM * 4; i++)
+ {
+ src1[i] = i * i * sign;
+ src2[i] = (i + 20) * sign;
+ sign = -sign;
+ }
+}
+
+static int
+check_blendps (__m128 *dst, float *src1, float *src2)
+{
+ float tmp[4];
+ int j;
+
+ memcpy (&tmp[0], src1, sizeof (tmp));
+ for (j = 0; j < 4; j++)
+ if ((MASK & (1 << j)))
+ tmp[j] = src2[j];
+
+ return memcmp (dst, &tmp[0], sizeof (tmp));
+}
+
+static void
+sse4_1_test (void)
+{
+ __m128 x, y;
+ union
+ {
+ __m128 x[NUM];
+ float f[NUM * 4];
+ } dst, src1, src2;
+ union
+ {
+ __m128 x;
+ float f[4];
+ } src3;
+ int i;
+
+ init_blendps (src1.f, src2.f);
+
+ /* Check blendps imm8, m128, xmm */
+ for (i = 0; i < NUM; i++)
+ {
+ dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK);
+ if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4]))
+ abort ();
+ }
+
+ /* Check blendps imm8, xmm, xmm */
+ x = _mm_blend_ps (dst.x[2], src3.x, MASK);
+ y = _mm_blend_ps (src3.x, dst.x[2], MASK);
+
+ if (check_blendps (&x, &dst.f[8], &src3.f[0]))
+ abort ();
+
+ if (check_blendps (&y, &src3.f[0], &dst.f[8]))
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c.imm 2008-03-29 09:55:29.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c 2008-03-29 09:57:25.000000000 -0700
@@ -0,0 +1,79 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#include "sse4_1-check.h"
+
+#include <smmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+#undef MASK
+#define MASK 0xfe
+
+static void
+init_pblendw (short *src1, short *src2)
+{
+ int i, sign = 1;
+
+ for (i = 0; i < NUM * 8; i++)
+ {
+ src1[i] = i * i * sign;
+ src2[i] = (i + 20) * sign;
+ sign = -sign;
+ }
+}
+
+static int
+check_pblendw (__m128i *dst, short *src1, short *src2)
+{
+ short tmp[8];
+ int j;
+
+ memcpy (&tmp[0], src1, sizeof (tmp));
+ for (j = 0; j < 8; j++)
+ if ((MASK & (1 << j)))
+ tmp[j] = src2[j];
+
+ return memcmp (dst, &tmp[0], sizeof (tmp));
+}
+
+static void
+sse4_1_test (void)
+{
+ __m128i x, y;
+ union
+ {
+ __m128i x[NUM];
+ short s[NUM * 8];
+ } dst, src1, src2;
+ union
+ {
+ __m128i x;
+ short s[8];
+ } src3;
+ int i;
+
+ init_pblendw (src1.s, src2.s);
+
+ /* Check pblendw imm8, m128, xmm */
+ for (i = 0; i < NUM; i++)
+ {
+ dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK);
+ if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8]))
+ abort ();
+ }
+
+ /* Check pblendw imm8, xmm, xmm */
+ src3.x = _mm_setzero_si128 ();
+
+ x = _mm_blend_epi16 (dst.x[2], src3.x, MASK);
+ y = _mm_blend_epi16 (src3.x, dst.x[2], MASK);
+
+ if (check_pblendw (&x, &dst.s[16], &src3.s[0]))
+ abort ();
+
+ if (check_pblendw (&y, &src3.s[0], &dst.s[16]))
+ abort ();
+}