This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]: Committed: Fix PR target/22152


Hello!

The attached patch substantially improves the handling of MMX DImode values. It turns out that, in order to reliably separate "native" and MMX DImode values, a new mode is needed. Since we are using vector registers, the natural choice is V1DImode, a vector mode with one DImode element. After this change, V1DImode is considered to be a native MMX mode (native MMX modes were successfully separated from non-vector modes by the patch committed a couple of weeks ago).

The patch rewrites all MMX DImode patterns to use V1DImode, updating mmintrin.h for the new and changed builtins along the way. It also rewrites the MMX shift patterns in the same way the SSE shift patterns were changed some time ago.

The benefits of the patch can be seen from the original testcase from PR target/22152. For a slightly changed testcase (to avoid uninitialized variables), unpatched gcc generates barely recognizable code:

unsigned_add3:
       pushl   %ebp
       pxor    %mm1, %mm1
       movl    %esp, %ebp
       pushl   %edi
       pushl   %esi
       pushl   %ebx
       xorl    %ebx, %ebx
       subl    $84, %esp
       movl    12(%ebp), %edi
       movl    8(%ebp), %eax
       movl    20(%ebp), %esi
       movq    (%eax), %mm0
       cmpl    $1, %esi
       movq    %mm0, -56(%ebp)
       movl    (%edi), %eax
       movl    4(%edi), %edx
       movq    -56(%ebp), %mm0
       movl    %eax, -48(%ebp)
       movl    %edx, -44(%ebp)
       pcmpeqb %mm0, %mm0
       movq    -56(%ebp), %mm3
       psubq   %mm0, %mm1
       movl    $1, %eax
       movq    %mm1, %mm2
       movq    -56(%ebp), %mm0
       movq    -48(%ebp), %mm1
       paddq   %mm0, %mm1
       psrlq   $1, %mm0
       movq    %mm1, -72(%ebp)
       movq    -48(%ebp), %mm1
       pand    (%edi), %mm3
       psrlq   $1, %mm1
       pand    %mm2, %mm3
       paddq   %mm1, %mm0
       movl    -72(%ebp), %edx
       paddq   %mm3, %mm0
       movl    -68(%ebp), %ecx
       psrlq   $63, %mm0
       movq    %mm0, -32(%ebp)
       jbe     .L3
       .p2align 4,,10
       .p2align 3
.L6:
       movl    16(%ebp), %ebx
       movl    %edx, -8(%ebx,%eax,8)
       movl    %ecx, -4(%ebx,%eax,8)
       movl    8(%ebp), %edx
       movq    (%edx,%eax,8), %mm0
       movq    %mm0, -88(%ebp)
       pand    (%edi,%eax,8), %mm0
       movl    -88(%ebp), %edx
       pand    %mm2, %mm0
       movl    %edx, -40(%ebp)
       movl    -84(%ebp), %ecx
       movl    %ecx, -36(%ebp)
       movq    -40(%ebp), %mm1
       movq    -40(%ebp), %mm3
       paddq   (%edi,%eax,8), %mm1
       paddq   %mm0, %mm3
       paddq   -32(%ebp), %mm1
       movq    -32(%ebp), %mm0
       movq    %mm1, -96(%ebp)
       pand    %mm3, %mm0
       incl    %eax
       paddq   %mm0, %mm3
       movl    -96(%ebp), %edx
       movl    -92(%ebp), %ecx
       movq    %mm3, -32(%ebp)
       cmpl    %eax, %esi
       ja      .L6
       leal    -8(,%esi,8), %ebx
.L3:
       movl    16(%ebp), %eax
       movl    %edx, (%eax,%ebx)
       movl    %ecx, 4(%eax,%ebx)
       movq    -32(%ebp), %mm0
       addl    $84, %esp
       popl    %ebx
       popl    %esi
       popl    %edi
       leave
       ret

The difference with the patch is noticeable (please note the code in the loop):

unsigned_add3:
       pushl   %ebp
       pxor    %mm0, %mm0
       movl    %esp, %ebp
       xorl    %eax, %eax
       pushl   %edi
       movl    20(%ebp), %edx
       pushl   %esi
       leal    -1(%edx), %ecx
       pushl   %ebx
       movl    8(%ebp), %esi
       movl    12(%ebp), %ebx
       movq    (%esi), %mm2
       movq    (%ebx), %mm4
       movq    %mm2, %mm1
       movq    %mm4, %mm3
       pcmpeqb %mm2, %mm1
       psrlq   $1, %mm3
       psubq   %mm1, %mm0
       xorl    %edx, %edx
       movq    %mm0, %mm6
       movq    %mm2, %mm1
       movq    %mm2, %mm0
       paddq   %mm4, %mm1
       pand    %mm4, %mm2
       psrlq   $1, %mm0
       pand    %mm6, %mm2
       paddq   %mm3, %mm0
       movl    16(%ebp), %edi
       paddq   %mm2, %mm0
       movq    %mm1, %mm5
       psrlq   $63, %mm0
       cmpl    $1, 20(%ebp)
       movq    %mm0, %mm3
       jbe     .L3
       .align 16
.L6:
       movq    %mm5, (%edi,%eax,8)
       movq    8(%esi,%eax,8), %mm0
       movq    8(%ebx,%eax,8), %mm2
       movq    %mm0, %mm1
       incl    %eax
       pand    %mm2, %mm1
       cmpl    %ecx, %eax
       pand    %mm6, %mm1
       paddq   %mm0, %mm2
       paddq   %mm1, %mm0
       paddq   %mm3, %mm2
       movq    %mm3, %mm1
       movq    %mm2, %mm5
       pand    %mm0, %mm1
       paddq   %mm1, %mm0
       movq    %mm0, %mm3
       jne     .L6
       movl    20(%ebp), %eax
       leal    -8(,%eax,8), %edx
.L3:
       movq    %mm5, (%edi,%edx)
       movq    %mm3, %mm0
       popl    %ebx
       popl    %esi
       popl    %edi
       leave
       ret

(FWIW, the result is 50% shorter object code).

Fortunately, none of the changed builtins were documented, so IMO we are free to change the arguments of the builtin functions; the intrinsic functions were not changed at all.

2008-03-07 Uros Bizjak <ubizjak@gmail.com>

PR target/22152
* config/i386/i386-modes.def (V1DI): New vector mode.
* config/i386/i386.h (VALID_MMX_REG_MODE): Add V1DImode.
* config/i386/mmx.md (MMXMODEI8): New mode iterator.
(MMXMODE248): Ditto.
(MMXMODE): Add V1DI mode.
(mmxvecsize): Change DI mode to V1DI mode.
("mov<mode>"): Use MMXMODEI8 mode iterator.
("*mov<mode>_internal_rex64"): Ditto.
("*mov<mode>_internal"): Ditto.
("mmx_add<mode>3"): Ditto. Handle V1DImode for TARGET_SSE2.
("mmx_sub<mode>3"): Ditto.
("mmx_adddi3"): Remove insn pattern.
("mmx_subdi3"): Ditto.
("mmx_ashr<mode>3"): Use SImode and "yN" constraint for operand 2.
("mmx_lshr<mode>3"): Ditto. Use MMXMODE248 mode iterator.
("mmx_ashl<mode>3"): Ditto.
("mmx_lshrdi3"): Remove insn pattern.
("mmx_ashldi3"): Ditto.
* config/i386/i386.c (classify_argument): Handle V1DImode.
(function_arg_advance_32): Ditto.
(function_arg_32): Ditto.
(struct builtin_description) [IX86_BUILTIN_PADDQ]: Use
mmx_addv1di3 insn pattern.
[IX86_BUILTIN_PSUBQ]: Use mmx_subv1di3 insn pattern.
[IX86_BUILTIN_PSLL?, IX86_BUILTIN_PSRL?, IX86_BUILTIN_PSRA?,
IX86_BUILTIN_PSLL?I, IX86_BUILTIN_PSRL?I, IX86_BUILTIN_PSRA?I,
IX86_BUILTIN_PSLL?I128, IX86_BUILTIN_PSRL?I128, IX86_BUILTIN_PSRA?I128]:
Remove definitions.
(V1DI_type_node): New node.
(v1di_ftype_v1di_int): Ditto.
(v1di_ftype_v1di_v1di): Ditto.
(v2si_ftype_v2si_si): Ditto.
(v4hi_ftype_v4hi_di): Remove node.
(v2si_ftype_v2si_di): Ditto.
(ix86_init_mmx_sse_builtins): Handle V1DImode.
(__builtin_ia32_psll?, __builtin_ia32_psrl?, __builtin_ia32_psra?):
Redefine builtins using def_builtin_const with *_ftype_*_int node.
(__builtin_ia32_psll?i, __builtin_ia32_psrl?i, __builtin_ia32_psra?i):
Add new builtins using def_builtin_const.
(ix86_expand_builtin) [IX86_BUILTIN_PSLL?, IX86_BUILTIN_PSRL?,
IX86_BUILTIN_PSRA?, IX86_BUILTIN_PSLL?I, IX86_BUILTIN_PSRL?I,
IX86_BUILTIN_PSRA?I]: Handle builtin definitions.
* config/i386/mmintrin.h (__v1di): New typedef.
(_mm_add_si64): Cast arguments to __v1di type.
(_mm_sub_si64): Ditto.
(_mm_sll_pi16): Cast __count to __v4hi type.
(_mm_sll_pi32): Cast __count to __v2si type.
(_mm_sll_si64): Cast arguments to __v1di type.
(_mm_srl_pi16): Cast __count to __v4hi type.
(_mm_srl_pi32): Cast __count to __v2si type.
(_mm_srl_si64): Cast arguments to __v1di type.
(_mm_sra_pi16): Cast __count to __v4hi type.
(_mm_sra_pi32): Cast __count to __v2si type.
(_mm_slli_pi16): Use __builtin_ia32_psllwi.
(_mm_slli_pi32): Use __builtin_ia32_pslldi.
(_mm_slli_si64): Use __builtin_ia32_psllqi. Cast __m to __v1di type.
(_mm_srli_pi16): Use __builtin_ia32_psrlwi.
(_mm_srli_pi32): Use __builtin_ia32_psrldi.
(_mm_srli_si64): Use __builtin_ia32_psrlqi. Cast __m to __v1di type.
(_mm_srai_pi16): Use __builtin_ia32_psrawi.
(_mm_srai_pi32): Use __builtin_ia32_psradi.
* config/i386/i386.md (UNSPEC_NOP): Remove unspec definition.
* doc/extend.texi (X86 Built-in Functions) [__builtin_ia32_psll?,
__builtin_ia32_psrl?, __builtin_ia32_psra?, __builtin_ia32_psll?i,
__builtin_ia32_psrl?i, __builtin_ia32_psra?i]: Add new builtins.


The patch was bootstrapped and regression tested on i686-pc-linux-gnu and x86_64-pc-linux-gnu {,-m32}. The testcase will be committed in a separate commit, as I have to clean it up a bit first.

The patch is committed to SVN.

Uros.

Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 133009)
+++ config/i386/i386.h	(working copy)
@@ -1120,8 +1120,9 @@ do {									\
   ((MODE) == V2SFmode || (MODE) == SFmode)
 
 #define VALID_MMX_REG_MODE(MODE)					\
-    ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode	\
-     || (MODE) == V2SImode || (MODE) == SImode)
+  ((MODE == V1DImode) || (MODE) == DImode				\
+   || (MODE) == V2SImode || (MODE) == SImode				\
+   || (MODE) == V4HImode || (MODE) == V8QImode)
 
 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
    place emms and femms instructions.  */
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 133009)
+++ config/i386/i386.md	(working copy)
@@ -95,7 +95,6 @@
    (UNSPEC_RCP			45)
    (UNSPEC_RSQRT		46)
    (UNSPEC_SFENCE		47)
-   (UNSPEC_NOP			48)	; prevents combiner cleverness
    (UNSPEC_PFRCP		49)
    (UNSPEC_PFRCPIT1		40)
    (UNSPEC_PFRCPIT2		41)
Index: config/i386/mmx.md
===================================================================
--- config/i386/mmx.md	(revision 133009)
+++ config/i386/mmx.md	(working copy)
@@ -32,16 +32,18 @@
 
 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
 
 ;; All 8-byte vector modes handled by MMX
-(define_mode_iterator MMXMODE [V8QI V4HI V2SI V2SF])
+(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
 
 ;; Mix-n-match
 (define_mode_iterator MMXMODE12 [V8QI V4HI])
 (define_mode_iterator MMXMODE24 [V4HI V2SI])
+(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
 
 ;; Mapping from integer vector mode to mnemonic suffix
-(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (DI "q")])
+(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
@@ -53,8 +55,8 @@
 ;; This is essential for maintaining stable calling conventions.
 
 (define_expand "mov<mode>"
-  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "")
-	(match_operand:MMXMODEI 1 "nonimmediate_operand" ""))]
+  [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "")
+	(match_operand:MMXMODEI8 1 "nonimmediate_operand" ""))]
   "TARGET_MMX"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
@@ -62,9 +64,9 @@
 })
 
 (define_insn "*mov<mode>_internal_rex64"
-  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
+  [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
 				"=rm,r,!y,!y ,m ,!y,Y2,x,x ,m,r,x")
-	(match_operand:MMXMODEI 1 "vector_move_operand"
+	(match_operand:MMXMODEI8 1 "vector_move_operand"
 				"Cr ,m,C ,!ym,!y,Y2,!y,C,xm,x,x,r"))]
   "TARGET_64BIT && TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
@@ -86,9 +88,9 @@
    (set_attr "mode" "DI")])
 
 (define_insn "*mov<mode>_internal"
-  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
+  [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
 			"=!y,!y ,m ,!y ,*Y2,*Y2,*Y2 ,m  ,*x,*x,*x,m ,?r ,?m")
-	(match_operand:MMXMODEI 1 "vector_move_operand"
+	(match_operand:MMXMODEI8 1 "vector_move_operand"
 			"C  ,!ym,!y,*Y2,!y ,C  ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))]
   "TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
@@ -557,26 +559,16 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (define_insn "mmx_add<mode>3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
-        (plus:MMXMODEI
-	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
-	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+        (plus:MMXMODEI8
+	  (match_operand:MMXMODEI8 1 "nonimmediate_operand" "%0")
+	  (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
+  "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
   "padd<mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxadd")
    (set_attr "mode" "DI")])
 
-(define_insn "mmx_adddi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-	 [(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
-		   (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-	 UNSPEC_NOP))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, DImode, operands)"
-  "paddq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
 (define_insn "mmx_ssadd<mode>3"
   [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
         (ss_plus:MMXMODE12
@@ -598,26 +590,15 @@
    (set_attr "mode" "DI")])
 
 (define_insn "mmx_sub<mode>3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
-        (minus:MMXMODEI
-	  (match_operand:MMXMODEI 1 "register_operand" "0")
-	  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+        (minus:MMXMODEI8
+	  (match_operand:MMXMODEI8 1 "register_operand" "0")
+	  (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
+  "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))"
   "psub<mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxadd")
    (set_attr "mode" "DI")])
 
-(define_insn "mmx_subdi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-	 [(minus:DI (match_operand:DI 1 "register_operand" "0")
-		    (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-	 UNSPEC_NOP))]
-  "TARGET_SSE2"
-  "psubq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
 (define_insn "mmx_sssub<mode>3"
   [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
         (ss_minus:MMXMODE12
@@ -778,54 +759,32 @@
   [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
         (ashiftrt:MMXMODE24
 	  (match_operand:MMXMODE24 1 "register_operand" "0")
-	  (match_operand:DI 2 "nonmemory_operand" "yi")))]
+	  (match_operand:SI 2 "nonmemory_operand" "yN")))]
   "TARGET_MMX"
   "psra<mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
 (define_insn "mmx_lshr<mode>3"
-  [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
-        (lshiftrt:MMXMODE24
-	  (match_operand:MMXMODE24 1 "register_operand" "0")
-	  (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+        (lshiftrt:MMXMODE248
+	  (match_operand:MMXMODE248 1 "register_operand" "0")
+	  (match_operand:SI 2 "nonmemory_operand" "yN")))]
   "TARGET_MMX"
   "psrl<mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "mmx_lshrdi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-	  [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
-		       (match_operand:DI 2 "nonmemory_operand" "yi"))]
-	  UNSPEC_NOP))]
-  "TARGET_MMX"
-  "psrlq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
 (define_insn "mmx_ashl<mode>3"
-  [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
-        (ashift:MMXMODE24
-	  (match_operand:MMXMODE24 1 "register_operand" "0")
-	  (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+        (ashift:MMXMODE248
+	  (match_operand:MMXMODE248 1 "register_operand" "0")
+	  (match_operand:SI 2 "nonmemory_operand" "yN")))]
   "TARGET_MMX"
   "psll<mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "mmx_ashldi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-	 [(ashift:DI (match_operand:DI 1 "register_operand" "0")
-		     (match_operand:DI 2 "nonmemory_operand" "yi"))]
-	 UNSPEC_NOP))]
-  "TARGET_MMX"
-  "psllq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel integral comparisons
Index: config/i386/mmintrin.h
===================================================================
--- config/i386/mmintrin.h	(revision 133009)
+++ config/i386/mmintrin.h	(working copy)
@@ -42,6 +42,7 @@ typedef int __m64 __attribute__ ((__vect
 typedef int __v2si __attribute__ ((__vector_size__ (8)));
 typedef short __v4hi __attribute__ ((__vector_size__ (8)));
 typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef long long __v1di __attribute__ ((__vector_size__ (8)));
 
 /* Empty the multimedia state.  */
 static __inline void __attribute__((__always_inline__, __artificial__))
@@ -309,7 +310,7 @@ _m_paddd (__m64 __m1, __m64 __m2)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_add_si64 (__m64 __m1, __m64 __m2)
 {
-  return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
+  return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
 }
 #endif
 
@@ -413,7 +414,7 @@ _m_psubd (__m64 __m1, __m64 __m2)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_sub_si64 (__m64 __m1, __m64 __m2)
 {
-  return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
+  return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
 }
 #endif
 
@@ -520,7 +521,7 @@ _m_pmullw (__m64 __m1, __m64 __m2)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_sll_pi16 (__m64 __m, __m64 __count)
 {
-  return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count);
+  return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -532,7 +533,7 @@ _m_psllw (__m64 __m, __m64 __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_slli_pi16 (__m64 __m, int __count)
 {
-  return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
+  return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -545,7 +546,7 @@ _m_psllwi (__m64 __m, int __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_sll_pi32 (__m64 __m, __m64 __count)
 {
-  return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count);
+  return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -557,7 +558,7 @@ _m_pslld (__m64 __m, __m64 __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_slli_pi32 (__m64 __m, int __count)
 {
-  return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
+  return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -570,7 +571,7 @@ _m_pslldi (__m64 __m, int __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_sll_si64 (__m64 __m, __m64 __count)
 {
-  return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
+  return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -582,7 +583,7 @@ _m_psllq (__m64 __m, __m64 __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_slli_si64 (__m64 __m, int __count)
 {
-  return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
+  return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -595,7 +596,7 @@ _m_psllqi (__m64 __m, int __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_sra_pi16 (__m64 __m, __m64 __count)
 {
-  return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count);
+  return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -607,7 +608,7 @@ _m_psraw (__m64 __m, __m64 __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_srai_pi16 (__m64 __m, int __count)
 {
-  return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
+  return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -620,7 +621,7 @@ _m_psrawi (__m64 __m, int __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_sra_pi32 (__m64 __m, __m64 __count)
 {
-  return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count);
+  return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -632,7 +633,7 @@ _m_psrad (__m64 __m, __m64 __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_srai_pi32 (__m64 __m, int __count)
 {
-  return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
+  return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -645,7 +646,7 @@ _m_psradi (__m64 __m, int __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_srl_pi16 (__m64 __m, __m64 __count)
 {
-  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count);
+  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -657,7 +658,7 @@ _m_psrlw (__m64 __m, __m64 __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_srli_pi16 (__m64 __m, int __count)
 {
-  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
+  return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -670,7 +671,7 @@ _m_psrlwi (__m64 __m, int __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_srl_pi32 (__m64 __m, __m64 __count)
 {
-  return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count);
+  return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -682,7 +683,7 @@ _m_psrld (__m64 __m, __m64 __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_srli_pi32 (__m64 __m, int __count)
 {
-  return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
+  return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -695,7 +696,7 @@ _m_psrldi (__m64 __m, int __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_srl_si64 (__m64 __m, __m64 __count)
 {
-  return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
+  return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
@@ -707,7 +708,7 @@ _m_psrlq (__m64 __m, __m64 __count)
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
 _mm_srli_si64 (__m64 __m, int __count)
 {
-  return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
+  return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
 }
 
 static __inline __m64 __attribute__((__always_inline__, __artificial__))
Index: config/i386/i386-modes.def
===================================================================
--- config/i386/i386-modes.def	(revision 133009)
+++ config/i386/i386-modes.def	(working copy)
@@ -79,6 +79,7 @@ VECTOR_MODES (INT, 8);        /*       V
 VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI */
 VECTOR_MODES (FLOAT, 8);      /*            V4HF V2SF */
 VECTOR_MODES (FLOAT, 16);     /*       V8HF V4SF V2DF */
+VECTOR_MODE (INT, DI, 1);     /*                 V1DI */
 VECTOR_MODE (INT, QI, 2);     /*                 V2QI */
 VECTOR_MODE (INT, DI, 4);     /*                 V4DI */
 VECTOR_MODE (INT, SI, 8);     /*                 V8SI */
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 133009)
+++ config/i386/i386.c	(working copy)
@@ -3838,7 +3838,7 @@ classify_argument (enum machine_mode mod
     }
 
   /* for V1xx modes, just use the base mode */
-  if (VECTOR_MODE_P (mode)
+  if (VECTOR_MODE_P (mode) && mode != V1DImode
       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
     mode = GET_MODE_INNER (mode);
 
@@ -3910,6 +3910,7 @@ classify_argument (enum machine_mode mod
       classes[0] = X86_64_SSE_CLASS;
       classes[1] = X86_64_SSEUP_CLASS;
       return 2;
+    case V1DImode:
     case V2SFmode:
     case V2SImode:
     case V4HImode:
@@ -4211,6 +4212,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS
     case V4HImode:
     case V2SImode:
     case V2SFmode:
+    case V1DImode:
       if (!type || !AGGREGATE_TYPE_P (type))
 	{
 	  cum->mmx_words += words;
@@ -4374,6 +4376,7 @@ function_arg_32 (CUMULATIVE_ARGS *cum, e
     case V4HImode:
     case V2SImode:
     case V2SFmode:
+    case V1DImode:
       if (!type || !AGGREGATE_TYPE_P (type))
 	{
 	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
@@ -17955,11 +17958,11 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
 
   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
@@ -18010,25 +18013,6 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
 
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
-
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
-
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
-
   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
 
@@ -18140,17 +18124,6 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
 
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
-
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
-
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
-
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
@@ -18212,6 +18185,7 @@ static const struct builtin_description 
 
 static const struct builtin_description bdesc_1arg[] =
 {
+  /* SSE */
   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
 
@@ -18228,6 +18202,7 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
 
+  /* SSE2 */
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
 
@@ -18585,6 +18560,8 @@ ix86_init_mmx_sse_builtins (void)
 
   tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
   tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+  tree V1DI_type_node
+    = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
   tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
   tree V2DI_type_node
     = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
@@ -18649,14 +18626,13 @@ ix86_init_mmx_sse_builtins (void)
   tree v4hi_ftype_v4hi_int
     = build_function_type_list (V4HI_type_node,
 				V4HI_type_node, integer_type_node, NULL_TREE);
-  tree v4hi_ftype_v4hi_di
-    = build_function_type_list (V4HI_type_node,
-				V4HI_type_node, long_long_unsigned_type_node,
-				NULL_TREE);
-  tree v2si_ftype_v2si_di
+  tree v2si_ftype_v2si_int
     = build_function_type_list (V2SI_type_node,
-				V2SI_type_node, long_long_unsigned_type_node,
-				NULL_TREE);
+				V2SI_type_node, integer_type_node, NULL_TREE);
+  tree v1di_ftype_v1di_int
+    = build_function_type_list (V1DI_type_node,
+				V1DI_type_node, integer_type_node, NULL_TREE);
+
   tree void_ftype_void
     = build_function_type (void_type_node, void_list_node);
   tree void_ftype_unsigned
@@ -18723,10 +18699,9 @@ ix86_init_mmx_sse_builtins (void)
   tree v2si_ftype_v2si_v2si
     = build_function_type_list (V2SI_type_node,
 				V2SI_type_node, V2SI_type_node, NULL_TREE);
-  tree di_ftype_di_di
-    = build_function_type_list (long_long_unsigned_type_node,
-				long_long_unsigned_type_node,
-				long_long_unsigned_type_node, NULL_TREE);
+  tree v1di_ftype_v1di_v1di
+    = build_function_type_list (V1DI_type_node,
+				V1DI_type_node, V1DI_type_node, NULL_TREE);
 
   tree di_ftype_di_di_int
     = build_function_type_list (long_long_unsigned_type_node,
@@ -19182,8 +19157,8 @@ ix86_init_mmx_sse_builtins (void)
 	case V2SImode:
 	  type = v2si_ftype_v2si_v2si;
 	  break;
-	case DImode:
-	  type = di_ftype_di_di;
+	case V1DImode:
+	  type = v1di_ftype_v1di_v1di;
 	  break;
 
 	default:
@@ -19275,16 +19250,25 @@ ix86_init_mmx_sse_builtins (void)
 
   /* Add the remaining MMX insns with somewhat more complicated types.  */
   def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
-  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
-  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
-  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
-
-  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
-  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
-  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
 
-  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
-  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSLLW);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSLLD);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ);
+
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRLW);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRLD);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ);
+
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRAW);
+  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRAD);
 
   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
@@ -20829,6 +20813,39 @@ ix86_expand_builtin (tree exp, rtx targe
       emit_insn (pat);
       return target;
 
+    case IX86_BUILTIN_PSLLW:
+    case IX86_BUILTIN_PSLLWI:
+      icode = CODE_FOR_mmx_ashlv4hi3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSLLD:
+    case IX86_BUILTIN_PSLLDI:
+      icode = CODE_FOR_mmx_ashlv2si3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSLLQ:
+    case IX86_BUILTIN_PSLLQI:
+      icode = CODE_FOR_mmx_ashlv1di3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSRAW:
+    case IX86_BUILTIN_PSRAWI:
+      icode = CODE_FOR_mmx_ashrv4hi3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSRAD:
+    case IX86_BUILTIN_PSRADI:
+      icode = CODE_FOR_mmx_ashrv2si3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSRLW:
+    case IX86_BUILTIN_PSRLWI:
+      icode = CODE_FOR_mmx_lshrv4hi3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSRLD:
+    case IX86_BUILTIN_PSRLDI:
+      icode = CODE_FOR_mmx_lshrv2si3;
+      goto do_pshift;
+    case IX86_BUILTIN_PSRLQ:
+    case IX86_BUILTIN_PSRLQI:
+      icode = CODE_FOR_mmx_lshrv1di3;
+      goto do_pshift;
+
     case IX86_BUILTIN_PSLLW128:
     case IX86_BUILTIN_PSLLWI128:
       icode = CODE_FOR_ashlv8hi3;
Index: doc/extend.texi
===================================================================
--- doc/extend.texi	(revision 133009)
+++ doc/extend.texi	(working copy)
@@ -7476,6 +7476,24 @@ v2si __builtin_ia32_punpckldq (v2si, v2s
 v8qi __builtin_ia32_packsswb (v4hi, v4hi)
 v4hi __builtin_ia32_packssdw (v2si, v2si)
 v8qi __builtin_ia32_packuswb (v4hi, v4hi)
+
+v4hi __builtin_ia32_psllw (v4hi, v4hi)
+v2si __builtin_ia32_pslld (v2si, v2si)
+v1di __builtin_ia32_psllq (v1di, v1di)
+v4hi __builtin_ia32_psrlw (v4hi, v4hi)
+v2si __builtin_ia32_psrld (v2si, v2si)
+v1di __builtin_ia32_psrlq (v1di, v1di)
+v4hi __builtin_ia32_psraw (v4hi, v4hi)
+v2si __builtin_ia32_psrad (v2si, v2si)
+v4hi __builtin_ia32_psllwi (v4hi, int)
+v2si __builtin_ia32_pslldi (v2si, int)
+v1di __builtin_ia32_psllqi (v1di, int)
+v4hi __builtin_ia32_psrlwi (v4hi, int)
+v2si __builtin_ia32_psrldi (v2si, int)
+v1di __builtin_ia32_psrlqi (v1di, int)
+v4hi __builtin_ia32_psrawi (v4hi, int)
+v2si __builtin_ia32_psradi (v2si, int)
+
 @end smallexample
 
 The following built-in functions are made available either with


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]