This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[AVX]: Add _mm256_set_epi64x and _mm256_set1_epi64x
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Joey Ye <joey dot ye at intel dot com>, Xuepeng Guo <xuepeng dot guo at intel dot com>
- Date: Sun, 11 May 2008 17:13:18 -0700
- Subject: [AVX]: Add _mm256_set_epi64x and _mm256_set1_epi64x
Hi,
I am checking in this patch to add _mm256_set_epi64x and
_mm256_set1_epi64x, which are GNU extensions, similar to
_mm_set_epi64x and _mm_set1_epi64x.
H.J.
----
2008-05-11 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/gmmintrin.h (_mm256_set_epi64x): New.
(_mm256_set1_epi64x): Likewise.
* config/i386/i386.c (ix86_expand_vector_init_duplicate): Support
V4DImode.
(ix86_expand_vector_init_general): Likewise.
* config/i386/sse.md (AVX256MODE3P): New.
(avxhalfvecmode): Handle V4DI.
(vec_init<mode>): Replace AVX256MODE2P with AVX256MODE3P.
(*vec_concat<mode>_avx): Likewise.
Index: config/i386/gmmintrin.h
===================================================================
--- config/i386/gmmintrin.h (revision 2544)
+++ config/i386/gmmintrin.h (working copy)
@@ -1126,6 +1126,13 @@ _mm256_set_epi32 (int __A, int __B, int
__D, __C, __B, __A };
}
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi64x (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return __extension__ (__m256i)(__v4di){ __D, __C, __B, __A };
+}
+
/* Create a vector with all elements equal to A. */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_pd (double __A)
@@ -1149,6 +1156,12 @@ _mm256_set1_epi32 (int __A)
__A, __A, __A, __A };
}
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi64x (long long __A)
+{
+ return __extension__ (__m256i)(__v4di){ __A, __A, __A, __A };
+}
+
/* Casts between various SP, DP, INT vector types. Note that these do no
conversion of values, they just change the type. */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 2544)
+++ config/i386/sse.md (working copy)
@@ -53,6 +53,7 @@
(define_mode_iterator AVX256MODEF2P [V8SF V4DF])
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
+(define_mode_iterator AVX256MODE3P [V8SI V4DI V8SF V4DF])
(define_mode_iterator AVX256MODE8P [V8SI V8SF])
(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
(define_mode_iterator AVXMODEF4P [V4SF V4DF])
@@ -90,7 +91,7 @@
[(V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
(V4SF "V8SF") (V2DF "V4DF")])
(define_mode_attr avxhalfvecmode
- [(V4SF "V2SF") (V8SI "V4SI") (V8SF "V4SF") (V4DF "V2DF")])
+ [(V4SF "V2SF") (V8SI "V4SI") (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
(define_mode_attr avxscalarmode
[(V4SF "SF") (V2DF "DF") (V8SF "SF") (V4DF "DF")])
(define_mode_attr avxcvtvecmode
@@ -10273,7 +10274,7 @@
(const_string "*")))])
(define_expand "vec_init<mode>"
- [(match_operand:AVX256MODE2P 0 "register_operand" "")
+ [(match_operand:AVX256MODE3P 0 "register_operand" "")
(match_operand 1 "" "")]
"TARGET_AVX"
{
@@ -10282,8 +10283,8 @@
})
(define_insn "*vec_concat<mode>_avx"
- [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
- (vec_concat:AVX256MODE2P
+ [(set (match_operand:AVX256MODE3P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE3P
(match_operand:<avxhalfvecmode> 1 "register_operand" "x")
(match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
"TARGET_AVX"
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 2544)
+++ config/i386/i386.c (working copy)
@@ -24806,6 +24806,9 @@ ix86_expand_vector_init_duplicate (bool
case V4DFmode:
hmode = V2DFmode;
goto half;
+ case V4DImode:
+ hmode = V2DImode;
+ goto half;
case V8SFmode:
hmode = V4SFmode;
goto half;
@@ -25092,6 +25095,9 @@ quarter:
case V4DFmode:
half_mode = V2DFmode;
goto half;
+ case V4DImode:
+ half_mode = V2DImode;
+ goto half;
case V4SFmode:
half_mode = V2SFmode;
goto half;