This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH i386 8/8] [AVX-512] Add SHA support.


Hello,
On 19 Nov 15:36, Uros Bizjak wrote:
> Please also add new command options to g++.dg/other/sse-2.C and
> g++.dg/other/sse-3.C
Done (to i386-[23].C).
> > --mavx2 -mavx512f -mavx512pf -mavx512er -mavx512cd @gol
> > +-mavx2 -mavx512f -mavx512pf -mavx512er -mavx512cd -msha -mno-sha @gol
> 
> No need to document negative option here.
Fxed.

I am testing patch in the bootom and will commit it tomorrow if no more inputs
and testing will pass.

--
Thanks, K

---
 gcc/common/config/i386/i386-common.c          | 18 ++++-
 gcc/config.gcc                                |  6 +-
 gcc/config/i386/cpuid.h                       |  1 +
 gcc/config/i386/driver-i386.c                 |  6 +-
 gcc/config/i386/i386-c.c                      |  2 +
 gcc/config/i386/i386.c                        | 46 ++++++++++++-
 gcc/config/i386/i386.h                        |  2 +
 gcc/config/i386/i386.opt                      |  4 ++
 gcc/config/i386/immintrin.h                   |  2 +
 gcc/config/i386/shaintrin.h                   | 99 +++++++++++++++++++++++++++
 gcc/config/i386/sse.md                        | 90 ++++++++++++++++++++++++
 gcc/doc/invoke.texi                           |  8 ++-
 gcc/testsuite/g++.dg/other/i386-2.C           |  2 +-
 gcc/testsuite/g++.dg/other/i386-3.C           |  2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c         |  3 +
 gcc/testsuite/gcc.target/i386/i386.exp        | 14 ++++
 gcc/testsuite/gcc.target/i386/sha-check.h     | 37 ++++++++++
 gcc/testsuite/gcc.target/i386/sha1msg1-1.c    | 13 ++++
 gcc/testsuite/gcc.target/i386/sha1msg1-2.c    | 42 ++++++++++++
 gcc/testsuite/gcc.target/i386/sha1msg2-1.c    | 13 ++++
 gcc/testsuite/gcc.target/i386/sha1msg2-2.c    | 44 ++++++++++++
 gcc/testsuite/gcc.target/i386/sha1nexte-1.c   | 13 ++++
 gcc/testsuite/gcc.target/i386/sha1nexte-2.c   | 36 ++++++++++
 gcc/testsuite/gcc.target/i386/sha1rnds4-1.c   | 13 ++++
 gcc/testsuite/gcc.target/i386/sha1rnds4-2.c   | 93 +++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/sha256msg1-1.c  | 13 ++++
 gcc/testsuite/gcc.target/i386/sha256msg1-2.c  | 48 +++++++++++++
 gcc/testsuite/gcc.target/i386/sha256msg2-1.c  | 13 ++++
 gcc/testsuite/gcc.target/i386/sha256msg2-2.c  | 49 +++++++++++++
 gcc/testsuite/gcc.target/i386/sha256rnds2-1.c | 13 ++++
 gcc/testsuite/gcc.target/i386/sha256rnds2-2.c | 85 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/sse-13.c        |  3 +
 gcc/testsuite/gcc.target/i386/sse-14.c        |  6 +-
 gcc/testsuite/gcc.target/i386/sse-22.c        |  9 ++-
 gcc/testsuite/gcc.target/i386/sse-23.c        |  5 +-
 35 files changed, 836 insertions(+), 17 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index e07479d..3d87a62 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -84,9 +84,11 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_LWP_SET \
   OPTION_MASK_ISA_LWP
 
-/* AES and PCLMUL need SSE2 because they use xmm registers */
+/* AES, SHA and PCLMUL need SSE2 because they use xmm registers.  */
 #define OPTION_MASK_ISA_AES_SET \
   (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_SHA_SET \
+  (OPTION_MASK_ISA_SHA | OPTION_MASK_ISA_SSE2_SET)
 #define OPTION_MASK_ISA_PCLMUL_SET \
   (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
 
@@ -166,6 +168,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
 
 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
+#define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA
 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
@@ -611,6 +614,19 @@ ix86_handle_option (struct gcc_options *opts,
 	}
       return true;
 
+    case OPT_msha:
+      if (value)
+	{
+	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA_SET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SHA_SET;
+	}
+      else
+	{
+	  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SHA_UNSET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SHA_UNSET;
+	}
+      return true;
+
     case OPT_mpclmul:
       if (value)
 	{
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 9c9aa0d..69d5776 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -375,7 +375,8 @@ i[34567]86-*-*)
 		       avx2intrin.h avx512fintrin.h fmaintrin.h f16cintrin.h
 		       rtmintrin.h xtestintrin.h rdseedintrin.h prfchwintrin.h
 		       adxintrin.h fxsrintrin.h xsaveintrin.h xsaveoptintrin.h
-		       avx512cdintrin.h avx512erintrin.h avx512pfintrin.h"
+		       avx512cdintrin.h avx512erintrin.h avx512pfintrin.h
+		       shaintrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
@@ -391,7 +392,8 @@ x86_64-*-*)
 		       avx2intrin.h avx512fintrin.h fmaintrin.h f16cintrin.h
 		       rtmintrin.h xtestintrin.h rdseedintrin.h prfchwintrin.h
 		       adxintrin.h fxsrintrin.h xsaveintrin.h xsaveoptintrin.h
-		       avx512cdintrin.h avx512erintrin.h avx512pfintrin.h"
+		       avx512cdintrin.h avx512erintrin.h avx512pfintrin.h
+		       shaintrin.h"
 	need_64bit_hwint=yes
 	;;
 ia64-*-*)
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index aa91e1a..de1a463 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -77,6 +77,7 @@
 #define bit_AVX512PF	(1 << 26)
 #define bit_AVX512ER	(1 << 27)
 #define bit_AVX512CD	(1 << 28)
+#define bit_SHA		(1 << 29)
 
 /* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */
 #define bit_XSAVEOPT	(1 << 0)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index e02d05d..985db95 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -409,7 +409,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
   unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
   unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
-  unsigned int has_avx512f = 0;
+  unsigned int has_avx512f = 0, has_sha = 0;
 
   bool arch;
 
@@ -485,6 +485,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       has_avx512er = ebx & bit_AVX512ER;
       has_avx512pf = ebx & bit_AVX512PF;
       has_avx512cd = ebx & bit_AVX512CD;
+      has_sha = ebx & bit_SHA;
     }
 
   if (max_level >= 13)
@@ -850,6 +851,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
       const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
       const char *aes = has_aes ? " -maes" : " -mno-aes";
+      const char *sha = has_sha ? " -msha" : " -mno-sha";
       const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
       const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
       const char *abm = has_abm ? " -mabm" : " -mno-abm";
@@ -882,7 +884,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
 
       options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
-			sse4a, cx16, sahf, movbe, aes, pclmul,
+			sse4a, cx16, sahf, movbe, aes, sha, pclmul,
 			popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
 			tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
 			hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 3710c6e..cc6af7ea 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -327,6 +327,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     def_or_undef (parse_in, "__SSE4_2__");
   if (isa_flag & OPTION_MASK_ISA_AES)
     def_or_undef (parse_in, "__AES__");
+  if (isa_flag & OPTION_MASK_ISA_SHA)
+    def_or_undef (parse_in, "__SHA__");
   if (isa_flag & OPTION_MASK_ISA_PCLMUL)
     def_or_undef (parse_in, "__PCLMUL__");
   if (isa_flag & OPTION_MASK_ISA_AVX)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7ad2153..90473b3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2534,6 +2534,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
     { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
     { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
     { "-maes",		OPTION_MASK_ISA_AES },
+    { "-msha",		OPTION_MASK_ISA_SHA },
     { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
     { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
     { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
@@ -3029,6 +3030,7 @@ ix86_option_override_internal (bool main_args_p,
 #define PTA_AVX512ER		(HOST_WIDE_INT_1 << 41)
 #define PTA_AVX512PF		(HOST_WIDE_INT_1 << 42)
 #define PTA_AVX512CD		(HOST_WIDE_INT_1 << 43)
+#define PTA_SHA			(HOST_WIDE_INT_1 << 45)
 
 #define PTA_CORE2 \
   (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
@@ -3526,8 +3528,11 @@ ix86_option_override_internal (bool main_args_p,
 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
 	if (processor_alias_table[i].flags & PTA_AES
-	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
-	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
+	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
+	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
+	if (processor_alias_table[i].flags & PTA_SHA
+	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
+	  ix86_isa_flags |= OPTION_MASK_ISA_SHA;
 	if (processor_alias_table[i].flags & PTA_PCLMUL
 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
@@ -4416,6 +4421,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
     IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
     IX86_ATTR_ISA ("tbm",	OPT_mtbm),
     IX86_ATTR_ISA ("aes",	OPT_maes),
+    IX86_ATTR_ISA ("sha",	OPT_msha),
     IX86_ATTR_ISA ("avx",	OPT_mavx),
     IX86_ATTR_ISA ("avx2",	OPT_mavx2),
     IX86_ATTR_ISA ("avx512f",	OPT_mavx512f),
@@ -28262,6 +28268,15 @@ enum ix86_builtins
   IX86_BUILTIN_RSQRT28PD,
   IX86_BUILTIN_RSQRT28PS,
 
+  /* SHA builtins.  */
+  IX86_BUILTIN_SHA1MSG1,
+  IX86_BUILTIN_SHA1MSG2,
+  IX86_BUILTIN_SHA1NEXTE,
+  IX86_BUILTIN_SHA1RNDS4,
+  IX86_BUILTIN_SHA256MSG1,
+  IX86_BUILTIN_SHA256MSG2,
+  IX86_BUILTIN_SHA256RNDS2,
+
   /* TFmode support builtins.  */
   IX86_BUILTIN_INFQ,
   IX86_BUILTIN_HUGE_VALQ,
@@ -29908,6 +29923,15 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+
+  /* SHA */
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
 };
 
 /* Builtins with rounding support.  */
@@ -30736,6 +30760,22 @@ ix86_init_mmx_sse_builtins (void)
 	       VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
 	       IX86_BUILTIN_SCATTERPFQPS);
 
+  /* SHA */
+  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
+		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
+  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
+		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
+  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
+		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
+  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
+		     V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
+  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
+		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
+  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
+		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
+  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
+		     V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
+
   /* RTM.  */
   def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
 	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
@@ -33465,6 +33505,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V8SF_FTYPE_V8DF_V8SF_QI:
     case V8SI_FTYPE_V8DF_V8SI_QI:
     case V8SI_FTYPE_V8DI_V8SI_QI:
+    case V4SI_FTYPE_V4SI_V4SI_V4SI:
       nargs = 3;
       break;
     case V32QI_FTYPE_V32QI_V32QI_INT:
@@ -33684,6 +33725,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
 		error ("the last argument must be a 4-bit immediate");
 		return const0_rtx;
 
+	      case CODE_FOR_sha1rnds4:
 	      case CODE_FOR_sse4_1_blendpd:
 	      case CODE_FOR_avx_vpermilv2df:
 	      case CODE_FOR_xop_vpermil2v2df3:
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 5976435..efb7551 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -102,6 +102,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_CRC32_P(x)	TARGET_ISA_CRC32_P(x)
 #define TARGET_AES	TARGET_ISA_AES
 #define TARGET_AES_P(x)	TARGET_ISA_AES_P(x)
+#define TARGET_SHA	TARGET_ISA_SHA
+#define TARGET_SHA_P(x)	TARGET_ISA_SHA_P(x)
 #define TARGET_PCLMUL	TARGET_ISA_PCLMUL
 #define TARGET_PCLMUL_P(x)	TARGET_ISA_PCLMUL_P(x)
 #define TARGET_CMPXCHG16B	TARGET_ISA_CX16
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 1704c52..e86a850 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -725,6 +725,10 @@ maes
 Target Report Mask(ISA_AES) Var(ix86_isa_flags) Save
 Support AES built-in functions and code generation
 
+msha
+Target Report Mask(ISA_SHA) Var(ix86_isa_flags) Save
+Support SHA1 and SHA256 built-in functions and code generation
+
 mpclmul
 Target Report Mask(ISA_PCLMUL) Var(ix86_isa_flags) Save
 Support PCLMUL built-in functions and code generation
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index fa75a30..4fdf000 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -50,6 +50,8 @@
 
 #include <avx512cdintrin.h>
 
+#include <shaintrin.h>
+
 #include <lzcntintrin.h>
 
 #include <bmiintrin.h>
diff --git a/gcc/config/i386/shaintrin.h b/gcc/config/i386/shaintrin.h
new file mode 100644
index 0000000..58c5c5d
--- /dev/null
+++ b/gcc/config/i386/shaintrin.h
@@ -0,0 +1,99 @@
+/* Copyright (C) 2013
+   Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <shaintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _SHAINTRIN_H_INCLUDED
+#define _SHAINTRIN_H_INCLUDED
+
+#ifndef __SHA__
+#pragma GCC push_options
+#pragma GCC target("sha")
+#define __DISABLE_SHA__
+#endif /* __SHA__ */
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1msg1_epu32 (__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_sha1msg1 ((__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1msg2_epu32 (__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_sha1msg2 ((__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1nexte_epu32 (__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_sha1nexte ((__v4si) __A, (__v4si) __B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1rnds4_epu32 (__m128i __A, __m128i __B, const int __I)
+{
+  return (__m128i) __builtin_ia32_sha1rnds4 ((__v4si) __A, (__v4si) __B, __I);
+}
+#else
+#define _mm_sha1rnds4_epu32(A, B, I)				    \
+  ((__m128i) __builtin_ia32_sha1rnds4 ((__v4si)(__m128i)A,	    \
+				       (__v4si)(__m128i)B, (int)I))
+#endif
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha256msg1_epu32 (__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_sha256msg1 ((__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha256msg2_epu32 (__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_sha256msg2 ((__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha256rnds2_epu32 (__m128i __A, __m128i __B, __m128i __C)
+{
+  return (__m128i) __builtin_ia32_sha256rnds2 ((__v4si) __A, (__v4si) __B,
+					       (__v4si) __C);
+}
+
+#ifdef __DISABLE_SHA__
+#undef __DISABLE_SHA__
+#pragma GCC pop_options
+#endif /* __DISABLE_SHA__ */
+
+#endif /* _SHAINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a3c0e0c..5005a47 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -119,6 +119,15 @@
   UNSPEC_EXP2
   UNSPEC_RCP28
   UNSPEC_RSQRT28
+
+  ;; For SHA support
+  UNSPEC_SHA1MSG1
+  UNSPEC_SHA1MSG2
+  UNSPEC_SHA1NEXTE
+  UNSPEC_SHA1RNDS4
+  UNSPEC_SHA256MSG1
+  UNSPEC_SHA256MSG2
+  UNSPEC_SHA256RNDS2
 ])
 
 (define_c_enum "unspecv" [
@@ -15210,3 +15219,84 @@
   [(set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "sha1msg1"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(unspec:V4SI
+	  [(match_operand:V4SI 1 "register_operand" "0")
+	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
+	  UNSPEC_SHA1MSG1))]
+  "TARGET_SHA"
+  "sha1msg1\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "mode" "TI")])
+
+(define_insn "sha1msg2"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(unspec:V4SI
+	  [(match_operand:V4SI 1 "register_operand" "0")
+	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
+	  UNSPEC_SHA1MSG2))]
+  "TARGET_SHA"
+  "sha1msg2\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "mode" "TI")])
+
+(define_insn "sha1nexte"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(unspec:V4SI
+	  [(match_operand:V4SI 1 "register_operand" "0")
+	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
+	  UNSPEC_SHA1NEXTE))]
+  "TARGET_SHA"
+  "sha1nexte\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "mode" "TI")])
+
+(define_insn "sha1rnds4"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(unspec:V4SI
+	  [(match_operand:V4SI 1 "register_operand" "0")
+	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+	   (match_operand:SI 3 "const_0_to_3_operand" "n")]
+	  UNSPEC_SHA1RNDS4))]
+  "TARGET_SHA"
+  "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sselog1")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "TI")])
+
+(define_insn "sha256msg1"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(unspec:V4SI
+	  [(match_operand:V4SI 1 "register_operand" "0")
+	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
+	  UNSPEC_SHA256MSG1))]
+  "TARGET_SHA"
+  "sha256msg1\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "mode" "TI")])
+
+(define_insn "sha256msg2"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(unspec:V4SI
+	  [(match_operand:V4SI 1 "register_operand" "0")
+	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
+	  UNSPEC_SHA256MSG2))]
+  "TARGET_SHA"
+  "sha256msg2\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "mode" "TI")])
+
+(define_insn "sha256rnds2"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(unspec:V4SI
+	  [(match_operand:V4SI 1 "register_operand" "0")
+	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+	   (match_operand:V4SI 3 "register_operand" "Yz")]
+	  UNSPEC_SHA256RNDS2))]
+  "TARGET_SHA"
+  "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sselog1")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "TI")])
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d4ca2bf..ab6c8b7 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -666,7 +666,7 @@ Objective-C and Objective-C++ Dialects}.
 -mrecip -mrecip=@var{opt} @gol
 -mvzeroupper -mprefer-avx128 @gol
 -mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
--mavx2 -mavx512f -mavx512pf -mavx512er -mavx512cd @gol
+-mavx2 -mavx512f -mavx512pf -mavx512er -mavx512cd -msha @gol
 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
 -msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol
 -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mlwp -mthreads @gol
@@ -15181,6 +15181,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @itemx -mno-avx512er
 @itemx -mavx512cd
 @itemx -mno-avx512cd
+@itemx -msha
+@itemx -mno-sha
 @itemx -maes
 @itemx -mno-aes
 @itemx -mpclmul
@@ -15229,8 +15231,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @opindex mno-3dnow
 These switches enable or disable the use of instructions in the MMX, SSE,
 SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD,
-AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2,
-FXSR, XSAVE, XSAVEOPT, LZCNT, RTM or 3DNow!@:
+SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM,
+BMI, BMI2, FXSR, XSAVE, XSAVEOPT, LZCNT, RTM, or 3DNow!@:
 extended instruction sets.
 These extensions are also available as built-in functions: see
 @ref{X86 Built-in Functions}, for details of the functions enabled and
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index 73729eb..55e5f35 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,5 +1,5 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -msha" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index f73d8d7..4a4c755 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,5 +1,5 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -msha" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 75b6f04..0d38f30 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -341,6 +341,9 @@
 #define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, 1)
 #define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, 1)
 
+/* shaintrin.h */
+#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
+
 #include <wmmintrin.h>
 #include <immintrin.h>
 #include <mm3dnow.h>
diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp
index 5d70292..a383940 100644
--- a/gcc/testsuite/gcc.target/i386/i386.exp
+++ b/gcc/testsuite/gcc.target/i386/i386.exp
@@ -293,6 +293,20 @@ proc check_effective_target_avx512er { } {
    } "-Wno-psabi -mavx512er" ]
 }
 
+# Return 1 if sha instructions can be compiled.
+proc check_effective_target_sha { } {
+    return [check_no_compiler_messages sha object {
+	typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+	typedef int __v4si __attribute__ ((__vector_size__ (16)));
+
+	__m128i _mm_sha1msg1_epu32 (__m128i __X, __m128i __Y)
+	{
+            return (__m128i) __builtin_ia32_sha1msg1 ((__v4si)__X,
+						      (__v4si)__Y);
+	}
+    } "-O2 -msha" ]
+}
+
 # If the linker used understands -M <mapfile>, pass it to clear hardware
 # capabilities set by the Sun assembler.
 # Try mapfile syntax v2 first which is the only way to clear hwcap_2 flags.
diff --git a/gcc/testsuite/gcc.target/i386/sha-check.h b/gcc/testsuite/gcc.target/i386/sha-check.h
new file mode 100644
index 0000000..e0a1807
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha-check.h
@@ -0,0 +1,37 @@
+#include <stdlib.h>
+#include "cpuid.h"
+
+static void sha_test (void);
+
+static void
+__attribute__ ((noinline))
+do_test (void)
+{
+  sha_test ();
+}
+
+int
+main ()
+{
+  unsigned int eax, ebx, ecx, edx;
+
+  if (__get_cpuid_max (0, NULL) >= 7)
+    {
+      __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+      /* Run SHA test only if host has SHA support.  */
+      if (ebx & bit_SHA)
+	{
+	  do_test ();
+#ifdef DEBUG
+	  printf ("PASSED\n");
+#endif
+	  return 0;
+	}
+    }
+
+#ifdef DEBUG
+  printf ("SKIPPED\n");
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1msg1-1.c b/gcc/testsuite/gcc.target/i386/sha1msg1-1.c
new file mode 100644
index 0000000..808f361
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1msg1-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha1msg1\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+  x = _mm_sha1msg1_epu32 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1msg1-2.c b/gcc/testsuite/gcc.target/i386/sha1msg1-2.c
new file mode 100644
index 0000000..35a6057
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1msg1-2.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <immintrin.h>
+
+static void
+compute_sha1msg1 (int *s1, int *s2, int *r)
+{
+  int w0, w1, w2, w3, w4, w5;
+
+  w0 = s1[3];
+  w1 = s1[2];
+  w2 = s1[1];
+  w3 = s1[0];
+  w4 = s2[3];
+  w5 = s2[2];
+
+  r[0] = w5 ^ w3;
+  r[1] = w4 ^ w2;
+  r[2] = w3 ^ w1;
+  r[3] = w2 ^ w0;
+}
+
+static void
+sha_test (void)
+{
+  union128i_d s1, s2, res;
+  int res_ref[4];
+
+  s1.x = _mm_set_epi32 (111, 222, 333, 444);
+  s2.x = _mm_set_epi32 (555, 666, 0, 0);
+
+  res.x = _mm_sha1msg1_epu32 (s1.x, s2.x);
+
+  compute_sha1msg1 (s1.a, s2.a, res_ref);
+
+  if (check_union128i_d (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1msg2-1.c b/gcc/testsuite/gcc.target/i386/sha1msg2-1.c
new file mode 100644
index 0000000..9c0ffc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1msg2-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha1msg2\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+  x = _mm_sha1msg2_epu32 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1msg2-2.c b/gcc/testsuite/gcc.target/i386/sha1msg2-2.c
new file mode 100644
index 0000000..21eaf8d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1msg2-2.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
+static void
+compute_sha1msg2 (int *s1, int *s2, int *r)
+{
+  int w13, w14, w15, w16, w17, w18, w19;
+
+  w13 = s2[2];
+  w14 = s2[1];
+  w15 = s2[0];
+  w16 = __rold (s1[3] ^ w13, 1);
+  w17 = __rold (s1[2] ^ w14, 1);
+  w18 = __rold (s1[1] ^ w15, 1);
+  w19 = __rold (s1[0] ^ w16, 1);
+
+  r[0] = w19;
+  r[1] = w18;
+  r[2] = w17;
+  r[3] = w16;
+}
+
+static void
+sha_test (void)
+{
+  union128i_d s1, s2, res;
+  int res_ref[4];
+
+  s1.x = _mm_set_epi32 (111, 222, 333, 444);
+  s2.x = _mm_set_epi32 (555, 666, 777, 0);
+
+  res.x = _mm_sha1msg2_epu32 (s1.x, s2.x);
+
+  compute_sha1msg2 (s1.a, s2.a, res_ref);
+
+  if (check_union128i_d (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1nexte-1.c b/gcc/testsuite/gcc.target/i386/sha1nexte-1.c
new file mode 100644
index 0000000..40edc78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1nexte-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha1nexte\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+  x = _mm_sha1nexte_epu32 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1nexte-2.c b/gcc/testsuite/gcc.target/i386/sha1nexte-2.c
new file mode 100644
index 0000000..f0dc6cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1nexte-2.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
+static void
+compute_sha1nexte (int *s1, int *s2, int *r)
+{
+  int tmp = __rold (s1[3], 30);
+
+  r[0] = s2[0];
+  r[1] = s2[1];
+  r[2] = s2[2];
+  r[3] = s2[3] + tmp;
+}
+
+static void
+sha_test (void)
+{
+  union128i_d s1, s2, res;
+  int res_ref[4];
+
+  s1.x = _mm_set_epi32 (111, 0, 0, 0);
+  s2.x = _mm_set_epi32 (222, 333, 444, 555);
+
+  res.x = _mm_sha1nexte_epu32 (s1.x, s2.x);
+
+  compute_sha1nexte (s1.a, s2.a, res_ref);
+
+  if (check_union128i_d (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1rnds4-1.c b/gcc/testsuite/gcc.target/i386/sha1rnds4-1.c
new file mode 100644
index 0000000..c9da57d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1rnds4-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha1rnds4\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+  x = _mm_sha1rnds4_epu32 (x, x, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1rnds4-2.c b/gcc/testsuite/gcc.target/i386/sha1rnds4-2.c
new file mode 100644
index 0000000..91210b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1rnds4-2.c
@@ -0,0 +1,93 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
+static int
+f0 (int b, int c, int d)
+{
+  return (b & c) ^ (~b & d);
+}
+
+static int
+f1 (int b, int c, int d)
+{
+  return b ^ c ^ d;
+}
+
+static int
+f2 (int b, int c, int d)
+{
+  return (b & c) ^ (b & d) ^ (c & d);
+}
+
+int (*f_arr[4])(int, int, int) = { f0, f1, f2, f1 };
+const int k_arr[4] = { 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6 };
+
+
+static void
+compute_sha1rnds4 (int *src1, int *src2, int imm, int *res)
+{
+  int k = k_arr[imm];
+  int (*f)(int, int, int) = f_arr[imm];
+
+  int w[4] = { src2[3], src2[2], src2[1], src2[0] };
+  int a[5], b[5], c[5], d[5], e[5];
+
+  a[0] = src1[3];
+  b[0] = src1[2];
+  c[0] = src1[1];
+  d[0] = src1[0];
+  e[0] = 0;
+
+  int i;
+  for (i = 0; i <= 3; i++)
+    {
+      a[i+1] = f(b[i], c[i], d[i]) + __rold (a[i], 5) + w[i] + e[i] + k;
+      b[i+1] = a[i];
+      c[i+1] = __rold (b[i], 30);
+      d[i+1] = c[i];
+      e[i+1] = d[i];
+    }
+
+  res[0] = d[4];
+  res[1] = c[4];
+  res[2] = b[4];
+  res[3] = a[4];
+}
+
+
+static void
+sha_test (void)
+{
+  int imm;
+  union128i_d s1, s2, res;
+  int res_ref[4];
+
+  s1.x = _mm_set_epi32 (111, 222, 333, 444);
+  s2.x = _mm_set_epi32 (555, 666, 777, 888);
+
+  res.x = _mm_sha1rnds4_epu32 (s1.x, s2.x, 0);
+  compute_sha1rnds4 (s1.a, s2.a, 0, res_ref);
+  if (check_union128i_d (res, res_ref))
+    abort ();
+
+  res.x = _mm_sha1rnds4_epu32 (s1.x, s2.x, 1);
+  compute_sha1rnds4 (s1.a, s2.a, 1, res_ref);
+  if (check_union128i_d (res, res_ref))
+    abort ();
+
+  res.x = _mm_sha1rnds4_epu32 (s1.x, s2.x, 2);
+  compute_sha1rnds4 (s1.a, s2.a, 2, res_ref);
+  if (check_union128i_d (res, res_ref))
+    abort ();
+
+  res.x = _mm_sha1rnds4_epu32 (s1.x, s2.x, 3);
+  compute_sha1rnds4 (s1.a, s2.a, 3, res_ref);
+  if (check_union128i_d (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256msg1-1.c b/gcc/testsuite/gcc.target/i386/sha256msg1-1.c
new file mode 100644
index 0000000..020874e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256msg1-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha256msg1\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+  x = _mm_sha256msg1_epu32 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256msg1-2.c b/gcc/testsuite/gcc.target/i386/sha256msg1-2.c
new file mode 100644
index 0000000..2b70920
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256msg1-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
+static int
+s0 (int w)
+{
+  return __rord (w, 7) ^ __rord (w, 18) ^ (w >> 3);
+}
+
+static void
+compute_sha256msg1 (int *src1, int *src2, int *res)
+{
+  int w0, w1, w2, w3, w4;
+
+  w0 = src1[0];
+  w1 = src1[1];
+  w2 = src1[2];
+  w3 = src1[3];
+  w4 = src2[0];
+
+  res[0] = w0 + s0 (w1);
+  res[1] = w1 + s0 (w2);
+  res[2] = w2 + s0 (w3);
+  res[3] = w3 + s0 (w4);
+}
+
+static void
+sha_test (void)
+{
+  union128i_d s1, s2, res;
+  int res_ref[4];
+
+  s1.x = _mm_set_epi32 (111, 222, 333, 444);
+  s2.x = _mm_set_epi32 (0, 0, 0, 555);
+
+  res.x = _mm_sha256msg1_epu32 (s1.x, s2.x);
+
+  compute_sha256msg1 (s1.a, s2.a, res_ref);
+
+  if (check_union128i_d (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256msg2-1.c b/gcc/testsuite/gcc.target/i386/sha256msg2-1.c
new file mode 100644
index 0000000..88a9a03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256msg2-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha256msg2\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+  x = _mm_sha256msg2_epu32 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256msg2-2.c b/gcc/testsuite/gcc.target/i386/sha256msg2-2.c
new file mode 100644
index 0000000..ffb0c25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256msg2-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
+static int
+s1 (int w)
+{
+  return __rord (w, 17) ^ __rord (w, 19) ^ (w >> 10);
+}
+
+static void
+compute_sha256msg2 (int *src1, int *src2, int *res)
+{
+  int w14, w15, w16, w17, w18, w19;
+
+  w14 = src2[2];
+  w15 = src2[3];
+  w16 = src1[0] + s1 (w14);
+  w17 = src1[1] + s1 (w15);
+  w18 = src1[2] + s1 (w16);
+  w19 = src1[3] + s1 (w17);
+
+  res[0] = w16;
+  res[1] = w17;
+  res[2] = w18;
+  res[3] = w19;
+}
+
+static void
+sha_test (void)
+{
+  union128i_d s1, s2, res;
+  int res_ref[4];
+
+  s1.x = _mm_set_epi32 (111, 222, 333, 444);
+  s2.x = _mm_set_epi32 (555, 666, 0, 0);
+
+  res.x = _mm_sha256msg2_epu32 (s1.x, s2.x);
+
+  compute_sha256msg2 (s1.a, s2.a, res_ref);
+
+  if (check_union128i_d (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256rnds2-1.c b/gcc/testsuite/gcc.target/i386/sha256rnds2-1.c
new file mode 100644
index 0000000..8bdf664
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256rnds2-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha256rnds2\[ \\t\]+\[^\n\]*%xmm0\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+  x = _mm_sha256rnds2_epu32 (x, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256rnds2-2.c b/gcc/testsuite/gcc.target/i386/sha256rnds2-2.c
new file mode 100644
index 0000000..4e58674
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256rnds2-2.c
@@ -0,0 +1,85 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
+static int
+ch (int e, int f, int g)
+{
+  return (e & f) ^ (~e & g);
+}
+
+static int
+maj (int a, int b, int c)
+{
+  return (a & b) ^ (a & c) ^ (b & c);
+}
+
+static int
+s0 (int a)
+{
+  return __rord (a, 2) ^ __rord (a, 13) ^ __rord (a, 22);
+}
+
+static int
+s1 (int e)
+{
+  return __rord (e, 6) ^ __rord (e, 11) ^ __rord (e, 25);
+}
+
+static void
+compute_sha256rnds2 (int *src0, int *src1, int *src2, int *res)
+{
+  int wk[2] = { src0[0], src0[1] };
+  int a[3], b[3], c[3], d[3], e[3], f[3], g[3], h[3];
+
+  a[0] = src2[3];
+  b[0] = src2[2];
+  c[0] = src1[3];
+  d[0] = src1[2];
+  e[0] = src2[1];
+  f[0] = src2[0];
+  g[0] = src1[1];
+  h[0] = src1[0];
+
+  int i;
+  for (i = 0; i <= 1; i++)
+    {
+      a[i+1] = ch (e[i], f[i], g[i]) + s1 (e[i]) + wk[i] + h[i]
+	       + maj (a[i], b[i], c[i]) + s0 (a[i]);
+      b[i+1] = a[i];
+      c[i+1] = b[i];
+      d[i+1] = c[i];
+      e[i+1] = ch (e[i], f[i], g[i]) + s1 (e[i]) + wk[i] + h[i] + d[i];
+      f[i+1] = e[i];
+      g[i+1] = f[i];
+      h[i+1] = g[i];
+    }
+
+  res[0] = f[2];
+  res[1] = e[2];
+  res[2] = b[2];
+  res[3] = a[2];
+}
+
+static void
+sha_test (void)
+{
+  union128i_d s0, s1, s2, res;
+  int res_ref[4];
+
+  s0.x = _mm_set_epi32 (0, 0, 111, 222);
+  s1.x = _mm_set_epi32 (333, 444, 555, 666);
+  s2.x = _mm_set_epi32 (777, 888, 999, 123);
+
+  res.x = _mm_sha256rnds2_epu32 (s1.x, s2.x, s0.x);
+
+  compute_sha256rnds2 (s0.a, s1.a, s2.a, res_ref);
+
+  if (check_union128i_d (res, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 73aa472..569eacf 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -379,3 +379,6 @@
 #define __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, 1)
 #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D)
 #define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D)
+
+/* shaintrin.h */
+#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 623b56b..e8cb533 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er  -mavx512pf -mavx512cd" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er  -mavx512pf -mavx512cd -msha" } */
+
 #include <mm_malloc.h>
 
 /* Test that the intrinsics compile without optimization.  All of them are
@@ -497,6 +498,9 @@ test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 1)
 test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
 test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 1)
 
+/* shaintrin.h */
+test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)
+
 /* wmmintrin.h */
 test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
 test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 6f625ad..05b4af0 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -99,7 +99,7 @@
 
 
 #ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512pf,avx512er,avx512cd")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512pf,avx512er,avx512cd,sha")
 #endif
 
 /* Following intrinsics require immediate arguments.  They
@@ -212,9 +212,9 @@ test_4 (_mm_cmpestro, int, __m128i, int, __m128i, int, 1)
 test_4 (_mm_cmpestrs, int, __m128i, int, __m128i, int, 1)
 test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
 
-/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F) */
+/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
 #ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha")
 #endif
 #include <immintrin.h>
 test_1 (_cvtss_sh, unsigned short, float, 1)
@@ -666,6 +666,9 @@ test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 1)
 test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
 test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 1)
 
+/* shaintrin.h */
+test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)
+
 /* wmmintrin.h (AES/PCLMUL).  */
 #ifdef DIFFERENT_PRAGMAS
 #pragma GCC target ("aes,pclmul")
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index f993c07..0123538 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -356,7 +356,10 @@
 #define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask (A, B, C, 1)
 #define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask (A, B, C, 1)
 
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512pf,avx512cd")
+/* shaintrin.h */
+#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
+
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512pf,avx512cd,sha")
 #include <wmmintrin.h>
 #include <smmintrin.h>
 #include <mm3dnow.h>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]